1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/APInt.h"
47 #include "llvm/ADT/DenseMap.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/SmallVector.h"
50 #include "llvm/ADT/STLExtras.h"
51 #include "llvm/ADT/StringRef.h"
52 #include "llvm/ADT/StringSwitch.h"
53 #include "llvm/Support/Capacity.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/MemoryBuffer.h"
56 #include "llvm/Support/raw_ostream.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <memory>
60 #include <string>
61 #include <utility>
62 #include <vector>
63 
64 using namespace clang;
65 
66 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
67 
68 //===----------------------------------------------------------------------===//
69 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
70 
71 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
72                            DiagnosticsEngine &diags, LangOptions &opts,
73                            SourceManager &SM, MemoryBufferCache &PCMCache,
74                            HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
75                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
76                            TranslationUnitKind TUKind)
77     : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
78       AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
79       PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)),
80       HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
81       ExternalSource(nullptr), Identifiers(opts, IILookup),
82       PragmaHandlers(new PragmaNamespace(StringRef())),
83       IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
84       CodeCompletionFile(nullptr), CodeCompletionOffset(0),
85       LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
86       CodeCompletionReached(false), CodeCompletionII(nullptr),
87       MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
88       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer),
89       CurLexerSubmodule(nullptr), Callbacks(nullptr),
90       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
91       Record(nullptr), MIChainHead(nullptr) {
92   OwnsHeaderSearch = OwnsHeaders;
93 
94   CounterValue = 0; // __COUNTER__ starts at 0.
95 
96   // Clear stats.
97   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
98   NumIf = NumElse = NumEndif = 0;
99   NumEnteredSourceFiles = 0;
100   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
101   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
102   MaxIncludeStackDepth = 0;
103   NumSkipped = 0;
104 
105   // Default to discarding comments.
106   KeepComments = false;
107   KeepMacroComments = false;
108   SuppressIncludeNotFoundError = false;
109 
110   // Macro expansion is enabled.
111   DisableMacroExpansion = false;
112   MacroExpansionInDirectivesOverride = false;
113   InMacroArgs = false;
114   InMacroArgPreExpansion = false;
115   NumCachedTokenLexers = 0;
116   PragmasEnabled = true;
117   ParsingIfOrElifDirective = false;
118   PreprocessedOutput = false;
119 
120   CachedLexPos = 0;
121 
122   // We haven't read anything from the external source.
123   ReadMacrosFromExternalSource = false;
124 
125   // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
126   // a macro. They get unpoisoned where it is allowed.
127   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
128   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
129   if (getLangOpts().CPlusPlus2a) {
130     (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
131     SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
132   } else {
133     Ident__VA_OPT__ = nullptr;
134   }
135 
136   // Initialize the pragma handlers.
137   RegisterBuiltinPragmas();
138 
139   // Initialize builtin macros like __LINE__ and friends.
140   RegisterBuiltinMacros();
141 
142   if(LangOpts.Borland) {
143     Ident__exception_info        = getIdentifierInfo("_exception_info");
144     Ident___exception_info       = getIdentifierInfo("__exception_info");
145     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
146     Ident__exception_code        = getIdentifierInfo("_exception_code");
147     Ident___exception_code       = getIdentifierInfo("__exception_code");
148     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
149     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
150     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
151     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
152   } else {
153     Ident__exception_info = Ident__exception_code = nullptr;
154     Ident__abnormal_termination = Ident___exception_info = nullptr;
155     Ident___exception_code = Ident___abnormal_termination = nullptr;
156     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
157     Ident_AbnormalTermination = nullptr;
158   }
159 
160   if (this->PPOpts->GeneratePreamble)
161     PreambleConditionalStack.startRecording();
162 }
163 
164 Preprocessor::~Preprocessor() {
165   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
166 
167   IncludeMacroStack.clear();
168 
169   // Destroy any macro definitions.
170   while (MacroInfoChain *I = MIChainHead) {
171     MIChainHead = I->Next;
172     I->~MacroInfoChain();
173   }
174 
175   // Free any cached macro expanders.
176   // This populates MacroArgCache, so all TokenLexers need to be destroyed
177   // before the code below that frees up the MacroArgCache list.
178   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
179   CurTokenLexer.reset();
180 
181   // Free any cached MacroArgs.
182   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
183     ArgList = ArgList->deallocate();
184 
185   // Delete the header search info, if we own it.
186   if (OwnsHeaderSearch)
187     delete &HeaderInfo;
188 }
189 
190 void Preprocessor::Initialize(const TargetInfo &Target,
191                               const TargetInfo *AuxTarget) {
192   assert((!this->Target || this->Target == &Target) &&
193          "Invalid override of target information");
194   this->Target = &Target;
195 
196   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
197          "Invalid override of aux target information.");
198   this->AuxTarget = AuxTarget;
199 
200   // Initialize information about built-ins.
201   BuiltinInfo.InitializeTarget(Target, AuxTarget);
202   HeaderInfo.setTarget(Target);
203 }
204 
205 void Preprocessor::InitializeForModelFile() {
206   NumEnteredSourceFiles = 0;
207 
208   // Reset pragmas
209   PragmaHandlersBackup = std::move(PragmaHandlers);
210   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
211   RegisterBuiltinPragmas();
212 
213   // Reset PredefinesFileID
214   PredefinesFileID = FileID();
215 }
216 
217 void Preprocessor::FinalizeForModelFile() {
218   NumEnteredSourceFiles = 1;
219 
220   PragmaHandlers = std::move(PragmaHandlersBackup);
221 }
222 
223 void Preprocessor::setPTHManager(PTHManager* pm) {
224   PTH.reset(pm);
225   FileMgr.addStatCache(PTH->createStatCache());
226 }
227 
228 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
229   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
230                << getSpelling(Tok) << "'";
231 
232   if (!DumpFlags) return;
233 
234   llvm::errs() << "\t";
235   if (Tok.isAtStartOfLine())
236     llvm::errs() << " [StartOfLine]";
237   if (Tok.hasLeadingSpace())
238     llvm::errs() << " [LeadingSpace]";
239   if (Tok.isExpandDisabled())
240     llvm::errs() << " [ExpandDisabled]";
241   if (Tok.needsCleaning()) {
242     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
243     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
244                  << "']";
245   }
246 
247   llvm::errs() << "\tLoc=<";
248   DumpLocation(Tok.getLocation());
249   llvm::errs() << ">";
250 }
251 
252 void Preprocessor::DumpLocation(SourceLocation Loc) const {
253   Loc.dump(SourceMgr);
254 }
255 
256 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
257   llvm::errs() << "MACRO: ";
258   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
259     DumpToken(MI.getReplacementToken(i));
260     llvm::errs() << "  ";
261   }
262   llvm::errs() << "\n";
263 }
264 
265 void Preprocessor::PrintStats() {
266   llvm::errs() << "\n*** Preprocessor Stats:\n";
267   llvm::errs() << NumDirectives << " directives found:\n";
268   llvm::errs() << "  " << NumDefined << " #define.\n";
269   llvm::errs() << "  " << NumUndefined << " #undef.\n";
270   llvm::errs() << "  #include/#include_next/#import:\n";
271   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
272   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
273   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
274   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
275   llvm::errs() << "  " << NumEndif << " #endif.\n";
276   llvm::errs() << "  " << NumPragma << " #pragma.\n";
277   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
278 
279   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
280              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
281              << NumFastMacroExpanded << " on the fast path.\n";
282   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
283              << " token paste (##) operations performed, "
284              << NumFastTokenPaste << " on the fast path.\n";
285 
286   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
287 
288   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
289   llvm::errs() << "\n  Macro Expanded Tokens: "
290                << llvm::capacity_in_bytes(MacroExpandedTokens);
291   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
292   // FIXME: List information for all submodules.
293   llvm::errs() << "\n  Macros: "
294                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
295   llvm::errs() << "\n  #pragma push_macro Info: "
296                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
297   llvm::errs() << "\n  Poison Reasons: "
298                << llvm::capacity_in_bytes(PoisonReasons);
299   llvm::errs() << "\n  Comment Handlers: "
300                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
301 }
302 
303 Preprocessor::macro_iterator
304 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
305   if (IncludeExternalMacros && ExternalSource &&
306       !ReadMacrosFromExternalSource) {
307     ReadMacrosFromExternalSource = true;
308     ExternalSource->ReadDefinedMacros();
309   }
310 
311   // Make sure we cover all macros in visible modules.
312   for (const ModuleMacro &Macro : ModuleMacros)
313     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
314 
315   return CurSubmoduleState->Macros.begin();
316 }
317 
318 size_t Preprocessor::getTotalMemory() const {
319   return BP.getTotalMemory()
320     + llvm::capacity_in_bytes(MacroExpandedTokens)
321     + Predefines.capacity() /* Predefines buffer. */
322     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
323     // and ModuleMacros.
324     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
325     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
326     + llvm::capacity_in_bytes(PoisonReasons)
327     + llvm::capacity_in_bytes(CommentHandlers);
328 }
329 
330 Preprocessor::macro_iterator
331 Preprocessor::macro_end(bool IncludeExternalMacros) const {
332   if (IncludeExternalMacros && ExternalSource &&
333       !ReadMacrosFromExternalSource) {
334     ReadMacrosFromExternalSource = true;
335     ExternalSource->ReadDefinedMacros();
336   }
337 
338   return CurSubmoduleState->Macros.end();
339 }
340 
341 /// \brief Compares macro tokens with a specified token value sequence.
342 static bool MacroDefinitionEquals(const MacroInfo *MI,
343                                   ArrayRef<TokenValue> Tokens) {
344   return Tokens.size() == MI->getNumTokens() &&
345       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
346 }
347 
348 StringRef Preprocessor::getLastMacroWithSpelling(
349                                     SourceLocation Loc,
350                                     ArrayRef<TokenValue> Tokens) const {
351   SourceLocation BestLocation;
352   StringRef BestSpelling;
353   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
354        I != E; ++I) {
355     const MacroDirective::DefInfo
356       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
357     if (!Def || !Def.getMacroInfo())
358       continue;
359     if (!Def.getMacroInfo()->isObjectLike())
360       continue;
361     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
362       continue;
363     SourceLocation Location = Def.getLocation();
364     // Choose the macro defined latest.
365     if (BestLocation.isInvalid() ||
366         (Location.isValid() &&
367          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
368       BestLocation = Location;
369       BestSpelling = I->first->getName();
370     }
371   }
372   return BestSpelling;
373 }
374 
375 void Preprocessor::recomputeCurLexerKind() {
376   if (CurLexer)
377     CurLexerKind = CLK_Lexer;
378   else if (CurPTHLexer)
379     CurLexerKind = CLK_PTHLexer;
380   else if (CurTokenLexer)
381     CurLexerKind = CLK_TokenLexer;
382   else
383     CurLexerKind = CLK_CachingLexer;
384 }
385 
386 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
387                                           unsigned CompleteLine,
388                                           unsigned CompleteColumn) {
389   assert(File);
390   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
391   assert(!CodeCompletionFile && "Already set");
392 
393   using llvm::MemoryBuffer;
394 
395   // Load the actual file's contents.
396   bool Invalid = false;
397   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
398   if (Invalid)
399     return true;
400 
401   // Find the byte position of the truncation point.
402   const char *Position = Buffer->getBufferStart();
403   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
404     for (; *Position; ++Position) {
405       if (*Position != '\r' && *Position != '\n')
406         continue;
407 
408       // Eat \r\n or \n\r as a single line.
409       if ((Position[1] == '\r' || Position[1] == '\n') &&
410           Position[0] != Position[1])
411         ++Position;
412       ++Position;
413       break;
414     }
415   }
416 
417   Position += CompleteColumn - 1;
418 
419   // If pointing inside the preamble, adjust the position at the beginning of
420   // the file after the preamble.
421   if (SkipMainFilePreamble.first &&
422       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
423     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
424       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
425   }
426 
427   if (Position > Buffer->getBufferEnd())
428     Position = Buffer->getBufferEnd();
429 
430   CodeCompletionFile = File;
431   CodeCompletionOffset = Position - Buffer->getBufferStart();
432 
433   std::unique_ptr<MemoryBuffer> NewBuffer =
434       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
435                                           Buffer->getBufferIdentifier());
436   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
437   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
438   *NewPos = '\0';
439   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
440   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
441 
442   return false;
443 }
444 
445 void Preprocessor::CodeCompleteNaturalLanguage() {
446   if (CodeComplete)
447     CodeComplete->CodeCompleteNaturalLanguage();
448   setCodeCompletionReached();
449 }
450 
451 /// getSpelling - This method is used to get the spelling of a token into a
452 /// SmallVector. Note that the returned StringRef may not point to the
453 /// supplied buffer if a copy can be avoided.
454 StringRef Preprocessor::getSpelling(const Token &Tok,
455                                           SmallVectorImpl<char> &Buffer,
456                                           bool *Invalid) const {
457   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
458   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
459     // Try the fast path.
460     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
461       return II->getName();
462   }
463 
464   // Resize the buffer if we need to copy into it.
465   if (Tok.needsCleaning())
466     Buffer.resize(Tok.getLength());
467 
468   const char *Ptr = Buffer.data();
469   unsigned Len = getSpelling(Tok, Ptr, Invalid);
470   return StringRef(Ptr, Len);
471 }
472 
473 /// CreateString - Plop the specified string into a scratch buffer and return a
474 /// location for it.  If specified, the source location provides a source
475 /// location for the token.
476 void Preprocessor::CreateString(StringRef Str, Token &Tok,
477                                 SourceLocation ExpansionLocStart,
478                                 SourceLocation ExpansionLocEnd) {
479   Tok.setLength(Str.size());
480 
481   const char *DestPtr;
482   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
483 
484   if (ExpansionLocStart.isValid())
485     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
486                                        ExpansionLocEnd, Str.size());
487   Tok.setLocation(Loc);
488 
489   // If this is a raw identifier or a literal token, set the pointer data.
490   if (Tok.is(tok::raw_identifier))
491     Tok.setRawIdentifierData(DestPtr);
492   else if (Tok.isLiteral())
493     Tok.setLiteralData(DestPtr);
494 }
495 
496 Module *Preprocessor::getCurrentModule() {
497   if (!getLangOpts().isCompilingModule())
498     return nullptr;
499 
500   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
501 }
502 
503 //===----------------------------------------------------------------------===//
504 // Preprocessor Initialization Methods
505 //===----------------------------------------------------------------------===//
506 
507 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
508 /// which implicitly adds the builtin defines etc.
509 void Preprocessor::EnterMainSourceFile() {
510   // We do not allow the preprocessor to reenter the main file.  Doing so will
511   // cause FileID's to accumulate information from both runs (e.g. #line
512   // information) and predefined macros aren't guaranteed to be set properly.
513   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
514   FileID MainFileID = SourceMgr.getMainFileID();
515 
516   // If MainFileID is loaded it means we loaded an AST file, no need to enter
517   // a main file.
518   if (!SourceMgr.isLoadedFileID(MainFileID)) {
519     // Enter the main file source buffer.
520     EnterSourceFile(MainFileID, nullptr, SourceLocation());
521 
522     // If we've been asked to skip bytes in the main file (e.g., as part of a
523     // precompiled preamble), do so now.
524     if (SkipMainFilePreamble.first > 0)
525       CurLexer->SetByteOffset(SkipMainFilePreamble.first,
526                               SkipMainFilePreamble.second);
527 
528     // Tell the header info that the main file was entered.  If the file is later
529     // #imported, it won't be re-entered.
530     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
531       HeaderInfo.IncrementIncludeCount(FE);
532   }
533 
534   // Preprocess Predefines to populate the initial preprocessor state.
535   std::unique_ptr<llvm::MemoryBuffer> SB =
536     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
537   assert(SB && "Cannot create predefined source buffer");
538   FileID FID = SourceMgr.createFileID(std::move(SB));
539   assert(FID.isValid() && "Could not create FileID for predefines?");
540   setPredefinesFileID(FID);
541 
542   // Start parsing the predefines.
543   EnterSourceFile(FID, nullptr, SourceLocation());
544 }
545 
546 void Preprocessor::replayPreambleConditionalStack() {
547   // Restore the conditional stack from the preamble, if there is one.
548   if (PreambleConditionalStack.isReplaying()) {
549     assert(CurPPLexer &&
550            "CurPPLexer is null when calling replayPreambleConditionalStack.");
551     CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
552     PreambleConditionalStack.doneReplaying();
553     if (PreambleConditionalStack.reachedEOFWhileSkipping())
554       SkipExcludedConditionalBlock(
555           PreambleConditionalStack.SkipInfo->HashTokenLoc,
556           PreambleConditionalStack.SkipInfo->IfTokenLoc,
557           PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
558           PreambleConditionalStack.SkipInfo->FoundElse,
559           PreambleConditionalStack.SkipInfo->ElseLoc);
560   }
561 }
562 
563 void Preprocessor::EndSourceFile() {
564   // Notify the client that we reached the end of the source file.
565   if (Callbacks)
566     Callbacks->EndOfMainFile();
567 }
568 
569 //===----------------------------------------------------------------------===//
570 // Lexer Event Handling.
571 //===----------------------------------------------------------------------===//
572 
573 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
574 /// identifier information for the token and install it into the token,
575 /// updating the token kind accordingly.
576 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
577   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
578 
579   // Look up this token, see if it is a macro, or if it is a language keyword.
580   IdentifierInfo *II;
581   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
582     // No cleaning needed, just use the characters from the lexed buffer.
583     II = getIdentifierInfo(Identifier.getRawIdentifier());
584   } else {
585     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
586     SmallString<64> IdentifierBuffer;
587     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
588 
589     if (Identifier.hasUCN()) {
590       SmallString<64> UCNIdentifierBuffer;
591       expandUCNs(UCNIdentifierBuffer, CleanedStr);
592       II = getIdentifierInfo(UCNIdentifierBuffer);
593     } else {
594       II = getIdentifierInfo(CleanedStr);
595     }
596   }
597 
598   // Update the token info (identifier info and appropriate token kind).
599   Identifier.setIdentifierInfo(II);
600   if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
601       getSourceManager().isInSystemHeader(Identifier.getLocation()))
602     Identifier.setKind(clang::tok::identifier);
603   else
604     Identifier.setKind(II->getTokenID());
605 
606   return II;
607 }
608 
609 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
610   PoisonReasons[II] = DiagID;
611 }
612 
613 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
614   assert(Ident__exception_code && Ident__exception_info);
615   assert(Ident___exception_code && Ident___exception_info);
616   Ident__exception_code->setIsPoisoned(Poison);
617   Ident___exception_code->setIsPoisoned(Poison);
618   Ident_GetExceptionCode->setIsPoisoned(Poison);
619   Ident__exception_info->setIsPoisoned(Poison);
620   Ident___exception_info->setIsPoisoned(Poison);
621   Ident_GetExceptionInfo->setIsPoisoned(Poison);
622   Ident__abnormal_termination->setIsPoisoned(Poison);
623   Ident___abnormal_termination->setIsPoisoned(Poison);
624   Ident_AbnormalTermination->setIsPoisoned(Poison);
625 }
626 
627 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
628   assert(Identifier.getIdentifierInfo() &&
629          "Can't handle identifiers without identifier info!");
630   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
631     PoisonReasons.find(Identifier.getIdentifierInfo());
632   if(it == PoisonReasons.end())
633     Diag(Identifier, diag::err_pp_used_poisoned_id);
634   else
635     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
636 }
637 
638 /// \brief Returns a diagnostic message kind for reporting a future keyword as
639 /// appropriate for the identifier and specified language.
640 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
641                                           const LangOptions &LangOpts) {
642   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
643 
644   if (LangOpts.CPlusPlus)
645     return llvm::StringSwitch<diag::kind>(II.getName())
646 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
647         .Case(#NAME, diag::warn_cxx11_keyword)
648 #define CXX2A_KEYWORD(NAME, FLAGS)                                             \
649         .Case(#NAME, diag::warn_cxx2a_keyword)
650 #include "clang/Basic/TokenKinds.def"
651         ;
652 
653   llvm_unreachable(
654       "Keyword not known to come from a newer Standard or proposed Standard");
655 }
656 
657 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
658   assert(II.isOutOfDate() && "not out of date");
659   getExternalSource()->updateOutOfDateIdentifier(II);
660 }
661 
662 /// HandleIdentifier - This callback is invoked when the lexer reads an
663 /// identifier.  This callback looks up the identifier in the map and/or
664 /// potentially macro expands it or turns it into a named token (like 'for').
665 ///
666 /// Note that callers of this method are guarded by checking the
667 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
668 /// IdentifierInfo methods that compute these properties will need to change to
669 /// match.
670 bool Preprocessor::HandleIdentifier(Token &Identifier) {
671   assert(Identifier.getIdentifierInfo() &&
672          "Can't handle identifiers without identifier info!");
673 
674   IdentifierInfo &II = *Identifier.getIdentifierInfo();
675 
676   // If the information about this identifier is out of date, update it from
677   // the external source.
678   // We have to treat __VA_ARGS__ in a special way, since it gets
679   // serialized with isPoisoned = true, but our preprocessor may have
680   // unpoisoned it if we're defining a C99 macro.
681   if (II.isOutOfDate()) {
682     bool CurrentIsPoisoned = false;
683     const bool IsSpecialVariadicMacro =
684         &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
685     if (IsSpecialVariadicMacro)
686       CurrentIsPoisoned = II.isPoisoned();
687 
688     updateOutOfDateIdentifier(II);
689     Identifier.setKind(II.getTokenID());
690 
691     if (IsSpecialVariadicMacro)
692       II.setIsPoisoned(CurrentIsPoisoned);
693   }
694 
695   // If this identifier was poisoned, and if it was not produced from a macro
696   // expansion, emit an error.
697   if (II.isPoisoned() && CurPPLexer) {
698     HandlePoisonedIdentifier(Identifier);
699   }
700 
701   // If this is a macro to be expanded, do it.
702   if (MacroDefinition MD = getMacroDefinition(&II)) {
703     auto *MI = MD.getMacroInfo();
704     assert(MI && "macro definition with no macro info?");
705     if (!DisableMacroExpansion) {
706       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
707         // C99 6.10.3p10: If the preprocessing token immediately after the
708         // macro name isn't a '(', this macro should not be expanded.
709         if (!MI->isFunctionLike() || isNextPPTokenLParen())
710           return HandleMacroExpandedIdentifier(Identifier, MD);
711       } else {
712         // C99 6.10.3.4p2 says that a disabled macro may never again be
713         // expanded, even if it's in a context where it could be expanded in the
714         // future.
715         Identifier.setFlag(Token::DisableExpand);
716         if (MI->isObjectLike() || isNextPPTokenLParen())
717           Diag(Identifier, diag::pp_disabled_macro_expansion);
718       }
719     }
720   }
721 
722   // If this identifier is a keyword in a newer Standard or proposed Standard,
723   // produce a warning. Don't warn if we're not considering macro expansion,
724   // since this identifier might be the name of a macro.
725   // FIXME: This warning is disabled in cases where it shouldn't be, like
726   //   "#define constexpr constexpr", "int constexpr;"
727   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
728     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
729         << II.getName();
730     // Don't diagnose this keyword again in this translation unit.
731     II.setIsFutureCompatKeyword(false);
732   }
733 
734   // If this is an extension token, diagnose its use.
735   // We avoid diagnosing tokens that originate from macro definitions.
736   // FIXME: This warning is disabled in cases where it shouldn't be,
737   // like "#define TY typeof", "TY(1) x".
738   if (II.isExtensionToken() && !DisableMacroExpansion)
739     Diag(Identifier, diag::ext_token_used);
740 
741   // If this is the 'import' contextual keyword following an '@', note
742   // that the next token indicates a module name.
743   //
744   // Note that we do not treat 'import' as a contextual
745   // keyword when we're in a caching lexer, because caching lexers only get
746   // used in contexts where import declarations are disallowed.
747   //
748   // Likewise if this is the C++ Modules TS import keyword.
749   if (((LastTokenWasAt && II.isModulesImport()) ||
750        Identifier.is(tok::kw_import)) &&
751       !InMacroArgs && !DisableMacroExpansion &&
752       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
753       CurLexerKind != CLK_CachingLexer) {
754     ModuleImportLoc = Identifier.getLocation();
755     ModuleImportPath.clear();
756     ModuleImportExpectsIdentifier = true;
757     CurLexerKind = CLK_LexAfterModuleImport;
758   }
759   return true;
760 }
761 
762 void Preprocessor::Lex(Token &Result) {
763   // We loop here until a lex function returns a token; this avoids recursion.
764   bool ReturnedToken;
765   do {
766     switch (CurLexerKind) {
767     case CLK_Lexer:
768       ReturnedToken = CurLexer->Lex(Result);
769       break;
770     case CLK_PTHLexer:
771       ReturnedToken = CurPTHLexer->Lex(Result);
772       break;
773     case CLK_TokenLexer:
774       ReturnedToken = CurTokenLexer->Lex(Result);
775       break;
776     case CLK_CachingLexer:
777       CachingLex(Result);
778       ReturnedToken = true;
779       break;
780     case CLK_LexAfterModuleImport:
781       LexAfterModuleImport(Result);
782       ReturnedToken = true;
783       break;
784     }
785   } while (!ReturnedToken);
786 
787   if (Result.is(tok::code_completion))
788     setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
789 
790   LastTokenWasAt = Result.is(tok::at);
791 }
792 
793 /// \brief Lex a token following the 'import' contextual keyword.
794 ///
795 void Preprocessor::LexAfterModuleImport(Token &Result) {
796   // Figure out what kind of lexer we actually have.
797   recomputeCurLexerKind();
798 
799   // Lex the next token.
800   Lex(Result);
801 
802   // The token sequence
803   //
804   //   import identifier (. identifier)*
805   //
806   // indicates a module import directive. We already saw the 'import'
807   // contextual keyword, so now we're looking for the identifiers.
808   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
809     // We expected to see an identifier here, and we did; continue handling
810     // identifiers.
811     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
812                                               Result.getLocation()));
813     ModuleImportExpectsIdentifier = false;
814     CurLexerKind = CLK_LexAfterModuleImport;
815     return;
816   }
817 
818   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
819   // see the next identifier. (We can also see a '[[' that begins an
820   // attribute-specifier-seq here under the C++ Modules TS.)
821   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
822     ModuleImportExpectsIdentifier = true;
823     CurLexerKind = CLK_LexAfterModuleImport;
824     return;
825   }
826 
827   // If we have a non-empty module path, load the named module.
828   if (!ModuleImportPath.empty()) {
829     // Under the Modules TS, the dot is just part of the module name, and not
830     // a real hierarachy separator. Flatten such module names now.
831     //
832     // FIXME: Is this the right level to be performing this transformation?
833     std::string FlatModuleName;
834     if (getLangOpts().ModulesTS) {
835       for (auto &Piece : ModuleImportPath) {
836         if (!FlatModuleName.empty())
837           FlatModuleName += ".";
838         FlatModuleName += Piece.first->getName();
839       }
840       SourceLocation FirstPathLoc = ModuleImportPath[0].second;
841       ModuleImportPath.clear();
842       ModuleImportPath.push_back(
843           std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
844     }
845 
846     Module *Imported = nullptr;
847     if (getLangOpts().Modules) {
848       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
849                                             ModuleImportPath,
850                                             Module::Hidden,
851                                             /*IsIncludeDirective=*/false);
852       if (Imported)
853         makeModuleVisible(Imported, ModuleImportLoc);
854     }
855     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
856       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
857   }
858 }
859 
860 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
861   CurSubmoduleState->VisibleModules.setVisible(
862       M, Loc, [](Module *) {},
863       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
864         // FIXME: Include the path in the diagnostic.
865         // FIXME: Include the import location for the conflicting module.
866         Diag(ModuleImportLoc, diag::warn_module_conflict)
867             << Path[0]->getFullModuleName()
868             << Conflict->getFullModuleName()
869             << Message;
870       });
871 
872   // Add this module to the imports list of the currently-built submodule.
873   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
874     BuildingSubmoduleStack.back().M->Imports.insert(M);
875 }
876 
877 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
878                                           const char *DiagnosticTag,
879                                           bool AllowMacroExpansion) {
880   // We need at least one string literal.
881   if (Result.isNot(tok::string_literal)) {
882     Diag(Result, diag::err_expected_string_literal)
883       << /*Source='in...'*/0 << DiagnosticTag;
884     return false;
885   }
886 
887   // Lex string literal tokens, optionally with macro expansion.
888   SmallVector<Token, 4> StrToks;
889   do {
890     StrToks.push_back(Result);
891 
892     if (Result.hasUDSuffix())
893       Diag(Result, diag::err_invalid_string_udl);
894 
895     if (AllowMacroExpansion)
896       Lex(Result);
897     else
898       LexUnexpandedToken(Result);
899   } while (Result.is(tok::string_literal));
900 
901   // Concatenate and parse the strings.
902   StringLiteralParser Literal(StrToks, *this);
903   assert(Literal.isAscii() && "Didn't allow wide strings in");
904 
905   if (Literal.hadError)
906     return false;
907 
908   if (Literal.Pascal) {
909     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
910       << /*Source='in...'*/0 << DiagnosticTag;
911     return false;
912   }
913 
914   String = Literal.GetString();
915   return true;
916 }
917 
918 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
919   assert(Tok.is(tok::numeric_constant));
920   SmallString<8> IntegerBuffer;
921   bool NumberInvalid = false;
922   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
923   if (NumberInvalid)
924     return false;
925   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
926   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
927     return false;
928   llvm::APInt APVal(64, 0);
929   if (Literal.GetIntegerValue(APVal))
930     return false;
931   Lex(Tok);
932   Value = APVal.getLimitedValue();
933   return true;
934 }
935 
936 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
937   assert(Handler && "NULL comment handler");
938   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
939          CommentHandlers.end() && "Comment handler already registered");
940   CommentHandlers.push_back(Handler);
941 }
942 
943 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
944   std::vector<CommentHandler *>::iterator Pos
945   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
946   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
947   CommentHandlers.erase(Pos);
948 }
949 
950 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
951   bool AnyPendingTokens = false;
952   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
953        HEnd = CommentHandlers.end();
954        H != HEnd; ++H) {
955     if ((*H)->HandleComment(*this, Comment))
956       AnyPendingTokens = true;
957   }
958   if (!AnyPendingTokens || getCommentRetentionState())
959     return false;
960   Lex(result);
961   return true;
962 }
963 
964 ModuleLoader::~ModuleLoader() { }
965 
966 CommentHandler::~CommentHandler() { }
967 
968 CodeCompletionHandler::~CodeCompletionHandler() { }
969 
970 void Preprocessor::createPreprocessingRecord() {
971   if (Record)
972     return;
973 
974   Record = new PreprocessingRecord(getSourceManager());
975   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
976 }
977