1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/APInt.h"
47 #include "llvm/ADT/DenseMap.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/SmallVector.h"
50 #include "llvm/ADT/STLExtras.h"
51 #include "llvm/ADT/StringRef.h"
52 #include "llvm/ADT/StringSwitch.h"
53 #include "llvm/Support/Capacity.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/MemoryBuffer.h"
56 #include "llvm/Support/raw_ostream.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <memory>
60 #include <string>
61 #include <utility>
62 #include <vector>
63 
64 using namespace clang;
65 
66 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
67 
68 //===----------------------------------------------------------------------===//
69 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
70 
71 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
72                            DiagnosticsEngine &diags, LangOptions &opts,
73                            SourceManager &SM, MemoryBufferCache &PCMCache,
74                            HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
75                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
76                            TranslationUnitKind TUKind)
77     : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
78       AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
79       PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)),
80       HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
81       ExternalSource(nullptr), Identifiers(opts, IILookup),
82       PragmaHandlers(new PragmaNamespace(StringRef())),
83       IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
84       CodeCompletionFile(nullptr), CodeCompletionOffset(0),
85       LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
86       CodeCompletionReached(false), CodeCompletionII(nullptr),
87       MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
88       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer),
89       CurLexerSubmodule(nullptr), Callbacks(nullptr),
90       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
91       Record(nullptr), MIChainHead(nullptr) {
92   OwnsHeaderSearch = OwnsHeaders;
93 
94   CounterValue = 0; // __COUNTER__ starts at 0.
95 
96   // Clear stats.
97   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
98   NumIf = NumElse = NumEndif = 0;
99   NumEnteredSourceFiles = 0;
100   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
101   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
102   MaxIncludeStackDepth = 0;
103   NumSkipped = 0;
104 
105   // Default to discarding comments.
106   KeepComments = false;
107   KeepMacroComments = false;
108   SuppressIncludeNotFoundError = false;
109 
110   // Macro expansion is enabled.
111   DisableMacroExpansion = false;
112   MacroExpansionInDirectivesOverride = false;
113   InMacroArgs = false;
114   InMacroArgPreExpansion = false;
115   NumCachedTokenLexers = 0;
116   PragmasEnabled = true;
117   ParsingIfOrElifDirective = false;
118   PreprocessedOutput = false;
119 
120   CachedLexPos = 0;
121 
122   // We haven't read anything from the external source.
123   ReadMacrosFromExternalSource = false;
124 
125   // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
126   // a macro. They get unpoisoned where it is allowed.
127   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
128   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
129   if (getLangOpts().CPlusPlus2a) {
130     (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
131     SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
132   } else {
133     Ident__VA_OPT__ = nullptr;
134   }
135 
136   // Initialize the pragma handlers.
137   RegisterBuiltinPragmas();
138 
139   // Initialize builtin macros like __LINE__ and friends.
140   RegisterBuiltinMacros();
141 
142   if(LangOpts.Borland) {
143     Ident__exception_info        = getIdentifierInfo("_exception_info");
144     Ident___exception_info       = getIdentifierInfo("__exception_info");
145     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
146     Ident__exception_code        = getIdentifierInfo("_exception_code");
147     Ident___exception_code       = getIdentifierInfo("__exception_code");
148     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
149     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
150     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
151     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
152   } else {
153     Ident__exception_info = Ident__exception_code = nullptr;
154     Ident__abnormal_termination = Ident___exception_info = nullptr;
155     Ident___exception_code = Ident___abnormal_termination = nullptr;
156     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
157     Ident_AbnormalTermination = nullptr;
158   }
159 
160   if (this->PPOpts->GeneratePreamble)
161     PreambleConditionalStack.startRecording();
162 }
163 
164 Preprocessor::~Preprocessor() {
165   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
166 
167   IncludeMacroStack.clear();
168 
169   // Destroy any macro definitions.
170   while (MacroInfoChain *I = MIChainHead) {
171     MIChainHead = I->Next;
172     I->~MacroInfoChain();
173   }
174 
175   // Free any cached macro expanders.
176   // This populates MacroArgCache, so all TokenLexers need to be destroyed
177   // before the code below that frees up the MacroArgCache list.
178   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
179   CurTokenLexer.reset();
180 
181   // Free any cached MacroArgs.
182   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
183     ArgList = ArgList->deallocate();
184 
185   // Delete the header search info, if we own it.
186   if (OwnsHeaderSearch)
187     delete &HeaderInfo;
188 }
189 
190 void Preprocessor::Initialize(const TargetInfo &Target,
191                               const TargetInfo *AuxTarget) {
192   assert((!this->Target || this->Target == &Target) &&
193          "Invalid override of target information");
194   this->Target = &Target;
195 
196   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
197          "Invalid override of aux target information.");
198   this->AuxTarget = AuxTarget;
199 
200   // Initialize information about built-ins.
201   BuiltinInfo.InitializeTarget(Target, AuxTarget);
202   HeaderInfo.setTarget(Target);
203 }
204 
205 void Preprocessor::InitializeForModelFile() {
206   NumEnteredSourceFiles = 0;
207 
208   // Reset pragmas
209   PragmaHandlersBackup = std::move(PragmaHandlers);
210   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
211   RegisterBuiltinPragmas();
212 
213   // Reset PredefinesFileID
214   PredefinesFileID = FileID();
215 }
216 
217 void Preprocessor::FinalizeForModelFile() {
218   NumEnteredSourceFiles = 1;
219 
220   PragmaHandlers = std::move(PragmaHandlersBackup);
221 }
222 
223 void Preprocessor::setPTHManager(PTHManager* pm) {
224   PTH.reset(pm);
225   FileMgr.addStatCache(PTH->createStatCache());
226 }
227 
228 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
229   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
230                << getSpelling(Tok) << "'";
231 
232   if (!DumpFlags) return;
233 
234   llvm::errs() << "\t";
235   if (Tok.isAtStartOfLine())
236     llvm::errs() << " [StartOfLine]";
237   if (Tok.hasLeadingSpace())
238     llvm::errs() << " [LeadingSpace]";
239   if (Tok.isExpandDisabled())
240     llvm::errs() << " [ExpandDisabled]";
241   if (Tok.needsCleaning()) {
242     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
243     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
244                  << "']";
245   }
246 
247   llvm::errs() << "\tLoc=<";
248   DumpLocation(Tok.getLocation());
249   llvm::errs() << ">";
250 }
251 
252 void Preprocessor::DumpLocation(SourceLocation Loc) const {
253   Loc.dump(SourceMgr);
254 }
255 
256 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
257   llvm::errs() << "MACRO: ";
258   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
259     DumpToken(MI.getReplacementToken(i));
260     llvm::errs() << "  ";
261   }
262   llvm::errs() << "\n";
263 }
264 
265 void Preprocessor::PrintStats() {
266   llvm::errs() << "\n*** Preprocessor Stats:\n";
267   llvm::errs() << NumDirectives << " directives found:\n";
268   llvm::errs() << "  " << NumDefined << " #define.\n";
269   llvm::errs() << "  " << NumUndefined << " #undef.\n";
270   llvm::errs() << "  #include/#include_next/#import:\n";
271   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
272   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
273   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
274   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
275   llvm::errs() << "  " << NumEndif << " #endif.\n";
276   llvm::errs() << "  " << NumPragma << " #pragma.\n";
277   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
278 
279   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
280              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
281              << NumFastMacroExpanded << " on the fast path.\n";
282   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
283              << " token paste (##) operations performed, "
284              << NumFastTokenPaste << " on the fast path.\n";
285 
286   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
287 
288   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
289   llvm::errs() << "\n  Macro Expanded Tokens: "
290                << llvm::capacity_in_bytes(MacroExpandedTokens);
291   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
292   // FIXME: List information for all submodules.
293   llvm::errs() << "\n  Macros: "
294                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
295   llvm::errs() << "\n  #pragma push_macro Info: "
296                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
297   llvm::errs() << "\n  Poison Reasons: "
298                << llvm::capacity_in_bytes(PoisonReasons);
299   llvm::errs() << "\n  Comment Handlers: "
300                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
301 }
302 
303 Preprocessor::macro_iterator
304 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
305   if (IncludeExternalMacros && ExternalSource &&
306       !ReadMacrosFromExternalSource) {
307     ReadMacrosFromExternalSource = true;
308     ExternalSource->ReadDefinedMacros();
309   }
310 
311   // Make sure we cover all macros in visible modules.
312   for (const ModuleMacro &Macro : ModuleMacros)
313     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
314 
315   return CurSubmoduleState->Macros.begin();
316 }
317 
318 size_t Preprocessor::getTotalMemory() const {
319   return BP.getTotalMemory()
320     + llvm::capacity_in_bytes(MacroExpandedTokens)
321     + Predefines.capacity() /* Predefines buffer. */
322     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
323     // and ModuleMacros.
324     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
325     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
326     + llvm::capacity_in_bytes(PoisonReasons)
327     + llvm::capacity_in_bytes(CommentHandlers);
328 }
329 
330 Preprocessor::macro_iterator
331 Preprocessor::macro_end(bool IncludeExternalMacros) const {
332   if (IncludeExternalMacros && ExternalSource &&
333       !ReadMacrosFromExternalSource) {
334     ReadMacrosFromExternalSource = true;
335     ExternalSource->ReadDefinedMacros();
336   }
337 
338   return CurSubmoduleState->Macros.end();
339 }
340 
341 /// \brief Compares macro tokens with a specified token value sequence.
342 static bool MacroDefinitionEquals(const MacroInfo *MI,
343                                   ArrayRef<TokenValue> Tokens) {
344   return Tokens.size() == MI->getNumTokens() &&
345       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
346 }
347 
348 StringRef Preprocessor::getLastMacroWithSpelling(
349                                     SourceLocation Loc,
350                                     ArrayRef<TokenValue> Tokens) const {
351   SourceLocation BestLocation;
352   StringRef BestSpelling;
353   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
354        I != E; ++I) {
355     const MacroDirective::DefInfo
356       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
357     if (!Def || !Def.getMacroInfo())
358       continue;
359     if (!Def.getMacroInfo()->isObjectLike())
360       continue;
361     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
362       continue;
363     SourceLocation Location = Def.getLocation();
364     // Choose the macro defined latest.
365     if (BestLocation.isInvalid() ||
366         (Location.isValid() &&
367          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
368       BestLocation = Location;
369       BestSpelling = I->first->getName();
370     }
371   }
372   return BestSpelling;
373 }
374 
375 void Preprocessor::recomputeCurLexerKind() {
376   if (CurLexer)
377     CurLexerKind = CLK_Lexer;
378   else if (CurPTHLexer)
379     CurLexerKind = CLK_PTHLexer;
380   else if (CurTokenLexer)
381     CurLexerKind = CLK_TokenLexer;
382   else
383     CurLexerKind = CLK_CachingLexer;
384 }
385 
386 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
387                                           unsigned CompleteLine,
388                                           unsigned CompleteColumn) {
389   assert(File);
390   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
391   assert(!CodeCompletionFile && "Already set");
392 
393   using llvm::MemoryBuffer;
394 
395   // Load the actual file's contents.
396   bool Invalid = false;
397   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
398   if (Invalid)
399     return true;
400 
401   // Find the byte position of the truncation point.
402   const char *Position = Buffer->getBufferStart();
403   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
404     for (; *Position; ++Position) {
405       if (*Position != '\r' && *Position != '\n')
406         continue;
407 
408       // Eat \r\n or \n\r as a single line.
409       if ((Position[1] == '\r' || Position[1] == '\n') &&
410           Position[0] != Position[1])
411         ++Position;
412       ++Position;
413       break;
414     }
415   }
416 
417   Position += CompleteColumn - 1;
418 
419   // If pointing inside the preamble, adjust the position at the beginning of
420   // the file after the preamble.
421   if (SkipMainFilePreamble.first &&
422       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
423     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
424       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
425   }
426 
427   if (Position > Buffer->getBufferEnd())
428     Position = Buffer->getBufferEnd();
429 
430   CodeCompletionFile = File;
431   CodeCompletionOffset = Position - Buffer->getBufferStart();
432 
433   std::unique_ptr<MemoryBuffer> NewBuffer =
434       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
435                                           Buffer->getBufferIdentifier());
436   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
437   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
438   *NewPos = '\0';
439   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
440   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
441 
442   return false;
443 }
444 
445 void Preprocessor::CodeCompleteNaturalLanguage() {
446   if (CodeComplete)
447     CodeComplete->CodeCompleteNaturalLanguage();
448   setCodeCompletionReached();
449 }
450 
451 /// getSpelling - This method is used to get the spelling of a token into a
452 /// SmallVector. Note that the returned StringRef may not point to the
453 /// supplied buffer if a copy can be avoided.
454 StringRef Preprocessor::getSpelling(const Token &Tok,
455                                           SmallVectorImpl<char> &Buffer,
456                                           bool *Invalid) const {
457   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
458   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
459     // Try the fast path.
460     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
461       return II->getName();
462   }
463 
464   // Resize the buffer if we need to copy into it.
465   if (Tok.needsCleaning())
466     Buffer.resize(Tok.getLength());
467 
468   const char *Ptr = Buffer.data();
469   unsigned Len = getSpelling(Tok, Ptr, Invalid);
470   return StringRef(Ptr, Len);
471 }
472 
473 /// CreateString - Plop the specified string into a scratch buffer and return a
474 /// location for it.  If specified, the source location provides a source
475 /// location for the token.
476 void Preprocessor::CreateString(StringRef Str, Token &Tok,
477                                 SourceLocation ExpansionLocStart,
478                                 SourceLocation ExpansionLocEnd) {
479   Tok.setLength(Str.size());
480 
481   const char *DestPtr;
482   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
483 
484   if (ExpansionLocStart.isValid())
485     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
486                                        ExpansionLocEnd, Str.size());
487   Tok.setLocation(Loc);
488 
489   // If this is a raw identifier or a literal token, set the pointer data.
490   if (Tok.is(tok::raw_identifier))
491     Tok.setRawIdentifierData(DestPtr);
492   else if (Tok.isLiteral())
493     Tok.setLiteralData(DestPtr);
494 }
495 
496 Module *Preprocessor::getCurrentModule() {
497   if (!getLangOpts().isCompilingModule())
498     return nullptr;
499 
500   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
501 }
502 
503 //===----------------------------------------------------------------------===//
504 // Preprocessor Initialization Methods
505 //===----------------------------------------------------------------------===//
506 
507 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
508 /// which implicitly adds the builtin defines etc.
509 void Preprocessor::EnterMainSourceFile() {
510   // We do not allow the preprocessor to reenter the main file.  Doing so will
511   // cause FileID's to accumulate information from both runs (e.g. #line
512   // information) and predefined macros aren't guaranteed to be set properly.
513   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
514   FileID MainFileID = SourceMgr.getMainFileID();
515 
516   // If MainFileID is loaded it means we loaded an AST file, no need to enter
517   // a main file.
518   if (!SourceMgr.isLoadedFileID(MainFileID)) {
519     // Enter the main file source buffer.
520     EnterSourceFile(MainFileID, nullptr, SourceLocation());
521 
522     // If we've been asked to skip bytes in the main file (e.g., as part of a
523     // precompiled preamble), do so now.
524     if (SkipMainFilePreamble.first > 0)
525       CurLexer->SetByteOffset(SkipMainFilePreamble.first,
526                               SkipMainFilePreamble.second);
527 
528     // Tell the header info that the main file was entered.  If the file is later
529     // #imported, it won't be re-entered.
530     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
531       HeaderInfo.IncrementIncludeCount(FE);
532   }
533 
534   // Preprocess Predefines to populate the initial preprocessor state.
535   std::unique_ptr<llvm::MemoryBuffer> SB =
536     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
537   assert(SB && "Cannot create predefined source buffer");
538   FileID FID = SourceMgr.createFileID(std::move(SB));
539   assert(FID.isValid() && "Could not create FileID for predefines?");
540   setPredefinesFileID(FID);
541 
542   // Start parsing the predefines.
543   EnterSourceFile(FID, nullptr, SourceLocation());
544 }
545 
546 void Preprocessor::replayPreambleConditionalStack() {
547   // Restore the conditional stack from the preamble, if there is one.
548   if (PreambleConditionalStack.isReplaying()) {
549     assert(CurPPLexer &&
550            "CurPPLexer is null when calling replayPreambleConditionalStack.");
551     CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
552     PreambleConditionalStack.doneReplaying();
553   }
554 }
555 
556 void Preprocessor::EndSourceFile() {
557   // Notify the client that we reached the end of the source file.
558   if (Callbacks)
559     Callbacks->EndOfMainFile();
560 }
561 
562 //===----------------------------------------------------------------------===//
563 // Lexer Event Handling.
564 //===----------------------------------------------------------------------===//
565 
566 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
567 /// identifier information for the token and install it into the token,
568 /// updating the token kind accordingly.
569 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
570   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
571 
572   // Look up this token, see if it is a macro, or if it is a language keyword.
573   IdentifierInfo *II;
574   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
575     // No cleaning needed, just use the characters from the lexed buffer.
576     II = getIdentifierInfo(Identifier.getRawIdentifier());
577   } else {
578     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
579     SmallString<64> IdentifierBuffer;
580     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
581 
582     if (Identifier.hasUCN()) {
583       SmallString<64> UCNIdentifierBuffer;
584       expandUCNs(UCNIdentifierBuffer, CleanedStr);
585       II = getIdentifierInfo(UCNIdentifierBuffer);
586     } else {
587       II = getIdentifierInfo(CleanedStr);
588     }
589   }
590 
591   // Update the token info (identifier info and appropriate token kind).
592   Identifier.setIdentifierInfo(II);
593   if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
594       getSourceManager().isInSystemHeader(Identifier.getLocation()))
595     Identifier.setKind(clang::tok::identifier);
596   else
597     Identifier.setKind(II->getTokenID());
598 
599   return II;
600 }
601 
602 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
603   PoisonReasons[II] = DiagID;
604 }
605 
606 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
607   assert(Ident__exception_code && Ident__exception_info);
608   assert(Ident___exception_code && Ident___exception_info);
609   Ident__exception_code->setIsPoisoned(Poison);
610   Ident___exception_code->setIsPoisoned(Poison);
611   Ident_GetExceptionCode->setIsPoisoned(Poison);
612   Ident__exception_info->setIsPoisoned(Poison);
613   Ident___exception_info->setIsPoisoned(Poison);
614   Ident_GetExceptionInfo->setIsPoisoned(Poison);
615   Ident__abnormal_termination->setIsPoisoned(Poison);
616   Ident___abnormal_termination->setIsPoisoned(Poison);
617   Ident_AbnormalTermination->setIsPoisoned(Poison);
618 }
619 
620 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
621   assert(Identifier.getIdentifierInfo() &&
622          "Can't handle identifiers without identifier info!");
623   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
624     PoisonReasons.find(Identifier.getIdentifierInfo());
625   if(it == PoisonReasons.end())
626     Diag(Identifier, diag::err_pp_used_poisoned_id);
627   else
628     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
629 }
630 
631 /// \brief Returns a diagnostic message kind for reporting a future keyword as
632 /// appropriate for the identifier and specified language.
633 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
634                                           const LangOptions &LangOpts) {
635   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
636 
637   if (LangOpts.CPlusPlus)
638     return llvm::StringSwitch<diag::kind>(II.getName())
639 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
640         .Case(#NAME, diag::warn_cxx11_keyword)
641 #define CXX2A_KEYWORD(NAME, FLAGS)                                             \
642         .Case(#NAME, diag::warn_cxx2a_keyword)
643 #include "clang/Basic/TokenKinds.def"
644         ;
645 
646   llvm_unreachable(
647       "Keyword not known to come from a newer Standard or proposed Standard");
648 }
649 
650 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
651   assert(II.isOutOfDate() && "not out of date");
652   getExternalSource()->updateOutOfDateIdentifier(II);
653 }
654 
655 /// HandleIdentifier - This callback is invoked when the lexer reads an
656 /// identifier.  This callback looks up the identifier in the map and/or
657 /// potentially macro expands it or turns it into a named token (like 'for').
658 ///
659 /// Note that callers of this method are guarded by checking the
660 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
661 /// IdentifierInfo methods that compute these properties will need to change to
662 /// match.
663 bool Preprocessor::HandleIdentifier(Token &Identifier) {
664   assert(Identifier.getIdentifierInfo() &&
665          "Can't handle identifiers without identifier info!");
666 
667   IdentifierInfo &II = *Identifier.getIdentifierInfo();
668 
669   // If the information about this identifier is out of date, update it from
670   // the external source.
671   // We have to treat __VA_ARGS__ in a special way, since it gets
672   // serialized with isPoisoned = true, but our preprocessor may have
673   // unpoisoned it if we're defining a C99 macro.
674   if (II.isOutOfDate()) {
675     bool CurrentIsPoisoned = false;
676     const bool IsSpecialVariadicMacro =
677         &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
678     if (IsSpecialVariadicMacro)
679       CurrentIsPoisoned = II.isPoisoned();
680 
681     updateOutOfDateIdentifier(II);
682     Identifier.setKind(II.getTokenID());
683 
684     if (IsSpecialVariadicMacro)
685       II.setIsPoisoned(CurrentIsPoisoned);
686   }
687 
688   // If this identifier was poisoned, and if it was not produced from a macro
689   // expansion, emit an error.
690   if (II.isPoisoned() && CurPPLexer) {
691     HandlePoisonedIdentifier(Identifier);
692   }
693 
694   // If this is a macro to be expanded, do it.
695   if (MacroDefinition MD = getMacroDefinition(&II)) {
696     auto *MI = MD.getMacroInfo();
697     assert(MI && "macro definition with no macro info?");
698     if (!DisableMacroExpansion) {
699       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
700         // C99 6.10.3p10: If the preprocessing token immediately after the
701         // macro name isn't a '(', this macro should not be expanded.
702         if (!MI->isFunctionLike() || isNextPPTokenLParen())
703           return HandleMacroExpandedIdentifier(Identifier, MD);
704       } else {
705         // C99 6.10.3.4p2 says that a disabled macro may never again be
706         // expanded, even if it's in a context where it could be expanded in the
707         // future.
708         Identifier.setFlag(Token::DisableExpand);
709         if (MI->isObjectLike() || isNextPPTokenLParen())
710           Diag(Identifier, diag::pp_disabled_macro_expansion);
711       }
712     }
713   }
714 
715   // If this identifier is a keyword in a newer Standard or proposed Standard,
716   // produce a warning. Don't warn if we're not considering macro expansion,
717   // since this identifier might be the name of a macro.
718   // FIXME: This warning is disabled in cases where it shouldn't be, like
719   //   "#define constexpr constexpr", "int constexpr;"
720   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
721     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
722         << II.getName();
723     // Don't diagnose this keyword again in this translation unit.
724     II.setIsFutureCompatKeyword(false);
725   }
726 
727   // If this is an extension token, diagnose its use.
728   // We avoid diagnosing tokens that originate from macro definitions.
729   // FIXME: This warning is disabled in cases where it shouldn't be,
730   // like "#define TY typeof", "TY(1) x".
731   if (II.isExtensionToken() && !DisableMacroExpansion)
732     Diag(Identifier, diag::ext_token_used);
733 
734   // If this is the 'import' contextual keyword following an '@', note
735   // that the next token indicates a module name.
736   //
737   // Note that we do not treat 'import' as a contextual
738   // keyword when we're in a caching lexer, because caching lexers only get
739   // used in contexts where import declarations are disallowed.
740   //
741   // Likewise if this is the C++ Modules TS import keyword.
742   if (((LastTokenWasAt && II.isModulesImport()) ||
743        Identifier.is(tok::kw_import)) &&
744       !InMacroArgs && !DisableMacroExpansion &&
745       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
746       CurLexerKind != CLK_CachingLexer) {
747     ModuleImportLoc = Identifier.getLocation();
748     ModuleImportPath.clear();
749     ModuleImportExpectsIdentifier = true;
750     CurLexerKind = CLK_LexAfterModuleImport;
751   }
752   return true;
753 }
754 
755 void Preprocessor::Lex(Token &Result) {
756   // We loop here until a lex function returns a token; this avoids recursion.
757   bool ReturnedToken;
758   do {
759     switch (CurLexerKind) {
760     case CLK_Lexer:
761       ReturnedToken = CurLexer->Lex(Result);
762       break;
763     case CLK_PTHLexer:
764       ReturnedToken = CurPTHLexer->Lex(Result);
765       break;
766     case CLK_TokenLexer:
767       ReturnedToken = CurTokenLexer->Lex(Result);
768       break;
769     case CLK_CachingLexer:
770       CachingLex(Result);
771       ReturnedToken = true;
772       break;
773     case CLK_LexAfterModuleImport:
774       LexAfterModuleImport(Result);
775       ReturnedToken = true;
776       break;
777     }
778   } while (!ReturnedToken);
779 
780   if (Result.is(tok::code_completion))
781     setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
782 
783   LastTokenWasAt = Result.is(tok::at);
784 }
785 
786 /// \brief Lex a token following the 'import' contextual keyword.
787 ///
788 void Preprocessor::LexAfterModuleImport(Token &Result) {
789   // Figure out what kind of lexer we actually have.
790   recomputeCurLexerKind();
791 
792   // Lex the next token.
793   Lex(Result);
794 
795   // The token sequence
796   //
797   //   import identifier (. identifier)*
798   //
799   // indicates a module import directive. We already saw the 'import'
800   // contextual keyword, so now we're looking for the identifiers.
801   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
802     // We expected to see an identifier here, and we did; continue handling
803     // identifiers.
804     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
805                                               Result.getLocation()));
806     ModuleImportExpectsIdentifier = false;
807     CurLexerKind = CLK_LexAfterModuleImport;
808     return;
809   }
810 
811   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
812   // see the next identifier. (We can also see a '[[' that begins an
813   // attribute-specifier-seq here under the C++ Modules TS.)
814   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
815     ModuleImportExpectsIdentifier = true;
816     CurLexerKind = CLK_LexAfterModuleImport;
817     return;
818   }
819 
820   // If we have a non-empty module path, load the named module.
821   if (!ModuleImportPath.empty()) {
822     // Under the Modules TS, the dot is just part of the module name, and not
823     // a real hierarachy separator. Flatten such module names now.
824     //
825     // FIXME: Is this the right level to be performing this transformation?
826     std::string FlatModuleName;
827     if (getLangOpts().ModulesTS) {
828       for (auto &Piece : ModuleImportPath) {
829         if (!FlatModuleName.empty())
830           FlatModuleName += ".";
831         FlatModuleName += Piece.first->getName();
832       }
833       SourceLocation FirstPathLoc = ModuleImportPath[0].second;
834       ModuleImportPath.clear();
835       ModuleImportPath.push_back(
836           std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
837     }
838 
839     Module *Imported = nullptr;
840     if (getLangOpts().Modules) {
841       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
842                                             ModuleImportPath,
843                                             Module::Hidden,
844                                             /*IsIncludeDirective=*/false);
845       if (Imported)
846         makeModuleVisible(Imported, ModuleImportLoc);
847     }
848     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
849       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
850   }
851 }
852 
853 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
854   CurSubmoduleState->VisibleModules.setVisible(
855       M, Loc, [](Module *) {},
856       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
857         // FIXME: Include the path in the diagnostic.
858         // FIXME: Include the import location for the conflicting module.
859         Diag(ModuleImportLoc, diag::warn_module_conflict)
860             << Path[0]->getFullModuleName()
861             << Conflict->getFullModuleName()
862             << Message;
863       });
864 
865   // Add this module to the imports list of the currently-built submodule.
866   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
867     BuildingSubmoduleStack.back().M->Imports.insert(M);
868 }
869 
870 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
871                                           const char *DiagnosticTag,
872                                           bool AllowMacroExpansion) {
873   // We need at least one string literal.
874   if (Result.isNot(tok::string_literal)) {
875     Diag(Result, diag::err_expected_string_literal)
876       << /*Source='in...'*/0 << DiagnosticTag;
877     return false;
878   }
879 
880   // Lex string literal tokens, optionally with macro expansion.
881   SmallVector<Token, 4> StrToks;
882   do {
883     StrToks.push_back(Result);
884 
885     if (Result.hasUDSuffix())
886       Diag(Result, diag::err_invalid_string_udl);
887 
888     if (AllowMacroExpansion)
889       Lex(Result);
890     else
891       LexUnexpandedToken(Result);
892   } while (Result.is(tok::string_literal));
893 
894   // Concatenate and parse the strings.
895   StringLiteralParser Literal(StrToks, *this);
896   assert(Literal.isAscii() && "Didn't allow wide strings in");
897 
898   if (Literal.hadError)
899     return false;
900 
901   if (Literal.Pascal) {
902     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
903       << /*Source='in...'*/0 << DiagnosticTag;
904     return false;
905   }
906 
907   String = Literal.GetString();
908   return true;
909 }
910 
911 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
912   assert(Tok.is(tok::numeric_constant));
913   SmallString<8> IntegerBuffer;
914   bool NumberInvalid = false;
915   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
916   if (NumberInvalid)
917     return false;
918   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
919   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
920     return false;
921   llvm::APInt APVal(64, 0);
922   if (Literal.GetIntegerValue(APVal))
923     return false;
924   Lex(Tok);
925   Value = APVal.getLimitedValue();
926   return true;
927 }
928 
929 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
930   assert(Handler && "NULL comment handler");
931   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
932          CommentHandlers.end() && "Comment handler already registered");
933   CommentHandlers.push_back(Handler);
934 }
935 
936 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
937   std::vector<CommentHandler *>::iterator Pos
938   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
939   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
940   CommentHandlers.erase(Pos);
941 }
942 
943 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
944   bool AnyPendingTokens = false;
945   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
946        HEnd = CommentHandlers.end();
947        H != HEnd; ++H) {
948     if ((*H)->HandleComment(*this, Comment))
949       AnyPendingTokens = true;
950   }
951   if (!AnyPendingTokens || getCommentRetentionState())
952     return false;
953   Lex(result);
954   return true;
955 }
956 
957 ModuleLoader::~ModuleLoader() { }
958 
959 CommentHandler::~CommentHandler() { }
960 
961 CodeCompletionHandler::~CodeCompletionHandler() { }
962 
963 void Preprocessor::createPreprocessingRecord() {
964   if (Record)
965     return;
966 
967   Record = new PreprocessingRecord(getSourceManager());
968   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
969 }
970