1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/Pragma.h"
42 #include "clang/Lex/PreprocessingRecord.h"
43 #include "clang/Lex/PreprocessorOptions.h"
44 #include "clang/Lex/ScratchBuffer.h"
45 #include "llvm/ADT/APFloat.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/Capacity.h"
50 #include "llvm/Support/ConvertUTF.h"
51 #include "llvm/Support/MemoryBuffer.h"
52 #include "llvm/Support/raw_ostream.h"
53 using namespace clang;
54 
55 //===----------------------------------------------------------------------===//
56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
57 
/// \brief Construct a preprocessor bound to the given options, diagnostics
/// engine, language options, source manager, header search and module loader.
///
/// Beyond storing its arguments, the constructor zeroes the statistics
/// counters, sets the default lexing flags, poisons __VA_ARGS__, registers the
/// builtin pragmas and builtin macros, and, when LangOpts.Borland is set,
/// looks up the SEH-related identifiers.
///
/// \param PPOpts          Preprocessor options shared with the caller.
/// \param diags           Diagnostics engine; its address is stored.
/// \param opts            Language options, also used to set up the
///                        identifier table.
/// \param SM              Source manager used for all buffers and locations.
/// \param Headers         Header search object; also supplies the file manager.
/// \param TheModuleLoader Loader used for module imports.
/// \param IILookup        External identifier lookup passed to the identifier
///                        table.
/// \param OwnsHeaders     If true, the destructor deletes \p Headers.
/// \param TUKind          Kind of translation unit being processed.
Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
                           DiagnosticsEngine &diags, LangOptions &opts,
                           SourceManager &SM, HeaderSearch &Headers,
                           ModuleLoader &TheModuleLoader,
                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
                           TranslationUnitKind TUKind)
    : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
      FileMgr(Headers.getFileMgr()), SourceMgr(SM),
      ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers),
      TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
      Identifiers(opts, IILookup),
      PragmaHandlers(new PragmaNamespace(StringRef())),
      IncrementalProcessing(false), TUKind(TUKind),
      CodeComplete(nullptr), CodeCompletionFile(nullptr),
      CodeCompletionOffset(0), LastTokenWasAt(false),
      ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
      MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
      CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
      Callbacks(nullptr), MacroArgCache(nullptr), Record(nullptr),
      MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
  // Remember whether the destructor is responsible for deleting HeaderInfo.
  OwnsHeaderSearch = OwnsHeaders;

  CounterValue = 0; // __COUNTER__ starts at 0.

  // Clear stats.
  NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
  NumIf = NumElse = NumEndif = 0;
  NumEnteredSourceFiles = 0;
  NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
  NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
  MaxIncludeStackDepth = 0;
  NumSkipped = 0;

  // Default to discarding comments.
  KeepComments = false;
  KeepMacroComments = false;
  SuppressIncludeNotFoundError = false;

  // Macro expansion is enabled.
  DisableMacroExpansion = false;
  MacroExpansionInDirectivesOverride = false;
  InMacroArgs = false;
  InMacroArgPreExpansion = false;
  NumCachedTokenLexers = 0;
  PragmasEnabled = true;
  ParsingIfOrElifDirective = false;
  PreprocessedOutput = false;

  // No tokens have been cached for backtracking yet.
  CachedLexPos = 0;

  // We haven't read anything from the external source.
  ReadMacrosFromExternalSource = false;

  // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
  // This gets unpoisoned where it is allowed.
  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);

  // Initialize the pragma handlers.
  RegisterBuiltinPragmas();

  // Initialize builtin macros like __LINE__ and friends.
  RegisterBuiltinMacros();

  // The SEH-related identifiers are only looked up in Borland mode; in every
  // other mode the cached pointers are left null.
  if(LangOpts.Borland) {
    Ident__exception_info        = getIdentifierInfo("_exception_info");
    Ident___exception_info       = getIdentifierInfo("__exception_info");
    Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
    Ident__exception_code        = getIdentifierInfo("_exception_code");
    Ident___exception_code       = getIdentifierInfo("__exception_code");
    Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
    Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
  } else {
    Ident__exception_info = Ident__exception_code = nullptr;
    Ident__abnormal_termination = Ident___exception_info = nullptr;
    Ident___exception_code = Ident___abnormal_termination = nullptr;
    Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
    Ident_AbnormalTermination = nullptr;
  }
}
140 
/// \brief Tear down the preprocessor.
///
/// Clears the include/macro stack, runs the destructors of every node on the
/// two macro-info chains, drops all token lexers, releases the cached
/// MacroArgs, and finally deletes the header search object if this
/// preprocessor owns it. The order of these steps matters (see the comments
/// below).
Preprocessor::~Preprocessor() {
  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");

  IncludeMacroStack.clear();

  // Destroy any macro definitions.
  // Only the destructor is run on each node; the storage itself is not freed
  // here (presumably it is owned by an allocator member -- confirm).
  while (MacroInfoChain *I = MIChainHead) {
    MIChainHead = I->Next;
    I->~MacroInfoChain();
  }

  // Free any cached macro expanders.
  // This populates MacroArgCache, so all TokenLexers need to be destroyed
  // before the code below that frees up the MacroArgCache list.
  std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
  CurTokenLexer.reset();

  // Same treatment for the chain of deserialized macro infos: destructor
  // only, no deallocation.
  while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
    DeserialMIChainHead = I->Next;
    I->~DeserializedMacroInfoChain();
  }

  // Free any cached MacroArgs.
  // deallocate() returns the next entry to visit, so the loop walks the list.
  for (MacroArgs *ArgList = MacroArgCache; ArgList;)
    ArgList = ArgList->deallocate();

  // Delete the header search info, if we own it.
  if (OwnsHeaderSearch)
    delete &HeaderInfo;
}
171 
172 void Preprocessor::Initialize(const TargetInfo &Target) {
173   assert((!this->Target || this->Target == &Target) &&
174          "Invalid override of target information");
175   this->Target = &Target;
176 
177   // Initialize information about built-ins.
178   BuiltinInfo.InitializeTarget(Target);
179   HeaderInfo.setTarget(Target);
180 }
181 
182 void Preprocessor::InitializeForModelFile() {
183   NumEnteredSourceFiles = 0;
184 
185   // Reset pragmas
186   PragmaHandlersBackup = std::move(PragmaHandlers);
187   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
188   RegisterBuiltinPragmas();
189 
190   // Reset PredefinesFileID
191   PredefinesFileID = FileID();
192 }
193 
194 void Preprocessor::FinalizeForModelFile() {
195   NumEnteredSourceFiles = 1;
196 
197   PragmaHandlers = std::move(PragmaHandlersBackup);
198 }
199 
200 void Preprocessor::setPTHManager(PTHManager* pm) {
201   PTH.reset(pm);
202   FileMgr.addStatCache(PTH->createStatCache());
203 }
204 
205 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
206   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
207                << getSpelling(Tok) << "'";
208 
209   if (!DumpFlags) return;
210 
211   llvm::errs() << "\t";
212   if (Tok.isAtStartOfLine())
213     llvm::errs() << " [StartOfLine]";
214   if (Tok.hasLeadingSpace())
215     llvm::errs() << " [LeadingSpace]";
216   if (Tok.isExpandDisabled())
217     llvm::errs() << " [ExpandDisabled]";
218   if (Tok.needsCleaning()) {
219     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
220     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
221                  << "']";
222   }
223 
224   llvm::errs() << "\tLoc=<";
225   DumpLocation(Tok.getLocation());
226   llvm::errs() << ">";
227 }
228 
229 void Preprocessor::DumpLocation(SourceLocation Loc) const {
230   Loc.dump(SourceMgr);
231 }
232 
233 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
234   llvm::errs() << "MACRO: ";
235   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
236     DumpToken(MI.getReplacementToken(i));
237     llvm::errs() << "  ";
238   }
239   llvm::errs() << "\n";
240 }
241 
242 void Preprocessor::PrintStats() {
243   llvm::errs() << "\n*** Preprocessor Stats:\n";
244   llvm::errs() << NumDirectives << " directives found:\n";
245   llvm::errs() << "  " << NumDefined << " #define.\n";
246   llvm::errs() << "  " << NumUndefined << " #undef.\n";
247   llvm::errs() << "  #include/#include_next/#import:\n";
248   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
249   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
250   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
251   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
252   llvm::errs() << "  " << NumEndif << " #endif.\n";
253   llvm::errs() << "  " << NumPragma << " #pragma.\n";
254   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
255 
256   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
257              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
258              << NumFastMacroExpanded << " on the fast path.\n";
259   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
260              << " token paste (##) operations performed, "
261              << NumFastTokenPaste << " on the fast path.\n";
262 
263   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
264 
265   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
266   llvm::errs() << "\n  Macro Expanded Tokens: "
267                << llvm::capacity_in_bytes(MacroExpandedTokens);
268   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
269   llvm::errs() << "\n  Macros: " << llvm::capacity_in_bytes(Macros);
270   llvm::errs() << "\n  #pragma push_macro Info: "
271                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
272   llvm::errs() << "\n  Poison Reasons: "
273                << llvm::capacity_in_bytes(PoisonReasons);
274   llvm::errs() << "\n  Comment Handlers: "
275                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
276 }
277 
278 Preprocessor::macro_iterator
279 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
280   if (IncludeExternalMacros && ExternalSource &&
281       !ReadMacrosFromExternalSource) {
282     ReadMacrosFromExternalSource = true;
283     ExternalSource->ReadDefinedMacros();
284   }
285 
286   return Macros.begin();
287 }
288 
289 size_t Preprocessor::getTotalMemory() const {
290   return BP.getTotalMemory()
291     + llvm::capacity_in_bytes(MacroExpandedTokens)
292     + Predefines.capacity() /* Predefines buffer. */
293     + llvm::capacity_in_bytes(Macros)
294     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
295     + llvm::capacity_in_bytes(PoisonReasons)
296     + llvm::capacity_in_bytes(CommentHandlers);
297 }
298 
299 Preprocessor::macro_iterator
300 Preprocessor::macro_end(bool IncludeExternalMacros) const {
301   if (IncludeExternalMacros && ExternalSource &&
302       !ReadMacrosFromExternalSource) {
303     ReadMacrosFromExternalSource = true;
304     ExternalSource->ReadDefinedMacros();
305   }
306 
307   return Macros.end();
308 }
309 
310 /// \brief Compares macro tokens with a specified token value sequence.
311 static bool MacroDefinitionEquals(const MacroInfo *MI,
312                                   ArrayRef<TokenValue> Tokens) {
313   return Tokens.size() == MI->getNumTokens() &&
314       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
315 }
316 
317 StringRef Preprocessor::getLastMacroWithSpelling(
318                                     SourceLocation Loc,
319                                     ArrayRef<TokenValue> Tokens) const {
320   SourceLocation BestLocation;
321   StringRef BestSpelling;
322   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
323        I != E; ++I) {
324     const MacroDirective::DefInfo
325       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
326     if (!Def || !Def.getMacroInfo())
327       continue;
328     if (!Def.getMacroInfo()->isObjectLike())
329       continue;
330     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
331       continue;
332     SourceLocation Location = Def.getLocation();
333     // Choose the macro defined latest.
334     if (BestLocation.isInvalid() ||
335         (Location.isValid() &&
336          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
337       BestLocation = Location;
338       BestSpelling = I->first->getName();
339     }
340   }
341   return BestSpelling;
342 }
343 
344 void Preprocessor::recomputeCurLexerKind() {
345   if (CurLexer)
346     CurLexerKind = CLK_Lexer;
347   else if (CurPTHLexer)
348     CurLexerKind = CLK_PTHLexer;
349   else if (CurTokenLexer)
350     CurLexerKind = CLK_TokenLexer;
351   else
352     CurLexerKind = CLK_CachingLexer;
353 }
354 
355 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
356                                           unsigned CompleteLine,
357                                           unsigned CompleteColumn) {
358   assert(File);
359   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
360   assert(!CodeCompletionFile && "Already set");
361 
362   using llvm::MemoryBuffer;
363 
364   // Load the actual file's contents.
365   bool Invalid = false;
366   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
367   if (Invalid)
368     return true;
369 
370   // Find the byte position of the truncation point.
371   const char *Position = Buffer->getBufferStart();
372   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
373     for (; *Position; ++Position) {
374       if (*Position != '\r' && *Position != '\n')
375         continue;
376 
377       // Eat \r\n or \n\r as a single line.
378       if ((Position[1] == '\r' || Position[1] == '\n') &&
379           Position[0] != Position[1])
380         ++Position;
381       ++Position;
382       break;
383     }
384   }
385 
386   Position += CompleteColumn - 1;
387 
388   // If pointing inside the preamble, adjust the position at the beginning of
389   // the file after the preamble.
390   if (SkipMainFilePreamble.first &&
391       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
392     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
393       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
394   }
395 
396   if (Position > Buffer->getBufferEnd())
397     Position = Buffer->getBufferEnd();
398 
399   CodeCompletionFile = File;
400   CodeCompletionOffset = Position - Buffer->getBufferStart();
401 
402   std::unique_ptr<MemoryBuffer> NewBuffer =
403       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
404                                           Buffer->getBufferIdentifier());
405   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
406   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
407   *NewPos = '\0';
408   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
409   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
410 
411   return false;
412 }
413 
414 void Preprocessor::CodeCompleteNaturalLanguage() {
415   if (CodeComplete)
416     CodeComplete->CodeCompleteNaturalLanguage();
417   setCodeCompletionReached();
418 }
419 
420 /// getSpelling - This method is used to get the spelling of a token into a
421 /// SmallVector. Note that the returned StringRef may not point to the
422 /// supplied buffer if a copy can be avoided.
423 StringRef Preprocessor::getSpelling(const Token &Tok,
424                                           SmallVectorImpl<char> &Buffer,
425                                           bool *Invalid) const {
426   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
427   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
428     // Try the fast path.
429     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
430       return II->getName();
431   }
432 
433   // Resize the buffer if we need to copy into it.
434   if (Tok.needsCleaning())
435     Buffer.resize(Tok.getLength());
436 
437   const char *Ptr = Buffer.data();
438   unsigned Len = getSpelling(Tok, Ptr, Invalid);
439   return StringRef(Ptr, Len);
440 }
441 
442 /// CreateString - Plop the specified string into a scratch buffer and return a
443 /// location for it.  If specified, the source location provides a source
444 /// location for the token.
445 void Preprocessor::CreateString(StringRef Str, Token &Tok,
446                                 SourceLocation ExpansionLocStart,
447                                 SourceLocation ExpansionLocEnd) {
448   Tok.setLength(Str.size());
449 
450   const char *DestPtr;
451   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
452 
453   if (ExpansionLocStart.isValid())
454     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
455                                        ExpansionLocEnd, Str.size());
456   Tok.setLocation(Loc);
457 
458   // If this is a raw identifier or a literal token, set the pointer data.
459   if (Tok.is(tok::raw_identifier))
460     Tok.setRawIdentifierData(DestPtr);
461   else if (Tok.isLiteral())
462     Tok.setLiteralData(DestPtr);
463 }
464 
465 Module *Preprocessor::getCurrentModule() {
466   if (getLangOpts().CurrentModule.empty())
467     return nullptr;
468 
469   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
470 }
471 
472 //===----------------------------------------------------------------------===//
473 // Preprocessor Initialization Methods
474 //===----------------------------------------------------------------------===//
475 
476 
477 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
478 /// which implicitly adds the builtin defines etc.
479 void Preprocessor::EnterMainSourceFile() {
480   // We do not allow the preprocessor to reenter the main file.  Doing so will
481   // cause FileID's to accumulate information from both runs (e.g. #line
482   // information) and predefined macros aren't guaranteed to be set properly.
483   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
484   FileID MainFileID = SourceMgr.getMainFileID();
485 
486   // If MainFileID is loaded it means we loaded an AST file, no need to enter
487   // a main file.
488   if (!SourceMgr.isLoadedFileID(MainFileID)) {
489     // Enter the main file source buffer.
490     EnterSourceFile(MainFileID, nullptr, SourceLocation());
491 
492     // If we've been asked to skip bytes in the main file (e.g., as part of a
493     // precompiled preamble), do so now.
494     if (SkipMainFilePreamble.first > 0)
495       CurLexer->SkipBytes(SkipMainFilePreamble.first,
496                           SkipMainFilePreamble.second);
497 
498     // Tell the header info that the main file was entered.  If the file is later
499     // #imported, it won't be re-entered.
500     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
501       HeaderInfo.IncrementIncludeCount(FE);
502   }
503 
504   // Preprocess Predefines to populate the initial preprocessor state.
505   std::unique_ptr<llvm::MemoryBuffer> SB =
506     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
507   assert(SB && "Cannot create predefined source buffer");
508   FileID FID = SourceMgr.createFileID(std::move(SB));
509   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
510   setPredefinesFileID(FID);
511 
512   // Start parsing the predefines.
513   EnterSourceFile(FID, nullptr, SourceLocation());
514 }
515 
516 void Preprocessor::EndSourceFile() {
517   // Notify the client that we reached the end of the source file.
518   if (Callbacks)
519     Callbacks->EndOfMainFile();
520 }
521 
522 //===----------------------------------------------------------------------===//
523 // Lexer Event Handling.
524 //===----------------------------------------------------------------------===//
525 
526 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
527 /// identifier information for the token and install it into the token,
528 /// updating the token kind accordingly.
529 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
530   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
531 
532   // Look up this token, see if it is a macro, or if it is a language keyword.
533   IdentifierInfo *II;
534   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
535     // No cleaning needed, just use the characters from the lexed buffer.
536     II = getIdentifierInfo(Identifier.getRawIdentifier());
537   } else {
538     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
539     SmallString<64> IdentifierBuffer;
540     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
541 
542     if (Identifier.hasUCN()) {
543       SmallString<64> UCNIdentifierBuffer;
544       expandUCNs(UCNIdentifierBuffer, CleanedStr);
545       II = getIdentifierInfo(UCNIdentifierBuffer);
546     } else {
547       II = getIdentifierInfo(CleanedStr);
548     }
549   }
550 
551   // Update the token info (identifier info and appropriate token kind).
552   Identifier.setIdentifierInfo(II);
553   Identifier.setKind(II->getTokenID());
554 
555   return II;
556 }
557 
558 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
559   PoisonReasons[II] = DiagID;
560 }
561 
562 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
563   assert(Ident__exception_code && Ident__exception_info);
564   assert(Ident___exception_code && Ident___exception_info);
565   Ident__exception_code->setIsPoisoned(Poison);
566   Ident___exception_code->setIsPoisoned(Poison);
567   Ident_GetExceptionCode->setIsPoisoned(Poison);
568   Ident__exception_info->setIsPoisoned(Poison);
569   Ident___exception_info->setIsPoisoned(Poison);
570   Ident_GetExceptionInfo->setIsPoisoned(Poison);
571   Ident__abnormal_termination->setIsPoisoned(Poison);
572   Ident___abnormal_termination->setIsPoisoned(Poison);
573   Ident_AbnormalTermination->setIsPoisoned(Poison);
574 }
575 
576 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
577   assert(Identifier.getIdentifierInfo() &&
578          "Can't handle identifiers without identifier info!");
579   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
580     PoisonReasons.find(Identifier.getIdentifierInfo());
581   if(it == PoisonReasons.end())
582     Diag(Identifier, diag::err_pp_used_poisoned_id);
583   else
584     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
585 }
586 
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier.  This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \param Identifier The identifier token; it must already carry an
///        IdentifierInfo. Its kind, flags and identifier info may be changed.
///
/// \returns the result of HandleMacroExpandedIdentifier if the identifier is
/// expanded as a macro, and true otherwise (presumably "Identifier holds a
/// token for the caller" -- confirm against the lexer call sites).
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    // Remember the local poison state of __VA_ARGS__ so it can be restored
    // after the update.
    bool CurrentIsPoisoned = false;
    if (&II == Ident__VA_ARGS__)
      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();

    ExternalSource->updateOutOfDateIdentifier(II);
    // The token kind is refreshed from the updated identifier.
    Identifier.setKind(II.getTokenID());

    if (&II == Ident__VA_ARGS__)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDefinition MD = getMacroDefinition(&II)) {
    auto *MI = MD.getMacroInfo();
    assert(MI && "macro definition with no macro info?");
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in C++11, produce a warning. Don't warn if
  // we're not considering macro expansion, since this identifier might be the
  // name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isCXX11CompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, diag::warn_cxx11_keyword) << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsCXX11CompatKeyword(false);
  }

  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
  // then we act as if it is the actual operator and not the textual
  // representation of it.
  if (II.isCPlusPlusOperatorKeyword())
    Identifier.setIdentifierInfo(nullptr);

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
      !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    // Start collecting the module path; subsequent tokens are routed through
    // LexAfterModuleImport by Lex().
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}
686 
687 void Preprocessor::Lex(Token &Result) {
688   // We loop here until a lex function retuns a token; this avoids recursion.
689   bool ReturnedToken;
690   do {
691     switch (CurLexerKind) {
692     case CLK_Lexer:
693       ReturnedToken = CurLexer->Lex(Result);
694       break;
695     case CLK_PTHLexer:
696       ReturnedToken = CurPTHLexer->Lex(Result);
697       break;
698     case CLK_TokenLexer:
699       ReturnedToken = CurTokenLexer->Lex(Result);
700       break;
701     case CLK_CachingLexer:
702       CachingLex(Result);
703       ReturnedToken = true;
704       break;
705     case CLK_LexAfterModuleImport:
706       LexAfterModuleImport(Result);
707       ReturnedToken = true;
708       break;
709     }
710   } while (!ReturnedToken);
711 
712   LastTokenWasAt = Result.is(tok::at);
713 }
714 
715 
716 /// \brief Lex a token following the 'import' contextual keyword.
717 ///
718 void Preprocessor::LexAfterModuleImport(Token &Result) {
719   // Figure out what kind of lexer we actually have.
720   recomputeCurLexerKind();
721 
722   // Lex the next token.
723   Lex(Result);
724 
725   // The token sequence
726   //
727   //   import identifier (. identifier)*
728   //
729   // indicates a module import directive. We already saw the 'import'
730   // contextual keyword, so now we're looking for the identifiers.
731   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
732     // We expected to see an identifier here, and we did; continue handling
733     // identifiers.
734     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
735                                               Result.getLocation()));
736     ModuleImportExpectsIdentifier = false;
737     CurLexerKind = CLK_LexAfterModuleImport;
738     return;
739   }
740 
741   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
742   // see the next identifier.
743   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
744     ModuleImportExpectsIdentifier = true;
745     CurLexerKind = CLK_LexAfterModuleImport;
746     return;
747   }
748 
749   // If we have a non-empty module path, load the named module.
750   if (!ModuleImportPath.empty()) {
751     Module *Imported = nullptr;
752     if (getLangOpts().Modules) {
753       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
754                                             ModuleImportPath,
755                                             Module::Hidden,
756                                             /*IsIncludeDirective=*/false);
757       if (Imported)
758         makeModuleVisible(Imported, ModuleImportLoc);
759     }
760     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
761       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
762   }
763 }
764 
765 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
766   VisibleModules.setVisible(
767       M, Loc, [](Module *) {},
768       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
769         // FIXME: Include the path in the diagnostic.
770         // FIXME: Include the import location for the conflicting module.
771         Diag(ModuleImportLoc, diag::warn_module_conflict)
772             << Path[0]->getFullModuleName()
773             << Conflict->getFullModuleName()
774             << Message;
775       });
776 
777   // Add this module to the imports list of the currently-built submodule.
778   if (!BuildingSubmoduleStack.empty())
779     BuildingSubmoduleStack.back().M->Imports.insert(M);
780 }
781 
782 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
783                                           const char *DiagnosticTag,
784                                           bool AllowMacroExpansion) {
785   // We need at least one string literal.
786   if (Result.isNot(tok::string_literal)) {
787     Diag(Result, diag::err_expected_string_literal)
788       << /*Source='in...'*/0 << DiagnosticTag;
789     return false;
790   }
791 
792   // Lex string literal tokens, optionally with macro expansion.
793   SmallVector<Token, 4> StrToks;
794   do {
795     StrToks.push_back(Result);
796 
797     if (Result.hasUDSuffix())
798       Diag(Result, diag::err_invalid_string_udl);
799 
800     if (AllowMacroExpansion)
801       Lex(Result);
802     else
803       LexUnexpandedToken(Result);
804   } while (Result.is(tok::string_literal));
805 
806   // Concatenate and parse the strings.
807   StringLiteralParser Literal(StrToks, *this);
808   assert(Literal.isAscii() && "Didn't allow wide strings in");
809 
810   if (Literal.hadError)
811     return false;
812 
813   if (Literal.Pascal) {
814     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
815       << /*Source='in...'*/0 << DiagnosticTag;
816     return false;
817   }
818 
819   String = Literal.GetString();
820   return true;
821 }
822 
823 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
824   assert(Tok.is(tok::numeric_constant));
825   SmallString<8> IntegerBuffer;
826   bool NumberInvalid = false;
827   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
828   if (NumberInvalid)
829     return false;
830   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
831   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
832     return false;
833   llvm::APInt APVal(64, 0);
834   if (Literal.GetIntegerValue(APVal))
835     return false;
836   Lex(Tok);
837   Value = APVal.getLimitedValue();
838   return true;
839 }
840 
841 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
842   assert(Handler && "NULL comment handler");
843   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
844          CommentHandlers.end() && "Comment handler already registered");
845   CommentHandlers.push_back(Handler);
846 }
847 
848 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
849   std::vector<CommentHandler *>::iterator Pos
850   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
851   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
852   CommentHandlers.erase(Pos);
853 }
854 
855 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
856   bool AnyPendingTokens = false;
857   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
858        HEnd = CommentHandlers.end();
859        H != HEnd; ++H) {
860     if ((*H)->HandleComment(*this, Comment))
861       AnyPendingTokens = true;
862   }
863   if (!AnyPendingTokens || getCommentRetentionState())
864     return false;
865   Lex(result);
866   return true;
867 }
868 
869 ModuleLoader::~ModuleLoader() { }
870 
871 CommentHandler::~CommentHandler() { }
872 
873 CodeCompletionHandler::~CodeCompletionHandler() { }
874 
875 void Preprocessor::createPreprocessingRecord() {
876   if (Record)
877     return;
878 
879   Record = new PreprocessingRecord(getSourceManager());
880   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
881 }
882