1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/Pragma.h"
42 #include "clang/Lex/PreprocessingRecord.h"
43 #include "clang/Lex/PreprocessorOptions.h"
44 #include "clang/Lex/ScratchBuffer.h"
45 #include "llvm/ADT/APFloat.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/Capacity.h"
50 #include "llvm/Support/ConvertUTF.h"
51 #include "llvm/Support/MemoryBuffer.h"
52 #include "llvm/Support/raw_ostream.h"
53 using namespace clang;
54 
55 //===----------------------------------------------------------------------===//
56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
57 
58 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
59                            DiagnosticsEngine &diags, LangOptions &opts,
60                            SourceManager &SM, HeaderSearch &Headers,
61                            ModuleLoader &TheModuleLoader,
62                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
63                            TranslationUnitKind TUKind)
64     : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
65       FileMgr(Headers.getFileMgr()), SourceMgr(SM),
66       ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers),
67       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
68       Identifiers(opts, IILookup),
69       PragmaHandlers(new PragmaNamespace(StringRef())),
70       IncrementalProcessing(false), TUKind(TUKind),
71       CodeComplete(nullptr), CodeCompletionFile(nullptr),
72       CodeCompletionOffset(0), LastTokenWasAt(false),
73       ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
74       MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
75       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
76       Callbacks(nullptr), MacroArgCache(nullptr), Record(nullptr),
77       MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
78   OwnsHeaderSearch = OwnsHeaders;
79 
80   CounterValue = 0; // __COUNTER__ starts at 0.
81 
82   // Clear stats.
83   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
84   NumIf = NumElse = NumEndif = 0;
85   NumEnteredSourceFiles = 0;
86   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
87   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
88   MaxIncludeStackDepth = 0;
89   NumSkipped = 0;
90 
91   // Default to discarding comments.
92   KeepComments = false;
93   KeepMacroComments = false;
94   SuppressIncludeNotFoundError = false;
95 
96   // Macro expansion is enabled.
97   DisableMacroExpansion = false;
98   MacroExpansionInDirectivesOverride = false;
99   InMacroArgs = false;
100   InMacroArgPreExpansion = false;
101   NumCachedTokenLexers = 0;
102   PragmasEnabled = true;
103   ParsingIfOrElifDirective = false;
104   PreprocessedOutput = false;
105 
106   CachedLexPos = 0;
107 
108   // We haven't read anything from the external source.
109   ReadMacrosFromExternalSource = false;
110 
111   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
112   // This gets unpoisoned where it is allowed.
113   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
114   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
115 
116   // Initialize the pragma handlers.
117   RegisterBuiltinPragmas();
118 
119   // Initialize builtin macros like __LINE__ and friends.
120   RegisterBuiltinMacros();
121 
122   if(LangOpts.Borland) {
123     Ident__exception_info        = getIdentifierInfo("_exception_info");
124     Ident___exception_info       = getIdentifierInfo("__exception_info");
125     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
126     Ident__exception_code        = getIdentifierInfo("_exception_code");
127     Ident___exception_code       = getIdentifierInfo("__exception_code");
128     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
129     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
130     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
131     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
132   } else {
133     Ident__exception_info = Ident__exception_code = nullptr;
134     Ident__abnormal_termination = Ident___exception_info = nullptr;
135     Ident___exception_code = Ident___abnormal_termination = nullptr;
136     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
137     Ident_AbnormalTermination = nullptr;
138   }
139 }
140 
141 Preprocessor::~Preprocessor() {
142   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
143 
144   IncludeMacroStack.clear();
145 
146   // Destroy any macro definitions.
147   while (MacroInfoChain *I = MIChainHead) {
148     MIChainHead = I->Next;
149     I->~MacroInfoChain();
150   }
151 
152   // Free any cached macro expanders.
153   // This populates MacroArgCache, so all TokenLexers need to be destroyed
154   // before the code below that frees up the MacroArgCache list.
155   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
156   CurTokenLexer.reset();
157 
158   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
159     DeserialMIChainHead = I->Next;
160     I->~DeserializedMacroInfoChain();
161   }
162 
163   // Free any cached MacroArgs.
164   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
165     ArgList = ArgList->deallocate();
166 
167   // Delete the header search info, if we own it.
168   if (OwnsHeaderSearch)
169     delete &HeaderInfo;
170 }
171 
172 void Preprocessor::Initialize(const TargetInfo &Target) {
173   assert((!this->Target || this->Target == &Target) &&
174          "Invalid override of target information");
175   this->Target = &Target;
176 
177   // Initialize information about built-ins.
178   BuiltinInfo.InitializeTarget(Target);
179   HeaderInfo.setTarget(Target);
180 }
181 
182 void Preprocessor::InitializeForModelFile() {
183   NumEnteredSourceFiles = 0;
184 
185   // Reset pragmas
186   PragmaHandlersBackup = std::move(PragmaHandlers);
187   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
188   RegisterBuiltinPragmas();
189 
190   // Reset PredefinesFileID
191   PredefinesFileID = FileID();
192 }
193 
194 void Preprocessor::FinalizeForModelFile() {
195   NumEnteredSourceFiles = 1;
196 
197   PragmaHandlers = std::move(PragmaHandlersBackup);
198 }
199 
200 void Preprocessor::setPTHManager(PTHManager* pm) {
201   PTH.reset(pm);
202   FileMgr.addStatCache(PTH->createStatCache());
203 }
204 
205 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
206   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
207                << getSpelling(Tok) << "'";
208 
209   if (!DumpFlags) return;
210 
211   llvm::errs() << "\t";
212   if (Tok.isAtStartOfLine())
213     llvm::errs() << " [StartOfLine]";
214   if (Tok.hasLeadingSpace())
215     llvm::errs() << " [LeadingSpace]";
216   if (Tok.isExpandDisabled())
217     llvm::errs() << " [ExpandDisabled]";
218   if (Tok.needsCleaning()) {
219     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
220     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
221                  << "']";
222   }
223 
224   llvm::errs() << "\tLoc=<";
225   DumpLocation(Tok.getLocation());
226   llvm::errs() << ">";
227 }
228 
229 void Preprocessor::DumpLocation(SourceLocation Loc) const {
230   Loc.dump(SourceMgr);
231 }
232 
233 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
234   llvm::errs() << "MACRO: ";
235   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
236     DumpToken(MI.getReplacementToken(i));
237     llvm::errs() << "  ";
238   }
239   llvm::errs() << "\n";
240 }
241 
242 void Preprocessor::PrintStats() {
243   llvm::errs() << "\n*** Preprocessor Stats:\n";
244   llvm::errs() << NumDirectives << " directives found:\n";
245   llvm::errs() << "  " << NumDefined << " #define.\n";
246   llvm::errs() << "  " << NumUndefined << " #undef.\n";
247   llvm::errs() << "  #include/#include_next/#import:\n";
248   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
249   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
250   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
251   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
252   llvm::errs() << "  " << NumEndif << " #endif.\n";
253   llvm::errs() << "  " << NumPragma << " #pragma.\n";
254   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
255 
256   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
257              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
258              << NumFastMacroExpanded << " on the fast path.\n";
259   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
260              << " token paste (##) operations performed, "
261              << NumFastTokenPaste << " on the fast path.\n";
262 
263   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
264 
265   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
266   llvm::errs() << "\n  Macro Expanded Tokens: "
267                << llvm::capacity_in_bytes(MacroExpandedTokens);
268   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
269   llvm::errs() << "\n  Macros: " << llvm::capacity_in_bytes(Macros);
270   llvm::errs() << "\n  #pragma push_macro Info: "
271                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
272   llvm::errs() << "\n  Poison Reasons: "
273                << llvm::capacity_in_bytes(PoisonReasons);
274   llvm::errs() << "\n  Comment Handlers: "
275                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
276 }
277 
278 Preprocessor::macro_iterator
279 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
280   if (IncludeExternalMacros && ExternalSource &&
281       !ReadMacrosFromExternalSource) {
282     ReadMacrosFromExternalSource = true;
283     ExternalSource->ReadDefinedMacros();
284   }
285 
286   return Macros.begin();
287 }
288 
289 size_t Preprocessor::getTotalMemory() const {
290   return BP.getTotalMemory()
291     + llvm::capacity_in_bytes(MacroExpandedTokens)
292     + Predefines.capacity() /* Predefines buffer. */
293     + llvm::capacity_in_bytes(Macros)
294     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
295     + llvm::capacity_in_bytes(PoisonReasons)
296     + llvm::capacity_in_bytes(CommentHandlers);
297 }
298 
299 Preprocessor::macro_iterator
300 Preprocessor::macro_end(bool IncludeExternalMacros) const {
301   if (IncludeExternalMacros && ExternalSource &&
302       !ReadMacrosFromExternalSource) {
303     ReadMacrosFromExternalSource = true;
304     ExternalSource->ReadDefinedMacros();
305   }
306 
307   return Macros.end();
308 }
309 
310 /// \brief Compares macro tokens with a specified token value sequence.
311 static bool MacroDefinitionEquals(const MacroInfo *MI,
312                                   ArrayRef<TokenValue> Tokens) {
313   return Tokens.size() == MI->getNumTokens() &&
314       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
315 }
316 
317 StringRef Preprocessor::getLastMacroWithSpelling(
318                                     SourceLocation Loc,
319                                     ArrayRef<TokenValue> Tokens) const {
320   SourceLocation BestLocation;
321   StringRef BestSpelling;
322   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
323        I != E; ++I) {
324     const MacroDirective::DefInfo
325       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
326     if (!Def || !Def.getMacroInfo())
327       continue;
328     if (!Def.getMacroInfo()->isObjectLike())
329       continue;
330     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
331       continue;
332     SourceLocation Location = Def.getLocation();
333     // Choose the macro defined latest.
334     if (BestLocation.isInvalid() ||
335         (Location.isValid() &&
336          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
337       BestLocation = Location;
338       BestSpelling = I->first->getName();
339     }
340   }
341   return BestSpelling;
342 }
343 
344 void Preprocessor::recomputeCurLexerKind() {
345   if (CurLexer)
346     CurLexerKind = CLK_Lexer;
347   else if (CurPTHLexer)
348     CurLexerKind = CLK_PTHLexer;
349   else if (CurTokenLexer)
350     CurLexerKind = CLK_TokenLexer;
351   else
352     CurLexerKind = CLK_CachingLexer;
353 }
354 
355 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
356                                           unsigned CompleteLine,
357                                           unsigned CompleteColumn) {
358   assert(File);
359   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
360   assert(!CodeCompletionFile && "Already set");
361 
362   using llvm::MemoryBuffer;
363 
364   // Load the actual file's contents.
365   bool Invalid = false;
366   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
367   if (Invalid)
368     return true;
369 
370   // Find the byte position of the truncation point.
371   const char *Position = Buffer->getBufferStart();
372   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
373     for (; *Position; ++Position) {
374       if (*Position != '\r' && *Position != '\n')
375         continue;
376 
377       // Eat \r\n or \n\r as a single line.
378       if ((Position[1] == '\r' || Position[1] == '\n') &&
379           Position[0] != Position[1])
380         ++Position;
381       ++Position;
382       break;
383     }
384   }
385 
386   Position += CompleteColumn - 1;
387 
388   // If pointing inside the preamble, adjust the position at the beginning of
389   // the file after the preamble.
390   if (SkipMainFilePreamble.first &&
391       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
392     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
393       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
394   }
395 
396   if (Position > Buffer->getBufferEnd())
397     Position = Buffer->getBufferEnd();
398 
399   CodeCompletionFile = File;
400   CodeCompletionOffset = Position - Buffer->getBufferStart();
401 
402   std::unique_ptr<MemoryBuffer> NewBuffer =
403       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
404                                           Buffer->getBufferIdentifier());
405   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
406   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
407   *NewPos = '\0';
408   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
409   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
410 
411   return false;
412 }
413 
414 void Preprocessor::CodeCompleteNaturalLanguage() {
415   if (CodeComplete)
416     CodeComplete->CodeCompleteNaturalLanguage();
417   setCodeCompletionReached();
418 }
419 
420 /// getSpelling - This method is used to get the spelling of a token into a
421 /// SmallVector. Note that the returned StringRef may not point to the
422 /// supplied buffer if a copy can be avoided.
423 StringRef Preprocessor::getSpelling(const Token &Tok,
424                                           SmallVectorImpl<char> &Buffer,
425                                           bool *Invalid) const {
426   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
427   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
428     // Try the fast path.
429     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
430       return II->getName();
431   }
432 
433   // Resize the buffer if we need to copy into it.
434   if (Tok.needsCleaning())
435     Buffer.resize(Tok.getLength());
436 
437   const char *Ptr = Buffer.data();
438   unsigned Len = getSpelling(Tok, Ptr, Invalid);
439   return StringRef(Ptr, Len);
440 }
441 
442 /// CreateString - Plop the specified string into a scratch buffer and return a
443 /// location for it.  If specified, the source location provides a source
444 /// location for the token.
445 void Preprocessor::CreateString(StringRef Str, Token &Tok,
446                                 SourceLocation ExpansionLocStart,
447                                 SourceLocation ExpansionLocEnd) {
448   Tok.setLength(Str.size());
449 
450   const char *DestPtr;
451   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
452 
453   if (ExpansionLocStart.isValid())
454     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
455                                        ExpansionLocEnd, Str.size());
456   Tok.setLocation(Loc);
457 
458   // If this is a raw identifier or a literal token, set the pointer data.
459   if (Tok.is(tok::raw_identifier))
460     Tok.setRawIdentifierData(DestPtr);
461   else if (Tok.isLiteral())
462     Tok.setLiteralData(DestPtr);
463 }
464 
465 Module *Preprocessor::getCurrentModule() {
466   if (getLangOpts().CurrentModule.empty())
467     return nullptr;
468 
469   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
470 }
471 
472 //===----------------------------------------------------------------------===//
473 // Preprocessor Initialization Methods
474 //===----------------------------------------------------------------------===//
475 
476 
477 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
478 /// which implicitly adds the builtin defines etc.
479 void Preprocessor::EnterMainSourceFile() {
480   // We do not allow the preprocessor to reenter the main file.  Doing so will
481   // cause FileID's to accumulate information from both runs (e.g. #line
482   // information) and predefined macros aren't guaranteed to be set properly.
483   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
484   FileID MainFileID = SourceMgr.getMainFileID();
485 
486   // If MainFileID is loaded it means we loaded an AST file, no need to enter
487   // a main file.
488   if (!SourceMgr.isLoadedFileID(MainFileID)) {
489     // Enter the main file source buffer.
490     EnterSourceFile(MainFileID, nullptr, SourceLocation());
491 
492     // If we've been asked to skip bytes in the main file (e.g., as part of a
493     // precompiled preamble), do so now.
494     if (SkipMainFilePreamble.first > 0)
495       CurLexer->SkipBytes(SkipMainFilePreamble.first,
496                           SkipMainFilePreamble.second);
497 
498     // Tell the header info that the main file was entered.  If the file is later
499     // #imported, it won't be re-entered.
500     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
501       HeaderInfo.IncrementIncludeCount(FE);
502   }
503 
504   // Preprocess Predefines to populate the initial preprocessor state.
505   std::unique_ptr<llvm::MemoryBuffer> SB =
506     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
507   assert(SB && "Cannot create predefined source buffer");
508   FileID FID = SourceMgr.createFileID(std::move(SB));
509   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
510   setPredefinesFileID(FID);
511 
512   // Start parsing the predefines.
513   EnterSourceFile(FID, nullptr, SourceLocation());
514 }
515 
516 void Preprocessor::EndSourceFile() {
517   // Notify the client that we reached the end of the source file.
518   if (Callbacks)
519     Callbacks->EndOfMainFile();
520 }
521 
522 //===----------------------------------------------------------------------===//
523 // Lexer Event Handling.
524 //===----------------------------------------------------------------------===//
525 
526 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
527 /// identifier information for the token and install it into the token,
528 /// updating the token kind accordingly.
529 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
530   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
531 
532   // Look up this token, see if it is a macro, or if it is a language keyword.
533   IdentifierInfo *II;
534   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
535     // No cleaning needed, just use the characters from the lexed buffer.
536     II = getIdentifierInfo(Identifier.getRawIdentifier());
537   } else {
538     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
539     SmallString<64> IdentifierBuffer;
540     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
541 
542     if (Identifier.hasUCN()) {
543       SmallString<64> UCNIdentifierBuffer;
544       expandUCNs(UCNIdentifierBuffer, CleanedStr);
545       II = getIdentifierInfo(UCNIdentifierBuffer);
546     } else {
547       II = getIdentifierInfo(CleanedStr);
548     }
549   }
550 
551   // Update the token info (identifier info and appropriate token kind).
552   Identifier.setIdentifierInfo(II);
553   Identifier.setKind(II->getTokenID());
554 
555   return II;
556 }
557 
558 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
559   PoisonReasons[II] = DiagID;
560 }
561 
562 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
563   assert(Ident__exception_code && Ident__exception_info);
564   assert(Ident___exception_code && Ident___exception_info);
565   Ident__exception_code->setIsPoisoned(Poison);
566   Ident___exception_code->setIsPoisoned(Poison);
567   Ident_GetExceptionCode->setIsPoisoned(Poison);
568   Ident__exception_info->setIsPoisoned(Poison);
569   Ident___exception_info->setIsPoisoned(Poison);
570   Ident_GetExceptionInfo->setIsPoisoned(Poison);
571   Ident__abnormal_termination->setIsPoisoned(Poison);
572   Ident___abnormal_termination->setIsPoisoned(Poison);
573   Ident_AbnormalTermination->setIsPoisoned(Poison);
574 }
575 
576 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
577   assert(Identifier.getIdentifierInfo() &&
578          "Can't handle identifiers without identifier info!");
579   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
580     PoisonReasons.find(Identifier.getIdentifierInfo());
581   if(it == PoisonReasons.end())
582     Diag(Identifier, diag::err_pp_used_poisoned_id);
583   else
584     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
585 }
586 
587 /// \brief Returns a diagnostic message kind for reporting a future keyword as
588 /// appropriate for the identifier and specified language.
589 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
590                                           const LangOptions &LangOpts) {
591   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
592 
593   if (LangOpts.CPlusPlus)
594     return llvm::StringSwitch<diag::kind>(II.getName())
595 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
596         .Case(#NAME, diag::warn_cxx11_keyword)
597 #include "clang/Basic/TokenKinds.def"
598         ;
599 
600   llvm_unreachable(
601       "Keyword not known to come from a newer Standard or proposed Standard");
602 }
603 
604 /// HandleIdentifier - This callback is invoked when the lexer reads an
605 /// identifier.  This callback looks up the identifier in the map and/or
606 /// potentially macro expands it or turns it into a named token (like 'for').
607 ///
608 /// Note that callers of this method are guarded by checking the
609 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
610 /// IdentifierInfo methods that compute these properties will need to change to
611 /// match.
612 bool Preprocessor::HandleIdentifier(Token &Identifier) {
613   assert(Identifier.getIdentifierInfo() &&
614          "Can't handle identifiers without identifier info!");
615 
616   IdentifierInfo &II = *Identifier.getIdentifierInfo();
617 
618   // If the information about this identifier is out of date, update it from
619   // the external source.
620   // We have to treat __VA_ARGS__ in a special way, since it gets
621   // serialized with isPoisoned = true, but our preprocessor may have
622   // unpoisoned it if we're defining a C99 macro.
623   if (II.isOutOfDate()) {
624     bool CurrentIsPoisoned = false;
625     if (&II == Ident__VA_ARGS__)
626       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
627 
628     ExternalSource->updateOutOfDateIdentifier(II);
629     Identifier.setKind(II.getTokenID());
630 
631     if (&II == Ident__VA_ARGS__)
632       II.setIsPoisoned(CurrentIsPoisoned);
633   }
634 
635   // If this identifier was poisoned, and if it was not produced from a macro
636   // expansion, emit an error.
637   if (II.isPoisoned() && CurPPLexer) {
638     HandlePoisonedIdentifier(Identifier);
639   }
640 
641   // If this is a macro to be expanded, do it.
642   if (MacroDefinition MD = getMacroDefinition(&II)) {
643     auto *MI = MD.getMacroInfo();
644     assert(MI && "macro definition with no macro info?");
645     if (!DisableMacroExpansion) {
646       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
647         // C99 6.10.3p10: If the preprocessing token immediately after the
648         // macro name isn't a '(', this macro should not be expanded.
649         if (!MI->isFunctionLike() || isNextPPTokenLParen())
650           return HandleMacroExpandedIdentifier(Identifier, MD);
651       } else {
652         // C99 6.10.3.4p2 says that a disabled macro may never again be
653         // expanded, even if it's in a context where it could be expanded in the
654         // future.
655         Identifier.setFlag(Token::DisableExpand);
656         if (MI->isObjectLike() || isNextPPTokenLParen())
657           Diag(Identifier, diag::pp_disabled_macro_expansion);
658       }
659     }
660   }
661 
662   // If this identifier is a keyword in a newer Standard or proposed Standard,
663   // produce a warning. Don't warn if we're not considering macro expansion,
664   // since this identifier might be the name of a macro.
665   // FIXME: This warning is disabled in cases where it shouldn't be, like
666   //   "#define constexpr constexpr", "int constexpr;"
667   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
668     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
669         << II.getName();
670     // Don't diagnose this keyword again in this translation unit.
671     II.setIsFutureCompatKeyword(false);
672   }
673 
674   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
675   // then we act as if it is the actual operator and not the textual
676   // representation of it.
677   if (II.isCPlusPlusOperatorKeyword())
678     Identifier.setIdentifierInfo(nullptr);
679 
680   // If this is an extension token, diagnose its use.
681   // We avoid diagnosing tokens that originate from macro definitions.
682   // FIXME: This warning is disabled in cases where it shouldn't be,
683   // like "#define TY typeof", "TY(1) x".
684   if (II.isExtensionToken() && !DisableMacroExpansion)
685     Diag(Identifier, diag::ext_token_used);
686 
687   // If this is the 'import' contextual keyword following an '@', note
688   // that the next token indicates a module name.
689   //
690   // Note that we do not treat 'import' as a contextual
691   // keyword when we're in a caching lexer, because caching lexers only get
692   // used in contexts where import declarations are disallowed.
693   if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
694       !DisableMacroExpansion &&
695       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
696       CurLexerKind != CLK_CachingLexer) {
697     ModuleImportLoc = Identifier.getLocation();
698     ModuleImportPath.clear();
699     ModuleImportExpectsIdentifier = true;
700     CurLexerKind = CLK_LexAfterModuleImport;
701   }
702   return true;
703 }
704 
705 void Preprocessor::Lex(Token &Result) {
706   // We loop here until a lex function retuns a token; this avoids recursion.
707   bool ReturnedToken;
708   do {
709     switch (CurLexerKind) {
710     case CLK_Lexer:
711       ReturnedToken = CurLexer->Lex(Result);
712       break;
713     case CLK_PTHLexer:
714       ReturnedToken = CurPTHLexer->Lex(Result);
715       break;
716     case CLK_TokenLexer:
717       ReturnedToken = CurTokenLexer->Lex(Result);
718       break;
719     case CLK_CachingLexer:
720       CachingLex(Result);
721       ReturnedToken = true;
722       break;
723     case CLK_LexAfterModuleImport:
724       LexAfterModuleImport(Result);
725       ReturnedToken = true;
726       break;
727     }
728   } while (!ReturnedToken);
729 
730   LastTokenWasAt = Result.is(tok::at);
731 }
732 
733 
734 /// \brief Lex a token following the 'import' contextual keyword.
735 ///
736 void Preprocessor::LexAfterModuleImport(Token &Result) {
737   // Figure out what kind of lexer we actually have.
738   recomputeCurLexerKind();
739 
740   // Lex the next token.
741   Lex(Result);
742 
743   // The token sequence
744   //
745   //   import identifier (. identifier)*
746   //
747   // indicates a module import directive. We already saw the 'import'
748   // contextual keyword, so now we're looking for the identifiers.
749   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
750     // We expected to see an identifier here, and we did; continue handling
751     // identifiers.
752     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
753                                               Result.getLocation()));
754     ModuleImportExpectsIdentifier = false;
755     CurLexerKind = CLK_LexAfterModuleImport;
756     return;
757   }
758 
759   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
760   // see the next identifier.
761   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
762     ModuleImportExpectsIdentifier = true;
763     CurLexerKind = CLK_LexAfterModuleImport;
764     return;
765   }
766 
767   // If we have a non-empty module path, load the named module.
768   if (!ModuleImportPath.empty()) {
769     Module *Imported = nullptr;
770     if (getLangOpts().Modules) {
771       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
772                                             ModuleImportPath,
773                                             Module::Hidden,
774                                             /*IsIncludeDirective=*/false);
775       if (Imported)
776         makeModuleVisible(Imported, ModuleImportLoc);
777     }
778     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
779       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
780   }
781 }
782 
783 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
784   VisibleModules.setVisible(
785       M, Loc, [](Module *) {},
786       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
787         // FIXME: Include the path in the diagnostic.
788         // FIXME: Include the import location for the conflicting module.
789         Diag(ModuleImportLoc, diag::warn_module_conflict)
790             << Path[0]->getFullModuleName()
791             << Conflict->getFullModuleName()
792             << Message;
793       });
794 
795   // Add this module to the imports list of the currently-built submodule.
796   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
797     BuildingSubmoduleStack.back().M->Imports.insert(M);
798 }
799 
800 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
801                                           const char *DiagnosticTag,
802                                           bool AllowMacroExpansion) {
803   // We need at least one string literal.
804   if (Result.isNot(tok::string_literal)) {
805     Diag(Result, diag::err_expected_string_literal)
806       << /*Source='in...'*/0 << DiagnosticTag;
807     return false;
808   }
809 
810   // Lex string literal tokens, optionally with macro expansion.
811   SmallVector<Token, 4> StrToks;
812   do {
813     StrToks.push_back(Result);
814 
815     if (Result.hasUDSuffix())
816       Diag(Result, diag::err_invalid_string_udl);
817 
818     if (AllowMacroExpansion)
819       Lex(Result);
820     else
821       LexUnexpandedToken(Result);
822   } while (Result.is(tok::string_literal));
823 
824   // Concatenate and parse the strings.
825   StringLiteralParser Literal(StrToks, *this);
826   assert(Literal.isAscii() && "Didn't allow wide strings in");
827 
828   if (Literal.hadError)
829     return false;
830 
831   if (Literal.Pascal) {
832     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
833       << /*Source='in...'*/0 << DiagnosticTag;
834     return false;
835   }
836 
837   String = Literal.GetString();
838   return true;
839 }
840 
841 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
842   assert(Tok.is(tok::numeric_constant));
843   SmallString<8> IntegerBuffer;
844   bool NumberInvalid = false;
845   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
846   if (NumberInvalid)
847     return false;
848   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
849   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
850     return false;
851   llvm::APInt APVal(64, 0);
852   if (Literal.GetIntegerValue(APVal))
853     return false;
854   Lex(Tok);
855   Value = APVal.getLimitedValue();
856   return true;
857 }
858 
859 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
860   assert(Handler && "NULL comment handler");
861   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
862          CommentHandlers.end() && "Comment handler already registered");
863   CommentHandlers.push_back(Handler);
864 }
865 
866 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
867   std::vector<CommentHandler *>::iterator Pos
868   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
869   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
870   CommentHandlers.erase(Pos);
871 }
872 
873 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
874   bool AnyPendingTokens = false;
875   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
876        HEnd = CommentHandlers.end();
877        H != HEnd; ++H) {
878     if ((*H)->HandleComment(*this, Comment))
879       AnyPendingTokens = true;
880   }
881   if (!AnyPendingTokens || getCommentRetentionState())
882     return false;
883   Lex(result);
884   return true;
885 }
886 
887 ModuleLoader::~ModuleLoader() { }
888 
889 CommentHandler::~CommentHandler() { }
890 
891 CodeCompletionHandler::~CodeCompletionHandler() { }
892 
893 void Preprocessor::createPreprocessingRecord() {
894   if (Record)
895     return;
896 
897   Record = new PreprocessingRecord(getSourceManager());
898   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
899 }
900