1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/Pragma.h"
42 #include "clang/Lex/PreprocessingRecord.h"
43 #include "clang/Lex/PreprocessorOptions.h"
44 #include "clang/Lex/ScratchBuffer.h"
45 #include "llvm/ADT/APFloat.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/Capacity.h"
50 #include "llvm/Support/ConvertUTF.h"
51 #include "llvm/Support/MemoryBuffer.h"
52 #include "llvm/Support/raw_ostream.h"
53 using namespace clang;
54 
55 //===----------------------------------------------------------------------===//
56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
57 
58 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
59                            DiagnosticsEngine &diags, LangOptions &opts,
60                            SourceManager &SM, HeaderSearch &Headers,
61                            ModuleLoader &TheModuleLoader,
62                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
63                            TranslationUnitKind TUKind)
64     : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
65       FileMgr(Headers.getFileMgr()), SourceMgr(SM), HeaderInfo(Headers),
66       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
67       Identifiers(opts, IILookup), IncrementalProcessing(false), TUKind(TUKind),
68       CodeComplete(nullptr), CodeCompletionFile(nullptr),
69       CodeCompletionOffset(0), LastTokenWasAt(false),
70       ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
71       SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
72       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
73       Callbacks(nullptr), MacroArgCache(nullptr), Record(nullptr),
74       MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
75   OwnsHeaderSearch = OwnsHeaders;
76 
77   ScratchBuf = new ScratchBuffer(SourceMgr);
78   CounterValue = 0; // __COUNTER__ starts at 0.
79 
80   // Clear stats.
81   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
82   NumIf = NumElse = NumEndif = 0;
83   NumEnteredSourceFiles = 0;
84   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
85   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
86   MaxIncludeStackDepth = 0;
87   NumSkipped = 0;
88 
89   // Default to discarding comments.
90   KeepComments = false;
91   KeepMacroComments = false;
92   SuppressIncludeNotFoundError = false;
93 
94   // Macro expansion is enabled.
95   DisableMacroExpansion = false;
96   MacroExpansionInDirectivesOverride = false;
97   InMacroArgs = false;
98   InMacroArgPreExpansion = false;
99   NumCachedTokenLexers = 0;
100   PragmasEnabled = true;
101   ParsingIfOrElifDirective = false;
102   PreprocessedOutput = false;
103 
104   CachedLexPos = 0;
105 
106   // We haven't read anything from the external source.
107   ReadMacrosFromExternalSource = false;
108 
109   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
110   // This gets unpoisoned where it is allowed.
111   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
112   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
113 
114   // Initialize the pragma handlers.
115   PragmaHandlers = new PragmaNamespace(StringRef());
116   RegisterBuiltinPragmas();
117 
118   // Initialize builtin macros like __LINE__ and friends.
119   RegisterBuiltinMacros();
120 
121   if(LangOpts.Borland) {
122     Ident__exception_info        = getIdentifierInfo("_exception_info");
123     Ident___exception_info       = getIdentifierInfo("__exception_info");
124     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
125     Ident__exception_code        = getIdentifierInfo("_exception_code");
126     Ident___exception_code       = getIdentifierInfo("__exception_code");
127     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
128     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
129     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
130     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
131   } else {
132     Ident__exception_info = Ident__exception_code = nullptr;
133     Ident__abnormal_termination = Ident___exception_info = nullptr;
134     Ident___exception_code = Ident___abnormal_termination = nullptr;
135     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
136     Ident_AbnormalTermination = nullptr;
137   }
138 }
139 
140 Preprocessor::~Preprocessor() {
141   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
142 
143   IncludeMacroStack.clear();
144 
145   // Destroy any macro definitions.
146   while (MacroInfoChain *I = MIChainHead) {
147     MIChainHead = I->Next;
148     I->~MacroInfoChain();
149   }
150 
151   // Free any cached macro expanders.
152   // This populates MacroArgCache, so all TokenLexers need to be destroyed
153   // before the code below that frees up the MacroArgCache list.
154   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
155   CurTokenLexer.reset();
156 
157   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
158     DeserialMIChainHead = I->Next;
159     I->~DeserializedMacroInfoChain();
160   }
161 
162   // Free any cached MacroArgs.
163   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
164     ArgList = ArgList->deallocate();
165 
166   // Release pragma information.
167   delete PragmaHandlers;
168 
169   // Delete the scratch buffer info.
170   delete ScratchBuf;
171 
172   // Delete the header search info, if we own it.
173   if (OwnsHeaderSearch)
174     delete &HeaderInfo;
175 
176   delete Callbacks;
177 }
178 
179 void Preprocessor::Initialize(const TargetInfo &Target) {
180   assert((!this->Target || this->Target == &Target) &&
181          "Invalid override of target information");
182   this->Target = &Target;
183 
184   // Initialize information about built-ins.
185   BuiltinInfo.InitializeTarget(Target);
186   HeaderInfo.setTarget(Target);
187 }
188 
189 void Preprocessor::InitializeForModelFile() {
190   NumEnteredSourceFiles = 0;
191 
192   // Reset pragmas
193   PragmaHandlersBackup = PragmaHandlers;
194   PragmaHandlers = new PragmaNamespace(StringRef());
195   RegisterBuiltinPragmas();
196 
197   // Reset PredefinesFileID
198   PredefinesFileID = FileID();
199 }
200 
201 void Preprocessor::FinalizeForModelFile() {
202   NumEnteredSourceFiles = 1;
203 
204   delete PragmaHandlers;
205   PragmaHandlers = PragmaHandlersBackup;
206 }
207 
208 void Preprocessor::setPTHManager(PTHManager* pm) {
209   PTH.reset(pm);
210   FileMgr.addStatCache(PTH->createStatCache());
211 }
212 
213 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
214   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
215                << getSpelling(Tok) << "'";
216 
217   if (!DumpFlags) return;
218 
219   llvm::errs() << "\t";
220   if (Tok.isAtStartOfLine())
221     llvm::errs() << " [StartOfLine]";
222   if (Tok.hasLeadingSpace())
223     llvm::errs() << " [LeadingSpace]";
224   if (Tok.isExpandDisabled())
225     llvm::errs() << " [ExpandDisabled]";
226   if (Tok.needsCleaning()) {
227     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
228     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
229                  << "']";
230   }
231 
232   llvm::errs() << "\tLoc=<";
233   DumpLocation(Tok.getLocation());
234   llvm::errs() << ">";
235 }
236 
237 void Preprocessor::DumpLocation(SourceLocation Loc) const {
238   Loc.dump(SourceMgr);
239 }
240 
241 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
242   llvm::errs() << "MACRO: ";
243   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
244     DumpToken(MI.getReplacementToken(i));
245     llvm::errs() << "  ";
246   }
247   llvm::errs() << "\n";
248 }
249 
250 void Preprocessor::PrintStats() {
251   llvm::errs() << "\n*** Preprocessor Stats:\n";
252   llvm::errs() << NumDirectives << " directives found:\n";
253   llvm::errs() << "  " << NumDefined << " #define.\n";
254   llvm::errs() << "  " << NumUndefined << " #undef.\n";
255   llvm::errs() << "  #include/#include_next/#import:\n";
256   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
257   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
258   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
259   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
260   llvm::errs() << "  " << NumEndif << " #endif.\n";
261   llvm::errs() << "  " << NumPragma << " #pragma.\n";
262   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
263 
264   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
265              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
266              << NumFastMacroExpanded << " on the fast path.\n";
267   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
268              << " token paste (##) operations performed, "
269              << NumFastTokenPaste << " on the fast path.\n";
270 
271   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
272 
273   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
274   llvm::errs() << "\n  Macro Expanded Tokens: "
275                << llvm::capacity_in_bytes(MacroExpandedTokens);
276   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
277   llvm::errs() << "\n  Macros: " << llvm::capacity_in_bytes(Macros);
278   llvm::errs() << "\n  #pragma push_macro Info: "
279                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
280   llvm::errs() << "\n  Poison Reasons: "
281                << llvm::capacity_in_bytes(PoisonReasons);
282   llvm::errs() << "\n  Comment Handlers: "
283                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
284 }
285 
286 Preprocessor::macro_iterator
287 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
288   if (IncludeExternalMacros && ExternalSource &&
289       !ReadMacrosFromExternalSource) {
290     ReadMacrosFromExternalSource = true;
291     ExternalSource->ReadDefinedMacros();
292   }
293 
294   return Macros.begin();
295 }
296 
297 size_t Preprocessor::getTotalMemory() const {
298   return BP.getTotalMemory()
299     + llvm::capacity_in_bytes(MacroExpandedTokens)
300     + Predefines.capacity() /* Predefines buffer. */
301     + llvm::capacity_in_bytes(Macros)
302     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
303     + llvm::capacity_in_bytes(PoisonReasons)
304     + llvm::capacity_in_bytes(CommentHandlers);
305 }
306 
307 Preprocessor::macro_iterator
308 Preprocessor::macro_end(bool IncludeExternalMacros) const {
309   if (IncludeExternalMacros && ExternalSource &&
310       !ReadMacrosFromExternalSource) {
311     ReadMacrosFromExternalSource = true;
312     ExternalSource->ReadDefinedMacros();
313   }
314 
315   return Macros.end();
316 }
317 
318 /// \brief Compares macro tokens with a specified token value sequence.
319 static bool MacroDefinitionEquals(const MacroInfo *MI,
320                                   ArrayRef<TokenValue> Tokens) {
321   return Tokens.size() == MI->getNumTokens() &&
322       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
323 }
324 
325 StringRef Preprocessor::getLastMacroWithSpelling(
326                                     SourceLocation Loc,
327                                     ArrayRef<TokenValue> Tokens) const {
328   SourceLocation BestLocation;
329   StringRef BestSpelling;
330   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
331        I != E; ++I) {
332     if (!I->second->getMacroInfo()->isObjectLike())
333       continue;
334     const MacroDirective::DefInfo
335       Def = I->second->findDirectiveAtLoc(Loc, SourceMgr);
336     if (!Def)
337       continue;
338     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
339       continue;
340     SourceLocation Location = Def.getLocation();
341     // Choose the macro defined latest.
342     if (BestLocation.isInvalid() ||
343         (Location.isValid() &&
344          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
345       BestLocation = Location;
346       BestSpelling = I->first->getName();
347     }
348   }
349   return BestSpelling;
350 }
351 
352 void Preprocessor::recomputeCurLexerKind() {
353   if (CurLexer)
354     CurLexerKind = CLK_Lexer;
355   else if (CurPTHLexer)
356     CurLexerKind = CLK_PTHLexer;
357   else if (CurTokenLexer)
358     CurLexerKind = CLK_TokenLexer;
359   else
360     CurLexerKind = CLK_CachingLexer;
361 }
362 
363 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
364                                           unsigned CompleteLine,
365                                           unsigned CompleteColumn) {
366   assert(File);
367   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
368   assert(!CodeCompletionFile && "Already set");
369 
370   using llvm::MemoryBuffer;
371 
372   // Load the actual file's contents.
373   bool Invalid = false;
374   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
375   if (Invalid)
376     return true;
377 
378   // Find the byte position of the truncation point.
379   const char *Position = Buffer->getBufferStart();
380   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
381     for (; *Position; ++Position) {
382       if (*Position != '\r' && *Position != '\n')
383         continue;
384 
385       // Eat \r\n or \n\r as a single line.
386       if ((Position[1] == '\r' || Position[1] == '\n') &&
387           Position[0] != Position[1])
388         ++Position;
389       ++Position;
390       break;
391     }
392   }
393 
394   Position += CompleteColumn - 1;
395 
396   // Insert '\0' at the code-completion point.
397   if (Position < Buffer->getBufferEnd()) {
398     CodeCompletionFile = File;
399     CodeCompletionOffset = Position - Buffer->getBufferStart();
400 
401     std::unique_ptr<MemoryBuffer> NewBuffer =
402         MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
403                                             Buffer->getBufferIdentifier());
404     char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
405     char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
406     *NewPos = '\0';
407     std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
408     SourceMgr.overrideFileContents(File, std::move(NewBuffer));
409   }
410 
411   return false;
412 }
413 
414 void Preprocessor::CodeCompleteNaturalLanguage() {
415   if (CodeComplete)
416     CodeComplete->CodeCompleteNaturalLanguage();
417   setCodeCompletionReached();
418 }
419 
420 /// getSpelling - This method is used to get the spelling of a token into a
421 /// SmallVector. Note that the returned StringRef may not point to the
422 /// supplied buffer if a copy can be avoided.
423 StringRef Preprocessor::getSpelling(const Token &Tok,
424                                           SmallVectorImpl<char> &Buffer,
425                                           bool *Invalid) const {
426   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
427   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
428     // Try the fast path.
429     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
430       return II->getName();
431   }
432 
433   // Resize the buffer if we need to copy into it.
434   if (Tok.needsCleaning())
435     Buffer.resize(Tok.getLength());
436 
437   const char *Ptr = Buffer.data();
438   unsigned Len = getSpelling(Tok, Ptr, Invalid);
439   return StringRef(Ptr, Len);
440 }
441 
442 /// CreateString - Plop the specified string into a scratch buffer and return a
443 /// location for it.  If specified, the source location provides a source
444 /// location for the token.
445 void Preprocessor::CreateString(StringRef Str, Token &Tok,
446                                 SourceLocation ExpansionLocStart,
447                                 SourceLocation ExpansionLocEnd) {
448   Tok.setLength(Str.size());
449 
450   const char *DestPtr;
451   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
452 
453   if (ExpansionLocStart.isValid())
454     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
455                                        ExpansionLocEnd, Str.size());
456   Tok.setLocation(Loc);
457 
458   // If this is a raw identifier or a literal token, set the pointer data.
459   if (Tok.is(tok::raw_identifier))
460     Tok.setRawIdentifierData(DestPtr);
461   else if (Tok.isLiteral())
462     Tok.setLiteralData(DestPtr);
463 }
464 
465 Module *Preprocessor::getCurrentModule() {
466   if (getLangOpts().CurrentModule.empty())
467     return nullptr;
468 
469   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
470 }
471 
472 //===----------------------------------------------------------------------===//
473 // Preprocessor Initialization Methods
474 //===----------------------------------------------------------------------===//
475 
476 
477 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
478 /// which implicitly adds the builtin defines etc.
479 void Preprocessor::EnterMainSourceFile() {
480   // We do not allow the preprocessor to reenter the main file.  Doing so will
481   // cause FileID's to accumulate information from both runs (e.g. #line
482   // information) and predefined macros aren't guaranteed to be set properly.
483   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
484   FileID MainFileID = SourceMgr.getMainFileID();
485 
486   // If MainFileID is loaded it means we loaded an AST file, no need to enter
487   // a main file.
488   if (!SourceMgr.isLoadedFileID(MainFileID)) {
489     // Enter the main file source buffer.
490     EnterSourceFile(MainFileID, nullptr, SourceLocation());
491 
492     // If we've been asked to skip bytes in the main file (e.g., as part of a
493     // precompiled preamble), do so now.
494     if (SkipMainFilePreamble.first > 0)
495       CurLexer->SkipBytes(SkipMainFilePreamble.first,
496                           SkipMainFilePreamble.second);
497 
498     // Tell the header info that the main file was entered.  If the file is later
499     // #imported, it won't be re-entered.
500     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
501       HeaderInfo.IncrementIncludeCount(FE);
502   }
503 
504   // Preprocess Predefines to populate the initial preprocessor state.
505   std::unique_ptr<llvm::MemoryBuffer> SB =
506     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
507   assert(SB && "Cannot create predefined source buffer");
508   FileID FID = SourceMgr.createFileID(std::move(SB));
509   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
510   setPredefinesFileID(FID);
511 
512   // Start parsing the predefines.
513   EnterSourceFile(FID, nullptr, SourceLocation());
514 }
515 
516 void Preprocessor::EndSourceFile() {
517   // Notify the client that we reached the end of the source file.
518   if (Callbacks)
519     Callbacks->EndOfMainFile();
520 }
521 
522 //===----------------------------------------------------------------------===//
523 // Lexer Event Handling.
524 //===----------------------------------------------------------------------===//
525 
526 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
527 /// identifier information for the token and install it into the token,
528 /// updating the token kind accordingly.
529 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
530   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
531 
532   // Look up this token, see if it is a macro, or if it is a language keyword.
533   IdentifierInfo *II;
534   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
535     // No cleaning needed, just use the characters from the lexed buffer.
536     II = getIdentifierInfo(Identifier.getRawIdentifier());
537   } else {
538     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
539     SmallString<64> IdentifierBuffer;
540     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
541 
542     if (Identifier.hasUCN()) {
543       SmallString<64> UCNIdentifierBuffer;
544       expandUCNs(UCNIdentifierBuffer, CleanedStr);
545       II = getIdentifierInfo(UCNIdentifierBuffer);
546     } else {
547       II = getIdentifierInfo(CleanedStr);
548     }
549   }
550 
551   // Update the token info (identifier info and appropriate token kind).
552   Identifier.setIdentifierInfo(II);
553   Identifier.setKind(II->getTokenID());
554 
555   return II;
556 }
557 
558 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
559   PoisonReasons[II] = DiagID;
560 }
561 
562 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
563   assert(Ident__exception_code && Ident__exception_info);
564   assert(Ident___exception_code && Ident___exception_info);
565   Ident__exception_code->setIsPoisoned(Poison);
566   Ident___exception_code->setIsPoisoned(Poison);
567   Ident_GetExceptionCode->setIsPoisoned(Poison);
568   Ident__exception_info->setIsPoisoned(Poison);
569   Ident___exception_info->setIsPoisoned(Poison);
570   Ident_GetExceptionInfo->setIsPoisoned(Poison);
571   Ident__abnormal_termination->setIsPoisoned(Poison);
572   Ident___abnormal_termination->setIsPoisoned(Poison);
573   Ident_AbnormalTermination->setIsPoisoned(Poison);
574 }
575 
576 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
577   assert(Identifier.getIdentifierInfo() &&
578          "Can't handle identifiers without identifier info!");
579   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
580     PoisonReasons.find(Identifier.getIdentifierInfo());
581   if(it == PoisonReasons.end())
582     Diag(Identifier, diag::err_pp_used_poisoned_id);
583   else
584     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
585 }
586 
587 /// HandleIdentifier - This callback is invoked when the lexer reads an
588 /// identifier.  This callback looks up the identifier in the map and/or
589 /// potentially macro expands it or turns it into a named token (like 'for').
590 ///
591 /// Note that callers of this method are guarded by checking the
592 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
593 /// IdentifierInfo methods that compute these properties will need to change to
594 /// match.
595 bool Preprocessor::HandleIdentifier(Token &Identifier) {
596   assert(Identifier.getIdentifierInfo() &&
597          "Can't handle identifiers without identifier info!");
598 
599   IdentifierInfo &II = *Identifier.getIdentifierInfo();
600 
601   // If the information about this identifier is out of date, update it from
602   // the external source.
603   // We have to treat __VA_ARGS__ in a special way, since it gets
604   // serialized with isPoisoned = true, but our preprocessor may have
605   // unpoisoned it if we're defining a C99 macro.
606   if (II.isOutOfDate()) {
607     bool CurrentIsPoisoned = false;
608     if (&II == Ident__VA_ARGS__)
609       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
610 
611     ExternalSource->updateOutOfDateIdentifier(II);
612     Identifier.setKind(II.getTokenID());
613 
614     if (&II == Ident__VA_ARGS__)
615       II.setIsPoisoned(CurrentIsPoisoned);
616   }
617 
618   // If this identifier was poisoned, and if it was not produced from a macro
619   // expansion, emit an error.
620   if (II.isPoisoned() && CurPPLexer) {
621     HandlePoisonedIdentifier(Identifier);
622   }
623 
624   // If this is a macro to be expanded, do it.
625   if (MacroDirective *MD = getMacroDirective(&II)) {
626     MacroInfo *MI = MD->getMacroInfo();
627     if (!DisableMacroExpansion) {
628       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
629         // C99 6.10.3p10: If the preprocessing token immediately after the
630         // macro name isn't a '(', this macro should not be expanded.
631         if (!MI->isFunctionLike() || isNextPPTokenLParen())
632           return HandleMacroExpandedIdentifier(Identifier, MD);
633       } else {
634         // C99 6.10.3.4p2 says that a disabled macro may never again be
635         // expanded, even if it's in a context where it could be expanded in the
636         // future.
637         Identifier.setFlag(Token::DisableExpand);
638         if (MI->isObjectLike() || isNextPPTokenLParen())
639           Diag(Identifier, diag::pp_disabled_macro_expansion);
640       }
641     }
642   }
643 
644   // If this identifier is a keyword in C++11, produce a warning. Don't warn if
645   // we're not considering macro expansion, since this identifier might be the
646   // name of a macro.
647   // FIXME: This warning is disabled in cases where it shouldn't be, like
648   //   "#define constexpr constexpr", "int constexpr;"
649   if (II.isCXX11CompatKeyword() && !DisableMacroExpansion) {
650     Diag(Identifier, diag::warn_cxx11_keyword) << II.getName();
651     // Don't diagnose this keyword again in this translation unit.
652     II.setIsCXX11CompatKeyword(false);
653   }
654 
655   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
656   // then we act as if it is the actual operator and not the textual
657   // representation of it.
658   if (II.isCPlusPlusOperatorKeyword())
659     Identifier.setIdentifierInfo(nullptr);
660 
661   // If this is an extension token, diagnose its use.
662   // We avoid diagnosing tokens that originate from macro definitions.
663   // FIXME: This warning is disabled in cases where it shouldn't be,
664   // like "#define TY typeof", "TY(1) x".
665   if (II.isExtensionToken() && !DisableMacroExpansion)
666     Diag(Identifier, diag::ext_token_used);
667 
668   // If this is the 'import' contextual keyword following an '@', note
669   // that the next token indicates a module name.
670   //
671   // Note that we do not treat 'import' as a contextual
672   // keyword when we're in a caching lexer, because caching lexers only get
673   // used in contexts where import declarations are disallowed.
674   if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
675       !DisableMacroExpansion && getLangOpts().Modules &&
676       CurLexerKind != CLK_CachingLexer) {
677     ModuleImportLoc = Identifier.getLocation();
678     ModuleImportPath.clear();
679     ModuleImportExpectsIdentifier = true;
680     CurLexerKind = CLK_LexAfterModuleImport;
681   }
682   return true;
683 }
684 
685 void Preprocessor::Lex(Token &Result) {
686   // We loop here until a lex function retuns a token; this avoids recursion.
687   bool ReturnedToken;
688   do {
689     switch (CurLexerKind) {
690     case CLK_Lexer:
691       ReturnedToken = CurLexer->Lex(Result);
692       break;
693     case CLK_PTHLexer:
694       ReturnedToken = CurPTHLexer->Lex(Result);
695       break;
696     case CLK_TokenLexer:
697       ReturnedToken = CurTokenLexer->Lex(Result);
698       break;
699     case CLK_CachingLexer:
700       CachingLex(Result);
701       ReturnedToken = true;
702       break;
703     case CLK_LexAfterModuleImport:
704       LexAfterModuleImport(Result);
705       ReturnedToken = true;
706       break;
707     }
708   } while (!ReturnedToken);
709 
710   LastTokenWasAt = Result.is(tok::at);
711 }
712 
713 
714 /// \brief Lex a token following the 'import' contextual keyword.
715 ///
716 void Preprocessor::LexAfterModuleImport(Token &Result) {
717   // Figure out what kind of lexer we actually have.
718   recomputeCurLexerKind();
719 
720   // Lex the next token.
721   Lex(Result);
722 
723   // The token sequence
724   //
725   //   import identifier (. identifier)*
726   //
727   // indicates a module import directive. We already saw the 'import'
728   // contextual keyword, so now we're looking for the identifiers.
729   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
730     // We expected to see an identifier here, and we did; continue handling
731     // identifiers.
732     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
733                                               Result.getLocation()));
734     ModuleImportExpectsIdentifier = false;
735     CurLexerKind = CLK_LexAfterModuleImport;
736     return;
737   }
738 
739   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
740   // see the next identifier.
741   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
742     ModuleImportExpectsIdentifier = true;
743     CurLexerKind = CLK_LexAfterModuleImport;
744     return;
745   }
746 
747   // If we have a non-empty module path, load the named module.
748   if (!ModuleImportPath.empty() && getLangOpts().Modules) {
749     Module *Imported = TheModuleLoader.loadModule(ModuleImportLoc,
750                                                   ModuleImportPath,
751                                                   Module::MacrosVisible,
752                                                   /*IsIncludeDirective=*/false);
753     if (Callbacks)
754       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
755   }
756 }
757 
758 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
759                                           const char *DiagnosticTag,
760                                           bool AllowMacroExpansion) {
761   // We need at least one string literal.
762   if (Result.isNot(tok::string_literal)) {
763     Diag(Result, diag::err_expected_string_literal)
764       << /*Source='in...'*/0 << DiagnosticTag;
765     return false;
766   }
767 
768   // Lex string literal tokens, optionally with macro expansion.
769   SmallVector<Token, 4> StrToks;
770   do {
771     StrToks.push_back(Result);
772 
773     if (Result.hasUDSuffix())
774       Diag(Result, diag::err_invalid_string_udl);
775 
776     if (AllowMacroExpansion)
777       Lex(Result);
778     else
779       LexUnexpandedToken(Result);
780   } while (Result.is(tok::string_literal));
781 
782   // Concatenate and parse the strings.
783   StringLiteralParser Literal(StrToks, *this);
784   assert(Literal.isAscii() && "Didn't allow wide strings in");
785 
786   if (Literal.hadError)
787     return false;
788 
789   if (Literal.Pascal) {
790     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
791       << /*Source='in...'*/0 << DiagnosticTag;
792     return false;
793   }
794 
795   String = Literal.GetString();
796   return true;
797 }
798 
799 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
800   assert(Tok.is(tok::numeric_constant));
801   SmallString<8> IntegerBuffer;
802   bool NumberInvalid = false;
803   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
804   if (NumberInvalid)
805     return false;
806   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
807   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
808     return false;
809   llvm::APInt APVal(64, 0);
810   if (Literal.GetIntegerValue(APVal))
811     return false;
812   Lex(Tok);
813   Value = APVal.getLimitedValue();
814   return true;
815 }
816 
817 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
818   assert(Handler && "NULL comment handler");
819   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
820          CommentHandlers.end() && "Comment handler already registered");
821   CommentHandlers.push_back(Handler);
822 }
823 
824 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
825   std::vector<CommentHandler *>::iterator Pos
826   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
827   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
828   CommentHandlers.erase(Pos);
829 }
830 
831 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
832   bool AnyPendingTokens = false;
833   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
834        HEnd = CommentHandlers.end();
835        H != HEnd; ++H) {
836     if ((*H)->HandleComment(*this, Comment))
837       AnyPendingTokens = true;
838   }
839   if (!AnyPendingTokens || getCommentRetentionState())
840     return false;
841   Lex(result);
842   return true;
843 }
844 
845 ModuleLoader::~ModuleLoader() { }
846 
847 CommentHandler::~CommentHandler() { }
848 
849 CodeCompletionHandler::~CodeCompletionHandler() { }
850 
851 void Preprocessor::createPreprocessingRecord() {
852   if (Record)
853     return;
854 
855   Record = new PreprocessingRecord(getSourceManager());
856   addPPCallbacks(Record);
857 }
858