1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/ADT/StringSwitch.h"
50 #include "llvm/Support/Capacity.h"
51 #include "llvm/Support/ConvertUTF.h"
52 #include "llvm/Support/MemoryBuffer.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <utility>
55 using namespace clang;
56 
57 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
58 
59 //===----------------------------------------------------------------------===//
60 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
61 
/// Construct a preprocessor from its options, diagnostics engine, language
/// options, source manager, header search object and module loader.
///
/// The target pointers start out null and are filled in by Initialize().
/// \param IILookup    Forwarded to the identifier table together with \p opts.
/// \param OwnsHeaders Recorded in OwnsHeaderSearch; when true the destructor
///                    deletes the header search object.
/// \param TUKind      Stored as the kind of translation unit being processed.
Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
                           DiagnosticsEngine &diags, LangOptions &opts,
                           SourceManager &SM, HeaderSearch &Headers,
                           ModuleLoader &TheModuleLoader,
                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
                           TranslationUnitKind TUKind)
    : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
      AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
      ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
      TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
      Identifiers(opts, IILookup),
      PragmaHandlers(new PragmaNamespace(StringRef())),
      IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
      CodeCompletionFile(nullptr), CodeCompletionOffset(0),
      LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
      CodeCompletionReached(0), CodeCompletionII(0), MainFileDir(nullptr),
      SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
      CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
      CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
      Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
  OwnsHeaderSearch = OwnsHeaders;

  CounterValue = 0; // __COUNTER__ starts at 0.

  // Clear stats.
  NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
  NumIf = NumElse = NumEndif = 0;
  NumEnteredSourceFiles = 0;
  NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
  NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
  MaxIncludeStackDepth = 0;
  NumSkipped = 0;

  // Default to discarding comments.
  KeepComments = false;
  KeepMacroComments = false;
  SuppressIncludeNotFoundError = false;

  // Macro expansion is enabled.
  DisableMacroExpansion = false;
  MacroExpansionInDirectivesOverride = false;
  InMacroArgs = false;
  InMacroArgPreExpansion = false;
  NumCachedTokenLexers = 0;
  PragmasEnabled = true;
  ParsingIfOrElifDirective = false;
  PreprocessedOutput = false;

  // No tokens have been cached yet.
  CachedLexPos = 0;

  // We haven't read anything from the external source.
  ReadMacrosFromExternalSource = false;

  // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
  // This gets unpoisoned where it is allowed.
  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
  // Report uses of the poisoned identifier with a dedicated diagnostic rather
  // than the generic poisoned-identifier error.
  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);

  // Initialize the pragma handlers.
  RegisterBuiltinPragmas();

  // Initialize builtin macros like __LINE__ and friends.
  RegisterBuiltinMacros();

  // The SEH-related identifiers are only looked up when the Borland language
  // option is set; otherwise they all stay null. PoisonSEHIdentifiers()
  // dereferences them, so it may only be used in the former case.
  if(LangOpts.Borland) {
    Ident__exception_info        = getIdentifierInfo("_exception_info");
    Ident___exception_info       = getIdentifierInfo("__exception_info");
    Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
    Ident__exception_code        = getIdentifierInfo("_exception_code");
    Ident___exception_code       = getIdentifierInfo("__exception_code");
    Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
    Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
  } else {
    Ident__exception_info = Ident__exception_code = nullptr;
    Ident__abnormal_termination = Ident___exception_info = nullptr;
    Ident___exception_code = Ident___abnormal_termination = nullptr;
    Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
    Ident_AbnormalTermination = nullptr;
  }
}
144 
145 Preprocessor::~Preprocessor() {
146   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
147 
148   IncludeMacroStack.clear();
149 
150   // Destroy any macro definitions.
151   while (MacroInfoChain *I = MIChainHead) {
152     MIChainHead = I->Next;
153     I->~MacroInfoChain();
154   }
155 
156   // Free any cached macro expanders.
157   // This populates MacroArgCache, so all TokenLexers need to be destroyed
158   // before the code below that frees up the MacroArgCache list.
159   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
160   CurTokenLexer.reset();
161 
162   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
163     DeserialMIChainHead = I->Next;
164     I->~DeserializedMacroInfoChain();
165   }
166 
167   // Free any cached MacroArgs.
168   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
169     ArgList = ArgList->deallocate();
170 
171   // Delete the header search info, if we own it.
172   if (OwnsHeaderSearch)
173     delete &HeaderInfo;
174 }
175 
176 void Preprocessor::Initialize(const TargetInfo &Target,
177                               const TargetInfo *AuxTarget) {
178   assert((!this->Target || this->Target == &Target) &&
179          "Invalid override of target information");
180   this->Target = &Target;
181 
182   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
183          "Invalid override of aux target information.");
184   this->AuxTarget = AuxTarget;
185 
186   // Initialize information about built-ins.
187   BuiltinInfo.InitializeTarget(Target, AuxTarget);
188   HeaderInfo.setTarget(Target);
189 }
190 
/// Put the preprocessor into a state in which a model file can be entered:
/// the entered-file counter is reset, the current pragma handlers are stashed
/// in PragmaHandlersBackup and replaced by a fresh set of builtin handlers,
/// and the predefines file ID is cleared. FinalizeForModelFile() restores the
/// stashed handlers.
void Preprocessor::InitializeForModelFile() {
  // EnterMainSourceFile() asserts that no source file has been entered yet.
  NumEnteredSourceFiles = 0;

  // Reset pragmas
  PragmaHandlersBackup = std::move(PragmaHandlers);
  PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
  RegisterBuiltinPragmas();

  // Reset PredefinesFileID
  PredefinesFileID = FileID();
}
202 
203 void Preprocessor::FinalizeForModelFile() {
204   NumEnteredSourceFiles = 1;
205 
206   PragmaHandlers = std::move(PragmaHandlersBackup);
207 }
208 
/// Install \p pm as the PTH manager and register the stat cache it creates
/// with the file manager.
///
/// \p pm is handed to PTH via reset() and dereferenced immediately afterwards,
/// so it must not be null.
void Preprocessor::setPTHManager(PTHManager* pm) {
  PTH.reset(pm);
  FileMgr.addStatCache(PTH->createStatCache());
}
213 
214 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
215   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
216                << getSpelling(Tok) << "'";
217 
218   if (!DumpFlags) return;
219 
220   llvm::errs() << "\t";
221   if (Tok.isAtStartOfLine())
222     llvm::errs() << " [StartOfLine]";
223   if (Tok.hasLeadingSpace())
224     llvm::errs() << " [LeadingSpace]";
225   if (Tok.isExpandDisabled())
226     llvm::errs() << " [ExpandDisabled]";
227   if (Tok.needsCleaning()) {
228     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
229     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
230                  << "']";
231   }
232 
233   llvm::errs() << "\tLoc=<";
234   DumpLocation(Tok.getLocation());
235   llvm::errs() << ">";
236 }
237 
/// Dump \p Loc by forwarding to SourceLocation::dump with this preprocessor's
/// source manager.
void Preprocessor::DumpLocation(SourceLocation Loc) const {
  Loc.dump(SourceMgr);
}
241 
242 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
243   llvm::errs() << "MACRO: ";
244   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
245     DumpToken(MI.getReplacementToken(i));
246     llvm::errs() << "  ";
247   }
248   llvm::errs() << "\n";
249 }
250 
251 void Preprocessor::PrintStats() {
252   llvm::errs() << "\n*** Preprocessor Stats:\n";
253   llvm::errs() << NumDirectives << " directives found:\n";
254   llvm::errs() << "  " << NumDefined << " #define.\n";
255   llvm::errs() << "  " << NumUndefined << " #undef.\n";
256   llvm::errs() << "  #include/#include_next/#import:\n";
257   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
258   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
259   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
260   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
261   llvm::errs() << "  " << NumEndif << " #endif.\n";
262   llvm::errs() << "  " << NumPragma << " #pragma.\n";
263   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
264 
265   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
266              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
267              << NumFastMacroExpanded << " on the fast path.\n";
268   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
269              << " token paste (##) operations performed, "
270              << NumFastTokenPaste << " on the fast path.\n";
271 
272   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
273 
274   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
275   llvm::errs() << "\n  Macro Expanded Tokens: "
276                << llvm::capacity_in_bytes(MacroExpandedTokens);
277   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
278   // FIXME: List information for all submodules.
279   llvm::errs() << "\n  Macros: "
280                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
281   llvm::errs() << "\n  #pragma push_macro Info: "
282                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
283   llvm::errs() << "\n  Poison Reasons: "
284                << llvm::capacity_in_bytes(PoisonReasons);
285   llvm::errs() << "\n  Comment Handlers: "
286                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
287 }
288 
289 Preprocessor::macro_iterator
290 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
291   if (IncludeExternalMacros && ExternalSource &&
292       !ReadMacrosFromExternalSource) {
293     ReadMacrosFromExternalSource = true;
294     ExternalSource->ReadDefinedMacros();
295   }
296 
297   // Make sure we cover all macros in visible modules.
298   for (const ModuleMacro &Macro : ModuleMacros)
299     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
300 
301   return CurSubmoduleState->Macros.begin();
302 }
303 
304 size_t Preprocessor::getTotalMemory() const {
305   return BP.getTotalMemory()
306     + llvm::capacity_in_bytes(MacroExpandedTokens)
307     + Predefines.capacity() /* Predefines buffer. */
308     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
309     // and ModuleMacros.
310     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
311     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
312     + llvm::capacity_in_bytes(PoisonReasons)
313     + llvm::capacity_in_bytes(CommentHandlers);
314 }
315 
316 Preprocessor::macro_iterator
317 Preprocessor::macro_end(bool IncludeExternalMacros) const {
318   if (IncludeExternalMacros && ExternalSource &&
319       !ReadMacrosFromExternalSource) {
320     ReadMacrosFromExternalSource = true;
321     ExternalSource->ReadDefinedMacros();
322   }
323 
324   return CurSubmoduleState->Macros.end();
325 }
326 
327 /// \brief Compares macro tokens with a specified token value sequence.
328 static bool MacroDefinitionEquals(const MacroInfo *MI,
329                                   ArrayRef<TokenValue> Tokens) {
330   return Tokens.size() == MI->getNumTokens() &&
331       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
332 }
333 
334 StringRef Preprocessor::getLastMacroWithSpelling(
335                                     SourceLocation Loc,
336                                     ArrayRef<TokenValue> Tokens) const {
337   SourceLocation BestLocation;
338   StringRef BestSpelling;
339   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
340        I != E; ++I) {
341     const MacroDirective::DefInfo
342       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
343     if (!Def || !Def.getMacroInfo())
344       continue;
345     if (!Def.getMacroInfo()->isObjectLike())
346       continue;
347     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
348       continue;
349     SourceLocation Location = Def.getLocation();
350     // Choose the macro defined latest.
351     if (BestLocation.isInvalid() ||
352         (Location.isValid() &&
353          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
354       BestLocation = Location;
355       BestSpelling = I->first->getName();
356     }
357   }
358   return BestSpelling;
359 }
360 
361 void Preprocessor::recomputeCurLexerKind() {
362   if (CurLexer)
363     CurLexerKind = CLK_Lexer;
364   else if (CurPTHLexer)
365     CurLexerKind = CLK_PTHLexer;
366   else if (CurTokenLexer)
367     CurLexerKind = CLK_TokenLexer;
368   else
369     CurLexerKind = CLK_CachingLexer;
370 }
371 
372 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
373                                           unsigned CompleteLine,
374                                           unsigned CompleteColumn) {
375   assert(File);
376   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
377   assert(!CodeCompletionFile && "Already set");
378 
379   using llvm::MemoryBuffer;
380 
381   // Load the actual file's contents.
382   bool Invalid = false;
383   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
384   if (Invalid)
385     return true;
386 
387   // Find the byte position of the truncation point.
388   const char *Position = Buffer->getBufferStart();
389   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
390     for (; *Position; ++Position) {
391       if (*Position != '\r' && *Position != '\n')
392         continue;
393 
394       // Eat \r\n or \n\r as a single line.
395       if ((Position[1] == '\r' || Position[1] == '\n') &&
396           Position[0] != Position[1])
397         ++Position;
398       ++Position;
399       break;
400     }
401   }
402 
403   Position += CompleteColumn - 1;
404 
405   // If pointing inside the preamble, adjust the position at the beginning of
406   // the file after the preamble.
407   if (SkipMainFilePreamble.first &&
408       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
409     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
410       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
411   }
412 
413   if (Position > Buffer->getBufferEnd())
414     Position = Buffer->getBufferEnd();
415 
416   CodeCompletionFile = File;
417   CodeCompletionOffset = Position - Buffer->getBufferStart();
418 
419   std::unique_ptr<MemoryBuffer> NewBuffer =
420       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
421                                           Buffer->getBufferIdentifier());
422   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
423   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
424   *NewPos = '\0';
425   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
426   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
427 
428   return false;
429 }
430 
/// Forward a natural-language code-completion request to the completion
/// handler, if one is installed, and then mark code completion as reached.
void Preprocessor::CodeCompleteNaturalLanguage() {
  if (CodeComplete)
    CodeComplete->CodeCompleteNaturalLanguage();
  // Done unconditionally, whether or not a handler is installed.
  setCodeCompletionReached();
}
436 
437 /// getSpelling - This method is used to get the spelling of a token into a
438 /// SmallVector. Note that the returned StringRef may not point to the
439 /// supplied buffer if a copy can be avoided.
440 StringRef Preprocessor::getSpelling(const Token &Tok,
441                                           SmallVectorImpl<char> &Buffer,
442                                           bool *Invalid) const {
443   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
444   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
445     // Try the fast path.
446     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
447       return II->getName();
448   }
449 
450   // Resize the buffer if we need to copy into it.
451   if (Tok.needsCleaning())
452     Buffer.resize(Tok.getLength());
453 
454   const char *Ptr = Buffer.data();
455   unsigned Len = getSpelling(Tok, Ptr, Invalid);
456   return StringRef(Ptr, Len);
457 }
458 
459 /// CreateString - Plop the specified string into a scratch buffer and return a
460 /// location for it.  If specified, the source location provides a source
461 /// location for the token.
462 void Preprocessor::CreateString(StringRef Str, Token &Tok,
463                                 SourceLocation ExpansionLocStart,
464                                 SourceLocation ExpansionLocEnd) {
465   Tok.setLength(Str.size());
466 
467   const char *DestPtr;
468   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
469 
470   if (ExpansionLocStart.isValid())
471     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
472                                        ExpansionLocEnd, Str.size());
473   Tok.setLocation(Loc);
474 
475   // If this is a raw identifier or a literal token, set the pointer data.
476   if (Tok.is(tok::raw_identifier))
477     Tok.setRawIdentifierData(DestPtr);
478   else if (Tok.isLiteral())
479     Tok.setLiteralData(DestPtr);
480 }
481 
482 Module *Preprocessor::getCurrentModule() {
483   if (!getLangOpts().CompilingModule)
484     return nullptr;
485 
486   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
487 }
488 
489 //===----------------------------------------------------------------------===//
490 // Preprocessor Initialization Methods
491 //===----------------------------------------------------------------------===//
492 
493 
494 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
495 /// which implicitly adds the builtin defines etc.
496 void Preprocessor::EnterMainSourceFile() {
497   // We do not allow the preprocessor to reenter the main file.  Doing so will
498   // cause FileID's to accumulate information from both runs (e.g. #line
499   // information) and predefined macros aren't guaranteed to be set properly.
500   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
501   FileID MainFileID = SourceMgr.getMainFileID();
502 
503   // If MainFileID is loaded it means we loaded an AST file, no need to enter
504   // a main file.
505   if (!SourceMgr.isLoadedFileID(MainFileID)) {
506     // Enter the main file source buffer.
507     EnterSourceFile(MainFileID, nullptr, SourceLocation());
508 
509     // If we've been asked to skip bytes in the main file (e.g., as part of a
510     // precompiled preamble), do so now.
511     if (SkipMainFilePreamble.first > 0)
512       CurLexer->SkipBytes(SkipMainFilePreamble.first,
513                           SkipMainFilePreamble.second);
514 
515     // Tell the header info that the main file was entered.  If the file is later
516     // #imported, it won't be re-entered.
517     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
518       HeaderInfo.IncrementIncludeCount(FE);
519   }
520 
521   // Preprocess Predefines to populate the initial preprocessor state.
522   std::unique_ptr<llvm::MemoryBuffer> SB =
523     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
524   assert(SB && "Cannot create predefined source buffer");
525   FileID FID = SourceMgr.createFileID(std::move(SB));
526   assert(FID.isValid() && "Could not create FileID for predefines?");
527   setPredefinesFileID(FID);
528 
529   // Start parsing the predefines.
530   EnterSourceFile(FID, nullptr, SourceLocation());
531 }
532 
/// Signal the end of the main source file to the registered callbacks, if any.
void Preprocessor::EndSourceFile() {
  // Notify the client that we reached the end of the source file.
  if (Callbacks)
    Callbacks->EndOfMainFile();
}
538 
539 //===----------------------------------------------------------------------===//
540 // Lexer Event Handling.
541 //===----------------------------------------------------------------------===//
542 
543 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
544 /// identifier information for the token and install it into the token,
545 /// updating the token kind accordingly.
546 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
547   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
548 
549   // Look up this token, see if it is a macro, or if it is a language keyword.
550   IdentifierInfo *II;
551   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
552     // No cleaning needed, just use the characters from the lexed buffer.
553     II = getIdentifierInfo(Identifier.getRawIdentifier());
554   } else {
555     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
556     SmallString<64> IdentifierBuffer;
557     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
558 
559     if (Identifier.hasUCN()) {
560       SmallString<64> UCNIdentifierBuffer;
561       expandUCNs(UCNIdentifierBuffer, CleanedStr);
562       II = getIdentifierInfo(UCNIdentifierBuffer);
563     } else {
564       II = getIdentifierInfo(CleanedStr);
565     }
566   }
567 
568   // Update the token info (identifier info and appropriate token kind).
569   Identifier.setIdentifierInfo(II);
570   Identifier.setKind(II->getTokenID());
571 
572   return II;
573 }
574 
/// Associate the diagnostic \p DiagID with the identifier \p II, replacing any
/// previously stored value. HandlePoisonedIdentifier() reports the stored
/// diagnostic instead of the generic poisoned-identifier error.
void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
  PoisonReasons[II] = DiagID;
}
578 
579 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
580   assert(Ident__exception_code && Ident__exception_info);
581   assert(Ident___exception_code && Ident___exception_info);
582   Ident__exception_code->setIsPoisoned(Poison);
583   Ident___exception_code->setIsPoisoned(Poison);
584   Ident_GetExceptionCode->setIsPoisoned(Poison);
585   Ident__exception_info->setIsPoisoned(Poison);
586   Ident___exception_info->setIsPoisoned(Poison);
587   Ident_GetExceptionInfo->setIsPoisoned(Poison);
588   Ident__abnormal_termination->setIsPoisoned(Poison);
589   Ident___abnormal_termination->setIsPoisoned(Poison);
590   Ident_AbnormalTermination->setIsPoisoned(Poison);
591 }
592 
593 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
594   assert(Identifier.getIdentifierInfo() &&
595          "Can't handle identifiers without identifier info!");
596   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
597     PoisonReasons.find(Identifier.getIdentifierInfo());
598   if(it == PoisonReasons.end())
599     Diag(Identifier, diag::err_pp_used_poisoned_id);
600   else
601     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
602 }
603 
/// \brief Returns a diagnostic message kind for reporting a future keyword as
/// appropriate for the identifier and specified language.
///
/// \p II must be flagged as a future-compat keyword. Only C++ is handled;
/// for any other language the unreachable marker at the end is hit.
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
                                          const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  // The include below expands CXX11_KEYWORD once per entry in TokenKinds.def,
  // appending one .Case(...) per C++11 keyword to the StringSwitch.
  // NOTE(review): presumably TokenKinds.def #undefs the macro itself - confirm.
  if (LangOpts.CPlusPlus)
    return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx11_keyword)
#include "clang/Basic/TokenKinds.def"
        ;

  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}
620 
/// Ask the external source to bring the out-of-date identifier \p II up to
/// date. The result of getExternalSource() is dereferenced without a null
/// check, so an external source must be present.
void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
  assert(II.isOutOfDate() && "not out of date");
  getExternalSource()->updateOutOfDateIdentifier(II);
}
625 
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier.  This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \returns the result of HandleMacroExpandedIdentifier() when the identifier
/// is macro-expanded, and true in every other case.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    // Remember the current poison state of __VA_ARGS__ so it can be restored
    // after the update.
    bool CurrentIsPoisoned = false;
    if (&II == Ident__VA_ARGS__)
      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();

    updateOutOfDateIdentifier(II);
    Identifier.setKind(II.getTokenID());

    if (&II == Ident__VA_ARGS__)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDefinition MD = getMacroDefinition(&II)) {
    auto *MI = MD.getMacroInfo();
    assert(MI && "macro definition with no macro info?");
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in a newer Standard or proposed Standard,
  // produce a warning. Don't warn if we're not considering macro expansion,
  // since this identifier might be the name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
        << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsFutureCompatKeyword(false);
  }

  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
  // then we act as if it is the actual operator and not the textual
  // representation of it.
  if (II.isCPlusPlusOperatorKeyword())
    Identifier.setIdentifierInfo(nullptr);

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
      !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    // Start collecting the module path; subsequent tokens are routed through
    // LexAfterModuleImport() by Lex().
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}
726 
727 void Preprocessor::Lex(Token &Result) {
728   // We loop here until a lex function returns a token; this avoids recursion.
729   bool ReturnedToken;
730   do {
731     switch (CurLexerKind) {
732     case CLK_Lexer:
733       ReturnedToken = CurLexer->Lex(Result);
734       break;
735     case CLK_PTHLexer:
736       ReturnedToken = CurPTHLexer->Lex(Result);
737       break;
738     case CLK_TokenLexer:
739       ReturnedToken = CurTokenLexer->Lex(Result);
740       break;
741     case CLK_CachingLexer:
742       CachingLex(Result);
743       ReturnedToken = true;
744       break;
745     case CLK_LexAfterModuleImport:
746       LexAfterModuleImport(Result);
747       ReturnedToken = true;
748       break;
749     }
750   } while (!ReturnedToken);
751 
752   if (Result.is(tok::code_completion))
753     setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
754 
755   LastTokenWasAt = Result.is(tok::at);
756 }
757 
758 
759 /// \brief Lex a token following the 'import' contextual keyword.
760 ///
761 void Preprocessor::LexAfterModuleImport(Token &Result) {
762   // Figure out what kind of lexer we actually have.
763   recomputeCurLexerKind();
764 
765   // Lex the next token.
766   Lex(Result);
767 
768   // The token sequence
769   //
770   //   import identifier (. identifier)*
771   //
772   // indicates a module import directive. We already saw the 'import'
773   // contextual keyword, so now we're looking for the identifiers.
774   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
775     // We expected to see an identifier here, and we did; continue handling
776     // identifiers.
777     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
778                                               Result.getLocation()));
779     ModuleImportExpectsIdentifier = false;
780     CurLexerKind = CLK_LexAfterModuleImport;
781     return;
782   }
783 
784   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
785   // see the next identifier.
786   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
787     ModuleImportExpectsIdentifier = true;
788     CurLexerKind = CLK_LexAfterModuleImport;
789     return;
790   }
791 
792   // If we have a non-empty module path, load the named module.
793   if (!ModuleImportPath.empty()) {
794     Module *Imported = nullptr;
795     if (getLangOpts().Modules) {
796       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
797                                             ModuleImportPath,
798                                             Module::Hidden,
799                                             /*IsIncludeDirective=*/false);
800       if (Imported)
801         makeModuleVisible(Imported, ModuleImportLoc);
802     }
803     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
804       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
805   }
806 }
807 
808 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
809   CurSubmoduleState->VisibleModules.setVisible(
810       M, Loc, [](Module *) {},
811       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
812         // FIXME: Include the path in the diagnostic.
813         // FIXME: Include the import location for the conflicting module.
814         Diag(ModuleImportLoc, diag::warn_module_conflict)
815             << Path[0]->getFullModuleName()
816             << Conflict->getFullModuleName()
817             << Message;
818       });
819 
820   // Add this module to the imports list of the currently-built submodule.
821   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
822     BuildingSubmoduleStack.back().M->Imports.insert(M);
823 }
824 
825 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
826                                           const char *DiagnosticTag,
827                                           bool AllowMacroExpansion) {
828   // We need at least one string literal.
829   if (Result.isNot(tok::string_literal)) {
830     Diag(Result, diag::err_expected_string_literal)
831       << /*Source='in...'*/0 << DiagnosticTag;
832     return false;
833   }
834 
835   // Lex string literal tokens, optionally with macro expansion.
836   SmallVector<Token, 4> StrToks;
837   do {
838     StrToks.push_back(Result);
839 
840     if (Result.hasUDSuffix())
841       Diag(Result, diag::err_invalid_string_udl);
842 
843     if (AllowMacroExpansion)
844       Lex(Result);
845     else
846       LexUnexpandedToken(Result);
847   } while (Result.is(tok::string_literal));
848 
849   // Concatenate and parse the strings.
850   StringLiteralParser Literal(StrToks, *this);
851   assert(Literal.isAscii() && "Didn't allow wide strings in");
852 
853   if (Literal.hadError)
854     return false;
855 
856   if (Literal.Pascal) {
857     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
858       << /*Source='in...'*/0 << DiagnosticTag;
859     return false;
860   }
861 
862   String = Literal.GetString();
863   return true;
864 }
865 
866 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
867   assert(Tok.is(tok::numeric_constant));
868   SmallString<8> IntegerBuffer;
869   bool NumberInvalid = false;
870   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
871   if (NumberInvalid)
872     return false;
873   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
874   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
875     return false;
876   llvm::APInt APVal(64, 0);
877   if (Literal.GetIntegerValue(APVal))
878     return false;
879   Lex(Tok);
880   Value = APVal.getLimitedValue();
881   return true;
882 }
883 
884 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
885   assert(Handler && "NULL comment handler");
886   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
887          CommentHandlers.end() && "Comment handler already registered");
888   CommentHandlers.push_back(Handler);
889 }
890 
891 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
892   std::vector<CommentHandler *>::iterator Pos
893   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
894   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
895   CommentHandlers.erase(Pos);
896 }
897 
898 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
899   bool AnyPendingTokens = false;
900   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
901        HEnd = CommentHandlers.end();
902        H != HEnd; ++H) {
903     if ((*H)->HandleComment(*this, Comment))
904       AnyPendingTokens = true;
905   }
906   if (!AnyPendingTokens || getCommentRetentionState())
907     return false;
908   Lex(result);
909   return true;
910 }
911 
912 ModuleLoader::~ModuleLoader() { }
913 
914 CommentHandler::~CommentHandler() { }
915 
916 CodeCompletionHandler::~CodeCompletionHandler() { }
917 
918 void Preprocessor::createPreprocessingRecord() {
919   if (Record)
920     return;
921 
922   Record = new PreprocessingRecord(getSourceManager());
923   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
924 }
925