1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/ADT/StringSwitch.h"
50 #include "llvm/Support/Capacity.h"
51 #include "llvm/Support/ConvertUTF.h"
52 #include "llvm/Support/MemoryBuffer.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <utility>
55 using namespace clang;
56 
57 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
58 
59 //===----------------------------------------------------------------------===//
60 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
61 
62 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
63                            DiagnosticsEngine &diags, LangOptions &opts,
64                            SourceManager &SM, HeaderSearch &Headers,
65                            ModuleLoader &TheModuleLoader,
66                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
67                            TranslationUnitKind TUKind)
68     : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
69       AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
70       ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
71       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
72       Identifiers(opts, IILookup),
73       PragmaHandlers(new PragmaNamespace(StringRef())),
74       IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
75       CodeCompletionFile(nullptr), CodeCompletionOffset(0),
76       LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
77       CodeCompletionReached(0), CodeCompletionII(0), MainFileDir(nullptr),
78       SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
79       CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
80       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
81       Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
82   OwnsHeaderSearch = OwnsHeaders;
83 
84   CounterValue = 0; // __COUNTER__ starts at 0.
85 
86   // Clear stats.
87   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
88   NumIf = NumElse = NumEndif = 0;
89   NumEnteredSourceFiles = 0;
90   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
91   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
92   MaxIncludeStackDepth = 0;
93   NumSkipped = 0;
94 
95   // Default to discarding comments.
96   KeepComments = false;
97   KeepMacroComments = false;
98   SuppressIncludeNotFoundError = false;
99 
100   // Macro expansion is enabled.
101   DisableMacroExpansion = false;
102   MacroExpansionInDirectivesOverride = false;
103   InMacroArgs = false;
104   InMacroArgPreExpansion = false;
105   NumCachedTokenLexers = 0;
106   PragmasEnabled = true;
107   ParsingIfOrElifDirective = false;
108   PreprocessedOutput = false;
109 
110   CachedLexPos = 0;
111 
112   // We haven't read anything from the external source.
113   ReadMacrosFromExternalSource = false;
114 
115   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
116   // This gets unpoisoned where it is allowed.
117   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
118   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
119 
120   // Initialize the pragma handlers.
121   RegisterBuiltinPragmas();
122 
123   // Initialize builtin macros like __LINE__ and friends.
124   RegisterBuiltinMacros();
125 
126   if(LangOpts.Borland) {
127     Ident__exception_info        = getIdentifierInfo("_exception_info");
128     Ident___exception_info       = getIdentifierInfo("__exception_info");
129     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
130     Ident__exception_code        = getIdentifierInfo("_exception_code");
131     Ident___exception_code       = getIdentifierInfo("__exception_code");
132     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
133     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
134     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
135     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
136   } else {
137     Ident__exception_info = Ident__exception_code = nullptr;
138     Ident__abnormal_termination = Ident___exception_info = nullptr;
139     Ident___exception_code = Ident___abnormal_termination = nullptr;
140     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
141     Ident_AbnormalTermination = nullptr;
142   }
143 }
144 
145 Preprocessor::~Preprocessor() {
146   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
147 
148   IncludeMacroStack.clear();
149 
150   // Destroy any macro definitions.
151   while (MacroInfoChain *I = MIChainHead) {
152     MIChainHead = I->Next;
153     I->~MacroInfoChain();
154   }
155 
156   // Free any cached macro expanders.
157   // This populates MacroArgCache, so all TokenLexers need to be destroyed
158   // before the code below that frees up the MacroArgCache list.
159   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
160   CurTokenLexer.reset();
161 
162   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
163     DeserialMIChainHead = I->Next;
164     I->~DeserializedMacroInfoChain();
165   }
166 
167   // Free any cached MacroArgs.
168   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
169     ArgList = ArgList->deallocate();
170 
171   // Delete the header search info, if we own it.
172   if (OwnsHeaderSearch)
173     delete &HeaderInfo;
174 }
175 
176 void Preprocessor::Initialize(const TargetInfo &Target,
177                               const TargetInfo *AuxTarget) {
178   assert((!this->Target || this->Target == &Target) &&
179          "Invalid override of target information");
180   this->Target = &Target;
181 
182   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
183          "Invalid override of aux target information.");
184   this->AuxTarget = AuxTarget;
185 
186   // Initialize information about built-ins.
187   BuiltinInfo.InitializeTarget(Target, AuxTarget);
188   HeaderInfo.setTarget(Target);
189 }
190 
191 void Preprocessor::InitializeForModelFile() {
192   NumEnteredSourceFiles = 0;
193 
194   // Reset pragmas
195   PragmaHandlersBackup = std::move(PragmaHandlers);
196   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
197   RegisterBuiltinPragmas();
198 
199   // Reset PredefinesFileID
200   PredefinesFileID = FileID();
201 }
202 
203 void Preprocessor::FinalizeForModelFile() {
204   NumEnteredSourceFiles = 1;
205 
206   PragmaHandlers = std::move(PragmaHandlersBackup);
207 }
208 
209 void Preprocessor::setPTHManager(PTHManager* pm) {
210   PTH.reset(pm);
211   FileMgr.addStatCache(PTH->createStatCache());
212 }
213 
214 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
215   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
216                << getSpelling(Tok) << "'";
217 
218   if (!DumpFlags) return;
219 
220   llvm::errs() << "\t";
221   if (Tok.isAtStartOfLine())
222     llvm::errs() << " [StartOfLine]";
223   if (Tok.hasLeadingSpace())
224     llvm::errs() << " [LeadingSpace]";
225   if (Tok.isExpandDisabled())
226     llvm::errs() << " [ExpandDisabled]";
227   if (Tok.needsCleaning()) {
228     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
229     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
230                  << "']";
231   }
232 
233   llvm::errs() << "\tLoc=<";
234   DumpLocation(Tok.getLocation());
235   llvm::errs() << ">";
236 }
237 
238 void Preprocessor::DumpLocation(SourceLocation Loc) const {
239   Loc.dump(SourceMgr);
240 }
241 
242 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
243   llvm::errs() << "MACRO: ";
244   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
245     DumpToken(MI.getReplacementToken(i));
246     llvm::errs() << "  ";
247   }
248   llvm::errs() << "\n";
249 }
250 
251 void Preprocessor::PrintStats() {
252   llvm::errs() << "\n*** Preprocessor Stats:\n";
253   llvm::errs() << NumDirectives << " directives found:\n";
254   llvm::errs() << "  " << NumDefined << " #define.\n";
255   llvm::errs() << "  " << NumUndefined << " #undef.\n";
256   llvm::errs() << "  #include/#include_next/#import:\n";
257   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
258   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
259   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
260   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
261   llvm::errs() << "  " << NumEndif << " #endif.\n";
262   llvm::errs() << "  " << NumPragma << " #pragma.\n";
263   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
264 
265   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
266              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
267              << NumFastMacroExpanded << " on the fast path.\n";
268   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
269              << " token paste (##) operations performed, "
270              << NumFastTokenPaste << " on the fast path.\n";
271 
272   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
273 
274   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
275   llvm::errs() << "\n  Macro Expanded Tokens: "
276                << llvm::capacity_in_bytes(MacroExpandedTokens);
277   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
278   // FIXME: List information for all submodules.
279   llvm::errs() << "\n  Macros: "
280                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
281   llvm::errs() << "\n  #pragma push_macro Info: "
282                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
283   llvm::errs() << "\n  Poison Reasons: "
284                << llvm::capacity_in_bytes(PoisonReasons);
285   llvm::errs() << "\n  Comment Handlers: "
286                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
287 }
288 
289 Preprocessor::macro_iterator
290 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
291   if (IncludeExternalMacros && ExternalSource &&
292       !ReadMacrosFromExternalSource) {
293     ReadMacrosFromExternalSource = true;
294     ExternalSource->ReadDefinedMacros();
295   }
296 
297   // Make sure we cover all macros in visible modules.
298   for (const ModuleMacro &Macro : ModuleMacros)
299     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
300 
301   return CurSubmoduleState->Macros.begin();
302 }
303 
304 size_t Preprocessor::getTotalMemory() const {
305   return BP.getTotalMemory()
306     + llvm::capacity_in_bytes(MacroExpandedTokens)
307     + Predefines.capacity() /* Predefines buffer. */
308     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
309     // and ModuleMacros.
310     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
311     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
312     + llvm::capacity_in_bytes(PoisonReasons)
313     + llvm::capacity_in_bytes(CommentHandlers);
314 }
315 
316 Preprocessor::macro_iterator
317 Preprocessor::macro_end(bool IncludeExternalMacros) const {
318   if (IncludeExternalMacros && ExternalSource &&
319       !ReadMacrosFromExternalSource) {
320     ReadMacrosFromExternalSource = true;
321     ExternalSource->ReadDefinedMacros();
322   }
323 
324   return CurSubmoduleState->Macros.end();
325 }
326 
327 /// \brief Compares macro tokens with a specified token value sequence.
328 static bool MacroDefinitionEquals(const MacroInfo *MI,
329                                   ArrayRef<TokenValue> Tokens) {
330   return Tokens.size() == MI->getNumTokens() &&
331       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
332 }
333 
334 StringRef Preprocessor::getLastMacroWithSpelling(
335                                     SourceLocation Loc,
336                                     ArrayRef<TokenValue> Tokens) const {
337   SourceLocation BestLocation;
338   StringRef BestSpelling;
339   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
340        I != E; ++I) {
341     const MacroDirective::DefInfo
342       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
343     if (!Def || !Def.getMacroInfo())
344       continue;
345     if (!Def.getMacroInfo()->isObjectLike())
346       continue;
347     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
348       continue;
349     SourceLocation Location = Def.getLocation();
350     // Choose the macro defined latest.
351     if (BestLocation.isInvalid() ||
352         (Location.isValid() &&
353          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
354       BestLocation = Location;
355       BestSpelling = I->first->getName();
356     }
357   }
358   return BestSpelling;
359 }
360 
361 void Preprocessor::recomputeCurLexerKind() {
362   if (CurLexer)
363     CurLexerKind = CLK_Lexer;
364   else if (CurPTHLexer)
365     CurLexerKind = CLK_PTHLexer;
366   else if (CurTokenLexer)
367     CurLexerKind = CLK_TokenLexer;
368   else
369     CurLexerKind = CLK_CachingLexer;
370 }
371 
372 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
373                                           unsigned CompleteLine,
374                                           unsigned CompleteColumn) {
375   assert(File);
376   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
377   assert(!CodeCompletionFile && "Already set");
378 
379   using llvm::MemoryBuffer;
380 
381   // Load the actual file's contents.
382   bool Invalid = false;
383   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
384   if (Invalid)
385     return true;
386 
387   // Find the byte position of the truncation point.
388   const char *Position = Buffer->getBufferStart();
389   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
390     for (; *Position; ++Position) {
391       if (*Position != '\r' && *Position != '\n')
392         continue;
393 
394       // Eat \r\n or \n\r as a single line.
395       if ((Position[1] == '\r' || Position[1] == '\n') &&
396           Position[0] != Position[1])
397         ++Position;
398       ++Position;
399       break;
400     }
401   }
402 
403   Position += CompleteColumn - 1;
404 
405   // If pointing inside the preamble, adjust the position at the beginning of
406   // the file after the preamble.
407   if (SkipMainFilePreamble.first &&
408       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
409     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
410       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
411   }
412 
413   if (Position > Buffer->getBufferEnd())
414     Position = Buffer->getBufferEnd();
415 
416   CodeCompletionFile = File;
417   CodeCompletionOffset = Position - Buffer->getBufferStart();
418 
419   std::unique_ptr<MemoryBuffer> NewBuffer =
420       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
421                                           Buffer->getBufferIdentifier());
422   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
423   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
424   *NewPos = '\0';
425   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
426   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
427 
428   return false;
429 }
430 
431 void Preprocessor::CodeCompleteNaturalLanguage() {
432   if (CodeComplete)
433     CodeComplete->CodeCompleteNaturalLanguage();
434   setCodeCompletionReached();
435 }
436 
437 /// getSpelling - This method is used to get the spelling of a token into a
438 /// SmallVector. Note that the returned StringRef may not point to the
439 /// supplied buffer if a copy can be avoided.
440 StringRef Preprocessor::getSpelling(const Token &Tok,
441                                           SmallVectorImpl<char> &Buffer,
442                                           bool *Invalid) const {
443   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
444   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
445     // Try the fast path.
446     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
447       return II->getName();
448   }
449 
450   // Resize the buffer if we need to copy into it.
451   if (Tok.needsCleaning())
452     Buffer.resize(Tok.getLength());
453 
454   const char *Ptr = Buffer.data();
455   unsigned Len = getSpelling(Tok, Ptr, Invalid);
456   return StringRef(Ptr, Len);
457 }
458 
459 /// CreateString - Plop the specified string into a scratch buffer and return a
460 /// location for it.  If specified, the source location provides a source
461 /// location for the token.
462 void Preprocessor::CreateString(StringRef Str, Token &Tok,
463                                 SourceLocation ExpansionLocStart,
464                                 SourceLocation ExpansionLocEnd) {
465   Tok.setLength(Str.size());
466 
467   const char *DestPtr;
468   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
469 
470   if (ExpansionLocStart.isValid())
471     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
472                                        ExpansionLocEnd, Str.size());
473   Tok.setLocation(Loc);
474 
475   // If this is a raw identifier or a literal token, set the pointer data.
476   if (Tok.is(tok::raw_identifier))
477     Tok.setRawIdentifierData(DestPtr);
478   else if (Tok.isLiteral())
479     Tok.setLiteralData(DestPtr);
480 }
481 
482 Module *Preprocessor::getCurrentModule() {
483   if (!getLangOpts().CompilingModule)
484     return nullptr;
485 
486   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
487 }
488 
489 //===----------------------------------------------------------------------===//
490 // Preprocessor Initialization Methods
491 //===----------------------------------------------------------------------===//
492 
493 
494 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
495 /// which implicitly adds the builtin defines etc.
496 void Preprocessor::EnterMainSourceFile() {
497   // We do not allow the preprocessor to reenter the main file.  Doing so will
498   // cause FileID's to accumulate information from both runs (e.g. #line
499   // information) and predefined macros aren't guaranteed to be set properly.
500   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
501   FileID MainFileID = SourceMgr.getMainFileID();
502 
503   // If MainFileID is loaded it means we loaded an AST file, no need to enter
504   // a main file.
505   if (!SourceMgr.isLoadedFileID(MainFileID)) {
506     // Enter the main file source buffer.
507     EnterSourceFile(MainFileID, nullptr, SourceLocation());
508 
509     // If we've been asked to skip bytes in the main file (e.g., as part of a
510     // precompiled preamble), do so now.
511     if (SkipMainFilePreamble.first > 0)
512       CurLexer->SkipBytes(SkipMainFilePreamble.first,
513                           SkipMainFilePreamble.second);
514 
515     // Tell the header info that the main file was entered.  If the file is later
516     // #imported, it won't be re-entered.
517     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
518       HeaderInfo.IncrementIncludeCount(FE);
519   }
520 
521   // Preprocess Predefines to populate the initial preprocessor state.
522   std::unique_ptr<llvm::MemoryBuffer> SB =
523     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
524   assert(SB && "Cannot create predefined source buffer");
525   FileID FID = SourceMgr.createFileID(std::move(SB));
526   assert(FID.isValid() && "Could not create FileID for predefines?");
527   setPredefinesFileID(FID);
528 
529   // Start parsing the predefines.
530   EnterSourceFile(FID, nullptr, SourceLocation());
531 }
532 
533 void Preprocessor::EndSourceFile() {
534   // Notify the client that we reached the end of the source file.
535   if (Callbacks)
536     Callbacks->EndOfMainFile();
537 }
538 
539 //===----------------------------------------------------------------------===//
540 // Lexer Event Handling.
541 //===----------------------------------------------------------------------===//
542 
543 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
544 /// identifier information for the token and install it into the token,
545 /// updating the token kind accordingly.
546 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
547   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
548 
549   // Look up this token, see if it is a macro, or if it is a language keyword.
550   IdentifierInfo *II;
551   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
552     // No cleaning needed, just use the characters from the lexed buffer.
553     II = getIdentifierInfo(Identifier.getRawIdentifier());
554   } else {
555     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
556     SmallString<64> IdentifierBuffer;
557     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
558 
559     if (Identifier.hasUCN()) {
560       SmallString<64> UCNIdentifierBuffer;
561       expandUCNs(UCNIdentifierBuffer, CleanedStr);
562       II = getIdentifierInfo(UCNIdentifierBuffer);
563     } else {
564       II = getIdentifierInfo(CleanedStr);
565     }
566   }
567 
568   // Update the token info (identifier info and appropriate token kind).
569   Identifier.setIdentifierInfo(II);
570   Identifier.setKind(II->getTokenID());
571 
572   return II;
573 }
574 
575 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
576   PoisonReasons[II] = DiagID;
577 }
578 
579 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
580   assert(Ident__exception_code && Ident__exception_info);
581   assert(Ident___exception_code && Ident___exception_info);
582   Ident__exception_code->setIsPoisoned(Poison);
583   Ident___exception_code->setIsPoisoned(Poison);
584   Ident_GetExceptionCode->setIsPoisoned(Poison);
585   Ident__exception_info->setIsPoisoned(Poison);
586   Ident___exception_info->setIsPoisoned(Poison);
587   Ident_GetExceptionInfo->setIsPoisoned(Poison);
588   Ident__abnormal_termination->setIsPoisoned(Poison);
589   Ident___abnormal_termination->setIsPoisoned(Poison);
590   Ident_AbnormalTermination->setIsPoisoned(Poison);
591 }
592 
593 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
594   assert(Identifier.getIdentifierInfo() &&
595          "Can't handle identifiers without identifier info!");
596   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
597     PoisonReasons.find(Identifier.getIdentifierInfo());
598   if(it == PoisonReasons.end())
599     Diag(Identifier, diag::err_pp_used_poisoned_id);
600   else
601     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
602 }
603 
/// \brief Returns a diagnostic message kind for reporting a future keyword as
/// appropriate for the identifier and specified language.
///
/// \p II must be flagged as a future-compatibility keyword (asserted). Only
/// C++ is handled: every name listed through CXX11_KEYWORD in TokenKinds.def
/// maps to diag::warn_cxx11_keyword. For any other language mode the function
/// hits llvm_unreachable.
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
                                          const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  if (LangOpts.CPlusPlus)
    // Each CXX11_KEYWORD entry of the included .def file expands to one
    // .Case(...) of the StringSwitch below.
    return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx11_keyword)
#include "clang/Basic/TokenKinds.def"
        ;

  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}
620 
621 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
622   assert(II.isOutOfDate() && "not out of date");
623   getExternalSource()->updateOutOfDateIdentifier(II);
624 }
625 
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier.  This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \param Identifier the token to process; it must carry identifier info
///        (asserted). Its kind, flags and identifier info may be updated.
/// \returns the result of HandleMacroExpandedIdentifier() when the identifier
///          is expanded as a macro, and true in every other case.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    // Remember the local poison state of __VA_ARGS__ so that it can be
    // restored after the update.
    bool CurrentIsPoisoned = false;
    if (&II == Ident__VA_ARGS__)
      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();

    updateOutOfDateIdentifier(II);
    // The update may have changed the token ID; refresh the token kind.
    Identifier.setKind(II.getTokenID());

    if (&II == Ident__VA_ARGS__)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDefinition MD = getMacroDefinition(&II)) {
    auto *MI = MD.getMacroInfo();
    assert(MI && "macro definition with no macro info?");
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in a newer Standard or proposed Standard,
  // produce a warning. Don't warn if we're not considering macro expansion,
  // since this identifier might be the name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
        << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsFutureCompatKeyword(false);
  }

  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
  // then we act as if it is the actual operator and not the textual
  // representation of it.
  if (II.isCPlusPlusOperatorKeyword())
    Identifier.setIdentifierInfo(nullptr);

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  //
  // Likewise if this is the C++ Modules TS import keyword.
  if (((LastTokenWasAt && II.isModulesImport()) ||
       Identifier.is(tok::kw_import)) &&
      !InMacroArgs && !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    // Start collecting a module path; the following tokens are routed through
    // LexAfterModuleImport() by switching the lexer kind.
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}
729 
730 void Preprocessor::Lex(Token &Result) {
731   // We loop here until a lex function returns a token; this avoids recursion.
732   bool ReturnedToken;
733   do {
734     switch (CurLexerKind) {
735     case CLK_Lexer:
736       ReturnedToken = CurLexer->Lex(Result);
737       break;
738     case CLK_PTHLexer:
739       ReturnedToken = CurPTHLexer->Lex(Result);
740       break;
741     case CLK_TokenLexer:
742       ReturnedToken = CurTokenLexer->Lex(Result);
743       break;
744     case CLK_CachingLexer:
745       CachingLex(Result);
746       ReturnedToken = true;
747       break;
748     case CLK_LexAfterModuleImport:
749       LexAfterModuleImport(Result);
750       ReturnedToken = true;
751       break;
752     }
753   } while (!ReturnedToken);
754 
755   if (Result.is(tok::code_completion))
756     setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
757 
758   LastTokenWasAt = Result.is(tok::at);
759 }
760 
761 
762 /// \brief Lex a token following the 'import' contextual keyword.
763 ///
764 void Preprocessor::LexAfterModuleImport(Token &Result) {
765   // Figure out what kind of lexer we actually have.
766   recomputeCurLexerKind();
767 
768   // Lex the next token.
769   Lex(Result);
770 
771   // The token sequence
772   //
773   //   import identifier (. identifier)*
774   //
775   // indicates a module import directive. We already saw the 'import'
776   // contextual keyword, so now we're looking for the identifiers.
777   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
778     // We expected to see an identifier here, and we did; continue handling
779     // identifiers.
780     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
781                                               Result.getLocation()));
782     ModuleImportExpectsIdentifier = false;
783     CurLexerKind = CLK_LexAfterModuleImport;
784     return;
785   }
786 
787   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
788   // see the next identifier. (We can also see a '[[' that begins an
789   // attribute-specifier-seq here under the C++ Modules TS.)
790   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
791     ModuleImportExpectsIdentifier = true;
792     CurLexerKind = CLK_LexAfterModuleImport;
793     return;
794   }
795 
796   // If we have a non-empty module path, load the named module.
797   if (!ModuleImportPath.empty()) {
798     Module *Imported = nullptr;
799     if (getLangOpts().Modules) {
800       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
801                                             ModuleImportPath,
802                                             Module::Hidden,
803                                             /*IsIncludeDirective=*/false);
804       if (Imported)
805         makeModuleVisible(Imported, ModuleImportLoc);
806     }
807     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
808       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
809   }
810 }
811 
812 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
813   CurSubmoduleState->VisibleModules.setVisible(
814       M, Loc, [](Module *) {},
815       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
816         // FIXME: Include the path in the diagnostic.
817         // FIXME: Include the import location for the conflicting module.
818         Diag(ModuleImportLoc, diag::warn_module_conflict)
819             << Path[0]->getFullModuleName()
820             << Conflict->getFullModuleName()
821             << Message;
822       });
823 
824   // Add this module to the imports list of the currently-built submodule.
825   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
826     BuildingSubmoduleStack.back().M->Imports.insert(M);
827 }
828 
829 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
830                                           const char *DiagnosticTag,
831                                           bool AllowMacroExpansion) {
832   // We need at least one string literal.
833   if (Result.isNot(tok::string_literal)) {
834     Diag(Result, diag::err_expected_string_literal)
835       << /*Source='in...'*/0 << DiagnosticTag;
836     return false;
837   }
838 
839   // Lex string literal tokens, optionally with macro expansion.
840   SmallVector<Token, 4> StrToks;
841   do {
842     StrToks.push_back(Result);
843 
844     if (Result.hasUDSuffix())
845       Diag(Result, diag::err_invalid_string_udl);
846 
847     if (AllowMacroExpansion)
848       Lex(Result);
849     else
850       LexUnexpandedToken(Result);
851   } while (Result.is(tok::string_literal));
852 
853   // Concatenate and parse the strings.
854   StringLiteralParser Literal(StrToks, *this);
855   assert(Literal.isAscii() && "Didn't allow wide strings in");
856 
857   if (Literal.hadError)
858     return false;
859 
860   if (Literal.Pascal) {
861     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
862       << /*Source='in...'*/0 << DiagnosticTag;
863     return false;
864   }
865 
866   String = Literal.GetString();
867   return true;
868 }
869 
870 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
871   assert(Tok.is(tok::numeric_constant));
872   SmallString<8> IntegerBuffer;
873   bool NumberInvalid = false;
874   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
875   if (NumberInvalid)
876     return false;
877   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
878   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
879     return false;
880   llvm::APInt APVal(64, 0);
881   if (Literal.GetIntegerValue(APVal))
882     return false;
883   Lex(Tok);
884   Value = APVal.getLimitedValue();
885   return true;
886 }
887 
888 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
889   assert(Handler && "NULL comment handler");
890   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
891          CommentHandlers.end() && "Comment handler already registered");
892   CommentHandlers.push_back(Handler);
893 }
894 
895 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
896   std::vector<CommentHandler *>::iterator Pos
897   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
898   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
899   CommentHandlers.erase(Pos);
900 }
901 
902 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
903   bool AnyPendingTokens = false;
904   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
905        HEnd = CommentHandlers.end();
906        H != HEnd; ++H) {
907     if ((*H)->HandleComment(*this, Comment))
908       AnyPendingTokens = true;
909   }
910   if (!AnyPendingTokens || getCommentRetentionState())
911     return false;
912   Lex(result);
913   return true;
914 }
915 
916 ModuleLoader::~ModuleLoader() { }
917 
918 CommentHandler::~CommentHandler() { }
919 
920 CodeCompletionHandler::~CodeCompletionHandler() { }
921 
922 void Preprocessor::createPreprocessingRecord() {
923   if (Record)
924     return;
925 
926   Record = new PreprocessingRecord(getSourceManager());
927   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
928 }
929