1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/Pragma.h"
42 #include "clang/Lex/PreprocessingRecord.h"
43 #include "clang/Lex/PreprocessorOptions.h"
44 #include "clang/Lex/ScratchBuffer.h"
45 #include "llvm/ADT/APFloat.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/Capacity.h"
50 #include "llvm/Support/ConvertUTF.h"
51 #include "llvm/Support/MemoryBuffer.h"
52 #include "llvm/Support/raw_ostream.h"
53 using namespace clang;
54 
55 //===----------------------------------------------------------------------===//
56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
57 
58 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
59                            DiagnosticsEngine &diags, LangOptions &opts,
60                            SourceManager &SM, HeaderSearch &Headers,
61                            ModuleLoader &TheModuleLoader,
62                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
63                            TranslationUnitKind TUKind)
64     : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
65       AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
66       ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
67       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
68       Identifiers(opts, IILookup),
69       PragmaHandlers(new PragmaNamespace(StringRef())),
70       IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
71       CodeCompletionFile(nullptr), CodeCompletionOffset(0),
72       LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
73       CodeCompletionReached(0), MainFileDir(nullptr),
74       SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
75       CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
76       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
77       Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
78   OwnsHeaderSearch = OwnsHeaders;
79 
80   CounterValue = 0; // __COUNTER__ starts at 0.
81 
82   // Clear stats.
83   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
84   NumIf = NumElse = NumEndif = 0;
85   NumEnteredSourceFiles = 0;
86   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
87   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
88   MaxIncludeStackDepth = 0;
89   NumSkipped = 0;
90 
91   // Default to discarding comments.
92   KeepComments = false;
93   KeepMacroComments = false;
94   SuppressIncludeNotFoundError = false;
95 
96   // Macro expansion is enabled.
97   DisableMacroExpansion = false;
98   MacroExpansionInDirectivesOverride = false;
99   InMacroArgs = false;
100   InMacroArgPreExpansion = false;
101   NumCachedTokenLexers = 0;
102   PragmasEnabled = true;
103   ParsingIfOrElifDirective = false;
104   PreprocessedOutput = false;
105 
106   CachedLexPos = 0;
107 
108   // We haven't read anything from the external source.
109   ReadMacrosFromExternalSource = false;
110 
111   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
112   // This gets unpoisoned where it is allowed.
113   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
114   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
115 
116   // Initialize the pragma handlers.
117   RegisterBuiltinPragmas();
118 
119   // Initialize builtin macros like __LINE__ and friends.
120   RegisterBuiltinMacros();
121 
122   if(LangOpts.Borland) {
123     Ident__exception_info        = getIdentifierInfo("_exception_info");
124     Ident___exception_info       = getIdentifierInfo("__exception_info");
125     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
126     Ident__exception_code        = getIdentifierInfo("_exception_code");
127     Ident___exception_code       = getIdentifierInfo("__exception_code");
128     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
129     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
130     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
131     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
132   } else {
133     Ident__exception_info = Ident__exception_code = nullptr;
134     Ident__abnormal_termination = Ident___exception_info = nullptr;
135     Ident___exception_code = Ident___abnormal_termination = nullptr;
136     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
137     Ident_AbnormalTermination = nullptr;
138   }
139 }
140 
141 Preprocessor::~Preprocessor() {
142   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
143 
144   IncludeMacroStack.clear();
145 
146   // Destroy any macro definitions.
147   while (MacroInfoChain *I = MIChainHead) {
148     MIChainHead = I->Next;
149     I->~MacroInfoChain();
150   }
151 
152   // Free any cached macro expanders.
153   // This populates MacroArgCache, so all TokenLexers need to be destroyed
154   // before the code below that frees up the MacroArgCache list.
155   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
156   CurTokenLexer.reset();
157 
158   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
159     DeserialMIChainHead = I->Next;
160     I->~DeserializedMacroInfoChain();
161   }
162 
163   // Free any cached MacroArgs.
164   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
165     ArgList = ArgList->deallocate();
166 
167   // Delete the header search info, if we own it.
168   if (OwnsHeaderSearch)
169     delete &HeaderInfo;
170 }
171 
172 void Preprocessor::Initialize(const TargetInfo &Target,
173                               const TargetInfo *AuxTarget) {
174   assert((!this->Target || this->Target == &Target) &&
175          "Invalid override of target information");
176   this->Target = &Target;
177 
178   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
179          "Invalid override of aux target information.");
180   this->AuxTarget = AuxTarget;
181 
182   // Initialize information about built-ins.
183   BuiltinInfo.InitializeTarget(Target, AuxTarget);
184   HeaderInfo.setTarget(Target);
185 }
186 
187 void Preprocessor::InitializeForModelFile() {
188   NumEnteredSourceFiles = 0;
189 
190   // Reset pragmas
191   PragmaHandlersBackup = std::move(PragmaHandlers);
192   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
193   RegisterBuiltinPragmas();
194 
195   // Reset PredefinesFileID
196   PredefinesFileID = FileID();
197 }
198 
199 void Preprocessor::FinalizeForModelFile() {
200   NumEnteredSourceFiles = 1;
201 
202   PragmaHandlers = std::move(PragmaHandlersBackup);
203 }
204 
205 void Preprocessor::setPTHManager(PTHManager* pm) {
206   PTH.reset(pm);
207   FileMgr.addStatCache(PTH->createStatCache());
208 }
209 
210 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
211   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
212                << getSpelling(Tok) << "'";
213 
214   if (!DumpFlags) return;
215 
216   llvm::errs() << "\t";
217   if (Tok.isAtStartOfLine())
218     llvm::errs() << " [StartOfLine]";
219   if (Tok.hasLeadingSpace())
220     llvm::errs() << " [LeadingSpace]";
221   if (Tok.isExpandDisabled())
222     llvm::errs() << " [ExpandDisabled]";
223   if (Tok.needsCleaning()) {
224     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
225     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
226                  << "']";
227   }
228 
229   llvm::errs() << "\tLoc=<";
230   DumpLocation(Tok.getLocation());
231   llvm::errs() << ">";
232 }
233 
234 void Preprocessor::DumpLocation(SourceLocation Loc) const {
235   Loc.dump(SourceMgr);
236 }
237 
238 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
239   llvm::errs() << "MACRO: ";
240   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
241     DumpToken(MI.getReplacementToken(i));
242     llvm::errs() << "  ";
243   }
244   llvm::errs() << "\n";
245 }
246 
247 void Preprocessor::PrintStats() {
248   llvm::errs() << "\n*** Preprocessor Stats:\n";
249   llvm::errs() << NumDirectives << " directives found:\n";
250   llvm::errs() << "  " << NumDefined << " #define.\n";
251   llvm::errs() << "  " << NumUndefined << " #undef.\n";
252   llvm::errs() << "  #include/#include_next/#import:\n";
253   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
254   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
255   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
256   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
257   llvm::errs() << "  " << NumEndif << " #endif.\n";
258   llvm::errs() << "  " << NumPragma << " #pragma.\n";
259   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
260 
261   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
262              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
263              << NumFastMacroExpanded << " on the fast path.\n";
264   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
265              << " token paste (##) operations performed, "
266              << NumFastTokenPaste << " on the fast path.\n";
267 
268   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
269 
270   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
271   llvm::errs() << "\n  Macro Expanded Tokens: "
272                << llvm::capacity_in_bytes(MacroExpandedTokens);
273   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
274   // FIXME: List information for all submodules.
275   llvm::errs() << "\n  Macros: "
276                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
277   llvm::errs() << "\n  #pragma push_macro Info: "
278                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
279   llvm::errs() << "\n  Poison Reasons: "
280                << llvm::capacity_in_bytes(PoisonReasons);
281   llvm::errs() << "\n  Comment Handlers: "
282                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
283 }
284 
285 Preprocessor::macro_iterator
286 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
287   if (IncludeExternalMacros && ExternalSource &&
288       !ReadMacrosFromExternalSource) {
289     ReadMacrosFromExternalSource = true;
290     ExternalSource->ReadDefinedMacros();
291   }
292 
293   // Make sure we cover all macros in visible modules.
294   for (const ModuleMacro &Macro : ModuleMacros)
295     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
296 
297   return CurSubmoduleState->Macros.begin();
298 }
299 
300 size_t Preprocessor::getTotalMemory() const {
301   return BP.getTotalMemory()
302     + llvm::capacity_in_bytes(MacroExpandedTokens)
303     + Predefines.capacity() /* Predefines buffer. */
304     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
305     // and ModuleMacros.
306     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
307     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
308     + llvm::capacity_in_bytes(PoisonReasons)
309     + llvm::capacity_in_bytes(CommentHandlers);
310 }
311 
312 Preprocessor::macro_iterator
313 Preprocessor::macro_end(bool IncludeExternalMacros) const {
314   if (IncludeExternalMacros && ExternalSource &&
315       !ReadMacrosFromExternalSource) {
316     ReadMacrosFromExternalSource = true;
317     ExternalSource->ReadDefinedMacros();
318   }
319 
320   return CurSubmoduleState->Macros.end();
321 }
322 
323 /// \brief Compares macro tokens with a specified token value sequence.
324 static bool MacroDefinitionEquals(const MacroInfo *MI,
325                                   ArrayRef<TokenValue> Tokens) {
326   return Tokens.size() == MI->getNumTokens() &&
327       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
328 }
329 
330 StringRef Preprocessor::getLastMacroWithSpelling(
331                                     SourceLocation Loc,
332                                     ArrayRef<TokenValue> Tokens) const {
333   SourceLocation BestLocation;
334   StringRef BestSpelling;
335   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
336        I != E; ++I) {
337     const MacroDirective::DefInfo
338       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
339     if (!Def || !Def.getMacroInfo())
340       continue;
341     if (!Def.getMacroInfo()->isObjectLike())
342       continue;
343     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
344       continue;
345     SourceLocation Location = Def.getLocation();
346     // Choose the macro defined latest.
347     if (BestLocation.isInvalid() ||
348         (Location.isValid() &&
349          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
350       BestLocation = Location;
351       BestSpelling = I->first->getName();
352     }
353   }
354   return BestSpelling;
355 }
356 
357 void Preprocessor::recomputeCurLexerKind() {
358   if (CurLexer)
359     CurLexerKind = CLK_Lexer;
360   else if (CurPTHLexer)
361     CurLexerKind = CLK_PTHLexer;
362   else if (CurTokenLexer)
363     CurLexerKind = CLK_TokenLexer;
364   else
365     CurLexerKind = CLK_CachingLexer;
366 }
367 
368 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
369                                           unsigned CompleteLine,
370                                           unsigned CompleteColumn) {
371   assert(File);
372   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
373   assert(!CodeCompletionFile && "Already set");
374 
375   using llvm::MemoryBuffer;
376 
377   // Load the actual file's contents.
378   bool Invalid = false;
379   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
380   if (Invalid)
381     return true;
382 
383   // Find the byte position of the truncation point.
384   const char *Position = Buffer->getBufferStart();
385   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
386     for (; *Position; ++Position) {
387       if (*Position != '\r' && *Position != '\n')
388         continue;
389 
390       // Eat \r\n or \n\r as a single line.
391       if ((Position[1] == '\r' || Position[1] == '\n') &&
392           Position[0] != Position[1])
393         ++Position;
394       ++Position;
395       break;
396     }
397   }
398 
399   Position += CompleteColumn - 1;
400 
401   // If pointing inside the preamble, adjust the position at the beginning of
402   // the file after the preamble.
403   if (SkipMainFilePreamble.first &&
404       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
405     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
406       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
407   }
408 
409   if (Position > Buffer->getBufferEnd())
410     Position = Buffer->getBufferEnd();
411 
412   CodeCompletionFile = File;
413   CodeCompletionOffset = Position - Buffer->getBufferStart();
414 
415   std::unique_ptr<MemoryBuffer> NewBuffer =
416       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
417                                           Buffer->getBufferIdentifier());
418   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
419   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
420   *NewPos = '\0';
421   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
422   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
423 
424   return false;
425 }
426 
427 void Preprocessor::CodeCompleteNaturalLanguage() {
428   if (CodeComplete)
429     CodeComplete->CodeCompleteNaturalLanguage();
430   setCodeCompletionReached();
431 }
432 
433 /// getSpelling - This method is used to get the spelling of a token into a
434 /// SmallVector. Note that the returned StringRef may not point to the
435 /// supplied buffer if a copy can be avoided.
436 StringRef Preprocessor::getSpelling(const Token &Tok,
437                                           SmallVectorImpl<char> &Buffer,
438                                           bool *Invalid) const {
439   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
440   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
441     // Try the fast path.
442     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
443       return II->getName();
444   }
445 
446   // Resize the buffer if we need to copy into it.
447   if (Tok.needsCleaning())
448     Buffer.resize(Tok.getLength());
449 
450   const char *Ptr = Buffer.data();
451   unsigned Len = getSpelling(Tok, Ptr, Invalid);
452   return StringRef(Ptr, Len);
453 }
454 
455 /// CreateString - Plop the specified string into a scratch buffer and return a
456 /// location for it.  If specified, the source location provides a source
457 /// location for the token.
458 void Preprocessor::CreateString(StringRef Str, Token &Tok,
459                                 SourceLocation ExpansionLocStart,
460                                 SourceLocation ExpansionLocEnd) {
461   Tok.setLength(Str.size());
462 
463   const char *DestPtr;
464   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
465 
466   if (ExpansionLocStart.isValid())
467     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
468                                        ExpansionLocEnd, Str.size());
469   Tok.setLocation(Loc);
470 
471   // If this is a raw identifier or a literal token, set the pointer data.
472   if (Tok.is(tok::raw_identifier))
473     Tok.setRawIdentifierData(DestPtr);
474   else if (Tok.isLiteral())
475     Tok.setLiteralData(DestPtr);
476 }
477 
478 Module *Preprocessor::getCurrentModule() {
479   if (getLangOpts().CurrentModule.empty())
480     return nullptr;
481 
482   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
483 }
484 
485 //===----------------------------------------------------------------------===//
486 // Preprocessor Initialization Methods
487 //===----------------------------------------------------------------------===//
488 
489 
490 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
491 /// which implicitly adds the builtin defines etc.
492 void Preprocessor::EnterMainSourceFile() {
493   // We do not allow the preprocessor to reenter the main file.  Doing so will
494   // cause FileID's to accumulate information from both runs (e.g. #line
495   // information) and predefined macros aren't guaranteed to be set properly.
496   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
497   FileID MainFileID = SourceMgr.getMainFileID();
498 
499   // If MainFileID is loaded it means we loaded an AST file, no need to enter
500   // a main file.
501   if (!SourceMgr.isLoadedFileID(MainFileID)) {
502     // Enter the main file source buffer.
503     EnterSourceFile(MainFileID, nullptr, SourceLocation());
504 
505     // If we've been asked to skip bytes in the main file (e.g., as part of a
506     // precompiled preamble), do so now.
507     if (SkipMainFilePreamble.first > 0)
508       CurLexer->SkipBytes(SkipMainFilePreamble.first,
509                           SkipMainFilePreamble.second);
510 
511     // Tell the header info that the main file was entered.  If the file is later
512     // #imported, it won't be re-entered.
513     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
514       HeaderInfo.IncrementIncludeCount(FE);
515   }
516 
517   // Preprocess Predefines to populate the initial preprocessor state.
518   std::unique_ptr<llvm::MemoryBuffer> SB =
519     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
520   assert(SB && "Cannot create predefined source buffer");
521   FileID FID = SourceMgr.createFileID(std::move(SB));
522   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
523   setPredefinesFileID(FID);
524 
525   // Start parsing the predefines.
526   EnterSourceFile(FID, nullptr, SourceLocation());
527 }
528 
529 void Preprocessor::EndSourceFile() {
530   // Notify the client that we reached the end of the source file.
531   if (Callbacks)
532     Callbacks->EndOfMainFile();
533 }
534 
535 //===----------------------------------------------------------------------===//
536 // Lexer Event Handling.
537 //===----------------------------------------------------------------------===//
538 
539 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
540 /// identifier information for the token and install it into the token,
541 /// updating the token kind accordingly.
542 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
543   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
544 
545   // Look up this token, see if it is a macro, or if it is a language keyword.
546   IdentifierInfo *II;
547   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
548     // No cleaning needed, just use the characters from the lexed buffer.
549     II = getIdentifierInfo(Identifier.getRawIdentifier());
550   } else {
551     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
552     SmallString<64> IdentifierBuffer;
553     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
554 
555     if (Identifier.hasUCN()) {
556       SmallString<64> UCNIdentifierBuffer;
557       expandUCNs(UCNIdentifierBuffer, CleanedStr);
558       II = getIdentifierInfo(UCNIdentifierBuffer);
559     } else {
560       II = getIdentifierInfo(CleanedStr);
561     }
562   }
563 
564   // Update the token info (identifier info and appropriate token kind).
565   Identifier.setIdentifierInfo(II);
566   Identifier.setKind(II->getTokenID());
567 
568   return II;
569 }
570 
571 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
572   PoisonReasons[II] = DiagID;
573 }
574 
575 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
576   assert(Ident__exception_code && Ident__exception_info);
577   assert(Ident___exception_code && Ident___exception_info);
578   Ident__exception_code->setIsPoisoned(Poison);
579   Ident___exception_code->setIsPoisoned(Poison);
580   Ident_GetExceptionCode->setIsPoisoned(Poison);
581   Ident__exception_info->setIsPoisoned(Poison);
582   Ident___exception_info->setIsPoisoned(Poison);
583   Ident_GetExceptionInfo->setIsPoisoned(Poison);
584   Ident__abnormal_termination->setIsPoisoned(Poison);
585   Ident___abnormal_termination->setIsPoisoned(Poison);
586   Ident_AbnormalTermination->setIsPoisoned(Poison);
587 }
588 
589 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
590   assert(Identifier.getIdentifierInfo() &&
591          "Can't handle identifiers without identifier info!");
592   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
593     PoisonReasons.find(Identifier.getIdentifierInfo());
594   if(it == PoisonReasons.end())
595     Diag(Identifier, diag::err_pp_used_poisoned_id);
596   else
597     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
598 }
599 
600 /// \brief Returns a diagnostic message kind for reporting a future keyword as
601 /// appropriate for the identifier and specified language.
602 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
603                                           const LangOptions &LangOpts) {
604   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
605 
606   if (LangOpts.CPlusPlus)
607     return llvm::StringSwitch<diag::kind>(II.getName())
608 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
609         .Case(#NAME, diag::warn_cxx11_keyword)
610 #include "clang/Basic/TokenKinds.def"
611         ;
612 
613   llvm_unreachable(
614       "Keyword not known to come from a newer Standard or proposed Standard");
615 }
616 
617 /// HandleIdentifier - This callback is invoked when the lexer reads an
618 /// identifier.  This callback looks up the identifier in the map and/or
619 /// potentially macro expands it or turns it into a named token (like 'for').
620 ///
621 /// Note that callers of this method are guarded by checking the
622 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
623 /// IdentifierInfo methods that compute these properties will need to change to
624 /// match.
625 bool Preprocessor::HandleIdentifier(Token &Identifier) {
626   assert(Identifier.getIdentifierInfo() &&
627          "Can't handle identifiers without identifier info!");
628 
629   IdentifierInfo &II = *Identifier.getIdentifierInfo();
630 
631   // If the information about this identifier is out of date, update it from
632   // the external source.
633   // We have to treat __VA_ARGS__ in a special way, since it gets
634   // serialized with isPoisoned = true, but our preprocessor may have
635   // unpoisoned it if we're defining a C99 macro.
636   if (II.isOutOfDate()) {
637     bool CurrentIsPoisoned = false;
638     if (&II == Ident__VA_ARGS__)
639       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
640 
641     ExternalSource->updateOutOfDateIdentifier(II);
642     Identifier.setKind(II.getTokenID());
643 
644     if (&II == Ident__VA_ARGS__)
645       II.setIsPoisoned(CurrentIsPoisoned);
646   }
647 
648   // If this identifier was poisoned, and if it was not produced from a macro
649   // expansion, emit an error.
650   if (II.isPoisoned() && CurPPLexer) {
651     HandlePoisonedIdentifier(Identifier);
652   }
653 
654   // If this is a macro to be expanded, do it.
655   if (MacroDefinition MD = getMacroDefinition(&II)) {
656     auto *MI = MD.getMacroInfo();
657     assert(MI && "macro definition with no macro info?");
658     if (!DisableMacroExpansion) {
659       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
660         // C99 6.10.3p10: If the preprocessing token immediately after the
661         // macro name isn't a '(', this macro should not be expanded.
662         if (!MI->isFunctionLike() || isNextPPTokenLParen())
663           return HandleMacroExpandedIdentifier(Identifier, MD);
664       } else {
665         // C99 6.10.3.4p2 says that a disabled macro may never again be
666         // expanded, even if it's in a context where it could be expanded in the
667         // future.
668         Identifier.setFlag(Token::DisableExpand);
669         if (MI->isObjectLike() || isNextPPTokenLParen())
670           Diag(Identifier, diag::pp_disabled_macro_expansion);
671       }
672     }
673   }
674 
675   // If this identifier is a keyword in a newer Standard or proposed Standard,
676   // produce a warning. Don't warn if we're not considering macro expansion,
677   // since this identifier might be the name of a macro.
678   // FIXME: This warning is disabled in cases where it shouldn't be, like
679   //   "#define constexpr constexpr", "int constexpr;"
680   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
681     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
682         << II.getName();
683     // Don't diagnose this keyword again in this translation unit.
684     II.setIsFutureCompatKeyword(false);
685   }
686 
687   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
688   // then we act as if it is the actual operator and not the textual
689   // representation of it.
690   if (II.isCPlusPlusOperatorKeyword())
691     Identifier.setIdentifierInfo(nullptr);
692 
693   // If this is an extension token, diagnose its use.
694   // We avoid diagnosing tokens that originate from macro definitions.
695   // FIXME: This warning is disabled in cases where it shouldn't be,
696   // like "#define TY typeof", "TY(1) x".
697   if (II.isExtensionToken() && !DisableMacroExpansion)
698     Diag(Identifier, diag::ext_token_used);
699 
700   // If this is the 'import' contextual keyword following an '@', note
701   // that the next token indicates a module name.
702   //
703   // Note that we do not treat 'import' as a contextual
704   // keyword when we're in a caching lexer, because caching lexers only get
705   // used in contexts where import declarations are disallowed.
706   if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
707       !DisableMacroExpansion &&
708       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
709       CurLexerKind != CLK_CachingLexer) {
710     ModuleImportLoc = Identifier.getLocation();
711     ModuleImportPath.clear();
712     ModuleImportExpectsIdentifier = true;
713     CurLexerKind = CLK_LexAfterModuleImport;
714   }
715   return true;
716 }
717 
718 void Preprocessor::Lex(Token &Result) {
719   // We loop here until a lex function returns a token; this avoids recursion.
720   bool ReturnedToken;
721   do {
722     switch (CurLexerKind) {
723     case CLK_Lexer:
724       ReturnedToken = CurLexer->Lex(Result);
725       break;
726     case CLK_PTHLexer:
727       ReturnedToken = CurPTHLexer->Lex(Result);
728       break;
729     case CLK_TokenLexer:
730       ReturnedToken = CurTokenLexer->Lex(Result);
731       break;
732     case CLK_CachingLexer:
733       CachingLex(Result);
734       ReturnedToken = true;
735       break;
736     case CLK_LexAfterModuleImport:
737       LexAfterModuleImport(Result);
738       ReturnedToken = true;
739       break;
740     }
741   } while (!ReturnedToken);
742 
743   LastTokenWasAt = Result.is(tok::at);
744 }
745 
746 
747 /// \brief Lex a token following the 'import' contextual keyword.
748 ///
749 void Preprocessor::LexAfterModuleImport(Token &Result) {
750   // Figure out what kind of lexer we actually have.
751   recomputeCurLexerKind();
752 
753   // Lex the next token.
754   Lex(Result);
755 
756   // The token sequence
757   //
758   //   import identifier (. identifier)*
759   //
760   // indicates a module import directive. We already saw the 'import'
761   // contextual keyword, so now we're looking for the identifiers.
762   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
763     // We expected to see an identifier here, and we did; continue handling
764     // identifiers.
765     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
766                                               Result.getLocation()));
767     ModuleImportExpectsIdentifier = false;
768     CurLexerKind = CLK_LexAfterModuleImport;
769     return;
770   }
771 
772   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
773   // see the next identifier.
774   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
775     ModuleImportExpectsIdentifier = true;
776     CurLexerKind = CLK_LexAfterModuleImport;
777     return;
778   }
779 
780   // If we have a non-empty module path, load the named module.
781   if (!ModuleImportPath.empty()) {
782     Module *Imported = nullptr;
783     if (getLangOpts().Modules) {
784       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
785                                             ModuleImportPath,
786                                             Module::Hidden,
787                                             /*IsIncludeDirective=*/false);
788       if (Imported)
789         makeModuleVisible(Imported, ModuleImportLoc);
790     }
791     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
792       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
793   }
794 }
795 
796 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
797   CurSubmoduleState->VisibleModules.setVisible(
798       M, Loc, [](Module *) {},
799       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
800         // FIXME: Include the path in the diagnostic.
801         // FIXME: Include the import location for the conflicting module.
802         Diag(ModuleImportLoc, diag::warn_module_conflict)
803             << Path[0]->getFullModuleName()
804             << Conflict->getFullModuleName()
805             << Message;
806       });
807 
808   // Add this module to the imports list of the currently-built submodule.
809   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
810     BuildingSubmoduleStack.back().M->Imports.insert(M);
811 }
812 
813 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
814                                           const char *DiagnosticTag,
815                                           bool AllowMacroExpansion) {
816   // We need at least one string literal.
817   if (Result.isNot(tok::string_literal)) {
818     Diag(Result, diag::err_expected_string_literal)
819       << /*Source='in...'*/0 << DiagnosticTag;
820     return false;
821   }
822 
823   // Lex string literal tokens, optionally with macro expansion.
824   SmallVector<Token, 4> StrToks;
825   do {
826     StrToks.push_back(Result);
827 
828     if (Result.hasUDSuffix())
829       Diag(Result, diag::err_invalid_string_udl);
830 
831     if (AllowMacroExpansion)
832       Lex(Result);
833     else
834       LexUnexpandedToken(Result);
835   } while (Result.is(tok::string_literal));
836 
837   // Concatenate and parse the strings.
838   StringLiteralParser Literal(StrToks, *this);
839   assert(Literal.isAscii() && "Didn't allow wide strings in");
840 
841   if (Literal.hadError)
842     return false;
843 
844   if (Literal.Pascal) {
845     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
846       << /*Source='in...'*/0 << DiagnosticTag;
847     return false;
848   }
849 
850   String = Literal.GetString();
851   return true;
852 }
853 
854 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
855   assert(Tok.is(tok::numeric_constant));
856   SmallString<8> IntegerBuffer;
857   bool NumberInvalid = false;
858   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
859   if (NumberInvalid)
860     return false;
861   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
862   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
863     return false;
864   llvm::APInt APVal(64, 0);
865   if (Literal.GetIntegerValue(APVal))
866     return false;
867   Lex(Tok);
868   Value = APVal.getLimitedValue();
869   return true;
870 }
871 
872 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
873   assert(Handler && "NULL comment handler");
874   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
875          CommentHandlers.end() && "Comment handler already registered");
876   CommentHandlers.push_back(Handler);
877 }
878 
879 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
880   std::vector<CommentHandler *>::iterator Pos
881   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
882   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
883   CommentHandlers.erase(Pos);
884 }
885 
886 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
887   bool AnyPendingTokens = false;
888   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
889        HEnd = CommentHandlers.end();
890        H != HEnd; ++H) {
891     if ((*H)->HandleComment(*this, Comment))
892       AnyPendingTokens = true;
893   }
894   if (!AnyPendingTokens || getCommentRetentionState())
895     return false;
896   Lex(result);
897   return true;
898 }
899 
900 ModuleLoader::~ModuleLoader() { }
901 
902 CommentHandler::~CommentHandler() { }
903 
904 CodeCompletionHandler::~CodeCompletionHandler() { }
905 
906 void Preprocessor::createPreprocessingRecord() {
907   if (Record)
908     return;
909 
910   Record = new PreprocessingRecord(getSourceManager());
911   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
912 }
913