1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/APFloat.h"
47 #include "llvm/ADT/STLExtras.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/StringExtras.h"
50 #include "llvm/Support/Capacity.h"
51 #include "llvm/Support/ConvertUTF.h"
52 #include "llvm/Support/MemoryBuffer.h"
53 #include "llvm/Support/raw_ostream.h"
54 using namespace clang;
55 
// Explicit instantiation of the llvm::Registry template for
// clang::PragmaHandler, emitted from this translation unit.
template class llvm::Registry<clang::PragmaHandler>;
57 
58 //===----------------------------------------------------------------------===//
59 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
60 
/// Construct a preprocessor wired to the supplied collaborators.
///
/// \param PPOpts          Preprocessor options, stored as given.
/// \param diags           Diagnostics engine; its address is stored.
/// \param opts            Language options; also used to build the identifier
///                        table.
/// \param SM              Source manager used for locations and buffers.
/// \param Headers         Header search object; its file manager becomes this
///                        preprocessor's file manager.
/// \param TheModuleLoader Module loader used when importing modules.
/// \param IILookup        Lookup object forwarded to the identifier table.
/// \param OwnsHeaders     If true, \p Headers is deleted by the destructor.
/// \param TUKind          Kind of translation unit being processed.
///
/// The target pointers start out null; they are filled in by Initialize().
Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
                           DiagnosticsEngine &diags, LangOptions &opts,
                           SourceManager &SM, HeaderSearch &Headers,
                           ModuleLoader &TheModuleLoader,
                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
                           TranslationUnitKind TUKind)
    : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
      AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
      ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
      TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
      Identifiers(opts, IILookup),
      PragmaHandlers(new PragmaNamespace(StringRef())),
      IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
      CodeCompletionFile(nullptr), CodeCompletionOffset(0),
      LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
      CodeCompletionReached(0), MainFileDir(nullptr),
      SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
      CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
      CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
      Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
  // Remember whether the destructor is responsible for deleting HeaderInfo.
  OwnsHeaderSearch = OwnsHeaders;

  CounterValue = 0; // __COUNTER__ starts at 0.

  // Clear stats.
  NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
  NumIf = NumElse = NumEndif = 0;
  NumEnteredSourceFiles = 0;
  NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
  NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
  MaxIncludeStackDepth = 0;
  NumSkipped = 0;

  // Default to discarding comments.
  KeepComments = false;
  KeepMacroComments = false;
  SuppressIncludeNotFoundError = false;

  // Macro expansion is enabled.
  DisableMacroExpansion = false;
  MacroExpansionInDirectivesOverride = false;
  InMacroArgs = false;
  InMacroArgPreExpansion = false;
  NumCachedTokenLexers = 0;
  PragmasEnabled = true;
  ParsingIfOrElifDirective = false;
  PreprocessedOutput = false;

  // Nothing has been cached for backtracking yet.
  CachedLexPos = 0;

  // We haven't read anything from the external source.
  ReadMacrosFromExternalSource = false;

  // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
  // This gets unpoisoned where it is allowed.
  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);

  // Initialize the pragma handlers.
  RegisterBuiltinPragmas();

  // Initialize builtin macros like __LINE__ and friends.
  RegisterBuiltinMacros();

  // The SEH-related identifiers are only looked up in Borland mode; otherwise
  // all nine pointers are left null.
  if(LangOpts.Borland) {
    Ident__exception_info        = getIdentifierInfo("_exception_info");
    Ident___exception_info       = getIdentifierInfo("__exception_info");
    Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
    Ident__exception_code        = getIdentifierInfo("_exception_code");
    Ident___exception_code       = getIdentifierInfo("__exception_code");
    Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
    Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
  } else {
    Ident__exception_info = Ident__exception_code = nullptr;
    Ident__abnormal_termination = Ident___exception_info = nullptr;
    Ident___exception_code = Ident___abnormal_termination = nullptr;
    Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
    Ident_AbnormalTermination = nullptr;
  }
}
143 
/// Tear down the preprocessor: run the destructors of the chained macro
/// records, release cached token lexers and macro argument lists, and delete
/// the header search object when this preprocessor owns it.
///
/// The order of the steps below matters; see the comment on the token lexer
/// cache.
Preprocessor::~Preprocessor() {
  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");

  IncludeMacroStack.clear();

  // Destroy any macro definitions.
  // Only the destructor is invoked on each chain node; the node storage itself
  // is not freed here.
  while (MacroInfoChain *I = MIChainHead) {
    MIChainHead = I->Next;
    I->~MacroInfoChain();
  }

  // Free any cached macro expanders.
  // This populates MacroArgCache, so all TokenLexers need to be destroyed
  // before the code below that frees up the MacroArgCache list.
  std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
  CurTokenLexer.reset();

  // Same treatment for the chain of deserialized macro records.
  while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
    DeserialMIChainHead = I->Next;
    I->~DeserializedMacroInfoChain();
  }

  // Free any cached MacroArgs.
  // deallocate() hands back the next list entry, which drives the loop.
  for (MacroArgs *ArgList = MacroArgCache; ArgList;)
    ArgList = ArgList->deallocate();

  // Delete the header search info, if we own it.
  if (OwnsHeaderSearch)
    delete &HeaderInfo;
}
174 
175 void Preprocessor::Initialize(const TargetInfo &Target,
176                               const TargetInfo *AuxTarget) {
177   assert((!this->Target || this->Target == &Target) &&
178          "Invalid override of target information");
179   this->Target = &Target;
180 
181   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
182          "Invalid override of aux target information.");
183   this->AuxTarget = AuxTarget;
184 
185   // Initialize information about built-ins.
186   BuiltinInfo.InitializeTarget(Target, AuxTarget);
187   HeaderInfo.setTarget(Target);
188 }
189 
190 void Preprocessor::InitializeForModelFile() {
191   NumEnteredSourceFiles = 0;
192 
193   // Reset pragmas
194   PragmaHandlersBackup = std::move(PragmaHandlers);
195   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
196   RegisterBuiltinPragmas();
197 
198   // Reset PredefinesFileID
199   PredefinesFileID = FileID();
200 }
201 
202 void Preprocessor::FinalizeForModelFile() {
203   NumEnteredSourceFiles = 1;
204 
205   PragmaHandlers = std::move(PragmaHandlersBackup);
206 }
207 
208 void Preprocessor::setPTHManager(PTHManager* pm) {
209   PTH.reset(pm);
210   FileMgr.addStatCache(PTH->createStatCache());
211 }
212 
213 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
214   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
215                << getSpelling(Tok) << "'";
216 
217   if (!DumpFlags) return;
218 
219   llvm::errs() << "\t";
220   if (Tok.isAtStartOfLine())
221     llvm::errs() << " [StartOfLine]";
222   if (Tok.hasLeadingSpace())
223     llvm::errs() << " [LeadingSpace]";
224   if (Tok.isExpandDisabled())
225     llvm::errs() << " [ExpandDisabled]";
226   if (Tok.needsCleaning()) {
227     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
228     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
229                  << "']";
230   }
231 
232   llvm::errs() << "\tLoc=<";
233   DumpLocation(Tok.getLocation());
234   llvm::errs() << ">";
235 }
236 
/// Dump \p Loc by forwarding to SourceLocation::dump with this preprocessor's
/// source manager.
void Preprocessor::DumpLocation(SourceLocation Loc) const {
  Loc.dump(SourceMgr);
}
240 
241 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
242   llvm::errs() << "MACRO: ";
243   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
244     DumpToken(MI.getReplacementToken(i));
245     llvm::errs() << "  ";
246   }
247   llvm::errs() << "\n";
248 }
249 
250 void Preprocessor::PrintStats() {
251   llvm::errs() << "\n*** Preprocessor Stats:\n";
252   llvm::errs() << NumDirectives << " directives found:\n";
253   llvm::errs() << "  " << NumDefined << " #define.\n";
254   llvm::errs() << "  " << NumUndefined << " #undef.\n";
255   llvm::errs() << "  #include/#include_next/#import:\n";
256   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
257   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
258   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
259   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
260   llvm::errs() << "  " << NumEndif << " #endif.\n";
261   llvm::errs() << "  " << NumPragma << " #pragma.\n";
262   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
263 
264   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
265              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
266              << NumFastMacroExpanded << " on the fast path.\n";
267   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
268              << " token paste (##) operations performed, "
269              << NumFastTokenPaste << " on the fast path.\n";
270 
271   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
272 
273   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
274   llvm::errs() << "\n  Macro Expanded Tokens: "
275                << llvm::capacity_in_bytes(MacroExpandedTokens);
276   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
277   // FIXME: List information for all submodules.
278   llvm::errs() << "\n  Macros: "
279                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
280   llvm::errs() << "\n  #pragma push_macro Info: "
281                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
282   llvm::errs() << "\n  Poison Reasons: "
283                << llvm::capacity_in_bytes(PoisonReasons);
284   llvm::errs() << "\n  Comment Handlers: "
285                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
286 }
287 
288 Preprocessor::macro_iterator
289 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
290   if (IncludeExternalMacros && ExternalSource &&
291       !ReadMacrosFromExternalSource) {
292     ReadMacrosFromExternalSource = true;
293     ExternalSource->ReadDefinedMacros();
294   }
295 
296   // Make sure we cover all macros in visible modules.
297   for (const ModuleMacro &Macro : ModuleMacros)
298     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
299 
300   return CurSubmoduleState->Macros.begin();
301 }
302 
303 size_t Preprocessor::getTotalMemory() const {
304   return BP.getTotalMemory()
305     + llvm::capacity_in_bytes(MacroExpandedTokens)
306     + Predefines.capacity() /* Predefines buffer. */
307     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
308     // and ModuleMacros.
309     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
310     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
311     + llvm::capacity_in_bytes(PoisonReasons)
312     + llvm::capacity_in_bytes(CommentHandlers);
313 }
314 
315 Preprocessor::macro_iterator
316 Preprocessor::macro_end(bool IncludeExternalMacros) const {
317   if (IncludeExternalMacros && ExternalSource &&
318       !ReadMacrosFromExternalSource) {
319     ReadMacrosFromExternalSource = true;
320     ExternalSource->ReadDefinedMacros();
321   }
322 
323   return CurSubmoduleState->Macros.end();
324 }
325 
326 /// \brief Compares macro tokens with a specified token value sequence.
327 static bool MacroDefinitionEquals(const MacroInfo *MI,
328                                   ArrayRef<TokenValue> Tokens) {
329   return Tokens.size() == MI->getNumTokens() &&
330       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
331 }
332 
333 StringRef Preprocessor::getLastMacroWithSpelling(
334                                     SourceLocation Loc,
335                                     ArrayRef<TokenValue> Tokens) const {
336   SourceLocation BestLocation;
337   StringRef BestSpelling;
338   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
339        I != E; ++I) {
340     const MacroDirective::DefInfo
341       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
342     if (!Def || !Def.getMacroInfo())
343       continue;
344     if (!Def.getMacroInfo()->isObjectLike())
345       continue;
346     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
347       continue;
348     SourceLocation Location = Def.getLocation();
349     // Choose the macro defined latest.
350     if (BestLocation.isInvalid() ||
351         (Location.isValid() &&
352          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
353       BestLocation = Location;
354       BestSpelling = I->first->getName();
355     }
356   }
357   return BestSpelling;
358 }
359 
360 void Preprocessor::recomputeCurLexerKind() {
361   if (CurLexer)
362     CurLexerKind = CLK_Lexer;
363   else if (CurPTHLexer)
364     CurLexerKind = CLK_PTHLexer;
365   else if (CurTokenLexer)
366     CurLexerKind = CLK_TokenLexer;
367   else
368     CurLexerKind = CLK_CachingLexer;
369 }
370 
371 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
372                                           unsigned CompleteLine,
373                                           unsigned CompleteColumn) {
374   assert(File);
375   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
376   assert(!CodeCompletionFile && "Already set");
377 
378   using llvm::MemoryBuffer;
379 
380   // Load the actual file's contents.
381   bool Invalid = false;
382   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
383   if (Invalid)
384     return true;
385 
386   // Find the byte position of the truncation point.
387   const char *Position = Buffer->getBufferStart();
388   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
389     for (; *Position; ++Position) {
390       if (*Position != '\r' && *Position != '\n')
391         continue;
392 
393       // Eat \r\n or \n\r as a single line.
394       if ((Position[1] == '\r' || Position[1] == '\n') &&
395           Position[0] != Position[1])
396         ++Position;
397       ++Position;
398       break;
399     }
400   }
401 
402   Position += CompleteColumn - 1;
403 
404   // If pointing inside the preamble, adjust the position at the beginning of
405   // the file after the preamble.
406   if (SkipMainFilePreamble.first &&
407       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
408     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
409       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
410   }
411 
412   if (Position > Buffer->getBufferEnd())
413     Position = Buffer->getBufferEnd();
414 
415   CodeCompletionFile = File;
416   CodeCompletionOffset = Position - Buffer->getBufferStart();
417 
418   std::unique_ptr<MemoryBuffer> NewBuffer =
419       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
420                                           Buffer->getBufferIdentifier());
421   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
422   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
423   *NewPos = '\0';
424   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
425   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
426 
427   return false;
428 }
429 
430 void Preprocessor::CodeCompleteNaturalLanguage() {
431   if (CodeComplete)
432     CodeComplete->CodeCompleteNaturalLanguage();
433   setCodeCompletionReached();
434 }
435 
436 /// getSpelling - This method is used to get the spelling of a token into a
437 /// SmallVector. Note that the returned StringRef may not point to the
438 /// supplied buffer if a copy can be avoided.
439 StringRef Preprocessor::getSpelling(const Token &Tok,
440                                           SmallVectorImpl<char> &Buffer,
441                                           bool *Invalid) const {
442   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
443   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
444     // Try the fast path.
445     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
446       return II->getName();
447   }
448 
449   // Resize the buffer if we need to copy into it.
450   if (Tok.needsCleaning())
451     Buffer.resize(Tok.getLength());
452 
453   const char *Ptr = Buffer.data();
454   unsigned Len = getSpelling(Tok, Ptr, Invalid);
455   return StringRef(Ptr, Len);
456 }
457 
458 /// CreateString - Plop the specified string into a scratch buffer and return a
459 /// location for it.  If specified, the source location provides a source
460 /// location for the token.
461 void Preprocessor::CreateString(StringRef Str, Token &Tok,
462                                 SourceLocation ExpansionLocStart,
463                                 SourceLocation ExpansionLocEnd) {
464   Tok.setLength(Str.size());
465 
466   const char *DestPtr;
467   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
468 
469   if (ExpansionLocStart.isValid())
470     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
471                                        ExpansionLocEnd, Str.size());
472   Tok.setLocation(Loc);
473 
474   // If this is a raw identifier or a literal token, set the pointer data.
475   if (Tok.is(tok::raw_identifier))
476     Tok.setRawIdentifierData(DestPtr);
477   else if (Tok.isLiteral())
478     Tok.setLiteralData(DestPtr);
479 }
480 
481 Module *Preprocessor::getCurrentModule() {
482   if (!getLangOpts().CompilingModule)
483     return nullptr;
484 
485   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
486 }
487 
488 //===----------------------------------------------------------------------===//
489 // Preprocessor Initialization Methods
490 //===----------------------------------------------------------------------===//
491 
492 
493 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
494 /// which implicitly adds the builtin defines etc.
495 void Preprocessor::EnterMainSourceFile() {
496   // We do not allow the preprocessor to reenter the main file.  Doing so will
497   // cause FileID's to accumulate information from both runs (e.g. #line
498   // information) and predefined macros aren't guaranteed to be set properly.
499   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
500   FileID MainFileID = SourceMgr.getMainFileID();
501 
502   // If MainFileID is loaded it means we loaded an AST file, no need to enter
503   // a main file.
504   if (!SourceMgr.isLoadedFileID(MainFileID)) {
505     // Enter the main file source buffer.
506     EnterSourceFile(MainFileID, nullptr, SourceLocation());
507 
508     // If we've been asked to skip bytes in the main file (e.g., as part of a
509     // precompiled preamble), do so now.
510     if (SkipMainFilePreamble.first > 0)
511       CurLexer->SkipBytes(SkipMainFilePreamble.first,
512                           SkipMainFilePreamble.second);
513 
514     // Tell the header info that the main file was entered.  If the file is later
515     // #imported, it won't be re-entered.
516     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
517       HeaderInfo.IncrementIncludeCount(FE);
518   }
519 
520   // Preprocess Predefines to populate the initial preprocessor state.
521   std::unique_ptr<llvm::MemoryBuffer> SB =
522     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
523   assert(SB && "Cannot create predefined source buffer");
524   FileID FID = SourceMgr.createFileID(std::move(SB));
525   assert(FID.isValid() && "Could not create FileID for predefines?");
526   setPredefinesFileID(FID);
527 
528   // Start parsing the predefines.
529   EnterSourceFile(FID, nullptr, SourceLocation());
530 }
531 
532 void Preprocessor::EndSourceFile() {
533   // Notify the client that we reached the end of the source file.
534   if (Callbacks)
535     Callbacks->EndOfMainFile();
536 }
537 
538 //===----------------------------------------------------------------------===//
539 // Lexer Event Handling.
540 //===----------------------------------------------------------------------===//
541 
542 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
543 /// identifier information for the token and install it into the token,
544 /// updating the token kind accordingly.
545 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
546   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
547 
548   // Look up this token, see if it is a macro, or if it is a language keyword.
549   IdentifierInfo *II;
550   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
551     // No cleaning needed, just use the characters from the lexed buffer.
552     II = getIdentifierInfo(Identifier.getRawIdentifier());
553   } else {
554     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
555     SmallString<64> IdentifierBuffer;
556     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
557 
558     if (Identifier.hasUCN()) {
559       SmallString<64> UCNIdentifierBuffer;
560       expandUCNs(UCNIdentifierBuffer, CleanedStr);
561       II = getIdentifierInfo(UCNIdentifierBuffer);
562     } else {
563       II = getIdentifierInfo(CleanedStr);
564     }
565   }
566 
567   // Update the token info (identifier info and appropriate token kind).
568   Identifier.setIdentifierInfo(II);
569   Identifier.setKind(II->getTokenID());
570 
571   return II;
572 }
573 
/// Associate diagnostic \p DiagID with identifier \p II in PoisonReasons,
/// replacing any earlier entry. HandlePoisonedIdentifier() emits this
/// diagnostic when it finds an entry for the identifier.
void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
  PoisonReasons[II] = DiagID;
}
577 
578 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
579   assert(Ident__exception_code && Ident__exception_info);
580   assert(Ident___exception_code && Ident___exception_info);
581   Ident__exception_code->setIsPoisoned(Poison);
582   Ident___exception_code->setIsPoisoned(Poison);
583   Ident_GetExceptionCode->setIsPoisoned(Poison);
584   Ident__exception_info->setIsPoisoned(Poison);
585   Ident___exception_info->setIsPoisoned(Poison);
586   Ident_GetExceptionInfo->setIsPoisoned(Poison);
587   Ident__abnormal_termination->setIsPoisoned(Poison);
588   Ident___abnormal_termination->setIsPoisoned(Poison);
589   Ident_AbnormalTermination->setIsPoisoned(Poison);
590 }
591 
592 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
593   assert(Identifier.getIdentifierInfo() &&
594          "Can't handle identifiers without identifier info!");
595   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
596     PoisonReasons.find(Identifier.getIdentifierInfo());
597   if(it == PoisonReasons.end())
598     Diag(Identifier, diag::err_pp_used_poisoned_id);
599   else
600     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
601 }
602 
/// \brief Returns a diagnostic message kind for reporting a future keyword as
/// appropriate for the identifier and specified language.
///
/// \param II       Identifier that is flagged as a future-compatibility
///                 keyword (asserted).
/// \param LangOpts Language options; only the CPlusPlus flag is consulted
///                 here.
/// \returns diag::warn_cxx11_keyword for a name listed through CXX11_KEYWORD
/// in TokenKinds.def when compiling C++. Falling out of the C++ branch is
/// treated as unreachable.
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
                                          const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  if (LangOpts.CPlusPlus)
    // X-macro: including TokenKinds.def with CXX11_KEYWORD defined appends
    // one .Case(...) per listed keyword to the StringSwitch below.
    return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx11_keyword)
#include "clang/Basic/TokenKinds.def"
        ;

  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}
619 
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier.  This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \param Identifier Token that already carries identifier info (asserted).
/// \returns The result of HandleMacroExpandedIdentifier() when the identifier
/// is macro-expanded; true in every other case.
/// NOTE(review): presumably a false result tells the lexer that no token was
/// produced and it should lex again -- confirm against the lexer callers.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    // Remember the local poisoned state of __VA_ARGS__ so it can be restored
    // after the update below.
    bool CurrentIsPoisoned = false;
    if (&II == Ident__VA_ARGS__)
      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();

    ExternalSource->updateOutOfDateIdentifier(II);
    // The update may have changed the identifier's token ID.
    Identifier.setKind(II.getTokenID());

    if (&II == Ident__VA_ARGS__)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDefinition MD = getMacroDefinition(&II)) {
    auto *MI = MD.getMacroInfo();
    assert(MI && "macro definition with no macro info?");
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in a newer Standard or proposed Standard,
  // produce a warning. Don't warn if we're not considering macro expansion,
  // since this identifier might be the name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
        << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsFutureCompatKeyword(false);
  }

  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
  // then we act as if it is the actual operator and not the textual
  // representation of it.
  if (II.isCPlusPlusOperatorKeyword())
    Identifier.setIdentifierInfo(nullptr);

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
      !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    // Reset the import state; subsequent tokens are routed through
    // LexAfterModuleImport() by Lex().
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}
720 
721 void Preprocessor::Lex(Token &Result) {
722   // We loop here until a lex function returns a token; this avoids recursion.
723   bool ReturnedToken;
724   do {
725     switch (CurLexerKind) {
726     case CLK_Lexer:
727       ReturnedToken = CurLexer->Lex(Result);
728       break;
729     case CLK_PTHLexer:
730       ReturnedToken = CurPTHLexer->Lex(Result);
731       break;
732     case CLK_TokenLexer:
733       ReturnedToken = CurTokenLexer->Lex(Result);
734       break;
735     case CLK_CachingLexer:
736       CachingLex(Result);
737       ReturnedToken = true;
738       break;
739     case CLK_LexAfterModuleImport:
740       LexAfterModuleImport(Result);
741       ReturnedToken = true;
742       break;
743     }
744   } while (!ReturnedToken);
745 
746   LastTokenWasAt = Result.is(tok::at);
747 }
748 
749 
750 /// \brief Lex a token following the 'import' contextual keyword.
751 ///
752 void Preprocessor::LexAfterModuleImport(Token &Result) {
753   // Figure out what kind of lexer we actually have.
754   recomputeCurLexerKind();
755 
756   // Lex the next token.
757   Lex(Result);
758 
759   // The token sequence
760   //
761   //   import identifier (. identifier)*
762   //
763   // indicates a module import directive. We already saw the 'import'
764   // contextual keyword, so now we're looking for the identifiers.
765   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
766     // We expected to see an identifier here, and we did; continue handling
767     // identifiers.
768     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
769                                               Result.getLocation()));
770     ModuleImportExpectsIdentifier = false;
771     CurLexerKind = CLK_LexAfterModuleImport;
772     return;
773   }
774 
775   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
776   // see the next identifier.
777   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
778     ModuleImportExpectsIdentifier = true;
779     CurLexerKind = CLK_LexAfterModuleImport;
780     return;
781   }
782 
783   // If we have a non-empty module path, load the named module.
784   if (!ModuleImportPath.empty()) {
785     Module *Imported = nullptr;
786     if (getLangOpts().Modules) {
787       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
788                                             ModuleImportPath,
789                                             Module::Hidden,
790                                             /*IsIncludeDirective=*/false);
791       if (Imported)
792         makeModuleVisible(Imported, ModuleImportLoc);
793     }
794     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
795       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
796   }
797 }
798 
799 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
800   CurSubmoduleState->VisibleModules.setVisible(
801       M, Loc, [](Module *) {},
802       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
803         // FIXME: Include the path in the diagnostic.
804         // FIXME: Include the import location for the conflicting module.
805         Diag(ModuleImportLoc, diag::warn_module_conflict)
806             << Path[0]->getFullModuleName()
807             << Conflict->getFullModuleName()
808             << Message;
809       });
810 
811   // Add this module to the imports list of the currently-built submodule.
812   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
813     BuildingSubmoduleStack.back().M->Imports.insert(M);
814 }
815 
816 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
817                                           const char *DiagnosticTag,
818                                           bool AllowMacroExpansion) {
819   // We need at least one string literal.
820   if (Result.isNot(tok::string_literal)) {
821     Diag(Result, diag::err_expected_string_literal)
822       << /*Source='in...'*/0 << DiagnosticTag;
823     return false;
824   }
825 
826   // Lex string literal tokens, optionally with macro expansion.
827   SmallVector<Token, 4> StrToks;
828   do {
829     StrToks.push_back(Result);
830 
831     if (Result.hasUDSuffix())
832       Diag(Result, diag::err_invalid_string_udl);
833 
834     if (AllowMacroExpansion)
835       Lex(Result);
836     else
837       LexUnexpandedToken(Result);
838   } while (Result.is(tok::string_literal));
839 
840   // Concatenate and parse the strings.
841   StringLiteralParser Literal(StrToks, *this);
842   assert(Literal.isAscii() && "Didn't allow wide strings in");
843 
844   if (Literal.hadError)
845     return false;
846 
847   if (Literal.Pascal) {
848     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
849       << /*Source='in...'*/0 << DiagnosticTag;
850     return false;
851   }
852 
853   String = Literal.GetString();
854   return true;
855 }
856 
857 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
858   assert(Tok.is(tok::numeric_constant));
859   SmallString<8> IntegerBuffer;
860   bool NumberInvalid = false;
861   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
862   if (NumberInvalid)
863     return false;
864   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
865   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
866     return false;
867   llvm::APInt APVal(64, 0);
868   if (Literal.GetIntegerValue(APVal))
869     return false;
870   Lex(Tok);
871   Value = APVal.getLimitedValue();
872   return true;
873 }
874 
875 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
876   assert(Handler && "NULL comment handler");
877   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
878          CommentHandlers.end() && "Comment handler already registered");
879   CommentHandlers.push_back(Handler);
880 }
881 
882 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
883   std::vector<CommentHandler *>::iterator Pos
884   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
885   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
886   CommentHandlers.erase(Pos);
887 }
888 
889 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
890   bool AnyPendingTokens = false;
891   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
892        HEnd = CommentHandlers.end();
893        H != HEnd; ++H) {
894     if ((*H)->HandleComment(*this, Comment))
895       AnyPendingTokens = true;
896   }
897   if (!AnyPendingTokens || getCommentRetentionState())
898     return false;
899   Lex(result);
900   return true;
901 }
902 
903 ModuleLoader::~ModuleLoader() { }
904 
905 CommentHandler::~CommentHandler() { }
906 
907 CodeCompletionHandler::~CodeCompletionHandler() { }
908 
909 void Preprocessor::createPreprocessingRecord() {
910   if (Record)
911     return;
912 
913   Record = new PreprocessingRecord(getSourceManager());
914   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
915 }
916