1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/Pragma.h"
42 #include "clang/Lex/PreprocessingRecord.h"
43 #include "clang/Lex/PreprocessorOptions.h"
44 #include "clang/Lex/ScratchBuffer.h"
45 #include "llvm/ADT/APFloat.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/Capacity.h"
50 #include "llvm/Support/ConvertUTF.h"
51 #include "llvm/Support/MemoryBuffer.h"
52 #include "llvm/Support/raw_ostream.h"
53 using namespace clang;
54 
55 //===----------------------------------------------------------------------===//
56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
57 
58 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
59                            DiagnosticsEngine &diags, LangOptions &opts,
60                            SourceManager &SM, HeaderSearch &Headers,
61                            ModuleLoader &TheModuleLoader,
62                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
63                            TranslationUnitKind TUKind)
64     : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
65       FileMgr(Headers.getFileMgr()), SourceMgr(SM),
66       ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers),
67       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
68       Identifiers(opts, IILookup),
69       PragmaHandlers(new PragmaNamespace(StringRef())),
70       IncrementalProcessing(false), TUKind(TUKind),
71       CodeComplete(nullptr), CodeCompletionFile(nullptr),
72       CodeCompletionOffset(0), LastTokenWasAt(false),
73       ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
74       MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
75       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
76       Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState),
77       MacroArgCache(nullptr), Record(nullptr),
78       MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
79   OwnsHeaderSearch = OwnsHeaders;
80 
81   CounterValue = 0; // __COUNTER__ starts at 0.
82 
83   // Clear stats.
84   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
85   NumIf = NumElse = NumEndif = 0;
86   NumEnteredSourceFiles = 0;
87   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
88   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
89   MaxIncludeStackDepth = 0;
90   NumSkipped = 0;
91 
92   // Default to discarding comments.
93   KeepComments = false;
94   KeepMacroComments = false;
95   SuppressIncludeNotFoundError = false;
96 
97   // Macro expansion is enabled.
98   DisableMacroExpansion = false;
99   MacroExpansionInDirectivesOverride = false;
100   InMacroArgs = false;
101   InMacroArgPreExpansion = false;
102   NumCachedTokenLexers = 0;
103   PragmasEnabled = true;
104   ParsingIfOrElifDirective = false;
105   PreprocessedOutput = false;
106 
107   CachedLexPos = 0;
108 
109   // We haven't read anything from the external source.
110   ReadMacrosFromExternalSource = false;
111 
112   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
113   // This gets unpoisoned where it is allowed.
114   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
115   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
116 
117   // Initialize the pragma handlers.
118   RegisterBuiltinPragmas();
119 
120   // Initialize builtin macros like __LINE__ and friends.
121   RegisterBuiltinMacros();
122 
123   if(LangOpts.Borland) {
124     Ident__exception_info        = getIdentifierInfo("_exception_info");
125     Ident___exception_info       = getIdentifierInfo("__exception_info");
126     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
127     Ident__exception_code        = getIdentifierInfo("_exception_code");
128     Ident___exception_code       = getIdentifierInfo("__exception_code");
129     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
130     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
131     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
132     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
133   } else {
134     Ident__exception_info = Ident__exception_code = nullptr;
135     Ident__abnormal_termination = Ident___exception_info = nullptr;
136     Ident___exception_code = Ident___abnormal_termination = nullptr;
137     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
138     Ident_AbnormalTermination = nullptr;
139   }
140 }
141 
142 Preprocessor::~Preprocessor() {
143   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
144 
145   IncludeMacroStack.clear();
146 
147   // Destroy any macro definitions.
148   while (MacroInfoChain *I = MIChainHead) {
149     MIChainHead = I->Next;
150     I->~MacroInfoChain();
151   }
152 
153   // Free any cached macro expanders.
154   // This populates MacroArgCache, so all TokenLexers need to be destroyed
155   // before the code below that frees up the MacroArgCache list.
156   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
157   CurTokenLexer.reset();
158 
159   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
160     DeserialMIChainHead = I->Next;
161     I->~DeserializedMacroInfoChain();
162   }
163 
164   // Free any cached MacroArgs.
165   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
166     ArgList = ArgList->deallocate();
167 
168   // Delete the header search info, if we own it.
169   if (OwnsHeaderSearch)
170     delete &HeaderInfo;
171 }
172 
173 void Preprocessor::Initialize(const TargetInfo &Target) {
174   assert((!this->Target || this->Target == &Target) &&
175          "Invalid override of target information");
176   this->Target = &Target;
177 
178   // Initialize information about built-ins.
179   BuiltinInfo.InitializeTarget(Target);
180   HeaderInfo.setTarget(Target);
181 }
182 
183 void Preprocessor::InitializeForModelFile() {
184   NumEnteredSourceFiles = 0;
185 
186   // Reset pragmas
187   PragmaHandlersBackup = std::move(PragmaHandlers);
188   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
189   RegisterBuiltinPragmas();
190 
191   // Reset PredefinesFileID
192   PredefinesFileID = FileID();
193 }
194 
195 void Preprocessor::FinalizeForModelFile() {
196   NumEnteredSourceFiles = 1;
197 
198   PragmaHandlers = std::move(PragmaHandlersBackup);
199 }
200 
201 void Preprocessor::setPTHManager(PTHManager* pm) {
202   PTH.reset(pm);
203   FileMgr.addStatCache(PTH->createStatCache());
204 }
205 
206 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
207   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
208                << getSpelling(Tok) << "'";
209 
210   if (!DumpFlags) return;
211 
212   llvm::errs() << "\t";
213   if (Tok.isAtStartOfLine())
214     llvm::errs() << " [StartOfLine]";
215   if (Tok.hasLeadingSpace())
216     llvm::errs() << " [LeadingSpace]";
217   if (Tok.isExpandDisabled())
218     llvm::errs() << " [ExpandDisabled]";
219   if (Tok.needsCleaning()) {
220     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
221     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
222                  << "']";
223   }
224 
225   llvm::errs() << "\tLoc=<";
226   DumpLocation(Tok.getLocation());
227   llvm::errs() << ">";
228 }
229 
230 void Preprocessor::DumpLocation(SourceLocation Loc) const {
231   Loc.dump(SourceMgr);
232 }
233 
234 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
235   llvm::errs() << "MACRO: ";
236   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
237     DumpToken(MI.getReplacementToken(i));
238     llvm::errs() << "  ";
239   }
240   llvm::errs() << "\n";
241 }
242 
243 void Preprocessor::PrintStats() {
244   llvm::errs() << "\n*** Preprocessor Stats:\n";
245   llvm::errs() << NumDirectives << " directives found:\n";
246   llvm::errs() << "  " << NumDefined << " #define.\n";
247   llvm::errs() << "  " << NumUndefined << " #undef.\n";
248   llvm::errs() << "  #include/#include_next/#import:\n";
249   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
250   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
251   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
252   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
253   llvm::errs() << "  " << NumEndif << " #endif.\n";
254   llvm::errs() << "  " << NumPragma << " #pragma.\n";
255   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
256 
257   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
258              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
259              << NumFastMacroExpanded << " on the fast path.\n";
260   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
261              << " token paste (##) operations performed, "
262              << NumFastTokenPaste << " on the fast path.\n";
263 
264   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
265 
266   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
267   llvm::errs() << "\n  Macro Expanded Tokens: "
268                << llvm::capacity_in_bytes(MacroExpandedTokens);
269   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
270   // FIXME: List information for all submodules.
271   llvm::errs() << "\n  Macros: "
272                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
273   llvm::errs() << "\n  #pragma push_macro Info: "
274                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
275   llvm::errs() << "\n  Poison Reasons: "
276                << llvm::capacity_in_bytes(PoisonReasons);
277   llvm::errs() << "\n  Comment Handlers: "
278                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
279 }
280 
281 Preprocessor::macro_iterator
282 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
283   if (IncludeExternalMacros && ExternalSource &&
284       !ReadMacrosFromExternalSource) {
285     ReadMacrosFromExternalSource = true;
286     ExternalSource->ReadDefinedMacros();
287   }
288 
289   return CurSubmoduleState->Macros.begin();
290 }
291 
292 size_t Preprocessor::getTotalMemory() const {
293   return BP.getTotalMemory()
294     + llvm::capacity_in_bytes(MacroExpandedTokens)
295     + Predefines.capacity() /* Predefines buffer. */
296     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
297     // and ModuleMacros.
298     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
299     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
300     + llvm::capacity_in_bytes(PoisonReasons)
301     + llvm::capacity_in_bytes(CommentHandlers);
302 }
303 
304 Preprocessor::macro_iterator
305 Preprocessor::macro_end(bool IncludeExternalMacros) const {
306   if (IncludeExternalMacros && ExternalSource &&
307       !ReadMacrosFromExternalSource) {
308     ReadMacrosFromExternalSource = true;
309     ExternalSource->ReadDefinedMacros();
310   }
311 
312   return CurSubmoduleState->Macros.end();
313 }
314 
315 /// \brief Compares macro tokens with a specified token value sequence.
316 static bool MacroDefinitionEquals(const MacroInfo *MI,
317                                   ArrayRef<TokenValue> Tokens) {
318   return Tokens.size() == MI->getNumTokens() &&
319       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
320 }
321 
322 StringRef Preprocessor::getLastMacroWithSpelling(
323                                     SourceLocation Loc,
324                                     ArrayRef<TokenValue> Tokens) const {
325   SourceLocation BestLocation;
326   StringRef BestSpelling;
327   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
328        I != E; ++I) {
329     const MacroDirective::DefInfo
330       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
331     if (!Def || !Def.getMacroInfo())
332       continue;
333     if (!Def.getMacroInfo()->isObjectLike())
334       continue;
335     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
336       continue;
337     SourceLocation Location = Def.getLocation();
338     // Choose the macro defined latest.
339     if (BestLocation.isInvalid() ||
340         (Location.isValid() &&
341          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
342       BestLocation = Location;
343       BestSpelling = I->first->getName();
344     }
345   }
346   return BestSpelling;
347 }
348 
349 void Preprocessor::recomputeCurLexerKind() {
350   if (CurLexer)
351     CurLexerKind = CLK_Lexer;
352   else if (CurPTHLexer)
353     CurLexerKind = CLK_PTHLexer;
354   else if (CurTokenLexer)
355     CurLexerKind = CLK_TokenLexer;
356   else
357     CurLexerKind = CLK_CachingLexer;
358 }
359 
360 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
361                                           unsigned CompleteLine,
362                                           unsigned CompleteColumn) {
363   assert(File);
364   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
365   assert(!CodeCompletionFile && "Already set");
366 
367   using llvm::MemoryBuffer;
368 
369   // Load the actual file's contents.
370   bool Invalid = false;
371   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
372   if (Invalid)
373     return true;
374 
375   // Find the byte position of the truncation point.
376   const char *Position = Buffer->getBufferStart();
377   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
378     for (; *Position; ++Position) {
379       if (*Position != '\r' && *Position != '\n')
380         continue;
381 
382       // Eat \r\n or \n\r as a single line.
383       if ((Position[1] == '\r' || Position[1] == '\n') &&
384           Position[0] != Position[1])
385         ++Position;
386       ++Position;
387       break;
388     }
389   }
390 
391   Position += CompleteColumn - 1;
392 
393   // If pointing inside the preamble, adjust the position at the beginning of
394   // the file after the preamble.
395   if (SkipMainFilePreamble.first &&
396       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
397     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
398       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
399   }
400 
401   if (Position > Buffer->getBufferEnd())
402     Position = Buffer->getBufferEnd();
403 
404   CodeCompletionFile = File;
405   CodeCompletionOffset = Position - Buffer->getBufferStart();
406 
407   std::unique_ptr<MemoryBuffer> NewBuffer =
408       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
409                                           Buffer->getBufferIdentifier());
410   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
411   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
412   *NewPos = '\0';
413   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
414   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
415 
416   return false;
417 }
418 
419 void Preprocessor::CodeCompleteNaturalLanguage() {
420   if (CodeComplete)
421     CodeComplete->CodeCompleteNaturalLanguage();
422   setCodeCompletionReached();
423 }
424 
425 /// getSpelling - This method is used to get the spelling of a token into a
426 /// SmallVector. Note that the returned StringRef may not point to the
427 /// supplied buffer if a copy can be avoided.
428 StringRef Preprocessor::getSpelling(const Token &Tok,
429                                           SmallVectorImpl<char> &Buffer,
430                                           bool *Invalid) const {
431   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
432   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
433     // Try the fast path.
434     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
435       return II->getName();
436   }
437 
438   // Resize the buffer if we need to copy into it.
439   if (Tok.needsCleaning())
440     Buffer.resize(Tok.getLength());
441 
442   const char *Ptr = Buffer.data();
443   unsigned Len = getSpelling(Tok, Ptr, Invalid);
444   return StringRef(Ptr, Len);
445 }
446 
447 /// CreateString - Plop the specified string into a scratch buffer and return a
448 /// location for it.  If specified, the source location provides a source
449 /// location for the token.
450 void Preprocessor::CreateString(StringRef Str, Token &Tok,
451                                 SourceLocation ExpansionLocStart,
452                                 SourceLocation ExpansionLocEnd) {
453   Tok.setLength(Str.size());
454 
455   const char *DestPtr;
456   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
457 
458   if (ExpansionLocStart.isValid())
459     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
460                                        ExpansionLocEnd, Str.size());
461   Tok.setLocation(Loc);
462 
463   // If this is a raw identifier or a literal token, set the pointer data.
464   if (Tok.is(tok::raw_identifier))
465     Tok.setRawIdentifierData(DestPtr);
466   else if (Tok.isLiteral())
467     Tok.setLiteralData(DestPtr);
468 }
469 
470 Module *Preprocessor::getCurrentModule() {
471   if (getLangOpts().CurrentModule.empty())
472     return nullptr;
473 
474   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
475 }
476 
477 //===----------------------------------------------------------------------===//
478 // Preprocessor Initialization Methods
479 //===----------------------------------------------------------------------===//
480 
481 
482 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
483 /// which implicitly adds the builtin defines etc.
484 void Preprocessor::EnterMainSourceFile() {
485   // We do not allow the preprocessor to reenter the main file.  Doing so will
486   // cause FileID's to accumulate information from both runs (e.g. #line
487   // information) and predefined macros aren't guaranteed to be set properly.
488   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
489   FileID MainFileID = SourceMgr.getMainFileID();
490 
491   // If MainFileID is loaded it means we loaded an AST file, no need to enter
492   // a main file.
493   if (!SourceMgr.isLoadedFileID(MainFileID)) {
494     // Enter the main file source buffer.
495     EnterSourceFile(MainFileID, nullptr, SourceLocation());
496 
497     // If we've been asked to skip bytes in the main file (e.g., as part of a
498     // precompiled preamble), do so now.
499     if (SkipMainFilePreamble.first > 0)
500       CurLexer->SkipBytes(SkipMainFilePreamble.first,
501                           SkipMainFilePreamble.second);
502 
503     // Tell the header info that the main file was entered.  If the file is later
504     // #imported, it won't be re-entered.
505     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
506       HeaderInfo.IncrementIncludeCount(FE);
507   }
508 
509   // Preprocess Predefines to populate the initial preprocessor state.
510   std::unique_ptr<llvm::MemoryBuffer> SB =
511     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
512   assert(SB && "Cannot create predefined source buffer");
513   FileID FID = SourceMgr.createFileID(std::move(SB));
514   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
515   setPredefinesFileID(FID);
516 
517   // Start parsing the predefines.
518   EnterSourceFile(FID, nullptr, SourceLocation());
519 }
520 
521 void Preprocessor::EndSourceFile() {
522   // Notify the client that we reached the end of the source file.
523   if (Callbacks)
524     Callbacks->EndOfMainFile();
525 }
526 
527 //===----------------------------------------------------------------------===//
528 // Lexer Event Handling.
529 //===----------------------------------------------------------------------===//
530 
531 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
532 /// identifier information for the token and install it into the token,
533 /// updating the token kind accordingly.
534 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
535   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
536 
537   // Look up this token, see if it is a macro, or if it is a language keyword.
538   IdentifierInfo *II;
539   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
540     // No cleaning needed, just use the characters from the lexed buffer.
541     II = getIdentifierInfo(Identifier.getRawIdentifier());
542   } else {
543     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
544     SmallString<64> IdentifierBuffer;
545     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
546 
547     if (Identifier.hasUCN()) {
548       SmallString<64> UCNIdentifierBuffer;
549       expandUCNs(UCNIdentifierBuffer, CleanedStr);
550       II = getIdentifierInfo(UCNIdentifierBuffer);
551     } else {
552       II = getIdentifierInfo(CleanedStr);
553     }
554   }
555 
556   // Update the token info (identifier info and appropriate token kind).
557   Identifier.setIdentifierInfo(II);
558   Identifier.setKind(II->getTokenID());
559 
560   return II;
561 }
562 
563 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
564   PoisonReasons[II] = DiagID;
565 }
566 
567 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
568   assert(Ident__exception_code && Ident__exception_info);
569   assert(Ident___exception_code && Ident___exception_info);
570   Ident__exception_code->setIsPoisoned(Poison);
571   Ident___exception_code->setIsPoisoned(Poison);
572   Ident_GetExceptionCode->setIsPoisoned(Poison);
573   Ident__exception_info->setIsPoisoned(Poison);
574   Ident___exception_info->setIsPoisoned(Poison);
575   Ident_GetExceptionInfo->setIsPoisoned(Poison);
576   Ident__abnormal_termination->setIsPoisoned(Poison);
577   Ident___abnormal_termination->setIsPoisoned(Poison);
578   Ident_AbnormalTermination->setIsPoisoned(Poison);
579 }
580 
581 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
582   assert(Identifier.getIdentifierInfo() &&
583          "Can't handle identifiers without identifier info!");
584   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
585     PoisonReasons.find(Identifier.getIdentifierInfo());
586   if(it == PoisonReasons.end())
587     Diag(Identifier, diag::err_pp_used_poisoned_id);
588   else
589     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
590 }
591 
592 /// \brief Returns a diagnostic message kind for reporting a future keyword as
593 /// appropriate for the identifier and specified language.
594 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
595                                           const LangOptions &LangOpts) {
596   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
597 
598   if (LangOpts.CPlusPlus)
599     return llvm::StringSwitch<diag::kind>(II.getName())
600 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
601         .Case(#NAME, diag::warn_cxx11_keyword)
602 #include "clang/Basic/TokenKinds.def"
603         ;
604 
605   llvm_unreachable(
606       "Keyword not known to come from a newer Standard or proposed Standard");
607 }
608 
609 /// HandleIdentifier - This callback is invoked when the lexer reads an
610 /// identifier.  This callback looks up the identifier in the map and/or
611 /// potentially macro expands it or turns it into a named token (like 'for').
612 ///
613 /// Note that callers of this method are guarded by checking the
614 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
615 /// IdentifierInfo methods that compute these properties will need to change to
616 /// match.
617 bool Preprocessor::HandleIdentifier(Token &Identifier) {
618   assert(Identifier.getIdentifierInfo() &&
619          "Can't handle identifiers without identifier info!");
620 
621   IdentifierInfo &II = *Identifier.getIdentifierInfo();
622 
623   // If the information about this identifier is out of date, update it from
624   // the external source.
625   // We have to treat __VA_ARGS__ in a special way, since it gets
626   // serialized with isPoisoned = true, but our preprocessor may have
627   // unpoisoned it if we're defining a C99 macro.
628   if (II.isOutOfDate()) {
629     bool CurrentIsPoisoned = false;
630     if (&II == Ident__VA_ARGS__)
631       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
632 
633     ExternalSource->updateOutOfDateIdentifier(II);
634     Identifier.setKind(II.getTokenID());
635 
636     if (&II == Ident__VA_ARGS__)
637       II.setIsPoisoned(CurrentIsPoisoned);
638   }
639 
640   // If this identifier was poisoned, and if it was not produced from a macro
641   // expansion, emit an error.
642   if (II.isPoisoned() && CurPPLexer) {
643     HandlePoisonedIdentifier(Identifier);
644   }
645 
646   // If this is a macro to be expanded, do it.
647   if (MacroDefinition MD = getMacroDefinition(&II)) {
648     auto *MI = MD.getMacroInfo();
649     assert(MI && "macro definition with no macro info?");
650     if (!DisableMacroExpansion) {
651       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
652         // C99 6.10.3p10: If the preprocessing token immediately after the
653         // macro name isn't a '(', this macro should not be expanded.
654         if (!MI->isFunctionLike() || isNextPPTokenLParen())
655           return HandleMacroExpandedIdentifier(Identifier, MD);
656       } else {
657         // C99 6.10.3.4p2 says that a disabled macro may never again be
658         // expanded, even if it's in a context where it could be expanded in the
659         // future.
660         Identifier.setFlag(Token::DisableExpand);
661         if (MI->isObjectLike() || isNextPPTokenLParen())
662           Diag(Identifier, diag::pp_disabled_macro_expansion);
663       }
664     }
665   }
666 
667   // If this identifier is a keyword in a newer Standard or proposed Standard,
668   // produce a warning. Don't warn if we're not considering macro expansion,
669   // since this identifier might be the name of a macro.
670   // FIXME: This warning is disabled in cases where it shouldn't be, like
671   //   "#define constexpr constexpr", "int constexpr;"
672   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
673     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
674         << II.getName();
675     // Don't diagnose this keyword again in this translation unit.
676     II.setIsFutureCompatKeyword(false);
677   }
678 
679   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
680   // then we act as if it is the actual operator and not the textual
681   // representation of it.
682   if (II.isCPlusPlusOperatorKeyword())
683     Identifier.setIdentifierInfo(nullptr);
684 
685   // If this is an extension token, diagnose its use.
686   // We avoid diagnosing tokens that originate from macro definitions.
687   // FIXME: This warning is disabled in cases where it shouldn't be,
688   // like "#define TY typeof", "TY(1) x".
689   if (II.isExtensionToken() && !DisableMacroExpansion)
690     Diag(Identifier, diag::ext_token_used);
691 
692   // If this is the 'import' contextual keyword following an '@', note
693   // that the next token indicates a module name.
694   //
695   // Note that we do not treat 'import' as a contextual
696   // keyword when we're in a caching lexer, because caching lexers only get
697   // used in contexts where import declarations are disallowed.
698   if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
699       !DisableMacroExpansion &&
700       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
701       CurLexerKind != CLK_CachingLexer) {
702     ModuleImportLoc = Identifier.getLocation();
703     ModuleImportPath.clear();
704     ModuleImportExpectsIdentifier = true;
705     CurLexerKind = CLK_LexAfterModuleImport;
706   }
707   return true;
708 }
709 
710 void Preprocessor::Lex(Token &Result) {
711   // We loop here until a lex function retuns a token; this avoids recursion.
712   bool ReturnedToken;
713   do {
714     switch (CurLexerKind) {
715     case CLK_Lexer:
716       ReturnedToken = CurLexer->Lex(Result);
717       break;
718     case CLK_PTHLexer:
719       ReturnedToken = CurPTHLexer->Lex(Result);
720       break;
721     case CLK_TokenLexer:
722       ReturnedToken = CurTokenLexer->Lex(Result);
723       break;
724     case CLK_CachingLexer:
725       CachingLex(Result);
726       ReturnedToken = true;
727       break;
728     case CLK_LexAfterModuleImport:
729       LexAfterModuleImport(Result);
730       ReturnedToken = true;
731       break;
732     }
733   } while (!ReturnedToken);
734 
735   LastTokenWasAt = Result.is(tok::at);
736 }
737 
738 
739 /// \brief Lex a token following the 'import' contextual keyword.
740 ///
741 void Preprocessor::LexAfterModuleImport(Token &Result) {
742   // Figure out what kind of lexer we actually have.
743   recomputeCurLexerKind();
744 
745   // Lex the next token.
746   Lex(Result);
747 
748   // The token sequence
749   //
750   //   import identifier (. identifier)*
751   //
752   // indicates a module import directive. We already saw the 'import'
753   // contextual keyword, so now we're looking for the identifiers.
754   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
755     // We expected to see an identifier here, and we did; continue handling
756     // identifiers.
757     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
758                                               Result.getLocation()));
759     ModuleImportExpectsIdentifier = false;
760     CurLexerKind = CLK_LexAfterModuleImport;
761     return;
762   }
763 
764   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
765   // see the next identifier.
766   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
767     ModuleImportExpectsIdentifier = true;
768     CurLexerKind = CLK_LexAfterModuleImport;
769     return;
770   }
771 
772   // If we have a non-empty module path, load the named module.
773   if (!ModuleImportPath.empty()) {
774     Module *Imported = nullptr;
775     if (getLangOpts().Modules) {
776       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
777                                             ModuleImportPath,
778                                             Module::Hidden,
779                                             /*IsIncludeDirective=*/false);
780       if (Imported)
781         makeModuleVisible(Imported, ModuleImportLoc);
782     }
783     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
784       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
785   }
786 }
787 
788 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
789   CurSubmoduleState->VisibleModules.setVisible(
790       M, Loc, [](Module *) {},
791       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
792         // FIXME: Include the path in the diagnostic.
793         // FIXME: Include the import location for the conflicting module.
794         Diag(ModuleImportLoc, diag::warn_module_conflict)
795             << Path[0]->getFullModuleName()
796             << Conflict->getFullModuleName()
797             << Message;
798       });
799 
800   // Add this module to the imports list of the currently-built submodule.
801   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
802     BuildingSubmoduleStack.back().M->Imports.insert(M);
803 }
804 
805 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
806                                           const char *DiagnosticTag,
807                                           bool AllowMacroExpansion) {
808   // We need at least one string literal.
809   if (Result.isNot(tok::string_literal)) {
810     Diag(Result, diag::err_expected_string_literal)
811       << /*Source='in...'*/0 << DiagnosticTag;
812     return false;
813   }
814 
815   // Lex string literal tokens, optionally with macro expansion.
816   SmallVector<Token, 4> StrToks;
817   do {
818     StrToks.push_back(Result);
819 
820     if (Result.hasUDSuffix())
821       Diag(Result, diag::err_invalid_string_udl);
822 
823     if (AllowMacroExpansion)
824       Lex(Result);
825     else
826       LexUnexpandedToken(Result);
827   } while (Result.is(tok::string_literal));
828 
829   // Concatenate and parse the strings.
830   StringLiteralParser Literal(StrToks, *this);
831   assert(Literal.isAscii() && "Didn't allow wide strings in");
832 
833   if (Literal.hadError)
834     return false;
835 
836   if (Literal.Pascal) {
837     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
838       << /*Source='in...'*/0 << DiagnosticTag;
839     return false;
840   }
841 
842   String = Literal.GetString();
843   return true;
844 }
845 
846 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
847   assert(Tok.is(tok::numeric_constant));
848   SmallString<8> IntegerBuffer;
849   bool NumberInvalid = false;
850   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
851   if (NumberInvalid)
852     return false;
853   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
854   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
855     return false;
856   llvm::APInt APVal(64, 0);
857   if (Literal.GetIntegerValue(APVal))
858     return false;
859   Lex(Tok);
860   Value = APVal.getLimitedValue();
861   return true;
862 }
863 
864 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
865   assert(Handler && "NULL comment handler");
866   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
867          CommentHandlers.end() && "Comment handler already registered");
868   CommentHandlers.push_back(Handler);
869 }
870 
871 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
872   std::vector<CommentHandler *>::iterator Pos
873   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
874   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
875   CommentHandlers.erase(Pos);
876 }
877 
878 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
879   bool AnyPendingTokens = false;
880   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
881        HEnd = CommentHandlers.end();
882        H != HEnd; ++H) {
883     if ((*H)->HandleComment(*this, Comment))
884       AnyPendingTokens = true;
885   }
886   if (!AnyPendingTokens || getCommentRetentionState())
887     return false;
888   Lex(result);
889   return true;
890 }
891 
892 ModuleLoader::~ModuleLoader() { }
893 
894 CommentHandler::~CommentHandler() { }
895 
896 CodeCompletionHandler::~CodeCompletionHandler() { }
897 
898 void Preprocessor::createPreprocessingRecord() {
899   if (Record)
900     return;
901 
902   Record = new PreprocessingRecord(getSourceManager());
903   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
904 }
905