1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/Pragma.h"
42 #include "clang/Lex/PreprocessingRecord.h"
43 #include "clang/Lex/PreprocessorOptions.h"
44 #include "clang/Lex/ScratchBuffer.h"
45 #include "llvm/ADT/APFloat.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/Capacity.h"
50 #include "llvm/Support/ConvertUTF.h"
51 #include "llvm/Support/MemoryBuffer.h"
52 #include "llvm/Support/raw_ostream.h"
53 using namespace clang;
54 
55 //===----------------------------------------------------------------------===//
56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
57 
58 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
59                            DiagnosticsEngine &diags, LangOptions &opts,
60                            SourceManager &SM, HeaderSearch &Headers,
61                            ModuleLoader &TheModuleLoader,
62                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
63                            TranslationUnitKind TUKind)
64     : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
65       FileMgr(Headers.getFileMgr()), SourceMgr(SM),
66       ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers),
67       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
68       Identifiers(opts, IILookup),
69       PragmaHandlers(new PragmaNamespace(StringRef())),
70       IncrementalProcessing(false), TUKind(TUKind),
71       CodeComplete(nullptr), CodeCompletionFile(nullptr),
72       CodeCompletionOffset(0), LastTokenWasAt(false),
73       ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
74       MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
75       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
76       Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState),
77       MacroArgCache(nullptr), Record(nullptr),
78       MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
79   OwnsHeaderSearch = OwnsHeaders;
80 
81   CounterValue = 0; // __COUNTER__ starts at 0.
82 
83   // Clear stats.
84   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
85   NumIf = NumElse = NumEndif = 0;
86   NumEnteredSourceFiles = 0;
87   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
88   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
89   MaxIncludeStackDepth = 0;
90   NumSkipped = 0;
91 
92   // Default to discarding comments.
93   KeepComments = false;
94   KeepMacroComments = false;
95   SuppressIncludeNotFoundError = false;
96 
97   // Macro expansion is enabled.
98   DisableMacroExpansion = false;
99   MacroExpansionInDirectivesOverride = false;
100   InMacroArgs = false;
101   InMacroArgPreExpansion = false;
102   NumCachedTokenLexers = 0;
103   PragmasEnabled = true;
104   ParsingIfOrElifDirective = false;
105   PreprocessedOutput = false;
106 
107   CachedLexPos = 0;
108 
109   // We haven't read anything from the external source.
110   ReadMacrosFromExternalSource = false;
111 
112   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
113   // This gets unpoisoned where it is allowed.
114   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
115   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
116 
117   // Initialize the pragma handlers.
118   RegisterBuiltinPragmas();
119 
120   // Initialize builtin macros like __LINE__ and friends.
121   RegisterBuiltinMacros();
122 
123   if(LangOpts.Borland) {
124     Ident__exception_info        = getIdentifierInfo("_exception_info");
125     Ident___exception_info       = getIdentifierInfo("__exception_info");
126     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
127     Ident__exception_code        = getIdentifierInfo("_exception_code");
128     Ident___exception_code       = getIdentifierInfo("__exception_code");
129     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
130     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
131     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
132     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
133   } else {
134     Ident__exception_info = Ident__exception_code = nullptr;
135     Ident__abnormal_termination = Ident___exception_info = nullptr;
136     Ident___exception_code = Ident___abnormal_termination = nullptr;
137     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
138     Ident_AbnormalTermination = nullptr;
139   }
140 }
141 
142 Preprocessor::~Preprocessor() {
143   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
144 
145   IncludeMacroStack.clear();
146 
147   // Destroy any macro definitions.
148   while (MacroInfoChain *I = MIChainHead) {
149     MIChainHead = I->Next;
150     I->~MacroInfoChain();
151   }
152 
153   // Free any cached macro expanders.
154   // This populates MacroArgCache, so all TokenLexers need to be destroyed
155   // before the code below that frees up the MacroArgCache list.
156   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
157   CurTokenLexer.reset();
158 
159   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
160     DeserialMIChainHead = I->Next;
161     I->~DeserializedMacroInfoChain();
162   }
163 
164   // Free any cached MacroArgs.
165   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
166     ArgList = ArgList->deallocate();
167 
168   // Delete the header search info, if we own it.
169   if (OwnsHeaderSearch)
170     delete &HeaderInfo;
171 }
172 
173 void Preprocessor::Initialize(const TargetInfo &Target) {
174   assert((!this->Target || this->Target == &Target) &&
175          "Invalid override of target information");
176   this->Target = &Target;
177 
178   // Initialize information about built-ins.
179   BuiltinInfo.initializeTarget(Target);
180   HeaderInfo.setTarget(Target);
181 }
182 
183 void Preprocessor::InitializeForModelFile() {
184   NumEnteredSourceFiles = 0;
185 
186   // Reset pragmas
187   PragmaHandlersBackup = std::move(PragmaHandlers);
188   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
189   RegisterBuiltinPragmas();
190 
191   // Reset PredefinesFileID
192   PredefinesFileID = FileID();
193 }
194 
195 void Preprocessor::FinalizeForModelFile() {
196   NumEnteredSourceFiles = 1;
197 
198   PragmaHandlers = std::move(PragmaHandlersBackup);
199 }
200 
201 void Preprocessor::setPTHManager(PTHManager* pm) {
202   PTH.reset(pm);
203   FileMgr.addStatCache(PTH->createStatCache());
204 }
205 
206 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
207   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
208                << getSpelling(Tok) << "'";
209 
210   if (!DumpFlags) return;
211 
212   llvm::errs() << "\t";
213   if (Tok.isAtStartOfLine())
214     llvm::errs() << " [StartOfLine]";
215   if (Tok.hasLeadingSpace())
216     llvm::errs() << " [LeadingSpace]";
217   if (Tok.isExpandDisabled())
218     llvm::errs() << " [ExpandDisabled]";
219   if (Tok.needsCleaning()) {
220     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
221     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
222                  << "']";
223   }
224 
225   llvm::errs() << "\tLoc=<";
226   DumpLocation(Tok.getLocation());
227   llvm::errs() << ">";
228 }
229 
230 void Preprocessor::DumpLocation(SourceLocation Loc) const {
231   Loc.dump(SourceMgr);
232 }
233 
234 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
235   llvm::errs() << "MACRO: ";
236   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
237     DumpToken(MI.getReplacementToken(i));
238     llvm::errs() << "  ";
239   }
240   llvm::errs() << "\n";
241 }
242 
243 void Preprocessor::PrintStats() {
244   llvm::errs() << "\n*** Preprocessor Stats:\n";
245   llvm::errs() << NumDirectives << " directives found:\n";
246   llvm::errs() << "  " << NumDefined << " #define.\n";
247   llvm::errs() << "  " << NumUndefined << " #undef.\n";
248   llvm::errs() << "  #include/#include_next/#import:\n";
249   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
250   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
251   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
252   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
253   llvm::errs() << "  " << NumEndif << " #endif.\n";
254   llvm::errs() << "  " << NumPragma << " #pragma.\n";
255   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
256 
257   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
258              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
259              << NumFastMacroExpanded << " on the fast path.\n";
260   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
261              << " token paste (##) operations performed, "
262              << NumFastTokenPaste << " on the fast path.\n";
263 
264   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
265 
266   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
267   llvm::errs() << "\n  Macro Expanded Tokens: "
268                << llvm::capacity_in_bytes(MacroExpandedTokens);
269   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
270   // FIXME: List information for all submodules.
271   llvm::errs() << "\n  Macros: "
272                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
273   llvm::errs() << "\n  #pragma push_macro Info: "
274                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
275   llvm::errs() << "\n  Poison Reasons: "
276                << llvm::capacity_in_bytes(PoisonReasons);
277   llvm::errs() << "\n  Comment Handlers: "
278                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
279 }
280 
281 Preprocessor::macro_iterator
282 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
283   if (IncludeExternalMacros && ExternalSource &&
284       !ReadMacrosFromExternalSource) {
285     ReadMacrosFromExternalSource = true;
286     ExternalSource->ReadDefinedMacros();
287   }
288 
289   // Make sure we cover all macros in visible modules.
290   for (const ModuleMacro &Macro : ModuleMacros)
291     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
292 
293   return CurSubmoduleState->Macros.begin();
294 }
295 
296 size_t Preprocessor::getTotalMemory() const {
297   return BP.getTotalMemory()
298     + llvm::capacity_in_bytes(MacroExpandedTokens)
299     + Predefines.capacity() /* Predefines buffer. */
300     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
301     // and ModuleMacros.
302     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
303     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
304     + llvm::capacity_in_bytes(PoisonReasons)
305     + llvm::capacity_in_bytes(CommentHandlers);
306 }
307 
308 Preprocessor::macro_iterator
309 Preprocessor::macro_end(bool IncludeExternalMacros) const {
310   if (IncludeExternalMacros && ExternalSource &&
311       !ReadMacrosFromExternalSource) {
312     ReadMacrosFromExternalSource = true;
313     ExternalSource->ReadDefinedMacros();
314   }
315 
316   return CurSubmoduleState->Macros.end();
317 }
318 
319 /// \brief Compares macro tokens with a specified token value sequence.
320 static bool MacroDefinitionEquals(const MacroInfo *MI,
321                                   ArrayRef<TokenValue> Tokens) {
322   return Tokens.size() == MI->getNumTokens() &&
323       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
324 }
325 
326 StringRef Preprocessor::getLastMacroWithSpelling(
327                                     SourceLocation Loc,
328                                     ArrayRef<TokenValue> Tokens) const {
329   SourceLocation BestLocation;
330   StringRef BestSpelling;
331   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
332        I != E; ++I) {
333     const MacroDirective::DefInfo
334       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
335     if (!Def || !Def.getMacroInfo())
336       continue;
337     if (!Def.getMacroInfo()->isObjectLike())
338       continue;
339     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
340       continue;
341     SourceLocation Location = Def.getLocation();
342     // Choose the macro defined latest.
343     if (BestLocation.isInvalid() ||
344         (Location.isValid() &&
345          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
346       BestLocation = Location;
347       BestSpelling = I->first->getName();
348     }
349   }
350   return BestSpelling;
351 }
352 
353 void Preprocessor::recomputeCurLexerKind() {
354   if (CurLexer)
355     CurLexerKind = CLK_Lexer;
356   else if (CurPTHLexer)
357     CurLexerKind = CLK_PTHLexer;
358   else if (CurTokenLexer)
359     CurLexerKind = CLK_TokenLexer;
360   else
361     CurLexerKind = CLK_CachingLexer;
362 }
363 
364 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
365                                           unsigned CompleteLine,
366                                           unsigned CompleteColumn) {
367   assert(File);
368   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
369   assert(!CodeCompletionFile && "Already set");
370 
371   using llvm::MemoryBuffer;
372 
373   // Load the actual file's contents.
374   bool Invalid = false;
375   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
376   if (Invalid)
377     return true;
378 
379   // Find the byte position of the truncation point.
380   const char *Position = Buffer->getBufferStart();
381   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
382     for (; *Position; ++Position) {
383       if (*Position != '\r' && *Position != '\n')
384         continue;
385 
386       // Eat \r\n or \n\r as a single line.
387       if ((Position[1] == '\r' || Position[1] == '\n') &&
388           Position[0] != Position[1])
389         ++Position;
390       ++Position;
391       break;
392     }
393   }
394 
395   Position += CompleteColumn - 1;
396 
397   // If pointing inside the preamble, adjust the position at the beginning of
398   // the file after the preamble.
399   if (SkipMainFilePreamble.first &&
400       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
401     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
402       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
403   }
404 
405   if (Position > Buffer->getBufferEnd())
406     Position = Buffer->getBufferEnd();
407 
408   CodeCompletionFile = File;
409   CodeCompletionOffset = Position - Buffer->getBufferStart();
410 
411   std::unique_ptr<MemoryBuffer> NewBuffer =
412       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
413                                           Buffer->getBufferIdentifier());
414   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
415   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
416   *NewPos = '\0';
417   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
418   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
419 
420   return false;
421 }
422 
423 void Preprocessor::CodeCompleteNaturalLanguage() {
424   if (CodeComplete)
425     CodeComplete->CodeCompleteNaturalLanguage();
426   setCodeCompletionReached();
427 }
428 
429 /// getSpelling - This method is used to get the spelling of a token into a
430 /// SmallVector. Note that the returned StringRef may not point to the
431 /// supplied buffer if a copy can be avoided.
432 StringRef Preprocessor::getSpelling(const Token &Tok,
433                                           SmallVectorImpl<char> &Buffer,
434                                           bool *Invalid) const {
435   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
436   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
437     // Try the fast path.
438     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
439       return II->getName();
440   }
441 
442   // Resize the buffer if we need to copy into it.
443   if (Tok.needsCleaning())
444     Buffer.resize(Tok.getLength());
445 
446   const char *Ptr = Buffer.data();
447   unsigned Len = getSpelling(Tok, Ptr, Invalid);
448   return StringRef(Ptr, Len);
449 }
450 
451 /// CreateString - Plop the specified string into a scratch buffer and return a
452 /// location for it.  If specified, the source location provides a source
453 /// location for the token.
454 void Preprocessor::CreateString(StringRef Str, Token &Tok,
455                                 SourceLocation ExpansionLocStart,
456                                 SourceLocation ExpansionLocEnd) {
457   Tok.setLength(Str.size());
458 
459   const char *DestPtr;
460   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
461 
462   if (ExpansionLocStart.isValid())
463     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
464                                        ExpansionLocEnd, Str.size());
465   Tok.setLocation(Loc);
466 
467   // If this is a raw identifier or a literal token, set the pointer data.
468   if (Tok.is(tok::raw_identifier))
469     Tok.setRawIdentifierData(DestPtr);
470   else if (Tok.isLiteral())
471     Tok.setLiteralData(DestPtr);
472 }
473 
474 Module *Preprocessor::getCurrentModule() {
475   if (getLangOpts().CurrentModule.empty())
476     return nullptr;
477 
478   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
479 }
480 
481 //===----------------------------------------------------------------------===//
482 // Preprocessor Initialization Methods
483 //===----------------------------------------------------------------------===//
484 
485 
486 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
487 /// which implicitly adds the builtin defines etc.
488 void Preprocessor::EnterMainSourceFile() {
489   // We do not allow the preprocessor to reenter the main file.  Doing so will
490   // cause FileID's to accumulate information from both runs (e.g. #line
491   // information) and predefined macros aren't guaranteed to be set properly.
492   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
493   FileID MainFileID = SourceMgr.getMainFileID();
494 
495   // If MainFileID is loaded it means we loaded an AST file, no need to enter
496   // a main file.
497   if (!SourceMgr.isLoadedFileID(MainFileID)) {
498     // Enter the main file source buffer.
499     EnterSourceFile(MainFileID, nullptr, SourceLocation());
500 
501     // If we've been asked to skip bytes in the main file (e.g., as part of a
502     // precompiled preamble), do so now.
503     if (SkipMainFilePreamble.first > 0)
504       CurLexer->SkipBytes(SkipMainFilePreamble.first,
505                           SkipMainFilePreamble.second);
506 
507     // Tell the header info that the main file was entered.  If the file is later
508     // #imported, it won't be re-entered.
509     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
510       HeaderInfo.IncrementIncludeCount(FE);
511   }
512 
513   // Preprocess Predefines to populate the initial preprocessor state.
514   std::unique_ptr<llvm::MemoryBuffer> SB =
515     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
516   assert(SB && "Cannot create predefined source buffer");
517   FileID FID = SourceMgr.createFileID(std::move(SB));
518   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
519   setPredefinesFileID(FID);
520 
521   // Start parsing the predefines.
522   EnterSourceFile(FID, nullptr, SourceLocation());
523 }
524 
525 void Preprocessor::EndSourceFile() {
526   // Notify the client that we reached the end of the source file.
527   if (Callbacks)
528     Callbacks->EndOfMainFile();
529 }
530 
531 //===----------------------------------------------------------------------===//
532 // Lexer Event Handling.
533 //===----------------------------------------------------------------------===//
534 
535 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
536 /// identifier information for the token and install it into the token,
537 /// updating the token kind accordingly.
538 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
539   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
540 
541   // Look up this token, see if it is a macro, or if it is a language keyword.
542   IdentifierInfo *II;
543   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
544     // No cleaning needed, just use the characters from the lexed buffer.
545     II = getIdentifierInfo(Identifier.getRawIdentifier());
546   } else {
547     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
548     SmallString<64> IdentifierBuffer;
549     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
550 
551     if (Identifier.hasUCN()) {
552       SmallString<64> UCNIdentifierBuffer;
553       expandUCNs(UCNIdentifierBuffer, CleanedStr);
554       II = getIdentifierInfo(UCNIdentifierBuffer);
555     } else {
556       II = getIdentifierInfo(CleanedStr);
557     }
558   }
559 
560   // Update the token info (identifier info and appropriate token kind).
561   Identifier.setIdentifierInfo(II);
562   Identifier.setKind(II->getTokenID());
563 
564   return II;
565 }
566 
567 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
568   PoisonReasons[II] = DiagID;
569 }
570 
571 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
572   assert(Ident__exception_code && Ident__exception_info);
573   assert(Ident___exception_code && Ident___exception_info);
574   Ident__exception_code->setIsPoisoned(Poison);
575   Ident___exception_code->setIsPoisoned(Poison);
576   Ident_GetExceptionCode->setIsPoisoned(Poison);
577   Ident__exception_info->setIsPoisoned(Poison);
578   Ident___exception_info->setIsPoisoned(Poison);
579   Ident_GetExceptionInfo->setIsPoisoned(Poison);
580   Ident__abnormal_termination->setIsPoisoned(Poison);
581   Ident___abnormal_termination->setIsPoisoned(Poison);
582   Ident_AbnormalTermination->setIsPoisoned(Poison);
583 }
584 
585 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
586   assert(Identifier.getIdentifierInfo() &&
587          "Can't handle identifiers without identifier info!");
588   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
589     PoisonReasons.find(Identifier.getIdentifierInfo());
590   if(it == PoisonReasons.end())
591     Diag(Identifier, diag::err_pp_used_poisoned_id);
592   else
593     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
594 }
595 
596 /// \brief Returns a diagnostic message kind for reporting a future keyword as
597 /// appropriate for the identifier and specified language.
598 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
599                                           const LangOptions &LangOpts) {
600   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
601 
602   if (LangOpts.CPlusPlus)
603     return llvm::StringSwitch<diag::kind>(II.getName())
604 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
605         .Case(#NAME, diag::warn_cxx11_keyword)
606 #include "clang/Basic/TokenKinds.def"
607         ;
608 
609   llvm_unreachable(
610       "Keyword not known to come from a newer Standard or proposed Standard");
611 }
612 
613 /// HandleIdentifier - This callback is invoked when the lexer reads an
614 /// identifier.  This callback looks up the identifier in the map and/or
615 /// potentially macro expands it or turns it into a named token (like 'for').
616 ///
617 /// Note that callers of this method are guarded by checking the
618 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
619 /// IdentifierInfo methods that compute these properties will need to change to
620 /// match.
621 bool Preprocessor::HandleIdentifier(Token &Identifier) {
622   assert(Identifier.getIdentifierInfo() &&
623          "Can't handle identifiers without identifier info!");
624 
625   IdentifierInfo &II = *Identifier.getIdentifierInfo();
626 
627   // If the information about this identifier is out of date, update it from
628   // the external source.
629   // We have to treat __VA_ARGS__ in a special way, since it gets
630   // serialized with isPoisoned = true, but our preprocessor may have
631   // unpoisoned it if we're defining a C99 macro.
632   if (II.isOutOfDate()) {
633     bool CurrentIsPoisoned = false;
634     if (&II == Ident__VA_ARGS__)
635       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
636 
637     ExternalSource->updateOutOfDateIdentifier(II);
638     Identifier.setKind(II.getTokenID());
639 
640     if (&II == Ident__VA_ARGS__)
641       II.setIsPoisoned(CurrentIsPoisoned);
642   }
643 
644   // If this identifier was poisoned, and if it was not produced from a macro
645   // expansion, emit an error.
646   if (II.isPoisoned() && CurPPLexer) {
647     HandlePoisonedIdentifier(Identifier);
648   }
649 
650   // If this is a macro to be expanded, do it.
651   if (MacroDefinition MD = getMacroDefinition(&II)) {
652     auto *MI = MD.getMacroInfo();
653     assert(MI && "macro definition with no macro info?");
654     if (!DisableMacroExpansion) {
655       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
656         // C99 6.10.3p10: If the preprocessing token immediately after the
657         // macro name isn't a '(', this macro should not be expanded.
658         if (!MI->isFunctionLike() || isNextPPTokenLParen())
659           return HandleMacroExpandedIdentifier(Identifier, MD);
660       } else {
661         // C99 6.10.3.4p2 says that a disabled macro may never again be
662         // expanded, even if it's in a context where it could be expanded in the
663         // future.
664         Identifier.setFlag(Token::DisableExpand);
665         if (MI->isObjectLike() || isNextPPTokenLParen())
666           Diag(Identifier, diag::pp_disabled_macro_expansion);
667       }
668     }
669   }
670 
671   // If this identifier is a keyword in a newer Standard or proposed Standard,
672   // produce a warning. Don't warn if we're not considering macro expansion,
673   // since this identifier might be the name of a macro.
674   // FIXME: This warning is disabled in cases where it shouldn't be, like
675   //   "#define constexpr constexpr", "int constexpr;"
676   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
677     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
678         << II.getName();
679     // Don't diagnose this keyword again in this translation unit.
680     II.setIsFutureCompatKeyword(false);
681   }
682 
683   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
684   // then we act as if it is the actual operator and not the textual
685   // representation of it.
686   if (II.isCPlusPlusOperatorKeyword())
687     Identifier.setIdentifierInfo(nullptr);
688 
689   // If this is an extension token, diagnose its use.
690   // We avoid diagnosing tokens that originate from macro definitions.
691   // FIXME: This warning is disabled in cases where it shouldn't be,
692   // like "#define TY typeof", "TY(1) x".
693   if (II.isExtensionToken() && !DisableMacroExpansion)
694     Diag(Identifier, diag::ext_token_used);
695 
696   // If this is the 'import' contextual keyword following an '@', note
697   // that the next token indicates a module name.
698   //
699   // Note that we do not treat 'import' as a contextual
700   // keyword when we're in a caching lexer, because caching lexers only get
701   // used in contexts where import declarations are disallowed.
702   if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
703       !DisableMacroExpansion &&
704       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
705       CurLexerKind != CLK_CachingLexer) {
706     ModuleImportLoc = Identifier.getLocation();
707     ModuleImportPath.clear();
708     ModuleImportExpectsIdentifier = true;
709     CurLexerKind = CLK_LexAfterModuleImport;
710   }
711   return true;
712 }
713 
714 void Preprocessor::Lex(Token &Result) {
715   // We loop here until a lex function retuns a token; this avoids recursion.
716   bool ReturnedToken;
717   do {
718     switch (CurLexerKind) {
719     case CLK_Lexer:
720       ReturnedToken = CurLexer->Lex(Result);
721       break;
722     case CLK_PTHLexer:
723       ReturnedToken = CurPTHLexer->Lex(Result);
724       break;
725     case CLK_TokenLexer:
726       ReturnedToken = CurTokenLexer->Lex(Result);
727       break;
728     case CLK_CachingLexer:
729       CachingLex(Result);
730       ReturnedToken = true;
731       break;
732     case CLK_LexAfterModuleImport:
733       LexAfterModuleImport(Result);
734       ReturnedToken = true;
735       break;
736     }
737   } while (!ReturnedToken);
738 
739   LastTokenWasAt = Result.is(tok::at);
740 }
741 
742 
743 /// \brief Lex a token following the 'import' contextual keyword.
744 ///
745 void Preprocessor::LexAfterModuleImport(Token &Result) {
746   // Figure out what kind of lexer we actually have.
747   recomputeCurLexerKind();
748 
749   // Lex the next token.
750   Lex(Result);
751 
752   // The token sequence
753   //
754   //   import identifier (. identifier)*
755   //
756   // indicates a module import directive. We already saw the 'import'
757   // contextual keyword, so now we're looking for the identifiers.
758   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
759     // We expected to see an identifier here, and we did; continue handling
760     // identifiers.
761     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
762                                               Result.getLocation()));
763     ModuleImportExpectsIdentifier = false;
764     CurLexerKind = CLK_LexAfterModuleImport;
765     return;
766   }
767 
768   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
769   // see the next identifier.
770   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
771     ModuleImportExpectsIdentifier = true;
772     CurLexerKind = CLK_LexAfterModuleImport;
773     return;
774   }
775 
776   // If we have a non-empty module path, load the named module.
777   if (!ModuleImportPath.empty()) {
778     Module *Imported = nullptr;
779     if (getLangOpts().Modules) {
780       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
781                                             ModuleImportPath,
782                                             Module::Hidden,
783                                             /*IsIncludeDirective=*/false);
784       if (Imported)
785         makeModuleVisible(Imported, ModuleImportLoc);
786     }
787     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
788       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
789   }
790 }
791 
792 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
793   CurSubmoduleState->VisibleModules.setVisible(
794       M, Loc, [](Module *) {},
795       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
796         // FIXME: Include the path in the diagnostic.
797         // FIXME: Include the import location for the conflicting module.
798         Diag(ModuleImportLoc, diag::warn_module_conflict)
799             << Path[0]->getFullModuleName()
800             << Conflict->getFullModuleName()
801             << Message;
802       });
803 
804   // Add this module to the imports list of the currently-built submodule.
805   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
806     BuildingSubmoduleStack.back().M->Imports.insert(M);
807 }
808 
809 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
810                                           const char *DiagnosticTag,
811                                           bool AllowMacroExpansion) {
812   // We need at least one string literal.
813   if (Result.isNot(tok::string_literal)) {
814     Diag(Result, diag::err_expected_string_literal)
815       << /*Source='in...'*/0 << DiagnosticTag;
816     return false;
817   }
818 
819   // Lex string literal tokens, optionally with macro expansion.
820   SmallVector<Token, 4> StrToks;
821   do {
822     StrToks.push_back(Result);
823 
824     if (Result.hasUDSuffix())
825       Diag(Result, diag::err_invalid_string_udl);
826 
827     if (AllowMacroExpansion)
828       Lex(Result);
829     else
830       LexUnexpandedToken(Result);
831   } while (Result.is(tok::string_literal));
832 
833   // Concatenate and parse the strings.
834   StringLiteralParser Literal(StrToks, *this);
835   assert(Literal.isAscii() && "Didn't allow wide strings in");
836 
837   if (Literal.hadError)
838     return false;
839 
840   if (Literal.Pascal) {
841     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
842       << /*Source='in...'*/0 << DiagnosticTag;
843     return false;
844   }
845 
846   String = Literal.GetString();
847   return true;
848 }
849 
850 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
851   assert(Tok.is(tok::numeric_constant));
852   SmallString<8> IntegerBuffer;
853   bool NumberInvalid = false;
854   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
855   if (NumberInvalid)
856     return false;
857   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
858   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
859     return false;
860   llvm::APInt APVal(64, 0);
861   if (Literal.GetIntegerValue(APVal))
862     return false;
863   Lex(Tok);
864   Value = APVal.getLimitedValue();
865   return true;
866 }
867 
868 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
869   assert(Handler && "NULL comment handler");
870   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
871          CommentHandlers.end() && "Comment handler already registered");
872   CommentHandlers.push_back(Handler);
873 }
874 
875 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
876   std::vector<CommentHandler *>::iterator Pos
877   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
878   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
879   CommentHandlers.erase(Pos);
880 }
881 
882 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
883   bool AnyPendingTokens = false;
884   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
885        HEnd = CommentHandlers.end();
886        H != HEnd; ++H) {
887     if ((*H)->HandleComment(*this, Comment))
888       AnyPendingTokens = true;
889   }
890   if (!AnyPendingTokens || getCommentRetentionState())
891     return false;
892   Lex(result);
893   return true;
894 }
895 
896 ModuleLoader::~ModuleLoader() { }
897 
898 CommentHandler::~CommentHandler() { }
899 
900 CodeCompletionHandler::~CodeCompletionHandler() { }
901 
902 void Preprocessor::createPreprocessingRecord() {
903   if (Record)
904     return;
905 
906   Record = new PreprocessingRecord(getSourceManager());
907   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
908 }
909