1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This code simply runs the preprocessor on the input file and prints out the
10 // result.  This is the traditional behavior of the -E option.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Frontend/Utils.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Frontend/PreprocessorOutputOptions.h"
19 #include "clang/Lex/MacroInfo.h"
20 #include "clang/Lex/PPCallbacks.h"
21 #include "clang/Lex/Pragma.h"
22 #include "clang/Lex/Preprocessor.h"
23 #include "clang/Lex/TokenConcatenation.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cstdio>
30 using namespace clang;
31 
32 /// PrintMacroDefinition - Print a macro definition in a form that will be
33 /// properly accepted back as a definition.
34 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
35                                  Preprocessor &PP, raw_ostream &OS) {
36   OS << "#define " << II.getName();
37 
38   if (MI.isFunctionLike()) {
39     OS << '(';
40     if (!MI.param_empty()) {
41       MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
42       for (; AI+1 != E; ++AI) {
43         OS << (*AI)->getName();
44         OS << ',';
45       }
46 
47       // Last argument.
48       if ((*AI)->getName() == "__VA_ARGS__")
49         OS << "...";
50       else
51         OS << (*AI)->getName();
52     }
53 
54     if (MI.isGNUVarargs())
55       OS << "...";  // #define foo(x...)
56 
57     OS << ')';
58   }
59 
60   // GCC always emits a space, even if the macro body is empty.  However, do not
61   // want to emit two spaces if the first token has a leading space.
62   if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
63     OS << ' ';
64 
65   SmallString<128> SpellingBuffer;
66   for (const auto &T : MI.tokens()) {
67     if (T.hasLeadingSpace())
68       OS << ' ';
69 
70     OS << PP.getSpelling(T, SpellingBuffer);
71   }
72 }
73 
74 //===----------------------------------------------------------------------===//
75 // Preprocessed token printer
76 //===----------------------------------------------------------------------===//
77 
78 namespace {
79 class PrintPPOutputPPCallbacks : public PPCallbacks {
80   Preprocessor &PP;
81   SourceManager &SM;
82   TokenConcatenation ConcatInfo;
83 public:
84   raw_ostream &OS;
85 private:
86   unsigned CurLine;
87 
88   bool EmittedTokensOnThisLine;
89   bool EmittedDirectiveOnThisLine;
90   SrcMgr::CharacteristicKind FileType;
91   SmallString<512> CurFilename;
92   bool Initialized;
93   bool DisableLineMarkers;
94   bool DumpDefines;
95   bool DumpIncludeDirectives;
96   bool UseLineDirectives;
97   bool IsFirstFileEntered;
98 public:
99   PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
100                            bool defines, bool DumpIncludeDirectives,
101                            bool UseLineDirectives)
102       : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
103         DisableLineMarkers(lineMarkers), DumpDefines(defines),
104         DumpIncludeDirectives(DumpIncludeDirectives),
105         UseLineDirectives(UseLineDirectives) {
106     CurLine = 0;
107     CurFilename += "<uninit>";
108     EmittedTokensOnThisLine = false;
109     EmittedDirectiveOnThisLine = false;
110     FileType = SrcMgr::C_User;
111     Initialized = false;
112     IsFirstFileEntered = false;
113   }
114 
115   void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
116   bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
117 
118   void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
119   bool hasEmittedDirectiveOnThisLine() const {
120     return EmittedDirectiveOnThisLine;
121   }
122 
123   bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
124 
125   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
126                    SrcMgr::CharacteristicKind FileType,
127                    FileID PrevFID) override;
128   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
129                           StringRef FileName, bool IsAngled,
130                           CharSourceRange FilenameRange, const FileEntry *File,
131                           StringRef SearchPath, StringRef RelativePath,
132                           const Module *Imported,
133                           SrcMgr::CharacteristicKind FileType) override;
134   void Ident(SourceLocation Loc, StringRef str) override;
135   void PragmaMessage(SourceLocation Loc, StringRef Namespace,
136                      PragmaMessageKind Kind, StringRef Str) override;
137   void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
138   void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
139   void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
140   void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
141                         diag::Severity Map, StringRef Str) override;
142   void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
143                      ArrayRef<int> Ids) override;
144   void PragmaWarningPush(SourceLocation Loc, int Level) override;
145   void PragmaWarningPop(SourceLocation Loc) override;
146   void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
147   void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
148 
149   bool HandleFirstTokOnLine(Token &Tok);
150 
151   /// Move to the line of the provided source location. This will
152   /// return true if the output stream required adjustment or if
153   /// the requested location is on the first line.
154   bool MoveToLine(SourceLocation Loc) {
155     PresumedLoc PLoc = SM.getPresumedLoc(Loc);
156     if (PLoc.isInvalid())
157       return false;
158     return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
159   }
160   bool MoveToLine(unsigned LineNo);
161 
162   bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
163                    const Token &Tok) {
164     return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
165   }
166   void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
167                      unsigned ExtraLen=0);
168   bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
169   void HandleNewlinesInToken(const char *TokStr, unsigned Len);
170 
171   /// MacroDefined - This hook is called whenever a macro definition is seen.
172   void MacroDefined(const Token &MacroNameTok,
173                     const MacroDirective *MD) override;
174 
175   /// MacroUndefined - This hook is called whenever a macro #undef is seen.
176   void MacroUndefined(const Token &MacroNameTok,
177                       const MacroDefinition &MD,
178                       const MacroDirective *Undef) override;
179 
180   void BeginModule(const Module *M);
181   void EndModule(const Module *M);
182 };
183 }  // end anonymous namespace
184 
185 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
186                                              const char *Extra,
187                                              unsigned ExtraLen) {
188   startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
189 
190   // Emit #line directives or GNU line markers depending on what mode we're in.
191   if (UseLineDirectives) {
192     OS << "#line" << ' ' << LineNo << ' ' << '"';
193     OS.write_escaped(CurFilename);
194     OS << '"';
195   } else {
196     OS << '#' << ' ' << LineNo << ' ' << '"';
197     OS.write_escaped(CurFilename);
198     OS << '"';
199 
200     if (ExtraLen)
201       OS.write(Extra, ExtraLen);
202 
203     if (FileType == SrcMgr::C_System)
204       OS.write(" 3", 2);
205     else if (FileType == SrcMgr::C_ExternCSystem)
206       OS.write(" 3 4", 4);
207   }
208   OS << '\n';
209 }
210 
211 /// MoveToLine - Move the output to the source line specified by the location
212 /// object.  We can do this by emitting some number of \n's, or be emitting a
213 /// #line directive.  This returns false if already at the specified line, true
214 /// if some newlines were emitted.
215 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
216   // If this line is "close enough" to the original line, just print newlines,
217   // otherwise print a #line directive.
218   if (LineNo-CurLine <= 8) {
219     if (LineNo-CurLine == 1)
220       OS << '\n';
221     else if (LineNo == CurLine)
222       return false;    // Spelling line moved, but expansion line didn't.
223     else {
224       const char *NewLines = "\n\n\n\n\n\n\n\n";
225       OS.write(NewLines, LineNo-CurLine);
226     }
227   } else if (!DisableLineMarkers) {
228     // Emit a #line or line marker.
229     WriteLineInfo(LineNo, nullptr, 0);
230   } else {
231     // Okay, we're in -P mode, which turns off line markers.  However, we still
232     // need to emit a newline between tokens on different lines.
233     startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
234   }
235 
236   CurLine = LineNo;
237   return true;
238 }
239 
240 bool
241 PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
242   if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
243     OS << '\n';
244     EmittedTokensOnThisLine = false;
245     EmittedDirectiveOnThisLine = false;
246     if (ShouldUpdateCurrentLine)
247       ++CurLine;
248     return true;
249   }
250 
251   return false;
252 }
253 
254 /// FileChanged - Whenever the preprocessor enters or exits a #include file
255 /// it invokes this handler.  Update our conception of the current source
256 /// position.
257 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
258                                            FileChangeReason Reason,
259                                        SrcMgr::CharacteristicKind NewFileType,
260                                        FileID PrevFID) {
261   // Unless we are exiting a #include, make sure to skip ahead to the line the
262   // #include directive was at.
263   SourceManager &SourceMgr = SM;
264 
265   PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
266   if (UserLoc.isInvalid())
267     return;
268 
269   unsigned NewLine = UserLoc.getLine();
270 
271   if (Reason == PPCallbacks::EnterFile) {
272     SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
273     if (IncludeLoc.isValid())
274       MoveToLine(IncludeLoc);
275   } else if (Reason == PPCallbacks::SystemHeaderPragma) {
276     // GCC emits the # directive for this directive on the line AFTER the
277     // directive and emits a bunch of spaces that aren't needed. This is because
278     // otherwise we will emit a line marker for THIS line, which requires an
279     // extra blank line after the directive to avoid making all following lines
280     // off by one. We can do better by simply incrementing NewLine here.
281     NewLine += 1;
282   }
283 
284   CurLine = NewLine;
285 
286   CurFilename.clear();
287   CurFilename += UserLoc.getFilename();
288   FileType = NewFileType;
289 
290   if (DisableLineMarkers) {
291     startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
292     return;
293   }
294 
295   if (!Initialized) {
296     WriteLineInfo(CurLine);
297     Initialized = true;
298   }
299 
300   // Do not emit an enter marker for the main file (which we expect is the first
301   // entered file). This matches gcc, and improves compatibility with some tools
302   // which track the # line markers as a way to determine when the preprocessed
303   // output is in the context of the main file.
304   if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
305     IsFirstFileEntered = true;
306     return;
307   }
308 
309   switch (Reason) {
310   case PPCallbacks::EnterFile:
311     WriteLineInfo(CurLine, " 1", 2);
312     break;
313   case PPCallbacks::ExitFile:
314     WriteLineInfo(CurLine, " 2", 2);
315     break;
316   case PPCallbacks::SystemHeaderPragma:
317   case PPCallbacks::RenameFile:
318     WriteLineInfo(CurLine);
319     break;
320   }
321 }
322 
323 void PrintPPOutputPPCallbacks::InclusionDirective(
324     SourceLocation HashLoc,
325     const Token &IncludeTok,
326     StringRef FileName,
327     bool IsAngled,
328     CharSourceRange FilenameRange,
329     const FileEntry *File,
330     StringRef SearchPath,
331     StringRef RelativePath,
332     const Module *Imported,
333     SrcMgr::CharacteristicKind FileType) {
334   // In -dI mode, dump #include directives prior to dumping their content or
335   // interpretation.
336   if (DumpIncludeDirectives) {
337     startNewLineIfNeeded();
338     MoveToLine(HashLoc);
339     const std::string TokenText = PP.getSpelling(IncludeTok);
340     assert(!TokenText.empty());
341     OS << "#" << TokenText << " "
342        << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
343        << " /* clang -E -dI */";
344     setEmittedDirectiveOnThisLine();
345     startNewLineIfNeeded();
346   }
347 
348   // When preprocessing, turn implicit imports into module import pragmas.
349   if (Imported) {
350     switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
351     case tok::pp_include:
352     case tok::pp_import:
353     case tok::pp_include_next:
354       startNewLineIfNeeded();
355       MoveToLine(HashLoc);
356       OS << "#pragma clang module import " << Imported->getFullModuleName(true)
357          << " /* clang -E: implicit import for "
358          << "#" << PP.getSpelling(IncludeTok) << " "
359          << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
360          << " */";
361       // Since we want a newline after the pragma, but not a #<line>, start a
362       // new line immediately.
363       EmittedTokensOnThisLine = true;
364       startNewLineIfNeeded();
365       break;
366 
367     case tok::pp___include_macros:
368       // #__include_macros has no effect on a user of a preprocessed source
369       // file; the only effect is on preprocessing.
370       //
371       // FIXME: That's not *quite* true: it causes the module in question to
372       // be loaded, which can affect downstream diagnostics.
373       break;
374 
375     default:
376       llvm_unreachable("unknown include directive kind");
377       break;
378     }
379   }
380 }
381 
382 /// Handle entering the scope of a module during a module compilation.
383 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
384   startNewLineIfNeeded();
385   OS << "#pragma clang module begin " << M->getFullModuleName(true);
386   setEmittedDirectiveOnThisLine();
387 }
388 
389 /// Handle leaving the scope of a module during a module compilation.
390 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
391   startNewLineIfNeeded();
392   OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
393   setEmittedDirectiveOnThisLine();
394 }
395 
396 /// Ident - Handle #ident directives when read by the preprocessor.
397 ///
398 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
399   MoveToLine(Loc);
400 
401   OS.write("#ident ", strlen("#ident "));
402   OS.write(S.begin(), S.size());
403   EmittedTokensOnThisLine = true;
404 }
405 
406 /// MacroDefined - This hook is called whenever a macro definition is seen.
407 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
408                                             const MacroDirective *MD) {
409   const MacroInfo *MI = MD->getMacroInfo();
410   // Only print out macro definitions in -dD mode.
411   if (!DumpDefines ||
412       // Ignore __FILE__ etc.
413       MI->isBuiltinMacro()) return;
414 
415   MoveToLine(MI->getDefinitionLoc());
416   PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
417   setEmittedDirectiveOnThisLine();
418 }
419 
420 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
421                                               const MacroDefinition &MD,
422                                               const MacroDirective *Undef) {
423   // Only print out macro definitions in -dD mode.
424   if (!DumpDefines) return;
425 
426   MoveToLine(MacroNameTok.getLocation());
427   OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
428   setEmittedDirectiveOnThisLine();
429 }
430 
431 static void outputPrintable(raw_ostream &OS, StringRef Str) {
432   for (unsigned char Char : Str) {
433     if (isPrintable(Char) && Char != '\\' && Char != '"')
434       OS << (char)Char;
435     else // Output anything hard as an octal escape.
436       OS << '\\'
437          << (char)('0' + ((Char >> 6) & 7))
438          << (char)('0' + ((Char >> 3) & 7))
439          << (char)('0' + ((Char >> 0) & 7));
440   }
441 }
442 
443 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
444                                              StringRef Namespace,
445                                              PragmaMessageKind Kind,
446                                              StringRef Str) {
447   startNewLineIfNeeded();
448   MoveToLine(Loc);
449   OS << "#pragma ";
450   if (!Namespace.empty())
451     OS << Namespace << ' ';
452   switch (Kind) {
453     case PMK_Message:
454       OS << "message(\"";
455       break;
456     case PMK_Warning:
457       OS << "warning \"";
458       break;
459     case PMK_Error:
460       OS << "error \"";
461       break;
462   }
463 
464   outputPrintable(OS, Str);
465   OS << '"';
466   if (Kind == PMK_Message)
467     OS << ')';
468   setEmittedDirectiveOnThisLine();
469 }
470 
471 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
472                                            StringRef DebugType) {
473   startNewLineIfNeeded();
474   MoveToLine(Loc);
475 
476   OS << "#pragma clang __debug ";
477   OS << DebugType;
478 
479   setEmittedDirectiveOnThisLine();
480 }
481 
482 void PrintPPOutputPPCallbacks::
483 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
484   startNewLineIfNeeded();
485   MoveToLine(Loc);
486   OS << "#pragma " << Namespace << " diagnostic push";
487   setEmittedDirectiveOnThisLine();
488 }
489 
490 void PrintPPOutputPPCallbacks::
491 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
492   startNewLineIfNeeded();
493   MoveToLine(Loc);
494   OS << "#pragma " << Namespace << " diagnostic pop";
495   setEmittedDirectiveOnThisLine();
496 }
497 
498 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
499                                                 StringRef Namespace,
500                                                 diag::Severity Map,
501                                                 StringRef Str) {
502   startNewLineIfNeeded();
503   MoveToLine(Loc);
504   OS << "#pragma " << Namespace << " diagnostic ";
505   switch (Map) {
506   case diag::Severity::Remark:
507     OS << "remark";
508     break;
509   case diag::Severity::Warning:
510     OS << "warning";
511     break;
512   case diag::Severity::Error:
513     OS << "error";
514     break;
515   case diag::Severity::Ignored:
516     OS << "ignored";
517     break;
518   case diag::Severity::Fatal:
519     OS << "fatal";
520     break;
521   }
522   OS << " \"" << Str << '"';
523   setEmittedDirectiveOnThisLine();
524 }
525 
526 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
527                                              StringRef WarningSpec,
528                                              ArrayRef<int> Ids) {
529   startNewLineIfNeeded();
530   MoveToLine(Loc);
531   OS << "#pragma warning(" << WarningSpec << ':';
532   for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
533     OS << ' ' << *I;
534   OS << ')';
535   setEmittedDirectiveOnThisLine();
536 }
537 
538 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
539                                                  int Level) {
540   startNewLineIfNeeded();
541   MoveToLine(Loc);
542   OS << "#pragma warning(push";
543   if (Level >= 0)
544     OS << ", " << Level;
545   OS << ')';
546   setEmittedDirectiveOnThisLine();
547 }
548 
549 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
550   startNewLineIfNeeded();
551   MoveToLine(Loc);
552   OS << "#pragma warning(pop)";
553   setEmittedDirectiveOnThisLine();
554 }
555 
556 void PrintPPOutputPPCallbacks::
557 PragmaAssumeNonNullBegin(SourceLocation Loc) {
558   startNewLineIfNeeded();
559   MoveToLine(Loc);
560   OS << "#pragma clang assume_nonnull begin";
561   setEmittedDirectiveOnThisLine();
562 }
563 
564 void PrintPPOutputPPCallbacks::
565 PragmaAssumeNonNullEnd(SourceLocation Loc) {
566   startNewLineIfNeeded();
567   MoveToLine(Loc);
568   OS << "#pragma clang assume_nonnull end";
569   setEmittedDirectiveOnThisLine();
570 }
571 
572 /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
573 /// is called for the first token on each new line.  If this really is the start
574 /// of a new logical line, handle it and return true, otherwise return false.
575 /// This may not be the start of a logical line because the "start of line"
576 /// marker is set for spelling lines, not expansion ones.
577 bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
578   // Figure out what line we went to and insert the appropriate number of
579   // newline characters.
580   if (!MoveToLine(Tok.getLocation()))
581     return false;
582 
583   // Print out space characters so that the first token on a line is
584   // indented for easy reading.
585   unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
586 
587   // The first token on a line can have a column number of 1, yet still expect
588   // leading white space, if a macro expansion in column 1 starts with an empty
589   // macro argument, or an empty nested macro expansion. In this case, move the
590   // token to column 2.
591   if (ColNo == 1 && Tok.hasLeadingSpace())
592     ColNo = 2;
593 
594   // This hack prevents stuff like:
595   // #define HASH #
596   // HASH define foo bar
597   // From having the # character end up at column 1, which makes it so it
598   // is not handled as a #define next time through the preprocessor if in
599   // -fpreprocessed mode.
600   if (ColNo <= 1 && Tok.is(tok::hash))
601     OS << ' ';
602 
603   // Otherwise, indent the appropriate number of spaces.
604   for (; ColNo > 1; --ColNo)
605     OS << ' ';
606 
607   return true;
608 }
609 
610 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
611                                                      unsigned Len) {
612   unsigned NumNewlines = 0;
613   for (; Len; --Len, ++TokStr) {
614     if (*TokStr != '\n' &&
615         *TokStr != '\r')
616       continue;
617 
618     ++NumNewlines;
619 
620     // If we have \n\r or \r\n, skip both and count as one line.
621     if (Len != 1 &&
622         (TokStr[1] == '\n' || TokStr[1] == '\r') &&
623         TokStr[0] != TokStr[1]) {
624       ++TokStr;
625       --Len;
626     }
627   }
628 
629   if (NumNewlines == 0) return;
630 
631   CurLine += NumNewlines;
632 }
633 
634 
635 namespace {
636 struct UnknownPragmaHandler : public PragmaHandler {
637   const char *Prefix;
638   PrintPPOutputPPCallbacks *Callbacks;
639 
640   // Set to true if tokens should be expanded
641   bool ShouldExpandTokens;
642 
643   UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
644                        bool RequireTokenExpansion)
645       : Prefix(prefix), Callbacks(callbacks),
646         ShouldExpandTokens(RequireTokenExpansion) {}
647   void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
648                     Token &PragmaTok) override {
649     // Figure out what line we went to and insert the appropriate number of
650     // newline characters.
651     Callbacks->startNewLineIfNeeded();
652     Callbacks->MoveToLine(PragmaTok.getLocation());
653     Callbacks->OS.write(Prefix, strlen(Prefix));
654 
655     if (ShouldExpandTokens) {
656       // The first token does not have expanded macros. Expand them, if
657       // required.
658       auto Toks = llvm::make_unique<Token[]>(1);
659       Toks[0] = PragmaTok;
660       PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
661                           /*DisableMacroExpansion=*/false);
662       PP.Lex(PragmaTok);
663     }
664     Token PrevToken;
665     Token PrevPrevToken;
666     PrevToken.startToken();
667     PrevPrevToken.startToken();
668 
669     // Read and print all of the pragma tokens.
670     while (PragmaTok.isNot(tok::eod)) {
671       if (PragmaTok.hasLeadingSpace() ||
672           Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
673         Callbacks->OS << ' ';
674       std::string TokSpell = PP.getSpelling(PragmaTok);
675       Callbacks->OS.write(&TokSpell[0], TokSpell.size());
676 
677       PrevPrevToken = PrevToken;
678       PrevToken = PragmaTok;
679 
680       if (ShouldExpandTokens)
681         PP.Lex(PragmaTok);
682       else
683         PP.LexUnexpandedToken(PragmaTok);
684     }
685     Callbacks->setEmittedDirectiveOnThisLine();
686   }
687 };
688 } // end anonymous namespace
689 
690 
691 static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
692                                     PrintPPOutputPPCallbacks *Callbacks,
693                                     raw_ostream &OS) {
694   bool DropComments = PP.getLangOpts().TraditionalCPP &&
695                       !PP.getCommentRetentionState();
696 
697   char Buffer[256];
698   Token PrevPrevTok, PrevTok;
699   PrevPrevTok.startToken();
700   PrevTok.startToken();
701   while (1) {
702     if (Callbacks->hasEmittedDirectiveOnThisLine()) {
703       Callbacks->startNewLineIfNeeded();
704       Callbacks->MoveToLine(Tok.getLocation());
705     }
706 
707     // If this token is at the start of a line, emit newlines if needed.
708     if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
709       // done.
710     } else if (Tok.hasLeadingSpace() ||
711                // If we haven't emitted a token on this line yet, PrevTok isn't
712                // useful to look at and no concatenation could happen anyway.
713                (Callbacks->hasEmittedTokensOnThisLine() &&
714                 // Don't print "-" next to "-", it would form "--".
715                 Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
716       OS << ' ';
717     }
718 
719     if (DropComments && Tok.is(tok::comment)) {
720       // Skip comments. Normally the preprocessor does not generate
721       // tok::comment nodes at all when not keeping comments, but under
722       // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
723       SourceLocation StartLoc = Tok.getLocation();
724       Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
725     } else if (Tok.is(tok::eod)) {
726       // Don't print end of directive tokens, since they are typically newlines
727       // that mess up our line tracking. These come from unknown pre-processor
728       // directives or hash-prefixed comments in standalone assembly files.
729       PP.Lex(Tok);
730       continue;
731     } else if (Tok.is(tok::annot_module_include)) {
732       // PrintPPOutputPPCallbacks::InclusionDirective handles producing
733       // appropriate output here. Ignore this token entirely.
734       PP.Lex(Tok);
735       continue;
736     } else if (Tok.is(tok::annot_module_begin)) {
737       // FIXME: We retrieve this token after the FileChanged callback, and
738       // retrieve the module_end token before the FileChanged callback, so
739       // we render this within the file and render the module end outside the
740       // file, but this is backwards from the token locations: the module_begin
741       // token is at the include location (outside the file) and the module_end
742       // token is at the EOF location (within the file).
743       Callbacks->BeginModule(
744           reinterpret_cast<Module *>(Tok.getAnnotationValue()));
745       PP.Lex(Tok);
746       continue;
747     } else if (Tok.is(tok::annot_module_end)) {
748       Callbacks->EndModule(
749           reinterpret_cast<Module *>(Tok.getAnnotationValue()));
750       PP.Lex(Tok);
751       continue;
752     } else if (Tok.isAnnotation()) {
753       // Ignore annotation tokens created by pragmas - the pragmas themselves
754       // will be reproduced in the preprocessed output.
755       PP.Lex(Tok);
756       continue;
757     } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
758       OS << II->getName();
759     } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
760                Tok.getLiteralData()) {
761       OS.write(Tok.getLiteralData(), Tok.getLength());
762     } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) {
763       const char *TokPtr = Buffer;
764       unsigned Len = PP.getSpelling(Tok, TokPtr);
765       OS.write(TokPtr, Len);
766 
767       // Tokens that can contain embedded newlines need to adjust our current
768       // line number.
769       if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
770         Callbacks->HandleNewlinesInToken(TokPtr, Len);
771     } else {
772       std::string S = PP.getSpelling(Tok);
773       OS.write(&S[0], S.size());
774 
775       // Tokens that can contain embedded newlines need to adjust our current
776       // line number.
777       if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
778         Callbacks->HandleNewlinesInToken(&S[0], S.size());
779     }
780     Callbacks->setEmittedTokensOnThisLine();
781 
782     if (Tok.is(tok::eof)) break;
783 
784     PrevPrevTok = PrevTok;
785     PrevTok = Tok;
786     PP.Lex(Tok);
787   }
788 }
789 
790 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
791 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
792   return LHS->first->getName().compare(RHS->first->getName());
793 }
794 
795 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
796   // Ignore unknown pragmas.
797   PP.IgnorePragmas();
798 
799   // -dM mode just scans and ignores all tokens in the files, then dumps out
800   // the macro table at the end.
801   PP.EnterMainSourceFile();
802 
803   Token Tok;
804   do PP.Lex(Tok);
805   while (Tok.isNot(tok::eof));
806 
807   SmallVector<id_macro_pair, 128> MacrosByID;
808   for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
809        I != E; ++I) {
810     auto *MD = I->second.getLatest();
811     if (MD && MD->isDefined())
812       MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
813   }
814   llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
815 
816   for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
817     MacroInfo &MI = *MacrosByID[i].second;
818     // Ignore computed macros like __LINE__ and friends.
819     if (MI.isBuiltinMacro()) continue;
820 
821     PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
822     *OS << '\n';
823   }
824 }
825 
826 /// DoPrintPreprocessedInput - This implements -E mode.
827 ///
828 void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
829                                      const PreprocessorOutputOptions &Opts) {
830   // Show macros with no output is handled specially.
831   if (!Opts.ShowCPP) {
832     assert(Opts.ShowMacros && "Not yet implemented!");
833     DoPrintMacros(PP, OS);
834     return;
835   }
836 
837   // Inform the preprocessor whether we want it to retain comments or not, due
838   // to -C or -CC.
839   PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments);
840 
841   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
842       PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
843       Opts.ShowIncludeDirectives, Opts.UseLineDirectives);
844 
845   // Expand macros in pragmas with -fms-extensions.  The assumption is that
846   // the majority of pragmas in such a file will be Microsoft pragmas.
847   // Remember the handlers we will add so that we can remove them later.
848   std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
849       new UnknownPragmaHandler(
850           "#pragma", Callbacks,
851           /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
852 
853   std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
854       "#pragma GCC", Callbacks,
855       /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
856 
857   std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
858       "#pragma clang", Callbacks,
859       /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
860 
861   PP.AddPragmaHandler(MicrosoftExtHandler.get());
862   PP.AddPragmaHandler("GCC", GCCHandler.get());
863   PP.AddPragmaHandler("clang", ClangHandler.get());
864 
865   // The tokens after pragma omp need to be expanded.
866   //
867   //  OpenMP [2.1, Directive format]
868   //  Preprocessing tokens following the #pragma omp are subject to macro
869   //  replacement.
870   std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
871       new UnknownPragmaHandler("#pragma omp", Callbacks,
872                                /*RequireTokenExpansion=*/true));
873   PP.AddPragmaHandler("omp", OpenMPHandler.get());
874 
875   PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
876 
877   // After we have configured the preprocessor, enter the main file.
878   PP.EnterMainSourceFile();
879 
880   // Consume all of the tokens that come from the predefines buffer.  Those
881   // should not be emitted into the output and are guaranteed to be at the
882   // start.
883   const SourceManager &SourceMgr = PP.getSourceManager();
884   Token Tok;
885   do {
886     PP.Lex(Tok);
887     if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
888       break;
889 
890     PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
891     if (PLoc.isInvalid())
892       break;
893 
894     if (strcmp(PLoc.getFilename(), "<built-in>"))
895       break;
896   } while (true);
897 
898   // Read all the preprocessed tokens, printing them out to the stream.
899   PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
900   *OS << '\n';
901 
902   // Remove the handlers we just added to leave the preprocessor in a sane state
903   // so that it can be reused (for example by a clang::Parser instance).
904   PP.RemovePragmaHandler(MicrosoftExtHandler.get());
905   PP.RemovePragmaHandler("GCC", GCCHandler.get());
906   PP.RemovePragmaHandler("clang", ClangHandler.get());
907   PP.RemovePragmaHandler("omp", OpenMPHandler.get());
908 }
909