1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This code simply runs the preprocessor on the input file and prints out the
10 // result. This is the traditional behavior of the -E option.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "clang/Frontend/Utils.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Frontend/PreprocessorOutputOptions.h"
19 #include "clang/Lex/MacroInfo.h"
20 #include "clang/Lex/PPCallbacks.h"
21 #include "clang/Lex/Pragma.h"
22 #include "clang/Lex/Preprocessor.h"
23 #include "clang/Lex/TokenConcatenation.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cstdio>
30 using namespace clang;
31
32 /// PrintMacroDefinition - Print a macro definition in a form that will be
33 /// properly accepted back as a definition.
PrintMacroDefinition(const IdentifierInfo & II,const MacroInfo & MI,Preprocessor & PP,raw_ostream & OS)34 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
35 Preprocessor &PP, raw_ostream &OS) {
36 OS << "#define " << II.getName();
37
38 if (MI.isFunctionLike()) {
39 OS << '(';
40 if (!MI.param_empty()) {
41 MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
42 for (; AI+1 != E; ++AI) {
43 OS << (*AI)->getName();
44 OS << ',';
45 }
46
47 // Last argument.
48 if ((*AI)->getName() == "__VA_ARGS__")
49 OS << "...";
50 else
51 OS << (*AI)->getName();
52 }
53
54 if (MI.isGNUVarargs())
55 OS << "..."; // #define foo(x...)
56
57 OS << ')';
58 }
59
60 // GCC always emits a space, even if the macro body is empty. However, do not
61 // want to emit two spaces if the first token has a leading space.
62 if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
63 OS << ' ';
64
65 SmallString<128> SpellingBuffer;
66 for (const auto &T : MI.tokens()) {
67 if (T.hasLeadingSpace())
68 OS << ' ';
69
70 OS << PP.getSpelling(T, SpellingBuffer);
71 }
72 }
73
74 //===----------------------------------------------------------------------===//
75 // Preprocessed token printer
76 //===----------------------------------------------------------------------===//
77
78 namespace {
79 class PrintPPOutputPPCallbacks : public PPCallbacks {
80 Preprocessor &PP;
81 SourceManager &SM;
82 TokenConcatenation ConcatInfo;
83 public:
84 raw_ostream &OS;
85 private:
86 unsigned CurLine;
87
88 bool EmittedTokensOnThisLine;
89 bool EmittedDirectiveOnThisLine;
90 SrcMgr::CharacteristicKind FileType;
91 SmallString<512> CurFilename;
92 bool Initialized;
93 bool DisableLineMarkers;
94 bool DumpDefines;
95 bool DumpIncludeDirectives;
96 bool UseLineDirectives;
97 bool IsFirstFileEntered;
98 bool MinimizeWhitespace;
99 bool DirectivesOnly;
100
101 Token PrevTok;
102 Token PrevPrevTok;
103
104 public:
PrintPPOutputPPCallbacks(Preprocessor & pp,raw_ostream & os,bool lineMarkers,bool defines,bool DumpIncludeDirectives,bool UseLineDirectives,bool MinimizeWhitespace,bool DirectivesOnly)105 PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
106 bool defines, bool DumpIncludeDirectives,
107 bool UseLineDirectives, bool MinimizeWhitespace,
108 bool DirectivesOnly)
109 : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
110 DisableLineMarkers(lineMarkers), DumpDefines(defines),
111 DumpIncludeDirectives(DumpIncludeDirectives),
112 UseLineDirectives(UseLineDirectives),
113 MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly) {
114 CurLine = 0;
115 CurFilename += "<uninit>";
116 EmittedTokensOnThisLine = false;
117 EmittedDirectiveOnThisLine = false;
118 FileType = SrcMgr::C_User;
119 Initialized = false;
120 IsFirstFileEntered = false;
121
122 PrevTok.startToken();
123 PrevPrevTok.startToken();
124 }
125
isMinimizeWhitespace() const126 bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
127
setEmittedTokensOnThisLine()128 void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
hasEmittedTokensOnThisLine() const129 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
130
setEmittedDirectiveOnThisLine()131 void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
hasEmittedDirectiveOnThisLine() const132 bool hasEmittedDirectiveOnThisLine() const {
133 return EmittedDirectiveOnThisLine;
134 }
135
136 /// Ensure that the output stream position is at the beginning of a new line
137 /// and inserts one if it does not. It is intended to ensure that directives
138 /// inserted by the directives not from the input source (such as #line) are
139 /// in the first column. To insert newlines that represent the input, use
140 /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true).
141 void startNewLineIfNeeded();
142
143 void FileChanged(SourceLocation Loc, FileChangeReason Reason,
144 SrcMgr::CharacteristicKind FileType,
145 FileID PrevFID) override;
146 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
147 StringRef FileName, bool IsAngled,
148 CharSourceRange FilenameRange,
149 Optional<FileEntryRef> File, StringRef SearchPath,
150 StringRef RelativePath, const Module *Imported,
151 SrcMgr::CharacteristicKind FileType) override;
152 void Ident(SourceLocation Loc, StringRef str) override;
153 void PragmaMessage(SourceLocation Loc, StringRef Namespace,
154 PragmaMessageKind Kind, StringRef Str) override;
155 void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
156 void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
157 void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
158 void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
159 diag::Severity Map, StringRef Str) override;
160 void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec,
161 ArrayRef<int> Ids) override;
162 void PragmaWarningPush(SourceLocation Loc, int Level) override;
163 void PragmaWarningPop(SourceLocation Loc) override;
164 void PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) override;
165 void PragmaExecCharsetPop(SourceLocation Loc) override;
166 void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
167 void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
168
169 /// Insert whitespace before emitting the next token.
170 ///
171 /// @param Tok Next token to be emitted.
172 /// @param RequireSpace Ensure at least one whitespace is emitted. Useful
173 /// if non-tokens have been emitted to the stream.
174 /// @param RequireSameLine Never emit newlines. Useful when semantics depend
175 /// on being on the same line, such as directives.
176 void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace,
177 bool RequireSameLine);
178
179 /// Move to the line of the provided source location. This will
180 /// return true if a newline was inserted or if
181 /// the requested location is the first token on the first line.
182 /// In these cases the next output will be the first column on the line and
183 /// make it possible to insert indention. The newline was inserted
184 /// implicitly when at the beginning of the file.
185 ///
186 /// @param Tok Token where to move to.
187 /// @param RequireStartOfLine Whether the next line depends on being in the
188 /// first column, such as a directive.
189 ///
190 /// @return Whether column adjustments are necessary.
MoveToLine(const Token & Tok,bool RequireStartOfLine)191 bool MoveToLine(const Token &Tok, bool RequireStartOfLine) {
192 PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation());
193 unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
194 bool IsFirstInFile =
195 Tok.isAtStartOfLine() && PLoc.isValid() && PLoc.getLine() == 1;
196 return MoveToLine(TargetLine, RequireStartOfLine) || IsFirstInFile;
197 }
198
199 /// Move to the line of the provided source location. Returns true if a new
200 /// line was inserted.
MoveToLine(SourceLocation Loc,bool RequireStartOfLine)201 bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) {
202 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
203 unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
204 return MoveToLine(TargetLine, RequireStartOfLine);
205 }
206 bool MoveToLine(unsigned LineNo, bool RequireStartOfLine);
207
AvoidConcat(const Token & PrevPrevTok,const Token & PrevTok,const Token & Tok)208 bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
209 const Token &Tok) {
210 return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
211 }
212 void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
213 unsigned ExtraLen=0);
LineMarkersAreDisabled() const214 bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
215 void HandleNewlinesInToken(const char *TokStr, unsigned Len);
216
217 /// MacroDefined - This hook is called whenever a macro definition is seen.
218 void MacroDefined(const Token &MacroNameTok,
219 const MacroDirective *MD) override;
220
221 /// MacroUndefined - This hook is called whenever a macro #undef is seen.
222 void MacroUndefined(const Token &MacroNameTok,
223 const MacroDefinition &MD,
224 const MacroDirective *Undef) override;
225
226 void BeginModule(const Module *M);
227 void EndModule(const Module *M);
228 };
229 } // end anonymous namespace
230
WriteLineInfo(unsigned LineNo,const char * Extra,unsigned ExtraLen)231 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
232 const char *Extra,
233 unsigned ExtraLen) {
234 startNewLineIfNeeded();
235
236 // Emit #line directives or GNU line markers depending on what mode we're in.
237 if (UseLineDirectives) {
238 OS << "#line" << ' ' << LineNo << ' ' << '"';
239 OS.write_escaped(CurFilename);
240 OS << '"';
241 } else {
242 OS << '#' << ' ' << LineNo << ' ' << '"';
243 OS.write_escaped(CurFilename);
244 OS << '"';
245
246 if (ExtraLen)
247 OS.write(Extra, ExtraLen);
248
249 if (FileType == SrcMgr::C_System)
250 OS.write(" 3", 2);
251 else if (FileType == SrcMgr::C_ExternCSystem)
252 OS.write(" 3 4", 4);
253 }
254 OS << '\n';
255 }
256
257 /// MoveToLine - Move the output to the source line specified by the location
258 /// object. We can do this by emitting some number of \n's, or be emitting a
259 /// #line directive. This returns false if already at the specified line, true
260 /// if some newlines were emitted.
MoveToLine(unsigned LineNo,bool RequireStartOfLine)261 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
262 bool RequireStartOfLine) {
263 // If it is required to start a new line or finish the current, insert
264 // vertical whitespace now and take it into account when moving to the
265 // expected line.
266 bool StartedNewLine = false;
267 if ((RequireStartOfLine && EmittedTokensOnThisLine) ||
268 EmittedDirectiveOnThisLine) {
269 OS << '\n';
270 StartedNewLine = true;
271 CurLine += 1;
272 EmittedTokensOnThisLine = false;
273 EmittedDirectiveOnThisLine = false;
274 }
275
276 // If this line is "close enough" to the original line, just print newlines,
277 // otherwise print a #line directive.
278 if (CurLine == LineNo) {
279 // Nothing to do if we are already on the correct line.
280 } else if (MinimizeWhitespace && DisableLineMarkers) {
281 // With -E -P -fminimize-whitespace, don't emit anything if not necessary.
282 } else if (!StartedNewLine && LineNo - CurLine == 1) {
283 // Printing a single line has priority over printing a #line directive, even
284 // when minimizing whitespace which otherwise would print #line directives
285 // for every single line.
286 OS << '\n';
287 StartedNewLine = true;
288 } else if (!DisableLineMarkers) {
289 if (LineNo - CurLine <= 8) {
290 const char *NewLines = "\n\n\n\n\n\n\n\n";
291 OS.write(NewLines, LineNo - CurLine);
292 } else {
293 // Emit a #line or line marker.
294 WriteLineInfo(LineNo, nullptr, 0);
295 }
296 StartedNewLine = true;
297 } else if (EmittedTokensOnThisLine) {
298 // If we are not on the correct line and don't need to be line-correct,
299 // at least ensure we start on a new line.
300 OS << '\n';
301 StartedNewLine = true;
302 }
303
304 if (StartedNewLine) {
305 EmittedTokensOnThisLine = false;
306 EmittedDirectiveOnThisLine = false;
307 }
308
309 CurLine = LineNo;
310 return StartedNewLine;
311 }
312
startNewLineIfNeeded()313 void PrintPPOutputPPCallbacks::startNewLineIfNeeded() {
314 if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
315 OS << '\n';
316 EmittedTokensOnThisLine = false;
317 EmittedDirectiveOnThisLine = false;
318 }
319 }
320
321 /// FileChanged - Whenever the preprocessor enters or exits a #include file
322 /// it invokes this handler. Update our conception of the current source
323 /// position.
FileChanged(SourceLocation Loc,FileChangeReason Reason,SrcMgr::CharacteristicKind NewFileType,FileID PrevFID)324 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
325 FileChangeReason Reason,
326 SrcMgr::CharacteristicKind NewFileType,
327 FileID PrevFID) {
328 // Unless we are exiting a #include, make sure to skip ahead to the line the
329 // #include directive was at.
330 SourceManager &SourceMgr = SM;
331
332 PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
333 if (UserLoc.isInvalid())
334 return;
335
336 unsigned NewLine = UserLoc.getLine();
337
338 if (Reason == PPCallbacks::EnterFile) {
339 SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
340 if (IncludeLoc.isValid())
341 MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false);
342 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
343 // GCC emits the # directive for this directive on the line AFTER the
344 // directive and emits a bunch of spaces that aren't needed. This is because
345 // otherwise we will emit a line marker for THIS line, which requires an
346 // extra blank line after the directive to avoid making all following lines
347 // off by one. We can do better by simply incrementing NewLine here.
348 NewLine += 1;
349 }
350
351 CurLine = NewLine;
352
353 CurFilename.clear();
354 CurFilename += UserLoc.getFilename();
355 FileType = NewFileType;
356
357 if (DisableLineMarkers) {
358 if (!MinimizeWhitespace)
359 startNewLineIfNeeded();
360 return;
361 }
362
363 if (!Initialized) {
364 WriteLineInfo(CurLine);
365 Initialized = true;
366 }
367
368 // Do not emit an enter marker for the main file (which we expect is the first
369 // entered file). This matches gcc, and improves compatibility with some tools
370 // which track the # line markers as a way to determine when the preprocessed
371 // output is in the context of the main file.
372 if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
373 IsFirstFileEntered = true;
374 return;
375 }
376
377 switch (Reason) {
378 case PPCallbacks::EnterFile:
379 WriteLineInfo(CurLine, " 1", 2);
380 break;
381 case PPCallbacks::ExitFile:
382 WriteLineInfo(CurLine, " 2", 2);
383 break;
384 case PPCallbacks::SystemHeaderPragma:
385 case PPCallbacks::RenameFile:
386 WriteLineInfo(CurLine);
387 break;
388 }
389 }
390
InclusionDirective(SourceLocation HashLoc,const Token & IncludeTok,StringRef FileName,bool IsAngled,CharSourceRange FilenameRange,Optional<FileEntryRef> File,StringRef SearchPath,StringRef RelativePath,const Module * Imported,SrcMgr::CharacteristicKind FileType)391 void PrintPPOutputPPCallbacks::InclusionDirective(
392 SourceLocation HashLoc,
393 const Token &IncludeTok,
394 StringRef FileName,
395 bool IsAngled,
396 CharSourceRange FilenameRange,
397 Optional<FileEntryRef> File,
398 StringRef SearchPath,
399 StringRef RelativePath,
400 const Module *Imported,
401 SrcMgr::CharacteristicKind FileType) {
402 // In -dI mode, dump #include directives prior to dumping their content or
403 // interpretation.
404 if (DumpIncludeDirectives) {
405 MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
406 const std::string TokenText = PP.getSpelling(IncludeTok);
407 assert(!TokenText.empty());
408 OS << "#" << TokenText << " "
409 << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
410 << " /* clang -E -dI */";
411 setEmittedDirectiveOnThisLine();
412 }
413
414 // When preprocessing, turn implicit imports into module import pragmas.
415 if (Imported) {
416 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
417 case tok::pp_include:
418 case tok::pp_import:
419 case tok::pp_include_next:
420 MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
421 OS << "#pragma clang module import " << Imported->getFullModuleName(true)
422 << " /* clang -E: implicit import for "
423 << "#" << PP.getSpelling(IncludeTok) << " "
424 << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
425 << " */";
426 setEmittedDirectiveOnThisLine();
427 break;
428
429 case tok::pp___include_macros:
430 // #__include_macros has no effect on a user of a preprocessed source
431 // file; the only effect is on preprocessing.
432 //
433 // FIXME: That's not *quite* true: it causes the module in question to
434 // be loaded, which can affect downstream diagnostics.
435 break;
436
437 default:
438 llvm_unreachable("unknown include directive kind");
439 break;
440 }
441 }
442 }
443
444 /// Handle entering the scope of a module during a module compilation.
BeginModule(const Module * M)445 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
446 startNewLineIfNeeded();
447 OS << "#pragma clang module begin " << M->getFullModuleName(true);
448 setEmittedDirectiveOnThisLine();
449 }
450
451 /// Handle leaving the scope of a module during a module compilation.
EndModule(const Module * M)452 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
453 startNewLineIfNeeded();
454 OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
455 setEmittedDirectiveOnThisLine();
456 }
457
458 /// Ident - Handle #ident directives when read by the preprocessor.
459 ///
Ident(SourceLocation Loc,StringRef S)460 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
461 MoveToLine(Loc, /*RequireStartOfLine=*/true);
462
463 OS.write("#ident ", strlen("#ident "));
464 OS.write(S.begin(), S.size());
465 setEmittedTokensOnThisLine();
466 }
467
468 /// MacroDefined - This hook is called whenever a macro definition is seen.
MacroDefined(const Token & MacroNameTok,const MacroDirective * MD)469 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
470 const MacroDirective *MD) {
471 const MacroInfo *MI = MD->getMacroInfo();
472 // Print out macro definitions in -dD mode and when we have -fdirectives-only
473 // for C++20 header units.
474 if ((!DumpDefines && !DirectivesOnly) ||
475 // Ignore __FILE__ etc.
476 MI->isBuiltinMacro())
477 return;
478
479 SourceLocation DefLoc = MI->getDefinitionLoc();
480 if (DirectivesOnly && !MI->isUsed()) {
481 SourceManager &SM = PP.getSourceManager();
482 if (SM.isWrittenInBuiltinFile(DefLoc) ||
483 SM.isWrittenInCommandLineFile(DefLoc))
484 return;
485 }
486 MoveToLine(DefLoc, /*RequireStartOfLine=*/true);
487 PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
488 setEmittedDirectiveOnThisLine();
489 }
490
MacroUndefined(const Token & MacroNameTok,const MacroDefinition & MD,const MacroDirective * Undef)491 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
492 const MacroDefinition &MD,
493 const MacroDirective *Undef) {
494 // Print out macro definitions in -dD mode and when we have -fdirectives-only
495 // for C++20 header units.
496 if (!DumpDefines && !DirectivesOnly)
497 return;
498
499 MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true);
500 OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
501 setEmittedDirectiveOnThisLine();
502 }
503
outputPrintable(raw_ostream & OS,StringRef Str)504 static void outputPrintable(raw_ostream &OS, StringRef Str) {
505 for (unsigned char Char : Str) {
506 if (isPrintable(Char) && Char != '\\' && Char != '"')
507 OS << (char)Char;
508 else // Output anything hard as an octal escape.
509 OS << '\\'
510 << (char)('0' + ((Char >> 6) & 7))
511 << (char)('0' + ((Char >> 3) & 7))
512 << (char)('0' + ((Char >> 0) & 7));
513 }
514 }
515
PragmaMessage(SourceLocation Loc,StringRef Namespace,PragmaMessageKind Kind,StringRef Str)516 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
517 StringRef Namespace,
518 PragmaMessageKind Kind,
519 StringRef Str) {
520 MoveToLine(Loc, /*RequireStartOfLine=*/true);
521 OS << "#pragma ";
522 if (!Namespace.empty())
523 OS << Namespace << ' ';
524 switch (Kind) {
525 case PMK_Message:
526 OS << "message(\"";
527 break;
528 case PMK_Warning:
529 OS << "warning \"";
530 break;
531 case PMK_Error:
532 OS << "error \"";
533 break;
534 }
535
536 outputPrintable(OS, Str);
537 OS << '"';
538 if (Kind == PMK_Message)
539 OS << ')';
540 setEmittedDirectiveOnThisLine();
541 }
542
PragmaDebug(SourceLocation Loc,StringRef DebugType)543 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
544 StringRef DebugType) {
545 MoveToLine(Loc, /*RequireStartOfLine=*/true);
546
547 OS << "#pragma clang __debug ";
548 OS << DebugType;
549
550 setEmittedDirectiveOnThisLine();
551 }
552
553 void PrintPPOutputPPCallbacks::
PragmaDiagnosticPush(SourceLocation Loc,StringRef Namespace)554 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
555 MoveToLine(Loc, /*RequireStartOfLine=*/true);
556 OS << "#pragma " << Namespace << " diagnostic push";
557 setEmittedDirectiveOnThisLine();
558 }
559
560 void PrintPPOutputPPCallbacks::
PragmaDiagnosticPop(SourceLocation Loc,StringRef Namespace)561 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
562 MoveToLine(Loc, /*RequireStartOfLine=*/true);
563 OS << "#pragma " << Namespace << " diagnostic pop";
564 setEmittedDirectiveOnThisLine();
565 }
566
PragmaDiagnostic(SourceLocation Loc,StringRef Namespace,diag::Severity Map,StringRef Str)567 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
568 StringRef Namespace,
569 diag::Severity Map,
570 StringRef Str) {
571 MoveToLine(Loc, /*RequireStartOfLine=*/true);
572 OS << "#pragma " << Namespace << " diagnostic ";
573 switch (Map) {
574 case diag::Severity::Remark:
575 OS << "remark";
576 break;
577 case diag::Severity::Warning:
578 OS << "warning";
579 break;
580 case diag::Severity::Error:
581 OS << "error";
582 break;
583 case diag::Severity::Ignored:
584 OS << "ignored";
585 break;
586 case diag::Severity::Fatal:
587 OS << "fatal";
588 break;
589 }
590 OS << " \"" << Str << '"';
591 setEmittedDirectiveOnThisLine();
592 }
593
PragmaWarning(SourceLocation Loc,PragmaWarningSpecifier WarningSpec,ArrayRef<int> Ids)594 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
595 PragmaWarningSpecifier WarningSpec,
596 ArrayRef<int> Ids) {
597 MoveToLine(Loc, /*RequireStartOfLine=*/true);
598
599 OS << "#pragma warning(";
600 switch(WarningSpec) {
601 case PWS_Default: OS << "default"; break;
602 case PWS_Disable: OS << "disable"; break;
603 case PWS_Error: OS << "error"; break;
604 case PWS_Once: OS << "once"; break;
605 case PWS_Suppress: OS << "suppress"; break;
606 case PWS_Level1: OS << '1'; break;
607 case PWS_Level2: OS << '2'; break;
608 case PWS_Level3: OS << '3'; break;
609 case PWS_Level4: OS << '4'; break;
610 }
611 OS << ':';
612
613 for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
614 OS << ' ' << *I;
615 OS << ')';
616 setEmittedDirectiveOnThisLine();
617 }
618
PragmaWarningPush(SourceLocation Loc,int Level)619 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
620 int Level) {
621 MoveToLine(Loc, /*RequireStartOfLine=*/true);
622 OS << "#pragma warning(push";
623 if (Level >= 0)
624 OS << ", " << Level;
625 OS << ')';
626 setEmittedDirectiveOnThisLine();
627 }
628
PragmaWarningPop(SourceLocation Loc)629 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
630 MoveToLine(Loc, /*RequireStartOfLine=*/true);
631 OS << "#pragma warning(pop)";
632 setEmittedDirectiveOnThisLine();
633 }
634
PragmaExecCharsetPush(SourceLocation Loc,StringRef Str)635 void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
636 StringRef Str) {
637 MoveToLine(Loc, /*RequireStartOfLine=*/true);
638 OS << "#pragma character_execution_set(push";
639 if (!Str.empty())
640 OS << ", " << Str;
641 OS << ')';
642 setEmittedDirectiveOnThisLine();
643 }
644
PragmaExecCharsetPop(SourceLocation Loc)645 void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) {
646 MoveToLine(Loc, /*RequireStartOfLine=*/true);
647 OS << "#pragma character_execution_set(pop)";
648 setEmittedDirectiveOnThisLine();
649 }
650
651 void PrintPPOutputPPCallbacks::
PragmaAssumeNonNullBegin(SourceLocation Loc)652 PragmaAssumeNonNullBegin(SourceLocation Loc) {
653 MoveToLine(Loc, /*RequireStartOfLine=*/true);
654 OS << "#pragma clang assume_nonnull begin";
655 setEmittedDirectiveOnThisLine();
656 }
657
658 void PrintPPOutputPPCallbacks::
PragmaAssumeNonNullEnd(SourceLocation Loc)659 PragmaAssumeNonNullEnd(SourceLocation Loc) {
660 MoveToLine(Loc, /*RequireStartOfLine=*/true);
661 OS << "#pragma clang assume_nonnull end";
662 setEmittedDirectiveOnThisLine();
663 }
664
HandleWhitespaceBeforeTok(const Token & Tok,bool RequireSpace,bool RequireSameLine)665 void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
666 bool RequireSpace,
667 bool RequireSameLine) {
668 // These tokens are not expanded to anything and don't need whitespace before
669 // them.
670 if (Tok.is(tok::eof) ||
671 (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
672 !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end)))
673 return;
674
675 // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
676 if ((!RequireSameLine || EmittedDirectiveOnThisLine) &&
677 MoveToLine(Tok, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine)) {
678 if (MinimizeWhitespace) {
679 // Avoid interpreting hash as a directive under -fpreprocessed.
680 if (Tok.is(tok::hash))
681 OS << ' ';
682 } else {
683 // Print out space characters so that the first token on a line is
684 // indented for easy reading.
685 unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
686
687 // The first token on a line can have a column number of 1, yet still
688 // expect leading white space, if a macro expansion in column 1 starts
689 // with an empty macro argument, or an empty nested macro expansion. In
690 // this case, move the token to column 2.
691 if (ColNo == 1 && Tok.hasLeadingSpace())
692 ColNo = 2;
693
694 // This hack prevents stuff like:
695 // #define HASH #
696 // HASH define foo bar
697 // From having the # character end up at column 1, which makes it so it
698 // is not handled as a #define next time through the preprocessor if in
699 // -fpreprocessed mode.
700 if (ColNo <= 1 && Tok.is(tok::hash))
701 OS << ' ';
702
703 // Otherwise, indent the appropriate number of spaces.
704 for (; ColNo > 1; --ColNo)
705 OS << ' ';
706 }
707 } else {
708 // Insert whitespace between the previous and next token if either
709 // - The caller requires it
710 // - The input had whitespace between them and we are not in
711 // whitespace-minimization mode
712 // - The whitespace is necessary to keep the tokens apart and there is not
713 // already a newline between them
714 if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) ||
715 ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) &&
716 AvoidConcat(PrevPrevTok, PrevTok, Tok)))
717 OS << ' ';
718 }
719
720 PrevPrevTok = PrevTok;
721 PrevTok = Tok;
722 }
723
HandleNewlinesInToken(const char * TokStr,unsigned Len)724 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
725 unsigned Len) {
726 unsigned NumNewlines = 0;
727 for (; Len; --Len, ++TokStr) {
728 if (*TokStr != '\n' &&
729 *TokStr != '\r')
730 continue;
731
732 ++NumNewlines;
733
734 // If we have \n\r or \r\n, skip both and count as one line.
735 if (Len != 1 &&
736 (TokStr[1] == '\n' || TokStr[1] == '\r') &&
737 TokStr[0] != TokStr[1]) {
738 ++TokStr;
739 --Len;
740 }
741 }
742
743 if (NumNewlines == 0) return;
744
745 CurLine += NumNewlines;
746 }
747
748
749 namespace {
750 struct UnknownPragmaHandler : public PragmaHandler {
751 const char *Prefix;
752 PrintPPOutputPPCallbacks *Callbacks;
753
754 // Set to true if tokens should be expanded
755 bool ShouldExpandTokens;
756
UnknownPragmaHandler__anon46d24f460211::UnknownPragmaHandler757 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
758 bool RequireTokenExpansion)
759 : Prefix(prefix), Callbacks(callbacks),
760 ShouldExpandTokens(RequireTokenExpansion) {}
HandlePragma__anon46d24f460211::UnknownPragmaHandler761 void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
762 Token &PragmaTok) override {
763 // Figure out what line we went to and insert the appropriate number of
764 // newline characters.
765 Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true);
766 Callbacks->OS.write(Prefix, strlen(Prefix));
767 Callbacks->setEmittedTokensOnThisLine();
768
769 if (ShouldExpandTokens) {
770 // The first token does not have expanded macros. Expand them, if
771 // required.
772 auto Toks = std::make_unique<Token[]>(1);
773 Toks[0] = PragmaTok;
774 PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
775 /*DisableMacroExpansion=*/false,
776 /*IsReinject=*/false);
777 PP.Lex(PragmaTok);
778 }
779
780 // Read and print all of the pragma tokens.
781 bool IsFirst = true;
782 while (PragmaTok.isNot(tok::eod)) {
783 Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst,
784 /*RequireSameLine=*/true);
785 IsFirst = false;
786 std::string TokSpell = PP.getSpelling(PragmaTok);
787 Callbacks->OS.write(&TokSpell[0], TokSpell.size());
788 Callbacks->setEmittedTokensOnThisLine();
789
790 if (ShouldExpandTokens)
791 PP.Lex(PragmaTok);
792 else
793 PP.LexUnexpandedToken(PragmaTok);
794 }
795 Callbacks->setEmittedDirectiveOnThisLine();
796 }
797 };
798 } // end anonymous namespace
799
800
PrintPreprocessedTokens(Preprocessor & PP,Token & Tok,PrintPPOutputPPCallbacks * Callbacks,raw_ostream & OS)801 static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
802 PrintPPOutputPPCallbacks *Callbacks,
803 raw_ostream &OS) {
804 bool DropComments = PP.getLangOpts().TraditionalCPP &&
805 !PP.getCommentRetentionState();
806
807 bool IsStartOfLine = false;
808 char Buffer[256];
809 while (true) {
810 // Two lines joined with line continuation ('\' as last character on the
811 // line) must be emitted as one line even though Tok.getLine() returns two
812 // different values. In this situation Tok.isAtStartOfLine() is false even
813 // though it may be the first token on the lexical line. When
814 // dropping/skipping a token that is at the start of a line, propagate the
815 // start-of-line-ness to the next token to not append it to the previous
816 // line.
817 IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine();
818
819 Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false,
820 /*RequireSameLine=*/!IsStartOfLine);
821
822 if (DropComments && Tok.is(tok::comment)) {
823 // Skip comments. Normally the preprocessor does not generate
824 // tok::comment nodes at all when not keeping comments, but under
825 // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
826 PP.Lex(Tok);
827 continue;
828 } else if (Tok.is(tok::eod)) {
829 // Don't print end of directive tokens, since they are typically newlines
830 // that mess up our line tracking. These come from unknown pre-processor
831 // directives or hash-prefixed comments in standalone assembly files.
832 PP.Lex(Tok);
833 // FIXME: The token on the next line after #include should have
834 // Tok.isAtStartOfLine() set.
835 IsStartOfLine = true;
836 continue;
837 } else if (Tok.is(tok::annot_module_include)) {
838 // PrintPPOutputPPCallbacks::InclusionDirective handles producing
839 // appropriate output here. Ignore this token entirely.
840 PP.Lex(Tok);
841 IsStartOfLine = true;
842 continue;
843 } else if (Tok.is(tok::annot_module_begin)) {
844 // FIXME: We retrieve this token after the FileChanged callback, and
845 // retrieve the module_end token before the FileChanged callback, so
846 // we render this within the file and render the module end outside the
847 // file, but this is backwards from the token locations: the module_begin
848 // token is at the include location (outside the file) and the module_end
849 // token is at the EOF location (within the file).
850 Callbacks->BeginModule(
851 reinterpret_cast<Module *>(Tok.getAnnotationValue()));
852 PP.Lex(Tok);
853 IsStartOfLine = true;
854 continue;
855 } else if (Tok.is(tok::annot_module_end)) {
856 Callbacks->EndModule(
857 reinterpret_cast<Module *>(Tok.getAnnotationValue()));
858 PP.Lex(Tok);
859 IsStartOfLine = true;
860 continue;
861 } else if (Tok.is(tok::annot_header_unit)) {
862 // This is a header-name that has been (effectively) converted into a
863 // module-name.
864 // FIXME: The module name could contain non-identifier module name
865 // components. We don't have a good way to round-trip those.
866 Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
867 std::string Name = M->getFullModuleName();
868 OS.write(Name.data(), Name.size());
869 Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
870 } else if (Tok.isAnnotation()) {
871 // Ignore annotation tokens created by pragmas - the pragmas themselves
872 // will be reproduced in the preprocessed output.
873 PP.Lex(Tok);
874 continue;
875 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
876 OS << II->getName();
877 } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
878 Tok.getLiteralData()) {
879 OS.write(Tok.getLiteralData(), Tok.getLength());
880 } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) {
881 const char *TokPtr = Buffer;
882 unsigned Len = PP.getSpelling(Tok, TokPtr);
883 OS.write(TokPtr, Len);
884
885 // Tokens that can contain embedded newlines need to adjust our current
886 // line number.
887 // FIXME: The token may end with a newline in which case
888 // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is
889 // wrong.
890 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
891 Callbacks->HandleNewlinesInToken(TokPtr, Len);
892 if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' &&
893 TokPtr[1] == '/') {
894 // It's a line comment;
895 // Ensure that we don't concatenate anything behind it.
896 Callbacks->setEmittedDirectiveOnThisLine();
897 }
898 } else {
899 std::string S = PP.getSpelling(Tok);
900 OS.write(S.data(), S.size());
901
902 // Tokens that can contain embedded newlines need to adjust our current
903 // line number.
904 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
905 Callbacks->HandleNewlinesInToken(S.data(), S.size());
906 if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') {
907 // It's a line comment;
908 // Ensure that we don't concatenate anything behind it.
909 Callbacks->setEmittedDirectiveOnThisLine();
910 }
911 }
912 Callbacks->setEmittedTokensOnThisLine();
913 IsStartOfLine = false;
914
915 if (Tok.is(tok::eof)) break;
916
917 PP.Lex(Tok);
918 }
919 }
920
921 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
MacroIDCompare(const id_macro_pair * LHS,const id_macro_pair * RHS)922 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
923 return LHS->first->getName().compare(RHS->first->getName());
924 }
925
DoPrintMacros(Preprocessor & PP,raw_ostream * OS)926 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
927 // Ignore unknown pragmas.
928 PP.IgnorePragmas();
929
930 // -dM mode just scans and ignores all tokens in the files, then dumps out
931 // the macro table at the end.
932 PP.EnterMainSourceFile();
933
934 Token Tok;
935 do PP.Lex(Tok);
936 while (Tok.isNot(tok::eof));
937
938 SmallVector<id_macro_pair, 128> MacrosByID;
939 for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
940 I != E; ++I) {
941 auto *MD = I->second.getLatest();
942 if (MD && MD->isDefined())
943 MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
944 }
945 llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
946
947 for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
948 MacroInfo &MI = *MacrosByID[i].second;
949 // Ignore computed macros like __LINE__ and friends.
950 if (MI.isBuiltinMacro()) continue;
951
952 PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
953 *OS << '\n';
954 }
955 }
956
957 /// DoPrintPreprocessedInput - This implements -E mode.
958 ///
DoPrintPreprocessedInput(Preprocessor & PP,raw_ostream * OS,const PreprocessorOutputOptions & Opts)959 void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
960 const PreprocessorOutputOptions &Opts) {
961 // Show macros with no output is handled specially.
962 if (!Opts.ShowCPP) {
963 assert(Opts.ShowMacros && "Not yet implemented!");
964 DoPrintMacros(PP, OS);
965 return;
966 }
967
968 // Inform the preprocessor whether we want it to retain comments or not, due
969 // to -C or -CC.
970 PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments);
971
972 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
973 PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
974 Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
975 Opts.MinimizeWhitespace, Opts.DirectivesOnly);
976
977 // Expand macros in pragmas with -fms-extensions. The assumption is that
978 // the majority of pragmas in such a file will be Microsoft pragmas.
979 // Remember the handlers we will add so that we can remove them later.
980 std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
981 new UnknownPragmaHandler(
982 "#pragma", Callbacks,
983 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
984
985 std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
986 "#pragma GCC", Callbacks,
987 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
988
989 std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
990 "#pragma clang", Callbacks,
991 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
992
993 PP.AddPragmaHandler(MicrosoftExtHandler.get());
994 PP.AddPragmaHandler("GCC", GCCHandler.get());
995 PP.AddPragmaHandler("clang", ClangHandler.get());
996
997 // The tokens after pragma omp need to be expanded.
998 //
999 // OpenMP [2.1, Directive format]
1000 // Preprocessing tokens following the #pragma omp are subject to macro
1001 // replacement.
1002 std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
1003 new UnknownPragmaHandler("#pragma omp", Callbacks,
1004 /*RequireTokenExpansion=*/true));
1005 PP.AddPragmaHandler("omp", OpenMPHandler.get());
1006
1007 PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
1008
1009 // After we have configured the preprocessor, enter the main file.
1010 PP.EnterMainSourceFile();
1011 if (Opts.DirectivesOnly)
1012 PP.SetMacroExpansionOnlyInDirectives();
1013
1014 // Consume all of the tokens that come from the predefines buffer. Those
1015 // should not be emitted into the output and are guaranteed to be at the
1016 // start.
1017 const SourceManager &SourceMgr = PP.getSourceManager();
1018 Token Tok;
1019 do {
1020 PP.Lex(Tok);
1021 if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
1022 break;
1023
1024 PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
1025 if (PLoc.isInvalid())
1026 break;
1027
1028 if (strcmp(PLoc.getFilename(), "<built-in>"))
1029 break;
1030 } while (true);
1031
1032 // Read all the preprocessed tokens, printing them out to the stream.
1033 PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
1034 *OS << '\n';
1035
1036 // Remove the handlers we just added to leave the preprocessor in a sane state
1037 // so that it can be reused (for example by a clang::Parser instance).
1038 PP.RemovePragmaHandler(MicrosoftExtHandler.get());
1039 PP.RemovePragmaHandler("GCC", GCCHandler.get());
1040 PP.RemovePragmaHandler("clang", ClangHandler.get());
1041 PP.RemovePragmaHandler("omp", OpenMPHandler.get());
1042 }
1043