1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This code simply runs the preprocessor on the input file and prints out the
11 // result.  This is the traditional behavior of the -E option.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Lex/MacroInfo.h"
17 #include "clang/Lex/PPCallbacks.h"
18 #include "clang/Lex/Preprocessor.h"
19 #include "clang/Lex/Pragma.h"
20 #include "clang/Lex/TokenConcatenation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/Config/config.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <cstdio>
28 using namespace clang;
29 
30 /// PrintMacroDefinition - Print a macro definition in a form that will be
31 /// properly accepted back as a definition.
32 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
33                                  Preprocessor &PP, llvm::raw_ostream &OS) {
34   OS << "#define " << II.getName();
35 
36   if (MI.isFunctionLike()) {
37     OS << '(';
38     if (MI.arg_empty())
39       ;
40     else if (MI.getNumArgs() == 1)
41       OS << (*MI.arg_begin())->getName();
42     else {
43       MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end();
44       OS << (*AI++)->getName();
45       while (AI != E)
46         OS << ',' << (*AI++)->getName();
47     }
48 
49     if (MI.isVariadic()) {
50       if (!MI.arg_empty())
51         OS << ',';
52       OS << "...";
53     }
54     OS << ')';
55   }
56 
57   // GCC always emits a space, even if the macro body is empty.  However, do not
58   // want to emit two spaces if the first token has a leading space.
59   if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
60     OS << ' ';
61 
62   llvm::SmallVector<char, 128> SpellingBuffer;
63   for (MacroInfo::tokens_iterator I = MI.tokens_begin(), E = MI.tokens_end();
64        I != E; ++I) {
65     if (I->hasLeadingSpace())
66       OS << ' ';
67 
68     // Make sure we have enough space in the spelling buffer.
69     if (I->getLength() < SpellingBuffer.size())
70       SpellingBuffer.resize(I->getLength());
71     const char *Buffer = SpellingBuffer.data();
72     unsigned SpellingLen = PP.getSpelling(*I, Buffer);
73     OS.write(Buffer, SpellingLen);
74   }
75 }
76 
77 //===----------------------------------------------------------------------===//
78 // Preprocessed token printer
79 //===----------------------------------------------------------------------===//
80 
81 namespace {
82 class PrintPPOutputPPCallbacks : public PPCallbacks {
83   Preprocessor &PP;
84   TokenConcatenation ConcatInfo;
85 public:
86   llvm::raw_ostream &OS;
87 private:
88   unsigned CurLine;
89   bool EmittedTokensOnThisLine;
90   bool EmittedMacroOnThisLine;
91   SrcMgr::CharacteristicKind FileType;
92   llvm::SmallString<512> CurFilename;
93   bool Initialized;
94   bool DisableLineMarkers;
95   bool DumpDefines;
96 public:
97   PrintPPOutputPPCallbacks(Preprocessor &pp, llvm::raw_ostream &os,
98                            bool lineMarkers, bool defines)
99      : PP(pp), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers),
100        DumpDefines(defines) {
101     CurLine = 0;
102     CurFilename += "<uninit>";
103     EmittedTokensOnThisLine = false;
104     EmittedMacroOnThisLine = false;
105     FileType = SrcMgr::C_User;
106     Initialized = false;
107   }
108 
109   void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
110   bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
111 
112   virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
113                            SrcMgr::CharacteristicKind FileType);
114   virtual void Ident(SourceLocation Loc, const std::string &str);
115   virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind,
116                              const std::string &Str);
117 
118 
119   bool HandleFirstTokOnLine(Token &Tok);
120   bool MoveToLine(SourceLocation Loc);
121   bool AvoidConcat(const Token &PrevTok, const Token &Tok) {
122     return ConcatInfo.AvoidConcat(PrevTok, Tok);
123   }
124   void WriteLineInfo(unsigned LineNo, const char *Extra=0, unsigned ExtraLen=0);
125 
126   void HandleNewlinesInToken(const char *TokStr, unsigned Len);
127 
128   /// MacroDefined - This hook is called whenever a macro definition is seen.
129   void MacroDefined(const IdentifierInfo *II, const MacroInfo *MI);
130 
131 };
132 }  // end anonymous namespace
133 
134 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
135                                              const char *Extra,
136                                              unsigned ExtraLen) {
137   if (EmittedTokensOnThisLine || EmittedMacroOnThisLine) {
138     OS << '\n';
139     EmittedTokensOnThisLine = false;
140     EmittedMacroOnThisLine = false;
141   }
142 
143   OS << '#' << ' ' << LineNo << ' ' << '"';
144   OS.write(&CurFilename[0], CurFilename.size());
145   OS << '"';
146 
147   if (ExtraLen)
148     OS.write(Extra, ExtraLen);
149 
150   if (FileType == SrcMgr::C_System)
151     OS.write(" 3", 2);
152   else if (FileType == SrcMgr::C_ExternCSystem)
153     OS.write(" 3 4", 4);
154   OS << '\n';
155 }
156 
157 /// MoveToLine - Move the output to the source line specified by the location
158 /// object.  We can do this by emitting some number of \n's, or be emitting a
159 /// #line directive.  This returns false if already at the specified line, true
160 /// if some newlines were emitted.
161 bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
162   unsigned LineNo = PP.getSourceManager().getInstantiationLineNumber(Loc);
163 
164   if (DisableLineMarkers) {
165     if (LineNo == CurLine) return false;
166 
167     CurLine = LineNo;
168 
169     if (!EmittedTokensOnThisLine && !EmittedMacroOnThisLine)
170       return true;
171 
172     OS << '\n';
173     EmittedTokensOnThisLine = false;
174     EmittedMacroOnThisLine = false;
175     return true;
176   }
177 
178   // If this line is "close enough" to the original line, just print newlines,
179   // otherwise print a #line directive.
180   if (LineNo-CurLine <= 8) {
181     if (LineNo-CurLine == 1)
182       OS << '\n';
183     else if (LineNo == CurLine)
184       return false;    // Spelling line moved, but instantiation line didn't.
185     else {
186       const char *NewLines = "\n\n\n\n\n\n\n\n";
187       OS.write(NewLines, LineNo-CurLine);
188     }
189   } else {
190     WriteLineInfo(LineNo, 0, 0);
191   }
192 
193   CurLine = LineNo;
194   return true;
195 }
196 
197 
198 /// FileChanged - Whenever the preprocessor enters or exits a #include file
199 /// it invokes this handler.  Update our conception of the current source
200 /// position.
201 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
202                                            FileChangeReason Reason,
203                                        SrcMgr::CharacteristicKind NewFileType) {
204   // Unless we are exiting a #include, make sure to skip ahead to the line the
205   // #include directive was at.
206   SourceManager &SourceMgr = PP.getSourceManager();
207   if (Reason == PPCallbacks::EnterFile) {
208     SourceLocation IncludeLoc = SourceMgr.getPresumedLoc(Loc).getIncludeLoc();
209     if (IncludeLoc.isValid())
210       MoveToLine(IncludeLoc);
211   } else if (Reason == PPCallbacks::SystemHeaderPragma) {
212     MoveToLine(Loc);
213 
214     // TODO GCC emits the # directive for this directive on the line AFTER the
215     // directive and emits a bunch of spaces that aren't needed.  Emulate this
216     // strange behavior.
217   }
218 
219   Loc = SourceMgr.getInstantiationLoc(Loc);
220   // FIXME: Should use presumed line #!
221   CurLine = SourceMgr.getInstantiationLineNumber(Loc);
222 
223   if (DisableLineMarkers) return;
224 
225   CurFilename.clear();
226   CurFilename += SourceMgr.getPresumedLoc(Loc).getFilename();
227   Lexer::Stringify(CurFilename);
228   FileType = NewFileType;
229 
230   if (!Initialized) {
231     WriteLineInfo(CurLine);
232     Initialized = true;
233   }
234 
235   switch (Reason) {
236   case PPCallbacks::EnterFile:
237     WriteLineInfo(CurLine, " 1", 2);
238     break;
239   case PPCallbacks::ExitFile:
240     WriteLineInfo(CurLine, " 2", 2);
241     break;
242   case PPCallbacks::SystemHeaderPragma:
243   case PPCallbacks::RenameFile:
244     WriteLineInfo(CurLine);
245     break;
246   }
247 }
248 
249 /// Ident - Handle #ident directives when read by the preprocessor.
250 ///
251 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
252   MoveToLine(Loc);
253 
254   OS.write("#ident ", strlen("#ident "));
255   OS.write(&S[0], S.size());
256   EmittedTokensOnThisLine = true;
257 }
258 
259 /// MacroDefined - This hook is called whenever a macro definition is seen.
260 void PrintPPOutputPPCallbacks::MacroDefined(const IdentifierInfo *II,
261                                             const MacroInfo *MI) {
262   // Only print out macro definitions in -dD mode.
263   if (!DumpDefines ||
264       // Ignore __FILE__ etc.
265       MI->isBuiltinMacro()) return;
266 
267   MoveToLine(MI->getDefinitionLoc());
268   PrintMacroDefinition(*II, *MI, PP, OS);
269   EmittedMacroOnThisLine = true;
270 }
271 
272 
273 void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc,
274                                              const IdentifierInfo *Kind,
275                                              const std::string &Str) {
276   MoveToLine(Loc);
277   OS << "#pragma comment(" << Kind->getName();
278 
279   if (!Str.empty()) {
280     OS << ", \"";
281 
282     for (unsigned i = 0, e = Str.size(); i != e; ++i) {
283       unsigned char Char = Str[i];
284       if (isprint(Char) && Char != '\\' && Char != '"')
285         OS << (char)Char;
286       else  // Output anything hard as an octal escape.
287         OS << '\\'
288            << (char)('0'+ ((Char >> 6) & 7))
289            << (char)('0'+ ((Char >> 3) & 7))
290            << (char)('0'+ ((Char >> 0) & 7));
291     }
292     OS << '"';
293   }
294 
295   OS << ')';
296   EmittedTokensOnThisLine = true;
297 }
298 
299 
300 /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
301 /// is called for the first token on each new line.  If this really is the start
302 /// of a new logical line, handle it and return true, otherwise return false.
303 /// This may not be the start of a logical line because the "start of line"
304 /// marker is set for spelling lines, not instantiation ones.
305 bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
306   // Figure out what line we went to and insert the appropriate number of
307   // newline characters.
308   if (!MoveToLine(Tok.getLocation()))
309     return false;
310 
311   // Print out space characters so that the first token on a line is
312   // indented for easy reading.
313   const SourceManager &SourceMgr = PP.getSourceManager();
314   unsigned ColNo = SourceMgr.getInstantiationColumnNumber(Tok.getLocation());
315 
316   // This hack prevents stuff like:
317   // #define HASH #
318   // HASH define foo bar
319   // From having the # character end up at column 1, which makes it so it
320   // is not handled as a #define next time through the preprocessor if in
321   // -fpreprocessed mode.
322   if (ColNo <= 1 && Tok.is(tok::hash))
323     OS << ' ';
324 
325   // Otherwise, indent the appropriate number of spaces.
326   for (; ColNo > 1; --ColNo)
327     OS << ' ';
328 
329   return true;
330 }
331 
332 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
333                                                      unsigned Len) {
334   unsigned NumNewlines = 0;
335   for (; Len; --Len, ++TokStr) {
336     if (*TokStr != '\n' &&
337         *TokStr != '\r')
338       continue;
339 
340     ++NumNewlines;
341 
342     // If we have \n\r or \r\n, skip both and count as one line.
343     if (Len != 1 &&
344         (TokStr[1] == '\n' || TokStr[1] == '\r') &&
345         TokStr[0] != TokStr[1])
346       ++TokStr, --Len;
347   }
348 
349   if (NumNewlines == 0) return;
350 
351   CurLine += NumNewlines;
352 }
353 
354 
355 namespace {
356 struct UnknownPragmaHandler : public PragmaHandler {
357   const char *Prefix;
358   PrintPPOutputPPCallbacks *Callbacks;
359 
360   UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
361     : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
362   virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
363     // Figure out what line we went to and insert the appropriate number of
364     // newline characters.
365     Callbacks->MoveToLine(PragmaTok.getLocation());
366     Callbacks->OS.write(Prefix, strlen(Prefix));
367 
368     // Read and print all of the pragma tokens.
369     while (PragmaTok.isNot(tok::eom)) {
370       if (PragmaTok.hasLeadingSpace())
371         Callbacks->OS << ' ';
372       std::string TokSpell = PP.getSpelling(PragmaTok);
373       Callbacks->OS.write(&TokSpell[0], TokSpell.size());
374       PP.LexUnexpandedToken(PragmaTok);
375     }
376     Callbacks->OS << '\n';
377   }
378 };
379 } // end anonymous namespace
380 
381 
382 static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
383                                     PrintPPOutputPPCallbacks *Callbacks,
384                                     llvm::raw_ostream &OS) {
385   char Buffer[256];
386   Token PrevTok;
387   while (1) {
388 
389     // If this token is at the start of a line, emit newlines if needed.
390     if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
391       // done.
392     } else if (Tok.hasLeadingSpace() ||
393                // If we haven't emitted a token on this line yet, PrevTok isn't
394                // useful to look at and no concatenation could happen anyway.
395                (Callbacks->hasEmittedTokensOnThisLine() &&
396                 // Don't print "-" next to "-", it would form "--".
397                 Callbacks->AvoidConcat(PrevTok, Tok))) {
398       OS << ' ';
399     }
400 
401     if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
402       OS.write(II->getName(), II->getLength());
403     } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
404                Tok.getLiteralData()) {
405       OS.write(Tok.getLiteralData(), Tok.getLength());
406     } else if (Tok.getLength() < 256) {
407       const char *TokPtr = Buffer;
408       unsigned Len = PP.getSpelling(Tok, TokPtr);
409       OS.write(TokPtr, Len);
410 
411       // Tokens that can contain embedded newlines need to adjust our current
412       // line number.
413       if (Tok.getKind() == tok::comment)
414         Callbacks->HandleNewlinesInToken(TokPtr, Len);
415     } else {
416       std::string S = PP.getSpelling(Tok);
417       OS.write(&S[0], S.size());
418 
419       // Tokens that can contain embedded newlines need to adjust our current
420       // line number.
421       if (Tok.getKind() == tok::comment)
422         Callbacks->HandleNewlinesInToken(&S[0], S.size());
423     }
424     Callbacks->SetEmittedTokensOnThisLine();
425 
426     if (Tok.is(tok::eof)) break;
427 
428     PrevTok = Tok;
429     PP.Lex(Tok);
430   }
431 }
432 
433 namespace {
434   struct SortMacrosByID {
435     typedef std::pair<IdentifierInfo*, MacroInfo*> id_macro_pair;
436     bool operator()(const id_macro_pair &LHS, const id_macro_pair &RHS) const {
437       return strcmp(LHS.first->getName(), RHS.first->getName()) < 0;
438     }
439   };
440 }
441 
442 void clang::DoPrintMacros(Preprocessor &PP, llvm::raw_ostream *OS) {
443   // -dM mode just scans and ignores all tokens in the files, then dumps out
444   // the macro table at the end.
445   PP.EnterMainSourceFile();
446 
447   Token Tok;
448   do PP.Lex(Tok);
449   while (Tok.isNot(tok::eof));
450 
451   std::vector<std::pair<IdentifierInfo*, MacroInfo*> > MacrosByID;
452   for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
453        I != E; ++I)
454     MacrosByID.push_back(*I);
455   std::sort(MacrosByID.begin(), MacrosByID.end(), SortMacrosByID());
456 
457   for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
458     MacroInfo &MI = *MacrosByID[i].second;
459     // Ignore computed macros like __LINE__ and friends.
460     if (MI.isBuiltinMacro()) continue;
461 
462     PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
463     *OS << "\n";
464   }
465 }
466 
467 /// DoPrintPreprocessedInput - This implements -E mode.
468 ///
469 void clang::DoPrintPreprocessedInput(Preprocessor &PP, llvm::raw_ostream *OS,
470                                      bool EnableCommentOutput,
471                                      bool EnableMacroCommentOutput,
472                                      bool DisableLineMarkers,
473                                      bool DumpDefines) {
474   // Inform the preprocessor whether we want it to retain comments or not, due
475   // to -C or -CC.
476   PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
477 
478   OS->SetBufferSize(64*1024);
479 
480   PrintPPOutputPPCallbacks *Callbacks =
481       new PrintPPOutputPPCallbacks(PP, *OS, DisableLineMarkers, DumpDefines);
482   PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
483   PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",
484                                                       Callbacks));
485 
486   PP.setPPCallbacks(Callbacks);
487 
488   // After we have configured the preprocessor, enter the main file.
489   PP.EnterMainSourceFile();
490 
491   // Consume all of the tokens that come from the predefines buffer.  Those
492   // should not be emitted into the output and are guaranteed to be at the
493   // start.
494   const SourceManager &SourceMgr = PP.getSourceManager();
495   Token Tok;
496   do PP.Lex(Tok);
497   while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
498          !strcmp(SourceMgr.getPresumedLoc(Tok.getLocation()).getFilename(),
499                  "<built-in>"));
500 
501   // Read all the preprocessed tokens, printing them out to the stream.
502   PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
503   *OS << '\n';
504 }
505 
506