1 //===--- PlistDiagnostics.cpp - Plist Diagnostics for Paths -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the PlistDiagnostics object.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Basic/FileManager.h"
14 #include "clang/Basic/PlistSupport.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/Version.h"
17 #include "clang/CrossTU/CrossTranslationUnit.h"
18 #include "clang/Frontend/ASTUnit.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "clang/Lex/TokenConcatenation.h"
21 #include "clang/Rewrite/Core/HTMLRewrite.h"
22 #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
23 #include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
24 #include "clang/StaticAnalyzer/Core/IssueHash.h"
25 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/Support/Casting.h"
30 
31 using namespace clang;
32 using namespace ento;
33 using namespace markup;
34 
35 //===----------------------------------------------------------------------===//
36 // Declarations of helper classes and functions for emitting bug reports in
37 // plist format.
38 //===----------------------------------------------------------------------===//
39 
40 namespace {
41   class PlistDiagnostics : public PathDiagnosticConsumer {
42     const std::string OutputFile;
43     const Preprocessor &PP;
44     const cross_tu::CrossTranslationUnitContext &CTU;
45     AnalyzerOptions &AnOpts;
46     const bool SupportsCrossFileDiagnostics;
47   public:
48     PlistDiagnostics(AnalyzerOptions &AnalyzerOpts, const std::string &prefix,
49                      const Preprocessor &PP,
50                      const cross_tu::CrossTranslationUnitContext &CTU,
51                      bool supportsMultipleFiles);
52 
53     ~PlistDiagnostics() override {}
54 
55     void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
56                               FilesMade *filesMade) override;
57 
58     StringRef getName() const override {
59       return "PlistDiagnostics";
60     }
61 
62     PathGenerationScheme getGenerationScheme() const override {
63       return Extensive;
64     }
65     bool supportsLogicalOpControlFlow() const override { return true; }
66     bool supportsCrossFileDiagnostics() const override {
67       return SupportsCrossFileDiagnostics;
68     }
69   };
70 } // end anonymous namespace
71 
72 namespace {
73 
74 /// A helper class for emitting a single report.
75 class PlistPrinter {
76   const FIDMap& FM;
77   AnalyzerOptions &AnOpts;
78   const Preprocessor &PP;
79   const cross_tu::CrossTranslationUnitContext &CTU;
80   llvm::SmallVector<const PathDiagnosticMacroPiece *, 0> MacroPieces;
81 
82 public:
83   PlistPrinter(const FIDMap& FM, AnalyzerOptions &AnOpts,
84                const Preprocessor &PP,
85                const cross_tu::CrossTranslationUnitContext &CTU)
86     : FM(FM), AnOpts(AnOpts), PP(PP), CTU(CTU) {
87   }
88 
89   void ReportDiag(raw_ostream &o, const PathDiagnosticPiece& P) {
90     ReportPiece(o, P, /*indent*/ 4, /*depth*/ 0, /*includeControlFlow*/ true);
91 
92     // Don't emit a warning about an unused private field.
93     (void)AnOpts;
94   }
95 
96   /// Print the expansions of the collected macro pieces.
97   ///
98   /// Each time ReportDiag is called on a PathDiagnosticMacroPiece (or, if one
99   /// is found through a call piece, etc), it's subpieces are reported, and the
100   /// piece itself is collected. Call this function after the entire bugpath
101   /// was reported.
102   void ReportMacroExpansions(raw_ostream &o, unsigned indent);
103 
104 private:
105   void ReportPiece(raw_ostream &o, const PathDiagnosticPiece &P,
106                    unsigned indent, unsigned depth, bool includeControlFlow,
107                    bool isKeyEvent = false) {
108     switch (P.getKind()) {
109       case PathDiagnosticPiece::ControlFlow:
110         if (includeControlFlow)
111           ReportControlFlow(o, cast<PathDiagnosticControlFlowPiece>(P), indent);
112         break;
113       case PathDiagnosticPiece::Call:
114         ReportCall(o, cast<PathDiagnosticCallPiece>(P), indent,
115                    depth);
116         break;
117       case PathDiagnosticPiece::Event:
118         ReportEvent(o, cast<PathDiagnosticEventPiece>(P), indent, depth,
119                     isKeyEvent);
120         break;
121       case PathDiagnosticPiece::Macro:
122         ReportMacroSubPieces(o, cast<PathDiagnosticMacroPiece>(P), indent,
123                              depth);
124         break;
125       case PathDiagnosticPiece::Note:
126         ReportNote(o, cast<PathDiagnosticNotePiece>(P), indent);
127         break;
128       case PathDiagnosticPiece::PopUp:
129         ReportPopUp(o, cast<PathDiagnosticPopUpPiece>(P), indent);
130         break;
131     }
132   }
133 
134   void EmitRanges(raw_ostream &o, const ArrayRef<SourceRange> Ranges,
135                   unsigned indent);
136   void EmitMessage(raw_ostream &o, StringRef Message, unsigned indent);
137 
138   void ReportControlFlow(raw_ostream &o,
139                          const PathDiagnosticControlFlowPiece& P,
140                          unsigned indent);
141   void ReportEvent(raw_ostream &o, const PathDiagnosticEventPiece& P,
142                    unsigned indent, unsigned depth, bool isKeyEvent = false);
143   void ReportCall(raw_ostream &o, const PathDiagnosticCallPiece &P,
144                   unsigned indent, unsigned depth);
145   void ReportMacroSubPieces(raw_ostream &o, const PathDiagnosticMacroPiece& P,
146                             unsigned indent, unsigned depth);
147   void ReportNote(raw_ostream &o, const PathDiagnosticNotePiece& P,
148                   unsigned indent);
149 
150   void ReportPopUp(raw_ostream &o, const PathDiagnosticPopUpPiece &P,
151                    unsigned indent);
152 };
153 
154 } // end of anonymous namespace
155 
156 namespace {
157 
158 struct ExpansionInfo {
159   std::string MacroName;
160   std::string Expansion;
161   ExpansionInfo(std::string N, std::string E)
162     : MacroName(std::move(N)), Expansion(std::move(E)) {}
163 };
164 
165 } // end of anonymous namespace
166 
/// Print the pieces of \p Path to \p o: a "notes" array when the path starts
/// with note pieces, the "path" array, and the "macro_expansions" array when
/// AnOpts.ShouldDisplayMacroExpansions is set.
static void printBugPath(llvm::raw_ostream &o, const FIDMap& FM,
                         AnalyzerOptions &AnOpts, const Preprocessor &PP,
                         const cross_tu::CrossTranslationUnitContext &CTU,
                         const PathPieces &Path);

/// Print coverage information to output stream \p o.
/// May modify the used list of files \p Fids by inserting new ones.
static void printCoverage(const PathDiagnostic *D,
                          unsigned InputIndentLevel,
                          SmallVectorImpl<FileID> &Fids,
                          FIDMap &FM,
                          llvm::raw_fd_ostream &o);

/// Retrieve the ExpansionInfo (macro name and expansion text) for the macro
/// at \p MacroLoc.
static ExpansionInfo
getExpandedMacro(SourceLocation MacroLoc, const Preprocessor &PP,
                 const cross_tu::CrossTranslationUnitContext &CTU);
183 
184 //===----------------------------------------------------------------------===//
185 // Methods of PlistPrinter.
186 //===----------------------------------------------------------------------===//
187 
188 void PlistPrinter::EmitRanges(raw_ostream &o,
189                               const ArrayRef<SourceRange> Ranges,
190                               unsigned indent) {
191 
192   if (Ranges.empty())
193     return;
194 
195   Indent(o, indent) << "<key>ranges</key>\n";
196   Indent(o, indent) << "<array>\n";
197   ++indent;
198 
199   const SourceManager &SM = PP.getSourceManager();
200   const LangOptions &LangOpts = PP.getLangOpts();
201 
202   for (auto &R : Ranges)
203     EmitRange(o, SM,
204               Lexer::getAsCharRange(SM.getExpansionRange(R), SM, LangOpts),
205               FM, indent + 1);
206   --indent;
207   Indent(o, indent) << "</array>\n";
208 }
209 
210 void PlistPrinter::EmitMessage(raw_ostream &o, StringRef Message,
211                                unsigned indent) {
212   // Output the text.
213   assert(!Message.empty());
214   Indent(o, indent) << "<key>extended_message</key>\n";
215   Indent(o, indent);
216   EmitString(o, Message) << '\n';
217 
218   // Output the short text.
219   // FIXME: Really use a short string.
220   Indent(o, indent) << "<key>message</key>\n";
221   Indent(o, indent);
222   EmitString(o, Message) << '\n';
223 }
224 
225 void PlistPrinter::ReportControlFlow(raw_ostream &o,
226                                      const PathDiagnosticControlFlowPiece& P,
227                                      unsigned indent) {
228 
229   const SourceManager &SM = PP.getSourceManager();
230   const LangOptions &LangOpts = PP.getLangOpts();
231 
232   Indent(o, indent) << "<dict>\n";
233   ++indent;
234 
235   Indent(o, indent) << "<key>kind</key><string>control</string>\n";
236 
237   // Emit edges.
238   Indent(o, indent) << "<key>edges</key>\n";
239   ++indent;
240   Indent(o, indent) << "<array>\n";
241   ++indent;
242   for (PathDiagnosticControlFlowPiece::const_iterator I=P.begin(), E=P.end();
243        I!=E; ++I) {
244     Indent(o, indent) << "<dict>\n";
245     ++indent;
246 
247     // Make the ranges of the start and end point self-consistent with adjacent edges
248     // by forcing to use only the beginning of the range.  This simplifies the layout
249     // logic for clients.
250     Indent(o, indent) << "<key>start</key>\n";
251     SourceRange StartEdge(
252         SM.getExpansionLoc(I->getStart().asRange().getBegin()));
253     EmitRange(o, SM, Lexer::getAsCharRange(StartEdge, SM, LangOpts), FM,
254               indent + 1);
255 
256     Indent(o, indent) << "<key>end</key>\n";
257     SourceRange EndEdge(SM.getExpansionLoc(I->getEnd().asRange().getBegin()));
258     EmitRange(o, SM, Lexer::getAsCharRange(EndEdge, SM, LangOpts), FM,
259               indent + 1);
260 
261     --indent;
262     Indent(o, indent) << "</dict>\n";
263   }
264   --indent;
265   Indent(o, indent) << "</array>\n";
266   --indent;
267 
268   // Output any helper text.
269   const auto &s = P.getString();
270   if (!s.empty()) {
271     Indent(o, indent) << "<key>alternate</key>";
272     EmitString(o, s) << '\n';
273   }
274 
275   --indent;
276   Indent(o, indent) << "</dict>\n";
277 }
278 
279 void PlistPrinter::ReportEvent(raw_ostream &o, const PathDiagnosticEventPiece& P,
280                                unsigned indent, unsigned depth,
281                                bool isKeyEvent) {
282 
283   const SourceManager &SM = PP.getSourceManager();
284 
285   Indent(o, indent) << "<dict>\n";
286   ++indent;
287 
288   Indent(o, indent) << "<key>kind</key><string>event</string>\n";
289 
290   if (isKeyEvent) {
291     Indent(o, indent) << "<key>key_event</key><true/>\n";
292   }
293 
294   // Output the location.
295   FullSourceLoc L = P.getLocation().asLocation();
296 
297   Indent(o, indent) << "<key>location</key>\n";
298   EmitLocation(o, SM, L, FM, indent);
299 
300   // Output the ranges (if any).
301   ArrayRef<SourceRange> Ranges = P.getRanges();
302   EmitRanges(o, Ranges, indent);
303 
304   // Output the call depth.
305   Indent(o, indent) << "<key>depth</key>";
306   EmitInteger(o, depth) << '\n';
307 
308   // Output the text.
309   EmitMessage(o, P.getString(), indent);
310 
311   // Finish up.
312   --indent;
313   Indent(o, indent); o << "</dict>\n";
314 }
315 
316 void PlistPrinter::ReportCall(raw_ostream &o, const PathDiagnosticCallPiece &P,
317                               unsigned indent,
318                               unsigned depth) {
319 
320   if (auto callEnter = P.getCallEnterEvent())
321     ReportPiece(o, *callEnter, indent, depth, /*includeControlFlow*/ true,
322                 P.isLastInMainSourceFile());
323 
324 
325   ++depth;
326 
327   if (auto callEnterWithinCaller = P.getCallEnterWithinCallerEvent())
328     ReportPiece(o, *callEnterWithinCaller, indent, depth,
329                 /*includeControlFlow*/ true);
330 
331   for (PathPieces::const_iterator I = P.path.begin(), E = P.path.end();I!=E;++I)
332     ReportPiece(o, **I, indent, depth, /*includeControlFlow*/ true);
333 
334   --depth;
335 
336   if (auto callExit = P.getCallExitEvent())
337     ReportPiece(o, *callExit, indent, depth, /*includeControlFlow*/ true);
338 }
339 
340 void PlistPrinter::ReportMacroSubPieces(raw_ostream &o,
341                                         const PathDiagnosticMacroPiece& P,
342                                         unsigned indent, unsigned depth) {
343   MacroPieces.push_back(&P);
344 
345   for (PathPieces::const_iterator I = P.subPieces.begin(),
346                                   E = P.subPieces.end();
347        I != E; ++I) {
348     ReportPiece(o, **I, indent, depth, /*includeControlFlow*/ false);
349   }
350 }
351 
352 void PlistPrinter::ReportMacroExpansions(raw_ostream &o, unsigned indent) {
353 
354   for (const PathDiagnosticMacroPiece *P : MacroPieces) {
355     const SourceManager &SM = PP.getSourceManager();
356     ExpansionInfo EI = getExpandedMacro(P->getLocation().asLocation(), PP, CTU);
357 
358     Indent(o, indent) << "<dict>\n";
359     ++indent;
360 
361     // Output the location.
362     FullSourceLoc L = P->getLocation().asLocation();
363 
364     Indent(o, indent) << "<key>location</key>\n";
365     EmitLocation(o, SM, L, FM, indent);
366 
367     // Output the ranges (if any).
368     ArrayRef<SourceRange> Ranges = P->getRanges();
369     EmitRanges(o, Ranges, indent);
370 
371     // Output the macro name.
372     Indent(o, indent) << "<key>name</key>";
373     EmitString(o, EI.MacroName) << '\n';
374 
375     // Output what it expands into.
376     Indent(o, indent) << "<key>expansion</key>";
377     EmitString(o, EI.Expansion) << '\n';
378 
379     // Finish up.
380     --indent;
381     Indent(o, indent);
382     o << "</dict>\n";
383   }
384 }
385 
386 void PlistPrinter::ReportNote(raw_ostream &o, const PathDiagnosticNotePiece& P,
387                               unsigned indent) {
388 
389   const SourceManager &SM = PP.getSourceManager();
390 
391   Indent(o, indent) << "<dict>\n";
392   ++indent;
393 
394   // Output the location.
395   FullSourceLoc L = P.getLocation().asLocation();
396 
397   Indent(o, indent) << "<key>location</key>\n";
398   EmitLocation(o, SM, L, FM, indent);
399 
400   // Output the ranges (if any).
401   ArrayRef<SourceRange> Ranges = P.getRanges();
402   EmitRanges(o, Ranges, indent);
403 
404   // Output the text.
405   EmitMessage(o, P.getString(), indent);
406 
407   // Finish up.
408   --indent;
409   Indent(o, indent); o << "</dict>\n";
410 }
411 
412 void PlistPrinter::ReportPopUp(raw_ostream &o,
413                                const PathDiagnosticPopUpPiece &P,
414                                unsigned indent) {
415   const SourceManager &SM = PP.getSourceManager();
416 
417   Indent(o, indent) << "<dict>\n";
418   ++indent;
419 
420   Indent(o, indent) << "<key>kind</key><string>pop-up</string>\n";
421 
422   // Output the location.
423   FullSourceLoc L = P.getLocation().asLocation();
424 
425   Indent(o, indent) << "<key>location</key>\n";
426   EmitLocation(o, SM, L, FM, indent);
427 
428   // Output the ranges (if any).
429   ArrayRef<SourceRange> Ranges = P.getRanges();
430   EmitRanges(o, Ranges, indent);
431 
432   // Output the text.
433   EmitMessage(o, P.getString(), indent);
434 
435   // Finish up.
436   --indent;
437   Indent(o, indent) << "</dict>\n";
438 }
439 
440 //===----------------------------------------------------------------------===//
441 // Static function definitions.
442 //===----------------------------------------------------------------------===//
443 
444 /// Print coverage information to output stream {@code o}.
445 /// May modify the used list of files {@code Fids} by inserting new ones.
446 static void printCoverage(const PathDiagnostic *D,
447                           unsigned InputIndentLevel,
448                           SmallVectorImpl<FileID> &Fids,
449                           FIDMap &FM,
450                           llvm::raw_fd_ostream &o) {
451   unsigned IndentLevel = InputIndentLevel;
452 
453   Indent(o, IndentLevel) << "<key>ExecutedLines</key>\n";
454   Indent(o, IndentLevel) << "<dict>\n";
455   IndentLevel++;
456 
457   // Mapping from file IDs to executed lines.
458   const FilesToLineNumsMap &ExecutedLines = D->getExecutedLines();
459   for (auto I = ExecutedLines.begin(), E = ExecutedLines.end(); I != E; ++I) {
460     unsigned FileKey = AddFID(FM, Fids, I->first);
461     Indent(o, IndentLevel) << "<key>" << FileKey << "</key>\n";
462     Indent(o, IndentLevel) << "<array>\n";
463     IndentLevel++;
464     for (unsigned LineNo : I->second) {
465       Indent(o, IndentLevel);
466       EmitInteger(o, LineNo) << "\n";
467     }
468     IndentLevel--;
469     Indent(o, IndentLevel) << "</array>\n";
470   }
471   IndentLevel--;
472   Indent(o, IndentLevel) << "</dict>\n";
473 
474   assert(IndentLevel == InputIndentLevel);
475 }
476 
477 static void printBugPath(llvm::raw_ostream &o, const FIDMap& FM,
478                          AnalyzerOptions &AnOpts, const Preprocessor &PP,
479                          const cross_tu::CrossTranslationUnitContext &CTU,
480                          const PathPieces &Path) {
481   PlistPrinter Printer(FM, AnOpts, PP, CTU);
482   assert(std::is_partitioned(Path.begin(), Path.end(),
483                              [](const PathDiagnosticPieceRef &E) {
484                                return E->getKind() == PathDiagnosticPiece::Note;
485                              }) &&
486          "PathDiagnostic is not partitioned so that notes precede the rest");
487 
488   PathPieces::const_iterator FirstNonNote = std::partition_point(
489       Path.begin(), Path.end(), [](const PathDiagnosticPieceRef &E) {
490         return E->getKind() == PathDiagnosticPiece::Note;
491       });
492 
493   PathPieces::const_iterator I = Path.begin();
494 
495   if (FirstNonNote != Path.begin()) {
496     o << "   <key>notes</key>\n"
497          "   <array>\n";
498 
499     for (; I != FirstNonNote; ++I)
500       Printer.ReportDiag(o, **I);
501 
502     o << "   </array>\n";
503   }
504 
505   o << "   <key>path</key>\n";
506 
507   o << "   <array>\n";
508 
509   for (PathPieces::const_iterator E = Path.end(); I != E; ++I)
510     Printer.ReportDiag(o, **I);
511 
512   o << "   </array>\n";
513 
514   if (!AnOpts.ShouldDisplayMacroExpansions)
515     return;
516 
517   o << "   <key>macro_expansions</key>\n"
518        "   <array>\n";
519   Printer.ReportMacroExpansions(o, /* indent */ 4);
520   o << "   </array>\n";
521 }
522 
523 //===----------------------------------------------------------------------===//
524 // Methods of PlistDiagnostics.
525 //===----------------------------------------------------------------------===//
526 
527 PlistDiagnostics::PlistDiagnostics(
528     AnalyzerOptions &AnalyzerOpts, const std::string &output,
529     const Preprocessor &PP, const cross_tu::CrossTranslationUnitContext &CTU,
530     bool supportsMultipleFiles)
531     : OutputFile(output), PP(PP), CTU(CTU), AnOpts(AnalyzerOpts),
532       SupportsCrossFileDiagnostics(supportsMultipleFiles) {
533   // FIXME: Will be used by a later planned change.
534   (void)this->CTU;
535 }
536 
537 void ento::createPlistDiagnosticConsumer(
538     AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C,
539     const std::string &s, const Preprocessor &PP,
540     const cross_tu::CrossTranslationUnitContext &CTU) {
541   C.push_back(new PlistDiagnostics(AnalyzerOpts, s, PP, CTU,
542                                    /*supportsMultipleFiles*/ false));
543 }
544 
545 void ento::createPlistMultiFileDiagnosticConsumer(
546     AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C,
547     const std::string &s, const Preprocessor &PP,
548     const cross_tu::CrossTranslationUnitContext &CTU) {
549   C.push_back(new PlistDiagnostics(AnalyzerOpts, s, PP, CTU,
550                                    /*supportsMultipleFiles*/ true));
551 }
552 void PlistDiagnostics::FlushDiagnosticsImpl(
553                                     std::vector<const PathDiagnostic *> &Diags,
554                                     FilesMade *filesMade) {
555   // Build up a set of FIDs that we use by scanning the locations and
556   // ranges of the diagnostics.
557   FIDMap FM;
558   SmallVector<FileID, 10> Fids;
559   const SourceManager& SM = PP.getSourceManager();
560   const LangOptions &LangOpts = PP.getLangOpts();
561 
562   auto AddPieceFID = [&FM, &Fids, &SM](const PathDiagnosticPiece &Piece) {
563     AddFID(FM, Fids, SM, Piece.getLocation().asLocation());
564     ArrayRef<SourceRange> Ranges = Piece.getRanges();
565     for (const SourceRange &Range : Ranges) {
566       AddFID(FM, Fids, SM, Range.getBegin());
567       AddFID(FM, Fids, SM, Range.getEnd());
568     }
569   };
570 
571   for (const PathDiagnostic *D : Diags) {
572 
573     SmallVector<const PathPieces *, 5> WorkList;
574     WorkList.push_back(&D->path);
575 
576     while (!WorkList.empty()) {
577       const PathPieces &Path = *WorkList.pop_back_val();
578 
579       for (const auto &Iter : Path) {
580         const PathDiagnosticPiece &Piece = *Iter;
581         AddPieceFID(Piece);
582 
583         if (const PathDiagnosticCallPiece *Call =
584                 dyn_cast<PathDiagnosticCallPiece>(&Piece)) {
585           if (auto CallEnterWithin = Call->getCallEnterWithinCallerEvent())
586             AddPieceFID(*CallEnterWithin);
587 
588           if (auto CallEnterEvent = Call->getCallEnterEvent())
589             AddPieceFID(*CallEnterEvent);
590 
591           WorkList.push_back(&Call->path);
592         } else if (const PathDiagnosticMacroPiece *Macro =
593                        dyn_cast<PathDiagnosticMacroPiece>(&Piece)) {
594           WorkList.push_back(&Macro->subPieces);
595         }
596       }
597     }
598   }
599 
600   // Open the file.
601   std::error_code EC;
602   llvm::raw_fd_ostream o(OutputFile, EC, llvm::sys::fs::OF_Text);
603   if (EC) {
604     llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
605     return;
606   }
607 
608   EmitPlistHeader(o);
609 
610   // Write the root object: a <dict> containing...
611   //  - "clang_version", the string representation of clang version
612   //  - "files", an <array> mapping from FIDs to file names
613   //  - "diagnostics", an <array> containing the path diagnostics
614   o << "<dict>\n" <<
615        " <key>clang_version</key>\n";
616   EmitString(o, getClangFullVersion()) << '\n';
617   o << " <key>diagnostics</key>\n"
618        " <array>\n";
619 
620   for (std::vector<const PathDiagnostic*>::iterator DI=Diags.begin(),
621        DE = Diags.end(); DI!=DE; ++DI) {
622 
623     o << "  <dict>\n";
624 
625     const PathDiagnostic *D = *DI;
626     printBugPath(o, FM, AnOpts, PP, CTU, D->path);
627 
628     // Output the bug type and bug category.
629     o << "   <key>description</key>";
630     EmitString(o, D->getShortDescription()) << '\n';
631     o << "   <key>category</key>";
632     EmitString(o, D->getCategory()) << '\n';
633     o << "   <key>type</key>";
634     EmitString(o, D->getBugType()) << '\n';
635     o << "   <key>check_name</key>";
636     EmitString(o, D->getCheckName()) << '\n';
637 
638     o << "   <!-- This hash is experimental and going to change! -->\n";
639     o << "   <key>issue_hash_content_of_line_in_context</key>";
640     PathDiagnosticLocation UPDLoc = D->getUniqueingLoc();
641     FullSourceLoc L(SM.getExpansionLoc(UPDLoc.isValid()
642                                             ? UPDLoc.asLocation()
643                                             : D->getLocation().asLocation()),
644                     SM);
645     const Decl *DeclWithIssue = D->getDeclWithIssue();
646     EmitString(o, GetIssueHash(SM, L, D->getCheckName(), D->getBugType(),
647                                DeclWithIssue, LangOpts))
648         << '\n';
649 
650     // Output information about the semantic context where
651     // the issue occurred.
652     if (const Decl *DeclWithIssue = D->getDeclWithIssue()) {
653       // FIXME: handle blocks, which have no name.
654       if (const NamedDecl *ND = dyn_cast<NamedDecl>(DeclWithIssue)) {
655         StringRef declKind;
656         switch (ND->getKind()) {
657           case Decl::CXXRecord:
658             declKind = "C++ class";
659             break;
660           case Decl::CXXMethod:
661             declKind = "C++ method";
662             break;
663           case Decl::ObjCMethod:
664             declKind = "Objective-C method";
665             break;
666           case Decl::Function:
667             declKind = "function";
668             break;
669           default:
670             break;
671         }
672         if (!declKind.empty()) {
673           const std::string &declName = ND->getDeclName().getAsString();
674           o << "  <key>issue_context_kind</key>";
675           EmitString(o, declKind) << '\n';
676           o << "  <key>issue_context</key>";
677           EmitString(o, declName) << '\n';
678         }
679 
680         // Output the bug hash for issue unique-ing. Currently, it's just an
681         // offset from the beginning of the function.
682         if (const Stmt *Body = DeclWithIssue->getBody()) {
683 
684           // If the bug uniqueing location exists, use it for the hash.
685           // For example, this ensures that two leaks reported on the same line
686           // will have different issue_hashes and that the hash will identify
687           // the leak location even after code is added between the allocation
688           // site and the end of scope (leak report location).
689           if (UPDLoc.isValid()) {
690             FullSourceLoc UFunL(
691                 SM.getExpansionLoc(
692                     D->getUniqueingDecl()->getBody()->getBeginLoc()),
693                 SM);
694             o << "  <key>issue_hash_function_offset</key><string>"
695               << L.getExpansionLineNumber() - UFunL.getExpansionLineNumber()
696               << "</string>\n";
697 
698           // Otherwise, use the location on which the bug is reported.
699           } else {
700             FullSourceLoc FunL(SM.getExpansionLoc(Body->getBeginLoc()), SM);
701             o << "  <key>issue_hash_function_offset</key><string>"
702               << L.getExpansionLineNumber() - FunL.getExpansionLineNumber()
703               << "</string>\n";
704           }
705 
706         }
707       }
708     }
709 
710     // Output the location of the bug.
711     o << "  <key>location</key>\n";
712     EmitLocation(o, SM, D->getLocation().asLocation(), FM, 2);
713 
714     // Output the diagnostic to the sub-diagnostic client, if any.
715     if (!filesMade->empty()) {
716       StringRef lastName;
717       PDFileEntry::ConsumerFiles *files = filesMade->getFiles(*D);
718       if (files) {
719         for (PDFileEntry::ConsumerFiles::const_iterator CI = files->begin(),
720                 CE = files->end(); CI != CE; ++CI) {
721           StringRef newName = CI->first;
722           if (newName != lastName) {
723             if (!lastName.empty()) {
724               o << "  </array>\n";
725             }
726             lastName = newName;
727             o <<  "  <key>" << lastName << "_files</key>\n";
728             o << "  <array>\n";
729           }
730           o << "   <string>" << CI->second << "</string>\n";
731         }
732         o << "  </array>\n";
733       }
734     }
735 
736     printCoverage(D, /*IndentLevel=*/2, Fids, FM, o);
737 
738     // Close up the entry.
739     o << "  </dict>\n";
740   }
741 
742   o << " </array>\n";
743 
744   o << " <key>files</key>\n"
745        " <array>\n";
746   for (FileID FID : Fids)
747     EmitString(o << "  ", SM.getFileEntryForID(FID)->getName()) << '\n';
748   o << " </array>\n";
749 
750   if (llvm::AreStatisticsEnabled() && AnOpts.ShouldSerializeStats) {
751     o << " <key>statistics</key>\n";
752     std::string stats;
753     llvm::raw_string_ostream os(stats);
754     llvm::PrintStatisticsJSON(os);
755     os.flush();
756     EmitString(o, html::EscapeText(stats)) << '\n';
757   }
758 
759   // Finish.
760   o << "</dict>\n</plist>\n";
761 }
762 
763 //===----------------------------------------------------------------------===//
764 // Declarations of helper functions and data structures for expanding macros.
765 //===----------------------------------------------------------------------===//
766 
767 namespace {
768 
/// The token sequence stored for a single macro argument in MacroArgMap.
using ExpArgTokens = llvm::SmallVector<Token, 2>;

/// Maps unexpanded macro arguments to expanded arguments. A macro argument may
/// need to be expanded further when it is nested inside another macro.
class MacroArgMap : public std::map<const IdentifierInfo *, ExpArgTokens> {
public:
  // NOTE(review): presumably expands the arguments stored here with the
  // arguments of the enclosing macro \p Super — confirm against the
  // definition.
  void expandFromPrevMacro(const MacroArgMap &Super);
};
777 
778 struct MacroNameAndArgs {
779   std::string Name;
780   const MacroInfo *MI = nullptr;
781   MacroArgMap Args;
782 
783   MacroNameAndArgs(std::string N, const MacroInfo *MI, MacroArgMap M)
784     : Name(std::move(N)), MI(MI), Args(std::move(M)) {}
785 };
786 
787 class TokenPrinter {
788   llvm::raw_ostream &OS;
789   const Preprocessor &PP;
790 
791   Token PrevTok, PrevPrevTok;
792   TokenConcatenation ConcatInfo;
793 
794 public:
795   TokenPrinter(llvm::raw_ostream &OS, const Preprocessor &PP)
796     : OS(OS), PP(PP), ConcatInfo(PP) {
797     PrevTok.setKind(tok::unknown);
798     PrevPrevTok.setKind(tok::unknown);
799   }
800 
801   void printToken(const Token &Tok);
802 };
803 
804 } // end of anonymous namespace
805 
/// The implementation method of getExpandedMacro: It prints the expansion of
/// a macro to \p Printer, and returns the name of the macro.
///
/// Since macros can be nested in one another, this function may call itself
/// recursively.
///
/// Unfortunately, macro arguments have to be expanded manually. To understand
/// why, observe the following example:
///
///   #define PRINT(x) print(x)
///   #define DO_SOMETHING(str) PRINT(str)
///
///   DO_SOMETHING("Cute panda cubs.");
///
/// As we expand the last line, we'll immediately replace PRINT(str) with
/// print(x). The information that both 'str' and 'x' refer to the same string
/// is information we have to forward, hence the argument \p PrevArgs.
///
/// To avoid infinite recursion we maintain the already processed tokens in
/// a set. This is carried as a parameter through the recursive calls. The set
/// is extended with the currently processed token and after processing it, the
/// token is removed. If the token is already in the set, then recursion stops:
///
/// #define f(y) x
/// #define x f(x)
static std::string getMacroNameAndPrintExpansion(
    TokenPrinter &Printer,
    SourceLocation MacroLoc,
    const Preprocessor &PP,
    const MacroArgMap &PrevArgs,
    llvm::SmallPtrSet<IdentifierInfo *, 8> &AlreadyProcessedTokens);
837 
838 /// Retrieves the name of the macro and what it's arguments expand into
839 /// at \p ExpanLoc.
840 ///
841 /// For example, for the following macro expansion:
842 ///
843 ///   #define SET_TO_NULL(x) x = 0
844 ///   #define NOT_SUSPICIOUS(a) \
845 ///     {                       \
846 ///       int b = 0;            \
847 ///     }                       \
848 ///     SET_TO_NULL(a)
849 ///
850 ///   int *ptr = new int(4);
851 ///   NOT_SUSPICIOUS(&ptr);
852 ///   *ptr = 5;
853 ///
854 /// When \p ExpanLoc references the last line, the macro name "NOT_SUSPICIOUS"
855 /// and the MacroArgMap map { (a, &ptr) } will be returned.
856 ///
/// When \p ExpanLoc references "SET_TO_NULL(a)" within the definition of
/// "NOT_SUSPICIOUS", the macro name "SET_TO_NULL" and the MacroArgMap map
/// { (x, a) } will be returned.
860 static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
861                                             const Preprocessor &PP);
862 
863 /// Retrieves the ')' token that matches '(' \p It points to.
864 static MacroInfo::tokens_iterator getMatchingRParen(
865     MacroInfo::tokens_iterator It,
866     MacroInfo::tokens_iterator End);
867 
/// Retrieves the macro info that \p II refers to at \p Loc. This is important
/// because macros can be redefined or undefined.
870 static const MacroInfo *getMacroInfoForLocation(const Preprocessor &PP,
871                                                 const SourceManager &SM,
872                                                 const IdentifierInfo *II,
873                                                 SourceLocation Loc);
874 
875 //===----------------------------------------------------------------------===//
876 // Definitions of helper functions and methods for expanding macros.
877 //===----------------------------------------------------------------------===//
878 
879 static ExpansionInfo
880 getExpandedMacro(SourceLocation MacroLoc, const Preprocessor &PP,
881                  const cross_tu::CrossTranslationUnitContext &CTU) {
882 
883   const Preprocessor *PPToUse = &PP;
884   if (auto LocAndUnit = CTU.getImportedFromSourceLocation(MacroLoc)) {
885     MacroLoc = LocAndUnit->first;
886     PPToUse = &LocAndUnit->second->getPreprocessor();
887   }
888 
889   llvm::SmallString<200> ExpansionBuf;
890   llvm::raw_svector_ostream OS(ExpansionBuf);
891   TokenPrinter Printer(OS, *PPToUse);
892   llvm::SmallPtrSet<IdentifierInfo*, 8> AlreadyProcessedTokens;
893 
894   std::string MacroName = getMacroNameAndPrintExpansion(
895       Printer, MacroLoc, *PPToUse, MacroArgMap{}, AlreadyProcessedTokens);
896   return { MacroName, OS.str() };
897 }
898 
899 static std::string getMacroNameAndPrintExpansion(
900     TokenPrinter &Printer,
901     SourceLocation MacroLoc,
902     const Preprocessor &PP,
903     const MacroArgMap &PrevArgs,
904     llvm::SmallPtrSet<IdentifierInfo *, 8> &AlreadyProcessedTokens) {
905 
906   const SourceManager &SM = PP.getSourceManager();
907 
908   MacroNameAndArgs Info = getMacroNameAndArgs(SM.getExpansionLoc(MacroLoc), PP);
909   IdentifierInfo* IDInfo = PP.getIdentifierInfo(Info.Name);
910 
911   // TODO: If the macro definition contains another symbol then this function is
912   // called recursively. In case this symbol is the one being defined, it will
913   // be an infinite recursion which is stopped by this "if" statement. However,
914   // in this case we don't get the full expansion text in the Plist file. See
915   // the test file where "value" is expanded to "garbage_" instead of
916   // "garbage_value".
917   if (AlreadyProcessedTokens.find(IDInfo) != AlreadyProcessedTokens.end())
918     return Info.Name;
919   AlreadyProcessedTokens.insert(IDInfo);
920 
921   if (!Info.MI)
922     return Info.Name;
923 
924   // Manually expand its arguments from the previous macro.
925   Info.Args.expandFromPrevMacro(PrevArgs);
926 
927   // Iterate over the macro's tokens and stringify them.
928   for (auto It = Info.MI->tokens_begin(), E = Info.MI->tokens_end(); It != E;
929        ++It) {
930     Token T = *It;
931 
932     // If this token is not an identifier, we only need to print it.
933     if (T.isNot(tok::identifier)) {
934       Printer.printToken(T);
935       continue;
936     }
937 
938     const auto *II = T.getIdentifierInfo();
939     assert(II &&
940           "This token is an identifier but has no IdentifierInfo!");
941 
942     // If this token is a macro that should be expanded inside the current
943     // macro.
944     if (getMacroInfoForLocation(PP, SM, II, T.getLocation())) {
945       getMacroNameAndPrintExpansion(Printer, T.getLocation(), PP, Info.Args,
946                                     AlreadyProcessedTokens);
947 
948       // If this is a function-like macro, skip its arguments, as
949       // getExpandedMacro() already printed them. If this is the case, let's
950       // first jump to the '(' token.
951       auto N = std::next(It);
952       if (N != E && N->is(tok::l_paren))
953         It = getMatchingRParen(++It, E);
954       continue;
955     }
956 
957     // If this token is the current macro's argument, we should expand it.
958     auto ArgMapIt = Info.Args.find(II);
959     if (ArgMapIt != Info.Args.end()) {
960       for (MacroInfo::tokens_iterator ArgIt = ArgMapIt->second.begin(),
961                                       ArgEnd = ArgMapIt->second.end();
962            ArgIt != ArgEnd; ++ArgIt) {
963 
964         // These tokens may still be macros, if that is the case, handle it the
965         // same way we did above.
966         const auto *ArgII = ArgIt->getIdentifierInfo();
967         if (!ArgII) {
968           Printer.printToken(*ArgIt);
969           continue;
970         }
971 
972         const auto *MI = PP.getMacroInfo(ArgII);
973         if (!MI) {
974           Printer.printToken(*ArgIt);
975           continue;
976         }
977 
978         getMacroNameAndPrintExpansion(Printer, ArgIt->getLocation(), PP,
979                                       Info.Args, AlreadyProcessedTokens);
980         // Peek the next token if it is a tok::l_paren. This way we can decide
981         // if this is the application or just a reference to a function maxro
982         // symbol:
983         //
984         // #define apply(f) ...
985         // #define func(x) ...
986         // apply(func)
987         // apply(func(42))
988         auto N = std::next(ArgIt);
989         if (N != ArgEnd && N->is(tok::l_paren))
990           ArgIt = getMatchingRParen(++ArgIt, ArgEnd);
991       }
992       continue;
993     }
994 
995     // If control reached here, then this token isn't a macro identifier, nor an
996     // unexpanded macro argument that we need to handle, print it.
997     Printer.printToken(T);
998   }
999 
1000   AlreadyProcessedTokens.erase(IDInfo);
1001 
1002   return Info.Name;
1003 }
1004 
1005 static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
1006                                             const Preprocessor &PP) {
1007 
1008   const SourceManager &SM = PP.getSourceManager();
1009   const LangOptions &LangOpts = PP.getLangOpts();
1010 
1011   // First, we create a Lexer to lex *at the expansion location* the tokens
1012   // referring to the macro's name and its arguments.
1013   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ExpanLoc);
1014   const llvm::MemoryBuffer *MB = SM.getBuffer(LocInfo.first);
1015   const char *MacroNameTokenPos = MB->getBufferStart() + LocInfo.second;
1016 
1017   Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts,
1018                  MB->getBufferStart(), MacroNameTokenPos, MB->getBufferEnd());
1019 
1020   // Acquire the macro's name.
1021   Token TheTok;
1022   RawLexer.LexFromRawLexer(TheTok);
1023 
1024   std::string MacroName = PP.getSpelling(TheTok);
1025 
1026   const auto *II = PP.getIdentifierInfo(MacroName);
1027   assert(II && "Failed to acquire the IndetifierInfo for the macro!");
1028 
1029   const MacroInfo *MI = getMacroInfoForLocation(PP, SM, II, ExpanLoc);
1030   // assert(MI && "The macro must've been defined at it's expansion location!");
1031   //
1032   // We should always be able to obtain the MacroInfo in a given TU, but if
1033   // we're running the analyzer with CTU, the Preprocessor won't contain the
1034   // directive history (or anything for that matter) from another TU.
1035   // TODO: assert when we're not running with CTU.
1036   if (!MI)
1037     return { MacroName, MI, {} };
1038 
1039   // Acquire the macro's arguments.
1040   //
1041   // The rough idea here is to lex from the first left parentheses to the last
1042   // right parentheses, and map the macro's unexpanded arguments to what they
1043   // will be expanded to. An expanded macro argument may contain several tokens
1044   // (like '3 + 4'), so we'll lex until we find a tok::comma or tok::r_paren, at
1045   // which point we start lexing the next argument or finish.
1046   ArrayRef<const IdentifierInfo *> MacroArgs = MI->params();
1047   if (MacroArgs.empty())
1048     return { MacroName, MI, {} };
1049 
1050   RawLexer.LexFromRawLexer(TheTok);
1051   // When this is a token which expands to another macro function then its
1052   // parentheses are not at its expansion locaiton. For example:
1053   //
1054   // #define foo(x) int bar() { return x; }
1055   // #define apply_zero(f) f(0)
1056   // apply_zero(foo)
1057   //               ^
1058   //               This is not a tok::l_paren, but foo is a function.
1059   if (TheTok.isNot(tok::l_paren))
1060     return { MacroName, MI, {} };
1061 
1062   MacroArgMap Args;
1063 
1064   // When the macro's argument is a function call, like
1065   //   CALL_FN(someFunctionName(param1, param2))
1066   // we will find tok::l_paren, tok::r_paren, and tok::comma that do not divide
1067   // actual macro arguments, or do not represent the macro argument's closing
1068   // parentheses, so we'll count how many parentheses aren't closed yet.
1069   // If ParanthesesDepth
1070   //   * = 0, then there are no more arguments to lex.
1071   //   * = 1, then if we find a tok::comma, we can start lexing the next arg.
1072   //   * > 1, then tok::comma is a part of the current arg.
1073   int ParenthesesDepth = 1;
1074 
1075   // If we encounter __VA_ARGS__, we will lex until the closing tok::r_paren,
1076   // even if we lex a tok::comma and ParanthesesDepth == 1.
1077   const IdentifierInfo *__VA_ARGS__II = PP.getIdentifierInfo("__VA_ARGS__");
1078 
1079   for (const IdentifierInfo *UnexpArgII : MacroArgs) {
1080     MacroArgMap::mapped_type ExpandedArgTokens;
1081 
1082     // One could also simply not supply a single argument to __VA_ARGS__ -- this
1083     // results in a preprocessor warning, but is not an error:
1084     //   #define VARIADIC(ptr, ...) \
1085     //     someVariadicTemplateFunction(__VA_ARGS__)
1086     //
1087     //   int *ptr;
1088     //   VARIADIC(ptr); // Note that there are no commas, this isn't just an
1089     //                  // empty parameter -- there are no parameters for '...'.
1090     // In any other case, ParenthesesDepth mustn't be 0 here.
1091     if (ParenthesesDepth != 0) {
1092 
1093       // Lex the first token of the next macro parameter.
1094       RawLexer.LexFromRawLexer(TheTok);
1095 
1096       while (!(ParenthesesDepth == 1 &&
1097               (UnexpArgII == __VA_ARGS__II ? false : TheTok.is(tok::comma)))) {
1098         assert(TheTok.isNot(tok::eof) &&
1099                "EOF encountered while looking for expanded macro args!");
1100 
1101         if (TheTok.is(tok::l_paren))
1102           ++ParenthesesDepth;
1103 
1104         if (TheTok.is(tok::r_paren))
1105           --ParenthesesDepth;
1106 
1107         if (ParenthesesDepth == 0)
1108           break;
1109 
1110         if (TheTok.is(tok::raw_identifier))
1111           PP.LookUpIdentifierInfo(TheTok);
1112 
1113         ExpandedArgTokens.push_back(TheTok);
1114         RawLexer.LexFromRawLexer(TheTok);
1115       }
1116     } else {
1117       assert(UnexpArgII == __VA_ARGS__II);
1118     }
1119 
1120     Args.emplace(UnexpArgII, std::move(ExpandedArgTokens));
1121   }
1122 
1123   assert(TheTok.is(tok::r_paren) &&
1124          "Expanded macro argument acquisition failed! After the end of the loop"
1125          " this token should be ')'!");
1126 
1127   return { MacroName, MI, Args };
1128 }
1129 
1130 static MacroInfo::tokens_iterator getMatchingRParen(
1131     MacroInfo::tokens_iterator It,
1132     MacroInfo::tokens_iterator End) {
1133 
1134   assert(It->is(tok::l_paren) && "This token should be '('!");
1135 
1136   // Skip until we find the closing ')'.
1137   int ParenthesesDepth = 1;
1138   while (ParenthesesDepth != 0) {
1139     ++It;
1140 
1141     assert(It->isNot(tok::eof) &&
1142            "Encountered EOF while attempting to skip macro arguments!");
1143     assert(It != End &&
1144            "End of the macro definition reached before finding ')'!");
1145 
1146     if (It->is(tok::l_paren))
1147       ++ParenthesesDepth;
1148 
1149     if (It->is(tok::r_paren))
1150       --ParenthesesDepth;
1151   }
1152   return It;
1153 }
1154 
1155 static const MacroInfo *getMacroInfoForLocation(const Preprocessor &PP,
1156                                                 const SourceManager &SM,
1157                                                 const IdentifierInfo *II,
1158                                                 SourceLocation Loc) {
1159 
1160   const MacroDirective *MD = PP.getLocalMacroDirectiveHistory(II);
1161   if (!MD)
1162     return nullptr;
1163 
1164   return MD->findDirectiveAtLoc(Loc, SM).getMacroInfo();
1165 }
1166 
1167 void MacroArgMap::expandFromPrevMacro(const MacroArgMap &Super) {
1168 
1169   for (value_type &Pair : *this) {
1170     ExpArgTokens &CurrExpArgTokens = Pair.second;
1171 
1172     // For each token in the expanded macro argument.
1173     auto It = CurrExpArgTokens.begin();
1174     while (It != CurrExpArgTokens.end()) {
1175       if (It->isNot(tok::identifier)) {
1176         ++It;
1177         continue;
1178       }
1179 
1180       const auto *II = It->getIdentifierInfo();
1181       assert(II);
1182 
1183       // Is this an argument that "Super" expands further?
1184       if (!Super.count(II)) {
1185         ++It;
1186         continue;
1187       }
1188 
1189       const ExpArgTokens &SuperExpArgTokens = Super.at(II);
1190 
1191       It = CurrExpArgTokens.insert(
1192           It, SuperExpArgTokens.begin(), SuperExpArgTokens.end());
1193       std::advance(It, SuperExpArgTokens.size());
1194       It = CurrExpArgTokens.erase(It);
1195     }
1196   }
1197 }
1198 
1199 void TokenPrinter::printToken(const Token &Tok) {
1200   // If this is the first token to be printed, don't print space.
1201   if (PrevTok.isNot(tok::unknown)) {
1202     // If the tokens were already space separated, or if they must be to avoid
1203     // them being implicitly pasted, add a space between them.
1204     if(Tok.hasLeadingSpace() || ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok,
1205                                                        Tok)) {
1206       // AvoidConcat doesn't check for ##, don't print a space around it.
1207       if (PrevTok.isNot(tok::hashhash) && Tok.isNot(tok::hashhash)) {
1208         OS << ' ';
1209       }
1210     }
1211   }
1212 
1213   if (!Tok.isOneOf(tok::hash, tok::hashhash)) {
1214     if (PrevTok.is(tok::hash))
1215       OS << '\"' << PP.getSpelling(Tok) << '\"';
1216     else
1217       OS << PP.getSpelling(Tok);
1218   }
1219 
1220   PrevPrevTok = PrevTok;
1221   PrevTok = Tok;
1222 }
1223