1 //===--- PlistDiagnostics.cpp - Plist Diagnostics for Paths -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file defines the PlistDiagnostics object.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/FileManager.h"
15 #include "clang/Basic/PlistSupport.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Basic/Version.h"
18 #include "clang/Lex/Preprocessor.h"
19 #include "clang/Rewrite/Core/HTMLRewrite.h"
20 #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
21 #include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
22 #include "clang/StaticAnalyzer/Core/IssueHash.h"
23 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Support/Casting.h"
27 using namespace clang;
28 using namespace ento;
29 using namespace markup;
30 
31 namespace {
32   class PlistDiagnostics : public PathDiagnosticConsumer {
33     const std::string OutputFile;
34     const LangOptions &LangOpts;
35     const bool SupportsCrossFileDiagnostics;
36     const bool SerializeStatistics;
37   public:
38     PlistDiagnostics(AnalyzerOptions &AnalyzerOpts,
39                      const std::string& prefix,
40                      const LangOptions &LangOpts,
41                      bool supportsMultipleFiles);
42 
43     ~PlistDiagnostics() override {}
44 
45     void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
46                               FilesMade *filesMade) override;
47 
48     StringRef getName() const override {
49       return "PlistDiagnostics";
50     }
51 
52     PathGenerationScheme getGenerationScheme() const override {
53       return Extensive;
54     }
55     bool supportsLogicalOpControlFlow() const override { return true; }
56     bool supportsCrossFileDiagnostics() const override {
57       return SupportsCrossFileDiagnostics;
58     }
59   };
60 } // end anonymous namespace
61 
62 PlistDiagnostics::PlistDiagnostics(AnalyzerOptions &AnalyzerOpts,
63                                    const std::string& output,
64                                    const LangOptions &LO,
65                                    bool supportsMultipleFiles)
66   : OutputFile(output),
67     LangOpts(LO),
68     SupportsCrossFileDiagnostics(supportsMultipleFiles),
69     SerializeStatistics(AnalyzerOpts.shouldSerializeStats()) {}
70 
71 void ento::createPlistDiagnosticConsumer(AnalyzerOptions &AnalyzerOpts,
72                                          PathDiagnosticConsumers &C,
73                                          const std::string& s,
74                                          const Preprocessor &PP) {
75   C.push_back(new PlistDiagnostics(AnalyzerOpts, s,
76                                    PP.getLangOpts(), false));
77 }
78 
79 void ento::createPlistMultiFileDiagnosticConsumer(AnalyzerOptions &AnalyzerOpts,
80                                                   PathDiagnosticConsumers &C,
81                                                   const std::string &s,
82                                                   const Preprocessor &PP) {
83   C.push_back(new PlistDiagnostics(AnalyzerOpts, s,
84                                    PP.getLangOpts(), true));
85 }
86 
87 static void EmitRanges(raw_ostream &o,
88                        const ArrayRef<SourceRange> Ranges,
89                        const FIDMap& FM,
90                        const SourceManager &SM,
91                        const LangOptions &LangOpts,
92                        unsigned indent) {
93 
94   if (Ranges.empty())
95     return;
96 
97   Indent(o, indent) << "<key>ranges</key>\n";
98   Indent(o, indent) << "<array>\n";
99   ++indent;
100   for (auto &R : Ranges)
101     EmitRange(o, SM,
102               Lexer::getAsCharRange(SM.getExpansionRange(R), SM, LangOpts),
103               FM, indent + 1);
104   --indent;
105   Indent(o, indent) << "</array>\n";
106 }
107 
108 static void EmitMessage(raw_ostream &o, StringRef Message, unsigned indent) {
109   // Output the text.
110   assert(!Message.empty());
111   Indent(o, indent) << "<key>extended_message</key>\n";
112   Indent(o, indent);
113   EmitString(o, Message) << '\n';
114 
115   // Output the short text.
116   // FIXME: Really use a short string.
117   Indent(o, indent) << "<key>message</key>\n";
118   Indent(o, indent);
119   EmitString(o, Message) << '\n';
120 }
121 
122 static void ReportControlFlow(raw_ostream &o,
123                               const PathDiagnosticControlFlowPiece& P,
124                               const FIDMap& FM,
125                               const SourceManager &SM,
126                               const LangOptions &LangOpts,
127                               unsigned indent) {
128 
129   Indent(o, indent) << "<dict>\n";
130   ++indent;
131 
132   Indent(o, indent) << "<key>kind</key><string>control</string>\n";
133 
134   // Emit edges.
135   Indent(o, indent) << "<key>edges</key>\n";
136   ++indent;
137   Indent(o, indent) << "<array>\n";
138   ++indent;
139   for (PathDiagnosticControlFlowPiece::const_iterator I=P.begin(), E=P.end();
140        I!=E; ++I) {
141     Indent(o, indent) << "<dict>\n";
142     ++indent;
143 
144     // Make the ranges of the start and end point self-consistent with adjacent edges
145     // by forcing to use only the beginning of the range.  This simplifies the layout
146     // logic for clients.
147     Indent(o, indent) << "<key>start</key>\n";
148     SourceRange StartEdge(
149         SM.getExpansionLoc(I->getStart().asRange().getBegin()));
150     EmitRange(o, SM, Lexer::getAsCharRange(StartEdge, SM, LangOpts), FM,
151               indent + 1);
152 
153     Indent(o, indent) << "<key>end</key>\n";
154     SourceRange EndEdge(SM.getExpansionLoc(I->getEnd().asRange().getBegin()));
155     EmitRange(o, SM, Lexer::getAsCharRange(EndEdge, SM, LangOpts), FM,
156               indent + 1);
157 
158     --indent;
159     Indent(o, indent) << "</dict>\n";
160   }
161   --indent;
162   Indent(o, indent) << "</array>\n";
163   --indent;
164 
165   // Output any helper text.
166   const auto &s = P.getString();
167   if (!s.empty()) {
168     Indent(o, indent) << "<key>alternate</key>";
169     EmitString(o, s) << '\n';
170   }
171 
172   --indent;
173   Indent(o, indent) << "</dict>\n";
174 }
175 
176 static void ReportEvent(raw_ostream &o, const PathDiagnosticEventPiece& P,
177                         const FIDMap& FM,
178                         const SourceManager &SM,
179                         const LangOptions &LangOpts,
180                         unsigned indent,
181                         unsigned depth,
182                         bool isKeyEvent = false) {
183 
184   Indent(o, indent) << "<dict>\n";
185   ++indent;
186 
187   Indent(o, indent) << "<key>kind</key><string>event</string>\n";
188 
189   if (isKeyEvent) {
190     Indent(o, indent) << "<key>key_event</key><true/>\n";
191   }
192 
193   // Output the location.
194   FullSourceLoc L = P.getLocation().asLocation();
195 
196   Indent(o, indent) << "<key>location</key>\n";
197   EmitLocation(o, SM, L, FM, indent);
198 
199   // Output the ranges (if any).
200   ArrayRef<SourceRange> Ranges = P.getRanges();
201   EmitRanges(o, Ranges, FM, SM, LangOpts, indent);
202 
203   // Output the call depth.
204   Indent(o, indent) << "<key>depth</key>";
205   EmitInteger(o, depth) << '\n';
206 
207   // Output the text.
208   EmitMessage(o, P.getString(), indent);
209 
210   // Finish up.
211   --indent;
212   Indent(o, indent); o << "</dict>\n";
213 }
214 
215 static void ReportPiece(raw_ostream &o,
216                         const PathDiagnosticPiece &P,
217                         const FIDMap& FM, const SourceManager &SM,
218                         const LangOptions &LangOpts,
219                         unsigned indent,
220                         unsigned depth,
221                         bool includeControlFlow,
222                         bool isKeyEvent = false);
223 
224 static void ReportCall(raw_ostream &o,
225                        const PathDiagnosticCallPiece &P,
226                        const FIDMap& FM, const SourceManager &SM,
227                        const LangOptions &LangOpts,
228                        unsigned indent,
229                        unsigned depth) {
230 
231   if (auto callEnter = P.getCallEnterEvent())
232     ReportPiece(o, *callEnter, FM, SM, LangOpts, indent, depth, true,
233                 P.isLastInMainSourceFile());
234 
235 
236   ++depth;
237 
238   if (auto callEnterWithinCaller = P.getCallEnterWithinCallerEvent())
239     ReportPiece(o, *callEnterWithinCaller, FM, SM, LangOpts,
240                 indent, depth, true);
241 
242   for (PathPieces::const_iterator I = P.path.begin(), E = P.path.end();I!=E;++I)
243     ReportPiece(o, **I, FM, SM, LangOpts, indent, depth, true);
244 
245   --depth;
246 
247   if (auto callExit = P.getCallExitEvent())
248     ReportPiece(o, *callExit, FM, SM, LangOpts, indent, depth, true);
249 }
250 
251 static void ReportMacro(raw_ostream &o,
252                         const PathDiagnosticMacroPiece& P,
253                         const FIDMap& FM, const SourceManager &SM,
254                         const LangOptions &LangOpts,
255                         unsigned indent,
256                         unsigned depth) {
257 
258   for (PathPieces::const_iterator I = P.subPieces.begin(), E=P.subPieces.end();
259        I!=E; ++I) {
260     ReportPiece(o, **I, FM, SM, LangOpts, indent, depth, false);
261   }
262 }
263 
264 static void ReportNote(raw_ostream &o, const PathDiagnosticNotePiece& P,
265                         const FIDMap& FM,
266                         const SourceManager &SM,
267                         const LangOptions &LangOpts,
268                         unsigned indent,
269                         unsigned depth) {
270 
271   Indent(o, indent) << "<dict>\n";
272   ++indent;
273 
274   // Output the location.
275   FullSourceLoc L = P.getLocation().asLocation();
276 
277   Indent(o, indent) << "<key>location</key>\n";
278   EmitLocation(o, SM, L, FM, indent);
279 
280   // Output the ranges (if any).
281   ArrayRef<SourceRange> Ranges = P.getRanges();
282   EmitRanges(o, Ranges, FM, SM, LangOpts, indent);
283 
284   // Output the text.
285   EmitMessage(o, P.getString(), indent);
286 
287   // Finish up.
288   --indent;
289   Indent(o, indent); o << "</dict>\n";
290 }
291 
292 static void ReportDiag(raw_ostream &o, const PathDiagnosticPiece& P,
293                        const FIDMap& FM, const SourceManager &SM,
294                        const LangOptions &LangOpts) {
295   ReportPiece(o, P, FM, SM, LangOpts, 4, 0, true);
296 }
297 
298 static void ReportPiece(raw_ostream &o,
299                         const PathDiagnosticPiece &P,
300                         const FIDMap& FM, const SourceManager &SM,
301                         const LangOptions &LangOpts,
302                         unsigned indent,
303                         unsigned depth,
304                         bool includeControlFlow,
305                         bool isKeyEvent) {
306   switch (P.getKind()) {
307     case PathDiagnosticPiece::ControlFlow:
308       if (includeControlFlow)
309         ReportControlFlow(o, cast<PathDiagnosticControlFlowPiece>(P), FM, SM,
310                           LangOpts, indent);
311       break;
312     case PathDiagnosticPiece::Call:
313       ReportCall(o, cast<PathDiagnosticCallPiece>(P), FM, SM, LangOpts,
314                  indent, depth);
315       break;
316     case PathDiagnosticPiece::Event:
317       ReportEvent(o, cast<PathDiagnosticEventPiece>(P), FM, SM, LangOpts,
318                   indent, depth, isKeyEvent);
319       break;
320     case PathDiagnosticPiece::Macro:
321       ReportMacro(o, cast<PathDiagnosticMacroPiece>(P), FM, SM, LangOpts,
322                   indent, depth);
323       break;
324     case PathDiagnosticPiece::Note:
325       ReportNote(o, cast<PathDiagnosticNotePiece>(P), FM, SM, LangOpts,
326                   indent, depth);
327       break;
328   }
329 }
330 
331 /// Print coverage information to output stream {@code o}.
332 /// May modify the used list of files {@code Fids} by inserting new ones.
333 static void printCoverage(const PathDiagnostic *D,
334                           unsigned InputIndentLevel,
335                           SmallVectorImpl<FileID> &Fids,
336                           FIDMap &FM,
337                           llvm::raw_fd_ostream &o) {
338   unsigned IndentLevel = InputIndentLevel;
339 
340   Indent(o, IndentLevel) << "<key>ExecutedLines</key>\n";
341   Indent(o, IndentLevel) << "<dict>\n";
342   IndentLevel++;
343 
344   // Mapping from file IDs to executed lines.
345   const FilesToLineNumsMap &ExecutedLines = D->getExecutedLines();
346   for (auto I = ExecutedLines.begin(), E = ExecutedLines.end(); I != E; ++I) {
347     unsigned FileKey = AddFID(FM, Fids, I->first);
348     Indent(o, IndentLevel) << "<key>" << FileKey << "</key>\n";
349     Indent(o, IndentLevel) << "<array>\n";
350     IndentLevel++;
351     for (unsigned LineNo : I->second) {
352       Indent(o, IndentLevel);
353       EmitInteger(o, LineNo) << "\n";
354     }
355     IndentLevel--;
356     Indent(o, IndentLevel) << "</array>\n";
357   }
358   IndentLevel--;
359   Indent(o, IndentLevel) << "</dict>\n";
360 
361   assert(IndentLevel == InputIndentLevel);
362 }
363 
364 void PlistDiagnostics::FlushDiagnosticsImpl(
365                                     std::vector<const PathDiagnostic *> &Diags,
366                                     FilesMade *filesMade) {
367   // Build up a set of FIDs that we use by scanning the locations and
368   // ranges of the diagnostics.
369   FIDMap FM;
370   SmallVector<FileID, 10> Fids;
371   const SourceManager* SM = nullptr;
372 
373   if (!Diags.empty())
374     SM = &Diags.front()->path.front()->getLocation().getManager();
375 
376   auto AddPieceFID = [&FM, &Fids, SM](const PathDiagnosticPiece &Piece) {
377     AddFID(FM, Fids, *SM, Piece.getLocation().asLocation());
378     ArrayRef<SourceRange> Ranges = Piece.getRanges();
379     for (const SourceRange &Range : Ranges) {
380       AddFID(FM, Fids, *SM, Range.getBegin());
381       AddFID(FM, Fids, *SM, Range.getEnd());
382     }
383   };
384 
385   for (const PathDiagnostic *D : Diags) {
386 
387     SmallVector<const PathPieces *, 5> WorkList;
388     WorkList.push_back(&D->path);
389 
390     while (!WorkList.empty()) {
391       const PathPieces &Path = *WorkList.pop_back_val();
392 
393       for (const auto &Iter : Path) {
394         const PathDiagnosticPiece &Piece = *Iter;
395         AddPieceFID(Piece);
396 
397         if (const PathDiagnosticCallPiece *Call =
398                 dyn_cast<PathDiagnosticCallPiece>(&Piece)) {
399           if (auto CallEnterWithin = Call->getCallEnterWithinCallerEvent())
400             AddPieceFID(*CallEnterWithin);
401 
402           if (auto CallEnterEvent = Call->getCallEnterEvent())
403             AddPieceFID(*CallEnterEvent);
404 
405           WorkList.push_back(&Call->path);
406         } else if (const PathDiagnosticMacroPiece *Macro =
407                        dyn_cast<PathDiagnosticMacroPiece>(&Piece)) {
408           WorkList.push_back(&Macro->subPieces);
409         }
410       }
411     }
412   }
413 
414   // Open the file.
415   std::error_code EC;
416   llvm::raw_fd_ostream o(OutputFile, EC, llvm::sys::fs::F_Text);
417   if (EC) {
418     llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
419     return;
420   }
421 
422   EmitPlistHeader(o);
423 
424   // Write the root object: a <dict> containing...
425   //  - "clang_version", the string representation of clang version
426   //  - "files", an <array> mapping from FIDs to file names
427   //  - "diagnostics", an <array> containing the path diagnostics
428   o << "<dict>\n" <<
429        " <key>clang_version</key>\n";
430   EmitString(o, getClangFullVersion()) << '\n';
431   o << " <key>diagnostics</key>\n"
432        " <array>\n";
433 
434   for (std::vector<const PathDiagnostic*>::iterator DI=Diags.begin(),
435        DE = Diags.end(); DI!=DE; ++DI) {
436 
437     o << "  <dict>\n";
438 
439     const PathDiagnostic *D = *DI;
440     const PathPieces &PP = D->path;
441 
442     assert(std::is_partitioned(
443              PP.begin(), PP.end(),
444              [](const std::shared_ptr<PathDiagnosticPiece> &E)
445                { return E->getKind() == PathDiagnosticPiece::Note; }) &&
446            "PathDiagnostic is not partitioned so that notes precede the rest");
447 
448     PathPieces::const_iterator FirstNonNote = std::partition_point(
449         PP.begin(), PP.end(),
450         [](const std::shared_ptr<PathDiagnosticPiece> &E)
451           { return E->getKind() == PathDiagnosticPiece::Note; });
452 
453     PathPieces::const_iterator I = PP.begin();
454 
455     if (FirstNonNote != PP.begin()) {
456       o << "   <key>notes</key>\n"
457            "   <array>\n";
458 
459       for (; I != FirstNonNote; ++I)
460         ReportDiag(o, **I, FM, *SM, LangOpts);
461 
462       o << "   </array>\n";
463     }
464 
465     o << "   <key>path</key>\n";
466 
467     o << "   <array>\n";
468 
469     for (PathPieces::const_iterator E = PP.end(); I != E; ++I)
470       ReportDiag(o, **I, FM, *SM, LangOpts);
471 
472     o << "   </array>\n";
473 
474     // Output the bug type and bug category.
475     o << "   <key>description</key>";
476     EmitString(o, D->getShortDescription()) << '\n';
477     o << "   <key>category</key>";
478     EmitString(o, D->getCategory()) << '\n';
479     o << "   <key>type</key>";
480     EmitString(o, D->getBugType()) << '\n';
481     o << "   <key>check_name</key>";
482     EmitString(o, D->getCheckName()) << '\n';
483 
484     o << "   <!-- This hash is experimental and going to change! -->\n";
485     o << "   <key>issue_hash_content_of_line_in_context</key>";
486     PathDiagnosticLocation UPDLoc = D->getUniqueingLoc();
487     FullSourceLoc L(SM->getExpansionLoc(UPDLoc.isValid()
488                                             ? UPDLoc.asLocation()
489                                             : D->getLocation().asLocation()),
490                     *SM);
491     const Decl *DeclWithIssue = D->getDeclWithIssue();
492     EmitString(o, GetIssueHash(*SM, L, D->getCheckName(), D->getBugType(),
493                                DeclWithIssue, LangOpts))
494         << '\n';
495 
496     // Output information about the semantic context where
497     // the issue occurred.
498     if (const Decl *DeclWithIssue = D->getDeclWithIssue()) {
499       // FIXME: handle blocks, which have no name.
500       if (const NamedDecl *ND = dyn_cast<NamedDecl>(DeclWithIssue)) {
501         StringRef declKind;
502         switch (ND->getKind()) {
503           case Decl::CXXRecord:
504             declKind = "C++ class";
505             break;
506           case Decl::CXXMethod:
507             declKind = "C++ method";
508             break;
509           case Decl::ObjCMethod:
510             declKind = "Objective-C method";
511             break;
512           case Decl::Function:
513             declKind = "function";
514             break;
515           default:
516             break;
517         }
518         if (!declKind.empty()) {
519           const std::string &declName = ND->getDeclName().getAsString();
520           o << "  <key>issue_context_kind</key>";
521           EmitString(o, declKind) << '\n';
522           o << "  <key>issue_context</key>";
523           EmitString(o, declName) << '\n';
524         }
525 
526         // Output the bug hash for issue unique-ing. Currently, it's just an
527         // offset from the beginning of the function.
528         if (const Stmt *Body = DeclWithIssue->getBody()) {
529 
530           // If the bug uniqueing location exists, use it for the hash.
531           // For example, this ensures that two leaks reported on the same line
532           // will have different issue_hashes and that the hash will identify
533           // the leak location even after code is added between the allocation
534           // site and the end of scope (leak report location).
535           if (UPDLoc.isValid()) {
536             FullSourceLoc UFunL(
537                 SM->getExpansionLoc(
538                     D->getUniqueingDecl()->getBody()->getBeginLoc()),
539                 *SM);
540             o << "  <key>issue_hash_function_offset</key><string>"
541               << L.getExpansionLineNumber() - UFunL.getExpansionLineNumber()
542               << "</string>\n";
543 
544           // Otherwise, use the location on which the bug is reported.
545           } else {
546             FullSourceLoc FunL(SM->getExpansionLoc(Body->getBeginLoc()), *SM);
547             o << "  <key>issue_hash_function_offset</key><string>"
548               << L.getExpansionLineNumber() - FunL.getExpansionLineNumber()
549               << "</string>\n";
550           }
551 
552         }
553       }
554     }
555 
556     // Output the location of the bug.
557     o << "  <key>location</key>\n";
558     EmitLocation(o, *SM, D->getLocation().asLocation(), FM, 2);
559 
560     // Output the diagnostic to the sub-diagnostic client, if any.
561     if (!filesMade->empty()) {
562       StringRef lastName;
563       PDFileEntry::ConsumerFiles *files = filesMade->getFiles(*D);
564       if (files) {
565         for (PDFileEntry::ConsumerFiles::const_iterator CI = files->begin(),
566                 CE = files->end(); CI != CE; ++CI) {
567           StringRef newName = CI->first;
568           if (newName != lastName) {
569             if (!lastName.empty()) {
570               o << "  </array>\n";
571             }
572             lastName = newName;
573             o <<  "  <key>" << lastName << "_files</key>\n";
574             o << "  <array>\n";
575           }
576           o << "   <string>" << CI->second << "</string>\n";
577         }
578         o << "  </array>\n";
579       }
580     }
581 
582     printCoverage(D, /*IndentLevel=*/2, Fids, FM, o);
583 
584     // Close up the entry.
585     o << "  </dict>\n";
586   }
587 
588   o << " </array>\n";
589 
590   o << " <key>files</key>\n"
591        " <array>\n";
592   for (FileID FID : Fids)
593     EmitString(o << "  ", SM->getFileEntryForID(FID)->getName()) << '\n';
594   o << " </array>\n";
595 
596   if (llvm::AreStatisticsEnabled() && SerializeStatistics) {
597     o << " <key>statistics</key>\n";
598     std::string stats;
599     llvm::raw_string_ostream os(stats);
600     llvm::PrintStatisticsJSON(os);
601     os.flush();
602     EmitString(o, html::EscapeText(stats)) << '\n';
603   }
604 
605   // Finish.
606   o << "</dict>\n</plist>";
607 }
608