1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "IncludeCleaner.h"
10 #include "Config.h"
11 #include "Headers.h"
12 #include "ParsedAST.h"
13 #include "Protocol.h"
14 #include "SourceCode.h"
15 #include "support/Logger.h"
16 #include "support/Trace.h"
17 #include "clang/AST/ASTContext.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/RecursiveASTVisitor.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Lex/HeaderSearch.h"
23 #include "clang/Lex/Preprocessor.h"
24 #include "clang/Tooling/Syntax/Tokens.h"
25 #include "llvm/ADT/STLFunctionalExtras.h"
26 #include "llvm/Support/FormatVariadic.h"
27 #include "llvm/Support/Path.h"
28 
29 namespace clang {
30 namespace clangd {
31 
32 static bool AnalyzeStdlib = false;
33 void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; }
34 
35 namespace {
36 
37 /// Crawler traverses the AST and feeds in the locations of (sometimes
38 /// implicitly) used symbols into \p Result.
39 class ReferencedLocationCrawler
40     : public RecursiveASTVisitor<ReferencedLocationCrawler> {
41 public:
42   ReferencedLocationCrawler(ReferencedLocations &Result,
43                             const SourceManager &SM)
44       : Result(Result), SM(SM) {}
45 
46   bool VisitDeclRefExpr(DeclRefExpr *DRE) {
47     add(DRE->getDecl());
48     add(DRE->getFoundDecl());
49     return true;
50   }
51 
52   bool VisitMemberExpr(MemberExpr *ME) {
53     add(ME->getMemberDecl());
54     add(ME->getFoundDecl().getDecl());
55     return true;
56   }
57 
58   bool VisitTagType(TagType *TT) {
59     add(TT->getDecl());
60     return true;
61   }
62 
63   bool VisitFunctionDecl(FunctionDecl *FD) {
64     // Function definition will require redeclarations to be included.
65     if (FD->isThisDeclarationADefinition())
66       add(FD);
67     return true;
68   }
69 
70   bool VisitCXXConstructExpr(CXXConstructExpr *CCE) {
71     add(CCE->getConstructor());
72     return true;
73   }
74 
75   bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) {
76     if (isNew(TST)) {
77       add(TST->getTemplateName().getAsTemplateDecl()); // Primary template.
78       add(TST->getAsCXXRecordDecl());                  // Specialization
79     }
80     return true;
81   }
82 
83   bool VisitUsingType(UsingType *UT) {
84     add(UT->getFoundDecl());
85     return true;
86   }
87 
88   bool VisitTypedefType(TypedefType *TT) {
89     add(TT->getDecl());
90     return true;
91   }
92 
93   // Consider types of any subexpression used, even if the type is not named.
94   // This is helpful in getFoo().bar(), where Foo must be complete.
95   // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to
96   // consider types "used" when they are not directly spelled in code.
97   bool VisitExpr(Expr *E) {
98     TraverseType(E->getType());
99     return true;
100   }
101 
102   bool TraverseType(QualType T) {
103     if (isNew(T.getTypePtrOrNull())) // don't care about quals
104       Base::TraverseType(T);
105     return true;
106   }
107 
108   bool VisitUsingDecl(UsingDecl *D) {
109     for (const auto *Shadow : D->shadows())
110       add(Shadow->getTargetDecl());
111     return true;
112   }
113 
114   // Enums may be usefully forward-declared as *complete* types by specifying
115   // an underlying type. In this case, the definition should see the declaration
116   // so they can be checked for compatibility.
117   bool VisitEnumDecl(EnumDecl *D) {
118     if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo())
119       add(D);
120     return true;
121   }
122 
123   // When the overload is not resolved yet, mark all candidates as used.
124   bool VisitOverloadExpr(OverloadExpr *E) {
125     for (const auto *ResolutionDecl : E->decls())
126       add(ResolutionDecl);
127     return true;
128   }
129 
130 private:
131   using Base = RecursiveASTVisitor<ReferencedLocationCrawler>;
132 
133   void add(const Decl *D) {
134     if (!D || !isNew(D->getCanonicalDecl()))
135       return;
136     if (auto SS = StdRecognizer(D)) {
137       Result.Stdlib.insert(*SS);
138       return;
139     }
140     // Special case RecordDecls, as it is common for them to be forward
141     // declared multiple times. The most common cases are:
142     // - Definition available in TU, only mark that one as usage. The rest is
143     //   likely to be unnecessary. This might result in false positives when an
144     //   internal definition is visible.
145     // - There's a forward declaration in the main file, no need for other
146     //   redecls.
147     if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) {
148       if (const auto *Definition = RD->getDefinition()) {
149         Result.User.insert(Definition->getLocation());
150         return;
151       }
152       if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation()))
153         return;
154     }
155     for (const Decl *Redecl : D->redecls())
156       Result.User.insert(Redecl->getLocation());
157   }
158 
159   bool isNew(const void *P) { return P && Visited.insert(P).second; }
160 
161   ReferencedLocations &Result;
162   llvm::DenseSet<const void *> Visited;
163   const SourceManager &SM;
164   tooling::stdlib::Recognizer StdRecognizer;
165 };
166 
167 // Given a set of referenced FileIDs, determines all the potentially-referenced
168 // files and macros by traversing expansion/spelling locations of macro IDs.
169 // This is used to map the referenced SourceLocations onto real files.
170 struct ReferencedFilesBuilder {
171   ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {}
172   llvm::DenseSet<FileID> Files;
173   llvm::DenseSet<FileID> Macros;
174   const SourceManager &SM;
175 
176   void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); }
177 
178   void add(FileID FID, SourceLocation Loc) {
179     if (FID.isInvalid())
180       return;
181     assert(SM.isInFileID(Loc, FID));
182     if (Loc.isFileID()) {
183       Files.insert(FID);
184       return;
185     }
186     // Don't process the same macro FID twice.
187     if (!Macros.insert(FID).second)
188       return;
189     const auto &Exp = SM.getSLocEntry(FID).getExpansion();
190     add(Exp.getSpellingLoc());
191     add(Exp.getExpansionLocStart());
192     add(Exp.getExpansionLocEnd());
193   }
194 };
195 
196 // Returns the range starting at '#' and ending at EOL. Escaped newlines are not
197 // handled.
198 clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) {
199   clangd::Range Result;
200   Result.end = Result.start = offsetToPosition(Code, HashOffset);
201 
202   // Span the warning until the EOL or EOF.
203   Result.end.character +=
204       lspLength(Code.drop_front(HashOffset).take_until([](char C) {
205         return C == '\n' || C == '\r';
206       }));
207   return Result;
208 }
209 
210 // Finds locations of macros referenced from within the main file. That includes
211 // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`.
212 void findReferencedMacros(const SourceManager &SM, Preprocessor &PP,
213                           const syntax::TokenBuffer *Tokens,
214                           ReferencedLocations &Result) {
215   trace::Span Tracer("IncludeCleaner::findReferencedMacros");
216   // FIXME(kirillbobyrev): The macros from the main file are collected in
217   // ParsedAST's MainFileMacros. However, we can't use it here because it
218   // doesn't handle macro references that were not expanded, e.g. in macro
219   // definitions or preprocessor-disabled sections.
220   //
221   // Extending MainFileMacros to collect missing references and switching to
222   // this mechanism (as opposed to iterating through all tokens) will improve
223   // the performance of findReferencedMacros and also improve other features
224   // relying on MainFileMacros.
225   for (const syntax::Token &Tok : Tokens->spelledTokens(SM.getMainFileID())) {
226     auto Macro = locateMacroAt(Tok, PP);
227     if (!Macro)
228       continue;
229     auto Loc = Macro->Info->getDefinitionLoc();
230     if (Loc.isValid())
231       Result.User.insert(Loc);
232     // FIXME: support stdlib macros
233   }
234 }
235 
236 static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
237   if (Inc.BehindPragmaKeep)
238     return false;
239 
240   // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
241   // System headers are likely to be standard library headers.
242   // Until we have good support for umbrella headers, don't warn about them.
243   if (Inc.Written.front() == '<') {
244     if (AnalyzeStdlib && tooling::stdlib::Header::named(Inc.Written))
245       return true;
246     return false;
247   }
248   // Headers without include guards have side effects and are not
249   // self-contained, skip them.
250   assert(Inc.HeaderID);
251   auto FE = AST.getSourceManager().getFileManager().getFile(
252       AST.getIncludeStructure().getRealPath(
253           static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID)));
254   assert(FE);
255   if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
256           *FE)) {
257     dlog("{0} doesn't have header guard and will not be considered unused",
258          (*FE)->getName());
259     return false;
260   }
261   return true;
262 }
263 
264 // In case symbols are coming from non self-contained header, we need to find
265 // its first includer that is self-contained. This is the header users can
266 // include, so it will be responsible for bringing the symbols from given
267 // header into the scope.
268 FileID headerResponsible(FileID ID, const SourceManager &SM,
269                          const IncludeStructure &Includes) {
270   // Unroll the chain of non self-contained headers until we find the one that
271   // can be included.
272   for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID();
273        FE = SM.getFileEntryForID(ID)) {
274     // If FE is nullptr, we consider it to be the responsible header.
275     if (!FE)
276       break;
277     auto HID = Includes.getID(FE);
278     assert(HID && "We're iterating over headers already existing in "
279                   "IncludeStructure");
280     if (Includes.isSelfContained(*HID))
281       break;
282     // The header is not self-contained: put the responsibility for its symbols
283     // on its includer.
284     ID = SM.getFileID(SM.getIncludeLoc(ID));
285   }
286   return ID;
287 }
288 
289 } // namespace
290 
291 ReferencedLocations findReferencedLocations(const SourceManager &SM,
292                                             ASTContext &Ctx, Preprocessor &PP,
293                                             const syntax::TokenBuffer *Tokens) {
294   trace::Span Tracer("IncludeCleaner::findReferencedLocations");
295   ReferencedLocations Result;
296   ReferencedLocationCrawler Crawler(Result, SM);
297   Crawler.TraverseAST(Ctx);
298   if (Tokens)
299     findReferencedMacros(SM, PP, Tokens, Result);
300   return Result;
301 }
302 
303 ReferencedLocations findReferencedLocations(ParsedAST &AST) {
304   return findReferencedLocations(AST.getSourceManager(), AST.getASTContext(),
305                                  AST.getPreprocessor(), &AST.getTokens());
306 }
307 
308 ReferencedFiles
309 findReferencedFiles(const ReferencedLocations &Locs, const SourceManager &SM,
310                     llvm::function_ref<FileID(FileID)> HeaderResponsible) {
311   std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()};
312   llvm::sort(Sorted); // Group by FileID.
313   ReferencedFilesBuilder Builder(SM);
314   for (auto It = Sorted.begin(); It < Sorted.end();) {
315     FileID FID = SM.getFileID(*It);
316     Builder.add(FID, *It);
317     // Cheaply skip over all the other locations from the same FileID.
318     // This avoids lots of redundant Loc->File lookups for the same file.
319     do
320       ++It;
321     while (It != Sorted.end() && SM.isInFileID(*It, FID));
322   }
323 
324   // If a header is not self-contained, we consider its symbols a logical part
325   // of the including file. Therefore, mark the parents of all used
326   // non-self-contained FileIDs as used. Perform this on FileIDs rather than
327   // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
328   llvm::DenseSet<FileID> UserFiles;
329   for (FileID ID : Builder.Files)
330     UserFiles.insert(HeaderResponsible(ID));
331 
332   llvm::DenseSet<tooling::stdlib::Header> StdlibFiles;
333   for (const auto &Symbol : Locs.Stdlib)
334     for (const auto &Header : Symbol.headers())
335       StdlibFiles.insert(Header);
336 
337   return {std::move(UserFiles), std::move(StdlibFiles)};
338 }
339 
340 ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
341                                     const IncludeStructure &Includes,
342                                     const SourceManager &SM) {
343   return findReferencedFiles(Locs, SM, [&SM, &Includes](FileID ID) {
344     return headerResponsible(ID, SM, Includes);
345   });
346 }
347 
348 std::vector<const Inclusion *>
349 getUnused(ParsedAST &AST,
350           const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) {
351   trace::Span Tracer("IncludeCleaner::getUnused");
352   std::vector<const Inclusion *> Unused;
353   for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
354     if (!MFI.HeaderID)
355       continue;
356     auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
357     bool Used = ReferencedFiles.contains(IncludeID);
358     if (!Used && !mayConsiderUnused(MFI, AST)) {
359       dlog("{0} was not used, but is not eligible to be diagnosed as unused",
360            MFI.Written);
361       continue;
362     }
363     if (!Used)
364       Unused.push_back(&MFI);
365     dlog("{0} is {1}", MFI.Written, Used ? "USED" : "UNUSED");
366   }
367   return Unused;
368 }
369 
370 #ifndef NDEBUG
371 // Is FID a <built-in>, <scratch space> etc?
372 static bool isSpecialBuffer(FileID FID, const SourceManager &SM) {
373   const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile();
374   return FI.getName().startswith("<");
375 }
376 #endif
377 
378 llvm::DenseSet<IncludeStructure::HeaderID>
379 translateToHeaderIDs(const ReferencedFiles &Files,
380                      const IncludeStructure &Includes,
381                      const SourceManager &SM) {
382   trace::Span Tracer("IncludeCleaner::translateToHeaderIDs");
383   llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs;
384   TranslatedHeaderIDs.reserve(Files.User.size());
385   for (FileID FID : Files.User) {
386     const FileEntry *FE = SM.getFileEntryForID(FID);
387     if (!FE) {
388       assert(isSpecialBuffer(FID, SM));
389       continue;
390     }
391     const auto File = Includes.getID(FE);
392     assert(File);
393     TranslatedHeaderIDs.insert(*File);
394   }
395   for (tooling::stdlib::Header StdlibUsed : Files.Stdlib)
396     for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed))
397       TranslatedHeaderIDs.insert(HID);
398   return TranslatedHeaderIDs;
399 }
400 
401 std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
402   const auto &SM = AST.getSourceManager();
403 
404   auto Refs = findReferencedLocations(AST);
405   auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(),
406                                                AST.getSourceManager());
407   auto ReferencedHeaders =
408       translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM);
409   return getUnused(AST, ReferencedHeaders);
410 }
411 
412 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST,
413                                                  llvm::StringRef Code) {
414   const Config &Cfg = Config::current();
415   if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict ||
416       Cfg.Diagnostics.SuppressAll ||
417       Cfg.Diagnostics.Suppress.contains("unused-includes"))
418     return {};
419   trace::Span Tracer("IncludeCleaner::issueUnusedIncludesDiagnostics");
420   std::vector<Diag> Result;
421   std::string FileName =
422       AST.getSourceManager()
423           .getFileEntryForID(AST.getSourceManager().getMainFileID())
424           ->getName()
425           .str();
426   for (const auto *Inc : computeUnusedIncludes(AST)) {
427     Diag D;
428     D.Message =
429         llvm::formatv("included header {0} is not used",
430                       llvm::sys::path::filename(
431                           Inc->Written.substr(1, Inc->Written.size() - 2),
432                           llvm::sys::path::Style::posix));
433     D.Name = "unused-includes";
434     D.Source = Diag::DiagSource::Clangd;
435     D.File = FileName;
436     D.Severity = DiagnosticsEngine::Warning;
437     D.Tags.push_back(Unnecessary);
438     D.Range = getDiagnosticRange(Code, Inc->HashOffset);
439     // FIXME(kirillbobyrev): Removing inclusion might break the code if the
440     // used headers are only reachable transitively through this one. Suggest
441     // including them directly instead.
442     // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
443     // (keep/export) remove the warning once we support IWYU pragmas.
444     D.Fixes.emplace_back();
445     D.Fixes.back().Message = "remove #include directive";
446     D.Fixes.back().Edits.emplace_back();
447     D.Fixes.back().Edits.back().range.start.line = Inc->HashLine;
448     D.Fixes.back().Edits.back().range.end.line = Inc->HashLine + 1;
449     D.InsideMainFile = true;
450     Result.push_back(std::move(D));
451   }
452   return Result;
453 }
454 
455 } // namespace clangd
456 } // namespace clang
457