1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "IncludeCleaner.h"
10 #include "Config.h"
11 #include "Headers.h"
12 #include "ParsedAST.h"
13 #include "Protocol.h"
14 #include "SourceCode.h"
15 #include "support/Logger.h"
16 #include "support/Trace.h"
17 #include "clang/AST/ExprCXX.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Lex/HeaderSearch.h"
22 #include "clang/Lex/Preprocessor.h"
23 #include "clang/Tooling/Syntax/Tokens.h"
24 #include "llvm/Support/FormatVariadic.h"
25 #include "llvm/Support/Path.h"
26 
27 namespace clang {
28 namespace clangd {
29 
30 static bool AnalyzeStdlib = false;
31 void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; }
32 
33 namespace {
34 
35 /// Crawler traverses the AST and feeds in the locations of (sometimes
36 /// implicitly) used symbols into \p Result.
37 class ReferencedLocationCrawler
38     : public RecursiveASTVisitor<ReferencedLocationCrawler> {
39 public:
40   ReferencedLocationCrawler(ReferencedLocations &Result,
41                             const SourceManager &SM)
42       : Result(Result), SM(SM) {}
43 
44   bool VisitDeclRefExpr(DeclRefExpr *DRE) {
45     add(DRE->getDecl());
46     add(DRE->getFoundDecl());
47     return true;
48   }
49 
50   bool VisitMemberExpr(MemberExpr *ME) {
51     add(ME->getMemberDecl());
52     add(ME->getFoundDecl().getDecl());
53     return true;
54   }
55 
56   bool VisitTagType(TagType *TT) {
57     add(TT->getDecl());
58     return true;
59   }
60 
61   bool VisitFunctionDecl(FunctionDecl *FD) {
62     // Function definition will require redeclarations to be included.
63     if (FD->isThisDeclarationADefinition())
64       add(FD);
65     return true;
66   }
67 
68   bool VisitCXXConstructExpr(CXXConstructExpr *CCE) {
69     add(CCE->getConstructor());
70     return true;
71   }
72 
73   bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) {
74     if (isNew(TST)) {
75       add(TST->getTemplateName().getAsTemplateDecl()); // Primary template.
76       add(TST->getAsCXXRecordDecl());                  // Specialization
77     }
78     return true;
79   }
80 
81   bool VisitUsingType(UsingType *UT) {
82     add(UT->getFoundDecl());
83     return true;
84   }
85 
86   bool VisitTypedefType(TypedefType *TT) {
87     add(TT->getDecl());
88     return true;
89   }
90 
91   // Consider types of any subexpression used, even if the type is not named.
92   // This is helpful in getFoo().bar(), where Foo must be complete.
93   // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to
94   // consider types "used" when they are not directly spelled in code.
95   bool VisitExpr(Expr *E) {
96     TraverseType(E->getType());
97     return true;
98   }
99 
100   bool TraverseType(QualType T) {
101     if (isNew(T.getTypePtrOrNull())) // don't care about quals
102       Base::TraverseType(T);
103     return true;
104   }
105 
106   bool VisitUsingDecl(UsingDecl *D) {
107     for (const auto *Shadow : D->shadows())
108       add(Shadow->getTargetDecl());
109     return true;
110   }
111 
112   // Enums may be usefully forward-declared as *complete* types by specifying
113   // an underlying type. In this case, the definition should see the declaration
114   // so they can be checked for compatibility.
115   bool VisitEnumDecl(EnumDecl *D) {
116     if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo())
117       add(D);
118     return true;
119   }
120 
121   // When the overload is not resolved yet, mark all candidates as used.
122   bool VisitOverloadExpr(OverloadExpr *E) {
123     for (const auto *ResolutionDecl : E->decls())
124       add(ResolutionDecl);
125     return true;
126   }
127 
128 private:
129   using Base = RecursiveASTVisitor<ReferencedLocationCrawler>;
130 
131   void add(const Decl *D) {
132     if (!D || !isNew(D->getCanonicalDecl()))
133       return;
134     if (auto SS = StdRecognizer(D)) {
135       Result.Stdlib.insert(*SS);
136       return;
137     }
138     // Special case RecordDecls, as it is common for them to be forward
139     // declared multiple times. The most common cases are:
140     // - Definition available in TU, only mark that one as usage. The rest is
141     //   likely to be unnecessary. This might result in false positives when an
142     //   internal definition is visible.
143     // - There's a forward declaration in the main file, no need for other
144     //   redecls.
145     if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) {
146       if (const auto *Definition = RD->getDefinition()) {
147         Result.User.insert(Definition->getLocation());
148         return;
149       }
150       if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation()))
151         return;
152     }
153     for (const Decl *Redecl : D->redecls())
154       Result.User.insert(Redecl->getLocation());
155   }
156 
157   bool isNew(const void *P) { return P && Visited.insert(P).second; }
158 
159   ReferencedLocations &Result;
160   llvm::DenseSet<const void *> Visited;
161   const SourceManager &SM;
162   stdlib::Recognizer StdRecognizer;
163 };
164 
165 // Given a set of referenced FileIDs, determines all the potentially-referenced
166 // files and macros by traversing expansion/spelling locations of macro IDs.
167 // This is used to map the referenced SourceLocations onto real files.
168 struct ReferencedFilesBuilder {
169   ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {}
170   llvm::DenseSet<FileID> Files;
171   llvm::DenseSet<FileID> Macros;
172   const SourceManager &SM;
173 
174   void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); }
175 
176   void add(FileID FID, SourceLocation Loc) {
177     if (FID.isInvalid())
178       return;
179     assert(SM.isInFileID(Loc, FID));
180     if (Loc.isFileID()) {
181       Files.insert(FID);
182       return;
183     }
184     // Don't process the same macro FID twice.
185     if (!Macros.insert(FID).second)
186       return;
187     const auto &Exp = SM.getSLocEntry(FID).getExpansion();
188     add(Exp.getSpellingLoc());
189     add(Exp.getExpansionLocStart());
190     add(Exp.getExpansionLocEnd());
191   }
192 };
193 
194 // Returns the range starting at '#' and ending at EOL. Escaped newlines are not
195 // handled.
196 clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) {
197   clangd::Range Result;
198   Result.end = Result.start = offsetToPosition(Code, HashOffset);
199 
200   // Span the warning until the EOL or EOF.
201   Result.end.character +=
202       lspLength(Code.drop_front(HashOffset).take_until([](char C) {
203         return C == '\n' || C == '\r';
204       }));
205   return Result;
206 }
207 
208 // Finds locations of macros referenced from within the main file. That includes
209 // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`.
210 void findReferencedMacros(ParsedAST &AST, ReferencedLocations &Result) {
211   trace::Span Tracer("IncludeCleaner::findReferencedMacros");
212   auto &SM = AST.getSourceManager();
213   auto &PP = AST.getPreprocessor();
214   // FIXME(kirillbobyrev): The macros from the main file are collected in
215   // ParsedAST's MainFileMacros. However, we can't use it here because it
216   // doesn't handle macro references that were not expanded, e.g. in macro
217   // definitions or preprocessor-disabled sections.
218   //
219   // Extending MainFileMacros to collect missing references and switching to
220   // this mechanism (as opposed to iterating through all tokens) will improve
221   // the performance of findReferencedMacros and also improve other features
222   // relying on MainFileMacros.
223   for (const syntax::Token &Tok :
224        AST.getTokens().spelledTokens(SM.getMainFileID())) {
225     auto Macro = locateMacroAt(Tok, PP);
226     if (!Macro)
227       continue;
228     auto Loc = Macro->Info->getDefinitionLoc();
229     if (Loc.isValid())
230       Result.User.insert(Loc);
231     // FIXME: support stdlib macros
232   }
233 }
234 
235 static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
236   if (Inc.BehindPragmaKeep)
237     return false;
238 
239   // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
240   // System headers are likely to be standard library headers.
241   // Until we have good support for umbrella headers, don't warn about them.
242   if (Inc.Written.front() == '<') {
243     if (AnalyzeStdlib && stdlib::Header::named(Inc.Written))
244       return true;
245     return false;
246   }
247   // Headers without include guards have side effects and are not
248   // self-contained, skip them.
249   assert(Inc.HeaderID);
250   auto FE = AST.getSourceManager().getFileManager().getFile(
251       AST.getIncludeStructure().getRealPath(
252           static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID)));
253   assert(FE);
254   if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
255           *FE)) {
256     dlog("{0} doesn't have header guard and will not be considered unused",
257          (*FE)->getName());
258     return false;
259   }
260   return true;
261 }
262 
263 // In case symbols are coming from non self-contained header, we need to find
264 // its first includer that is self-contained. This is the header users can
265 // include, so it will be responsible for bringing the symbols from given
266 // header into the scope.
267 FileID headerResponsible(FileID ID, const SourceManager &SM,
268                          const IncludeStructure &Includes) {
269   // Unroll the chain of non self-contained headers until we find the one that
270   // can be included.
271   for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID();
272        FE = SM.getFileEntryForID(ID)) {
273     // If FE is nullptr, we consider it to be the responsible header.
274     if (!FE)
275       break;
276     auto HID = Includes.getID(FE);
277     assert(HID && "We're iterating over headers already existing in "
278                   "IncludeStructure");
279     if (Includes.isSelfContained(*HID))
280       break;
281     // The header is not self-contained: put the responsibility for its symbols
282     // on its includer.
283     ID = SM.getFileID(SM.getIncludeLoc(ID));
284   }
285   return ID;
286 }
287 
288 } // namespace
289 
290 ReferencedLocations findReferencedLocations(ParsedAST &AST) {
291   trace::Span Tracer("IncludeCleaner::findReferencedLocations");
292   ReferencedLocations Result;
293   ReferencedLocationCrawler Crawler(Result, AST.getSourceManager());
294   Crawler.TraverseAST(AST.getASTContext());
295   findReferencedMacros(AST, Result);
296   return Result;
297 }
298 
299 ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
300                                     const IncludeStructure &Includes,
301                                     const SourceManager &SM) {
302   std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()};
303   llvm::sort(Sorted); // Group by FileID.
304   ReferencedFilesBuilder Builder(SM);
305   for (auto It = Sorted.begin(); It < Sorted.end();) {
306     FileID FID = SM.getFileID(*It);
307     Builder.add(FID, *It);
308     // Cheaply skip over all the other locations from the same FileID.
309     // This avoids lots of redundant Loc->File lookups for the same file.
310     do
311       ++It;
312     while (It != Sorted.end() && SM.isInFileID(*It, FID));
313   }
314 
315   // If a header is not self-contained, we consider its symbols a logical part
316   // of the including file. Therefore, mark the parents of all used
317   // non-self-contained FileIDs as used. Perform this on FileIDs rather than
318   // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
319   llvm::DenseSet<FileID> UserFiles;
320   for (FileID ID : Builder.Files)
321     UserFiles.insert(headerResponsible(ID, SM, Includes));
322 
323   llvm::DenseSet<stdlib::Header> StdlibFiles;
324   for (const auto &Symbol : Locs.Stdlib)
325     for (const auto &Header : Symbol.headers())
326       StdlibFiles.insert(Header);
327 
328   return {std::move(UserFiles), std::move(StdlibFiles)};
329 }
330 
331 std::vector<const Inclusion *>
332 getUnused(ParsedAST &AST,
333           const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) {
334   trace::Span Tracer("IncludeCleaner::getUnused");
335   std::vector<const Inclusion *> Unused;
336   for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
337     if (!MFI.HeaderID)
338       continue;
339     auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
340     bool Used = ReferencedFiles.contains(IncludeID);
341     if (!Used && !mayConsiderUnused(MFI, AST)) {
342       dlog("{0} was not used, but is not eligible to be diagnosed as unused",
343            MFI.Written);
344       continue;
345     }
346     if (!Used)
347       Unused.push_back(&MFI);
348     dlog("{0} is {1}", MFI.Written, Used ? "USED" : "UNUSED");
349   }
350   return Unused;
351 }
352 
353 #ifndef NDEBUG
354 // Is FID a <built-in>, <scratch space> etc?
355 static bool isSpecialBuffer(FileID FID, const SourceManager &SM) {
356   const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile();
357   return FI.getName().startswith("<");
358 }
359 #endif
360 
361 llvm::DenseSet<IncludeStructure::HeaderID>
362 translateToHeaderIDs(const ReferencedFiles &Files,
363                      const IncludeStructure &Includes,
364                      const SourceManager &SM) {
365   trace::Span Tracer("IncludeCleaner::translateToHeaderIDs");
366   llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs;
367   TranslatedHeaderIDs.reserve(Files.User.size());
368   for (FileID FID : Files.User) {
369     const FileEntry *FE = SM.getFileEntryForID(FID);
370     if (!FE) {
371       assert(isSpecialBuffer(FID, SM));
372       continue;
373     }
374     const auto File = Includes.getID(FE);
375     assert(File);
376     TranslatedHeaderIDs.insert(*File);
377   }
378   for (stdlib::Header StdlibUsed : Files.Stdlib)
379     for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed))
380       TranslatedHeaderIDs.insert(HID);
381   return TranslatedHeaderIDs;
382 }
383 
384 std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
385   const auto &SM = AST.getSourceManager();
386 
387   auto Refs = findReferencedLocations(AST);
388   auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(),
389                                                AST.getSourceManager());
390   auto ReferencedHeaders =
391       translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM);
392   return getUnused(AST, ReferencedHeaders);
393 }
394 
395 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST,
396                                                  llvm::StringRef Code) {
397   const Config &Cfg = Config::current();
398   if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict ||
399       Cfg.Diagnostics.SuppressAll ||
400       Cfg.Diagnostics.Suppress.contains("unused-includes"))
401     return {};
402   trace::Span Tracer("IncludeCleaner::issueUnusedIncludesDiagnostics");
403   std::vector<Diag> Result;
404   std::string FileName =
405       AST.getSourceManager()
406           .getFileEntryForID(AST.getSourceManager().getMainFileID())
407           ->getName()
408           .str();
409   for (const auto *Inc : computeUnusedIncludes(AST)) {
410     Diag D;
411     D.Message =
412         llvm::formatv("included header {0} is not used",
413                       llvm::sys::path::filename(
414                           Inc->Written.substr(1, Inc->Written.size() - 2),
415                           llvm::sys::path::Style::posix));
416     D.Name = "unused-includes";
417     D.Source = Diag::DiagSource::Clangd;
418     D.File = FileName;
419     D.Severity = DiagnosticsEngine::Warning;
420     D.Tags.push_back(Unnecessary);
421     D.Range = getDiagnosticRange(Code, Inc->HashOffset);
422     // FIXME(kirillbobyrev): Removing inclusion might break the code if the
423     // used headers are only reachable transitively through this one. Suggest
424     // including them directly instead.
425     // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
426     // (keep/export) remove the warning once we support IWYU pragmas.
427     D.Fixes.emplace_back();
428     D.Fixes.back().Message = "remove #include directive";
429     D.Fixes.back().Edits.emplace_back();
430     D.Fixes.back().Edits.back().range.start.line = Inc->HashLine;
431     D.Fixes.back().Edits.back().range.end.line = Inc->HashLine + 1;
432     D.InsideMainFile = true;
433     Result.push_back(std::move(D));
434   }
435   return Result;
436 }
437 
438 } // namespace clangd
439 } // namespace clang
440