1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "IncludeCleaner.h"
10 #include "Config.h"
11 #include "Headers.h"
12 #include "ParsedAST.h"
13 #include "Protocol.h"
14 #include "SourceCode.h"
15 #include "index/CanonicalIncludes.h"
16 #include "support/Logger.h"
17 #include "support/Trace.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/ExprCXX.h"
20 #include "clang/AST/RecursiveASTVisitor.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Lex/HeaderSearch.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Tooling/Syntax/Tokens.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/STLFunctionalExtras.h"
28 #include "llvm/ADT/SmallString.h"
29 #include "llvm/ADT/StringSet.h"
30 #include "llvm/Support/FormatVariadic.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/Regex.h"
33 #include <functional>
34 
35 namespace clang {
36 namespace clangd {
37 
38 static bool AnalyzeStdlib = false;
setIncludeCleanerAnalyzesStdlib(bool B)39 void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; }
40 
41 namespace {
42 
43 /// Crawler traverses the AST and feeds in the locations of (sometimes
44 /// implicitly) used symbols into \p Result.
45 class ReferencedLocationCrawler
46     : public RecursiveASTVisitor<ReferencedLocationCrawler> {
47 public:
ReferencedLocationCrawler(ReferencedLocations & Result,const SourceManager & SM)48   ReferencedLocationCrawler(ReferencedLocations &Result,
49                             const SourceManager &SM)
50       : Result(Result), SM(SM) {}
51 
VisitDeclRefExpr(DeclRefExpr * DRE)52   bool VisitDeclRefExpr(DeclRefExpr *DRE) {
53     add(DRE->getDecl());
54     add(DRE->getFoundDecl());
55     return true;
56   }
57 
VisitMemberExpr(MemberExpr * ME)58   bool VisitMemberExpr(MemberExpr *ME) {
59     add(ME->getMemberDecl());
60     add(ME->getFoundDecl().getDecl());
61     return true;
62   }
63 
VisitTagType(TagType * TT)64   bool VisitTagType(TagType *TT) {
65     add(TT->getDecl());
66     return true;
67   }
68 
VisitFunctionDecl(FunctionDecl * FD)69   bool VisitFunctionDecl(FunctionDecl *FD) {
70     // Function definition will require redeclarations to be included.
71     if (FD->isThisDeclarationADefinition())
72       add(FD);
73     return true;
74   }
75 
VisitCXXConstructExpr(CXXConstructExpr * CCE)76   bool VisitCXXConstructExpr(CXXConstructExpr *CCE) {
77     add(CCE->getConstructor());
78     return true;
79   }
80 
VisitTemplateSpecializationType(TemplateSpecializationType * TST)81   bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) {
82     // Using templateName case is handled by the override TraverseTemplateName.
83     if (TST->getTemplateName().getKind() == TemplateName::UsingTemplate)
84       return true;
85     add(TST->getAsCXXRecordDecl());                  // Specialization
86     return true;
87   }
88 
89   // There is no VisitTemplateName in RAV, thus we override the Traverse version
90   // to handle the Using TemplateName case.
TraverseTemplateName(TemplateName TN)91   bool TraverseTemplateName(TemplateName TN) {
92     VisitTemplateName(TN);
93     return Base::TraverseTemplateName(TN);
94   }
95   // A pseudo VisitTemplateName, dispatched by the above TraverseTemplateName!
VisitTemplateName(TemplateName TN)96   bool VisitTemplateName(TemplateName TN) {
97     if (const auto *USD = TN.getAsUsingShadowDecl()) {
98       add(USD);
99       return true;
100     }
101     add(TN.getAsTemplateDecl()); // Primary template.
102     return true;
103   }
104 
VisitUsingType(UsingType * UT)105   bool VisitUsingType(UsingType *UT) {
106     add(UT->getFoundDecl());
107     return true;
108   }
109 
VisitTypedefType(TypedefType * TT)110   bool VisitTypedefType(TypedefType *TT) {
111     add(TT->getDecl());
112     return true;
113   }
114 
115   // Consider types of any subexpression used, even if the type is not named.
116   // This is helpful in getFoo().bar(), where Foo must be complete.
117   // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to
118   // consider types "used" when they are not directly spelled in code.
VisitExpr(Expr * E)119   bool VisitExpr(Expr *E) {
120     TraverseType(E->getType());
121     return true;
122   }
123 
TraverseType(QualType T)124   bool TraverseType(QualType T) {
125     if (isNew(T.getTypePtrOrNull())) // don't care about quals
126       Base::TraverseType(T);
127     return true;
128   }
129 
VisitUsingDecl(UsingDecl * D)130   bool VisitUsingDecl(UsingDecl *D) {
131     for (const auto *Shadow : D->shadows())
132       add(Shadow->getTargetDecl());
133     return true;
134   }
135 
136   // Enums may be usefully forward-declared as *complete* types by specifying
137   // an underlying type. In this case, the definition should see the declaration
138   // so they can be checked for compatibility.
VisitEnumDecl(EnumDecl * D)139   bool VisitEnumDecl(EnumDecl *D) {
140     if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo())
141       add(D);
142     return true;
143   }
144 
145   // When the overload is not resolved yet, mark all candidates as used.
VisitOverloadExpr(OverloadExpr * E)146   bool VisitOverloadExpr(OverloadExpr *E) {
147     for (const auto *ResolutionDecl : E->decls())
148       add(ResolutionDecl);
149     return true;
150   }
151 
152 private:
153   using Base = RecursiveASTVisitor<ReferencedLocationCrawler>;
154 
add(const Decl * D)155   void add(const Decl *D) {
156     if (!D || !isNew(D->getCanonicalDecl()))
157       return;
158     if (auto SS = StdRecognizer(D)) {
159       Result.Stdlib.insert(*SS);
160       return;
161     }
162     // Special case RecordDecls, as it is common for them to be forward
163     // declared multiple times. The most common cases are:
164     // - Definition available in TU, only mark that one as usage. The rest is
165     //   likely to be unnecessary. This might result in false positives when an
166     //   internal definition is visible.
167     // - There's a forward declaration in the main file, no need for other
168     //   redecls.
169     if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) {
170       if (const auto *Definition = RD->getDefinition()) {
171         Result.User.insert(Definition->getLocation());
172         return;
173       }
174       if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation()))
175         return;
176     }
177     for (const Decl *Redecl : D->redecls())
178       Result.User.insert(Redecl->getLocation());
179   }
180 
isNew(const void * P)181   bool isNew(const void *P) { return P && Visited.insert(P).second; }
182 
183   ReferencedLocations &Result;
184   llvm::DenseSet<const void *> Visited;
185   const SourceManager &SM;
186   tooling::stdlib::Recognizer StdRecognizer;
187 };
188 
189 // Given a set of referenced FileIDs, determines all the potentially-referenced
190 // files and macros by traversing expansion/spelling locations of macro IDs.
191 // This is used to map the referenced SourceLocations onto real files.
192 struct ReferencedFilesBuilder {
ReferencedFilesBuilderclang::clangd::__anona29aa5690111::ReferencedFilesBuilder193   ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {}
194   llvm::DenseSet<FileID> Files;
195   llvm::DenseSet<FileID> Macros;
196   const SourceManager &SM;
197 
addclang::clangd::__anona29aa5690111::ReferencedFilesBuilder198   void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); }
199 
addclang::clangd::__anona29aa5690111::ReferencedFilesBuilder200   void add(FileID FID, SourceLocation Loc) {
201     if (FID.isInvalid())
202       return;
203     assert(SM.isInFileID(Loc, FID));
204     if (Loc.isFileID()) {
205       Files.insert(FID);
206       return;
207     }
208     // Don't process the same macro FID twice.
209     if (!Macros.insert(FID).second)
210       return;
211     const auto &Exp = SM.getSLocEntry(FID).getExpansion();
212     add(Exp.getSpellingLoc());
213     add(Exp.getExpansionLocStart());
214     add(Exp.getExpansionLocEnd());
215   }
216 };
217 
218 // Returns the range starting at '#' and ending at EOL. Escaped newlines are not
219 // handled.
getDiagnosticRange(llvm::StringRef Code,unsigned HashOffset)220 clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) {
221   clangd::Range Result;
222   Result.end = Result.start = offsetToPosition(Code, HashOffset);
223 
224   // Span the warning until the EOL or EOF.
225   Result.end.character +=
226       lspLength(Code.drop_front(HashOffset).take_until([](char C) {
227         return C == '\n' || C == '\r';
228       }));
229   return Result;
230 }
231 
232 // Finds locations of macros referenced from within the main file. That includes
233 // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`.
findReferencedMacros(const SourceManager & SM,Preprocessor & PP,const syntax::TokenBuffer * Tokens,ReferencedLocations & Result)234 void findReferencedMacros(const SourceManager &SM, Preprocessor &PP,
235                           const syntax::TokenBuffer *Tokens,
236                           ReferencedLocations &Result) {
237   trace::Span Tracer("IncludeCleaner::findReferencedMacros");
238   // FIXME(kirillbobyrev): The macros from the main file are collected in
239   // ParsedAST's MainFileMacros. However, we can't use it here because it
240   // doesn't handle macro references that were not expanded, e.g. in macro
241   // definitions or preprocessor-disabled sections.
242   //
243   // Extending MainFileMacros to collect missing references and switching to
244   // this mechanism (as opposed to iterating through all tokens) will improve
245   // the performance of findReferencedMacros and also improve other features
246   // relying on MainFileMacros.
247   for (const syntax::Token &Tok : Tokens->spelledTokens(SM.getMainFileID())) {
248     auto Macro = locateMacroAt(Tok, PP);
249     if (!Macro)
250       continue;
251     auto Loc = Macro->Info->getDefinitionLoc();
252     if (Loc.isValid())
253       Result.User.insert(Loc);
254     // FIXME: support stdlib macros
255   }
256 }
257 
mayConsiderUnused(const Inclusion & Inc,ParsedAST & AST,const Config & Cfg)258 static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST,
259                               const Config &Cfg) {
260   if (Inc.BehindPragmaKeep)
261     return false;
262 
263   // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
264   // System headers are likely to be standard library headers.
265   // Until we have good support for umbrella headers, don't warn about them.
266   if (Inc.Written.front() == '<') {
267     if (AnalyzeStdlib && tooling::stdlib::Header::named(Inc.Written))
268       return true;
269     return false;
270   }
271   assert(Inc.HeaderID);
272   auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
273   // FIXME: Ignore the headers with IWYU export pragmas for now, remove this
274   // check when we have more precise tracking of exported headers.
275   if (AST.getIncludeStructure().hasIWYUExport(HID))
276     return false;
277   auto FE = AST.getSourceManager().getFileManager().getFileRef(
278       AST.getIncludeStructure().getRealPath(HID));
279   assert(FE);
280   // Headers without include guards have side effects and are not
281   // self-contained, skip them.
282   if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
283           &FE->getFileEntry())) {
284     dlog("{0} doesn't have header guard and will not be considered unused",
285          FE->getName());
286     return false;
287   }
288   for (auto &Filter : Cfg.Diagnostics.Includes.IgnoreHeader) {
289     // Convert the path to Unix slashes and try to match aginast the fiilter.
290     llvm::SmallString<64> Path(Inc.Resolved);
291     llvm::sys::path::native(Path, llvm::sys::path::Style::posix);
292     if (Filter(Inc.Resolved)) {
293       dlog("{0} header is filtered out by the configuration", FE->getName());
294       return false;
295     }
296   }
297   return true;
298 }
299 
300 // In case symbols are coming from non self-contained header, we need to find
301 // its first includer that is self-contained. This is the header users can
302 // include, so it will be responsible for bringing the symbols from given
303 // header into the scope.
headerResponsible(FileID ID,const SourceManager & SM,const IncludeStructure & Includes)304 FileID headerResponsible(FileID ID, const SourceManager &SM,
305                          const IncludeStructure &Includes) {
306   // Unroll the chain of non self-contained headers until we find the one that
307   // can be included.
308   for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID();
309        FE = SM.getFileEntryForID(ID)) {
310     // If FE is nullptr, we consider it to be the responsible header.
311     if (!FE)
312       break;
313     auto HID = Includes.getID(FE);
314     assert(HID && "We're iterating over headers already existing in "
315                   "IncludeStructure");
316     if (Includes.isSelfContained(*HID))
317       break;
318     // The header is not self-contained: put the responsibility for its symbols
319     // on its includer.
320     ID = SM.getFileID(SM.getIncludeLoc(ID));
321   }
322   return ID;
323 }
324 
325 } // namespace
326 
findReferencedLocations(ASTContext & Ctx,Preprocessor & PP,const syntax::TokenBuffer * Tokens)327 ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP,
328                                             const syntax::TokenBuffer *Tokens) {
329   trace::Span Tracer("IncludeCleaner::findReferencedLocations");
330   ReferencedLocations Result;
331   const auto &SM = Ctx.getSourceManager();
332   ReferencedLocationCrawler Crawler(Result, SM);
333   Crawler.TraverseAST(Ctx);
334   if (Tokens)
335     findReferencedMacros(SM, PP, Tokens, Result);
336   return Result;
337 }
338 
findReferencedLocations(ParsedAST & AST)339 ReferencedLocations findReferencedLocations(ParsedAST &AST) {
340   return findReferencedLocations(AST.getASTContext(), AST.getPreprocessor(),
341                                  &AST.getTokens());
342 }
343 
findReferencedFiles(const ReferencedLocations & Locs,const SourceManager & SM,llvm::function_ref<FileID (FileID)> HeaderResponsible,llvm::function_ref<Optional<StringRef> (FileID)> UmbrellaHeader)344 ReferencedFiles findReferencedFiles(
345     const ReferencedLocations &Locs, const SourceManager &SM,
346     llvm::function_ref<FileID(FileID)> HeaderResponsible,
347     llvm::function_ref<Optional<StringRef>(FileID)> UmbrellaHeader) {
348   std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()};
349   llvm::sort(Sorted); // Group by FileID.
350   ReferencedFilesBuilder Builder(SM);
351   for (auto It = Sorted.begin(); It < Sorted.end();) {
352     FileID FID = SM.getFileID(*It);
353     Builder.add(FID, *It);
354     // Cheaply skip over all the other locations from the same FileID.
355     // This avoids lots of redundant Loc->File lookups for the same file.
356     do
357       ++It;
358     while (It != Sorted.end() && SM.isInFileID(*It, FID));
359   }
360 
361   // If a header is not self-contained, we consider its symbols a logical part
362   // of the including file. Therefore, mark the parents of all used
363   // non-self-contained FileIDs as used. Perform this on FileIDs rather than
364   // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
365   llvm::DenseSet<FileID> UserFiles;
366   llvm::StringSet<> PublicHeaders;
367   for (FileID ID : Builder.Files) {
368     UserFiles.insert(HeaderResponsible(ID));
369     if (auto PublicHeader = UmbrellaHeader(ID)) {
370       PublicHeaders.insert(*PublicHeader);
371     }
372   }
373 
374   llvm::DenseSet<tooling::stdlib::Header> StdlibFiles;
375   for (const auto &Symbol : Locs.Stdlib)
376     for (const auto &Header : Symbol.headers())
377       StdlibFiles.insert(Header);
378 
379   return {std::move(UserFiles), std::move(StdlibFiles),
380           std::move(PublicHeaders)};
381 }
382 
findReferencedFiles(const ReferencedLocations & Locs,const IncludeStructure & Includes,const CanonicalIncludes & CanonIncludes,const SourceManager & SM)383 ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
384                                     const IncludeStructure &Includes,
385                                     const CanonicalIncludes &CanonIncludes,
386                                     const SourceManager &SM) {
387   return findReferencedFiles(
388       Locs, SM,
389       [&SM, &Includes](FileID ID) {
390         return headerResponsible(ID, SM, Includes);
391       },
392       [&SM, &CanonIncludes](FileID ID) -> Optional<StringRef> {
393         auto Entry = SM.getFileEntryRefForID(ID);
394         if (!Entry)
395           return llvm::None;
396         auto PublicHeader = CanonIncludes.mapHeader(*Entry);
397         if (PublicHeader.empty())
398           return llvm::None;
399         return PublicHeader;
400       });
401 }
402 
403 std::vector<const Inclusion *>
getUnused(ParsedAST & AST,const llvm::DenseSet<IncludeStructure::HeaderID> & ReferencedFiles,const llvm::StringSet<> & ReferencedPublicHeaders)404 getUnused(ParsedAST &AST,
405           const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles,
406           const llvm::StringSet<> &ReferencedPublicHeaders) {
407   trace::Span Tracer("IncludeCleaner::getUnused");
408   const Config &Cfg = Config::current();
409   std::vector<const Inclusion *> Unused;
410   for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
411     if (!MFI.HeaderID)
412       continue;
413     if (ReferencedPublicHeaders.contains(MFI.Written))
414       continue;
415     auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
416     bool Used = ReferencedFiles.contains(IncludeID);
417     if (!Used && !mayConsiderUnused(MFI, AST, Cfg)) {
418       dlog("{0} was not used, but is not eligible to be diagnosed as unused",
419            MFI.Written);
420       continue;
421     }
422     if (!Used)
423       Unused.push_back(&MFI);
424     dlog("{0} is {1}", MFI.Written, Used ? "USED" : "UNUSED");
425   }
426   return Unused;
427 }
428 
429 #ifndef NDEBUG
430 // Is FID a <built-in>, <scratch space> etc?
isSpecialBuffer(FileID FID,const SourceManager & SM)431 static bool isSpecialBuffer(FileID FID, const SourceManager &SM) {
432   const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile();
433   return FI.getName().startswith("<");
434 }
435 #endif
436 
437 llvm::DenseSet<IncludeStructure::HeaderID>
translateToHeaderIDs(const ReferencedFiles & Files,const IncludeStructure & Includes,const SourceManager & SM)438 translateToHeaderIDs(const ReferencedFiles &Files,
439                      const IncludeStructure &Includes,
440                      const SourceManager &SM) {
441   trace::Span Tracer("IncludeCleaner::translateToHeaderIDs");
442   llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs;
443   TranslatedHeaderIDs.reserve(Files.User.size());
444   for (FileID FID : Files.User) {
445     const FileEntry *FE = SM.getFileEntryForID(FID);
446     if (!FE) {
447       assert(isSpecialBuffer(FID, SM));
448       continue;
449     }
450     const auto File = Includes.getID(FE);
451     assert(File);
452     TranslatedHeaderIDs.insert(*File);
453   }
454   for (tooling::stdlib::Header StdlibUsed : Files.Stdlib)
455     for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed))
456       TranslatedHeaderIDs.insert(HID);
457   return TranslatedHeaderIDs;
458 }
459 
computeUnusedIncludes(ParsedAST & AST)460 std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
461   const auto &SM = AST.getSourceManager();
462 
463   auto Refs = findReferencedLocations(AST);
464   auto ReferencedFiles =
465       findReferencedFiles(Refs, AST.getIncludeStructure(),
466                           AST.getCanonicalIncludes(), AST.getSourceManager());
467   auto ReferencedHeaders =
468       translateToHeaderIDs(ReferencedFiles, AST.getIncludeStructure(), SM);
469   return getUnused(AST, ReferencedHeaders, ReferencedFiles.SpelledUmbrellas);
470 }
471 
issueUnusedIncludesDiagnostics(ParsedAST & AST,llvm::StringRef Code)472 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST,
473                                                  llvm::StringRef Code) {
474   const Config &Cfg = Config::current();
475   if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict ||
476       Cfg.Diagnostics.SuppressAll ||
477       Cfg.Diagnostics.Suppress.contains("unused-includes"))
478     return {};
479   trace::Span Tracer("IncludeCleaner::issueUnusedIncludesDiagnostics");
480   std::vector<Diag> Result;
481   std::string FileName =
482       AST.getSourceManager()
483           .getFileEntryRefForID(AST.getSourceManager().getMainFileID())
484           ->getName()
485           .str();
486   for (const auto *Inc : computeUnusedIncludes(AST)) {
487     Diag D;
488     D.Message =
489         llvm::formatv("included header {0} is not used directly",
490                       llvm::sys::path::filename(
491                           Inc->Written.substr(1, Inc->Written.size() - 2),
492                           llvm::sys::path::Style::posix));
493     D.Name = "unused-includes";
494     D.Source = Diag::DiagSource::Clangd;
495     D.File = FileName;
496     D.Severity = DiagnosticsEngine::Warning;
497     D.Tags.push_back(Unnecessary);
498     D.Range = getDiagnosticRange(Code, Inc->HashOffset);
499     // FIXME(kirillbobyrev): Removing inclusion might break the code if the
500     // used headers are only reachable transitively through this one. Suggest
501     // including them directly instead.
502     // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
503     // (keep/export) remove the warning once we support IWYU pragmas.
504     D.Fixes.emplace_back();
505     D.Fixes.back().Message = "remove #include directive";
506     D.Fixes.back().Edits.emplace_back();
507     D.Fixes.back().Edits.back().range.start.line = Inc->HashLine;
508     D.Fixes.back().Edits.back().range.end.line = Inc->HashLine + 1;
509     D.InsideMainFile = true;
510     Result.push_back(std::move(D));
511   }
512   return Result;
513 }
514 
515 } // namespace clangd
516 } // namespace clang
517