1 //===--- IncludeCleaner.h - Unused/Missing Headers Analysis -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Include Cleaner is clangd functionality for providing diagnostics for misuse
11 /// of transitive headers and unused includes. It is inspired by the
12 /// Include-What-You-Use tool (https://include-what-you-use.org/). Our goal is
13 /// to provide useful warnings in the most popular scenarios, not 1:1 feature
14 /// compatibility.
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDECLEANER_H
19 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDECLEANER_H
20 
21 #include "Headers.h"
22 #include "ParsedAST.h"
23 #include "index/CanonicalIncludes.h"
24 #include "clang/Basic/SourceLocation.h"
25 #include "clang/Tooling/Inclusions/StandardLibrary.h"
26 #include "llvm/ADT/DenseSet.h"
27 #include "llvm/ADT/STLFunctionalExtras.h"
28 #include "llvm/ADT/StringSet.h"
29 #include <vector>
30 
31 namespace clang {
32 namespace clangd {
33 
/// Result type of findReferencedLocations(): what the main file was found to
/// reference, split into source locations and standard-library symbols.
34 struct ReferencedLocations {
  /// Source locations recorded for referenced symbols.
  /// NOTE(review): presumably covers symbols not tracked in Stdlib -- confirm.
35   llvm::DenseSet<SourceLocation> User;
  /// Referenced symbols recognized as tooling::stdlib::Symbol.
36   llvm::DenseSet<tooling::stdlib::Symbol> Stdlib;
37 };
38 
39 /// Finds locations of all symbols used in the main file.
40 ///
41 /// - RecursiveASTVisitor finds references to symbols and records their
42 ///   associated locations. These may be macro expansions, and are not resolved
43 ///   to their spelling or expansion location. These locations are later used to
44 ///   determine which headers should be marked as "used" and "directly used".
45 /// - If \p Tokens is not nullptr, we also examine all identifier tokens in the
46 ///   file in case they reference macros.
///
47 /// We use this to compute unused headers, so we:
48 ///
49 /// - cover the whole file in a single traversal for efficiency
50 /// - don't attempt to describe where symbols were referenced from in
51 ///   ambiguous cases (e.g. implicitly used symbols, multiple declarations)
52 /// - err on the side of reporting all possible locations
53 ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP,
54                                             const syntax::TokenBuffer *Tokens);
/// Overload taking a ParsedAST.
/// NOTE(review): presumably forwards the AST's context, preprocessor and token
/// buffer to the overload above -- confirm against the implementation.
55 ReferencedLocations findReferencedLocations(ParsedAST &AST);
56 
/// Result type of findReferencedFiles(): the files and headers considered
/// responsible for the symbols referenced in the main file.
57 struct ReferencedFiles {
  /// FileIDs of referenced files.
  /// NOTE(review): may contain IDs such as <built-in> that are not true files;
  /// confirm against findReferencedFiles().
58   llvm::DenseSet<FileID> User;
  /// Referenced headers recognized as tooling::stdlib::Header.
59   llvm::DenseSet<tooling::stdlib::Header> Stdlib;
60   /// Files responsible for the symbols referenced in the main file and defined
61   /// in private headers (private headers have IWYU pragma: private, include
62   /// "public.h"). We store the spelling of the public header (with quotes or
63   /// angle brackets) here to avoid dealing with full filenames and visibility.
64   llvm::StringSet<> SpelledUmbrellas;
65 };
66 
67 /// Retrieves IDs of all files containing SourceLocations from \p Locs.
68 /// The output only includes things SourceManager sees as files (not macro IDs).
69 /// This can include <built-in>, <scratch space> etc that are not true files.
70 /// \p HeaderResponsible returns the public header that should be included given
71 /// symbols from a file with the given FileID (example: public headers should be
72 /// preferred to non self-contained and private headers).
73 /// \p UmbrellaHeader returns the public header that is responsible for
74 /// providing symbols from a file with the given FileID (example: MyType.h
75 /// should be included instead of MyType_impl.h).
76 ReferencedFiles findReferencedFiles(
77     const ReferencedLocations &Locs, const SourceManager &SM,
78     llvm::function_ref<FileID(FileID)> HeaderResponsible,
79     llvm::function_ref<Optional<StringRef>(FileID)> UmbrellaHeader);
/// Overload without explicit callbacks.
/// NOTE(review): presumably derives HeaderResponsible and UmbrellaHeader from
/// \p Includes and \p CanonIncludes -- confirm against the implementation.
80 ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
81                                     const IncludeStructure &Includes,
82                                     const CanonicalIncludes &CanonIncludes,
83                                     const SourceManager &SM);
84 
85 /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs).
86 /// FileIDs that are not true files (<built-in> etc) are dropped.
/// NOTE(review): which members of \p Files are translated (User only, or also
/// Stdlib) is not visible from this declaration -- confirm.
87 llvm::DenseSet<IncludeStructure::HeaderID>
88 translateToHeaderIDs(const ReferencedFiles &Files,
89                      const IncludeStructure &Includes, const SourceManager &SM);
90 
91 /// Retrieves headers that are referenced from the main file but not used.
92 /// In unclear cases, headers are not marked as unused.
/// \p ReferencedFiles is the set of HeaderIDs considered used.
/// NOTE(review): presumably the output of translateToHeaderIDs() -- confirm.
/// \p ReferencedPublicHeaders is a set of header spellings considered used.
/// NOTE(review): presumably ReferencedFiles::SpelledUmbrellas -- confirm.
93 std::vector<const Inclusion *>
94 getUnused(ParsedAST &AST,
95           const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles,
96           const llvm::StringSet<> &ReferencedPublicHeaders);
97 
/// Returns the inclusions of \p AST that are considered unused.
/// NOTE(review): presumably chains findReferencedLocations(),
/// findReferencedFiles(), translateToHeaderIDs() and getUnused() -- confirm.
98 std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST);
99 
/// Returns diagnostics for unused includes in \p AST.
/// NOTE(review): \p Code is presumably the main-file contents, used to compute
/// diagnostic ranges/fixes -- confirm against the implementation.
100 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST,
101                                                  llvm::StringRef Code);
102 
103 /// Affects whether standard library includes should be considered for
104 /// removal. This is off by default for now due to implementation limitations:
105 /// - macros are not tracked
106 /// - symbol names without a unique associated header are not tracked
107 /// - references to std-namespaced C types are not properly tracked:
108 ///   instead of std::size_t -> <cstddef> we see ::size_t -> <stddef.h>
109 /// FIXME: remove this hack once the implementation is good enough.
/// NOTE(review): \p B presumably turns the analysis on (true) or off (false),
/// and the setting looks global rather than per-AST -- confirm.
110 void setIncludeCleanerAnalyzesStdlib(bool B);
111 
112 } // namespace clangd
113 } // namespace clang
114 
115 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDECLEANER_H
116