1 //===--- Headers.h - Include headers -----------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H
10 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H
11 
12 #include "Protocol.h"
13 #include "SourceCode.h"
14 #include "index/Symbol.h"
15 #include "support/Path.h"
16 #include "clang/Basic/FileEntry.h"
17 #include "clang/Basic/TokenKinds.h"
18 #include "clang/Format/Format.h"
19 #include "clang/Frontend/CompilerInstance.h"
20 #include "clang/Lex/HeaderSearch.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
23 #include "clang/Tooling/Inclusions/StandardLibrary.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/DenseSet.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/StringSet.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/FileSystem/UniqueID.h"
30 #include <string>
31 
32 namespace clang {
33 namespace clangd {
34 
/// Returns true if \p Include is a literal include like "path" or <path>.
///
/// \param Include the include spelling to test.
/// \return true when \p Include has the quoted "path" or angled <path> form.
bool isLiteralInclude(llvm::StringRef Include);
37 
/// If Text begins an Include-What-You-Use directive, returns it.
/// Given "// IWYU pragma: keep", returns "keep".
/// Input is a null-terminated char* as provided by SM.getCharacterData().
/// (This should not be StringRef as we do *not* want to scan for its length).
///
/// \param Text null-terminated text positioned at the start of a comment.
/// \return the directive text following the pragma prefix.
/// NOTE(review): presumably an empty Optional when \p Text is not an IWYU
/// pragma - confirm against the definition.
llvm::Optional<StringRef> parseIWYUPragma(const char *Text);
43 
/// Represents a header file to be #include'd.
struct HeaderFile {
  /// Either a verbatim include spelling or an absolute file path; which one is
  /// determined by `Verbatim`.
  std::string File;
  /// If this is true, `File` is a literal string quoted with <> or "" that
  /// can be #included directly; otherwise, `File` is an absolute file path.
  /// Defaults to false so that a default-constructed HeaderFile never holds an
  /// indeterminate value (matching the other structs in this header, which
  /// give their scalar members default initializers).
  bool Verbatim = false;

  /// Validity check; defined out of line.
  bool valid() const;
};
53 
/// Creates a `HeaderFile` from \p Header which can be either a URI or a literal
/// include.
///
/// \param Header a URI or a literal include spelling ("path" or <path>).
/// \param HintPath NOTE(review): presumably a path used as a hint when
/// resolving a URI to a file path - confirm against the definition.
/// \return the resulting HeaderFile, or an error (the failure conditions are
/// determined by the out-of-line definition).
llvm::Expected<HeaderFile> toHeaderFile(llvm::StringRef Header,
                                        llvm::StringRef HintPath);
58 
// Returns include headers for \p Sym sorted by popularity. If two headers are
// equally popular, prefer the shorter one.
// The result holds StringRefs, i.e. non-owning views.
// NOTE(review): presumably these views point into storage owned by \p Sym, so
// the result must not outlive it - confirm against the definition.
llvm::SmallVector<llvm::StringRef, 1> getRankedIncludes(const Symbol &Sym);
62 
// An #include directive that we found in the main file.
struct Inclusion {
  // NOTE(review): Directive is the only scalar member here without a default
  // initializer; code that default-constructs an Inclusion must set it.
  tok::PPKeywordKind Directive; // Directive used for inclusion, e.g. import
  std::string Written;          // Inclusion name as written e.g. <vector>.
  Path Resolved; // Resolved path of included file. Empty if not resolved.
  unsigned HashOffset = 0; // Byte offset from start of file to #.
  int HashLine = 0;        // Line number containing the directive, 0-indexed.
  // File characteristic of the included file; defaults to a user file.
  SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
  // Optional numeric ID of the included header.
  // NOTE(review): presumably the underlying value of
  // IncludeStructure::HeaderID, unset when the include was not resolved -
  // confirm against the code that fills it in.
  llvm::Optional<unsigned> HeaderID;
  bool BehindPragmaKeep = false; // Has IWYU pragma: keep right after.
};
// Stream-output and equality operators for Inclusion; both are defined out of
// line.
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &);
bool operator==(const Inclusion &LHS, const Inclusion &RHS);
76 
// Contains information about one file in the build graph and its direct
// dependencies. Doesn't own the strings it references (IncludeGraph is
// self-contained).
struct IncludeGraphNode {
  // Bit flags describing the file; combine them with the operator|, operator&
  // and operator|= overloads declared after IncludeGraph.
  enum class SourceFlag : uint8_t {
    None = 0,
    // Whether current file is a main file rather than a header.
    IsTU = 1 << 0,
    // Whether current file had any uncompilable errors during indexing.
    HadErrors = 1 << 1,
  };

  // Flags set for this file; no flag is set by default.
  SourceFlag Flags = SourceFlag::None;
  // URI of this file (non-owning view).
  llvm::StringRef URI;
  // Digest of the file, zero-initialized by default.
  // NOTE(review): presumably a hash of the file contents - confirm against the
  // FileDigest definition.
  FileDigest Digest{{0}};
  // The files directly included by this file (non-owning views).
  std::vector<llvm::StringRef> DirectIncludes;
};
// Maps a string key to the IncludeGraphNode describing that file.
// The strings a node refers to (its URI and DirectIncludes fields, called
// FileURI and FileInclusions in an earlier version of this comment) are
// references to keys of the map containing them.
// Important: The graph generated by those callbacks might contain cycles, self
// edges and multi edges.
using IncludeGraph = llvm::StringMap<IncludeGraphNode>;
99 
100 inline IncludeGraphNode::SourceFlag operator|(IncludeGraphNode::SourceFlag A,
101                                               IncludeGraphNode::SourceFlag B) {
102   return static_cast<IncludeGraphNode::SourceFlag>(static_cast<uint8_t>(A) |
103                                                    static_cast<uint8_t>(B));
104 }
105 
106 inline bool operator&(IncludeGraphNode::SourceFlag A,
107                       IncludeGraphNode::SourceFlag B) {
108   return static_cast<uint8_t>(A) & static_cast<uint8_t>(B);
109 }
110 
111 inline IncludeGraphNode::SourceFlag &
112 operator|=(IncludeGraphNode::SourceFlag &A, IncludeGraphNode::SourceFlag B) {
113   return A = A | B;
114 }
115 
116 // Information captured about the inclusion graph in a translation unit.
117 // This includes detailed information about the direct #includes, and summary
118 // information about all transitive includes.
119 //
120 // It should be built incrementally with collectIncludeStructureCallback().
121 // When we build the preamble, we capture and store its include structure along
122 // with the preamble data. When we use the preamble, we can copy its
123 // IncludeStructure and use another collectIncludeStructureCallback() to fill
124 // in any non-preamble inclusions.
125 class IncludeStructure {
126 public:
IncludeStructure()127   IncludeStructure() {
128     // Reserve HeaderID = 0 for the main file.
129     RealPathNames.emplace_back();
130   }
131 
132   // Inserts a PPCallback and CommentHandler that visits all includes in the
133   // main file and populates the structure. It will also scan for IWYU pragmas
134   // in comments.
135   void collect(const CompilerInstance &CI);
136 
137   // HeaderID identifies file in the include graph. It corresponds to a
138   // FileEntry rather than a FileID, but stays stable across preamble & main
139   // file builds.
140   enum class HeaderID : unsigned {};
141 
142   llvm::Optional<HeaderID> getID(const FileEntry *Entry) const;
143   HeaderID getOrCreateID(FileEntryRef Entry);
144 
getRealPath(HeaderID ID)145   StringRef getRealPath(HeaderID ID) const {
146     assert(static_cast<unsigned>(ID) <= RealPathNames.size());
147     return RealPathNames[static_cast<unsigned>(ID)];
148   }
149 
isSelfContained(HeaderID ID)150   bool isSelfContained(HeaderID ID) const {
151     return !NonSelfContained.contains(ID);
152   }
153 
hasIWYUExport(HeaderID ID)154   bool hasIWYUExport(HeaderID ID) const {
155     return HasIWYUExport.contains(ID);
156   }
157 
158   // Return all transitively reachable files.
allHeaders()159   llvm::ArrayRef<std::string> allHeaders() const { return RealPathNames; }
160 
161   // Return all transitively reachable files, and their minimum include depth.
162   // All transitive includes (absolute paths), with their minimum include depth.
163   // Root --> 0, #included file --> 1, etc.
164   // Root is the ID of the header being visited first.
165   llvm::DenseMap<HeaderID, unsigned>
166   includeDepth(HeaderID Root = MainFileID) const;
167 
168   // Maps HeaderID to the ids of the files included from it.
169   llvm::DenseMap<HeaderID, SmallVector<HeaderID>> IncludeChildren;
170 
171   llvm::DenseMap<tooling::stdlib::Header, llvm::SmallVector<HeaderID>>
172       StdlibHeaders;
173 
174   std::vector<Inclusion> MainFileIncludes;
175 
176   // We reserve HeaderID(0) for the main file and will manually check for that
177   // in getID and getOrCreateID because the UniqueID is not stable when the
178   // content of the main file changes.
179   static const HeaderID MainFileID = HeaderID(0u);
180 
181   class RecordHeaders;
182 
183 private:
184   // MainFileEntry will be used to check if the queried file is the main file
185   // or not.
186   const FileEntry *MainFileEntry = nullptr;
187 
188   std::vector<std::string> RealPathNames; // In HeaderID order.
189   // FileEntry::UniqueID is mapped to the internal representation (HeaderID).
190   // Identifying files in a way that persists from preamble build to subsequent
191   // builds is surprisingly hard. FileID is unavailable in InclusionDirective(),
192   // and RealPathName and UniqueID are not preserved in
193   // the preamble.
194   llvm::DenseMap<llvm::sys::fs::UniqueID, HeaderID> UIDToIndex;
195   // Contains HeaderIDs of all non self-contained entries in the
196   // IncludeStructure.
197   llvm::DenseSet<HeaderID> NonSelfContained;
198   // Contains a set of headers that have either "IWYU pragma: export" or "IWYU
199   // pragma: begin_exports".
200   llvm::DenseSet<HeaderID> HasIWYUExport;
201 };
202 
203 // Calculates insertion edit for including a new header in a file.
204 class IncludeInserter {
205 public:
206   // If \p HeaderSearchInfo is nullptr (e.g. when compile command is
207   // infeasible), this will only try to insert verbatim headers, and
208   // include path of non-verbatim header will not be shortened.
IncludeInserter(StringRef FileName,StringRef Code,const format::FormatStyle & Style,StringRef BuildDir,HeaderSearch * HeaderSearchInfo)209   IncludeInserter(StringRef FileName, StringRef Code,
210                   const format::FormatStyle &Style, StringRef BuildDir,
211                   HeaderSearch *HeaderSearchInfo)
212       : FileName(FileName), Code(Code), BuildDir(BuildDir),
213         HeaderSearchInfo(HeaderSearchInfo),
214         Inserter(FileName, Code, Style.IncludeStyle) {}
215 
216   void addExisting(const Inclusion &Inc);
217 
218   /// Checks whether to add an #include of the header into \p File.
219   /// An #include will not be added if:
220   ///   - Either \p DeclaringHeader or \p InsertedHeader is already (directly)
221   ///   in \p Inclusions (including those included via different paths).
222   ///   - \p DeclaringHeader or \p InsertedHeader is the same as \p File.
223   ///
224   /// \param DeclaringHeader is path of the original header corresponding to \p
225   /// InsertedHeader e.g. the header that declares a symbol.
226   /// \param InsertedHeader The preferred header to be inserted. This could be
227   /// the same as DeclaringHeader but must be provided.
228   bool shouldInsertInclude(PathRef DeclaringHeader,
229                            const HeaderFile &InsertedHeader) const;
230 
231   /// Determines the preferred way to #include a file, taking into account the
232   /// search path. Usually this will prefer a shorter representation like
233   /// 'Foo/Bar.h' over a longer one like 'Baz/include/Foo/Bar.h'.
234   ///
235   /// \param InsertedHeader The preferred header to be inserted.
236   ///
237   /// \param IncludingFile is the absolute path of the file that InsertedHeader
238   /// will be inserted.
239   ///
240   /// \return A quoted "path" or <path> to be included, or None if it couldn't
241   /// be shortened.
242   llvm::Optional<std::string>
243   calculateIncludePath(const HeaderFile &InsertedHeader,
244                        llvm::StringRef IncludingFile) const;
245 
246   /// Calculates an edit that inserts \p VerbatimHeader into code. If the header
247   /// is already included, this returns None.
248   llvm::Optional<TextEdit> insert(llvm::StringRef VerbatimHeader) const;
249 
250 private:
251   StringRef FileName;
252   StringRef Code;
253   StringRef BuildDir;
254   HeaderSearch *HeaderSearchInfo = nullptr;
255   llvm::StringSet<> IncludedHeaders; // Both written and resolved.
256   tooling::HeaderIncludes Inserter;  // Computers insertion replacement.
257 };
258 
259 } // namespace clangd
260 } // namespace clang
261 
262 namespace llvm {
263 
264 // Support HeaderIDs as DenseMap keys.
265 template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> {
266   static inline clang::clangd::IncludeStructure::HeaderID getEmptyKey() {
267     return static_cast<clang::clangd::IncludeStructure::HeaderID>(-1);
268   }
269 
270   static inline clang::clangd::IncludeStructure::HeaderID getTombstoneKey() {
271     return static_cast<clang::clangd::IncludeStructure::HeaderID>(-2);
272   }
273 
274   static unsigned
275   getHashValue(const clang::clangd::IncludeStructure::HeaderID &Tag) {
276     return hash_value(static_cast<unsigned>(Tag));
277   }
278 
279   static bool isEqual(const clang::clangd::IncludeStructure::HeaderID &LHS,
280                       const clang::clangd::IncludeStructure::HeaderID &RHS) {
281     return LHS == RHS;
282   }
283 };
284 
285 } // namespace llvm
286 
287 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H
288