1 //===--- Headers.h - Include headers -----------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H 10 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H 11 12 #include "Protocol.h" 13 #include "SourceCode.h" 14 #include "index/Symbol.h" 15 #include "support/Path.h" 16 #include "clang/Basic/FileEntry.h" 17 #include "clang/Basic/TokenKinds.h" 18 #include "clang/Format/Format.h" 19 #include "clang/Frontend/CompilerInstance.h" 20 #include "clang/Lex/HeaderSearch.h" 21 #include "clang/Lex/Preprocessor.h" 22 #include "clang/Tooling/Inclusions/HeaderIncludes.h" 23 #include "clang/Tooling/Inclusions/StandardLibrary.h" 24 #include "llvm/ADT/ArrayRef.h" 25 #include "llvm/ADT/DenseSet.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSet.h" 28 #include "llvm/Support/Error.h" 29 #include "llvm/Support/FileSystem/UniqueID.h" 30 #include <string> 31 32 namespace clang { 33 namespace clangd { 34 35 /// Returns true if \p Include is literal include like "path" or <path>. 36 bool isLiteralInclude(llvm::StringRef Include); 37 38 /// If Text begins an Include-What-You-Use directive, returns it. 39 /// Given "// IWYU pragma: keep", returns "keep". 40 /// Input is a null-terminated char* as provided by SM.getCharacterData(). 41 /// (This should not be StringRef as we do *not* want to scan for its length). 42 llvm::Optional<StringRef> parseIWYUPragma(const char *Text); 43 44 /// Represents a header file to be #include'd. 45 struct HeaderFile { 46 std::string File; 47 /// If this is true, `File` is a literal string quoted with <> or "" that 48 /// can be #included directly; otherwise, `File` is an absolute file path. 49 bool Verbatim; 50 51 bool valid() const; 52 }; 53 54 /// Creates a `HeaderFile` from \p Header which can be either a URI or a literal 55 /// include. 56 llvm::Expected<HeaderFile> toHeaderFile(llvm::StringRef Header, 57 llvm::StringRef HintPath); 58 59 // Returns include headers for \p Sym sorted by popularity. If two headers are 60 // equally popular, prefer the shorter one. 61 llvm::SmallVector<llvm::StringRef, 1> getRankedIncludes(const Symbol &Sym); 62 63 // An #include directive that we found in the main file. 64 struct Inclusion { 65 tok::PPKeywordKind Directive; // Directive used for inclusion, e.g. import 66 std::string Written; // Inclusion name as written e.g. <vector>. 67 Path Resolved; // Resolved path of included file. Empty if not resolved. 68 unsigned HashOffset = 0; // Byte offset from start of file to #. 69 int HashLine = 0; // Line number containing the directive, 0-indexed. 70 SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; 71 llvm::Optional<unsigned> HeaderID; 72 bool BehindPragmaKeep = false; // Has IWYU pragma: keep right after. 73 }; 74 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &); 75 bool operator==(const Inclusion &LHS, const Inclusion &RHS); 76 77 // Contains information about one file in the build graph and its direct 78 // dependencies. Doesn't own the strings it references (IncludeGraph is 79 // self-contained). 80 struct IncludeGraphNode { 81 enum class SourceFlag : uint8_t { 82 None = 0, 83 // Whether current file is a main file rather than a header. 84 IsTU = 1 << 0, 85 // Whether current file had any uncompilable errors during indexing. 86 HadErrors = 1 << 1, 87 }; 88 89 SourceFlag Flags = SourceFlag::None; 90 llvm::StringRef URI; 91 FileDigest Digest{{0}}; 92 std::vector<llvm::StringRef> DirectIncludes; 93 }; 94 // FileURI and FileInclusions are references to keys of the map containing 95 // them. 96 // Important: The graph generated by those callbacks might contain cycles, self 97 // edges and multi edges. 98 using IncludeGraph = llvm::StringMap<IncludeGraphNode>; 99 100 inline IncludeGraphNode::SourceFlag operator|(IncludeGraphNode::SourceFlag A, 101 IncludeGraphNode::SourceFlag B) { 102 return static_cast<IncludeGraphNode::SourceFlag>(static_cast<uint8_t>(A) | 103 static_cast<uint8_t>(B)); 104 } 105 106 inline bool operator&(IncludeGraphNode::SourceFlag A, 107 IncludeGraphNode::SourceFlag B) { 108 return static_cast<uint8_t>(A) & static_cast<uint8_t>(B); 109 } 110 111 inline IncludeGraphNode::SourceFlag & 112 operator|=(IncludeGraphNode::SourceFlag &A, IncludeGraphNode::SourceFlag B) { 113 return A = A | B; 114 } 115 116 // Information captured about the inclusion graph in a translation unit. 117 // This includes detailed information about the direct #includes, and summary 118 // information about all transitive includes. 119 // 120 // It should be built incrementally with collectIncludeStructureCallback(). 121 // When we build the preamble, we capture and store its include structure along 122 // with the preamble data. When we use the preamble, we can copy its 123 // IncludeStructure and use another collectIncludeStructureCallback() to fill 124 // in any non-preamble inclusions. 125 class IncludeStructure { 126 public: IncludeStructure()127 IncludeStructure() { 128 // Reserve HeaderID = 0 for the main file. 129 RealPathNames.emplace_back(); 130 } 131 132 // Inserts a PPCallback and CommentHandler that visits all includes in the 133 // main file and populates the structure. It will also scan for IWYU pragmas 134 // in comments. 135 void collect(const CompilerInstance &CI); 136 137 // HeaderID identifies file in the include graph. It corresponds to a 138 // FileEntry rather than a FileID, but stays stable across preamble & main 139 // file builds. 140 enum class HeaderID : unsigned {}; 141 142 llvm::Optional<HeaderID> getID(const FileEntry *Entry) const; 143 HeaderID getOrCreateID(FileEntryRef Entry); 144 getRealPath(HeaderID ID)145 StringRef getRealPath(HeaderID ID) const { 146 assert(static_cast<unsigned>(ID) <= RealPathNames.size()); 147 return RealPathNames[static_cast<unsigned>(ID)]; 148 } 149 isSelfContained(HeaderID ID)150 bool isSelfContained(HeaderID ID) const { 151 return !NonSelfContained.contains(ID); 152 } 153 hasIWYUExport(HeaderID ID)154 bool hasIWYUExport(HeaderID ID) const { 155 return HasIWYUExport.contains(ID); 156 } 157 158 // Return all transitively reachable files. allHeaders()159 llvm::ArrayRef<std::string> allHeaders() const { return RealPathNames; } 160 161 // Return all transitively reachable files, and their minimum include depth. 162 // All transitive includes (absolute paths), with their minimum include depth. 163 // Root --> 0, #included file --> 1, etc. 164 // Root is the ID of the header being visited first. 165 llvm::DenseMap<HeaderID, unsigned> 166 includeDepth(HeaderID Root = MainFileID) const; 167 168 // Maps HeaderID to the ids of the files included from it. 169 llvm::DenseMap<HeaderID, SmallVector<HeaderID>> IncludeChildren; 170 171 llvm::DenseMap<tooling::stdlib::Header, llvm::SmallVector<HeaderID>> 172 StdlibHeaders; 173 174 std::vector<Inclusion> MainFileIncludes; 175 176 // We reserve HeaderID(0) for the main file and will manually check for that 177 // in getID and getOrCreateID because the UniqueID is not stable when the 178 // content of the main file changes. 179 static const HeaderID MainFileID = HeaderID(0u); 180 181 class RecordHeaders; 182 183 private: 184 // MainFileEntry will be used to check if the queried file is the main file 185 // or not. 186 const FileEntry *MainFileEntry = nullptr; 187 188 std::vector<std::string> RealPathNames; // In HeaderID order. 189 // FileEntry::UniqueID is mapped to the internal representation (HeaderID). 190 // Identifying files in a way that persists from preamble build to subsequent 191 // builds is surprisingly hard. FileID is unavailable in InclusionDirective(), 192 // and RealPathName and UniqueID are not preserved in 193 // the preamble. 194 llvm::DenseMap<llvm::sys::fs::UniqueID, HeaderID> UIDToIndex; 195 // Contains HeaderIDs of all non self-contained entries in the 196 // IncludeStructure. 197 llvm::DenseSet<HeaderID> NonSelfContained; 198 // Contains a set of headers that have either "IWYU pragma: export" or "IWYU 199 // pragma: begin_exports". 200 llvm::DenseSet<HeaderID> HasIWYUExport; 201 }; 202 203 // Calculates insertion edit for including a new header in a file. 204 class IncludeInserter { 205 public: 206 // If \p HeaderSearchInfo is nullptr (e.g. when compile command is 207 // infeasible), this will only try to insert verbatim headers, and 208 // include path of non-verbatim header will not be shortened. IncludeInserter(StringRef FileName,StringRef Code,const format::FormatStyle & Style,StringRef BuildDir,HeaderSearch * HeaderSearchInfo)209 IncludeInserter(StringRef FileName, StringRef Code, 210 const format::FormatStyle &Style, StringRef BuildDir, 211 HeaderSearch *HeaderSearchInfo) 212 : FileName(FileName), Code(Code), BuildDir(BuildDir), 213 HeaderSearchInfo(HeaderSearchInfo), 214 Inserter(FileName, Code, Style.IncludeStyle) {} 215 216 void addExisting(const Inclusion &Inc); 217 218 /// Checks whether to add an #include of the header into \p File. 219 /// An #include will not be added if: 220 /// - Either \p DeclaringHeader or \p InsertedHeader is already (directly) 221 /// in \p Inclusions (including those included via different paths). 222 /// - \p DeclaringHeader or \p InsertedHeader is the same as \p File. 223 /// 224 /// \param DeclaringHeader is path of the original header corresponding to \p 225 /// InsertedHeader e.g. the header that declares a symbol. 226 /// \param InsertedHeader The preferred header to be inserted. This could be 227 /// the same as DeclaringHeader but must be provided. 228 bool shouldInsertInclude(PathRef DeclaringHeader, 229 const HeaderFile &InsertedHeader) const; 230 231 /// Determines the preferred way to #include a file, taking into account the 232 /// search path. Usually this will prefer a shorter representation like 233 /// 'Foo/Bar.h' over a longer one like 'Baz/include/Foo/Bar.h'. 234 /// 235 /// \param InsertedHeader The preferred header to be inserted. 236 /// 237 /// \param IncludingFile is the absolute path of the file that InsertedHeader 238 /// will be inserted. 239 /// 240 /// \return A quoted "path" or <path> to be included, or None if it couldn't 241 /// be shortened. 242 llvm::Optional<std::string> 243 calculateIncludePath(const HeaderFile &InsertedHeader, 244 llvm::StringRef IncludingFile) const; 245 246 /// Calculates an edit that inserts \p VerbatimHeader into code. If the header 247 /// is already included, this returns None. 248 llvm::Optional<TextEdit> insert(llvm::StringRef VerbatimHeader) const; 249 250 private: 251 StringRef FileName; 252 StringRef Code; 253 StringRef BuildDir; 254 HeaderSearch *HeaderSearchInfo = nullptr; 255 llvm::StringSet<> IncludedHeaders; // Both written and resolved. 256 tooling::HeaderIncludes Inserter; // Computers insertion replacement. 257 }; 258 259 } // namespace clangd 260 } // namespace clang 261 262 namespace llvm { 263 264 // Support HeaderIDs as DenseMap keys. 265 template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> { 266 static inline clang::clangd::IncludeStructure::HeaderID getEmptyKey() { 267 return static_cast<clang::clangd::IncludeStructure::HeaderID>(-1); 268 } 269 270 static inline clang::clangd::IncludeStructure::HeaderID getTombstoneKey() { 271 return static_cast<clang::clangd::IncludeStructure::HeaderID>(-2); 272 } 273 274 static unsigned 275 getHashValue(const clang::clangd::IncludeStructure::HeaderID &Tag) { 276 return hash_value(static_cast<unsigned>(Tag)); 277 } 278 279 static bool isEqual(const clang::clangd::IncludeStructure::HeaderID &LHS, 280 const clang::clangd::IncludeStructure::HeaderID &RHS) { 281 return LHS == RHS; 282 } 283 }; 284 285 } // namespace llvm 286 287 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H 288