1 //===--- Headers.cpp - Include headers ---------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Headers.h"
10 #include "Preamble.h"
11 #include "SourceCode.h"
12 #include "clang/Basic/SourceLocation.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Frontend/CompilerInstance.h"
15 #include "clang/Lex/HeaderSearch.h"
16 #include "clang/Lex/PPCallbacks.h"
17 #include "clang/Lex/Preprocessor.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Path.h"
20 #include <cstring>
21 
22 namespace clang {
23 namespace clangd {
24 
parseIWYUPragma(const char * Text)25 llvm::Optional<StringRef> parseIWYUPragma(const char *Text) {
26   // This gets called for every comment seen in the preamble, so it's quite hot.
27   constexpr llvm::StringLiteral IWYUPragma = "// IWYU pragma: ";
28   if (strncmp(Text, IWYUPragma.data(), IWYUPragma.size()))
29     return llvm::None;
30   Text += IWYUPragma.size();
31   const char *End = Text;
32   while (*End != 0 && *End != '\n')
33     ++End;
34   return StringRef(Text, End - Text);
35 }
36 
37 class IncludeStructure::RecordHeaders : public PPCallbacks,
38                                         public CommentHandler {
39 public:
RecordHeaders(const CompilerInstance & CI,IncludeStructure * Out)40   RecordHeaders(const CompilerInstance &CI, IncludeStructure *Out)
41       : SM(CI.getSourceManager()),
42         HeaderInfo(CI.getPreprocessor().getHeaderSearchInfo()), Out(Out) {}
43 
44   // Record existing #includes - both written and resolved paths. Only #includes
45   // in the main file are collected.
InclusionDirective(SourceLocation HashLoc,const Token & IncludeTok,llvm::StringRef FileName,bool IsAngled,CharSourceRange,Optional<FileEntryRef> File,llvm::StringRef,llvm::StringRef,const clang::Module *,SrcMgr::CharacteristicKind FileKind)46   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
47                           llvm::StringRef FileName, bool IsAngled,
48                           CharSourceRange /*FilenameRange*/,
49                           Optional<FileEntryRef> File,
50                           llvm::StringRef /*SearchPath*/,
51                           llvm::StringRef /*RelativePath*/,
52                           const clang::Module * /*Imported*/,
53                           SrcMgr::CharacteristicKind FileKind) override {
54     auto MainFID = SM.getMainFileID();
55     // If an include is part of the preamble patch, translate #line directives.
56     if (InBuiltinFile)
57       HashLoc = translatePreamblePatchLocation(HashLoc, SM);
58 
59     // Record main-file inclusions (including those mapped from the preamble
60     // patch).
61     if (isInsideMainFile(HashLoc, SM)) {
62       Out->MainFileIncludes.emplace_back();
63       auto &Inc = Out->MainFileIncludes.back();
64       Inc.Written =
65           (IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str();
66       Inc.Resolved =
67           std::string(File ? File->getFileEntry().tryGetRealPathName() : "");
68       Inc.HashOffset = SM.getFileOffset(HashLoc);
69       Inc.HashLine =
70           SM.getLineNumber(SM.getFileID(HashLoc), Inc.HashOffset) - 1;
71       Inc.FileKind = FileKind;
72       Inc.Directive = IncludeTok.getIdentifierInfo()->getPPKeywordID();
73       if (LastPragmaKeepInMainFileLine == Inc.HashLine)
74         Inc.BehindPragmaKeep = true;
75       if (File) {
76         IncludeStructure::HeaderID HID = Out->getOrCreateID(*File);
77         Inc.HeaderID = static_cast<unsigned>(HID);
78         if (IsAngled)
79           if (auto StdlibHeader = tooling::stdlib::Header::named(Inc.Written)) {
80             auto &IDs = Out->StdlibHeaders[*StdlibHeader];
81             // Few physical files for one stdlib header name, linear scan is ok.
82             if (!llvm::is_contained(IDs, HID))
83               IDs.push_back(HID);
84           }
85       }
86     }
87 
88     // Record include graph (not just for main-file includes)
89     if (File) {
90       auto IncludingFileEntry = SM.getFileEntryRefForID(SM.getFileID(HashLoc));
91       if (!IncludingFileEntry) {
92         assert(SM.getBufferName(HashLoc).startswith("<") &&
93                "Expected #include location to be a file or <built-in>");
94         // Treat as if included from the main file.
95         IncludingFileEntry = SM.getFileEntryRefForID(MainFID);
96       }
97       auto IncludingID = Out->getOrCreateID(*IncludingFileEntry),
98            IncludedID = Out->getOrCreateID(*File);
99       Out->IncludeChildren[IncludingID].push_back(IncludedID);
100     }
101   }
102 
FileChanged(SourceLocation Loc,FileChangeReason Reason,SrcMgr::CharacteristicKind FileType,FileID PrevFID)103   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
104                    SrcMgr::CharacteristicKind FileType,
105                    FileID PrevFID) override {
106     switch (Reason) {
107     case PPCallbacks::EnterFile:
108       ++Level;
109       if (BuiltinFile.isInvalid() && SM.isWrittenInBuiltinFile(Loc)) {
110         BuiltinFile = SM.getFileID(Loc);
111         InBuiltinFile = true;
112       }
113       break;
114     case PPCallbacks::ExitFile: {
115       --Level;
116       if (PrevFID == BuiltinFile)
117         InBuiltinFile = false;
118       // At file exit time HeaderSearchInfo is valid and can be used to
119       // determine whether the file was a self-contained header or not.
120       if (const FileEntry *FE = SM.getFileEntryForID(PrevFID)) {
121         // isSelfContainedHeader only returns true once the full header-guard
122         // structure has been seen, i.e. when exiting the *outer* copy of the
123         // file. So last result wins.
124         if (isSelfContainedHeader(FE, PrevFID, SM, HeaderInfo))
125           Out->NonSelfContained.erase(
126               *Out->getID(SM.getFileEntryForID(PrevFID)));
127         else
128           Out->NonSelfContained.insert(
129               *Out->getID(SM.getFileEntryForID(PrevFID)));
130       }
131       break;
132     }
133     case PPCallbacks::RenameFile:
134     case PPCallbacks::SystemHeaderPragma:
135       break;
136     }
137   }
138 
HandleComment(Preprocessor & PP,SourceRange Range)139   bool HandleComment(Preprocessor &PP, SourceRange Range) override {
140     auto Pragma = parseIWYUPragma(SM.getCharacterData(Range.getBegin()));
141     if (!Pragma)
142       return false;
143 
144     if (inMainFile()) {
145       // Given:
146       //
147       // #include "foo.h"
148       // #include "bar.h" // IWYU pragma: keep
149       //
150       // The order in which the callbacks will be triggered:
151       //
152       // 1. InclusionDirective("foo.h")
153       // 2. handleCommentInMainFile("// IWYU pragma: keep")
154       // 3. InclusionDirective("bar.h")
155       //
156       // This code stores the last location of "IWYU pragma: keep" (or export)
157       // comment in the main file, so that when InclusionDirective is called, it
158       // will know that the next inclusion is behind the IWYU pragma.
159       // FIXME: Support "IWYU pragma: begin_exports" and "IWYU pragma:
160       // end_exports".
161       if (!Pragma->startswith("export") && !Pragma->startswith("keep"))
162         return false;
163       unsigned Offset = SM.getFileOffset(Range.getBegin());
164       LastPragmaKeepInMainFileLine =
165           SM.getLineNumber(SM.getMainFileID(), Offset) - 1;
166     } else {
167       // Memorize headers that that have export pragmas in them. Include Cleaner
168       // does not support them properly yet, so they will be not marked as
169       // unused.
170       // FIXME: Once IncludeCleaner supports export pragmas, remove this.
171       if (!Pragma->startswith("export") && !Pragma->startswith("begin_exports"))
172         return false;
173       Out->HasIWYUExport.insert(
174           *Out->getID(SM.getFileEntryForID(SM.getFileID(Range.getBegin()))));
175     }
176     return false;
177   }
178 
179 private:
180   // Keeps track of include depth for the current file. It's 1 for main file.
181   int Level = 0;
inMainFile() const182   bool inMainFile() const { return Level == 1; }
183 
184   const SourceManager &SM;
185   HeaderSearch &HeaderInfo;
186   // Set after entering the <built-in> file.
187   FileID BuiltinFile;
188   // Indicates whether <built-in> file is part of include stack.
189   bool InBuiltinFile = false;
190 
191   IncludeStructure *Out;
192 
193   // The last line "IWYU pragma: keep" was seen in the main file, 0-indexed.
194   int LastPragmaKeepInMainFileLine = -1;
195 };
196 
isLiteralInclude(llvm::StringRef Include)197 bool isLiteralInclude(llvm::StringRef Include) {
198   return Include.startswith("<") || Include.startswith("\"");
199 }
200 
valid() const201 bool HeaderFile::valid() const {
202   return (Verbatim && isLiteralInclude(File)) ||
203          (!Verbatim && llvm::sys::path::is_absolute(File));
204 }
205 
toHeaderFile(llvm::StringRef Header,llvm::StringRef HintPath)206 llvm::Expected<HeaderFile> toHeaderFile(llvm::StringRef Header,
207                                         llvm::StringRef HintPath) {
208   if (isLiteralInclude(Header))
209     return HeaderFile{Header.str(), /*Verbatim=*/true};
210   auto U = URI::parse(Header);
211   if (!U)
212     return U.takeError();
213 
214   auto IncludePath = URI::includeSpelling(*U);
215   if (!IncludePath)
216     return IncludePath.takeError();
217   if (!IncludePath->empty())
218     return HeaderFile{std::move(*IncludePath), /*Verbatim=*/true};
219 
220   auto Resolved = URI::resolve(*U, HintPath);
221   if (!Resolved)
222     return Resolved.takeError();
223   return HeaderFile{std::move(*Resolved), /*Verbatim=*/false};
224 }
225 
getRankedIncludes(const Symbol & Sym)226 llvm::SmallVector<llvm::StringRef, 1> getRankedIncludes(const Symbol &Sym) {
227   auto Includes = Sym.IncludeHeaders;
228   // Sort in descending order by reference count and header length.
229   llvm::sort(Includes, [](const Symbol::IncludeHeaderWithReferences &LHS,
230                           const Symbol::IncludeHeaderWithReferences &RHS) {
231     if (LHS.References == RHS.References)
232       return LHS.IncludeHeader.size() < RHS.IncludeHeader.size();
233     return LHS.References > RHS.References;
234   });
235   llvm::SmallVector<llvm::StringRef, 1> Headers;
236   for (const auto &Include : Includes)
237     Headers.push_back(Include.IncludeHeader);
238   return Headers;
239 }
240 
collect(const CompilerInstance & CI)241 void IncludeStructure::collect(const CompilerInstance &CI) {
242   auto &SM = CI.getSourceManager();
243   MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
244   auto Collector = std::make_unique<RecordHeaders>(CI, this);
245   CI.getPreprocessor().addCommentHandler(Collector.get());
246   CI.getPreprocessor().addPPCallbacks(std::move(Collector));
247 }
248 
249 llvm::Optional<IncludeStructure::HeaderID>
getID(const FileEntry * Entry) const250 IncludeStructure::getID(const FileEntry *Entry) const {
251   // HeaderID of the main file is always 0;
252   if (Entry == MainFileEntry) {
253     return static_cast<IncludeStructure::HeaderID>(0u);
254   }
255   auto It = UIDToIndex.find(Entry->getUniqueID());
256   if (It == UIDToIndex.end())
257     return llvm::None;
258   return It->second;
259 }
260 
getOrCreateID(FileEntryRef Entry)261 IncludeStructure::HeaderID IncludeStructure::getOrCreateID(FileEntryRef Entry) {
262   // Main file's FileEntry was not known at IncludeStructure creation time.
263   if (&Entry.getFileEntry() == MainFileEntry) {
264     if (RealPathNames.front().empty())
265       RealPathNames.front() = MainFileEntry->tryGetRealPathName().str();
266     return MainFileID;
267   }
268   auto R = UIDToIndex.try_emplace(
269       Entry.getUniqueID(),
270       static_cast<IncludeStructure::HeaderID>(RealPathNames.size()));
271   if (R.second)
272     RealPathNames.emplace_back();
273   IncludeStructure::HeaderID Result = R.first->getSecond();
274   std::string &RealPathName = RealPathNames[static_cast<unsigned>(Result)];
275   if (RealPathName.empty())
276     RealPathName = Entry.getFileEntry().tryGetRealPathName().str();
277   return Result;
278 }
279 
280 llvm::DenseMap<IncludeStructure::HeaderID, unsigned>
includeDepth(HeaderID Root) const281 IncludeStructure::includeDepth(HeaderID Root) const {
282   // Include depth 0 is the main file only.
283   llvm::DenseMap<HeaderID, unsigned> Result;
284   assert(static_cast<unsigned>(Root) < RealPathNames.size());
285   Result[Root] = 0;
286   std::vector<IncludeStructure::HeaderID> CurrentLevel;
287   CurrentLevel.push_back(Root);
288   llvm::DenseSet<IncludeStructure::HeaderID> Seen;
289   Seen.insert(Root);
290 
291   // Each round of BFS traversal finds the next depth level.
292   std::vector<IncludeStructure::HeaderID> PreviousLevel;
293   for (unsigned Level = 1; !CurrentLevel.empty(); ++Level) {
294     PreviousLevel.clear();
295     PreviousLevel.swap(CurrentLevel);
296     for (const auto &Parent : PreviousLevel) {
297       for (const auto &Child : IncludeChildren.lookup(Parent)) {
298         if (Seen.insert(Child).second) {
299           CurrentLevel.push_back(Child);
300           Result[Child] = Level;
301         }
302       }
303     }
304   }
305   return Result;
306 }
307 
addExisting(const Inclusion & Inc)308 void IncludeInserter::addExisting(const Inclusion &Inc) {
309   IncludedHeaders.insert(Inc.Written);
310   if (!Inc.Resolved.empty())
311     IncludedHeaders.insert(Inc.Resolved);
312 }
313 
314 /// FIXME(ioeric): we might not want to insert an absolute include path if the
315 /// path is not shortened.
shouldInsertInclude(PathRef DeclaringHeader,const HeaderFile & InsertedHeader) const316 bool IncludeInserter::shouldInsertInclude(
317     PathRef DeclaringHeader, const HeaderFile &InsertedHeader) const {
318   assert(InsertedHeader.valid());
319   if (!HeaderSearchInfo && !InsertedHeader.Verbatim)
320     return false;
321   if (FileName == DeclaringHeader || FileName == InsertedHeader.File)
322     return false;
323   auto Included = [&](llvm::StringRef Header) {
324     return IncludedHeaders.find(Header) != IncludedHeaders.end();
325   };
326   return !Included(DeclaringHeader) && !Included(InsertedHeader.File);
327 }
328 
329 llvm::Optional<std::string>
calculateIncludePath(const HeaderFile & InsertedHeader,llvm::StringRef IncludingFile) const330 IncludeInserter::calculateIncludePath(const HeaderFile &InsertedHeader,
331                                       llvm::StringRef IncludingFile) const {
332   assert(InsertedHeader.valid());
333   if (InsertedHeader.Verbatim)
334     return InsertedHeader.File;
335   bool IsSystem = false;
336   std::string Suggested;
337   if (HeaderSearchInfo) {
338     Suggested = HeaderSearchInfo->suggestPathToFileForDiagnostics(
339         InsertedHeader.File, BuildDir, IncludingFile, &IsSystem);
340   } else {
341     // Calculate include relative to including file only.
342     StringRef IncludingDir = llvm::sys::path::parent_path(IncludingFile);
343     SmallString<256> RelFile(InsertedHeader.File);
344     // Replacing with "" leaves "/RelFile" if IncludingDir doesn't end in "/".
345     llvm::sys::path::replace_path_prefix(RelFile, IncludingDir, "./");
346     Suggested = llvm::sys::path::convert_to_slash(
347         llvm::sys::path::remove_leading_dotslash(RelFile));
348   }
349   // FIXME: should we allow (some limited number of) "../header.h"?
350   if (llvm::sys::path::is_absolute(Suggested))
351     return None;
352   if (IsSystem)
353     Suggested = "<" + Suggested + ">";
354   else
355     Suggested = "\"" + Suggested + "\"";
356   return Suggested;
357 }
358 
359 llvm::Optional<TextEdit>
insert(llvm::StringRef VerbatimHeader) const360 IncludeInserter::insert(llvm::StringRef VerbatimHeader) const {
361   llvm::Optional<TextEdit> Edit = None;
362   if (auto Insertion = Inserter.insert(VerbatimHeader.trim("\"<>"),
363                                        VerbatimHeader.startswith("<")))
364     Edit = replacementToEdit(Code, *Insertion);
365   return Edit;
366 }
367 
operator <<(llvm::raw_ostream & OS,const Inclusion & Inc)368 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Inclusion &Inc) {
369   return OS << Inc.Written << " = "
370             << (!Inc.Resolved.empty() ? Inc.Resolved : "[unresolved]")
371             << " at line" << Inc.HashLine;
372 }
373 
operator ==(const Inclusion & LHS,const Inclusion & RHS)374 bool operator==(const Inclusion &LHS, const Inclusion &RHS) {
375   return std::tie(LHS.Directive, LHS.FileKind, LHS.HashOffset, LHS.HashLine,
376                   LHS.Resolved, LHS.Written) ==
377          std::tie(RHS.Directive, RHS.FileKind, RHS.HashOffset, RHS.HashLine,
378                   RHS.Resolved, RHS.Written);
379 }
380 
381 } // namespace clangd
382 } // namespace clang
383