1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "IncludeCleaner.h" 10 #include "Config.h" 11 #include "Headers.h" 12 #include "ParsedAST.h" 13 #include "Protocol.h" 14 #include "SourceCode.h" 15 #include "support/Logger.h" 16 #include "support/Trace.h" 17 #include "clang/AST/ExprCXX.h" 18 #include "clang/AST/RecursiveASTVisitor.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Lex/HeaderSearch.h" 22 #include "clang/Lex/Preprocessor.h" 23 #include "clang/Tooling/Syntax/Tokens.h" 24 #include "llvm/Support/FormatVariadic.h" 25 #include "llvm/Support/Path.h" 26 27 namespace clang { 28 namespace clangd { 29 30 static bool AnalyzeStdlib = false; 31 void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; } 32 33 namespace { 34 35 /// Crawler traverses the AST and feeds in the locations of (sometimes 36 /// implicitly) used symbols into \p Result. 37 class ReferencedLocationCrawler 38 : public RecursiveASTVisitor<ReferencedLocationCrawler> { 39 public: 40 ReferencedLocationCrawler(ReferencedLocations &Result, 41 const SourceManager &SM) 42 : Result(Result), SM(SM) {} 43 44 bool VisitDeclRefExpr(DeclRefExpr *DRE) { 45 add(DRE->getDecl()); 46 add(DRE->getFoundDecl()); 47 return true; 48 } 49 50 bool VisitMemberExpr(MemberExpr *ME) { 51 add(ME->getMemberDecl()); 52 add(ME->getFoundDecl().getDecl()); 53 return true; 54 } 55 56 bool VisitTagType(TagType *TT) { 57 add(TT->getDecl()); 58 return true; 59 } 60 61 bool VisitFunctionDecl(FunctionDecl *FD) { 62 // Function definition will require redeclarations to be included. 63 if (FD->isThisDeclarationADefinition()) 64 add(FD); 65 return true; 66 } 67 68 bool VisitCXXConstructExpr(CXXConstructExpr *CCE) { 69 add(CCE->getConstructor()); 70 return true; 71 } 72 73 bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { 74 if (isNew(TST)) { 75 add(TST->getTemplateName().getAsTemplateDecl()); // Primary template. 76 add(TST->getAsCXXRecordDecl()); // Specialization 77 } 78 return true; 79 } 80 81 bool VisitUsingType(UsingType *UT) { 82 add(UT->getFoundDecl()); 83 return true; 84 } 85 86 bool VisitTypedefType(TypedefType *TT) { 87 add(TT->getDecl()); 88 return true; 89 } 90 91 // Consider types of any subexpression used, even if the type is not named. 92 // This is helpful in getFoo().bar(), where Foo must be complete. 93 // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to 94 // consider types "used" when they are not directly spelled in code. 95 bool VisitExpr(Expr *E) { 96 TraverseType(E->getType()); 97 return true; 98 } 99 100 bool TraverseType(QualType T) { 101 if (isNew(T.getTypePtrOrNull())) // don't care about quals 102 Base::TraverseType(T); 103 return true; 104 } 105 106 bool VisitUsingDecl(UsingDecl *D) { 107 for (const auto *Shadow : D->shadows()) 108 add(Shadow->getTargetDecl()); 109 return true; 110 } 111 112 // Enums may be usefully forward-declared as *complete* types by specifying 113 // an underlying type. In this case, the definition should see the declaration 114 // so they can be checked for compatibility. 115 bool VisitEnumDecl(EnumDecl *D) { 116 if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo()) 117 add(D); 118 return true; 119 } 120 121 // When the overload is not resolved yet, mark all candidates as used. 122 bool VisitOverloadExpr(OverloadExpr *E) { 123 for (const auto *ResolutionDecl : E->decls()) 124 add(ResolutionDecl); 125 return true; 126 } 127 128 private: 129 using Base = RecursiveASTVisitor<ReferencedLocationCrawler>; 130 131 void add(const Decl *D) { 132 if (!D || !isNew(D->getCanonicalDecl())) 133 return; 134 if (auto SS = StdRecognizer(D)) { 135 Result.Stdlib.insert(*SS); 136 return; 137 } 138 // Special case RecordDecls, as it is common for them to be forward 139 // declared multiple times. The most common cases are: 140 // - Definition available in TU, only mark that one as usage. The rest is 141 // likely to be unnecessary. This might result in false positives when an 142 // internal definition is visible. 143 // - There's a forward declaration in the main file, no need for other 144 // redecls. 145 if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) { 146 if (const auto *Definition = RD->getDefinition()) { 147 Result.User.insert(Definition->getLocation()); 148 return; 149 } 150 if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation())) 151 return; 152 } 153 for (const Decl *Redecl : D->redecls()) 154 Result.User.insert(Redecl->getLocation()); 155 } 156 157 bool isNew(const void *P) { return P && Visited.insert(P).second; } 158 159 ReferencedLocations &Result; 160 llvm::DenseSet<const void *> Visited; 161 const SourceManager &SM; 162 stdlib::Recognizer StdRecognizer; 163 }; 164 165 // Given a set of referenced FileIDs, determines all the potentially-referenced 166 // files and macros by traversing expansion/spelling locations of macro IDs. 167 // This is used to map the referenced SourceLocations onto real files. 168 struct ReferencedFilesBuilder { 169 ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {} 170 llvm::DenseSet<FileID> Files; 171 llvm::DenseSet<FileID> Macros; 172 const SourceManager &SM; 173 174 void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); } 175 176 void add(FileID FID, SourceLocation Loc) { 177 if (FID.isInvalid()) 178 return; 179 assert(SM.isInFileID(Loc, FID)); 180 if (Loc.isFileID()) { 181 Files.insert(FID); 182 return; 183 } 184 // Don't process the same macro FID twice. 185 if (!Macros.insert(FID).second) 186 return; 187 const auto &Exp = SM.getSLocEntry(FID).getExpansion(); 188 add(Exp.getSpellingLoc()); 189 add(Exp.getExpansionLocStart()); 190 add(Exp.getExpansionLocEnd()); 191 } 192 }; 193 194 // Returns the range starting at '#' and ending at EOL. Escaped newlines are not 195 // handled. 196 clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { 197 clangd::Range Result; 198 Result.end = Result.start = offsetToPosition(Code, HashOffset); 199 200 // Span the warning until the EOL or EOF. 201 Result.end.character += 202 lspLength(Code.drop_front(HashOffset).take_until([](char C) { 203 return C == '\n' || C == '\r'; 204 })); 205 return Result; 206 } 207 208 // Finds locations of macros referenced from within the main file. That includes 209 // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`. 210 void findReferencedMacros(ParsedAST &AST, ReferencedLocations &Result) { 211 trace::Span Tracer("IncludeCleaner::findReferencedMacros"); 212 auto &SM = AST.getSourceManager(); 213 auto &PP = AST.getPreprocessor(); 214 // FIXME(kirillbobyrev): The macros from the main file are collected in 215 // ParsedAST's MainFileMacros. However, we can't use it here because it 216 // doesn't handle macro references that were not expanded, e.g. in macro 217 // definitions or preprocessor-disabled sections. 218 // 219 // Extending MainFileMacros to collect missing references and switching to 220 // this mechanism (as opposed to iterating through all tokens) will improve 221 // the performance of findReferencedMacros and also improve other features 222 // relying on MainFileMacros. 223 for (const syntax::Token &Tok : 224 AST.getTokens().spelledTokens(SM.getMainFileID())) { 225 auto Macro = locateMacroAt(Tok, PP); 226 if (!Macro) 227 continue; 228 auto Loc = Macro->Info->getDefinitionLoc(); 229 if (Loc.isValid()) 230 Result.User.insert(Loc); 231 // FIXME: support stdlib macros 232 } 233 } 234 235 static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) { 236 if (Inc.BehindPragmaKeep) 237 return false; 238 239 // FIXME(kirillbobyrev): We currently do not support the umbrella headers. 240 // System headers are likely to be standard library headers. 241 // Until we have good support for umbrella headers, don't warn about them. 242 if (Inc.Written.front() == '<') { 243 if (AnalyzeStdlib && stdlib::Header::named(Inc.Written)) 244 return true; 245 return false; 246 } 247 // Headers without include guards have side effects and are not 248 // self-contained, skip them. 249 assert(Inc.HeaderID); 250 auto FE = AST.getSourceManager().getFileManager().getFile( 251 AST.getIncludeStructure().getRealPath( 252 static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID))); 253 assert(FE); 254 if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded( 255 *FE)) { 256 dlog("{0} doesn't have header guard and will not be considered unused", 257 (*FE)->getName()); 258 return false; 259 } 260 return true; 261 } 262 263 // In case symbols are coming from non self-contained header, we need to find 264 // its first includer that is self-contained. This is the header users can 265 // include, so it will be responsible for bringing the symbols from given 266 // header into the scope. 267 FileID headerResponsible(FileID ID, const SourceManager &SM, 268 const IncludeStructure &Includes) { 269 // Unroll the chain of non self-contained headers until we find the one that 270 // can be included. 271 for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID(); 272 FE = SM.getFileEntryForID(ID)) { 273 // If FE is nullptr, we consider it to be the responsible header. 274 if (!FE) 275 break; 276 auto HID = Includes.getID(FE); 277 assert(HID && "We're iterating over headers already existing in " 278 "IncludeStructure"); 279 if (Includes.isSelfContained(*HID)) 280 break; 281 // The header is not self-contained: put the responsibility for its symbols 282 // on its includer. 283 ID = SM.getFileID(SM.getIncludeLoc(ID)); 284 } 285 return ID; 286 } 287 288 } // namespace 289 290 ReferencedLocations findReferencedLocations(ParsedAST &AST) { 291 trace::Span Tracer("IncludeCleaner::findReferencedLocations"); 292 ReferencedLocations Result; 293 ReferencedLocationCrawler Crawler(Result, AST.getSourceManager()); 294 Crawler.TraverseAST(AST.getASTContext()); 295 findReferencedMacros(AST, Result); 296 return Result; 297 } 298 299 ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, 300 const IncludeStructure &Includes, 301 const SourceManager &SM) { 302 std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()}; 303 llvm::sort(Sorted); // Group by FileID. 304 ReferencedFilesBuilder Builder(SM); 305 for (auto It = Sorted.begin(); It < Sorted.end();) { 306 FileID FID = SM.getFileID(*It); 307 Builder.add(FID, *It); 308 // Cheaply skip over all the other locations from the same FileID. 309 // This avoids lots of redundant Loc->File lookups for the same file. 310 do 311 ++It; 312 while (It != Sorted.end() && SM.isInFileID(*It, FID)); 313 } 314 315 // If a header is not self-contained, we consider its symbols a logical part 316 // of the including file. Therefore, mark the parents of all used 317 // non-self-contained FileIDs as used. Perform this on FileIDs rather than 318 // HeaderIDs, as each inclusion of a non-self-contained file is distinct. 319 llvm::DenseSet<FileID> UserFiles; 320 for (FileID ID : Builder.Files) 321 UserFiles.insert(headerResponsible(ID, SM, Includes)); 322 323 llvm::DenseSet<stdlib::Header> StdlibFiles; 324 for (const auto &Symbol : Locs.Stdlib) 325 for (const auto &Header : Symbol.headers()) 326 StdlibFiles.insert(Header); 327 328 return {std::move(UserFiles), std::move(StdlibFiles)}; 329 } 330 331 std::vector<const Inclusion *> 332 getUnused(ParsedAST &AST, 333 const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) { 334 trace::Span Tracer("IncludeCleaner::getUnused"); 335 std::vector<const Inclusion *> Unused; 336 for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) { 337 if (!MFI.HeaderID) 338 continue; 339 auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID); 340 bool Used = ReferencedFiles.contains(IncludeID); 341 if (!Used && !mayConsiderUnused(MFI, AST)) { 342 dlog("{0} was not used, but is not eligible to be diagnosed as unused", 343 MFI.Written); 344 continue; 345 } 346 if (!Used) 347 Unused.push_back(&MFI); 348 dlog("{0} is {1}", MFI.Written, Used ? "USED" : "UNUSED"); 349 } 350 return Unused; 351 } 352 353 #ifndef NDEBUG 354 // Is FID a <built-in>, <scratch space> etc? 355 static bool isSpecialBuffer(FileID FID, const SourceManager &SM) { 356 const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile(); 357 return FI.getName().startswith("<"); 358 } 359 #endif 360 361 llvm::DenseSet<IncludeStructure::HeaderID> 362 translateToHeaderIDs(const ReferencedFiles &Files, 363 const IncludeStructure &Includes, 364 const SourceManager &SM) { 365 trace::Span Tracer("IncludeCleaner::translateToHeaderIDs"); 366 llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs; 367 TranslatedHeaderIDs.reserve(Files.User.size()); 368 for (FileID FID : Files.User) { 369 const FileEntry *FE = SM.getFileEntryForID(FID); 370 if (!FE) { 371 assert(isSpecialBuffer(FID, SM)); 372 continue; 373 } 374 const auto File = Includes.getID(FE); 375 assert(File); 376 TranslatedHeaderIDs.insert(*File); 377 } 378 for (stdlib::Header StdlibUsed : Files.Stdlib) 379 for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed)) 380 TranslatedHeaderIDs.insert(HID); 381 return TranslatedHeaderIDs; 382 } 383 384 std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) { 385 const auto &SM = AST.getSourceManager(); 386 387 auto Refs = findReferencedLocations(AST); 388 auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(), 389 AST.getSourceManager()); 390 auto ReferencedHeaders = 391 translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM); 392 return getUnused(AST, ReferencedHeaders); 393 } 394 395 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST, 396 llvm::StringRef Code) { 397 const Config &Cfg = Config::current(); 398 if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict || 399 Cfg.Diagnostics.SuppressAll || 400 Cfg.Diagnostics.Suppress.contains("unused-includes")) 401 return {}; 402 trace::Span Tracer("IncludeCleaner::issueUnusedIncludesDiagnostics"); 403 std::vector<Diag> Result; 404 std::string FileName = 405 AST.getSourceManager() 406 .getFileEntryForID(AST.getSourceManager().getMainFileID()) 407 ->getName() 408 .str(); 409 for (const auto *Inc : computeUnusedIncludes(AST)) { 410 Diag D; 411 D.Message = 412 llvm::formatv("included header {0} is not used", 413 llvm::sys::path::filename( 414 Inc->Written.substr(1, Inc->Written.size() - 2), 415 llvm::sys::path::Style::posix)); 416 D.Name = "unused-includes"; 417 D.Source = Diag::DiagSource::Clangd; 418 D.File = FileName; 419 D.Severity = DiagnosticsEngine::Warning; 420 D.Tags.push_back(Unnecessary); 421 D.Range = getDiagnosticRange(Code, Inc->HashOffset); 422 // FIXME(kirillbobyrev): Removing inclusion might break the code if the 423 // used headers are only reachable transitively through this one. Suggest 424 // including them directly instead. 425 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas 426 // (keep/export) remove the warning once we support IWYU pragmas. 427 D.Fixes.emplace_back(); 428 D.Fixes.back().Message = "remove #include directive"; 429 D.Fixes.back().Edits.emplace_back(); 430 D.Fixes.back().Edits.back().range.start.line = Inc->HashLine; 431 D.Fixes.back().Edits.back().range.end.line = Inc->HashLine + 1; 432 D.InsideMainFile = true; 433 Result.push_back(std::move(D)); 434 } 435 return Result; 436 } 437 438 } // namespace clangd 439 } // namespace clang 440