1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "IncludeCleaner.h" 10 #include "Config.h" 11 #include "Headers.h" 12 #include "ParsedAST.h" 13 #include "Protocol.h" 14 #include "SourceCode.h" 15 #include "support/Logger.h" 16 #include "support/Trace.h" 17 #include "clang/AST/ASTContext.h" 18 #include "clang/AST/ExprCXX.h" 19 #include "clang/AST/RecursiveASTVisitor.h" 20 #include "clang/Basic/SourceLocation.h" 21 #include "clang/Basic/SourceManager.h" 22 #include "clang/Lex/HeaderSearch.h" 23 #include "clang/Lex/Preprocessor.h" 24 #include "clang/Tooling/Syntax/Tokens.h" 25 #include "llvm/ADT/STLFunctionalExtras.h" 26 #include "llvm/Support/FormatVariadic.h" 27 #include "llvm/Support/Path.h" 28 29 namespace clang { 30 namespace clangd { 31 32 static bool AnalyzeStdlib = false; 33 void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; } 34 35 namespace { 36 37 /// Crawler traverses the AST and feeds in the locations of (sometimes 38 /// implicitly) used symbols into \p Result. 39 class ReferencedLocationCrawler 40 : public RecursiveASTVisitor<ReferencedLocationCrawler> { 41 public: 42 ReferencedLocationCrawler(ReferencedLocations &Result, 43 const SourceManager &SM) 44 : Result(Result), SM(SM) {} 45 46 bool VisitDeclRefExpr(DeclRefExpr *DRE) { 47 add(DRE->getDecl()); 48 add(DRE->getFoundDecl()); 49 return true; 50 } 51 52 bool VisitMemberExpr(MemberExpr *ME) { 53 add(ME->getMemberDecl()); 54 add(ME->getFoundDecl().getDecl()); 55 return true; 56 } 57 58 bool VisitTagType(TagType *TT) { 59 add(TT->getDecl()); 60 return true; 61 } 62 63 bool VisitFunctionDecl(FunctionDecl *FD) { 64 // Function definition will require redeclarations to be included. 65 if (FD->isThisDeclarationADefinition()) 66 add(FD); 67 return true; 68 } 69 70 bool VisitCXXConstructExpr(CXXConstructExpr *CCE) { 71 add(CCE->getConstructor()); 72 return true; 73 } 74 75 bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { 76 if (isNew(TST)) { 77 add(TST->getTemplateName().getAsTemplateDecl()); // Primary template. 78 add(TST->getAsCXXRecordDecl()); // Specialization 79 } 80 return true; 81 } 82 83 bool VisitUsingType(UsingType *UT) { 84 add(UT->getFoundDecl()); 85 return true; 86 } 87 88 bool VisitTypedefType(TypedefType *TT) { 89 add(TT->getDecl()); 90 return true; 91 } 92 93 // Consider types of any subexpression used, even if the type is not named. 94 // This is helpful in getFoo().bar(), where Foo must be complete. 95 // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to 96 // consider types "used" when they are not directly spelled in code. 97 bool VisitExpr(Expr *E) { 98 TraverseType(E->getType()); 99 return true; 100 } 101 102 bool TraverseType(QualType T) { 103 if (isNew(T.getTypePtrOrNull())) // don't care about quals 104 Base::TraverseType(T); 105 return true; 106 } 107 108 bool VisitUsingDecl(UsingDecl *D) { 109 for (const auto *Shadow : D->shadows()) 110 add(Shadow->getTargetDecl()); 111 return true; 112 } 113 114 // Enums may be usefully forward-declared as *complete* types by specifying 115 // an underlying type. In this case, the definition should see the declaration 116 // so they can be checked for compatibility. 117 bool VisitEnumDecl(EnumDecl *D) { 118 if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo()) 119 add(D); 120 return true; 121 } 122 123 // When the overload is not resolved yet, mark all candidates as used. 124 bool VisitOverloadExpr(OverloadExpr *E) { 125 for (const auto *ResolutionDecl : E->decls()) 126 add(ResolutionDecl); 127 return true; 128 } 129 130 private: 131 using Base = RecursiveASTVisitor<ReferencedLocationCrawler>; 132 133 void add(const Decl *D) { 134 if (!D || !isNew(D->getCanonicalDecl())) 135 return; 136 if (auto SS = StdRecognizer(D)) { 137 Result.Stdlib.insert(*SS); 138 return; 139 } 140 // Special case RecordDecls, as it is common for them to be forward 141 // declared multiple times. The most common cases are: 142 // - Definition available in TU, only mark that one as usage. The rest is 143 // likely to be unnecessary. This might result in false positives when an 144 // internal definition is visible. 145 // - There's a forward declaration in the main file, no need for other 146 // redecls. 147 if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) { 148 if (const auto *Definition = RD->getDefinition()) { 149 Result.User.insert(Definition->getLocation()); 150 return; 151 } 152 if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation())) 153 return; 154 } 155 for (const Decl *Redecl : D->redecls()) 156 Result.User.insert(Redecl->getLocation()); 157 } 158 159 bool isNew(const void *P) { return P && Visited.insert(P).second; } 160 161 ReferencedLocations &Result; 162 llvm::DenseSet<const void *> Visited; 163 const SourceManager &SM; 164 tooling::stdlib::Recognizer StdRecognizer; 165 }; 166 167 // Given a set of referenced FileIDs, determines all the potentially-referenced 168 // files and macros by traversing expansion/spelling locations of macro IDs. 169 // This is used to map the referenced SourceLocations onto real files. 170 struct ReferencedFilesBuilder { 171 ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {} 172 llvm::DenseSet<FileID> Files; 173 llvm::DenseSet<FileID> Macros; 174 const SourceManager &SM; 175 176 void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); } 177 178 void add(FileID FID, SourceLocation Loc) { 179 if (FID.isInvalid()) 180 return; 181 assert(SM.isInFileID(Loc, FID)); 182 if (Loc.isFileID()) { 183 Files.insert(FID); 184 return; 185 } 186 // Don't process the same macro FID twice. 187 if (!Macros.insert(FID).second) 188 return; 189 const auto &Exp = SM.getSLocEntry(FID).getExpansion(); 190 add(Exp.getSpellingLoc()); 191 add(Exp.getExpansionLocStart()); 192 add(Exp.getExpansionLocEnd()); 193 } 194 }; 195 196 // Returns the range starting at '#' and ending at EOL. Escaped newlines are not 197 // handled. 198 clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { 199 clangd::Range Result; 200 Result.end = Result.start = offsetToPosition(Code, HashOffset); 201 202 // Span the warning until the EOL or EOF. 203 Result.end.character += 204 lspLength(Code.drop_front(HashOffset).take_until([](char C) { 205 return C == '\n' || C == '\r'; 206 })); 207 return Result; 208 } 209 210 // Finds locations of macros referenced from within the main file. That includes 211 // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`. 212 void findReferencedMacros(const SourceManager &SM, Preprocessor &PP, 213 const syntax::TokenBuffer *Tokens, 214 ReferencedLocations &Result) { 215 trace::Span Tracer("IncludeCleaner::findReferencedMacros"); 216 // FIXME(kirillbobyrev): The macros from the main file are collected in 217 // ParsedAST's MainFileMacros. However, we can't use it here because it 218 // doesn't handle macro references that were not expanded, e.g. in macro 219 // definitions or preprocessor-disabled sections. 220 // 221 // Extending MainFileMacros to collect missing references and switching to 222 // this mechanism (as opposed to iterating through all tokens) will improve 223 // the performance of findReferencedMacros and also improve other features 224 // relying on MainFileMacros. 225 for (const syntax::Token &Tok : Tokens->spelledTokens(SM.getMainFileID())) { 226 auto Macro = locateMacroAt(Tok, PP); 227 if (!Macro) 228 continue; 229 auto Loc = Macro->Info->getDefinitionLoc(); 230 if (Loc.isValid()) 231 Result.User.insert(Loc); 232 // FIXME: support stdlib macros 233 } 234 } 235 236 static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) { 237 if (Inc.BehindPragmaKeep) 238 return false; 239 240 // FIXME(kirillbobyrev): We currently do not support the umbrella headers. 241 // System headers are likely to be standard library headers. 242 // Until we have good support for umbrella headers, don't warn about them. 243 if (Inc.Written.front() == '<') { 244 if (AnalyzeStdlib && tooling::stdlib::Header::named(Inc.Written)) 245 return true; 246 return false; 247 } 248 // Headers without include guards have side effects and are not 249 // self-contained, skip them. 250 assert(Inc.HeaderID); 251 auto FE = AST.getSourceManager().getFileManager().getFile( 252 AST.getIncludeStructure().getRealPath( 253 static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID))); 254 assert(FE); 255 if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded( 256 *FE)) { 257 dlog("{0} doesn't have header guard and will not be considered unused", 258 (*FE)->getName()); 259 return false; 260 } 261 return true; 262 } 263 264 // In case symbols are coming from non self-contained header, we need to find 265 // its first includer that is self-contained. This is the header users can 266 // include, so it will be responsible for bringing the symbols from given 267 // header into the scope. 268 FileID headerResponsible(FileID ID, const SourceManager &SM, 269 const IncludeStructure &Includes) { 270 // Unroll the chain of non self-contained headers until we find the one that 271 // can be included. 272 for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID(); 273 FE = SM.getFileEntryForID(ID)) { 274 // If FE is nullptr, we consider it to be the responsible header. 275 if (!FE) 276 break; 277 auto HID = Includes.getID(FE); 278 assert(HID && "We're iterating over headers already existing in " 279 "IncludeStructure"); 280 if (Includes.isSelfContained(*HID)) 281 break; 282 // The header is not self-contained: put the responsibility for its symbols 283 // on its includer. 284 ID = SM.getFileID(SM.getIncludeLoc(ID)); 285 } 286 return ID; 287 } 288 289 } // namespace 290 291 ReferencedLocations findReferencedLocations(const SourceManager &SM, 292 ASTContext &Ctx, Preprocessor &PP, 293 const syntax::TokenBuffer *Tokens) { 294 trace::Span Tracer("IncludeCleaner::findReferencedLocations"); 295 ReferencedLocations Result; 296 ReferencedLocationCrawler Crawler(Result, SM); 297 Crawler.TraverseAST(Ctx); 298 if (Tokens) 299 findReferencedMacros(SM, PP, Tokens, Result); 300 return Result; 301 } 302 303 ReferencedLocations findReferencedLocations(ParsedAST &AST) { 304 return findReferencedLocations(AST.getSourceManager(), AST.getASTContext(), 305 AST.getPreprocessor(), &AST.getTokens()); 306 } 307 308 ReferencedFiles 309 findReferencedFiles(const ReferencedLocations &Locs, const SourceManager &SM, 310 llvm::function_ref<FileID(FileID)> HeaderResponsible) { 311 std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()}; 312 llvm::sort(Sorted); // Group by FileID. 313 ReferencedFilesBuilder Builder(SM); 314 for (auto It = Sorted.begin(); It < Sorted.end();) { 315 FileID FID = SM.getFileID(*It); 316 Builder.add(FID, *It); 317 // Cheaply skip over all the other locations from the same FileID. 318 // This avoids lots of redundant Loc->File lookups for the same file. 319 do 320 ++It; 321 while (It != Sorted.end() && SM.isInFileID(*It, FID)); 322 } 323 324 // If a header is not self-contained, we consider its symbols a logical part 325 // of the including file. Therefore, mark the parents of all used 326 // non-self-contained FileIDs as used. Perform this on FileIDs rather than 327 // HeaderIDs, as each inclusion of a non-self-contained file is distinct. 328 llvm::DenseSet<FileID> UserFiles; 329 for (FileID ID : Builder.Files) 330 UserFiles.insert(HeaderResponsible(ID)); 331 332 llvm::DenseSet<tooling::stdlib::Header> StdlibFiles; 333 for (const auto &Symbol : Locs.Stdlib) 334 for (const auto &Header : Symbol.headers()) 335 StdlibFiles.insert(Header); 336 337 return {std::move(UserFiles), std::move(StdlibFiles)}; 338 } 339 340 ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, 341 const IncludeStructure &Includes, 342 const SourceManager &SM) { 343 return findReferencedFiles(Locs, SM, [&SM, &Includes](FileID ID) { 344 return headerResponsible(ID, SM, Includes); 345 }); 346 } 347 348 std::vector<const Inclusion *> 349 getUnused(ParsedAST &AST, 350 const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) { 351 trace::Span Tracer("IncludeCleaner::getUnused"); 352 std::vector<const Inclusion *> Unused; 353 for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) { 354 if (!MFI.HeaderID) 355 continue; 356 auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID); 357 bool Used = ReferencedFiles.contains(IncludeID); 358 if (!Used && !mayConsiderUnused(MFI, AST)) { 359 dlog("{0} was not used, but is not eligible to be diagnosed as unused", 360 MFI.Written); 361 continue; 362 } 363 if (!Used) 364 Unused.push_back(&MFI); 365 dlog("{0} is {1}", MFI.Written, Used ? "USED" : "UNUSED"); 366 } 367 return Unused; 368 } 369 370 #ifndef NDEBUG 371 // Is FID a <built-in>, <scratch space> etc? 372 static bool isSpecialBuffer(FileID FID, const SourceManager &SM) { 373 const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile(); 374 return FI.getName().startswith("<"); 375 } 376 #endif 377 378 llvm::DenseSet<IncludeStructure::HeaderID> 379 translateToHeaderIDs(const ReferencedFiles &Files, 380 const IncludeStructure &Includes, 381 const SourceManager &SM) { 382 trace::Span Tracer("IncludeCleaner::translateToHeaderIDs"); 383 llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs; 384 TranslatedHeaderIDs.reserve(Files.User.size()); 385 for (FileID FID : Files.User) { 386 const FileEntry *FE = SM.getFileEntryForID(FID); 387 if (!FE) { 388 assert(isSpecialBuffer(FID, SM)); 389 continue; 390 } 391 const auto File = Includes.getID(FE); 392 assert(File); 393 TranslatedHeaderIDs.insert(*File); 394 } 395 for (tooling::stdlib::Header StdlibUsed : Files.Stdlib) 396 for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed)) 397 TranslatedHeaderIDs.insert(HID); 398 return TranslatedHeaderIDs; 399 } 400 401 std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) { 402 const auto &SM = AST.getSourceManager(); 403 404 auto Refs = findReferencedLocations(AST); 405 auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(), 406 AST.getSourceManager()); 407 auto ReferencedHeaders = 408 translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM); 409 return getUnused(AST, ReferencedHeaders); 410 } 411 412 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST, 413 llvm::StringRef Code) { 414 const Config &Cfg = Config::current(); 415 if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict || 416 Cfg.Diagnostics.SuppressAll || 417 Cfg.Diagnostics.Suppress.contains("unused-includes")) 418 return {}; 419 trace::Span Tracer("IncludeCleaner::issueUnusedIncludesDiagnostics"); 420 std::vector<Diag> Result; 421 std::string FileName = 422 AST.getSourceManager() 423 .getFileEntryForID(AST.getSourceManager().getMainFileID()) 424 ->getName() 425 .str(); 426 for (const auto *Inc : computeUnusedIncludes(AST)) { 427 Diag D; 428 D.Message = 429 llvm::formatv("included header {0} is not used", 430 llvm::sys::path::filename( 431 Inc->Written.substr(1, Inc->Written.size() - 2), 432 llvm::sys::path::Style::posix)); 433 D.Name = "unused-includes"; 434 D.Source = Diag::DiagSource::Clangd; 435 D.File = FileName; 436 D.Severity = DiagnosticsEngine::Warning; 437 D.Tags.push_back(Unnecessary); 438 D.Range = getDiagnosticRange(Code, Inc->HashOffset); 439 // FIXME(kirillbobyrev): Removing inclusion might break the code if the 440 // used headers are only reachable transitively through this one. Suggest 441 // including them directly instead. 442 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas 443 // (keep/export) remove the warning once we support IWYU pragmas. 444 D.Fixes.emplace_back(); 445 D.Fixes.back().Message = "remove #include directive"; 446 D.Fixes.back().Edits.emplace_back(); 447 D.Fixes.back().Edits.back().range.start.line = Inc->HashLine; 448 D.Fixes.back().Edits.back().range.end.line = Inc->HashLine + 1; 449 D.InsideMainFile = true; 450 Result.push_back(std::move(D)); 451 } 452 return Result; 453 } 454 455 } // namespace clangd 456 } // namespace clang 457