1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" 10 11 #include "clang/Frontend/CompilerInstance.h" 12 #include "clang/Lex/Preprocessor.h" 13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" 14 #include "llvm/Support/StringSaver.h" 15 16 using namespace clang; 17 using namespace tooling; 18 using namespace dependencies; 19 20 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, 21 ASTReader &Reader, 22 const serialization::ModuleFile &MF) { 23 // Only preserve search paths that were used during the dependency scan. 24 std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries; 25 Opts.UserEntries.clear(); 26 27 llvm::BitVector SearchPathUsage(Entries.size()); 28 llvm::DenseSet<const serialization::ModuleFile *> Visited; 29 std::function<void(const serialization::ModuleFile *)> VisitMF = 30 [&](const serialization::ModuleFile *MF) { 31 SearchPathUsage |= MF->SearchPathUsage; 32 Visited.insert(MF); 33 for (const serialization::ModuleFile *Import : MF->Imports) 34 if (!Visited.contains(Import)) 35 VisitMF(Import); 36 }; 37 VisitMF(&MF); 38 39 for (auto Idx : SearchPathUsage.set_bits()) 40 Opts.UserEntries.push_back(Entries[Idx]); 41 } 42 43 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( 44 const ModuleDeps &Deps, 45 llvm::function_ref<void(CompilerInvocation &)> Optimize) const { 46 // Make a deep copy of the original Clang invocation. 47 CompilerInvocation CI(OriginalInvocation); 48 49 CI.getLangOpts()->resetNonModularOptions(); 50 CI.getPreprocessorOpts().resetNonModularOptions(); 51 52 // Remove options incompatible with explicit module build or are likely to 53 // differ between identical modules discovered from different translation 54 // units. 55 CI.getFrontendOpts().Inputs.clear(); 56 CI.getFrontendOpts().OutputFile.clear(); 57 CI.getCodeGenOpts().MainFileName.clear(); 58 CI.getCodeGenOpts().DwarfDebugFlags.clear(); 59 CI.getDiagnosticOpts().DiagnosticSerializationFile.clear(); 60 CI.getDependencyOutputOpts().OutputFile.clear(); 61 CI.getDependencyOutputOpts().Targets.clear(); 62 63 CI.getFrontendOpts().ProgramAction = frontend::GenerateModule; 64 CI.getLangOpts()->ModuleName = Deps.ID.ModuleName; 65 CI.getFrontendOpts().IsSystemModule = Deps.IsSystem; 66 67 // Disable implicit modules and canonicalize options that are only used by 68 // implicit modules. 69 CI.getLangOpts()->ImplicitModules = false; 70 CI.getHeaderSearchOpts().ImplicitModuleMaps = false; 71 CI.getHeaderSearchOpts().ModuleCachePath.clear(); 72 CI.getHeaderSearchOpts().ModulesValidateOncePerBuildSession = false; 73 CI.getHeaderSearchOpts().BuildSessionTimestamp = 0; 74 // The specific values we canonicalize to for pruning don't affect behaviour, 75 /// so use the default values so they will be dropped from the command-line. 76 CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60; 77 CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60; 78 79 // Report the prebuilt modules this module uses. 80 for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) 81 CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); 82 83 CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps; 84 85 Optimize(CI); 86 87 // The original invocation probably didn't have strict context hash enabled. 88 // We will use the context hash of this invocation to distinguish between 89 // multiple incompatible versions of the same module and will use it when 90 // reporting dependencies to the clients. Let's make sure we're using 91 // **strict** context hash in order to prevent accidental sharing of 92 // incompatible modules (e.g. with differences in search paths). 93 CI.getHeaderSearchOpts().ModulesStrictContextHash = true; 94 95 return CI; 96 } 97 98 static std::vector<std::string> 99 serializeCompilerInvocation(const CompilerInvocation &CI) { 100 // Set up string allocator. 101 llvm::BumpPtrAllocator Alloc; 102 llvm::StringSaver Strings(Alloc); 103 auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); }; 104 105 // Synthesize full command line from the CompilerInvocation, including "-cc1". 106 SmallVector<const char *, 32> Args{"-cc1"}; 107 CI.generateCC1CommandLine(Args, SA); 108 109 // Convert arguments to the return type. 110 return std::vector<std::string>{Args.begin(), Args.end()}; 111 } 112 113 static std::vector<std::string> splitString(std::string S, char Separator) { 114 SmallVector<StringRef> Segments; 115 StringRef(S).split(Segments, Separator); 116 std::vector<std::string> Result; 117 Result.reserve(Segments.size()); 118 for (StringRef Segment : Segments) 119 Result.push_back(Segment.str()); 120 return Result; 121 } 122 123 std::vector<std::string> ModuleDeps::getCanonicalCommandLine( 124 llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)> 125 LookupModuleOutput) const { 126 CompilerInvocation CI(BuildInvocation); 127 FrontendOptions &FrontendOpts = CI.getFrontendOpts(); 128 129 InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(), 130 InputKind::Format::ModuleMap); 131 FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind); 132 FrontendOpts.OutputFile = 133 LookupModuleOutput(ID, ModuleOutputKind::ModuleFile); 134 if (HadSerializedDiagnostics) 135 CI.getDiagnosticOpts().DiagnosticSerializationFile = 136 LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile); 137 if (HadDependencyFile) { 138 CI.getDependencyOutputOpts().OutputFile = 139 LookupModuleOutput(ID, ModuleOutputKind::DependencyFile); 140 CI.getDependencyOutputOpts().Targets = splitString( 141 LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0'); 142 } 143 144 for (ModuleID MID : ClangModuleDeps) 145 FrontendOpts.ModuleFiles.push_back( 146 LookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); 147 148 return serializeCompilerInvocation(CI); 149 } 150 151 std::vector<std::string> 152 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const { 153 return serializeCompilerInvocation(BuildInvocation); 154 } 155 156 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc, 157 FileChangeReason Reason, 158 SrcMgr::CharacteristicKind FileType, 159 FileID PrevFID) { 160 if (Reason != PPCallbacks::EnterFile) 161 return; 162 163 // This has to be delayed as the context hash can change at the start of 164 // `CompilerInstance::ExecuteAction`. 165 if (MDC.ContextHash.empty()) { 166 MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash(); 167 MDC.Consumer.handleContextHash(MDC.ContextHash); 168 } 169 170 SourceManager &SM = MDC.ScanInstance.getSourceManager(); 171 172 // Dependency generation really does want to go all the way to the 173 // file entry for a source location to find out what is depended on. 174 // We do not want #line markers to affect dependency generation! 175 if (Optional<StringRef> Filename = 176 SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc)))) 177 MDC.FileDeps.push_back( 178 std::string(llvm::sys::path::remove_leading_dotslash(*Filename))); 179 } 180 181 void ModuleDepCollectorPP::InclusionDirective( 182 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, 183 bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File, 184 StringRef SearchPath, StringRef RelativePath, const Module *Imported, 185 SrcMgr::CharacteristicKind FileType) { 186 if (!File && !Imported) { 187 // This is a non-modular include that HeaderSearch failed to find. Add it 188 // here as `FileChanged` will never see it. 189 MDC.FileDeps.push_back(std::string(FileName)); 190 } 191 handleImport(Imported); 192 } 193 194 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc, 195 ModuleIdPath Path, 196 const Module *Imported) { 197 handleImport(Imported); 198 } 199 200 void ModuleDepCollectorPP::handleImport(const Module *Imported) { 201 if (!Imported) 202 return; 203 204 const Module *TopLevelModule = Imported->getTopLevelModule(); 205 206 if (MDC.isPrebuiltModule(TopLevelModule)) 207 DirectPrebuiltModularDeps.insert(TopLevelModule); 208 else 209 DirectModularDeps.insert(TopLevelModule); 210 } 211 212 void ModuleDepCollectorPP::EndOfMainFile() { 213 FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID(); 214 MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager() 215 .getFileEntryForID(MainFileID) 216 ->getName()); 217 218 if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty()) 219 MDC.FileDeps.push_back( 220 MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude); 221 222 for (const Module *M : DirectModularDeps) { 223 // A top-level module might not be actually imported as a module when 224 // -fmodule-name is used to compile a translation unit that imports this 225 // module. In that case it can be skipped. The appropriate header 226 // dependencies will still be reported as expected. 227 if (!M->getASTFile()) 228 continue; 229 handleTopLevelModule(M); 230 } 231 232 MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts); 233 234 for (auto &&I : MDC.ModularDeps) 235 MDC.Consumer.handleModuleDependency(*I.second); 236 237 for (auto &&I : MDC.FileDeps) 238 MDC.Consumer.handleFileDependency(I); 239 240 for (auto &&I : DirectPrebuiltModularDeps) 241 MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I}); 242 } 243 244 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { 245 assert(M == M->getTopLevelModule() && "Expected top level module!"); 246 247 // If this module has been handled already, just return its ID. 248 auto ModI = MDC.ModularDeps.insert({M, nullptr}); 249 if (!ModI.second) 250 return ModI.first->second->ID; 251 252 ModI.first->second = std::make_unique<ModuleDeps>(); 253 ModuleDeps &MD = *ModI.first->second; 254 255 MD.ID.ModuleName = M->getFullModuleName(); 256 MD.ImportedByMainFile = DirectModularDeps.contains(M); 257 MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName()); 258 MD.IsSystem = M->IsSystem; 259 260 const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor() 261 .getHeaderSearchInfo() 262 .getModuleMap() 263 .getModuleMapFileForUniquing(M); 264 265 if (ModuleMap) { 266 StringRef Path = ModuleMap->tryGetRealPathName(); 267 if (Path.empty()) 268 Path = ModuleMap->getName(); 269 MD.ClangModuleMapFile = std::string(Path); 270 } 271 272 serialization::ModuleFile *MF = 273 MDC.ScanInstance.getASTReader()->getModuleManager().lookup( 274 M->getASTFile()); 275 MDC.ScanInstance.getASTReader()->visitInputFiles( 276 *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) { 277 // __inferred_module.map is the result of the way in which an implicit 278 // module build handles inferred modules. It adds an overlay VFS with 279 // this file in the proper directory and relies on the rest of Clang to 280 // handle it like normal. With explicitly built modules we don't need 281 // to play VFS tricks, so replace it with the correct module map. 282 if (IF.getFile()->getName().endswith("__inferred_module.map")) { 283 MD.FileDeps.insert(ModuleMap->getName()); 284 return; 285 } 286 MD.FileDeps.insert(IF.getFile()->getName()); 287 }); 288 289 // We usually don't need to list the module map files of our dependencies when 290 // building a module explicitly: their semantics will be deserialized from PCM 291 // files. 292 // 293 // However, some module maps loaded implicitly during the dependency scan can 294 // describe anti-dependencies. That happens when this module, let's call it 295 // M1, is marked as '[no_undeclared_includes]' and tries to access a header 296 // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;' 297 // declaration. The explicit build needs the module map for M2 so that it 298 // knows that textually including "M2/M2.h" is not allowed. 299 // E.g., '__has_include("M2/M2.h")' should return false, but without M2's 300 // module map the explicit build would return true. 301 // 302 // An alternative approach would be to tell the explicit build what its 303 // textual dependencies are, instead of having it re-discover its 304 // anti-dependencies. For example, we could create and use an `-ivfs-overlay` 305 // with `fall-through: false` that explicitly listed the dependencies. 306 // However, that's more complicated to implement and harder to reason about. 307 if (M->NoUndeclaredIncludes) { 308 // We don't have a good way to determine which module map described the 309 // anti-dependency (let alone what's the corresponding top-level module 310 // map). We simply specify all the module maps in the order they were loaded 311 // during the implicit build during scan. 312 // TODO: Resolve this by serializing and only using Module::UndeclaredUses. 313 MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps( 314 *MF, [&](const FileEntry *FE) { 315 if (FE->getName().endswith("__inferred_module.map")) 316 return; 317 // The top-level modulemap of this module will be the input file. We 318 // don't need to specify it as a module map. 319 if (FE == ModuleMap) 320 return; 321 MD.ModuleMapFileDeps.push_back(FE->getName().str()); 322 }); 323 } 324 325 // Add direct prebuilt module dependencies now, so that we can use them when 326 // creating a CompilerInvocation and computing context hash for this 327 // ModuleDeps instance. 328 llvm::DenseSet<const Module *> SeenModules; 329 addAllSubmodulePrebuiltDeps(M, MD, SeenModules); 330 331 MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths( 332 MD, [&](CompilerInvocation &BuildInvocation) { 333 if (MDC.OptimizeArgs) 334 optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(), 335 *MDC.ScanInstance.getASTReader(), *MF); 336 }); 337 MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts() 338 .DiagnosticSerializationFile.empty(); 339 MD.HadDependencyFile = 340 !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty(); 341 // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in 342 // the context hash since it can affect the command-line. 343 MD.ID.ContextHash = MD.BuildInvocation.getModuleHash(); 344 345 llvm::DenseSet<const Module *> AddedModules; 346 addAllSubmoduleDeps(M, MD, AddedModules); 347 348 return MD.ID; 349 } 350 351 static void forEachSubmoduleSorted(const Module *M, 352 llvm::function_ref<void(const Module *)> F) { 353 // Submodule order depends on order of header includes for inferred submodules 354 // we don't care about the exact order, so sort so that it's consistent across 355 // TUs to improve sharing. 356 SmallVector<const Module *> Submodules(M->submodule_begin(), 357 M->submodule_end()); 358 llvm::stable_sort(Submodules, [](const Module *A, const Module *B) { 359 return A->Name < B->Name; 360 }); 361 for (const Module *SubM : Submodules) 362 F(SubM); 363 } 364 365 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps( 366 const Module *M, ModuleDeps &MD, 367 llvm::DenseSet<const Module *> &SeenSubmodules) { 368 addModulePrebuiltDeps(M, MD, SeenSubmodules); 369 370 forEachSubmoduleSorted(M, [&](const Module *SubM) { 371 addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules); 372 }); 373 } 374 375 void ModuleDepCollectorPP::addModulePrebuiltDeps( 376 const Module *M, ModuleDeps &MD, 377 llvm::DenseSet<const Module *> &SeenSubmodules) { 378 for (const Module *Import : M->Imports) 379 if (Import->getTopLevelModule() != M->getTopLevelModule()) 380 if (MDC.isPrebuiltModule(Import->getTopLevelModule())) 381 if (SeenSubmodules.insert(Import->getTopLevelModule()).second) 382 MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule()); 383 } 384 385 void ModuleDepCollectorPP::addAllSubmoduleDeps( 386 const Module *M, ModuleDeps &MD, 387 llvm::DenseSet<const Module *> &AddedModules) { 388 addModuleDep(M, MD, AddedModules); 389 390 forEachSubmoduleSorted(M, [&](const Module *SubM) { 391 addAllSubmoduleDeps(SubM, MD, AddedModules); 392 }); 393 } 394 395 void ModuleDepCollectorPP::addModuleDep( 396 const Module *M, ModuleDeps &MD, 397 llvm::DenseSet<const Module *> &AddedModules) { 398 for (const Module *Import : M->Imports) { 399 if (Import->getTopLevelModule() != M->getTopLevelModule() && 400 !MDC.isPrebuiltModule(Import)) { 401 ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule()); 402 if (AddedModules.insert(Import->getTopLevelModule()).second) 403 MD.ClangModuleDeps.push_back(ImportID); 404 } 405 } 406 } 407 408 ModuleDepCollector::ModuleDepCollector( 409 std::unique_ptr<DependencyOutputOptions> Opts, 410 CompilerInstance &ScanInstance, DependencyConsumer &C, 411 CompilerInvocation &&OriginalCI, bool OptimizeArgs) 412 : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)), 413 OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {} 414 415 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) { 416 PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this)); 417 } 418 419 void ModuleDepCollector::attachToASTReader(ASTReader &R) {} 420 421 bool ModuleDepCollector::isPrebuiltModule(const Module *M) { 422 std::string Name(M->getTopLevelModuleName()); 423 const auto &PrebuiltModuleFiles = 424 ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles; 425 auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name); 426 if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end()) 427 return false; 428 assert("Prebuilt module came from the expected AST file" && 429 PrebuiltModuleFileIt->second == M->getASTFile()->getName()); 430 return true; 431 } 432