1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" 10 11 #include "clang/Frontend/CompilerInstance.h" 12 #include "clang/Lex/Preprocessor.h" 13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" 14 #include "llvm/Support/StringSaver.h" 15 16 using namespace clang; 17 using namespace tooling; 18 using namespace dependencies; 19 20 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, 21 ASTReader &Reader, 22 const serialization::ModuleFile &MF) { 23 // Only preserve search paths that were used during the dependency scan. 24 std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries; 25 Opts.UserEntries.clear(); 26 for (unsigned I = 0; I < Entries.size(); ++I) 27 if (MF.SearchPathUsage[I]) 28 Opts.UserEntries.push_back(Entries[I]); 29 } 30 31 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( 32 const ModuleDeps &Deps, 33 llvm::function_ref<void(CompilerInvocation &)> Optimize) const { 34 // Make a deep copy of the original Clang invocation. 35 CompilerInvocation CI(OriginalInvocation); 36 37 CI.getLangOpts()->resetNonModularOptions(); 38 CI.getPreprocessorOpts().resetNonModularOptions(); 39 40 // Remove options incompatible with explicit module build or are likely to 41 // differ between identical modules discovered from different translation 42 // units. 43 CI.getFrontendOpts().Inputs.clear(); 44 CI.getFrontendOpts().OutputFile.clear(); 45 CI.getCodeGenOpts().MainFileName.clear(); 46 CI.getCodeGenOpts().DwarfDebugFlags.clear(); 47 48 CI.getFrontendOpts().ProgramAction = frontend::GenerateModule; 49 CI.getLangOpts()->ModuleName = Deps.ID.ModuleName; 50 CI.getFrontendOpts().IsSystemModule = Deps.IsSystem; 51 52 CI.getLangOpts()->ImplicitModules = false; 53 CI.getHeaderSearchOpts().ImplicitModuleMaps = false; 54 55 // Report the prebuilt modules this module uses. 56 for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) 57 CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); 58 59 CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps; 60 61 Optimize(CI); 62 63 // The original invocation probably didn't have strict context hash enabled. 64 // We will use the context hash of this invocation to distinguish between 65 // multiple incompatible versions of the same module and will use it when 66 // reporting dependencies to the clients. Let's make sure we're using 67 // **strict** context hash in order to prevent accidental sharing of 68 // incompatible modules (e.g. with differences in search paths). 69 CI.getHeaderSearchOpts().ModulesStrictContextHash = true; 70 71 return CI; 72 } 73 74 static std::vector<std::string> 75 serializeCompilerInvocation(const CompilerInvocation &CI) { 76 // Set up string allocator. 77 llvm::BumpPtrAllocator Alloc; 78 llvm::StringSaver Strings(Alloc); 79 auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); }; 80 81 // Synthesize full command line from the CompilerInvocation, including "-cc1". 82 SmallVector<const char *, 32> Args{"-cc1"}; 83 CI.generateCC1CommandLine(Args, SA); 84 85 // Convert arguments to the return type. 86 return std::vector<std::string>{Args.begin(), Args.end()}; 87 } 88 89 std::vector<std::string> ModuleDeps::getCanonicalCommandLine( 90 std::function<StringRef(ModuleID)> LookupPCMPath, 91 std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps) const { 92 CompilerInvocation CI(BuildInvocation); 93 FrontendOptions &FrontendOpts = CI.getFrontendOpts(); 94 95 InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(), 96 InputKind::Format::ModuleMap); 97 FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind); 98 FrontendOpts.OutputFile = std::string(LookupPCMPath(ID)); 99 100 dependencies::detail::collectPCMPaths(ClangModuleDeps, LookupPCMPath, 101 LookupModuleDeps, 102 FrontendOpts.ModuleFiles); 103 104 return serializeCompilerInvocation(CI); 105 } 106 107 std::vector<std::string> 108 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const { 109 return serializeCompilerInvocation(BuildInvocation); 110 } 111 112 void dependencies::detail::collectPCMPaths( 113 llvm::ArrayRef<ModuleID> Modules, 114 std::function<StringRef(ModuleID)> LookupPCMPath, 115 std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps, 116 std::vector<std::string> &PCMPaths) { 117 llvm::StringSet<> AlreadyAdded; 118 119 std::function<void(llvm::ArrayRef<ModuleID>)> AddArgs = 120 [&](llvm::ArrayRef<ModuleID> Modules) { 121 for (const ModuleID &MID : Modules) { 122 if (!AlreadyAdded.insert(MID.ModuleName + MID.ContextHash).second) 123 continue; 124 const ModuleDeps &M = LookupModuleDeps(MID); 125 // Depth first traversal. 126 AddArgs(M.ClangModuleDeps); 127 PCMPaths.push_back(LookupPCMPath(MID).str()); 128 } 129 }; 130 131 AddArgs(Modules); 132 } 133 134 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc, 135 FileChangeReason Reason, 136 SrcMgr::CharacteristicKind FileType, 137 FileID PrevFID) { 138 if (Reason != PPCallbacks::EnterFile) 139 return; 140 141 // This has to be delayed as the context hash can change at the start of 142 // `CompilerInstance::ExecuteAction`. 143 if (MDC.ContextHash.empty()) { 144 MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash(); 145 MDC.Consumer.handleContextHash(MDC.ContextHash); 146 } 147 148 SourceManager &SM = MDC.ScanInstance.getSourceManager(); 149 150 // Dependency generation really does want to go all the way to the 151 // file entry for a source location to find out what is depended on. 152 // We do not want #line markers to affect dependency generation! 153 if (Optional<StringRef> Filename = 154 SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc)))) 155 MDC.FileDeps.push_back( 156 std::string(llvm::sys::path::remove_leading_dotslash(*Filename))); 157 } 158 159 void ModuleDepCollectorPP::InclusionDirective( 160 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, 161 bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File, 162 StringRef SearchPath, StringRef RelativePath, const Module *Imported, 163 SrcMgr::CharacteristicKind FileType) { 164 if (!File && !Imported) { 165 // This is a non-modular include that HeaderSearch failed to find. Add it 166 // here as `FileChanged` will never see it. 167 MDC.FileDeps.push_back(std::string(FileName)); 168 } 169 handleImport(Imported); 170 } 171 172 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc, 173 ModuleIdPath Path, 174 const Module *Imported) { 175 handleImport(Imported); 176 } 177 178 void ModuleDepCollectorPP::handleImport(const Module *Imported) { 179 if (!Imported) 180 return; 181 182 const Module *TopLevelModule = Imported->getTopLevelModule(); 183 184 if (MDC.isPrebuiltModule(TopLevelModule)) 185 DirectPrebuiltModularDeps.insert(TopLevelModule); 186 else 187 DirectModularDeps.insert(TopLevelModule); 188 } 189 190 void ModuleDepCollectorPP::EndOfMainFile() { 191 FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID(); 192 MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager() 193 .getFileEntryForID(MainFileID) 194 ->getName()); 195 196 if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty()) 197 MDC.FileDeps.push_back( 198 MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude); 199 200 for (const Module *M : DirectModularDeps) { 201 // A top-level module might not be actually imported as a module when 202 // -fmodule-name is used to compile a translation unit that imports this 203 // module. In that case it can be skipped. The appropriate header 204 // dependencies will still be reported as expected. 205 if (!M->getASTFile()) 206 continue; 207 handleTopLevelModule(M); 208 } 209 210 MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts); 211 212 for (auto &&I : MDC.ModularDeps) 213 MDC.Consumer.handleModuleDependency(I.second); 214 215 for (auto &&I : MDC.FileDeps) 216 MDC.Consumer.handleFileDependency(I); 217 218 for (auto &&I : DirectPrebuiltModularDeps) 219 MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I}); 220 } 221 222 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { 223 assert(M == M->getTopLevelModule() && "Expected top level module!"); 224 225 // If this module has been handled already, just return its ID. 226 auto ModI = MDC.ModularDeps.insert({M, ModuleDeps{}}); 227 if (!ModI.second) 228 return ModI.first->second.ID; 229 230 ModuleDeps &MD = ModI.first->second; 231 232 MD.ID.ModuleName = M->getFullModuleName(); 233 MD.ImportedByMainFile = DirectModularDeps.contains(M); 234 MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName()); 235 MD.IsSystem = M->IsSystem; 236 237 const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor() 238 .getHeaderSearchInfo() 239 .getModuleMap() 240 .getModuleMapFileForUniquing(M); 241 242 if (ModuleMap) { 243 StringRef Path = ModuleMap->tryGetRealPathName(); 244 if (Path.empty()) 245 Path = ModuleMap->getName(); 246 MD.ClangModuleMapFile = std::string(Path); 247 } 248 249 serialization::ModuleFile *MF = 250 MDC.ScanInstance.getASTReader()->getModuleManager().lookup( 251 M->getASTFile()); 252 MDC.ScanInstance.getASTReader()->visitInputFiles( 253 *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) { 254 // __inferred_module.map is the result of the way in which an implicit 255 // module build handles inferred modules. It adds an overlay VFS with 256 // this file in the proper directory and relies on the rest of Clang to 257 // handle it like normal. With explicitly built modules we don't need 258 // to play VFS tricks, so replace it with the correct module map. 259 if (IF.getFile()->getName().endswith("__inferred_module.map")) { 260 MD.FileDeps.insert(ModuleMap->getName()); 261 return; 262 } 263 MD.FileDeps.insert(IF.getFile()->getName()); 264 }); 265 266 // We usually don't need to list the module map files of our dependencies when 267 // building a module explicitly: their semantics will be deserialized from PCM 268 // files. 269 // 270 // However, some module maps loaded implicitly during the dependency scan can 271 // describe anti-dependencies. That happens when this module, let's call it 272 // M1, is marked as '[no_undeclared_includes]' and tries to access a header 273 // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;' 274 // declaration. The explicit build needs the module map for M2 so that it 275 // knows that textually including "M2/M2.h" is not allowed. 276 // E.g., '__has_include("M2/M2.h")' should return false, but without M2's 277 // module map the explicit build would return true. 278 // 279 // An alternative approach would be to tell the explicit build what its 280 // textual dependencies are, instead of having it re-discover its 281 // anti-dependencies. For example, we could create and use an `-ivfs-overlay` 282 // with `fall-through: false` that explicitly listed the dependencies. 283 // However, that's more complicated to implement and harder to reason about. 284 if (M->NoUndeclaredIncludes) { 285 // We don't have a good way to determine which module map described the 286 // anti-dependency (let alone what's the corresponding top-level module 287 // map). We simply specify all the module maps in the order they were loaded 288 // during the implicit build during scan. 289 // TODO: Resolve this by serializing and only using Module::UndeclaredUses. 290 MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps( 291 *MF, [&](const FileEntry *FE) { 292 if (FE->getName().endswith("__inferred_module.map")) 293 return; 294 // The top-level modulemap of this module will be the input file. We 295 // don't need to specify it as a module map. 296 if (FE == ModuleMap) 297 return; 298 MD.ModuleMapFileDeps.push_back(FE->getName().str()); 299 }); 300 } 301 302 // Add direct prebuilt module dependencies now, so that we can use them when 303 // creating a CompilerInvocation and computing context hash for this 304 // ModuleDeps instance. 305 llvm::DenseSet<const Module *> SeenModules; 306 addAllSubmodulePrebuiltDeps(M, MD, SeenModules); 307 308 MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths( 309 MD, [&](CompilerInvocation &BuildInvocation) { 310 if (MDC.OptimizeArgs) 311 optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(), 312 *MDC.ScanInstance.getASTReader(), *MF); 313 }); 314 MD.ID.ContextHash = MD.BuildInvocation.getModuleHash(); 315 316 llvm::DenseSet<const Module *> AddedModules; 317 addAllSubmoduleDeps(M, MD, AddedModules); 318 319 return MD.ID; 320 } 321 322 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps( 323 const Module *M, ModuleDeps &MD, 324 llvm::DenseSet<const Module *> &SeenSubmodules) { 325 addModulePrebuiltDeps(M, MD, SeenSubmodules); 326 327 for (const Module *SubM : M->submodules()) 328 addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules); 329 } 330 331 void ModuleDepCollectorPP::addModulePrebuiltDeps( 332 const Module *M, ModuleDeps &MD, 333 llvm::DenseSet<const Module *> &SeenSubmodules) { 334 for (const Module *Import : M->Imports) 335 if (Import->getTopLevelModule() != M->getTopLevelModule()) 336 if (MDC.isPrebuiltModule(Import->getTopLevelModule())) 337 if (SeenSubmodules.insert(Import->getTopLevelModule()).second) 338 MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule()); 339 } 340 341 void ModuleDepCollectorPP::addAllSubmoduleDeps( 342 const Module *M, ModuleDeps &MD, 343 llvm::DenseSet<const Module *> &AddedModules) { 344 addModuleDep(M, MD, AddedModules); 345 346 for (const Module *SubM : M->submodules()) 347 addAllSubmoduleDeps(SubM, MD, AddedModules); 348 } 349 350 void ModuleDepCollectorPP::addModuleDep( 351 const Module *M, ModuleDeps &MD, 352 llvm::DenseSet<const Module *> &AddedModules) { 353 for (const Module *Import : M->Imports) { 354 if (Import->getTopLevelModule() != M->getTopLevelModule() && 355 !MDC.isPrebuiltModule(Import)) { 356 ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule()); 357 if (AddedModules.insert(Import->getTopLevelModule()).second) 358 MD.ClangModuleDeps.push_back(ImportID); 359 } 360 } 361 } 362 363 ModuleDepCollector::ModuleDepCollector( 364 std::unique_ptr<DependencyOutputOptions> Opts, 365 CompilerInstance &ScanInstance, DependencyConsumer &C, 366 CompilerInvocation &&OriginalCI, bool OptimizeArgs) 367 : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)), 368 OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {} 369 370 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) { 371 PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this)); 372 } 373 374 void ModuleDepCollector::attachToASTReader(ASTReader &R) {} 375 376 bool ModuleDepCollector::isPrebuiltModule(const Module *M) { 377 std::string Name(M->getTopLevelModuleName()); 378 const auto &PrebuiltModuleFiles = 379 ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles; 380 auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name); 381 if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end()) 382 return false; 383 assert("Prebuilt module came from the expected AST file" && 384 PrebuiltModuleFileIt->second == M->getASTFile()->getName()); 385 return true; 386 } 387