1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" 10 11 #include "clang/Frontend/CompilerInstance.h" 12 #include "clang/Lex/Preprocessor.h" 13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" 14 #include "llvm/Support/StringSaver.h" 15 16 using namespace clang; 17 using namespace tooling; 18 using namespace dependencies; 19 20 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, 21 ASTReader &Reader, 22 const serialization::ModuleFile &MF) { 23 // Only preserve search paths that were used during the dependency scan. 24 std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries; 25 Opts.UserEntries.clear(); 26 27 llvm::BitVector SearchPathUsage(Entries.size()); 28 llvm::DenseSet<const serialization::ModuleFile *> Visited; 29 std::function<void(const serialization::ModuleFile *)> VisitMF = 30 [&](const serialization::ModuleFile *MF) { 31 SearchPathUsage |= MF->SearchPathUsage; 32 Visited.insert(MF); 33 for (const serialization::ModuleFile *Import : MF->Imports) 34 if (!Visited.contains(Import)) 35 VisitMF(Import); 36 }; 37 VisitMF(&MF); 38 39 for (auto Idx : SearchPathUsage.set_bits()) 40 Opts.UserEntries.push_back(Entries[Idx]); 41 } 42 43 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( 44 const ModuleDeps &Deps, 45 llvm::function_ref<void(CompilerInvocation &)> Optimize) const { 46 // Make a deep copy of the original Clang invocation. 47 CompilerInvocation CI(OriginalInvocation); 48 49 CI.getLangOpts()->resetNonModularOptions(); 50 CI.getPreprocessorOpts().resetNonModularOptions(); 51 52 // Remove options incompatible with explicit module build or are likely to 53 // differ between identical modules discovered from different translation 54 // units. 55 CI.getFrontendOpts().Inputs.clear(); 56 CI.getFrontendOpts().OutputFile.clear(); 57 CI.getCodeGenOpts().MainFileName.clear(); 58 CI.getCodeGenOpts().DwarfDebugFlags.clear(); 59 60 CI.getFrontendOpts().ProgramAction = frontend::GenerateModule; 61 CI.getLangOpts()->ModuleName = Deps.ID.ModuleName; 62 CI.getFrontendOpts().IsSystemModule = Deps.IsSystem; 63 64 CI.getLangOpts()->ImplicitModules = false; 65 CI.getHeaderSearchOpts().ImplicitModuleMaps = false; 66 CI.getHeaderSearchOpts().ModuleCachePath.clear(); 67 68 // Report the prebuilt modules this module uses. 69 for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) 70 CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); 71 72 CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps; 73 74 Optimize(CI); 75 76 // The original invocation probably didn't have strict context hash enabled. 77 // We will use the context hash of this invocation to distinguish between 78 // multiple incompatible versions of the same module and will use it when 79 // reporting dependencies to the clients. Let's make sure we're using 80 // **strict** context hash in order to prevent accidental sharing of 81 // incompatible modules (e.g. with differences in search paths). 82 CI.getHeaderSearchOpts().ModulesStrictContextHash = true; 83 84 return CI; 85 } 86 87 static std::vector<std::string> 88 serializeCompilerInvocation(const CompilerInvocation &CI) { 89 // Set up string allocator. 90 llvm::BumpPtrAllocator Alloc; 91 llvm::StringSaver Strings(Alloc); 92 auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); }; 93 94 // Synthesize full command line from the CompilerInvocation, including "-cc1". 95 SmallVector<const char *, 32> Args{"-cc1"}; 96 CI.generateCC1CommandLine(Args, SA); 97 98 // Convert arguments to the return type. 99 return std::vector<std::string>{Args.begin(), Args.end()}; 100 } 101 102 std::vector<std::string> ModuleDeps::getCanonicalCommandLine( 103 std::function<StringRef(ModuleID)> LookupPCMPath) const { 104 CompilerInvocation CI(BuildInvocation); 105 FrontendOptions &FrontendOpts = CI.getFrontendOpts(); 106 107 InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(), 108 InputKind::Format::ModuleMap); 109 FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind); 110 FrontendOpts.OutputFile = std::string(LookupPCMPath(ID)); 111 112 for (ModuleID MID : ClangModuleDeps) 113 FrontendOpts.ModuleFiles.emplace_back(LookupPCMPath(MID)); 114 115 return serializeCompilerInvocation(CI); 116 } 117 118 std::vector<std::string> 119 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const { 120 return serializeCompilerInvocation(BuildInvocation); 121 } 122 123 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc, 124 FileChangeReason Reason, 125 SrcMgr::CharacteristicKind FileType, 126 FileID PrevFID) { 127 if (Reason != PPCallbacks::EnterFile) 128 return; 129 130 // This has to be delayed as the context hash can change at the start of 131 // `CompilerInstance::ExecuteAction`. 132 if (MDC.ContextHash.empty()) { 133 MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash(); 134 MDC.Consumer.handleContextHash(MDC.ContextHash); 135 } 136 137 SourceManager &SM = MDC.ScanInstance.getSourceManager(); 138 139 // Dependency generation really does want to go all the way to the 140 // file entry for a source location to find out what is depended on. 141 // We do not want #line markers to affect dependency generation! 142 if (Optional<StringRef> Filename = 143 SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc)))) 144 MDC.FileDeps.push_back( 145 std::string(llvm::sys::path::remove_leading_dotslash(*Filename))); 146 } 147 148 void ModuleDepCollectorPP::InclusionDirective( 149 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, 150 bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File, 151 StringRef SearchPath, StringRef RelativePath, const Module *Imported, 152 SrcMgr::CharacteristicKind FileType) { 153 if (!File && !Imported) { 154 // This is a non-modular include that HeaderSearch failed to find. Add it 155 // here as `FileChanged` will never see it. 156 MDC.FileDeps.push_back(std::string(FileName)); 157 } 158 handleImport(Imported); 159 } 160 161 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc, 162 ModuleIdPath Path, 163 const Module *Imported) { 164 handleImport(Imported); 165 } 166 167 void ModuleDepCollectorPP::handleImport(const Module *Imported) { 168 if (!Imported) 169 return; 170 171 const Module *TopLevelModule = Imported->getTopLevelModule(); 172 173 if (MDC.isPrebuiltModule(TopLevelModule)) 174 DirectPrebuiltModularDeps.insert(TopLevelModule); 175 else 176 DirectModularDeps.insert(TopLevelModule); 177 } 178 179 void ModuleDepCollectorPP::EndOfMainFile() { 180 FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID(); 181 MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager() 182 .getFileEntryForID(MainFileID) 183 ->getName()); 184 185 if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty()) 186 MDC.FileDeps.push_back( 187 MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude); 188 189 for (const Module *M : DirectModularDeps) { 190 // A top-level module might not be actually imported as a module when 191 // -fmodule-name is used to compile a translation unit that imports this 192 // module. In that case it can be skipped. The appropriate header 193 // dependencies will still be reported as expected. 194 if (!M->getASTFile()) 195 continue; 196 handleTopLevelModule(M); 197 } 198 199 MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts); 200 201 for (auto &&I : MDC.ModularDeps) 202 MDC.Consumer.handleModuleDependency(*I.second); 203 204 for (auto &&I : MDC.FileDeps) 205 MDC.Consumer.handleFileDependency(I); 206 207 for (auto &&I : DirectPrebuiltModularDeps) 208 MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I}); 209 } 210 211 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { 212 assert(M == M->getTopLevelModule() && "Expected top level module!"); 213 214 // If this module has been handled already, just return its ID. 215 auto ModI = MDC.ModularDeps.insert({M, nullptr}); 216 if (!ModI.second) 217 return ModI.first->second->ID; 218 219 ModI.first->second = std::make_unique<ModuleDeps>(); 220 ModuleDeps &MD = *ModI.first->second; 221 222 MD.ID.ModuleName = M->getFullModuleName(); 223 MD.ImportedByMainFile = DirectModularDeps.contains(M); 224 MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName()); 225 MD.IsSystem = M->IsSystem; 226 227 const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor() 228 .getHeaderSearchInfo() 229 .getModuleMap() 230 .getModuleMapFileForUniquing(M); 231 232 if (ModuleMap) { 233 StringRef Path = ModuleMap->tryGetRealPathName(); 234 if (Path.empty()) 235 Path = ModuleMap->getName(); 236 MD.ClangModuleMapFile = std::string(Path); 237 } 238 239 serialization::ModuleFile *MF = 240 MDC.ScanInstance.getASTReader()->getModuleManager().lookup( 241 M->getASTFile()); 242 MDC.ScanInstance.getASTReader()->visitInputFiles( 243 *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) { 244 // __inferred_module.map is the result of the way in which an implicit 245 // module build handles inferred modules. It adds an overlay VFS with 246 // this file in the proper directory and relies on the rest of Clang to 247 // handle it like normal. With explicitly built modules we don't need 248 // to play VFS tricks, so replace it with the correct module map. 249 if (IF.getFile()->getName().endswith("__inferred_module.map")) { 250 MD.FileDeps.insert(ModuleMap->getName()); 251 return; 252 } 253 MD.FileDeps.insert(IF.getFile()->getName()); 254 }); 255 256 // We usually don't need to list the module map files of our dependencies when 257 // building a module explicitly: their semantics will be deserialized from PCM 258 // files. 259 // 260 // However, some module maps loaded implicitly during the dependency scan can 261 // describe anti-dependencies. That happens when this module, let's call it 262 // M1, is marked as '[no_undeclared_includes]' and tries to access a header 263 // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;' 264 // declaration. The explicit build needs the module map for M2 so that it 265 // knows that textually including "M2/M2.h" is not allowed. 266 // E.g., '__has_include("M2/M2.h")' should return false, but without M2's 267 // module map the explicit build would return true. 268 // 269 // An alternative approach would be to tell the explicit build what its 270 // textual dependencies are, instead of having it re-discover its 271 // anti-dependencies. For example, we could create and use an `-ivfs-overlay` 272 // with `fall-through: false` that explicitly listed the dependencies. 273 // However, that's more complicated to implement and harder to reason about. 274 if (M->NoUndeclaredIncludes) { 275 // We don't have a good way to determine which module map described the 276 // anti-dependency (let alone what's the corresponding top-level module 277 // map). We simply specify all the module maps in the order they were loaded 278 // during the implicit build during scan. 279 // TODO: Resolve this by serializing and only using Module::UndeclaredUses. 280 MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps( 281 *MF, [&](const FileEntry *FE) { 282 if (FE->getName().endswith("__inferred_module.map")) 283 return; 284 // The top-level modulemap of this module will be the input file. We 285 // don't need to specify it as a module map. 286 if (FE == ModuleMap) 287 return; 288 MD.ModuleMapFileDeps.push_back(FE->getName().str()); 289 }); 290 } 291 292 // Add direct prebuilt module dependencies now, so that we can use them when 293 // creating a CompilerInvocation and computing context hash for this 294 // ModuleDeps instance. 295 llvm::DenseSet<const Module *> SeenModules; 296 addAllSubmodulePrebuiltDeps(M, MD, SeenModules); 297 298 MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths( 299 MD, [&](CompilerInvocation &BuildInvocation) { 300 if (MDC.OptimizeArgs) 301 optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(), 302 *MDC.ScanInstance.getASTReader(), *MF); 303 }); 304 MD.ID.ContextHash = MD.BuildInvocation.getModuleHash(); 305 306 llvm::DenseSet<const Module *> AddedModules; 307 addAllSubmoduleDeps(M, MD, AddedModules); 308 309 return MD.ID; 310 } 311 312 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps( 313 const Module *M, ModuleDeps &MD, 314 llvm::DenseSet<const Module *> &SeenSubmodules) { 315 addModulePrebuiltDeps(M, MD, SeenSubmodules); 316 317 for (const Module *SubM : M->submodules()) 318 addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules); 319 } 320 321 void ModuleDepCollectorPP::addModulePrebuiltDeps( 322 const Module *M, ModuleDeps &MD, 323 llvm::DenseSet<const Module *> &SeenSubmodules) { 324 for (const Module *Import : M->Imports) 325 if (Import->getTopLevelModule() != M->getTopLevelModule()) 326 if (MDC.isPrebuiltModule(Import->getTopLevelModule())) 327 if (SeenSubmodules.insert(Import->getTopLevelModule()).second) 328 MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule()); 329 } 330 331 void ModuleDepCollectorPP::addAllSubmoduleDeps( 332 const Module *M, ModuleDeps &MD, 333 llvm::DenseSet<const Module *> &AddedModules) { 334 addModuleDep(M, MD, AddedModules); 335 336 for (const Module *SubM : M->submodules()) 337 addAllSubmoduleDeps(SubM, MD, AddedModules); 338 } 339 340 void ModuleDepCollectorPP::addModuleDep( 341 const Module *M, ModuleDeps &MD, 342 llvm::DenseSet<const Module *> &AddedModules) { 343 for (const Module *Import : M->Imports) { 344 if (Import->getTopLevelModule() != M->getTopLevelModule() && 345 !MDC.isPrebuiltModule(Import)) { 346 ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule()); 347 if (AddedModules.insert(Import->getTopLevelModule()).second) 348 MD.ClangModuleDeps.push_back(ImportID); 349 } 350 } 351 } 352 353 ModuleDepCollector::ModuleDepCollector( 354 std::unique_ptr<DependencyOutputOptions> Opts, 355 CompilerInstance &ScanInstance, DependencyConsumer &C, 356 CompilerInvocation &&OriginalCI, bool OptimizeArgs) 357 : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)), 358 OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {} 359 360 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) { 361 PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this)); 362 } 363 364 void ModuleDepCollector::attachToASTReader(ASTReader &R) {} 365 366 bool ModuleDepCollector::isPrebuiltModule(const Module *M) { 367 std::string Name(M->getTopLevelModuleName()); 368 const auto &PrebuiltModuleFiles = 369 ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles; 370 auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name); 371 if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end()) 372 return false; 373 assert("Prebuilt module came from the expected AST file" && 374 PrebuiltModuleFileIt->second == M->getASTFile()->getName()); 375 return true; 376 } 377