1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10
11 #include "clang/Basic/MakeSupport.h"
12 #include "clang/Frontend/CompilerInstance.h"
13 #include "clang/Lex/Preprocessor.h"
14 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
15 #include "llvm/Support/StringSaver.h"
16
17 using namespace clang;
18 using namespace tooling;
19 using namespace dependencies;
20
optimizeHeaderSearchOpts(HeaderSearchOptions & Opts,ASTReader & Reader,const serialization::ModuleFile & MF)21 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
22 ASTReader &Reader,
23 const serialization::ModuleFile &MF) {
24 // Only preserve search paths that were used during the dependency scan.
25 std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
26 Opts.UserEntries.clear();
27
28 llvm::BitVector SearchPathUsage(Entries.size());
29 llvm::DenseSet<const serialization::ModuleFile *> Visited;
30 std::function<void(const serialization::ModuleFile *)> VisitMF =
31 [&](const serialization::ModuleFile *MF) {
32 SearchPathUsage |= MF->SearchPathUsage;
33 Visited.insert(MF);
34 for (const serialization::ModuleFile *Import : MF->Imports)
35 if (!Visited.contains(Import))
36 VisitMF(Import);
37 };
38 VisitMF(&MF);
39
40 for (auto Idx : SearchPathUsage.set_bits())
41 Opts.UserEntries.push_back(Entries[Idx]);
42 }
43
makeInvocationForModuleBuildWithoutPaths(const ModuleDeps & Deps,llvm::function_ref<void (CompilerInvocation &)> Optimize) const44 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
45 const ModuleDeps &Deps,
46 llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
47 // Make a deep copy of the original Clang invocation.
48 CompilerInvocation CI(OriginalInvocation);
49
50 CI.getLangOpts()->resetNonModularOptions();
51 CI.getPreprocessorOpts().resetNonModularOptions();
52
53 // Remove options incompatible with explicit module build or are likely to
54 // differ between identical modules discovered from different translation
55 // units.
56 CI.getFrontendOpts().Inputs.clear();
57 CI.getFrontendOpts().OutputFile.clear();
58 CI.getCodeGenOpts().MainFileName.clear();
59 CI.getCodeGenOpts().DwarfDebugFlags.clear();
60 CI.getDiagnosticOpts().DiagnosticSerializationFile.clear();
61 CI.getDependencyOutputOpts().OutputFile.clear();
62 CI.getDependencyOutputOpts().Targets.clear();
63
64 CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
65 CI.getLangOpts()->ModuleName = Deps.ID.ModuleName;
66 CI.getFrontendOpts().IsSystemModule = Deps.IsSystem;
67
68 // Disable implicit modules and canonicalize options that are only used by
69 // implicit modules.
70 CI.getLangOpts()->ImplicitModules = false;
71 CI.getHeaderSearchOpts().ImplicitModuleMaps = false;
72 CI.getHeaderSearchOpts().ModuleCachePath.clear();
73 CI.getHeaderSearchOpts().ModulesValidateOncePerBuildSession = false;
74 CI.getHeaderSearchOpts().BuildSessionTimestamp = 0;
75 // The specific values we canonicalize to for pruning don't affect behaviour,
76 /// so use the default values so they will be dropped from the command-line.
77 CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60;
78 CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60;
79
80 // Report the prebuilt modules this module uses.
81 for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
82 CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
83
84 CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps;
85
86 Optimize(CI);
87
88 // The original invocation probably didn't have strict context hash enabled.
89 // We will use the context hash of this invocation to distinguish between
90 // multiple incompatible versions of the same module and will use it when
91 // reporting dependencies to the clients. Let's make sure we're using
92 // **strict** context hash in order to prevent accidental sharing of
93 // incompatible modules (e.g. with differences in search paths).
94 CI.getHeaderSearchOpts().ModulesStrictContextHash = true;
95
96 return CI;
97 }
98
99 static std::vector<std::string>
serializeCompilerInvocation(const CompilerInvocation & CI)100 serializeCompilerInvocation(const CompilerInvocation &CI) {
101 // Set up string allocator.
102 llvm::BumpPtrAllocator Alloc;
103 llvm::StringSaver Strings(Alloc);
104 auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); };
105
106 // Synthesize full command line from the CompilerInvocation, including "-cc1".
107 SmallVector<const char *, 32> Args{"-cc1"};
108 CI.generateCC1CommandLine(Args, SA);
109
110 // Convert arguments to the return type.
111 return std::vector<std::string>{Args.begin(), Args.end()};
112 }
113
splitString(std::string S,char Separator)114 static std::vector<std::string> splitString(std::string S, char Separator) {
115 SmallVector<StringRef> Segments;
116 StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false);
117 std::vector<std::string> Result;
118 Result.reserve(Segments.size());
119 for (StringRef Segment : Segments)
120 Result.push_back(Segment.str());
121 return Result;
122 }
123
getCanonicalCommandLine(llvm::function_ref<std::string (const ModuleID &,ModuleOutputKind)> LookupModuleOutput) const124 std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
125 llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
126 LookupModuleOutput) const {
127 CompilerInvocation CI(BuildInvocation);
128 FrontendOptions &FrontendOpts = CI.getFrontendOpts();
129
130 InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(),
131 InputKind::Format::ModuleMap);
132 FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind);
133 FrontendOpts.OutputFile =
134 LookupModuleOutput(ID, ModuleOutputKind::ModuleFile);
135 if (HadSerializedDiagnostics)
136 CI.getDiagnosticOpts().DiagnosticSerializationFile =
137 LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile);
138 if (HadDependencyFile) {
139 DependencyOutputOptions &DepOpts = CI.getDependencyOutputOpts();
140 DepOpts.OutputFile =
141 LookupModuleOutput(ID, ModuleOutputKind::DependencyFile);
142 DepOpts.Targets = splitString(
143 LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0');
144 if (!DepOpts.OutputFile.empty() && DepOpts.Targets.empty()) {
145 // Fallback to -o as dependency target, as in the driver.
146 SmallString<128> Target;
147 quoteMakeTarget(FrontendOpts.OutputFile, Target);
148 DepOpts.Targets.push_back(std::string(Target));
149 }
150 }
151
152 for (ModuleID MID : ClangModuleDeps)
153 FrontendOpts.ModuleFiles.push_back(
154 LookupModuleOutput(MID, ModuleOutputKind::ModuleFile));
155
156 return serializeCompilerInvocation(CI);
157 }
158
/// Serializes BuildInvocation as stored, without filling in any input,
/// output, or dependency module file paths.
///
/// \returns The arguments produced by serializeCompilerInvocation() for
///          BuildInvocation.
std::vector<std::string>
ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
  return serializeCompilerInvocation(BuildInvocation);
}
163
FileChanged(SourceLocation Loc,FileChangeReason Reason,SrcMgr::CharacteristicKind FileType,FileID PrevFID)164 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
165 FileChangeReason Reason,
166 SrcMgr::CharacteristicKind FileType,
167 FileID PrevFID) {
168 if (Reason != PPCallbacks::EnterFile)
169 return;
170
171 // This has to be delayed as the context hash can change at the start of
172 // `CompilerInstance::ExecuteAction`.
173 if (MDC.ContextHash.empty()) {
174 MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
175 MDC.Consumer.handleContextHash(MDC.ContextHash);
176 }
177
178 SourceManager &SM = MDC.ScanInstance.getSourceManager();
179
180 // Dependency generation really does want to go all the way to the
181 // file entry for a source location to find out what is depended on.
182 // We do not want #line markers to affect dependency generation!
183 if (Optional<StringRef> Filename =
184 SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc))))
185 MDC.FileDeps.push_back(
186 std::string(llvm::sys::path::remove_leading_dotslash(*Filename)));
187 }
188
InclusionDirective(SourceLocation HashLoc,const Token & IncludeTok,StringRef FileName,bool IsAngled,CharSourceRange FilenameRange,Optional<FileEntryRef> File,StringRef SearchPath,StringRef RelativePath,const Module * Imported,SrcMgr::CharacteristicKind FileType)189 void ModuleDepCollectorPP::InclusionDirective(
190 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
191 bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File,
192 StringRef SearchPath, StringRef RelativePath, const Module *Imported,
193 SrcMgr::CharacteristicKind FileType) {
194 if (!File && !Imported) {
195 // This is a non-modular include that HeaderSearch failed to find. Add it
196 // here as `FileChanged` will never see it.
197 MDC.FileDeps.push_back(std::string(FileName));
198 }
199 handleImport(Imported);
200 }
201
/// Handles a module import by forwarding \p Imported to handleImport().
///
/// \param ImportLoc Not used.
/// \param Path      Not used.
/// \param Imported  The imported module; may be null, which handleImport()
///                  tolerates.
void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
                                        ModuleIdPath Path,
                                        const Module *Imported) {
  handleImport(Imported);
}
207
handleImport(const Module * Imported)208 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
209 if (!Imported)
210 return;
211
212 const Module *TopLevelModule = Imported->getTopLevelModule();
213
214 if (MDC.isPrebuiltModule(TopLevelModule))
215 DirectPrebuiltModularDeps.insert(TopLevelModule);
216 else
217 DirectModularDeps.insert(TopLevelModule);
218 }
219
EndOfMainFile()220 void ModuleDepCollectorPP::EndOfMainFile() {
221 FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
222 MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
223 .getFileEntryForID(MainFileID)
224 ->getName());
225
226 if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
227 MDC.FileDeps.push_back(
228 MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
229
230 for (const Module *M : DirectModularDeps) {
231 // A top-level module might not be actually imported as a module when
232 // -fmodule-name is used to compile a translation unit that imports this
233 // module. In that case it can be skipped. The appropriate header
234 // dependencies will still be reported as expected.
235 if (!M->getASTFile())
236 continue;
237 handleTopLevelModule(M);
238 }
239
240 MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
241
242 for (auto &&I : MDC.ModularDeps)
243 MDC.Consumer.handleModuleDependency(*I.second);
244
245 for (auto &&I : MDC.FileDeps)
246 MDC.Consumer.handleFileDependency(I);
247
248 for (auto &&I : DirectPrebuiltModularDeps)
249 MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I});
250 }
251
handleTopLevelModule(const Module * M)252 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
253 assert(M == M->getTopLevelModule() && "Expected top level module!");
254
255 // If this module has been handled already, just return its ID.
256 auto ModI = MDC.ModularDeps.insert({M, nullptr});
257 if (!ModI.second)
258 return ModI.first->second->ID;
259
260 ModI.first->second = std::make_unique<ModuleDeps>();
261 ModuleDeps &MD = *ModI.first->second;
262
263 MD.ID.ModuleName = M->getFullModuleName();
264 MD.ImportedByMainFile = DirectModularDeps.contains(M);
265 MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
266 MD.IsSystem = M->IsSystem;
267
268 const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor()
269 .getHeaderSearchInfo()
270 .getModuleMap()
271 .getModuleMapFileForUniquing(M);
272
273 if (ModuleMap) {
274 StringRef Path = ModuleMap->tryGetRealPathName();
275 if (Path.empty())
276 Path = ModuleMap->getName();
277 MD.ClangModuleMapFile = std::string(Path);
278 }
279
280 serialization::ModuleFile *MF =
281 MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
282 M->getASTFile());
283 MDC.ScanInstance.getASTReader()->visitInputFiles(
284 *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
285 // __inferred_module.map is the result of the way in which an implicit
286 // module build handles inferred modules. It adds an overlay VFS with
287 // this file in the proper directory and relies on the rest of Clang to
288 // handle it like normal. With explicitly built modules we don't need
289 // to play VFS tricks, so replace it with the correct module map.
290 if (IF.getFile()->getName().endswith("__inferred_module.map")) {
291 MD.FileDeps.insert(ModuleMap->getName());
292 return;
293 }
294 MD.FileDeps.insert(IF.getFile()->getName());
295 });
296
297 // We usually don't need to list the module map files of our dependencies when
298 // building a module explicitly: their semantics will be deserialized from PCM
299 // files.
300 //
301 // However, some module maps loaded implicitly during the dependency scan can
302 // describe anti-dependencies. That happens when this module, let's call it
303 // M1, is marked as '[no_undeclared_includes]' and tries to access a header
304 // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;'
305 // declaration. The explicit build needs the module map for M2 so that it
306 // knows that textually including "M2/M2.h" is not allowed.
307 // E.g., '__has_include("M2/M2.h")' should return false, but without M2's
308 // module map the explicit build would return true.
309 //
310 // An alternative approach would be to tell the explicit build what its
311 // textual dependencies are, instead of having it re-discover its
312 // anti-dependencies. For example, we could create and use an `-ivfs-overlay`
313 // with `fall-through: false` that explicitly listed the dependencies.
314 // However, that's more complicated to implement and harder to reason about.
315 if (M->NoUndeclaredIncludes) {
316 // We don't have a good way to determine which module map described the
317 // anti-dependency (let alone what's the corresponding top-level module
318 // map). We simply specify all the module maps in the order they were loaded
319 // during the implicit build during scan.
320 // TODO: Resolve this by serializing and only using Module::UndeclaredUses.
321 MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps(
322 *MF, [&](const FileEntry *FE) {
323 if (FE->getName().endswith("__inferred_module.map"))
324 return;
325 // The top-level modulemap of this module will be the input file. We
326 // don't need to specify it as a module map.
327 if (FE == ModuleMap)
328 return;
329 MD.ModuleMapFileDeps.push_back(FE->getName().str());
330 });
331 }
332
333 // Add direct prebuilt module dependencies now, so that we can use them when
334 // creating a CompilerInvocation and computing context hash for this
335 // ModuleDeps instance.
336 llvm::DenseSet<const Module *> SeenModules;
337 addAllSubmodulePrebuiltDeps(M, MD, SeenModules);
338
339 MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths(
340 MD, [&](CompilerInvocation &BuildInvocation) {
341 if (MDC.OptimizeArgs)
342 optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(),
343 *MDC.ScanInstance.getASTReader(), *MF);
344 });
345 MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts()
346 .DiagnosticSerializationFile.empty();
347 MD.HadDependencyFile =
348 !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty();
349 // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in
350 // the context hash since it can affect the command-line.
351 MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
352
353 llvm::DenseSet<const Module *> AddedModules;
354 addAllSubmoduleDeps(M, MD, AddedModules);
355
356 return MD.ID;
357 }
358
forEachSubmoduleSorted(const Module * M,llvm::function_ref<void (const Module *)> F)359 static void forEachSubmoduleSorted(const Module *M,
360 llvm::function_ref<void(const Module *)> F) {
361 // Submodule order depends on order of header includes for inferred submodules
362 // we don't care about the exact order, so sort so that it's consistent across
363 // TUs to improve sharing.
364 SmallVector<const Module *> Submodules(M->submodule_begin(),
365 M->submodule_end());
366 llvm::stable_sort(Submodules, [](const Module *A, const Module *B) {
367 return A->Name < B->Name;
368 });
369 for (const Module *SubM : Submodules)
370 F(SubM);
371 }
372
addAllSubmodulePrebuiltDeps(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & SeenSubmodules)373 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
374 const Module *M, ModuleDeps &MD,
375 llvm::DenseSet<const Module *> &SeenSubmodules) {
376 addModulePrebuiltDeps(M, MD, SeenSubmodules);
377
378 forEachSubmoduleSorted(M, [&](const Module *SubM) {
379 addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
380 });
381 }
382
addModulePrebuiltDeps(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & SeenSubmodules)383 void ModuleDepCollectorPP::addModulePrebuiltDeps(
384 const Module *M, ModuleDeps &MD,
385 llvm::DenseSet<const Module *> &SeenSubmodules) {
386 for (const Module *Import : M->Imports)
387 if (Import->getTopLevelModule() != M->getTopLevelModule())
388 if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
389 if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
390 MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
391 }
392
addAllSubmoduleDeps(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & AddedModules)393 void ModuleDepCollectorPP::addAllSubmoduleDeps(
394 const Module *M, ModuleDeps &MD,
395 llvm::DenseSet<const Module *> &AddedModules) {
396 addModuleDep(M, MD, AddedModules);
397
398 forEachSubmoduleSorted(M, [&](const Module *SubM) {
399 addAllSubmoduleDeps(SubM, MD, AddedModules);
400 });
401 }
402
addModuleDep(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & AddedModules)403 void ModuleDepCollectorPP::addModuleDep(
404 const Module *M, ModuleDeps &MD,
405 llvm::DenseSet<const Module *> &AddedModules) {
406 for (const Module *Import : M->Imports) {
407 if (Import->getTopLevelModule() != M->getTopLevelModule() &&
408 !MDC.isPrebuiltModule(Import)) {
409 ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule());
410 if (AddedModules.insert(Import->getTopLevelModule()).second)
411 MD.ClangModuleDeps.push_back(ImportID);
412 }
413 }
414 }
415
/// Constructs a collector.
///
/// \param Opts         Dependency output options; ownership is taken.
/// \param ScanInstance Compiler instance; stored by reference.
/// \param C            Consumer; stored by reference.
/// \param OriginalCI   Invocation moved into OriginalInvocation.
/// \param OptimizeArgs Stored as-is in the OptimizeArgs member.
ModuleDepCollector::ModuleDepCollector(
    std::unique_ptr<DependencyOutputOptions> Opts,
    CompilerInstance &ScanInstance, DependencyConsumer &C,
    CompilerInvocation &&OriginalCI, bool OptimizeArgs)
    : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)),
      OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {}
422
/// Registers a new ModuleDepCollectorPP, constructed from this collector, as
/// a set of preprocessor callbacks on \p PP.
void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
  PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
}
426
/// Has an empty body: nothing is attached to \p R.
void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
428
isPrebuiltModule(const Module * M)429 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
430 std::string Name(M->getTopLevelModuleName());
431 const auto &PrebuiltModuleFiles =
432 ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
433 auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
434 if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
435 return false;
436 assert("Prebuilt module came from the expected AST file" &&
437 PrebuiltModuleFileIt->second == M->getASTFile()->getName());
438 return true;
439 }
440