1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10 
11 #include "clang/Frontend/CompilerInstance.h"
12 #include "clang/Lex/Preprocessor.h"
13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
14 #include "llvm/Support/StringSaver.h"
15 
16 using namespace clang;
17 using namespace tooling;
18 using namespace dependencies;
19 
20 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
21                                      ASTReader &Reader,
22                                      const serialization::ModuleFile &MF) {
23   // Only preserve search paths that were used during the dependency scan.
24   std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
25   Opts.UserEntries.clear();
26 
27   llvm::BitVector SearchPathUsage(Entries.size());
28   llvm::DenseSet<const serialization::ModuleFile *> Visited;
29   std::function<void(const serialization::ModuleFile *)> VisitMF =
30       [&](const serialization::ModuleFile *MF) {
31         SearchPathUsage |= MF->SearchPathUsage;
32         Visited.insert(MF);
33         for (const serialization::ModuleFile *Import : MF->Imports)
34           if (!Visited.contains(Import))
35             VisitMF(Import);
36       };
37   VisitMF(&MF);
38 
39   for (auto Idx : SearchPathUsage.set_bits())
40     Opts.UserEntries.push_back(Entries[Idx]);
41 }
42 
43 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
44     const ModuleDeps &Deps,
45     llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
46   // Make a deep copy of the original Clang invocation.
47   CompilerInvocation CI(OriginalInvocation);
48 
49   CI.getLangOpts()->resetNonModularOptions();
50   CI.getPreprocessorOpts().resetNonModularOptions();
51 
52   // Remove options incompatible with explicit module build or are likely to
53   // differ between identical modules discovered from different translation
54   // units.
55   CI.getFrontendOpts().Inputs.clear();
56   CI.getFrontendOpts().OutputFile.clear();
57   CI.getCodeGenOpts().MainFileName.clear();
58   CI.getCodeGenOpts().DwarfDebugFlags.clear();
59   CI.getDiagnosticOpts().DiagnosticSerializationFile.clear();
60   CI.getDependencyOutputOpts().OutputFile.clear();
61   CI.getDependencyOutputOpts().Targets.clear();
62 
63   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
64   CI.getLangOpts()->ModuleName = Deps.ID.ModuleName;
65   CI.getFrontendOpts().IsSystemModule = Deps.IsSystem;
66 
67   // Disable implicit modules and canonicalize options that are only used by
68   // implicit modules.
69   CI.getLangOpts()->ImplicitModules = false;
70   CI.getHeaderSearchOpts().ImplicitModuleMaps = false;
71   CI.getHeaderSearchOpts().ModuleCachePath.clear();
72   CI.getHeaderSearchOpts().ModulesValidateOncePerBuildSession = false;
73   CI.getHeaderSearchOpts().BuildSessionTimestamp = 0;
74   // The specific values we canonicalize to for pruning don't affect behaviour,
75   /// so use the default values so they will be dropped from the command-line.
76   CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60;
77   CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60;
78 
79   // Report the prebuilt modules this module uses.
80   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
81     CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
82 
83   CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps;
84 
85   Optimize(CI);
86 
87   // The original invocation probably didn't have strict context hash enabled.
88   // We will use the context hash of this invocation to distinguish between
89   // multiple incompatible versions of the same module and will use it when
90   // reporting dependencies to the clients. Let's make sure we're using
91   // **strict** context hash in order to prevent accidental sharing of
92   // incompatible modules (e.g. with differences in search paths).
93   CI.getHeaderSearchOpts().ModulesStrictContextHash = true;
94 
95   return CI;
96 }
97 
98 static std::vector<std::string>
99 serializeCompilerInvocation(const CompilerInvocation &CI) {
100   // Set up string allocator.
101   llvm::BumpPtrAllocator Alloc;
102   llvm::StringSaver Strings(Alloc);
103   auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); };
104 
105   // Synthesize full command line from the CompilerInvocation, including "-cc1".
106   SmallVector<const char *, 32> Args{"-cc1"};
107   CI.generateCC1CommandLine(Args, SA);
108 
109   // Convert arguments to the return type.
110   return std::vector<std::string>{Args.begin(), Args.end()};
111 }
112 
113 static std::vector<std::string> splitString(std::string S, char Separator) {
114   SmallVector<StringRef> Segments;
115   StringRef(S).split(Segments, Separator);
116   std::vector<std::string> Result;
117   Result.reserve(Segments.size());
118   for (StringRef Segment : Segments)
119     Result.push_back(Segment.str());
120   return Result;
121 }
122 
123 std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
124     llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
125         LookupModuleOutput) const {
126   CompilerInvocation CI(BuildInvocation);
127   FrontendOptions &FrontendOpts = CI.getFrontendOpts();
128 
129   InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(),
130                                InputKind::Format::ModuleMap);
131   FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind);
132   FrontendOpts.OutputFile =
133       LookupModuleOutput(ID, ModuleOutputKind::ModuleFile);
134   if (HadSerializedDiagnostics)
135     CI.getDiagnosticOpts().DiagnosticSerializationFile =
136         LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile);
137   if (HadDependencyFile) {
138     CI.getDependencyOutputOpts().OutputFile =
139         LookupModuleOutput(ID, ModuleOutputKind::DependencyFile);
140     CI.getDependencyOutputOpts().Targets = splitString(
141         LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0');
142   }
143 
144   for (ModuleID MID : ClangModuleDeps)
145     FrontendOpts.ModuleFiles.push_back(
146         LookupModuleOutput(MID, ModuleOutputKind::ModuleFile));
147 
148   return serializeCompilerInvocation(CI);
149 }
150 
151 std::vector<std::string>
152 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
153   return serializeCompilerInvocation(BuildInvocation);
154 }
155 
156 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
157                                        FileChangeReason Reason,
158                                        SrcMgr::CharacteristicKind FileType,
159                                        FileID PrevFID) {
160   if (Reason != PPCallbacks::EnterFile)
161     return;
162 
163   // This has to be delayed as the context hash can change at the start of
164   // `CompilerInstance::ExecuteAction`.
165   if (MDC.ContextHash.empty()) {
166     MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
167     MDC.Consumer.handleContextHash(MDC.ContextHash);
168   }
169 
170   SourceManager &SM = MDC.ScanInstance.getSourceManager();
171 
172   // Dependency generation really does want to go all the way to the
173   // file entry for a source location to find out what is depended on.
174   // We do not want #line markers to affect dependency generation!
175   if (Optional<StringRef> Filename =
176           SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc))))
177     MDC.FileDeps.push_back(
178         std::string(llvm::sys::path::remove_leading_dotslash(*Filename)));
179 }
180 
181 void ModuleDepCollectorPP::InclusionDirective(
182     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
183     bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File,
184     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
185     SrcMgr::CharacteristicKind FileType) {
186   if (!File && !Imported) {
187     // This is a non-modular include that HeaderSearch failed to find. Add it
188     // here as `FileChanged` will never see it.
189     MDC.FileDeps.push_back(std::string(FileName));
190   }
191   handleImport(Imported);
192 }
193 
194 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
195                                         ModuleIdPath Path,
196                                         const Module *Imported) {
197   handleImport(Imported);
198 }
199 
200 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
201   if (!Imported)
202     return;
203 
204   const Module *TopLevelModule = Imported->getTopLevelModule();
205 
206   if (MDC.isPrebuiltModule(TopLevelModule))
207     DirectPrebuiltModularDeps.insert(TopLevelModule);
208   else
209     DirectModularDeps.insert(TopLevelModule);
210 }
211 
212 void ModuleDepCollectorPP::EndOfMainFile() {
213   FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
214   MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
215                                  .getFileEntryForID(MainFileID)
216                                  ->getName());
217 
218   if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
219     MDC.FileDeps.push_back(
220         MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
221 
222   for (const Module *M : DirectModularDeps) {
223     // A top-level module might not be actually imported as a module when
224     // -fmodule-name is used to compile a translation unit that imports this
225     // module. In that case it can be skipped. The appropriate header
226     // dependencies will still be reported as expected.
227     if (!M->getASTFile())
228       continue;
229     handleTopLevelModule(M);
230   }
231 
232   MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
233 
234   for (auto &&I : MDC.ModularDeps)
235     MDC.Consumer.handleModuleDependency(*I.second);
236 
237   for (auto &&I : MDC.FileDeps)
238     MDC.Consumer.handleFileDependency(I);
239 
240   for (auto &&I : DirectPrebuiltModularDeps)
241     MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I});
242 }
243 
244 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
245   assert(M == M->getTopLevelModule() && "Expected top level module!");
246 
247   // If this module has been handled already, just return its ID.
248   auto ModI = MDC.ModularDeps.insert({M, nullptr});
249   if (!ModI.second)
250     return ModI.first->second->ID;
251 
252   ModI.first->second = std::make_unique<ModuleDeps>();
253   ModuleDeps &MD = *ModI.first->second;
254 
255   MD.ID.ModuleName = M->getFullModuleName();
256   MD.ImportedByMainFile = DirectModularDeps.contains(M);
257   MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
258   MD.IsSystem = M->IsSystem;
259 
260   const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor()
261                                    .getHeaderSearchInfo()
262                                    .getModuleMap()
263                                    .getModuleMapFileForUniquing(M);
264 
265   if (ModuleMap) {
266     StringRef Path = ModuleMap->tryGetRealPathName();
267     if (Path.empty())
268       Path = ModuleMap->getName();
269     MD.ClangModuleMapFile = std::string(Path);
270   }
271 
272   serialization::ModuleFile *MF =
273       MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
274           M->getASTFile());
275   MDC.ScanInstance.getASTReader()->visitInputFiles(
276       *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
277         // __inferred_module.map is the result of the way in which an implicit
278         // module build handles inferred modules. It adds an overlay VFS with
279         // this file in the proper directory and relies on the rest of Clang to
280         // handle it like normal. With explicitly built modules we don't need
281         // to play VFS tricks, so replace it with the correct module map.
282         if (IF.getFile()->getName().endswith("__inferred_module.map")) {
283           MD.FileDeps.insert(ModuleMap->getName());
284           return;
285         }
286         MD.FileDeps.insert(IF.getFile()->getName());
287       });
288 
289   // We usually don't need to list the module map files of our dependencies when
290   // building a module explicitly: their semantics will be deserialized from PCM
291   // files.
292   //
293   // However, some module maps loaded implicitly during the dependency scan can
294   // describe anti-dependencies. That happens when this module, let's call it
295   // M1, is marked as '[no_undeclared_includes]' and tries to access a header
296   // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;'
297   // declaration. The explicit build needs the module map for M2 so that it
298   // knows that textually including "M2/M2.h" is not allowed.
299   // E.g., '__has_include("M2/M2.h")' should return false, but without M2's
300   // module map the explicit build would return true.
301   //
302   // An alternative approach would be to tell the explicit build what its
303   // textual dependencies are, instead of having it re-discover its
304   // anti-dependencies. For example, we could create and use an `-ivfs-overlay`
305   // with `fall-through: false` that explicitly listed the dependencies.
306   // However, that's more complicated to implement and harder to reason about.
307   if (M->NoUndeclaredIncludes) {
308     // We don't have a good way to determine which module map described the
309     // anti-dependency (let alone what's the corresponding top-level module
310     // map). We simply specify all the module maps in the order they were loaded
311     // during the implicit build during scan.
312     // TODO: Resolve this by serializing and only using Module::UndeclaredUses.
313     MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps(
314         *MF, [&](const FileEntry *FE) {
315           if (FE->getName().endswith("__inferred_module.map"))
316             return;
317           // The top-level modulemap of this module will be the input file. We
318           // don't need to specify it as a module map.
319           if (FE == ModuleMap)
320             return;
321           MD.ModuleMapFileDeps.push_back(FE->getName().str());
322         });
323   }
324 
325   // Add direct prebuilt module dependencies now, so that we can use them when
326   // creating a CompilerInvocation and computing context hash for this
327   // ModuleDeps instance.
328   llvm::DenseSet<const Module *> SeenModules;
329   addAllSubmodulePrebuiltDeps(M, MD, SeenModules);
330 
331   MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths(
332       MD, [&](CompilerInvocation &BuildInvocation) {
333         if (MDC.OptimizeArgs)
334           optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(),
335                                    *MDC.ScanInstance.getASTReader(), *MF);
336       });
337   MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts()
338                                      .DiagnosticSerializationFile.empty();
339   MD.HadDependencyFile =
340       !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty();
341   // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in
342   // the context hash since it can affect the command-line.
343   MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
344 
345   llvm::DenseSet<const Module *> AddedModules;
346   addAllSubmoduleDeps(M, MD, AddedModules);
347 
348   return MD.ID;
349 }
350 
351 static void forEachSubmoduleSorted(const Module *M,
352                                    llvm::function_ref<void(const Module *)> F) {
353   // Submodule order depends on order of header includes for inferred submodules
354   // we don't care about the exact order, so sort so that it's consistent across
355   // TUs to improve sharing.
356   SmallVector<const Module *> Submodules(M->submodule_begin(),
357                                          M->submodule_end());
358   llvm::stable_sort(Submodules, [](const Module *A, const Module *B) {
359     return A->Name < B->Name;
360   });
361   for (const Module *SubM : Submodules)
362     F(SubM);
363 }
364 
365 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
366     const Module *M, ModuleDeps &MD,
367     llvm::DenseSet<const Module *> &SeenSubmodules) {
368   addModulePrebuiltDeps(M, MD, SeenSubmodules);
369 
370   forEachSubmoduleSorted(M, [&](const Module *SubM) {
371     addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
372   });
373 }
374 
375 void ModuleDepCollectorPP::addModulePrebuiltDeps(
376     const Module *M, ModuleDeps &MD,
377     llvm::DenseSet<const Module *> &SeenSubmodules) {
378   for (const Module *Import : M->Imports)
379     if (Import->getTopLevelModule() != M->getTopLevelModule())
380       if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
381         if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
382           MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
383 }
384 
385 void ModuleDepCollectorPP::addAllSubmoduleDeps(
386     const Module *M, ModuleDeps &MD,
387     llvm::DenseSet<const Module *> &AddedModules) {
388   addModuleDep(M, MD, AddedModules);
389 
390   forEachSubmoduleSorted(M, [&](const Module *SubM) {
391     addAllSubmoduleDeps(SubM, MD, AddedModules);
392   });
393 }
394 
395 void ModuleDepCollectorPP::addModuleDep(
396     const Module *M, ModuleDeps &MD,
397     llvm::DenseSet<const Module *> &AddedModules) {
398   for (const Module *Import : M->Imports) {
399     if (Import->getTopLevelModule() != M->getTopLevelModule() &&
400         !MDC.isPrebuiltModule(Import)) {
401       ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule());
402       if (AddedModules.insert(Import->getTopLevelModule()).second)
403         MD.ClangModuleDeps.push_back(ImportID);
404     }
405   }
406 }
407 
408 ModuleDepCollector::ModuleDepCollector(
409     std::unique_ptr<DependencyOutputOptions> Opts,
410     CompilerInstance &ScanInstance, DependencyConsumer &C,
411     CompilerInvocation &&OriginalCI, bool OptimizeArgs)
412     : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)),
413       OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {}
414 
415 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
416   PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
417 }
418 
419 void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
420 
421 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
422   std::string Name(M->getTopLevelModuleName());
423   const auto &PrebuiltModuleFiles =
424       ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
425   auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
426   if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
427     return false;
428   assert("Prebuilt module came from the expected AST file" &&
429          PrebuiltModuleFileIt->second == M->getASTFile()->getName());
430   return true;
431 }
432