1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10 
11 #include "clang/Frontend/CompilerInstance.h"
12 #include "clang/Lex/Preprocessor.h"
13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
14 #include "llvm/Support/StringSaver.h"
15 
16 using namespace clang;
17 using namespace tooling;
18 using namespace dependencies;
19 
20 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
21                                      ASTReader &Reader,
22                                      const serialization::ModuleFile &MF) {
23   // Only preserve search paths that were used during the dependency scan.
24   std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
25   Opts.UserEntries.clear();
26   for (unsigned I = 0; I < Entries.size(); ++I)
27     if (MF.SearchPathUsage[I])
28       Opts.UserEntries.push_back(Entries[I]);
29 }
30 
31 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
32     const ModuleDeps &Deps,
33     llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
34   // Make a deep copy of the original Clang invocation.
35   CompilerInvocation CI(OriginalInvocation);
36 
37   CI.getLangOpts()->resetNonModularOptions();
38   CI.getPreprocessorOpts().resetNonModularOptions();
39 
40   // Remove options incompatible with explicit module build or are likely to
41   // differ between identical modules discovered from different translation
42   // units.
43   CI.getFrontendOpts().Inputs.clear();
44   CI.getFrontendOpts().OutputFile.clear();
45   CI.getCodeGenOpts().MainFileName.clear();
46   CI.getCodeGenOpts().DwarfDebugFlags.clear();
47 
48   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
49   CI.getLangOpts()->ModuleName = Deps.ID.ModuleName;
50   CI.getFrontendOpts().IsSystemModule = Deps.IsSystem;
51 
52   CI.getLangOpts()->ImplicitModules = false;
53   CI.getHeaderSearchOpts().ImplicitModuleMaps = false;
54 
55   // Report the prebuilt modules this module uses.
56   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
57     CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
58 
59   CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps;
60 
61   Optimize(CI);
62 
63   // The original invocation probably didn't have strict context hash enabled.
64   // We will use the context hash of this invocation to distinguish between
65   // multiple incompatible versions of the same module and will use it when
66   // reporting dependencies to the clients. Let's make sure we're using
67   // **strict** context hash in order to prevent accidental sharing of
68   // incompatible modules (e.g. with differences in search paths).
69   CI.getHeaderSearchOpts().ModulesStrictContextHash = true;
70 
71   return CI;
72 }
73 
74 static std::vector<std::string>
75 serializeCompilerInvocation(const CompilerInvocation &CI) {
76   // Set up string allocator.
77   llvm::BumpPtrAllocator Alloc;
78   llvm::StringSaver Strings(Alloc);
79   auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); };
80 
81   // Synthesize full command line from the CompilerInvocation, including "-cc1".
82   SmallVector<const char *, 32> Args{"-cc1"};
83   CI.generateCC1CommandLine(Args, SA);
84 
85   // Convert arguments to the return type.
86   return std::vector<std::string>{Args.begin(), Args.end()};
87 }
88 
89 std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
90     std::function<StringRef(ModuleID)> LookupPCMPath,
91     std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps) const {
92   CompilerInvocation CI(BuildInvocation);
93   FrontendOptions &FrontendOpts = CI.getFrontendOpts();
94 
95   InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(),
96                                InputKind::Format::ModuleMap);
97   FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind);
98   FrontendOpts.OutputFile = std::string(LookupPCMPath(ID));
99 
100   dependencies::detail::collectPCMPaths(ClangModuleDeps, LookupPCMPath,
101                                         LookupModuleDeps,
102                                         FrontendOpts.ModuleFiles);
103 
104   return serializeCompilerInvocation(CI);
105 }
106 
107 std::vector<std::string>
108 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
109   return serializeCompilerInvocation(BuildInvocation);
110 }
111 
112 void dependencies::detail::collectPCMPaths(
113     llvm::ArrayRef<ModuleID> Modules,
114     std::function<StringRef(ModuleID)> LookupPCMPath,
115     std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps,
116     std::vector<std::string> &PCMPaths) {
117   llvm::StringSet<> AlreadyAdded;
118 
119   std::function<void(llvm::ArrayRef<ModuleID>)> AddArgs =
120       [&](llvm::ArrayRef<ModuleID> Modules) {
121         for (const ModuleID &MID : Modules) {
122           if (!AlreadyAdded.insert(MID.ModuleName + MID.ContextHash).second)
123             continue;
124           const ModuleDeps &M = LookupModuleDeps(MID);
125           // Depth first traversal.
126           AddArgs(M.ClangModuleDeps);
127           PCMPaths.push_back(LookupPCMPath(MID).str());
128         }
129       };
130 
131   AddArgs(Modules);
132 }
133 
134 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
135                                        FileChangeReason Reason,
136                                        SrcMgr::CharacteristicKind FileType,
137                                        FileID PrevFID) {
138   if (Reason != PPCallbacks::EnterFile)
139     return;
140 
141   // This has to be delayed as the context hash can change at the start of
142   // `CompilerInstance::ExecuteAction`.
143   if (MDC.ContextHash.empty()) {
144     MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
145     MDC.Consumer.handleContextHash(MDC.ContextHash);
146   }
147 
148   SourceManager &SM = MDC.ScanInstance.getSourceManager();
149 
150   // Dependency generation really does want to go all the way to the
151   // file entry for a source location to find out what is depended on.
152   // We do not want #line markers to affect dependency generation!
153   if (Optional<StringRef> Filename =
154           SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc))))
155     MDC.FileDeps.push_back(
156         std::string(llvm::sys::path::remove_leading_dotslash(*Filename)));
157 }
158 
159 void ModuleDepCollectorPP::InclusionDirective(
160     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
161     bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File,
162     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
163     SrcMgr::CharacteristicKind FileType) {
164   if (!File && !Imported) {
165     // This is a non-modular include that HeaderSearch failed to find. Add it
166     // here as `FileChanged` will never see it.
167     MDC.FileDeps.push_back(std::string(FileName));
168   }
169   handleImport(Imported);
170 }
171 
172 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
173                                         ModuleIdPath Path,
174                                         const Module *Imported) {
175   handleImport(Imported);
176 }
177 
178 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
179   if (!Imported)
180     return;
181 
182   const Module *TopLevelModule = Imported->getTopLevelModule();
183 
184   if (MDC.isPrebuiltModule(TopLevelModule))
185     DirectPrebuiltModularDeps.insert(TopLevelModule);
186   else
187     DirectModularDeps.insert(TopLevelModule);
188 }
189 
190 void ModuleDepCollectorPP::EndOfMainFile() {
191   FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
192   MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
193                                  .getFileEntryForID(MainFileID)
194                                  ->getName());
195 
196   if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
197     MDC.FileDeps.push_back(
198         MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
199 
200   for (const Module *M : DirectModularDeps) {
201     // A top-level module might not be actually imported as a module when
202     // -fmodule-name is used to compile a translation unit that imports this
203     // module. In that case it can be skipped. The appropriate header
204     // dependencies will still be reported as expected.
205     if (!M->getASTFile())
206       continue;
207     handleTopLevelModule(M);
208   }
209 
210   MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
211 
212   for (auto &&I : MDC.ModularDeps)
213     MDC.Consumer.handleModuleDependency(I.second);
214 
215   for (auto &&I : MDC.FileDeps)
216     MDC.Consumer.handleFileDependency(I);
217 
218   for (auto &&I : DirectPrebuiltModularDeps)
219     MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I});
220 }
221 
222 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
223   assert(M == M->getTopLevelModule() && "Expected top level module!");
224 
225   // If this module has been handled already, just return its ID.
226   auto ModI = MDC.ModularDeps.insert({M, ModuleDeps{}});
227   if (!ModI.second)
228     return ModI.first->second.ID;
229 
230   ModuleDeps &MD = ModI.first->second;
231 
232   MD.ID.ModuleName = M->getFullModuleName();
233   MD.ImportedByMainFile = DirectModularDeps.contains(M);
234   MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
235   MD.IsSystem = M->IsSystem;
236 
237   const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor()
238                                    .getHeaderSearchInfo()
239                                    .getModuleMap()
240                                    .getModuleMapFileForUniquing(M);
241 
242   if (ModuleMap) {
243     StringRef Path = ModuleMap->tryGetRealPathName();
244     if (Path.empty())
245       Path = ModuleMap->getName();
246     MD.ClangModuleMapFile = std::string(Path);
247   }
248 
249   serialization::ModuleFile *MF =
250       MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
251           M->getASTFile());
252   MDC.ScanInstance.getASTReader()->visitInputFiles(
253       *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
254         // __inferred_module.map is the result of the way in which an implicit
255         // module build handles inferred modules. It adds an overlay VFS with
256         // this file in the proper directory and relies on the rest of Clang to
257         // handle it like normal. With explicitly built modules we don't need
258         // to play VFS tricks, so replace it with the correct module map.
259         if (IF.getFile()->getName().endswith("__inferred_module.map")) {
260           MD.FileDeps.insert(ModuleMap->getName());
261           return;
262         }
263         MD.FileDeps.insert(IF.getFile()->getName());
264       });
265 
266   // We usually don't need to list the module map files of our dependencies when
267   // building a module explicitly: their semantics will be deserialized from PCM
268   // files.
269   //
270   // However, some module maps loaded implicitly during the dependency scan can
271   // describe anti-dependencies. That happens when this module, let's call it
272   // M1, is marked as '[no_undeclared_includes]' and tries to access a header
273   // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;'
274   // declaration. The explicit build needs the module map for M2 so that it
275   // knows that textually including "M2/M2.h" is not allowed.
276   // E.g., '__has_include("M2/M2.h")' should return false, but without M2's
277   // module map the explicit build would return true.
278   //
279   // An alternative approach would be to tell the explicit build what its
280   // textual dependencies are, instead of having it re-discover its
281   // anti-dependencies. For example, we could create and use an `-ivfs-overlay`
282   // with `fall-through: false` that explicitly listed the dependencies.
283   // However, that's more complicated to implement and harder to reason about.
284   if (M->NoUndeclaredIncludes) {
285     // We don't have a good way to determine which module map described the
286     // anti-dependency (let alone what's the corresponding top-level module
287     // map). We simply specify all the module maps in the order they were loaded
288     // during the implicit build during scan.
289     // TODO: Resolve this by serializing and only using Module::UndeclaredUses.
290     MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps(
291         *MF, [&](const FileEntry *FE) {
292           if (FE->getName().endswith("__inferred_module.map"))
293             return;
294           // The top-level modulemap of this module will be the input file. We
295           // don't need to specify it as a module map.
296           if (FE == ModuleMap)
297             return;
298           MD.ModuleMapFileDeps.push_back(FE->getName().str());
299         });
300   }
301 
302   // Add direct prebuilt module dependencies now, so that we can use them when
303   // creating a CompilerInvocation and computing context hash for this
304   // ModuleDeps instance.
305   llvm::DenseSet<const Module *> SeenModules;
306   addAllSubmodulePrebuiltDeps(M, MD, SeenModules);
307 
308   MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths(
309       MD, [&](CompilerInvocation &BuildInvocation) {
310         if (MDC.OptimizeArgs)
311           optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(),
312                                    *MDC.ScanInstance.getASTReader(), *MF);
313       });
314   MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
315 
316   llvm::DenseSet<const Module *> AddedModules;
317   addAllSubmoduleDeps(M, MD, AddedModules);
318 
319   return MD.ID;
320 }
321 
322 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
323     const Module *M, ModuleDeps &MD,
324     llvm::DenseSet<const Module *> &SeenSubmodules) {
325   addModulePrebuiltDeps(M, MD, SeenSubmodules);
326 
327   for (const Module *SubM : M->submodules())
328     addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
329 }
330 
331 void ModuleDepCollectorPP::addModulePrebuiltDeps(
332     const Module *M, ModuleDeps &MD,
333     llvm::DenseSet<const Module *> &SeenSubmodules) {
334   for (const Module *Import : M->Imports)
335     if (Import->getTopLevelModule() != M->getTopLevelModule())
336       if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
337         if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
338           MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
339 }
340 
341 void ModuleDepCollectorPP::addAllSubmoduleDeps(
342     const Module *M, ModuleDeps &MD,
343     llvm::DenseSet<const Module *> &AddedModules) {
344   addModuleDep(M, MD, AddedModules);
345 
346   for (const Module *SubM : M->submodules())
347     addAllSubmoduleDeps(SubM, MD, AddedModules);
348 }
349 
350 void ModuleDepCollectorPP::addModuleDep(
351     const Module *M, ModuleDeps &MD,
352     llvm::DenseSet<const Module *> &AddedModules) {
353   for (const Module *Import : M->Imports) {
354     if (Import->getTopLevelModule() != M->getTopLevelModule() &&
355         !MDC.isPrebuiltModule(Import)) {
356       ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule());
357       if (AddedModules.insert(Import->getTopLevelModule()).second)
358         MD.ClangModuleDeps.push_back(ImportID);
359     }
360   }
361 }
362 
363 ModuleDepCollector::ModuleDepCollector(
364     std::unique_ptr<DependencyOutputOptions> Opts,
365     CompilerInstance &ScanInstance, DependencyConsumer &C,
366     CompilerInvocation &&OriginalCI, bool OptimizeArgs)
367     : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)),
368       OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {}
369 
370 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
371   PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
372 }
373 
374 void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
375 
376 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
377   std::string Name(M->getTopLevelModuleName());
378   const auto &PrebuiltModuleFiles =
379       ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
380   auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
381   if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
382     return false;
383   assert("Prebuilt module came from the expected AST file" &&
384          PrebuiltModuleFileIt->second == M->getASTFile()->getName());
385   return true;
386 }
387