1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10 
11 #include "clang/Frontend/CompilerInstance.h"
12 #include "clang/Lex/Preprocessor.h"
13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
14 #include "llvm/Support/StringSaver.h"
15 
16 using namespace clang;
17 using namespace tooling;
18 using namespace dependencies;
19 
20 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
21                                      ASTReader &Reader,
22                                      const serialization::ModuleFile &MF) {
23   // Only preserve search paths that were used during the dependency scan.
24   std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
25   Opts.UserEntries.clear();
26 
27   llvm::BitVector SearchPathUsage(Entries.size());
28   llvm::DenseSet<const serialization::ModuleFile *> Visited;
29   std::function<void(const serialization::ModuleFile *)> VisitMF =
30       [&](const serialization::ModuleFile *MF) {
31         SearchPathUsage |= MF->SearchPathUsage;
32         Visited.insert(MF);
33         for (const serialization::ModuleFile *Import : MF->Imports)
34           if (!Visited.contains(Import))
35             VisitMF(Import);
36       };
37   VisitMF(&MF);
38 
39   for (auto Idx : SearchPathUsage.set_bits())
40     Opts.UserEntries.push_back(Entries[Idx]);
41 }
42 
43 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
44     const ModuleDeps &Deps,
45     llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
46   // Make a deep copy of the original Clang invocation.
47   CompilerInvocation CI(OriginalInvocation);
48 
49   CI.getLangOpts()->resetNonModularOptions();
50   CI.getPreprocessorOpts().resetNonModularOptions();
51 
52   // Remove options incompatible with explicit module build or are likely to
53   // differ between identical modules discovered from different translation
54   // units.
55   CI.getFrontendOpts().Inputs.clear();
56   CI.getFrontendOpts().OutputFile.clear();
57   CI.getCodeGenOpts().MainFileName.clear();
58   CI.getCodeGenOpts().DwarfDebugFlags.clear();
59 
60   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
61   CI.getLangOpts()->ModuleName = Deps.ID.ModuleName;
62   CI.getFrontendOpts().IsSystemModule = Deps.IsSystem;
63 
64   CI.getLangOpts()->ImplicitModules = false;
65   CI.getHeaderSearchOpts().ImplicitModuleMaps = false;
66   CI.getHeaderSearchOpts().ModuleCachePath.clear();
67 
68   // Report the prebuilt modules this module uses.
69   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
70     CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
71 
72   CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps;
73 
74   Optimize(CI);
75 
76   // The original invocation probably didn't have strict context hash enabled.
77   // We will use the context hash of this invocation to distinguish between
78   // multiple incompatible versions of the same module and will use it when
79   // reporting dependencies to the clients. Let's make sure we're using
80   // **strict** context hash in order to prevent accidental sharing of
81   // incompatible modules (e.g. with differences in search paths).
82   CI.getHeaderSearchOpts().ModulesStrictContextHash = true;
83 
84   return CI;
85 }
86 
87 static std::vector<std::string>
88 serializeCompilerInvocation(const CompilerInvocation &CI) {
89   // Set up string allocator.
90   llvm::BumpPtrAllocator Alloc;
91   llvm::StringSaver Strings(Alloc);
92   auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); };
93 
94   // Synthesize full command line from the CompilerInvocation, including "-cc1".
95   SmallVector<const char *, 32> Args{"-cc1"};
96   CI.generateCC1CommandLine(Args, SA);
97 
98   // Convert arguments to the return type.
99   return std::vector<std::string>{Args.begin(), Args.end()};
100 }
101 
102 std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
103     std::function<StringRef(ModuleID)> LookupPCMPath) const {
104   CompilerInvocation CI(BuildInvocation);
105   FrontendOptions &FrontendOpts = CI.getFrontendOpts();
106 
107   InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(),
108                                InputKind::Format::ModuleMap);
109   FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind);
110   FrontendOpts.OutputFile = std::string(LookupPCMPath(ID));
111 
112   for (ModuleID MID : ClangModuleDeps)
113     FrontendOpts.ModuleFiles.emplace_back(LookupPCMPath(MID));
114 
115   return serializeCompilerInvocation(CI);
116 }
117 
118 std::vector<std::string>
119 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
120   return serializeCompilerInvocation(BuildInvocation);
121 }
122 
123 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
124                                        FileChangeReason Reason,
125                                        SrcMgr::CharacteristicKind FileType,
126                                        FileID PrevFID) {
127   if (Reason != PPCallbacks::EnterFile)
128     return;
129 
130   // This has to be delayed as the context hash can change at the start of
131   // `CompilerInstance::ExecuteAction`.
132   if (MDC.ContextHash.empty()) {
133     MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
134     MDC.Consumer.handleContextHash(MDC.ContextHash);
135   }
136 
137   SourceManager &SM = MDC.ScanInstance.getSourceManager();
138 
139   // Dependency generation really does want to go all the way to the
140   // file entry for a source location to find out what is depended on.
141   // We do not want #line markers to affect dependency generation!
142   if (Optional<StringRef> Filename =
143           SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc))))
144     MDC.FileDeps.push_back(
145         std::string(llvm::sys::path::remove_leading_dotslash(*Filename)));
146 }
147 
148 void ModuleDepCollectorPP::InclusionDirective(
149     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
150     bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File,
151     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
152     SrcMgr::CharacteristicKind FileType) {
153   if (!File && !Imported) {
154     // This is a non-modular include that HeaderSearch failed to find. Add it
155     // here as `FileChanged` will never see it.
156     MDC.FileDeps.push_back(std::string(FileName));
157   }
158   handleImport(Imported);
159 }
160 
161 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
162                                         ModuleIdPath Path,
163                                         const Module *Imported) {
164   handleImport(Imported);
165 }
166 
167 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
168   if (!Imported)
169     return;
170 
171   const Module *TopLevelModule = Imported->getTopLevelModule();
172 
173   if (MDC.isPrebuiltModule(TopLevelModule))
174     DirectPrebuiltModularDeps.insert(TopLevelModule);
175   else
176     DirectModularDeps.insert(TopLevelModule);
177 }
178 
179 void ModuleDepCollectorPP::EndOfMainFile() {
180   FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
181   MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
182                                  .getFileEntryForID(MainFileID)
183                                  ->getName());
184 
185   if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
186     MDC.FileDeps.push_back(
187         MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
188 
189   for (const Module *M : DirectModularDeps) {
190     // A top-level module might not be actually imported as a module when
191     // -fmodule-name is used to compile a translation unit that imports this
192     // module. In that case it can be skipped. The appropriate header
193     // dependencies will still be reported as expected.
194     if (!M->getASTFile())
195       continue;
196     handleTopLevelModule(M);
197   }
198 
199   MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
200 
201   for (auto &&I : MDC.ModularDeps)
202     MDC.Consumer.handleModuleDependency(*I.second);
203 
204   for (auto &&I : MDC.FileDeps)
205     MDC.Consumer.handleFileDependency(I);
206 
207   for (auto &&I : DirectPrebuiltModularDeps)
208     MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I});
209 }
210 
211 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
212   assert(M == M->getTopLevelModule() && "Expected top level module!");
213 
214   // If this module has been handled already, just return its ID.
215   auto ModI = MDC.ModularDeps.insert({M, nullptr});
216   if (!ModI.second)
217     return ModI.first->second->ID;
218 
219   ModI.first->second = std::make_unique<ModuleDeps>();
220   ModuleDeps &MD = *ModI.first->second;
221 
222   MD.ID.ModuleName = M->getFullModuleName();
223   MD.ImportedByMainFile = DirectModularDeps.contains(M);
224   MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
225   MD.IsSystem = M->IsSystem;
226 
227   const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor()
228                                    .getHeaderSearchInfo()
229                                    .getModuleMap()
230                                    .getModuleMapFileForUniquing(M);
231 
232   if (ModuleMap) {
233     StringRef Path = ModuleMap->tryGetRealPathName();
234     if (Path.empty())
235       Path = ModuleMap->getName();
236     MD.ClangModuleMapFile = std::string(Path);
237   }
238 
239   serialization::ModuleFile *MF =
240       MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
241           M->getASTFile());
242   MDC.ScanInstance.getASTReader()->visitInputFiles(
243       *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
244         // __inferred_module.map is the result of the way in which an implicit
245         // module build handles inferred modules. It adds an overlay VFS with
246         // this file in the proper directory and relies on the rest of Clang to
247         // handle it like normal. With explicitly built modules we don't need
248         // to play VFS tricks, so replace it with the correct module map.
249         if (IF.getFile()->getName().endswith("__inferred_module.map")) {
250           MD.FileDeps.insert(ModuleMap->getName());
251           return;
252         }
253         MD.FileDeps.insert(IF.getFile()->getName());
254       });
255 
256   // We usually don't need to list the module map files of our dependencies when
257   // building a module explicitly: their semantics will be deserialized from PCM
258   // files.
259   //
260   // However, some module maps loaded implicitly during the dependency scan can
261   // describe anti-dependencies. That happens when this module, let's call it
262   // M1, is marked as '[no_undeclared_includes]' and tries to access a header
263   // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;'
264   // declaration. The explicit build needs the module map for M2 so that it
265   // knows that textually including "M2/M2.h" is not allowed.
266   // E.g., '__has_include("M2/M2.h")' should return false, but without M2's
267   // module map the explicit build would return true.
268   //
269   // An alternative approach would be to tell the explicit build what its
270   // textual dependencies are, instead of having it re-discover its
271   // anti-dependencies. For example, we could create and use an `-ivfs-overlay`
272   // with `fall-through: false` that explicitly listed the dependencies.
273   // However, that's more complicated to implement and harder to reason about.
274   if (M->NoUndeclaredIncludes) {
275     // We don't have a good way to determine which module map described the
276     // anti-dependency (let alone what's the corresponding top-level module
277     // map). We simply specify all the module maps in the order they were loaded
278     // during the implicit build during scan.
279     // TODO: Resolve this by serializing and only using Module::UndeclaredUses.
280     MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps(
281         *MF, [&](const FileEntry *FE) {
282           if (FE->getName().endswith("__inferred_module.map"))
283             return;
284           // The top-level modulemap of this module will be the input file. We
285           // don't need to specify it as a module map.
286           if (FE == ModuleMap)
287             return;
288           MD.ModuleMapFileDeps.push_back(FE->getName().str());
289         });
290   }
291 
292   // Add direct prebuilt module dependencies now, so that we can use them when
293   // creating a CompilerInvocation and computing context hash for this
294   // ModuleDeps instance.
295   llvm::DenseSet<const Module *> SeenModules;
296   addAllSubmodulePrebuiltDeps(M, MD, SeenModules);
297 
298   MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths(
299       MD, [&](CompilerInvocation &BuildInvocation) {
300         if (MDC.OptimizeArgs)
301           optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(),
302                                    *MDC.ScanInstance.getASTReader(), *MF);
303       });
304   MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
305 
306   llvm::DenseSet<const Module *> AddedModules;
307   addAllSubmoduleDeps(M, MD, AddedModules);
308 
309   return MD.ID;
310 }
311 
312 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
313     const Module *M, ModuleDeps &MD,
314     llvm::DenseSet<const Module *> &SeenSubmodules) {
315   addModulePrebuiltDeps(M, MD, SeenSubmodules);
316 
317   for (const Module *SubM : M->submodules())
318     addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
319 }
320 
321 void ModuleDepCollectorPP::addModulePrebuiltDeps(
322     const Module *M, ModuleDeps &MD,
323     llvm::DenseSet<const Module *> &SeenSubmodules) {
324   for (const Module *Import : M->Imports)
325     if (Import->getTopLevelModule() != M->getTopLevelModule())
326       if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
327         if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
328           MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
329 }
330 
331 void ModuleDepCollectorPP::addAllSubmoduleDeps(
332     const Module *M, ModuleDeps &MD,
333     llvm::DenseSet<const Module *> &AddedModules) {
334   addModuleDep(M, MD, AddedModules);
335 
336   for (const Module *SubM : M->submodules())
337     addAllSubmoduleDeps(SubM, MD, AddedModules);
338 }
339 
340 void ModuleDepCollectorPP::addModuleDep(
341     const Module *M, ModuleDeps &MD,
342     llvm::DenseSet<const Module *> &AddedModules) {
343   for (const Module *Import : M->Imports) {
344     if (Import->getTopLevelModule() != M->getTopLevelModule() &&
345         !MDC.isPrebuiltModule(Import)) {
346       ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule());
347       if (AddedModules.insert(Import->getTopLevelModule()).second)
348         MD.ClangModuleDeps.push_back(ImportID);
349     }
350   }
351 }
352 
353 ModuleDepCollector::ModuleDepCollector(
354     std::unique_ptr<DependencyOutputOptions> Opts,
355     CompilerInstance &ScanInstance, DependencyConsumer &C,
356     CompilerInvocation &&OriginalCI, bool OptimizeArgs)
357     : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)),
358       OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {}
359 
360 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
361   PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
362 }
363 
364 void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
365 
366 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
367   std::string Name(M->getTopLevelModuleName());
368   const auto &PrebuiltModuleFiles =
369       ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
370   auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
371   if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
372     return false;
373   assert("Prebuilt module came from the expected AST file" &&
374          PrebuiltModuleFileIt->second == M->getASTFile()->getName());
375   return true;
376 }
377