1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10 
11 #include "clang/Frontend/CompilerInstance.h"
12 #include "clang/Lex/Preprocessor.h"
13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
14 #include "llvm/Support/StringSaver.h"
15 
16 using namespace clang;
17 using namespace tooling;
18 using namespace dependencies;
19 
20 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
21                                      ASTReader &Reader,
22                                      const serialization::ModuleFile &MF) {
23   // Only preserve search paths that were used during the dependency scan.
24   std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
25   Opts.UserEntries.clear();
26   for (unsigned I = 0; I < Entries.size(); ++I)
27     if (MF.SearchPathUsage[I])
28       Opts.UserEntries.push_back(Entries[I]);
29 }
30 
31 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
32     const ModuleDeps &Deps,
33     llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
34   // Make a deep copy of the original Clang invocation.
35   CompilerInvocation CI(OriginalInvocation);
36 
37   CI.getLangOpts()->resetNonModularOptions();
38   CI.getPreprocessorOpts().resetNonModularOptions();
39 
40   // Remove options incompatible with explicit module build.
41   CI.getFrontendOpts().Inputs.clear();
42   CI.getFrontendOpts().OutputFile.clear();
43 
44   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
45   CI.getLangOpts()->ModuleName = Deps.ID.ModuleName;
46   CI.getFrontendOpts().IsSystemModule = Deps.IsSystem;
47 
48   CI.getLangOpts()->ImplicitModules = false;
49 
50   // Report the prebuilt modules this module uses.
51   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) {
52     CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
53     CI.getFrontendOpts().ModuleMapFiles.push_back(PrebuiltModule.ModuleMapFile);
54   }
55 
56   Optimize(CI);
57 
58   return CI;
59 }
60 
61 static std::vector<std::string>
62 serializeCompilerInvocation(const CompilerInvocation &CI) {
63   // Set up string allocator.
64   llvm::BumpPtrAllocator Alloc;
65   llvm::StringSaver Strings(Alloc);
66   auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); };
67 
68   // Synthesize full command line from the CompilerInvocation, including "-cc1".
69   SmallVector<const char *, 32> Args{"-cc1"};
70   CI.generateCC1CommandLine(Args, SA);
71 
72   // Convert arguments to the return type.
73   return std::vector<std::string>{Args.begin(), Args.end()};
74 }
75 
76 std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
77     std::function<StringRef(ModuleID)> LookupPCMPath,
78     std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps) const {
79   CompilerInvocation CI(Invocation);
80   FrontendOptions &FrontendOpts = CI.getFrontendOpts();
81 
82   InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(),
83                                InputKind::Format::ModuleMap);
84   FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind);
85   FrontendOpts.OutputFile = std::string(LookupPCMPath(ID));
86 
87   dependencies::detail::collectPCMAndModuleMapPaths(
88       ClangModuleDeps, LookupPCMPath, LookupModuleDeps,
89       FrontendOpts.ModuleFiles, FrontendOpts.ModuleMapFiles);
90 
91   return serializeCompilerInvocation(CI);
92 }
93 
94 std::vector<std::string>
95 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
96   return serializeCompilerInvocation(Invocation);
97 }
98 
99 void dependencies::detail::collectPCMAndModuleMapPaths(
100     llvm::ArrayRef<ModuleID> Modules,
101     std::function<StringRef(ModuleID)> LookupPCMPath,
102     std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps,
103     std::vector<std::string> &PCMPaths, std::vector<std::string> &ModMapPaths) {
104   llvm::StringSet<> AlreadyAdded;
105 
106   std::function<void(llvm::ArrayRef<ModuleID>)> AddArgs =
107       [&](llvm::ArrayRef<ModuleID> Modules) {
108         for (const ModuleID &MID : Modules) {
109           if (!AlreadyAdded.insert(MID.ModuleName + MID.ContextHash).second)
110             continue;
111           const ModuleDeps &M = LookupModuleDeps(MID);
112           // Depth first traversal.
113           AddArgs(M.ClangModuleDeps);
114           PCMPaths.push_back(LookupPCMPath(MID).str());
115           if (!M.ClangModuleMapFile.empty())
116             ModMapPaths.push_back(M.ClangModuleMapFile);
117         }
118       };
119 
120   AddArgs(Modules);
121 }
122 
123 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
124                                        FileChangeReason Reason,
125                                        SrcMgr::CharacteristicKind FileType,
126                                        FileID PrevFID) {
127   if (Reason != PPCallbacks::EnterFile)
128     return;
129 
130   // This has to be delayed as the context hash can change at the start of
131   // `CompilerInstance::ExecuteAction`.
132   if (MDC.ContextHash.empty()) {
133     MDC.ContextHash = Instance.getInvocation().getModuleHash();
134     MDC.Consumer.handleContextHash(MDC.ContextHash);
135   }
136 
137   SourceManager &SM = Instance.getSourceManager();
138 
139   // Dependency generation really does want to go all the way to the
140   // file entry for a source location to find out what is depended on.
141   // We do not want #line markers to affect dependency generation!
142   if (Optional<StringRef> Filename =
143           SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc))))
144     MDC.FileDeps.push_back(
145         std::string(llvm::sys::path::remove_leading_dotslash(*Filename)));
146 }
147 
148 void ModuleDepCollectorPP::InclusionDirective(
149     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
150     bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File,
151     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
152     SrcMgr::CharacteristicKind FileType) {
153   if (!File && !Imported) {
154     // This is a non-modular include that HeaderSearch failed to find. Add it
155     // here as `FileChanged` will never see it.
156     MDC.FileDeps.push_back(std::string(FileName));
157   }
158   handleImport(Imported);
159 }
160 
161 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
162                                         ModuleIdPath Path,
163                                         const Module *Imported) {
164   handleImport(Imported);
165 }
166 
167 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
168   if (!Imported)
169     return;
170 
171   const Module *TopLevelModule = Imported->getTopLevelModule();
172 
173   if (MDC.isPrebuiltModule(TopLevelModule))
174     DirectPrebuiltModularDeps.insert(TopLevelModule);
175   else
176     DirectModularDeps.insert(TopLevelModule);
177 }
178 
179 void ModuleDepCollectorPP::EndOfMainFile() {
180   FileID MainFileID = Instance.getSourceManager().getMainFileID();
181   MDC.MainFile = std::string(
182       Instance.getSourceManager().getFileEntryForID(MainFileID)->getName());
183 
184   if (!Instance.getPreprocessorOpts().ImplicitPCHInclude.empty())
185     MDC.FileDeps.push_back(Instance.getPreprocessorOpts().ImplicitPCHInclude);
186 
187   for (const Module *M : DirectModularDeps) {
188     // A top-level module might not be actually imported as a module when
189     // -fmodule-name is used to compile a translation unit that imports this
190     // module. In that case it can be skipped. The appropriate header
191     // dependencies will still be reported as expected.
192     if (!M->getASTFile())
193       continue;
194     handleTopLevelModule(M);
195   }
196 
197   MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
198 
199   for (auto &&I : MDC.ModularDeps)
200     MDC.Consumer.handleModuleDependency(I.second);
201 
202   for (auto &&I : MDC.FileDeps)
203     MDC.Consumer.handleFileDependency(I);
204 
205   for (auto &&I : DirectPrebuiltModularDeps)
206     MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I});
207 }
208 
209 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
210   assert(M == M->getTopLevelModule() && "Expected top level module!");
211 
212   // If this module has been handled already, just return its ID.
213   auto ModI = MDC.ModularDeps.insert({M, ModuleDeps{}});
214   if (!ModI.second)
215     return ModI.first->second.ID;
216 
217   ModuleDeps &MD = ModI.first->second;
218 
219   MD.ID.ModuleName = M->getFullModuleName();
220   MD.ImportedByMainFile = DirectModularDeps.contains(M);
221   MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
222   MD.IsSystem = M->IsSystem;
223 
224   const FileEntry *ModuleMap = Instance.getPreprocessor()
225                                    .getHeaderSearchInfo()
226                                    .getModuleMap()
227                                    .getModuleMapFileForUniquing(M);
228   MD.ClangModuleMapFile = std::string(ModuleMap ? ModuleMap->getName() : "");
229 
230   serialization::ModuleFile *MF =
231       MDC.Instance.getASTReader()->getModuleManager().lookup(M->getASTFile());
232   MDC.Instance.getASTReader()->visitInputFiles(
233       *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
234         // __inferred_module.map is the result of the way in which an implicit
235         // module build handles inferred modules. It adds an overlay VFS with
236         // this file in the proper directory and relies on the rest of Clang to
237         // handle it like normal. With explicitly built modules we don't need
238         // to play VFS tricks, so replace it with the correct module map.
239         if (IF.getFile()->getName().endswith("__inferred_module.map")) {
240           MD.FileDeps.insert(ModuleMap->getName());
241           return;
242         }
243         MD.FileDeps.insert(IF.getFile()->getName());
244       });
245 
246   // Add direct prebuilt module dependencies now, so that we can use them when
247   // creating a CompilerInvocation and computing context hash for this
248   // ModuleDeps instance.
249   llvm::DenseSet<const Module *> SeenModules;
250   addAllSubmodulePrebuiltDeps(M, MD, SeenModules);
251 
252   MD.Invocation = MDC.makeInvocationForModuleBuildWithoutPaths(
253       MD, [&](CompilerInvocation &CI) {
254         if (MDC.OptimizeArgs)
255           optimizeHeaderSearchOpts(CI.getHeaderSearchOpts(),
256                                    *MDC.Instance.getASTReader(), *MF);
257       });
258   MD.ID.ContextHash = MD.Invocation.getModuleHash();
259 
260   llvm::DenseSet<const Module *> AddedModules;
261   addAllSubmoduleDeps(M, MD, AddedModules);
262 
263   return MD.ID;
264 }
265 
266 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
267     const Module *M, ModuleDeps &MD,
268     llvm::DenseSet<const Module *> &SeenSubmodules) {
269   addModulePrebuiltDeps(M, MD, SeenSubmodules);
270 
271   for (const Module *SubM : M->submodules())
272     addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
273 }
274 
275 void ModuleDepCollectorPP::addModulePrebuiltDeps(
276     const Module *M, ModuleDeps &MD,
277     llvm::DenseSet<const Module *> &SeenSubmodules) {
278   for (const Module *Import : M->Imports)
279     if (Import->getTopLevelModule() != M->getTopLevelModule())
280       if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
281         if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
282           MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
283 }
284 
285 void ModuleDepCollectorPP::addAllSubmoduleDeps(
286     const Module *M, ModuleDeps &MD,
287     llvm::DenseSet<const Module *> &AddedModules) {
288   addModuleDep(M, MD, AddedModules);
289 
290   for (const Module *SubM : M->submodules())
291     addAllSubmoduleDeps(SubM, MD, AddedModules);
292 }
293 
294 void ModuleDepCollectorPP::addModuleDep(
295     const Module *M, ModuleDeps &MD,
296     llvm::DenseSet<const Module *> &AddedModules) {
297   for (const Module *Import : M->Imports) {
298     if (Import->getTopLevelModule() != M->getTopLevelModule() &&
299         !MDC.isPrebuiltModule(Import)) {
300       ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule());
301       if (AddedModules.insert(Import->getTopLevelModule()).second)
302         MD.ClangModuleDeps.push_back(ImportID);
303     }
304   }
305 }
306 
307 ModuleDepCollector::ModuleDepCollector(
308     std::unique_ptr<DependencyOutputOptions> Opts, CompilerInstance &I,
309     DependencyConsumer &C, CompilerInvocation &&OriginalCI, bool OptimizeArgs)
310     : Instance(I), Consumer(C), Opts(std::move(Opts)),
311       OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {}
312 
313 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
314   PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(Instance, *this));
315 }
316 
317 void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
318 
319 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
320   std::string Name(M->getTopLevelModuleName());
321   const auto &PrebuiltModuleFiles =
322       Instance.getHeaderSearchOpts().PrebuiltModuleFiles;
323   auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
324   if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
325     return false;
326   assert("Prebuilt module came from the expected AST file" &&
327          PrebuiltModuleFileIt->second == M->getASTFile()->getName());
328   return true;
329 }
330