1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class for loading and validating a module map or
11 // header list by checking that all headers in the corresponding directories
12 // are accounted for.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Driver/Options.h"
18 #include "clang/Frontend/CompilerInstance.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "CoverageChecker.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "ModularizeUtilities.h"
27 
28 using namespace clang;
29 using namespace llvm;
30 using namespace Modularize;
31 
32 namespace {
33 // Subclass TargetOptions so we can construct it inline with
34 // the minimal option, the triple.
35 class ModuleMapTargetOptions : public clang::TargetOptions {
36 public:
37   ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
38 };
39 } // namespace
40 
41 // ModularizeUtilities class implementation.
42 
43 // Constructor.
44 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
45                                          llvm::StringRef Prefix)
46   : InputFilePaths(InputPaths),
47     HeaderPrefix(Prefix),
48     HasModuleMap(false),
49     // Init clang stuff needed for loading the module map and preprocessing.
50     LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
51     DiagnosticOpts(new DiagnosticOptions()),
52     DC(llvm::errs(), DiagnosticOpts.get()),
53     Diagnostics(
54     new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
55     TargetOpts(new ModuleMapTargetOptions()),
56     Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
57     FileMgr(new FileManager(FileSystemOpts)),
58     SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
59     HeaderSearchOpts(new HeaderSearchOptions()),
60     HeaderInfo(new HeaderSearch(HeaderSearchOpts, *SourceMgr, *Diagnostics,
61     *LangOpts, Target.get())) {
62 }
63 
64 // Create instance of ModularizeUtilities, to simplify setting up
65 // subordinate objects.
66 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
67     std::vector<std::string> &InputPaths, llvm::StringRef Prefix) {
68 
69   return new ModularizeUtilities(InputPaths, Prefix);
70 }
71 
72 // Load all header lists and dependencies.
73 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
74   typedef std::vector<std::string>::iterator Iter;
75   // For each input file.
76   for (Iter I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
77     llvm::StringRef InputPath = *I;
78     // If it's a module map.
79     if (InputPath.endswith(".modulemap")) {
80       // Load the module map.
81       if (std::error_code EC = loadModuleMap(InputPath))
82         return EC;
83     }
84     else {
85       // Else we assume it's a header list and load it.
86       if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
87         errs() << "modularize: error: Unable to get header list '" << InputPath
88           << "': " << EC.message() << '\n';
89         return EC;
90       }
91     }
92   }
93   return std::error_code();
94 }
95 
96 // Do coverage checks.
97 // For each loaded module map, do header coverage check.
98 // Starting from the directory of the module.map file,
99 // Find all header files, optionally looking only at files
100 // covered by the include path options, and compare against
101 // the headers referenced by the module.map file.
102 // Display warnings for unaccounted-for header files.
103 // Returns 0 if there were no errors or warnings, 1 if there
104 // were warnings, 2 if any other problem, such as a bad
105 // module map path argument was specified.
106 std::error_code ModularizeUtilities::doCoverageCheck(
107     std::vector<std::string> &IncludePaths,
108     llvm::ArrayRef<std::string> CommandLine) {
109   int ModuleMapCount = ModuleMaps.size();
110   int ModuleMapIndex;
111   std::error_code EC;
112   for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
113     std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
114     CoverageChecker *Checker = CoverageChecker::createCoverageChecker(
115       InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, ModMap.get());
116     std::error_code LocalEC = Checker->doChecks();
117     if (LocalEC.value() > 0)
118       EC = LocalEC;
119   }
120   return EC;
121 }
122 
123 // Load single header list and dependencies.
124 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
125     llvm::StringRef InputPath) {
126 
127   // By default, use the path component of the list file name.
128   SmallString<256> HeaderDirectory(InputPath);
129   llvm::sys::path::remove_filename(HeaderDirectory);
130   SmallString<256> CurrentDirectory;
131   llvm::sys::fs::current_path(CurrentDirectory);
132 
133   // Get the prefix if we have one.
134   if (HeaderPrefix.size() != 0)
135     HeaderDirectory = HeaderPrefix;
136 
137   // Read the header list file into a buffer.
138   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
139     MemoryBuffer::getFile(InputPath);
140   if (std::error_code EC = listBuffer.getError())
141     return EC;
142 
143   // Parse the header list into strings.
144   SmallVector<StringRef, 32> Strings;
145   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
146 
147   // Collect the header file names from the string list.
148   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
149     E = Strings.end();
150     I != E; ++I) {
151     StringRef Line = I->trim();
152     // Ignore comments and empty lines.
153     if (Line.empty() || (Line[0] == '#'))
154       continue;
155     std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
156     SmallString<256> HeaderFileName;
157     // Prepend header file name prefix if it's not absolute.
158     if (llvm::sys::path::is_absolute(TargetAndDependents.first))
159       llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
160     else {
161       if (HeaderDirectory.size() != 0)
162         HeaderFileName = HeaderDirectory;
163       else
164         HeaderFileName = CurrentDirectory;
165       llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
166       llvm::sys::path::native(HeaderFileName);
167     }
168     // Handle optional dependencies.
169     DependentsVector Dependents;
170     SmallVector<StringRef, 4> DependentsList;
171     TargetAndDependents.second.split(DependentsList, " ", -1, false);
172     int Count = DependentsList.size();
173     for (int Index = 0; Index < Count; ++Index) {
174       SmallString<256> Dependent;
175       if (llvm::sys::path::is_absolute(DependentsList[Index]))
176         Dependent = DependentsList[Index];
177       else {
178         if (HeaderDirectory.size() != 0)
179           Dependent = HeaderDirectory;
180         else
181           Dependent = CurrentDirectory;
182         llvm::sys::path::append(Dependent, DependentsList[Index]);
183       }
184       llvm::sys::path::native(Dependent);
185       Dependents.push_back(getCanonicalPath(Dependent.str()));
186     }
187     // Get canonical form.
188     HeaderFileName = getCanonicalPath(HeaderFileName);
189     // Save the resulting header file path and dependencies.
190     HeaderFileNames.push_back(HeaderFileName.str());
191     Dependencies[HeaderFileName.str()] = Dependents;
192   }
193   return std::error_code();
194 }
195 
196 // Load single module map and extract header file list.
197 std::error_code ModularizeUtilities::loadModuleMap(
198     llvm::StringRef InputPath) {
199   // Get file entry for module.modulemap file.
200   const FileEntry *ModuleMapEntry =
201     SourceMgr->getFileManager().getFile(InputPath);
202 
203   // return error if not found.
204   if (!ModuleMapEntry) {
205     llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
206     return std::error_code(1, std::generic_category());
207   }
208 
209   // Because the module map parser uses a ForwardingDiagnosticConsumer,
210   // which doesn't forward the BeginSourceFile call, we do it explicitly here.
211   DC.BeginSourceFile(*LangOpts, nullptr);
212 
213   // Figure out the home directory for the module map file.
214   const DirectoryEntry *Dir = ModuleMapEntry->getDir();
215   StringRef DirName(Dir->getName());
216   if (llvm::sys::path::filename(DirName) == "Modules") {
217     DirName = llvm::sys::path::parent_path(DirName);
218     if (DirName.endswith(".framework"))
219       Dir = FileMgr->getDirectory(DirName);
220     // FIXME: This assert can fail if there's a race between the above check
221     // and the removal of the directory.
222     assert(Dir && "parent must exist");
223   }
224 
225   std::unique_ptr<ModuleMap> ModMap;
226   ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
227     Target.get(), *HeaderInfo));
228 
229   // Parse module.modulemap file into module map.
230   if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
231     return std::error_code(1, std::generic_category());
232   }
233 
234   // Do matching end call.
235   DC.EndSourceFile();
236 
237   if (!collectModuleMapHeaders(ModMap.get()))
238     return std::error_code(1, std::generic_category());
239 
240   // Save module map.
241   ModuleMaps.push_back(std::move(ModMap));
242 
243   // Indicate we are using module maps.
244   HasModuleMap = true;
245 
246   return std::error_code();
247 }
248 
249 // Collect module map headers.
250 // Walks the modules and collects referenced headers into
251 // HeaderFileNames.
252 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
253   for (ModuleMap::module_iterator I = ModMap->module_begin(),
254     E = ModMap->module_end();
255     I != E; ++I) {
256     if (!collectModuleHeaders(*I->second))
257       return false;
258   }
259   return true;
260 }
261 
262 // Collect referenced headers from one module.
263 // Collects the headers referenced in the given module into
264 // HeaderFileNames.
265 bool ModularizeUtilities::collectModuleHeaders(const Module &Mod) {
266 
267   // Ignore explicit modules because they often have dependencies
268   // we can't know.
269   if (Mod.IsExplicit)
270     return true;
271 
272   // Treat headers in umbrella directory as dependencies.
273   DependentsVector UmbrellaDependents;
274 
275   // Recursively do submodules.
276   for (Module::submodule_const_iterator MI = Mod.submodule_begin(),
277       MIEnd = Mod.submodule_end();
278       MI != MIEnd; ++MI)
279     collectModuleHeaders(**MI);
280 
281   if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader()) {
282     std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
283     // Collect umbrella header.
284     HeaderFileNames.push_back(HeaderPath);
285 
286     // FUTURE: When needed, umbrella header header collection goes here.
287   }
288   else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir()) {
289     // If there normal headers, assume these are umbrellas and skip collection.
290     if (Mod.Headers->size() == 0) {
291       // Collect headers in umbrella directory.
292       if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
293         return false;
294     }
295   }
296 
297   // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
298   // assuming they are marked as such either because of unsuitability for
299   // modules or because they are meant to be included by another header,
300   // and thus should be ignored by modularize.
301 
302   int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
303 
304   for (int Index = 0; Index < NormalHeaderCount; ++Index) {
305     DependentsVector NormalDependents;
306     // Collect normal header.
307     const clang::Module::Header &Header(
308       Mod.Headers[clang::Module::HK_Normal][Index]);
309     std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
310     HeaderFileNames.push_back(HeaderPath);
311   }
312 
313   return true;
314 }
315 
316 // Collect headers from an umbrella directory.
317 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
318   DependentsVector &Dependents) {
319   // Initialize directory name.
320   SmallString<256> Directory(UmbrellaDirName);
321   // Walk the directory.
322   std::error_code EC;
323   llvm::sys::fs::file_status Status;
324   for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
325     I.increment(EC)) {
326     if (EC)
327       return false;
328     std::string File(I->path());
329     I->status(Status);
330     llvm::sys::fs::file_type Type = Status.type();
331     // If the file is a directory, ignore the name and recurse.
332     if (Type == llvm::sys::fs::file_type::directory_file) {
333       if (!collectUmbrellaHeaders(File, Dependents))
334         return false;
335       continue;
336     }
337     // If the file does not have a common header extension, ignore it.
338     if (!isHeader(File))
339       continue;
340     // Save header name.
341     std::string HeaderPath = getCanonicalPath(File);
342     Dependents.push_back(HeaderPath);
343   }
344   return true;
345 }
346 
347 // Replace .. embedded in path for purposes of having
348 // a canonical path.
349 static std::string replaceDotDot(StringRef Path) {
350   SmallString<128> Buffer;
351   llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
352     E = llvm::sys::path::end(Path);
353   while (B != E) {
354     if (B->compare(".") == 0) {
355     }
356     else if (B->compare("..") == 0)
357       llvm::sys::path::remove_filename(Buffer);
358     else
359       llvm::sys::path::append(Buffer, *B);
360     ++B;
361   }
362   if (Path.endswith("/") || Path.endswith("\\"))
363     Buffer.append(1, Path.back());
364   return Buffer.c_str();
365 }
366 
367 // Convert header path to canonical form.
368 // The canonical form is basically just use forward slashes, and remove "./".
369 // \param FilePath The file path, relative to the module map directory.
370 // \returns The file path in canonical form.
371 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
372   std::string Tmp(replaceDotDot(FilePath));
373   std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
374   StringRef Tmp2(Tmp);
375   if (Tmp2.startswith("./"))
376     Tmp = Tmp2.substr(2);
377   return Tmp;
378 }
379 
380 // Check for header file extension.
381 // If the file extension is .h, .inc, or missing, it's
382 // assumed to be a header.
383 // \param FileName The file name.  Must not be a directory.
384 // \returns true if it has a header extension or no extension.
385 bool ModularizeUtilities::isHeader(StringRef FileName) {
386   StringRef Extension = llvm::sys::path::extension(FileName);
387   if (Extension.size() == 0)
388     return false;
389   if (Extension.equals_lower(".h"))
390     return true;
391   if (Extension.equals_lower(".inc"))
392     return true;
393   return false;
394 }
395 
396 // Get directory path component from file path.
397 // \returns the component of the given path, which will be
398 // relative if the given path is relative, absolute if the
399 // given path is absolute, or "." if the path has no leading
400 // path component.
401 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
402   SmallString<256> Directory(Path);
403   sys::path::remove_filename(Directory);
404   if (Directory.size() == 0)
405     return ".";
406   return Directory.str();
407 }
408