1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class for loading and validating a module map or
11 // header list by checking that all headers in the corresponding directories
12 // are accounted for.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Driver/Options.h"
18 #include "clang/Frontend/CompilerInstance.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "CoverageChecker.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "ModularizeUtilities.h"
27 
28 using namespace clang;
29 using namespace llvm;
30 using namespace Modularize;
31 
32 namespace {
33 // Subclass TargetOptions so we can construct it inline with
34 // the minimal option, the triple.
35 class ModuleMapTargetOptions : public clang::TargetOptions {
36 public:
37   ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
38 };
39 } // namespace
40 
41 // ModularizeUtilities class implementation.
42 
// Constructor.
// Records the input paths, the header prefix, and the problem files list
// path, then creates the Clang objects (language options, diagnostics,
// target, file and source managers, header search) used when loading
// module maps.
// \param InputPaths The input file paths (module maps and/or header lists).
// \param Prefix Directory prefix for relative header paths; may be empty.
// \param ProblemFilesListPath Path of the problem files list; may be empty.
// NOTE(review): several initializers below read members set up earlier in
// this list (for example Diagnostics uses DiagIDs, DiagnosticOpts and DC),
// so this presumably relies on the member declaration order in the class
// header - confirm before reordering either.
ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
                                         llvm::StringRef Prefix,
                                         llvm::StringRef ProblemFilesListPath)
  : InputFilePaths(InputPaths),
    HeaderPrefix(Prefix),
    ProblemFilesPath(ProblemFilesListPath),
    // No module map has been loaded yet, and no missing headers seen.
    HasModuleMap(false),
    MissingHeaderCount(0),
    // Init clang stuff needed for loading the module map and preprocessing.
    LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
    DiagnosticOpts(new DiagnosticOptions()),
    // The diagnostic consumer is constructed on llvm::errs().
    DC(llvm::errs(), DiagnosticOpts.get()),
    // The engine is handed the address of the DC member; the trailing
    // 'false' presumably means the engine does not own the consumer -
    // TODO confirm against the DiagnosticsEngine constructor.
    Diagnostics(
    new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
    TargetOpts(new ModuleMapTargetOptions()),
    Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
    FileMgr(new FileManager(FileSystemOpts)),
    SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
    HeaderSearchOpts(new HeaderSearchOptions()),
    HeaderInfo(new HeaderSearch(HeaderSearchOpts, *SourceMgr, *Diagnostics,
    *LangOpts, Target.get())) {
}
66 
67 // Create instance of ModularizeUtilities, to simplify setting up
68 // subordinate objects.
69 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
70     std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
71     llvm::StringRef ProblemFilesListPath) {
72 
73   return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
74 }
75 
76 // Load all header lists and dependencies.
77 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
78   typedef std::vector<std::string>::iterator Iter;
79   // For each input file.
80   for (Iter I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
81     llvm::StringRef InputPath = *I;
82     // If it's a module map.
83     if (InputPath.endswith(".modulemap")) {
84       // Load the module map.
85       if (std::error_code EC = loadModuleMap(InputPath))
86         return EC;
87     }
88     else {
89       // Else we assume it's a header list and load it.
90       if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
91         errs() << "modularize: error: Unable to get header list '" << InputPath
92           << "': " << EC.message() << '\n';
93         return EC;
94       }
95     }
96   }
97   // If we have a problem files list.
98   if (ProblemFilesPath.size() != 0) {
99     // Load problem files list.
100     if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) {
101       errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
102         << "': " << EC.message() << '\n';
103       return EC;
104     }
105   }
106   return std::error_code();
107 }
108 
109 // Do coverage checks.
110 // For each loaded module map, do header coverage check.
111 // Starting from the directory of the module.map file,
112 // Find all header files, optionally looking only at files
113 // covered by the include path options, and compare against
114 // the headers referenced by the module.map file.
115 // Display warnings for unaccounted-for header files.
116 // Returns 0 if there were no errors or warnings, 1 if there
117 // were warnings, 2 if any other problem, such as a bad
118 // module map path argument was specified.
119 std::error_code ModularizeUtilities::doCoverageCheck(
120     std::vector<std::string> &IncludePaths,
121     llvm::ArrayRef<std::string> CommandLine) {
122   int ModuleMapCount = ModuleMaps.size();
123   int ModuleMapIndex;
124   std::error_code EC;
125   for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
126     std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
127     CoverageChecker *Checker = CoverageChecker::createCoverageChecker(
128       InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, ModMap.get());
129     std::error_code LocalEC = Checker->doChecks();
130     if (LocalEC.value() > 0)
131       EC = LocalEC;
132   }
133   return EC;
134 }
135 
136 // Load single header list and dependencies.
137 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
138     llvm::StringRef InputPath) {
139 
140   // By default, use the path component of the list file name.
141   SmallString<256> HeaderDirectory(InputPath);
142   llvm::sys::path::remove_filename(HeaderDirectory);
143   SmallString<256> CurrentDirectory;
144   llvm::sys::fs::current_path(CurrentDirectory);
145 
146   // Get the prefix if we have one.
147   if (HeaderPrefix.size() != 0)
148     HeaderDirectory = HeaderPrefix;
149 
150   // Read the header list file into a buffer.
151   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
152     MemoryBuffer::getFile(InputPath);
153   if (std::error_code EC = listBuffer.getError())
154     return EC;
155 
156   // Parse the header list into strings.
157   SmallVector<StringRef, 32> Strings;
158   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
159 
160   // Collect the header file names from the string list.
161   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
162     E = Strings.end();
163     I != E; ++I) {
164     StringRef Line = I->trim();
165     // Ignore comments and empty lines.
166     if (Line.empty() || (Line[0] == '#'))
167       continue;
168     std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
169     SmallString<256> HeaderFileName;
170     // Prepend header file name prefix if it's not absolute.
171     if (llvm::sys::path::is_absolute(TargetAndDependents.first))
172       llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
173     else {
174       if (HeaderDirectory.size() != 0)
175         HeaderFileName = HeaderDirectory;
176       else
177         HeaderFileName = CurrentDirectory;
178       llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
179       llvm::sys::path::native(HeaderFileName);
180     }
181     // Handle optional dependencies.
182     DependentsVector Dependents;
183     SmallVector<StringRef, 4> DependentsList;
184     TargetAndDependents.second.split(DependentsList, " ", -1, false);
185     int Count = DependentsList.size();
186     for (int Index = 0; Index < Count; ++Index) {
187       SmallString<256> Dependent;
188       if (llvm::sys::path::is_absolute(DependentsList[Index]))
189         Dependent = DependentsList[Index];
190       else {
191         if (HeaderDirectory.size() != 0)
192           Dependent = HeaderDirectory;
193         else
194           Dependent = CurrentDirectory;
195         llvm::sys::path::append(Dependent, DependentsList[Index]);
196       }
197       llvm::sys::path::native(Dependent);
198       Dependents.push_back(getCanonicalPath(Dependent.str()));
199     }
200     // Get canonical form.
201     HeaderFileName = getCanonicalPath(HeaderFileName);
202     // Save the resulting header file path and dependencies.
203     HeaderFileNames.push_back(HeaderFileName.str());
204     Dependencies[HeaderFileName.str()] = Dependents;
205   }
206   return std::error_code();
207 }
208 
209 // Load problem header list.
210 std::error_code ModularizeUtilities::loadProblemHeaderList(
211   llvm::StringRef InputPath) {
212 
213   // By default, use the path component of the list file name.
214   SmallString<256> HeaderDirectory(InputPath);
215   llvm::sys::path::remove_filename(HeaderDirectory);
216   SmallString<256> CurrentDirectory;
217   llvm::sys::fs::current_path(CurrentDirectory);
218 
219   // Get the prefix if we have one.
220   if (HeaderPrefix.size() != 0)
221     HeaderDirectory = HeaderPrefix;
222 
223   // Read the header list file into a buffer.
224   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
225     MemoryBuffer::getFile(InputPath);
226   if (std::error_code EC = listBuffer.getError())
227     return EC;
228 
229   // Parse the header list into strings.
230   SmallVector<StringRef, 32> Strings;
231   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
232 
233   // Collect the header file names from the string list.
234   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
235     E = Strings.end();
236     I != E; ++I) {
237     StringRef Line = I->trim();
238     // Ignore comments and empty lines.
239     if (Line.empty() || (Line[0] == '#'))
240       continue;
241     SmallString<256> HeaderFileName;
242     // Prepend header file name prefix if it's not absolute.
243     if (llvm::sys::path::is_absolute(Line))
244       llvm::sys::path::native(Line, HeaderFileName);
245     else {
246       if (HeaderDirectory.size() != 0)
247         HeaderFileName = HeaderDirectory;
248       else
249         HeaderFileName = CurrentDirectory;
250       llvm::sys::path::append(HeaderFileName, Line);
251       llvm::sys::path::native(HeaderFileName);
252     }
253     // Get canonical form.
254     HeaderFileName = getCanonicalPath(HeaderFileName);
255     // Save the resulting header file path.
256     ProblemFileNames.push_back(HeaderFileName.str());
257   }
258   return std::error_code();
259 }
260 
261 // Load single module map and extract header file list.
262 std::error_code ModularizeUtilities::loadModuleMap(
263     llvm::StringRef InputPath) {
264   // Get file entry for module.modulemap file.
265   const FileEntry *ModuleMapEntry =
266     SourceMgr->getFileManager().getFile(InputPath);
267 
268   // return error if not found.
269   if (!ModuleMapEntry) {
270     llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
271     return std::error_code(1, std::generic_category());
272   }
273 
274   // Because the module map parser uses a ForwardingDiagnosticConsumer,
275   // which doesn't forward the BeginSourceFile call, we do it explicitly here.
276   DC.BeginSourceFile(*LangOpts, nullptr);
277 
278   // Figure out the home directory for the module map file.
279   const DirectoryEntry *Dir = ModuleMapEntry->getDir();
280   StringRef DirName(Dir->getName());
281   if (llvm::sys::path::filename(DirName) == "Modules") {
282     DirName = llvm::sys::path::parent_path(DirName);
283     if (DirName.endswith(".framework"))
284       Dir = FileMgr->getDirectory(DirName);
285     // FIXME: This assert can fail if there's a race between the above check
286     // and the removal of the directory.
287     assert(Dir && "parent must exist");
288   }
289 
290   std::unique_ptr<ModuleMap> ModMap;
291   ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
292     Target.get(), *HeaderInfo));
293 
294   // Parse module.modulemap file into module map.
295   if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
296     return std::error_code(1, std::generic_category());
297   }
298 
299   // Do matching end call.
300   DC.EndSourceFile();
301 
302   // Reset missing header count.
303   MissingHeaderCount = 0;
304 
305   if (!collectModuleMapHeaders(ModMap.get()))
306     return std::error_code(1, std::generic_category());
307 
308   // Save module map.
309   ModuleMaps.push_back(std::move(ModMap));
310 
311   // Indicate we are using module maps.
312   HasModuleMap = true;
313 
314   // Return code of 1 for missing headers.
315   if (MissingHeaderCount)
316     return std::error_code(1, std::generic_category());
317 
318   return std::error_code();
319 }
320 
321 // Collect module map headers.
322 // Walks the modules and collects referenced headers into
323 // HeaderFileNames.
324 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
325   for (ModuleMap::module_iterator I = ModMap->module_begin(),
326     E = ModMap->module_end();
327     I != E; ++I) {
328     if (!collectModuleHeaders(*I->second))
329       return false;
330   }
331   return true;
332 }
333 
334 // Collect referenced headers from one module.
335 // Collects the headers referenced in the given module into
336 // HeaderFileNames.
337 bool ModularizeUtilities::collectModuleHeaders(const Module &Mod) {
338 
339   // Ignore explicit modules because they often have dependencies
340   // we can't know.
341   if (Mod.IsExplicit)
342     return true;
343 
344   // Treat headers in umbrella directory as dependencies.
345   DependentsVector UmbrellaDependents;
346 
347   // Recursively do submodules.
348   for (Module::submodule_const_iterator MI = Mod.submodule_begin(),
349       MIEnd = Mod.submodule_end();
350       MI != MIEnd; ++MI)
351     collectModuleHeaders(**MI);
352 
353   if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) {
354     std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
355     // Collect umbrella header.
356     HeaderFileNames.push_back(HeaderPath);
357 
358     // FUTURE: When needed, umbrella header header collection goes here.
359   }
360   else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) {
361     // If there normal headers, assume these are umbrellas and skip collection.
362     if (Mod.Headers->size() == 0) {
363       // Collect headers in umbrella directory.
364       if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
365         return false;
366     }
367   }
368 
369   // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
370   // assuming they are marked as such either because of unsuitability for
371   // modules or because they are meant to be included by another header,
372   // and thus should be ignored by modularize.
373 
374   int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
375 
376   for (int Index = 0; Index < NormalHeaderCount; ++Index) {
377     DependentsVector NormalDependents;
378     // Collect normal header.
379     const clang::Module::Header &Header(
380       Mod.Headers[clang::Module::HK_Normal][Index]);
381     std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
382     HeaderFileNames.push_back(HeaderPath);
383   }
384 
385   int MissingCountThisModule = Mod.MissingHeaders.size();
386 
387   for (int Index = 0; Index < MissingCountThisModule; ++Index) {
388     std::string MissingFile = Mod.MissingHeaders[Index].FileName;
389     SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
390     errs() << Loc.printToString(*SourceMgr)
391       << ": error : Header not found: " << MissingFile << "\n";
392   }
393 
394   MissingHeaderCount += MissingCountThisModule;
395 
396   return true;
397 }
398 
399 // Collect headers from an umbrella directory.
400 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
401   DependentsVector &Dependents) {
402   // Initialize directory name.
403   SmallString<256> Directory(UmbrellaDirName);
404   // Walk the directory.
405   std::error_code EC;
406   llvm::sys::fs::file_status Status;
407   for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
408     I.increment(EC)) {
409     if (EC)
410       return false;
411     std::string File(I->path());
412     I->status(Status);
413     llvm::sys::fs::file_type Type = Status.type();
414     // If the file is a directory, ignore the name and recurse.
415     if (Type == llvm::sys::fs::file_type::directory_file) {
416       if (!collectUmbrellaHeaders(File, Dependents))
417         return false;
418       continue;
419     }
420     // If the file does not have a common header extension, ignore it.
421     if (!isHeader(File))
422       continue;
423     // Save header name.
424     std::string HeaderPath = getCanonicalPath(File);
425     Dependents.push_back(HeaderPath);
426   }
427   return true;
428 }
429 
430 // Replace .. embedded in path for purposes of having
431 // a canonical path.
432 static std::string replaceDotDot(StringRef Path) {
433   SmallString<128> Buffer;
434   llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
435     E = llvm::sys::path::end(Path);
436   while (B != E) {
437     if (B->compare(".") == 0) {
438     }
439     else if (B->compare("..") == 0)
440       llvm::sys::path::remove_filename(Buffer);
441     else
442       llvm::sys::path::append(Buffer, *B);
443     ++B;
444   }
445   if (Path.endswith("/") || Path.endswith("\\"))
446     Buffer.append(1, Path.back());
447   return Buffer.c_str();
448 }
449 
450 // Convert header path to canonical form.
451 // The canonical form is basically just use forward slashes, and remove "./".
452 // \param FilePath The file path, relative to the module map directory.
453 // \returns The file path in canonical form.
454 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
455   std::string Tmp(replaceDotDot(FilePath));
456   std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
457   StringRef Tmp2(Tmp);
458   if (Tmp2.startswith("./"))
459     Tmp = Tmp2.substr(2);
460   return Tmp;
461 }
462 
463 // Check for header file extension.
464 // If the file extension is .h, .inc, or missing, it's
465 // assumed to be a header.
466 // \param FileName The file name.  Must not be a directory.
467 // \returns true if it has a header extension or no extension.
468 bool ModularizeUtilities::isHeader(StringRef FileName) {
469   StringRef Extension = llvm::sys::path::extension(FileName);
470   if (Extension.size() == 0)
471     return true;
472   if (Extension.equals_lower(".h"))
473     return true;
474   if (Extension.equals_lower(".inc"))
475     return true;
476   return false;
477 }
478 
479 // Get directory path component from file path.
480 // \returns the component of the given path, which will be
481 // relative if the given path is relative, absolute if the
482 // given path is absolute, or "." if the path has no leading
483 // path component.
484 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
485   SmallString<256> Directory(Path);
486   sys::path::remove_filename(Directory);
487   if (Directory.size() == 0)
488     return ".";
489   return Directory.str();
490 }
491 
492 // Add unique problem file.
493 // Also standardizes the path.
494 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
495   FilePath = getCanonicalPath(FilePath);
496   // Don't add if already present.
497   for(auto &TestFilePath : ProblemFileNames) {
498     if (TestFilePath == FilePath)
499       return;
500   }
501   ProblemFileNames.push_back(FilePath);
502 }
503 
504 // Add file with no compile errors.
505 // Also standardizes the path.
506 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
507   FilePath = getCanonicalPath(FilePath);
508   GoodFileNames.push_back(FilePath);
509 }
510 
511 // List problem files.
512 void ModularizeUtilities::displayProblemFiles() {
513   errs() << "\nThese are the files with possible errors:\n\n";
514   for (auto &ProblemFile : ProblemFileNames) {
515     errs() << ProblemFile << "\n";
516   }
517 }
518 
519 // List files with no problems.
520 void ModularizeUtilities::displayGoodFiles() {
521   errs() << "\nThese are the files with no detected errors:\n\n";
522   for (auto &GoodFile : HeaderFileNames) {
523     bool Good = true;
524     for (auto &ProblemFile : ProblemFileNames) {
525       if (ProblemFile == GoodFile) {
526         Good = false;
527         break;
528       }
529     }
530     if (Good)
531       errs() << GoodFile << "\n";
532   }
533 }
534 
535 // List files with problem files commented out.
536 void ModularizeUtilities::displayCombinedFiles() {
537   errs() <<
538     "\nThese are the combined files, with problem files preceded by #:\n\n";
539   for (auto &File : HeaderFileNames) {
540     bool Good = true;
541     for (auto &ProblemFile : ProblemFileNames) {
542       if (ProblemFile == File) {
543         Good = false;
544         break;
545       }
546     }
547     errs() << (Good ? "" : "#") << File << "\n";
548   }
549 }
550