1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class for loading and validating a module map or
11 // header list by checking that all headers in the corresponding directories
12 // are accounted for.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Driver/Options.h"
18 #include "clang/Frontend/CompilerInstance.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "CoverageChecker.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "ModularizeUtilities.h"
27 
28 using namespace clang;
29 using namespace llvm;
30 using namespace Modularize;
31 
32 namespace {
33 // Subclass TargetOptions so we can construct it inline with
34 // the minimal option, the triple.
35 class ModuleMapTargetOptions : public clang::TargetOptions {
36 public:
37   ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
38 };
39 } // namespace
40 
41 // ModularizeUtilities class implementation.
42 
43 // Constructor.
44 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
45                                          llvm::StringRef Prefix,
46                                          llvm::StringRef ProblemFilesListPath)
47   : InputFilePaths(InputPaths),
48     HeaderPrefix(Prefix),
49     ProblemFilesPath(ProblemFilesListPath),
50     HasModuleMap(false),
51     MissingHeaderCount(0),
52     // Init clang stuff needed for loading the module map and preprocessing.
53     LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
54     DiagnosticOpts(new DiagnosticOptions()),
55     DC(llvm::errs(), DiagnosticOpts.get()),
56     Diagnostics(
57     new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
58     TargetOpts(new ModuleMapTargetOptions()),
59     Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
60     FileMgr(new FileManager(FileSystemOpts)),
61     SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
62     HeaderSearchOpts(new HeaderSearchOptions()),
63     HeaderInfo(new HeaderSearch(HeaderSearchOpts, *SourceMgr, *Diagnostics,
64     *LangOpts, Target.get())) {
65 }
66 
67 // Create instance of ModularizeUtilities, to simplify setting up
68 // subordinate objects.
69 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
70     std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
71     llvm::StringRef ProblemFilesListPath) {
72 
73   return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
74 }
75 
76 // Load all header lists and dependencies.
77 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
78   // For each input file.
79   for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
80     llvm::StringRef InputPath = *I;
81     // If it's a module map.
82     if (InputPath.endswith(".modulemap")) {
83       // Load the module map.
84       if (std::error_code EC = loadModuleMap(InputPath))
85         return EC;
86     }
87     else {
88       // Else we assume it's a header list and load it.
89       if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
90         errs() << "modularize: error: Unable to get header list '" << InputPath
91           << "': " << EC.message() << '\n';
92         return EC;
93       }
94     }
95   }
96   // If we have a problem files list.
97   if (ProblemFilesPath.size() != 0) {
98     // Load problem files list.
99     if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) {
100       errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
101         << "': " << EC.message() << '\n';
102       return EC;
103     }
104   }
105   return std::error_code();
106 }
107 
108 // Do coverage checks.
109 // For each loaded module map, do header coverage check.
110 // Starting from the directory of the module.map file,
111 // Find all header files, optionally looking only at files
112 // covered by the include path options, and compare against
113 // the headers referenced by the module.map file.
114 // Display warnings for unaccounted-for header files.
115 // Returns 0 if there were no errors or warnings, 1 if there
116 // were warnings, 2 if any other problem, such as a bad
117 // module map path argument was specified.
118 std::error_code ModularizeUtilities::doCoverageCheck(
119     std::vector<std::string> &IncludePaths,
120     llvm::ArrayRef<std::string> CommandLine) {
121   int ModuleMapCount = ModuleMaps.size();
122   int ModuleMapIndex;
123   std::error_code EC;
124   for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
125     std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
126     CoverageChecker *Checker = CoverageChecker::createCoverageChecker(
127       InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, ModMap.get());
128     std::error_code LocalEC = Checker->doChecks();
129     if (LocalEC.value() > 0)
130       EC = LocalEC;
131   }
132   return EC;
133 }
134 
135 // Load single header list and dependencies.
136 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
137     llvm::StringRef InputPath) {
138 
139   // By default, use the path component of the list file name.
140   SmallString<256> HeaderDirectory(InputPath);
141   llvm::sys::path::remove_filename(HeaderDirectory);
142   SmallString<256> CurrentDirectory;
143   llvm::sys::fs::current_path(CurrentDirectory);
144 
145   // Get the prefix if we have one.
146   if (HeaderPrefix.size() != 0)
147     HeaderDirectory = HeaderPrefix;
148 
149   // Read the header list file into a buffer.
150   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
151     MemoryBuffer::getFile(InputPath);
152   if (std::error_code EC = listBuffer.getError())
153     return EC;
154 
155   // Parse the header list into strings.
156   SmallVector<StringRef, 32> Strings;
157   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
158 
159   // Collect the header file names from the string list.
160   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
161     E = Strings.end();
162     I != E; ++I) {
163     StringRef Line = I->trim();
164     // Ignore comments and empty lines.
165     if (Line.empty() || (Line[0] == '#'))
166       continue;
167     std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
168     SmallString<256> HeaderFileName;
169     // Prepend header file name prefix if it's not absolute.
170     if (llvm::sys::path::is_absolute(TargetAndDependents.first))
171       llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
172     else {
173       if (HeaderDirectory.size() != 0)
174         HeaderFileName = HeaderDirectory;
175       else
176         HeaderFileName = CurrentDirectory;
177       llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
178       llvm::sys::path::native(HeaderFileName);
179     }
180     // Handle optional dependencies.
181     DependentsVector Dependents;
182     SmallVector<StringRef, 4> DependentsList;
183     TargetAndDependents.second.split(DependentsList, " ", -1, false);
184     int Count = DependentsList.size();
185     for (int Index = 0; Index < Count; ++Index) {
186       SmallString<256> Dependent;
187       if (llvm::sys::path::is_absolute(DependentsList[Index]))
188         Dependent = DependentsList[Index];
189       else {
190         if (HeaderDirectory.size() != 0)
191           Dependent = HeaderDirectory;
192         else
193           Dependent = CurrentDirectory;
194         llvm::sys::path::append(Dependent, DependentsList[Index]);
195       }
196       llvm::sys::path::native(Dependent);
197       Dependents.push_back(getCanonicalPath(Dependent.str()));
198     }
199     // Get canonical form.
200     HeaderFileName = getCanonicalPath(HeaderFileName);
201     // Save the resulting header file path and dependencies.
202     HeaderFileNames.push_back(HeaderFileName.str());
203     Dependencies[HeaderFileName.str()] = Dependents;
204   }
205   return std::error_code();
206 }
207 
208 // Load problem header list.
209 std::error_code ModularizeUtilities::loadProblemHeaderList(
210   llvm::StringRef InputPath) {
211 
212   // By default, use the path component of the list file name.
213   SmallString<256> HeaderDirectory(InputPath);
214   llvm::sys::path::remove_filename(HeaderDirectory);
215   SmallString<256> CurrentDirectory;
216   llvm::sys::fs::current_path(CurrentDirectory);
217 
218   // Get the prefix if we have one.
219   if (HeaderPrefix.size() != 0)
220     HeaderDirectory = HeaderPrefix;
221 
222   // Read the header list file into a buffer.
223   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
224     MemoryBuffer::getFile(InputPath);
225   if (std::error_code EC = listBuffer.getError())
226     return EC;
227 
228   // Parse the header list into strings.
229   SmallVector<StringRef, 32> Strings;
230   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
231 
232   // Collect the header file names from the string list.
233   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
234     E = Strings.end();
235     I != E; ++I) {
236     StringRef Line = I->trim();
237     // Ignore comments and empty lines.
238     if (Line.empty() || (Line[0] == '#'))
239       continue;
240     SmallString<256> HeaderFileName;
241     // Prepend header file name prefix if it's not absolute.
242     if (llvm::sys::path::is_absolute(Line))
243       llvm::sys::path::native(Line, HeaderFileName);
244     else {
245       if (HeaderDirectory.size() != 0)
246         HeaderFileName = HeaderDirectory;
247       else
248         HeaderFileName = CurrentDirectory;
249       llvm::sys::path::append(HeaderFileName, Line);
250       llvm::sys::path::native(HeaderFileName);
251     }
252     // Get canonical form.
253     HeaderFileName = getCanonicalPath(HeaderFileName);
254     // Save the resulting header file path.
255     ProblemFileNames.push_back(HeaderFileName.str());
256   }
257   return std::error_code();
258 }
259 
260 // Load single module map and extract header file list.
261 std::error_code ModularizeUtilities::loadModuleMap(
262     llvm::StringRef InputPath) {
263   // Get file entry for module.modulemap file.
264   const FileEntry *ModuleMapEntry =
265     SourceMgr->getFileManager().getFile(InputPath);
266 
267   // return error if not found.
268   if (!ModuleMapEntry) {
269     llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
270     return std::error_code(1, std::generic_category());
271   }
272 
273   // Because the module map parser uses a ForwardingDiagnosticConsumer,
274   // which doesn't forward the BeginSourceFile call, we do it explicitly here.
275   DC.BeginSourceFile(*LangOpts, nullptr);
276 
277   // Figure out the home directory for the module map file.
278   const DirectoryEntry *Dir = ModuleMapEntry->getDir();
279   StringRef DirName(Dir->getName());
280   if (llvm::sys::path::filename(DirName) == "Modules") {
281     DirName = llvm::sys::path::parent_path(DirName);
282     if (DirName.endswith(".framework"))
283       Dir = FileMgr->getDirectory(DirName);
284     // FIXME: This assert can fail if there's a race between the above check
285     // and the removal of the directory.
286     assert(Dir && "parent must exist");
287   }
288 
289   std::unique_ptr<ModuleMap> ModMap;
290   ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
291     Target.get(), *HeaderInfo));
292 
293   // Parse module.modulemap file into module map.
294   if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
295     return std::error_code(1, std::generic_category());
296   }
297 
298   // Do matching end call.
299   DC.EndSourceFile();
300 
301   // Reset missing header count.
302   MissingHeaderCount = 0;
303 
304   if (!collectModuleMapHeaders(ModMap.get()))
305     return std::error_code(1, std::generic_category());
306 
307   // Save module map.
308   ModuleMaps.push_back(std::move(ModMap));
309 
310   // Indicate we are using module maps.
311   HasModuleMap = true;
312 
313   // Return code of 1 for missing headers.
314   if (MissingHeaderCount)
315     return std::error_code(1, std::generic_category());
316 
317   return std::error_code();
318 }
319 
320 // Collect module map headers.
321 // Walks the modules and collects referenced headers into
322 // HeaderFileNames.
323 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
324   for (ModuleMap::module_iterator I = ModMap->module_begin(),
325     E = ModMap->module_end();
326     I != E; ++I) {
327     if (!collectModuleHeaders(*I->second))
328       return false;
329   }
330   return true;
331 }
332 
333 // Collect referenced headers from one module.
334 // Collects the headers referenced in the given module into
335 // HeaderFileNames.
336 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
337 
338   // Ignore explicit modules because they often have dependencies
339   // we can't know.
340   if (Mod.IsExplicit)
341     return true;
342 
343   // Treat headers in umbrella directory as dependencies.
344   DependentsVector UmbrellaDependents;
345 
346   // Recursively do submodules.
347   for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end();
348        MI != MIEnd; ++MI)
349     collectModuleHeaders(**MI);
350 
351   if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) {
352     std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
353     // Collect umbrella header.
354     HeaderFileNames.push_back(HeaderPath);
355 
356     // FUTURE: When needed, umbrella header header collection goes here.
357   }
358   else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) {
359     // If there normal headers, assume these are umbrellas and skip collection.
360     if (Mod.Headers->size() == 0) {
361       // Collect headers in umbrella directory.
362       if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
363         return false;
364     }
365   }
366 
367   // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
368   // assuming they are marked as such either because of unsuitability for
369   // modules or because they are meant to be included by another header,
370   // and thus should be ignored by modularize.
371 
372   int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
373 
374   for (int Index = 0; Index < NormalHeaderCount; ++Index) {
375     DependentsVector NormalDependents;
376     // Collect normal header.
377     const clang::Module::Header &Header(
378       Mod.Headers[clang::Module::HK_Normal][Index]);
379     std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
380     HeaderFileNames.push_back(HeaderPath);
381   }
382 
383   int MissingCountThisModule = Mod.MissingHeaders.size();
384 
385   for (int Index = 0; Index < MissingCountThisModule; ++Index) {
386     std::string MissingFile = Mod.MissingHeaders[Index].FileName;
387     SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
388     errs() << Loc.printToString(*SourceMgr)
389       << ": error : Header not found: " << MissingFile << "\n";
390   }
391 
392   MissingHeaderCount += MissingCountThisModule;
393 
394   return true;
395 }
396 
397 // Collect headers from an umbrella directory.
398 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
399   DependentsVector &Dependents) {
400   // Initialize directory name.
401   SmallString<256> Directory(UmbrellaDirName);
402   // Walk the directory.
403   std::error_code EC;
404   llvm::sys::fs::file_status Status;
405   for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
406     I.increment(EC)) {
407     if (EC)
408       return false;
409     std::string File(I->path());
410     I->status(Status);
411     llvm::sys::fs::file_type Type = Status.type();
412     // If the file is a directory, ignore the name and recurse.
413     if (Type == llvm::sys::fs::file_type::directory_file) {
414       if (!collectUmbrellaHeaders(File, Dependents))
415         return false;
416       continue;
417     }
418     // If the file does not have a common header extension, ignore it.
419     if (!isHeader(File))
420       continue;
421     // Save header name.
422     std::string HeaderPath = getCanonicalPath(File);
423     Dependents.push_back(HeaderPath);
424   }
425   return true;
426 }
427 
428 // Replace .. embedded in path for purposes of having
429 // a canonical path.
430 static std::string replaceDotDot(StringRef Path) {
431   SmallString<128> Buffer;
432   llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
433     E = llvm::sys::path::end(Path);
434   while (B != E) {
435     if (B->compare(".") == 0) {
436     }
437     else if (B->compare("..") == 0)
438       llvm::sys::path::remove_filename(Buffer);
439     else
440       llvm::sys::path::append(Buffer, *B);
441     ++B;
442   }
443   if (Path.endswith("/") || Path.endswith("\\"))
444     Buffer.append(1, Path.back());
445   return Buffer.c_str();
446 }
447 
448 // Convert header path to canonical form.
449 // The canonical form is basically just use forward slashes, and remove "./".
450 // \param FilePath The file path, relative to the module map directory.
451 // \returns The file path in canonical form.
452 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
453   std::string Tmp(replaceDotDot(FilePath));
454   std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
455   StringRef Tmp2(Tmp);
456   if (Tmp2.startswith("./"))
457     Tmp = Tmp2.substr(2);
458   return Tmp;
459 }
460 
461 // Check for header file extension.
462 // If the file extension is .h, .inc, or missing, it's
463 // assumed to be a header.
464 // \param FileName The file name.  Must not be a directory.
465 // \returns true if it has a header extension or no extension.
466 bool ModularizeUtilities::isHeader(StringRef FileName) {
467   StringRef Extension = llvm::sys::path::extension(FileName);
468   if (Extension.size() == 0)
469     return true;
470   if (Extension.equals_lower(".h"))
471     return true;
472   if (Extension.equals_lower(".inc"))
473     return true;
474   return false;
475 }
476 
477 // Get directory path component from file path.
478 // \returns the component of the given path, which will be
479 // relative if the given path is relative, absolute if the
480 // given path is absolute, or "." if the path has no leading
481 // path component.
482 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
483   SmallString<256> Directory(Path);
484   sys::path::remove_filename(Directory);
485   if (Directory.size() == 0)
486     return ".";
487   return Directory.str();
488 }
489 
490 // Add unique problem file.
491 // Also standardizes the path.
492 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
493   FilePath = getCanonicalPath(FilePath);
494   // Don't add if already present.
495   for(auto &TestFilePath : ProblemFileNames) {
496     if (TestFilePath == FilePath)
497       return;
498   }
499   ProblemFileNames.push_back(FilePath);
500 }
501 
502 // Add file with no compile errors.
503 // Also standardizes the path.
504 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
505   FilePath = getCanonicalPath(FilePath);
506   GoodFileNames.push_back(FilePath);
507 }
508 
509 // List problem files.
510 void ModularizeUtilities::displayProblemFiles() {
511   errs() << "\nThese are the files with possible errors:\n\n";
512   for (auto &ProblemFile : ProblemFileNames) {
513     errs() << ProblemFile << "\n";
514   }
515 }
516 
517 // List files with no problems.
518 void ModularizeUtilities::displayGoodFiles() {
519   errs() << "\nThese are the files with no detected errors:\n\n";
520   for (auto &GoodFile : HeaderFileNames) {
521     bool Good = true;
522     for (auto &ProblemFile : ProblemFileNames) {
523       if (ProblemFile == GoodFile) {
524         Good = false;
525         break;
526       }
527     }
528     if (Good)
529       errs() << GoodFile << "\n";
530   }
531 }
532 
533 // List files with problem files commented out.
534 void ModularizeUtilities::displayCombinedFiles() {
535   errs() <<
536     "\nThese are the combined files, with problem files preceded by #:\n\n";
537   for (auto &File : HeaderFileNames) {
538     bool Good = true;
539     for (auto &ProblemFile : ProblemFileNames) {
540       if (ProblemFile == File) {
541         Good = false;
542         break;
543       }
544     }
545     errs() << (Good ? "" : "#") << File << "\n";
546   }
547 }
548