1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class for loading and validating a module map or
11 // header list by checking that all headers in the corresponding directories
12 // are accounted for.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Driver/Options.h"
18 #include "clang/Frontend/CompilerInstance.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "CoverageChecker.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "ModularizeUtilities.h"
27 
28 using namespace clang;
29 using namespace llvm;
30 using namespace Modularize;
31 
32 namespace {
33 // Subclass TargetOptions so we can construct it inline with
34 // the minimal option, the triple.
35 class ModuleMapTargetOptions : public clang::TargetOptions {
36 public:
37   ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
38 };
39 } // namespace
40 
41 // ModularizeUtilities class implementation.
42 
// Constructor.
// Records the input paths, header prefix, and problem-files list path, then
// builds the clang objects (diagnostics, target info, file and source
// managers, header search) that the module map loading code in this file uses.
// \param InputPaths Input file paths; used to initialize InputFilePaths.
// \param Prefix Header prefix; used to initialize HeaderPrefix.
// \param ProblemFilesListPath Used to initialize ProblemFilesPath.
// NOTE(review): several initializers use members named earlier in this list
// (e.g. DC uses DiagnosticOpts, SourceMgr uses Diagnostics and FileMgr), so
// this presumably relies on the member declaration order in
// ModularizeUtilities.h matching the order written here -- confirm.
ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
                                         llvm::StringRef Prefix,
                                         llvm::StringRef ProblemFilesListPath)
    : InputFilePaths(InputPaths), HeaderPrefix(Prefix),
      ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false),
      MissingHeaderCount(0),
      // Init clang stuff needed for loading the module map and preprocessing.
      LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
      DiagnosticOpts(new DiagnosticOptions()),
      // Diagnostic consumer writing to llvm::errs().
      DC(llvm::errs(), DiagnosticOpts.get()),
      Diagnostics(
          new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
      // Target options carrying only the default target triple.
      TargetOpts(new ModuleMapTargetOptions()),
      Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
      FileMgr(new FileManager(FileSystemOpts)),
      SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
      HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(),
                                  *SourceMgr, *Diagnostics, *LangOpts,
                                  Target.get())) {}
63 
64 // Create instance of ModularizeUtilities, to simplify setting up
65 // subordinate objects.
66 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
67     std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
68     llvm::StringRef ProblemFilesListPath) {
69 
70   return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
71 }
72 
73 // Load all header lists and dependencies.
74 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
75   // For each input file.
76   for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
77     llvm::StringRef InputPath = *I;
78     // If it's a module map.
79     if (InputPath.endswith(".modulemap")) {
80       // Load the module map.
81       if (std::error_code EC = loadModuleMap(InputPath))
82         return EC;
83     }
84     else {
85       // Else we assume it's a header list and load it.
86       if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
87         errs() << "modularize: error: Unable to get header list '" << InputPath
88           << "': " << EC.message() << '\n';
89         return EC;
90       }
91     }
92   }
93   // If we have a problem files list.
94   if (ProblemFilesPath.size() != 0) {
95     // Load problem files list.
96     if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) {
97       errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
98         << "': " << EC.message() << '\n';
99       return EC;
100     }
101   }
102   return std::error_code();
103 }
104 
105 // Do coverage checks.
106 // For each loaded module map, do header coverage check.
107 // Starting from the directory of the module.map file,
108 // Find all header files, optionally looking only at files
109 // covered by the include path options, and compare against
110 // the headers referenced by the module.map file.
111 // Display warnings for unaccounted-for header files.
112 // Returns 0 if there were no errors or warnings, 1 if there
113 // were warnings, 2 if any other problem, such as a bad
114 // module map path argument was specified.
115 std::error_code ModularizeUtilities::doCoverageCheck(
116     std::vector<std::string> &IncludePaths,
117     llvm::ArrayRef<std::string> CommandLine) {
118   int ModuleMapCount = ModuleMaps.size();
119   int ModuleMapIndex;
120   std::error_code EC;
121   for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
122     std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
123     CoverageChecker *Checker = CoverageChecker::createCoverageChecker(
124       InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, ModMap.get());
125     std::error_code LocalEC = Checker->doChecks();
126     if (LocalEC.value() > 0)
127       EC = LocalEC;
128   }
129   return EC;
130 }
131 
132 // Load single header list and dependencies.
133 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
134     llvm::StringRef InputPath) {
135 
136   // By default, use the path component of the list file name.
137   SmallString<256> HeaderDirectory(InputPath);
138   llvm::sys::path::remove_filename(HeaderDirectory);
139   SmallString<256> CurrentDirectory;
140   llvm::sys::fs::current_path(CurrentDirectory);
141 
142   // Get the prefix if we have one.
143   if (HeaderPrefix.size() != 0)
144     HeaderDirectory = HeaderPrefix;
145 
146   // Read the header list file into a buffer.
147   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
148     MemoryBuffer::getFile(InputPath);
149   if (std::error_code EC = listBuffer.getError())
150     return EC;
151 
152   // Parse the header list into strings.
153   SmallVector<StringRef, 32> Strings;
154   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
155 
156   // Collect the header file names from the string list.
157   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
158     E = Strings.end();
159     I != E; ++I) {
160     StringRef Line = I->trim();
161     // Ignore comments and empty lines.
162     if (Line.empty() || (Line[0] == '#'))
163       continue;
164     std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
165     SmallString<256> HeaderFileName;
166     // Prepend header file name prefix if it's not absolute.
167     if (llvm::sys::path::is_absolute(TargetAndDependents.first))
168       llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
169     else {
170       if (HeaderDirectory.size() != 0)
171         HeaderFileName = HeaderDirectory;
172       else
173         HeaderFileName = CurrentDirectory;
174       llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
175       llvm::sys::path::native(HeaderFileName);
176     }
177     // Handle optional dependencies.
178     DependentsVector Dependents;
179     SmallVector<StringRef, 4> DependentsList;
180     TargetAndDependents.second.split(DependentsList, " ", -1, false);
181     int Count = DependentsList.size();
182     for (int Index = 0; Index < Count; ++Index) {
183       SmallString<256> Dependent;
184       if (llvm::sys::path::is_absolute(DependentsList[Index]))
185         Dependent = DependentsList[Index];
186       else {
187         if (HeaderDirectory.size() != 0)
188           Dependent = HeaderDirectory;
189         else
190           Dependent = CurrentDirectory;
191         llvm::sys::path::append(Dependent, DependentsList[Index]);
192       }
193       llvm::sys::path::native(Dependent);
194       Dependents.push_back(getCanonicalPath(Dependent.str()));
195     }
196     // Get canonical form.
197     HeaderFileName = getCanonicalPath(HeaderFileName);
198     // Save the resulting header file path and dependencies.
199     HeaderFileNames.push_back(HeaderFileName.str());
200     Dependencies[HeaderFileName.str()] = Dependents;
201   }
202   return std::error_code();
203 }
204 
205 // Load problem header list.
206 std::error_code ModularizeUtilities::loadProblemHeaderList(
207   llvm::StringRef InputPath) {
208 
209   // By default, use the path component of the list file name.
210   SmallString<256> HeaderDirectory(InputPath);
211   llvm::sys::path::remove_filename(HeaderDirectory);
212   SmallString<256> CurrentDirectory;
213   llvm::sys::fs::current_path(CurrentDirectory);
214 
215   // Get the prefix if we have one.
216   if (HeaderPrefix.size() != 0)
217     HeaderDirectory = HeaderPrefix;
218 
219   // Read the header list file into a buffer.
220   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
221     MemoryBuffer::getFile(InputPath);
222   if (std::error_code EC = listBuffer.getError())
223     return EC;
224 
225   // Parse the header list into strings.
226   SmallVector<StringRef, 32> Strings;
227   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
228 
229   // Collect the header file names from the string list.
230   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
231     E = Strings.end();
232     I != E; ++I) {
233     StringRef Line = I->trim();
234     // Ignore comments and empty lines.
235     if (Line.empty() || (Line[0] == '#'))
236       continue;
237     SmallString<256> HeaderFileName;
238     // Prepend header file name prefix if it's not absolute.
239     if (llvm::sys::path::is_absolute(Line))
240       llvm::sys::path::native(Line, HeaderFileName);
241     else {
242       if (HeaderDirectory.size() != 0)
243         HeaderFileName = HeaderDirectory;
244       else
245         HeaderFileName = CurrentDirectory;
246       llvm::sys::path::append(HeaderFileName, Line);
247       llvm::sys::path::native(HeaderFileName);
248     }
249     // Get canonical form.
250     HeaderFileName = getCanonicalPath(HeaderFileName);
251     // Save the resulting header file path.
252     ProblemFileNames.push_back(HeaderFileName.str());
253   }
254   return std::error_code();
255 }
256 
257 // Load single module map and extract header file list.
258 std::error_code ModularizeUtilities::loadModuleMap(
259     llvm::StringRef InputPath) {
260   // Get file entry for module.modulemap file.
261   const FileEntry *ModuleMapEntry =
262     SourceMgr->getFileManager().getFile(InputPath);
263 
264   // return error if not found.
265   if (!ModuleMapEntry) {
266     llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
267     return std::error_code(1, std::generic_category());
268   }
269 
270   // Because the module map parser uses a ForwardingDiagnosticConsumer,
271   // which doesn't forward the BeginSourceFile call, we do it explicitly here.
272   DC.BeginSourceFile(*LangOpts, nullptr);
273 
274   // Figure out the home directory for the module map file.
275   const DirectoryEntry *Dir = ModuleMapEntry->getDir();
276   StringRef DirName(Dir->getName());
277   if (llvm::sys::path::filename(DirName) == "Modules") {
278     DirName = llvm::sys::path::parent_path(DirName);
279     if (DirName.endswith(".framework"))
280       Dir = FileMgr->getDirectory(DirName);
281     // FIXME: This assert can fail if there's a race between the above check
282     // and the removal of the directory.
283     assert(Dir && "parent must exist");
284   }
285 
286   std::unique_ptr<ModuleMap> ModMap;
287   ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
288     Target.get(), *HeaderInfo));
289 
290   // Parse module.modulemap file into module map.
291   if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
292     return std::error_code(1, std::generic_category());
293   }
294 
295   // Do matching end call.
296   DC.EndSourceFile();
297 
298   // Reset missing header count.
299   MissingHeaderCount = 0;
300 
301   if (!collectModuleMapHeaders(ModMap.get()))
302     return std::error_code(1, std::generic_category());
303 
304   // Save module map.
305   ModuleMaps.push_back(std::move(ModMap));
306 
307   // Indicate we are using module maps.
308   HasModuleMap = true;
309 
310   // Return code of 1 for missing headers.
311   if (MissingHeaderCount)
312     return std::error_code(1, std::generic_category());
313 
314   return std::error_code();
315 }
316 
317 // Collect module map headers.
318 // Walks the modules and collects referenced headers into
319 // HeaderFileNames.
320 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
321   for (ModuleMap::module_iterator I = ModMap->module_begin(),
322     E = ModMap->module_end();
323     I != E; ++I) {
324     if (!collectModuleHeaders(*I->second))
325       return false;
326   }
327   return true;
328 }
329 
330 // Collect referenced headers from one module.
331 // Collects the headers referenced in the given module into
332 // HeaderFileNames.
333 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
334 
335   // Ignore explicit modules because they often have dependencies
336   // we can't know.
337   if (Mod.IsExplicit)
338     return true;
339 
340   // Treat headers in umbrella directory as dependencies.
341   DependentsVector UmbrellaDependents;
342 
343   // Recursively do submodules.
344   for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end();
345        MI != MIEnd; ++MI)
346     collectModuleHeaders(**MI);
347 
348   if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) {
349     std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
350     // Collect umbrella header.
351     HeaderFileNames.push_back(HeaderPath);
352 
353     // FUTURE: When needed, umbrella header header collection goes here.
354   }
355   else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) {
356     // If there normal headers, assume these are umbrellas and skip collection.
357     if (Mod.Headers->size() == 0) {
358       // Collect headers in umbrella directory.
359       if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
360         return false;
361     }
362   }
363 
364   // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
365   // assuming they are marked as such either because of unsuitability for
366   // modules or because they are meant to be included by another header,
367   // and thus should be ignored by modularize.
368 
369   int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
370 
371   for (int Index = 0; Index < NormalHeaderCount; ++Index) {
372     DependentsVector NormalDependents;
373     // Collect normal header.
374     const clang::Module::Header &Header(
375       Mod.Headers[clang::Module::HK_Normal][Index]);
376     std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
377     HeaderFileNames.push_back(HeaderPath);
378   }
379 
380   int MissingCountThisModule = Mod.MissingHeaders.size();
381 
382   for (int Index = 0; Index < MissingCountThisModule; ++Index) {
383     std::string MissingFile = Mod.MissingHeaders[Index].FileName;
384     SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
385     errs() << Loc.printToString(*SourceMgr)
386       << ": error : Header not found: " << MissingFile << "\n";
387   }
388 
389   MissingHeaderCount += MissingCountThisModule;
390 
391   return true;
392 }
393 
394 // Collect headers from an umbrella directory.
395 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
396   DependentsVector &Dependents) {
397   // Initialize directory name.
398   SmallString<256> Directory(UmbrellaDirName);
399   // Walk the directory.
400   std::error_code EC;
401   llvm::sys::fs::file_status Status;
402   for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
403     I.increment(EC)) {
404     if (EC)
405       return false;
406     std::string File(I->path());
407     I->status(Status);
408     llvm::sys::fs::file_type Type = Status.type();
409     // If the file is a directory, ignore the name and recurse.
410     if (Type == llvm::sys::fs::file_type::directory_file) {
411       if (!collectUmbrellaHeaders(File, Dependents))
412         return false;
413       continue;
414     }
415     // If the file does not have a common header extension, ignore it.
416     if (!isHeader(File))
417       continue;
418     // Save header name.
419     std::string HeaderPath = getCanonicalPath(File);
420     Dependents.push_back(HeaderPath);
421   }
422   return true;
423 }
424 
425 // Replace .. embedded in path for purposes of having
426 // a canonical path.
427 static std::string replaceDotDot(StringRef Path) {
428   SmallString<128> Buffer;
429   llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
430     E = llvm::sys::path::end(Path);
431   while (B != E) {
432     if (B->compare(".") == 0) {
433     }
434     else if (B->compare("..") == 0)
435       llvm::sys::path::remove_filename(Buffer);
436     else
437       llvm::sys::path::append(Buffer, *B);
438     ++B;
439   }
440   if (Path.endswith("/") || Path.endswith("\\"))
441     Buffer.append(1, Path.back());
442   return Buffer.c_str();
443 }
444 
445 // Convert header path to canonical form.
446 // The canonical form is basically just use forward slashes, and remove "./".
447 // \param FilePath The file path, relative to the module map directory.
448 // \returns The file path in canonical form.
449 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
450   std::string Tmp(replaceDotDot(FilePath));
451   std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
452   StringRef Tmp2(Tmp);
453   if (Tmp2.startswith("./"))
454     Tmp = Tmp2.substr(2);
455   return Tmp;
456 }
457 
458 // Check for header file extension.
459 // If the file extension is .h, .inc, or missing, it's
460 // assumed to be a header.
461 // \param FileName The file name.  Must not be a directory.
462 // \returns true if it has a header extension or no extension.
463 bool ModularizeUtilities::isHeader(StringRef FileName) {
464   StringRef Extension = llvm::sys::path::extension(FileName);
465   if (Extension.size() == 0)
466     return true;
467   if (Extension.equals_lower(".h"))
468     return true;
469   if (Extension.equals_lower(".inc"))
470     return true;
471   return false;
472 }
473 
474 // Get directory path component from file path.
475 // \returns the component of the given path, which will be
476 // relative if the given path is relative, absolute if the
477 // given path is absolute, or "." if the path has no leading
478 // path component.
479 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
480   SmallString<256> Directory(Path);
481   sys::path::remove_filename(Directory);
482   if (Directory.size() == 0)
483     return ".";
484   return Directory.str();
485 }
486 
487 // Add unique problem file.
488 // Also standardizes the path.
489 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
490   FilePath = getCanonicalPath(FilePath);
491   // Don't add if already present.
492   for(auto &TestFilePath : ProblemFileNames) {
493     if (TestFilePath == FilePath)
494       return;
495   }
496   ProblemFileNames.push_back(FilePath);
497 }
498 
499 // Add file with no compile errors.
500 // Also standardizes the path.
501 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
502   FilePath = getCanonicalPath(FilePath);
503   GoodFileNames.push_back(FilePath);
504 }
505 
506 // List problem files.
507 void ModularizeUtilities::displayProblemFiles() {
508   errs() << "\nThese are the files with possible errors:\n\n";
509   for (auto &ProblemFile : ProblemFileNames) {
510     errs() << ProblemFile << "\n";
511   }
512 }
513 
514 // List files with no problems.
515 void ModularizeUtilities::displayGoodFiles() {
516   errs() << "\nThese are the files with no detected errors:\n\n";
517   for (auto &GoodFile : HeaderFileNames) {
518     bool Good = true;
519     for (auto &ProblemFile : ProblemFileNames) {
520       if (ProblemFile == GoodFile) {
521         Good = false;
522         break;
523       }
524     }
525     if (Good)
526       errs() << GoodFile << "\n";
527   }
528 }
529 
530 // List files with problem files commented out.
531 void ModularizeUtilities::displayCombinedFiles() {
532   errs() <<
533     "\nThese are the combined files, with problem files preceded by #:\n\n";
534   for (auto &File : HeaderFileNames) {
535     bool Good = true;
536     for (auto &ProblemFile : ProblemFileNames) {
537       if (ProblemFile == File) {
538         Good = false;
539         break;
540       }
541     }
542     errs() << (Good ? "" : "#") << File << "\n";
543   }
544 }
545