1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class for loading and validating a module map or
11 // header list by checking that all headers in the corresponding directories
12 // are accounted for.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Driver/Options.h"
18 #include "clang/Frontend/CompilerInstance.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "CoverageChecker.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "ModularizeUtilities.h"
27 
28 using namespace clang;
29 using namespace llvm;
30 using namespace Modularize;
31 
32 namespace {
33 // Subclass TargetOptions so we can construct it inline with
34 // the minimal option, the triple.
35 class ModuleMapTargetOptions : public clang::TargetOptions {
36 public:
37   ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
38 };
39 } // namespace
40 
41 // ModularizeUtilities class implementation.
42 
43 // Constructor.
44 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
45                                          llvm::StringRef Prefix)
46   : InputFilePaths(InputPaths),
47     HeaderPrefix(Prefix),
48     HasModuleMap(false),
49     MissingHeaderCount(0),
50     // Init clang stuff needed for loading the module map and preprocessing.
51     LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
52     DiagnosticOpts(new DiagnosticOptions()),
53     DC(llvm::errs(), DiagnosticOpts.get()),
54     Diagnostics(
55     new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
56     TargetOpts(new ModuleMapTargetOptions()),
57     Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
58     FileMgr(new FileManager(FileSystemOpts)),
59     SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
60     HeaderSearchOpts(new HeaderSearchOptions()),
61     HeaderInfo(new HeaderSearch(HeaderSearchOpts, *SourceMgr, *Diagnostics,
62     *LangOpts, Target.get())) {
63 }
64 
65 // Create instance of ModularizeUtilities, to simplify setting up
66 // subordinate objects.
67 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
68     std::vector<std::string> &InputPaths, llvm::StringRef Prefix) {
69 
70   return new ModularizeUtilities(InputPaths, Prefix);
71 }
72 
73 // Load all header lists and dependencies.
74 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
75   typedef std::vector<std::string>::iterator Iter;
76   // For each input file.
77   for (Iter I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
78     llvm::StringRef InputPath = *I;
79     // If it's a module map.
80     if (InputPath.endswith(".modulemap")) {
81       // Load the module map.
82       if (std::error_code EC = loadModuleMap(InputPath))
83         return EC;
84     }
85     else {
86       // Else we assume it's a header list and load it.
87       if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
88         errs() << "modularize: error: Unable to get header list '" << InputPath
89           << "': " << EC.message() << '\n';
90         return EC;
91       }
92     }
93   }
94   return std::error_code();
95 }
96 
97 // Do coverage checks.
98 // For each loaded module map, do header coverage check.
99 // Starting from the directory of the module.map file,
100 // Find all header files, optionally looking only at files
101 // covered by the include path options, and compare against
102 // the headers referenced by the module.map file.
103 // Display warnings for unaccounted-for header files.
104 // Returns 0 if there were no errors or warnings, 1 if there
105 // were warnings, 2 if any other problem, such as a bad
106 // module map path argument was specified.
107 std::error_code ModularizeUtilities::doCoverageCheck(
108     std::vector<std::string> &IncludePaths,
109     llvm::ArrayRef<std::string> CommandLine) {
110   int ModuleMapCount = ModuleMaps.size();
111   int ModuleMapIndex;
112   std::error_code EC;
113   for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
114     std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
115     CoverageChecker *Checker = CoverageChecker::createCoverageChecker(
116       InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, ModMap.get());
117     std::error_code LocalEC = Checker->doChecks();
118     if (LocalEC.value() > 0)
119       EC = LocalEC;
120   }
121   return EC;
122 }
123 
124 // Load single header list and dependencies.
125 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
126     llvm::StringRef InputPath) {
127 
128   // By default, use the path component of the list file name.
129   SmallString<256> HeaderDirectory(InputPath);
130   llvm::sys::path::remove_filename(HeaderDirectory);
131   SmallString<256> CurrentDirectory;
132   llvm::sys::fs::current_path(CurrentDirectory);
133 
134   // Get the prefix if we have one.
135   if (HeaderPrefix.size() != 0)
136     HeaderDirectory = HeaderPrefix;
137 
138   // Read the header list file into a buffer.
139   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
140     MemoryBuffer::getFile(InputPath);
141   if (std::error_code EC = listBuffer.getError())
142     return EC;
143 
144   // Parse the header list into strings.
145   SmallVector<StringRef, 32> Strings;
146   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
147 
148   // Collect the header file names from the string list.
149   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
150     E = Strings.end();
151     I != E; ++I) {
152     StringRef Line = I->trim();
153     // Ignore comments and empty lines.
154     if (Line.empty() || (Line[0] == '#'))
155       continue;
156     std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
157     SmallString<256> HeaderFileName;
158     // Prepend header file name prefix if it's not absolute.
159     if (llvm::sys::path::is_absolute(TargetAndDependents.first))
160       llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
161     else {
162       if (HeaderDirectory.size() != 0)
163         HeaderFileName = HeaderDirectory;
164       else
165         HeaderFileName = CurrentDirectory;
166       llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
167       llvm::sys::path::native(HeaderFileName);
168     }
169     // Handle optional dependencies.
170     DependentsVector Dependents;
171     SmallVector<StringRef, 4> DependentsList;
172     TargetAndDependents.second.split(DependentsList, " ", -1, false);
173     int Count = DependentsList.size();
174     for (int Index = 0; Index < Count; ++Index) {
175       SmallString<256> Dependent;
176       if (llvm::sys::path::is_absolute(DependentsList[Index]))
177         Dependent = DependentsList[Index];
178       else {
179         if (HeaderDirectory.size() != 0)
180           Dependent = HeaderDirectory;
181         else
182           Dependent = CurrentDirectory;
183         llvm::sys::path::append(Dependent, DependentsList[Index]);
184       }
185       llvm::sys::path::native(Dependent);
186       Dependents.push_back(getCanonicalPath(Dependent.str()));
187     }
188     // Get canonical form.
189     HeaderFileName = getCanonicalPath(HeaderFileName);
190     // Save the resulting header file path and dependencies.
191     HeaderFileNames.push_back(HeaderFileName.str());
192     Dependencies[HeaderFileName.str()] = Dependents;
193   }
194   return std::error_code();
195 }
196 
197 // Load single module map and extract header file list.
198 std::error_code ModularizeUtilities::loadModuleMap(
199     llvm::StringRef InputPath) {
200   // Get file entry for module.modulemap file.
201   const FileEntry *ModuleMapEntry =
202     SourceMgr->getFileManager().getFile(InputPath);
203 
204   // return error if not found.
205   if (!ModuleMapEntry) {
206     llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
207     return std::error_code(1, std::generic_category());
208   }
209 
210   // Because the module map parser uses a ForwardingDiagnosticConsumer,
211   // which doesn't forward the BeginSourceFile call, we do it explicitly here.
212   DC.BeginSourceFile(*LangOpts, nullptr);
213 
214   // Figure out the home directory for the module map file.
215   const DirectoryEntry *Dir = ModuleMapEntry->getDir();
216   StringRef DirName(Dir->getName());
217   if (llvm::sys::path::filename(DirName) == "Modules") {
218     DirName = llvm::sys::path::parent_path(DirName);
219     if (DirName.endswith(".framework"))
220       Dir = FileMgr->getDirectory(DirName);
221     // FIXME: This assert can fail if there's a race between the above check
222     // and the removal of the directory.
223     assert(Dir && "parent must exist");
224   }
225 
226   std::unique_ptr<ModuleMap> ModMap;
227   ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
228     Target.get(), *HeaderInfo));
229 
230   // Parse module.modulemap file into module map.
231   if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
232     return std::error_code(1, std::generic_category());
233   }
234 
235   // Do matching end call.
236   DC.EndSourceFile();
237 
238   // Reset missing header count.
239   MissingHeaderCount = 0;
240 
241   if (!collectModuleMapHeaders(ModMap.get()))
242     return std::error_code(1, std::generic_category());
243 
244   // Save module map.
245   ModuleMaps.push_back(std::move(ModMap));
246 
247   // Indicate we are using module maps.
248   HasModuleMap = true;
249 
250   // Return code of 1 for missing headers.
251   if (MissingHeaderCount)
252     return std::error_code(1, std::generic_category());
253 
254   return std::error_code();
255 }
256 
257 // Collect module map headers.
258 // Walks the modules and collects referenced headers into
259 // HeaderFileNames.
260 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
261   for (ModuleMap::module_iterator I = ModMap->module_begin(),
262     E = ModMap->module_end();
263     I != E; ++I) {
264     if (!collectModuleHeaders(*I->second))
265       return false;
266   }
267   return true;
268 }
269 
270 // Collect referenced headers from one module.
271 // Collects the headers referenced in the given module into
272 // HeaderFileNames.
273 bool ModularizeUtilities::collectModuleHeaders(const Module &Mod) {
274 
275   // Ignore explicit modules because they often have dependencies
276   // we can't know.
277   if (Mod.IsExplicit)
278     return true;
279 
280   // Treat headers in umbrella directory as dependencies.
281   DependentsVector UmbrellaDependents;
282 
283   // Recursively do submodules.
284   for (Module::submodule_const_iterator MI = Mod.submodule_begin(),
285       MIEnd = Mod.submodule_end();
286       MI != MIEnd; ++MI)
287     collectModuleHeaders(**MI);
288 
289   if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) {
290     std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
291     // Collect umbrella header.
292     HeaderFileNames.push_back(HeaderPath);
293 
294     // FUTURE: When needed, umbrella header header collection goes here.
295   }
296   else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) {
297     // If there normal headers, assume these are umbrellas and skip collection.
298     if (Mod.Headers->size() == 0) {
299       // Collect headers in umbrella directory.
300       if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
301         return false;
302     }
303   }
304 
305   // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
306   // assuming they are marked as such either because of unsuitability for
307   // modules or because they are meant to be included by another header,
308   // and thus should be ignored by modularize.
309 
310   int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
311 
312   for (int Index = 0; Index < NormalHeaderCount; ++Index) {
313     DependentsVector NormalDependents;
314     // Collect normal header.
315     const clang::Module::Header &Header(
316       Mod.Headers[clang::Module::HK_Normal][Index]);
317     std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
318     HeaderFileNames.push_back(HeaderPath);
319   }
320 
321   int MissingCountThisModule = Mod.MissingHeaders.size();
322 
323   for (int Index = 0; Index < MissingCountThisModule; ++Index) {
324     std::string MissingFile = Mod.MissingHeaders[Index].FileName;
325     SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
326     errs() << Loc.printToString(*SourceMgr)
327       << ": error : Header not found: " << MissingFile << "\n";
328   }
329 
330   MissingHeaderCount += MissingCountThisModule;
331 
332   return true;
333 }
334 
335 // Collect headers from an umbrella directory.
336 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
337   DependentsVector &Dependents) {
338   // Initialize directory name.
339   SmallString<256> Directory(UmbrellaDirName);
340   // Walk the directory.
341   std::error_code EC;
342   llvm::sys::fs::file_status Status;
343   for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
344     I.increment(EC)) {
345     if (EC)
346       return false;
347     std::string File(I->path());
348     I->status(Status);
349     llvm::sys::fs::file_type Type = Status.type();
350     // If the file is a directory, ignore the name and recurse.
351     if (Type == llvm::sys::fs::file_type::directory_file) {
352       if (!collectUmbrellaHeaders(File, Dependents))
353         return false;
354       continue;
355     }
356     // If the file does not have a common header extension, ignore it.
357     if (!isHeader(File))
358       continue;
359     // Save header name.
360     std::string HeaderPath = getCanonicalPath(File);
361     Dependents.push_back(HeaderPath);
362   }
363   return true;
364 }
365 
366 // Replace .. embedded in path for purposes of having
367 // a canonical path.
368 static std::string replaceDotDot(StringRef Path) {
369   SmallString<128> Buffer;
370   llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
371     E = llvm::sys::path::end(Path);
372   while (B != E) {
373     if (B->compare(".") == 0) {
374     }
375     else if (B->compare("..") == 0)
376       llvm::sys::path::remove_filename(Buffer);
377     else
378       llvm::sys::path::append(Buffer, *B);
379     ++B;
380   }
381   if (Path.endswith("/") || Path.endswith("\\"))
382     Buffer.append(1, Path.back());
383   return Buffer.c_str();
384 }
385 
386 // Convert header path to canonical form.
387 // The canonical form is basically just use forward slashes, and remove "./".
388 // \param FilePath The file path, relative to the module map directory.
389 // \returns The file path in canonical form.
390 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
391   std::string Tmp(replaceDotDot(FilePath));
392   std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
393   StringRef Tmp2(Tmp);
394   if (Tmp2.startswith("./"))
395     Tmp = Tmp2.substr(2);
396   return Tmp;
397 }
398 
399 // Check for header file extension.
400 // If the file extension is .h, .inc, or missing, it's
401 // assumed to be a header.
402 // \param FileName The file name.  Must not be a directory.
403 // \returns true if it has a header extension or no extension.
404 bool ModularizeUtilities::isHeader(StringRef FileName) {
405   StringRef Extension = llvm::sys::path::extension(FileName);
406   if (Extension.size() == 0)
407     return false;
408   if (Extension.equals_lower(".h"))
409     return true;
410   if (Extension.equals_lower(".inc"))
411     return true;
412   return false;
413 }
414 
415 // Get directory path component from file path.
416 // \returns the component of the given path, which will be
417 // relative if the given path is relative, absolute if the
418 // given path is absolute, or "." if the path has no leading
419 // path component.
420 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
421   SmallString<256> Directory(Path);
422   sys::path::remove_filename(Directory);
423   if (Directory.size() == 0)
424     return ".";
425   return Directory.str();
426 }
427