1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class for loading and validating a module map or
11 // header list by checking that all headers in the corresponding directories
12 // are accounted for.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Driver/Options.h"
18 #include "clang/Frontend/CompilerInstance.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "CoverageChecker.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "ModularizeUtilities.h"
27 
28 using namespace clang;
29 using namespace llvm;
30 using namespace Modularize;
31 
32 // Subclass TargetOptions so we can construct it inline with
33 // the minimal option, the triple.
34 class ModuleMapTargetOptions : public clang::TargetOptions {
35 public:
36   ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
37 };
38 
39 // ModularizeUtilities class implementation.
40 
41 // Constructor.
42 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
43                                          llvm::StringRef Prefix)
44   : InputFilePaths(InputPaths),
45     HeaderPrefix(Prefix),
46     HasModuleMap(false),
47     // Init clang stuff needed for loading the module map and preprocessing.
48     LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
49     DiagnosticOpts(new DiagnosticOptions()),
50     DC(llvm::errs(), DiagnosticOpts.get()),
51     Diagnostics(
52     new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
53     TargetOpts(new ModuleMapTargetOptions()),
54     Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
55     FileMgr(new FileManager(FileSystemOpts)),
56     SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
57     HeaderSearchOpts(new HeaderSearchOptions()),
58     HeaderInfo(new HeaderSearch(HeaderSearchOpts, *SourceMgr, *Diagnostics,
59     *LangOpts, Target.get())) {
60 }
61 
62 // Create instance of ModularizeUtilities, to simplify setting up
63 // subordinate objects.
64 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
65     std::vector<std::string> &InputPaths, llvm::StringRef Prefix) {
66 
67   return new ModularizeUtilities(InputPaths, Prefix);
68 }
69 
70 // Load all header lists and dependencies.
71 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
72   typedef std::vector<std::string>::iterator Iter;
73   // For each input file.
74   for (Iter I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
75     llvm::StringRef InputPath = *I;
76     // If it's a module map.
77     if (InputPath.endswith(".modulemap")) {
78       // Load the module map.
79       if (std::error_code EC = loadModuleMap(InputPath))
80         return EC;
81     }
82     else {
83       // Else we assume it's a header list and load it.
84       if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
85         errs() << "modularize: error: Unable to get header list '" << InputPath
86           << "': " << EC.message() << '\n';
87         return EC;
88       }
89     }
90   }
91   return std::error_code();
92 }
93 
94 // Do coverage checks.
95 // For each loaded module map, do header coverage check.
96 // Starting from the directory of the module.map file,
97 // Find all header files, optionally looking only at files
98 // covered by the include path options, and compare against
99 // the headers referenced by the module.map file.
100 // Display warnings for unaccounted-for header files.
101 // Returns 0 if there were no errors or warnings, 1 if there
102 // were warnings, 2 if any other problem, such as a bad
103 // module map path argument was specified.
104 std::error_code ModularizeUtilities::doCoverageCheck(
105     std::vector<std::string> &IncludePaths,
106     llvm::ArrayRef<std::string> CommandLine) {
107   int ModuleMapCount = ModuleMaps.size();
108   int ModuleMapIndex;
109   std::error_code EC;
110   for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
111     std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
112     CoverageChecker *Checker = CoverageChecker::createCoverageChecker(
113       InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, ModMap.get());
114     std::error_code LocalEC = Checker->doChecks();
115     if (LocalEC.value() > 0)
116       EC = LocalEC;
117   }
118   return EC;
119 }
120 
121 // Load single header list and dependencies.
122 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
123     llvm::StringRef InputPath) {
124 
125   // By default, use the path component of the list file name.
126   SmallString<256> HeaderDirectory(InputPath);
127   llvm::sys::path::remove_filename(HeaderDirectory);
128   SmallString<256> CurrentDirectory;
129   llvm::sys::fs::current_path(CurrentDirectory);
130 
131   // Get the prefix if we have one.
132   if (HeaderPrefix.size() != 0)
133     HeaderDirectory = HeaderPrefix;
134 
135   // Read the header list file into a buffer.
136   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
137     MemoryBuffer::getFile(InputPath);
138   if (std::error_code EC = listBuffer.getError())
139     return EC;
140 
141   // Parse the header list into strings.
142   SmallVector<StringRef, 32> Strings;
143   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
144 
145   // Collect the header file names from the string list.
146   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
147     E = Strings.end();
148     I != E; ++I) {
149     StringRef Line = I->trim();
150     // Ignore comments and empty lines.
151     if (Line.empty() || (Line[0] == '#'))
152       continue;
153     std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
154     SmallString<256> HeaderFileName;
155     // Prepend header file name prefix if it's not absolute.
156     if (llvm::sys::path::is_absolute(TargetAndDependents.first))
157       llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
158     else {
159       if (HeaderDirectory.size() != 0)
160         HeaderFileName = HeaderDirectory;
161       else
162         HeaderFileName = CurrentDirectory;
163       llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
164       llvm::sys::path::native(HeaderFileName);
165     }
166     // Handle optional dependencies.
167     DependentsVector Dependents;
168     SmallVector<StringRef, 4> DependentsList;
169     TargetAndDependents.second.split(DependentsList, " ", -1, false);
170     int Count = DependentsList.size();
171     for (int Index = 0; Index < Count; ++Index) {
172       SmallString<256> Dependent;
173       if (llvm::sys::path::is_absolute(DependentsList[Index]))
174         Dependent = DependentsList[Index];
175       else {
176         if (HeaderDirectory.size() != 0)
177           Dependent = HeaderDirectory;
178         else
179           Dependent = CurrentDirectory;
180         llvm::sys::path::append(Dependent, DependentsList[Index]);
181       }
182       llvm::sys::path::native(Dependent);
183       Dependents.push_back(getCanonicalPath(Dependent.str()));
184     }
185     // Get canonical form.
186     HeaderFileName = getCanonicalPath(HeaderFileName);
187     // Save the resulting header file path and dependencies.
188     HeaderFileNames.push_back(HeaderFileName.str());
189     Dependencies[HeaderFileName.str()] = Dependents;
190   }
191   return std::error_code();
192 }
193 
194 // Load single module map and extract header file list.
195 std::error_code ModularizeUtilities::loadModuleMap(
196     llvm::StringRef InputPath) {
197   // Get file entry for module.modulemap file.
198   const FileEntry *ModuleMapEntry =
199     SourceMgr->getFileManager().getFile(InputPath);
200 
201   // return error if not found.
202   if (!ModuleMapEntry) {
203     llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
204     return std::error_code(1, std::generic_category());
205   }
206 
207   // Because the module map parser uses a ForwardingDiagnosticConsumer,
208   // which doesn't forward the BeginSourceFile call, we do it explicitly here.
209   DC.BeginSourceFile(*LangOpts, nullptr);
210 
211   // Figure out the home directory for the module map file.
212   const DirectoryEntry *Dir = ModuleMapEntry->getDir();
213   StringRef DirName(Dir->getName());
214   if (llvm::sys::path::filename(DirName) == "Modules") {
215     DirName = llvm::sys::path::parent_path(DirName);
216     if (DirName.endswith(".framework"))
217       Dir = FileMgr->getDirectory(DirName);
218     // FIXME: This assert can fail if there's a race between the above check
219     // and the removal of the directory.
220     assert(Dir && "parent must exist");
221   }
222 
223   std::unique_ptr<ModuleMap> ModMap;
224   ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
225     Target.get(), *HeaderInfo));
226 
227   // Parse module.modulemap file into module map.
228   if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
229     return std::error_code(1, std::generic_category());
230   }
231 
232   // Do matching end call.
233   DC.EndSourceFile();
234 
235   if (!collectModuleMapHeaders(ModMap.get()))
236     return std::error_code(1, std::generic_category());
237 
238   // Save module map.
239   ModuleMaps.push_back(std::move(ModMap));
240 
241   // Indicate we are using module maps.
242   HasModuleMap = true;
243 
244   return std::error_code();
245 }
246 
247 // Collect module map headers.
248 // Walks the modules and collects referenced headers into
249 // HeaderFileNames.
250 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
251   for (ModuleMap::module_iterator I = ModMap->module_begin(),
252     E = ModMap->module_end();
253     I != E; ++I) {
254     if (!collectModuleHeaders(*I->second))
255       return false;
256   }
257   return true;
258 }
259 
260 // Collect referenced headers from one module.
261 // Collects the headers referenced in the given module into
262 // HeaderFileNames.
263 bool ModularizeUtilities::collectModuleHeaders(const Module &Mod) {
264 
265   // Ignore explicit modules because they often have dependencies
266   // we can't know.
267   if (Mod.IsExplicit)
268     return true;
269 
270   // Treat headers in umbrella directory as dependencies.
271   DependentsVector UmbrellaDependents;
272 
273   // Recursively do submodules.
274   for (Module::submodule_const_iterator MI = Mod.submodule_begin(),
275       MIEnd = Mod.submodule_end();
276       MI != MIEnd; ++MI)
277     collectModuleHeaders(**MI);
278 
279   if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader()) {
280     std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
281     // Collect umbrella header.
282     HeaderFileNames.push_back(HeaderPath);
283 
284     // FUTURE: When needed, umbrella header header collection goes here.
285   }
286   else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir()) {
287     // If there normal headers, assume these are umbrellas and skip collection.
288     if (Mod.Headers->size() == 0) {
289       // Collect headers in umbrella directory.
290       if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
291         return false;
292     }
293   }
294 
295   // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
296   // assuming they are marked as such either because of unsuitability for
297   // modules or because they are meant to be included by another header,
298   // and thus should be ignored by modularize.
299 
300   int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
301 
302   for (int Index = 0; Index < NormalHeaderCount; ++Index) {
303     DependentsVector NormalDependents;
304     // Collect normal header.
305     const clang::Module::Header &Header(
306       Mod.Headers[clang::Module::HK_Normal][Index]);
307     std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
308     HeaderFileNames.push_back(HeaderPath);
309   }
310 
311   return true;
312 }
313 
314 // Collect headers from an umbrella directory.
315 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
316   DependentsVector &Dependents) {
317   // Initialize directory name.
318   SmallString<256> Directory(UmbrellaDirName);
319   // Walk the directory.
320   std::error_code EC;
321   llvm::sys::fs::file_status Status;
322   for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
323     I.increment(EC)) {
324     if (EC)
325       return false;
326     std::string File(I->path());
327     I->status(Status);
328     llvm::sys::fs::file_type Type = Status.type();
329     // If the file is a directory, ignore the name and recurse.
330     if (Type == llvm::sys::fs::file_type::directory_file) {
331       if (!collectUmbrellaHeaders(File, Dependents))
332         return false;
333       continue;
334     }
335     // If the file does not have a common header extension, ignore it.
336     if (!isHeader(File))
337       continue;
338     // Save header name.
339     std::string HeaderPath = getCanonicalPath(File);
340     Dependents.push_back(HeaderPath);
341   }
342   return true;
343 }
344 
345 std::string normalize(StringRef Path) {
346   SmallString<128> Buffer;
347   llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
348     E = llvm::sys::path::end(Path);
349   while (B != E) {
350     if (B->compare(".") == 0) {
351     }
352     else if (B->compare("..") == 0)
353       llvm::sys::path::remove_filename(Buffer);
354     else
355       llvm::sys::path::append(Buffer, *B);
356     ++B;
357   }
358   if (Path.endswith("/") || Path.endswith("\\"))
359     Buffer.append(1, Path.back());
360   return Buffer.c_str();
361 }
362 
363 // Convert header path to canonical form.
364 // The canonical form is basically just use forward slashes, and remove "./".
365 // \param FilePath The file path, relative to the module map directory.
366 // \returns The file path in canonical form.
367 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
368   std::string Tmp(normalize(FilePath));
369   std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
370   StringRef Tmp2(Tmp);
371   if (Tmp2.startswith("./"))
372     Tmp = Tmp2.substr(2);
373   return Tmp;
374 }
375 
376 // Check for header file extension.
377 // If the file extension is .h, .inc, or missing, it's
378 // assumed to be a header.
379 // \param FileName The file name.  Must not be a directory.
380 // \returns true if it has a header extension or no extension.
381 bool ModularizeUtilities::isHeader(StringRef FileName) {
382   StringRef Extension = llvm::sys::path::extension(FileName);
383   if (Extension.size() == 0)
384     return false;
385   if (Extension.equals_lower(".h"))
386     return true;
387   if (Extension.equals_lower(".inc"))
388     return true;
389   return false;
390 }
391 
392 // Get directory path component from file path.
393 // \returns the component of the given path, which will be
394 // relative if the given path is relative, absolute if the
395 // given path is absolute, or "." if the path has no leading
396 // path component.
397 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
398   SmallString<256> Directory(Path);
399   sys::path::remove_filename(Directory);
400   if (Directory.size() == 0)
401     return ".";
402   return Directory.str();
403 }
404