1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class for loading and validating a module map or 11 // header list by checking that all headers in the corresponding directories 12 // are accounted for. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "clang/Basic/SourceManager.h" 17 #include "clang/Driver/Options.h" 18 #include "clang/Frontend/CompilerInstance.h" 19 #include "clang/Frontend/FrontendActions.h" 20 #include "CoverageChecker.h" 21 #include "llvm/ADT/SmallString.h" 22 #include "llvm/Support/FileUtilities.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include "ModularizeUtilities.h" 27 28 using namespace clang; 29 using namespace llvm; 30 using namespace Modularize; 31 32 namespace { 33 // Subclass TargetOptions so we can construct it inline with 34 // the minimal option, the triple. 35 class ModuleMapTargetOptions : public clang::TargetOptions { 36 public: 37 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); } 38 }; 39 } // namespace 40 41 // ModularizeUtilities class implementation. 42 43 // Constructor. 44 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths, 45 llvm::StringRef Prefix) 46 : InputFilePaths(InputPaths), 47 HeaderPrefix(Prefix), 48 HasModuleMap(false), 49 // Init clang stuff needed for loading the module map and preprocessing. 50 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()), 51 DiagnosticOpts(new DiagnosticOptions()), 52 DC(llvm::errs(), DiagnosticOpts.get()), 53 Diagnostics( 54 new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)), 55 TargetOpts(new ModuleMapTargetOptions()), 56 Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)), 57 FileMgr(new FileManager(FileSystemOpts)), 58 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)), 59 HeaderSearchOpts(new HeaderSearchOptions()), 60 HeaderInfo(new HeaderSearch(HeaderSearchOpts, *SourceMgr, *Diagnostics, 61 *LangOpts, Target.get())) { 62 } 63 64 // Create instance of ModularizeUtilities, to simplify setting up 65 // subordinate objects. 66 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities( 67 std::vector<std::string> &InputPaths, llvm::StringRef Prefix) { 68 69 return new ModularizeUtilities(InputPaths, Prefix); 70 } 71 72 // Load all header lists and dependencies. 73 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() { 74 typedef std::vector<std::string>::iterator Iter; 75 // For each input file. 76 for (Iter I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) { 77 llvm::StringRef InputPath = *I; 78 // If it's a module map. 79 if (InputPath.endswith(".modulemap")) { 80 // Load the module map. 81 if (std::error_code EC = loadModuleMap(InputPath)) 82 return EC; 83 } 84 else { 85 // Else we assume it's a header list and load it. 86 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) { 87 errs() << "modularize: error: Unable to get header list '" << InputPath 88 << "': " << EC.message() << '\n'; 89 return EC; 90 } 91 } 92 } 93 return std::error_code(); 94 } 95 96 // Do coverage checks. 97 // For each loaded module map, do header coverage check. 98 // Starting from the directory of the module.map file, 99 // Find all header files, optionally looking only at files 100 // covered by the include path options, and compare against 101 // the headers referenced by the module.map file. 102 // Display warnings for unaccounted-for header files. 103 // Returns 0 if there were no errors or warnings, 1 if there 104 // were warnings, 2 if any other problem, such as a bad 105 // module map path argument was specified. 106 std::error_code ModularizeUtilities::doCoverageCheck( 107 std::vector<std::string> &IncludePaths, 108 llvm::ArrayRef<std::string> CommandLine) { 109 int ModuleMapCount = ModuleMaps.size(); 110 int ModuleMapIndex; 111 std::error_code EC; 112 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) { 113 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex]; 114 CoverageChecker *Checker = CoverageChecker::createCoverageChecker( 115 InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, ModMap.get()); 116 std::error_code LocalEC = Checker->doChecks(); 117 if (LocalEC.value() > 0) 118 EC = LocalEC; 119 } 120 return EC; 121 } 122 123 // Load single header list and dependencies. 124 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies( 125 llvm::StringRef InputPath) { 126 127 // By default, use the path component of the list file name. 128 SmallString<256> HeaderDirectory(InputPath); 129 llvm::sys::path::remove_filename(HeaderDirectory); 130 SmallString<256> CurrentDirectory; 131 llvm::sys::fs::current_path(CurrentDirectory); 132 133 // Get the prefix if we have one. 134 if (HeaderPrefix.size() != 0) 135 HeaderDirectory = HeaderPrefix; 136 137 // Read the header list file into a buffer. 138 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 139 MemoryBuffer::getFile(InputPath); 140 if (std::error_code EC = listBuffer.getError()) 141 return EC; 142 143 // Parse the header list into strings. 144 SmallVector<StringRef, 32> Strings; 145 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 146 147 // Collect the header file names from the string list. 148 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 149 E = Strings.end(); 150 I != E; ++I) { 151 StringRef Line = I->trim(); 152 // Ignore comments and empty lines. 153 if (Line.empty() || (Line[0] == '#')) 154 continue; 155 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':'); 156 SmallString<256> HeaderFileName; 157 // Prepend header file name prefix if it's not absolute. 158 if (llvm::sys::path::is_absolute(TargetAndDependents.first)) 159 llvm::sys::path::native(TargetAndDependents.first, HeaderFileName); 160 else { 161 if (HeaderDirectory.size() != 0) 162 HeaderFileName = HeaderDirectory; 163 else 164 HeaderFileName = CurrentDirectory; 165 llvm::sys::path::append(HeaderFileName, TargetAndDependents.first); 166 llvm::sys::path::native(HeaderFileName); 167 } 168 // Handle optional dependencies. 169 DependentsVector Dependents; 170 SmallVector<StringRef, 4> DependentsList; 171 TargetAndDependents.second.split(DependentsList, " ", -1, false); 172 int Count = DependentsList.size(); 173 for (int Index = 0; Index < Count; ++Index) { 174 SmallString<256> Dependent; 175 if (llvm::sys::path::is_absolute(DependentsList[Index])) 176 Dependent = DependentsList[Index]; 177 else { 178 if (HeaderDirectory.size() != 0) 179 Dependent = HeaderDirectory; 180 else 181 Dependent = CurrentDirectory; 182 llvm::sys::path::append(Dependent, DependentsList[Index]); 183 } 184 llvm::sys::path::native(Dependent); 185 Dependents.push_back(getCanonicalPath(Dependent.str())); 186 } 187 // Get canonical form. 188 HeaderFileName = getCanonicalPath(HeaderFileName); 189 // Save the resulting header file path and dependencies. 190 HeaderFileNames.push_back(HeaderFileName.str()); 191 Dependencies[HeaderFileName.str()] = Dependents; 192 } 193 return std::error_code(); 194 } 195 196 // Load single module map and extract header file list. 197 std::error_code ModularizeUtilities::loadModuleMap( 198 llvm::StringRef InputPath) { 199 // Get file entry for module.modulemap file. 200 const FileEntry *ModuleMapEntry = 201 SourceMgr->getFileManager().getFile(InputPath); 202 203 // return error if not found. 204 if (!ModuleMapEntry) { 205 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n"; 206 return std::error_code(1, std::generic_category()); 207 } 208 209 // Because the module map parser uses a ForwardingDiagnosticConsumer, 210 // which doesn't forward the BeginSourceFile call, we do it explicitly here. 211 DC.BeginSourceFile(*LangOpts, nullptr); 212 213 // Figure out the home directory for the module map file. 214 const DirectoryEntry *Dir = ModuleMapEntry->getDir(); 215 StringRef DirName(Dir->getName()); 216 if (llvm::sys::path::filename(DirName) == "Modules") { 217 DirName = llvm::sys::path::parent_path(DirName); 218 if (DirName.endswith(".framework")) 219 Dir = FileMgr->getDirectory(DirName); 220 // FIXME: This assert can fail if there's a race between the above check 221 // and the removal of the directory. 222 assert(Dir && "parent must exist"); 223 } 224 225 std::unique_ptr<ModuleMap> ModMap; 226 ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts, 227 Target.get(), *HeaderInfo)); 228 229 // Parse module.modulemap file into module map. 230 if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) { 231 return std::error_code(1, std::generic_category()); 232 } 233 234 // Do matching end call. 235 DC.EndSourceFile(); 236 237 if (!collectModuleMapHeaders(ModMap.get())) 238 return std::error_code(1, std::generic_category()); 239 240 // Save module map. 241 ModuleMaps.push_back(std::move(ModMap)); 242 243 // Indicate we are using module maps. 244 HasModuleMap = true; 245 246 return std::error_code(); 247 } 248 249 // Collect module map headers. 250 // Walks the modules and collects referenced headers into 251 // HeaderFileNames. 252 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) { 253 for (ModuleMap::module_iterator I = ModMap->module_begin(), 254 E = ModMap->module_end(); 255 I != E; ++I) { 256 if (!collectModuleHeaders(*I->second)) 257 return false; 258 } 259 return true; 260 } 261 262 // Collect referenced headers from one module. 263 // Collects the headers referenced in the given module into 264 // HeaderFileNames. 265 bool ModularizeUtilities::collectModuleHeaders(const Module &Mod) { 266 267 // Ignore explicit modules because they often have dependencies 268 // we can't know. 269 if (Mod.IsExplicit) 270 return true; 271 272 // Treat headers in umbrella directory as dependencies. 273 DependentsVector UmbrellaDependents; 274 275 // Recursively do submodules. 276 for (Module::submodule_const_iterator MI = Mod.submodule_begin(), 277 MIEnd = Mod.submodule_end(); 278 MI != MIEnd; ++MI) 279 collectModuleHeaders(**MI); 280 281 if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader()) { 282 std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName()); 283 // Collect umbrella header. 284 HeaderFileNames.push_back(HeaderPath); 285 286 // FUTURE: When needed, umbrella header header collection goes here. 287 } 288 else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir()) { 289 // If there normal headers, assume these are umbrellas and skip collection. 290 if (Mod.Headers->size() == 0) { 291 // Collect headers in umbrella directory. 292 if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents)) 293 return false; 294 } 295 } 296 297 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded, 298 // assuming they are marked as such either because of unsuitability for 299 // modules or because they are meant to be included by another header, 300 // and thus should be ignored by modularize. 301 302 int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size(); 303 304 for (int Index = 0; Index < NormalHeaderCount; ++Index) { 305 DependentsVector NormalDependents; 306 // Collect normal header. 307 const clang::Module::Header &Header( 308 Mod.Headers[clang::Module::HK_Normal][Index]); 309 std::string HeaderPath = getCanonicalPath(Header.Entry->getName()); 310 HeaderFileNames.push_back(HeaderPath); 311 } 312 313 return true; 314 } 315 316 // Collect headers from an umbrella directory. 317 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName, 318 DependentsVector &Dependents) { 319 // Initialize directory name. 320 SmallString<256> Directory(UmbrellaDirName); 321 // Walk the directory. 322 std::error_code EC; 323 llvm::sys::fs::file_status Status; 324 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E; 325 I.increment(EC)) { 326 if (EC) 327 return false; 328 std::string File(I->path()); 329 I->status(Status); 330 llvm::sys::fs::file_type Type = Status.type(); 331 // If the file is a directory, ignore the name and recurse. 332 if (Type == llvm::sys::fs::file_type::directory_file) { 333 if (!collectUmbrellaHeaders(File, Dependents)) 334 return false; 335 continue; 336 } 337 // If the file does not have a common header extension, ignore it. 338 if (!isHeader(File)) 339 continue; 340 // Save header name. 341 std::string HeaderPath = getCanonicalPath(File); 342 Dependents.push_back(HeaderPath); 343 } 344 return true; 345 } 346 347 // Replace .. embedded in path for purposes of having 348 // a canonical path. 349 static std::string replaceDotDot(StringRef Path) { 350 SmallString<128> Buffer; 351 llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path), 352 E = llvm::sys::path::end(Path); 353 while (B != E) { 354 if (B->compare(".") == 0) { 355 } 356 else if (B->compare("..") == 0) 357 llvm::sys::path::remove_filename(Buffer); 358 else 359 llvm::sys::path::append(Buffer, *B); 360 ++B; 361 } 362 if (Path.endswith("/") || Path.endswith("\\")) 363 Buffer.append(1, Path.back()); 364 return Buffer.c_str(); 365 } 366 367 // Convert header path to canonical form. 368 // The canonical form is basically just use forward slashes, and remove "./". 369 // \param FilePath The file path, relative to the module map directory. 370 // \returns The file path in canonical form. 371 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) { 372 std::string Tmp(replaceDotDot(FilePath)); 373 std::replace(Tmp.begin(), Tmp.end(), '\\', '/'); 374 StringRef Tmp2(Tmp); 375 if (Tmp2.startswith("./")) 376 Tmp = Tmp2.substr(2); 377 return Tmp; 378 } 379 380 // Check for header file extension. 381 // If the file extension is .h, .inc, or missing, it's 382 // assumed to be a header. 383 // \param FileName The file name. Must not be a directory. 384 // \returns true if it has a header extension or no extension. 385 bool ModularizeUtilities::isHeader(StringRef FileName) { 386 StringRef Extension = llvm::sys::path::extension(FileName); 387 if (Extension.size() == 0) 388 return false; 389 if (Extension.equals_lower(".h")) 390 return true; 391 if (Extension.equals_lower(".inc")) 392 return true; 393 return false; 394 } 395 396 // Get directory path component from file path. 397 // \returns the component of the given path, which will be 398 // relative if the given path is relative, absolute if the 399 // given path is absolute, or "." if the path has no leading 400 // path component. 401 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) { 402 SmallString<256> Directory(Path); 403 sys::path::remove_filename(Directory); 404 if (Directory.size() == 0) 405 return "."; 406 return Directory.str(); 407 } 408