1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class for loading and validating a module map or 11 // header list by checking that all headers in the corresponding directories 12 // are accounted for. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "clang/Basic/SourceManager.h" 17 #include "clang/Driver/Options.h" 18 #include "clang/Frontend/CompilerInstance.h" 19 #include "clang/Frontend/FrontendActions.h" 20 #include "CoverageChecker.h" 21 #include "llvm/ADT/SmallString.h" 22 #include "llvm/Support/FileUtilities.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include "ModularizeUtilities.h" 27 28 using namespace clang; 29 using namespace llvm; 30 using namespace Modularize; 31 32 namespace { 33 // Subclass TargetOptions so we can construct it inline with 34 // the minimal option, the triple. 35 class ModuleMapTargetOptions : public clang::TargetOptions { 36 public: 37 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); } 38 }; 39 } // namespace 40 41 // ModularizeUtilities class implementation. 42 43 // Constructor. 44 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths, 45 llvm::StringRef Prefix, 46 llvm::StringRef ProblemFilesListPath) 47 : InputFilePaths(InputPaths), 48 HeaderPrefix(Prefix), 49 ProblemFilesPath(ProblemFilesListPath), 50 HasModuleMap(false), 51 MissingHeaderCount(0), 52 // Init clang stuff needed for loading the module map and preprocessing. 53 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()), 54 DiagnosticOpts(new DiagnosticOptions()), 55 DC(llvm::errs(), DiagnosticOpts.get()), 56 Diagnostics( 57 new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)), 58 TargetOpts(new ModuleMapTargetOptions()), 59 Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)), 60 FileMgr(new FileManager(FileSystemOpts)), 61 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)), 62 HeaderSearchOpts(new HeaderSearchOptions()), 63 HeaderInfo(new HeaderSearch(HeaderSearchOpts, *SourceMgr, *Diagnostics, 64 *LangOpts, Target.get())) { 65 } 66 67 // Create instance of ModularizeUtilities, to simplify setting up 68 // subordinate objects. 69 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities( 70 std::vector<std::string> &InputPaths, llvm::StringRef Prefix, 71 llvm::StringRef ProblemFilesListPath) { 72 73 return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath); 74 } 75 76 // Load all header lists and dependencies. 77 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() { 78 // For each input file. 79 for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) { 80 llvm::StringRef InputPath = *I; 81 // If it's a module map. 82 if (InputPath.endswith(".modulemap")) { 83 // Load the module map. 84 if (std::error_code EC = loadModuleMap(InputPath)) 85 return EC; 86 } 87 else { 88 // Else we assume it's a header list and load it. 89 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) { 90 errs() << "modularize: error: Unable to get header list '" << InputPath 91 << "': " << EC.message() << '\n'; 92 return EC; 93 } 94 } 95 } 96 // If we have a problem files list. 97 if (ProblemFilesPath.size() != 0) { 98 // Load problem files list. 99 if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) { 100 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath 101 << "': " << EC.message() << '\n'; 102 return EC; 103 } 104 } 105 return std::error_code(); 106 } 107 108 // Do coverage checks. 109 // For each loaded module map, do header coverage check. 110 // Starting from the directory of the module.map file, 111 // Find all header files, optionally looking only at files 112 // covered by the include path options, and compare against 113 // the headers referenced by the module.map file. 114 // Display warnings for unaccounted-for header files. 115 // Returns 0 if there were no errors or warnings, 1 if there 116 // were warnings, 2 if any other problem, such as a bad 117 // module map path argument was specified. 118 std::error_code ModularizeUtilities::doCoverageCheck( 119 std::vector<std::string> &IncludePaths, 120 llvm::ArrayRef<std::string> CommandLine) { 121 int ModuleMapCount = ModuleMaps.size(); 122 int ModuleMapIndex; 123 std::error_code EC; 124 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) { 125 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex]; 126 CoverageChecker *Checker = CoverageChecker::createCoverageChecker( 127 InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, ModMap.get()); 128 std::error_code LocalEC = Checker->doChecks(); 129 if (LocalEC.value() > 0) 130 EC = LocalEC; 131 } 132 return EC; 133 } 134 135 // Load single header list and dependencies. 136 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies( 137 llvm::StringRef InputPath) { 138 139 // By default, use the path component of the list file name. 140 SmallString<256> HeaderDirectory(InputPath); 141 llvm::sys::path::remove_filename(HeaderDirectory); 142 SmallString<256> CurrentDirectory; 143 llvm::sys::fs::current_path(CurrentDirectory); 144 145 // Get the prefix if we have one. 146 if (HeaderPrefix.size() != 0) 147 HeaderDirectory = HeaderPrefix; 148 149 // Read the header list file into a buffer. 150 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 151 MemoryBuffer::getFile(InputPath); 152 if (std::error_code EC = listBuffer.getError()) 153 return EC; 154 155 // Parse the header list into strings. 156 SmallVector<StringRef, 32> Strings; 157 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 158 159 // Collect the header file names from the string list. 160 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 161 E = Strings.end(); 162 I != E; ++I) { 163 StringRef Line = I->trim(); 164 // Ignore comments and empty lines. 165 if (Line.empty() || (Line[0] == '#')) 166 continue; 167 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':'); 168 SmallString<256> HeaderFileName; 169 // Prepend header file name prefix if it's not absolute. 170 if (llvm::sys::path::is_absolute(TargetAndDependents.first)) 171 llvm::sys::path::native(TargetAndDependents.first, HeaderFileName); 172 else { 173 if (HeaderDirectory.size() != 0) 174 HeaderFileName = HeaderDirectory; 175 else 176 HeaderFileName = CurrentDirectory; 177 llvm::sys::path::append(HeaderFileName, TargetAndDependents.first); 178 llvm::sys::path::native(HeaderFileName); 179 } 180 // Handle optional dependencies. 181 DependentsVector Dependents; 182 SmallVector<StringRef, 4> DependentsList; 183 TargetAndDependents.second.split(DependentsList, " ", -1, false); 184 int Count = DependentsList.size(); 185 for (int Index = 0; Index < Count; ++Index) { 186 SmallString<256> Dependent; 187 if (llvm::sys::path::is_absolute(DependentsList[Index])) 188 Dependent = DependentsList[Index]; 189 else { 190 if (HeaderDirectory.size() != 0) 191 Dependent = HeaderDirectory; 192 else 193 Dependent = CurrentDirectory; 194 llvm::sys::path::append(Dependent, DependentsList[Index]); 195 } 196 llvm::sys::path::native(Dependent); 197 Dependents.push_back(getCanonicalPath(Dependent.str())); 198 } 199 // Get canonical form. 200 HeaderFileName = getCanonicalPath(HeaderFileName); 201 // Save the resulting header file path and dependencies. 202 HeaderFileNames.push_back(HeaderFileName.str()); 203 Dependencies[HeaderFileName.str()] = Dependents; 204 } 205 return std::error_code(); 206 } 207 208 // Load problem header list. 209 std::error_code ModularizeUtilities::loadProblemHeaderList( 210 llvm::StringRef InputPath) { 211 212 // By default, use the path component of the list file name. 213 SmallString<256> HeaderDirectory(InputPath); 214 llvm::sys::path::remove_filename(HeaderDirectory); 215 SmallString<256> CurrentDirectory; 216 llvm::sys::fs::current_path(CurrentDirectory); 217 218 // Get the prefix if we have one. 219 if (HeaderPrefix.size() != 0) 220 HeaderDirectory = HeaderPrefix; 221 222 // Read the header list file into a buffer. 223 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 224 MemoryBuffer::getFile(InputPath); 225 if (std::error_code EC = listBuffer.getError()) 226 return EC; 227 228 // Parse the header list into strings. 229 SmallVector<StringRef, 32> Strings; 230 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 231 232 // Collect the header file names from the string list. 233 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 234 E = Strings.end(); 235 I != E; ++I) { 236 StringRef Line = I->trim(); 237 // Ignore comments and empty lines. 238 if (Line.empty() || (Line[0] == '#')) 239 continue; 240 SmallString<256> HeaderFileName; 241 // Prepend header file name prefix if it's not absolute. 242 if (llvm::sys::path::is_absolute(Line)) 243 llvm::sys::path::native(Line, HeaderFileName); 244 else { 245 if (HeaderDirectory.size() != 0) 246 HeaderFileName = HeaderDirectory; 247 else 248 HeaderFileName = CurrentDirectory; 249 llvm::sys::path::append(HeaderFileName, Line); 250 llvm::sys::path::native(HeaderFileName); 251 } 252 // Get canonical form. 253 HeaderFileName = getCanonicalPath(HeaderFileName); 254 // Save the resulting header file path. 255 ProblemFileNames.push_back(HeaderFileName.str()); 256 } 257 return std::error_code(); 258 } 259 260 // Load single module map and extract header file list. 261 std::error_code ModularizeUtilities::loadModuleMap( 262 llvm::StringRef InputPath) { 263 // Get file entry for module.modulemap file. 264 const FileEntry *ModuleMapEntry = 265 SourceMgr->getFileManager().getFile(InputPath); 266 267 // return error if not found. 268 if (!ModuleMapEntry) { 269 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n"; 270 return std::error_code(1, std::generic_category()); 271 } 272 273 // Because the module map parser uses a ForwardingDiagnosticConsumer, 274 // which doesn't forward the BeginSourceFile call, we do it explicitly here. 275 DC.BeginSourceFile(*LangOpts, nullptr); 276 277 // Figure out the home directory for the module map file. 278 const DirectoryEntry *Dir = ModuleMapEntry->getDir(); 279 StringRef DirName(Dir->getName()); 280 if (llvm::sys::path::filename(DirName) == "Modules") { 281 DirName = llvm::sys::path::parent_path(DirName); 282 if (DirName.endswith(".framework")) 283 Dir = FileMgr->getDirectory(DirName); 284 // FIXME: This assert can fail if there's a race between the above check 285 // and the removal of the directory. 286 assert(Dir && "parent must exist"); 287 } 288 289 std::unique_ptr<ModuleMap> ModMap; 290 ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts, 291 Target.get(), *HeaderInfo)); 292 293 // Parse module.modulemap file into module map. 294 if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) { 295 return std::error_code(1, std::generic_category()); 296 } 297 298 // Do matching end call. 299 DC.EndSourceFile(); 300 301 // Reset missing header count. 302 MissingHeaderCount = 0; 303 304 if (!collectModuleMapHeaders(ModMap.get())) 305 return std::error_code(1, std::generic_category()); 306 307 // Save module map. 308 ModuleMaps.push_back(std::move(ModMap)); 309 310 // Indicate we are using module maps. 311 HasModuleMap = true; 312 313 // Return code of 1 for missing headers. 314 if (MissingHeaderCount) 315 return std::error_code(1, std::generic_category()); 316 317 return std::error_code(); 318 } 319 320 // Collect module map headers. 321 // Walks the modules and collects referenced headers into 322 // HeaderFileNames. 323 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) { 324 for (ModuleMap::module_iterator I = ModMap->module_begin(), 325 E = ModMap->module_end(); 326 I != E; ++I) { 327 if (!collectModuleHeaders(*I->second)) 328 return false; 329 } 330 return true; 331 } 332 333 // Collect referenced headers from one module. 334 // Collects the headers referenced in the given module into 335 // HeaderFileNames. 336 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) { 337 338 // Ignore explicit modules because they often have dependencies 339 // we can't know. 340 if (Mod.IsExplicit) 341 return true; 342 343 // Treat headers in umbrella directory as dependencies. 344 DependentsVector UmbrellaDependents; 345 346 // Recursively do submodules. 347 for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end(); 348 MI != MIEnd; ++MI) 349 collectModuleHeaders(**MI); 350 351 if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) { 352 std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName()); 353 // Collect umbrella header. 354 HeaderFileNames.push_back(HeaderPath); 355 356 // FUTURE: When needed, umbrella header header collection goes here. 357 } 358 else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) { 359 // If there normal headers, assume these are umbrellas and skip collection. 360 if (Mod.Headers->size() == 0) { 361 // Collect headers in umbrella directory. 362 if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents)) 363 return false; 364 } 365 } 366 367 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded, 368 // assuming they are marked as such either because of unsuitability for 369 // modules or because they are meant to be included by another header, 370 // and thus should be ignored by modularize. 371 372 int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size(); 373 374 for (int Index = 0; Index < NormalHeaderCount; ++Index) { 375 DependentsVector NormalDependents; 376 // Collect normal header. 377 const clang::Module::Header &Header( 378 Mod.Headers[clang::Module::HK_Normal][Index]); 379 std::string HeaderPath = getCanonicalPath(Header.Entry->getName()); 380 HeaderFileNames.push_back(HeaderPath); 381 } 382 383 int MissingCountThisModule = Mod.MissingHeaders.size(); 384 385 for (int Index = 0; Index < MissingCountThisModule; ++Index) { 386 std::string MissingFile = Mod.MissingHeaders[Index].FileName; 387 SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc; 388 errs() << Loc.printToString(*SourceMgr) 389 << ": error : Header not found: " << MissingFile << "\n"; 390 } 391 392 MissingHeaderCount += MissingCountThisModule; 393 394 return true; 395 } 396 397 // Collect headers from an umbrella directory. 398 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName, 399 DependentsVector &Dependents) { 400 // Initialize directory name. 401 SmallString<256> Directory(UmbrellaDirName); 402 // Walk the directory. 403 std::error_code EC; 404 llvm::sys::fs::file_status Status; 405 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E; 406 I.increment(EC)) { 407 if (EC) 408 return false; 409 std::string File(I->path()); 410 I->status(Status); 411 llvm::sys::fs::file_type Type = Status.type(); 412 // If the file is a directory, ignore the name and recurse. 413 if (Type == llvm::sys::fs::file_type::directory_file) { 414 if (!collectUmbrellaHeaders(File, Dependents)) 415 return false; 416 continue; 417 } 418 // If the file does not have a common header extension, ignore it. 419 if (!isHeader(File)) 420 continue; 421 // Save header name. 422 std::string HeaderPath = getCanonicalPath(File); 423 Dependents.push_back(HeaderPath); 424 } 425 return true; 426 } 427 428 // Replace .. embedded in path for purposes of having 429 // a canonical path. 430 static std::string replaceDotDot(StringRef Path) { 431 SmallString<128> Buffer; 432 llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path), 433 E = llvm::sys::path::end(Path); 434 while (B != E) { 435 if (B->compare(".") == 0) { 436 } 437 else if (B->compare("..") == 0) 438 llvm::sys::path::remove_filename(Buffer); 439 else 440 llvm::sys::path::append(Buffer, *B); 441 ++B; 442 } 443 if (Path.endswith("/") || Path.endswith("\\")) 444 Buffer.append(1, Path.back()); 445 return Buffer.c_str(); 446 } 447 448 // Convert header path to canonical form. 449 // The canonical form is basically just use forward slashes, and remove "./". 450 // \param FilePath The file path, relative to the module map directory. 451 // \returns The file path in canonical form. 452 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) { 453 std::string Tmp(replaceDotDot(FilePath)); 454 std::replace(Tmp.begin(), Tmp.end(), '\\', '/'); 455 StringRef Tmp2(Tmp); 456 if (Tmp2.startswith("./")) 457 Tmp = Tmp2.substr(2); 458 return Tmp; 459 } 460 461 // Check for header file extension. 462 // If the file extension is .h, .inc, or missing, it's 463 // assumed to be a header. 464 // \param FileName The file name. Must not be a directory. 465 // \returns true if it has a header extension or no extension. 466 bool ModularizeUtilities::isHeader(StringRef FileName) { 467 StringRef Extension = llvm::sys::path::extension(FileName); 468 if (Extension.size() == 0) 469 return true; 470 if (Extension.equals_lower(".h")) 471 return true; 472 if (Extension.equals_lower(".inc")) 473 return true; 474 return false; 475 } 476 477 // Get directory path component from file path. 478 // \returns the component of the given path, which will be 479 // relative if the given path is relative, absolute if the 480 // given path is absolute, or "." if the path has no leading 481 // path component. 482 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) { 483 SmallString<256> Directory(Path); 484 sys::path::remove_filename(Directory); 485 if (Directory.size() == 0) 486 return "."; 487 return Directory.str(); 488 } 489 490 // Add unique problem file. 491 // Also standardizes the path. 492 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) { 493 FilePath = getCanonicalPath(FilePath); 494 // Don't add if already present. 495 for(auto &TestFilePath : ProblemFileNames) { 496 if (TestFilePath == FilePath) 497 return; 498 } 499 ProblemFileNames.push_back(FilePath); 500 } 501 502 // Add file with no compile errors. 503 // Also standardizes the path. 504 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) { 505 FilePath = getCanonicalPath(FilePath); 506 GoodFileNames.push_back(FilePath); 507 } 508 509 // List problem files. 510 void ModularizeUtilities::displayProblemFiles() { 511 errs() << "\nThese are the files with possible errors:\n\n"; 512 for (auto &ProblemFile : ProblemFileNames) { 513 errs() << ProblemFile << "\n"; 514 } 515 } 516 517 // List files with no problems. 518 void ModularizeUtilities::displayGoodFiles() { 519 errs() << "\nThese are the files with no detected errors:\n\n"; 520 for (auto &GoodFile : HeaderFileNames) { 521 bool Good = true; 522 for (auto &ProblemFile : ProblemFileNames) { 523 if (ProblemFile == GoodFile) { 524 Good = false; 525 break; 526 } 527 } 528 if (Good) 529 errs() << GoodFile << "\n"; 530 } 531 } 532 533 // List files with problem files commented out. 534 void ModularizeUtilities::displayCombinedFiles() { 535 errs() << 536 "\nThese are the combined files, with problem files preceded by #:\n\n"; 537 for (auto &File : HeaderFileNames) { 538 bool Good = true; 539 for (auto &ProblemFile : ProblemFileNames) { 540 if (ProblemFile == File) { 541 Good = false; 542 break; 543 } 544 } 545 errs() << (Good ? "" : "#") << File << "\n"; 546 } 547 } 548