1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class for loading and validating a module map or 11 // header list by checking that all headers in the corresponding directories 12 // are accounted for. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "clang/Basic/SourceManager.h" 17 #include "clang/Driver/Options.h" 18 #include "clang/Frontend/CompilerInstance.h" 19 #include "clang/Frontend/FrontendActions.h" 20 #include "CoverageChecker.h" 21 #include "llvm/ADT/SmallString.h" 22 #include "llvm/Support/FileUtilities.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include "ModularizeUtilities.h" 27 28 using namespace clang; 29 using namespace llvm; 30 using namespace Modularize; 31 32 namespace { 33 // Subclass TargetOptions so we can construct it inline with 34 // the minimal option, the triple. 35 class ModuleMapTargetOptions : public clang::TargetOptions { 36 public: 37 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); } 38 }; 39 } // namespace 40 41 // ModularizeUtilities class implementation. 42 43 // Constructor. 44 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths, 45 llvm::StringRef Prefix, 46 llvm::StringRef ProblemFilesListPath) 47 : InputFilePaths(InputPaths), HeaderPrefix(Prefix), 48 ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false), 49 MissingHeaderCount(0), 50 // Init clang stuff needed for loading the module map and preprocessing. 51 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()), 52 DiagnosticOpts(new DiagnosticOptions()), 53 DC(llvm::errs(), DiagnosticOpts.get()), 54 Diagnostics( 55 new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)), 56 TargetOpts(new ModuleMapTargetOptions()), 57 Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)), 58 FileMgr(new FileManager(FileSystemOpts)), 59 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)), 60 HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(), 61 *SourceMgr, *Diagnostics, *LangOpts, 62 Target.get())) {} 63 64 // Create instance of ModularizeUtilities, to simplify setting up 65 // subordinate objects. 66 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities( 67 std::vector<std::string> &InputPaths, llvm::StringRef Prefix, 68 llvm::StringRef ProblemFilesListPath) { 69 70 return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath); 71 } 72 73 // Load all header lists and dependencies. 74 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() { 75 // For each input file. 76 for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) { 77 llvm::StringRef InputPath = *I; 78 // If it's a module map. 79 if (InputPath.endswith(".modulemap")) { 80 // Load the module map. 81 if (std::error_code EC = loadModuleMap(InputPath)) 82 return EC; 83 } 84 else { 85 // Else we assume it's a header list and load it. 86 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) { 87 errs() << "modularize: error: Unable to get header list '" << InputPath 88 << "': " << EC.message() << '\n'; 89 return EC; 90 } 91 } 92 } 93 // If we have a problem files list. 94 if (ProblemFilesPath.size() != 0) { 95 // Load problem files list. 96 if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) { 97 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath 98 << "': " << EC.message() << '\n'; 99 return EC; 100 } 101 } 102 return std::error_code(); 103 } 104 105 // Do coverage checks. 106 // For each loaded module map, do header coverage check. 107 // Starting from the directory of the module.map file, 108 // Find all header files, optionally looking only at files 109 // covered by the include path options, and compare against 110 // the headers referenced by the module.map file. 111 // Display warnings for unaccounted-for header files. 112 // Returns 0 if there were no errors or warnings, 1 if there 113 // were warnings, 2 if any other problem, such as a bad 114 // module map path argument was specified. 115 std::error_code ModularizeUtilities::doCoverageCheck( 116 std::vector<std::string> &IncludePaths, 117 llvm::ArrayRef<std::string> CommandLine) { 118 int ModuleMapCount = ModuleMaps.size(); 119 int ModuleMapIndex; 120 std::error_code EC; 121 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) { 122 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex]; 123 auto Checker = CoverageChecker::createCoverageChecker( 124 InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, 125 ModMap.get()); 126 std::error_code LocalEC = Checker->doChecks(); 127 if (LocalEC.value() > 0) 128 EC = LocalEC; 129 } 130 return EC; 131 } 132 133 // Load single header list and dependencies. 134 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies( 135 llvm::StringRef InputPath) { 136 137 // By default, use the path component of the list file name. 138 SmallString<256> HeaderDirectory(InputPath); 139 llvm::sys::path::remove_filename(HeaderDirectory); 140 SmallString<256> CurrentDirectory; 141 llvm::sys::fs::current_path(CurrentDirectory); 142 143 // Get the prefix if we have one. 144 if (HeaderPrefix.size() != 0) 145 HeaderDirectory = HeaderPrefix; 146 147 // Read the header list file into a buffer. 148 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 149 MemoryBuffer::getFile(InputPath); 150 if (std::error_code EC = listBuffer.getError()) 151 return EC; 152 153 // Parse the header list into strings. 154 SmallVector<StringRef, 32> Strings; 155 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 156 157 // Collect the header file names from the string list. 158 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 159 E = Strings.end(); 160 I != E; ++I) { 161 StringRef Line = I->trim(); 162 // Ignore comments and empty lines. 163 if (Line.empty() || (Line[0] == '#')) 164 continue; 165 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':'); 166 SmallString<256> HeaderFileName; 167 // Prepend header file name prefix if it's not absolute. 168 if (llvm::sys::path::is_absolute(TargetAndDependents.first)) 169 llvm::sys::path::native(TargetAndDependents.first, HeaderFileName); 170 else { 171 if (HeaderDirectory.size() != 0) 172 HeaderFileName = HeaderDirectory; 173 else 174 HeaderFileName = CurrentDirectory; 175 llvm::sys::path::append(HeaderFileName, TargetAndDependents.first); 176 llvm::sys::path::native(HeaderFileName); 177 } 178 // Handle optional dependencies. 179 DependentsVector Dependents; 180 SmallVector<StringRef, 4> DependentsList; 181 TargetAndDependents.second.split(DependentsList, " ", -1, false); 182 int Count = DependentsList.size(); 183 for (int Index = 0; Index < Count; ++Index) { 184 SmallString<256> Dependent; 185 if (llvm::sys::path::is_absolute(DependentsList[Index])) 186 Dependent = DependentsList[Index]; 187 else { 188 if (HeaderDirectory.size() != 0) 189 Dependent = HeaderDirectory; 190 else 191 Dependent = CurrentDirectory; 192 llvm::sys::path::append(Dependent, DependentsList[Index]); 193 } 194 llvm::sys::path::native(Dependent); 195 Dependents.push_back(getCanonicalPath(Dependent.str())); 196 } 197 // Get canonical form. 198 HeaderFileName = getCanonicalPath(HeaderFileName); 199 // Save the resulting header file path and dependencies. 200 HeaderFileNames.push_back(HeaderFileName.str()); 201 Dependencies[HeaderFileName.str()] = Dependents; 202 } 203 return std::error_code(); 204 } 205 206 // Load problem header list. 207 std::error_code ModularizeUtilities::loadProblemHeaderList( 208 llvm::StringRef InputPath) { 209 210 // By default, use the path component of the list file name. 211 SmallString<256> HeaderDirectory(InputPath); 212 llvm::sys::path::remove_filename(HeaderDirectory); 213 SmallString<256> CurrentDirectory; 214 llvm::sys::fs::current_path(CurrentDirectory); 215 216 // Get the prefix if we have one. 217 if (HeaderPrefix.size() != 0) 218 HeaderDirectory = HeaderPrefix; 219 220 // Read the header list file into a buffer. 221 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 222 MemoryBuffer::getFile(InputPath); 223 if (std::error_code EC = listBuffer.getError()) 224 return EC; 225 226 // Parse the header list into strings. 227 SmallVector<StringRef, 32> Strings; 228 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 229 230 // Collect the header file names from the string list. 231 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 232 E = Strings.end(); 233 I != E; ++I) { 234 StringRef Line = I->trim(); 235 // Ignore comments and empty lines. 236 if (Line.empty() || (Line[0] == '#')) 237 continue; 238 SmallString<256> HeaderFileName; 239 // Prepend header file name prefix if it's not absolute. 240 if (llvm::sys::path::is_absolute(Line)) 241 llvm::sys::path::native(Line, HeaderFileName); 242 else { 243 if (HeaderDirectory.size() != 0) 244 HeaderFileName = HeaderDirectory; 245 else 246 HeaderFileName = CurrentDirectory; 247 llvm::sys::path::append(HeaderFileName, Line); 248 llvm::sys::path::native(HeaderFileName); 249 } 250 // Get canonical form. 251 HeaderFileName = getCanonicalPath(HeaderFileName); 252 // Save the resulting header file path. 253 ProblemFileNames.push_back(HeaderFileName.str()); 254 } 255 return std::error_code(); 256 } 257 258 // Load single module map and extract header file list. 259 std::error_code ModularizeUtilities::loadModuleMap( 260 llvm::StringRef InputPath) { 261 // Get file entry for module.modulemap file. 262 const FileEntry *ModuleMapEntry = 263 SourceMgr->getFileManager().getFile(InputPath); 264 265 // return error if not found. 266 if (!ModuleMapEntry) { 267 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n"; 268 return std::error_code(1, std::generic_category()); 269 } 270 271 // Because the module map parser uses a ForwardingDiagnosticConsumer, 272 // which doesn't forward the BeginSourceFile call, we do it explicitly here. 273 DC.BeginSourceFile(*LangOpts, nullptr); 274 275 // Figure out the home directory for the module map file. 276 const DirectoryEntry *Dir = ModuleMapEntry->getDir(); 277 StringRef DirName(Dir->getName()); 278 if (llvm::sys::path::filename(DirName) == "Modules") { 279 DirName = llvm::sys::path::parent_path(DirName); 280 if (DirName.endswith(".framework")) 281 Dir = FileMgr->getDirectory(DirName); 282 // FIXME: This assert can fail if there's a race between the above check 283 // and the removal of the directory. 284 assert(Dir && "parent must exist"); 285 } 286 287 std::unique_ptr<ModuleMap> ModMap; 288 ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts, 289 Target.get(), *HeaderInfo)); 290 291 // Parse module.modulemap file into module map. 292 if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) { 293 return std::error_code(1, std::generic_category()); 294 } 295 296 // Do matching end call. 297 DC.EndSourceFile(); 298 299 // Reset missing header count. 300 MissingHeaderCount = 0; 301 302 if (!collectModuleMapHeaders(ModMap.get())) 303 return std::error_code(1, std::generic_category()); 304 305 // Save module map. 306 ModuleMaps.push_back(std::move(ModMap)); 307 308 // Indicate we are using module maps. 309 HasModuleMap = true; 310 311 // Return code of 1 for missing headers. 312 if (MissingHeaderCount) 313 return std::error_code(1, std::generic_category()); 314 315 return std::error_code(); 316 } 317 318 // Collect module map headers. 319 // Walks the modules and collects referenced headers into 320 // HeaderFileNames. 321 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) { 322 for (ModuleMap::module_iterator I = ModMap->module_begin(), 323 E = ModMap->module_end(); 324 I != E; ++I) { 325 if (!collectModuleHeaders(*I->second)) 326 return false; 327 } 328 return true; 329 } 330 331 // Collect referenced headers from one module. 332 // Collects the headers referenced in the given module into 333 // HeaderFileNames. 334 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) { 335 336 // Ignore explicit modules because they often have dependencies 337 // we can't know. 338 if (Mod.IsExplicit) 339 return true; 340 341 // Treat headers in umbrella directory as dependencies. 342 DependentsVector UmbrellaDependents; 343 344 // Recursively do submodules. 345 for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end(); 346 MI != MIEnd; ++MI) 347 collectModuleHeaders(**MI); 348 349 if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) { 350 std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName()); 351 // Collect umbrella header. 352 HeaderFileNames.push_back(HeaderPath); 353 354 // FUTURE: When needed, umbrella header header collection goes here. 355 } 356 else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) { 357 // If there normal headers, assume these are umbrellas and skip collection. 358 if (Mod.Headers->size() == 0) { 359 // Collect headers in umbrella directory. 360 if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents)) 361 return false; 362 } 363 } 364 365 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded, 366 // assuming they are marked as such either because of unsuitability for 367 // modules or because they are meant to be included by another header, 368 // and thus should be ignored by modularize. 369 370 int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size(); 371 372 for (int Index = 0; Index < NormalHeaderCount; ++Index) { 373 DependentsVector NormalDependents; 374 // Collect normal header. 375 const clang::Module::Header &Header( 376 Mod.Headers[clang::Module::HK_Normal][Index]); 377 std::string HeaderPath = getCanonicalPath(Header.Entry->getName()); 378 HeaderFileNames.push_back(HeaderPath); 379 } 380 381 int MissingCountThisModule = Mod.MissingHeaders.size(); 382 383 for (int Index = 0; Index < MissingCountThisModule; ++Index) { 384 std::string MissingFile = Mod.MissingHeaders[Index].FileName; 385 SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc; 386 errs() << Loc.printToString(*SourceMgr) 387 << ": error : Header not found: " << MissingFile << "\n"; 388 } 389 390 MissingHeaderCount += MissingCountThisModule; 391 392 return true; 393 } 394 395 // Collect headers from an umbrella directory. 396 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName, 397 DependentsVector &Dependents) { 398 // Initialize directory name. 399 SmallString<256> Directory(UmbrellaDirName); 400 // Walk the directory. 401 std::error_code EC; 402 llvm::sys::fs::file_status Status; 403 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E; 404 I.increment(EC)) { 405 if (EC) 406 return false; 407 std::string File(I->path()); 408 I->status(Status); 409 llvm::sys::fs::file_type Type = Status.type(); 410 // If the file is a directory, ignore the name and recurse. 411 if (Type == llvm::sys::fs::file_type::directory_file) { 412 if (!collectUmbrellaHeaders(File, Dependents)) 413 return false; 414 continue; 415 } 416 // If the file does not have a common header extension, ignore it. 417 if (!isHeader(File)) 418 continue; 419 // Save header name. 420 std::string HeaderPath = getCanonicalPath(File); 421 Dependents.push_back(HeaderPath); 422 } 423 return true; 424 } 425 426 // Replace .. embedded in path for purposes of having 427 // a canonical path. 428 static std::string replaceDotDot(StringRef Path) { 429 SmallString<128> Buffer; 430 llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path), 431 E = llvm::sys::path::end(Path); 432 while (B != E) { 433 if (B->compare(".") == 0) { 434 } 435 else if (B->compare("..") == 0) 436 llvm::sys::path::remove_filename(Buffer); 437 else 438 llvm::sys::path::append(Buffer, *B); 439 ++B; 440 } 441 if (Path.endswith("/") || Path.endswith("\\")) 442 Buffer.append(1, Path.back()); 443 return Buffer.c_str(); 444 } 445 446 // Convert header path to canonical form. 447 // The canonical form is basically just use forward slashes, and remove "./". 448 // \param FilePath The file path, relative to the module map directory. 449 // \returns The file path in canonical form. 450 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) { 451 std::string Tmp(replaceDotDot(FilePath)); 452 std::replace(Tmp.begin(), Tmp.end(), '\\', '/'); 453 StringRef Tmp2(Tmp); 454 if (Tmp2.startswith("./")) 455 Tmp = Tmp2.substr(2); 456 return Tmp; 457 } 458 459 // Check for header file extension. 460 // If the file extension is .h, .inc, or missing, it's 461 // assumed to be a header. 462 // \param FileName The file name. Must not be a directory. 463 // \returns true if it has a header extension or no extension. 464 bool ModularizeUtilities::isHeader(StringRef FileName) { 465 StringRef Extension = llvm::sys::path::extension(FileName); 466 if (Extension.size() == 0) 467 return true; 468 if (Extension.equals_lower(".h")) 469 return true; 470 if (Extension.equals_lower(".inc")) 471 return true; 472 return false; 473 } 474 475 // Get directory path component from file path. 476 // \returns the component of the given path, which will be 477 // relative if the given path is relative, absolute if the 478 // given path is absolute, or "." if the path has no leading 479 // path component. 480 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) { 481 SmallString<256> Directory(Path); 482 sys::path::remove_filename(Directory); 483 if (Directory.size() == 0) 484 return "."; 485 return Directory.str(); 486 } 487 488 // Add unique problem file. 489 // Also standardizes the path. 490 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) { 491 FilePath = getCanonicalPath(FilePath); 492 // Don't add if already present. 493 for(auto &TestFilePath : ProblemFileNames) { 494 if (TestFilePath == FilePath) 495 return; 496 } 497 ProblemFileNames.push_back(FilePath); 498 } 499 500 // Add file with no compile errors. 501 // Also standardizes the path. 502 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) { 503 FilePath = getCanonicalPath(FilePath); 504 GoodFileNames.push_back(FilePath); 505 } 506 507 // List problem files. 508 void ModularizeUtilities::displayProblemFiles() { 509 errs() << "\nThese are the files with possible errors:\n\n"; 510 for (auto &ProblemFile : ProblemFileNames) { 511 errs() << ProblemFile << "\n"; 512 } 513 } 514 515 // List files with no problems. 516 void ModularizeUtilities::displayGoodFiles() { 517 errs() << "\nThese are the files with no detected errors:\n\n"; 518 for (auto &GoodFile : HeaderFileNames) { 519 bool Good = true; 520 for (auto &ProblemFile : ProblemFileNames) { 521 if (ProblemFile == GoodFile) { 522 Good = false; 523 break; 524 } 525 } 526 if (Good) 527 errs() << GoodFile << "\n"; 528 } 529 } 530 531 // List files with problem files commented out. 532 void ModularizeUtilities::displayCombinedFiles() { 533 errs() << 534 "\nThese are the combined files, with problem files preceded by #:\n\n"; 535 for (auto &File : HeaderFileNames) { 536 bool Good = true; 537 for (auto &ProblemFile : ProblemFileNames) { 538 if (ProblemFile == File) { 539 Good = false; 540 break; 541 } 542 } 543 errs() << (Good ? "" : "#") << File << "\n"; 544 } 545 } 546