1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class for loading and validating a module map or 10 // header list by checking that all headers in the corresponding directories 11 // are accounted for. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Basic/SourceManager.h" 16 #include "clang/Driver/Options.h" 17 #include "clang/Frontend/CompilerInstance.h" 18 #include "clang/Frontend/FrontendActions.h" 19 #include "CoverageChecker.h" 20 #include "llvm/ADT/SmallString.h" 21 #include "llvm/Support/FileUtilities.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 #include "llvm/Support/Path.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "ModularizeUtilities.h" 26 27 using namespace clang; 28 using namespace llvm; 29 using namespace Modularize; 30 31 namespace { 32 // Subclass TargetOptions so we can construct it inline with 33 // the minimal option, the triple. 34 class ModuleMapTargetOptions : public clang::TargetOptions { 35 public: 36 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); } 37 }; 38 } // namespace 39 40 // ModularizeUtilities class implementation. 41 42 // Constructor. 43 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths, 44 llvm::StringRef Prefix, 45 llvm::StringRef ProblemFilesListPath) 46 : InputFilePaths(InputPaths), HeaderPrefix(Prefix), 47 ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false), 48 MissingHeaderCount(0), 49 // Init clang stuff needed for loading the module map and preprocessing. 50 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()), 51 DiagnosticOpts(new DiagnosticOptions()), 52 DC(llvm::errs(), DiagnosticOpts.get()), 53 Diagnostics( 54 new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)), 55 TargetOpts(new ModuleMapTargetOptions()), 56 Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)), 57 FileMgr(new FileManager(FileSystemOpts)), 58 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)), 59 HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(), 60 *SourceMgr, *Diagnostics, *LangOpts, 61 Target.get())) {} 62 63 // Create instance of ModularizeUtilities, to simplify setting up 64 // subordinate objects. 65 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities( 66 std::vector<std::string> &InputPaths, llvm::StringRef Prefix, 67 llvm::StringRef ProblemFilesListPath) { 68 69 return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath); 70 } 71 72 // Load all header lists and dependencies. 73 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() { 74 // For each input file. 75 for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) { 76 llvm::StringRef InputPath = *I; 77 // If it's a module map. 78 if (InputPath.endswith(".modulemap")) { 79 // Load the module map. 80 if (std::error_code EC = loadModuleMap(InputPath)) 81 return EC; 82 } 83 else { 84 // Else we assume it's a header list and load it. 85 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) { 86 errs() << "modularize: error: Unable to get header list '" << InputPath 87 << "': " << EC.message() << '\n'; 88 return EC; 89 } 90 } 91 } 92 // If we have a problem files list. 93 if (ProblemFilesPath.size() != 0) { 94 // Load problem files list. 95 if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) { 96 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath 97 << "': " << EC.message() << '\n'; 98 return EC; 99 } 100 } 101 return std::error_code(); 102 } 103 104 // Do coverage checks. 105 // For each loaded module map, do header coverage check. 106 // Starting from the directory of the module.map file, 107 // Find all header files, optionally looking only at files 108 // covered by the include path options, and compare against 109 // the headers referenced by the module.map file. 110 // Display warnings for unaccounted-for header files. 111 // Returns 0 if there were no errors or warnings, 1 if there 112 // were warnings, 2 if any other problem, such as a bad 113 // module map path argument was specified. 114 std::error_code ModularizeUtilities::doCoverageCheck( 115 std::vector<std::string> &IncludePaths, 116 llvm::ArrayRef<std::string> CommandLine) { 117 int ModuleMapCount = ModuleMaps.size(); 118 int ModuleMapIndex; 119 std::error_code EC; 120 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) { 121 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex]; 122 auto Checker = CoverageChecker::createCoverageChecker( 123 InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, 124 ModMap.get()); 125 std::error_code LocalEC = Checker->doChecks(); 126 if (LocalEC.value() > 0) 127 EC = LocalEC; 128 } 129 return EC; 130 } 131 132 // Load single header list and dependencies. 133 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies( 134 llvm::StringRef InputPath) { 135 136 // By default, use the path component of the list file name. 137 SmallString<256> HeaderDirectory(InputPath); 138 llvm::sys::path::remove_filename(HeaderDirectory); 139 SmallString<256> CurrentDirectory; 140 llvm::sys::fs::current_path(CurrentDirectory); 141 142 // Get the prefix if we have one. 143 if (HeaderPrefix.size() != 0) 144 HeaderDirectory = HeaderPrefix; 145 146 // Read the header list file into a buffer. 147 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 148 MemoryBuffer::getFile(InputPath); 149 if (std::error_code EC = listBuffer.getError()) 150 return EC; 151 152 // Parse the header list into strings. 153 SmallVector<StringRef, 32> Strings; 154 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 155 156 // Collect the header file names from the string list. 157 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 158 E = Strings.end(); 159 I != E; ++I) { 160 StringRef Line = I->trim(); 161 // Ignore comments and empty lines. 162 if (Line.empty() || (Line[0] == '#')) 163 continue; 164 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':'); 165 SmallString<256> HeaderFileName; 166 // Prepend header file name prefix if it's not absolute. 167 if (llvm::sys::path::is_absolute(TargetAndDependents.first)) 168 llvm::sys::path::native(TargetAndDependents.first, HeaderFileName); 169 else { 170 if (HeaderDirectory.size() != 0) 171 HeaderFileName = HeaderDirectory; 172 else 173 HeaderFileName = CurrentDirectory; 174 llvm::sys::path::append(HeaderFileName, TargetAndDependents.first); 175 llvm::sys::path::native(HeaderFileName); 176 } 177 // Handle optional dependencies. 178 DependentsVector Dependents; 179 SmallVector<StringRef, 4> DependentsList; 180 TargetAndDependents.second.split(DependentsList, " ", -1, false); 181 int Count = DependentsList.size(); 182 for (int Index = 0; Index < Count; ++Index) { 183 SmallString<256> Dependent; 184 if (llvm::sys::path::is_absolute(DependentsList[Index])) 185 Dependent = DependentsList[Index]; 186 else { 187 if (HeaderDirectory.size() != 0) 188 Dependent = HeaderDirectory; 189 else 190 Dependent = CurrentDirectory; 191 llvm::sys::path::append(Dependent, DependentsList[Index]); 192 } 193 llvm::sys::path::native(Dependent); 194 Dependents.push_back(getCanonicalPath(Dependent.str())); 195 } 196 // Get canonical form. 197 HeaderFileName = getCanonicalPath(HeaderFileName); 198 // Save the resulting header file path and dependencies. 199 HeaderFileNames.push_back(std::string(HeaderFileName.str())); 200 Dependencies[HeaderFileName.str()] = Dependents; 201 } 202 return std::error_code(); 203 } 204 205 // Load problem header list. 206 std::error_code ModularizeUtilities::loadProblemHeaderList( 207 llvm::StringRef InputPath) { 208 209 // By default, use the path component of the list file name. 210 SmallString<256> HeaderDirectory(InputPath); 211 llvm::sys::path::remove_filename(HeaderDirectory); 212 SmallString<256> CurrentDirectory; 213 llvm::sys::fs::current_path(CurrentDirectory); 214 215 // Get the prefix if we have one. 216 if (HeaderPrefix.size() != 0) 217 HeaderDirectory = HeaderPrefix; 218 219 // Read the header list file into a buffer. 220 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 221 MemoryBuffer::getFile(InputPath); 222 if (std::error_code EC = listBuffer.getError()) 223 return EC; 224 225 // Parse the header list into strings. 226 SmallVector<StringRef, 32> Strings; 227 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 228 229 // Collect the header file names from the string list. 230 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 231 E = Strings.end(); 232 I != E; ++I) { 233 StringRef Line = I->trim(); 234 // Ignore comments and empty lines. 235 if (Line.empty() || (Line[0] == '#')) 236 continue; 237 SmallString<256> HeaderFileName; 238 // Prepend header file name prefix if it's not absolute. 239 if (llvm::sys::path::is_absolute(Line)) 240 llvm::sys::path::native(Line, HeaderFileName); 241 else { 242 if (HeaderDirectory.size() != 0) 243 HeaderFileName = HeaderDirectory; 244 else 245 HeaderFileName = CurrentDirectory; 246 llvm::sys::path::append(HeaderFileName, Line); 247 llvm::sys::path::native(HeaderFileName); 248 } 249 // Get canonical form. 250 HeaderFileName = getCanonicalPath(HeaderFileName); 251 // Save the resulting header file path. 252 ProblemFileNames.push_back(std::string(HeaderFileName.str())); 253 } 254 return std::error_code(); 255 } 256 257 // Load single module map and extract header file list. 258 std::error_code ModularizeUtilities::loadModuleMap( 259 llvm::StringRef InputPath) { 260 // Get file entry for module.modulemap file. 261 auto ModuleMapEntryOrErr = 262 SourceMgr->getFileManager().getFile(InputPath); 263 264 // return error if not found. 265 if (!ModuleMapEntryOrErr) { 266 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n"; 267 return ModuleMapEntryOrErr.getError(); 268 } 269 const FileEntry *ModuleMapEntry = *ModuleMapEntryOrErr; 270 271 // Because the module map parser uses a ForwardingDiagnosticConsumer, 272 // which doesn't forward the BeginSourceFile call, we do it explicitly here. 273 DC.BeginSourceFile(*LangOpts, nullptr); 274 275 // Figure out the home directory for the module map file. 276 const DirectoryEntry *Dir = ModuleMapEntry->getDir(); 277 StringRef DirName(Dir->getName()); 278 if (llvm::sys::path::filename(DirName) == "Modules") { 279 DirName = llvm::sys::path::parent_path(DirName); 280 if (DirName.endswith(".framework")) { 281 if (auto DirEntry = FileMgr->getDirectory(DirName)) 282 Dir = *DirEntry; 283 else 284 Dir = nullptr; 285 } 286 // FIXME: This assert can fail if there's a race between the above check 287 // and the removal of the directory. 288 assert(Dir && "parent must exist"); 289 } 290 291 std::unique_ptr<ModuleMap> ModMap; 292 ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts, 293 Target.get(), *HeaderInfo)); 294 295 // Parse module.modulemap file into module map. 296 if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) { 297 return std::error_code(1, std::generic_category()); 298 } 299 300 // Do matching end call. 301 DC.EndSourceFile(); 302 303 // Reset missing header count. 304 MissingHeaderCount = 0; 305 306 if (!collectModuleMapHeaders(ModMap.get())) 307 return std::error_code(1, std::generic_category()); 308 309 // Save module map. 310 ModuleMaps.push_back(std::move(ModMap)); 311 312 // Indicate we are using module maps. 313 HasModuleMap = true; 314 315 // Return code of 1 for missing headers. 316 if (MissingHeaderCount) 317 return std::error_code(1, std::generic_category()); 318 319 return std::error_code(); 320 } 321 322 // Collect module map headers. 323 // Walks the modules and collects referenced headers into 324 // HeaderFileNames. 325 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) { 326 for (ModuleMap::module_iterator I = ModMap->module_begin(), 327 E = ModMap->module_end(); 328 I != E; ++I) { 329 if (!collectModuleHeaders(*I->second)) 330 return false; 331 } 332 return true; 333 } 334 335 // Collect referenced headers from one module. 336 // Collects the headers referenced in the given module into 337 // HeaderFileNames. 338 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) { 339 340 // Ignore explicit modules because they often have dependencies 341 // we can't know. 342 if (Mod.IsExplicit) 343 return true; 344 345 // Treat headers in umbrella directory as dependencies. 346 DependentsVector UmbrellaDependents; 347 348 // Recursively do submodules. 349 for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end(); 350 MI != MIEnd; ++MI) 351 collectModuleHeaders(**MI); 352 353 if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) { 354 std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName()); 355 // Collect umbrella header. 356 HeaderFileNames.push_back(HeaderPath); 357 358 // FUTURE: When needed, umbrella header header collection goes here. 359 } 360 else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) { 361 // If there normal headers, assume these are umbrellas and skip collection. 362 if (Mod.Headers->size() == 0) { 363 // Collect headers in umbrella directory. 364 if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents)) 365 return false; 366 } 367 } 368 369 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded, 370 // assuming they are marked as such either because of unsuitability for 371 // modules or because they are meant to be included by another header, 372 // and thus should be ignored by modularize. 373 374 int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size(); 375 376 for (int Index = 0; Index < NormalHeaderCount; ++Index) { 377 DependentsVector NormalDependents; 378 // Collect normal header. 379 const clang::Module::Header &Header( 380 Mod.Headers[clang::Module::HK_Normal][Index]); 381 std::string HeaderPath = getCanonicalPath(Header.Entry->getName()); 382 HeaderFileNames.push_back(HeaderPath); 383 } 384 385 int MissingCountThisModule = Mod.MissingHeaders.size(); 386 387 for (int Index = 0; Index < MissingCountThisModule; ++Index) { 388 std::string MissingFile = Mod.MissingHeaders[Index].FileName; 389 SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc; 390 errs() << Loc.printToString(*SourceMgr) 391 << ": error : Header not found: " << MissingFile << "\n"; 392 } 393 394 MissingHeaderCount += MissingCountThisModule; 395 396 return true; 397 } 398 399 // Collect headers from an umbrella directory. 400 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName, 401 DependentsVector &Dependents) { 402 // Initialize directory name. 403 SmallString<256> Directory(UmbrellaDirName); 404 // Walk the directory. 405 std::error_code EC; 406 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E; 407 I.increment(EC)) { 408 if (EC) 409 return false; 410 std::string File(I->path()); 411 llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status(); 412 if (!Status) 413 return false; 414 llvm::sys::fs::file_type Type = Status->type(); 415 // If the file is a directory, ignore the name and recurse. 416 if (Type == llvm::sys::fs::file_type::directory_file) { 417 if (!collectUmbrellaHeaders(File, Dependents)) 418 return false; 419 continue; 420 } 421 // If the file does not have a common header extension, ignore it. 422 if (!isHeader(File)) 423 continue; 424 // Save header name. 425 std::string HeaderPath = getCanonicalPath(File); 426 Dependents.push_back(HeaderPath); 427 } 428 return true; 429 } 430 431 // Replace .. embedded in path for purposes of having 432 // a canonical path. 433 static std::string replaceDotDot(StringRef Path) { 434 SmallString<128> Buffer; 435 llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path), 436 E = llvm::sys::path::end(Path); 437 while (B != E) { 438 if (B->compare(".") == 0) { 439 } 440 else if (B->compare("..") == 0) 441 llvm::sys::path::remove_filename(Buffer); 442 else 443 llvm::sys::path::append(Buffer, *B); 444 ++B; 445 } 446 if (Path.endswith("/") || Path.endswith("\\")) 447 Buffer.append(1, Path.back()); 448 return Buffer.c_str(); 449 } 450 451 // Convert header path to canonical form. 452 // The canonical form is basically just use forward slashes, and remove "./". 453 // \param FilePath The file path, relative to the module map directory. 454 // \returns The file path in canonical form. 455 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) { 456 std::string Tmp(replaceDotDot(FilePath)); 457 std::replace(Tmp.begin(), Tmp.end(), '\\', '/'); 458 StringRef Tmp2(Tmp); 459 if (Tmp2.startswith("./")) 460 Tmp = std::string(Tmp2.substr(2)); 461 return Tmp; 462 } 463 464 // Check for header file extension. 465 // If the file extension is .h, .inc, or missing, it's 466 // assumed to be a header. 467 // \param FileName The file name. Must not be a directory. 468 // \returns true if it has a header extension or no extension. 469 bool ModularizeUtilities::isHeader(StringRef FileName) { 470 StringRef Extension = llvm::sys::path::extension(FileName); 471 if (Extension.size() == 0) 472 return true; 473 if (Extension.equals_lower(".h")) 474 return true; 475 if (Extension.equals_lower(".inc")) 476 return true; 477 return false; 478 } 479 480 // Get directory path component from file path. 481 // \returns the component of the given path, which will be 482 // relative if the given path is relative, absolute if the 483 // given path is absolute, or "." if the path has no leading 484 // path component. 485 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) { 486 SmallString<256> Directory(Path); 487 sys::path::remove_filename(Directory); 488 if (Directory.size() == 0) 489 return "."; 490 return std::string(Directory.str()); 491 } 492 493 // Add unique problem file. 494 // Also standardizes the path. 495 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) { 496 FilePath = getCanonicalPath(FilePath); 497 // Don't add if already present. 498 for(auto &TestFilePath : ProblemFileNames) { 499 if (TestFilePath == FilePath) 500 return; 501 } 502 ProblemFileNames.push_back(FilePath); 503 } 504 505 // Add file with no compile errors. 506 // Also standardizes the path. 507 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) { 508 FilePath = getCanonicalPath(FilePath); 509 GoodFileNames.push_back(FilePath); 510 } 511 512 // List problem files. 513 void ModularizeUtilities::displayProblemFiles() { 514 errs() << "\nThese are the files with possible errors:\n\n"; 515 for (auto &ProblemFile : ProblemFileNames) { 516 errs() << ProblemFile << "\n"; 517 } 518 } 519 520 // List files with no problems. 521 void ModularizeUtilities::displayGoodFiles() { 522 errs() << "\nThese are the files with no detected errors:\n\n"; 523 for (auto &GoodFile : HeaderFileNames) { 524 bool Good = true; 525 for (auto &ProblemFile : ProblemFileNames) { 526 if (ProblemFile == GoodFile) { 527 Good = false; 528 break; 529 } 530 } 531 if (Good) 532 errs() << GoodFile << "\n"; 533 } 534 } 535 536 // List files with problem files commented out. 537 void ModularizeUtilities::displayCombinedFiles() { 538 errs() << 539 "\nThese are the combined files, with problem files preceded by #:\n\n"; 540 for (auto &File : HeaderFileNames) { 541 bool Good = true; 542 for (auto &ProblemFile : ProblemFileNames) { 543 if (ProblemFile == File) { 544 Good = false; 545 break; 546 } 547 } 548 errs() << (Good ? "" : "#") << File << "\n"; 549 } 550 } 551