1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class for loading and validating a module map or 11 // header list by checking that all headers in the corresponding directories 12 // are accounted for. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "clang/Basic/SourceManager.h" 17 #include "clang/Driver/Options.h" 18 #include "clang/Frontend/CompilerInstance.h" 19 #include "clang/Frontend/FrontendActions.h" 20 #include "CoverageChecker.h" 21 #include "llvm/ADT/SmallString.h" 22 #include "llvm/Support/FileUtilities.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include "ModularizeUtilities.h" 27 28 using namespace clang; 29 using namespace llvm; 30 using namespace Modularize; 31 32 namespace { 33 // Subclass TargetOptions so we can construct it inline with 34 // the minimal option, the triple. 35 class ModuleMapTargetOptions : public clang::TargetOptions { 36 public: 37 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); } 38 }; 39 } // namespace 40 41 // ModularizeUtilities class implementation. 42 43 // Constructor. 44 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths, 45 llvm::StringRef Prefix) 46 : InputFilePaths(InputPaths), 47 HeaderPrefix(Prefix), 48 HasModuleMap(false), 49 MissingHeaderCount(0), 50 // Init clang stuff needed for loading the module map and preprocessing. 51 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()), 52 DiagnosticOpts(new DiagnosticOptions()), 53 DC(llvm::errs(), DiagnosticOpts.get()), 54 Diagnostics( 55 new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)), 56 TargetOpts(new ModuleMapTargetOptions()), 57 Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)), 58 FileMgr(new FileManager(FileSystemOpts)), 59 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)), 60 HeaderSearchOpts(new HeaderSearchOptions()), 61 HeaderInfo(new HeaderSearch(HeaderSearchOpts, *SourceMgr, *Diagnostics, 62 *LangOpts, Target.get())) { 63 } 64 65 // Create instance of ModularizeUtilities, to simplify setting up 66 // subordinate objects. 67 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities( 68 std::vector<std::string> &InputPaths, llvm::StringRef Prefix) { 69 70 return new ModularizeUtilities(InputPaths, Prefix); 71 } 72 73 // Load all header lists and dependencies. 74 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() { 75 typedef std::vector<std::string>::iterator Iter; 76 // For each input file. 77 for (Iter I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) { 78 llvm::StringRef InputPath = *I; 79 // If it's a module map. 80 if (InputPath.endswith(".modulemap")) { 81 // Load the module map. 82 if (std::error_code EC = loadModuleMap(InputPath)) 83 return EC; 84 } 85 else { 86 // Else we assume it's a header list and load it. 87 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) { 88 errs() << "modularize: error: Unable to get header list '" << InputPath 89 << "': " << EC.message() << '\n'; 90 return EC; 91 } 92 } 93 } 94 return std::error_code(); 95 } 96 97 // Do coverage checks. 98 // For each loaded module map, do header coverage check. 99 // Starting from the directory of the module.map file, 100 // Find all header files, optionally looking only at files 101 // covered by the include path options, and compare against 102 // the headers referenced by the module.map file. 103 // Display warnings for unaccounted-for header files. 104 // Returns 0 if there were no errors or warnings, 1 if there 105 // were warnings, 2 if any other problem, such as a bad 106 // module map path argument was specified. 107 std::error_code ModularizeUtilities::doCoverageCheck( 108 std::vector<std::string> &IncludePaths, 109 llvm::ArrayRef<std::string> CommandLine) { 110 int ModuleMapCount = ModuleMaps.size(); 111 int ModuleMapIndex; 112 std::error_code EC; 113 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) { 114 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex]; 115 CoverageChecker *Checker = CoverageChecker::createCoverageChecker( 116 InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, ModMap.get()); 117 std::error_code LocalEC = Checker->doChecks(); 118 if (LocalEC.value() > 0) 119 EC = LocalEC; 120 } 121 return EC; 122 } 123 124 // Load single header list and dependencies. 125 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies( 126 llvm::StringRef InputPath) { 127 128 // By default, use the path component of the list file name. 129 SmallString<256> HeaderDirectory(InputPath); 130 llvm::sys::path::remove_filename(HeaderDirectory); 131 SmallString<256> CurrentDirectory; 132 llvm::sys::fs::current_path(CurrentDirectory); 133 134 // Get the prefix if we have one. 135 if (HeaderPrefix.size() != 0) 136 HeaderDirectory = HeaderPrefix; 137 138 // Read the header list file into a buffer. 139 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 140 MemoryBuffer::getFile(InputPath); 141 if (std::error_code EC = listBuffer.getError()) 142 return EC; 143 144 // Parse the header list into strings. 145 SmallVector<StringRef, 32> Strings; 146 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 147 148 // Collect the header file names from the string list. 149 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 150 E = Strings.end(); 151 I != E; ++I) { 152 StringRef Line = I->trim(); 153 // Ignore comments and empty lines. 154 if (Line.empty() || (Line[0] == '#')) 155 continue; 156 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':'); 157 SmallString<256> HeaderFileName; 158 // Prepend header file name prefix if it's not absolute. 159 if (llvm::sys::path::is_absolute(TargetAndDependents.first)) 160 llvm::sys::path::native(TargetAndDependents.first, HeaderFileName); 161 else { 162 if (HeaderDirectory.size() != 0) 163 HeaderFileName = HeaderDirectory; 164 else 165 HeaderFileName = CurrentDirectory; 166 llvm::sys::path::append(HeaderFileName, TargetAndDependents.first); 167 llvm::sys::path::native(HeaderFileName); 168 } 169 // Handle optional dependencies. 170 DependentsVector Dependents; 171 SmallVector<StringRef, 4> DependentsList; 172 TargetAndDependents.second.split(DependentsList, " ", -1, false); 173 int Count = DependentsList.size(); 174 for (int Index = 0; Index < Count; ++Index) { 175 SmallString<256> Dependent; 176 if (llvm::sys::path::is_absolute(DependentsList[Index])) 177 Dependent = DependentsList[Index]; 178 else { 179 if (HeaderDirectory.size() != 0) 180 Dependent = HeaderDirectory; 181 else 182 Dependent = CurrentDirectory; 183 llvm::sys::path::append(Dependent, DependentsList[Index]); 184 } 185 llvm::sys::path::native(Dependent); 186 Dependents.push_back(getCanonicalPath(Dependent.str())); 187 } 188 // Get canonical form. 189 HeaderFileName = getCanonicalPath(HeaderFileName); 190 // Save the resulting header file path and dependencies. 191 HeaderFileNames.push_back(HeaderFileName.str()); 192 Dependencies[HeaderFileName.str()] = Dependents; 193 } 194 return std::error_code(); 195 } 196 197 // Load single module map and extract header file list. 198 std::error_code ModularizeUtilities::loadModuleMap( 199 llvm::StringRef InputPath) { 200 // Get file entry for module.modulemap file. 201 const FileEntry *ModuleMapEntry = 202 SourceMgr->getFileManager().getFile(InputPath); 203 204 // return error if not found. 205 if (!ModuleMapEntry) { 206 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n"; 207 return std::error_code(1, std::generic_category()); 208 } 209 210 // Because the module map parser uses a ForwardingDiagnosticConsumer, 211 // which doesn't forward the BeginSourceFile call, we do it explicitly here. 212 DC.BeginSourceFile(*LangOpts, nullptr); 213 214 // Figure out the home directory for the module map file. 215 const DirectoryEntry *Dir = ModuleMapEntry->getDir(); 216 StringRef DirName(Dir->getName()); 217 if (llvm::sys::path::filename(DirName) == "Modules") { 218 DirName = llvm::sys::path::parent_path(DirName); 219 if (DirName.endswith(".framework")) 220 Dir = FileMgr->getDirectory(DirName); 221 // FIXME: This assert can fail if there's a race between the above check 222 // and the removal of the directory. 223 assert(Dir && "parent must exist"); 224 } 225 226 std::unique_ptr<ModuleMap> ModMap; 227 ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts, 228 Target.get(), *HeaderInfo)); 229 230 // Parse module.modulemap file into module map. 231 if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) { 232 return std::error_code(1, std::generic_category()); 233 } 234 235 // Do matching end call. 236 DC.EndSourceFile(); 237 238 // Reset missing header count. 239 MissingHeaderCount = 0; 240 241 if (!collectModuleMapHeaders(ModMap.get())) 242 return std::error_code(1, std::generic_category()); 243 244 // Save module map. 245 ModuleMaps.push_back(std::move(ModMap)); 246 247 // Indicate we are using module maps. 248 HasModuleMap = true; 249 250 // Return code of 1 for missing headers. 251 if (MissingHeaderCount) 252 return std::error_code(1, std::generic_category()); 253 254 return std::error_code(); 255 } 256 257 // Collect module map headers. 258 // Walks the modules and collects referenced headers into 259 // HeaderFileNames. 260 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) { 261 for (ModuleMap::module_iterator I = ModMap->module_begin(), 262 E = ModMap->module_end(); 263 I != E; ++I) { 264 if (!collectModuleHeaders(*I->second)) 265 return false; 266 } 267 return true; 268 } 269 270 // Collect referenced headers from one module. 271 // Collects the headers referenced in the given module into 272 // HeaderFileNames. 273 bool ModularizeUtilities::collectModuleHeaders(const Module &Mod) { 274 275 // Ignore explicit modules because they often have dependencies 276 // we can't know. 277 if (Mod.IsExplicit) 278 return true; 279 280 // Treat headers in umbrella directory as dependencies. 281 DependentsVector UmbrellaDependents; 282 283 // Recursively do submodules. 284 for (Module::submodule_const_iterator MI = Mod.submodule_begin(), 285 MIEnd = Mod.submodule_end(); 286 MI != MIEnd; ++MI) 287 collectModuleHeaders(**MI); 288 289 if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) { 290 std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName()); 291 // Collect umbrella header. 292 HeaderFileNames.push_back(HeaderPath); 293 294 // FUTURE: When needed, umbrella header header collection goes here. 295 } 296 else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) { 297 // If there normal headers, assume these are umbrellas and skip collection. 298 if (Mod.Headers->size() == 0) { 299 // Collect headers in umbrella directory. 300 if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents)) 301 return false; 302 } 303 } 304 305 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded, 306 // assuming they are marked as such either because of unsuitability for 307 // modules or because they are meant to be included by another header, 308 // and thus should be ignored by modularize. 309 310 int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size(); 311 312 for (int Index = 0; Index < NormalHeaderCount; ++Index) { 313 DependentsVector NormalDependents; 314 // Collect normal header. 315 const clang::Module::Header &Header( 316 Mod.Headers[clang::Module::HK_Normal][Index]); 317 std::string HeaderPath = getCanonicalPath(Header.Entry->getName()); 318 HeaderFileNames.push_back(HeaderPath); 319 } 320 321 int MissingCountThisModule = Mod.MissingHeaders.size(); 322 323 for (int Index = 0; Index < MissingCountThisModule; ++Index) { 324 std::string MissingFile = Mod.MissingHeaders[Index].FileName; 325 SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc; 326 errs() << Loc.printToString(*SourceMgr) 327 << ": error : Header not found: " << MissingFile << "\n"; 328 } 329 330 MissingHeaderCount += MissingCountThisModule; 331 332 return true; 333 } 334 335 // Collect headers from an umbrella directory. 336 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName, 337 DependentsVector &Dependents) { 338 // Initialize directory name. 339 SmallString<256> Directory(UmbrellaDirName); 340 // Walk the directory. 341 std::error_code EC; 342 llvm::sys::fs::file_status Status; 343 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E; 344 I.increment(EC)) { 345 if (EC) 346 return false; 347 std::string File(I->path()); 348 I->status(Status); 349 llvm::sys::fs::file_type Type = Status.type(); 350 // If the file is a directory, ignore the name and recurse. 351 if (Type == llvm::sys::fs::file_type::directory_file) { 352 if (!collectUmbrellaHeaders(File, Dependents)) 353 return false; 354 continue; 355 } 356 // If the file does not have a common header extension, ignore it. 357 if (!isHeader(File)) 358 continue; 359 // Save header name. 360 std::string HeaderPath = getCanonicalPath(File); 361 Dependents.push_back(HeaderPath); 362 } 363 return true; 364 } 365 366 // Replace .. embedded in path for purposes of having 367 // a canonical path. 368 static std::string replaceDotDot(StringRef Path) { 369 SmallString<128> Buffer; 370 llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path), 371 E = llvm::sys::path::end(Path); 372 while (B != E) { 373 if (B->compare(".") == 0) { 374 } 375 else if (B->compare("..") == 0) 376 llvm::sys::path::remove_filename(Buffer); 377 else 378 llvm::sys::path::append(Buffer, *B); 379 ++B; 380 } 381 if (Path.endswith("/") || Path.endswith("\\")) 382 Buffer.append(1, Path.back()); 383 return Buffer.c_str(); 384 } 385 386 // Convert header path to canonical form. 387 // The canonical form is basically just use forward slashes, and remove "./". 388 // \param FilePath The file path, relative to the module map directory. 389 // \returns The file path in canonical form. 390 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) { 391 std::string Tmp(replaceDotDot(FilePath)); 392 std::replace(Tmp.begin(), Tmp.end(), '\\', '/'); 393 StringRef Tmp2(Tmp); 394 if (Tmp2.startswith("./")) 395 Tmp = Tmp2.substr(2); 396 return Tmp; 397 } 398 399 // Check for header file extension. 400 // If the file extension is .h, .inc, or missing, it's 401 // assumed to be a header. 402 // \param FileName The file name. Must not be a directory. 403 // \returns true if it has a header extension or no extension. 404 bool ModularizeUtilities::isHeader(StringRef FileName) { 405 StringRef Extension = llvm::sys::path::extension(FileName); 406 if (Extension.size() == 0) 407 return false; 408 if (Extension.equals_lower(".h")) 409 return true; 410 if (Extension.equals_lower(".inc")) 411 return true; 412 return false; 413 } 414 415 // Get directory path component from file path. 416 // \returns the component of the given path, which will be 417 // relative if the given path is relative, absolute if the 418 // given path is absolute, or "." if the path has no leading 419 // path component. 420 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) { 421 SmallString<256> Directory(Path); 422 sys::path::remove_filename(Directory); 423 if (Directory.size() == 0) 424 return "."; 425 return Directory.str(); 426 } 427