//===- extra/modularize/Modularize.cpp - Check modularized headers --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a tool that checks whether a set of headers provides
// the consistent definitions required to use modules. For example, it detects
// whether the same entity (say, a NULL macro or size_t typedef) is defined in
// multiple headers or whether a header produces different definitions under
// different circumstances. These conditions cause modules built from the
// headers to behave poorly, and should be fixed before introducing a module
// map.
//
// Modularize takes as argument a file name for a file containing the
// newline-separated list of headers to check with respect to each other.
// Lines beginning with '#' and empty lines are ignored.
// Modularize also accepts regular front-end arguments.
//
// Usage:   modularize [-prefix (optional header path prefix)]
//   (include-files_list) [(front-end-options) ...]
//
// Note that unless a "-prefix (header path)" option is specified,
// non-absolute file paths in the header list file will be relative
// to the header list file directory. Use -prefix to specify a different
// directory.
//
// Note that by default, the underlying Clang front end assumes .h files
// contain C source. If your .h files in the file list contain C++ source,
// you should append the following to your command lines: -x c++
//
// Modularize will do normal parsing, reporting normal errors and warnings,
// but will also report special error messages like the following:
//
//   error: (kind) '(symbol)' defined at multiple locations:
//       (file):(row):(column)
//       (file):(row):(column)
//
//   error: header '(file)' has different contents depending on how it was
//     included
//
// where (kind) is one of "tag", "value" or "macro".
//
// The latter might be followed by messages like the following:
//
//   note: '(symbol)' in (file) at (row):(column) not always provided
//
// Future directions:
//
// Basically, we want to add new checks for whatever we can check with respect
// to checking headers for module'ability.
//
// Some ideas:
//
// 1. Try to figure out the preprocessor conditional directives that
// contribute to problems.
//
// 2. Check for correct and consistent usage of extern "C" {} and other
// directives. Warn about #include inside extern "C" {}.
//
// 3. What else?
//
// General clean-up and refactoring:
//
// 1. The Location class seems to be something that we might
// want to design to be applicable to a wider range of tools, and stick it
// somewhere into Tooling/ in mainline
//
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <algorithm>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

using namespace clang::tooling;
using namespace clang;
using namespace llvm;

// Option to specify a file name for a list of header files to check.
// This is the first positional argument; main() prints the help text and
// exits with status 1 when it is empty.
cl::opt<std::string>
ListFileName(cl::Positional,
             cl::desc("<name of file containing list of headers to check>"));

// Collect all other arguments, which will be passed to the front end.
cl::list<std::string> CC1Arguments(
    cl::ConsumeAfter, cl::desc("<arguments to be passed to front end>..."));

// Option to specify a prefix to be prepended to the header names.
// Defaults to the empty string, in which case the directory of the header
// list file is used instead (see getHeaderFileNames).
cl::opt<std::string> HeaderPrefix(
    "prefix", cl::init(""),
    cl::desc(
        "Prepend header file paths with this prefix."
        " If not specified,"
        " the files are considered to be relative to the header list file."));

// Read the header list file and collect the header file names.
116 error_code getHeaderFileNames(SmallVectorImpl<std::string> &headerFileNames, 117 StringRef listFileName, StringRef headerPrefix) { 118 119 // By default, use the path component of the list file name. 120 SmallString<256> headerDirectory(listFileName); 121 sys::path::remove_filename(headerDirectory); 122 123 // Get the prefix if we have one. 124 if (headerPrefix.size() != 0) 125 headerDirectory = headerPrefix; 126 127 // Read the header list file into a buffer. 128 OwningPtr<MemoryBuffer> listBuffer; 129 if (error_code ec = MemoryBuffer::getFile(listFileName, listBuffer)) { 130 return ec; 131 } 132 133 // Parse the header list into strings. 134 SmallVector<StringRef, 32> strings; 135 listBuffer->getBuffer().split(strings, "\n", -1, false); 136 137 // Collect the header file names from the string list. 138 for (SmallVectorImpl<StringRef>::iterator I = strings.begin(), 139 E = strings.end(); 140 I != E; ++I) { 141 StringRef line = (*I).trim(); 142 // Ignore comments and empty lines. 143 if (line.empty() || (line[0] == '#')) 144 continue; 145 SmallString<256> headerFileName; 146 // Prepend header file name prefix if it's not absolute. 147 if (sys::path::is_absolute(line)) 148 headerFileName = line; 149 else { 150 headerFileName = headerDirectory; 151 sys::path::append(headerFileName, line); 152 } 153 // Save the resulting header file path. 
154 headerFileNames.push_back(headerFileName.str()); 155 } 156 157 return error_code::success(); 158 } 159 160 // FIXME: The Location class seems to be something that we might 161 // want to design to be applicable to a wider range of tools, and stick it 162 // somewhere into Tooling/ in mainline 163 struct Location { 164 const FileEntry *File; 165 unsigned Line, Column; 166 167 Location() : File(), Line(), Column() {} 168 169 Location(SourceManager &SM, SourceLocation Loc) : File(), Line(), Column() { 170 Loc = SM.getExpansionLoc(Loc); 171 if (Loc.isInvalid()) 172 return; 173 174 std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc); 175 File = SM.getFileEntryForID(Decomposed.first); 176 if (!File) 177 return; 178 179 Line = SM.getLineNumber(Decomposed.first, Decomposed.second); 180 Column = SM.getColumnNumber(Decomposed.first, Decomposed.second); 181 } 182 183 operator bool() const { return File != 0; } 184 185 friend bool operator==(const Location &X, const Location &Y) { 186 return X.File == Y.File && X.Line == Y.Line && X.Column == Y.Column; 187 } 188 189 friend bool operator!=(const Location &X, const Location &Y) { 190 return !(X == Y); 191 } 192 193 friend bool operator<(const Location &X, const Location &Y) { 194 if (X.File != Y.File) 195 return X.File < Y.File; 196 if (X.Line != Y.Line) 197 return X.Line < Y.Line; 198 return X.Column < Y.Column; 199 } 200 friend bool operator>(const Location &X, const Location &Y) { return Y < X; } 201 friend bool operator<=(const Location &X, const Location &Y) { 202 return !(Y < X); 203 } 204 friend bool operator>=(const Location &X, const Location &Y) { 205 return !(X < Y); 206 } 207 208 }; 209 210 struct Entry { 211 enum EntryKind { 212 EK_Tag, 213 EK_Value, 214 EK_Macro, 215 216 EK_NumberOfKinds 217 } Kind; 218 219 Location Loc; 220 221 StringRef getKindName() { return getKindName(Kind); } 222 static StringRef getKindName(EntryKind kind); 223 }; 224 225 // Return a string representing the given kind. 
226 StringRef Entry::getKindName(Entry::EntryKind kind) { 227 switch (kind) { 228 case EK_Tag: 229 return "tag"; 230 case EK_Value: 231 return "value"; 232 case EK_Macro: 233 return "macro"; 234 case EK_NumberOfKinds: 235 break; 236 } 237 llvm_unreachable("invalid Entry kind"); 238 } 239 240 struct HeaderEntry { 241 std::string Name; 242 Location Loc; 243 244 friend bool operator==(const HeaderEntry &X, const HeaderEntry &Y) { 245 return X.Loc == Y.Loc && X.Name == Y.Name; 246 } 247 friend bool operator!=(const HeaderEntry &X, const HeaderEntry &Y) { 248 return !(X == Y); 249 } 250 friend bool operator<(const HeaderEntry &X, const HeaderEntry &Y) { 251 return X.Loc < Y.Loc || (X.Loc == Y.Loc && X.Name < Y.Name); 252 } 253 friend bool operator>(const HeaderEntry &X, const HeaderEntry &Y) { 254 return Y < X; 255 } 256 friend bool operator<=(const HeaderEntry &X, const HeaderEntry &Y) { 257 return !(Y < X); 258 } 259 friend bool operator>=(const HeaderEntry &X, const HeaderEntry &Y) { 260 return !(X < Y); 261 } 262 }; 263 264 typedef std::vector<HeaderEntry> HeaderContents; 265 266 class EntityMap : public StringMap<SmallVector<Entry, 2> > { 267 public: 268 DenseMap<const FileEntry *, HeaderContents> HeaderContentMismatches; 269 270 void add(const std::string &Name, enum Entry::EntryKind Kind, Location Loc) { 271 // Record this entity in its header. 272 HeaderEntry HE = { Name, Loc }; 273 CurHeaderContents[Loc.File].push_back(HE); 274 275 // Check whether we've seen this entry before. 276 SmallVector<Entry, 2> &Entries = (*this)[Name]; 277 for (unsigned I = 0, N = Entries.size(); I != N; ++I) { 278 if (Entries[I].Kind == Kind && Entries[I].Loc == Loc) 279 return; 280 } 281 282 // We have not seen this entry before; record it. 
283 Entry E = { Kind, Loc }; 284 Entries.push_back(E); 285 } 286 287 void mergeCurHeaderContents() { 288 for (DenseMap<const FileEntry *, HeaderContents>::iterator 289 H = CurHeaderContents.begin(), 290 HEnd = CurHeaderContents.end(); 291 H != HEnd; ++H) { 292 // Sort contents. 293 std::sort(H->second.begin(), H->second.end()); 294 295 // Check whether we've seen this header before. 296 DenseMap<const FileEntry *, HeaderContents>::iterator KnownH = 297 AllHeaderContents.find(H->first); 298 if (KnownH == AllHeaderContents.end()) { 299 // We haven't seen this header before; record its contents. 300 AllHeaderContents.insert(*H); 301 continue; 302 } 303 304 // If the header contents are the same, we're done. 305 if (H->second == KnownH->second) 306 continue; 307 308 // Determine what changed. 309 std::set_symmetric_difference( 310 H->second.begin(), H->second.end(), KnownH->second.begin(), 311 KnownH->second.end(), 312 std::back_inserter(HeaderContentMismatches[H->first])); 313 } 314 315 CurHeaderContents.clear(); 316 } 317 private: 318 DenseMap<const FileEntry *, HeaderContents> CurHeaderContents; 319 DenseMap<const FileEntry *, HeaderContents> AllHeaderContents; 320 }; 321 322 class CollectEntitiesVisitor : 323 public RecursiveASTVisitor<CollectEntitiesVisitor> { 324 public: 325 CollectEntitiesVisitor(SourceManager &SM, EntityMap &Entities) 326 : SM(SM), Entities(Entities) {} 327 328 bool TraverseStmt(Stmt *S) { return true; } 329 bool TraverseType(QualType T) { return true; } 330 bool TraverseTypeLoc(TypeLoc TL) { return true; } 331 bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS) { return true; } 332 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) { 333 return true; 334 } 335 bool TraverseDeclarationNameInfo(DeclarationNameInfo NameInfo) { 336 return true; 337 } 338 bool TraverseTemplateName(TemplateName Template) { return true; } 339 bool TraverseTemplateArgument(const TemplateArgument &Arg) { return true; } 340 bool 
TraverseTemplateArgumentLoc(const TemplateArgumentLoc &ArgLoc) { 341 return true; 342 } 343 bool TraverseTemplateArguments(const TemplateArgument *Args, 344 unsigned NumArgs) { 345 return true; 346 } 347 bool TraverseConstructorInitializer(CXXCtorInitializer *Init) { return true; } 348 bool TraverseLambdaCapture(LambdaExpr::Capture C) { return true; } 349 350 bool VisitNamedDecl(NamedDecl *ND) { 351 // We only care about file-context variables. 352 if (!ND->getDeclContext()->isFileContext()) 353 return true; 354 355 // Skip declarations that tend to be properly multiply-declared. 356 if (isa<NamespaceDecl>(ND) || isa<UsingDirectiveDecl>(ND) || 357 isa<NamespaceAliasDecl>(ND) || 358 isa<ClassTemplateSpecializationDecl>(ND) || isa<UsingDecl>(ND) || 359 isa<UsingShadowDecl>(ND) || isa<FunctionDecl>(ND) || 360 isa<FunctionTemplateDecl>(ND) || 361 (isa<TagDecl>(ND) && 362 !cast<TagDecl>(ND)->isThisDeclarationADefinition())) 363 return true; 364 365 std::string Name = ND->getNameAsString(); 366 if (Name.empty()) 367 return true; 368 369 Location Loc(SM, ND->getLocation()); 370 if (!Loc) 371 return true; 372 373 Entities.add(Name, isa<TagDecl>(ND) ? Entry::EK_Tag : Entry::EK_Value, Loc); 374 return true; 375 } 376 private: 377 SourceManager &SM; 378 EntityMap &Entities; 379 }; 380 381 class CollectEntitiesConsumer : public ASTConsumer { 382 public: 383 CollectEntitiesConsumer(EntityMap &Entities, Preprocessor &PP) 384 : Entities(Entities), PP(PP) {} 385 386 virtual void HandleTranslationUnit(ASTContext &Ctx) { 387 SourceManager &SM = Ctx.getSourceManager(); 388 389 // Collect declared entities. 390 CollectEntitiesVisitor(SM, Entities) 391 .TraverseDecl(Ctx.getTranslationUnitDecl()); 392 393 // Collect macro definitions. 
394 for (Preprocessor::macro_iterator M = PP.macro_begin(), 395 MEnd = PP.macro_end(); 396 M != MEnd; ++M) { 397 Location Loc(SM, M->second->getLocation()); 398 if (!Loc) 399 continue; 400 401 Entities.add(M->first->getName().str(), Entry::EK_Macro, Loc); 402 } 403 404 // Merge header contents. 405 Entities.mergeCurHeaderContents(); 406 } 407 private: 408 EntityMap &Entities; 409 Preprocessor &PP; 410 }; 411 412 class CollectEntitiesAction : public SyntaxOnlyAction { 413 public: 414 CollectEntitiesAction(EntityMap &Entities) : Entities(Entities) {} 415 protected: 416 virtual clang::ASTConsumer * 417 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { 418 return new CollectEntitiesConsumer(Entities, CI.getPreprocessor()); 419 } 420 private: 421 EntityMap &Entities; 422 }; 423 424 class ModularizeFrontendActionFactory : public FrontendActionFactory { 425 public: 426 ModularizeFrontendActionFactory(EntityMap &Entities) : Entities(Entities) {} 427 428 virtual CollectEntitiesAction *create() { 429 return new CollectEntitiesAction(Entities); 430 } 431 private: 432 EntityMap &Entities; 433 }; 434 435 int main(int argc, const char **argv) { 436 437 // This causes options to be parsed. 438 cl::ParseCommandLineOptions(argc, argv, "modularize.\n"); 439 440 // No go if we have no header list file. 441 if (ListFileName.size() == 0) { 442 cl::PrintHelpMessage(); 443 return 1; 444 } 445 446 // Get header file names. 447 SmallVector<std::string, 32> Headers; 448 if (error_code ec = getHeaderFileNames(Headers, ListFileName, HeaderPrefix)) { 449 errs() << argv[0] << ": error: Unable to get header list '" << ListFileName 450 << "': " << ec.message() << '\n'; 451 return 1; 452 } 453 454 // Create the compilation database. 
455 SmallString<256> PathBuf; 456 sys::fs::current_path(PathBuf); 457 OwningPtr<CompilationDatabase> Compilations; 458 Compilations.reset( 459 new FixedCompilationDatabase(Twine(PathBuf), CC1Arguments)); 460 461 // Parse all of the headers, detecting duplicates. 462 EntityMap Entities; 463 ClangTool Tool(*Compilations, Headers); 464 int HadErrors = Tool.run(new ModularizeFrontendActionFactory(Entities)); 465 466 // Create a place to save duplicate entity locations, separate bins per kind. 467 typedef SmallVector<Location, 8> LocationArray; 468 typedef SmallVector<LocationArray, Entry::EK_NumberOfKinds> EntryBinArray; 469 EntryBinArray EntryBins; 470 int kindIndex; 471 for (kindIndex = 0; kindIndex < Entry::EK_NumberOfKinds; ++kindIndex) { 472 LocationArray array; 473 EntryBins.push_back(array); 474 } 475 476 // Check for the same entity being defined in multiple places. 477 for (EntityMap::iterator E = Entities.begin(), EEnd = Entities.end(); 478 E != EEnd; ++E) { 479 // If only one occurance, exit early. 480 if (E->second.size() == 1) 481 continue; 482 // Clear entity locations. 483 for (EntryBinArray::iterator CI = EntryBins.begin(), CE = EntryBins.end(); 484 CI != CE; ++CI) { 485 CI->clear(); 486 } 487 // Walk the entities of a single name, collecting the locations, 488 // separated into separate bins. 489 for (unsigned I = 0, N = E->second.size(); I != N; ++I) { 490 EntryBins[E->second[I].Kind].push_back(E->second[I].Loc); 491 } 492 // Report any duplicate entity definition errors. 
493 int kindIndex = 0; 494 for (EntryBinArray::iterator DI = EntryBins.begin(), DE = EntryBins.end(); 495 DI != DE; ++DI, ++kindIndex) { 496 int eCount = DI->size(); 497 // If only 1 occurance, skip; 498 if (eCount <= 1) 499 continue; 500 LocationArray::iterator FI = DI->begin(); 501 StringRef kindName = Entry::getKindName((Entry::EntryKind) kindIndex); 502 errs() << "error: " << kindName << " '" << E->first() 503 << "' defined at multiple locations:\n"; 504 for (LocationArray::iterator FE = DI->end(); FI != FE; ++FI) { 505 errs() << " " << FI->File->getName() << ":" << FI->Line << ":" 506 << FI->Column << "\n"; 507 } 508 HadErrors = 1; 509 } 510 } 511 512 // Complain about any headers that have contents that differ based on how 513 // they are included. 514 // FIXME: Could we provide information about which preprocessor conditionals 515 // are involved? 516 for (DenseMap<const FileEntry *, HeaderContents>::iterator 517 H = Entities.HeaderContentMismatches.begin(), 518 HEnd = Entities.HeaderContentMismatches.end(); 519 H != HEnd; ++H) { 520 if (H->second.empty()) { 521 errs() << "internal error: phantom header content mismatch\n"; 522 continue; 523 } 524 525 HadErrors = 1; 526 errs() << "error: header '" << H->first->getName() 527 << "' has different contents depending on how it was included\n"; 528 for (unsigned I = 0, N = H->second.size(); I != N; ++I) { 529 errs() << "note: '" << H->second[I].Name << "' in " << H->second[I] 530 .Loc.File->getName() << " at " << H->second[I].Loc.Line << ":" 531 << H->second[I].Loc.Column << " not always provided\n"; 532 } 533 } 534 535 return HadErrors; 536 } 537