1 //===- extra/modularize/Modularize.cpp - Check modularized headers --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a tool that checks whether a set of headers provides 11 // the consistent definitions required to use modules. For example, it detects 12 // whether the same entity (say, a NULL macro or size_t typedef) is defined in 13 // multiple headers or whether a header produces different definitions under 14 // different circumstances. These conditions cause modules built from the 15 // headers to behave poorly, and should be fixed before introducing a module 16 // map. 17 // 18 // Modularize takes as argument a file name for a file containing the 19 // newline-separated list of headers to check with respect to each other. 20 // Lines beginning with '#' and empty lines are ignored. 21 // Modularize also accepts regular front-end arguments. 22 // 23 // Usage: modularize [-prefix (optional header path prefix)] 24 // (include-files_list) [(front-end-options) ...] 25 // 26 // Note that unless a "-prefix (header path)" option is specified, 27 // non-absolute file paths in the header list file will be relative 28 // to the header list file directory. Use -prefix to specify a different 29 // directory. 30 // 31 // Note that by default, the underlying Clang front end assumes .h files 32 // contain C source. 
If your .h files in the file list contain C++ source, 33 // you should append the following to your command lines: -x c++ 34 // 35 // Modularize will do normal parsing, reporting normal errors and warnings, 36 // but will also report special error messages like the following: 37 // 38 // error: '(symbol)' defined at multiple locations: 39 // (file):(row):(column) 40 // (file):(row):(column) 41 // 42 // error: header '(file)' has different contents depending on how it was 43 // included 44 // 45 // The latter might be followed by messages like the following: 46 // 47 // note: '(symbol)' in (file) at (row):(column) not always provided 48 // 49 // Checks will also be performed for macro expansions, defined(macro) 50 // expressions, and preprocessor conditional directives that evaluate 51 // inconsistently, and can produce error messages like the following: 52 // 53 // (...)/SubHeader.h:11:5: 54 // #if SYMBOL == 1 55 // ^ 56 // error: Macro instance 'SYMBOL' has different values in this header, 57 // depending on how it was included. 58 // 'SYMBOL' expanded to: '1' with respect to these inclusion paths: 59 // (...)/Header1.h 60 // (...)/SubHeader.h 61 // (...)/SubHeader.h:3:9: 62 // #define SYMBOL 1 63 // ^ 64 // Macro defined here. 65 // 'SYMBOL' expanded to: '2' with respect to these inclusion paths: 66 // (...)/Header2.h 67 // (...)/SubHeader.h 68 // (...)/SubHeader.h:7:9: 69 // #define SYMBOL 2 70 // ^ 71 // Macro defined here. 72 // 73 // See PreprocessorTracker.cpp for additional details. 74 // 75 // Current problems: 76 // 77 // Modularize has problems with C++: 78 // 79 // 1. Modularize doesn't distinguish class of the same name in 80 // different namespaces. The result is erroneous duplicate definition errors. 81 // 82 // 2. Modularize doesn't distinguish between a regular class and a template 83 // class of the same name. 84 // 85 // Other problems: 86 // 87 // 3. 
There seem to be a lot of spurious "not always provided" messages, 88 // and many duplicates of these. 89 // 90 // 4. There are some legitimate uses of preprocessor macros that 91 // modularize will flag as errors, such as repeatedly #include'ing 92 // a file and using interleaving defined/undefined macros 93 // to change declarations in the included file. Is there a way 94 // to address this? Maybe have modularize accept a list of macros 95 // to ignore. Otherwise you can just exclude the file, after checking 96 // for legitimate errors. 97 // 98 // Future directions: 99 // 100 // Basically, we want to add new checks for whatever we can check with respect 101 // to checking headers for module'ability. 102 // 103 // Some ideas: 104 // 105 // 1. Fix the C++ and other problems. 106 // 107 // 2. Add options to disable any of the checks, in case 108 // there is some problem with them, or the messages get too verbose. 109 // 110 // 3. Try to figure out the preprocessor conditional directives that 111 // contribute to problems and tie them to the inconsistent definitions. 112 // 113 // 4. Check for correct and consistent usage of extern "C" {} and other 114 // directives. Warn about #include inside extern "C" {}. 115 // 116 // 5. To support headers that depend on other headers to be included first 117 // add support for a dependency list to the header list input. 118 // I.e.: header.h: dependent1.h dependent2.h 119 // (Implement using clang's "-include" option?) 120 // 121 // 6. What else? 122 // 123 // General clean-up and refactoring: 124 // 125 // 1. 
The Location class seems to be something that we might 126 // want to design to be applicable to a wider range of tools, and stick it 127 // somewhere into Tooling/ in mainline 128 // 129 //===----------------------------------------------------------------------===// 130 131 #include "clang/AST/ASTConsumer.h" 132 #include "clang/AST/ASTContext.h" 133 #include "clang/AST/RecursiveASTVisitor.h" 134 #include "clang/Basic/SourceManager.h" 135 #include "clang/Frontend/CompilerInstance.h" 136 #include "clang/Frontend/FrontendActions.h" 137 #include "clang/Lex/Preprocessor.h" 138 #include "clang/Tooling/CompilationDatabase.h" 139 #include "clang/Tooling/Tooling.h" 140 #include "llvm/ADT/OwningPtr.h" 141 #include "llvm/ADT/StringRef.h" 142 #include "llvm/Config/config.h" 143 #include "llvm/Support/CommandLine.h" 144 #include "llvm/Support/FileSystem.h" 145 #include "llvm/Support/MemoryBuffer.h" 146 #include "llvm/Support/Path.h" 147 #include <algorithm> 148 #include <fstream> 149 #include <iterator> 150 #include <string> 151 #include <vector> 152 #include "PreprocessorTracker.h" 153 154 using namespace clang::tooling; 155 using namespace clang; 156 using namespace llvm; 157 using namespace Modularize; 158 159 // Option to specify a file name for a list of header files to check. 160 cl::opt<std::string> 161 ListFileName(cl::Positional, 162 cl::desc("<name of file containing list of headers to check>")); 163 164 // Collect all other arguments, which will be passed to the front end. 165 cl::list<std::string> 166 CC1Arguments(cl::ConsumeAfter, 167 cl::desc("<arguments to be passed to front end>...")); 168 169 // Option to specify a prefix to be prepended to the header names. 170 cl::opt<std::string> HeaderPrefix( 171 "prefix", cl::init(""), 172 cl::desc( 173 "Prepend header file paths with this prefix." 174 " If not specified," 175 " the files are considered to be relative to the header list file.")); 176 177 // Read the header list file and collect the header file names. 
178 error_code getHeaderFileNames(SmallVectorImpl<std::string> &HeaderFileNames, 179 StringRef ListFileName, StringRef HeaderPrefix) { 180 181 // By default, use the path component of the list file name. 182 SmallString<256> HeaderDirectory(ListFileName); 183 sys::path::remove_filename(HeaderDirectory); 184 185 // Get the prefix if we have one. 186 if (HeaderPrefix.size() != 0) 187 HeaderDirectory = HeaderPrefix; 188 189 // Read the header list file into a buffer. 190 OwningPtr<MemoryBuffer> listBuffer; 191 if (error_code ec = MemoryBuffer::getFile(ListFileName, listBuffer)) { 192 return ec; 193 } 194 195 // Parse the header list into strings. 196 SmallVector<StringRef, 32> Strings; 197 listBuffer->getBuffer().split(Strings, "\n", -1, false); 198 199 // Collect the header file names from the string list. 200 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 201 E = Strings.end(); 202 I != E; ++I) { 203 StringRef Line = (*I).trim(); 204 // Ignore comments and empty lines. 205 if (Line.empty() || (Line[0] == '#')) 206 continue; 207 SmallString<256> HeaderFileName; 208 // Prepend header file name prefix if it's not absolute. 209 if (sys::path::is_absolute(Line)) 210 HeaderFileName = Line; 211 else { 212 HeaderFileName = HeaderDirectory; 213 sys::path::append(HeaderFileName, Line); 214 } 215 // Save the resulting header file path. 
216 HeaderFileNames.push_back(HeaderFileName.str()); 217 } 218 219 return error_code::success(); 220 } 221 222 // FIXME: The Location class seems to be something that we might 223 // want to design to be applicable to a wider range of tools, and stick it 224 // somewhere into Tooling/ in mainline 225 struct Location { 226 const FileEntry *File; 227 unsigned Line, Column; 228 229 Location() : File(), Line(), Column() {} 230 231 Location(SourceManager &SM, SourceLocation Loc) : File(), Line(), Column() { 232 Loc = SM.getExpansionLoc(Loc); 233 if (Loc.isInvalid()) 234 return; 235 236 std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc); 237 File = SM.getFileEntryForID(Decomposed.first); 238 if (!File) 239 return; 240 241 Line = SM.getLineNumber(Decomposed.first, Decomposed.second); 242 Column = SM.getColumnNumber(Decomposed.first, Decomposed.second); 243 } 244 245 operator bool() const { return File != 0; } 246 247 friend bool operator==(const Location &X, const Location &Y) { 248 return X.File == Y.File && X.Line == Y.Line && X.Column == Y.Column; 249 } 250 251 friend bool operator!=(const Location &X, const Location &Y) { 252 return !(X == Y); 253 } 254 255 friend bool operator<(const Location &X, const Location &Y) { 256 if (X.File != Y.File) 257 return X.File < Y.File; 258 if (X.Line != Y.Line) 259 return X.Line < Y.Line; 260 return X.Column < Y.Column; 261 } 262 friend bool operator>(const Location &X, const Location &Y) { return Y < X; } 263 friend bool operator<=(const Location &X, const Location &Y) { 264 return !(Y < X); 265 } 266 friend bool operator>=(const Location &X, const Location &Y) { 267 return !(X < Y); 268 } 269 }; 270 271 struct Entry { 272 enum EntryKind { 273 EK_Tag, 274 EK_Value, 275 EK_Macro, 276 277 EK_NumberOfKinds 278 } Kind; 279 280 Location Loc; 281 282 StringRef getKindName() { return getKindName(Kind); } 283 static StringRef getKindName(EntryKind kind); 284 }; 285 286 // Return a string representing the given kind. 
287 StringRef Entry::getKindName(Entry::EntryKind kind) { 288 switch (kind) { 289 case EK_Tag: 290 return "tag"; 291 case EK_Value: 292 return "value"; 293 case EK_Macro: 294 return "macro"; 295 case EK_NumberOfKinds: 296 break; 297 } 298 llvm_unreachable("invalid Entry kind"); 299 } 300 301 struct HeaderEntry { 302 std::string Name; 303 Location Loc; 304 305 friend bool operator==(const HeaderEntry &X, const HeaderEntry &Y) { 306 return X.Loc == Y.Loc && X.Name == Y.Name; 307 } 308 friend bool operator!=(const HeaderEntry &X, const HeaderEntry &Y) { 309 return !(X == Y); 310 } 311 friend bool operator<(const HeaderEntry &X, const HeaderEntry &Y) { 312 return X.Loc < Y.Loc || (X.Loc == Y.Loc && X.Name < Y.Name); 313 } 314 friend bool operator>(const HeaderEntry &X, const HeaderEntry &Y) { 315 return Y < X; 316 } 317 friend bool operator<=(const HeaderEntry &X, const HeaderEntry &Y) { 318 return !(Y < X); 319 } 320 friend bool operator>=(const HeaderEntry &X, const HeaderEntry &Y) { 321 return !(X < Y); 322 } 323 }; 324 325 typedef std::vector<HeaderEntry> HeaderContents; 326 327 class EntityMap : public StringMap<SmallVector<Entry, 2> > { 328 public: 329 DenseMap<const FileEntry *, HeaderContents> HeaderContentMismatches; 330 331 void add(const std::string &Name, enum Entry::EntryKind Kind, Location Loc) { 332 // Record this entity in its header. 333 HeaderEntry HE = { Name, Loc }; 334 CurHeaderContents[Loc.File].push_back(HE); 335 336 // Check whether we've seen this entry before. 337 SmallVector<Entry, 2> &Entries = (*this)[Name]; 338 for (unsigned I = 0, N = Entries.size(); I != N; ++I) { 339 if (Entries[I].Kind == Kind && Entries[I].Loc == Loc) 340 return; 341 } 342 343 // We have not seen this entry before; record it. 
344 Entry E = { Kind, Loc }; 345 Entries.push_back(E); 346 } 347 348 void mergeCurHeaderContents() { 349 for (DenseMap<const FileEntry *, HeaderContents>::iterator 350 H = CurHeaderContents.begin(), 351 HEnd = CurHeaderContents.end(); 352 H != HEnd; ++H) { 353 // Sort contents. 354 std::sort(H->second.begin(), H->second.end()); 355 356 // Check whether we've seen this header before. 357 DenseMap<const FileEntry *, HeaderContents>::iterator KnownH = 358 AllHeaderContents.find(H->first); 359 if (KnownH == AllHeaderContents.end()) { 360 // We haven't seen this header before; record its contents. 361 AllHeaderContents.insert(*H); 362 continue; 363 } 364 365 // If the header contents are the same, we're done. 366 if (H->second == KnownH->second) 367 continue; 368 369 // Determine what changed. 370 std::set_symmetric_difference( 371 H->second.begin(), H->second.end(), KnownH->second.begin(), 372 KnownH->second.end(), 373 std::back_inserter(HeaderContentMismatches[H->first])); 374 } 375 376 CurHeaderContents.clear(); 377 } 378 379 private: 380 DenseMap<const FileEntry *, HeaderContents> CurHeaderContents; 381 DenseMap<const FileEntry *, HeaderContents> AllHeaderContents; 382 }; 383 384 class CollectEntitiesVisitor 385 : public RecursiveASTVisitor<CollectEntitiesVisitor> { 386 public: 387 CollectEntitiesVisitor(SourceManager &SM, EntityMap &Entities) 388 : SM(SM), Entities(Entities) {} 389 390 bool TraverseStmt(Stmt *S) { return true; } 391 bool TraverseType(QualType T) { return true; } 392 bool TraverseTypeLoc(TypeLoc TL) { return true; } 393 bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS) { return true; } 394 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) { 395 return true; 396 } 397 bool TraverseDeclarationNameInfo(DeclarationNameInfo NameInfo) { 398 return true; 399 } 400 bool TraverseTemplateName(TemplateName Template) { return true; } 401 bool TraverseTemplateArgument(const TemplateArgument &Arg) { return true; } 402 bool 
TraverseTemplateArgumentLoc(const TemplateArgumentLoc &ArgLoc) { 403 return true; 404 } 405 bool TraverseTemplateArguments(const TemplateArgument *Args, 406 unsigned NumArgs) { 407 return true; 408 } 409 bool TraverseConstructorInitializer(CXXCtorInitializer *Init) { return true; } 410 bool TraverseLambdaCapture(LambdaExpr::Capture C) { return true; } 411 412 bool VisitNamedDecl(NamedDecl *ND) { 413 // We only care about file-context variables. 414 if (!ND->getDeclContext()->isFileContext()) 415 return true; 416 417 // Skip declarations that tend to be properly multiply-declared. 418 if (isa<NamespaceDecl>(ND) || isa<UsingDirectiveDecl>(ND) || 419 isa<NamespaceAliasDecl>(ND) || 420 isa<ClassTemplateSpecializationDecl>(ND) || isa<UsingDecl>(ND) || 421 isa<UsingShadowDecl>(ND) || isa<FunctionDecl>(ND) || 422 isa<FunctionTemplateDecl>(ND) || 423 (isa<TagDecl>(ND) && 424 !cast<TagDecl>(ND)->isThisDeclarationADefinition())) 425 return true; 426 427 std::string Name = ND->getNameAsString(); 428 if (Name.empty()) 429 return true; 430 431 Location Loc(SM, ND->getLocation()); 432 if (!Loc) 433 return true; 434 435 Entities.add(Name, isa<TagDecl>(ND) ? Entry::EK_Tag : Entry::EK_Value, Loc); 436 return true; 437 } 438 439 private: 440 SourceManager &SM; 441 EntityMap &Entities; 442 }; 443 444 class CollectEntitiesConsumer : public ASTConsumer { 445 public: 446 CollectEntitiesConsumer(EntityMap &Entities, 447 PreprocessorTracker &preprocessorTracker, 448 Preprocessor &PP, StringRef InFile) 449 : Entities(Entities), PPTracker(preprocessorTracker), PP(PP) { 450 PPTracker.handlePreprocessorEntry(PP, InFile); 451 } 452 453 ~CollectEntitiesConsumer() { PPTracker.handlePreprocessorExit(); } 454 455 virtual void HandleTranslationUnit(ASTContext &Ctx) { 456 SourceManager &SM = Ctx.getSourceManager(); 457 458 // Collect declared entities. 459 CollectEntitiesVisitor(SM, Entities) 460 .TraverseDecl(Ctx.getTranslationUnitDecl()); 461 462 // Collect macro definitions. 
463 for (Preprocessor::macro_iterator M = PP.macro_begin(), 464 MEnd = PP.macro_end(); 465 M != MEnd; ++M) { 466 Location Loc(SM, M->second->getLocation()); 467 if (!Loc) 468 continue; 469 470 Entities.add(M->first->getName().str(), Entry::EK_Macro, Loc); 471 } 472 473 // Merge header contents. 474 Entities.mergeCurHeaderContents(); 475 } 476 477 private: 478 EntityMap &Entities; 479 PreprocessorTracker &PPTracker; 480 Preprocessor &PP; 481 }; 482 483 class CollectEntitiesAction : public SyntaxOnlyAction { 484 public: 485 CollectEntitiesAction(EntityMap &Entities, 486 PreprocessorTracker &preprocessorTracker) 487 : Entities(Entities), PPTracker(preprocessorTracker) {} 488 489 protected: 490 virtual clang::ASTConsumer *CreateASTConsumer(CompilerInstance &CI, 491 StringRef InFile) { 492 return new CollectEntitiesConsumer(Entities, PPTracker, 493 CI.getPreprocessor(), InFile); 494 } 495 496 private: 497 EntityMap &Entities; 498 PreprocessorTracker &PPTracker; 499 }; 500 501 class ModularizeFrontendActionFactory : public FrontendActionFactory { 502 public: 503 ModularizeFrontendActionFactory(EntityMap &Entities, 504 PreprocessorTracker &preprocessorTracker) 505 : Entities(Entities), PPTracker(preprocessorTracker) {} 506 507 virtual CollectEntitiesAction *create() { 508 return new CollectEntitiesAction(Entities, PPTracker); 509 } 510 511 private: 512 EntityMap &Entities; 513 PreprocessorTracker &PPTracker; 514 }; 515 516 int main(int Argc, const char **Argv) { 517 518 // This causes options to be parsed. 519 cl::ParseCommandLineOptions(Argc, Argv, "modularize.\n"); 520 521 // No go if we have no header list file. 522 if (ListFileName.size() == 0) { 523 cl::PrintHelpMessage(); 524 return 1; 525 } 526 527 // Get header file names. 
528 SmallVector<std::string, 32> Headers; 529 if (error_code EC = getHeaderFileNames(Headers, ListFileName, HeaderPrefix)) { 530 errs() << Argv[0] << ": error: Unable to get header list '" << ListFileName 531 << "': " << EC.message() << '\n'; 532 return 1; 533 } 534 535 // Create the compilation database. 536 SmallString<256> PathBuf; 537 sys::fs::current_path(PathBuf); 538 OwningPtr<CompilationDatabase> Compilations; 539 Compilations.reset( 540 new FixedCompilationDatabase(Twine(PathBuf), CC1Arguments)); 541 542 // Create preprocessor tracker, to watch for macro and conditional problems. 543 OwningPtr<PreprocessorTracker> PPTracker(PreprocessorTracker::create()); 544 545 // Parse all of the headers, detecting duplicates. 546 EntityMap Entities; 547 ClangTool Tool(*Compilations, Headers); 548 int HadErrors = 549 Tool.run(new ModularizeFrontendActionFactory(Entities, *PPTracker)); 550 551 // Create a place to save duplicate entity locations, separate bins per kind. 552 typedef SmallVector<Location, 8> LocationArray; 553 typedef SmallVector<LocationArray, Entry::EK_NumberOfKinds> EntryBinArray; 554 EntryBinArray EntryBins; 555 int KindIndex; 556 for (KindIndex = 0; KindIndex < Entry::EK_NumberOfKinds; ++KindIndex) { 557 LocationArray Array; 558 EntryBins.push_back(Array); 559 } 560 561 // Check for the same entity being defined in multiple places. 562 for (EntityMap::iterator E = Entities.begin(), EEnd = Entities.end(); 563 E != EEnd; ++E) { 564 // If only one occurance, exit early. 565 if (E->second.size() == 1) 566 continue; 567 // Clear entity locations. 568 for (EntryBinArray::iterator CI = EntryBins.begin(), CE = EntryBins.end(); 569 CI != CE; ++CI) { 570 CI->clear(); 571 } 572 // Walk the entities of a single name, collecting the locations, 573 // separated into separate bins. 574 for (unsigned I = 0, N = E->second.size(); I != N; ++I) { 575 EntryBins[E->second[I].Kind].push_back(E->second[I].Loc); 576 } 577 // Report any duplicate entity definition errors. 
578 int KindIndex = 0; 579 for (EntryBinArray::iterator DI = EntryBins.begin(), DE = EntryBins.end(); 580 DI != DE; ++DI, ++KindIndex) { 581 int ECount = DI->size(); 582 // If only 1 occurance, skip; 583 if (ECount <= 1) 584 continue; 585 LocationArray::iterator FI = DI->begin(); 586 StringRef kindName = Entry::getKindName((Entry::EntryKind)KindIndex); 587 errs() << "error: " << kindName << " '" << E->first() 588 << "' defined at multiple locations:\n"; 589 for (LocationArray::iterator FE = DI->end(); FI != FE; ++FI) { 590 errs() << " " << FI->File->getName() << ":" << FI->Line << ":" 591 << FI->Column << "\n"; 592 } 593 HadErrors = 1; 594 } 595 } 596 597 // Complain about macro instance in header files that differ based on how 598 // they are included. 599 if (PPTracker->reportInconsistentMacros(errs())) 600 HadErrors = 1; 601 602 // Complain about preprocessor conditional directives in header files that 603 // differ based on how they are included. 604 if (PPTracker->reportInconsistentConditionals(errs())) 605 HadErrors = 1; 606 607 // Complain about any headers that have contents that differ based on how 608 // they are included. 609 // FIXME: Could we provide information about which preprocessor conditionals 610 // are involved? 
611 for (DenseMap<const FileEntry *, HeaderContents>::iterator 612 H = Entities.HeaderContentMismatches.begin(), 613 HEnd = Entities.HeaderContentMismatches.end(); 614 H != HEnd; ++H) { 615 if (H->second.empty()) { 616 errs() << "internal error: phantom header content mismatch\n"; 617 continue; 618 } 619 620 HadErrors = 1; 621 errs() << "error: header '" << H->first->getName() 622 << "' has different contents depending on how it was included.\n"; 623 for (unsigned I = 0, N = H->second.size(); I != N; ++I) { 624 errs() << "note: '" << H->second[I].Name << "' in " 625 << H->second[I].Loc.File->getName() << " at " 626 << H->second[I].Loc.Line << ":" << H->second[I].Loc.Column 627 << " not always provided\n"; 628 } 629 } 630 631 return HadErrors; 632 } 633