1 //===- extra/modularize/Modularize.cpp - Check modularized headers --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a tool that checks whether a set of headers provides
11 // the consistent definitions required to use modules. For example, it detects
12 // whether the same entity (say, a NULL macro or size_t typedef) is defined in
13 // multiple headers or whether a header produces different definitions under
14 // different circumstances. These conditions cause modules built from the
15 // headers to behave poorly, and should be fixed before introducing a module
16 // map.
17 //
18 // Modularize takes as argument a file name for a file containing the
19 // newline-separated list of headers to check with respect to each other.
20 // Lines beginning with '#' and empty lines are ignored.
21 // Modularize also accepts regular front-end arguments.
22 //
23 // Usage:   modularize [-prefix (optional header path prefix)]
24 //   (include-files_list) [(front-end-options) ...]
25 //
26 // Note that unless a "-prefix (header path)" option is specified,
27 // non-absolute file paths in the header list file will be relative
28 // to the header list file directory.  Use -prefix to specify a different
29 // directory.
30 //
31 // Note that by default, the underlying Clang front end assumes .h files
32 // contain C source.  If your .h files in the file list contain C++ source,
33 // you should append the following to your command lines: -x c++
34 //
35 // Modularize will do normal parsing, reporting normal errors and warnings,
36 // but will also report special error messages like the following:
37 //
38 //   error: '(symbol)' defined at multiple locations:
39 //       (file):(row):(column)
40 //       (file):(row):(column)
41 //
42 //   error: header '(file)' has different contents depending on how it was
43 //     included
44 //
45 // The latter might be followed by messages like the following:
46 //
47 //   note: '(symbol)' in (file) at (row):(column) not always provided
48 //
49 // Checks will also be performed for macro expansions, defined(macro)
50 // expressions, and preprocessor conditional directives that evaluate
51 // inconsistently, and can produce error messages like the following:
52 //
53 //   (...)/SubHeader.h:11:5:
54 //   #if SYMBOL == 1
55 //       ^
56 //   error: Macro instance 'SYMBOL' has different values in this header,
57 //          depending on how it was included.
58 //     'SYMBOL' expanded to: '1' with respect to these inclusion paths:
59 //       (...)/Header1.h
60 //         (...)/SubHeader.h
61 //   (...)/SubHeader.h:3:9:
62 //   #define SYMBOL 1
63 //             ^
64 //   Macro defined here.
65 //     'SYMBOL' expanded to: '2' with respect to these inclusion paths:
66 //       (...)/Header2.h
67 //           (...)/SubHeader.h
68 //   (...)/SubHeader.h:7:9:
69 //   #define SYMBOL 2
70 //             ^
71 //   Macro defined here.
72 //
73 // See PreprocessorTracker.cpp for additional details.
74 //
75 // Current problems:
76 //
77 // Modularize has problems with C++:
78 //
// 1. Modularize doesn't distinguish classes of the same name in
// different namespaces.  The result is erroneous duplicate definition errors.
81 //
82 // 2. Modularize doesn't distinguish between a regular class and a template
83 // class of the same name.
84 //
85 // Other problems:
86 //
87 // 3. There seem to be a lot of spurious "not always provided" messages,
88 // and many duplicates of these.
89 //
90 // 4. There are some legitimate uses of preprocessor macros that
91 // modularize will flag as errors, such as repeatedly #include'ing
92 // a file and using interleaving defined/undefined macros
93 // to change declarations in the included file.  Is there a way
94 // to address this?  Maybe have modularize accept a list of macros
95 // to ignore.  Otherwise you can just exclude the file, after checking
96 // for legitimate errors.
97 //
98 // Future directions:
99 //
100 // Basically, we want to add new checks for whatever we can check with respect
101 // to checking headers for module'ability.
102 //
103 // Some ideas:
104 //
105 // 1. Fix the C++ and other problems.
106 //
107 // 2. Add options to disable any of the checks, in case
108 // there is some problem with them, or the messages get too verbose.
109 //
110 // 3. Try to figure out the preprocessor conditional directives that
111 // contribute to problems and tie them to the inconsistent definitions.
112 //
113 // 4. Check for correct and consistent usage of extern "C" {} and other
114 // directives. Warn about #include inside extern "C" {}.
115 //
116 // 5. To support headers that depend on other headers to be included first
117 // add support for a dependency list to the header list input.
118 // I.e.: header.h: dependent1.h dependent2.h
// (Implement using clang's "-include" option?)
120 //
121 // 6. What else?
122 //
123 // General clean-up and refactoring:
124 //
125 // 1. The Location class seems to be something that we might
126 // want to design to be applicable to a wider range of tools, and stick it
127 // somewhere into Tooling/ in mainline
128 //
129 //===----------------------------------------------------------------------===//
130 
131 #include "clang/AST/ASTConsumer.h"
132 #include "clang/AST/ASTContext.h"
133 #include "clang/AST/RecursiveASTVisitor.h"
134 #include "clang/Basic/SourceManager.h"
135 #include "clang/Frontend/CompilerInstance.h"
136 #include "clang/Frontend/FrontendActions.h"
137 #include "clang/Lex/Preprocessor.h"
138 #include "clang/Tooling/CompilationDatabase.h"
139 #include "clang/Tooling/Tooling.h"
140 #include "llvm/ADT/OwningPtr.h"
141 #include "llvm/ADT/StringRef.h"
142 #include "llvm/Config/config.h"
143 #include "llvm/Support/CommandLine.h"
144 #include "llvm/Support/FileSystem.h"
145 #include "llvm/Support/MemoryBuffer.h"
146 #include "llvm/Support/Path.h"
147 #include <algorithm>
148 #include <fstream>
149 #include <iterator>
150 #include <string>
151 #include <vector>
152 #include "PreprocessorTracker.h"
153 
154 using namespace clang::tooling;
155 using namespace clang;
156 using namespace llvm;
157 using namespace Modularize;
158 
159 // Option to specify a file name for a list of header files to check.
160 cl::opt<std::string>
161 ListFileName(cl::Positional,
162              cl::desc("<name of file containing list of headers to check>"));
163 
164 // Collect all other arguments, which will be passed to the front end.
165 cl::list<std::string>
166 CC1Arguments(cl::ConsumeAfter,
167              cl::desc("<arguments to be passed to front end>..."));
168 
169 // Option to specify a prefix to be prepended to the header names.
170 cl::opt<std::string> HeaderPrefix(
171     "prefix", cl::init(""),
172     cl::desc(
173         "Prepend header file paths with this prefix."
174         " If not specified,"
175         " the files are considered to be relative to the header list file."));
176 
177 // Read the header list file and collect the header file names.
178 error_code getHeaderFileNames(SmallVectorImpl<std::string> &HeaderFileNames,
179                               StringRef ListFileName, StringRef HeaderPrefix) {
180 
181   // By default, use the path component of the list file name.
182   SmallString<256> HeaderDirectory(ListFileName);
183   sys::path::remove_filename(HeaderDirectory);
184 
185   // Get the prefix if we have one.
186   if (HeaderPrefix.size() != 0)
187     HeaderDirectory = HeaderPrefix;
188 
189   // Read the header list file into a buffer.
190   OwningPtr<MemoryBuffer> listBuffer;
191   if (error_code ec = MemoryBuffer::getFile(ListFileName, listBuffer)) {
192     return ec;
193   }
194 
195   // Parse the header list into strings.
196   SmallVector<StringRef, 32> Strings;
197   listBuffer->getBuffer().split(Strings, "\n", -1, false);
198 
199   // Collect the header file names from the string list.
200   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
201                                             E = Strings.end();
202        I != E; ++I) {
203     StringRef Line = (*I).trim();
204     // Ignore comments and empty lines.
205     if (Line.empty() || (Line[0] == '#'))
206       continue;
207     SmallString<256> HeaderFileName;
208     // Prepend header file name prefix if it's not absolute.
209     if (sys::path::is_absolute(Line))
210       HeaderFileName = Line;
211     else {
212       HeaderFileName = HeaderDirectory;
213       sys::path::append(HeaderFileName, Line);
214     }
215     // Save the resulting header file path.
216     HeaderFileNames.push_back(HeaderFileName.str());
217   }
218 
219   return error_code::success();
220 }
221 
222 // FIXME: The Location class seems to be something that we might
223 // want to design to be applicable to a wider range of tools, and stick it
224 // somewhere into Tooling/ in mainline
225 struct Location {
226   const FileEntry *File;
227   unsigned Line, Column;
228 
229   Location() : File(), Line(), Column() {}
230 
231   Location(SourceManager &SM, SourceLocation Loc) : File(), Line(), Column() {
232     Loc = SM.getExpansionLoc(Loc);
233     if (Loc.isInvalid())
234       return;
235 
236     std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc);
237     File = SM.getFileEntryForID(Decomposed.first);
238     if (!File)
239       return;
240 
241     Line = SM.getLineNumber(Decomposed.first, Decomposed.second);
242     Column = SM.getColumnNumber(Decomposed.first, Decomposed.second);
243   }
244 
245   operator bool() const { return File != 0; }
246 
247   friend bool operator==(const Location &X, const Location &Y) {
248     return X.File == Y.File && X.Line == Y.Line && X.Column == Y.Column;
249   }
250 
251   friend bool operator!=(const Location &X, const Location &Y) {
252     return !(X == Y);
253   }
254 
255   friend bool operator<(const Location &X, const Location &Y) {
256     if (X.File != Y.File)
257       return X.File < Y.File;
258     if (X.Line != Y.Line)
259       return X.Line < Y.Line;
260     return X.Column < Y.Column;
261   }
262   friend bool operator>(const Location &X, const Location &Y) { return Y < X; }
263   friend bool operator<=(const Location &X, const Location &Y) {
264     return !(Y < X);
265   }
266   friend bool operator>=(const Location &X, const Location &Y) {
267     return !(X < Y);
268   }
269 };
270 
271 struct Entry {
272   enum EntryKind {
273     EK_Tag,
274     EK_Value,
275     EK_Macro,
276 
277     EK_NumberOfKinds
278   } Kind;
279 
280   Location Loc;
281 
282   StringRef getKindName() { return getKindName(Kind); }
283   static StringRef getKindName(EntryKind kind);
284 };
285 
286 // Return a string representing the given kind.
287 StringRef Entry::getKindName(Entry::EntryKind kind) {
288   switch (kind) {
289   case EK_Tag:
290     return "tag";
291   case EK_Value:
292     return "value";
293   case EK_Macro:
294     return "macro";
295   case EK_NumberOfKinds:
296     break;
297   }
298   llvm_unreachable("invalid Entry kind");
299 }
300 
301 struct HeaderEntry {
302   std::string Name;
303   Location Loc;
304 
305   friend bool operator==(const HeaderEntry &X, const HeaderEntry &Y) {
306     return X.Loc == Y.Loc && X.Name == Y.Name;
307   }
308   friend bool operator!=(const HeaderEntry &X, const HeaderEntry &Y) {
309     return !(X == Y);
310   }
311   friend bool operator<(const HeaderEntry &X, const HeaderEntry &Y) {
312     return X.Loc < Y.Loc || (X.Loc == Y.Loc && X.Name < Y.Name);
313   }
314   friend bool operator>(const HeaderEntry &X, const HeaderEntry &Y) {
315     return Y < X;
316   }
317   friend bool operator<=(const HeaderEntry &X, const HeaderEntry &Y) {
318     return !(Y < X);
319   }
320   friend bool operator>=(const HeaderEntry &X, const HeaderEntry &Y) {
321     return !(X < Y);
322   }
323 };
324 
325 typedef std::vector<HeaderEntry> HeaderContents;
326 
327 class EntityMap : public StringMap<SmallVector<Entry, 2> > {
328 public:
329   DenseMap<const FileEntry *, HeaderContents> HeaderContentMismatches;
330 
331   void add(const std::string &Name, enum Entry::EntryKind Kind, Location Loc) {
332     // Record this entity in its header.
333     HeaderEntry HE = { Name, Loc };
334     CurHeaderContents[Loc.File].push_back(HE);
335 
336     // Check whether we've seen this entry before.
337     SmallVector<Entry, 2> &Entries = (*this)[Name];
338     for (unsigned I = 0, N = Entries.size(); I != N; ++I) {
339       if (Entries[I].Kind == Kind && Entries[I].Loc == Loc)
340         return;
341     }
342 
343     // We have not seen this entry before; record it.
344     Entry E = { Kind, Loc };
345     Entries.push_back(E);
346   }
347 
348   void mergeCurHeaderContents() {
349     for (DenseMap<const FileEntry *, HeaderContents>::iterator
350              H = CurHeaderContents.begin(),
351              HEnd = CurHeaderContents.end();
352          H != HEnd; ++H) {
353       // Sort contents.
354       std::sort(H->second.begin(), H->second.end());
355 
356       // Check whether we've seen this header before.
357       DenseMap<const FileEntry *, HeaderContents>::iterator KnownH =
358           AllHeaderContents.find(H->first);
359       if (KnownH == AllHeaderContents.end()) {
360         // We haven't seen this header before; record its contents.
361         AllHeaderContents.insert(*H);
362         continue;
363       }
364 
365       // If the header contents are the same, we're done.
366       if (H->second == KnownH->second)
367         continue;
368 
369       // Determine what changed.
370       std::set_symmetric_difference(
371           H->second.begin(), H->second.end(), KnownH->second.begin(),
372           KnownH->second.end(),
373           std::back_inserter(HeaderContentMismatches[H->first]));
374     }
375 
376     CurHeaderContents.clear();
377   }
378 
379 private:
380   DenseMap<const FileEntry *, HeaderContents> CurHeaderContents;
381   DenseMap<const FileEntry *, HeaderContents> AllHeaderContents;
382 };
383 
384 class CollectEntitiesVisitor
385     : public RecursiveASTVisitor<CollectEntitiesVisitor> {
386 public:
387   CollectEntitiesVisitor(SourceManager &SM, EntityMap &Entities)
388       : SM(SM), Entities(Entities) {}
389 
390   bool TraverseStmt(Stmt *S) { return true; }
391   bool TraverseType(QualType T) { return true; }
392   bool TraverseTypeLoc(TypeLoc TL) { return true; }
393   bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS) { return true; }
394   bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
395     return true;
396   }
397   bool TraverseDeclarationNameInfo(DeclarationNameInfo NameInfo) {
398     return true;
399   }
400   bool TraverseTemplateName(TemplateName Template) { return true; }
401   bool TraverseTemplateArgument(const TemplateArgument &Arg) { return true; }
402   bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &ArgLoc) {
403     return true;
404   }
405   bool TraverseTemplateArguments(const TemplateArgument *Args,
406                                  unsigned NumArgs) {
407     return true;
408   }
409   bool TraverseConstructorInitializer(CXXCtorInitializer *Init) { return true; }
410   bool TraverseLambdaCapture(LambdaExpr::Capture C) { return true; }
411 
412   bool VisitNamedDecl(NamedDecl *ND) {
413     // We only care about file-context variables.
414     if (!ND->getDeclContext()->isFileContext())
415       return true;
416 
417     // Skip declarations that tend to be properly multiply-declared.
418     if (isa<NamespaceDecl>(ND) || isa<UsingDirectiveDecl>(ND) ||
419         isa<NamespaceAliasDecl>(ND) ||
420         isa<ClassTemplateSpecializationDecl>(ND) || isa<UsingDecl>(ND) ||
421         isa<UsingShadowDecl>(ND) || isa<FunctionDecl>(ND) ||
422         isa<FunctionTemplateDecl>(ND) ||
423         (isa<TagDecl>(ND) &&
424          !cast<TagDecl>(ND)->isThisDeclarationADefinition()))
425       return true;
426 
427     std::string Name = ND->getNameAsString();
428     if (Name.empty())
429       return true;
430 
431     Location Loc(SM, ND->getLocation());
432     if (!Loc)
433       return true;
434 
435     Entities.add(Name, isa<TagDecl>(ND) ? Entry::EK_Tag : Entry::EK_Value, Loc);
436     return true;
437   }
438 
439 private:
440   SourceManager &SM;
441   EntityMap &Entities;
442 };
443 
444 class CollectEntitiesConsumer : public ASTConsumer {
445 public:
446   CollectEntitiesConsumer(EntityMap &Entities,
447                           PreprocessorTracker &preprocessorTracker,
448                           Preprocessor &PP, StringRef InFile)
449       : Entities(Entities), PPTracker(preprocessorTracker), PP(PP) {
450     PPTracker.handlePreprocessorEntry(PP, InFile);
451   }
452 
453   ~CollectEntitiesConsumer() { PPTracker.handlePreprocessorExit(); }
454 
455   virtual void HandleTranslationUnit(ASTContext &Ctx) {
456     SourceManager &SM = Ctx.getSourceManager();
457 
458     // Collect declared entities.
459     CollectEntitiesVisitor(SM, Entities)
460         .TraverseDecl(Ctx.getTranslationUnitDecl());
461 
462     // Collect macro definitions.
463     for (Preprocessor::macro_iterator M = PP.macro_begin(),
464                                       MEnd = PP.macro_end();
465          M != MEnd; ++M) {
466       Location Loc(SM, M->second->getLocation());
467       if (!Loc)
468         continue;
469 
470       Entities.add(M->first->getName().str(), Entry::EK_Macro, Loc);
471     }
472 
473     // Merge header contents.
474     Entities.mergeCurHeaderContents();
475   }
476 
477 private:
478   EntityMap &Entities;
479   PreprocessorTracker &PPTracker;
480   Preprocessor &PP;
481 };
482 
483 class CollectEntitiesAction : public SyntaxOnlyAction {
484 public:
485   CollectEntitiesAction(EntityMap &Entities,
486                         PreprocessorTracker &preprocessorTracker)
487       : Entities(Entities), PPTracker(preprocessorTracker) {}
488 
489 protected:
490   virtual clang::ASTConsumer *CreateASTConsumer(CompilerInstance &CI,
491                                                 StringRef InFile) {
492     return new CollectEntitiesConsumer(Entities, PPTracker,
493                                        CI.getPreprocessor(), InFile);
494   }
495 
496 private:
497   EntityMap &Entities;
498   PreprocessorTracker &PPTracker;
499 };
500 
501 class ModularizeFrontendActionFactory : public FrontendActionFactory {
502 public:
503   ModularizeFrontendActionFactory(EntityMap &Entities,
504                                   PreprocessorTracker &preprocessorTracker)
505       : Entities(Entities), PPTracker(preprocessorTracker) {}
506 
507   virtual CollectEntitiesAction *create() {
508     return new CollectEntitiesAction(Entities, PPTracker);
509   }
510 
511 private:
512   EntityMap &Entities;
513   PreprocessorTracker &PPTracker;
514 };
515 
516 int main(int Argc, const char **Argv) {
517 
518   // This causes options to be parsed.
519   cl::ParseCommandLineOptions(Argc, Argv, "modularize.\n");
520 
521   // No go if we have no header list file.
522   if (ListFileName.size() == 0) {
523     cl::PrintHelpMessage();
524     return 1;
525   }
526 
527   // Get header file names.
528   SmallVector<std::string, 32> Headers;
529   if (error_code EC = getHeaderFileNames(Headers, ListFileName, HeaderPrefix)) {
530     errs() << Argv[0] << ": error: Unable to get header list '" << ListFileName
531            << "': " << EC.message() << '\n';
532     return 1;
533   }
534 
535   // Create the compilation database.
536   SmallString<256> PathBuf;
537   sys::fs::current_path(PathBuf);
538   OwningPtr<CompilationDatabase> Compilations;
539   Compilations.reset(
540       new FixedCompilationDatabase(Twine(PathBuf), CC1Arguments));
541 
542   // Create preprocessor tracker, to watch for macro and conditional problems.
543   OwningPtr<PreprocessorTracker> PPTracker(PreprocessorTracker::create());
544 
545   // Parse all of the headers, detecting duplicates.
546   EntityMap Entities;
547   ClangTool Tool(*Compilations, Headers);
548   int HadErrors =
549       Tool.run(new ModularizeFrontendActionFactory(Entities, *PPTracker));
550 
551   // Create a place to save duplicate entity locations, separate bins per kind.
552   typedef SmallVector<Location, 8> LocationArray;
553   typedef SmallVector<LocationArray, Entry::EK_NumberOfKinds> EntryBinArray;
554   EntryBinArray EntryBins;
555   int KindIndex;
556   for (KindIndex = 0; KindIndex < Entry::EK_NumberOfKinds; ++KindIndex) {
557     LocationArray Array;
558     EntryBins.push_back(Array);
559   }
560 
561   // Check for the same entity being defined in multiple places.
562   for (EntityMap::iterator E = Entities.begin(), EEnd = Entities.end();
563        E != EEnd; ++E) {
564     // If only one occurance, exit early.
565     if (E->second.size() == 1)
566       continue;
567     // Clear entity locations.
568     for (EntryBinArray::iterator CI = EntryBins.begin(), CE = EntryBins.end();
569          CI != CE; ++CI) {
570       CI->clear();
571     }
572     // Walk the entities of a single name, collecting the locations,
573     // separated into separate bins.
574     for (unsigned I = 0, N = E->second.size(); I != N; ++I) {
575       EntryBins[E->second[I].Kind].push_back(E->second[I].Loc);
576     }
577     // Report any duplicate entity definition errors.
578     int KindIndex = 0;
579     for (EntryBinArray::iterator DI = EntryBins.begin(), DE = EntryBins.end();
580          DI != DE; ++DI, ++KindIndex) {
581       int ECount = DI->size();
582       // If only 1 occurance, skip;
583       if (ECount <= 1)
584         continue;
585       LocationArray::iterator FI = DI->begin();
586       StringRef kindName = Entry::getKindName((Entry::EntryKind)KindIndex);
587       errs() << "error: " << kindName << " '" << E->first()
588              << "' defined at multiple locations:\n";
589       for (LocationArray::iterator FE = DI->end(); FI != FE; ++FI) {
590         errs() << "    " << FI->File->getName() << ":" << FI->Line << ":"
591                << FI->Column << "\n";
592       }
593       HadErrors = 1;
594     }
595   }
596 
597   // Complain about macro instance in header files that differ based on how
598   // they are included.
599   if (PPTracker->reportInconsistentMacros(errs()))
600     HadErrors = 1;
601 
602   // Complain about preprocessor conditional directives in header files that
603   // differ based on how they are included.
604   if (PPTracker->reportInconsistentConditionals(errs()))
605     HadErrors = 1;
606 
607   // Complain about any headers that have contents that differ based on how
608   // they are included.
609   // FIXME: Could we provide information about which preprocessor conditionals
610   // are involved?
611   for (DenseMap<const FileEntry *, HeaderContents>::iterator
612            H = Entities.HeaderContentMismatches.begin(),
613            HEnd = Entities.HeaderContentMismatches.end();
614        H != HEnd; ++H) {
615     if (H->second.empty()) {
616       errs() << "internal error: phantom header content mismatch\n";
617       continue;
618     }
619 
620     HadErrors = 1;
621     errs() << "error: header '" << H->first->getName()
622            << "' has different contents depending on how it was included.\n";
623     for (unsigned I = 0, N = H->second.size(); I != N; ++I) {
624       errs() << "note: '" << H->second[I].Name << "' in "
625              << H->second[I].Loc.File->getName() << " at "
626              << H->second[I].Loc.Line << ":" << H->second[I].Loc.Column
627              << " not always provided\n";
628     }
629   }
630 
631   return HadErrors;
632 }
633