1 //===- extra/modularize/Modularize.cpp - Check modularized headers --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a tool that checks whether a set of headers provides
11 // the consistent definitions required to use modules. For example, it detects
12 // whether the same entity (say, a NULL macro or size_t typedef) is defined in
13 // multiple headers or whether a header produces different definitions under
14 // different circumstances. These conditions cause modules built from the
15 // headers to behave poorly, and should be fixed before introducing a module
16 // map.
17 //
18 // Modularize takes as argument a file name for a file containing the
19 // newline-separated list of headers to check with respect to each other.
20 // Lines beginning with '#' and empty lines are ignored.
21 // Modularize also accepts regular front-end arguments.
22 //
23 // Usage:   modularize [-prefix (optional header path prefix)]
24 //   (include-files_list) [(front-end-options) ...]
25 //
26 // Note that unless a "-prefix (header path)" option is specified,
27 // non-absolute file paths in the header list file will be relative
28 // to the header list file directory.  Use -prefix to specify a different
29 // directory.
30 //
31 // Note that by default, the underlying Clang front end assumes .h files
32 // contain C source.  If your .h files in the file list contain C++ source,
33 // you should append the following to your command lines: -x c++
34 //
35 // Modularize will do normal parsing, reporting normal errors and warnings,
36 // but will also report special error messages like the following:
37 //
// error: (kind) '(symbol)' defined at multiple locations:
39 //     (file):(row):(column)
40 //     (file):(row):(column)
41 //
// error: header '(file)' has different contents depending on how it was
//   included
44 //
45 // The latter might be followed by messages like the following:
46 //
47 // note: '(symbol)' in (file) at (row):(column) not always provided
48 //
49 // Future directions:
50 //
51 // Basically, we want to add new checks for whatever we can check with respect
52 // to checking headers for module'ability.
53 //
54 // Some ideas:
55 //
56 // 1. Try to figure out the preprocessor conditional directives that
57 // contribute to problems.
58 //
59 // 2. Check for correct and consistent usage of extern "C" {} and other
60 // directives. Warn about #include inside extern "C" {}.
61 //
62 // 3. What else?
63 //
64 // General clean-up and refactoring:
65 //
66 // 1. The Location class seems to be something that we might
67 // want to design to be applicable to a wider range of tools, and stick it
68 // somewhere into Tooling/ in mainline
69 //
70 //===----------------------------------------------------------------------===//
71 
72 #include "clang/AST/ASTConsumer.h"
73 #include "clang/AST/ASTContext.h"
74 #include "clang/AST/RecursiveASTVisitor.h"
75 #include "clang/Basic/SourceManager.h"
76 #include "clang/Frontend/CompilerInstance.h"
77 #include "clang/Frontend/FrontendActions.h"
78 #include "clang/Lex/Preprocessor.h"
79 #include "clang/Tooling/CompilationDatabase.h"
80 #include "clang/Tooling/Tooling.h"
81 #include "llvm/ADT/OwningPtr.h"
82 #include "llvm/ADT/StringRef.h"
83 #include "llvm/Config/config.h"
84 #include "llvm/Support/CommandLine.h"
85 #include "llvm/Support/FileSystem.h"
86 #include "llvm/Support/MemoryBuffer.h"
87 #include "llvm/Support/Path.h"
88 #include <algorithm>
89 #include <fstream>
90 #include <iterator>
91 #include <string>
92 #include <vector>
93 
94 using namespace clang::tooling;
95 using namespace clang;
96 using namespace llvm;
97 
98 // Option to specify a file name for a list of header files to check.
99 cl::opt<std::string>
100 ListFileName(cl::Positional,
101              cl::desc("<name of file containing list of headers to check>"));
102 
103 // Collect all other arguments, which will be passed to the front end.
104 cl::list<std::string> CC1Arguments(
105     cl::ConsumeAfter, cl::desc("<arguments to be passed to front end>..."));
106 
107 // Option to specify a prefix to be prepended to the header names.
108 cl::opt<std::string> HeaderPrefix(
109     "prefix", cl::init(""),
110     cl::desc(
111         "Prepend header file paths with this prefix."
112         " If not specified,"
113         " the files are considered to be relative to the header list file."));
114 
115 // Read the header list file and collect the header file names.
116 error_code getHeaderFileNames(SmallVectorImpl<std::string> &headerFileNames,
117                               StringRef listFileName, StringRef headerPrefix) {
118 
119   // By default, use the path component of the list file name.
120   SmallString<256> headerDirectory(listFileName);
121   sys::path::remove_filename(headerDirectory);
122 
123   // Get the prefix if we have one.
124   if (headerPrefix.size() != 0)
125     headerDirectory = headerPrefix;
126 
127   // Read the header list file into a buffer.
128   OwningPtr<MemoryBuffer> listBuffer;
129   if (error_code ec = MemoryBuffer::getFile(listFileName, listBuffer)) {
130     return ec;
131   }
132 
133   // Parse the header list into strings.
134   SmallVector<StringRef, 32> strings;
135   listBuffer->getBuffer().split(strings, "\n", -1, false);
136 
137   // Collect the header file names from the string list.
138   for (SmallVectorImpl<StringRef>::iterator I = strings.begin(),
139                                             E = strings.end();
140        I != E; ++I) {
141     StringRef line = (*I).trim();
142     // Ignore comments and empty lines.
143     if (line.empty() || (line[0] == '#'))
144       continue;
145     SmallString<256> headerFileName;
146     // Prepend header file name prefix if it's not absolute.
147     if (sys::path::is_absolute(line))
148       headerFileName = line;
149     else {
150       headerFileName = headerDirectory;
151       sys::path::append(headerFileName, line);
152     }
153     // Save the resulting header file path.
154     headerFileNames.push_back(headerFileName.str());
155   }
156 
157   return error_code::success();
158 }
159 
160 // FIXME: The Location class seems to be something that we might
161 // want to design to be applicable to a wider range of tools, and stick it
162 // somewhere into Tooling/ in mainline
163 struct Location {
164   const FileEntry *File;
165   unsigned Line, Column;
166 
167   Location() : File(), Line(), Column() {}
168 
169   Location(SourceManager &SM, SourceLocation Loc) : File(), Line(), Column() {
170     Loc = SM.getExpansionLoc(Loc);
171     if (Loc.isInvalid())
172       return;
173 
174     std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc);
175     File = SM.getFileEntryForID(Decomposed.first);
176     if (!File)
177       return;
178 
179     Line = SM.getLineNumber(Decomposed.first, Decomposed.second);
180     Column = SM.getColumnNumber(Decomposed.first, Decomposed.second);
181   }
182 
183   operator bool() const { return File != 0; }
184 
185   friend bool operator==(const Location &X, const Location &Y) {
186     return X.File == Y.File && X.Line == Y.Line && X.Column == Y.Column;
187   }
188 
189   friend bool operator!=(const Location &X, const Location &Y) {
190     return !(X == Y);
191   }
192 
193   friend bool operator<(const Location &X, const Location &Y) {
194     if (X.File != Y.File)
195       return X.File < Y.File;
196     if (X.Line != Y.Line)
197       return X.Line < Y.Line;
198     return X.Column < Y.Column;
199   }
200   friend bool operator>(const Location &X, const Location &Y) { return Y < X; }
201   friend bool operator<=(const Location &X, const Location &Y) {
202     return !(Y < X);
203   }
204   friend bool operator>=(const Location &X, const Location &Y) {
205     return !(X < Y);
206   }
207 
208 };
209 
210 struct Entry {
211   enum EntryKind {
212     EK_Tag,
213     EK_Value,
214     EK_Macro,
215 
216     EK_NumberOfKinds
217   } Kind;
218 
219   Location Loc;
220 
221   StringRef getKindName() { return getKindName(Kind); }
222   static StringRef getKindName(EntryKind kind);
223 };
224 
225 // Return a string representing the given kind.
226 StringRef Entry::getKindName(Entry::EntryKind kind) {
227   switch (kind) {
228   case EK_Tag:
229     return "tag";
230   case EK_Value:
231     return "value";
232   case EK_Macro:
233     return "macro";
234   case EK_NumberOfKinds:
235     break;
236   }
237   llvm_unreachable("invalid Entry kind");
238 }
239 
240 struct HeaderEntry {
241   std::string Name;
242   Location Loc;
243 
244   friend bool operator==(const HeaderEntry &X, const HeaderEntry &Y) {
245     return X.Loc == Y.Loc && X.Name == Y.Name;
246   }
247   friend bool operator!=(const HeaderEntry &X, const HeaderEntry &Y) {
248     return !(X == Y);
249   }
250   friend bool operator<(const HeaderEntry &X, const HeaderEntry &Y) {
251     return X.Loc < Y.Loc || (X.Loc == Y.Loc && X.Name < Y.Name);
252   }
253   friend bool operator>(const HeaderEntry &X, const HeaderEntry &Y) {
254     return Y < X;
255   }
256   friend bool operator<=(const HeaderEntry &X, const HeaderEntry &Y) {
257     return !(Y < X);
258   }
259   friend bool operator>=(const HeaderEntry &X, const HeaderEntry &Y) {
260     return !(X < Y);
261   }
262 };
263 
264 typedef std::vector<HeaderEntry> HeaderContents;
265 
266 class EntityMap : public StringMap<SmallVector<Entry, 2> > {
267 public:
268   DenseMap<const FileEntry *, HeaderContents> HeaderContentMismatches;
269 
270   void add(const std::string &Name, enum Entry::EntryKind Kind, Location Loc) {
271     // Record this entity in its header.
272     HeaderEntry HE = { Name, Loc };
273     CurHeaderContents[Loc.File].push_back(HE);
274 
275     // Check whether we've seen this entry before.
276     SmallVector<Entry, 2> &Entries = (*this)[Name];
277     for (unsigned I = 0, N = Entries.size(); I != N; ++I) {
278       if (Entries[I].Kind == Kind && Entries[I].Loc == Loc)
279         return;
280     }
281 
282     // We have not seen this entry before; record it.
283     Entry E = { Kind, Loc };
284     Entries.push_back(E);
285   }
286 
287   void mergeCurHeaderContents() {
288     for (DenseMap<const FileEntry *, HeaderContents>::iterator
289              H = CurHeaderContents.begin(),
290              HEnd = CurHeaderContents.end();
291          H != HEnd; ++H) {
292       // Sort contents.
293       std::sort(H->second.begin(), H->second.end());
294 
295       // Check whether we've seen this header before.
296       DenseMap<const FileEntry *, HeaderContents>::iterator KnownH =
297           AllHeaderContents.find(H->first);
298       if (KnownH == AllHeaderContents.end()) {
299         // We haven't seen this header before; record its contents.
300         AllHeaderContents.insert(*H);
301         continue;
302       }
303 
304       // If the header contents are the same, we're done.
305       if (H->second == KnownH->second)
306         continue;
307 
308       // Determine what changed.
309       std::set_symmetric_difference(
310           H->second.begin(), H->second.end(), KnownH->second.begin(),
311           KnownH->second.end(),
312           std::back_inserter(HeaderContentMismatches[H->first]));
313     }
314 
315     CurHeaderContents.clear();
316   }
317 private:
318   DenseMap<const FileEntry *, HeaderContents> CurHeaderContents;
319   DenseMap<const FileEntry *, HeaderContents> AllHeaderContents;
320 };
321 
322 class CollectEntitiesVisitor :
323     public RecursiveASTVisitor<CollectEntitiesVisitor> {
324 public:
325   CollectEntitiesVisitor(SourceManager &SM, EntityMap &Entities)
326       : SM(SM), Entities(Entities) {}
327 
328   bool TraverseStmt(Stmt *S) { return true; }
329   bool TraverseType(QualType T) { return true; }
330   bool TraverseTypeLoc(TypeLoc TL) { return true; }
331   bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS) { return true; }
332   bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
333     return true;
334   }
335   bool TraverseDeclarationNameInfo(DeclarationNameInfo NameInfo) {
336     return true;
337   }
338   bool TraverseTemplateName(TemplateName Template) { return true; }
339   bool TraverseTemplateArgument(const TemplateArgument &Arg) { return true; }
340   bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &ArgLoc) {
341     return true;
342   }
343   bool TraverseTemplateArguments(const TemplateArgument *Args,
344                                  unsigned NumArgs) {
345     return true;
346   }
347   bool TraverseConstructorInitializer(CXXCtorInitializer *Init) { return true; }
348   bool TraverseLambdaCapture(LambdaExpr::Capture C) { return true; }
349 
350   bool VisitNamedDecl(NamedDecl *ND) {
351     // We only care about file-context variables.
352     if (!ND->getDeclContext()->isFileContext())
353       return true;
354 
355     // Skip declarations that tend to be properly multiply-declared.
356     if (isa<NamespaceDecl>(ND) || isa<UsingDirectiveDecl>(ND) ||
357         isa<NamespaceAliasDecl>(ND) ||
358         isa<ClassTemplateSpecializationDecl>(ND) || isa<UsingDecl>(ND) ||
359         isa<UsingShadowDecl>(ND) || isa<FunctionDecl>(ND) ||
360         isa<FunctionTemplateDecl>(ND) ||
361         (isa<TagDecl>(ND) &&
362          !cast<TagDecl>(ND)->isThisDeclarationADefinition()))
363       return true;
364 
365     std::string Name = ND->getNameAsString();
366     if (Name.empty())
367       return true;
368 
369     Location Loc(SM, ND->getLocation());
370     if (!Loc)
371       return true;
372 
373     Entities.add(Name, isa<TagDecl>(ND) ? Entry::EK_Tag : Entry::EK_Value, Loc);
374     return true;
375   }
376 private:
377   SourceManager &SM;
378   EntityMap &Entities;
379 };
380 
381 class CollectEntitiesConsumer : public ASTConsumer {
382 public:
383   CollectEntitiesConsumer(EntityMap &Entities, Preprocessor &PP)
384       : Entities(Entities), PP(PP) {}
385 
386   virtual void HandleTranslationUnit(ASTContext &Ctx) {
387     SourceManager &SM = Ctx.getSourceManager();
388 
389     // Collect declared entities.
390     CollectEntitiesVisitor(SM, Entities)
391         .TraverseDecl(Ctx.getTranslationUnitDecl());
392 
393     // Collect macro definitions.
394     for (Preprocessor::macro_iterator M = PP.macro_begin(),
395                                       MEnd = PP.macro_end();
396          M != MEnd; ++M) {
397       Location Loc(SM, M->second->getLocation());
398       if (!Loc)
399         continue;
400 
401       Entities.add(M->first->getName().str(), Entry::EK_Macro, Loc);
402     }
403 
404     // Merge header contents.
405     Entities.mergeCurHeaderContents();
406   }
407 private:
408   EntityMap &Entities;
409   Preprocessor &PP;
410 };
411 
412 class CollectEntitiesAction : public SyntaxOnlyAction {
413 public:
414   CollectEntitiesAction(EntityMap &Entities) : Entities(Entities) {}
415 protected:
416   virtual clang::ASTConsumer *
417   CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
418     return new CollectEntitiesConsumer(Entities, CI.getPreprocessor());
419   }
420 private:
421   EntityMap &Entities;
422 };
423 
424 class ModularizeFrontendActionFactory : public FrontendActionFactory {
425 public:
426   ModularizeFrontendActionFactory(EntityMap &Entities) : Entities(Entities) {}
427 
428   virtual CollectEntitiesAction *create() {
429     return new CollectEntitiesAction(Entities);
430   }
431 private:
432   EntityMap &Entities;
433 };
434 
435 int main(int argc, const char **argv) {
436 
437   // This causes options to be parsed.
438   cl::ParseCommandLineOptions(argc, argv, "modularize.\n");
439 
440   // No go if we have no header list file.
441   if (ListFileName.size() == 0) {
442     cl::PrintHelpMessage();
443     return 1;
444   }
445 
446   // Get header file names.
447   SmallVector<std::string, 32> Headers;
448   if (error_code ec = getHeaderFileNames(Headers, ListFileName, HeaderPrefix)) {
449     errs() << argv[0] << ": error: Unable to get header list '" << ListFileName
450            << "': " << ec.message() << '\n';
451     return 1;
452   }
453 
454   // Create the compilation database.
455   SmallString<256> PathBuf;
456   sys::fs::current_path(PathBuf);
457   OwningPtr<CompilationDatabase> Compilations;
458   Compilations.reset(
459       new FixedCompilationDatabase(Twine(PathBuf), CC1Arguments));
460 
461   // Parse all of the headers, detecting duplicates.
462   EntityMap Entities;
463   ClangTool Tool(*Compilations, Headers);
464   int HadErrors = Tool.run(new ModularizeFrontendActionFactory(Entities));
465 
466   // Create a place to save duplicate entity locations, separate bins per kind.
467   typedef SmallVector<Location, 8> LocationArray;
468   typedef SmallVector<LocationArray, Entry::EK_NumberOfKinds> EntryBinArray;
469   EntryBinArray EntryBins;
470   int kindIndex;
471   for (kindIndex = 0; kindIndex < Entry::EK_NumberOfKinds; ++kindIndex) {
472     LocationArray array;
473     EntryBins.push_back(array);
474   }
475 
476   // Check for the same entity being defined in multiple places.
477   for (EntityMap::iterator E = Entities.begin(), EEnd = Entities.end();
478        E != EEnd; ++E) {
479     // If only one occurance, exit early.
480     if (E->second.size() == 1)
481       continue;
482     // Clear entity locations.
483     for (EntryBinArray::iterator CI = EntryBins.begin(), CE = EntryBins.end();
484          CI != CE; ++CI) {
485       CI->clear();
486     }
487     // Walk the entities of a single name, collecting the locations,
488     // separated into separate bins.
489     for (unsigned I = 0, N = E->second.size(); I != N; ++I) {
490       EntryBins[E->second[I].Kind].push_back(E->second[I].Loc);
491     }
492     // Report any duplicate entity definition errors.
493     int kindIndex = 0;
494     for (EntryBinArray::iterator DI = EntryBins.begin(), DE = EntryBins.end();
495          DI != DE; ++DI, ++kindIndex) {
496       int eCount = DI->size();
497       // If only 1 occurance, skip;
498       if (eCount <= 1)
499         continue;
500       LocationArray::iterator FI = DI->begin();
501       StringRef kindName = Entry::getKindName((Entry::EntryKind) kindIndex);
502       errs() << "error: " << kindName << " '" << E->first()
503              << "' defined at multiple locations:\n";
504       for (LocationArray::iterator FE = DI->end(); FI != FE; ++FI) {
505         errs() << "    " << FI->File->getName() << ":" << FI->Line << ":"
506                << FI->Column << "\n";
507       }
508       HadErrors = 1;
509     }
510   }
511 
512   // Complain about any headers that have contents that differ based on how
513   // they are included.
514   // FIXME: Could we provide information about which preprocessor conditionals
515   // are involved?
516   for (DenseMap<const FileEntry *, HeaderContents>::iterator
517            H = Entities.HeaderContentMismatches.begin(),
518            HEnd = Entities.HeaderContentMismatches.end();
519        H != HEnd; ++H) {
520     if (H->second.empty()) {
521       errs() << "internal error: phantom header content mismatch\n";
522       continue;
523     }
524 
525     HadErrors = 1;
526     errs() << "error: header '" << H->first->getName()
527            << "' has different contents depending on how it was included\n";
528     for (unsigned I = 0, N = H->second.size(); I != N; ++I) {
529       errs() << "note: '" << H->second[I].Name << "' in " << H->second[I]
530           .Loc.File->getName() << " at " << H->second[I].Loc.Line << ":"
531              << H->second[I].Loc.Column << " not always provided\n";
532     }
533   }
534 
535   return HadErrors;
536 }
537