1 //===- tools/clang/Modularize.cpp - Check modularized headers -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a tool that checks whether a set of headers provides
11 // the consistent definitions required to use modules. For example, it detects
12 // whether the same entity (say, a NULL macro or size_t typedef) is defined in
13 // multiple headers or whether a header produces different definitions under
14 // different circumstances. These conditions cause modules built from the
15 // headers to behave poorly, and should be fixed before introducing a module
16 // map.
17 //
18 // Modularize takes as argument a file name for a file containing the
19 // newline-separated list of headers to check with respect to each other.
20 // Modularize also accepts regular front-end arguments.
21 //
22 // Usage:   modularize (include-files_list) [(front-end-options) ...]
23 //
24 // Modularize will do normal parsing, reporting normal errors and warnings,
25 // but will also report special error messages like the following:
26 //
27 // error: '(symbol)' defined at both (file):(row):(column) and
28 //  (file):(row):(column)
29 //
// error: header '(file)' has different contents depending on how it was
31 //   included
32 //
33 // The latter might be followed by messages like the following:
34 //
35 // note: '(symbol)' in (file) at (row):(column) not always provided
36 //
37 // Future directions:
38 //
39 // Basically, we want to add new checks for whatever we can check with respect
40 // to checking headers for module'ability.
41 //
42 // Some ideas:
43 //
44 // 1. Group duplicate definition messages into a single list.
45 //
46 // 2. Try to figure out the preprocessor conditional directives that
47 // contribute to problems.
48 //
49 // 3. Check for correct and consistent usage of extern "C" {} and other
50 // directives. Warn about #include inside extern "C" {}.
51 //
52 // 4. What else?
53 //
54 // General clean-up and refactoring:
55 //
56 // 1. The Location class seems to be something that we might
57 // want to design to be applicable to a wider range of tools, and stick it
58 // somewhere into Tooling/ in mainline
59 //
60 //===----------------------------------------------------------------------===//
61 
62 #include "llvm/Config/config.h"
63 #include "llvm/Support/FileSystem.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "clang/Basic/SourceManager.h"
66 #include "clang/Lex/Preprocessor.h"
67 #include "clang/AST/ASTConsumer.h"
68 #include "clang/AST/ASTContext.h"
69 #include "clang/AST/RecursiveASTVisitor.h"
70 #include "clang/Frontend/CompilerInstance.h"
71 #include "clang/Frontend/FrontendActions.h"
72 #include "clang/Tooling/CompilationDatabase.h"
73 #include "clang/Tooling/Tooling.h"
74 #include <vector>
75 #include <string>
76 #include <fstream>
77 #include <algorithm>
78 #include <iterator>
79 
80 using namespace clang::tooling;
81 using namespace clang;
82 using llvm::StringRef;
83 
84 // FIXME: The Location class seems to be something that we might
85 // want to design to be applicable to a wider range of tools, and stick it
86 // somewhere into Tooling/ in mainline
87 struct Location {
88   const FileEntry *File;
89   unsigned Line, Column;
90 
91   Location() : File(), Line(), Column() { }
92 
93   Location(SourceManager &SM, SourceLocation Loc) : File(), Line(), Column() {
94     Loc = SM.getExpansionLoc(Loc);
95     if (Loc.isInvalid())
96       return;
97 
98     std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc);
99     File = SM.getFileEntryForID(Decomposed.first);
100     if (!File)
101       return;
102 
103     Line = SM.getLineNumber(Decomposed.first, Decomposed.second);
104     Column = SM.getColumnNumber(Decomposed.first, Decomposed.second);
105   }
106 
107   operator bool() const { return File != 0; }
108 
109   friend bool operator==(const Location &X, const Location &Y) {
110     return X.File == Y.File && X.Line == Y.Line && X.Column == Y.Column;
111   }
112 
113   friend bool operator!=(const Location &X, const Location &Y) {
114     return !(X == Y);
115   }
116 
117   friend bool operator<(const Location &X, const Location &Y) {
118     if (X.File != Y.File)
119       return X.File < Y.File;
120     if (X.Line != Y.Line)
121       return X.Line < Y.Line;
122     return X.Column < Y.Column;
123   }
124   friend bool operator>(const Location &X, const Location &Y) {
125     return Y < X;
126   }
127   friend bool operator<=(const Location &X, const Location &Y) {
128     return !(Y < X);
129   }
130   friend bool operator>=(const Location &X, const Location &Y) {
131     return !(X < Y);
132   }
133 
134 };
135 
136 
137 struct Entry {
138   enum Kind {
139     Tag,
140     Value,
141     Macro
142   } Kind;
143 
144   Location Loc;
145 };
146 
147 struct HeaderEntry {
148   std::string Name;
149   Location Loc;
150 
151   friend bool operator==(const HeaderEntry &X, const HeaderEntry &Y) {
152     return X.Loc == Y.Loc && X.Name == Y.Name;
153   }
154   friend bool operator!=(const HeaderEntry &X, const HeaderEntry &Y) {
155     return !(X == Y);
156   }
157   friend bool operator<(const HeaderEntry &X, const HeaderEntry &Y) {
158     return X.Loc < Y.Loc || (X.Loc == Y.Loc && X.Name < Y.Name);
159   }
160   friend bool operator>(const HeaderEntry &X, const HeaderEntry &Y) {
161     return Y < X;
162   }
163   friend bool operator<=(const HeaderEntry &X, const HeaderEntry &Y) {
164     return !(Y < X);
165   }
166   friend bool operator>=(const HeaderEntry &X, const HeaderEntry &Y) {
167     return !(X < Y);
168   }
169 };
170 
171 typedef std::vector<HeaderEntry> HeaderContents;
172 
173 class EntityMap : public llvm::StringMap<llvm::SmallVector<Entry, 2> > {
174 public:
175   llvm::DenseMap<const FileEntry *, HeaderContents> HeaderContentMismatches;
176 
177   void add(const std::string &Name, enum Entry::Kind Kind, Location Loc) {
178     // Record this entity in its header.
179     HeaderEntry HE = { Name, Loc };
180     CurHeaderContents[Loc.File].push_back(HE);
181 
182     // Check whether we've seen this entry before.
183     llvm::SmallVector<Entry, 2> &Entries = (*this)[Name];
184     for (unsigned I = 0, N = Entries.size(); I != N; ++I) {
185       if (Entries[I].Kind == Kind && Entries[I].Loc == Loc)
186         return;
187     }
188 
189     // We have not seen this entry before; record it.
190     Entry E = { Kind, Loc };
191     Entries.push_back(E);
192   }
193 
194   void mergeCurHeaderContents() {
195     for (llvm::DenseMap<const FileEntry *, HeaderContents>::iterator
196            H = CurHeaderContents.begin(), HEnd = CurHeaderContents.end();
197          H != HEnd; ++H) {
198       // Sort contents.
199       std::sort(H->second.begin(), H->second.end());
200 
201       // Check whether we've seen this header before.
202       llvm::DenseMap<const FileEntry *, HeaderContents>::iterator KnownH
203         = AllHeaderContents.find(H->first);
204       if (KnownH == AllHeaderContents.end()) {
205         // We haven't seen this header before; record its contents.
206         AllHeaderContents.insert(*H);
207         continue;
208       }
209 
210       // If the header contents are the same, we're done.
211       if (H->second == KnownH->second)
212         continue;
213 
214       // Determine what changed.
215       std::set_symmetric_difference(H->second.begin(), H->second.end(),
216         KnownH->second.begin(),
217         KnownH->second.end(),
218         std::back_inserter(HeaderContentMismatches[H->first]));
219     }
220 
221     CurHeaderContents.clear();
222   }
223 private:
224   llvm::DenseMap<const FileEntry *, HeaderContents> CurHeaderContents;
225   llvm::DenseMap<const FileEntry *, HeaderContents> AllHeaderContents;
226 };
227 
228 class CollectEntitiesVisitor
229   : public RecursiveASTVisitor<CollectEntitiesVisitor>
230 {
231 public:
232   CollectEntitiesVisitor(SourceManager &SM, EntityMap &Entities)
233     : SM(SM), Entities(Entities) { }
234 
235   bool TraverseStmt(Stmt *S) { return true; }
236   bool TraverseType(QualType T) { return true; }
237   bool TraverseTypeLoc(TypeLoc TL) { return true; }
238   bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS) { return true; }
239   bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) { return true; }
240   bool TraverseDeclarationNameInfo(DeclarationNameInfo NameInfo) { return true; }
241   bool TraverseTemplateName(TemplateName Template) { return true; }
242   bool TraverseTemplateArgument(const TemplateArgument &Arg) { return true; }
243   bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &ArgLoc) { return true; }
244   bool TraverseTemplateArguments(const TemplateArgument *Args,
245                                  unsigned NumArgs) { return true; }
246   bool TraverseConstructorInitializer(CXXCtorInitializer *Init) { return true; }
247   bool TraverseLambdaCapture(LambdaExpr::Capture C) { return true; }
248 
249   bool VisitNamedDecl(NamedDecl *ND) {
250     // We only care about file-context variables.
251     if (!ND->getDeclContext()->isFileContext())
252       return true;
253 
254     // Skip declarations that tend to be properly multiply-declared.
255     if (isa<NamespaceDecl>(ND) || isa<UsingDirectiveDecl>(ND) ||
256         isa<NamespaceAliasDecl>(ND) ||
257         isa<ClassTemplateSpecializationDecl>(ND) ||
258         isa<UsingDecl>(ND) || isa<UsingShadowDecl>(ND) ||
259         isa<FunctionDecl>(ND) || isa<FunctionTemplateDecl>(ND) ||
260         (isa<TagDecl>(ND) &&
261          !cast<TagDecl>(ND)->isThisDeclarationADefinition()))
262       return true;
263 
264     std::string Name = ND->getNameAsString();
265     if (Name.empty())
266       return true;
267 
268     Location Loc(SM, ND->getLocation());
269     if (!Loc)
270       return true;
271 
272     Entities.add(Name, isa<TagDecl>(ND)? Entry::Tag : Entry::Value, Loc);
273     return true;
274   }
275 private:
276   SourceManager &SM;
277   EntityMap &Entities;
278 };
279 
280 class CollectEntitiesConsumer : public ASTConsumer {
281 public:
282   CollectEntitiesConsumer(EntityMap &Entities, Preprocessor &PP)
283     : Entities(Entities), PP(PP) { }
284 
285   virtual void HandleTranslationUnit(ASTContext &Ctx) {
286     SourceManager &SM = Ctx.getSourceManager();
287 
288     // Collect declared entities.
289     CollectEntitiesVisitor(SM, Entities)
290       .TraverseDecl(Ctx.getTranslationUnitDecl());
291 
292     // Collect macro definitions.
293     for (Preprocessor::macro_iterator M = PP.macro_begin(),
294                                    MEnd = PP.macro_end();
295          M != MEnd; ++M) {
296       Location Loc(SM, M->second->getLocation());
297       if (!Loc)
298         continue;
299 
300       Entities.add(M->first->getName().str(), Entry::Macro, Loc);
301     }
302 
303     // Merge header contents.
304     Entities.mergeCurHeaderContents();
305   }
306 private:
307   EntityMap &Entities;
308   Preprocessor &PP;
309 };
310 
311 class CollectEntitiesAction : public SyntaxOnlyAction {
312 public:
313   CollectEntitiesAction(EntityMap &Entities) : Entities(Entities) { }
314 protected:
315   virtual clang::ASTConsumer *CreateASTConsumer(CompilerInstance &CI,
316                                                 StringRef InFile) {
317     return new CollectEntitiesConsumer(Entities, CI.getPreprocessor());
318   }
319 private:
320   EntityMap &Entities;
321 };
322 
323 class ModularizeFrontendActionFactory : public FrontendActionFactory {
324 public:
325   ModularizeFrontendActionFactory(EntityMap &Entities) : Entities(Entities) { }
326 
327   virtual CollectEntitiesAction *create() {
328     return new CollectEntitiesAction(Entities);
329   }
330 private:
331   EntityMap &Entities;
332 };
333 
334 int main(int argc, const char **argv) {
335   // Figure out command-line arguments.
336   if (argc < 2) {
337     llvm::errs() << "Usage: modularize <file containing header names> <arguments>\n";
338     return 1;
339   }
340 
341   // Load the list of headers.
342   std::string File = argv[1];
343   llvm::SmallVector<std::string, 8> Headers;
344   {
345     std::ifstream In(File.c_str());
346     if (!In) {
347       llvm::errs() << "Unable to open header list file \"" << File.c_str() << "\"\n";
348       return 2;
349     }
350 
351     std::string Line;
352     while (std::getline(In, Line)) {
353       if (Line.empty() || Line[0] == '#')
354         continue;
355 
356       Headers.push_back(Line);
357     }
358   }
359 
360   // Create the compilation database.
361   llvm::OwningPtr<CompilationDatabase> Compilations;
362   {
363     std::vector<std::string> Arguments;
364     for (int I = 2; I < argc; ++I)
365       Arguments.push_back(argv[I]);
366     SmallString<256> PathBuf;
367     llvm::sys::fs::current_path(PathBuf);
368     Compilations.reset(new FixedCompilationDatabase(Twine(PathBuf), Arguments));
369   }
370 
371   // Parse all of the headers, detecting duplicates.
372   EntityMap Entities;
373   ClangTool Tool(*Compilations, Headers);
374   int HadErrors = Tool.run(new ModularizeFrontendActionFactory(Entities));
375 
376   // Check for the same entity being defined in multiple places.
377   // FIXME: Could they be grouped into a list?
378   for (EntityMap::iterator E = Entities.begin(), EEnd = Entities.end();
379        E != EEnd; ++E) {
380     Location Tag, Value, Macro;
381     for (unsigned I = 0, N = E->second.size(); I != N; ++I) {
382       Location *Which;
383       switch (E->second[I].Kind) {
384       case Entry::Tag: Which = &Tag; break;
385       case Entry::Value: Which = &Value; break;
386       case Entry::Macro: Which = &Macro; break;
387       }
388 
389       if (!Which->File) {
390         *Which = E->second[I].Loc;
391         continue;
392       }
393 
394       llvm::errs() << "error: '" << E->first().str().c_str()
395         << "' defined at both " << Which->File->getName()
396         << ":" << Which->Line << ":" << Which->Column
397         << " and " << E->second[I].Loc.File->getName() << ":"
398         << E->second[I].Loc.Line << ":" << E->second[I].Loc.Column << "\n";
399       HadErrors = 1;
400     }
401   }
402 
403   // Complain about any headers that have contents that differ based on how
404   // they are included.
405   // FIXME: Could we provide information about which preprocessor conditionals
406   // are involved?
407   for (llvm::DenseMap<const FileEntry *, HeaderContents>::iterator
408             H = Entities.HeaderContentMismatches.begin(),
409          HEnd = Entities.HeaderContentMismatches.end();
410        H != HEnd; ++H) {
411     if (H->second.empty()) {
412       llvm::errs() << "internal error: phantom header content mismatch\n";
413       continue;
414     }
415 
416     HadErrors = 1;
417     llvm::errs() << "error: header '" << H->first->getName()
418       << "' has different contents dependening on how it was included\n";
419     for (unsigned I = 0, N = H->second.size(); I != N; ++I) {
420       llvm::errs() << "note: '" << H->second[I].Name.c_str()
421         << "' in " << H->second[I].Loc.File->getName() << " at "
422         << H->second[I].Loc.Line << ":" << H->second[I].Loc.Column
423         << " not always provided\n";
424     }
425   }
426 
427   return HadErrors;
428 }
429