1 //===- extra/modularize/Modularize.cpp - Check modularized headers --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Introduction
10 //
11 // This file implements a tool that checks whether a set of headers provides
12 // the consistent definitions required to use modules.  It can also check an
13 // existing module map for full coverage of the headers in a directory tree.
14 //
15 // For example, in examining headers, it detects whether the same entity
16 // (say, a NULL macro or size_t typedef) is defined in multiple headers
17 // or whether a header produces different definitions under
18 // different circumstances. These conditions cause modules built from the
19 // headers to behave poorly, and should be fixed before introducing a module
20 // map.
21 //
22 // Modularize takes as input either one or more module maps (by default,
23 // "module.modulemap") or one or more text files containing lists of headers
24 // to check.
25 //
26 // In the case of a module map, the module map must be well-formed in
27 // terms of syntax.  Modularize will extract the header file names
28 // from the map.  Only normal headers are checked, assuming headers
29 // marked "private", "textual", or "exclude" are not to be checked
30 // as a top-level include, assuming they either are included by
31 // other headers which are checked, or they are not suitable for
32 // modules.
33 //
34 // In the case of a file list, the list is a newline-separated list of headers
35 // to check with respect to each other.
36 // Lines beginning with '#' and empty lines are ignored.
37 // Header file names followed by a colon and other space-separated
38 // file names will include those extra files as dependencies.
39 // The file names can be relative or full paths, but must be on the
40 // same line.
41 //
42 // Modularize also accepts regular clang front-end arguments.
43 //
44 // Usage:   modularize [(modularize options)]
45 //   [(include-files_list)|(module map)]+ [(front-end-options) ...]
46 //
47 // Options:
48 //    -prefix=(optional header path prefix)
49 //          Note that unless a "-prefix (header path)" option is specified,
50 //          non-absolute file paths in the header list file will be relative
51 //          to the header list file directory.  Use -prefix to specify a
52 //          different directory.
53 //    -module-map-path=(module map)
54 //          Skip the checks, and instead act as a module.map generation
55 //          assistant, generating a module map file based on the header list.
56 //          An optional "-root-module=(rootName)" argument can specify a root
57 //          module to be created in the generated module.map file.  Note that
58 //          you will likely need to edit this file to suit the needs of your
59 //          headers.
60 //    -problem-files-list=(problem files list file name)
61 //          For use only with module map assistant.  Input list of files that
62 //          have problems with respect to modules.  These will still be
63 //          included in the generated module map, but will be marked as
64 //          "excluded" headers.
65 //    -root-module=(root module name)
66 //          Specifies a root module to be created in the generated module.map
67 //          file.
68 //    -block-check-header-list-only
69 //          Only warn if #include directives are inside extern or namespace
70 //          blocks if the included header is in the header list.
71 //    -no-coverage-check
72 //          Don't do the coverage check.
73 //    -coverage-check-only
74 //          Only do the coverage check.
75 //    -display-file-lists
76 //          Display lists of good files (no compile errors), problem files,
77 //          and a combined list with problem files preceded by a '#'.
78 //          This can be used to quickly determine which files have problems.
79 //          The latter combined list might be useful in starting to modularize
80 //          a set of headers.  You can start with a full list of headers,
81 //          use -display-file-lists option, and then use the combined list as
82 //          your intermediate list, uncommenting-out headers as you fix them.
83 //
// Note that by default, modularize assumes .h files contain C++ source.
// If your .h files in the file list contain another language, you should
// append an appropriate -x option to your command line, e.g.:  -x c
87 //
88 // Modularization Issue Checks
89 //
90 // In the process of checking headers for modularization issues, modularize
91 // will do normal parsing, reporting normal errors and warnings,
92 // but will also report special error messages like the following:
93 //
94 //   error: '(symbol)' defined at multiple locations:
95 //       (file):(row):(column)
96 //       (file):(row):(column)
97 //
98 //   error: header '(file)' has different contents depending on how it was
99 //     included
100 //
101 // The latter might be followed by messages like the following:
102 //
103 //   note: '(symbol)' in (file) at (row):(column) not always provided
104 //
105 // Checks will also be performed for macro expansions, defined(macro)
106 // expressions, and preprocessor conditional directives that evaluate
107 // inconsistently, and can produce error messages like the following:
108 //
109 //   (...)/SubHeader.h:11:5:
110 //   #if SYMBOL == 1
111 //       ^
112 //   error: Macro instance 'SYMBOL' has different values in this header,
113 //          depending on how it was included.
114 //     'SYMBOL' expanded to: '1' with respect to these inclusion paths:
115 //       (...)/Header1.h
116 //         (...)/SubHeader.h
117 //   (...)/SubHeader.h:3:9:
118 //   #define SYMBOL 1
119 //             ^
120 //   Macro defined here.
121 //     'SYMBOL' expanded to: '2' with respect to these inclusion paths:
122 //       (...)/Header2.h
123 //           (...)/SubHeader.h
124 //   (...)/SubHeader.h:7:9:
125 //   #define SYMBOL 2
126 //             ^
127 //   Macro defined here.
128 //
129 // Checks will also be performed for '#include' directives that are
130 // nested inside 'extern "C/C++" {}' or 'namespace (name) {}' blocks,
// and can produce error messages like the following:
132 //
133 // IncludeInExtern.h:2:3
134 //   #include "Empty.h"
135 //   ^
136 // error: Include directive within extern "C" {}.
137 // IncludeInExtern.h:1:1
138 // extern "C" {
139 // ^
140 // The "extern "C" {}" block is here.
141 //
142 // See PreprocessorTracker.cpp for additional details.
143 //
144 // Module Map Coverage Check
145 //
146 // The coverage check uses the Clang ModuleMap class to read and parse the
147 // module map file.  Starting at the module map file directory, or just the
148 // include paths, if specified, it will collect the names of all the files it
149 // considers headers (no extension, .h, or .inc--if you need more, modify the
150 // isHeader function).  It then compares the headers against those referenced
151 // in the module map, either explicitly named, or implicitly named via an
152 // umbrella directory or umbrella file, as parsed by the ModuleMap object.
153 // If headers are found which are not referenced or covered by an umbrella
154 // directory or file, warning messages will be produced, and this program
155 // will return an error code of 1.  Other errors result in an error code of 2.
156 // If no problems are found, an error code of 0 is returned.
157 //
158 // Note that in the case of umbrella headers, this tool invokes the compiler
159 // to preprocess the file, and uses a callback to collect the header files
160 // included by the umbrella header or any of its nested includes.  If any
161 // front end options are needed for these compiler invocations, these
162 // can be included on the command line after the module map file argument.
163 //
// Warning messages have the form:
165 //
166 //  warning: module.modulemap does not account for file: Level3A.h
167 //
168 // Note that for the case of the module map referencing a file that does
169 // not exist, the module map parser in Clang will (at the time of this
170 // writing) display an error message.
171 //
172 // Module Map Assistant - Module Map Generation
173 //
174 // Modularize also has an option ("-module-map-path=module.modulemap") that will
175 // skip the checks, and instead act as a module.modulemap generation assistant,
176 // generating a module map file based on the header list.  An optional
177 // "-root-module=(rootName)" argument can specify a root module to be
178 // created in the generated module.modulemap file.  Note that you will likely
179 // need to edit this file to suit the needs of your headers.
180 //
181 // An example command line for generating a module.modulemap file:
182 //
183 //   modularize -module-map-path=module.modulemap -root-module=myroot \
184 //      headerlist.txt
185 //
186 // Note that if the headers in the header list have partial paths, sub-modules
187 // will be created for the subdirectories involved, assuming that the
188 // subdirectories contain headers to be grouped into a module, but still with
189 // individual modules for the headers in the subdirectory.
190 //
191 // See the ModuleAssistant.cpp file comments for additional details about the
192 // implementation of the assistant mode.
193 //
194 // Future directions:
195 //
196 // Basically, we want to add new checks for whatever we can check with respect
197 // to checking headers for module'ability.
198 //
199 // Some ideas:
200 //
201 // 1. Omit duplicate "not always provided" messages
202 //
203 // 2. Add options to disable any of the checks, in case
204 // there is some problem with them, or the messages get too verbose.
205 //
206 // 3. Try to figure out the preprocessor conditional directives that
207 // contribute to problems and tie them to the inconsistent definitions.
208 //
209 // 4. There are some legitimate uses of preprocessor macros that
210 // modularize will flag as errors, such as repeatedly #include'ing
211 // a file and using interleaving defined/undefined macros
212 // to change declarations in the included file.  Is there a way
213 // to address this?  Maybe have modularize accept a list of macros
214 // to ignore.  Otherwise you can just exclude the file, after checking
215 // for legitimate errors.
216 //
217 // 5. What else?
218 //
219 // General clean-up and refactoring:
220 //
221 // 1. The Location class seems to be something that we might
222 // want to design to be applicable to a wider range of tools, and stick it
223 // somewhere into Tooling/ in mainline
224 //
225 //===----------------------------------------------------------------------===//
226 
227 #include "Modularize.h"
228 #include "ModularizeUtilities.h"
229 #include "PreprocessorTracker.h"
230 #include "clang/AST/ASTConsumer.h"
231 #include "clang/AST/ASTContext.h"
232 #include "clang/AST/RecursiveASTVisitor.h"
233 #include "clang/Basic/SourceManager.h"
234 #include "clang/Driver/Options.h"
235 #include "clang/Frontend/CompilerInstance.h"
236 #include "clang/Frontend/FrontendAction.h"
237 #include "clang/Frontend/FrontendActions.h"
238 #include "clang/Lex/Preprocessor.h"
239 #include "clang/Tooling/CompilationDatabase.h"
240 #include "clang/Tooling/Tooling.h"
241 #include "llvm/Option/Arg.h"
242 #include "llvm/Option/ArgList.h"
243 #include "llvm/Option/OptTable.h"
244 #include "llvm/Option/Option.h"
245 #include "llvm/Support/CommandLine.h"
246 #include "llvm/Support/FileSystem.h"
247 #include "llvm/Support/MemoryBuffer.h"
248 #include "llvm/Support/Path.h"
249 #include <algorithm>
250 #include <iterator>
251 #include <string>
252 #include <vector>
253 
254 using namespace clang;
255 using namespace clang::driver;
256 using namespace clang::driver::options;
257 using namespace clang::tooling;
258 using namespace llvm;
259 using namespace llvm::opt;
260 using namespace Modularize;
261 
262 // Option to specify a file name for a list of header files to check.
263 static cl::list<std::string>
264     ListFileNames(cl::Positional, cl::value_desc("list"),
265                   cl::desc("<list of one or more header list files>"),
266                   cl::CommaSeparated);
267 
268 // Collect all other arguments, which will be passed to the front end.
269 static cl::list<std::string>
270     CC1Arguments(cl::ConsumeAfter,
271                  cl::desc("<arguments to be passed to front end>..."));
272 
273 // Option to specify a prefix to be prepended to the header names.
274 static cl::opt<std::string> HeaderPrefix(
275     "prefix", cl::init(""),
276     cl::desc(
277         "Prepend header file paths with this prefix."
278         " If not specified,"
279         " the files are considered to be relative to the header list file."));
280 
281 // Option for assistant mode, telling modularize to output a module map
282 // based on the headers list, and where to put it.
283 static cl::opt<std::string> ModuleMapPath(
284     "module-map-path", cl::init(""),
285     cl::desc("Turn on module map output and specify output path or file name."
286              " If no path is specified and if prefix option is specified,"
287              " use prefix for file path."));
288 
289 // Option to specify list of problem files for assistant.
290 // This will cause assistant to exclude these files.
291 static cl::opt<std::string> ProblemFilesList(
292   "problem-files-list", cl::init(""),
293   cl::desc(
294   "List of files with compilation or modularization problems for"
295     " assistant mode.  This will be excluded."));
296 
297 // Option for assistant mode, telling modularize the name of the root module.
298 static cl::opt<std::string>
299 RootModule("root-module", cl::init(""),
300            cl::desc("Specify the name of the root module."));
301 
302 // Option for limiting the #include-inside-extern-or-namespace-block
303 // check to only those headers explicitly listed in the header list.
304 // This is a work-around for private includes that purposefully get
305 // included inside blocks.
306 static cl::opt<bool>
307 BlockCheckHeaderListOnly("block-check-header-list-only", cl::init(false),
308 cl::desc("Only warn if #include directives are inside extern or namespace"
309   " blocks if the included header is in the header list."));
310 
311 // Option for include paths for coverage check.
312 static cl::list<std::string>
313     IncludePaths("I", cl::desc("Include path for coverage check."),
314                  cl::value_desc("path"));
315 
316 // Option for disabling the coverage check.
317 static cl::opt<bool> NoCoverageCheck("no-coverage-check",
318                                      cl::desc("Don't do the coverage check."));
319 
320 // Option for just doing the coverage check.
321 static cl::opt<bool>
322 CoverageCheckOnly("coverage-check-only", cl::init(false),
323 cl::desc("Only do the coverage check."));
324 
325 // Option for displaying lists of good, bad, and mixed files.
326 static cl::opt<bool>
327 DisplayFileLists("display-file-lists", cl::init(false),
328 cl::desc("Display lists of good files (no compile errors), problem files,"
329   " and a combined list with problem files preceded by a '#'."));
330 
331 // Save the program name for error messages.
332 const char *Argv0;
333 // Save the command line for comments.
334 std::string CommandLine;
335 
336 // Helper function for finding the input file in an arguments list.
findInputFile(const CommandLineArguments & CLArgs)337 static std::string findInputFile(const CommandLineArguments &CLArgs) {
338   const unsigned IncludedFlagsBitmask = options::CC1Option;
339   unsigned MissingArgIndex, MissingArgCount;
340   SmallVector<const char *, 256> Argv;
341   for (auto I = CLArgs.begin(), E = CLArgs.end(); I != E; ++I)
342     Argv.push_back(I->c_str());
343   InputArgList Args = getDriverOptTable().ParseArgs(
344       Argv, MissingArgIndex, MissingArgCount, IncludedFlagsBitmask);
345   std::vector<std::string> Inputs = Args.getAllArgValues(OPT_INPUT);
346   return ModularizeUtilities::getCanonicalPath(Inputs.back());
347 }
348 
349 // This arguments adjuster inserts "-include (file)" arguments for header
350 // dependencies.  It also inserts a "-w" option and a "-x c++",
351 // if no other "-x" option is present.
352 static ArgumentsAdjuster
getModularizeArgumentsAdjuster(DependencyMap & Dependencies)353 getModularizeArgumentsAdjuster(DependencyMap &Dependencies) {
354   return [&Dependencies](const CommandLineArguments &Args,
355                          StringRef /*unused*/) {
356     std::string InputFile = findInputFile(Args);
357     DependentsVector &FileDependents = Dependencies[InputFile];
358     CommandLineArguments NewArgs(Args);
359     if (int Count = FileDependents.size()) {
360       for (int Index = 0; Index < Count; ++Index) {
361         NewArgs.push_back("-include");
362         std::string File(std::string("\"") + FileDependents[Index] +
363                          std::string("\""));
364         NewArgs.push_back(FileDependents[Index]);
365       }
366     }
367     // Ignore warnings.  (Insert after "clang_tool" at beginning.)
368     NewArgs.insert(NewArgs.begin() + 1, "-w");
369     // Since we are compiling .h files, assume C++ unless given a -x option.
370     if (!llvm::is_contained(NewArgs, "-x")) {
371       NewArgs.insert(NewArgs.begin() + 2, "-x");
372       NewArgs.insert(NewArgs.begin() + 3, "c++");
373     }
374     return NewArgs;
375   };
376 }
377 
378 // FIXME: The Location class seems to be something that we might
379 // want to design to be applicable to a wider range of tools, and stick it
380 // somewhere into Tooling/ in mainline
381 struct Location {
382   const FileEntry *File;
383   unsigned Line, Column;
384 
LocationLocation385   Location() : File(), Line(), Column() {}
386 
LocationLocation387   Location(SourceManager &SM, SourceLocation Loc) : File(), Line(), Column() {
388     Loc = SM.getExpansionLoc(Loc);
389     if (Loc.isInvalid())
390       return;
391 
392     std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc);
393     File = SM.getFileEntryForID(Decomposed.first);
394     if (!File)
395       return;
396 
397     Line = SM.getLineNumber(Decomposed.first, Decomposed.second);
398     Column = SM.getColumnNumber(Decomposed.first, Decomposed.second);
399   }
400 
operator boolLocation401   operator bool() const { return File != nullptr; }
402 
operator ==(const Location & X,const Location & Y)403   friend bool operator==(const Location &X, const Location &Y) {
404     return X.File == Y.File && X.Line == Y.Line && X.Column == Y.Column;
405   }
406 
operator !=(const Location & X,const Location & Y)407   friend bool operator!=(const Location &X, const Location &Y) {
408     return !(X == Y);
409   }
410 
operator <(const Location & X,const Location & Y)411   friend bool operator<(const Location &X, const Location &Y) {
412     if (X.File != Y.File)
413       return X.File < Y.File;
414     if (X.Line != Y.Line)
415       return X.Line < Y.Line;
416     return X.Column < Y.Column;
417   }
operator >(const Location & X,const Location & Y)418   friend bool operator>(const Location &X, const Location &Y) { return Y < X; }
operator <=(const Location & X,const Location & Y)419   friend bool operator<=(const Location &X, const Location &Y) {
420     return !(Y < X);
421   }
operator >=(const Location & X,const Location & Y)422   friend bool operator>=(const Location &X, const Location &Y) {
423     return !(X < Y);
424   }
425 };
426 
427 struct Entry {
428   enum EntryKind {
429     EK_Tag,
430     EK_Value,
431     EK_Macro,
432 
433     EK_NumberOfKinds
434   } Kind;
435 
436   Location Loc;
437 
getKindNameEntry438   StringRef getKindName() { return getKindName(Kind); }
439   static StringRef getKindName(EntryKind kind);
440 };
441 
442 // Return a string representing the given kind.
getKindName(Entry::EntryKind kind)443 StringRef Entry::getKindName(Entry::EntryKind kind) {
444   switch (kind) {
445   case EK_Tag:
446     return "tag";
447   case EK_Value:
448     return "value";
449   case EK_Macro:
450     return "macro";
451   case EK_NumberOfKinds:
452     break;
453   }
454   llvm_unreachable("invalid Entry kind");
455 }
456 
457 struct HeaderEntry {
458   std::string Name;
459   Location Loc;
460 
operator ==(const HeaderEntry & X,const HeaderEntry & Y)461   friend bool operator==(const HeaderEntry &X, const HeaderEntry &Y) {
462     return X.Loc == Y.Loc && X.Name == Y.Name;
463   }
operator !=(const HeaderEntry & X,const HeaderEntry & Y)464   friend bool operator!=(const HeaderEntry &X, const HeaderEntry &Y) {
465     return !(X == Y);
466   }
operator <(const HeaderEntry & X,const HeaderEntry & Y)467   friend bool operator<(const HeaderEntry &X, const HeaderEntry &Y) {
468     return X.Loc < Y.Loc || (X.Loc == Y.Loc && X.Name < Y.Name);
469   }
operator >(const HeaderEntry & X,const HeaderEntry & Y)470   friend bool operator>(const HeaderEntry &X, const HeaderEntry &Y) {
471     return Y < X;
472   }
operator <=(const HeaderEntry & X,const HeaderEntry & Y)473   friend bool operator<=(const HeaderEntry &X, const HeaderEntry &Y) {
474     return !(Y < X);
475   }
operator >=(const HeaderEntry & X,const HeaderEntry & Y)476   friend bool operator>=(const HeaderEntry &X, const HeaderEntry &Y) {
477     return !(X < Y);
478   }
479 };
480 
481 typedef std::vector<HeaderEntry> HeaderContents;
482 
483 class EntityMap : public StringMap<SmallVector<Entry, 2> > {
484 public:
485   DenseMap<const FileEntry *, HeaderContents> HeaderContentMismatches;
486 
add(const std::string & Name,enum Entry::EntryKind Kind,Location Loc)487   void add(const std::string &Name, enum Entry::EntryKind Kind, Location Loc) {
488     // Record this entity in its header.
489     HeaderEntry HE = { Name, Loc };
490     CurHeaderContents[Loc.File].push_back(HE);
491 
492     // Check whether we've seen this entry before.
493     SmallVector<Entry, 2> &Entries = (*this)[Name];
494     for (unsigned I = 0, N = Entries.size(); I != N; ++I) {
495       if (Entries[I].Kind == Kind && Entries[I].Loc == Loc)
496         return;
497     }
498 
499     // We have not seen this entry before; record it.
500     Entry E = { Kind, Loc };
501     Entries.push_back(E);
502   }
503 
mergeCurHeaderContents()504   void mergeCurHeaderContents() {
505     for (DenseMap<const FileEntry *, HeaderContents>::iterator
506              H = CurHeaderContents.begin(),
507              HEnd = CurHeaderContents.end();
508          H != HEnd; ++H) {
509       // Sort contents.
510       llvm::sort(H->second);
511 
512       // Check whether we've seen this header before.
513       DenseMap<const FileEntry *, HeaderContents>::iterator KnownH =
514           AllHeaderContents.find(H->first);
515       if (KnownH == AllHeaderContents.end()) {
516         // We haven't seen this header before; record its contents.
517         AllHeaderContents.insert(*H);
518         continue;
519       }
520 
521       // If the header contents are the same, we're done.
522       if (H->second == KnownH->second)
523         continue;
524 
525       // Determine what changed.
526       std::set_symmetric_difference(
527           H->second.begin(), H->second.end(), KnownH->second.begin(),
528           KnownH->second.end(),
529           std::back_inserter(HeaderContentMismatches[H->first]));
530     }
531 
532     CurHeaderContents.clear();
533   }
534 
535 private:
536   DenseMap<const FileEntry *, HeaderContents> CurHeaderContents;
537   DenseMap<const FileEntry *, HeaderContents> AllHeaderContents;
538 };
539 
540 class CollectEntitiesVisitor
541     : public RecursiveASTVisitor<CollectEntitiesVisitor> {
542 public:
CollectEntitiesVisitor(SourceManager & SM,EntityMap & Entities,Preprocessor & PP,PreprocessorTracker & PPTracker,int & HadErrors)543   CollectEntitiesVisitor(SourceManager &SM, EntityMap &Entities,
544                          Preprocessor &PP, PreprocessorTracker &PPTracker,
545                          int &HadErrors)
546       : SM(SM), Entities(Entities), PP(PP), PPTracker(PPTracker),
547         HadErrors(HadErrors) {}
548 
TraverseStmt(Stmt * S)549   bool TraverseStmt(Stmt *S) { return true; }
TraverseType(QualType T)550   bool TraverseType(QualType T) { return true; }
TraverseTypeLoc(TypeLoc TL)551   bool TraverseTypeLoc(TypeLoc TL) { return true; }
TraverseNestedNameSpecifier(NestedNameSpecifier * NNS)552   bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS) { return true; }
TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS)553   bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
554     return true;
555   }
TraverseDeclarationNameInfo(DeclarationNameInfo NameInfo)556   bool TraverseDeclarationNameInfo(DeclarationNameInfo NameInfo) {
557     return true;
558   }
TraverseTemplateName(TemplateName Template)559   bool TraverseTemplateName(TemplateName Template) { return true; }
TraverseTemplateArgument(const TemplateArgument & Arg)560   bool TraverseTemplateArgument(const TemplateArgument &Arg) { return true; }
TraverseTemplateArgumentLoc(const TemplateArgumentLoc & ArgLoc)561   bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &ArgLoc) {
562     return true;
563   }
TraverseTemplateArguments(const TemplateArgument * Args,unsigned NumArgs)564   bool TraverseTemplateArguments(const TemplateArgument *Args,
565                                  unsigned NumArgs) {
566     return true;
567   }
TraverseConstructorInitializer(CXXCtorInitializer * Init)568   bool TraverseConstructorInitializer(CXXCtorInitializer *Init) { return true; }
TraverseLambdaCapture(LambdaExpr * LE,const LambdaCapture * C,Expr * Init)569   bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C,
570                              Expr *Init) {
571     return true;
572   }
573 
574   // Check 'extern "*" {}' block for #include directives.
VisitLinkageSpecDecl(LinkageSpecDecl * D)575   bool VisitLinkageSpecDecl(LinkageSpecDecl *D) {
576     // Bail if not a block.
577     if (!D->hasBraces())
578       return true;
579     SourceRange BlockRange = D->getSourceRange();
580     const char *LinkageLabel;
581     switch (D->getLanguage()) {
582     case LinkageSpecDecl::lang_c:
583       LinkageLabel = "extern \"C\" {}";
584       break;
585     case LinkageSpecDecl::lang_cxx:
586       LinkageLabel = "extern \"C++\" {}";
587       break;
588     }
589     if (!PPTracker.checkForIncludesInBlock(PP, BlockRange, LinkageLabel,
590                                            errs()))
591       HadErrors = 1;
592     return true;
593   }
594 
595   // Check 'namespace (name) {}' block for #include directives.
VisitNamespaceDecl(const NamespaceDecl * D)596   bool VisitNamespaceDecl(const NamespaceDecl *D) {
597     SourceRange BlockRange = D->getSourceRange();
598     std::string Label("namespace ");
599     Label += D->getName();
600     Label += " {}";
601     if (!PPTracker.checkForIncludesInBlock(PP, BlockRange, Label.c_str(),
602                                            errs()))
603       HadErrors = 1;
604     return true;
605   }
606 
607   // Collect definition entities.
VisitNamedDecl(NamedDecl * ND)608   bool VisitNamedDecl(NamedDecl *ND) {
609     // We only care about file-context variables.
610     if (!ND->getDeclContext()->isFileContext())
611       return true;
612 
613     // Skip declarations that tend to be properly multiply-declared.
614     if (isa<NamespaceDecl>(ND) || isa<UsingDirectiveDecl>(ND) ||
615         isa<NamespaceAliasDecl>(ND) ||
616         isa<ClassTemplateSpecializationDecl>(ND) || isa<UsingDecl>(ND) ||
617         isa<ClassTemplateDecl>(ND) || isa<TemplateTypeParmDecl>(ND) ||
618         isa<TypeAliasTemplateDecl>(ND) || isa<UsingShadowDecl>(ND) ||
619         isa<FunctionDecl>(ND) || isa<FunctionTemplateDecl>(ND) ||
620         (isa<TagDecl>(ND) &&
621          !cast<TagDecl>(ND)->isThisDeclarationADefinition()))
622       return true;
623 
624     // Skip anonymous declarations.
625     if (!ND->getDeclName())
626       return true;
627 
628     // Get the qualified name.
629     std::string Name;
630     llvm::raw_string_ostream OS(Name);
631     ND->printQualifiedName(OS);
632     OS.flush();
633     if (Name.empty())
634       return true;
635 
636     Location Loc(SM, ND->getLocation());
637     if (!Loc)
638       return true;
639 
640     Entities.add(Name, isa<TagDecl>(ND) ? Entry::EK_Tag : Entry::EK_Value, Loc);
641     return true;
642   }
643 
644 private:
645   SourceManager &SM;
646   EntityMap &Entities;
647   Preprocessor &PP;
648   PreprocessorTracker &PPTracker;
649   int &HadErrors;
650 };
651 
652 class CollectEntitiesConsumer : public ASTConsumer {
653 public:
CollectEntitiesConsumer(EntityMap & Entities,PreprocessorTracker & preprocessorTracker,Preprocessor & PP,StringRef InFile,int & HadErrors)654   CollectEntitiesConsumer(EntityMap &Entities,
655                           PreprocessorTracker &preprocessorTracker,
656                           Preprocessor &PP, StringRef InFile, int &HadErrors)
657       : Entities(Entities), PPTracker(preprocessorTracker), PP(PP),
658         HadErrors(HadErrors) {
659     PPTracker.handlePreprocessorEntry(PP, InFile);
660   }
661 
~CollectEntitiesConsumer()662   ~CollectEntitiesConsumer() override { PPTracker.handlePreprocessorExit(); }
663 
HandleTranslationUnit(ASTContext & Ctx)664   void HandleTranslationUnit(ASTContext &Ctx) override {
665     SourceManager &SM = Ctx.getSourceManager();
666 
667     // Collect declared entities.
668     CollectEntitiesVisitor(SM, Entities, PP, PPTracker, HadErrors)
669         .TraverseDecl(Ctx.getTranslationUnitDecl());
670 
671     // Collect macro definitions.
672     for (Preprocessor::macro_iterator M = PP.macro_begin(),
673                                       MEnd = PP.macro_end();
674          M != MEnd; ++M) {
675       Location Loc(SM, M->second.getLatest()->getLocation());
676       if (!Loc)
677         continue;
678 
679       Entities.add(M->first->getName().str(), Entry::EK_Macro, Loc);
680     }
681 
682     // Merge header contents.
683     Entities.mergeCurHeaderContents();
684   }
685 
686 private:
687   EntityMap &Entities;
688   PreprocessorTracker &PPTracker;
689   Preprocessor &PP;
690   int &HadErrors;
691 };
692 
693 class CollectEntitiesAction : public SyntaxOnlyAction {
694 public:
CollectEntitiesAction(EntityMap & Entities,PreprocessorTracker & preprocessorTracker,int & HadErrors)695   CollectEntitiesAction(EntityMap &Entities,
696                         PreprocessorTracker &preprocessorTracker,
697                         int &HadErrors)
698       : Entities(Entities), PPTracker(preprocessorTracker),
699         HadErrors(HadErrors) {}
700 
701 protected:
702   std::unique_ptr<clang::ASTConsumer>
CreateASTConsumer(CompilerInstance & CI,StringRef InFile)703   CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
704     return std::make_unique<CollectEntitiesConsumer>(
705         Entities, PPTracker, CI.getPreprocessor(), InFile, HadErrors);
706   }
707 
708 private:
709   EntityMap &Entities;
710   PreprocessorTracker &PPTracker;
711   int &HadErrors;
712 };
713 
714 class ModularizeFrontendActionFactory : public FrontendActionFactory {
715 public:
ModularizeFrontendActionFactory(EntityMap & Entities,PreprocessorTracker & preprocessorTracker,int & HadErrors)716   ModularizeFrontendActionFactory(EntityMap &Entities,
717                                   PreprocessorTracker &preprocessorTracker,
718                                   int &HadErrors)
719       : Entities(Entities), PPTracker(preprocessorTracker),
720         HadErrors(HadErrors) {}
721 
create()722   std::unique_ptr<FrontendAction> create() override {
723     return std::make_unique<CollectEntitiesAction>(Entities, PPTracker,
724                                                    HadErrors);
725   }
726 
727 private:
728   EntityMap &Entities;
729   PreprocessorTracker &PPTracker;
730   int &HadErrors;
731 };
732 
733 class CompileCheckVisitor
734   : public RecursiveASTVisitor<CompileCheckVisitor> {
735 public:
CompileCheckVisitor()736   CompileCheckVisitor() {}
737 
TraverseStmt(Stmt * S)738   bool TraverseStmt(Stmt *S) { return true; }
TraverseType(QualType T)739   bool TraverseType(QualType T) { return true; }
TraverseTypeLoc(TypeLoc TL)740   bool TraverseTypeLoc(TypeLoc TL) { return true; }
TraverseNestedNameSpecifier(NestedNameSpecifier * NNS)741   bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS) { return true; }
TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS)742   bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
743     return true;
744   }
TraverseDeclarationNameInfo(DeclarationNameInfo NameInfo)745   bool TraverseDeclarationNameInfo(DeclarationNameInfo NameInfo) {
746     return true;
747   }
TraverseTemplateName(TemplateName Template)748   bool TraverseTemplateName(TemplateName Template) { return true; }
TraverseTemplateArgument(const TemplateArgument & Arg)749   bool TraverseTemplateArgument(const TemplateArgument &Arg) { return true; }
TraverseTemplateArgumentLoc(const TemplateArgumentLoc & ArgLoc)750   bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &ArgLoc) {
751     return true;
752   }
TraverseTemplateArguments(const TemplateArgument * Args,unsigned NumArgs)753   bool TraverseTemplateArguments(const TemplateArgument *Args,
754     unsigned NumArgs) {
755     return true;
756   }
TraverseConstructorInitializer(CXXCtorInitializer * Init)757   bool TraverseConstructorInitializer(CXXCtorInitializer *Init) { return true; }
TraverseLambdaCapture(LambdaExpr * LE,const LambdaCapture * C,Expr * Init)758   bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C,
759                              Expr *Init) {
760     return true;
761   }
762 
763   // Check 'extern "*" {}' block for #include directives.
VisitLinkageSpecDecl(LinkageSpecDecl * D)764   bool VisitLinkageSpecDecl(LinkageSpecDecl *D) {
765     return true;
766   }
767 
768   // Check 'namespace (name) {}' block for #include directives.
VisitNamespaceDecl(const NamespaceDecl * D)769   bool VisitNamespaceDecl(const NamespaceDecl *D) {
770     return true;
771   }
772 
773   // Collect definition entities.
VisitNamedDecl(NamedDecl * ND)774   bool VisitNamedDecl(NamedDecl *ND) {
775     return true;
776   }
777 };
778 
779 class CompileCheckConsumer : public ASTConsumer {
780 public:
CompileCheckConsumer()781   CompileCheckConsumer() {}
782 
HandleTranslationUnit(ASTContext & Ctx)783   void HandleTranslationUnit(ASTContext &Ctx) override {
784     CompileCheckVisitor().TraverseDecl(Ctx.getTranslationUnitDecl());
785   }
786 };
787 
788 class CompileCheckAction : public SyntaxOnlyAction {
789 public:
CompileCheckAction()790   CompileCheckAction() {}
791 
792 protected:
793   std::unique_ptr<clang::ASTConsumer>
CreateASTConsumer(CompilerInstance & CI,StringRef InFile)794     CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
795     return std::make_unique<CompileCheckConsumer>();
796   }
797 };
798 
799 class CompileCheckFrontendActionFactory : public FrontendActionFactory {
800 public:
CompileCheckFrontendActionFactory()801   CompileCheckFrontendActionFactory() {}
802 
create()803   std::unique_ptr<FrontendAction> create() override {
804     return std::make_unique<CompileCheckAction>();
805   }
806 };
807 
main(int Argc,const char ** Argv)808 int main(int Argc, const char **Argv) {
809 
810   // Save program name for error messages.
811   Argv0 = Argv[0];
812 
813   // Save program arguments for use in module.modulemap comment.
814   CommandLine = std::string(sys::path::stem(sys::path::filename(Argv0)));
815   for (int ArgIndex = 1; ArgIndex < Argc; ArgIndex++) {
816     CommandLine.append(" ");
817     CommandLine.append(Argv[ArgIndex]);
818   }
819 
820   // This causes options to be parsed.
821   cl::ParseCommandLineOptions(Argc, Argv, "modularize.\n");
822 
823   // No go if we have no header list file.
824   if (ListFileNames.size() == 0) {
825     cl::PrintHelpMessage();
826     return 1;
827   }
828 
829   std::unique_ptr<ModularizeUtilities> ModUtil;
830   int HadErrors = 0;
831 
832   ModUtil.reset(
833     ModularizeUtilities::createModularizeUtilities(
834       ListFileNames, HeaderPrefix, ProblemFilesList));
835 
836   // Get header file names and dependencies.
837   if (ModUtil->loadAllHeaderListsAndDependencies())
838     HadErrors = 1;
839 
840   // If we are in assistant mode, output the module map and quit.
841   if (ModuleMapPath.length() != 0) {
842     if (!createModuleMap(ModuleMapPath, ModUtil->HeaderFileNames,
843                          ModUtil->ProblemFileNames,
844                          ModUtil->Dependencies, HeaderPrefix, RootModule))
845       return 1; // Failed.
846     return 0;   // Success - Skip checks in assistant mode.
847   }
848 
849   // If we're doing module maps.
850   if (!NoCoverageCheck && ModUtil->HasModuleMap) {
851     // Do coverage check.
852     if (ModUtil->doCoverageCheck(IncludePaths, CommandLine))
853       HadErrors = 1;
854   }
855 
856   // Bail early if only doing the coverage check.
857   if (CoverageCheckOnly)
858     return HadErrors;
859 
860   // Create the compilation database.
861   SmallString<256> PathBuf;
862   sys::fs::current_path(PathBuf);
863   std::unique_ptr<CompilationDatabase> Compilations;
864   Compilations.reset(
865       new FixedCompilationDatabase(Twine(PathBuf), CC1Arguments));
866 
867   // Create preprocessor tracker, to watch for macro and conditional problems.
868   std::unique_ptr<PreprocessorTracker> PPTracker(
869     PreprocessorTracker::create(ModUtil->HeaderFileNames,
870                                 BlockCheckHeaderListOnly));
871 
872   // Coolect entities here.
873   EntityMap Entities;
874 
875   // Because we can't easily determine which files failed
876   // during the tool run, if we're collecting the file lists
877   // for display, we do a first compile pass on individual
878   // files to find which ones don't compile stand-alone.
879   if (DisplayFileLists) {
880     // First, make a pass to just get compile errors.
881     for (auto &CompileCheckFile : ModUtil->HeaderFileNames) {
882       llvm::SmallVector<std::string, 32> CompileCheckFileArray;
883       CompileCheckFileArray.push_back(CompileCheckFile);
884       ClangTool CompileCheckTool(*Compilations, CompileCheckFileArray);
885       CompileCheckTool.appendArgumentsAdjuster(
886         getModularizeArgumentsAdjuster(ModUtil->Dependencies));
887       int CompileCheckFileErrors = 0;
888       // FIXME: use newFrontendActionFactory.
889       CompileCheckFrontendActionFactory CompileCheckFactory;
890       CompileCheckFileErrors |= CompileCheckTool.run(&CompileCheckFactory);
891       if (CompileCheckFileErrors != 0) {
892         ModUtil->addUniqueProblemFile(CompileCheckFile);   // Save problem file.
893         HadErrors |= 1;
894       }
895       else
896         ModUtil->addNoCompileErrorsFile(CompileCheckFile); // Save good file.
897     }
898   }
899 
900   // Then we make another pass on the good files to do the rest of the work.
901   ClangTool Tool(*Compilations,
902     (DisplayFileLists ? ModUtil->GoodFileNames : ModUtil->HeaderFileNames));
903   Tool.appendArgumentsAdjuster(
904     getModularizeArgumentsAdjuster(ModUtil->Dependencies));
905   ModularizeFrontendActionFactory Factory(Entities, *PPTracker, HadErrors);
906   HadErrors |= Tool.run(&Factory);
907 
908   // Create a place to save duplicate entity locations, separate bins per kind.
909   typedef SmallVector<Location, 8> LocationArray;
910   typedef SmallVector<LocationArray, Entry::EK_NumberOfKinds> EntryBinArray;
911   EntryBinArray EntryBins;
912   int KindIndex;
913   for (KindIndex = 0; KindIndex < Entry::EK_NumberOfKinds; ++KindIndex) {
914     LocationArray Array;
915     EntryBins.push_back(Array);
916   }
917 
918   // Check for the same entity being defined in multiple places.
919   for (EntityMap::iterator E = Entities.begin(), EEnd = Entities.end();
920        E != EEnd; ++E) {
921     // If only one occurrence, exit early.
922     if (E->second.size() == 1)
923       continue;
924     // Clear entity locations.
925     for (EntryBinArray::iterator CI = EntryBins.begin(), CE = EntryBins.end();
926          CI != CE; ++CI) {
927       CI->clear();
928     }
929     // Walk the entities of a single name, collecting the locations,
930     // separated into separate bins.
931     for (unsigned I = 0, N = E->second.size(); I != N; ++I) {
932       EntryBins[E->second[I].Kind].push_back(E->second[I].Loc);
933     }
934     // Report any duplicate entity definition errors.
935     int KindIndex = 0;
936     for (EntryBinArray::iterator DI = EntryBins.begin(), DE = EntryBins.end();
937          DI != DE; ++DI, ++KindIndex) {
938       int ECount = DI->size();
939       // If only 1 occurrence of this entity, skip it, we only report duplicates.
940       if (ECount <= 1)
941         continue;
942       LocationArray::iterator FI = DI->begin();
943       StringRef kindName = Entry::getKindName((Entry::EntryKind)KindIndex);
944       errs() << "error: " << kindName << " '" << E->first()
945              << "' defined at multiple locations:\n";
946       for (LocationArray::iterator FE = DI->end(); FI != FE; ++FI) {
947         errs() << "    " << FI->File->getName() << ":" << FI->Line << ":"
948                << FI->Column << "\n";
949         ModUtil->addUniqueProblemFile(std::string(FI->File->getName()));
950       }
951       HadErrors = 1;
952     }
953   }
954 
955   // Complain about macro instance in header files that differ based on how
956   // they are included.
957   if (PPTracker->reportInconsistentMacros(errs()))
958     HadErrors = 1;
959 
960   // Complain about preprocessor conditional directives in header files that
961   // differ based on how they are included.
962   if (PPTracker->reportInconsistentConditionals(errs()))
963     HadErrors = 1;
964 
965   // Complain about any headers that have contents that differ based on how
966   // they are included.
967   // FIXME: Could we provide information about which preprocessor conditionals
968   // are involved?
969   for (DenseMap<const FileEntry *, HeaderContents>::iterator
970            H = Entities.HeaderContentMismatches.begin(),
971            HEnd = Entities.HeaderContentMismatches.end();
972        H != HEnd; ++H) {
973     if (H->second.empty()) {
974       errs() << "internal error: phantom header content mismatch\n";
975       continue;
976     }
977 
978     HadErrors = 1;
979     ModUtil->addUniqueProblemFile(std::string(H->first->getName()));
980     errs() << "error: header '" << H->first->getName()
981            << "' has different contents depending on how it was included.\n";
982     for (unsigned I = 0, N = H->second.size(); I != N; ++I) {
983       errs() << "note: '" << H->second[I].Name << "' in "
984              << H->second[I].Loc.File->getName() << " at "
985              << H->second[I].Loc.Line << ":" << H->second[I].Loc.Column
986              << " not always provided\n";
987     }
988   }
989 
990   if (DisplayFileLists) {
991     ModUtil->displayProblemFiles();
992     ModUtil->displayGoodFiles();
993     ModUtil->displayCombinedFiles();
994   }
995 
996   return HadErrors;
997 }
998