1 //===--- ModuleDependencyCollector.cpp - Collect module dependencies ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Collect the dependencies of a set of modules.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "clang/Serialization/ASTReader.h"
18 #include "llvm/ADT/iterator_range.h"
19 #include "llvm/Config/llvm-config.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/raw_ostream.h"
23 
24 using namespace clang;
25 
26 namespace {
27 /// Private implementations for ModuleDependencyCollector
28 class ModuleDependencyListener : public ASTReaderListener {
29   ModuleDependencyCollector &Collector;
30 public:
31   ModuleDependencyListener(ModuleDependencyCollector &Collector)
32       : Collector(Collector) {}
33   bool needsInputFileVisitation() override { return true; }
34   bool needsSystemInputFileVisitation() override { return true; }
35   bool visitInputFile(StringRef Filename, bool IsSystem, bool IsOverridden,
36                       bool IsExplicitModule) override {
37     Collector.addFile(Filename);
38     return true;
39   }
40 };
41 
42 struct ModuleDependencyPPCallbacks : public PPCallbacks {
43   ModuleDependencyCollector &Collector;
44   SourceManager &SM;
45   ModuleDependencyPPCallbacks(ModuleDependencyCollector &Collector,
46                               SourceManager &SM)
47       : Collector(Collector), SM(SM) {}
48 
49   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
50                           StringRef FileName, bool IsAngled,
51                           CharSourceRange FilenameRange, const FileEntry *File,
52                           StringRef SearchPath, StringRef RelativePath,
53                           const Module *Imported) override {
54     if (!File)
55       return;
56     Collector.addFile(File->getName());
57   }
58 };
59 
60 struct ModuleDependencyMMCallbacks : public ModuleMapCallbacks {
61   ModuleDependencyCollector &Collector;
62   ModuleDependencyMMCallbacks(ModuleDependencyCollector &Collector)
63       : Collector(Collector) {}
64 
65   void moduleMapAddHeader(StringRef HeaderPath) override {
66     if (llvm::sys::path::is_absolute(HeaderPath))
67       Collector.addFile(HeaderPath);
68   }
69   void moduleMapAddUmbrellaHeader(FileManager *FileMgr,
70                                   const FileEntry *Header) override {
71     StringRef HeaderFilename = Header->getName();
72     moduleMapAddHeader(HeaderFilename);
73     // The FileManager can find and cache the symbolic link for a framework
74     // header before its real path, this means a module can have some of its
75     // headers to use other paths. Although this is usually not a problem, it's
76     // inconsistent, and not collecting the original path header leads to
77     // umbrella clashes while rebuilding modules in the crash reproducer. For
78     // example:
79     //    ApplicationServices.framework/Frameworks/ImageIO.framework/ImageIO.h
80     // instead of:
81     //    ImageIO.framework/ImageIO.h
82     //
83     // FIXME: this shouldn't be necessary once we have FileName instances
84     // around instead of FileEntry ones. For now, make sure we collect all
85     // that we need for the reproducer to work correctly.
86     StringRef UmbreallDirFromHeader =
87         llvm::sys::path::parent_path(HeaderFilename);
88     StringRef UmbrellaDir = Header->getDir()->getName();
89     if (!UmbrellaDir.equals(UmbreallDirFromHeader)) {
90       SmallString<128> AltHeaderFilename;
91       llvm::sys::path::append(AltHeaderFilename, UmbrellaDir,
92                               llvm::sys::path::filename(HeaderFilename));
93       if (FileMgr->getFile(AltHeaderFilename))
94         moduleMapAddHeader(AltHeaderFilename);
95     }
96   }
97 };
98 
99 }
100 
101 // TODO: move this to Support/Path.h and check for HAVE_REALPATH?
102 static bool real_path(StringRef SrcPath, SmallVectorImpl<char> &RealPath) {
103 #ifdef LLVM_ON_UNIX
104   char CanonicalPath[PATH_MAX];
105 
106   // TODO: emit a warning in case this fails...?
107   if (!realpath(SrcPath.str().c_str(), CanonicalPath))
108     return false;
109 
110   SmallString<256> RPath(CanonicalPath);
111   RealPath.swap(RPath);
112   return true;
113 #else
114   // FIXME: Add support for systems without realpath.
115   return false;
116 #endif
117 }
118 
119 void ModuleDependencyCollector::attachToASTReader(ASTReader &R) {
120   R.addListener(llvm::make_unique<ModuleDependencyListener>(*this));
121 }
122 
123 void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) {
124   PP.addPPCallbacks(llvm::make_unique<ModuleDependencyPPCallbacks>(
125       *this, PP.getSourceManager()));
126   PP.getHeaderSearchInfo().getModuleMap().addModuleMapCallbacks(
127       llvm::make_unique<ModuleDependencyMMCallbacks>(*this));
128 }
129 
130 static bool isCaseSensitivePath(StringRef Path) {
131   SmallString<256> TmpDest = Path, UpperDest, RealDest;
132   // Remove component traversals, links, etc.
133   if (!real_path(Path, TmpDest))
134     return true; // Current default value in vfs.yaml
135   Path = TmpDest;
136 
137   // Change path to all upper case and ask for its real path, if the latter
138   // exists and is equal to Path, it's not case sensitive. Default to case
139   // sensitive in the absence of realpath, since this is what the VFSWriter
140   // already expects when sensitivity isn't setup.
141   for (auto &C : Path)
142     UpperDest.push_back(toUppercase(C));
143   if (real_path(UpperDest, RealDest) && Path.equals(RealDest))
144     return false;
145   return true;
146 }
147 
148 void ModuleDependencyCollector::writeFileMap() {
149   if (Seen.empty())
150     return;
151 
152   StringRef VFSDir = getDest();
153 
154   // Default to use relative overlay directories in the VFS yaml file. This
155   // allows crash reproducer scripts to work across machines.
156   VFSWriter.setOverlayDir(VFSDir);
157 
158   // Do not ignore non existent contents otherwise we might skip something
159   // that should have been collected here.
160   VFSWriter.setIgnoreNonExistentContents(false);
161 
162   // Explicitly set case sensitivity for the YAML writer. For that, find out
163   // the sensitivity at the path where the headers all collected to.
164   VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir));
165 
166   // Do not rely on real path names when executing the crash reproducer scripts
167   // since we only want to actually use the files we have on the VFS cache.
168   VFSWriter.setUseExternalNames(false);
169 
170   std::error_code EC;
171   SmallString<256> YAMLPath = VFSDir;
172   llvm::sys::path::append(YAMLPath, "vfs.yaml");
173   llvm::raw_fd_ostream OS(YAMLPath, EC, llvm::sys::fs::F_Text);
174   if (EC) {
175     HasErrors = true;
176     return;
177   }
178   VFSWriter.write(OS);
179 }
180 
181 bool ModuleDependencyCollector::getRealPath(StringRef SrcPath,
182                                             SmallVectorImpl<char> &Result) {
183   using namespace llvm::sys;
184   SmallString<256> RealPath;
185   StringRef FileName = path::filename(SrcPath);
186   std::string Dir = path::parent_path(SrcPath).str();
187   auto DirWithSymLink = SymLinkMap.find(Dir);
188 
189   // Use real_path to fix any symbolic link component present in a path.
190   // Computing the real path is expensive, cache the search through the
191   // parent path directory.
192   if (DirWithSymLink == SymLinkMap.end()) {
193     if (!real_path(Dir, RealPath))
194       return false;
195     SymLinkMap[Dir] = RealPath.str();
196   } else {
197     RealPath = DirWithSymLink->second;
198   }
199 
200   path::append(RealPath, FileName);
201   Result.swap(RealPath);
202   return true;
203 }
204 
205 std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src,
206                                                       StringRef Dst) {
207   using namespace llvm::sys;
208 
209   // We need an absolute src path to append to the root.
210   SmallString<256> AbsoluteSrc = Src;
211   fs::make_absolute(AbsoluteSrc);
212   // Canonicalize src to a native path to avoid mixed separator styles.
213   path::native(AbsoluteSrc);
214   // Remove redundant leading "./" pieces and consecutive separators.
215   AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc);
216 
217   // Canonicalize the source path by removing "..", "." components.
218   SmallString<256> VirtualPath = AbsoluteSrc;
219   path::remove_dots(VirtualPath, /*remove_dot_dot=*/true);
220 
221   // If a ".." component is present after a symlink component, remove_dots may
222   // lead to the wrong real destination path. Let the source be canonicalized
223   // like that but make sure we always use the real path for the destination.
224   SmallString<256> CopyFrom;
225   if (!getRealPath(AbsoluteSrc, CopyFrom))
226     CopyFrom = VirtualPath;
227   SmallString<256> CacheDst = getDest();
228 
229   if (Dst.empty()) {
230     // The common case is to map the virtual path to the same path inside the
231     // cache.
232     path::append(CacheDst, path::relative_path(CopyFrom));
233   } else {
234     // When collecting entries from input vfsoverlays, copy the external
235     // contents into the cache but still map from the source.
236     if (!fs::exists(Dst))
237       return std::error_code();
238     path::append(CacheDst, Dst);
239     CopyFrom = Dst;
240   }
241 
242   // Copy the file into place.
243   if (std::error_code EC = fs::create_directories(path::parent_path(CacheDst),
244                                                   /*IgnoreExisting=*/true))
245     return EC;
246   if (std::error_code EC = fs::copy_file(CopyFrom, CacheDst))
247     return EC;
248 
249   // Always map a canonical src path to its real path into the YAML, by doing
250   // this we map different virtual src paths to the same entry in the VFS
251   // overlay, which is a way to emulate symlink inside the VFS; this is also
252   // needed for correctness, not doing that can lead to module redefinition
253   // errors.
254   addFileMapping(VirtualPath, CacheDst);
255   return std::error_code();
256 }
257 
258 void ModuleDependencyCollector::addFile(StringRef Filename, StringRef FileDst) {
259   if (insertSeen(Filename))
260     if (copyToRoot(Filename, FileDst))
261       HasErrors = true;
262 }
263