1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
11 #include "llvm/Support/MemoryBuffer.h"
12 #include "llvm/Support/SmallVectorMemoryBuffer.h"
13 #include "llvm/Support/Threading.h"
14 
15 using namespace clang;
16 using namespace tooling;
17 using namespace dependencies;
18 
19 CachedFileSystemEntry CachedFileSystemEntry::createFileEntry(
20     StringRef Filename, llvm::vfs::FileSystem &FS, bool Minimize) {
21   // Load the file and its content from the file system.
22   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MaybeFile =
23       FS.openFileForRead(Filename);
24   if (!MaybeFile)
25     return MaybeFile.getError();
26   llvm::ErrorOr<llvm::vfs::Status> Stat = (*MaybeFile)->status();
27   if (!Stat)
28     return Stat.getError();
29 
30   llvm::vfs::File &F = **MaybeFile;
31   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeBuffer =
32       F.getBuffer(Stat->getName());
33   if (!MaybeBuffer)
34     return MaybeBuffer.getError();
35 
36   llvm::SmallString<1024> MinimizedFileContents;
37   // Minimize the file down to directives that might affect the dependencies.
38   const auto &Buffer = *MaybeBuffer;
39   SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
40   if (!Minimize || minimizeSourceToDependencyDirectives(
41                        Buffer->getBuffer(), MinimizedFileContents, Tokens)) {
42     // Use the original file unless requested otherwise, or
43     // if the minimization failed.
44     // FIXME: Propage the diagnostic if desired by the client.
45     CachedFileSystemEntry Result;
46     Result.MaybeStat = std::move(*Stat);
47     Result.Contents = std::move(*MaybeBuffer);
48     return Result;
49   }
50 
51   CachedFileSystemEntry Result;
52   size_t Size = MinimizedFileContents.size();
53   Result.MaybeStat = llvm::vfs::Status(Stat->getName(), Stat->getUniqueID(),
54                                        Stat->getLastModificationTime(),
55                                        Stat->getUser(), Stat->getGroup(), Size,
56                                        Stat->getType(), Stat->getPermissions());
57   // The contents produced by the minimizer must be null terminated.
58   assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
59          "not null terminated contents");
60   Result.Contents = std::make_unique<llvm::SmallVectorMemoryBuffer>(
61       std::move(MinimizedFileContents));
62 
63   // Compute the skipped PP ranges that speedup skipping over inactive
64   // preprocessor blocks.
65   llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32>
66       SkippedRanges;
67   minimize_source_to_dependency_directives::computeSkippedRanges(Tokens,
68                                                                  SkippedRanges);
69   PreprocessorSkippedRangeMapping Mapping;
70   for (const auto &Range : SkippedRanges) {
71     if (Range.Length < 16) {
72       // Ignore small ranges as non-profitable.
73       // FIXME: This is a heuristic, its worth investigating the tradeoffs
74       // when it should be applied.
75       continue;
76     }
77     Mapping[Range.Offset] = Range.Length;
78   }
79   Result.PPSkippedRangeMapping = std::move(Mapping);
80 
81   return Result;
82 }
83 
84 CachedFileSystemEntry
85 CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status &&Stat) {
86   assert(Stat.isDirectory() && "not a directory!");
87   auto Result = CachedFileSystemEntry();
88   Result.MaybeStat = std::move(Stat);
89   return Result;
90 }
91 
92 DependencyScanningFilesystemSharedCache::SingleCache::SingleCache() {
93   // This heuristic was chosen using a empirical testing on a
94   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
95   // sharding gives a performance edge by reducing the lock contention.
96   // FIXME: A better heuristic might also consider the OS to account for
97   // the different cost of lock contention on different OSes.
98   NumShards =
99       std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
100   CacheShards = std::make_unique<CacheShard[]>(NumShards);
101 }
102 
103 DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
104 DependencyScanningFilesystemSharedCache::SingleCache::get(StringRef Key) {
105   CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
106   std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
107   auto It = Shard.Cache.try_emplace(Key);
108   return It.first->getValue();
109 }
110 
111 DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
112 DependencyScanningFilesystemSharedCache::get(StringRef Key, bool Minimized) {
113   SingleCache &Cache = Minimized ? CacheMinimized : CacheOriginal;
114   return Cache.get(Key);
115 }
116 
117 /// Whitelist file extensions that should be minimized, treating no extension as
118 /// a source file that should be minimized.
119 ///
120 /// This is kinda hacky, it would be better if we knew what kind of file Clang
121 /// was expecting instead.
122 static bool shouldMinimizeBasedOnExtension(StringRef Filename) {
123   StringRef Ext = llvm::sys::path::extension(Filename);
124   if (Ext.empty())
125     return true; // C++ standard library
126   return llvm::StringSwitch<bool>(Ext)
127     .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
128     .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
129     .CasesLower(".m", ".mm", true)
130     .CasesLower(".i", ".ii", ".mi", ".mmi", true)
131     .CasesLower(".def", ".inc", true)
132     .Default(false);
133 }
134 
135 
136 static bool shouldCacheStatFailures(StringRef Filename) {
137   StringRef Ext = llvm::sys::path::extension(Filename);
138   if (Ext.empty())
139     return false; // This may be the module cache directory.
140   // Only cache stat failures on source files.
141   return shouldMinimizeBasedOnExtension(Filename);
142 }
143 
144 void DependencyScanningWorkerFilesystem::disableMinimization(
145     StringRef RawFilename) {
146   llvm::SmallString<256> Filename;
147   llvm::sys::path::native(RawFilename, Filename);
148   NotToBeMinimized.insert(Filename);
149 }
150 
151 bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) {
152   if (!shouldMinimizeBasedOnExtension(RawFilename))
153     return false;
154 
155   llvm::SmallString<256> Filename;
156   llvm::sys::path::native(RawFilename, Filename);
157   return !NotToBeMinimized.contains(Filename);
158 }
159 
160 CachedFileSystemEntry DependencyScanningWorkerFilesystem::createFileSystemEntry(
161     llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, StringRef Filename,
162     bool ShouldMinimize) {
163   if (!MaybeStatus)
164     return CachedFileSystemEntry(MaybeStatus.getError());
165 
166   if (MaybeStatus->isDirectory())
167     return CachedFileSystemEntry::createDirectoryEntry(std::move(*MaybeStatus));
168 
169   return CachedFileSystemEntry::createFileEntry(Filename, getUnderlyingFS(),
170                                                 ShouldMinimize);
171 }
172 
173 llvm::ErrorOr<const CachedFileSystemEntry *>
174 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
175     const StringRef Filename) {
176   bool ShouldMinimize = shouldMinimize(Filename);
177 
178   if (const auto *Entry = Cache.getCachedEntry(Filename, ShouldMinimize))
179     return Entry;
180 
181   // FIXME: Handle PCM/PCH files.
182   // FIXME: Handle module map files.
183 
184   DependencyScanningFilesystemSharedCache::SharedFileSystemEntry
185       &SharedCacheEntry = SharedCache.get(Filename, ShouldMinimize);
186   const CachedFileSystemEntry *Result;
187   {
188     std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
189     CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
190 
191     if (!CacheEntry.isValid()) {
192       auto MaybeStatus = getUnderlyingFS().status(Filename);
193       if (!MaybeStatus && !shouldCacheStatFailures(Filename))
194         // HACK: We need to always restat non source files if the stat fails.
195         //   This is because Clang first looks up the module cache and module
196         //   files before building them, and then looks for them again. If we
197         //   cache the stat failure, it won't see them the second time.
198         return MaybeStatus.getError();
199       CacheEntry = createFileSystemEntry(std::move(MaybeStatus), Filename,
200                                          ShouldMinimize);
201     }
202 
203     Result = &CacheEntry;
204   }
205 
206   // Store the result in the local cache.
207   Cache.setCachedEntry(Filename, ShouldMinimize, Result);
208   return Result;
209 }
210 
211 llvm::ErrorOr<llvm::vfs::Status>
212 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
213   SmallString<256> OwnedFilename;
214   StringRef Filename = Path.toStringRef(OwnedFilename);
215   const llvm::ErrorOr<const CachedFileSystemEntry *> Result =
216       getOrCreateFileSystemEntry(Filename);
217   if (!Result)
218     return Result.getError();
219   return (*Result)->getStatus();
220 }
221 
222 namespace {
223 
224 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
225 /// this subclass.
226 class MinimizedVFSFile final : public llvm::vfs::File {
227 public:
228   MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
229                    llvm::vfs::Status Stat)
230       : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
231 
232   static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
233   create(const CachedFileSystemEntry *Entry,
234          ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings);
235 
236   llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
237 
238   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
239   getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
240             bool IsVolatile) override {
241     return std::move(Buffer);
242   }
243 
244   std::error_code close() override { return {}; }
245 
246 private:
247   std::unique_ptr<llvm::MemoryBuffer> Buffer;
248   llvm::vfs::Status Stat;
249 };
250 
251 } // end anonymous namespace
252 
253 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
254     const CachedFileSystemEntry *Entry,
255     ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) {
256   if (Entry->isDirectory())
257     return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
258         std::make_error_code(std::errc::is_a_directory));
259   llvm::ErrorOr<StringRef> Contents = Entry->getContents();
260   if (!Contents)
261     return Contents.getError();
262   auto Result = std::make_unique<MinimizedVFSFile>(
263       llvm::MemoryBuffer::getMemBuffer(*Contents, Entry->getName(),
264                                        /*RequiresNullTerminator=*/false),
265       *Entry->getStatus());
266   if (!Entry->getPPSkippedRangeMapping().empty() && PPSkipMappings)
267     (*PPSkipMappings)[Result->Buffer->getBufferStart()] =
268         &Entry->getPPSkippedRangeMapping();
269   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
270       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
271 }
272 
273 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
274 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
275   SmallString<256> OwnedFilename;
276   StringRef Filename = Path.toStringRef(OwnedFilename);
277 
278   const llvm::ErrorOr<const CachedFileSystemEntry *> Result =
279       getOrCreateFileSystemEntry(Filename);
280   if (!Result)
281     return Result.getError();
282   return MinimizedVFSFile::create(Result.get(), PPSkipMappings);
283 }
284