1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
11 #include "llvm/Support/MemoryBuffer.h"
12 #include "llvm/Support/SmallVectorMemoryBuffer.h"
13 #include "llvm/Support/Threading.h"
14 
15 using namespace clang;
16 using namespace tooling;
17 using namespace dependencies;
18 
19 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
20 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
21   // Load the file and its content from the file system.
22   auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
23   if (!MaybeFile)
24     return MaybeFile.getError();
25   auto File = std::move(*MaybeFile);
26 
27   auto MaybeStat = File->status();
28   if (!MaybeStat)
29     return MaybeStat.getError();
30   auto Stat = std::move(*MaybeStat);
31 
32   auto MaybeBuffer = File->getBuffer(Stat.getName());
33   if (!MaybeBuffer)
34     return MaybeBuffer.getError();
35   auto Buffer = std::move(*MaybeBuffer);
36 
37   // If the file size changed between read and stat, pretend it didn't.
38   if (Stat.getSize() != Buffer->getBufferSize())
39     Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
40 
41   return TentativeEntry(Stat, std::move(Buffer));
42 }
43 
EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary(
    const CachedFileSystemEntry &Entry, StringRef Filename) {
  // Errors and directories have no contents to minimize, and files whose
  // extension is excluded from minimization are returned unmodified.
  if (Entry.isError() || Entry.isDirectory() || !shouldMinimize(Filename))
    return EntryRef(/*Minimized=*/false, Filename, Entry);

  CachedFileContents *Contents = Entry.getContents();
  assert(Contents && "contents not initialized");

  // Double-checked locking: a non-null `MinimizedAccess` means another thread
  // already finished minimizing these contents, so we can return lock-free.
  if (Contents->MinimizedAccess.load())
    return EntryRef(/*Minimized=*/true, Filename, Entry);

  std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);

  // Double-checked locking: re-check under the lock in case another thread
  // finished minimizing while we were waiting to acquire it.
  if (Contents->MinimizedAccess.load())
    return EntryRef(/*Minimized=*/true, Filename, Entry);

  llvm::SmallString<1024> MinimizedFileContents;
  // Minimize the file down to directives that might affect the dependencies.
  SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
  if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(),
                                           MinimizedFileContents, Tokens)) {
    // FIXME: Propagate the diagnostic if desired by the client.
    // Use the original file if the minimization failed.
    Contents->MinimizedStorage =
        llvm::MemoryBuffer::getMemBuffer(*Contents->Original);
    // Publish the fallback buffer; safe to store here because there are no
    // further fields to initialize on this path (unlike the success path
    // below, which must store `MinimizedAccess` last).
    Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
    return EntryRef(/*Minimized=*/true, Filename, Entry);
  }

  // The contents produced by the minimizer must be null terminated.
  assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
         "not null terminated contents");

  // Compute the skipped PP ranges that speedup skipping over inactive
  // preprocessor blocks.
  llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32>
      SkippedRanges;
  minimize_source_to_dependency_directives::computeSkippedRanges(Tokens,
                                                                 SkippedRanges);
  PreprocessorSkippedRangeMapping Mapping;
  for (const auto &Range : SkippedRanges) {
    if (Range.Length < 16) {
      // Ignore small ranges as non-profitable.
      // FIXME: This is a heuristic, its worth investigating the tradeoffs
      // when it should be applied.
      continue;
    }
    Mapping[Range.Offset] = Range.Length;
  }
  Contents->PPSkippedRangeMapping = std::move(Mapping);

  // Take ownership of the minimized bytes.
  Contents->MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
      std::move(MinimizedFileContents));
  // This function performed double-checked locking using `MinimizedAccess`.
  // Assigning it must be the last thing this function does. If we were to
  // assign it before `PPSkippedRangeMapping`, other threads may skip the
  // critical section (`MinimizedAccess != nullptr`) and access the mappings
  // that are about to be initialized, leading to a data race.
  Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
  return EntryRef(/*Minimized=*/true, Filename, Entry);
}
107 
108 DependencyScanningFilesystemSharedCache::
109     DependencyScanningFilesystemSharedCache() {
110   // This heuristic was chosen using a empirical testing on a
111   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
112   // sharding gives a performance edge by reducing the lock contention.
113   // FIXME: A better heuristic might also consider the OS to account for
114   // the different cost of lock contention on different OSes.
115   NumShards =
116       std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
117   CacheShards = std::make_unique<CacheShard[]>(NumShards);
118 }
119 
120 DependencyScanningFilesystemSharedCache::CacheShard &
121 DependencyScanningFilesystemSharedCache::getShardForFilename(
122     StringRef Filename) const {
123   return CacheShards[llvm::hash_value(Filename) % NumShards];
124 }
125 
126 DependencyScanningFilesystemSharedCache::CacheShard &
127 DependencyScanningFilesystemSharedCache::getShardForUID(
128     llvm::sys::fs::UniqueID UID) const {
129   auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
130   return CacheShards[Hash % NumShards];
131 }
132 
133 const CachedFileSystemEntry *
134 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
135     StringRef Filename) const {
136   std::lock_guard<std::mutex> LockGuard(CacheLock);
137   auto It = EntriesByFilename.find(Filename);
138   return It == EntriesByFilename.end() ? nullptr : It->getValue();
139 }
140 
141 const CachedFileSystemEntry *
142 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
143     llvm::sys::fs::UniqueID UID) const {
144   std::lock_guard<std::mutex> LockGuard(CacheLock);
145   auto It = EntriesByUID.find(UID);
146   return It == EntriesByUID.end() ? nullptr : It->getSecond();
147 }
148 
149 const CachedFileSystemEntry &
150 DependencyScanningFilesystemSharedCache::CacheShard::
151     getOrEmplaceEntryForFilename(StringRef Filename,
152                                  llvm::ErrorOr<llvm::vfs::Status> Stat) {
153   std::lock_guard<std::mutex> LockGuard(CacheLock);
154   auto Insertion = EntriesByFilename.insert({Filename, nullptr});
155   if (Insertion.second)
156     Insertion.first->second =
157         new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
158   return *Insertion.first->second;
159 }
160 
161 const CachedFileSystemEntry &
162 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
163     llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
164     std::unique_ptr<llvm::MemoryBuffer> Contents) {
165   std::lock_guard<std::mutex> LockGuard(CacheLock);
166   auto Insertion = EntriesByUID.insert({UID, nullptr});
167   if (Insertion.second) {
168     CachedFileContents *StoredContents = nullptr;
169     if (Contents)
170       StoredContents = new (ContentsStorage.Allocate())
171           CachedFileContents(std::move(Contents));
172     Insertion.first->second = new (EntryStorage.Allocate())
173         CachedFileSystemEntry(std::move(Stat), StoredContents);
174   }
175   return *Insertion.first->second;
176 }
177 
178 const CachedFileSystemEntry &
179 DependencyScanningFilesystemSharedCache::CacheShard::
180     getOrInsertEntryForFilename(StringRef Filename,
181                                 const CachedFileSystemEntry &Entry) {
182   std::lock_guard<std::mutex> LockGuard(CacheLock);
183   return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
184 }
185 
186 /// Whitelist file extensions that should be minimized, treating no extension as
187 /// a source file that should be minimized.
188 ///
189 /// This is kinda hacky, it would be better if we knew what kind of file Clang
190 /// was expecting instead.
191 static bool shouldMinimizeBasedOnExtension(StringRef Filename) {
192   StringRef Ext = llvm::sys::path::extension(Filename);
193   if (Ext.empty())
194     return true; // C++ standard library
195   return llvm::StringSwitch<bool>(Ext)
196       .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
197       .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
198       .CasesLower(".m", ".mm", true)
199       .CasesLower(".i", ".ii", ".mi", ".mmi", true)
200       .CasesLower(".def", ".inc", true)
201       .Default(false);
202 }
203 
204 static bool shouldCacheStatFailures(StringRef Filename) {
205   StringRef Ext = llvm::sys::path::extension(Filename);
206   if (Ext.empty())
207     return false; // This may be the module cache directory.
208   // Only cache stat failures on source files.
209   return shouldMinimizeBasedOnExtension(Filename);
210 }
211 
212 void DependencyScanningWorkerFilesystem::disableMinimization(
213     StringRef RawFilename) {
214   llvm::SmallString<256> Filename;
215   llvm::sys::path::native(RawFilename, Filename);
216   NotToBeMinimized.insert(Filename);
217 }
218 
219 bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) {
220   if (!shouldMinimizeBasedOnExtension(RawFilename))
221     return false;
222 
223   llvm::SmallString<256> Filename;
224   llvm::sys::path::native(RawFilename, Filename);
225   return !NotToBeMinimized.contains(Filename);
226 }
227 
228 const CachedFileSystemEntry &
229 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
230     TentativeEntry TEntry) {
231   auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
232   return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
233                                        std::move(TEntry.Status),
234                                        std::move(TEntry.Contents));
235 }
236 
237 const CachedFileSystemEntry *
238 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
239     StringRef Filename) {
240   if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
241     return Entry;
242   auto &Shard = SharedCache.getShardForFilename(Filename);
243   if (const auto *Entry = Shard.findEntryByFilename(Filename))
244     return &LocalCache.insertEntryForFilename(Filename, *Entry);
245   return nullptr;
246 }
247 
248 llvm::ErrorOr<const CachedFileSystemEntry &>
249 DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
250   llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
251   if (!Stat) {
252     if (!shouldCacheStatFailures(Filename))
253       return Stat.getError();
254     const auto &Entry =
255         getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
256     return insertLocalEntryForFilename(Filename, Entry);
257   }
258 
259   if (const auto *Entry = findSharedEntryByUID(*Stat))
260     return insertLocalEntryForFilename(Filename, *Entry);
261 
262   auto TEntry =
263       Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
264 
265   const CachedFileSystemEntry *SharedEntry = [&]() {
266     if (TEntry) {
267       const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
268       return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
269     }
270     return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
271   }();
272 
273   return insertLocalEntryForFilename(Filename, *SharedEntry);
274 }
275 
276 llvm::ErrorOr<EntryRef>
277 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
278     StringRef Filename) {
279   if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
280     return minimizeIfNecessary(*Entry, Filename).unwrapError();
281   auto MaybeEntry = computeAndStoreResult(Filename);
282   if (!MaybeEntry)
283     return MaybeEntry.getError();
284   return minimizeIfNecessary(*MaybeEntry, Filename).unwrapError();
285 }
286 
287 llvm::ErrorOr<llvm::vfs::Status>
288 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
289   SmallString<256> OwnedFilename;
290   StringRef Filename = Path.toStringRef(OwnedFilename);
291 
292   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
293   if (!Result)
294     return Result.getError();
295   return Result->getStatus();
296 }
297 
298 namespace {
299 
300 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
301 /// this subclass.
/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
/// this subclass.
class MinimizedVFSFile final : public llvm::vfs::File {
public:
  /// Takes ownership of the (possibly minimized) buffer and the status to
  /// report for it.
  MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
                   llvm::vfs::Status Stat)
      : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}

  /// Creates a file from the given cached entry and, when \p PPSkipMappings
  /// is non-null, registers the entry's skipped-preprocessor-range mapping
  /// keyed by the buffer's start address.
  static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
  create(EntryRef Entry,
         ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings);

  llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }

  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
            bool IsVolatile) override {
    // Hands off the buffer: a second call would return a null buffer, so
    // each MinimizedVFSFile is effectively single-read.
    return std::move(Buffer);
  }

  std::error_code close() override { return {}; }

private:
  std::unique_ptr<llvm::MemoryBuffer> Buffer; // Owned contents; null after getBuffer().
  llvm::vfs::Status Stat;                     // Status reported to the consumer.
};
326 
327 } // end anonymous namespace
328 
329 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
330     EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) {
331   assert(!Entry.isError() && "error");
332 
333   if (Entry.isDirectory())
334     return std::make_error_code(std::errc::is_a_directory);
335 
336   auto Result = std::make_unique<MinimizedVFSFile>(
337       llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
338                                        Entry.getStatus().getName(),
339                                        /*RequiresNullTerminator=*/false),
340       Entry.getStatus());
341 
342   const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping();
343   if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings)
344     (*PPSkipMappings)[Result->Buffer->getBufferStart()] = EntrySkipMappings;
345 
346   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
347       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
348 }
349 
350 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
351 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
352   SmallString<256> OwnedFilename;
353   StringRef Filename = Path.toStringRef(OwnedFilename);
354 
355   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
356   if (!Result)
357     return Result.getError();
358   return MinimizedVFSFile::create(Result.get(), PPSkipMappings);
359 }
360