1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10 #include "llvm/Support/MemoryBuffer.h"
11 #include "llvm/Support/SmallVectorMemoryBuffer.h"
12 #include "llvm/Support/Threading.h"
13
14 using namespace clang;
15 using namespace tooling;
16 using namespace dependencies;
17
18 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
readFile(StringRef Filename)19 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
20 // Load the file and its content from the file system.
21 auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
22 if (!MaybeFile)
23 return MaybeFile.getError();
24 auto File = std::move(*MaybeFile);
25
26 auto MaybeStat = File->status();
27 if (!MaybeStat)
28 return MaybeStat.getError();
29 auto Stat = std::move(*MaybeStat);
30
31 auto MaybeBuffer = File->getBuffer(Stat.getName());
32 if (!MaybeBuffer)
33 return MaybeBuffer.getError();
34 auto Buffer = std::move(*MaybeBuffer);
35
36 // If the file size changed between read and stat, pretend it didn't.
37 if (Stat.getSize() != Buffer->getBufferSize())
38 Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
39
40 return TentativeEntry(Stat, std::move(Buffer));
41 }
42
scanForDirectivesIfNecessary(const CachedFileSystemEntry & Entry,StringRef Filename,bool Disable)43 EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
44 const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
45 if (Entry.isError() || Entry.isDirectory() || Disable ||
46 !shouldScanForDirectives(Filename))
47 return EntryRef(Filename, Entry);
48
49 CachedFileContents *Contents = Entry.getCachedContents();
50 assert(Contents && "contents not initialized");
51
52 // Double-checked locking.
53 if (Contents->DepDirectives.load())
54 return EntryRef(Filename, Entry);
55
56 std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
57
58 // Double-checked locking.
59 if (Contents->DepDirectives.load())
60 return EntryRef(Filename, Entry);
61
62 SmallVector<dependency_directives_scan::Directive, 64> Directives;
63 // Scan the file for preprocessor directives that might affect the
64 // dependencies.
65 if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
66 Contents->DepDirectiveTokens,
67 Directives)) {
68 Contents->DepDirectiveTokens.clear();
69 // FIXME: Propagate the diagnostic if desired by the client.
70 Contents->DepDirectives.store(new Optional<DependencyDirectivesTy>());
71 return EntryRef(Filename, Entry);
72 }
73
74 // This function performed double-checked locking using `DepDirectives`.
75 // Assigning it must be the last thing this function does, otherwise other
76 // threads may skip the
77 // critical section (`DepDirectives != nullptr`), leading to a data race.
78 Contents->DepDirectives.store(
79 new Optional<DependencyDirectivesTy>(std::move(Directives)));
80 return EntryRef(Filename, Entry);
81 }
82
83 DependencyScanningFilesystemSharedCache::
DependencyScanningFilesystemSharedCache()84 DependencyScanningFilesystemSharedCache() {
85 // This heuristic was chosen using a empirical testing on a
86 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
87 // sharding gives a performance edge by reducing the lock contention.
88 // FIXME: A better heuristic might also consider the OS to account for
89 // the different cost of lock contention on different OSes.
90 NumShards =
91 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
92 CacheShards = std::make_unique<CacheShard[]>(NumShards);
93 }
94
95 DependencyScanningFilesystemSharedCache::CacheShard &
getShardForFilename(StringRef Filename) const96 DependencyScanningFilesystemSharedCache::getShardForFilename(
97 StringRef Filename) const {
98 return CacheShards[llvm::hash_value(Filename) % NumShards];
99 }
100
101 DependencyScanningFilesystemSharedCache::CacheShard &
getShardForUID(llvm::sys::fs::UniqueID UID) const102 DependencyScanningFilesystemSharedCache::getShardForUID(
103 llvm::sys::fs::UniqueID UID) const {
104 auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
105 return CacheShards[Hash % NumShards];
106 }
107
108 const CachedFileSystemEntry *
findEntryByFilename(StringRef Filename) const109 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
110 StringRef Filename) const {
111 std::lock_guard<std::mutex> LockGuard(CacheLock);
112 auto It = EntriesByFilename.find(Filename);
113 return It == EntriesByFilename.end() ? nullptr : It->getValue();
114 }
115
116 const CachedFileSystemEntry *
findEntryByUID(llvm::sys::fs::UniqueID UID) const117 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
118 llvm::sys::fs::UniqueID UID) const {
119 std::lock_guard<std::mutex> LockGuard(CacheLock);
120 auto It = EntriesByUID.find(UID);
121 return It == EntriesByUID.end() ? nullptr : It->getSecond();
122 }
123
124 const CachedFileSystemEntry &
125 DependencyScanningFilesystemSharedCache::CacheShard::
getOrEmplaceEntryForFilename(StringRef Filename,llvm::ErrorOr<llvm::vfs::Status> Stat)126 getOrEmplaceEntryForFilename(StringRef Filename,
127 llvm::ErrorOr<llvm::vfs::Status> Stat) {
128 std::lock_guard<std::mutex> LockGuard(CacheLock);
129 auto Insertion = EntriesByFilename.insert({Filename, nullptr});
130 if (Insertion.second)
131 Insertion.first->second =
132 new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
133 return *Insertion.first->second;
134 }
135
136 const CachedFileSystemEntry &
getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID,llvm::vfs::Status Stat,std::unique_ptr<llvm::MemoryBuffer> Contents)137 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
138 llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
139 std::unique_ptr<llvm::MemoryBuffer> Contents) {
140 std::lock_guard<std::mutex> LockGuard(CacheLock);
141 auto Insertion = EntriesByUID.insert({UID, nullptr});
142 if (Insertion.second) {
143 CachedFileContents *StoredContents = nullptr;
144 if (Contents)
145 StoredContents = new (ContentsStorage.Allocate())
146 CachedFileContents(std::move(Contents));
147 Insertion.first->second = new (EntryStorage.Allocate())
148 CachedFileSystemEntry(std::move(Stat), StoredContents);
149 }
150 return *Insertion.first->second;
151 }
152
153 const CachedFileSystemEntry &
154 DependencyScanningFilesystemSharedCache::CacheShard::
getOrInsertEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)155 getOrInsertEntryForFilename(StringRef Filename,
156 const CachedFileSystemEntry &Entry) {
157 std::lock_guard<std::mutex> LockGuard(CacheLock);
158 return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
159 }
160
161 /// Whitelist file extensions that should be minimized, treating no extension as
162 /// a source file that should be minimized.
163 ///
164 /// This is kinda hacky, it would be better if we knew what kind of file Clang
165 /// was expecting instead.
shouldScanForDirectivesBasedOnExtension(StringRef Filename)166 static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
167 StringRef Ext = llvm::sys::path::extension(Filename);
168 if (Ext.empty())
169 return true; // C++ standard library
170 return llvm::StringSwitch<bool>(Ext)
171 .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
172 .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
173 .CasesLower(".m", ".mm", true)
174 .CasesLower(".i", ".ii", ".mi", ".mmi", true)
175 .CasesLower(".def", ".inc", true)
176 .Default(false);
177 }
178
shouldCacheStatFailures(StringRef Filename)179 static bool shouldCacheStatFailures(StringRef Filename) {
180 StringRef Ext = llvm::sys::path::extension(Filename);
181 if (Ext.empty())
182 return false; // This may be the module cache directory.
183 // Only cache stat failures on source files.
184 return shouldScanForDirectivesBasedOnExtension(Filename);
185 }
186
shouldScanForDirectives(StringRef Filename)187 bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
188 StringRef Filename) {
189 return shouldScanForDirectivesBasedOnExtension(Filename);
190 }
191
192 const CachedFileSystemEntry &
getOrEmplaceSharedEntryForUID(TentativeEntry TEntry)193 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
194 TentativeEntry TEntry) {
195 auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
196 return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
197 std::move(TEntry.Status),
198 std::move(TEntry.Contents));
199 }
200
201 const CachedFileSystemEntry *
findEntryByFilenameWithWriteThrough(StringRef Filename)202 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
203 StringRef Filename) {
204 if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
205 return Entry;
206 auto &Shard = SharedCache.getShardForFilename(Filename);
207 if (const auto *Entry = Shard.findEntryByFilename(Filename))
208 return &LocalCache.insertEntryForFilename(Filename, *Entry);
209 return nullptr;
210 }
211
212 llvm::ErrorOr<const CachedFileSystemEntry &>
computeAndStoreResult(StringRef Filename)213 DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
214 llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
215 if (!Stat) {
216 if (!shouldCacheStatFailures(Filename))
217 return Stat.getError();
218 const auto &Entry =
219 getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
220 return insertLocalEntryForFilename(Filename, Entry);
221 }
222
223 if (const auto *Entry = findSharedEntryByUID(*Stat))
224 return insertLocalEntryForFilename(Filename, *Entry);
225
226 auto TEntry =
227 Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
228
229 const CachedFileSystemEntry *SharedEntry = [&]() {
230 if (TEntry) {
231 const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
232 return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
233 }
234 return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
235 }();
236
237 return insertLocalEntryForFilename(Filename, *SharedEntry);
238 }
239
240 llvm::ErrorOr<EntryRef>
getOrCreateFileSystemEntry(StringRef Filename,bool DisableDirectivesScanning)241 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
242 StringRef Filename, bool DisableDirectivesScanning) {
243 if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
244 return scanForDirectivesIfNecessary(*Entry, Filename,
245 DisableDirectivesScanning)
246 .unwrapError();
247 auto MaybeEntry = computeAndStoreResult(Filename);
248 if (!MaybeEntry)
249 return MaybeEntry.getError();
250 return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
251 DisableDirectivesScanning)
252 .unwrapError();
253 }
254
255 llvm::ErrorOr<llvm::vfs::Status>
status(const Twine & Path)256 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
257 SmallString<256> OwnedFilename;
258 StringRef Filename = Path.toStringRef(OwnedFilename);
259
260 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
261 if (!Result)
262 return Result.getError();
263 return Result->getStatus();
264 }
265
266 namespace {
267
268 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
269 /// this subclass.
270 class DepScanFile final : public llvm::vfs::File {
271 public:
DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,llvm::vfs::Status Stat)272 DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
273 llvm::vfs::Status Stat)
274 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
275
276 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
277
status()278 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
279
280 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
getBuffer(const Twine & Name,int64_t FileSize,bool RequiresNullTerminator,bool IsVolatile)281 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
282 bool IsVolatile) override {
283 return std::move(Buffer);
284 }
285
close()286 std::error_code close() override { return {}; }
287
288 private:
289 std::unique_ptr<llvm::MemoryBuffer> Buffer;
290 llvm::vfs::Status Stat;
291 };
292
293 } // end anonymous namespace
294
295 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
create(EntryRef Entry)296 DepScanFile::create(EntryRef Entry) {
297 assert(!Entry.isError() && "error");
298
299 if (Entry.isDirectory())
300 return std::make_error_code(std::errc::is_a_directory);
301
302 auto Result = std::make_unique<DepScanFile>(
303 llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
304 Entry.getStatus().getName(),
305 /*RequiresNullTerminator=*/false),
306 Entry.getStatus());
307
308 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
309 std::unique_ptr<llvm::vfs::File>(std::move(Result)));
310 }
311
312 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine & Path)313 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
314 SmallString<256> OwnedFilename;
315 StringRef Filename = Path.toStringRef(OwnedFilename);
316
317 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
318 if (!Result)
319 return Result.getError();
320 return DepScanFile::create(Result.get());
321 }
322