1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
11 #include "llvm/Support/MemoryBuffer.h"
12 #include "llvm/Support/SmallVectorMemoryBuffer.h"
13 #include "llvm/Support/Threading.h"
14 
15 using namespace clang;
16 using namespace tooling;
17 using namespace dependencies;
18 
19 llvm::ErrorOr<llvm::vfs::Status>
20 CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) {
21   // Load the file and its content from the file system.
22   auto MaybeFile = FS.openFileForRead(Filename);
23   if (!MaybeFile)
24     return MaybeFile.getError();
25   auto File = std::move(*MaybeFile);
26 
27   auto MaybeStat = File->status();
28   if (!MaybeStat)
29     return MaybeStat.getError();
30   auto Stat = std::move(*MaybeStat);
31 
32   auto MaybeBuffer = File->getBuffer(Stat.getName());
33   if (!MaybeBuffer)
34     return MaybeBuffer.getError();
35   auto Buffer = std::move(*MaybeBuffer);
36 
37   OriginalContents = std::move(Buffer);
38   return Stat;
39 }
40 
41 void CachedFileSystemEntry::minimizeFile() {
42   assert(OriginalContents && "minimizing missing contents");
43 
44   llvm::SmallString<1024> MinimizedFileContents;
45   // Minimize the file down to directives that might affect the dependencies.
46   SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
47   if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(),
48                                            MinimizedFileContents, Tokens)) {
49     // FIXME: Propagate the diagnostic if desired by the client.
50     // Use the original file if the minimization failed.
51     MinimizedContentsStorage =
52         llvm::MemoryBuffer::getMemBuffer(*OriginalContents);
53     MinimizedContentsAccess.store(MinimizedContentsStorage.get());
54     return;
55   }
56 
57   // The contents produced by the minimizer must be null terminated.
58   assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
59          "not null terminated contents");
60 
61   // Compute the skipped PP ranges that speedup skipping over inactive
62   // preprocessor blocks.
63   llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32>
64       SkippedRanges;
65   minimize_source_to_dependency_directives::computeSkippedRanges(Tokens,
66                                                                  SkippedRanges);
67   PreprocessorSkippedRangeMapping Mapping;
68   for (const auto &Range : SkippedRanges) {
69     if (Range.Length < 16) {
70       // Ignore small ranges as non-profitable.
71       // FIXME: This is a heuristic, its worth investigating the tradeoffs
72       // when it should be applied.
73       continue;
74     }
75     Mapping[Range.Offset] = Range.Length;
76   }
77   PPSkippedRangeMapping = std::move(Mapping);
78 
79   MinimizedContentsStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
80       std::move(MinimizedFileContents));
81   // The algorithm in `getOrCreateFileSystemEntry` uses the presence of
82   // minimized contents to decide whether an entry is up-to-date or not.
83   // If it is up-to-date, the skipped range mappings must be already computed.
84   // This is why we need to store the minimized contents **after** storing the
85   // skipped range mappings. Failing to do so would lead to a data race.
86   MinimizedContentsAccess.store(MinimizedContentsStorage.get());
87 }
88 
89 DependencyScanningFilesystemSharedCache::
90     DependencyScanningFilesystemSharedCache() {
91   // This heuristic was chosen using a empirical testing on a
92   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
93   // sharding gives a performance edge by reducing the lock contention.
94   // FIXME: A better heuristic might also consider the OS to account for
95   // the different cost of lock contention on different OSes.
96   NumShards =
97       std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
98   CacheShards = std::make_unique<CacheShard[]>(NumShards);
99 }
100 
101 DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
102 DependencyScanningFilesystemSharedCache::get(StringRef Key) {
103   CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
104   std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
105   auto It = Shard.Cache.try_emplace(Key);
106   return It.first->getValue();
107 }
108 
109 /// Whitelist file extensions that should be minimized, treating no extension as
110 /// a source file that should be minimized.
111 ///
112 /// This is kinda hacky, it would be better if we knew what kind of file Clang
113 /// was expecting instead.
114 static bool shouldMinimizeBasedOnExtension(StringRef Filename) {
115   StringRef Ext = llvm::sys::path::extension(Filename);
116   if (Ext.empty())
117     return true; // C++ standard library
118   return llvm::StringSwitch<bool>(Ext)
119       .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
120       .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
121       .CasesLower(".m", ".mm", true)
122       .CasesLower(".i", ".ii", ".mi", ".mmi", true)
123       .CasesLower(".def", ".inc", true)
124       .Default(false);
125 }
126 
127 static bool shouldCacheStatFailures(StringRef Filename) {
128   StringRef Ext = llvm::sys::path::extension(Filename);
129   if (Ext.empty())
130     return false; // This may be the module cache directory.
131   // Only cache stat failures on source files.
132   return shouldMinimizeBasedOnExtension(Filename);
133 }
134 
135 void DependencyScanningWorkerFilesystem::disableMinimization(
136     StringRef RawFilename) {
137   llvm::SmallString<256> Filename;
138   llvm::sys::path::native(RawFilename, Filename);
139   NotToBeMinimized.insert(Filename);
140 }
141 
142 bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) {
143   if (!shouldMinimizeBasedOnExtension(RawFilename))
144     return false;
145 
146   llvm::SmallString<256> Filename;
147   llvm::sys::path::native(RawFilename, Filename);
148   return !NotToBeMinimized.contains(Filename);
149 }
150 
151 void CachedFileSystemEntry::init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus,
152                                  StringRef Filename,
153                                  llvm::vfs::FileSystem &FS) {
154   if (!MaybeStatus || MaybeStatus->isDirectory())
155     MaybeStat = std::move(MaybeStatus);
156   else
157     MaybeStat = initFile(Filename, FS);
158 }
159 
160 llvm::ErrorOr<EntryRef>
161 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
162     StringRef Filename) {
163   bool ShouldBeMinimized = shouldMinimize(Filename);
164 
165   const auto *Entry = LocalCache.getCachedEntry(Filename);
166   if (Entry && !Entry->needsUpdate(ShouldBeMinimized))
167     return EntryRef(ShouldBeMinimized, *Entry);
168 
169   // FIXME: Handle PCM/PCH files.
170   // FIXME: Handle module map files.
171 
172   auto &SharedCacheEntry = SharedCache.get(Filename);
173   {
174     std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
175     CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
176 
177     if (!CacheEntry.isInitialized()) {
178       auto MaybeStatus = getUnderlyingFS().status(Filename);
179       if (!MaybeStatus && !shouldCacheStatFailures(Filename))
180         // HACK: We need to always restat non source files if the stat fails.
181         //   This is because Clang first looks up the module cache and module
182         //   files before building them, and then looks for them again. If we
183         //   cache the stat failure, it won't see them the second time.
184         return MaybeStatus.getError();
185       CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS());
186     }
187 
188     // Checking `needsUpdate` verifies the entry represents an opened file.
189     // Only checking `needsMinimization` could lead to minimization of files
190     // that we failed to load (such files don't have `OriginalContents`).
191     if (CacheEntry.needsUpdate(ShouldBeMinimized))
192       CacheEntry.minimizeFile();
193   }
194 
195   // Store the result in the local cache.
196   Entry = &SharedCacheEntry.Value;
197   return EntryRef(ShouldBeMinimized, *Entry);
198 }
199 
200 llvm::ErrorOr<llvm::vfs::Status>
201 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
202   SmallString<256> OwnedFilename;
203   StringRef Filename = Path.toStringRef(OwnedFilename);
204 
205   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
206   if (!Result)
207     return Result.getError();
208   return Result->getStatus();
209 }
210 
211 namespace {
212 
213 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
214 /// this subclass.
215 class MinimizedVFSFile final : public llvm::vfs::File {
216 public:
217   MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
218                    llvm::vfs::Status Stat)
219       : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
220 
221   static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
222   create(EntryRef Entry,
223          ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings);
224 
225   llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
226 
227   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
228   getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
229             bool IsVolatile) override {
230     return std::move(Buffer);
231   }
232 
233   std::error_code close() override { return {}; }
234 
235 private:
236   std::unique_ptr<llvm::MemoryBuffer> Buffer;
237   llvm::vfs::Status Stat;
238 };
239 
240 } // end anonymous namespace
241 
242 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
243     EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) {
244   if (Entry.isDirectory())
245     return std::make_error_code(std::errc::is_a_directory);
246 
247   llvm::ErrorOr<StringRef> Contents = Entry.getContents();
248   if (!Contents)
249     return Contents.getError();
250   auto Result = std::make_unique<MinimizedVFSFile>(
251       llvm::MemoryBuffer::getMemBuffer(*Contents, Entry.getName(),
252                                        /*RequiresNullTerminator=*/false),
253       *Entry.getStatus());
254 
255   const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping();
256   if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings)
257     (*PPSkipMappings)[Result->Buffer->getBufferStart()] = EntrySkipMappings;
258 
259   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
260       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
261 }
262 
263 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
264 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
265   SmallString<256> OwnedFilename;
266   StringRef Filename = Path.toStringRef(OwnedFilename);
267 
268   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
269   if (!Result)
270     return Result.getError();
271   return MinimizedVFSFile::create(Result.get(), PPSkipMappings);
272 }
273