1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h" 11 #include "llvm/Support/MemoryBuffer.h" 12 #include "llvm/Support/SmallVectorMemoryBuffer.h" 13 #include "llvm/Support/Threading.h" 14 15 using namespace clang; 16 using namespace tooling; 17 using namespace dependencies; 18 19 llvm::ErrorOr<llvm::vfs::Status> 20 CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) { 21 // Load the file and its content from the file system. 22 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MaybeFile = 23 FS.openFileForRead(Filename); 24 if (!MaybeFile) 25 return MaybeFile.getError(); 26 27 llvm::ErrorOr<llvm::vfs::Status> Stat = (*MaybeFile)->status(); 28 if (!Stat) 29 return Stat.getError(); 30 31 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeBuffer = 32 (*MaybeFile)->getBuffer(Stat->getName()); 33 if (!MaybeBuffer) 34 return MaybeBuffer.getError(); 35 36 OriginalContents = std::move(*MaybeBuffer); 37 return Stat; 38 } 39 40 void CachedFileSystemEntry::minimizeFile() { 41 assert(OriginalContents && "minimizing missing contents"); 42 43 llvm::SmallString<1024> MinimizedFileContents; 44 // Minimize the file down to directives that might affect the dependencies. 45 SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens; 46 if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(), 47 MinimizedFileContents, Tokens)) { 48 // FIXME: Propagate the diagnostic if desired by the client. 49 // Use the original file if the minimization failed. 50 MinimizedContentsStorage = 51 llvm::MemoryBuffer::getMemBuffer(*OriginalContents); 52 MinimizedContentsAccess.store(MinimizedContentsStorage.get()); 53 return; 54 } 55 56 // The contents produced by the minimizer must be null terminated. 57 assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && 58 "not null terminated contents"); 59 60 // Compute the skipped PP ranges that speedup skipping over inactive 61 // preprocessor blocks. 62 llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32> 63 SkippedRanges; 64 minimize_source_to_dependency_directives::computeSkippedRanges(Tokens, 65 SkippedRanges); 66 PreprocessorSkippedRangeMapping Mapping; 67 for (const auto &Range : SkippedRanges) { 68 if (Range.Length < 16) { 69 // Ignore small ranges as non-profitable. 70 // FIXME: This is a heuristic, its worth investigating the tradeoffs 71 // when it should be applied. 72 continue; 73 } 74 Mapping[Range.Offset] = Range.Length; 75 } 76 PPSkippedRangeMapping = std::move(Mapping); 77 78 MinimizedContentsStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>( 79 std::move(MinimizedFileContents)); 80 // The algorithm in `getOrCreateFileSystemEntry` uses the presence of 81 // minimized contents to decide whether an entry is up-to-date or not. 82 // If it is up-to-date, the skipped range mappings must be already computed. 83 // This is why we need to store the minimized contents **after** storing the 84 // skipped range mappings. Failing to do so would lead to a data race. 85 MinimizedContentsAccess.store(MinimizedContentsStorage.get()); 86 } 87 88 DependencyScanningFilesystemSharedCache:: 89 DependencyScanningFilesystemSharedCache() { 90 // This heuristic was chosen using a empirical testing on a 91 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 92 // sharding gives a performance edge by reducing the lock contention. 93 // FIXME: A better heuristic might also consider the OS to account for 94 // the different cost of lock contention on different OSes. 95 NumShards = 96 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 97 CacheShards = std::make_unique<CacheShard[]>(NumShards); 98 } 99 100 DependencyScanningFilesystemSharedCache::SharedFileSystemEntry & 101 DependencyScanningFilesystemSharedCache::get(StringRef Key) { 102 CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards]; 103 std::lock_guard<std::mutex> LockGuard(Shard.CacheLock); 104 auto It = Shard.Cache.try_emplace(Key); 105 return It.first->getValue(); 106 } 107 108 /// Whitelist file extensions that should be minimized, treating no extension as 109 /// a source file that should be minimized. 110 /// 111 /// This is kinda hacky, it would be better if we knew what kind of file Clang 112 /// was expecting instead. 113 static bool shouldMinimizeBasedOnExtension(StringRef Filename) { 114 StringRef Ext = llvm::sys::path::extension(Filename); 115 if (Ext.empty()) 116 return true; // C++ standard library 117 return llvm::StringSwitch<bool>(Ext) 118 .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true) 119 .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true) 120 .CasesLower(".m", ".mm", true) 121 .CasesLower(".i", ".ii", ".mi", ".mmi", true) 122 .CasesLower(".def", ".inc", true) 123 .Default(false); 124 } 125 126 static bool shouldCacheStatFailures(StringRef Filename) { 127 StringRef Ext = llvm::sys::path::extension(Filename); 128 if (Ext.empty()) 129 return false; // This may be the module cache directory. 130 // Only cache stat failures on source files. 131 return shouldMinimizeBasedOnExtension(Filename); 132 } 133 134 void DependencyScanningWorkerFilesystem::disableMinimization( 135 StringRef RawFilename) { 136 llvm::SmallString<256> Filename; 137 llvm::sys::path::native(RawFilename, Filename); 138 NotToBeMinimized.insert(Filename); 139 } 140 141 bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) { 142 if (!shouldMinimizeBasedOnExtension(RawFilename)) 143 return false; 144 145 llvm::SmallString<256> Filename; 146 llvm::sys::path::native(RawFilename, Filename); 147 return !NotToBeMinimized.contains(Filename); 148 } 149 150 void CachedFileSystemEntry::init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, 151 StringRef Filename, 152 llvm::vfs::FileSystem &FS) { 153 if (!MaybeStatus || MaybeStatus->isDirectory()) 154 MaybeStat = std::move(MaybeStatus); 155 else 156 MaybeStat = initFile(Filename, FS); 157 } 158 159 llvm::ErrorOr<EntryRef> 160 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 161 StringRef Filename) { 162 bool ShouldBeMinimized = shouldMinimize(Filename); 163 164 const auto *Entry = LocalCache.getCachedEntry(Filename); 165 if (Entry && !Entry->needsUpdate(ShouldBeMinimized)) 166 return EntryRef(ShouldBeMinimized, *Entry); 167 168 // FIXME: Handle PCM/PCH files. 169 // FIXME: Handle module map files. 170 171 auto &SharedCacheEntry = SharedCache.get(Filename); 172 { 173 std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock); 174 CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value; 175 176 if (!CacheEntry.isInitialized()) { 177 auto MaybeStatus = getUnderlyingFS().status(Filename); 178 if (!MaybeStatus && !shouldCacheStatFailures(Filename)) 179 // HACK: We need to always restat non source files if the stat fails. 180 // This is because Clang first looks up the module cache and module 181 // files before building them, and then looks for them again. If we 182 // cache the stat failure, it won't see them the second time. 183 return MaybeStatus.getError(); 184 CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS()); 185 } 186 187 // Checking `needsUpdate` verifies the entry represents an opened file. 188 // Only checking `needsMinimization` could lead to minimization of files 189 // that we failed to load (such files don't have `OriginalContents`). 190 if (CacheEntry.needsUpdate(ShouldBeMinimized)) 191 CacheEntry.minimizeFile(); 192 } 193 194 // Store the result in the local cache. 195 Entry = &SharedCacheEntry.Value; 196 return EntryRef(ShouldBeMinimized, *Entry); 197 } 198 199 llvm::ErrorOr<llvm::vfs::Status> 200 DependencyScanningWorkerFilesystem::status(const Twine &Path) { 201 SmallString<256> OwnedFilename; 202 StringRef Filename = Path.toStringRef(OwnedFilename); 203 204 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 205 if (!Result) 206 return Result.getError(); 207 return Result->getStatus(); 208 } 209 210 namespace { 211 212 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 213 /// this subclass. 214 class MinimizedVFSFile final : public llvm::vfs::File { 215 public: 216 MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 217 llvm::vfs::Status Stat) 218 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 219 220 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 221 create(EntryRef Entry, 222 ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings); 223 224 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 225 226 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 227 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 228 bool IsVolatile) override { 229 return std::move(Buffer); 230 } 231 232 std::error_code close() override { return {}; } 233 234 private: 235 std::unique_ptr<llvm::MemoryBuffer> Buffer; 236 llvm::vfs::Status Stat; 237 }; 238 239 } // end anonymous namespace 240 241 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create( 242 EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) { 243 if (Entry.isDirectory()) 244 return std::make_error_code(std::errc::is_a_directory); 245 246 llvm::ErrorOr<StringRef> Contents = Entry.getContents(); 247 if (!Contents) 248 return Contents.getError(); 249 auto Result = std::make_unique<MinimizedVFSFile>( 250 llvm::MemoryBuffer::getMemBuffer(*Contents, Entry.getName(), 251 /*RequiresNullTerminator=*/false), 252 *Entry.getStatus()); 253 254 const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); 255 if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings) 256 (*PPSkipMappings)[Result->Buffer->getBufferStart()] = EntrySkipMappings; 257 258 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 259 std::unique_ptr<llvm::vfs::File>(std::move(Result))); 260 } 261 262 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 263 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 264 SmallString<256> OwnedFilename; 265 StringRef Filename = Path.toStringRef(OwnedFilename); 266 267 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 268 if (!Result) 269 return Result.getError(); 270 return MinimizedVFSFile::create(Result.get(), PPSkipMappings); 271 } 272