1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h" 11 #include "llvm/Support/MemoryBuffer.h" 12 #include "llvm/Support/SmallVectorMemoryBuffer.h" 13 #include "llvm/Support/Threading.h" 14 15 using namespace clang; 16 using namespace tooling; 17 using namespace dependencies; 18 19 llvm::ErrorOr<llvm::vfs::Status> 20 CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) { 21 // Load the file and its content from the file system. 22 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MaybeFile = 23 FS.openFileForRead(Filename); 24 if (!MaybeFile) 25 return MaybeFile.getError(); 26 27 llvm::ErrorOr<llvm::vfs::Status> Stat = (*MaybeFile)->status(); 28 if (!Stat) 29 return Stat.getError(); 30 31 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeBuffer = 32 (*MaybeFile)->getBuffer(Stat->getName()); 33 if (!MaybeBuffer) 34 return MaybeBuffer.getError(); 35 36 OriginalContents = std::move(*MaybeBuffer); 37 return Stat; 38 } 39 40 void CachedFileSystemEntry::minimizeFile() { 41 assert(OriginalContents && "minimizing missing contents"); 42 43 llvm::SmallString<1024> MinimizedFileContents; 44 // Minimize the file down to directives that might affect the dependencies. 45 SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens; 46 if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(), 47 MinimizedFileContents, Tokens)) { 48 // FIXME: Propagate the diagnostic if desired by the client. 49 // Use the original file if the minimization failed. 50 MinimizedContentsStorage = 51 llvm::MemoryBuffer::getMemBuffer(*OriginalContents); 52 MinimizedContentsAccess.store(MinimizedContentsStorage.get()); 53 return; 54 } 55 56 // The contents produced by the minimizer must be null terminated. 57 assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && 58 "not null terminated contents"); 59 60 // Compute the skipped PP ranges that speedup skipping over inactive 61 // preprocessor blocks. 62 llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32> 63 SkippedRanges; 64 minimize_source_to_dependency_directives::computeSkippedRanges(Tokens, 65 SkippedRanges); 66 PreprocessorSkippedRangeMapping Mapping; 67 for (const auto &Range : SkippedRanges) { 68 if (Range.Length < 16) { 69 // Ignore small ranges as non-profitable. 70 // FIXME: This is a heuristic, its worth investigating the tradeoffs 71 // when it should be applied. 72 continue; 73 } 74 Mapping[Range.Offset] = Range.Length; 75 } 76 PPSkippedRangeMapping = std::move(Mapping); 77 78 MinimizedContentsStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>( 79 std::move(MinimizedFileContents)); 80 // The algorithm in `getOrCreateFileSystemEntry` uses the presence of 81 // minimized contents to decide whether an entry is up-to-date or not. 82 // If it is up-to-date, the skipped range mappings must be already computed. 83 // This is why we need to store the minimized contents **after** storing the 84 // skipped range mappings. Failing to do so would lead to a data race. 85 MinimizedContentsAccess.store(MinimizedContentsStorage.get()); 86 } 87 88 DependencyScanningFilesystemSharedCache:: 89 DependencyScanningFilesystemSharedCache() { 90 // This heuristic was chosen using a empirical testing on a 91 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 92 // sharding gives a performance edge by reducing the lock contention. 93 // FIXME: A better heuristic might also consider the OS to account for 94 // the different cost of lock contention on different OSes. 95 NumShards = 96 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 97 CacheShards = std::make_unique<CacheShard[]>(NumShards); 98 } 99 100 DependencyScanningFilesystemSharedCache::SharedFileSystemEntry & 101 DependencyScanningFilesystemSharedCache::get(StringRef Key) { 102 CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards]; 103 std::lock_guard<std::mutex> LockGuard(Shard.CacheLock); 104 auto It = Shard.Cache.try_emplace(Key); 105 return It.first->getValue(); 106 } 107 108 /// Whitelist file extensions that should be minimized, treating no extension as 109 /// a source file that should be minimized. 110 /// 111 /// This is kinda hacky, it would be better if we knew what kind of file Clang 112 /// was expecting instead. 113 static bool shouldMinimizeBasedOnExtension(StringRef Filename) { 114 StringRef Ext = llvm::sys::path::extension(Filename); 115 if (Ext.empty()) 116 return true; // C++ standard library 117 return llvm::StringSwitch<bool>(Ext) 118 .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true) 119 .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true) 120 .CasesLower(".m", ".mm", true) 121 .CasesLower(".i", ".ii", ".mi", ".mmi", true) 122 .CasesLower(".def", ".inc", true) 123 .Default(false); 124 } 125 126 127 static bool shouldCacheStatFailures(StringRef Filename) { 128 StringRef Ext = llvm::sys::path::extension(Filename); 129 if (Ext.empty()) 130 return false; // This may be the module cache directory. 131 // Only cache stat failures on source files. 132 return shouldMinimizeBasedOnExtension(Filename); 133 } 134 135 void DependencyScanningWorkerFilesystem::disableMinimization( 136 StringRef RawFilename) { 137 llvm::SmallString<256> Filename; 138 llvm::sys::path::native(RawFilename, Filename); 139 NotToBeMinimized.insert(Filename); 140 } 141 142 bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) { 143 if (!shouldMinimizeBasedOnExtension(RawFilename)) 144 return false; 145 146 llvm::SmallString<256> Filename; 147 llvm::sys::path::native(RawFilename, Filename); 148 return !NotToBeMinimized.contains(Filename); 149 } 150 151 void CachedFileSystemEntry::init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, 152 StringRef Filename, 153 llvm::vfs::FileSystem &FS) { 154 if (!MaybeStatus || MaybeStatus->isDirectory()) 155 MaybeStat = std::move(MaybeStatus); 156 else 157 MaybeStat = initFile(Filename, FS); 158 } 159 160 llvm::ErrorOr<EntryRef> 161 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 162 StringRef Filename) { 163 bool ShouldBeMinimized = shouldMinimize(Filename); 164 165 const auto *Entry = Cache.getCachedEntry(Filename); 166 if (Entry && !Entry->needsUpdate(ShouldBeMinimized)) 167 return EntryRef(ShouldBeMinimized, Entry); 168 169 // FIXME: Handle PCM/PCH files. 170 // FIXME: Handle module map files. 171 172 auto &SharedCacheEntry = SharedCache.get(Filename); 173 { 174 std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock); 175 CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value; 176 177 if (!CacheEntry.isInitialized()) { 178 auto MaybeStatus = getUnderlyingFS().status(Filename); 179 if (!MaybeStatus && !shouldCacheStatFailures(Filename)) 180 // HACK: We need to always restat non source files if the stat fails. 181 // This is because Clang first looks up the module cache and module 182 // files before building them, and then looks for them again. If we 183 // cache the stat failure, it won't see them the second time. 184 return MaybeStatus.getError(); 185 CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS()); 186 } 187 188 // Checking `needsUpdate` verifies the entry represents an opened file. 189 // Only checking `needsMinimization` could lead to minimization of files 190 // that we failed to load (such files don't have `OriginalContents`). 191 if (CacheEntry.needsUpdate(ShouldBeMinimized)) 192 CacheEntry.minimizeFile(); 193 } 194 195 // Store the result in the local cache. 196 Entry = &SharedCacheEntry.Value; 197 return EntryRef(ShouldBeMinimized, Entry); 198 } 199 200 llvm::ErrorOr<llvm::vfs::Status> 201 DependencyScanningWorkerFilesystem::status(const Twine &Path) { 202 SmallString<256> OwnedFilename; 203 StringRef Filename = Path.toStringRef(OwnedFilename); 204 205 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 206 if (!Result) 207 return Result.getError(); 208 return Result->getStatus(); 209 } 210 211 namespace { 212 213 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 214 /// this subclass. 215 class MinimizedVFSFile final : public llvm::vfs::File { 216 public: 217 MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 218 llvm::vfs::Status Stat) 219 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 220 221 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 222 create(EntryRef Entry, 223 ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings); 224 225 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 226 227 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 228 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 229 bool IsVolatile) override { 230 return std::move(Buffer); 231 } 232 233 std::error_code close() override { return {}; } 234 235 private: 236 std::unique_ptr<llvm::MemoryBuffer> Buffer; 237 llvm::vfs::Status Stat; 238 }; 239 240 } // end anonymous namespace 241 242 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create( 243 EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) { 244 if (Entry.isDirectory()) 245 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 246 std::make_error_code(std::errc::is_a_directory)); 247 llvm::ErrorOr<StringRef> Contents = Entry.getContents(); 248 if (!Contents) 249 return Contents.getError(); 250 auto Result = std::make_unique<MinimizedVFSFile>( 251 llvm::MemoryBuffer::getMemBuffer(*Contents, Entry.getName(), 252 /*RequiresNullTerminator=*/false), 253 *Entry.getStatus()); 254 255 const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); 256 if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings) 257 (*PPSkipMappings)[Result->Buffer->getBufferStart()] = EntrySkipMappings; 258 259 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 260 std::unique_ptr<llvm::vfs::File>(std::move(Result))); 261 } 262 263 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 264 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 265 SmallString<256> OwnedFilename; 266 StringRef Filename = Path.toStringRef(OwnedFilename); 267 268 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 269 if (!Result) 270 return Result.getError(); 271 return MinimizedVFSFile::create(Result.get(), PPSkipMappings); 272 } 273