1 //===--- GlobalCompilationDatabase.cpp ---------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "GlobalCompilationDatabase.h"
10 #include "Config.h"
11 #include "FS.h"
12 #include "SourceCode.h"
13 #include "support/Logger.h"
14 #include "support/Path.h"
15 #include "support/Threading.h"
16 #include "support/ThreadsafeFS.h"
17 #include "clang/Tooling/ArgumentsAdjusters.h"
18 #include "clang/Tooling/CompilationDatabase.h"
19 #include "clang/Tooling/CompilationDatabasePluginRegistry.h"
20 #include "clang/Tooling/JSONCompilationDatabase.h"
21 #include "llvm/ADT/None.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/ADT/PointerIntPair.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/ScopeExit.h"
26 #include "llvm/ADT/SmallString.h"
27 #include "llvm/ADT/StringMap.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/VirtualFileSystem.h"
30 #include <atomic>
31 #include <chrono>
32 #include <condition_variable>
33 #include <mutex>
34 #include <string>
35 #include <tuple>
36 #include <vector>
37 
38 namespace clang {
39 namespace clangd {
40 namespace {
41 
42 // Runs the given action on all parent directories of filename, starting from
43 // deepest directory and going up to root. Stops whenever action succeeds.
actOnAllParentDirectories(PathRef FileName,llvm::function_ref<bool (PathRef)> Action)44 void actOnAllParentDirectories(PathRef FileName,
45                                llvm::function_ref<bool(PathRef)> Action) {
46   for (auto Path = absoluteParent(FileName); !Path.empty() && !Action(Path);
47        Path = absoluteParent(Path))
48     ;
49 }
50 
51 } // namespace
52 
53 tooling::CompileCommand
getFallbackCommand(PathRef File) const54 GlobalCompilationDatabase::getFallbackCommand(PathRef File) const {
55   std::vector<std::string> Argv = {"clang"};
56   // Clang treats .h files as C by default and files without extension as linker
57   // input, resulting in unhelpful diagnostics.
58   // Parsing as Objective C++ is friendly to more cases.
59   auto FileExtension = llvm::sys::path::extension(File);
60   if (FileExtension.empty() || FileExtension == ".h")
61     Argv.push_back("-xobjective-c++-header");
62   Argv.push_back(std::string(File));
63   tooling::CompileCommand Cmd(llvm::sys::path::parent_path(File),
64                               llvm::sys::path::filename(File), std::move(Argv),
65                               /*Output=*/"");
66   Cmd.Heuristic = "clangd fallback";
67   return Cmd;
68 }
69 
// Loads and caches the CDB from a single directory.
//
// This class is threadsafe, which is to say we have independent locks for each
// directory we're searching for a CDB.
// Loading is deferred until first access.
//
// The DirectoryBasedCDB keeps a map from path => DirectoryCache.
// Typical usage is to:
//  - 1) determine all the paths that might be searched
//  - 2) acquire the map lock and get-or-create all the DirectoryCache entries
//  - 3) release the map lock and query the caches as desired
class DirectoryBasedGlobalCompilationDatabase::DirectoryCache {
  // Clock used for every cache-freshness timestamp in this class.
  using stopwatch = std::chrono::steady_clock;

  // CachedFile is used to read a CDB file on disk (e.g. compile_commands.json).
  // It specializes in being able to quickly bail out if the file is unchanged,
  // which is the common case.
  // Internally, it stores file metadata so a stat() can verify it's unchanged.
  // We don't actually cache the content as it's not needed - if the file is
  // unchanged then the previous CDB is valid.
  struct CachedFile {
    // Records the file location as Parent joined with Rel.
    // Nothing is read from disk at construction time.
    CachedFile(llvm::StringRef Parent, llvm::StringRef Rel) {
      llvm::SmallString<256> Path = Parent;
      llvm::sys::path::append(Path, Rel);
      this->Path = Path.str().str();
    }
    // Full path of the tracked file.
    std::string Path;
    // Metadata recorded by load(). Size holds NoFileCached until a file has
    // been read, and again after load() finds the file missing.
    size_t Size = NoFileCached;
    llvm::sys::TimePoint<> ModifiedTime;
    FileDigest ContentHash;

    // Sentinel for Size: -1 converted to size_t, i.e. the largest size_t value.
    static constexpr size_t NoFileCached = -1;

    // Outcome of load().
    struct LoadResult {
      enum {
        // status() failed, or the path is not a regular file.
        FileNotFound,
        // The file exists but could not be read, or the size read disagreed
        // with the size reported by status().
        TransientError,
        // Old data was available and either size+mtime or the content hash
        // matched it.
        FoundSameData,
        // The file was read and differs from (or there was no) old data.
        FoundNewData,
      } Result;
      std::unique_ptr<llvm::MemoryBuffer> Buffer; // Set only if FoundNewData
    };

    // Checks the file against the recorded metadata, reading it if needed.
    // HasOldData says whether the recorded metadata may be used to report
    // FoundSameData.
    LoadResult load(llvm::vfs::FileSystem &FS, bool HasOldData);
  };

  // If we've looked for a CDB here and found none, the time when that happened.
  // (Atomics make it possible for get() to return without taking a lock)
  // Stored as a raw tick count of stopwatch; time_point::min() when a CDB is
  // present or nothing has been loaded yet.
  std::atomic<stopwatch::rep> NoCDBAt = {
      stopwatch::time_point::min().time_since_epoch().count()};

  // Guards the following cache state.
  std::mutex Mu;
  // When was the cache last known to be in sync with disk state?
  stopwatch::time_point CachePopulatedAt = stopwatch::time_point::min();
  // Whether a new CDB has been loaded but not broadcast yet.
  bool NeedsBroadcast = false;
  // Last loaded CDB, meaningful if CachePopulatedAt was ever set.
  // shared_ptr so we can overwrite this when callers are still using the CDB.
  std::shared_ptr<tooling::CompilationDatabase> CDB;
  // File metadata for the CDB files we support tracking directly.
  CachedFile CompileCommandsJson;
  CachedFile BuildCompileCommandsJson;
  CachedFile CompileFlagsTxt;
  // CachedFile member corresponding to CDB.
  //   CDB  | ACF  | Scenario
  //   null | null | no CDB found, or initial empty cache
  //   set  | null | CDB was loaded via generic plugin interface
  //   null | set  | found known CDB file, but parsing it failed
  //   set  | set  | CDB was parsed from a known file
  CachedFile *ActiveCachedFile = nullptr;

public:
  // Creates an empty cache for the directory Path, which must be absolute.
  // The three tracked files are compile_commands.json,
  // build/compile_commands.json and compile_flags.txt under Path.
  DirectoryCache(llvm::StringRef Path)
      : CompileCommandsJson(Path, "compile_commands.json"),
        BuildCompileCommandsJson(Path, "build/compile_commands.json"),
        CompileFlagsTxt(Path, "compile_flags.txt"), Path(Path) {
    assert(llvm::sys::path::is_absolute(Path));
  }

  // Absolute canonical path that we're the cache for. (Not case-folded).
  const std::string Path;

  // Get the CDB associated with this directory.
  // ShouldBroadcast:
  //  - as input, signals whether the caller is willing to broadcast a
  //    newly-discovered CDB. (e.g. to trigger background indexing)
  //  - as output, signals whether the caller should do so.
  // (If a new CDB is discovered and ShouldBroadcast is false, we mark the
  // CDB as needing broadcast, and broadcast it next time we can).
  // FreshTime / FreshTimeMissing: a cached "found" / "not found" result is
  // served without touching disk only if it was recorded after this time.
  // Returns null if there is no usable CDB in this directory.
  std::shared_ptr<const tooling::CompilationDatabase>
  get(const ThreadsafeFS &TFS, bool &ShouldBroadcast,
      stopwatch::time_point FreshTime, stopwatch::time_point FreshTimeMissing) {
    // Fast path for common case without taking lock.
    // A recent enough "no CDB here" result means there is nothing to return
    // and nothing to broadcast.
    if (stopwatch::time_point(stopwatch::duration(NoCDBAt.load())) >
        FreshTimeMissing) {
      ShouldBroadcast = false;
      return nullptr;
    }

    std::lock_guard<std::mutex> Lock(Mu);
    // Runs on every return below. It is declared after Lock, so it is destroyed
    // first and executes while Mu is still held. OldCDB remembers which CDB
    // was current on entry so a replacement can be detected.
    auto RequestBroadcast = llvm::make_scope_exit([&, OldCDB(CDB.get())] {
      // If we loaded a new CDB, it should be broadcast at some point.
      if (CDB != nullptr && CDB.get() != OldCDB)
        NeedsBroadcast = true;
      else if (CDB == nullptr) // nothing to broadcast anymore!
        NeedsBroadcast = false;
      // If we have something to broadcast, then do so iff allowed.
      if (!ShouldBroadcast)
        return;
      // The caller takes over the pending broadcast, so clear our flag.
      ShouldBroadcast = NeedsBroadcast;
      NeedsBroadcast = false;
    });

    // If our cache is valid, serve from it.
    if (CachePopulatedAt > FreshTime)
      return CDB;

    // load() returns false when the result should not be cached; in that case
    // the timestamps are left alone so the next call probes the disk again.
    if (/*MayCache=*/load(*TFS.view(/*CWD=*/llvm::None))) {
      // Use new timestamp, as loading may be slow.
      CachePopulatedAt = stopwatch::now();
      // Publish the "missing" time for the lock-free fast path, or reset it
      // when a CDB is present.
      NoCDBAt.store((CDB ? stopwatch::time_point::min() : CachePopulatedAt)
                        .time_since_epoch()
                        .count());
    }

    return CDB;
  }

private:
  // Updates `CDB` from disk state. Returns false on failure.
  // Called from get() with Mu held; a true result lets get() refresh its
  // cache timestamps.
  bool load(llvm::vfs::FileSystem &FS);
};
203 
204 DirectoryBasedGlobalCompilationDatabase::DirectoryCache::CachedFile::LoadResult
load(llvm::vfs::FileSystem & FS,bool HasOldData)205 DirectoryBasedGlobalCompilationDatabase::DirectoryCache::CachedFile::load(
206     llvm::vfs::FileSystem &FS, bool HasOldData) {
207   auto Stat = FS.status(Path);
208   if (!Stat || !Stat->isRegularFile()) {
209     Size = NoFileCached;
210     ContentHash = {};
211     return {LoadResult::FileNotFound, nullptr};
212   }
213   // If both the size and mtime match, presume unchanged without reading.
214   if (HasOldData && Stat->getLastModificationTime() == ModifiedTime &&
215       Stat->getSize() == Size)
216     return {LoadResult::FoundSameData, nullptr};
217   auto Buf = FS.getBufferForFile(Path);
218   if (!Buf || (*Buf)->getBufferSize() != Stat->getSize()) {
219     // Don't clear the cache - possible we're seeing inconsistent size as the
220     // file is being recreated. If it ends up identical later, great!
221     //
222     // This isn't a complete solution: if we see a partial file but stat/read
223     // agree on its size, we're ultimately going to have spurious CDB reloads.
224     // May be worth fixing if generators don't write atomically (CMake does).
225     elog("Failed to read {0}: {1}", Path,
226          Buf ? "size changed" : Buf.getError().message());
227     return {LoadResult::TransientError, nullptr};
228   }
229 
230   FileDigest NewContentHash = digest((*Buf)->getBuffer());
231   if (HasOldData && NewContentHash == ContentHash) {
232     // mtime changed but data is the same: avoid rebuilding the CDB.
233     ModifiedTime = Stat->getLastModificationTime();
234     return {LoadResult::FoundSameData, nullptr};
235   }
236 
237   Size = (*Buf)->getBufferSize();
238   ModifiedTime = Stat->getLastModificationTime();
239   ContentHash = NewContentHash;
240   return {LoadResult::FoundNewData, std::move(*Buf)};
241 }
242 
243 // Adapt CDB-loading functions to a common interface for DirectoryCache::load().
244 static std::unique_ptr<tooling::CompilationDatabase>
parseJSON(PathRef Path,llvm::StringRef Data,std::string & Error)245 parseJSON(PathRef Path, llvm::StringRef Data, std::string &Error) {
246   if (auto CDB = tooling::JSONCompilationDatabase::loadFromBuffer(
247           Data, Error, tooling::JSONCommandLineSyntax::AutoDetect)) {
248     // FS used for expanding response files.
249     // FIXME: ExpandResponseFilesDatabase appears not to provide the usual
250     // thread-safety guarantees, as the access to FS is not locked!
251     // For now, use the real FS, which is known to be threadsafe (if we don't
252     // use/change working directory, which ExpandResponseFilesDatabase doesn't).
253     auto FS = llvm::vfs::getRealFileSystem();
254     return tooling::inferTargetAndDriverMode(
255         tooling::inferMissingCompileCommands(
256             expandResponseFiles(std::move(CDB), std::move(FS))));
257   }
258   return nullptr;
259 }
260 static std::unique_ptr<tooling::CompilationDatabase>
parseFixed(PathRef Path,llvm::StringRef Data,std::string & Error)261 parseFixed(PathRef Path, llvm::StringRef Data, std::string &Error) {
262   return tooling::FixedCompilationDatabase::loadFromBuffer(
263       llvm::sys::path::parent_path(Path), Data, Error);
264 }
265 
// Re-probes this directory for a compilation database and updates CDB and
// ActiveCachedFile accordingly. Called from get() with Mu held.
//
// The known files are tried in order: compile_commands.json,
// build/compile_commands.json, compile_flags.txt. The first one that exists
// decides the outcome. If none exists, plugin-provided databases are tried,
// but only the first time this directory is scanned.
//
// Returns false only when a known file exists but could not be read
// consistently; the caller then keeps its old timestamps and retries later.
// Returns true in every other case, including "no CDB here" and "file found
// but failed to parse".
bool DirectoryBasedGlobalCompilationDatabase::DirectoryCache::load(
    llvm::vfs::FileSystem &FS) {
  dlog("Probing directory {0}", Path);
  std::string Error;

  // Load from the specially-supported compilation databases (JSON + Fixed).
  // For these, we know the files they read and cache their metadata so we can
  // cheaply validate whether they've changed, and hot-reload if they have.
  // (As a bonus, these are also VFS-clean)!
  struct CDBFile {
    CachedFile *File;
    // Wrapper for {Fixed,JSON}CompilationDatabase::loadFromBuffer.
    std::unique_ptr<tooling::CompilationDatabase> (*Parser)(
        PathRef,
        /*Data*/ llvm::StringRef,
        /*ErrorMsg*/ std::string &);
  };
  for (const auto &Entry : {CDBFile{&CompileCommandsJson, parseJSON},
                            CDBFile{&BuildCompileCommandsJson, parseJSON},
                            CDBFile{&CompileFlagsTxt, parseFixed}}) {
    // Only the file the current CDB came from has old metadata worth comparing.
    bool Active = ActiveCachedFile == Entry.File;
    auto Loaded = Entry.File->load(FS, Active);
    switch (Loaded.Result) {
    case CachedFile::LoadResult::FileNotFound:
      // The file that backed the current CDB is gone: drop the CDB.
      if (Active) {
        log("Unloaded compilation database from {0}", Entry.File->Path);
        ActiveCachedFile = nullptr;
        CDB = nullptr;
      }
      // Continue looking at other candidates.
      break;
    case CachedFile::LoadResult::TransientError:
      // File existed but we couldn't read it. Reuse the cache, retry later.
      return false; // Load again next time.
    case CachedFile::LoadResult::FoundSameData:
      assert(Active && "CachedFile may not return 'same data' if !HasOldData");
      // This is the critical file, and it hasn't changed.
      return true;
    case CachedFile::LoadResult::FoundNewData:
      // We have a new CDB!
      // (Or a null one if parsing fails; the file still becomes the active one
      // so that it is re-parsed only once its content changes.)
      CDB = Entry.Parser(Entry.File->Path, Loaded.Buffer->getBuffer(), Error);
      if (CDB)
        log("{0} compilation database from {1}", Active ? "Reloaded" : "Loaded",
            Entry.File->Path);
      else
        elog("Failed to load compilation database from {0}: {1}",
             Entry.File->Path, Error);
      ActiveCachedFile = Entry.File;
      return true;
    }
  }

  // Fall back to generic handling of compilation databases.
  // We don't know what files they read, so can't efficiently check whether
  // they need to be reloaded. So we never do that.
  // FIXME: the interface doesn't provide a way to virtualize FS access.

  // Don't try these more than once. If we've scanned before, we're done.
  if (CachePopulatedAt > stopwatch::time_point::min())
    return true;
  for (const auto &Entry :
       tooling::CompilationDatabasePluginRegistry::entries()) {
    // Avoid duplicating the special cases handled above.
    if (Entry.getName() == "fixed-compilation-database" ||
        Entry.getName() == "json-compilation-database")
      continue;
    auto Plugin = Entry.instantiate();
    // The first plugin that yields a database wins. ActiveCachedFile stays
    // null, as no tracked file backs this CDB.
    if (auto CDB = Plugin->loadFromDirectory(Path, Error)) {
      log("Loaded compilation database from {0} with plugin {1}", Path,
          Entry.getName());
      this->CDB = std::move(CDB);
      return true;
    }
    // Don't log Error here, it's usually just "couldn't find <file>".
  }
  dlog("No compilation database at {0}", Path);
  return true;
}
344 
345 DirectoryBasedGlobalCompilationDatabase::
DirectoryBasedGlobalCompilationDatabase(const Options & Opts)346     DirectoryBasedGlobalCompilationDatabase(const Options &Opts)
347     : Opts(Opts), Broadcaster(std::make_unique<BroadcastThread>(*this)) {
348   if (!this->Opts.ContextProvider)
349     this->Opts.ContextProvider = [](llvm::StringRef) {
350       return Context::current().clone();
351     };
352 }
353 
// Defaulted out of line. Destroying Broadcaster runs ~BroadcastThread(), which
// signals the broadcast thread to stop and joins it.
// NOTE(review): presumably this is defined in the .cpp because BroadcastThread
// and DirectoryCache are only complete types in this file - confirm against
// the header.
DirectoryBasedGlobalCompilationDatabase::
    ~DirectoryBasedGlobalCompilationDatabase() = default;
356 
357 llvm::Optional<tooling::CompileCommand>
getCompileCommand(PathRef File) const358 DirectoryBasedGlobalCompilationDatabase::getCompileCommand(PathRef File) const {
359   CDBLookupRequest Req;
360   Req.FileName = File;
361   Req.ShouldBroadcast = true;
362   auto Now = std::chrono::steady_clock::now();
363   Req.FreshTime = Now - Opts.RevalidateAfter;
364   Req.FreshTimeMissing = Now - Opts.RevalidateMissingAfter;
365 
366   auto Res = lookupCDB(Req);
367   if (!Res) {
368     log("Failed to find compilation database for {0}", File);
369     return llvm::None;
370   }
371 
372   auto Candidates = Res->CDB->getCompileCommands(File);
373   if (!Candidates.empty())
374     return std::move(Candidates.front());
375 
376   return None;
377 }
378 
379 std::vector<DirectoryBasedGlobalCompilationDatabase::DirectoryCache *>
getDirectoryCaches(llvm::ArrayRef<llvm::StringRef> Dirs) const380 DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches(
381     llvm::ArrayRef<llvm::StringRef> Dirs) const {
382   std::vector<std::string> FoldedDirs;
383   FoldedDirs.reserve(Dirs.size());
384   for (const auto &Dir : Dirs) {
385 #ifndef NDEBUG
386     if (!llvm::sys::path::is_absolute(Dir))
387       elog("Trying to cache CDB for relative {0}");
388 #endif
389     FoldedDirs.push_back(maybeCaseFoldPath(Dir));
390   }
391 
392   std::vector<DirectoryCache *> Ret;
393   Ret.reserve(Dirs.size());
394 
395   std::lock_guard<std::mutex> Lock(DirCachesMutex);
396   for (unsigned I = 0; I < Dirs.size(); ++I)
397     Ret.push_back(&DirCaches.try_emplace(FoldedDirs[I], Dirs[I]).first->second);
398   return Ret;
399 }
400 
401 llvm::Optional<DirectoryBasedGlobalCompilationDatabase::CDBLookupResult>
lookupCDB(CDBLookupRequest Request) const402 DirectoryBasedGlobalCompilationDatabase::lookupCDB(
403     CDBLookupRequest Request) const {
404   assert(llvm::sys::path::is_absolute(Request.FileName) &&
405          "path must be absolute");
406 
407   std::string Storage;
408   std::vector<llvm::StringRef> SearchDirs;
409   if (Opts.CompileCommandsDir) // FIXME: unify this case with config.
410     SearchDirs = {*Opts.CompileCommandsDir};
411   else {
412     WithContext WithProvidedContext(Opts.ContextProvider(Request.FileName));
413     const auto &Spec = Config::current().CompileFlags.CDBSearch;
414     switch (Spec.Policy) {
415     case Config::CDBSearchSpec::NoCDBSearch:
416       return llvm::None;
417     case Config::CDBSearchSpec::FixedDir:
418       Storage = *Spec.FixedCDBPath;
419       SearchDirs = {Storage};
420       break;
421     case Config::CDBSearchSpec::Ancestors:
422       // Traverse the canonical version to prevent false positives. i.e.:
423       // src/build/../a.cc can detect a CDB in /src/build if not
424       // canonicalized.
425       Storage = removeDots(Request.FileName);
426       actOnAllParentDirectories(Storage, [&](llvm::StringRef Dir) {
427         SearchDirs.push_back(Dir);
428         return false;
429       });
430     }
431   }
432 
433   std::shared_ptr<const tooling::CompilationDatabase> CDB = nullptr;
434   bool ShouldBroadcast = false;
435   DirectoryCache *DirCache = nullptr;
436   for (DirectoryCache *Candidate : getDirectoryCaches(SearchDirs)) {
437     bool CandidateShouldBroadcast = Request.ShouldBroadcast;
438     if ((CDB = Candidate->get(Opts.TFS, CandidateShouldBroadcast,
439                               Request.FreshTime, Request.FreshTimeMissing))) {
440       DirCache = Candidate;
441       ShouldBroadcast = CandidateShouldBroadcast;
442       break;
443     }
444   }
445 
446   if (!CDB)
447     return llvm::None;
448 
449   CDBLookupResult Result;
450   Result.CDB = std::move(CDB);
451   Result.PI.SourceRoot = DirCache->Path;
452 
453   if (ShouldBroadcast)
454     broadcastCDB(Result);
455   return Result;
456 }
457 
// The broadcast thread announces files with new compile commands to the world.
// Primarily this is used to enqueue them for background indexing.
//
// It's on a separate thread because:
//  - otherwise it would block the first parse of the initial file
//  - we need to enumerate all files in the CDB, of which there are many
//  - we (will) have to evaluate config for every file in the CDB, which is slow
class DirectoryBasedGlobalCompilationDatabase::BroadcastThread {
  class Filter;
  // The CDB whose OnCommandChanged event the broadcasts are sent through.
  DirectoryBasedGlobalCompilationDatabase &Parent;

  // Guards Queue and ActiveTask; CV is signalled whenever either changes.
  std::mutex Mu;
  std::condition_variable CV;
  // Shutdown flag (CV is notified after writing).
  // This is atomic so that broadcasts can also observe it and abort early.
  std::atomic<bool> ShouldStop = {false};
  // One pending broadcast: the CDB to announce, plus the context that was
  // current when it was enqueued.
  struct Task {
    CDBLookupResult Lookup;
    Context Ctx;
  };
  // Broadcasts waiting to be processed, oldest first.
  std::deque<Task> Queue;
  // The task run() is currently processing, if any.
  llvm::Optional<Task> ActiveTask;
  std::thread Thread; // Must be last member.

  // Thread body: this is just the basic queue processing boilerplate.
  void run() {
    std::unique_lock<std::mutex> Lock(Mu);
    while (true) {
      bool Stopping = false;
      // Sleep until there is work or we are asked to shut down.
      CV.wait(Lock, [&] {
        return (Stopping = ShouldStop.load(std::memory_order_acquire)) ||
               !Queue.empty();
      });
      if (Stopping) {
        // Drop any unprocessed work and wake anybody waiting on the queue.
        Queue.clear();
        CV.notify_all();
        return;
      }
      ActiveTask = std::move(Queue.front());
      Queue.pop_front();

      // Process without holding the lock, so enqueue() is not blocked.
      Lock.unlock();
      {
        WithContext WithCtx(std::move(ActiveTask->Ctx));
        process(ActiveTask->Lookup);
      }
      Lock.lock();
      ActiveTask.reset();
      // Lets blockUntilIdle() re-check its condition.
      CV.notify_all();
    }
  }

  // Inspects a new CDB and broadcasts the files it owns.
  void process(const CDBLookupResult &T);

public:
  // Starts the worker thread immediately.
  BroadcastThread(DirectoryBasedGlobalCompilationDatabase &Parent)
      : Parent(Parent), Thread([this] { run(); }) {}

  // Schedules Lookup for broadcasting on the worker thread. The current
  // context is cloned and later restored while the task is processed.
  // Lookup.PI.SourceRoot must not be empty.
  void enqueue(CDBLookupResult Lookup) {
    {
      assert(!Lookup.PI.SourceRoot.empty());
      std::lock_guard<std::mutex> Lock(Mu);
      // New CDB takes precedence over any queued one for the same directory.
      llvm::erase_if(Queue, [&](const Task &T) {
        return T.Lookup.PI.SourceRoot == Lookup.PI.SourceRoot;
      });
      Queue.push_back({std::move(Lookup), Context::current().clone()});
    }
    CV.notify_all();
  }

  // Waits until the queue is empty and no task is being processed, or until
  // Timeout. Returns the result of the wait() helper.
  // NOTE(review): presumably true iff the idle condition was reached - confirm
  // against wait() in support/Threading.h.
  bool blockUntilIdle(Deadline Timeout) {
    std::unique_lock<std::mutex> Lock(Mu);
    return wait(Lock, CV, Timeout,
                [&] { return Queue.empty() && !ActiveTask; });
  }

  // Requests shutdown and waits for the worker thread to exit.
  ~BroadcastThread() {
    {
      // Set the flag under the lock so the worker cannot miss the wakeup
      // between checking its wait predicate and going to sleep.
      std::lock_guard<std::mutex> Lock(Mu);
      ShouldStop.store(true, std::memory_order_release);
    }
    CV.notify_all();
    Thread.join();
  }
};
545 
546 // The DirBasedCDB associates each file with a specific CDB.
547 // When a CDB is discovered, it may claim to describe files that we associate
548 // with a different CDB. We do not want to broadcast discovery of these, and
549 // trigger background indexing of them.
550 //
551 // We must filter the list, and check whether they are associated with this CDB.
552 // This class attempts to do so efficiently.
553 //
554 // Roughly, it:
555 //  - loads the config for each file, and determines the relevant search path
556 //  - gathers all directories that are part of any search path
557 //  - (lazily) checks for a CDB in each such directory at most once
558 //  - walks the search path for each file and determines whether to include it.
559 class DirectoryBasedGlobalCompilationDatabase::BroadcastThread::Filter {
560   llvm::StringRef ThisDir;
561   DirectoryBasedGlobalCompilationDatabase &Parent;
562 
563   // Keep track of all directories we might check for CDBs.
564   struct DirInfo {
565     DirectoryCache *Cache = nullptr;
566     enum { Unknown, Missing, TargetCDB, OtherCDB } State = Unknown;
567     DirInfo *Parent = nullptr;
568   };
569   llvm::StringMap<DirInfo> Dirs;
570 
571   // A search path starts at a directory, and either includes ancestors or not.
572   using SearchPath = llvm::PointerIntPair<DirInfo *, 1>;
573 
574   // Add all ancestor directories of FilePath to the tracked set.
575   // Returns the immediate parent of the file.
addParents(llvm::StringRef FilePath)576   DirInfo *addParents(llvm::StringRef FilePath) {
577     DirInfo *Leaf = nullptr;
578     DirInfo *Child = nullptr;
579     actOnAllParentDirectories(FilePath, [&](llvm::StringRef Dir) {
580       auto &Info = Dirs[Dir];
581       // If this is the first iteration, then this node is the overall result.
582       if (!Leaf)
583         Leaf = &Info;
584       // Fill in the parent link from the previous iteration to this parent.
585       if (Child)
586         Child->Parent = &Info;
587       // Keep walking, whether we inserted or not, if parent link is missing.
588       // (If it's present, parent links must be present up to the root, so stop)
589       Child = &Info;
590       return Info.Parent != nullptr;
591     });
592     return Leaf;
593   }
594 
595   // Populates DirInfo::Cache (and State, if it is TargetCDB).
grabCaches()596   void grabCaches() {
597     // Fast path out if there were no files, or CDB loading is off.
598     if (Dirs.empty())
599       return;
600 
601     std::vector<llvm::StringRef> DirKeys;
602     std::vector<DirInfo *> DirValues;
603     DirKeys.reserve(Dirs.size() + 1);
604     DirValues.reserve(Dirs.size());
605     for (auto &E : Dirs) {
606       DirKeys.push_back(E.first());
607       DirValues.push_back(&E.second);
608     }
609 
610     // Also look up the cache entry for the CDB we're broadcasting.
611     // Comparing DirectoryCache pointers is more robust than checking string
612     // equality, e.g. reuses the case-sensitivity handling.
613     DirKeys.push_back(ThisDir);
614     auto DirCaches = Parent.getDirectoryCaches(DirKeys);
615     const DirectoryCache *ThisCache = DirCaches.back();
616     DirCaches.pop_back();
617     DirKeys.pop_back();
618 
619     for (unsigned I = 0; I < DirKeys.size(); ++I) {
620       DirValues[I]->Cache = DirCaches[I];
621       if (DirCaches[I] == ThisCache)
622         DirValues[I]->State = DirInfo::TargetCDB;
623     }
624   }
625 
626   // Should we include a file from this search path?
shouldInclude(SearchPath P)627   bool shouldInclude(SearchPath P) {
628     DirInfo *Info = P.getPointer();
629     if (!Info)
630       return false;
631     if (Info->State == DirInfo::Unknown) {
632       assert(Info->Cache && "grabCaches() should have filled this");
633       // Given that we know that CDBs have been moved/generated, don't trust
634       // caches. (This should be rare, so it's OK to add a little latency).
635       constexpr auto IgnoreCache = std::chrono::steady_clock::time_point::max();
636       // Don't broadcast CDBs discovered while broadcasting!
637       bool ShouldBroadcast = false;
638       bool Exists =
639           nullptr != Info->Cache->get(Parent.Opts.TFS, ShouldBroadcast,
640                                       /*FreshTime=*/IgnoreCache,
641                                       /*FreshTimeMissing=*/IgnoreCache);
642       Info->State = Exists ? DirInfo::OtherCDB : DirInfo::Missing;
643     }
644     // If we have a CDB, include the file if it's the target CDB only.
645     if (Info->State != DirInfo::Missing)
646       return Info->State == DirInfo::TargetCDB;
647     // If we have no CDB and no relevant parent, don't include the file.
648     if (!P.getInt() || !Info->Parent)
649       return false;
650     // Walk up to the next parent.
651     return shouldInclude(SearchPath(Info->Parent, 1));
652   }
653 
654 public:
Filter(llvm::StringRef ThisDir,DirectoryBasedGlobalCompilationDatabase & Parent)655   Filter(llvm::StringRef ThisDir,
656          DirectoryBasedGlobalCompilationDatabase &Parent)
657       : ThisDir(ThisDir), Parent(Parent) {}
658 
filter(std::vector<std::string> AllFiles,std::atomic<bool> & ShouldStop)659   std::vector<std::string> filter(std::vector<std::string> AllFiles,
660                                   std::atomic<bool> &ShouldStop) {
661     std::vector<std::string> Filtered;
662     // Allow for clean early-exit of the slow parts.
663     auto ExitEarly = [&] {
664       if (ShouldStop.load(std::memory_order_acquire)) {
665         log("Giving up on broadcasting CDB, as we're shutting down");
666         Filtered.clear();
667         return true;
668       }
669       return false;
670     };
671     // Compute search path for each file.
672     std::vector<SearchPath> SearchPaths(AllFiles.size());
673     for (unsigned I = 0; I < AllFiles.size(); ++I) {
674       if (Parent.Opts.CompileCommandsDir) { // FIXME: unify with config
675         SearchPaths[I].setPointer(&Dirs[*Parent.Opts.CompileCommandsDir]);
676         continue;
677       }
678       if (ExitEarly()) // loading config may be slow
679         return Filtered;
680       WithContext WithProvidedContent(Parent.Opts.ContextProvider(AllFiles[I]));
681       const Config::CDBSearchSpec &Spec =
682           Config::current().CompileFlags.CDBSearch;
683       switch (Spec.Policy) {
684       case Config::CDBSearchSpec::NoCDBSearch:
685         break;
686       case Config::CDBSearchSpec::Ancestors:
687         SearchPaths[I].setInt(/*Recursive=*/1);
688         SearchPaths[I].setPointer(addParents(AllFiles[I]));
689         break;
690       case Config::CDBSearchSpec::FixedDir:
691         SearchPaths[I].setPointer(&Dirs[*Spec.FixedCDBPath]);
692         break;
693       }
694     }
695     // Get the CDB cache for each dir on the search path, but don't load yet.
696     grabCaches();
697     // Now work out which files we want to keep, loading CDBs where needed.
698     for (unsigned I = 0; I < AllFiles.size(); ++I) {
699       if (ExitEarly()) // loading CDBs may be slow
700         return Filtered;
701       if (shouldInclude(SearchPaths[I]))
702         Filtered.push_back(std::move(AllFiles[I]));
703     }
704     return Filtered;
705   }
706 };
707 
process(const CDBLookupResult & T)708 void DirectoryBasedGlobalCompilationDatabase::BroadcastThread::process(
709     const CDBLookupResult &T) {
710   vlog("Broadcasting compilation database from {0}", T.PI.SourceRoot);
711   std::vector<std::string> GovernedFiles =
712       Filter(T.PI.SourceRoot, Parent).filter(T.CDB->getAllFiles(), ShouldStop);
713   if (!GovernedFiles.empty())
714     Parent.OnCommandChanged.broadcast(std::move(GovernedFiles));
715 }
716 
broadcastCDB(CDBLookupResult Result) const717 void DirectoryBasedGlobalCompilationDatabase::broadcastCDB(
718     CDBLookupResult Result) const {
719   assert(Result.CDB && "Trying to broadcast an invalid CDB!");
720   Broadcaster->enqueue(Result);
721 }
722 
// Forwards to the broadcast thread: waits, up to Timeout, until it has no
// queued and no in-progress broadcast, and returns the result of that wait.
bool DirectoryBasedGlobalCompilationDatabase::blockUntilIdle(
    Deadline Timeout) const {
  return Broadcaster->blockUntilIdle(Timeout);
}
727 
728 llvm::Optional<ProjectInfo>
getProjectInfo(PathRef File) const729 DirectoryBasedGlobalCompilationDatabase::getProjectInfo(PathRef File) const {
730   CDBLookupRequest Req;
731   Req.FileName = File;
732   Req.ShouldBroadcast = false;
733   Req.FreshTime = Req.FreshTimeMissing =
734       std::chrono::steady_clock::time_point::min();
735   auto Res = lookupCDB(Req);
736   if (!Res)
737     return llvm::None;
738   return Res->PI;
739 }
740 
// Builds an overlay on top of Base (which may be null; DelegatingCDB checks
// for that). FallbackFlags are appended to every command produced by
// getFallbackCommand(). Adjuster, if set, is applied to the command line of
// every command returned by getCompileCommand() and getFallbackCommand().
OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base,
                       std::vector<std::string> FallbackFlags,
                       tooling::ArgumentsAdjuster Adjuster)
    : DelegatingCDB(Base), ArgsAdjuster(std::move(Adjuster)),
      FallbackFlags(std::move(FallbackFlags)) {}
746 
747 llvm::Optional<tooling::CompileCommand>
getCompileCommand(PathRef File) const748 OverlayCDB::getCompileCommand(PathRef File) const {
749   llvm::Optional<tooling::CompileCommand> Cmd;
750   {
751     std::lock_guard<std::mutex> Lock(Mutex);
752     auto It = Commands.find(removeDots(File));
753     if (It != Commands.end())
754       Cmd = It->second;
755   }
756   if (!Cmd)
757     Cmd = DelegatingCDB::getCompileCommand(File);
758   if (!Cmd)
759     return llvm::None;
760   if (ArgsAdjuster)
761     Cmd->CommandLine = ArgsAdjuster(Cmd->CommandLine, File);
762   return Cmd;
763 }
764 
getFallbackCommand(PathRef File) const765 tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const {
766   auto Cmd = DelegatingCDB::getFallbackCommand(File);
767   std::lock_guard<std::mutex> Lock(Mutex);
768   Cmd.CommandLine.insert(Cmd.CommandLine.end(), FallbackFlags.begin(),
769                          FallbackFlags.end());
770   if (ArgsAdjuster)
771     Cmd.CommandLine = ArgsAdjuster(Cmd.CommandLine, File);
772   return Cmd;
773 }
774 
setCompileCommand(PathRef File,llvm::Optional<tooling::CompileCommand> Cmd)775 void OverlayCDB::setCompileCommand(
776     PathRef File, llvm::Optional<tooling::CompileCommand> Cmd) {
777   // We store a canonical version internally to prevent mismatches between set
778   // and get compile commands. Also it assures clients listening to broadcasts
779   // doesn't receive different names for the same file.
780   std::string CanonPath = removeDots(File);
781   {
782     std::unique_lock<std::mutex> Lock(Mutex);
783     if (Cmd)
784       Commands[CanonPath] = std::move(*Cmd);
785     else
786       Commands.erase(CanonPath);
787   }
788   OnCommandChanged.broadcast({CanonPath});
789 }
790 
DelegatingCDB(const GlobalCompilationDatabase * Base)791 DelegatingCDB::DelegatingCDB(const GlobalCompilationDatabase *Base)
792     : Base(Base) {
793   if (Base)
794     BaseChanged = Base->watch([this](const std::vector<std::string> Changes) {
795       OnCommandChanged.broadcast(Changes);
796     });
797 }
798 
// Owning variant. Delegation is first wired up through the raw-pointer
// constructor (using Base.get() while Base still holds the object), and then
// the unique_ptr itself is stored in BaseOwner.
DelegatingCDB::DelegatingCDB(std::unique_ptr<GlobalCompilationDatabase> Base)
    : DelegatingCDB(Base.get()) {
  BaseOwner = std::move(Base);
}
803 
804 llvm::Optional<tooling::CompileCommand>
getCompileCommand(PathRef File) const805 DelegatingCDB::getCompileCommand(PathRef File) const {
806   if (!Base)
807     return llvm::None;
808   return Base->getCompileCommand(File);
809 }
810 
getProjectInfo(PathRef File) const811 llvm::Optional<ProjectInfo> DelegatingCDB::getProjectInfo(PathRef File) const {
812   if (!Base)
813     return llvm::None;
814   return Base->getProjectInfo(File);
815 }
816 
getFallbackCommand(PathRef File) const817 tooling::CompileCommand DelegatingCDB::getFallbackCommand(PathRef File) const {
818   if (!Base)
819     return GlobalCompilationDatabase::getFallbackCommand(File);
820   return Base->getFallbackCommand(File);
821 }
822 
blockUntilIdle(Deadline D) const823 bool DelegatingCDB::blockUntilIdle(Deadline D) const {
824   if (!Base)
825     return true;
826   return Base->blockUntilIdle(D);
827 }
828 
829 } // namespace clangd
830 } // namespace clang
831