1 //===--- GlobalCompilationDatabase.cpp ---------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "GlobalCompilationDatabase.h"
10 #include "Config.h"
11 #include "FS.h"
12 #include "SourceCode.h"
13 #include "support/Logger.h"
14 #include "support/Path.h"
15 #include "support/Threading.h"
16 #include "support/ThreadsafeFS.h"
17 #include "clang/Frontend/CompilerInvocation.h"
18 #include "clang/Tooling/ArgumentsAdjusters.h"
19 #include "clang/Tooling/CompilationDatabase.h"
20 #include "clang/Tooling/CompilationDatabasePluginRegistry.h"
21 #include "clang/Tooling/JSONCompilationDatabase.h"
22 #include "llvm/ADT/None.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/PointerIntPair.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/ScopeExit.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/StringMap.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Support/FileUtilities.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/Program.h"
33 #include "llvm/Support/VirtualFileSystem.h"
34 #include <atomic>
35 #include <chrono>
36 #include <condition_variable>
37 #include <mutex>
38 #include <string>
39 #include <tuple>
40 #include <vector>
41 
42 namespace clang {
43 namespace clangd {
44 namespace {
45 
46 // Runs the given action on all parent directories of filename, starting from
47 // deepest directory and going up to root. Stops whenever action succeeds.
48 void actOnAllParentDirectories(PathRef FileName,
49                                llvm::function_ref<bool(PathRef)> Action) {
50   for (auto Path = absoluteParent(FileName); !Path.empty() && !Action(Path);
51        Path = absoluteParent(Path))
52     ;
53 }
54 
55 } // namespace
56 
57 tooling::CompileCommand
58 GlobalCompilationDatabase::getFallbackCommand(PathRef File) const {
59   std::vector<std::string> Argv = {"clang"};
60   // Clang treats .h files as C by default and files without extension as linker
61   // input, resulting in unhelpful diagnostics.
62   // Parsing as Objective C++ is friendly to more cases.
63   auto FileExtension = llvm::sys::path::extension(File);
64   if (FileExtension.empty() || FileExtension == ".h")
65     Argv.push_back("-xobjective-c++-header");
66   Argv.push_back(std::string(File));
67   tooling::CompileCommand Cmd(llvm::sys::path::parent_path(File),
68                               llvm::sys::path::filename(File), std::move(Argv),
69                               /*Output=*/"");
70   Cmd.Heuristic = "clangd fallback";
71   return Cmd;
72 }
73 
// Loads and caches the CDB from a single directory.
//
// This class is threadsafe, which is to say we have independent locks for each
// directory we're searching for a CDB.
// Loading is deferred until first access.
//
// The DirectoryBasedCDB keeps a map from path => DirectoryCache.
// Typical usage is to:
//  - 1) determine all the paths that might be searched
//  - 2) acquire the map lock and get-or-create all the DirectoryCache entries
//  - 3) release the map lock and query the caches as desired
class DirectoryBasedGlobalCompilationDatabase::DirectoryCache {
  // Clock used for every freshness timestamp in this class.
  using stopwatch = std::chrono::steady_clock;

  // CachedFile is used to read a CDB file on disk (e.g. compile_commands.json).
  // It specializes in being able to quickly bail out if the file is unchanged,
  // which is the common case.
  // Internally, it stores file metadata so a stat() can verify it's unchanged.
  // We don't actually cache the content as it's not needed - if the file is
  // unchanged then the previous CDB is valid.
  struct CachedFile {
    // Builds the tracked path by appending Rel to Parent.
    CachedFile(llvm::StringRef Parent, llvm::StringRef Rel) {
      llvm::SmallString<256> Path = Parent;
      llvm::sys::path::append(Path, Rel);
      this->Path = Path.str().str();
    }
    // Full path of the file being tracked.
    std::string Path;
    // Size, mtime and content hash recorded by the last load() that found new
    // data. Size is NoFileCached when no file is recorded.
    size_t Size = NoFileCached;
    llvm::sys::TimePoint<> ModifiedTime;
    FileDigest ContentHash;

    // Sentinel for Size: -1 converts to the largest size_t value.
    static constexpr size_t NoFileCached = -1;

    // Outcome of a load() attempt.
    struct LoadResult {
      enum {
        FileNotFound,   // stat failed, or the path is not a regular file.
        TransientError, // file exists but reading it failed; retry later.
        FoundSameData,  // metadata or content hash matches the cached state.
        FoundNewData,   // content differs from the cached state; see Buffer.
      } Result;
      std::unique_ptr<llvm::MemoryBuffer> Buffer; // Set only if FoundNewData
    };

    // Checks the file against the recorded metadata and reads it if needed.
    // HasOldData says whether the recorded metadata may be compared against;
    // when false, FoundSameData is never returned.
    LoadResult load(llvm::vfs::FileSystem &FS, bool HasOldData);
  };

  // If we've looked for a CDB here and found none, the time when that happened.
  // (Atomics make it possible for get() to return without taking a lock)
  // Stored as a raw tick count of stopwatch; time_point::min() means "no
  // negative result recorded".
  std::atomic<stopwatch::rep> NoCDBAt = {
      stopwatch::time_point::min().time_since_epoch().count()};

  // Guards the following cache state.
  std::mutex Mu;
  // When was the cache last known to be in sync with disk state?
  stopwatch::time_point CachePopulatedAt = stopwatch::time_point::min();
  // Whether a new CDB has been loaded but not broadcast yet.
  bool NeedsBroadcast = false;
  // Last loaded CDB, meaningful if CachePopulatedAt was ever set.
  // shared_ptr so we can overwrite this when callers are still using the CDB.
  std::shared_ptr<tooling::CompilationDatabase> CDB;
  // File metadata for the CDB files we support tracking directly.
  CachedFile CompileCommandsJson;
  CachedFile BuildCompileCommandsJson;
  CachedFile CompileFlagsTxt;
  // CachedFile member corresponding to CDB.
  //   CDB  | ACF  | Scenario
  //   null | null | no CDB found, or initial empty cache
  //   set  | null | CDB was loaded via generic plugin interface
  //   null | set  | found known CDB file, but parsing it failed
  //   set  | set  | CDB was parsed from a known file
  CachedFile *ActiveCachedFile = nullptr;

public:
  // Creates an empty (not yet loaded) cache for the directory Path.
  // Path must be absolute (asserted). The three tracked files are
  // compile_commands.json, build/compile_commands.json and compile_flags.txt,
  // all relative to Path.
  DirectoryCache(llvm::StringRef Path)
      : CompileCommandsJson(Path, "compile_commands.json"),
        BuildCompileCommandsJson(Path, "build/compile_commands.json"),
        CompileFlagsTxt(Path, "compile_flags.txt"), Path(Path) {
    assert(llvm::sys::path::is_absolute(Path));
  }

  // Absolute canonical path that we're the cache for. (Not case-folded).
  const std::string Path;

  // Get the CDB associated with this directory.
  // Returns null if no CDB is (known to be) present.
  // ShouldBroadcast:
  //  - as input, signals whether the caller is willing to broadcast a
  //    newly-discovered CDB. (e.g. to trigger background indexing)
  //  - as output, signals whether the caller should do so.
  // (If a new CDB is discovered and ShouldBroadcast is false, we mark the
  // CDB as needing broadcast, and broadcast it next time we can).
  // FreshTime: cached state populated after this time is served without
  //   touching the disk.
  // FreshTimeMissing: same, but for a cached "no CDB here" result.
  std::shared_ptr<const tooling::CompilationDatabase>
  get(const ThreadsafeFS &TFS, bool &ShouldBroadcast,
      stopwatch::time_point FreshTime, stopwatch::time_point FreshTimeMissing) {
    // Fast path for common case without taking lock.
    // A sufficiently recent negative result means there's nothing to return
    // and nothing to broadcast.
    if (stopwatch::time_point(stopwatch::duration(NoCDBAt.load())) >
        FreshTimeMissing) {
      ShouldBroadcast = false;
      return nullptr;
    }

    std::lock_guard<std::mutex> Lock(Mu);
    // Runs on every return path below. It is declared after Lock, so it is
    // destroyed first and therefore executes while Mu is still held.
    // OldCDB is the CDB pointer as it was on entry, used to detect a reload.
    auto RequestBroadcast = llvm::make_scope_exit([&, OldCDB(CDB.get())] {
      // If we loaded a new CDB, it should be broadcast at some point.
      if (CDB != nullptr && CDB.get() != OldCDB)
        NeedsBroadcast = true;
      else if (CDB == nullptr) // nothing to broadcast anymore!
        NeedsBroadcast = false;
      // If we have something to broadcast, then do so iff allowed.
      if (!ShouldBroadcast)
        return;
      // The caller takes over the pending broadcast (if any).
      ShouldBroadcast = NeedsBroadcast;
      NeedsBroadcast = false;
    });

    // If our cache is valid, serve from it.
    if (CachePopulatedAt > FreshTime)
      return CDB;

    // load() returns false when the disk state could not be determined; in
    // that case the timestamps are left alone so the next call retries.
    if (/*MayCache=*/load(*TFS.view(/*CWD=*/llvm::None))) {
      // Use new timestamp, as loading may be slow.
      CachePopulatedAt = stopwatch::now();
      // Publish the negative result (or clear it) for the lock-free fast path.
      NoCDBAt.store((CDB ? stopwatch::time_point::min() : CachePopulatedAt)
                        .time_since_epoch()
                        .count());
    }

    return CDB;
  }

private:
  // Updates `CDB` from disk state. Returns false on failure.
  // Called from get() with Mu held.
  bool load(llvm::vfs::FileSystem &FS);
};
207 
208 DirectoryBasedGlobalCompilationDatabase::DirectoryCache::CachedFile::LoadResult
209 DirectoryBasedGlobalCompilationDatabase::DirectoryCache::CachedFile::load(
210     llvm::vfs::FileSystem &FS, bool HasOldData) {
211   auto Stat = FS.status(Path);
212   if (!Stat || !Stat->isRegularFile()) {
213     Size = NoFileCached;
214     ContentHash = {};
215     return {LoadResult::FileNotFound, nullptr};
216   }
217   // If both the size and mtime match, presume unchanged without reading.
218   if (HasOldData && Stat->getLastModificationTime() == ModifiedTime &&
219       Stat->getSize() == Size)
220     return {LoadResult::FoundSameData, nullptr};
221   auto Buf = FS.getBufferForFile(Path);
222   if (!Buf || (*Buf)->getBufferSize() != Stat->getSize()) {
223     // Don't clear the cache - possible we're seeing inconsistent size as the
224     // file is being recreated. If it ends up identical later, great!
225     //
226     // This isn't a complete solution: if we see a partial file but stat/read
227     // agree on its size, we're ultimately going to have spurious CDB reloads.
228     // May be worth fixing if generators don't write atomically (CMake does).
229     elog("Failed to read {0}: {1}", Path,
230          Buf ? "size changed" : Buf.getError().message());
231     return {LoadResult::TransientError, nullptr};
232   }
233 
234   FileDigest NewContentHash = digest((*Buf)->getBuffer());
235   if (HasOldData && NewContentHash == ContentHash) {
236     // mtime changed but data is the same: avoid rebuilding the CDB.
237     ModifiedTime = Stat->getLastModificationTime();
238     return {LoadResult::FoundSameData, nullptr};
239   }
240 
241   Size = (*Buf)->getBufferSize();
242   ModifiedTime = Stat->getLastModificationTime();
243   ContentHash = NewContentHash;
244   return {LoadResult::FoundNewData, std::move(*Buf)};
245 }
246 
247 // Adapt CDB-loading functions to a common interface for DirectoryCache::load().
248 static std::unique_ptr<tooling::CompilationDatabase>
249 parseJSON(PathRef Path, llvm::StringRef Data, std::string &Error) {
250   if (auto CDB = tooling::JSONCompilationDatabase::loadFromBuffer(
251           Data, Error, tooling::JSONCommandLineSyntax::AutoDetect)) {
252     // FS used for expanding response files.
253     // FIXME: ExpandResponseFilesDatabase appears not to provide the usual
254     // thread-safety guarantees, as the access to FS is not locked!
255     // For now, use the real FS, which is known to be threadsafe (if we don't
256     // use/change working directory, which ExpandResponseFilesDatabase doesn't).
257     auto FS = llvm::vfs::getRealFileSystem();
258     return tooling::inferTargetAndDriverMode(
259         tooling::inferMissingCompileCommands(
260             expandResponseFiles(std::move(CDB), std::move(FS))));
261   }
262   return nullptr;
263 }
264 static std::unique_ptr<tooling::CompilationDatabase>
265 parseFixed(PathRef Path, llvm::StringRef Data, std::string &Error) {
266   return tooling::FixedCompilationDatabase::loadFromBuffer(
267       llvm::sys::path::parent_path(Path), Data, Error);
268 }
269 
270 bool DirectoryBasedGlobalCompilationDatabase::DirectoryCache::load(
271     llvm::vfs::FileSystem &FS) {
272   dlog("Probing directory {0}", Path);
273   std::string Error;
274 
275   // Load from the specially-supported compilation databases (JSON + Fixed).
276   // For these, we know the files they read and cache their metadata so we can
277   // cheaply validate whether they've changed, and hot-reload if they have.
278   // (As a bonus, these are also VFS-clean)!
279   struct CDBFile {
280     CachedFile *File;
281     // Wrapper for {Fixed,JSON}CompilationDatabase::loadFromBuffer.
282     std::unique_ptr<tooling::CompilationDatabase> (*Parser)(
283         PathRef,
284         /*Data*/ llvm::StringRef,
285         /*ErrorMsg*/ std::string &);
286   };
287   for (const auto &Entry : {CDBFile{&CompileCommandsJson, parseJSON},
288                             CDBFile{&BuildCompileCommandsJson, parseJSON},
289                             CDBFile{&CompileFlagsTxt, parseFixed}}) {
290     bool Active = ActiveCachedFile == Entry.File;
291     auto Loaded = Entry.File->load(FS, Active);
292     switch (Loaded.Result) {
293     case CachedFile::LoadResult::FileNotFound:
294       if (Active) {
295         log("Unloaded compilation database from {0}", Entry.File->Path);
296         ActiveCachedFile = nullptr;
297         CDB = nullptr;
298       }
299       // Continue looking at other candidates.
300       break;
301     case CachedFile::LoadResult::TransientError:
302       // File existed but we couldn't read it. Reuse the cache, retry later.
303       return false; // Load again next time.
304     case CachedFile::LoadResult::FoundSameData:
305       assert(Active && "CachedFile may not return 'same data' if !HasOldData");
306       // This is the critical file, and it hasn't changed.
307       return true;
308     case CachedFile::LoadResult::FoundNewData:
309       // We have a new CDB!
310       CDB = Entry.Parser(Entry.File->Path, Loaded.Buffer->getBuffer(), Error);
311       if (CDB)
312         log("{0} compilation database from {1}", Active ? "Reloaded" : "Loaded",
313             Entry.File->Path);
314       else
315         elog("Failed to load compilation database from {0}: {1}",
316              Entry.File->Path, Error);
317       ActiveCachedFile = Entry.File;
318       return true;
319     }
320   }
321 
322   // Fall back to generic handling of compilation databases.
323   // We don't know what files they read, so can't efficiently check whether
324   // they need to be reloaded. So we never do that.
325   // FIXME: the interface doesn't provide a way to virtualize FS access.
326 
327   // Don't try these more than once. If we've scanned before, we're done.
328   if (CachePopulatedAt > stopwatch::time_point::min())
329     return true;
330   for (const auto &Entry :
331        tooling::CompilationDatabasePluginRegistry::entries()) {
332     // Avoid duplicating the special cases handled above.
333     if (Entry.getName() == "fixed-compilation-database" ||
334         Entry.getName() == "json-compilation-database")
335       continue;
336     auto Plugin = Entry.instantiate();
337     if (auto CDB = Plugin->loadFromDirectory(Path, Error)) {
338       log("Loaded compilation database from {0} with plugin {1}", Path,
339           Entry.getName());
340       this->CDB = std::move(CDB);
341       return true;
342     }
343     // Don't log Error here, it's usually just "couldn't find <file>".
344   }
345   dlog("No compilation database at {0}", Path);
346   return true;
347 }
348 
349 DirectoryBasedGlobalCompilationDatabase::
350     DirectoryBasedGlobalCompilationDatabase(const Options &Opts)
351     : Opts(Opts), Broadcaster(std::make_unique<BroadcastThread>(*this)) {
352   if (!this->Opts.ContextProvider)
353     this->Opts.ContextProvider = [](llvm::StringRef) {
354       return Context::current().clone();
355     };
356 }
357 
// Defaulted destructor, defined out-of-line in this file.
// NOTE(review): presumably kept here because members such as Broadcaster hold
// types that are only fully defined in this file - confirm against the header.
DirectoryBasedGlobalCompilationDatabase::
    ~DirectoryBasedGlobalCompilationDatabase() = default;
360 
361 llvm::Optional<tooling::CompileCommand>
362 DirectoryBasedGlobalCompilationDatabase::getCompileCommand(PathRef File) const {
363   CDBLookupRequest Req;
364   Req.FileName = File;
365   Req.ShouldBroadcast = true;
366   auto Now = std::chrono::steady_clock::now();
367   Req.FreshTime = Now - Opts.RevalidateAfter;
368   Req.FreshTimeMissing = Now - Opts.RevalidateMissingAfter;
369 
370   auto Res = lookupCDB(Req);
371   if (!Res) {
372     log("Failed to find compilation database for {0}", File);
373     return llvm::None;
374   }
375 
376   auto Candidates = Res->CDB->getCompileCommands(File);
377   if (!Candidates.empty())
378     return std::move(Candidates.front());
379 
380   return None;
381 }
382 
383 std::vector<DirectoryBasedGlobalCompilationDatabase::DirectoryCache *>
384 DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches(
385     llvm::ArrayRef<llvm::StringRef> Dirs) const {
386   std::vector<std::string> FoldedDirs;
387   FoldedDirs.reserve(Dirs.size());
388   for (const auto &Dir : Dirs) {
389 #ifndef NDEBUG
390     if (!llvm::sys::path::is_absolute(Dir))
391       elog("Trying to cache CDB for relative {0}");
392 #endif
393     FoldedDirs.push_back(maybeCaseFoldPath(Dir));
394   }
395 
396   std::vector<DirectoryCache *> Ret;
397   Ret.reserve(Dirs.size());
398 
399   std::lock_guard<std::mutex> Lock(DirCachesMutex);
400   for (unsigned I = 0; I < Dirs.size(); ++I)
401     Ret.push_back(&DirCaches.try_emplace(FoldedDirs[I], Dirs[I]).first->second);
402   return Ret;
403 }
404 
405 llvm::Optional<DirectoryBasedGlobalCompilationDatabase::CDBLookupResult>
406 DirectoryBasedGlobalCompilationDatabase::lookupCDB(
407     CDBLookupRequest Request) const {
408   assert(llvm::sys::path::is_absolute(Request.FileName) &&
409          "path must be absolute");
410 
411   std::string Storage;
412   std::vector<llvm::StringRef> SearchDirs;
413   if (Opts.CompileCommandsDir) // FIXME: unify this case with config.
414     SearchDirs = {Opts.CompileCommandsDir.getValue()};
415   else {
416     WithContext WithProvidedContext(Opts.ContextProvider(Request.FileName));
417     const auto &Spec = Config::current().CompileFlags.CDBSearch;
418     switch (Spec.Policy) {
419     case Config::CDBSearchSpec::NoCDBSearch:
420       return llvm::None;
421     case Config::CDBSearchSpec::FixedDir:
422       Storage = Spec.FixedCDBPath.getValue();
423       SearchDirs = {Storage};
424       break;
425     case Config::CDBSearchSpec::Ancestors:
426       // Traverse the canonical version to prevent false positives. i.e.:
427       // src/build/../a.cc can detect a CDB in /src/build if not
428       // canonicalized.
429       Storage = removeDots(Request.FileName);
430       actOnAllParentDirectories(Storage, [&](llvm::StringRef Dir) {
431         SearchDirs.push_back(Dir);
432         return false;
433       });
434     }
435   }
436 
437   std::shared_ptr<const tooling::CompilationDatabase> CDB = nullptr;
438   bool ShouldBroadcast = false;
439   DirectoryCache *DirCache = nullptr;
440   for (DirectoryCache *Candidate : getDirectoryCaches(SearchDirs)) {
441     bool CandidateShouldBroadcast = Request.ShouldBroadcast;
442     if ((CDB = Candidate->get(Opts.TFS, CandidateShouldBroadcast,
443                               Request.FreshTime, Request.FreshTimeMissing))) {
444       DirCache = Candidate;
445       ShouldBroadcast = CandidateShouldBroadcast;
446       break;
447     }
448   }
449 
450   if (!CDB)
451     return llvm::None;
452 
453   CDBLookupResult Result;
454   Result.CDB = std::move(CDB);
455   Result.PI.SourceRoot = DirCache->Path;
456 
457   if (ShouldBroadcast)
458     broadcastCDB(Result);
459   return Result;
460 }
461 
// The broadcast thread announces files with new compile commands to the world.
// Primarily this is used to enqueue them for background indexing.
//
// It's on a separate thread because:
//  - otherwise it would block the first parse of the initial file
//  - we need to enumerate all files in the CDB, of which there are many
//  - we (will) have to evaluate config for every file in the CDB, which is slow
class DirectoryBasedGlobalCompilationDatabase::BroadcastThread {
  class Filter;
  // The CDB whose OnCommandChanged event is fired by process().
  DirectoryBasedGlobalCompilationDatabase &Parent;

  // Guards Queue and ActiveTask; CV is signalled whenever they (or ShouldStop)
  // change.
  std::mutex Mu;
  std::condition_variable CV;
  // Shutdown flag (CV is notified after writing).
  // This is atomic so that broadcasts can also observe it and abort early.
  std::atomic<bool> ShouldStop = {false};
  // A pending broadcast: the CDB to announce, plus the Context that was
  // current when it was enqueued.
  struct Task {
    CDBLookupResult Lookup;
    Context Ctx;
  };
  // Tasks waiting to be processed, oldest first.
  std::deque<Task> Queue;
  // The task currently being processed, if any.
  llvm::Optional<Task> ActiveTask;
  // The thread starts executing run() as soon as it is constructed, so all
  // members that run() touches must already be initialized by then.
  std::thread Thread; // Must be last member.

  // Thread body: this is just the basic queue processing boilerplate.
  void run() {
    std::unique_lock<std::mutex> Lock(Mu);
    while (true) {
      bool Stopping = false;
      // Sleep until there is work to do or we're asked to shut down.
      CV.wait(Lock, [&] {
        return (Stopping = ShouldStop.load(std::memory_order_acquire)) ||
               !Queue.empty();
      });
      if (Stopping) {
        // Drop pending work and wake anyone waiting on the queue state.
        Queue.clear();
        CV.notify_all();
        return;
      }
      ActiveTask = std::move(Queue.front());
      Queue.pop_front();

      // Process without holding the lock, so enqueue() is never blocked on a
      // slow broadcast.
      Lock.unlock();
      {
        // Run the task under the context captured at enqueue time.
        WithContext WithCtx(std::move(ActiveTask->Ctx));
        process(ActiveTask->Lookup);
      }
      Lock.lock();
      ActiveTask.reset();
      // Wake blockUntilIdle() waiters: we may have become idle.
      CV.notify_all();
    }
  }

  // Inspects a new CDB and broadcasts the files it owns.
  void process(const CDBLookupResult &T);

public:
  // Starts the worker thread immediately.
  BroadcastThread(DirectoryBasedGlobalCompilationDatabase &Parent)
      : Parent(Parent), Thread([this] { run(); }) {}

  // Schedules a broadcast of Lookup. Lookup.PI.SourceRoot must be non-empty.
  // The current Context is captured and used when the task is processed.
  void enqueue(CDBLookupResult Lookup) {
    {
      assert(!Lookup.PI.SourceRoot.empty());
      std::lock_guard<std::mutex> Lock(Mu);
      // New CDB takes precedence over any queued one for the same directory.
      llvm::erase_if(Queue, [&](const Task &T) {
        return T.Lookup.PI.SourceRoot == Lookup.PI.SourceRoot;
      });
      Queue.push_back({std::move(Lookup), Context::current().clone()});
    }
    CV.notify_all();
  }

  // Waits until there are no queued tasks and no task in progress.
  // NOTE(review): wait() is declared outside this file (presumably in
  // support/Threading.h); it is assumed to return whether the condition was
  // met before Timeout expired - confirm.
  bool blockUntilIdle(Deadline Timeout) {
    std::unique_lock<std::mutex> Lock(Mu);
    return wait(Lock, CV, Timeout,
                [&] { return Queue.empty() && !ActiveTask.hasValue(); });
  }

  // Requests shutdown and waits for the worker thread to exit.
  // The flag is written under Mu so the worker can't miss the notification
  // between checking its wait condition and going to sleep.
  ~BroadcastThread() {
    {
      std::lock_guard<std::mutex> Lock(Mu);
      ShouldStop.store(true, std::memory_order_release);
    }
    CV.notify_all();
    Thread.join();
  }
};
549 
550 // The DirBasedCDB associates each file with a specific CDB.
551 // When a CDB is discovered, it may claim to describe files that we associate
552 // with a different CDB. We do not want to broadcast discovery of these, and
553 // trigger background indexing of them.
554 //
555 // We must filter the list, and check whether they are associated with this CDB.
556 // This class attempts to do so efficiently.
557 //
558 // Roughly, it:
559 //  - loads the config for each file, and determines the relevant search path
560 //  - gathers all directories that are part of any search path
561 //  - (lazily) checks for a CDB in each such directory at most once
562 //  - walks the search path for each file and determines whether to include it.
563 class DirectoryBasedGlobalCompilationDatabase::BroadcastThread::Filter {
564   llvm::StringRef ThisDir;
565   DirectoryBasedGlobalCompilationDatabase &Parent;
566 
567   // Keep track of all directories we might check for CDBs.
568   struct DirInfo {
569     DirectoryCache *Cache = nullptr;
570     enum { Unknown, Missing, TargetCDB, OtherCDB } State = Unknown;
571     DirInfo *Parent = nullptr;
572   };
573   llvm::StringMap<DirInfo> Dirs;
574 
575   // A search path starts at a directory, and either includes ancestors or not.
576   using SearchPath = llvm::PointerIntPair<DirInfo *, 1>;
577 
578   // Add all ancestor directories of FilePath to the tracked set.
579   // Returns the immediate parent of the file.
580   DirInfo *addParents(llvm::StringRef FilePath) {
581     DirInfo *Leaf = nullptr;
582     DirInfo *Child = nullptr;
583     actOnAllParentDirectories(FilePath, [&](llvm::StringRef Dir) {
584       auto &Info = Dirs[Dir];
585       // If this is the first iteration, then this node is the overall result.
586       if (!Leaf)
587         Leaf = &Info;
588       // Fill in the parent link from the previous iteration to this parent.
589       if (Child)
590         Child->Parent = &Info;
591       // Keep walking, whether we inserted or not, if parent link is missing.
592       // (If it's present, parent links must be present up to the root, so stop)
593       Child = &Info;
594       return Info.Parent != nullptr;
595     });
596     return Leaf;
597   }
598 
599   // Populates DirInfo::Cache (and State, if it is TargetCDB).
600   void grabCaches() {
601     // Fast path out if there were no files, or CDB loading is off.
602     if (Dirs.empty())
603       return;
604 
605     std::vector<llvm::StringRef> DirKeys;
606     std::vector<DirInfo *> DirValues;
607     DirKeys.reserve(Dirs.size() + 1);
608     DirValues.reserve(Dirs.size());
609     for (auto &E : Dirs) {
610       DirKeys.push_back(E.first());
611       DirValues.push_back(&E.second);
612     }
613 
614     // Also look up the cache entry for the CDB we're broadcasting.
615     // Comparing DirectoryCache pointers is more robust than checking string
616     // equality, e.g. reuses the case-sensitivity handling.
617     DirKeys.push_back(ThisDir);
618     auto DirCaches = Parent.getDirectoryCaches(DirKeys);
619     const DirectoryCache *ThisCache = DirCaches.back();
620     DirCaches.pop_back();
621     DirKeys.pop_back();
622 
623     for (unsigned I = 0; I < DirKeys.size(); ++I) {
624       DirValues[I]->Cache = DirCaches[I];
625       if (DirCaches[I] == ThisCache)
626         DirValues[I]->State = DirInfo::TargetCDB;
627     }
628   }
629 
630   // Should we include a file from this search path?
631   bool shouldInclude(SearchPath P) {
632     DirInfo *Info = P.getPointer();
633     if (!Info)
634       return false;
635     if (Info->State == DirInfo::Unknown) {
636       assert(Info->Cache && "grabCaches() should have filled this");
637       // Given that we know that CDBs have been moved/generated, don't trust
638       // caches. (This should be rare, so it's OK to add a little latency).
639       constexpr auto IgnoreCache = std::chrono::steady_clock::time_point::max();
640       // Don't broadcast CDBs discovered while broadcasting!
641       bool ShouldBroadcast = false;
642       bool Exists =
643           nullptr != Info->Cache->get(Parent.Opts.TFS, ShouldBroadcast,
644                                       /*FreshTime=*/IgnoreCache,
645                                       /*FreshTimeMissing=*/IgnoreCache);
646       Info->State = Exists ? DirInfo::OtherCDB : DirInfo::Missing;
647     }
648     // If we have a CDB, include the file if it's the target CDB only.
649     if (Info->State != DirInfo::Missing)
650       return Info->State == DirInfo::TargetCDB;
651     // If we have no CDB and no relevant parent, don't include the file.
652     if (!P.getInt() || !Info->Parent)
653       return false;
654     // Walk up to the next parent.
655     return shouldInclude(SearchPath(Info->Parent, 1));
656   }
657 
658 public:
659   Filter(llvm::StringRef ThisDir,
660          DirectoryBasedGlobalCompilationDatabase &Parent)
661       : ThisDir(ThisDir), Parent(Parent) {}
662 
663   std::vector<std::string> filter(std::vector<std::string> AllFiles,
664                                   std::atomic<bool> &ShouldStop) {
665     std::vector<std::string> Filtered;
666     // Allow for clean early-exit of the slow parts.
667     auto ExitEarly = [&] {
668       if (ShouldStop.load(std::memory_order_acquire)) {
669         log("Giving up on broadcasting CDB, as we're shutting down");
670         Filtered.clear();
671         return true;
672       }
673       return false;
674     };
675     // Compute search path for each file.
676     std::vector<SearchPath> SearchPaths(AllFiles.size());
677     for (unsigned I = 0; I < AllFiles.size(); ++I) {
678       if (Parent.Opts.CompileCommandsDir) { // FIXME: unify with config
679         SearchPaths[I].setPointer(
680             &Dirs[Parent.Opts.CompileCommandsDir.getValue()]);
681         continue;
682       }
683       if (ExitEarly()) // loading config may be slow
684         return Filtered;
685       WithContext WithProvidedContent(Parent.Opts.ContextProvider(AllFiles[I]));
686       const Config::CDBSearchSpec &Spec =
687           Config::current().CompileFlags.CDBSearch;
688       switch (Spec.Policy) {
689       case Config::CDBSearchSpec::NoCDBSearch:
690         break;
691       case Config::CDBSearchSpec::Ancestors:
692         SearchPaths[I].setInt(/*Recursive=*/1);
693         SearchPaths[I].setPointer(addParents(AllFiles[I]));
694         break;
695       case Config::CDBSearchSpec::FixedDir:
696         SearchPaths[I].setPointer(&Dirs[Spec.FixedCDBPath.getValue()]);
697         break;
698       }
699     }
700     // Get the CDB cache for each dir on the search path, but don't load yet.
701     grabCaches();
702     // Now work out which files we want to keep, loading CDBs where needed.
703     for (unsigned I = 0; I < AllFiles.size(); ++I) {
704       if (ExitEarly()) // loading CDBs may be slow
705         return Filtered;
706       if (shouldInclude(SearchPaths[I]))
707         Filtered.push_back(std::move(AllFiles[I]));
708     }
709     return Filtered;
710   }
711 };
712 
713 void DirectoryBasedGlobalCompilationDatabase::BroadcastThread::process(
714     const CDBLookupResult &T) {
715   vlog("Broadcasting compilation database from {0}", T.PI.SourceRoot);
716   std::vector<std::string> GovernedFiles =
717       Filter(T.PI.SourceRoot, Parent).filter(T.CDB->getAllFiles(), ShouldStop);
718   if (!GovernedFiles.empty())
719     Parent.OnCommandChanged.broadcast(std::move(GovernedFiles));
720 }
721 
722 void DirectoryBasedGlobalCompilationDatabase::broadcastCDB(
723     CDBLookupResult Result) const {
724   assert(Result.CDB && "Trying to broadcast an invalid CDB!");
725   Broadcaster->enqueue(Result);
726 }
727 
728 bool DirectoryBasedGlobalCompilationDatabase::blockUntilIdle(
729     Deadline Timeout) const {
730   return Broadcaster->blockUntilIdle(Timeout);
731 }
732 
733 llvm::Optional<ProjectInfo>
734 DirectoryBasedGlobalCompilationDatabase::getProjectInfo(PathRef File) const {
735   CDBLookupRequest Req;
736   Req.FileName = File;
737   Req.ShouldBroadcast = false;
738   Req.FreshTime = Req.FreshTimeMissing =
739       std::chrono::steady_clock::time_point::min();
740   auto Res = lookupCDB(Req);
741   if (!Res)
742     return llvm::None;
743   return Res->PI;
744 }
745 
// Creates an overlay on top of Base (which may be null).
// FallbackFlags are appended to fallback commands, and Adjuster (if set) is
// applied to every command line handed out by this CDB.
OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base,
                       std::vector<std::string> FallbackFlags,
                       tooling::ArgumentsAdjuster Adjuster)
    : DelegatingCDB(Base), ArgsAdjuster(std::move(Adjuster)),
      FallbackFlags(std::move(FallbackFlags)) {}
751 
752 llvm::Optional<tooling::CompileCommand>
753 OverlayCDB::getCompileCommand(PathRef File) const {
754   llvm::Optional<tooling::CompileCommand> Cmd;
755   {
756     std::lock_guard<std::mutex> Lock(Mutex);
757     auto It = Commands.find(removeDots(File));
758     if (It != Commands.end())
759       Cmd = It->second;
760   }
761   if (!Cmd)
762     Cmd = DelegatingCDB::getCompileCommand(File);
763   if (!Cmd)
764     return llvm::None;
765   if (ArgsAdjuster)
766     Cmd->CommandLine = ArgsAdjuster(Cmd->CommandLine, File);
767   return Cmd;
768 }
769 
770 tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const {
771   auto Cmd = DelegatingCDB::getFallbackCommand(File);
772   std::lock_guard<std::mutex> Lock(Mutex);
773   Cmd.CommandLine.insert(Cmd.CommandLine.end(), FallbackFlags.begin(),
774                          FallbackFlags.end());
775   if (ArgsAdjuster)
776     Cmd.CommandLine = ArgsAdjuster(Cmd.CommandLine, File);
777   return Cmd;
778 }
779 
780 void OverlayCDB::setCompileCommand(
781     PathRef File, llvm::Optional<tooling::CompileCommand> Cmd) {
782   // We store a canonical version internally to prevent mismatches between set
783   // and get compile commands. Also it assures clients listening to broadcasts
784   // doesn't receive different names for the same file.
785   std::string CanonPath = removeDots(File);
786   {
787     std::unique_lock<std::mutex> Lock(Mutex);
788     if (Cmd)
789       Commands[CanonPath] = std::move(*Cmd);
790     else
791       Commands.erase(CanonPath);
792   }
793   OnCommandChanged.broadcast({CanonPath});
794 }
795 
796 DelegatingCDB::DelegatingCDB(const GlobalCompilationDatabase *Base)
797     : Base(Base) {
798   if (Base)
799     BaseChanged = Base->watch([this](const std::vector<std::string> Changes) {
800       OnCommandChanged.broadcast(Changes);
801     });
802 }
803 
// Wraps Base and takes ownership of it.
// Delegates to the non-owning constructor with the raw pointer first, and
// only then moves the unique_ptr into BaseOwner: Base.get() must be read
// before Base is moved from.
DelegatingCDB::DelegatingCDB(std::unique_ptr<GlobalCompilationDatabase> Base)
    : DelegatingCDB(Base.get()) {
  BaseOwner = std::move(Base);
}
808 
809 llvm::Optional<tooling::CompileCommand>
810 DelegatingCDB::getCompileCommand(PathRef File) const {
811   if (!Base)
812     return llvm::None;
813   return Base->getCompileCommand(File);
814 }
815 
816 llvm::Optional<ProjectInfo> DelegatingCDB::getProjectInfo(PathRef File) const {
817   if (!Base)
818     return llvm::None;
819   return Base->getProjectInfo(File);
820 }
821 
822 tooling::CompileCommand DelegatingCDB::getFallbackCommand(PathRef File) const {
823   if (!Base)
824     return GlobalCompilationDatabase::getFallbackCommand(File);
825   return Base->getFallbackCommand(File);
826 }
827 
828 bool DelegatingCDB::blockUntilIdle(Deadline D) const {
829   if (!Base)
830     return true;
831   return Base->blockUntilIdle(D);
832 }
833 
834 } // namespace clangd
835 } // namespace clang
836