1 //===-- Background.cpp - Build an index in a background thread ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "index/Background.h"
10 #include "Compiler.h"
11 #include "Config.h"
12 #include "Headers.h"
13 #include "SourceCode.h"
14 #include "URI.h"
15 #include "index/BackgroundIndexLoader.h"
16 #include "index/FileIndex.h"
17 #include "index/Index.h"
18 #include "index/IndexAction.h"
19 #include "index/MemIndex.h"
20 #include "index/Ref.h"
21 #include "index/Relation.h"
22 #include "index/Serialization.h"
23 #include "index/Symbol.h"
24 #include "index/SymbolCollector.h"
25 #include "support/Context.h"
26 #include "support/Logger.h"
27 #include "support/Path.h"
28 #include "support/Threading.h"
29 #include "support/ThreadsafeFS.h"
30 #include "support/Trace.h"
31 #include "clang/Basic/SourceLocation.h"
32 #include "clang/Basic/SourceManager.h"
33 #include "clang/Frontend/FrontendAction.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/DenseSet.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/StringMap.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/Support/Error.h"
40 #include "llvm/Support/Path.h"
41 #include "llvm/Support/Threading.h"
42 #include "llvm/Support/xxhash.h"
43 
44 #include <algorithm>
45 #include <atomic>
46 #include <chrono>
47 #include <condition_variable>
48 #include <cstddef>
49 #include <memory>
50 #include <mutex>
51 #include <numeric>
52 #include <queue>
53 #include <random>
54 #include <string>
55 #include <thread>
56 #include <utility>
57 #include <vector>
58 
59 namespace clang {
60 namespace clangd {
61 namespace {
62 
63 // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or
64 // relative to Cmd.Directory, which might not be the same as current working
65 // directory.
getAbsolutePath(const tooling::CompileCommand & Cmd)66 llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) {
67   llvm::SmallString<128> AbsolutePath;
68   if (llvm::sys::path::is_absolute(Cmd.Filename)) {
69     AbsolutePath = Cmd.Filename;
70   } else {
71     AbsolutePath = Cmd.Directory;
72     llvm::sys::path::append(AbsolutePath, Cmd.Filename);
73     llvm::sys::path::remove_dots(AbsolutePath, true);
74   }
75   return AbsolutePath;
76 }
77 
shardIsStale(const LoadedShard & LS,llvm::vfs::FileSystem * FS)78 bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) {
79   auto Buf = FS->getBufferForFile(LS.AbsolutePath);
80   if (!Buf) {
81     vlog("Background-index: Couldn't read {0} to validate stored index: {1}",
82          LS.AbsolutePath, Buf.getError().message());
83     // There is no point in indexing an unreadable file.
84     return false;
85   }
86   return digest(Buf->get()->getBuffer()) != LS.Digest;
87 }
88 
89 } // namespace
90 
BackgroundIndex(const ThreadsafeFS & TFS,const GlobalCompilationDatabase & CDB,BackgroundIndexStorage::Factory IndexStorageFactory,Options Opts)91 BackgroundIndex::BackgroundIndex(
92     const ThreadsafeFS &TFS, const GlobalCompilationDatabase &CDB,
93     BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts)
94     : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB),
95       IndexingPriority(Opts.IndexingPriority),
96       ContextProvider(std::move(Opts.ContextProvider)),
97       IndexedSymbols(IndexContents::All),
98       Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize),
99       IndexStorageFactory(std::move(IndexStorageFactory)),
100       Queue(std::move(Opts.OnProgress)),
101       CommandsChanged(
102           CDB.watch([&](const std::vector<std::string> &ChangedFiles) {
103             enqueue(ChangedFiles);
104           })) {
105   assert(Opts.ThreadPoolSize > 0 && "Thread pool size can't be zero.");
106   assert(this->IndexStorageFactory && "Storage factory can not be null!");
107   for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) {
108     ThreadPool.runAsync("background-worker-" + llvm::Twine(I + 1),
__anonb59968f20302() 109                         [this, Ctx(Context::current().clone())]() mutable {
110                           WithContext BGContext(std::move(Ctx));
111                           Queue.work([&] { Rebuilder.idle(); });
112                         });
113   }
114 }
115 
~BackgroundIndex()116 BackgroundIndex::~BackgroundIndex() {
117   stop();
118   ThreadPool.wait();
119 }
120 
changedFilesTask(const std::vector<std::string> & ChangedFiles)121 BackgroundQueue::Task BackgroundIndex::changedFilesTask(
122     const std::vector<std::string> &ChangedFiles) {
123   BackgroundQueue::Task T([this, ChangedFiles] {
124     trace::Span Tracer("BackgroundIndexEnqueue");
125 
126     llvm::Optional<WithContext> WithProvidedContext;
127     if (ContextProvider)
128       WithProvidedContext.emplace(ContextProvider(/*Path=*/""));
129 
130     // We're doing this asynchronously, because we'll read shards here too.
131     log("Enqueueing {0} commands for indexing", ChangedFiles.size());
132     SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size()));
133 
134     auto NeedsReIndexing = loadProject(std::move(ChangedFiles));
135     // Run indexing for files that need to be updated.
136     std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(),
137                  std::mt19937(std::random_device{}()));
138     std::vector<BackgroundQueue::Task> Tasks;
139     Tasks.reserve(NeedsReIndexing.size());
140     for (const auto &File : NeedsReIndexing)
141       Tasks.push_back(indexFileTask(std::move(File)));
142     Queue.append(std::move(Tasks));
143   });
144 
145   T.QueuePri = LoadShards;
146   T.ThreadPri = llvm::ThreadPriority::Default;
147   return T;
148 }
149 
filenameWithoutExtension(llvm::StringRef Path)150 static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) {
151   Path = llvm::sys::path::filename(Path);
152   return Path.drop_back(llvm::sys::path::extension(Path).size());
153 }
154 
indexFileTask(std::string Path)155 BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) {
156   std::string Tag = filenameWithoutExtension(Path).str();
157   uint64_t Key = llvm::xxHash64(Path);
158   BackgroundQueue::Task T([this, Path(std::move(Path))] {
159     llvm::Optional<WithContext> WithProvidedContext;
160     if (ContextProvider)
161       WithProvidedContext.emplace(ContextProvider(Path));
162     auto Cmd = CDB.getCompileCommand(Path);
163     if (!Cmd)
164       return;
165     if (auto Error = index(std::move(*Cmd)))
166       elog("Indexing {0} failed: {1}", Path, std::move(Error));
167   });
168   T.QueuePri = IndexFile;
169   T.ThreadPri = IndexingPriority;
170   T.Tag = std::move(Tag);
171   T.Key = Key;
172   return T;
173 }
174 
boostRelated(llvm::StringRef Path)175 void BackgroundIndex::boostRelated(llvm::StringRef Path) {
176   if (isHeaderFile(Path))
177     Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile);
178 }
179 
180 /// Given index results from a TU, only update symbols coming from files that
181 /// are different or missing from than \p ShardVersionsSnapshot. Also stores new
182 /// index information on IndexStorage.
update(llvm::StringRef MainFile,IndexFileIn Index,const llvm::StringMap<ShardVersion> & ShardVersionsSnapshot,bool HadErrors)183 void BackgroundIndex::update(
184     llvm::StringRef MainFile, IndexFileIn Index,
185     const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
186     bool HadErrors) {
187   // Keys are URIs.
188   llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate;
189   // Note that sources do not contain any information regarding missing headers,
190   // since we don't even know what absolute path they should fall in.
191   for (const auto &IndexIt : *Index.Sources) {
192     const auto &IGN = IndexIt.getValue();
193     auto AbsPath = URI::resolve(IGN.URI, MainFile);
194     if (!AbsPath) {
195       elog("Failed to resolve URI: {0}", AbsPath.takeError());
196       continue;
197     }
198     const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath);
199     // File has different contents, or indexing was successful this time.
200     if (DigestIt == ShardVersionsSnapshot.end() ||
201         DigestIt->getValue().Digest != IGN.Digest ||
202         (DigestIt->getValue().HadErrors && !HadErrors))
203       FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
204   }
205 
206   // Shard slabs into files.
207   FileShardedIndex ShardedIndex(std::move(Index));
208 
209   // Build and store new slabs for each updated file.
210   for (const auto &FileIt : FilesToUpdate) {
211     auto Uri = FileIt.first();
212     auto IF = ShardedIndex.getShard(Uri);
213     assert(IF && "no shard for file in Index.Sources?");
214     PathRef Path = FileIt.getValue().first;
215 
216     // Only store command line hash for main files of the TU, since our
217     // current model keeps only one version of a header file.
218     if (Path != MainFile)
219       IF->Cmd.reset();
220 
221     // We need to store shards before updating the index, since the latter
222     // consumes slabs.
223     // FIXME: Also skip serializing the shard if it is already up-to-date.
224     if (auto Error = IndexStorageFactory(Path)->storeShard(Path, *IF))
225       elog("Failed to write background-index shard for file {0}: {1}", Path,
226            std::move(Error));
227 
228     {
229       std::lock_guard<std::mutex> Lock(ShardVersionsMu);
230       const auto &Hash = FileIt.getValue().second;
231       auto DigestIt = ShardVersions.try_emplace(Path);
232       ShardVersion &SV = DigestIt.first->second;
233       // Skip if file is already up to date, unless previous index was broken
234       // and this one is not.
235       if (!DigestIt.second && SV.Digest == Hash && SV.HadErrors && !HadErrors)
236         continue;
237       SV.Digest = Hash;
238       SV.HadErrors = HadErrors;
239 
240       // This can override a newer version that is added in another thread, if
241       // this thread sees the older version but finishes later. This should be
242       // rare in practice.
243       IndexedSymbols.update(
244           Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
245           std::make_unique<RefSlab>(std::move(*IF->Refs)),
246           std::make_unique<RelationSlab>(std::move(*IF->Relations)),
247           Path == MainFile);
248     }
249   }
250 }
251 
index(tooling::CompileCommand Cmd)252 llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
253   trace::Span Tracer("BackgroundIndex");
254   SPAN_ATTACH(Tracer, "file", Cmd.Filename);
255   auto AbsolutePath = getAbsolutePath(Cmd);
256 
257   auto FS = TFS.view(Cmd.Directory);
258   auto Buf = FS->getBufferForFile(AbsolutePath);
259   if (!Buf)
260     return llvm::errorCodeToError(Buf.getError());
261   auto Hash = digest(Buf->get()->getBuffer());
262 
263   // Take a snapshot of the versions to avoid locking for each file in the TU.
264   llvm::StringMap<ShardVersion> ShardVersionsSnapshot;
265   {
266     std::lock_guard<std::mutex> Lock(ShardVersionsMu);
267     ShardVersionsSnapshot = ShardVersions;
268   }
269 
270   vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash));
271   ParseInputs Inputs;
272   Inputs.TFS = &TFS;
273   Inputs.CompileCommand = std::move(Cmd);
274   IgnoreDiagnostics IgnoreDiags;
275   auto CI = buildCompilerInvocation(Inputs, IgnoreDiags);
276   if (!CI)
277     return error("Couldn't build compiler invocation");
278 
279   auto Clang =
280       prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr,
281                               std::move(*Buf), std::move(FS), IgnoreDiags);
282   if (!Clang)
283     return error("Couldn't build compiler instance");
284 
285   SymbolCollector::Options IndexOpts;
286   // Creates a filter to not collect index results from files with unchanged
287   // digests.
288   IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM,
289                                                   FileID FID) {
290     const auto *F = SM.getFileEntryForID(FID);
291     if (!F)
292       return false; // Skip invalid files.
293     auto AbsPath = getCanonicalPath(F, SM);
294     if (!AbsPath)
295       return false; // Skip files without absolute path.
296     auto Digest = digestFile(SM, FID);
297     if (!Digest)
298       return false;
299     auto D = ShardVersionsSnapshot.find(*AbsPath);
300     if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest &&
301         !D->second.HadErrors)
302       return false; // Skip files that haven't changed, without errors.
303     return true;
304   };
305   IndexOpts.CollectMainFileRefs = true;
306 
307   IndexFileIn Index;
308   auto Action = createStaticIndexingAction(
309       IndexOpts, [&](SymbolSlab S) { Index.Symbols = std::move(S); },
310       [&](RefSlab R) { Index.Refs = std::move(R); },
311       [&](RelationSlab R) { Index.Relations = std::move(R); },
312       [&](IncludeGraph IG) { Index.Sources = std::move(IG); });
313 
314   // We're going to run clang here, and it could potentially crash.
315   // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
316   // but the leaky "recovery" is pretty scary too in a long-running process.
317   // If crashes are a real problem, maybe we should fork a child process.
318 
319   const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
320   if (!Action->BeginSourceFile(*Clang, Input))
321     return error("BeginSourceFile() failed");
322   if (llvm::Error Err = Action->Execute())
323     return Err;
324 
325   Action->EndSourceFile();
326 
327   Index.Cmd = Inputs.CompileCommand;
328   assert(Index.Symbols && Index.Refs && Index.Sources &&
329          "Symbols, Refs and Sources must be set.");
330 
331   log("Indexed {0} ({1} symbols, {2} refs, {3} files)",
332       Inputs.CompileCommand.Filename, Index.Symbols->size(),
333       Index.Refs->numRefs(), Index.Sources->size());
334   SPAN_ATTACH(Tracer, "symbols", int(Index.Symbols->size()));
335   SPAN_ATTACH(Tracer, "refs", int(Index.Refs->numRefs()));
336   SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size()));
337 
338   bool HadErrors = Clang->hasDiagnostics() &&
339                    Clang->getDiagnostics().hasUncompilableErrorOccurred();
340   if (HadErrors) {
341     log("Failed to compile {0}, index may be incomplete", AbsolutePath);
342     for (auto &It : *Index.Sources)
343       It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
344   }
345   update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors);
346 
347   Rebuilder.indexedTU();
348   return llvm::Error::success();
349 }
350 
351 // Restores shards for \p MainFiles from index storage. Then checks staleness of
352 // those shards and returns a list of TUs that needs to be indexed to update
353 // staleness.
354 std::vector<std::string>
loadProject(std::vector<std::string> MainFiles)355 BackgroundIndex::loadProject(std::vector<std::string> MainFiles) {
356   // Drop files where background indexing is disabled in config.
357   if (ContextProvider)
358     llvm::erase_if(MainFiles, [&](const std::string &TU) {
359       // Load the config for each TU, as indexing may be selectively enabled.
360       WithContext WithProvidedContext(ContextProvider(TU));
361       return Config::current().Index.Background ==
362              Config::BackgroundPolicy::Skip;
363     });
364   Rebuilder.startLoading();
365   // Load shards for all of the mainfiles.
366   const std::vector<LoadedShard> Result =
367       loadIndexShards(MainFiles, IndexStorageFactory, CDB);
368   size_t LoadedShards = 0;
369   {
370     // Update in-memory state.
371     std::lock_guard<std::mutex> Lock(ShardVersionsMu);
372     for (auto &LS : Result) {
373       if (!LS.Shard)
374         continue;
375       auto SS =
376           LS.Shard->Symbols
377               ? std::make_unique<SymbolSlab>(std::move(*LS.Shard->Symbols))
378               : nullptr;
379       auto RS = LS.Shard->Refs
380                     ? std::make_unique<RefSlab>(std::move(*LS.Shard->Refs))
381                     : nullptr;
382       auto RelS =
383           LS.Shard->Relations
384               ? std::make_unique<RelationSlab>(std::move(*LS.Shard->Relations))
385               : nullptr;
386       ShardVersion &SV = ShardVersions[LS.AbsolutePath];
387       SV.Digest = LS.Digest;
388       SV.HadErrors = LS.HadErrors;
389       ++LoadedShards;
390 
391       IndexedSymbols.update(URI::create(LS.AbsolutePath).toString(),
392                             std::move(SS), std::move(RS), std::move(RelS),
393                             LS.CountReferences);
394     }
395   }
396   Rebuilder.loadedShard(LoadedShards);
397   Rebuilder.doneLoading();
398 
399   auto FS = TFS.view(/*CWD=*/llvm::None);
400   llvm::DenseSet<PathRef> TUsToIndex;
401   // We'll accept data from stale shards, but ensure the files get reindexed
402   // soon.
403   for (auto &LS : Result) {
404     if (!shardIsStale(LS, FS.get()))
405       continue;
406     PathRef TUForFile = LS.DependentTU;
407     assert(!TUForFile.empty() && "File without a TU!");
408 
409     // FIXME: Currently, we simply schedule indexing on a TU whenever any of
410     // its dependencies needs re-indexing. We might do it smarter by figuring
411     // out a minimal set of TUs that will cover all the stale dependencies.
412     // FIXME: Try looking at other TUs if no compile commands are available
413     // for this TU, i.e TU was deleted after we performed indexing.
414     TUsToIndex.insert(TUForFile);
415   }
416 
417   return {TUsToIndex.begin(), TUsToIndex.end()};
418 }
419 
profile(MemoryTree & MT) const420 void BackgroundIndex::profile(MemoryTree &MT) const {
421   IndexedSymbols.profile(MT.child("slabs"));
422   // We don't want to mix memory used by index and symbols, so call base class.
423   MT.child("index").addUsage(SwapIndex::estimateMemoryUsage());
424 }
425 } // namespace clangd
426 } // namespace clang
427