1 //===--- CodeComplete.cpp ----------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Code completion has several moving parts:
10 //  - AST-based completions are provided using the completion hooks in Sema.
11 //  - external completions are retrieved from the index (using hints from Sema)
12 //  - the two sources overlap, and must be merged and overloads bundled
13 //  - results must be scored and ranked (see Quality.h) before rendering
14 //
15 // Signature help works in a similar way as code completion, but it is simpler:
16 // it's purely AST-based, and there are few candidates.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "CodeComplete.h"
21 #include "AST.h"
22 #include "CodeCompletionStrings.h"
23 #include "Compiler.h"
24 #include "Diagnostics.h"
25 #include "ExpectedTypes.h"
26 #include "FileDistance.h"
27 #include "FuzzyMatch.h"
28 #include "Headers.h"
29 #include "Hover.h"
30 #include "Preamble.h"
31 #include "Protocol.h"
32 #include "Quality.h"
33 #include "SourceCode.h"
34 #include "TUScheduler.h"
35 #include "URI.h"
36 #include "index/Index.h"
37 #include "index/Symbol.h"
38 #include "index/SymbolOrigin.h"
39 #include "support/Logger.h"
40 #include "support/Threading.h"
41 #include "support/ThreadsafeFS.h"
42 #include "support/Trace.h"
43 #include "clang/AST/Decl.h"
44 #include "clang/AST/DeclBase.h"
45 #include "clang/Basic/CharInfo.h"
46 #include "clang/Basic/LangOptions.h"
47 #include "clang/Basic/SourceLocation.h"
48 #include "clang/Basic/TokenKinds.h"
49 #include "clang/Format/Format.h"
50 #include "clang/Frontend/CompilerInstance.h"
51 #include "clang/Frontend/FrontendActions.h"
52 #include "clang/Lex/ExternalPreprocessorSource.h"
53 #include "clang/Lex/Lexer.h"
54 #include "clang/Lex/Preprocessor.h"
55 #include "clang/Lex/PreprocessorOptions.h"
56 #include "clang/Sema/CodeCompleteConsumer.h"
57 #include "clang/Sema/DeclSpec.h"
58 #include "clang/Sema/Sema.h"
59 #include "llvm/ADT/ArrayRef.h"
60 #include "llvm/ADT/None.h"
61 #include "llvm/ADT/Optional.h"
62 #include "llvm/ADT/SmallVector.h"
63 #include "llvm/ADT/StringExtras.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/Debug.h"
67 #include "llvm/Support/Error.h"
68 #include "llvm/Support/Format.h"
69 #include "llvm/Support/FormatVariadic.h"
70 #include "llvm/Support/ScopedPrinter.h"
71 #include <algorithm>
72 #include <iterator>
73 #include <limits>
74 
// We log detailed candidates here if you run with -debug-only=CodeComplete.
76 #define DEBUG_TYPE "CodeComplete"
77 
78 namespace clang {
79 namespace clangd {
80 namespace {
81 
82 CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
83   using SK = index::SymbolKind;
84   switch (Kind) {
85   case SK::Unknown:
86     return CompletionItemKind::Missing;
87   case SK::Module:
88   case SK::Namespace:
89   case SK::NamespaceAlias:
90     return CompletionItemKind::Module;
91   case SK::Macro:
92     return CompletionItemKind::Text;
93   case SK::Enum:
94     return CompletionItemKind::Enum;
95   case SK::Struct:
96     return CompletionItemKind::Struct;
97   case SK::Class:
98   case SK::Protocol:
99   case SK::Extension:
100   case SK::Union:
101     return CompletionItemKind::Class;
102   case SK::TypeAlias:
103     // We use the same kind as the VSCode C++ extension.
104     // FIXME: pick a better option when we have one.
105     return CompletionItemKind::Interface;
106   case SK::Using:
107     return CompletionItemKind::Reference;
108   case SK::Function:
109   case SK::ConversionFunction:
110     return CompletionItemKind::Function;
111   case SK::Variable:
112   case SK::Parameter:
113   case SK::NonTypeTemplateParm:
114     return CompletionItemKind::Variable;
115   case SK::Field:
116     return CompletionItemKind::Field;
117   case SK::EnumConstant:
118     return CompletionItemKind::EnumMember;
119   case SK::InstanceMethod:
120   case SK::ClassMethod:
121   case SK::StaticMethod:
122   case SK::Destructor:
123     return CompletionItemKind::Method;
124   case SK::InstanceProperty:
125   case SK::ClassProperty:
126   case SK::StaticProperty:
127     return CompletionItemKind::Property;
128   case SK::Constructor:
129     return CompletionItemKind::Constructor;
130   case SK::TemplateTypeParm:
131   case SK::TemplateTemplateParm:
132     return CompletionItemKind::TypeParameter;
133   }
134   llvm_unreachable("Unhandled clang::index::SymbolKind.");
135 }
136 
137 CompletionItemKind
138 toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,
139                      const NamedDecl *Decl,
140                      CodeCompletionContext::Kind CtxKind) {
141   if (Decl)
142     return toCompletionItemKind(index::getSymbolInfo(Decl).Kind);
143   if (CtxKind == CodeCompletionContext::CCC_IncludedFile)
144     return CompletionItemKind::File;
145   switch (ResKind) {
146   case CodeCompletionResult::RK_Declaration:
147     llvm_unreachable("RK_Declaration without Decl");
148   case CodeCompletionResult::RK_Keyword:
149     return CompletionItemKind::Keyword;
150   case CodeCompletionResult::RK_Macro:
151     return CompletionItemKind::Text; // unfortunately, there's no 'Macro'
152                                      // completion items in LSP.
153   case CodeCompletionResult::RK_Pattern:
154     return CompletionItemKind::Snippet;
155   }
156   llvm_unreachable("Unhandled CodeCompletionResult::ResultKind.");
157 }
158 
// Identifier code completion result: an identifier's spelling paired with a
// count of how often it is used.
struct RawIdentifier {
  llvm::StringRef Name; // Spelling of the identifier (a non-owning view).
  unsigned References; // # of usages in file.
};
164 
165 /// A code completion result, in clang-native form.
166 /// It may be promoted to a CompletionItem if it's among the top-ranked results.
167 struct CompletionCandidate {
168   llvm::StringRef Name; // Used for filtering and sorting.
169   // We may have a result from Sema, from the index, or both.
170   const CodeCompletionResult *SemaResult = nullptr;
171   const Symbol *IndexResult = nullptr;
172   const RawIdentifier *IdentifierResult = nullptr;
173   llvm::SmallVector<llvm::StringRef, 1> RankedIncludeHeaders;
174 
175   // Returns a token identifying the overload set this is part of.
176   // 0 indicates it's not part of any overload set.
177   size_t overloadSet(const CodeCompleteOptions &Opts, llvm::StringRef FileName,
178                      IncludeInserter *Inserter) const {
179     if (!Opts.BundleOverloads.getValueOr(false))
180       return 0;
181 
182     // Depending on the index implementation, we can see different header
183     // strings (literal or URI) mapping to the same file. We still want to
184     // bundle those, so we must resolve the header to be included here.
185     std::string HeaderForHash;
186     if (Inserter) {
187       if (auto Header = headerToInsertIfAllowed(Opts)) {
188         if (auto HeaderFile = toHeaderFile(*Header, FileName)) {
189           if (auto Spelled =
190                   Inserter->calculateIncludePath(*HeaderFile, FileName))
191             HeaderForHash = *Spelled;
192         } else {
193           vlog("Code completion header path manipulation failed {0}",
194                HeaderFile.takeError());
195         }
196       }
197     }
198 
199     llvm::SmallString<256> Scratch;
200     if (IndexResult) {
201       switch (IndexResult->SymInfo.Kind) {
202       case index::SymbolKind::ClassMethod:
203       case index::SymbolKind::InstanceMethod:
204       case index::SymbolKind::StaticMethod:
205 #ifndef NDEBUG
206         llvm_unreachable("Don't expect members from index in code completion");
207 #else
208         LLVM_FALLTHROUGH;
209 #endif
210       case index::SymbolKind::Function:
211         // We can't group overloads together that need different #includes.
212         // This could break #include insertion.
213         return llvm::hash_combine(
214             (IndexResult->Scope + IndexResult->Name).toStringRef(Scratch),
215             HeaderForHash);
216       default:
217         return 0;
218       }
219     }
220     if (SemaResult) {
221       // We need to make sure we're consistent with the IndexResult case!
222       const NamedDecl *D = SemaResult->Declaration;
223       if (!D || !D->isFunctionOrFunctionTemplate())
224         return 0;
225       {
226         llvm::raw_svector_ostream OS(Scratch);
227         D->printQualifiedName(OS);
228       }
229       return llvm::hash_combine(Scratch, HeaderForHash);
230     }
231     assert(IdentifierResult);
232     return 0;
233   }
234 
235   // The best header to include if include insertion is allowed.
236   llvm::Optional<llvm::StringRef>
237   headerToInsertIfAllowed(const CodeCompleteOptions &Opts) const {
238     if (Opts.InsertIncludes == CodeCompleteOptions::NeverInsert ||
239         RankedIncludeHeaders.empty())
240       return None;
241     if (SemaResult && SemaResult->Declaration) {
242       // Avoid inserting new #include if the declaration is found in the current
243       // file e.g. the symbol is forward declared.
244       auto &SM = SemaResult->Declaration->getASTContext().getSourceManager();
245       for (const Decl *RD : SemaResult->Declaration->redecls())
246         if (SM.isInMainFile(SM.getExpansionLoc(RD->getBeginLoc())))
247           return None;
248     }
249     return RankedIncludeHeaders[0];
250   }
251 
252   using Bundle = llvm::SmallVector<CompletionCandidate, 4>;
253 };
254 using ScoredBundle =
255     std::pair<CompletionCandidate::Bundle, CodeCompletion::Scores>;
256 struct ScoredBundleGreater {
257   bool operator()(const ScoredBundle &L, const ScoredBundle &R) {
258     if (L.second.Total != R.second.Total)
259       return L.second.Total > R.second.Total;
260     return L.first.front().Name <
261            R.first.front().Name; // Earlier name is better.
262   }
263 };
264 
265 // Assembles a code completion out of a bundle of >=1 completion candidates.
266 // Many of the expensive strings are only computed at this point, once we know
267 // the candidate bundle is going to be returned.
268 //
269 // Many fields are the same for all candidates in a bundle (e.g. name), and are
270 // computed from the first candidate, in the constructor.
271 // Others vary per candidate, so add() must be called for remaining candidates.
272 struct CodeCompletionBuilder {
273   CodeCompletionBuilder(ASTContext *ASTCtx, const CompletionCandidate &C,
274                         CodeCompletionString *SemaCCS,
275                         llvm::ArrayRef<std::string> QueryScopes,
276                         const IncludeInserter &Includes,
277                         llvm::StringRef FileName,
278                         CodeCompletionContext::Kind ContextKind,
279                         const CodeCompleteOptions &Opts,
280                         bool IsUsingDeclaration, tok::TokenKind NextTokenKind)
281       : ASTCtx(ASTCtx),
282         EnableFunctionArgSnippets(Opts.EnableFunctionArgSnippets),
283         IsUsingDeclaration(IsUsingDeclaration), NextTokenKind(NextTokenKind) {
284     add(C, SemaCCS);
285     if (C.SemaResult) {
286       assert(ASTCtx);
287       Completion.Origin |= SymbolOrigin::AST;
288       Completion.Name = std::string(llvm::StringRef(SemaCCS->getTypedText()));
289       if (Completion.Scope.empty()) {
290         if ((C.SemaResult->Kind == CodeCompletionResult::RK_Declaration) ||
291             (C.SemaResult->Kind == CodeCompletionResult::RK_Pattern))
292           if (const auto *D = C.SemaResult->getDeclaration())
293             if (const auto *ND = dyn_cast<NamedDecl>(D))
294               Completion.Scope = std::string(
295                   splitQualifiedName(printQualifiedName(*ND)).first);
296       }
297       Completion.Kind = toCompletionItemKind(
298           C.SemaResult->Kind, C.SemaResult->Declaration, ContextKind);
299       // Sema could provide more info on whether the completion was a file or
300       // folder.
301       if (Completion.Kind == CompletionItemKind::File &&
302           Completion.Name.back() == '/')
303         Completion.Kind = CompletionItemKind::Folder;
304       for (const auto &FixIt : C.SemaResult->FixIts) {
305         Completion.FixIts.push_back(toTextEdit(
306             FixIt, ASTCtx->getSourceManager(), ASTCtx->getLangOpts()));
307       }
308       llvm::sort(Completion.FixIts, [](const TextEdit &X, const TextEdit &Y) {
309         return std::tie(X.range.start.line, X.range.start.character) <
310                std::tie(Y.range.start.line, Y.range.start.character);
311       });
312       Completion.Deprecated |=
313           (C.SemaResult->Availability == CXAvailability_Deprecated);
314     }
315     if (C.IndexResult) {
316       Completion.Origin |= C.IndexResult->Origin;
317       if (Completion.Scope.empty())
318         Completion.Scope = std::string(C.IndexResult->Scope);
319       if (Completion.Kind == CompletionItemKind::Missing)
320         Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind);
321       if (Completion.Name.empty())
322         Completion.Name = std::string(C.IndexResult->Name);
323       // If the completion was visible to Sema, no qualifier is needed. This
324       // avoids unneeded qualifiers in cases like with `using ns::X`.
325       if (Completion.RequiredQualifier.empty() && !C.SemaResult) {
326         llvm::StringRef ShortestQualifier = C.IndexResult->Scope;
327         for (llvm::StringRef Scope : QueryScopes) {
328           llvm::StringRef Qualifier = C.IndexResult->Scope;
329           if (Qualifier.consume_front(Scope) &&
330               Qualifier.size() < ShortestQualifier.size())
331             ShortestQualifier = Qualifier;
332         }
333         Completion.RequiredQualifier = std::string(ShortestQualifier);
334       }
335       Completion.Deprecated |= (C.IndexResult->Flags & Symbol::Deprecated);
336     }
337     if (C.IdentifierResult) {
338       Completion.Origin |= SymbolOrigin::Identifier;
339       Completion.Kind = CompletionItemKind::Text;
340       Completion.Name = std::string(C.IdentifierResult->Name);
341     }
342 
343     // Turn absolute path into a literal string that can be #included.
344     auto Inserted = [&](llvm::StringRef Header)
345         -> llvm::Expected<std::pair<std::string, bool>> {
346       auto ResolvedDeclaring =
347           URI::resolve(C.IndexResult->CanonicalDeclaration.FileURI, FileName);
348       if (!ResolvedDeclaring)
349         return ResolvedDeclaring.takeError();
350       auto ResolvedInserted = toHeaderFile(Header, FileName);
351       if (!ResolvedInserted)
352         return ResolvedInserted.takeError();
353       auto Spelled = Includes.calculateIncludePath(*ResolvedInserted, FileName);
354       if (!Spelled)
355         return error("Header not on include path");
356       return std::make_pair(
357           std::move(*Spelled),
358           Includes.shouldInsertInclude(*ResolvedDeclaring, *ResolvedInserted));
359     };
360     bool ShouldInsert = C.headerToInsertIfAllowed(Opts).hasValue();
361     // Calculate include paths and edits for all possible headers.
362     for (const auto &Inc : C.RankedIncludeHeaders) {
363       if (auto ToInclude = Inserted(Inc)) {
364         CodeCompletion::IncludeCandidate Include;
365         Include.Header = ToInclude->first;
366         if (ToInclude->second && ShouldInsert)
367           Include.Insertion = Includes.insert(ToInclude->first);
368         Completion.Includes.push_back(std::move(Include));
369       } else
370         log("Failed to generate include insertion edits for adding header "
371             "(FileURI='{0}', IncludeHeader='{1}') into {2}: {3}",
372             C.IndexResult->CanonicalDeclaration.FileURI, Inc, FileName,
373             ToInclude.takeError());
374     }
375     // Prefer includes that do not need edits (i.e. already exist).
376     std::stable_partition(Completion.Includes.begin(),
377                           Completion.Includes.end(),
378                           [](const CodeCompletion::IncludeCandidate &I) {
379                             return !I.Insertion.hasValue();
380                           });
381   }
382 
383   void add(const CompletionCandidate &C, CodeCompletionString *SemaCCS) {
384     assert(bool(C.SemaResult) == bool(SemaCCS));
385     Bundled.emplace_back();
386     BundledEntry &S = Bundled.back();
387     if (C.SemaResult) {
388       bool IsPattern = C.SemaResult->Kind == CodeCompletionResult::RK_Pattern;
389       getSignature(*SemaCCS, &S.Signature, &S.SnippetSuffix,
390                    &Completion.RequiredQualifier, IsPattern);
391       S.ReturnType = getReturnType(*SemaCCS);
392     } else if (C.IndexResult) {
393       S.Signature = std::string(C.IndexResult->Signature);
394       S.SnippetSuffix = std::string(C.IndexResult->CompletionSnippetSuffix);
395       S.ReturnType = std::string(C.IndexResult->ReturnType);
396     }
397     if (!Completion.Documentation) {
398       auto SetDoc = [&](llvm::StringRef Doc) {
399         if (!Doc.empty()) {
400           Completion.Documentation.emplace();
401           parseDocumentation(Doc, *Completion.Documentation);
402         }
403       };
404       if (C.IndexResult) {
405         SetDoc(C.IndexResult->Documentation);
406       } else if (C.SemaResult) {
407         SetDoc(getDocComment(*ASTCtx, *C.SemaResult,
408                              /*CommentsFromHeader=*/false));
409       }
410     }
411   }
412 
413   CodeCompletion build() {
414     Completion.ReturnType = summarizeReturnType();
415     Completion.Signature = summarizeSignature();
416     Completion.SnippetSuffix = summarizeSnippet();
417     Completion.BundleSize = Bundled.size();
418     return std::move(Completion);
419   }
420 
421 private:
422   struct BundledEntry {
423     std::string SnippetSuffix;
424     std::string Signature;
425     std::string ReturnType;
426   };
427 
428   // If all BundledEntries have the same value for a property, return it.
429   template <std::string BundledEntry::*Member>
430   const std::string *onlyValue() const {
431     auto B = Bundled.begin(), E = Bundled.end();
432     for (auto I = B + 1; I != E; ++I)
433       if (I->*Member != B->*Member)
434         return nullptr;
435     return &(B->*Member);
436   }
437 
438   template <bool BundledEntry::*Member> const bool *onlyValue() const {
439     auto B = Bundled.begin(), E = Bundled.end();
440     for (auto I = B + 1; I != E; ++I)
441       if (I->*Member != B->*Member)
442         return nullptr;
443     return &(B->*Member);
444   }
445 
446   std::string summarizeReturnType() const {
447     if (auto *RT = onlyValue<&BundledEntry::ReturnType>())
448       return *RT;
449     return "";
450   }
451 
452   std::string summarizeSnippet() const {
453     if (IsUsingDeclaration)
454       return "";
455     auto *Snippet = onlyValue<&BundledEntry::SnippetSuffix>();
456     if (!Snippet)
457       // All bundles are function calls.
458       // FIXME(ibiryukov): sometimes add template arguments to a snippet, e.g.
459       // we need to complete 'forward<$1>($0)'.
460       return "($0)";
461     // Suppress function argument snippets cursor is followed by left
462     // parenthesis (and potentially arguments) or if there are potentially
463     // template arguments. There are cases where it would be wrong (e.g. next
464     // '<' token is a comparison rather than template argument list start) but
465     // it is less common and suppressing snippet provides better UX.
466     if (Completion.Kind == CompletionItemKind::Function ||
467         Completion.Kind == CompletionItemKind::Method ||
468         Completion.Kind == CompletionItemKind::Constructor) {
469       // If there is a potential template argument list, drop snippet and just
470       // complete symbol name. Ideally, this could generate an edit that would
471       // paste function arguments after template argument list but it would be
472       // complicated. Example:
473       //
474       // fu^<int> -> function<int>
475       if (NextTokenKind == tok::less && Snippet->front() == '<')
476         return "";
477       // Potentially followed by argument list.
478       if (NextTokenKind == tok::l_paren) {
479         // If snippet contains template arguments we will emit them and drop
480         // function arguments. Example:
481         //
482         // fu^(42) -> function<int>(42);
483         if (Snippet->front() == '<') {
484           // Find matching '>'. Snippet->find('>') will not work in cases like
485           // template <typename T=std::vector<int>>. Hence, iterate through
486           // the snippet until the angle bracket balance reaches zero.
487           int Balance = 0;
488           size_t I = 0;
489           do {
490             if (Snippet->at(I) == '>')
491               --Balance;
492             else if (Snippet->at(I) == '<')
493               ++Balance;
494             ++I;
495           } while (Balance > 0);
496           return Snippet->substr(0, I);
497         }
498         return "";
499       }
500     }
501     if (EnableFunctionArgSnippets)
502       return *Snippet;
503 
504     // Replace argument snippets with a simplified pattern.
505     if (Snippet->empty())
506       return "";
507     if (Completion.Kind == CompletionItemKind::Function ||
508         Completion.Kind == CompletionItemKind::Method) {
509       // Functions snippets can be of 2 types:
510       // - containing only function arguments, e.g.
511       //   foo(${1:int p1}, ${2:int p2});
512       //   We transform this pattern to '($0)' or '()'.
513       // - template arguments and function arguments, e.g.
514       //   foo<${1:class}>(${2:int p1}).
515       //   We transform this pattern to '<$1>()$0' or '<$0>()'.
516 
517       bool EmptyArgs = llvm::StringRef(*Snippet).endswith("()");
518       if (Snippet->front() == '<')
519         return EmptyArgs ? "<$1>()$0" : "<$1>($0)";
520       if (Snippet->front() == '(')
521         return EmptyArgs ? "()" : "($0)";
522       return *Snippet; // Not an arg snippet?
523     }
524     // 'CompletionItemKind::Interface' matches template type aliases.
525     if (Completion.Kind == CompletionItemKind::Interface ||
526         Completion.Kind == CompletionItemKind::Class) {
527       if (Snippet->front() != '<')
528         return *Snippet; // Not an arg snippet?
529 
530       // Classes and template using aliases can only have template arguments,
531       // e.g. Foo<${1:class}>.
532       if (llvm::StringRef(*Snippet).endswith("<>"))
533         return "<>"; // can happen with defaulted template arguments.
534       return "<$0>";
535     }
536     return *Snippet;
537   }
538 
539   std::string summarizeSignature() const {
540     if (auto *Signature = onlyValue<&BundledEntry::Signature>())
541       return *Signature;
542     // All bundles are function calls.
543     return "(…)";
544   }
545 
546   // ASTCtx can be nullptr if not run with sema.
547   ASTContext *ASTCtx;
548   CodeCompletion Completion;
549   llvm::SmallVector<BundledEntry, 1> Bundled;
550   bool EnableFunctionArgSnippets;
551   // No snippets will be generated for using declarations and when the function
552   // arguments are already present.
553   bool IsUsingDeclaration;
554   tok::TokenKind NextTokenKind;
555 };
556 
557 // Determine the symbol ID for a Sema code completion result, if possible.
558 SymbolID getSymbolID(const CodeCompletionResult &R, const SourceManager &SM) {
559   switch (R.Kind) {
560   case CodeCompletionResult::RK_Declaration:
561   case CodeCompletionResult::RK_Pattern: {
562     // Computing USR caches linkage, which may change after code completion.
563     if (hasUnstableLinkage(R.Declaration))
564       return {};
565     return clang::clangd::getSymbolID(R.Declaration);
566   }
567   case CodeCompletionResult::RK_Macro:
568     return clang::clangd::getSymbolID(R.Macro->getName(), R.MacroDefInfo, SM);
569   case CodeCompletionResult::RK_Keyword:
570     return {};
571   }
572   llvm_unreachable("unknown CodeCompletionResult kind");
573 }
574 
575 // Scopes of the partial identifier we're trying to complete.
576 // It is used when we query the index for more completion results.
577 struct SpecifiedScope {
578   // The scopes we should look in, determined by Sema.
579   //
580   // If the qualifier was fully resolved, we look for completions in these
581   // scopes; if there is an unresolved part of the qualifier, it should be
582   // resolved within these scopes.
583   //
584   // Examples of qualified completion:
585   //
586   //   "::vec"                                      => {""}
587   //   "using namespace std; ::vec^"                => {"", "std::"}
588   //   "namespace ns {using namespace std;} ns::^"  => {"ns::", "std::"}
589   //   "std::vec^"                                  => {""}  // "std" unresolved
590   //
591   // Examples of unqualified completion:
592   //
593   //   "vec^"                                       => {""}
594   //   "using namespace std; vec^"                  => {"", "std::"}
595   //   "using namespace std; namespace ns { vec^ }" => {"ns::", "std::", ""}
596   //
597   // "" for global namespace, "ns::" for normal namespace.
598   std::vector<std::string> AccessibleScopes;
599   // The full scope qualifier as typed by the user (without the leading "::").
600   // Set if the qualifier is not fully resolved by Sema.
601   llvm::Optional<std::string> UnresolvedQualifier;
602 
603   // Construct scopes being queried in indexes. The results are deduplicated.
604   // This method format the scopes to match the index request representation.
605   std::vector<std::string> scopesForIndexQuery() {
606     std::set<std::string> Results;
607     for (llvm::StringRef AS : AccessibleScopes)
608       Results.insert(
609           (AS + (UnresolvedQualifier ? *UnresolvedQualifier : "")).str());
610     return {Results.begin(), Results.end()};
611   }
612 };
613 
614 // Get all scopes that will be queried in indexes and whether symbols from
615 // any scope is allowed. The first scope in the list is the preferred scope
616 // (e.g. enclosing namespace).
617 std::pair<std::vector<std::string>, bool>
618 getQueryScopes(CodeCompletionContext &CCContext, const Sema &CCSema,
619                const CompletionPrefix &HeuristicPrefix,
620                const CodeCompleteOptions &Opts) {
621   SpecifiedScope Scopes;
622   for (auto *Context : CCContext.getVisitedContexts()) {
623     if (isa<TranslationUnitDecl>(Context))
624       Scopes.AccessibleScopes.push_back(""); // global namespace
625     else if (isa<NamespaceDecl>(Context))
626       Scopes.AccessibleScopes.push_back(printNamespaceScope(*Context));
627   }
628 
629   const CXXScopeSpec *SemaSpecifier =
630       CCContext.getCXXScopeSpecifier().getValueOr(nullptr);
631   // Case 1: unqualified completion.
632   if (!SemaSpecifier) {
633     // Case 2 (exception): sema saw no qualifier, but there appears to be one!
634     // This can happen e.g. in incomplete macro expansions. Use heuristics.
635     if (!HeuristicPrefix.Qualifier.empty()) {
636       vlog("Sema said no scope specifier, but we saw {0} in the source code",
637            HeuristicPrefix.Qualifier);
638       StringRef SpelledSpecifier = HeuristicPrefix.Qualifier;
639       if (SpelledSpecifier.consume_front("::"))
640         Scopes.AccessibleScopes = {""};
641       Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
642       return {Scopes.scopesForIndexQuery(), false};
643     }
644     // The enclosing namespace must be first, it gets a quality boost.
645     std::vector<std::string> EnclosingAtFront;
646     std::string EnclosingScope = printNamespaceScope(*CCSema.CurContext);
647     EnclosingAtFront.push_back(EnclosingScope);
648     for (auto &S : Scopes.scopesForIndexQuery()) {
649       if (EnclosingScope != S)
650         EnclosingAtFront.push_back(std::move(S));
651     }
652     // Allow AllScopes completion as there is no explicit scope qualifier.
653     return {EnclosingAtFront, Opts.AllScopes};
654   }
655   // Case 3: sema saw and resolved a scope qualifier.
656   if (SemaSpecifier && SemaSpecifier->isValid())
657     return {Scopes.scopesForIndexQuery(), false};
658 
659   // Case 4: There was a qualifier, and Sema didn't resolve it.
660   Scopes.AccessibleScopes.push_back(""); // Make sure global scope is included.
661   llvm::StringRef SpelledSpecifier = Lexer::getSourceText(
662       CharSourceRange::getCharRange(SemaSpecifier->getRange()),
663       CCSema.SourceMgr, clang::LangOptions());
664   if (SpelledSpecifier.consume_front("::"))
665     Scopes.AccessibleScopes = {""};
666   Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
667   // Sema excludes the trailing "::".
668   if (!Scopes.UnresolvedQualifier->empty())
669     *Scopes.UnresolvedQualifier += "::";
670 
671   return {Scopes.scopesForIndexQuery(), false};
672 }
673 
674 // Should we perform index-based completion in a context of the specified kind?
675 // FIXME: consider allowing completion, but restricting the result types.
676 bool contextAllowsIndex(enum CodeCompletionContext::Kind K) {
677   switch (K) {
678   case CodeCompletionContext::CCC_TopLevel:
679   case CodeCompletionContext::CCC_ObjCInterface:
680   case CodeCompletionContext::CCC_ObjCImplementation:
681   case CodeCompletionContext::CCC_ObjCIvarList:
682   case CodeCompletionContext::CCC_ClassStructUnion:
683   case CodeCompletionContext::CCC_Statement:
684   case CodeCompletionContext::CCC_Expression:
685   case CodeCompletionContext::CCC_ObjCMessageReceiver:
686   case CodeCompletionContext::CCC_EnumTag:
687   case CodeCompletionContext::CCC_UnionTag:
688   case CodeCompletionContext::CCC_ClassOrStructTag:
689   case CodeCompletionContext::CCC_ObjCProtocolName:
690   case CodeCompletionContext::CCC_Namespace:
691   case CodeCompletionContext::CCC_Type:
692   case CodeCompletionContext::CCC_ParenthesizedExpression:
693   case CodeCompletionContext::CCC_ObjCInterfaceName:
694   case CodeCompletionContext::CCC_ObjCCategoryName:
695   case CodeCompletionContext::CCC_Symbol:
696   case CodeCompletionContext::CCC_SymbolOrNewName:
697     return true;
698   case CodeCompletionContext::CCC_OtherWithMacros:
699   case CodeCompletionContext::CCC_DotMemberAccess:
700   case CodeCompletionContext::CCC_ArrowMemberAccess:
701   case CodeCompletionContext::CCC_ObjCPropertyAccess:
702   case CodeCompletionContext::CCC_MacroName:
703   case CodeCompletionContext::CCC_MacroNameUse:
704   case CodeCompletionContext::CCC_PreprocessorExpression:
705   case CodeCompletionContext::CCC_PreprocessorDirective:
706   case CodeCompletionContext::CCC_SelectorName:
707   case CodeCompletionContext::CCC_TypeQualifiers:
708   case CodeCompletionContext::CCC_ObjCInstanceMessage:
709   case CodeCompletionContext::CCC_ObjCClassMessage:
710   case CodeCompletionContext::CCC_IncludedFile:
711   // FIXME: Provide identifier based completions for the following contexts:
712   case CodeCompletionContext::CCC_Other: // Be conservative.
713   case CodeCompletionContext::CCC_NaturalLanguage:
714   case CodeCompletionContext::CCC_Recovery:
715   case CodeCompletionContext::CCC_NewName:
716     return false;
717   }
718   llvm_unreachable("unknown code completion context");
719 }
720 
721 static bool isInjectedClass(const NamedDecl &D) {
722   if (auto *R = dyn_cast_or_null<RecordDecl>(&D))
723     if (R->isInjectedClassName())
724       return true;
725   return false;
726 }
727 
728 // Some member calls are excluded because they're so rarely useful.
729 static bool isExcludedMember(const NamedDecl &D) {
730   // Destructor completion is rarely useful, and works inconsistently.
731   // (s.^ completes ~string, but s.~st^ is an error).
732   if (D.getKind() == Decl::CXXDestructor)
733     return true;
734   // Injected name may be useful for A::foo(), but who writes A::A::foo()?
735   if (isInjectedClass(D))
736     return true;
737   // Explicit calls to operators are also rare.
738   auto NameKind = D.getDeclName().getNameKind();
739   if (NameKind == DeclarationName::CXXOperatorName ||
740       NameKind == DeclarationName::CXXLiteralOperatorName ||
741       NameKind == DeclarationName::CXXConversionFunctionName)
742     return true;
743   return false;
744 }
745 
746 // The CompletionRecorder captures Sema code-complete output, including context.
747 // It filters out ignored results (but doesn't apply fuzzy-filtering yet).
748 // It doesn't do scoring or conversion to CompletionItem yet, as we want to
749 // merge with index results first.
750 // Generally the fields and methods of this object should only be used from
751 // within the callback.
752 struct CompletionRecorder : public CodeCompleteConsumer {
753   CompletionRecorder(const CodeCompleteOptions &Opts,
754                      llvm::unique_function<void()> ResultsCallback)
755       : CodeCompleteConsumer(Opts.getClangCompleteOpts()),
756         CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
757         CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
758         CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {
759     assert(this->ResultsCallback);
760   }
761 
762   std::vector<CodeCompletionResult> Results;
763   CodeCompletionContext CCContext;
764   Sema *CCSema = nullptr; // Sema that created the results.
765   // FIXME: Sema is scary. Can we store ASTContext and Preprocessor, instead?
766 
767   void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
768                                   CodeCompletionResult *InResults,
769                                   unsigned NumResults) override final {
770     // Results from recovery mode are generally useless, and the callback after
771     // recovery (if any) is usually more interesting. To make sure we handle the
772     // future callback from sema, we just ignore all callbacks in recovery mode,
773     // as taking only results from recovery mode results in poor completion
774     // results.
775     // FIXME: in case there is no future sema completion callback after the
776     // recovery mode, we might still want to provide some results (e.g. trivial
777     // identifier-based completion).
778     if (Context.getKind() == CodeCompletionContext::CCC_Recovery) {
779       log("Code complete: Ignoring sema code complete callback with Recovery "
780           "context.");
781       return;
782     }
783     // If a callback is called without any sema result and the context does not
784     // support index-based completion, we simply skip it to give way to
785     // potential future callbacks with results.
786     if (NumResults == 0 && !contextAllowsIndex(Context.getKind()))
787       return;
788     if (CCSema) {
789       log("Multiple code complete callbacks (parser backtracked?). "
790           "Dropping results from context {0}, keeping results from {1}.",
791           getCompletionKindString(Context.getKind()),
792           getCompletionKindString(this->CCContext.getKind()));
793       return;
794     }
795     // Record the completion context.
796     CCSema = &S;
797     CCContext = Context;
798 
799     // Retain the results we might want.
800     for (unsigned I = 0; I < NumResults; ++I) {
801       auto &Result = InResults[I];
802       // Class members that are shadowed by subclasses are usually noise.
803       if (Result.Hidden && Result.Declaration &&
804           Result.Declaration->isCXXClassMember())
805         continue;
806       if (!Opts.IncludeIneligibleResults &&
807           (Result.Availability == CXAvailability_NotAvailable ||
808            Result.Availability == CXAvailability_NotAccessible))
809         continue;
810       if (Result.Declaration &&
811           !Context.getBaseType().isNull() // is this a member-access context?
812           && isExcludedMember(*Result.Declaration))
813         continue;
814       // Skip injected class name when no class scope is not explicitly set.
815       // E.g. show injected A::A in `using A::A^` but not in "A^".
816       if (Result.Declaration && !Context.getCXXScopeSpecifier().hasValue() &&
817           isInjectedClass(*Result.Declaration))
818         continue;
819       // We choose to never append '::' to completion results in clangd.
820       Result.StartsNestedNameSpecifier = false;
821       Results.push_back(Result);
822     }
823     ResultsCallback();
824   }
825 
826   CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
827   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
828 
829   // Returns the filtering/sorting name for Result, which must be from Results.
830   // Returned string is owned by this recorder (or the AST).
831   llvm::StringRef getName(const CodeCompletionResult &Result) {
832     switch (Result.Kind) {
833     case CodeCompletionResult::RK_Declaration:
834       if (auto *ID = Result.Declaration->getIdentifier())
835         return ID->getName();
836       break;
837     case CodeCompletionResult::RK_Keyword:
838       return Result.Keyword;
839     case CodeCompletionResult::RK_Macro:
840       return Result.Macro->getName();
841     case CodeCompletionResult::RK_Pattern:
842       return Result.Pattern->getTypedText();
843     }
844     auto *CCS = codeCompletionString(Result);
845     return CCS->getTypedText();
846   }
847 
848   // Build a CodeCompletion string for R, which must be from Results.
849   // The CCS will be owned by this recorder.
850   CodeCompletionString *codeCompletionString(const CodeCompletionResult &R) {
851     // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
852     return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
853         *CCSema, CCContext, *CCAllocator, CCTUInfo,
854         /*IncludeBriefComments=*/false);
855   }
856 
857 private:
858   CodeCompleteOptions Opts;
859   std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
860   CodeCompletionTUInfo CCTUInfo;
861   llvm::unique_function<void()> ResultsCallback;
862 };
863 
// A rendered signature-help candidate together with the signals used to rank
// it against the other candidates.
struct ScoredSignature {
  // When not null, requires documentation to be requested from the index with
  // this ID.
  SymbolID IDForDoc;
  // The signature as sent to the client: label, parameters, documentation.
  SignatureInformation Signature;
  // Ranking signals: parameter counts and the kind of overload candidate.
  SignatureQualitySignals Quality;
};
871 
872 class SignatureHelpCollector final : public CodeCompleteConsumer {
873 public:
874   SignatureHelpCollector(const clang::CodeCompleteOptions &CodeCompleteOpts,
875                          const SymbolIndex *Index, SignatureHelp &SigHelp)
876       : CodeCompleteConsumer(CodeCompleteOpts), SigHelp(SigHelp),
877         Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
878         CCTUInfo(Allocator), Index(Index) {}
879 
880   void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg,
881                                  OverloadCandidate *Candidates,
882                                  unsigned NumCandidates,
883                                  SourceLocation OpenParLoc) override {
884     assert(!OpenParLoc.isInvalid());
885     SourceManager &SrcMgr = S.getSourceManager();
886     OpenParLoc = SrcMgr.getFileLoc(OpenParLoc);
887     if (SrcMgr.isInMainFile(OpenParLoc))
888       SigHelp.argListStart = sourceLocToPosition(SrcMgr, OpenParLoc);
889     else
890       elog("Location oustide main file in signature help: {0}",
891            OpenParLoc.printToString(SrcMgr));
892 
893     std::vector<ScoredSignature> ScoredSignatures;
894     SigHelp.signatures.reserve(NumCandidates);
895     ScoredSignatures.reserve(NumCandidates);
896     // FIXME(rwols): How can we determine the "active overload candidate"?
897     // Right now the overloaded candidates seem to be provided in a "best fit"
898     // order, so I'm not too worried about this.
899     SigHelp.activeSignature = 0;
900     assert(CurrentArg <= (unsigned)std::numeric_limits<int>::max() &&
901            "too many arguments");
902     SigHelp.activeParameter = static_cast<int>(CurrentArg);
903     for (unsigned I = 0; I < NumCandidates; ++I) {
904       OverloadCandidate Candidate = Candidates[I];
905       // We want to avoid showing instantiated signatures, because they may be
906       // long in some cases (e.g. when 'T' is substituted with 'std::string', we
907       // would get 'std::basic_string<char>').
908       if (auto *Func = Candidate.getFunction()) {
909         if (auto *Pattern = Func->getTemplateInstantiationPattern())
910           Candidate = OverloadCandidate(Pattern);
911       }
912 
913       const auto *CCS = Candidate.CreateSignatureString(
914           CurrentArg, S, *Allocator, CCTUInfo, true);
915       assert(CCS && "Expected the CodeCompletionString to be non-null");
916       ScoredSignatures.push_back(processOverloadCandidate(
917           Candidate, *CCS,
918           Candidate.getFunction()
919               ? getDeclComment(S.getASTContext(), *Candidate.getFunction())
920               : ""));
921     }
922 
923     // Sema does not load the docs from the preamble, so we need to fetch extra
924     // docs from the index instead.
925     llvm::DenseMap<SymbolID, std::string> FetchedDocs;
926     if (Index) {
927       LookupRequest IndexRequest;
928       for (const auto &S : ScoredSignatures) {
929         if (!S.IDForDoc)
930           continue;
931         IndexRequest.IDs.insert(S.IDForDoc);
932       }
933       Index->lookup(IndexRequest, [&](const Symbol &S) {
934         if (!S.Documentation.empty())
935           FetchedDocs[S.ID] = std::string(S.Documentation);
936       });
937       log("SigHelp: requested docs for {0} symbols from the index, got {1} "
938           "symbols with non-empty docs in the response",
939           IndexRequest.IDs.size(), FetchedDocs.size());
940     }
941 
942     llvm::sort(ScoredSignatures, [](const ScoredSignature &L,
943                                     const ScoredSignature &R) {
944       // Ordering follows:
945       // - Less number of parameters is better.
946       // - Function is better than FunctionType which is better than
947       // Function Template.
948       // - High score is better.
949       // - Shorter signature is better.
950       // - Alphabetically smaller is better.
951       if (L.Quality.NumberOfParameters != R.Quality.NumberOfParameters)
952         return L.Quality.NumberOfParameters < R.Quality.NumberOfParameters;
953       if (L.Quality.NumberOfOptionalParameters !=
954           R.Quality.NumberOfOptionalParameters)
955         return L.Quality.NumberOfOptionalParameters <
956                R.Quality.NumberOfOptionalParameters;
957       if (L.Quality.Kind != R.Quality.Kind) {
958         using OC = CodeCompleteConsumer::OverloadCandidate;
959         switch (L.Quality.Kind) {
960         case OC::CK_Function:
961           return true;
962         case OC::CK_FunctionType:
963           return R.Quality.Kind != OC::CK_Function;
964         case OC::CK_FunctionTemplate:
965           return false;
966         }
967         llvm_unreachable("Unknown overload candidate type.");
968       }
969       if (L.Signature.label.size() != R.Signature.label.size())
970         return L.Signature.label.size() < R.Signature.label.size();
971       return L.Signature.label < R.Signature.label;
972     });
973 
974     for (auto &SS : ScoredSignatures) {
975       auto IndexDocIt =
976           SS.IDForDoc ? FetchedDocs.find(SS.IDForDoc) : FetchedDocs.end();
977       if (IndexDocIt != FetchedDocs.end())
978         SS.Signature.documentation = IndexDocIt->second;
979 
980       SigHelp.signatures.push_back(std::move(SS.Signature));
981     }
982   }
983 
984   GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; }
985 
986   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
987 
988 private:
989   void processParameterChunk(llvm::StringRef ChunkText,
990                              SignatureInformation &Signature) const {
991     // (!) this is O(n), should still be fast compared to building ASTs.
992     unsigned ParamStartOffset = lspLength(Signature.label);
993     unsigned ParamEndOffset = ParamStartOffset + lspLength(ChunkText);
994     // A piece of text that describes the parameter that corresponds to
995     // the code-completion location within a function call, message send,
996     // macro invocation, etc.
997     Signature.label += ChunkText;
998     ParameterInformation Info;
999     Info.labelOffsets.emplace(ParamStartOffset, ParamEndOffset);
1000     // FIXME: only set 'labelOffsets' when all clients migrate out of it.
1001     Info.labelString = std::string(ChunkText);
1002 
1003     Signature.parameters.push_back(std::move(Info));
1004   }
1005 
1006   void processOptionalChunk(const CodeCompletionString &CCS,
1007                             SignatureInformation &Signature,
1008                             SignatureQualitySignals &Signal) const {
1009     for (const auto &Chunk : CCS) {
1010       switch (Chunk.Kind) {
1011       case CodeCompletionString::CK_Optional:
1012         assert(Chunk.Optional &&
1013                "Expected the optional code completion string to be non-null.");
1014         processOptionalChunk(*Chunk.Optional, Signature, Signal);
1015         break;
1016       case CodeCompletionString::CK_VerticalSpace:
1017         break;
1018       case CodeCompletionString::CK_CurrentParameter:
1019       case CodeCompletionString::CK_Placeholder:
1020         processParameterChunk(Chunk.Text, Signature);
1021         Signal.NumberOfOptionalParameters++;
1022         break;
1023       default:
1024         Signature.label += Chunk.Text;
1025         break;
1026       }
1027     }
1028   }
1029 
1030   // FIXME(ioeric): consider moving CodeCompletionString logic here to
1031   // CompletionString.h.
1032   ScoredSignature processOverloadCandidate(const OverloadCandidate &Candidate,
1033                                            const CodeCompletionString &CCS,
1034                                            llvm::StringRef DocComment) const {
1035     SignatureInformation Signature;
1036     SignatureQualitySignals Signal;
1037     const char *ReturnType = nullptr;
1038 
1039     Signature.documentation = formatDocumentation(CCS, DocComment);
1040     Signal.Kind = Candidate.getKind();
1041 
1042     for (const auto &Chunk : CCS) {
1043       switch (Chunk.Kind) {
1044       case CodeCompletionString::CK_ResultType:
1045         // A piece of text that describes the type of an entity or,
1046         // for functions and methods, the return type.
1047         assert(!ReturnType && "Unexpected CK_ResultType");
1048         ReturnType = Chunk.Text;
1049         break;
1050       case CodeCompletionString::CK_CurrentParameter:
1051       case CodeCompletionString::CK_Placeholder:
1052         processParameterChunk(Chunk.Text, Signature);
1053         Signal.NumberOfParameters++;
1054         break;
1055       case CodeCompletionString::CK_Optional: {
1056         // The rest of the parameters are defaulted/optional.
1057         assert(Chunk.Optional &&
1058                "Expected the optional code completion string to be non-null.");
1059         processOptionalChunk(*Chunk.Optional, Signature, Signal);
1060         break;
1061       }
1062       case CodeCompletionString::CK_VerticalSpace:
1063         break;
1064       default:
1065         Signature.label += Chunk.Text;
1066         break;
1067       }
1068     }
1069     if (ReturnType) {
1070       Signature.label += " -> ";
1071       Signature.label += ReturnType;
1072     }
1073     dlog("Signal for {0}: {1}", Signature, Signal);
1074     ScoredSignature Result;
1075     Result.Signature = std::move(Signature);
1076     Result.Quality = Signal;
1077     const FunctionDecl *Func = Candidate.getFunction();
1078     if (Func && Result.Signature.documentation.empty()) {
1079       // Computing USR caches linkage, which may change after code completion.
1080       if (!hasUnstableLinkage(Func))
1081         Result.IDForDoc = clangd::getSymbolID(Func);
1082     }
1083     return Result;
1084   }
1085 
1086   SignatureHelp &SigHelp;
1087   std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator;
1088   CodeCompletionTUInfo CCTUInfo;
1089   const SymbolIndex *Index;
1090 }; // SignatureHelpCollector
1091 
// Bundles the inputs semaCodeComplete() needs to run Sema at a cursor position.
struct SemaCompleteInput {
  // Name of the file being completed; used as the code-completion file name
  // and as the name of the main-file buffer.
  PathRef FileName;
  // Completion position, as an offset into ParseInput.Contents.
  size_t Offset;
  // Preamble reused for the parse, unless completing inside the preamble
  // region of the file.
  const PreambleData &Preamble;
  // If set, applied to the compiler invocation before the compiler instance is
  // prepared.
  const llvm::Optional<PreamblePatch> Patch;
  // Provides the file contents, compile command and filesystem for the run.
  const ParseInputs &ParseInput;
};
1099 
1100 void loadMainFilePreambleMacros(const Preprocessor &PP,
1101                                 const PreambleData &Preamble) {
1102   // The ExternalPreprocessorSource has our macros, if we know where to look.
1103   // We can read all the macros using PreambleMacros->ReadDefinedMacros(),
1104   // but this includes transitively included files, so may deserialize a lot.
1105   ExternalPreprocessorSource *PreambleMacros = PP.getExternalSource();
1106   // As we have the names of the macros, we can look up their IdentifierInfo
1107   // and then use this to load just the macros we want.
1108   const auto &ITable = PP.getIdentifierTable();
1109   IdentifierInfoLookup *PreambleIdentifiers =
1110       ITable.getExternalIdentifierLookup();
1111 
1112   if (!PreambleIdentifiers || !PreambleMacros)
1113     return;
1114   for (const auto &MacroName : Preamble.Macros.Names) {
1115     if (ITable.find(MacroName.getKey()) != ITable.end())
1116       continue;
1117     if (auto *II = PreambleIdentifiers->get(MacroName.getKey()))
1118       if (II->isOutOfDate())
1119         PreambleMacros->updateOutOfDateIdentifier(*II);
1120   }
1121 }
1122 
// Invokes Sema code completion on a file.
// Builds a compiler invocation from Input.ParseInput, sets the completion
// point to Input.Offset within Input.ParseInput.Contents, installs Consumer
// on the compiler instance and runs a SyntaxOnlyAction over the main file.
// Consumer is released from its unique_ptr when handed to the compiler
// instance.
// If \p Includes is set, it will be updated based on the compiler invocation.
// Returns false if the compiler invocation could not be created or if
// BeginSourceFile() or Execute() failed; returns true otherwise.
bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
                      const clang::CodeCompleteOptions &Options,
                      const SemaCompleteInput &Input,
                      IncludeStructure *Includes = nullptr) {
  trace::Span Tracer("Sema completion");

  // Diagnostics produced while completing are discarded.
  IgnoreDiagnostics IgnoreDiags;
  auto CI = buildCompilerInvocation(Input.ParseInput, IgnoreDiags);
  if (!CI) {
    elog("Couldn't create CompilerInvocation");
    return false;
  }
  auto &FrontendOpts = CI->getFrontendOpts();
  FrontendOpts.SkipFunctionBodies = true;
  // Disable typo correction in Sema.
  CI->getLangOpts()->SpellChecking = false;
  // Code completion won't trigger in delayed template bodies.
  // This is on-by-default in windows to allow parsing SDK headers; we're only
  // disabling it for the main-file (not preamble).
  CI->getLangOpts()->DelayedTemplateParsing = false;
  // Setup code completion.
  FrontendOpts.CodeCompleteOpts = Options;
  FrontendOpts.CodeCompletionAt.FileName = std::string(Input.FileName);
  // Translate the offset into the line/column pair clang expects.
  std::tie(FrontendOpts.CodeCompletionAt.Line,
           FrontendOpts.CodeCompletionAt.Column) =
      offsetToClangLineColumn(Input.ParseInput.Contents, Input.Offset);

  std::unique_ptr<llvm::MemoryBuffer> ContentsBuffer =
      llvm::MemoryBuffer::getMemBuffer(Input.ParseInput.Contents,
                                       Input.FileName);
  // The diagnostic options must be set before creating a CompilerInstance.
  CI->getDiagnosticOpts().IgnoreWarnings = true;
  // We reuse the preamble whether it's valid or not. This is a
  // correctness/performance tradeoff: building without a preamble is slow, and
  // completion is latency-sensitive.
  // However, if we're completing *inside* the preamble section of the draft,
  // overriding the preamble will break sema completion. Fortunately we can just
  // skip all includes in this case; these completions are really simple.
  PreambleBounds PreambleRegion =
      ComputePreambleBounds(*CI->getLangOpts(), *ContentsBuffer, 0);
  // The offset just past the preamble also counts as "inside" it, unless the
  // preamble ends at the start of a line.
  bool CompletingInPreamble = Input.Offset < PreambleRegion.Size ||
                              (!PreambleRegion.PreambleEndsAtStartOfLine &&
                               Input.Offset == PreambleRegion.Size);
  if (Input.Patch)
    Input.Patch->apply(*CI);
  // NOTE: we must call BeginSourceFile after prepareCompilerInstance. Otherwise
  // the remapped buffers do not get freed.
  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS =
      Input.ParseInput.TFS->view(Input.ParseInput.CompileCommand.Directory);
  // When the preamble carries a stat cache, route filesystem access through it.
  if (Input.Preamble.StatCache)
    VFS = Input.Preamble.StatCache->getConsumingFS(std::move(VFS));
  // No preamble is passed when completing inside the preamble region.
  auto Clang = prepareCompilerInstance(
      std::move(CI), !CompletingInPreamble ? &Input.Preamble.Preamble : nullptr,
      std::move(ContentsBuffer), std::move(VFS), IgnoreDiags);
  Clang->getPreprocessorOpts().SingleFileParseMode = CompletingInPreamble;
  Clang->setCodeCompletionConsumer(Consumer.release());

  SyntaxOnlyAction Action;
  if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) {
    log("BeginSourceFile() failed when running codeComplete for {0}",
        Input.FileName);
    return false;
  }
  // Macros can be defined within the preamble region of the main file.
  // They don't fall nicely into our index/Sema dichotomy:
  //  - they're not indexed for completion (they're not available across files)
  //  - but Sema code complete won't see them: as part of the preamble, they're
  //    deserialized only when mentioned.
  // Force them to be deserialized so SemaCodeComplete sees them.
  loadMainFilePreambleMacros(Clang->getPreprocessor(), Input.Preamble);
  // Record includes seen by the preprocessor into the caller's structure.
  if (Includes)
    Clang->getPreprocessor().addPPCallbacks(
        collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
  if (llvm::Error Err = Action.Execute()) {
    log("Execute() failed when running codeComplete for {0}: {1}",
        Input.FileName, toString(std::move(Err)));
    return false;
  }
  Action.EndSourceFile();

  return true;
}
1207 
1208 // Should we allow index completions in the specified context?
1209 bool allowIndex(CodeCompletionContext &CC) {
1210   if (!contextAllowsIndex(CC.getKind()))
1211     return false;
1212   // We also avoid ClassName::bar (but allow namespace::bar).
1213   auto Scope = CC.getCXXScopeSpecifier();
1214   if (!Scope)
1215     return true;
1216   NestedNameSpecifier *NameSpec = (*Scope)->getScopeRep();
1217   if (!NameSpec)
1218     return true;
1219   // We only query the index when qualifier is a namespace.
1220   // If it's a class, we rely solely on sema completions.
1221   switch (NameSpec->getKind()) {
1222   case NestedNameSpecifier::Global:
1223   case NestedNameSpecifier::Namespace:
1224   case NestedNameSpecifier::NamespaceAlias:
1225     return true;
1226   case NestedNameSpecifier::Super:
1227   case NestedNameSpecifier::TypeSpec:
1228   case NestedNameSpecifier::TypeSpecWithTemplate:
1229   // Unresolved inside a template.
1230   case NestedNameSpecifier::Identifier:
1231     return false;
1232   }
1233   llvm_unreachable("invalid NestedNameSpecifier kind");
1234 }
1235 
1236 std::future<SymbolSlab> startAsyncFuzzyFind(const SymbolIndex &Index,
1237                                             const FuzzyFindRequest &Req) {
1238   return runAsync<SymbolSlab>([&Index, Req]() {
1239     trace::Span Tracer("Async fuzzyFind");
1240     SymbolSlab::Builder Syms;
1241     Index.fuzzyFind(Req, [&Syms](const Symbol &Sym) { Syms.insert(Sym); });
1242     return std::move(Syms).build();
1243   });
1244 }
1245 
1246 // Creates a `FuzzyFindRequest` based on the cached index request from the
1247 // last completion, if any, and the speculated completion filter text in the
1248 // source code.
1249 FuzzyFindRequest speculativeFuzzyFindRequestForCompletion(
1250     FuzzyFindRequest CachedReq, const CompletionPrefix &HeuristicPrefix) {
1251   CachedReq.Query = std::string(HeuristicPrefix.Name);
1252   return CachedReq;
1253 }
1254 
1255 // Runs Sema-based (AST) and Index-based completion, returns merged results.
1256 //
1257 // There are a few tricky considerations:
1258 //   - the AST provides information needed for the index query (e.g. which
1259 //     namespaces to search in). So Sema must start first.
1260 //   - we only want to return the top results (Opts.Limit).
1261 //     Building CompletionItems for everything else is wasteful, so we want to
1262 //     preserve the "native" format until we're done with scoring.
1263 //   - the data underlying Sema completion items is owned by the AST and various
1264 //     other arenas, which must stay alive for us to build CompletionItems.
1265 //   - we may get duplicate results from Sema and the Index, we need to merge.
1266 //
1267 // So we start Sema completion first, and do all our work in its callback.
1268 // We use the Sema context information to query the index.
1269 // Then we merge the two result sets, producing items that are Sema/Index/Both.
1270 // These items are scored, and the top N are synthesized into the LSP response.
1271 // Finally, we can clean up the data structures created by Sema completion.
1272 //
1273 // Main collaborators are:
1274 //   - semaCodeComplete sets up the compiler machinery to run code completion.
1275 //   - CompletionRecorder captures Sema completion results, including context.
1276 //   - SymbolIndex (Opts.Index) provides index completion results as Symbols
1277 //   - CompletionCandidates are the result of merging Sema and Index results.
1278 //     Each candidate points to an underlying CodeCompletionResult (Sema), a
1279 //     Symbol (Index), or both. It computes the result quality score.
1280 //     CompletionCandidate also does conversion to CompletionItem (at the end).
1281 //   - FuzzyMatcher scores how the candidate matches the partial identifier.
1282 //     This score is combined with the result quality score for the final score.
1283 //   - TopN determines the results with the best score.
1284 class CodeCompleteFlow {
1285   PathRef FileName;
1286   IncludeStructure Includes;           // Complete once the compiler runs.
1287   SpeculativeFuzzyFind *SpecFuzzyFind; // Can be nullptr.
1288   const CodeCompleteOptions &Opts;
1289 
1290   // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
1291   CompletionRecorder *Recorder = nullptr;
1292   CodeCompletionContext::Kind CCContextKind = CodeCompletionContext::CCC_Other;
1293   bool IsUsingDeclaration = false;
1294   // The snippets will not be generated if the token following completion
1295   // location is an opening parenthesis (tok::l_paren) because this would add
1296   // extra parenthesis.
1297   tok::TokenKind NextTokenKind = tok::eof;
1298   // Counters for logging.
1299   int NSema = 0, NIndex = 0, NSemaAndIndex = 0, NIdent = 0;
1300   bool Incomplete = false; // Would more be available with a higher limit?
1301   CompletionPrefix HeuristicPrefix;
1302   llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
1303   Range ReplacedRange;
1304   std::vector<std::string> QueryScopes; // Initialized once Sema runs.
1305   // Initialized once QueryScopes is initialized, if there are scopes.
1306   llvm::Optional<ScopeDistance> ScopeProximity;
1307   llvm::Optional<OpaqueType> PreferredType; // Initialized once Sema runs.
1308   // Whether to query symbols from any scope. Initialized once Sema runs.
1309   bool AllScopes = false;
1310   llvm::StringSet<> ContextWords;
1311   // Include-insertion and proximity scoring rely on the include structure.
1312   // This is available after Sema has run.
1313   llvm::Optional<IncludeInserter> Inserter;  // Available during runWithSema.
1314   llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs.
1315   /// Speculative request based on the cached request and the filter text before
1316   /// the cursor.
1317   /// Initialized right before sema run. This is only set if `SpecFuzzyFind` is
1318   /// set and contains a cached request.
1319   llvm::Optional<FuzzyFindRequest> SpecReq;
1320 
1321 public:
  // A CodeCompleteFlow object is only useful for calling run() exactly once.
  // Includes is copied into this object, and run() passes that copy to
  // semaCodeComplete() to be updated. Opts is held by reference.
  // SpecFuzzyFind may be null.
  CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes,
                   SpeculativeFuzzyFind *SpecFuzzyFind,
                   const CodeCompleteOptions &Opts)
      : FileName(FileName), Includes(Includes), SpecFuzzyFind(SpecFuzzyFind),
        Opts(Opts) {}
1328 
  // Runs the whole completion flow for SemaCCInput and returns the result.
  // Optionally starts a speculative index query, then runs Sema completion
  // with a CompletionRecorder whose callback does the real work (index query,
  // merging, scoring) through runWithSema() while Sema's data is still alive.
  // If the callback never runs, the returned result is left default-constructed.
  CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && {
    trace::Span Tracer("CodeCompleteFlow");
    HeuristicPrefix = guessCompletionPrefix(SemaCCInput.ParseInput.Contents,
                                            SemaCCInput.Offset);
    populateContextWords(SemaCCInput.ParseInput.Contents);
    // Speculatively re-issue the cached index request with the guessed filter
    // text, so the query is already in flight while Sema runs.
    if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) {
      assert(!SpecFuzzyFind->Result.valid());
      SpecReq = speculativeFuzzyFindRequestForCompletion(
          *SpecFuzzyFind->CachedReq, HeuristicPrefix);
      SpecFuzzyFind->Result = startAsyncFuzzyFind(*Opts.Index, *SpecReq);
    }

    // We run Sema code completion first. It builds an AST and calculates:
    //   - completion results based on the AST.
    //   - partial identifier and context. We need these for the index query.
    CodeCompleteResult Output;
    // The lambda below is the recorder's results callback: it runs from within
    // the recorder's ProcessCodeCompleteResults(), after results are recorded.
    auto RecorderOwner = std::make_unique<CompletionRecorder>(Opts, [&]() {
      assert(Recorder && "Recorder is not set");
      CCContextKind = Recorder->CCContext.getKind();
      IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
      auto Style = getFormatStyleForFile(SemaCCInput.FileName,
                                         SemaCCInput.ParseInput.Contents,
                                         *SemaCCInput.ParseInput.TFS);
      // Remember the token after the completion point; NextTokenKind stays
      // tok::eof if none is found.
      const auto NextToken = Lexer::findNextToken(
          Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(),
          Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts);
      if (NextToken)
        NextTokenKind = NextToken->getKind();
      // If preprocessor was run, inclusions from preprocessor callback should
      // already be added to Includes.
      Inserter.emplace(
          SemaCCInput.FileName, SemaCCInput.ParseInput.Contents, Style,
          SemaCCInput.ParseInput.CompileCommand.Directory,
          &Recorder->CCSema->getPreprocessor().getHeaderSearchInfo());
      for (const auto &Inc : Includes.MainFileIncludes)
        Inserter->addExisting(Inc);

      // Most of the cost of file proximity is in initializing the FileDistance
      // structures based on the observed includes, once per query. Conceptually
      // that happens here (though the per-URI-scheme initialization is lazy).
      // The per-result proximity scoring is (amortized) very cheap.
      FileDistanceOptions ProxOpts{}; // Use defaults.
      const auto &SM = Recorder->CCSema->getSourceManager();
      llvm::StringMap<SourceParams> ProxSources;
      for (auto &Entry : Includes.includeDepth(
               SM.getFileEntryForID(SM.getMainFileID())->getName())) {
        auto &Source = ProxSources[Entry.getKey()];
        // Cost grows with the include depth of the file.
        Source.Cost = Entry.getValue() * ProxOpts.IncludeCost;
        // Symbols near our transitive includes are good, but only consider
        // things in the same directory or below it. Otherwise there can be
        // many false positives.
        if (Entry.getValue() > 0)
          Source.MaxUpTraversals = 1;
      }
      FileProximity.emplace(ProxSources, ProxOpts);

      Output = runWithSema();
      Inserter.reset(); // Make sure this doesn't out-live Clang.
      SPAN_ATTACH(Tracer, "sema_completion_kind",
                  getCompletionKindString(CCContextKind));
      log("Code complete: sema context {0}, query scopes [{1}] (AnyScope={2}), "
          "expected type {3}{4}",
          getCompletionKindString(CCContextKind),
          llvm::join(QueryScopes.begin(), QueryScopes.end(), ","), AllScopes,
          PreferredType ? Recorder->CCContext.getPreferredType().getAsString()
                        : "<none>",
          IsUsingDeclaration ? ", inside using declaration" : "");
    });

    // Keep a non-owning pointer for the callback; ownership moves away below.
    Recorder = RecorderOwner.get();

    // The return value is not checked: on failure Output keeps whatever the
    // callback produced, if it ran at all.
    semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(),
                     SemaCCInput, &Includes);
    logResults(Output, Tracer);
    return Output;
  }
1405 
1406   void logResults(const CodeCompleteResult &Output, const trace::Span &Tracer) {
1407     SPAN_ATTACH(Tracer, "sema_results", NSema);
1408     SPAN_ATTACH(Tracer, "index_results", NIndex);
1409     SPAN_ATTACH(Tracer, "merged_results", NSemaAndIndex);
1410     SPAN_ATTACH(Tracer, "identifier_results", NIdent);
1411     SPAN_ATTACH(Tracer, "returned_results", int64_t(Output.Completions.size()));
1412     SPAN_ATTACH(Tracer, "incomplete", Output.HasMore);
1413     log("Code complete: {0} results from Sema, {1} from Index, "
1414         "{2} matched, {3} from identifiers, {4} returned{5}.",
1415         NSema, NIndex, NSemaAndIndex, NIdent, Output.Completions.size(),
1416         Output.HasMore ? " (incomplete)" : "");
1417     assert(!Opts.Limit || Output.Completions.size() <= Opts.Limit);
1418     // We don't assert that isIncomplete means we hit a limit.
1419     // Indexes may choose to impose their own limits even if we don't have one.
1420   }
1421 
  // Computes completions without running the parser. Candidates are the
  // identifiers collected from the file text plus, when Opts.Index is set,
  // index results queried with heuristically guessed scopes.
  // \p Content is the file text and \p Offset the completion point within it;
  // \p TFS is used to look up the format style for the file.
  // Rvalue-qualified: it can only be invoked on an expiring flow object.
  CodeCompleteResult runWithoutSema(llvm::StringRef Content, size_t Offset,
                                    const ThreadsafeFS &TFS) && {
    trace::Span Tracer("CodeCompleteWithoutSema");
    // Fill in fields normally set by runWithSema()
    HeuristicPrefix = guessCompletionPrefix(Content, Offset);
    // Must follow the line above: the context window is located relative to
    // HeuristicPrefix.Qualifier.
    populateContextWords(Content);
    CCContextKind = CodeCompletionContext::CCC_Recovery;
    IsUsingDeclaration = false;
    Filter = FuzzyMatcher(HeuristicPrefix.Name);
    // The replaced range is the guessed name only: it ends at the completion
    // point and starts Name.size() characters before it.
    auto Pos = offsetToPosition(Content, Offset);
    ReplacedRange.start = ReplacedRange.end = Pos;
    ReplacedRange.start.character -= HeuristicPrefix.Name.size();

    // The file itself is the only proximity source, at zero cost.
    llvm::StringMap<SourceParams> ProxSources;
    ProxSources[FileName].Cost = 0;
    FileProximity.emplace(ProxSources);

    auto Style = getFormatStyleForFile(FileName, Content, TFS);
    // This will only insert verbatim headers.
    Inserter.emplace(FileName, Content, Style,
                     /*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);

    // Turn every identifier seen in the text into a candidate, carrying its
    // occurrence count as the reference count.
    auto Identifiers = collectIdentifiers(Content, Style);
    std::vector<RawIdentifier> IdentifierResults;
    for (const auto &IDAndCount : Identifiers) {
      RawIdentifier ID;
      ID.Name = IDAndCount.first();
      ID.References = IDAndCount.second;
      // Avoid treating typed filter as an identifier.
      if (ID.Name == HeuristicPrefix.Name)
        --ID.References;
      // Identifiers left with no references are not offered.
      if (ID.References > 0)
        IdentifierResults.push_back(std::move(ID));
    }

    // Simplified version of getQueryScopes():
    //  - accessible scopes are determined heuristically.
    //  - all-scopes query if no qualifier was typed (and it's allowed).
    SpecifiedScope Scopes;
    Scopes.AccessibleScopes = visibleNamespaces(
        Content.take_front(Offset), format::getFormattingLangOpts(Style));
    for (std::string &S : Scopes.AccessibleScopes)
      if (!S.empty())
        S.append("::"); // visibleNamespaces doesn't include trailing ::.
    if (HeuristicPrefix.Qualifier.empty())
      AllScopes = Opts.AllScopes;
    else if (HeuristicPrefix.Qualifier.startswith("::")) {
      // A leading "::" restricts lookup to the global scope; the rest of the
      // typed qualifier is left unresolved.
      Scopes.AccessibleScopes = {""};
      Scopes.UnresolvedQualifier =
          std::string(HeuristicPrefix.Qualifier.drop_front(2));
    } else
      Scopes.UnresolvedQualifier = std::string(HeuristicPrefix.Qualifier);
    // First scope is the (modified) enclosing scope.
    QueryScopes = Scopes.scopesForIndexQuery();
    ScopeProximity.emplace(QueryScopes);

    // queryIndex() reads Filter, QueryScopes and AllScopes, all set above.
    SymbolSlab IndexResults = Opts.Index ? queryIndex() : SymbolSlab();

    // There are no Sema results on this path; only index and identifier
    // results are merged.
    CodeCompleteResult Output = toCodeCompleteResult(mergeResults(
        /*SemaResults=*/{}, IndexResults, IdentifierResults));
    Output.RanParser = false;
    logResults(Output, Tracer);
    return Output;
  }
1486 
1487 private:
1488   void populateContextWords(llvm::StringRef Content) {
1489     // Take last 3 lines before the completion point.
1490     unsigned RangeEnd = HeuristicPrefix.Qualifier.begin() - Content.data(),
1491              RangeBegin = RangeEnd;
1492     for (size_t I = 0; I < 3 && RangeBegin > 0; ++I) {
1493       auto PrevNL = Content.rfind('\n', RangeBegin);
1494       if (PrevNL == StringRef::npos) {
1495         RangeBegin = 0;
1496         break;
1497       }
1498       RangeBegin = PrevNL;
1499     }
1500 
1501     ContextWords = collectWords(Content.slice(RangeBegin, RangeEnd));
1502     dlog("Completion context words: {0}",
1503          llvm::join(ContextWords.keys(), ", "));
1504   }
1505 
1506   // This is called by run() once Sema code completion is done, but before the
1507   // Sema data structures are torn down. It does all the real work.
1508   CodeCompleteResult runWithSema() {
1509     const auto &CodeCompletionRange = CharSourceRange::getCharRange(
1510         Recorder->CCSema->getPreprocessor().getCodeCompletionTokenRange());
1511     // When we are getting completions with an empty identifier, for example
1512     //    std::vector<int> asdf;
1513     //    asdf.^;
1514     // Then the range will be invalid and we will be doing insertion, use
1515     // current cursor position in such cases as range.
1516     if (CodeCompletionRange.isValid()) {
1517       ReplacedRange = halfOpenToRange(Recorder->CCSema->getSourceManager(),
1518                                       CodeCompletionRange);
1519     } else {
1520       const auto &Pos = sourceLocToPosition(
1521           Recorder->CCSema->getSourceManager(),
1522           Recorder->CCSema->getPreprocessor().getCodeCompletionLoc());
1523       ReplacedRange.start = ReplacedRange.end = Pos;
1524     }
1525     Filter = FuzzyMatcher(
1526         Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
1527     std::tie(QueryScopes, AllScopes) = getQueryScopes(
1528         Recorder->CCContext, *Recorder->CCSema, HeuristicPrefix, Opts);
1529     if (!QueryScopes.empty())
1530       ScopeProximity.emplace(QueryScopes);
1531     PreferredType =
1532         OpaqueType::fromType(Recorder->CCSema->getASTContext(),
1533                              Recorder->CCContext.getPreferredType());
1534     // Sema provides the needed context to query the index.
1535     // FIXME: in addition to querying for extra/overlapping symbols, we should
1536     //        explicitly request symbols corresponding to Sema results.
1537     //        We can use their signals even if the index can't suggest them.
1538     // We must copy index results to preserve them, but there are at most Limit.
1539     auto IndexResults = (Opts.Index && allowIndex(Recorder->CCContext))
1540                             ? queryIndex()
1541                             : SymbolSlab();
1542     trace::Span Tracer("Populate CodeCompleteResult");
1543     // Merge Sema and Index results, score them, and pick the winners.
1544     auto Top =
1545         mergeResults(Recorder->Results, IndexResults, /*Identifiers*/ {});
1546     return toCodeCompleteResult(Top);
1547   }
1548 
1549   CodeCompleteResult
1550   toCodeCompleteResult(const std::vector<ScoredBundle> &Scored) {
1551     CodeCompleteResult Output;
1552 
1553     // Convert the results to final form, assembling the expensive strings.
1554     for (auto &C : Scored) {
1555       Output.Completions.push_back(toCodeCompletion(C.first));
1556       Output.Completions.back().Score = C.second;
1557       Output.Completions.back().CompletionTokenRange = ReplacedRange;
1558     }
1559     Output.HasMore = Incomplete;
1560     Output.Context = CCContextKind;
1561     Output.CompletionRange = ReplacedRange;
1562     return Output;
1563   }
1564 
1565   SymbolSlab queryIndex() {
1566     trace::Span Tracer("Query index");
1567     SPAN_ATTACH(Tracer, "limit", int64_t(Opts.Limit));
1568 
1569     // Build the query.
1570     FuzzyFindRequest Req;
1571     if (Opts.Limit)
1572       Req.Limit = Opts.Limit;
1573     Req.Query = std::string(Filter->pattern());
1574     Req.RestrictForCodeCompletion = true;
1575     Req.Scopes = QueryScopes;
1576     Req.AnyScope = AllScopes;
1577     // FIXME: we should send multiple weighted paths here.
1578     Req.ProximityPaths.push_back(std::string(FileName));
1579     if (PreferredType)
1580       Req.PreferredTypes.push_back(std::string(PreferredType->raw()));
1581     vlog("Code complete: fuzzyFind({0:2})", toJSON(Req));
1582 
1583     if (SpecFuzzyFind)
1584       SpecFuzzyFind->NewReq = Req;
1585     if (SpecFuzzyFind && SpecFuzzyFind->Result.valid() && (*SpecReq == Req)) {
1586       vlog("Code complete: speculative fuzzy request matches the actual index "
1587            "request. Waiting for the speculative index results.");
1588       SPAN_ATTACH(Tracer, "Speculative results", true);
1589 
1590       trace::Span WaitSpec("Wait speculative results");
1591       return SpecFuzzyFind->Result.get();
1592     }
1593 
1594     SPAN_ATTACH(Tracer, "Speculative results", false);
1595 
1596     // Run the query against the index.
1597     SymbolSlab::Builder ResultsBuilder;
1598     if (Opts.Index->fuzzyFind(
1599             Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); }))
1600       Incomplete = true;
1601     return std::move(ResultsBuilder).build();
1602   }
1603 
  // Merges Sema and Index results where possible, to form CompletionCandidates.
  // \p Identifiers is raw identifiers that can also be completion candidates.
  // Identifiers are not merged with results from index or sema.
  // Groups overloads if desired, to form CompletionCandidate::Bundles. The
  // bundles are scored and top results are returned, best to worst.
  // The returned candidates hold pointers into \p SemaResults, \p IndexResults
  // and \p IdentifierResults.
  std::vector<ScoredBundle>
  mergeResults(const std::vector<CodeCompletionResult> &SemaResults,
               const SymbolSlab &IndexResults,
               const std::vector<RawIdentifier> &IdentifierResults) {
    trace::Span Tracer("Merge and score results");
    std::vector<CompletionCandidate::Bundle> Bundles;
    // Maps the key returned by overloadSet() to the position of the matching
    // bundle in Bundles.
    llvm::DenseMap<size_t, size_t> BundleLookup;
    // Wraps the given sources (any of which may be null) in a candidate and
    // appends it to the bundle of its overload set, or to a new bundle.
    auto AddToBundles = [&](const CodeCompletionResult *SemaResult,
                            const Symbol *IndexResult,
                            const RawIdentifier *IdentifierResult) {
      CompletionCandidate C;
      C.SemaResult = SemaResult;
      C.IndexResult = IndexResult;
      C.IdentifierResult = IdentifierResult;
      // The name comes from the index if available, then from Sema, and from
      // the raw identifier as the last resort.
      if (C.IndexResult) {
        C.Name = IndexResult->Name;
        C.RankedIncludeHeaders = getRankedIncludes(*C.IndexResult);
      } else if (C.SemaResult) {
        C.Name = Recorder->getName(*SemaResult);
      } else {
        assert(IdentifierResult);
        C.Name = IdentifierResult->Name;
      }
      // A non-zero overload-set key groups the candidate with others sharing
      // that key; a zero key puts it in a bundle of its own.
      if (auto OverloadSet = C.overloadSet(
              Opts, FileName, Inserter ? Inserter.getPointer() : nullptr)) {
        auto Ret = BundleLookup.try_emplace(OverloadSet, Bundles.size());
        if (Ret.second)
          Bundles.emplace_back();
        Bundles[Ret.first->second].push_back(std::move(C));
      } else {
        Bundles.emplace_back();
        Bundles.back().push_back(std::move(C));
      }
    };
    // Index symbols that were already paired with a Sema result, so that they
    // are not emitted a second time as index-only results.
    llvm::DenseSet<const Symbol *> UsedIndexResults;
    // Looks up the index symbol with the same SymbolID as the Sema result, if
    // any, and marks it as used.
    auto CorrespondingIndexResult =
        [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
      if (auto SymID =
              getSymbolID(SemaResult, Recorder->CCSema->getSourceManager())) {
        auto I = IndexResults.find(SymID);
        if (I != IndexResults.end()) {
          UsedIndexResults.insert(&*I);
          return &*I;
        }
      }
      return nullptr;
    };
    // Emit all Sema results, merging them with Index results if possible.
    for (auto &SemaResult : SemaResults)
      AddToBundles(&SemaResult, CorrespondingIndexResult(SemaResult), nullptr);
    // Now emit any Index-only results.
    for (const auto &IndexResult : IndexResults) {
      if (UsedIndexResults.count(&IndexResult))
        continue;
      AddToBundles(/*SemaResult=*/nullptr, &IndexResult, nullptr);
    }
    // Emit identifier results.
    for (const auto &Ident : IdentifierResults)
      AddToBundles(/*SemaResult=*/nullptr, /*IndexResult=*/nullptr, &Ident);
    // We only keep the best N results at any time, in "native" format.
    // A Limit of zero means no limit.
    TopN<ScoredBundle, ScoredBundleGreater> Top(
        Opts.Limit == 0 ? std::numeric_limits<size_t>::max() : Opts.Limit);
    for (auto &Bundle : Bundles)
      addCandidate(Top, std::move(Bundle));
    return std::move(Top).items();
  }
1675 
1676   llvm::Optional<float> fuzzyScore(const CompletionCandidate &C) {
1677     // Macros can be very spammy, so we only support prefix completion.
1678     if (((C.SemaResult &&
1679           C.SemaResult->Kind == CodeCompletionResult::RK_Macro) ||
1680          (C.IndexResult &&
1681           C.IndexResult->SymInfo.Kind == index::SymbolKind::Macro)) &&
1682         !C.Name.startswith_lower(Filter->pattern()))
1683       return None;
1684     return Filter->match(C.Name);
1685   }
1686 
1687   CodeCompletion::Scores
1688   evaluateCompletion(const SymbolQualitySignals &Quality,
1689                      const SymbolRelevanceSignals &Relevance) {
1690     using RM = CodeCompleteOptions::CodeCompletionRankingModel;
1691     CodeCompletion::Scores Scores;
1692     switch (Opts.RankingModel) {
1693     case RM::Heuristics:
1694       Scores.Quality = Quality.evaluateHeuristics();
1695       Scores.Relevance = Relevance.evaluateHeuristics();
1696       Scores.Total =
1697           evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance);
1698       // NameMatch is in fact a multiplier on total score, so rescoring is
1699       // sound.
1700       Scores.ExcludingName =
1701           Relevance.NameMatch > std::numeric_limits<float>::epsilon()
1702               ? Scores.Total / Relevance.NameMatch
1703               : Scores.Quality;
1704       return Scores;
1705 
1706     case RM::DecisionForest:
1707       DecisionForestScores DFScores = Opts.DecisionForestScorer(
1708           Quality, Relevance, Opts.DecisionForestBase);
1709       Scores.ExcludingName = DFScores.ExcludingName;
1710       Scores.Total = DFScores.Total;
1711       return Scores;
1712     }
1713     llvm_unreachable("Unhandled CodeCompletion ranking model.");
1714   }
1715 
  // Scores a candidate and adds it to the TopN structure.
  // The bundle is dropped without being counted if the name of its first
  // candidate does not match the filter. Otherwise signals from every
  // candidate in the bundle are merged before scoring, and the per-source
  // counters (NSema, NIndex, NSemaAndIndex, NIdent) are updated.
  void addCandidate(TopN<ScoredBundle, ScoredBundleGreater> &Candidates,
                    CompletionCandidate::Bundle Bundle) {
    SymbolQualitySignals Quality;
    SymbolRelevanceSignals Relevance;
    // Signals that depend only on the completion context, not the candidate.
    Relevance.Context = CCContextKind;
    Relevance.Name = Bundle.front().Name;
    Relevance.FilterLength = HeuristicPrefix.Name.size();
    Relevance.Query = SymbolRelevanceSignals::CodeComplete;
    Relevance.FileProximityMatch = FileProximity.getPointer();
    if (ScopeProximity)
      Relevance.ScopeProximityMatch = ScopeProximity.getPointer();
    if (PreferredType)
      Relevance.HadContextType = true;
    Relevance.ContextWords = &ContextWords;
    Relevance.MainFileSignals = Opts.MainFileSignals;

    // Only the first candidate's name is matched against the filter.
    auto &First = Bundle.front();
    if (auto FuzzyScore = fuzzyScore(First))
      Relevance.NameMatch = *FuzzyScore;
    else
      return;
    // Accumulate signals and origins over all candidates in the bundle.
    SymbolOrigin Origin = SymbolOrigin::Unknown;
    bool FromIndex = false;
    for (const auto &Candidate : Bundle) {
      if (Candidate.IndexResult) {
        Quality.merge(*Candidate.IndexResult);
        Relevance.merge(*Candidate.IndexResult);
        Origin |= Candidate.IndexResult->Origin;
        FromIndex = true;
        if (!Candidate.IndexResult->Type.empty())
          Relevance.HadSymbolType |= true;
        if (PreferredType &&
            PreferredType->raw() == Candidate.IndexResult->Type) {
          Relevance.TypeMatchesPreferred = true;
        }
      }
      if (Candidate.SemaResult) {
        Quality.merge(*Candidate.SemaResult);
        Relevance.merge(*Candidate.SemaResult);
        // The Sema result's type is only computed when there is a preferred
        // type to compare it with.
        if (PreferredType) {
          if (auto CompletionType = OpaqueType::fromCompletionResult(
                  Recorder->CCSema->getASTContext(), *Candidate.SemaResult)) {
            Relevance.HadSymbolType |= true;
            if (PreferredType == CompletionType)
              Relevance.TypeMatchesPreferred = true;
          }
        }
        Origin |= SymbolOrigin::AST;
      }
      if (Candidate.IdentifierResult) {
        Quality.References = Candidate.IdentifierResult->References;
        Relevance.Scope = SymbolRelevanceSignals::FileScope;
        Origin |= SymbolOrigin::Identifier;
      }
    }

    CodeCompletion::Scores Scores = evaluateCompletion(Quality, Relevance);
    // Optional callback that receives every scored candidate.
    if (Opts.RecordCCResult)
      Opts.RecordCCResult(toCodeCompletion(Bundle), Quality, Relevance,
                          Scores.Total);

    dlog("CodeComplete: {0} ({1}) = {2}\n{3}{4}\n", First.Name,
         llvm::to_string(Origin), Scores.Total, llvm::to_string(Quality),
         llvm::to_string(Relevance));

    // Each counter goes up by at most one per bundle.
    NSema += bool(Origin & SymbolOrigin::AST);
    NIndex += FromIndex;
    NSemaAndIndex += bool(Origin & SymbolOrigin::AST) && FromIndex;
    NIdent += bool(Origin & SymbolOrigin::Identifier);
    // Bundle is moved from here; First must not be used afterwards.
    // NOTE(review): push() presumably returns true when an item had to be
    // dropped to respect the limit - confirm against TopN.
    if (Candidates.push({std::move(Bundle), Scores}))
      Incomplete = true;
  }
1789 
1790   CodeCompletion toCodeCompletion(const CompletionCandidate::Bundle &Bundle) {
1791     llvm::Optional<CodeCompletionBuilder> Builder;
1792     for (const auto &Item : Bundle) {
1793       CodeCompletionString *SemaCCS =
1794           Item.SemaResult ? Recorder->codeCompletionString(*Item.SemaResult)
1795                           : nullptr;
1796       if (!Builder)
1797         Builder.emplace(Recorder ? &Recorder->CCSema->getASTContext() : nullptr,
1798                         Item, SemaCCS, QueryScopes, *Inserter, FileName,
1799                         CCContextKind, Opts, IsUsingDeclaration, NextTokenKind);
1800       else
1801         Builder->add(Item, SemaCCS);
1802     }
1803     return Builder->build();
1804   }
1805 };
1806 
1807 } // namespace
1808 
1809 clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
1810   clang::CodeCompleteOptions Result;
1811   Result.IncludeCodePatterns = EnableSnippets;
1812   Result.IncludeMacros = true;
1813   Result.IncludeGlobals = true;
1814   // We choose to include full comments and not do doxygen parsing in
1815   // completion.
1816   // FIXME: ideally, we should support doxygen in some form, e.g. do markdown
1817   // formatting of the comments.
1818   Result.IncludeBriefComments = false;
1819 
1820   // When an is used, Sema is responsible for completing the main file,
1821   // the index can provide results from the preamble.
1822   // Tell Sema not to deserialize the preamble to look for results.
1823   Result.LoadExternal = !Index;
1824   Result.IncludeFixIts = IncludeFixIts;
1825 
1826   return Result;
1827 }
1828 
1829 CompletionPrefix guessCompletionPrefix(llvm::StringRef Content,
1830                                        unsigned Offset) {
1831   assert(Offset <= Content.size());
1832   StringRef Rest = Content.take_front(Offset);
1833   CompletionPrefix Result;
1834 
1835   // Consume the unqualified name. We only handle ASCII characters.
1836   // isIdentifierBody will let us match "0invalid", but we don't mind.
1837   while (!Rest.empty() && isIdentifierBody(Rest.back()))
1838     Rest = Rest.drop_back();
1839   Result.Name = Content.slice(Rest.size(), Offset);
1840 
1841   // Consume qualifiers.
1842   while (Rest.consume_back("::") && !Rest.endswith(":")) // reject ::::
1843     while (!Rest.empty() && isIdentifierBody(Rest.back()))
1844       Rest = Rest.drop_back();
1845   Result.Qualifier =
1846       Content.slice(Rest.size(), Result.Name.begin() - Content.begin());
1847 
1848   return Result;
1849 }
1850 
1851 CodeCompleteResult codeComplete(PathRef FileName, Position Pos,
1852                                 const PreambleData *Preamble,
1853                                 const ParseInputs &ParseInput,
1854                                 CodeCompleteOptions Opts,
1855                                 SpeculativeFuzzyFind *SpecFuzzyFind) {
1856   auto Offset = positionToOffset(ParseInput.Contents, Pos);
1857   if (!Offset) {
1858     elog("Code completion position was invalid {0}", Offset.takeError());
1859     return CodeCompleteResult();
1860   }
1861   auto Flow = CodeCompleteFlow(
1862       FileName, Preamble ? Preamble->Includes : IncludeStructure(),
1863       SpecFuzzyFind, Opts);
1864   return (!Preamble || Opts.RunParser == CodeCompleteOptions::NeverParse)
1865              ? std::move(Flow).runWithoutSema(ParseInput.Contents, *Offset,
1866                                               *ParseInput.TFS)
1867              : std::move(Flow).run({FileName, *Offset, *Preamble,
1868                                     // We want to serve code completions with
1869                                     // low latency, so don't bother patching.
1870                                     /*PreamblePatch=*/llvm::None, ParseInput});
1871 }
1872 
1873 SignatureHelp signatureHelp(PathRef FileName, Position Pos,
1874                             const PreambleData &Preamble,
1875                             const ParseInputs &ParseInput) {
1876   auto Offset = positionToOffset(ParseInput.Contents, Pos);
1877   if (!Offset) {
1878     elog("Signature help position was invalid {0}", Offset.takeError());
1879     return SignatureHelp();
1880   }
1881   SignatureHelp Result;
1882   clang::CodeCompleteOptions Options;
1883   Options.IncludeGlobals = false;
1884   Options.IncludeMacros = false;
1885   Options.IncludeCodePatterns = false;
1886   Options.IncludeBriefComments = false;
1887   semaCodeComplete(
1888       std::make_unique<SignatureHelpCollector>(Options, ParseInput.Index,
1889                                                Result),
1890       Options,
1891       {FileName, *Offset, Preamble,
1892        PreamblePatch::create(FileName, ParseInput, Preamble), ParseInput});
1893   return Result;
1894 }
1895 
1896 bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) {
1897   auto InTopLevelScope = [](const NamedDecl &ND) {
1898     switch (ND.getDeclContext()->getDeclKind()) {
1899     case Decl::TranslationUnit:
1900     case Decl::Namespace:
1901     case Decl::LinkageSpec:
1902       return true;
1903     default:
1904       break;
1905     };
1906     return false;
1907   };
1908   // We only complete symbol's name, which is the same as the name of the
1909   // *primary* template in case of template specializations.
1910   if (isExplicitTemplateSpecialization(&ND))
1911     return false;
1912 
1913   if (InTopLevelScope(ND))
1914     return true;
1915 
1916   if (const auto *EnumDecl = dyn_cast<clang::EnumDecl>(ND.getDeclContext()))
1917     return InTopLevelScope(*EnumDecl) && !EnumDecl->isScoped();
1918 
1919   return false;
1920 }
1921 
1922 // FIXME: find a home for this (that can depend on both markup and Protocol).
1923 static MarkupContent renderDoc(const markup::Document &Doc, MarkupKind Kind) {
1924   MarkupContent Result;
1925   Result.kind = Kind;
1926   switch (Kind) {
1927   case MarkupKind::PlainText:
1928     Result.value.append(Doc.asPlainText());
1929     break;
1930   case MarkupKind::Markdown:
1931     Result.value.append(Doc.asMarkdown());
1932     break;
1933   }
1934   return Result;
1935 }
1936 
1937 CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const {
1938   CompletionItem LSP;
1939   const auto *InsertInclude = Includes.empty() ? nullptr : &Includes[0];
1940   LSP.label = ((InsertInclude && InsertInclude->Insertion)
1941                    ? Opts.IncludeIndicator.Insert
1942                    : Opts.IncludeIndicator.NoInsert) +
1943               (Opts.ShowOrigins ? "[" + llvm::to_string(Origin) + "]" : "") +
1944               RequiredQualifier + Name + Signature;
1945 
1946   LSP.kind = Kind;
1947   LSP.detail = BundleSize > 1
1948                    ? std::string(llvm::formatv("[{0} overloads]", BundleSize))
1949                    : ReturnType;
1950   LSP.deprecated = Deprecated;
1951   // Combine header information and documentation in LSP `documentation` field.
1952   // This is not quite right semantically, but tends to display well in editors.
1953   if (InsertInclude || Documentation) {
1954     markup::Document Doc;
1955     if (InsertInclude)
1956       Doc.addParagraph().appendText("From ").appendCode(InsertInclude->Header);
1957     if (Documentation)
1958       Doc.append(*Documentation);
1959     LSP.documentation = renderDoc(Doc, Opts.DocumentationFormat);
1960   }
1961   LSP.sortText = sortText(Score.Total, Name);
1962   LSP.filterText = Name;
1963   LSP.textEdit = {CompletionTokenRange, RequiredQualifier + Name};
1964   // Merge continuous additionalTextEdits into main edit. The main motivation
1965   // behind this is to help LSP clients, it seems most of them are confused when
1966   // they are provided with additionalTextEdits that are consecutive to main
1967   // edit.
1968   // Note that we store additional text edits from back to front in a line. That
1969   // is mainly to help LSP clients again, so that changes do not effect each
1970   // other.
1971   for (const auto &FixIt : FixIts) {
1972     if (FixIt.range.end == LSP.textEdit->range.start) {
1973       LSP.textEdit->newText = FixIt.newText + LSP.textEdit->newText;
1974       LSP.textEdit->range.start = FixIt.range.start;
1975     } else {
1976       LSP.additionalTextEdits.push_back(FixIt);
1977     }
1978   }
1979   if (Opts.EnableSnippets)
1980     LSP.textEdit->newText += SnippetSuffix;
1981 
1982   // FIXME(kadircet): Do not even fill insertText after making sure textEdit is
1983   // compatible with most of the editors.
1984   LSP.insertText = LSP.textEdit->newText;
1985   LSP.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet
1986                                              : InsertTextFormat::PlainText;
1987   if (InsertInclude && InsertInclude->Insertion)
1988     LSP.additionalTextEdits.push_back(*InsertInclude->Insertion);
1989 
1990   LSP.score = Score.ExcludingName;
1991 
1992   return LSP;
1993 }
1994 
1995 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const CodeCompletion &C) {
1996   // For now just lean on CompletionItem.
1997   return OS << C.render(CodeCompleteOptions());
1998 }
1999 
2000 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
2001                               const CodeCompleteResult &R) {
2002   OS << "CodeCompleteResult: " << R.Completions.size() << (R.HasMore ? "+" : "")
2003      << " (" << getCompletionKindString(R.Context) << ")"
2004      << " items:\n";
2005   for (const auto &C : R.Completions)
2006     OS << C << "\n";
2007   return OS;
2008 }
2009 
2010 // Heuristically detect whether the `Line` is an unterminated include filename.
2011 bool isIncludeFile(llvm::StringRef Line) {
2012   Line = Line.ltrim();
2013   if (!Line.consume_front("#"))
2014     return false;
2015   Line = Line.ltrim();
2016   if (!(Line.consume_front("include_next") || Line.consume_front("include") ||
2017         Line.consume_front("import")))
2018     return false;
2019   Line = Line.ltrim();
2020   if (Line.consume_front("<"))
2021     return Line.count('>') == 0;
2022   if (Line.consume_front("\""))
2023     return Line.count('"') == 0;
2024   return false;
2025 }
2026 
2027 bool allowImplicitCompletion(llvm::StringRef Content, unsigned Offset) {
2028   // Look at last line before completion point only.
2029   Content = Content.take_front(Offset);
2030   auto Pos = Content.rfind('\n');
2031   if (Pos != llvm::StringRef::npos)
2032     Content = Content.substr(Pos + 1);
2033 
2034   // Complete after scope operators.
2035   if (Content.endswith(".") || Content.endswith("->") || Content.endswith("::"))
2036     return true;
2037   // Complete after `#include <` and #include `<foo/`.
2038   if ((Content.endswith("<") || Content.endswith("\"") ||
2039        Content.endswith("/")) &&
2040       isIncludeFile(Content))
2041     return true;
2042 
2043   // Complete words. Give non-ascii characters the benefit of the doubt.
2044   return !Content.empty() &&
2045          (isIdentifierBody(Content.back()) || !llvm::isASCII(Content.back()));
2046 }
2047 
2048 } // namespace clangd
2049 } // namespace clang
2050