1 //===--- CodeComplete.cpp ----------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Code completion has several moving parts:
10 //  - AST-based completions are provided using the completion hooks in Sema.
11 //  - external completions are retrieved from the index (using hints from Sema)
12 //  - the two sources overlap, and must be merged and overloads bundled
13 //  - results must be scored and ranked (see Quality.h) before rendering
14 //
15 // Signature help works in a similar way as code completion, but it is simpler:
16 // it's purely AST-based, and there are few candidates.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "CodeComplete.h"
21 #include "AST.h"
22 #include "CodeCompletionStrings.h"
23 #include "Compiler.h"
24 #include "Diagnostics.h"
25 #include "ExpectedTypes.h"
26 #include "FileDistance.h"
27 #include "FuzzyMatch.h"
28 #include "Headers.h"
29 #include "Logger.h"
30 #include "Preamble.h"
31 #include "Protocol.h"
32 #include "Quality.h"
33 #include "SourceCode.h"
34 #include "TUScheduler.h"
35 #include "Threading.h"
36 #include "Trace.h"
37 #include "URI.h"
38 #include "index/Index.h"
39 #include "index/Symbol.h"
40 #include "index/SymbolOrigin.h"
41 #include "clang/AST/Decl.h"
42 #include "clang/AST/DeclBase.h"
43 #include "clang/Basic/CharInfo.h"
44 #include "clang/Basic/LangOptions.h"
45 #include "clang/Basic/SourceLocation.h"
46 #include "clang/Format/Format.h"
47 #include "clang/Frontend/CompilerInstance.h"
48 #include "clang/Frontend/FrontendActions.h"
49 #include "clang/Lex/ExternalPreprocessorSource.h"
50 #include "clang/Lex/Preprocessor.h"
51 #include "clang/Lex/PreprocessorOptions.h"
52 #include "clang/Sema/CodeCompleteConsumer.h"
53 #include "clang/Sema/DeclSpec.h"
54 #include "clang/Sema/Sema.h"
55 #include "llvm/ADT/ArrayRef.h"
56 #include "llvm/ADT/None.h"
57 #include "llvm/ADT/Optional.h"
58 #include "llvm/ADT/SmallVector.h"
59 #include "llvm/ADT/StringExtras.h"
60 #include "llvm/ADT/StringRef.h"
61 #include "llvm/Support/Compiler.h"
62 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/Error.h"
64 #include "llvm/Support/Format.h"
65 #include "llvm/Support/FormatVariadic.h"
66 #include "llvm/Support/ScopedPrinter.h"
67 #include <algorithm>
68 #include <iterator>
69 
// We log detailed candidates here if you run with -debug-only=CodeComplete.
71 #define DEBUG_TYPE "CodeComplete"
72 
73 namespace clang {
74 namespace clangd {
75 namespace {
76 
77 CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
78   using SK = index::SymbolKind;
79   switch (Kind) {
80   case SK::Unknown:
81     return CompletionItemKind::Missing;
82   case SK::Module:
83   case SK::Namespace:
84   case SK::NamespaceAlias:
85     return CompletionItemKind::Module;
86   case SK::Macro:
87     return CompletionItemKind::Text;
88   case SK::Enum:
89     return CompletionItemKind::Enum;
90   case SK::Struct:
91     return CompletionItemKind::Struct;
92   case SK::Class:
93   case SK::Protocol:
94   case SK::Extension:
95   case SK::Union:
96     return CompletionItemKind::Class;
97   case SK::TypeAlias:
98     // We use the same kind as the VSCode C++ extension.
99     // FIXME: pick a better option when we have one.
100     return CompletionItemKind::Interface;
101   case SK::Using:
102     return CompletionItemKind::Reference;
103   case SK::Function:
104   case SK::ConversionFunction:
105     return CompletionItemKind::Function;
106   case SK::Variable:
107   case SK::Parameter:
108   case SK::NonTypeTemplateParm:
109     return CompletionItemKind::Variable;
110   case SK::Field:
111     return CompletionItemKind::Field;
112   case SK::EnumConstant:
113     return CompletionItemKind::EnumMember;
114   case SK::InstanceMethod:
115   case SK::ClassMethod:
116   case SK::StaticMethod:
117   case SK::Destructor:
118     return CompletionItemKind::Method;
119   case SK::InstanceProperty:
120   case SK::ClassProperty:
121   case SK::StaticProperty:
122     return CompletionItemKind::Property;
123   case SK::Constructor:
124     return CompletionItemKind::Constructor;
125   case SK::TemplateTypeParm:
126   case SK::TemplateTemplateParm:
127     return CompletionItemKind::TypeParameter;
128   }
129   llvm_unreachable("Unhandled clang::index::SymbolKind.");
130 }
131 
132 CompletionItemKind
133 toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,
134                      const NamedDecl *Decl,
135                      CodeCompletionContext::Kind CtxKind) {
136   if (Decl)
137     return toCompletionItemKind(index::getSymbolInfo(Decl).Kind);
138   if (CtxKind == CodeCompletionContext::CCC_IncludedFile)
139     return CompletionItemKind::File;
140   switch (ResKind) {
141   case CodeCompletionResult::RK_Declaration:
142     llvm_unreachable("RK_Declaration without Decl");
143   case CodeCompletionResult::RK_Keyword:
144     return CompletionItemKind::Keyword;
145   case CodeCompletionResult::RK_Macro:
146     return CompletionItemKind::Text; // unfortunately, there's no 'Macro'
147                                      // completion items in LSP.
148   case CodeCompletionResult::RK_Pattern:
149     return CompletionItemKind::Snippet;
150   }
151   llvm_unreachable("Unhandled CodeCompletionResult::ResultKind.");
152 }
153 
154 // Identifier code completion result.
155 struct RawIdentifier {
156   llvm::StringRef Name;
157   unsigned References; // # of usages in file.
158 };
159 
160 /// A code completion result, in clang-native form.
161 /// It may be promoted to a CompletionItem if it's among the top-ranked results.
162 struct CompletionCandidate {
163   llvm::StringRef Name; // Used for filtering and sorting.
164   // We may have a result from Sema, from the index, or both.
165   const CodeCompletionResult *SemaResult = nullptr;
166   const Symbol *IndexResult = nullptr;
167   const RawIdentifier *IdentifierResult = nullptr;
168   llvm::SmallVector<llvm::StringRef, 1> RankedIncludeHeaders;
169 
170   // Returns a token identifying the overload set this is part of.
171   // 0 indicates it's not part of any overload set.
172   size_t overloadSet(const CodeCompleteOptions &Opts) const {
173     if (!Opts.BundleOverloads.getValueOr(false))
174       return 0;
175     llvm::SmallString<256> Scratch;
176     if (IndexResult) {
177       switch (IndexResult->SymInfo.Kind) {
178       case index::SymbolKind::ClassMethod:
179       case index::SymbolKind::InstanceMethod:
180       case index::SymbolKind::StaticMethod:
181 #ifndef NDEBUG
182         llvm_unreachable("Don't expect members from index in code completion");
183 #else
184         LLVM_FALLTHROUGH;
185 #endif
186       case index::SymbolKind::Function:
187         // We can't group overloads together that need different #includes.
188         // This could break #include insertion.
189         return llvm::hash_combine(
190             (IndexResult->Scope + IndexResult->Name).toStringRef(Scratch),
191             headerToInsertIfAllowed(Opts).getValueOr(""));
192       default:
193         return 0;
194       }
195     }
196     if (SemaResult) {
197       // We need to make sure we're consistent with the IndexResult case!
198       const NamedDecl *D = SemaResult->Declaration;
199       if (!D || !D->isFunctionOrFunctionTemplate())
200         return 0;
201       {
202         llvm::raw_svector_ostream OS(Scratch);
203         D->printQualifiedName(OS);
204       }
205       return llvm::hash_combine(Scratch,
206                                 headerToInsertIfAllowed(Opts).getValueOr(""));
207     }
208     assert(IdentifierResult);
209     return 0;
210   }
211 
212   // The best header to include if include insertion is allowed.
213   llvm::Optional<llvm::StringRef>
214   headerToInsertIfAllowed(const CodeCompleteOptions &Opts) const {
215     if (Opts.InsertIncludes == CodeCompleteOptions::NeverInsert ||
216         RankedIncludeHeaders.empty())
217       return None;
218     if (SemaResult && SemaResult->Declaration) {
219       // Avoid inserting new #include if the declaration is found in the current
220       // file e.g. the symbol is forward declared.
221       auto &SM = SemaResult->Declaration->getASTContext().getSourceManager();
222       for (const Decl *RD : SemaResult->Declaration->redecls())
223         if (SM.isInMainFile(SM.getExpansionLoc(RD->getBeginLoc())))
224           return None;
225     }
226     return RankedIncludeHeaders[0];
227   }
228 
229   using Bundle = llvm::SmallVector<CompletionCandidate, 4>;
230 };
231 using ScoredBundle =
232     std::pair<CompletionCandidate::Bundle, CodeCompletion::Scores>;
233 struct ScoredBundleGreater {
234   bool operator()(const ScoredBundle &L, const ScoredBundle &R) {
235     if (L.second.Total != R.second.Total)
236       return L.second.Total > R.second.Total;
237     return L.first.front().Name <
238            R.first.front().Name; // Earlier name is better.
239   }
240 };
241 
242 // Assembles a code completion out of a bundle of >=1 completion candidates.
243 // Many of the expensive strings are only computed at this point, once we know
244 // the candidate bundle is going to be returned.
245 //
246 // Many fields are the same for all candidates in a bundle (e.g. name), and are
247 // computed from the first candidate, in the constructor.
248 // Others vary per candidate, so add() must be called for remaining candidates.
249 struct CodeCompletionBuilder {
250   CodeCompletionBuilder(ASTContext *ASTCtx, const CompletionCandidate &C,
251                         CodeCompletionString *SemaCCS,
252                         llvm::ArrayRef<std::string> QueryScopes,
253                         const IncludeInserter &Includes,
254                         llvm::StringRef FileName,
255                         CodeCompletionContext::Kind ContextKind,
256                         const CodeCompleteOptions &Opts, bool GenerateSnippets)
257       : ASTCtx(ASTCtx), ExtractDocumentation(Opts.IncludeComments),
258         EnableFunctionArgSnippets(Opts.EnableFunctionArgSnippets),
259         GenerateSnippets(GenerateSnippets) {
260     add(C, SemaCCS);
261     if (C.SemaResult) {
262       assert(ASTCtx);
263       Completion.Origin |= SymbolOrigin::AST;
264       Completion.Name = std::string(llvm::StringRef(SemaCCS->getTypedText()));
265       if (Completion.Scope.empty()) {
266         if ((C.SemaResult->Kind == CodeCompletionResult::RK_Declaration) ||
267             (C.SemaResult->Kind == CodeCompletionResult::RK_Pattern))
268           if (const auto *D = C.SemaResult->getDeclaration())
269             if (const auto *ND = dyn_cast<NamedDecl>(D))
270               Completion.Scope = std::string(
271                   splitQualifiedName(printQualifiedName(*ND)).first);
272       }
273       Completion.Kind = toCompletionItemKind(
274           C.SemaResult->Kind, C.SemaResult->Declaration, ContextKind);
275       // Sema could provide more info on whether the completion was a file or
276       // folder.
277       if (Completion.Kind == CompletionItemKind::File &&
278           Completion.Name.back() == '/')
279         Completion.Kind = CompletionItemKind::Folder;
280       for (const auto &FixIt : C.SemaResult->FixIts) {
281         Completion.FixIts.push_back(toTextEdit(
282             FixIt, ASTCtx->getSourceManager(), ASTCtx->getLangOpts()));
283       }
284       llvm::sort(Completion.FixIts, [](const TextEdit &X, const TextEdit &Y) {
285         return std::tie(X.range.start.line, X.range.start.character) <
286                std::tie(Y.range.start.line, Y.range.start.character);
287       });
288       Completion.Deprecated |=
289           (C.SemaResult->Availability == CXAvailability_Deprecated);
290     }
291     if (C.IndexResult) {
292       Completion.Origin |= C.IndexResult->Origin;
293       if (Completion.Scope.empty())
294         Completion.Scope = std::string(C.IndexResult->Scope);
295       if (Completion.Kind == CompletionItemKind::Missing)
296         Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind);
297       if (Completion.Name.empty())
298         Completion.Name = std::string(C.IndexResult->Name);
299       // If the completion was visible to Sema, no qualifier is needed. This
300       // avoids unneeded qualifiers in cases like with `using ns::X`.
301       if (Completion.RequiredQualifier.empty() && !C.SemaResult) {
302         llvm::StringRef ShortestQualifier = C.IndexResult->Scope;
303         for (llvm::StringRef Scope : QueryScopes) {
304           llvm::StringRef Qualifier = C.IndexResult->Scope;
305           if (Qualifier.consume_front(Scope) &&
306               Qualifier.size() < ShortestQualifier.size())
307             ShortestQualifier = Qualifier;
308         }
309         Completion.RequiredQualifier = std::string(ShortestQualifier);
310       }
311       Completion.Deprecated |= (C.IndexResult->Flags & Symbol::Deprecated);
312     }
313     if (C.IdentifierResult) {
314       Completion.Origin |= SymbolOrigin::Identifier;
315       Completion.Kind = CompletionItemKind::Text;
316       Completion.Name = std::string(C.IdentifierResult->Name);
317     }
318 
319     // Turn absolute path into a literal string that can be #included.
320     auto Inserted = [&](llvm::StringRef Header)
321         -> llvm::Expected<std::pair<std::string, bool>> {
322       auto ResolvedDeclaring =
323           URI::resolve(C.IndexResult->CanonicalDeclaration.FileURI, FileName);
324       if (!ResolvedDeclaring)
325         return ResolvedDeclaring.takeError();
326       auto ResolvedInserted = toHeaderFile(Header, FileName);
327       if (!ResolvedInserted)
328         return ResolvedInserted.takeError();
329       auto Spelled = Includes.calculateIncludePath(*ResolvedInserted, FileName);
330       if (!Spelled)
331         return llvm::createStringError(llvm::inconvertibleErrorCode(),
332                                        "Header not on include path");
333       return std::make_pair(
334           std::move(*Spelled),
335           Includes.shouldInsertInclude(*ResolvedDeclaring, *ResolvedInserted));
336     };
337     bool ShouldInsert = C.headerToInsertIfAllowed(Opts).hasValue();
338     // Calculate include paths and edits for all possible headers.
339     for (const auto &Inc : C.RankedIncludeHeaders) {
340       if (auto ToInclude = Inserted(Inc)) {
341         CodeCompletion::IncludeCandidate Include;
342         Include.Header = ToInclude->first;
343         if (ToInclude->second && ShouldInsert)
344           Include.Insertion = Includes.insert(ToInclude->first);
345         Completion.Includes.push_back(std::move(Include));
346       } else
347         log("Failed to generate include insertion edits for adding header "
348             "(FileURI='{0}', IncludeHeader='{1}') into {2}: {3}",
349             C.IndexResult->CanonicalDeclaration.FileURI, Inc, FileName,
350             ToInclude.takeError());
351     }
352     // Prefer includes that do not need edits (i.e. already exist).
353     std::stable_partition(Completion.Includes.begin(),
354                           Completion.Includes.end(),
355                           [](const CodeCompletion::IncludeCandidate &I) {
356                             return !I.Insertion.hasValue();
357                           });
358   }
359 
360   void add(const CompletionCandidate &C, CodeCompletionString *SemaCCS) {
361     assert(bool(C.SemaResult) == bool(SemaCCS));
362     Bundled.emplace_back();
363     BundledEntry &S = Bundled.back();
364     if (C.SemaResult) {
365       bool IsPattern = C.SemaResult->Kind == CodeCompletionResult::RK_Pattern;
366       getSignature(*SemaCCS, &S.Signature, &S.SnippetSuffix,
367                    &Completion.RequiredQualifier, IsPattern);
368       S.ReturnType = getReturnType(*SemaCCS);
369     } else if (C.IndexResult) {
370       S.Signature = std::string(C.IndexResult->Signature);
371       S.SnippetSuffix = std::string(C.IndexResult->CompletionSnippetSuffix);
372       S.ReturnType = std::string(C.IndexResult->ReturnType);
373     }
374     if (ExtractDocumentation && Completion.Documentation.empty()) {
375       if (C.IndexResult)
376         Completion.Documentation = std::string(C.IndexResult->Documentation);
377       else if (C.SemaResult)
378         Completion.Documentation = getDocComment(*ASTCtx, *C.SemaResult,
379                                                  /*CommentsFromHeader=*/false);
380     }
381   }
382 
383   CodeCompletion build() {
384     Completion.ReturnType = summarizeReturnType();
385     Completion.Signature = summarizeSignature();
386     Completion.SnippetSuffix = summarizeSnippet();
387     Completion.BundleSize = Bundled.size();
388     return std::move(Completion);
389   }
390 
391 private:
392   struct BundledEntry {
393     std::string SnippetSuffix;
394     std::string Signature;
395     std::string ReturnType;
396   };
397 
398   // If all BundledEntries have the same value for a property, return it.
399   template <std::string BundledEntry::*Member>
400   const std::string *onlyValue() const {
401     auto B = Bundled.begin(), E = Bundled.end();
402     for (auto I = B + 1; I != E; ++I)
403       if (I->*Member != B->*Member)
404         return nullptr;
405     return &(B->*Member);
406   }
407 
408   template <bool BundledEntry::*Member> const bool *onlyValue() const {
409     auto B = Bundled.begin(), E = Bundled.end();
410     for (auto I = B + 1; I != E; ++I)
411       if (I->*Member != B->*Member)
412         return nullptr;
413     return &(B->*Member);
414   }
415 
416   std::string summarizeReturnType() const {
417     if (auto *RT = onlyValue<&BundledEntry::ReturnType>())
418       return *RT;
419     return "";
420   }
421 
422   std::string summarizeSnippet() const {
423     if (!GenerateSnippets)
424       return "";
425     auto *Snippet = onlyValue<&BundledEntry::SnippetSuffix>();
426     if (!Snippet)
427       // All bundles are function calls.
428       // FIXME(ibiryukov): sometimes add template arguments to a snippet, e.g.
429       // we need to complete 'forward<$1>($0)'.
430       return "($0)";
431     if (EnableFunctionArgSnippets)
432       return *Snippet;
433 
434     // Replace argument snippets with a simplified pattern.
435     if (Snippet->empty())
436       return "";
437     if (Completion.Kind == CompletionItemKind::Function ||
438         Completion.Kind == CompletionItemKind::Method) {
439       // Functions snippets can be of 2 types:
440       // - containing only function arguments, e.g.
441       //   foo(${1:int p1}, ${2:int p2});
442       //   We transform this pattern to '($0)' or '()'.
443       // - template arguments and function arguments, e.g.
444       //   foo<${1:class}>(${2:int p1}).
445       //   We transform this pattern to '<$1>()$0' or '<$0>()'.
446 
447       bool EmptyArgs = llvm::StringRef(*Snippet).endswith("()");
448       if (Snippet->front() == '<')
449         return EmptyArgs ? "<$1>()$0" : "<$1>($0)";
450       if (Snippet->front() == '(')
451         return EmptyArgs ? "()" : "($0)";
452       return *Snippet; // Not an arg snippet?
453     }
454     // 'CompletionItemKind::Interface' matches template type aliases.
455     if (Completion.Kind == CompletionItemKind::Interface ||
456         Completion.Kind == CompletionItemKind::Class) {
457       if (Snippet->front() != '<')
458         return *Snippet; // Not an arg snippet?
459 
460       // Classes and template using aliases can only have template arguments,
461       // e.g. Foo<${1:class}>.
462       if (llvm::StringRef(*Snippet).endswith("<>"))
463         return "<>"; // can happen with defaulted template arguments.
464       return "<$0>";
465     }
466     return *Snippet;
467   }
468 
469   std::string summarizeSignature() const {
470     if (auto *Signature = onlyValue<&BundledEntry::Signature>())
471       return *Signature;
472     // All bundles are function calls.
473     return "(…)";
474   }
475 
476   // ASTCtx can be nullptr if not run with sema.
477   ASTContext *ASTCtx;
478   CodeCompletion Completion;
479   llvm::SmallVector<BundledEntry, 1> Bundled;
480   bool ExtractDocumentation;
481   bool EnableFunctionArgSnippets;
482   /// When false, no snippets are generated argument lists.
483   bool GenerateSnippets;
484 };
485 
486 // Determine the symbol ID for a Sema code completion result, if possible.
487 llvm::Optional<SymbolID> getSymbolID(const CodeCompletionResult &R,
488                                      const SourceManager &SM) {
489   switch (R.Kind) {
490   case CodeCompletionResult::RK_Declaration:
491   case CodeCompletionResult::RK_Pattern: {
492     // Computing USR caches linkage, which may change after code completion.
493     if (hasUnstableLinkage(R.Declaration))
494       return llvm::None;
495     return clang::clangd::getSymbolID(R.Declaration);
496   }
497   case CodeCompletionResult::RK_Macro:
498     return clang::clangd::getSymbolID(R.Macro->getName(), R.MacroDefInfo, SM);
499   case CodeCompletionResult::RK_Keyword:
500     return None;
501   }
502   llvm_unreachable("unknown CodeCompletionResult kind");
503 }
504 
505 // Scopes of the partial identifier we're trying to complete.
506 // It is used when we query the index for more completion results.
507 struct SpecifiedScope {
508   // The scopes we should look in, determined by Sema.
509   //
510   // If the qualifier was fully resolved, we look for completions in these
511   // scopes; if there is an unresolved part of the qualifier, it should be
512   // resolved within these scopes.
513   //
514   // Examples of qualified completion:
515   //
516   //   "::vec"                                      => {""}
517   //   "using namespace std; ::vec^"                => {"", "std::"}
518   //   "namespace ns {using namespace std;} ns::^"  => {"ns::", "std::"}
519   //   "std::vec^"                                  => {""}  // "std" unresolved
520   //
521   // Examples of unqualified completion:
522   //
523   //   "vec^"                                       => {""}
524   //   "using namespace std; vec^"                  => {"", "std::"}
525   //   "using namespace std; namespace ns { vec^ }" => {"ns::", "std::", ""}
526   //
527   // "" for global namespace, "ns::" for normal namespace.
528   std::vector<std::string> AccessibleScopes;
529   // The full scope qualifier as typed by the user (without the leading "::").
530   // Set if the qualifier is not fully resolved by Sema.
531   llvm::Optional<std::string> UnresolvedQualifier;
532 
533   // Construct scopes being queried in indexes. The results are deduplicated.
534   // This method format the scopes to match the index request representation.
535   std::vector<std::string> scopesForIndexQuery() {
536     std::set<std::string> Results;
537     for (llvm::StringRef AS : AccessibleScopes)
538       Results.insert(
539           (AS + (UnresolvedQualifier ? *UnresolvedQualifier : "")).str());
540     return {Results.begin(), Results.end()};
541   }
542 };
543 
544 // Get all scopes that will be queried in indexes and whether symbols from
545 // any scope is allowed. The first scope in the list is the preferred scope
546 // (e.g. enclosing namespace).
547 std::pair<std::vector<std::string>, bool>
548 getQueryScopes(CodeCompletionContext &CCContext, const Sema &CCSema,
549                const CompletionPrefix &HeuristicPrefix,
550                const CodeCompleteOptions &Opts) {
551   SpecifiedScope Scopes;
552   for (auto *Context : CCContext.getVisitedContexts()) {
553     if (isa<TranslationUnitDecl>(Context))
554       Scopes.AccessibleScopes.push_back(""); // global namespace
555     else if (isa<NamespaceDecl>(Context))
556       Scopes.AccessibleScopes.push_back(printNamespaceScope(*Context));
557   }
558 
559   const CXXScopeSpec *SemaSpecifier =
560       CCContext.getCXXScopeSpecifier().getValueOr(nullptr);
561   // Case 1: unqualified completion.
562   if (!SemaSpecifier) {
563     // Case 2 (exception): sema saw no qualifier, but there appears to be one!
564     // This can happen e.g. in incomplete macro expansions. Use heuristics.
565     if (!HeuristicPrefix.Qualifier.empty()) {
566       vlog("Sema said no scope specifier, but we saw {0} in the source code",
567            HeuristicPrefix.Qualifier);
568       StringRef SpelledSpecifier = HeuristicPrefix.Qualifier;
569       if (SpelledSpecifier.consume_front("::"))
570         Scopes.AccessibleScopes = {""};
571       Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
572       return {Scopes.scopesForIndexQuery(), false};
573     }
574     // The enclosing namespace must be first, it gets a quality boost.
575     std::vector<std::string> EnclosingAtFront;
576     std::string EnclosingScope = printNamespaceScope(*CCSema.CurContext);
577     EnclosingAtFront.push_back(EnclosingScope);
578     for (auto &S : Scopes.scopesForIndexQuery()) {
579       if (EnclosingScope != S)
580         EnclosingAtFront.push_back(std::move(S));
581     }
582     // Allow AllScopes completion as there is no explicit scope qualifier.
583     return {EnclosingAtFront, Opts.AllScopes};
584   }
585   // Case 3: sema saw and resolved a scope qualifier.
586   if (SemaSpecifier && SemaSpecifier->isValid())
587     return {Scopes.scopesForIndexQuery(), false};
588 
589   // Case 4: There was a qualifier, and Sema didn't resolve it.
590   Scopes.AccessibleScopes.push_back(""); // Make sure global scope is included.
591   llvm::StringRef SpelledSpecifier = Lexer::getSourceText(
592       CharSourceRange::getCharRange(SemaSpecifier->getRange()),
593       CCSema.SourceMgr, clang::LangOptions());
594   if (SpelledSpecifier.consume_front("::"))
595     Scopes.AccessibleScopes = {""};
596   Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
597   // Sema excludes the trailing "::".
598   if (!Scopes.UnresolvedQualifier->empty())
599     *Scopes.UnresolvedQualifier += "::";
600 
601   return {Scopes.scopesForIndexQuery(), false};
602 }
603 
604 // Should we perform index-based completion in a context of the specified kind?
605 // FIXME: consider allowing completion, but restricting the result types.
606 bool contextAllowsIndex(enum CodeCompletionContext::Kind K) {
607   switch (K) {
608   case CodeCompletionContext::CCC_TopLevel:
609   case CodeCompletionContext::CCC_ObjCInterface:
610   case CodeCompletionContext::CCC_ObjCImplementation:
611   case CodeCompletionContext::CCC_ObjCIvarList:
612   case CodeCompletionContext::CCC_ClassStructUnion:
613   case CodeCompletionContext::CCC_Statement:
614   case CodeCompletionContext::CCC_Expression:
615   case CodeCompletionContext::CCC_ObjCMessageReceiver:
616   case CodeCompletionContext::CCC_EnumTag:
617   case CodeCompletionContext::CCC_UnionTag:
618   case CodeCompletionContext::CCC_ClassOrStructTag:
619   case CodeCompletionContext::CCC_ObjCProtocolName:
620   case CodeCompletionContext::CCC_Namespace:
621   case CodeCompletionContext::CCC_Type:
622   case CodeCompletionContext::CCC_ParenthesizedExpression:
623   case CodeCompletionContext::CCC_ObjCInterfaceName:
624   case CodeCompletionContext::CCC_ObjCCategoryName:
625   case CodeCompletionContext::CCC_Symbol:
626   case CodeCompletionContext::CCC_SymbolOrNewName:
627     return true;
628   case CodeCompletionContext::CCC_OtherWithMacros:
629   case CodeCompletionContext::CCC_DotMemberAccess:
630   case CodeCompletionContext::CCC_ArrowMemberAccess:
631   case CodeCompletionContext::CCC_ObjCPropertyAccess:
632   case CodeCompletionContext::CCC_MacroName:
633   case CodeCompletionContext::CCC_MacroNameUse:
634   case CodeCompletionContext::CCC_PreprocessorExpression:
635   case CodeCompletionContext::CCC_PreprocessorDirective:
636   case CodeCompletionContext::CCC_SelectorName:
637   case CodeCompletionContext::CCC_TypeQualifiers:
638   case CodeCompletionContext::CCC_ObjCInstanceMessage:
639   case CodeCompletionContext::CCC_ObjCClassMessage:
640   case CodeCompletionContext::CCC_IncludedFile:
641   // FIXME: Provide identifier based completions for the following contexts:
642   case CodeCompletionContext::CCC_Other: // Be conservative.
643   case CodeCompletionContext::CCC_NaturalLanguage:
644   case CodeCompletionContext::CCC_Recovery:
645   case CodeCompletionContext::CCC_NewName:
646     return false;
647   }
648   llvm_unreachable("unknown code completion context");
649 }
650 
651 static bool isInjectedClass(const NamedDecl &D) {
652   if (auto *R = dyn_cast_or_null<RecordDecl>(&D))
653     if (R->isInjectedClassName())
654       return true;
655   return false;
656 }
657 
658 // Some member calls are blacklisted because they're so rarely useful.
659 static bool isBlacklistedMember(const NamedDecl &D) {
660   // Destructor completion is rarely useful, and works inconsistently.
661   // (s.^ completes ~string, but s.~st^ is an error).
662   if (D.getKind() == Decl::CXXDestructor)
663     return true;
664   // Injected name may be useful for A::foo(), but who writes A::A::foo()?
665   if (isInjectedClass(D))
666     return true;
667   // Explicit calls to operators are also rare.
668   auto NameKind = D.getDeclName().getNameKind();
669   if (NameKind == DeclarationName::CXXOperatorName ||
670       NameKind == DeclarationName::CXXLiteralOperatorName ||
671       NameKind == DeclarationName::CXXConversionFunctionName)
672     return true;
673   return false;
674 }
675 
676 // The CompletionRecorder captures Sema code-complete output, including context.
677 // It filters out ignored results (but doesn't apply fuzzy-filtering yet).
678 // It doesn't do scoring or conversion to CompletionItem yet, as we want to
679 // merge with index results first.
680 // Generally the fields and methods of this object should only be used from
681 // within the callback.
682 struct CompletionRecorder : public CodeCompleteConsumer {
  // Creates a recorder configured from Opts.
  // ResultsCallback must be callable (asserted below); it is invoked at the
  // end of ProcessCodeCompleteResults, after the results have been recorded.
  // The context starts out as CCC_Other until a callback records the real one.
  CompletionRecorder(const CodeCompleteOptions &Opts,
                     llvm::unique_function<void()> ResultsCallback)
      : CodeCompleteConsumer(Opts.getClangCompleteOpts()),
        CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
        CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
        CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {
    assert(this->ResultsCallback);
  }
691 
692   std::vector<CodeCompletionResult> Results;
693   CodeCompletionContext CCContext;
694   Sema *CCSema = nullptr; // Sema that created the results.
695   // FIXME: Sema is scary. Can we store ASTContext and Preprocessor, instead?
696 
697   void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
698                                   CodeCompletionResult *InResults,
699                                   unsigned NumResults) override final {
700     // Results from recovery mode are generally useless, and the callback after
701     // recovery (if any) is usually more interesting. To make sure we handle the
702     // future callback from sema, we just ignore all callbacks in recovery mode,
703     // as taking only results from recovery mode results in poor completion
704     // results.
705     // FIXME: in case there is no future sema completion callback after the
706     // recovery mode, we might still want to provide some results (e.g. trivial
707     // identifier-based completion).
708     if (Context.getKind() == CodeCompletionContext::CCC_Recovery) {
709       log("Code complete: Ignoring sema code complete callback with Recovery "
710           "context.");
711       return;
712     }
713     // If a callback is called without any sema result and the context does not
714     // support index-based completion, we simply skip it to give way to
715     // potential future callbacks with results.
716     if (NumResults == 0 && !contextAllowsIndex(Context.getKind()))
717       return;
718     if (CCSema) {
719       log("Multiple code complete callbacks (parser backtracked?). "
720           "Dropping results from context {0}, keeping results from {1}.",
721           getCompletionKindString(Context.getKind()),
722           getCompletionKindString(this->CCContext.getKind()));
723       return;
724     }
725     // Record the completion context.
726     CCSema = &S;
727     CCContext = Context;
728 
729     // Retain the results we might want.
730     for (unsigned I = 0; I < NumResults; ++I) {
731       auto &Result = InResults[I];
732       // Class members that are shadowed by subclasses are usually noise.
733       if (Result.Hidden && Result.Declaration &&
734           Result.Declaration->isCXXClassMember())
735         continue;
736       if (!Opts.IncludeIneligibleResults &&
737           (Result.Availability == CXAvailability_NotAvailable ||
738            Result.Availability == CXAvailability_NotAccessible))
739         continue;
740       if (Result.Declaration &&
741           !Context.getBaseType().isNull() // is this a member-access context?
742           && isBlacklistedMember(*Result.Declaration))
743         continue;
744       // Skip injected class name when no class scope is not explicitly set.
745       // E.g. show injected A::A in `using A::A^` but not in "A^".
746       if (Result.Declaration && !Context.getCXXScopeSpecifier().hasValue() &&
747           isInjectedClass(*Result.Declaration))
748         continue;
749       // We choose to never append '::' to completion results in clangd.
750       Result.StartsNestedNameSpecifier = false;
751       Results.push_back(Result);
752     }
753     ResultsCallback();
754   }
755 
756   CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
757   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
758 
759   // Returns the filtering/sorting name for Result, which must be from Results.
760   // Returned string is owned by this recorder (or the AST).
761   llvm::StringRef getName(const CodeCompletionResult &Result) {
762     switch (Result.Kind) {
763     case CodeCompletionResult::RK_Declaration:
764       if (auto *ID = Result.Declaration->getIdentifier())
765         return ID->getName();
766       break;
767     case CodeCompletionResult::RK_Keyword:
768       return Result.Keyword;
769     case CodeCompletionResult::RK_Macro:
770       return Result.Macro->getName();
771     case CodeCompletionResult::RK_Pattern:
772       return Result.Pattern->getTypedText();
773     }
774     auto *CCS = codeCompletionString(Result);
775     return CCS->getTypedText();
776   }
777 
778   // Build a CodeCompletion string for R, which must be from Results.
779   // The CCS will be owned by this recorder.
780   CodeCompletionString *codeCompletionString(const CodeCompletionResult &R) {
781     // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
782     return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
783         *CCSema, CCContext, *CCAllocator, CCTUInfo,
784         /*IncludeBriefComments=*/false);
785   }
786 
787 private:
788   CodeCompleteOptions Opts;
789   std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
790   CodeCompletionTUInfo CCTUInfo;
791   llvm::unique_function<void()> ResultsCallback;
792 };
793 
// A signature-help candidate together with the signals used to order it.
struct ScoredSignature {
  // When set, requires documentation to be requested from the index with this
  // ID.
  llvm::Optional<SymbolID> IDForDoc;
  // The signature as it will be reported (label, parameters, documentation).
  SignatureInformation Signature;
  // Signals consulted when sorting candidates.
  SignatureQualitySignals Quality;
};
801 
802 class SignatureHelpCollector final : public CodeCompleteConsumer {
803 public:
804   SignatureHelpCollector(const clang::CodeCompleteOptions &CodeCompleteOpts,
805                          const SymbolIndex *Index, SignatureHelp &SigHelp)
806       : CodeCompleteConsumer(CodeCompleteOpts), SigHelp(SigHelp),
807         Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
808         CCTUInfo(Allocator), Index(Index) {}
809 
810   void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg,
811                                  OverloadCandidate *Candidates,
812                                  unsigned NumCandidates,
813                                  SourceLocation OpenParLoc) override {
814     assert(!OpenParLoc.isInvalid());
815     SourceManager &SrcMgr = S.getSourceManager();
816     OpenParLoc = SrcMgr.getFileLoc(OpenParLoc);
817     if (SrcMgr.isInMainFile(OpenParLoc))
818       SigHelp.argListStart = sourceLocToPosition(SrcMgr, OpenParLoc);
819     else
820       elog("Location oustide main file in signature help: {0}",
821            OpenParLoc.printToString(SrcMgr));
822 
823     std::vector<ScoredSignature> ScoredSignatures;
824     SigHelp.signatures.reserve(NumCandidates);
825     ScoredSignatures.reserve(NumCandidates);
826     // FIXME(rwols): How can we determine the "active overload candidate"?
827     // Right now the overloaded candidates seem to be provided in a "best fit"
828     // order, so I'm not too worried about this.
829     SigHelp.activeSignature = 0;
830     assert(CurrentArg <= (unsigned)std::numeric_limits<int>::max() &&
831            "too many arguments");
832     SigHelp.activeParameter = static_cast<int>(CurrentArg);
833     for (unsigned I = 0; I < NumCandidates; ++I) {
834       OverloadCandidate Candidate = Candidates[I];
835       // We want to avoid showing instantiated signatures, because they may be
836       // long in some cases (e.g. when 'T' is substituted with 'std::string', we
837       // would get 'std::basic_string<char>').
838       if (auto *Func = Candidate.getFunction()) {
839         if (auto *Pattern = Func->getTemplateInstantiationPattern())
840           Candidate = OverloadCandidate(Pattern);
841       }
842 
843       const auto *CCS = Candidate.CreateSignatureString(
844           CurrentArg, S, *Allocator, CCTUInfo, true);
845       assert(CCS && "Expected the CodeCompletionString to be non-null");
846       ScoredSignatures.push_back(processOverloadCandidate(
847           Candidate, *CCS,
848           Candidate.getFunction()
849               ? getDeclComment(S.getASTContext(), *Candidate.getFunction())
850               : ""));
851     }
852 
853     // Sema does not load the docs from the preamble, so we need to fetch extra
854     // docs from the index instead.
855     llvm::DenseMap<SymbolID, std::string> FetchedDocs;
856     if (Index) {
857       LookupRequest IndexRequest;
858       for (const auto &S : ScoredSignatures) {
859         if (!S.IDForDoc)
860           continue;
861         IndexRequest.IDs.insert(*S.IDForDoc);
862       }
863       Index->lookup(IndexRequest, [&](const Symbol &S) {
864         if (!S.Documentation.empty())
865           FetchedDocs[S.ID] = std::string(S.Documentation);
866       });
867       log("SigHelp: requested docs for {0} symbols from the index, got {1} "
868           "symbols with non-empty docs in the response",
869           IndexRequest.IDs.size(), FetchedDocs.size());
870     }
871 
872     llvm::sort(ScoredSignatures, [](const ScoredSignature &L,
873                                     const ScoredSignature &R) {
874       // Ordering follows:
875       // - Less number of parameters is better.
876       // - Function is better than FunctionType which is better than
877       // Function Template.
878       // - High score is better.
879       // - Shorter signature is better.
880       // - Alphabetically smaller is better.
881       if (L.Quality.NumberOfParameters != R.Quality.NumberOfParameters)
882         return L.Quality.NumberOfParameters < R.Quality.NumberOfParameters;
883       if (L.Quality.NumberOfOptionalParameters !=
884           R.Quality.NumberOfOptionalParameters)
885         return L.Quality.NumberOfOptionalParameters <
886                R.Quality.NumberOfOptionalParameters;
887       if (L.Quality.Kind != R.Quality.Kind) {
888         using OC = CodeCompleteConsumer::OverloadCandidate;
889         switch (L.Quality.Kind) {
890         case OC::CK_Function:
891           return true;
892         case OC::CK_FunctionType:
893           return R.Quality.Kind != OC::CK_Function;
894         case OC::CK_FunctionTemplate:
895           return false;
896         }
897         llvm_unreachable("Unknown overload candidate type.");
898       }
899       if (L.Signature.label.size() != R.Signature.label.size())
900         return L.Signature.label.size() < R.Signature.label.size();
901       return L.Signature.label < R.Signature.label;
902     });
903 
904     for (auto &SS : ScoredSignatures) {
905       auto IndexDocIt =
906           SS.IDForDoc ? FetchedDocs.find(*SS.IDForDoc) : FetchedDocs.end();
907       if (IndexDocIt != FetchedDocs.end())
908         SS.Signature.documentation = IndexDocIt->second;
909 
910       SigHelp.signatures.push_back(std::move(SS.Signature));
911     }
912   }
913 
914   GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; }
915 
916   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
917 
918 private:
919   void processParameterChunk(llvm::StringRef ChunkText,
920                              SignatureInformation &Signature) const {
921     // (!) this is O(n), should still be fast compared to building ASTs.
922     unsigned ParamStartOffset = lspLength(Signature.label);
923     unsigned ParamEndOffset = ParamStartOffset + lspLength(ChunkText);
924     // A piece of text that describes the parameter that corresponds to
925     // the code-completion location within a function call, message send,
926     // macro invocation, etc.
927     Signature.label += ChunkText;
928     ParameterInformation Info;
929     Info.labelOffsets.emplace(ParamStartOffset, ParamEndOffset);
930     // FIXME: only set 'labelOffsets' when all clients migrate out of it.
931     Info.labelString = std::string(ChunkText);
932 
933     Signature.parameters.push_back(std::move(Info));
934   }
935 
936   void processOptionalChunk(const CodeCompletionString &CCS,
937                             SignatureInformation &Signature,
938                             SignatureQualitySignals &Signal) const {
939     for (const auto &Chunk : CCS) {
940       switch (Chunk.Kind) {
941       case CodeCompletionString::CK_Optional:
942         assert(Chunk.Optional &&
943                "Expected the optional code completion string to be non-null.");
944         processOptionalChunk(*Chunk.Optional, Signature, Signal);
945         break;
946       case CodeCompletionString::CK_VerticalSpace:
947         break;
948       case CodeCompletionString::CK_CurrentParameter:
949       case CodeCompletionString::CK_Placeholder:
950         processParameterChunk(Chunk.Text, Signature);
951         Signal.NumberOfOptionalParameters++;
952         break;
953       default:
954         Signature.label += Chunk.Text;
955         break;
956       }
957     }
958   }
959 
960   // FIXME(ioeric): consider moving CodeCompletionString logic here to
961   // CompletionString.h.
962   ScoredSignature processOverloadCandidate(const OverloadCandidate &Candidate,
963                                            const CodeCompletionString &CCS,
964                                            llvm::StringRef DocComment) const {
965     SignatureInformation Signature;
966     SignatureQualitySignals Signal;
967     const char *ReturnType = nullptr;
968 
969     Signature.documentation = formatDocumentation(CCS, DocComment);
970     Signal.Kind = Candidate.getKind();
971 
972     for (const auto &Chunk : CCS) {
973       switch (Chunk.Kind) {
974       case CodeCompletionString::CK_ResultType:
975         // A piece of text that describes the type of an entity or,
976         // for functions and methods, the return type.
977         assert(!ReturnType && "Unexpected CK_ResultType");
978         ReturnType = Chunk.Text;
979         break;
980       case CodeCompletionString::CK_CurrentParameter:
981       case CodeCompletionString::CK_Placeholder:
982         processParameterChunk(Chunk.Text, Signature);
983         Signal.NumberOfParameters++;
984         break;
985       case CodeCompletionString::CK_Optional: {
986         // The rest of the parameters are defaulted/optional.
987         assert(Chunk.Optional &&
988                "Expected the optional code completion string to be non-null.");
989         processOptionalChunk(*Chunk.Optional, Signature, Signal);
990         break;
991       }
992       case CodeCompletionString::CK_VerticalSpace:
993         break;
994       default:
995         Signature.label += Chunk.Text;
996         break;
997       }
998     }
999     if (ReturnType) {
1000       Signature.label += " -> ";
1001       Signature.label += ReturnType;
1002     }
1003     dlog("Signal for {0}: {1}", Signature, Signal);
1004     ScoredSignature Result;
1005     Result.Signature = std::move(Signature);
1006     Result.Quality = Signal;
1007     const FunctionDecl *Func = Candidate.getFunction();
1008     if (Func && Result.Signature.documentation.empty()) {
1009       // Computing USR caches linkage, which may change after code completion.
1010       if (!hasUnstableLinkage(Func))
1011         Result.IDForDoc = clangd::getSymbolID(Func);
1012     }
1013     return Result;
1014   }
1015 
1016   SignatureHelp &SigHelp;
1017   std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator;
1018   CodeCompletionTUInfo CCTUInfo;
1019   const SymbolIndex *Index;
1020 }; // SignatureHelpCollector
1021 
// Inputs needed to run Sema code completion at one position in a file.
struct SemaCompleteInput {
  // Name of the file being completed; also names the completion point's file.
  PathRef FileName;
  // Compile command from which the compiler invocation is built.
  const tooling::CompileCommand &Command;
  // Previously built preamble to reuse. May be null.
  const PreambleData *Preamble;
  // Text of the file to complete in.
  llvm::StringRef Contents;
  // Position of the completion point within Contents.
  // NOTE(review): presumably a byte offset - confirm against callers.
  size_t Offset;
  // Filesystem the compiler reads through.
  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS;
};
1030 
1031 void loadMainFilePreambleMacros(const Preprocessor &PP,
1032                                 const PreambleData &Preamble) {
1033   // The ExternalPreprocessorSource has our macros, if we know where to look.
1034   // We can read all the macros using PreambleMacros->ReadDefinedMacros(),
1035   // but this includes transitively included files, so may deserialize a lot.
1036   ExternalPreprocessorSource *PreambleMacros = PP.getExternalSource();
1037   // As we have the names of the macros, we can look up their IdentifierInfo
1038   // and then use this to load just the macros we want.
1039   IdentifierInfoLookup *PreambleIdentifiers =
1040       PP.getIdentifierTable().getExternalIdentifierLookup();
1041   if (!PreambleIdentifiers || !PreambleMacros)
1042     return;
1043   for (const auto &MacroName : Preamble.Macros.Names)
1044     if (auto *II = PreambleIdentifiers->get(MacroName.getKey()))
1045       if (II->isOutOfDate())
1046         PreambleMacros->updateOutOfDateIdentifier(*II);
1047 }
1048 
1049 // Invokes Sema code completion on a file.
1050 // If \p Includes is set, it will be updated based on the compiler invocation.
1051 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
1052                       const clang::CodeCompleteOptions &Options,
1053                       const SemaCompleteInput &Input,
1054                       IncludeStructure *Includes = nullptr) {
1055   trace::Span Tracer("Sema completion");
1056   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS = Input.VFS;
1057   if (Input.Preamble && Input.Preamble->StatCache)
1058     VFS = Input.Preamble->StatCache->getConsumingFS(std::move(VFS));
1059   ParseInputs ParseInput;
1060   ParseInput.CompileCommand = Input.Command;
1061   ParseInput.FS = VFS;
1062   ParseInput.Contents = std::string(Input.Contents);
1063   ParseInput.Opts = ParseOptions();
1064 
1065   IgnoreDiagnostics IgnoreDiags;
1066   auto CI = buildCompilerInvocation(ParseInput, IgnoreDiags);
1067   if (!CI) {
1068     elog("Couldn't create CompilerInvocation");
1069     return false;
1070   }
1071   auto &FrontendOpts = CI->getFrontendOpts();
1072   FrontendOpts.SkipFunctionBodies = true;
1073   // Disable typo correction in Sema.
1074   CI->getLangOpts()->SpellChecking = false;
1075   // Setup code completion.
1076   FrontendOpts.CodeCompleteOpts = Options;
1077   FrontendOpts.CodeCompletionAt.FileName = std::string(Input.FileName);
1078   std::tie(FrontendOpts.CodeCompletionAt.Line,
1079            FrontendOpts.CodeCompletionAt.Column) =
1080       offsetToClangLineColumn(Input.Contents, Input.Offset);
1081 
1082   std::unique_ptr<llvm::MemoryBuffer> ContentsBuffer =
1083       llvm::MemoryBuffer::getMemBufferCopy(Input.Contents, Input.FileName);
1084   // The diagnostic options must be set before creating a CompilerInstance.
1085   CI->getDiagnosticOpts().IgnoreWarnings = true;
1086   // We reuse the preamble whether it's valid or not. This is a
1087   // correctness/performance tradeoff: building without a preamble is slow, and
1088   // completion is latency-sensitive.
1089   // However, if we're completing *inside* the preamble section of the draft,
1090   // overriding the preamble will break sema completion. Fortunately we can just
1091   // skip all includes in this case; these completions are really simple.
1092   PreambleBounds PreambleRegion =
1093       ComputePreambleBounds(*CI->getLangOpts(), ContentsBuffer.get(), 0);
1094   bool CompletingInPreamble = PreambleRegion.Size > Input.Offset;
1095   // NOTE: we must call BeginSourceFile after prepareCompilerInstance. Otherwise
1096   // the remapped buffers do not get freed.
1097   auto Clang = prepareCompilerInstance(
1098       std::move(CI),
1099       (Input.Preamble && !CompletingInPreamble) ? &Input.Preamble->Preamble
1100                                                 : nullptr,
1101       std::move(ContentsBuffer), std::move(VFS), IgnoreDiags);
1102   Clang->getPreprocessorOpts().SingleFileParseMode = CompletingInPreamble;
1103   Clang->setCodeCompletionConsumer(Consumer.release());
1104 
1105   SyntaxOnlyAction Action;
1106   if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) {
1107     log("BeginSourceFile() failed when running codeComplete for {0}",
1108         Input.FileName);
1109     return false;
1110   }
1111   // Macros can be defined within the preamble region of the main file.
1112   // They don't fall nicely into our index/Sema dichotomy:
1113   //  - they're not indexed for completion (they're not available across files)
1114   //  - but Sema code complete won't see them: as part of the preamble, they're
1115   //    deserialized only when mentioned.
1116   // Force them to be deserialized so SemaCodeComplete sees them.
1117   if (Input.Preamble)
1118     loadMainFilePreambleMacros(Clang->getPreprocessor(), *Input.Preamble);
1119   if (Includes)
1120     Clang->getPreprocessor().addPPCallbacks(
1121         collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
1122   if (llvm::Error Err = Action.Execute()) {
1123     log("Execute() failed when running codeComplete for {0}: {1}",
1124         Input.FileName, toString(std::move(Err)));
1125     return false;
1126   }
1127   Action.EndSourceFile();
1128 
1129   return true;
1130 }
1131 
1132 // Should we allow index completions in the specified context?
1133 bool allowIndex(CodeCompletionContext &CC) {
1134   if (!contextAllowsIndex(CC.getKind()))
1135     return false;
1136   // We also avoid ClassName::bar (but allow namespace::bar).
1137   auto Scope = CC.getCXXScopeSpecifier();
1138   if (!Scope)
1139     return true;
1140   NestedNameSpecifier *NameSpec = (*Scope)->getScopeRep();
1141   if (!NameSpec)
1142     return true;
1143   // We only query the index when qualifier is a namespace.
1144   // If it's a class, we rely solely on sema completions.
1145   switch (NameSpec->getKind()) {
1146   case NestedNameSpecifier::Global:
1147   case NestedNameSpecifier::Namespace:
1148   case NestedNameSpecifier::NamespaceAlias:
1149     return true;
1150   case NestedNameSpecifier::Super:
1151   case NestedNameSpecifier::TypeSpec:
1152   case NestedNameSpecifier::TypeSpecWithTemplate:
1153   // Unresolved inside a template.
1154   case NestedNameSpecifier::Identifier:
1155     return false;
1156   }
1157   llvm_unreachable("invalid NestedNameSpecifier kind");
1158 }
1159 
1160 std::future<SymbolSlab> startAsyncFuzzyFind(const SymbolIndex &Index,
1161                                             const FuzzyFindRequest &Req) {
1162   return runAsync<SymbolSlab>([&Index, Req]() {
1163     trace::Span Tracer("Async fuzzyFind");
1164     SymbolSlab::Builder Syms;
1165     Index.fuzzyFind(Req, [&Syms](const Symbol &Sym) { Syms.insert(Sym); });
1166     return std::move(Syms).build();
1167   });
1168 }
1169 
1170 // Creates a `FuzzyFindRequest` based on the cached index request from the
1171 // last completion, if any, and the speculated completion filter text in the
1172 // source code.
1173 FuzzyFindRequest speculativeFuzzyFindRequestForCompletion(
1174     FuzzyFindRequest CachedReq, const CompletionPrefix &HeuristicPrefix) {
1175   CachedReq.Query = std::string(HeuristicPrefix.Name);
1176   return CachedReq;
1177 }
1178 
1179 // Runs Sema-based (AST) and Index-based completion, returns merged results.
1180 //
1181 // There are a few tricky considerations:
1182 //   - the AST provides information needed for the index query (e.g. which
1183 //     namespaces to search in). So Sema must start first.
1184 //   - we only want to return the top results (Opts.Limit).
1185 //     Building CompletionItems for everything else is wasteful, so we want to
1186 //     preserve the "native" format until we're done with scoring.
1187 //   - the data underlying Sema completion items is owned by the AST and various
1188 //     other arenas, which must stay alive for us to build CompletionItems.
1189 //   - we may get duplicate results from Sema and the Index, we need to merge.
1190 //
1191 // So we start Sema completion first, and do all our work in its callback.
1192 // We use the Sema context information to query the index.
1193 // Then we merge the two result sets, producing items that are Sema/Index/Both.
1194 // These items are scored, and the top N are synthesized into the LSP response.
1195 // Finally, we can clean up the data structures created by Sema completion.
1196 //
1197 // Main collaborators are:
1198 //   - semaCodeComplete sets up the compiler machinery to run code completion.
1199 //   - CompletionRecorder captures Sema completion results, including context.
1200 //   - SymbolIndex (Opts.Index) provides index completion results as Symbols
1201 //   - CompletionCandidates are the result of merging Sema and Index results.
1202 //     Each candidate points to an underlying CodeCompletionResult (Sema), a
1203 //     Symbol (Index), or both. It computes the result quality score.
1204 //     CompletionCandidate also does conversion to CompletionItem (at the end).
1205 //   - FuzzyMatcher scores how the candidate matches the partial identifier.
1206 //     This score is combined with the result quality score for the final score.
1207 //   - TopN determines the results with the best score.
1208 class CodeCompleteFlow {
1209   PathRef FileName;
1210   IncludeStructure Includes;           // Complete once the compiler runs.
1211   SpeculativeFuzzyFind *SpecFuzzyFind; // Can be nullptr.
1212   const CodeCompleteOptions &Opts;
1213 
1214   // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
1215   CompletionRecorder *Recorder = nullptr;
1216   CodeCompletionContext::Kind CCContextKind = CodeCompletionContext::CCC_Other;
1217   bool IsUsingDeclaration = false;
1218   // Counters for logging.
1219   int NSema = 0, NIndex = 0, NSemaAndIndex = 0, NIdent = 0;
1220   bool Incomplete = false; // Would more be available with a higher limit?
1221   CompletionPrefix HeuristicPrefix;
1222   llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
1223   Range ReplacedRange;
1224   std::vector<std::string> QueryScopes; // Initialized once Sema runs.
1225   // Initialized once QueryScopes is initialized, if there are scopes.
1226   llvm::Optional<ScopeDistance> ScopeProximity;
1227   llvm::Optional<OpaqueType> PreferredType; // Initialized once Sema runs.
1228   // Whether to query symbols from any scope. Initialized once Sema runs.
1229   bool AllScopes = false;
1230   llvm::StringSet<> ContextWords;
1231   // Include-insertion and proximity scoring rely on the include structure.
1232   // This is available after Sema has run.
1233   llvm::Optional<IncludeInserter> Inserter;  // Available during runWithSema.
1234   llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs.
1235   /// Speculative request based on the cached request and the filter text before
1236   /// the cursor.
1237   /// Initialized right before sema run. This is only set if `SpecFuzzyFind` is
1238   /// set and contains a cached request.
1239   llvm::Optional<FuzzyFindRequest> SpecReq;
1240 
1241 public:
1242   // A CodeCompleteFlow object is only useful for calling run() exactly once.
1243   CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes,
1244                    SpeculativeFuzzyFind *SpecFuzzyFind,
1245                    const CodeCompleteOptions &Opts)
1246       : FileName(FileName), Includes(Includes), SpecFuzzyFind(SpecFuzzyFind),
1247         Opts(Opts) {}
1248 
1249   CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && {
1250     trace::Span Tracer("CodeCompleteFlow");
1251     HeuristicPrefix =
1252         guessCompletionPrefix(SemaCCInput.Contents, SemaCCInput.Offset);
1253     populateContextWords(SemaCCInput.Contents);
1254     if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) {
1255       assert(!SpecFuzzyFind->Result.valid());
1256       SpecReq = speculativeFuzzyFindRequestForCompletion(
1257           *SpecFuzzyFind->CachedReq, HeuristicPrefix);
1258       SpecFuzzyFind->Result = startAsyncFuzzyFind(*Opts.Index, *SpecReq);
1259     }
1260 
1261     // We run Sema code completion first. It builds an AST and calculates:
1262     //   - completion results based on the AST.
1263     //   - partial identifier and context. We need these for the index query.
1264     CodeCompleteResult Output;
1265     auto RecorderOwner = std::make_unique<CompletionRecorder>(Opts, [&]() {
1266       assert(Recorder && "Recorder is not set");
1267       CCContextKind = Recorder->CCContext.getKind();
1268       IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
1269       auto Style = getFormatStyleForFile(
1270           SemaCCInput.FileName, SemaCCInput.Contents, SemaCCInput.VFS.get());
1271       // If preprocessor was run, inclusions from preprocessor callback should
1272       // already be added to Includes.
1273       Inserter.emplace(
1274           SemaCCInput.FileName, SemaCCInput.Contents, Style,
1275           SemaCCInput.Command.Directory,
1276           &Recorder->CCSema->getPreprocessor().getHeaderSearchInfo());
1277       for (const auto &Inc : Includes.MainFileIncludes)
1278         Inserter->addExisting(Inc);
1279 
1280       // Most of the cost of file proximity is in initializing the FileDistance
1281       // structures based on the observed includes, once per query. Conceptually
1282       // that happens here (though the per-URI-scheme initialization is lazy).
1283       // The per-result proximity scoring is (amortized) very cheap.
1284       FileDistanceOptions ProxOpts{}; // Use defaults.
1285       const auto &SM = Recorder->CCSema->getSourceManager();
1286       llvm::StringMap<SourceParams> ProxSources;
1287       for (auto &Entry : Includes.includeDepth(
1288                SM.getFileEntryForID(SM.getMainFileID())->getName())) {
1289         auto &Source = ProxSources[Entry.getKey()];
1290         Source.Cost = Entry.getValue() * ProxOpts.IncludeCost;
1291         // Symbols near our transitive includes are good, but only consider
1292         // things in the same directory or below it. Otherwise there can be
1293         // many false positives.
1294         if (Entry.getValue() > 0)
1295           Source.MaxUpTraversals = 1;
1296       }
1297       FileProximity.emplace(ProxSources, ProxOpts);
1298 
1299       Output = runWithSema();
1300       Inserter.reset(); // Make sure this doesn't out-live Clang.
1301       SPAN_ATTACH(Tracer, "sema_completion_kind",
1302                   getCompletionKindString(CCContextKind));
1303       log("Code complete: sema context {0}, query scopes [{1}] (AnyScope={2}), "
1304           "expected type {3}{4}",
1305           getCompletionKindString(CCContextKind),
1306           llvm::join(QueryScopes.begin(), QueryScopes.end(), ","), AllScopes,
1307           PreferredType ? Recorder->CCContext.getPreferredType().getAsString()
1308                         : "<none>",
1309           IsUsingDeclaration ? ", inside using declaration" : "");
1310     });
1311 
1312     Recorder = RecorderOwner.get();
1313 
1314     semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(),
1315                      SemaCCInput, &Includes);
1316     logResults(Output, Tracer);
1317     return Output;
1318   }
1319 
1320   void logResults(const CodeCompleteResult &Output, const trace::Span &Tracer) {
1321     SPAN_ATTACH(Tracer, "sema_results", NSema);
1322     SPAN_ATTACH(Tracer, "index_results", NIndex);
1323     SPAN_ATTACH(Tracer, "merged_results", NSemaAndIndex);
1324     SPAN_ATTACH(Tracer, "identifier_results", NIdent);
1325     SPAN_ATTACH(Tracer, "returned_results", int64_t(Output.Completions.size()));
1326     SPAN_ATTACH(Tracer, "incomplete", Output.HasMore);
1327     log("Code complete: {0} results from Sema, {1} from Index, "
1328         "{2} matched, {3} from identifiers, {4} returned{5}.",
1329         NSema, NIndex, NSemaAndIndex, NIdent, Output.Completions.size(),
1330         Output.HasMore ? " (incomplete)" : "");
1331     assert(!Opts.Limit || Output.Completions.size() <= Opts.Limit);
1332     // We don't assert that isIncomplete means we hit a limit.
1333     // Indexes may choose to impose their own limits even if we don't have one.
1334   }
1335 
1336   CodeCompleteResult
1337   runWithoutSema(llvm::StringRef Content, size_t Offset,
1338                  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) && {
1339     trace::Span Tracer("CodeCompleteWithoutSema");
1340     // Fill in fields normally set by runWithSema()
1341     HeuristicPrefix = guessCompletionPrefix(Content, Offset);
1342     populateContextWords(Content);
1343     CCContextKind = CodeCompletionContext::CCC_Recovery;
1344     IsUsingDeclaration = false;
1345     Filter = FuzzyMatcher(HeuristicPrefix.Name);
1346     auto Pos = offsetToPosition(Content, Offset);
1347     ReplacedRange.start = ReplacedRange.end = Pos;
1348     ReplacedRange.start.character -= HeuristicPrefix.Name.size();
1349 
1350     llvm::StringMap<SourceParams> ProxSources;
1351     ProxSources[FileName].Cost = 0;
1352     FileProximity.emplace(ProxSources);
1353 
1354     auto Style = getFormatStyleForFile(FileName, Content, VFS.get());
1355     // This will only insert verbatim headers.
1356     Inserter.emplace(FileName, Content, Style,
1357                      /*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);
1358 
1359     auto Identifiers = collectIdentifiers(Content, Style);
1360     std::vector<RawIdentifier> IdentifierResults;
1361     for (const auto &IDAndCount : Identifiers) {
1362       RawIdentifier ID;
1363       ID.Name = IDAndCount.first();
1364       ID.References = IDAndCount.second;
1365       // Avoid treating typed filter as an identifier.
1366       if (ID.Name == HeuristicPrefix.Name)
1367         --ID.References;
1368       if (ID.References > 0)
1369         IdentifierResults.push_back(std::move(ID));
1370     }
1371 
1372     // Simplified version of getQueryScopes():
1373     //  - accessible scopes are determined heuristically.
1374     //  - all-scopes query if no qualifier was typed (and it's allowed).
1375     SpecifiedScope Scopes;
1376     Scopes.AccessibleScopes = visibleNamespaces(
1377         Content.take_front(Offset), format::getFormattingLangOpts(Style));
1378     for (std::string &S : Scopes.AccessibleScopes)
1379       if (!S.empty())
1380         S.append("::"); // visibleNamespaces doesn't include trailing ::.
1381     if (HeuristicPrefix.Qualifier.empty())
1382       AllScopes = Opts.AllScopes;
1383     else if (HeuristicPrefix.Qualifier.startswith("::")) {
1384       Scopes.AccessibleScopes = {""};
1385       Scopes.UnresolvedQualifier =
1386           std::string(HeuristicPrefix.Qualifier.drop_front(2));
1387     } else
1388       Scopes.UnresolvedQualifier = std::string(HeuristicPrefix.Qualifier);
1389     // First scope is the (modified) enclosing scope.
1390     QueryScopes = Scopes.scopesForIndexQuery();
1391     ScopeProximity.emplace(QueryScopes);
1392 
1393     SymbolSlab IndexResults = Opts.Index ? queryIndex() : SymbolSlab();
1394 
1395     CodeCompleteResult Output = toCodeCompleteResult(mergeResults(
1396         /*SemaResults=*/{}, IndexResults, IdentifierResults));
1397     Output.RanParser = false;
1398     logResults(Output, Tracer);
1399     return Output;
1400   }
1401 
1402 private:
1403   void populateContextWords(llvm::StringRef Content) {
1404     // Take last 3 lines before the completion point.
1405     unsigned RangeEnd = HeuristicPrefix.Qualifier.begin() - Content.data(),
1406              RangeBegin = RangeEnd;
1407     for (size_t I = 0; I < 3 && RangeBegin > 0; ++I) {
1408       auto PrevNL = Content.rfind('\n', RangeBegin);
1409       if (PrevNL == StringRef::npos) {
1410         RangeBegin = 0;
1411         break;
1412       }
1413       RangeBegin = PrevNL;
1414     }
1415 
1416     ContextWords = collectWords(Content.slice(RangeBegin, RangeEnd));
1417     dlog("Completion context words: {0}",
1418          llvm::join(ContextWords.keys(), ", "));
1419   }
1420 
1421   // This is called by run() once Sema code completion is done, but before the
1422   // Sema data structures are torn down. It does all the real work.
1423   CodeCompleteResult runWithSema() {
1424     const auto &CodeCompletionRange = CharSourceRange::getCharRange(
1425         Recorder->CCSema->getPreprocessor().getCodeCompletionTokenRange());
1426     // When we are getting completions with an empty identifier, for example
1427     //    std::vector<int> asdf;
1428     //    asdf.^;
1429     // Then the range will be invalid and we will be doing insertion, use
1430     // current cursor position in such cases as range.
1431     if (CodeCompletionRange.isValid()) {
1432       ReplacedRange = halfOpenToRange(Recorder->CCSema->getSourceManager(),
1433                                       CodeCompletionRange);
1434     } else {
1435       const auto &Pos = sourceLocToPosition(
1436           Recorder->CCSema->getSourceManager(),
1437           Recorder->CCSema->getPreprocessor().getCodeCompletionLoc());
1438       ReplacedRange.start = ReplacedRange.end = Pos;
1439     }
1440     Filter = FuzzyMatcher(
1441         Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
1442     std::tie(QueryScopes, AllScopes) = getQueryScopes(
1443         Recorder->CCContext, *Recorder->CCSema, HeuristicPrefix, Opts);
1444     if (!QueryScopes.empty())
1445       ScopeProximity.emplace(QueryScopes);
1446     PreferredType =
1447         OpaqueType::fromType(Recorder->CCSema->getASTContext(),
1448                              Recorder->CCContext.getPreferredType());
1449     // Sema provides the needed context to query the index.
1450     // FIXME: in addition to querying for extra/overlapping symbols, we should
1451     //        explicitly request symbols corresponding to Sema results.
1452     //        We can use their signals even if the index can't suggest them.
1453     // We must copy index results to preserve them, but there are at most Limit.
1454     auto IndexResults = (Opts.Index && allowIndex(Recorder->CCContext))
1455                             ? queryIndex()
1456                             : SymbolSlab();
1457     trace::Span Tracer("Populate CodeCompleteResult");
1458     // Merge Sema and Index results, score them, and pick the winners.
1459     auto Top =
1460         mergeResults(Recorder->Results, IndexResults, /*Identifiers*/ {});
1461     return toCodeCompleteResult(Top);
1462   }
1463 
1464   CodeCompleteResult
1465   toCodeCompleteResult(const std::vector<ScoredBundle> &Scored) {
1466     CodeCompleteResult Output;
1467 
1468     // Convert the results to final form, assembling the expensive strings.
1469     for (auto &C : Scored) {
1470       Output.Completions.push_back(toCodeCompletion(C.first));
1471       Output.Completions.back().Score = C.second;
1472       Output.Completions.back().CompletionTokenRange = ReplacedRange;
1473     }
1474     Output.HasMore = Incomplete;
1475     Output.Context = CCContextKind;
1476     Output.CompletionRange = ReplacedRange;
1477     return Output;
1478   }
1479 
1480   SymbolSlab queryIndex() {
1481     trace::Span Tracer("Query index");
1482     SPAN_ATTACH(Tracer, "limit", int64_t(Opts.Limit));
1483 
1484     // Build the query.
1485     FuzzyFindRequest Req;
1486     if (Opts.Limit)
1487       Req.Limit = Opts.Limit;
1488     Req.Query = std::string(Filter->pattern());
1489     Req.RestrictForCodeCompletion = true;
1490     Req.Scopes = QueryScopes;
1491     Req.AnyScope = AllScopes;
1492     // FIXME: we should send multiple weighted paths here.
1493     Req.ProximityPaths.push_back(std::string(FileName));
1494     if (PreferredType)
1495       Req.PreferredTypes.push_back(std::string(PreferredType->raw()));
1496     vlog("Code complete: fuzzyFind({0:2})", toJSON(Req));
1497 
1498     if (SpecFuzzyFind)
1499       SpecFuzzyFind->NewReq = Req;
1500     if (SpecFuzzyFind && SpecFuzzyFind->Result.valid() && (*SpecReq == Req)) {
1501       vlog("Code complete: speculative fuzzy request matches the actual index "
1502            "request. Waiting for the speculative index results.");
1503       SPAN_ATTACH(Tracer, "Speculative results", true);
1504 
1505       trace::Span WaitSpec("Wait speculative results");
1506       return SpecFuzzyFind->Result.get();
1507     }
1508 
1509     SPAN_ATTACH(Tracer, "Speculative results", false);
1510 
1511     // Run the query against the index.
1512     SymbolSlab::Builder ResultsBuilder;
1513     if (Opts.Index->fuzzyFind(
1514             Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); }))
1515       Incomplete = true;
1516     return std::move(ResultsBuilder).build();
1517   }
1518 
1519   // Merges Sema and Index results where possible, to form CompletionCandidates.
1520   // \p Identifiers is raw identifiers that can also be completion candidates.
1521   // Identifiers are not merged with results from index or sema.
1522   // Groups overloads if desired, to form CompletionCandidate::Bundles. The
1523   // bundles are scored and top results are returned, best to worst.
1524   std::vector<ScoredBundle>
1525   mergeResults(const std::vector<CodeCompletionResult> &SemaResults,
1526                const SymbolSlab &IndexResults,
1527                const std::vector<RawIdentifier> &IdentifierResults) {
1528     trace::Span Tracer("Merge and score results");
1529     std::vector<CompletionCandidate::Bundle> Bundles;
1530     llvm::DenseMap<size_t, size_t> BundleLookup;
1531     auto AddToBundles = [&](const CodeCompletionResult *SemaResult,
1532                             const Symbol *IndexResult,
1533                             const RawIdentifier *IdentifierResult) {
1534       CompletionCandidate C;
1535       C.SemaResult = SemaResult;
1536       C.IndexResult = IndexResult;
1537       C.IdentifierResult = IdentifierResult;
1538       if (C.IndexResult) {
1539         C.Name = IndexResult->Name;
1540         C.RankedIncludeHeaders = getRankedIncludes(*C.IndexResult);
1541       } else if (C.SemaResult) {
1542         C.Name = Recorder->getName(*SemaResult);
1543       } else {
1544         assert(IdentifierResult);
1545         C.Name = IdentifierResult->Name;
1546       }
1547       if (auto OverloadSet = C.overloadSet(Opts)) {
1548         auto Ret = BundleLookup.try_emplace(OverloadSet, Bundles.size());
1549         if (Ret.second)
1550           Bundles.emplace_back();
1551         Bundles[Ret.first->second].push_back(std::move(C));
1552       } else {
1553         Bundles.emplace_back();
1554         Bundles.back().push_back(std::move(C));
1555       }
1556     };
1557     llvm::DenseSet<const Symbol *> UsedIndexResults;
1558     auto CorrespondingIndexResult =
1559         [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
1560       if (auto SymID =
1561               getSymbolID(SemaResult, Recorder->CCSema->getSourceManager())) {
1562         auto I = IndexResults.find(*SymID);
1563         if (I != IndexResults.end()) {
1564           UsedIndexResults.insert(&*I);
1565           return &*I;
1566         }
1567       }
1568       return nullptr;
1569     };
1570     // Emit all Sema results, merging them with Index results if possible.
1571     for (auto &SemaResult : SemaResults)
1572       AddToBundles(&SemaResult, CorrespondingIndexResult(SemaResult), nullptr);
1573     // Now emit any Index-only results.
1574     for (const auto &IndexResult : IndexResults) {
1575       if (UsedIndexResults.count(&IndexResult))
1576         continue;
1577       AddToBundles(/*SemaResult=*/nullptr, &IndexResult, nullptr);
1578     }
1579     // Emit identifier results.
1580     for (const auto &Ident : IdentifierResults)
1581       AddToBundles(/*SemaResult=*/nullptr, /*IndexResult=*/nullptr, &Ident);
1582     // We only keep the best N results at any time, in "native" format.
1583     TopN<ScoredBundle, ScoredBundleGreater> Top(
1584         Opts.Limit == 0 ? std::numeric_limits<size_t>::max() : Opts.Limit);
1585     for (auto &Bundle : Bundles)
1586       addCandidate(Top, std::move(Bundle));
1587     return std::move(Top).items();
1588   }
1589 
1590   llvm::Optional<float> fuzzyScore(const CompletionCandidate &C) {
1591     // Macros can be very spammy, so we only support prefix completion.
1592     // We won't end up with underfull index results, as macros are sema-only.
1593     if (C.SemaResult && C.SemaResult->Kind == CodeCompletionResult::RK_Macro &&
1594         !C.Name.startswith_lower(Filter->pattern()))
1595       return None;
1596     return Filter->match(C.Name);
1597   }
1598 
1599   // Scores a candidate and adds it to the TopN structure.
  void addCandidate(TopN<ScoredBundle, ScoredBundleGreater> &Candidates,
                    CompletionCandidate::Bundle Bundle) {
    // Signals that are the same for every candidate in the bundle.
    SymbolQualitySignals Quality;
    SymbolRelevanceSignals Relevance;
    Relevance.Context = CCContextKind;
    Relevance.Name = Bundle.front().Name;
    Relevance.Query = SymbolRelevanceSignals::CodeComplete;
    Relevance.FileProximityMatch = FileProximity.getPointer();
    if (ScopeProximity)
      Relevance.ScopeProximityMatch = ScopeProximity.getPointer();
    if (PreferredType)
      Relevance.HadContextType = true;
    Relevance.ContextWords = &ContextWords;

    // Only the first candidate of the bundle is matched against the filter.
    // If it doesn't match, the whole bundle is dropped without being counted.
    auto &First = Bundle.front();
    if (auto FuzzyScore = fuzzyScore(First))
      Relevance.NameMatch = *FuzzyScore;
    else
      return;
    // Fold the per-candidate signals of every bundle member into the shared
    // Quality/Relevance signals, and track where the bundle came from.
    SymbolOrigin Origin = SymbolOrigin::Unknown;
    bool FromIndex = false;
    for (const auto &Candidate : Bundle) {
      if (Candidate.IndexResult) {
        Quality.merge(*Candidate.IndexResult);
        Relevance.merge(*Candidate.IndexResult);
        Origin |= Candidate.IndexResult->Origin;
        FromIndex = true;
        if (!Candidate.IndexResult->Type.empty())
          Relevance.HadSymbolType |= true;
        // Index symbols carry their type as a raw string; compare it with the
        // raw form of the preferred type.
        if (PreferredType &&
            PreferredType->raw() == Candidate.IndexResult->Type) {
          Relevance.TypeMatchesPreferred = true;
        }
      }
      if (Candidate.SemaResult) {
        Quality.merge(*Candidate.SemaResult);
        Relevance.merge(*Candidate.SemaResult);
        // The type of a Sema result is only computed when there is a
        // preferred type to compare it against.
        if (PreferredType) {
          if (auto CompletionType = OpaqueType::fromCompletionResult(
                  Recorder->CCSema->getASTContext(), *Candidate.SemaResult)) {
            Relevance.HadSymbolType |= true;
            if (PreferredType == CompletionType)
              Relevance.TypeMatchesPreferred = true;
          }
        }
        Origin |= SymbolOrigin::AST;
      }
      if (Candidate.IdentifierResult) {
        // Raw identifiers only contribute their occurrence count and are
        // treated as file-scoped.
        Quality.References = Candidate.IdentifierResult->References;
        Relevance.Scope = SymbolRelevanceSignals::FileScope;
        Origin |= SymbolOrigin::Identifier;
      }
    }

    CodeCompletion::Scores Scores;
    Scores.Quality = Quality.evaluate();
    Scores.Relevance = Relevance.evaluate();
    Scores.Total = evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance);
    // NameMatch is in fact a multiplier on total score, so rescoring is sound.
    // A zero NameMatch cannot be divided out; fall back to the quality score.
    Scores.ExcludingName = Relevance.NameMatch
                               ? Scores.Total / Relevance.NameMatch
                               : Scores.Quality;

    // Optional callback that receives the rendered completion together with
    // its signals and total score.
    if (Opts.RecordCCResult)
      Opts.RecordCCResult(toCodeCompletion(Bundle), Quality, Relevance,
                          Scores.Total);

    dlog("CodeComplete: {0} ({1}) = {2}\n{3}{4}\n", First.Name,
         llvm::to_string(Origin), Scores.Total, llvm::to_string(Quality),
         llvm::to_string(Relevance));

    // Per-origin counters; a bundle may count towards several of them.
    NSema += bool(Origin & SymbolOrigin::AST);
    NIndex += FromIndex;
    NSemaAndIndex += bool(Origin & SymbolOrigin::AST) && FromIndex;
    NIdent += bool(Origin & SymbolOrigin::Identifier);
    // NOTE(review): push() presumably reports that the TopN was full and a
    // candidate had to be dropped - confirm against TopN's definition.
    if (Candidates.push({std::move(Bundle), Scores}))
      Incomplete = true;
  }
1678 
1679   CodeCompletion toCodeCompletion(const CompletionCandidate::Bundle &Bundle) {
1680     llvm::Optional<CodeCompletionBuilder> Builder;
1681     for (const auto &Item : Bundle) {
1682       CodeCompletionString *SemaCCS =
1683           Item.SemaResult ? Recorder->codeCompletionString(*Item.SemaResult)
1684                           : nullptr;
1685       if (!Builder)
1686         Builder.emplace(Recorder ? &Recorder->CCSema->getASTContext() : nullptr,
1687                         Item, SemaCCS, QueryScopes, *Inserter, FileName,
1688                         CCContextKind, Opts,
1689                         /*GenerateSnippets=*/!IsUsingDeclaration);
1690       else
1691         Builder->add(Item, SemaCCS);
1692     }
1693     return Builder->build();
1694   }
1695 };
1696 
1697 } // namespace
1698 
1699 clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
1700   clang::CodeCompleteOptions Result;
1701   Result.IncludeCodePatterns = EnableSnippets && IncludeCodePatterns;
1702   Result.IncludeMacros = IncludeMacros;
1703   Result.IncludeGlobals = true;
1704   // We choose to include full comments and not do doxygen parsing in
1705   // completion.
1706   // FIXME: ideally, we should support doxygen in some form, e.g. do markdown
1707   // formatting of the comments.
1708   Result.IncludeBriefComments = false;
1709 
1710   // When an is used, Sema is responsible for completing the main file,
1711   // the index can provide results from the preamble.
1712   // Tell Sema not to deserialize the preamble to look for results.
1713   Result.LoadExternal = !Index;
1714   Result.IncludeFixIts = IncludeFixIts;
1715 
1716   return Result;
1717 }
1718 
1719 CompletionPrefix guessCompletionPrefix(llvm::StringRef Content,
1720                                        unsigned Offset) {
1721   assert(Offset <= Content.size());
1722   StringRef Rest = Content.take_front(Offset);
1723   CompletionPrefix Result;
1724 
1725   // Consume the unqualified name. We only handle ASCII characters.
1726   // isIdentifierBody will let us match "0invalid", but we don't mind.
1727   while (!Rest.empty() && isIdentifierBody(Rest.back()))
1728     Rest = Rest.drop_back();
1729   Result.Name = Content.slice(Rest.size(), Offset);
1730 
1731   // Consume qualifiers.
1732   while (Rest.consume_back("::") && !Rest.endswith(":")) // reject ::::
1733     while (!Rest.empty() && isIdentifierBody(Rest.back()))
1734       Rest = Rest.drop_back();
1735   Result.Qualifier =
1736       Content.slice(Rest.size(), Result.Name.begin() - Content.begin());
1737 
1738   return Result;
1739 }
1740 
1741 CodeCompleteResult
1742 codeComplete(PathRef FileName, const tooling::CompileCommand &Command,
1743              const PreambleData *Preamble, llvm::StringRef Contents,
1744              Position Pos, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
1745              CodeCompleteOptions Opts, SpeculativeFuzzyFind *SpecFuzzyFind) {
1746   auto Offset = positionToOffset(Contents, Pos);
1747   if (!Offset) {
1748     elog("Code completion position was invalid {0}", Offset.takeError());
1749     return CodeCompleteResult();
1750   }
1751   auto Flow = CodeCompleteFlow(
1752       FileName, Preamble ? Preamble->Includes : IncludeStructure(),
1753       SpecFuzzyFind, Opts);
1754   return (!Preamble || Opts.RunParser == CodeCompleteOptions::NeverParse)
1755              ? std::move(Flow).runWithoutSema(Contents, *Offset, VFS)
1756              : std::move(Flow).run(
1757                    {FileName, Command, Preamble, Contents, *Offset, VFS});
1758 }
1759 
1760 SignatureHelp signatureHelp(PathRef FileName,
1761                             const tooling::CompileCommand &Command,
1762                             const PreambleData *Preamble,
1763                             llvm::StringRef Contents, Position Pos,
1764                             llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
1765                             const SymbolIndex *Index) {
1766   auto Offset = positionToOffset(Contents, Pos);
1767   if (!Offset) {
1768     elog("Code completion position was invalid {0}", Offset.takeError());
1769     return SignatureHelp();
1770   }
1771   SignatureHelp Result;
1772   clang::CodeCompleteOptions Options;
1773   Options.IncludeGlobals = false;
1774   Options.IncludeMacros = false;
1775   Options.IncludeCodePatterns = false;
1776   Options.IncludeBriefComments = false;
1777   IncludeStructure PreambleInclusions; // Unused for signatureHelp
1778   semaCodeComplete(
1779       std::make_unique<SignatureHelpCollector>(Options, Index, Result), Options,
1780       {FileName, Command, Preamble, Contents, *Offset, std::move(VFS)});
1781   return Result;
1782 }
1783 
1784 bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) {
1785   auto InTopLevelScope = [](const NamedDecl &ND) {
1786     switch (ND.getDeclContext()->getDeclKind()) {
1787     case Decl::TranslationUnit:
1788     case Decl::Namespace:
1789     case Decl::LinkageSpec:
1790       return true;
1791     default:
1792       break;
1793     };
1794     return false;
1795   };
1796   // We only complete symbol's name, which is the same as the name of the
1797   // *primary* template in case of template specializations.
1798   if (isExplicitTemplateSpecialization(&ND))
1799     return false;
1800 
1801   if (InTopLevelScope(ND))
1802     return true;
1803 
1804   if (const auto *EnumDecl = dyn_cast<clang::EnumDecl>(ND.getDeclContext()))
1805     return InTopLevelScope(*EnumDecl) && !EnumDecl->isScoped();
1806 
1807   return false;
1808 }
1809 
1810 CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const {
1811   CompletionItem LSP;
1812   const auto *InsertInclude = Includes.empty() ? nullptr : &Includes[0];
1813   LSP.label = ((InsertInclude && InsertInclude->Insertion)
1814                    ? Opts.IncludeIndicator.Insert
1815                    : Opts.IncludeIndicator.NoInsert) +
1816               (Opts.ShowOrigins ? "[" + llvm::to_string(Origin) + "]" : "") +
1817               RequiredQualifier + Name + Signature;
1818 
1819   LSP.kind = Kind;
1820   LSP.detail = BundleSize > 1
1821                    ? std::string(llvm::formatv("[{0} overloads]", BundleSize))
1822                    : ReturnType;
1823   LSP.deprecated = Deprecated;
1824   if (InsertInclude)
1825     LSP.detail += "\n" + InsertInclude->Header;
1826   LSP.documentation = Documentation;
1827   LSP.sortText = sortText(Score.Total, Name);
1828   LSP.filterText = Name;
1829   LSP.textEdit = {CompletionTokenRange, RequiredQualifier + Name};
1830   // Merge continuous additionalTextEdits into main edit. The main motivation
1831   // behind this is to help LSP clients, it seems most of them are confused when
1832   // they are provided with additionalTextEdits that are consecutive to main
1833   // edit.
1834   // Note that we store additional text edits from back to front in a line. That
1835   // is mainly to help LSP clients again, so that changes do not effect each
1836   // other.
1837   for (const auto &FixIt : FixIts) {
1838     if (isRangeConsecutive(FixIt.range, LSP.textEdit->range)) {
1839       LSP.textEdit->newText = FixIt.newText + LSP.textEdit->newText;
1840       LSP.textEdit->range.start = FixIt.range.start;
1841     } else {
1842       LSP.additionalTextEdits.push_back(FixIt);
1843     }
1844   }
1845   if (Opts.EnableSnippets)
1846     LSP.textEdit->newText += SnippetSuffix;
1847 
1848   // FIXME(kadircet): Do not even fill insertText after making sure textEdit is
1849   // compatible with most of the editors.
1850   LSP.insertText = LSP.textEdit->newText;
1851   LSP.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet
1852                                              : InsertTextFormat::PlainText;
1853   if (InsertInclude && InsertInclude->Insertion)
1854     LSP.additionalTextEdits.push_back(*InsertInclude->Insertion);
1855 
1856   LSP.score = Score.ExcludingName;
1857 
1858   return LSP;
1859 }
1860 
1861 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const CodeCompletion &C) {
1862   // For now just lean on CompletionItem.
1863   return OS << C.render(CodeCompleteOptions());
1864 }
1865 
1866 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
1867                               const CodeCompleteResult &R) {
1868   OS << "CodeCompleteResult: " << R.Completions.size() << (R.HasMore ? "+" : "")
1869      << " (" << getCompletionKindString(R.Context) << ")"
1870      << " items:\n";
1871   for (const auto &C : R.Completions)
1872     OS << C << "\n";
1873   return OS;
1874 }
1875 
1876 } // namespace clangd
1877 } // namespace clang
1878