1 //===--- CodeComplete.cpp ----------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Code completion has several moving parts:
10 //  - AST-based completions are provided using the completion hooks in Sema.
11 //  - external completions are retrieved from the index (using hints from Sema)
12 //  - the two sources overlap, and must be merged and overloads bundled
13 //  - results must be scored and ranked (see Quality.h) before rendering
14 //
15 // Signature help works in a similar way as code completion, but it is simpler:
16 // it's purely AST-based, and there are few candidates.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "CodeComplete.h"
21 #include "AST.h"
22 #include "CodeCompletionStrings.h"
23 #include "Compiler.h"
24 #include "Diagnostics.h"
25 #include "ExpectedTypes.h"
26 #include "FileDistance.h"
27 #include "FuzzyMatch.h"
28 #include "Headers.h"
29 #include "Hover.h"
30 #include "Preamble.h"
31 #include "Protocol.h"
32 #include "Quality.h"
33 #include "SourceCode.h"
34 #include "TUScheduler.h"
35 #include "URI.h"
36 #include "index/Index.h"
37 #include "index/Symbol.h"
38 #include "index/SymbolOrigin.h"
39 #include "support/FSProvider.h"
40 #include "support/Logger.h"
41 #include "support/Threading.h"
42 #include "support/Trace.h"
43 #include "clang/AST/Decl.h"
44 #include "clang/AST/DeclBase.h"
45 #include "clang/Basic/CharInfo.h"
46 #include "clang/Basic/LangOptions.h"
47 #include "clang/Basic/SourceLocation.h"
48 #include "clang/Basic/TokenKinds.h"
49 #include "clang/Format/Format.h"
50 #include "clang/Frontend/CompilerInstance.h"
51 #include "clang/Frontend/FrontendActions.h"
52 #include "clang/Lex/ExternalPreprocessorSource.h"
53 #include "clang/Lex/Lexer.h"
54 #include "clang/Lex/Preprocessor.h"
55 #include "clang/Lex/PreprocessorOptions.h"
56 #include "clang/Sema/CodeCompleteConsumer.h"
57 #include "clang/Sema/DeclSpec.h"
58 #include "clang/Sema/Sema.h"
59 #include "llvm/ADT/ArrayRef.h"
60 #include "llvm/ADT/None.h"
61 #include "llvm/ADT/Optional.h"
62 #include "llvm/ADT/SmallVector.h"
63 #include "llvm/ADT/StringExtras.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/Debug.h"
67 #include "llvm/Support/Error.h"
68 #include "llvm/Support/Format.h"
69 #include "llvm/Support/FormatVariadic.h"
70 #include "llvm/Support/ScopedPrinter.h"
71 #include <algorithm>
72 #include <iterator>
73 
// We log detailed candidates here if you run with -debug-only=CodeComplete.
75 #define DEBUG_TYPE "CodeComplete"
76 
77 namespace clang {
78 namespace clangd {
79 namespace {
80 
81 CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
82   using SK = index::SymbolKind;
83   switch (Kind) {
84   case SK::Unknown:
85     return CompletionItemKind::Missing;
86   case SK::Module:
87   case SK::Namespace:
88   case SK::NamespaceAlias:
89     return CompletionItemKind::Module;
90   case SK::Macro:
91     return CompletionItemKind::Text;
92   case SK::Enum:
93     return CompletionItemKind::Enum;
94   case SK::Struct:
95     return CompletionItemKind::Struct;
96   case SK::Class:
97   case SK::Protocol:
98   case SK::Extension:
99   case SK::Union:
100     return CompletionItemKind::Class;
101   case SK::TypeAlias:
102     // We use the same kind as the VSCode C++ extension.
103     // FIXME: pick a better option when we have one.
104     return CompletionItemKind::Interface;
105   case SK::Using:
106     return CompletionItemKind::Reference;
107   case SK::Function:
108   case SK::ConversionFunction:
109     return CompletionItemKind::Function;
110   case SK::Variable:
111   case SK::Parameter:
112   case SK::NonTypeTemplateParm:
113     return CompletionItemKind::Variable;
114   case SK::Field:
115     return CompletionItemKind::Field;
116   case SK::EnumConstant:
117     return CompletionItemKind::EnumMember;
118   case SK::InstanceMethod:
119   case SK::ClassMethod:
120   case SK::StaticMethod:
121   case SK::Destructor:
122     return CompletionItemKind::Method;
123   case SK::InstanceProperty:
124   case SK::ClassProperty:
125   case SK::StaticProperty:
126     return CompletionItemKind::Property;
127   case SK::Constructor:
128     return CompletionItemKind::Constructor;
129   case SK::TemplateTypeParm:
130   case SK::TemplateTemplateParm:
131     return CompletionItemKind::TypeParameter;
132   }
133   llvm_unreachable("Unhandled clang::index::SymbolKind.");
134 }
135 
136 CompletionItemKind
137 toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,
138                      const NamedDecl *Decl,
139                      CodeCompletionContext::Kind CtxKind) {
140   if (Decl)
141     return toCompletionItemKind(index::getSymbolInfo(Decl).Kind);
142   if (CtxKind == CodeCompletionContext::CCC_IncludedFile)
143     return CompletionItemKind::File;
144   switch (ResKind) {
145   case CodeCompletionResult::RK_Declaration:
146     llvm_unreachable("RK_Declaration without Decl");
147   case CodeCompletionResult::RK_Keyword:
148     return CompletionItemKind::Keyword;
149   case CodeCompletionResult::RK_Macro:
150     return CompletionItemKind::Text; // unfortunately, there's no 'Macro'
151                                      // completion items in LSP.
152   case CodeCompletionResult::RK_Pattern:
153     return CompletionItemKind::Snippet;
154   }
155   llvm_unreachable("Unhandled CodeCompletionResult::ResultKind.");
156 }
157 
// Identifier code completion result.
// Pairs the text of an identifier with the number of times it is used in the
// file.
struct RawIdentifier {
  llvm::StringRef Name; // Text of the identifier; not owned by this struct.
  unsigned References; // # of usages in file.
};
163 
164 /// A code completion result, in clang-native form.
165 /// It may be promoted to a CompletionItem if it's among the top-ranked results.
166 struct CompletionCandidate {
167   llvm::StringRef Name; // Used for filtering and sorting.
168   // We may have a result from Sema, from the index, or both.
169   const CodeCompletionResult *SemaResult = nullptr;
170   const Symbol *IndexResult = nullptr;
171   const RawIdentifier *IdentifierResult = nullptr;
172   llvm::SmallVector<llvm::StringRef, 1> RankedIncludeHeaders;
173 
174   // Returns a token identifying the overload set this is part of.
175   // 0 indicates it's not part of any overload set.
176   size_t overloadSet(const CodeCompleteOptions &Opts) const {
177     if (!Opts.BundleOverloads.getValueOr(false))
178       return 0;
179     llvm::SmallString<256> Scratch;
180     if (IndexResult) {
181       switch (IndexResult->SymInfo.Kind) {
182       case index::SymbolKind::ClassMethod:
183       case index::SymbolKind::InstanceMethod:
184       case index::SymbolKind::StaticMethod:
185 #ifndef NDEBUG
186         llvm_unreachable("Don't expect members from index in code completion");
187 #else
188         LLVM_FALLTHROUGH;
189 #endif
190       case index::SymbolKind::Function:
191         // We can't group overloads together that need different #includes.
192         // This could break #include insertion.
193         return llvm::hash_combine(
194             (IndexResult->Scope + IndexResult->Name).toStringRef(Scratch),
195             headerToInsertIfAllowed(Opts).getValueOr(""));
196       default:
197         return 0;
198       }
199     }
200     if (SemaResult) {
201       // We need to make sure we're consistent with the IndexResult case!
202       const NamedDecl *D = SemaResult->Declaration;
203       if (!D || !D->isFunctionOrFunctionTemplate())
204         return 0;
205       {
206         llvm::raw_svector_ostream OS(Scratch);
207         D->printQualifiedName(OS);
208       }
209       return llvm::hash_combine(Scratch,
210                                 headerToInsertIfAllowed(Opts).getValueOr(""));
211     }
212     assert(IdentifierResult);
213     return 0;
214   }
215 
216   // The best header to include if include insertion is allowed.
217   llvm::Optional<llvm::StringRef>
218   headerToInsertIfAllowed(const CodeCompleteOptions &Opts) const {
219     if (Opts.InsertIncludes == CodeCompleteOptions::NeverInsert ||
220         RankedIncludeHeaders.empty())
221       return None;
222     if (SemaResult && SemaResult->Declaration) {
223       // Avoid inserting new #include if the declaration is found in the current
224       // file e.g. the symbol is forward declared.
225       auto &SM = SemaResult->Declaration->getASTContext().getSourceManager();
226       for (const Decl *RD : SemaResult->Declaration->redecls())
227         if (SM.isInMainFile(SM.getExpansionLoc(RD->getBeginLoc())))
228           return None;
229     }
230     return RankedIncludeHeaders[0];
231   }
232 
233   using Bundle = llvm::SmallVector<CompletionCandidate, 4>;
234 };
235 using ScoredBundle =
236     std::pair<CompletionCandidate::Bundle, CodeCompletion::Scores>;
237 struct ScoredBundleGreater {
238   bool operator()(const ScoredBundle &L, const ScoredBundle &R) {
239     if (L.second.Total != R.second.Total)
240       return L.second.Total > R.second.Total;
241     return L.first.front().Name <
242            R.first.front().Name; // Earlier name is better.
243   }
244 };
245 
246 // Assembles a code completion out of a bundle of >=1 completion candidates.
247 // Many of the expensive strings are only computed at this point, once we know
248 // the candidate bundle is going to be returned.
249 //
250 // Many fields are the same for all candidates in a bundle (e.g. name), and are
251 // computed from the first candidate, in the constructor.
252 // Others vary per candidate, so add() must be called for remaining candidates.
253 struct CodeCompletionBuilder {
254   CodeCompletionBuilder(ASTContext *ASTCtx, const CompletionCandidate &C,
255                         CodeCompletionString *SemaCCS,
256                         llvm::ArrayRef<std::string> QueryScopes,
257                         const IncludeInserter &Includes,
258                         llvm::StringRef FileName,
259                         CodeCompletionContext::Kind ContextKind,
260                         const CodeCompleteOptions &Opts,
261                         bool IsUsingDeclaration, tok::TokenKind NextTokenKind)
262       : ASTCtx(ASTCtx), ExtractDocumentation(Opts.IncludeComments),
263         EnableFunctionArgSnippets(Opts.EnableFunctionArgSnippets),
264         IsUsingDeclaration(IsUsingDeclaration), NextTokenKind(NextTokenKind) {
265     add(C, SemaCCS);
266     if (C.SemaResult) {
267       assert(ASTCtx);
268       Completion.Origin |= SymbolOrigin::AST;
269       Completion.Name = std::string(llvm::StringRef(SemaCCS->getTypedText()));
270       if (Completion.Scope.empty()) {
271         if ((C.SemaResult->Kind == CodeCompletionResult::RK_Declaration) ||
272             (C.SemaResult->Kind == CodeCompletionResult::RK_Pattern))
273           if (const auto *D = C.SemaResult->getDeclaration())
274             if (const auto *ND = dyn_cast<NamedDecl>(D))
275               Completion.Scope = std::string(
276                   splitQualifiedName(printQualifiedName(*ND)).first);
277       }
278       Completion.Kind = toCompletionItemKind(
279           C.SemaResult->Kind, C.SemaResult->Declaration, ContextKind);
280       // Sema could provide more info on whether the completion was a file or
281       // folder.
282       if (Completion.Kind == CompletionItemKind::File &&
283           Completion.Name.back() == '/')
284         Completion.Kind = CompletionItemKind::Folder;
285       for (const auto &FixIt : C.SemaResult->FixIts) {
286         Completion.FixIts.push_back(toTextEdit(
287             FixIt, ASTCtx->getSourceManager(), ASTCtx->getLangOpts()));
288       }
289       llvm::sort(Completion.FixIts, [](const TextEdit &X, const TextEdit &Y) {
290         return std::tie(X.range.start.line, X.range.start.character) <
291                std::tie(Y.range.start.line, Y.range.start.character);
292       });
293       Completion.Deprecated |=
294           (C.SemaResult->Availability == CXAvailability_Deprecated);
295     }
296     if (C.IndexResult) {
297       Completion.Origin |= C.IndexResult->Origin;
298       if (Completion.Scope.empty())
299         Completion.Scope = std::string(C.IndexResult->Scope);
300       if (Completion.Kind == CompletionItemKind::Missing)
301         Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind);
302       if (Completion.Name.empty())
303         Completion.Name = std::string(C.IndexResult->Name);
304       // If the completion was visible to Sema, no qualifier is needed. This
305       // avoids unneeded qualifiers in cases like with `using ns::X`.
306       if (Completion.RequiredQualifier.empty() && !C.SemaResult) {
307         llvm::StringRef ShortestQualifier = C.IndexResult->Scope;
308         for (llvm::StringRef Scope : QueryScopes) {
309           llvm::StringRef Qualifier = C.IndexResult->Scope;
310           if (Qualifier.consume_front(Scope) &&
311               Qualifier.size() < ShortestQualifier.size())
312             ShortestQualifier = Qualifier;
313         }
314         Completion.RequiredQualifier = std::string(ShortestQualifier);
315       }
316       Completion.Deprecated |= (C.IndexResult->Flags & Symbol::Deprecated);
317     }
318     if (C.IdentifierResult) {
319       Completion.Origin |= SymbolOrigin::Identifier;
320       Completion.Kind = CompletionItemKind::Text;
321       Completion.Name = std::string(C.IdentifierResult->Name);
322     }
323 
324     // Turn absolute path into a literal string that can be #included.
325     auto Inserted = [&](llvm::StringRef Header)
326         -> llvm::Expected<std::pair<std::string, bool>> {
327       auto ResolvedDeclaring =
328           URI::resolve(C.IndexResult->CanonicalDeclaration.FileURI, FileName);
329       if (!ResolvedDeclaring)
330         return ResolvedDeclaring.takeError();
331       auto ResolvedInserted = toHeaderFile(Header, FileName);
332       if (!ResolvedInserted)
333         return ResolvedInserted.takeError();
334       auto Spelled = Includes.calculateIncludePath(*ResolvedInserted, FileName);
335       if (!Spelled)
336         return llvm::createStringError(llvm::inconvertibleErrorCode(),
337                                        "Header not on include path");
338       return std::make_pair(
339           std::move(*Spelled),
340           Includes.shouldInsertInclude(*ResolvedDeclaring, *ResolvedInserted));
341     };
342     bool ShouldInsert = C.headerToInsertIfAllowed(Opts).hasValue();
343     // Calculate include paths and edits for all possible headers.
344     for (const auto &Inc : C.RankedIncludeHeaders) {
345       if (auto ToInclude = Inserted(Inc)) {
346         CodeCompletion::IncludeCandidate Include;
347         Include.Header = ToInclude->first;
348         if (ToInclude->second && ShouldInsert)
349           Include.Insertion = Includes.insert(ToInclude->first);
350         Completion.Includes.push_back(std::move(Include));
351       } else
352         log("Failed to generate include insertion edits for adding header "
353             "(FileURI='{0}', IncludeHeader='{1}') into {2}: {3}",
354             C.IndexResult->CanonicalDeclaration.FileURI, Inc, FileName,
355             ToInclude.takeError());
356     }
357     // Prefer includes that do not need edits (i.e. already exist).
358     std::stable_partition(Completion.Includes.begin(),
359                           Completion.Includes.end(),
360                           [](const CodeCompletion::IncludeCandidate &I) {
361                             return !I.Insertion.hasValue();
362                           });
363   }
364 
365   void add(const CompletionCandidate &C, CodeCompletionString *SemaCCS) {
366     assert(bool(C.SemaResult) == bool(SemaCCS));
367     Bundled.emplace_back();
368     BundledEntry &S = Bundled.back();
369     if (C.SemaResult) {
370       bool IsPattern = C.SemaResult->Kind == CodeCompletionResult::RK_Pattern;
371       getSignature(*SemaCCS, &S.Signature, &S.SnippetSuffix,
372                    &Completion.RequiredQualifier, IsPattern);
373       S.ReturnType = getReturnType(*SemaCCS);
374     } else if (C.IndexResult) {
375       S.Signature = std::string(C.IndexResult->Signature);
376       S.SnippetSuffix = std::string(C.IndexResult->CompletionSnippetSuffix);
377       S.ReturnType = std::string(C.IndexResult->ReturnType);
378     }
379     if (ExtractDocumentation && !Completion.Documentation) {
380       auto SetDoc = [&](llvm::StringRef Doc) {
381         if (!Doc.empty()) {
382           Completion.Documentation.emplace();
383           parseDocumentation(Doc, *Completion.Documentation);
384         }
385       };
386       if (C.IndexResult) {
387         SetDoc(C.IndexResult->Documentation);
388       } else if (C.SemaResult) {
389         SetDoc(getDocComment(*ASTCtx, *C.SemaResult,
390                              /*CommentsFromHeader=*/false));
391       }
392     }
393   }
394 
395   CodeCompletion build() {
396     Completion.ReturnType = summarizeReturnType();
397     Completion.Signature = summarizeSignature();
398     Completion.SnippetSuffix = summarizeSnippet();
399     Completion.BundleSize = Bundled.size();
400     return std::move(Completion);
401   }
402 
403 private:
404   struct BundledEntry {
405     std::string SnippetSuffix;
406     std::string Signature;
407     std::string ReturnType;
408   };
409 
410   // If all BundledEntries have the same value for a property, return it.
411   template <std::string BundledEntry::*Member>
412   const std::string *onlyValue() const {
413     auto B = Bundled.begin(), E = Bundled.end();
414     for (auto I = B + 1; I != E; ++I)
415       if (I->*Member != B->*Member)
416         return nullptr;
417     return &(B->*Member);
418   }
419 
420   template <bool BundledEntry::*Member> const bool *onlyValue() const {
421     auto B = Bundled.begin(), E = Bundled.end();
422     for (auto I = B + 1; I != E; ++I)
423       if (I->*Member != B->*Member)
424         return nullptr;
425     return &(B->*Member);
426   }
427 
428   std::string summarizeReturnType() const {
429     if (auto *RT = onlyValue<&BundledEntry::ReturnType>())
430       return *RT;
431     return "";
432   }
433 
434   std::string summarizeSnippet() const {
435     if (IsUsingDeclaration)
436       return "";
437     // Suppress function argument snippets if args are already present.
438     if ((Completion.Kind == CompletionItemKind::Function ||
439          Completion.Kind == CompletionItemKind::Method ||
440          Completion.Kind == CompletionItemKind::Constructor) &&
441         NextTokenKind == tok::l_paren)
442       return "";
443     auto *Snippet = onlyValue<&BundledEntry::SnippetSuffix>();
444     if (!Snippet)
445       // All bundles are function calls.
446       // FIXME(ibiryukov): sometimes add template arguments to a snippet, e.g.
447       // we need to complete 'forward<$1>($0)'.
448       return "($0)";
449     if (EnableFunctionArgSnippets)
450       return *Snippet;
451 
452     // Replace argument snippets with a simplified pattern.
453     if (Snippet->empty())
454       return "";
455     if (Completion.Kind == CompletionItemKind::Function ||
456         Completion.Kind == CompletionItemKind::Method) {
457       // Functions snippets can be of 2 types:
458       // - containing only function arguments, e.g.
459       //   foo(${1:int p1}, ${2:int p2});
460       //   We transform this pattern to '($0)' or '()'.
461       // - template arguments and function arguments, e.g.
462       //   foo<${1:class}>(${2:int p1}).
463       //   We transform this pattern to '<$1>()$0' or '<$0>()'.
464 
465       bool EmptyArgs = llvm::StringRef(*Snippet).endswith("()");
466       if (Snippet->front() == '<')
467         return EmptyArgs ? "<$1>()$0" : "<$1>($0)";
468       if (Snippet->front() == '(')
469         return EmptyArgs ? "()" : "($0)";
470       return *Snippet; // Not an arg snippet?
471     }
472     // 'CompletionItemKind::Interface' matches template type aliases.
473     if (Completion.Kind == CompletionItemKind::Interface ||
474         Completion.Kind == CompletionItemKind::Class) {
475       if (Snippet->front() != '<')
476         return *Snippet; // Not an arg snippet?
477 
478       // Classes and template using aliases can only have template arguments,
479       // e.g. Foo<${1:class}>.
480       if (llvm::StringRef(*Snippet).endswith("<>"))
481         return "<>"; // can happen with defaulted template arguments.
482       return "<$0>";
483     }
484     return *Snippet;
485   }
486 
487   std::string summarizeSignature() const {
488     if (auto *Signature = onlyValue<&BundledEntry::Signature>())
489       return *Signature;
490     // All bundles are function calls.
491     return "(…)";
492   }
493 
494   // ASTCtx can be nullptr if not run with sema.
495   ASTContext *ASTCtx;
496   CodeCompletion Completion;
497   llvm::SmallVector<BundledEntry, 1> Bundled;
498   bool ExtractDocumentation;
499   bool EnableFunctionArgSnippets;
500   // No snippets will be generated for using declarations and when the function
501   // arguments are already present.
502   bool IsUsingDeclaration;
503   tok::TokenKind NextTokenKind;
504 };
505 
506 // Determine the symbol ID for a Sema code completion result, if possible.
507 llvm::Optional<SymbolID> getSymbolID(const CodeCompletionResult &R,
508                                      const SourceManager &SM) {
509   switch (R.Kind) {
510   case CodeCompletionResult::RK_Declaration:
511   case CodeCompletionResult::RK_Pattern: {
512     // Computing USR caches linkage, which may change after code completion.
513     if (hasUnstableLinkage(R.Declaration))
514       return llvm::None;
515     return clang::clangd::getSymbolID(R.Declaration);
516   }
517   case CodeCompletionResult::RK_Macro:
518     return clang::clangd::getSymbolID(R.Macro->getName(), R.MacroDefInfo, SM);
519   case CodeCompletionResult::RK_Keyword:
520     return None;
521   }
522   llvm_unreachable("unknown CodeCompletionResult kind");
523 }
524 
525 // Scopes of the partial identifier we're trying to complete.
526 // It is used when we query the index for more completion results.
527 struct SpecifiedScope {
528   // The scopes we should look in, determined by Sema.
529   //
530   // If the qualifier was fully resolved, we look for completions in these
531   // scopes; if there is an unresolved part of the qualifier, it should be
532   // resolved within these scopes.
533   //
534   // Examples of qualified completion:
535   //
536   //   "::vec"                                      => {""}
537   //   "using namespace std; ::vec^"                => {"", "std::"}
538   //   "namespace ns {using namespace std;} ns::^"  => {"ns::", "std::"}
539   //   "std::vec^"                                  => {""}  // "std" unresolved
540   //
541   // Examples of unqualified completion:
542   //
543   //   "vec^"                                       => {""}
544   //   "using namespace std; vec^"                  => {"", "std::"}
545   //   "using namespace std; namespace ns { vec^ }" => {"ns::", "std::", ""}
546   //
547   // "" for global namespace, "ns::" for normal namespace.
548   std::vector<std::string> AccessibleScopes;
549   // The full scope qualifier as typed by the user (without the leading "::").
550   // Set if the qualifier is not fully resolved by Sema.
551   llvm::Optional<std::string> UnresolvedQualifier;
552 
553   // Construct scopes being queried in indexes. The results are deduplicated.
554   // This method format the scopes to match the index request representation.
555   std::vector<std::string> scopesForIndexQuery() {
556     std::set<std::string> Results;
557     for (llvm::StringRef AS : AccessibleScopes)
558       Results.insert(
559           (AS + (UnresolvedQualifier ? *UnresolvedQualifier : "")).str());
560     return {Results.begin(), Results.end()};
561   }
562 };
563 
564 // Get all scopes that will be queried in indexes and whether symbols from
565 // any scope is allowed. The first scope in the list is the preferred scope
566 // (e.g. enclosing namespace).
567 std::pair<std::vector<std::string>, bool>
568 getQueryScopes(CodeCompletionContext &CCContext, const Sema &CCSema,
569                const CompletionPrefix &HeuristicPrefix,
570                const CodeCompleteOptions &Opts) {
571   SpecifiedScope Scopes;
572   for (auto *Context : CCContext.getVisitedContexts()) {
573     if (isa<TranslationUnitDecl>(Context))
574       Scopes.AccessibleScopes.push_back(""); // global namespace
575     else if (isa<NamespaceDecl>(Context))
576       Scopes.AccessibleScopes.push_back(printNamespaceScope(*Context));
577   }
578 
579   const CXXScopeSpec *SemaSpecifier =
580       CCContext.getCXXScopeSpecifier().getValueOr(nullptr);
581   // Case 1: unqualified completion.
582   if (!SemaSpecifier) {
583     // Case 2 (exception): sema saw no qualifier, but there appears to be one!
584     // This can happen e.g. in incomplete macro expansions. Use heuristics.
585     if (!HeuristicPrefix.Qualifier.empty()) {
586       vlog("Sema said no scope specifier, but we saw {0} in the source code",
587            HeuristicPrefix.Qualifier);
588       StringRef SpelledSpecifier = HeuristicPrefix.Qualifier;
589       if (SpelledSpecifier.consume_front("::"))
590         Scopes.AccessibleScopes = {""};
591       Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
592       return {Scopes.scopesForIndexQuery(), false};
593     }
594     // The enclosing namespace must be first, it gets a quality boost.
595     std::vector<std::string> EnclosingAtFront;
596     std::string EnclosingScope = printNamespaceScope(*CCSema.CurContext);
597     EnclosingAtFront.push_back(EnclosingScope);
598     for (auto &S : Scopes.scopesForIndexQuery()) {
599       if (EnclosingScope != S)
600         EnclosingAtFront.push_back(std::move(S));
601     }
602     // Allow AllScopes completion as there is no explicit scope qualifier.
603     return {EnclosingAtFront, Opts.AllScopes};
604   }
605   // Case 3: sema saw and resolved a scope qualifier.
606   if (SemaSpecifier && SemaSpecifier->isValid())
607     return {Scopes.scopesForIndexQuery(), false};
608 
609   // Case 4: There was a qualifier, and Sema didn't resolve it.
610   Scopes.AccessibleScopes.push_back(""); // Make sure global scope is included.
611   llvm::StringRef SpelledSpecifier = Lexer::getSourceText(
612       CharSourceRange::getCharRange(SemaSpecifier->getRange()),
613       CCSema.SourceMgr, clang::LangOptions());
614   if (SpelledSpecifier.consume_front("::"))
615     Scopes.AccessibleScopes = {""};
616   Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
617   // Sema excludes the trailing "::".
618   if (!Scopes.UnresolvedQualifier->empty())
619     *Scopes.UnresolvedQualifier += "::";
620 
621   return {Scopes.scopesForIndexQuery(), false};
622 }
623 
624 // Should we perform index-based completion in a context of the specified kind?
625 // FIXME: consider allowing completion, but restricting the result types.
626 bool contextAllowsIndex(enum CodeCompletionContext::Kind K) {
627   switch (K) {
628   case CodeCompletionContext::CCC_TopLevel:
629   case CodeCompletionContext::CCC_ObjCInterface:
630   case CodeCompletionContext::CCC_ObjCImplementation:
631   case CodeCompletionContext::CCC_ObjCIvarList:
632   case CodeCompletionContext::CCC_ClassStructUnion:
633   case CodeCompletionContext::CCC_Statement:
634   case CodeCompletionContext::CCC_Expression:
635   case CodeCompletionContext::CCC_ObjCMessageReceiver:
636   case CodeCompletionContext::CCC_EnumTag:
637   case CodeCompletionContext::CCC_UnionTag:
638   case CodeCompletionContext::CCC_ClassOrStructTag:
639   case CodeCompletionContext::CCC_ObjCProtocolName:
640   case CodeCompletionContext::CCC_Namespace:
641   case CodeCompletionContext::CCC_Type:
642   case CodeCompletionContext::CCC_ParenthesizedExpression:
643   case CodeCompletionContext::CCC_ObjCInterfaceName:
644   case CodeCompletionContext::CCC_ObjCCategoryName:
645   case CodeCompletionContext::CCC_Symbol:
646   case CodeCompletionContext::CCC_SymbolOrNewName:
647     return true;
648   case CodeCompletionContext::CCC_OtherWithMacros:
649   case CodeCompletionContext::CCC_DotMemberAccess:
650   case CodeCompletionContext::CCC_ArrowMemberAccess:
651   case CodeCompletionContext::CCC_ObjCPropertyAccess:
652   case CodeCompletionContext::CCC_MacroName:
653   case CodeCompletionContext::CCC_MacroNameUse:
654   case CodeCompletionContext::CCC_PreprocessorExpression:
655   case CodeCompletionContext::CCC_PreprocessorDirective:
656   case CodeCompletionContext::CCC_SelectorName:
657   case CodeCompletionContext::CCC_TypeQualifiers:
658   case CodeCompletionContext::CCC_ObjCInstanceMessage:
659   case CodeCompletionContext::CCC_ObjCClassMessage:
660   case CodeCompletionContext::CCC_IncludedFile:
661   // FIXME: Provide identifier based completions for the following contexts:
662   case CodeCompletionContext::CCC_Other: // Be conservative.
663   case CodeCompletionContext::CCC_NaturalLanguage:
664   case CodeCompletionContext::CCC_Recovery:
665   case CodeCompletionContext::CCC_NewName:
666     return false;
667   }
668   llvm_unreachable("unknown code completion context");
669 }
670 
671 static bool isInjectedClass(const NamedDecl &D) {
672   if (auto *R = dyn_cast_or_null<RecordDecl>(&D))
673     if (R->isInjectedClassName())
674       return true;
675   return false;
676 }
677 
678 // Some member calls are blacklisted because they're so rarely useful.
679 static bool isBlacklistedMember(const NamedDecl &D) {
680   // Destructor completion is rarely useful, and works inconsistently.
681   // (s.^ completes ~string, but s.~st^ is an error).
682   if (D.getKind() == Decl::CXXDestructor)
683     return true;
684   // Injected name may be useful for A::foo(), but who writes A::A::foo()?
685   if (isInjectedClass(D))
686     return true;
687   // Explicit calls to operators are also rare.
688   auto NameKind = D.getDeclName().getNameKind();
689   if (NameKind == DeclarationName::CXXOperatorName ||
690       NameKind == DeclarationName::CXXLiteralOperatorName ||
691       NameKind == DeclarationName::CXXConversionFunctionName)
692     return true;
693   return false;
694 }
695 
696 // The CompletionRecorder captures Sema code-complete output, including context.
697 // It filters out ignored results (but doesn't apply fuzzy-filtering yet).
698 // It doesn't do scoring or conversion to CompletionItem yet, as we want to
699 // merge with index results first.
700 // Generally the fields and methods of this object should only be used from
701 // within the callback.
702 struct CompletionRecorder : public CodeCompleteConsumer {
703   CompletionRecorder(const CodeCompleteOptions &Opts,
704                      llvm::unique_function<void()> ResultsCallback)
705       : CodeCompleteConsumer(Opts.getClangCompleteOpts()),
706         CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
707         CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
708         CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {
709     assert(this->ResultsCallback);
710   }
711 
712   std::vector<CodeCompletionResult> Results;
713   CodeCompletionContext CCContext;
714   Sema *CCSema = nullptr; // Sema that created the results.
715   // FIXME: Sema is scary. Can we store ASTContext and Preprocessor, instead?
716 
717   void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
718                                   CodeCompletionResult *InResults,
719                                   unsigned NumResults) override final {
720     // Results from recovery mode are generally useless, and the callback after
721     // recovery (if any) is usually more interesting. To make sure we handle the
722     // future callback from sema, we just ignore all callbacks in recovery mode,
723     // as taking only results from recovery mode results in poor completion
724     // results.
725     // FIXME: in case there is no future sema completion callback after the
726     // recovery mode, we might still want to provide some results (e.g. trivial
727     // identifier-based completion).
728     if (Context.getKind() == CodeCompletionContext::CCC_Recovery) {
729       log("Code complete: Ignoring sema code complete callback with Recovery "
730           "context.");
731       return;
732     }
733     // If a callback is called without any sema result and the context does not
734     // support index-based completion, we simply skip it to give way to
735     // potential future callbacks with results.
736     if (NumResults == 0 && !contextAllowsIndex(Context.getKind()))
737       return;
738     if (CCSema) {
739       log("Multiple code complete callbacks (parser backtracked?). "
740           "Dropping results from context {0}, keeping results from {1}.",
741           getCompletionKindString(Context.getKind()),
742           getCompletionKindString(this->CCContext.getKind()));
743       return;
744     }
745     // Record the completion context.
746     CCSema = &S;
747     CCContext = Context;
748 
749     // Retain the results we might want.
750     for (unsigned I = 0; I < NumResults; ++I) {
751       auto &Result = InResults[I];
752       // Class members that are shadowed by subclasses are usually noise.
753       if (Result.Hidden && Result.Declaration &&
754           Result.Declaration->isCXXClassMember())
755         continue;
756       if (!Opts.IncludeIneligibleResults &&
757           (Result.Availability == CXAvailability_NotAvailable ||
758            Result.Availability == CXAvailability_NotAccessible))
759         continue;
760       if (Result.Declaration &&
761           !Context.getBaseType().isNull() // is this a member-access context?
762           && isBlacklistedMember(*Result.Declaration))
763         continue;
764       // Skip injected class name when no class scope is not explicitly set.
765       // E.g. show injected A::A in `using A::A^` but not in "A^".
766       if (Result.Declaration && !Context.getCXXScopeSpecifier().hasValue() &&
767           isInjectedClass(*Result.Declaration))
768         continue;
769       // We choose to never append '::' to completion results in clangd.
770       Result.StartsNestedNameSpecifier = false;
771       Results.push_back(Result);
772     }
773     ResultsCallback();
774   }
775 
776   CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
777   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
778 
779   // Returns the filtering/sorting name for Result, which must be from Results.
780   // Returned string is owned by this recorder (or the AST).
781   llvm::StringRef getName(const CodeCompletionResult &Result) {
782     switch (Result.Kind) {
783     case CodeCompletionResult::RK_Declaration:
784       if (auto *ID = Result.Declaration->getIdentifier())
785         return ID->getName();
786       break;
787     case CodeCompletionResult::RK_Keyword:
788       return Result.Keyword;
789     case CodeCompletionResult::RK_Macro:
790       return Result.Macro->getName();
791     case CodeCompletionResult::RK_Pattern:
792       return Result.Pattern->getTypedText();
793     }
794     auto *CCS = codeCompletionString(Result);
795     return CCS->getTypedText();
796   }
797 
798   // Build a CodeCompletion string for R, which must be from Results.
799   // The CCS will be owned by this recorder.
800   CodeCompletionString *codeCompletionString(const CodeCompletionResult &R) {
801     // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
802     return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
803         *CCSema, CCContext, *CCAllocator, CCTUInfo,
804         /*IncludeBriefComments=*/false);
805   }
806 
807 private:
808   CodeCompleteOptions Opts;
809   std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
810   CodeCompletionTUInfo CCTUInfo;
811   llvm::unique_function<void()> ResultsCallback;
812 };
813 
// A signature-help entry together with the signals used to order it.
struct ScoredSignature {
  // When set, requires documentation to be requested from the index with this
  // ID.
  llvm::Optional<SymbolID> IDForDoc;
  // The signature (label, parameters, documentation) that ends up in the
  // SignatureHelp response.
  SignatureInformation Signature;
  // Signals consulted when sorting the collected signatures.
  SignatureQualitySignals Quality;
};
821 
822 class SignatureHelpCollector final : public CodeCompleteConsumer {
823 public:
824   SignatureHelpCollector(const clang::CodeCompleteOptions &CodeCompleteOpts,
825                          const SymbolIndex *Index, SignatureHelp &SigHelp)
826       : CodeCompleteConsumer(CodeCompleteOpts), SigHelp(SigHelp),
827         Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
828         CCTUInfo(Allocator), Index(Index) {}
829 
830   void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg,
831                                  OverloadCandidate *Candidates,
832                                  unsigned NumCandidates,
833                                  SourceLocation OpenParLoc) override {
834     assert(!OpenParLoc.isInvalid());
835     SourceManager &SrcMgr = S.getSourceManager();
836     OpenParLoc = SrcMgr.getFileLoc(OpenParLoc);
837     if (SrcMgr.isInMainFile(OpenParLoc))
838       SigHelp.argListStart = sourceLocToPosition(SrcMgr, OpenParLoc);
839     else
840       elog("Location oustide main file in signature help: {0}",
841            OpenParLoc.printToString(SrcMgr));
842 
843     std::vector<ScoredSignature> ScoredSignatures;
844     SigHelp.signatures.reserve(NumCandidates);
845     ScoredSignatures.reserve(NumCandidates);
846     // FIXME(rwols): How can we determine the "active overload candidate"?
847     // Right now the overloaded candidates seem to be provided in a "best fit"
848     // order, so I'm not too worried about this.
849     SigHelp.activeSignature = 0;
850     assert(CurrentArg <= (unsigned)std::numeric_limits<int>::max() &&
851            "too many arguments");
852     SigHelp.activeParameter = static_cast<int>(CurrentArg);
853     for (unsigned I = 0; I < NumCandidates; ++I) {
854       OverloadCandidate Candidate = Candidates[I];
855       // We want to avoid showing instantiated signatures, because they may be
856       // long in some cases (e.g. when 'T' is substituted with 'std::string', we
857       // would get 'std::basic_string<char>').
858       if (auto *Func = Candidate.getFunction()) {
859         if (auto *Pattern = Func->getTemplateInstantiationPattern())
860           Candidate = OverloadCandidate(Pattern);
861       }
862 
863       const auto *CCS = Candidate.CreateSignatureString(
864           CurrentArg, S, *Allocator, CCTUInfo, true);
865       assert(CCS && "Expected the CodeCompletionString to be non-null");
866       ScoredSignatures.push_back(processOverloadCandidate(
867           Candidate, *CCS,
868           Candidate.getFunction()
869               ? getDeclComment(S.getASTContext(), *Candidate.getFunction())
870               : ""));
871     }
872 
873     // Sema does not load the docs from the preamble, so we need to fetch extra
874     // docs from the index instead.
875     llvm::DenseMap<SymbolID, std::string> FetchedDocs;
876     if (Index) {
877       LookupRequest IndexRequest;
878       for (const auto &S : ScoredSignatures) {
879         if (!S.IDForDoc)
880           continue;
881         IndexRequest.IDs.insert(*S.IDForDoc);
882       }
883       Index->lookup(IndexRequest, [&](const Symbol &S) {
884         if (!S.Documentation.empty())
885           FetchedDocs[S.ID] = std::string(S.Documentation);
886       });
887       log("SigHelp: requested docs for {0} symbols from the index, got {1} "
888           "symbols with non-empty docs in the response",
889           IndexRequest.IDs.size(), FetchedDocs.size());
890     }
891 
892     llvm::sort(ScoredSignatures, [](const ScoredSignature &L,
893                                     const ScoredSignature &R) {
894       // Ordering follows:
895       // - Less number of parameters is better.
896       // - Function is better than FunctionType which is better than
897       // Function Template.
898       // - High score is better.
899       // - Shorter signature is better.
900       // - Alphabetically smaller is better.
901       if (L.Quality.NumberOfParameters != R.Quality.NumberOfParameters)
902         return L.Quality.NumberOfParameters < R.Quality.NumberOfParameters;
903       if (L.Quality.NumberOfOptionalParameters !=
904           R.Quality.NumberOfOptionalParameters)
905         return L.Quality.NumberOfOptionalParameters <
906                R.Quality.NumberOfOptionalParameters;
907       if (L.Quality.Kind != R.Quality.Kind) {
908         using OC = CodeCompleteConsumer::OverloadCandidate;
909         switch (L.Quality.Kind) {
910         case OC::CK_Function:
911           return true;
912         case OC::CK_FunctionType:
913           return R.Quality.Kind != OC::CK_Function;
914         case OC::CK_FunctionTemplate:
915           return false;
916         }
917         llvm_unreachable("Unknown overload candidate type.");
918       }
919       if (L.Signature.label.size() != R.Signature.label.size())
920         return L.Signature.label.size() < R.Signature.label.size();
921       return L.Signature.label < R.Signature.label;
922     });
923 
924     for (auto &SS : ScoredSignatures) {
925       auto IndexDocIt =
926           SS.IDForDoc ? FetchedDocs.find(*SS.IDForDoc) : FetchedDocs.end();
927       if (IndexDocIt != FetchedDocs.end())
928         SS.Signature.documentation = IndexDocIt->second;
929 
930       SigHelp.signatures.push_back(std::move(SS.Signature));
931     }
932   }
933 
934   GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; }
935 
936   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
937 
938 private:
939   void processParameterChunk(llvm::StringRef ChunkText,
940                              SignatureInformation &Signature) const {
941     // (!) this is O(n), should still be fast compared to building ASTs.
942     unsigned ParamStartOffset = lspLength(Signature.label);
943     unsigned ParamEndOffset = ParamStartOffset + lspLength(ChunkText);
944     // A piece of text that describes the parameter that corresponds to
945     // the code-completion location within a function call, message send,
946     // macro invocation, etc.
947     Signature.label += ChunkText;
948     ParameterInformation Info;
949     Info.labelOffsets.emplace(ParamStartOffset, ParamEndOffset);
950     // FIXME: only set 'labelOffsets' when all clients migrate out of it.
951     Info.labelString = std::string(ChunkText);
952 
953     Signature.parameters.push_back(std::move(Info));
954   }
955 
956   void processOptionalChunk(const CodeCompletionString &CCS,
957                             SignatureInformation &Signature,
958                             SignatureQualitySignals &Signal) const {
959     for (const auto &Chunk : CCS) {
960       switch (Chunk.Kind) {
961       case CodeCompletionString::CK_Optional:
962         assert(Chunk.Optional &&
963                "Expected the optional code completion string to be non-null.");
964         processOptionalChunk(*Chunk.Optional, Signature, Signal);
965         break;
966       case CodeCompletionString::CK_VerticalSpace:
967         break;
968       case CodeCompletionString::CK_CurrentParameter:
969       case CodeCompletionString::CK_Placeholder:
970         processParameterChunk(Chunk.Text, Signature);
971         Signal.NumberOfOptionalParameters++;
972         break;
973       default:
974         Signature.label += Chunk.Text;
975         break;
976       }
977     }
978   }
979 
980   // FIXME(ioeric): consider moving CodeCompletionString logic here to
981   // CompletionString.h.
982   ScoredSignature processOverloadCandidate(const OverloadCandidate &Candidate,
983                                            const CodeCompletionString &CCS,
984                                            llvm::StringRef DocComment) const {
985     SignatureInformation Signature;
986     SignatureQualitySignals Signal;
987     const char *ReturnType = nullptr;
988 
989     Signature.documentation = formatDocumentation(CCS, DocComment);
990     Signal.Kind = Candidate.getKind();
991 
992     for (const auto &Chunk : CCS) {
993       switch (Chunk.Kind) {
994       case CodeCompletionString::CK_ResultType:
995         // A piece of text that describes the type of an entity or,
996         // for functions and methods, the return type.
997         assert(!ReturnType && "Unexpected CK_ResultType");
998         ReturnType = Chunk.Text;
999         break;
1000       case CodeCompletionString::CK_CurrentParameter:
1001       case CodeCompletionString::CK_Placeholder:
1002         processParameterChunk(Chunk.Text, Signature);
1003         Signal.NumberOfParameters++;
1004         break;
1005       case CodeCompletionString::CK_Optional: {
1006         // The rest of the parameters are defaulted/optional.
1007         assert(Chunk.Optional &&
1008                "Expected the optional code completion string to be non-null.");
1009         processOptionalChunk(*Chunk.Optional, Signature, Signal);
1010         break;
1011       }
1012       case CodeCompletionString::CK_VerticalSpace:
1013         break;
1014       default:
1015         Signature.label += Chunk.Text;
1016         break;
1017       }
1018     }
1019     if (ReturnType) {
1020       Signature.label += " -> ";
1021       Signature.label += ReturnType;
1022     }
1023     dlog("Signal for {0}: {1}", Signature, Signal);
1024     ScoredSignature Result;
1025     Result.Signature = std::move(Signature);
1026     Result.Quality = Signal;
1027     const FunctionDecl *Func = Candidate.getFunction();
1028     if (Func && Result.Signature.documentation.empty()) {
1029       // Computing USR caches linkage, which may change after code completion.
1030       if (!hasUnstableLinkage(Func))
1031         Result.IDForDoc = clangd::getSymbolID(Func);
1032     }
1033     return Result;
1034   }
1035 
1036   SignatureHelp &SigHelp;
1037   std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator;
1038   CodeCompletionTUInfo CCTUInfo;
1039   const SymbolIndex *Index;
1040 }; // SignatureHelpCollector
1041 
// Inputs needed by semaCodeComplete() to run Sema at a single position.
struct SemaCompleteInput {
  // Name of the file in which completion is requested.
  PathRef FileName;
  // Offset of the completion point within ParseInput.Contents.
  size_t Offset;
  // Preamble data; semaCodeComplete() reads its Preamble, StatCache and
  // Macros.
  const PreambleData &Preamble;
  // When set, applied to the compiler invocation before the instance is built.
  const llvm::Optional<PreamblePatch> Patch;
  // File contents, compile command and filesystem provider.
  const ParseInputs &ParseInput;
};
1049 
1050 void loadMainFilePreambleMacros(const Preprocessor &PP,
1051                                 const PreambleData &Preamble) {
1052   // The ExternalPreprocessorSource has our macros, if we know where to look.
1053   // We can read all the macros using PreambleMacros->ReadDefinedMacros(),
1054   // but this includes transitively included files, so may deserialize a lot.
1055   ExternalPreprocessorSource *PreambleMacros = PP.getExternalSource();
1056   // As we have the names of the macros, we can look up their IdentifierInfo
1057   // and then use this to load just the macros we want.
1058   IdentifierInfoLookup *PreambleIdentifiers =
1059       PP.getIdentifierTable().getExternalIdentifierLookup();
1060   if (!PreambleIdentifiers || !PreambleMacros)
1061     return;
1062   for (const auto &MacroName : Preamble.Macros.Names)
1063     if (auto *II = PreambleIdentifiers->get(MacroName.getKey()))
1064       if (II->isOutOfDate())
1065         PreambleMacros->updateOutOfDateIdentifier(*II);
1066 }
1067 
1068 // Invokes Sema code completion on a file.
1069 // If \p Includes is set, it will be updated based on the compiler invocation.
1070 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
1071                       const clang::CodeCompleteOptions &Options,
1072                       const SemaCompleteInput &Input,
1073                       IncludeStructure *Includes = nullptr) {
1074   trace::Span Tracer("Sema completion");
1075 
1076   IgnoreDiagnostics IgnoreDiags;
1077   auto CI = buildCompilerInvocation(Input.ParseInput, IgnoreDiags);
1078   if (!CI) {
1079     elog("Couldn't create CompilerInvocation");
1080     return false;
1081   }
1082   auto &FrontendOpts = CI->getFrontendOpts();
1083   FrontendOpts.SkipFunctionBodies = true;
1084   // Disable typo correction in Sema.
1085   CI->getLangOpts()->SpellChecking = false;
1086   // Code completion won't trigger in delayed template bodies.
1087   // This is on-by-default in windows to allow parsing SDK headers; we're only
1088   // disabling it for the main-file (not preamble).
1089   CI->getLangOpts()->DelayedTemplateParsing = false;
1090   // Setup code completion.
1091   FrontendOpts.CodeCompleteOpts = Options;
1092   FrontendOpts.CodeCompletionAt.FileName = std::string(Input.FileName);
1093   std::tie(FrontendOpts.CodeCompletionAt.Line,
1094            FrontendOpts.CodeCompletionAt.Column) =
1095       offsetToClangLineColumn(Input.ParseInput.Contents, Input.Offset);
1096 
1097   std::unique_ptr<llvm::MemoryBuffer> ContentsBuffer =
1098       llvm::MemoryBuffer::getMemBufferCopy(Input.ParseInput.Contents,
1099                                            Input.FileName);
1100   // The diagnostic options must be set before creating a CompilerInstance.
1101   CI->getDiagnosticOpts().IgnoreWarnings = true;
1102   // We reuse the preamble whether it's valid or not. This is a
1103   // correctness/performance tradeoff: building without a preamble is slow, and
1104   // completion is latency-sensitive.
1105   // However, if we're completing *inside* the preamble section of the draft,
1106   // overriding the preamble will break sema completion. Fortunately we can just
1107   // skip all includes in this case; these completions are really simple.
1108   PreambleBounds PreambleRegion =
1109       ComputePreambleBounds(*CI->getLangOpts(), ContentsBuffer.get(), 0);
1110   bool CompletingInPreamble = PreambleRegion.Size > Input.Offset;
1111   if (Input.Patch)
1112     Input.Patch->apply(*CI);
1113   // NOTE: we must call BeginSourceFile after prepareCompilerInstance. Otherwise
1114   // the remapped buffers do not get freed.
1115   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS =
1116       Input.ParseInput.FSProvider->getFileSystem();
1117   if (Input.Preamble.StatCache)
1118     VFS = Input.Preamble.StatCache->getConsumingFS(std::move(VFS));
1119   if (VFS->setCurrentWorkingDirectory(
1120           Input.ParseInput.CompileCommand.Directory))
1121     elog("Couldn't set working directory during code completion");
1122   auto Clang = prepareCompilerInstance(
1123       std::move(CI), !CompletingInPreamble ? &Input.Preamble.Preamble : nullptr,
1124       std::move(ContentsBuffer), std::move(VFS), IgnoreDiags);
1125   Clang->getPreprocessorOpts().SingleFileParseMode = CompletingInPreamble;
1126   Clang->setCodeCompletionConsumer(Consumer.release());
1127 
1128   SyntaxOnlyAction Action;
1129   if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) {
1130     log("BeginSourceFile() failed when running codeComplete for {0}",
1131         Input.FileName);
1132     return false;
1133   }
1134   // Macros can be defined within the preamble region of the main file.
1135   // They don't fall nicely into our index/Sema dichotomy:
1136   //  - they're not indexed for completion (they're not available across files)
1137   //  - but Sema code complete won't see them: as part of the preamble, they're
1138   //    deserialized only when mentioned.
1139   // Force them to be deserialized so SemaCodeComplete sees them.
1140   loadMainFilePreambleMacros(Clang->getPreprocessor(), Input.Preamble);
1141   if (Includes)
1142     Clang->getPreprocessor().addPPCallbacks(
1143         collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
1144   if (llvm::Error Err = Action.Execute()) {
1145     log("Execute() failed when running codeComplete for {0}: {1}",
1146         Input.FileName, toString(std::move(Err)));
1147     return false;
1148   }
1149   Action.EndSourceFile();
1150 
1151   return true;
1152 }
1153 
1154 // Should we allow index completions in the specified context?
1155 bool allowIndex(CodeCompletionContext &CC) {
1156   if (!contextAllowsIndex(CC.getKind()))
1157     return false;
1158   // We also avoid ClassName::bar (but allow namespace::bar).
1159   auto Scope = CC.getCXXScopeSpecifier();
1160   if (!Scope)
1161     return true;
1162   NestedNameSpecifier *NameSpec = (*Scope)->getScopeRep();
1163   if (!NameSpec)
1164     return true;
1165   // We only query the index when qualifier is a namespace.
1166   // If it's a class, we rely solely on sema completions.
1167   switch (NameSpec->getKind()) {
1168   case NestedNameSpecifier::Global:
1169   case NestedNameSpecifier::Namespace:
1170   case NestedNameSpecifier::NamespaceAlias:
1171     return true;
1172   case NestedNameSpecifier::Super:
1173   case NestedNameSpecifier::TypeSpec:
1174   case NestedNameSpecifier::TypeSpecWithTemplate:
1175   // Unresolved inside a template.
1176   case NestedNameSpecifier::Identifier:
1177     return false;
1178   }
1179   llvm_unreachable("invalid NestedNameSpecifier kind");
1180 }
1181 
1182 std::future<SymbolSlab> startAsyncFuzzyFind(const SymbolIndex &Index,
1183                                             const FuzzyFindRequest &Req) {
1184   return runAsync<SymbolSlab>([&Index, Req]() {
1185     trace::Span Tracer("Async fuzzyFind");
1186     SymbolSlab::Builder Syms;
1187     Index.fuzzyFind(Req, [&Syms](const Symbol &Sym) { Syms.insert(Sym); });
1188     return std::move(Syms).build();
1189   });
1190 }
1191 
1192 // Creates a `FuzzyFindRequest` based on the cached index request from the
1193 // last completion, if any, and the speculated completion filter text in the
1194 // source code.
1195 FuzzyFindRequest speculativeFuzzyFindRequestForCompletion(
1196     FuzzyFindRequest CachedReq, const CompletionPrefix &HeuristicPrefix) {
1197   CachedReq.Query = std::string(HeuristicPrefix.Name);
1198   return CachedReq;
1199 }
1200 
1201 // Runs Sema-based (AST) and Index-based completion, returns merged results.
1202 //
1203 // There are a few tricky considerations:
1204 //   - the AST provides information needed for the index query (e.g. which
1205 //     namespaces to search in). So Sema must start first.
1206 //   - we only want to return the top results (Opts.Limit).
1207 //     Building CompletionItems for everything else is wasteful, so we want to
1208 //     preserve the "native" format until we're done with scoring.
1209 //   - the data underlying Sema completion items is owned by the AST and various
1210 //     other arenas, which must stay alive for us to build CompletionItems.
1211 //   - we may get duplicate results from Sema and the Index, we need to merge.
1212 //
1213 // So we start Sema completion first, and do all our work in its callback.
1214 // We use the Sema context information to query the index.
1215 // Then we merge the two result sets, producing items that are Sema/Index/Both.
1216 // These items are scored, and the top N are synthesized into the LSP response.
1217 // Finally, we can clean up the data structures created by Sema completion.
1218 //
1219 // Main collaborators are:
1220 //   - semaCodeComplete sets up the compiler machinery to run code completion.
1221 //   - CompletionRecorder captures Sema completion results, including context.
1222 //   - SymbolIndex (Opts.Index) provides index completion results as Symbols
1223 //   - CompletionCandidates are the result of merging Sema and Index results.
1224 //     Each candidate points to an underlying CodeCompletionResult (Sema), a
1225 //     Symbol (Index), or both. It computes the result quality score.
1226 //     CompletionCandidate also does conversion to CompletionItem (at the end).
1227 //   - FuzzyMatcher scores how the candidate matches the partial identifier.
1228 //     This score is combined with the result quality score for the final score.
1229 //   - TopN determines the results with the best score.
1230 class CodeCompleteFlow {
1231   PathRef FileName;
1232   IncludeStructure Includes;           // Complete once the compiler runs.
1233   SpeculativeFuzzyFind *SpecFuzzyFind; // Can be nullptr.
1234   const CodeCompleteOptions &Opts;
1235 
1236   // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
1237   CompletionRecorder *Recorder = nullptr;
1238   CodeCompletionContext::Kind CCContextKind = CodeCompletionContext::CCC_Other;
1239   bool IsUsingDeclaration = false;
1240   // The snippets will not be generated if the token following completion
1241   // location is an opening parenthesis (tok::l_paren) because this would add
1242   // extra parenthesis.
1243   tok::TokenKind NextTokenKind = tok::eof;
1244   // Counters for logging.
1245   int NSema = 0, NIndex = 0, NSemaAndIndex = 0, NIdent = 0;
1246   bool Incomplete = false; // Would more be available with a higher limit?
1247   CompletionPrefix HeuristicPrefix;
1248   llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
1249   Range ReplacedRange;
1250   std::vector<std::string> QueryScopes; // Initialized once Sema runs.
1251   // Initialized once QueryScopes is initialized, if there are scopes.
1252   llvm::Optional<ScopeDistance> ScopeProximity;
1253   llvm::Optional<OpaqueType> PreferredType; // Initialized once Sema runs.
1254   // Whether to query symbols from any scope. Initialized once Sema runs.
1255   bool AllScopes = false;
1256   llvm::StringSet<> ContextWords;
1257   // Include-insertion and proximity scoring rely on the include structure.
1258   // This is available after Sema has run.
1259   llvm::Optional<IncludeInserter> Inserter;  // Available during runWithSema.
1260   llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs.
1261   /// Speculative request based on the cached request and the filter text before
1262   /// the cursor.
1263   /// Initialized right before sema run. This is only set if `SpecFuzzyFind` is
1264   /// set and contains a cached request.
1265   llvm::Optional<FuzzyFindRequest> SpecReq;
1266 
1267 public:
  // A CodeCompleteFlow object is only useful for calling run() exactly once.
  // \p Includes is copied into the flow; \p Opts is held by reference.
  // \p SpecFuzzyFind may be null.
  CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes,
                   SpeculativeFuzzyFind *SpecFuzzyFind,
                   const CodeCompleteOptions &Opts)
      : FileName(FileName), Includes(Includes), SpecFuzzyFind(SpecFuzzyFind),
        Opts(Opts) {}
1274 
  // Runs Sema completion on \p SemaCCInput and returns the result computed in
  // the Sema callback. If Sema never delivers a usable callback, the returned
  // result is left default-constructed.
  CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && {
    trace::Span Tracer("CodeCompleteFlow");
    HeuristicPrefix = guessCompletionPrefix(SemaCCInput.ParseInput.Contents,
                                            SemaCCInput.Offset);
    populateContextWords(SemaCCInput.ParseInput.Contents);
    // Kick off a speculative index query before Sema runs, reusing the cached
    // request with the heuristically guessed filter text.
    if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) {
      assert(!SpecFuzzyFind->Result.valid());
      SpecReq = speculativeFuzzyFindRequestForCompletion(
          *SpecFuzzyFind->CachedReq, HeuristicPrefix);
      SpecFuzzyFind->Result = startAsyncFuzzyFind(*Opts.Index, *SpecReq);
    }

    // We run Sema code completion first. It builds an AST and calculates:
    //   - completion results based on the AST.
    //   - partial identifier and context. We need these for the index query.
    CodeCompleteResult Output;
    // The callback below runs from inside the recorder while Sema is alive;
    // everything that needs Sema data happens there.
    auto RecorderOwner = std::make_unique<CompletionRecorder>(Opts, [&]() {
      assert(Recorder && "Recorder is not set");
      CCContextKind = Recorder->CCContext.getKind();
      IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
      auto Style = getFormatStyleForFile(
          SemaCCInput.FileName, SemaCCInput.ParseInput.Contents,
          SemaCCInput.ParseInput.FSProvider->getFileSystem().get());
      // Remember the token right after the completion point, if there is one.
      const auto NextToken = Lexer::findNextToken(
          Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(),
          Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts);
      if (NextToken)
        NextTokenKind = NextToken->getKind();
      // If preprocessor was run, inclusions from preprocessor callback should
      // already be added to Includes.
      Inserter.emplace(
          SemaCCInput.FileName, SemaCCInput.ParseInput.Contents, Style,
          SemaCCInput.ParseInput.CompileCommand.Directory,
          &Recorder->CCSema->getPreprocessor().getHeaderSearchInfo());
      for (const auto &Inc : Includes.MainFileIncludes)
        Inserter->addExisting(Inc);

      // Most of the cost of file proximity is in initializing the FileDistance
      // structures based on the observed includes, once per query. Conceptually
      // that happens here (though the per-URI-scheme initialization is lazy).
      // The per-result proximity scoring is (amortized) very cheap.
      FileDistanceOptions ProxOpts{}; // Use defaults.
      const auto &SM = Recorder->CCSema->getSourceManager();
      llvm::StringMap<SourceParams> ProxSources;
      for (auto &Entry : Includes.includeDepth(
               SM.getFileEntryForID(SM.getMainFileID())->getName())) {
        auto &Source = ProxSources[Entry.getKey()];
        Source.Cost = Entry.getValue() * ProxOpts.IncludeCost;
        // Symbols near our transitive includes are good, but only consider
        // things in the same directory or below it. Otherwise there can be
        // many false positives.
        if (Entry.getValue() > 0)
          Source.MaxUpTraversals = 1;
      }
      FileProximity.emplace(ProxSources, ProxOpts);

      Output = runWithSema();
      Inserter.reset(); // Make sure this doesn't out-live Clang.
      SPAN_ATTACH(Tracer, "sema_completion_kind",
                  getCompletionKindString(CCContextKind));
      log("Code complete: sema context {0}, query scopes [{1}] (AnyScope={2}), "
          "expected type {3}{4}",
          getCompletionKindString(CCContextKind),
          llvm::join(QueryScopes.begin(), QueryScopes.end(), ","), AllScopes,
          PreferredType ? Recorder->CCContext.getPreferredType().getAsString()
                        : "<none>",
          IsUsingDeclaration ? ", inside using declaration" : "");
    });

    // Keep a non-owning pointer for the callback; ownership of the recorder is
    // handed to semaCodeComplete() below.
    Recorder = RecorderOwner.get();

    // The return value is not inspected: on failure Output simply stays as it
    // was.
    semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(),
                     SemaCCInput, &Includes);
    logResults(Output, Tracer);
    return Output;
  }
1351 
1352   void logResults(const CodeCompleteResult &Output, const trace::Span &Tracer) {
1353     SPAN_ATTACH(Tracer, "sema_results", NSema);
1354     SPAN_ATTACH(Tracer, "index_results", NIndex);
1355     SPAN_ATTACH(Tracer, "merged_results", NSemaAndIndex);
1356     SPAN_ATTACH(Tracer, "identifier_results", NIdent);
1357     SPAN_ATTACH(Tracer, "returned_results", int64_t(Output.Completions.size()));
1358     SPAN_ATTACH(Tracer, "incomplete", Output.HasMore);
1359     log("Code complete: {0} results from Sema, {1} from Index, "
1360         "{2} matched, {3} from identifiers, {4} returned{5}.",
1361         NSema, NIndex, NSemaAndIndex, NIdent, Output.Completions.size(),
1362         Output.HasMore ? " (incomplete)" : "");
1363     assert(!Opts.Limit || Output.Completions.size() <= Opts.Limit);
1364     // We don't assert that isIncomplete means we hit a limit.
1365     // Indexes may choose to impose their own limits even if we don't have one.
1366   }
1367 
1368   CodeCompleteResult
1369   runWithoutSema(llvm::StringRef Content, size_t Offset,
1370                  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) && {
1371     trace::Span Tracer("CodeCompleteWithoutSema");
1372     // Fill in fields normally set by runWithSema()
1373     HeuristicPrefix = guessCompletionPrefix(Content, Offset);
1374     populateContextWords(Content);
1375     CCContextKind = CodeCompletionContext::CCC_Recovery;
1376     IsUsingDeclaration = false;
1377     Filter = FuzzyMatcher(HeuristicPrefix.Name);
1378     auto Pos = offsetToPosition(Content, Offset);
1379     ReplacedRange.start = ReplacedRange.end = Pos;
1380     ReplacedRange.start.character -= HeuristicPrefix.Name.size();
1381 
1382     llvm::StringMap<SourceParams> ProxSources;
1383     ProxSources[FileName].Cost = 0;
1384     FileProximity.emplace(ProxSources);
1385 
1386     auto Style = getFormatStyleForFile(FileName, Content, VFS.get());
1387     // This will only insert verbatim headers.
1388     Inserter.emplace(FileName, Content, Style,
1389                      /*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);
1390 
1391     auto Identifiers = collectIdentifiers(Content, Style);
1392     std::vector<RawIdentifier> IdentifierResults;
1393     for (const auto &IDAndCount : Identifiers) {
1394       RawIdentifier ID;
1395       ID.Name = IDAndCount.first();
1396       ID.References = IDAndCount.second;
1397       // Avoid treating typed filter as an identifier.
1398       if (ID.Name == HeuristicPrefix.Name)
1399         --ID.References;
1400       if (ID.References > 0)
1401         IdentifierResults.push_back(std::move(ID));
1402     }
1403 
1404     // Simplified version of getQueryScopes():
1405     //  - accessible scopes are determined heuristically.
1406     //  - all-scopes query if no qualifier was typed (and it's allowed).
1407     SpecifiedScope Scopes;
1408     Scopes.AccessibleScopes = visibleNamespaces(
1409         Content.take_front(Offset), format::getFormattingLangOpts(Style));
1410     for (std::string &S : Scopes.AccessibleScopes)
1411       if (!S.empty())
1412         S.append("::"); // visibleNamespaces doesn't include trailing ::.
1413     if (HeuristicPrefix.Qualifier.empty())
1414       AllScopes = Opts.AllScopes;
1415     else if (HeuristicPrefix.Qualifier.startswith("::")) {
1416       Scopes.AccessibleScopes = {""};
1417       Scopes.UnresolvedQualifier =
1418           std::string(HeuristicPrefix.Qualifier.drop_front(2));
1419     } else
1420       Scopes.UnresolvedQualifier = std::string(HeuristicPrefix.Qualifier);
1421     // First scope is the (modified) enclosing scope.
1422     QueryScopes = Scopes.scopesForIndexQuery();
1423     ScopeProximity.emplace(QueryScopes);
1424 
1425     SymbolSlab IndexResults = Opts.Index ? queryIndex() : SymbolSlab();
1426 
1427     CodeCompleteResult Output = toCodeCompleteResult(mergeResults(
1428         /*SemaResults=*/{}, IndexResults, IdentifierResults));
1429     Output.RanParser = false;
1430     logResults(Output, Tracer);
1431     return Output;
1432   }
1433 
1434 private:
1435   void populateContextWords(llvm::StringRef Content) {
1436     // Take last 3 lines before the completion point.
1437     unsigned RangeEnd = HeuristicPrefix.Qualifier.begin() - Content.data(),
1438              RangeBegin = RangeEnd;
1439     for (size_t I = 0; I < 3 && RangeBegin > 0; ++I) {
1440       auto PrevNL = Content.rfind('\n', RangeBegin);
1441       if (PrevNL == StringRef::npos) {
1442         RangeBegin = 0;
1443         break;
1444       }
1445       RangeBegin = PrevNL;
1446     }
1447 
1448     ContextWords = collectWords(Content.slice(RangeBegin, RangeEnd));
1449     dlog("Completion context words: {0}",
1450          llvm::join(ContextWords.keys(), ", "));
1451   }
1452 
1453   // This is called by run() once Sema code completion is done, but before the
1454   // Sema data structures are torn down. It does all the real work.
1455   CodeCompleteResult runWithSema() {
1456     const auto &CodeCompletionRange = CharSourceRange::getCharRange(
1457         Recorder->CCSema->getPreprocessor().getCodeCompletionTokenRange());
1458     // When we are getting completions with an empty identifier, for example
1459     //    std::vector<int> asdf;
1460     //    asdf.^;
1461     // Then the range will be invalid and we will be doing insertion, use
1462     // current cursor position in such cases as range.
1463     if (CodeCompletionRange.isValid()) {
1464       ReplacedRange = halfOpenToRange(Recorder->CCSema->getSourceManager(),
1465                                       CodeCompletionRange);
1466     } else {
1467       const auto &Pos = sourceLocToPosition(
1468           Recorder->CCSema->getSourceManager(),
1469           Recorder->CCSema->getPreprocessor().getCodeCompletionLoc());
1470       ReplacedRange.start = ReplacedRange.end = Pos;
1471     }
1472     Filter = FuzzyMatcher(
1473         Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
1474     std::tie(QueryScopes, AllScopes) = getQueryScopes(
1475         Recorder->CCContext, *Recorder->CCSema, HeuristicPrefix, Opts);
1476     if (!QueryScopes.empty())
1477       ScopeProximity.emplace(QueryScopes);
1478     PreferredType =
1479         OpaqueType::fromType(Recorder->CCSema->getASTContext(),
1480                              Recorder->CCContext.getPreferredType());
1481     // Sema provides the needed context to query the index.
1482     // FIXME: in addition to querying for extra/overlapping symbols, we should
1483     //        explicitly request symbols corresponding to Sema results.
1484     //        We can use their signals even if the index can't suggest them.
1485     // We must copy index results to preserve them, but there are at most Limit.
1486     auto IndexResults = (Opts.Index && allowIndex(Recorder->CCContext))
1487                             ? queryIndex()
1488                             : SymbolSlab();
1489     trace::Span Tracer("Populate CodeCompleteResult");
1490     // Merge Sema and Index results, score them, and pick the winners.
1491     auto Top =
1492         mergeResults(Recorder->Results, IndexResults, /*Identifiers*/ {});
1493     return toCodeCompleteResult(Top);
1494   }
1495 
1496   CodeCompleteResult
1497   toCodeCompleteResult(const std::vector<ScoredBundle> &Scored) {
1498     CodeCompleteResult Output;
1499 
1500     // Convert the results to final form, assembling the expensive strings.
1501     for (auto &C : Scored) {
1502       Output.Completions.push_back(toCodeCompletion(C.first));
1503       Output.Completions.back().Score = C.second;
1504       Output.Completions.back().CompletionTokenRange = ReplacedRange;
1505     }
1506     Output.HasMore = Incomplete;
1507     Output.Context = CCContextKind;
1508     Output.CompletionRange = ReplacedRange;
1509     return Output;
1510   }
1511 
1512   SymbolSlab queryIndex() {
1513     trace::Span Tracer("Query index");
1514     SPAN_ATTACH(Tracer, "limit", int64_t(Opts.Limit));
1515 
1516     // Build the query.
1517     FuzzyFindRequest Req;
1518     if (Opts.Limit)
1519       Req.Limit = Opts.Limit;
1520     Req.Query = std::string(Filter->pattern());
1521     Req.RestrictForCodeCompletion = true;
1522     Req.Scopes = QueryScopes;
1523     Req.AnyScope = AllScopes;
1524     // FIXME: we should send multiple weighted paths here.
1525     Req.ProximityPaths.push_back(std::string(FileName));
1526     if (PreferredType)
1527       Req.PreferredTypes.push_back(std::string(PreferredType->raw()));
1528     vlog("Code complete: fuzzyFind({0:2})", toJSON(Req));
1529 
1530     if (SpecFuzzyFind)
1531       SpecFuzzyFind->NewReq = Req;
1532     if (SpecFuzzyFind && SpecFuzzyFind->Result.valid() && (*SpecReq == Req)) {
1533       vlog("Code complete: speculative fuzzy request matches the actual index "
1534            "request. Waiting for the speculative index results.");
1535       SPAN_ATTACH(Tracer, "Speculative results", true);
1536 
1537       trace::Span WaitSpec("Wait speculative results");
1538       return SpecFuzzyFind->Result.get();
1539     }
1540 
1541     SPAN_ATTACH(Tracer, "Speculative results", false);
1542 
1543     // Run the query against the index.
1544     SymbolSlab::Builder ResultsBuilder;
1545     if (Opts.Index->fuzzyFind(
1546             Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); }))
1547       Incomplete = true;
1548     return std::move(ResultsBuilder).build();
1549   }
1550 
1551   // Merges Sema and Index results where possible, to form CompletionCandidates.
1552   // \p Identifiers is raw identifiers that can also be completion candidates.
1553   // Identifiers are not merged with results from index or sema.
1554   // Groups overloads if desired, to form CompletionCandidate::Bundles. The
1555   // bundles are scored and top results are returned, best to worst.
1556   std::vector<ScoredBundle>
1557   mergeResults(const std::vector<CodeCompletionResult> &SemaResults,
1558                const SymbolSlab &IndexResults,
1559                const std::vector<RawIdentifier> &IdentifierResults) {
1560     trace::Span Tracer("Merge and score results");
1561     std::vector<CompletionCandidate::Bundle> Bundles;
1562     llvm::DenseMap<size_t, size_t> BundleLookup;
1563     auto AddToBundles = [&](const CodeCompletionResult *SemaResult,
1564                             const Symbol *IndexResult,
1565                             const RawIdentifier *IdentifierResult) {
1566       CompletionCandidate C;
1567       C.SemaResult = SemaResult;
1568       C.IndexResult = IndexResult;
1569       C.IdentifierResult = IdentifierResult;
1570       if (C.IndexResult) {
1571         C.Name = IndexResult->Name;
1572         C.RankedIncludeHeaders = getRankedIncludes(*C.IndexResult);
1573       } else if (C.SemaResult) {
1574         C.Name = Recorder->getName(*SemaResult);
1575       } else {
1576         assert(IdentifierResult);
1577         C.Name = IdentifierResult->Name;
1578       }
1579       if (auto OverloadSet = C.overloadSet(Opts)) {
1580         auto Ret = BundleLookup.try_emplace(OverloadSet, Bundles.size());
1581         if (Ret.second)
1582           Bundles.emplace_back();
1583         Bundles[Ret.first->second].push_back(std::move(C));
1584       } else {
1585         Bundles.emplace_back();
1586         Bundles.back().push_back(std::move(C));
1587       }
1588     };
1589     llvm::DenseSet<const Symbol *> UsedIndexResults;
1590     auto CorrespondingIndexResult =
1591         [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
1592       if (auto SymID =
1593               getSymbolID(SemaResult, Recorder->CCSema->getSourceManager())) {
1594         auto I = IndexResults.find(*SymID);
1595         if (I != IndexResults.end()) {
1596           UsedIndexResults.insert(&*I);
1597           return &*I;
1598         }
1599       }
1600       return nullptr;
1601     };
1602     // Emit all Sema results, merging them with Index results if possible.
1603     for (auto &SemaResult : SemaResults)
1604       AddToBundles(&SemaResult, CorrespondingIndexResult(SemaResult), nullptr);
1605     // Now emit any Index-only results.
1606     for (const auto &IndexResult : IndexResults) {
1607       if (UsedIndexResults.count(&IndexResult))
1608         continue;
1609       AddToBundles(/*SemaResult=*/nullptr, &IndexResult, nullptr);
1610     }
1611     // Emit identifier results.
1612     for (const auto &Ident : IdentifierResults)
1613       AddToBundles(/*SemaResult=*/nullptr, /*IndexResult=*/nullptr, &Ident);
1614     // We only keep the best N results at any time, in "native" format.
1615     TopN<ScoredBundle, ScoredBundleGreater> Top(
1616         Opts.Limit == 0 ? std::numeric_limits<size_t>::max() : Opts.Limit);
1617     for (auto &Bundle : Bundles)
1618       addCandidate(Top, std::move(Bundle));
1619     return std::move(Top).items();
1620   }
1621 
1622   llvm::Optional<float> fuzzyScore(const CompletionCandidate &C) {
1623     // Macros can be very spammy, so we only support prefix completion.
1624     // We won't end up with underfull index results, as macros are sema-only.
1625     if (C.SemaResult && C.SemaResult->Kind == CodeCompletionResult::RK_Macro &&
1626         !C.Name.startswith_lower(Filter->pattern()))
1627       return None;
1628     return Filter->match(C.Name);
1629   }
1630 
1631   // Scores a candidate and adds it to the TopN structure.
1632   void addCandidate(TopN<ScoredBundle, ScoredBundleGreater> &Candidates,
1633                     CompletionCandidate::Bundle Bundle) {
1634     SymbolQualitySignals Quality;
1635     SymbolRelevanceSignals Relevance;
1636     Relevance.Context = CCContextKind;
1637     Relevance.Name = Bundle.front().Name;
1638     Relevance.Query = SymbolRelevanceSignals::CodeComplete;
1639     Relevance.FileProximityMatch = FileProximity.getPointer();
1640     if (ScopeProximity)
1641       Relevance.ScopeProximityMatch = ScopeProximity.getPointer();
1642     if (PreferredType)
1643       Relevance.HadContextType = true;
1644     Relevance.ContextWords = &ContextWords;
1645 
1646     auto &First = Bundle.front();
1647     if (auto FuzzyScore = fuzzyScore(First))
1648       Relevance.NameMatch = *FuzzyScore;
1649     else
1650       return;
1651     SymbolOrigin Origin = SymbolOrigin::Unknown;
1652     bool FromIndex = false;
1653     for (const auto &Candidate : Bundle) {
1654       if (Candidate.IndexResult) {
1655         Quality.merge(*Candidate.IndexResult);
1656         Relevance.merge(*Candidate.IndexResult);
1657         Origin |= Candidate.IndexResult->Origin;
1658         FromIndex = true;
1659         if (!Candidate.IndexResult->Type.empty())
1660           Relevance.HadSymbolType |= true;
1661         if (PreferredType &&
1662             PreferredType->raw() == Candidate.IndexResult->Type) {
1663           Relevance.TypeMatchesPreferred = true;
1664         }
1665       }
1666       if (Candidate.SemaResult) {
1667         Quality.merge(*Candidate.SemaResult);
1668         Relevance.merge(*Candidate.SemaResult);
1669         if (PreferredType) {
1670           if (auto CompletionType = OpaqueType::fromCompletionResult(
1671                   Recorder->CCSema->getASTContext(), *Candidate.SemaResult)) {
1672             Relevance.HadSymbolType |= true;
1673             if (PreferredType == CompletionType)
1674               Relevance.TypeMatchesPreferred = true;
1675           }
1676         }
1677         Origin |= SymbolOrigin::AST;
1678       }
1679       if (Candidate.IdentifierResult) {
1680         Quality.References = Candidate.IdentifierResult->References;
1681         Relevance.Scope = SymbolRelevanceSignals::FileScope;
1682         Origin |= SymbolOrigin::Identifier;
1683       }
1684     }
1685 
1686     CodeCompletion::Scores Scores;
1687     Scores.Quality = Quality.evaluate();
1688     Scores.Relevance = Relevance.evaluate();
1689     Scores.Total = evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance);
1690     // NameMatch is in fact a multiplier on total score, so rescoring is sound.
1691     Scores.ExcludingName = Relevance.NameMatch
1692                                ? Scores.Total / Relevance.NameMatch
1693                                : Scores.Quality;
1694 
1695     if (Opts.RecordCCResult)
1696       Opts.RecordCCResult(toCodeCompletion(Bundle), Quality, Relevance,
1697                           Scores.Total);
1698 
1699     dlog("CodeComplete: {0} ({1}) = {2}\n{3}{4}\n", First.Name,
1700          llvm::to_string(Origin), Scores.Total, llvm::to_string(Quality),
1701          llvm::to_string(Relevance));
1702 
1703     NSema += bool(Origin & SymbolOrigin::AST);
1704     NIndex += FromIndex;
1705     NSemaAndIndex += bool(Origin & SymbolOrigin::AST) && FromIndex;
1706     NIdent += bool(Origin & SymbolOrigin::Identifier);
1707     if (Candidates.push({std::move(Bundle), Scores}))
1708       Incomplete = true;
1709   }
1710 
1711   CodeCompletion toCodeCompletion(const CompletionCandidate::Bundle &Bundle) {
1712     llvm::Optional<CodeCompletionBuilder> Builder;
1713     for (const auto &Item : Bundle) {
1714       CodeCompletionString *SemaCCS =
1715           Item.SemaResult ? Recorder->codeCompletionString(*Item.SemaResult)
1716                           : nullptr;
1717       if (!Builder)
1718         Builder.emplace(Recorder ? &Recorder->CCSema->getASTContext() : nullptr,
1719                         Item, SemaCCS, QueryScopes, *Inserter, FileName,
1720                         CCContextKind, Opts, IsUsingDeclaration, NextTokenKind);
1721       else
1722         Builder->add(Item, SemaCCS);
1723     }
1724     return Builder->build();
1725   }
1726 };
1727 
1728 } // namespace
1729 
1730 clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
1731   clang::CodeCompleteOptions Result;
1732   Result.IncludeCodePatterns = EnableSnippets && IncludeCodePatterns;
1733   Result.IncludeMacros = IncludeMacros;
1734   Result.IncludeGlobals = true;
1735   // We choose to include full comments and not do doxygen parsing in
1736   // completion.
1737   // FIXME: ideally, we should support doxygen in some form, e.g. do markdown
1738   // formatting of the comments.
1739   Result.IncludeBriefComments = false;
1740 
1741   // When an is used, Sema is responsible for completing the main file,
1742   // the index can provide results from the preamble.
1743   // Tell Sema not to deserialize the preamble to look for results.
1744   Result.LoadExternal = !Index;
1745   Result.IncludeFixIts = IncludeFixIts;
1746 
1747   return Result;
1748 }
1749 
1750 CompletionPrefix guessCompletionPrefix(llvm::StringRef Content,
1751                                        unsigned Offset) {
1752   assert(Offset <= Content.size());
1753   StringRef Rest = Content.take_front(Offset);
1754   CompletionPrefix Result;
1755 
1756   // Consume the unqualified name. We only handle ASCII characters.
1757   // isIdentifierBody will let us match "0invalid", but we don't mind.
1758   while (!Rest.empty() && isIdentifierBody(Rest.back()))
1759     Rest = Rest.drop_back();
1760   Result.Name = Content.slice(Rest.size(), Offset);
1761 
1762   // Consume qualifiers.
1763   while (Rest.consume_back("::") && !Rest.endswith(":")) // reject ::::
1764     while (!Rest.empty() && isIdentifierBody(Rest.back()))
1765       Rest = Rest.drop_back();
1766   Result.Qualifier =
1767       Content.slice(Rest.size(), Result.Name.begin() - Content.begin());
1768 
1769   return Result;
1770 }
1771 
1772 CodeCompleteResult codeComplete(PathRef FileName, Position Pos,
1773                                 const PreambleData *Preamble,
1774                                 const ParseInputs &ParseInput,
1775                                 CodeCompleteOptions Opts,
1776                                 SpeculativeFuzzyFind *SpecFuzzyFind) {
1777   auto Offset = positionToOffset(ParseInput.Contents, Pos);
1778   if (!Offset) {
1779     elog("Code completion position was invalid {0}", Offset.takeError());
1780     return CodeCompleteResult();
1781   }
1782   auto Flow = CodeCompleteFlow(
1783       FileName, Preamble ? Preamble->Includes : IncludeStructure(),
1784       SpecFuzzyFind, Opts);
1785   return (!Preamble || Opts.RunParser == CodeCompleteOptions::NeverParse)
1786              ? std::move(Flow).runWithoutSema(
1787                    ParseInput.Contents, *Offset,
1788                    ParseInput.FSProvider->getFileSystem())
1789              : std::move(Flow).run({FileName, *Offset, *Preamble,
1790                                     // We want to serve code completions with
1791                                     // low latency, so don't bother patching.
1792                                     /*PreamblePatch=*/llvm::None, ParseInput});
1793 }
1794 
1795 SignatureHelp signatureHelp(PathRef FileName, Position Pos,
1796                             const PreambleData &Preamble,
1797                             const ParseInputs &ParseInput) {
1798   auto Offset = positionToOffset(ParseInput.Contents, Pos);
1799   if (!Offset) {
1800     elog("Signature help position was invalid {0}", Offset.takeError());
1801     return SignatureHelp();
1802   }
1803   SignatureHelp Result;
1804   clang::CodeCompleteOptions Options;
1805   Options.IncludeGlobals = false;
1806   Options.IncludeMacros = false;
1807   Options.IncludeCodePatterns = false;
1808   Options.IncludeBriefComments = false;
1809   semaCodeComplete(
1810       std::make_unique<SignatureHelpCollector>(Options, ParseInput.Index,
1811                                                Result),
1812       Options,
1813       {FileName, *Offset, Preamble,
1814        PreamblePatch::create(FileName, ParseInput, Preamble), ParseInput});
1815   return Result;
1816 }
1817 
1818 bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) {
1819   auto InTopLevelScope = [](const NamedDecl &ND) {
1820     switch (ND.getDeclContext()->getDeclKind()) {
1821     case Decl::TranslationUnit:
1822     case Decl::Namespace:
1823     case Decl::LinkageSpec:
1824       return true;
1825     default:
1826       break;
1827     };
1828     return false;
1829   };
1830   // We only complete symbol's name, which is the same as the name of the
1831   // *primary* template in case of template specializations.
1832   if (isExplicitTemplateSpecialization(&ND))
1833     return false;
1834 
1835   if (InTopLevelScope(ND))
1836     return true;
1837 
1838   if (const auto *EnumDecl = dyn_cast<clang::EnumDecl>(ND.getDeclContext()))
1839     return InTopLevelScope(*EnumDecl) && !EnumDecl->isScoped();
1840 
1841   return false;
1842 }
1843 
1844 // FIXME: find a home for this (that can depend on both markup and Protocol).
1845 static MarkupContent renderDoc(const markup::Document &Doc, MarkupKind Kind) {
1846   MarkupContent Result;
1847   Result.kind = Kind;
1848   switch (Kind) {
1849   case MarkupKind::PlainText:
1850     Result.value.append(Doc.asPlainText());
1851     break;
1852   case MarkupKind::Markdown:
1853     Result.value.append(Doc.asMarkdown());
1854     break;
1855   }
1856   return Result;
1857 }
1858 
1859 CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const {
1860   CompletionItem LSP;
1861   const auto *InsertInclude = Includes.empty() ? nullptr : &Includes[0];
1862   LSP.label = ((InsertInclude && InsertInclude->Insertion)
1863                    ? Opts.IncludeIndicator.Insert
1864                    : Opts.IncludeIndicator.NoInsert) +
1865               (Opts.ShowOrigins ? "[" + llvm::to_string(Origin) + "]" : "") +
1866               RequiredQualifier + Name + Signature;
1867 
1868   LSP.kind = Kind;
1869   LSP.detail = BundleSize > 1
1870                    ? std::string(llvm::formatv("[{0} overloads]", BundleSize))
1871                    : ReturnType;
1872   LSP.deprecated = Deprecated;
1873   // Combine header information and documentation in LSP `documentation` field.
1874   // This is not quite right semantically, but tends to display well in editors.
1875   if (InsertInclude || Documentation) {
1876     markup::Document Doc;
1877     if (InsertInclude)
1878       Doc.addParagraph().appendText("From ").appendCode(InsertInclude->Header);
1879     if (Documentation)
1880       Doc.append(*Documentation);
1881     LSP.documentation = renderDoc(Doc, Opts.DocumentationFormat);
1882   }
1883   LSP.sortText = sortText(Score.Total, Name);
1884   LSP.filterText = Name;
1885   LSP.textEdit = {CompletionTokenRange, RequiredQualifier + Name};
1886   // Merge continuous additionalTextEdits into main edit. The main motivation
1887   // behind this is to help LSP clients, it seems most of them are confused when
1888   // they are provided with additionalTextEdits that are consecutive to main
1889   // edit.
1890   // Note that we store additional text edits from back to front in a line. That
1891   // is mainly to help LSP clients again, so that changes do not effect each
1892   // other.
1893   for (const auto &FixIt : FixIts) {
1894     if (FixIt.range.end == LSP.textEdit->range.start) {
1895       LSP.textEdit->newText = FixIt.newText + LSP.textEdit->newText;
1896       LSP.textEdit->range.start = FixIt.range.start;
1897     } else {
1898       LSP.additionalTextEdits.push_back(FixIt);
1899     }
1900   }
1901   if (Opts.EnableSnippets)
1902     LSP.textEdit->newText += SnippetSuffix;
1903 
1904   // FIXME(kadircet): Do not even fill insertText after making sure textEdit is
1905   // compatible with most of the editors.
1906   LSP.insertText = LSP.textEdit->newText;
1907   LSP.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet
1908                                              : InsertTextFormat::PlainText;
1909   if (InsertInclude && InsertInclude->Insertion)
1910     LSP.additionalTextEdits.push_back(*InsertInclude->Insertion);
1911 
1912   LSP.score = Score.ExcludingName;
1913 
1914   return LSP;
1915 }
1916 
1917 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const CodeCompletion &C) {
1918   // For now just lean on CompletionItem.
1919   return OS << C.render(CodeCompleteOptions());
1920 }
1921 
1922 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
1923                               const CodeCompleteResult &R) {
1924   OS << "CodeCompleteResult: " << R.Completions.size() << (R.HasMore ? "+" : "")
1925      << " (" << getCompletionKindString(R.Context) << ")"
1926      << " items:\n";
1927   for (const auto &C : R.Completions)
1928     OS << C << "\n";
1929   return OS;
1930 }
1931 
1932 // Heuristically detect whether the `Line` is an unterminated include filename.
1933 bool isIncludeFile(llvm::StringRef Line) {
1934   Line = Line.ltrim();
1935   if (!Line.consume_front("#"))
1936     return false;
1937   Line = Line.ltrim();
1938   if (!(Line.consume_front("include_next") || Line.consume_front("include") ||
1939         Line.consume_front("import")))
1940     return false;
1941   Line = Line.ltrim();
1942   if (Line.consume_front("<"))
1943     return Line.count('>') == 0;
1944   if (Line.consume_front("\""))
1945     return Line.count('"') == 0;
1946   return false;
1947 }
1948 
1949 bool allowImplicitCompletion(llvm::StringRef Content, unsigned Offset) {
1950   // Look at last line before completion point only.
1951   Content = Content.take_front(Offset);
1952   auto Pos = Content.rfind('\n');
1953   if (Pos != llvm::StringRef::npos)
1954     Content = Content.substr(Pos + 1);
1955 
1956   // Complete after scope operators.
1957   if (Content.endswith(".") || Content.endswith("->") || Content.endswith("::"))
1958     return true;
1959   // Complete after `#include <` and #include `<foo/`.
1960   if ((Content.endswith("<") || Content.endswith("\"") ||
1961        Content.endswith("/")) &&
1962       isIncludeFile(Content))
1963     return true;
1964 
1965   // Complete words. Give non-ascii characters the benefit of the doubt.
1966   return !Content.empty() &&
1967          (isIdentifierBody(Content.back()) || !llvm::isASCII(Content.back()));
1968 }
1969 
1970 } // namespace clangd
1971 } // namespace clang
1972