1 //===--- CodeComplete.cpp ----------------------------------------*- C++-*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Code completion has several moving parts:
11 //  - AST-based completions are provided using the completion hooks in Sema.
12 //  - external completions are retrieved from the index (using hints from Sema)
13 //  - the two sources overlap, and must be merged and overloads bundled
14 //  - results must be scored and ranked (see Quality.h) before rendering
15 //
16 // Signature help works in a similar way as code completion, but it is simpler:
17 // it's purely AST-based, and there are few candidates.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "CodeComplete.h"
22 #include "AST.h"
23 #include "CodeCompletionStrings.h"
24 #include "Compiler.h"
25 #include "Diagnostics.h"
26 #include "FileDistance.h"
27 #include "FuzzyMatch.h"
28 #include "Headers.h"
29 #include "Logger.h"
30 #include "Quality.h"
31 #include "SourceCode.h"
32 #include "TUScheduler.h"
33 #include "Trace.h"
34 #include "URI.h"
35 #include "index/Index.h"
36 #include "clang/ASTMatchers/ASTMatchFinder.h"
37 #include "clang/Basic/LangOptions.h"
38 #include "clang/Basic/SourceLocation.h"
39 #include "clang/Format/Format.h"
40 #include "clang/Frontend/CompilerInstance.h"
41 #include "clang/Frontend/FrontendActions.h"
42 #include "clang/Index/USRGeneration.h"
43 #include "clang/Sema/CodeCompleteConsumer.h"
44 #include "clang/Sema/Sema.h"
45 #include "clang/Tooling/Core/Replacement.h"
46 #include "llvm/ADT/Optional.h"
47 #include "llvm/Support/Error.h"
48 #include "llvm/Support/Format.h"
49 #include "llvm/Support/FormatVariadic.h"
50 #include "llvm/Support/ScopedPrinter.h"
51 #include <queue>
52 
// We log detailed candidates here if you run with -debug-only=CodeComplete.
54 #define DEBUG_TYPE "CodeComplete"
55 
56 namespace clang {
57 namespace clangd {
58 namespace {
59 
60 CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
61   using SK = index::SymbolKind;
62   switch (Kind) {
63   case SK::Unknown:
64     return CompletionItemKind::Missing;
65   case SK::Module:
66   case SK::Namespace:
67   case SK::NamespaceAlias:
68     return CompletionItemKind::Module;
69   case SK::Macro:
70     return CompletionItemKind::Text;
71   case SK::Enum:
72     return CompletionItemKind::Enum;
73   // FIXME(ioeric): use LSP struct instead of class when it is suppoted in the
74   // protocol.
75   case SK::Struct:
76   case SK::Class:
77   case SK::Protocol:
78   case SK::Extension:
79   case SK::Union:
80     return CompletionItemKind::Class;
81   // FIXME(ioeric): figure out whether reference is the right type for aliases.
82   case SK::TypeAlias:
83   case SK::Using:
84     return CompletionItemKind::Reference;
85   case SK::Function:
86   // FIXME(ioeric): this should probably be an operator. This should be fixed
87   // when `Operator` is support type in the protocol.
88   case SK::ConversionFunction:
89     return CompletionItemKind::Function;
90   case SK::Variable:
91   case SK::Parameter:
92     return CompletionItemKind::Variable;
93   case SK::Field:
94     return CompletionItemKind::Field;
95   // FIXME(ioeric): use LSP enum constant when it is supported in the protocol.
96   case SK::EnumConstant:
97     return CompletionItemKind::Value;
98   case SK::InstanceMethod:
99   case SK::ClassMethod:
100   case SK::StaticMethod:
101   case SK::Destructor:
102     return CompletionItemKind::Method;
103   case SK::InstanceProperty:
104   case SK::ClassProperty:
105   case SK::StaticProperty:
106     return CompletionItemKind::Property;
107   case SK::Constructor:
108     return CompletionItemKind::Constructor;
109   }
110   llvm_unreachable("Unhandled clang::index::SymbolKind.");
111 }
112 
113 CompletionItemKind
114 toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,
115                      const NamedDecl *Decl) {
116   if (Decl)
117     return toCompletionItemKind(index::getSymbolInfo(Decl).Kind);
118   switch (ResKind) {
119   case CodeCompletionResult::RK_Declaration:
120     llvm_unreachable("RK_Declaration without Decl");
121   case CodeCompletionResult::RK_Keyword:
122     return CompletionItemKind::Keyword;
123   case CodeCompletionResult::RK_Macro:
124     return CompletionItemKind::Text; // unfortunately, there's no 'Macro'
125                                      // completion items in LSP.
126   case CodeCompletionResult::RK_Pattern:
127     return CompletionItemKind::Snippet;
128   }
129   llvm_unreachable("Unhandled CodeCompletionResult::ResultKind.");
130 }
131 
132 /// Get the optional chunk as a string. This function is possibly recursive.
133 ///
134 /// The parameter info for each parameter is appended to the Parameters.
135 std::string getOptionalParameters(const CodeCompletionString &CCS,
136                                   std::vector<ParameterInformation> &Parameters,
137                                   SignatureQualitySignals &Signal) {
138   std::string Result;
139   for (const auto &Chunk : CCS) {
140     switch (Chunk.Kind) {
141     case CodeCompletionString::CK_Optional:
142       assert(Chunk.Optional &&
143              "Expected the optional code completion string to be non-null.");
144       Result += getOptionalParameters(*Chunk.Optional, Parameters, Signal);
145       break;
146     case CodeCompletionString::CK_VerticalSpace:
147       break;
148     case CodeCompletionString::CK_Placeholder:
149       // A string that acts as a placeholder for, e.g., a function call
150       // argument.
151       // Intentional fallthrough here.
152     case CodeCompletionString::CK_CurrentParameter: {
153       // A piece of text that describes the parameter that corresponds to
154       // the code-completion location within a function call, message send,
155       // macro invocation, etc.
156       Result += Chunk.Text;
157       ParameterInformation Info;
158       Info.label = Chunk.Text;
159       Parameters.push_back(std::move(Info));
160       Signal.ContainsActiveParameter = true;
161       Signal.NumberOfOptionalParameters++;
162       break;
163     }
164     default:
165       Result += Chunk.Text;
166       break;
167     }
168   }
169   return Result;
170 }
171 
172 /// Creates a `HeaderFile` from \p Header which can be either a URI or a literal
173 /// include.
174 static llvm::Expected<HeaderFile> toHeaderFile(StringRef Header,
175                                                llvm::StringRef HintPath) {
176   if (isLiteralInclude(Header))
177     return HeaderFile{Header.str(), /*Verbatim=*/true};
178   auto U = URI::parse(Header);
179   if (!U)
180     return U.takeError();
181 
182   auto IncludePath = URI::includeSpelling(*U);
183   if (!IncludePath)
184     return IncludePath.takeError();
185   if (!IncludePath->empty())
186     return HeaderFile{std::move(*IncludePath), /*Verbatim=*/true};
187 
188   auto Resolved = URI::resolve(*U, HintPath);
189   if (!Resolved)
190     return Resolved.takeError();
191   return HeaderFile{std::move(*Resolved), /*Verbatim=*/false};
192 }
193 
194 // First traverses all method definitions inside current class/struct/union
195 // definition. Than traverses base classes to find virtual methods that haven't
196 // been overriden within current context.
197 // FIXME(kadircet): Currently we cannot see declarations below completion point.
198 // It is because Sema gets run only upto completion point. Need to find a
199 // solution to run it for the whole class/struct/union definition.
200 static std::vector<CodeCompletionResult>
201 getNonOverridenMethodCompletionResults(const DeclContext *DC, Sema *S) {
202   const auto *CR = llvm::dyn_cast<CXXRecordDecl>(DC);
203   // If not inside a class/struct/union return empty.
204   if (!CR)
205     return {};
206   // First store overrides within current class.
207   // These are stored by name to make querying fast in the later step.
208   llvm::StringMap<std::vector<FunctionDecl *>> Overrides;
209   for (auto *Method : CR->methods()) {
210     if (!Method->isVirtual())
211       continue;
212     Overrides[Method->getName()].push_back(Method);
213   }
214 
215   std::vector<CodeCompletionResult> Results;
216   for (const auto &Base : CR->bases()) {
217     const auto *BR = Base.getType().getTypePtr()->getAsCXXRecordDecl();
218     if (!BR)
219       continue;
220     for (auto *Method : BR->methods()) {
221       if (!Method->isVirtual())
222         continue;
223       const auto it = Overrides.find(Method->getName());
224       bool IsOverriden = false;
225       if (it != Overrides.end()) {
226         for (auto *MD : it->second) {
227           // If the method in current body is not an overload of this virtual
228           // function, that it overrides this one.
229           if (!S->IsOverload(MD, Method, false)) {
230             IsOverriden = true;
231             break;
232           }
233         }
234       }
235       if (!IsOverriden)
236         Results.emplace_back(Method, 0);
237     }
238   }
239 
240   return Results;
241 }
242 
243 /// A code completion result, in clang-native form.
244 /// It may be promoted to a CompletionItem if it's among the top-ranked results.
245 struct CompletionCandidate {
246   llvm::StringRef Name; // Used for filtering and sorting.
247   // We may have a result from Sema, from the index, or both.
248   const CodeCompletionResult *SemaResult = nullptr;
249   const Symbol *IndexResult = nullptr;
250 
251   // States whether this item is an override suggestion.
252   bool IsOverride = false;
253 
254   // Returns a token identifying the overload set this is part of.
255   // 0 indicates it's not part of any overload set.
256   size_t overloadSet() const {
257     SmallString<256> Scratch;
258     if (IndexResult) {
259       switch (IndexResult->SymInfo.Kind) {
260       case index::SymbolKind::ClassMethod:
261       case index::SymbolKind::InstanceMethod:
262       case index::SymbolKind::StaticMethod:
263         assert(false && "Don't expect members from index in code completion");
264         // fall through
265       case index::SymbolKind::Function:
266         // We can't group overloads together that need different #includes.
267         // This could break #include insertion.
268         return hash_combine(
269             (IndexResult->Scope + IndexResult->Name).toStringRef(Scratch),
270             headerToInsertIfNotPresent().getValueOr(""));
271       default:
272         return 0;
273       }
274     }
275     assert(SemaResult);
276     // We need to make sure we're consistent with the IndexResult case!
277     const NamedDecl *D = SemaResult->Declaration;
278     if (!D || !D->isFunctionOrFunctionTemplate())
279       return 0;
280     {
281       llvm::raw_svector_ostream OS(Scratch);
282       D->printQualifiedName(OS);
283     }
284     return hash_combine(Scratch, headerToInsertIfNotPresent().getValueOr(""));
285   }
286 
287   llvm::Optional<llvm::StringRef> headerToInsertIfNotPresent() const {
288     if (!IndexResult || !IndexResult->Detail ||
289         IndexResult->Detail->IncludeHeader.empty())
290       return llvm::None;
291     if (SemaResult && SemaResult->Declaration) {
292       // Avoid inserting new #include if the declaration is found in the current
293       // file e.g. the symbol is forward declared.
294       auto &SM = SemaResult->Declaration->getASTContext().getSourceManager();
295       for (const Decl *RD : SemaResult->Declaration->redecls())
296         if (SM.isInMainFile(SM.getExpansionLoc(RD->getBeginLoc())))
297           return llvm::None;
298     }
299     return IndexResult->Detail->IncludeHeader;
300   }
301 
302   using Bundle = llvm::SmallVector<CompletionCandidate, 4>;
303 };
304 using ScoredBundle =
305     std::pair<CompletionCandidate::Bundle, CodeCompletion::Scores>;
306 struct ScoredBundleGreater {
307   bool operator()(const ScoredBundle &L, const ScoredBundle &R) {
308     if (L.second.Total != R.second.Total)
309       return L.second.Total > R.second.Total;
310     return L.first.front().Name <
311            R.first.front().Name; // Earlier name is better.
312   }
313 };
314 
315 // Assembles a code completion out of a bundle of >=1 completion candidates.
316 // Many of the expensive strings are only computed at this point, once we know
317 // the candidate bundle is going to be returned.
318 //
319 // Many fields are the same for all candidates in a bundle (e.g. name), and are
320 // computed from the first candidate, in the constructor.
321 // Others vary per candidate, so add() must be called for remaining candidates.
322 struct CodeCompletionBuilder {
323   CodeCompletionBuilder(ASTContext &ASTCtx, const CompletionCandidate &C,
324                         CodeCompletionString *SemaCCS,
325                         const IncludeInserter &Includes, StringRef FileName,
326                         const CodeCompleteOptions &Opts)
327       : ASTCtx(ASTCtx), ExtractDocumentation(Opts.IncludeComments),
328         EnableFunctionArgSnippets(Opts.EnableFunctionArgSnippets) {
329     add(C, SemaCCS);
330     if (C.SemaResult) {
331       Completion.Origin |= SymbolOrigin::AST;
332       Completion.Name = llvm::StringRef(SemaCCS->getTypedText());
333       if (Completion.Scope.empty()) {
334         if ((C.SemaResult->Kind == CodeCompletionResult::RK_Declaration) ||
335             (C.SemaResult->Kind == CodeCompletionResult::RK_Pattern))
336           if (const auto *D = C.SemaResult->getDeclaration())
337             if (const auto *ND = llvm::dyn_cast<NamedDecl>(D))
338               Completion.Scope =
339                   splitQualifiedName(printQualifiedName(*ND)).first;
340       }
341       Completion.Kind =
342           toCompletionItemKind(C.SemaResult->Kind, C.SemaResult->Declaration);
343       for (const auto &FixIt : C.SemaResult->FixIts) {
344         Completion.FixIts.push_back(
345             toTextEdit(FixIt, ASTCtx.getSourceManager(), ASTCtx.getLangOpts()));
346       }
347       std::sort(Completion.FixIts.begin(), Completion.FixIts.end(),
348                 [](const TextEdit &X, const TextEdit &Y) {
349                   return std::tie(X.range.start.line, X.range.start.character) <
350                          std::tie(Y.range.start.line, Y.range.start.character);
351                 });
352     }
353     if (C.IndexResult) {
354       Completion.Origin |= C.IndexResult->Origin;
355       if (Completion.Scope.empty())
356         Completion.Scope = C.IndexResult->Scope;
357       if (Completion.Kind == CompletionItemKind::Missing)
358         Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind);
359       if (Completion.Name.empty())
360         Completion.Name = C.IndexResult->Name;
361     }
362     if (auto Inserted = C.headerToInsertIfNotPresent()) {
363       // Turn absolute path into a literal string that can be #included.
364       auto Include = [&]() -> Expected<std::pair<std::string, bool>> {
365         auto ResolvedDeclaring =
366             toHeaderFile(C.IndexResult->CanonicalDeclaration.FileURI, FileName);
367         if (!ResolvedDeclaring)
368           return ResolvedDeclaring.takeError();
369         auto ResolvedInserted = toHeaderFile(*Inserted, FileName);
370         if (!ResolvedInserted)
371           return ResolvedInserted.takeError();
372         return std::make_pair(Includes.calculateIncludePath(*ResolvedDeclaring,
373                                                             *ResolvedInserted),
374                               Includes.shouldInsertInclude(*ResolvedDeclaring,
375                                                            *ResolvedInserted));
376       }();
377       if (Include) {
378         Completion.Header = Include->first;
379         if (Include->second)
380           Completion.HeaderInsertion = Includes.insert(Include->first);
381       } else
382         log("Failed to generate include insertion edits for adding header "
383             "(FileURI='{0}', IncludeHeader='{1}') into {2}",
384             C.IndexResult->CanonicalDeclaration.FileURI,
385             C.IndexResult->Detail->IncludeHeader, FileName);
386     }
387   }
388 
389   void add(const CompletionCandidate &C, CodeCompletionString *SemaCCS) {
390     assert(bool(C.SemaResult) == bool(SemaCCS));
391     Bundled.emplace_back();
392     BundledEntry &S = Bundled.back();
393     if (C.SemaResult) {
394       getSignature(*SemaCCS, &S.Signature, &S.SnippetSuffix,
395                    &Completion.RequiredQualifier);
396       S.ReturnType = getReturnType(*SemaCCS);
397     } else if (C.IndexResult) {
398       S.Signature = C.IndexResult->Signature;
399       S.SnippetSuffix = C.IndexResult->CompletionSnippetSuffix;
400       if (auto *D = C.IndexResult->Detail)
401         S.ReturnType = D->ReturnType;
402     }
403     if (ExtractDocumentation && Completion.Documentation.empty()) {
404       if (C.IndexResult && C.IndexResult->Detail)
405         Completion.Documentation = C.IndexResult->Detail->Documentation;
406       else if (C.SemaResult)
407         Completion.Documentation = getDocComment(ASTCtx, *C.SemaResult,
408                                                  /*CommentsFromHeader=*/false);
409     }
410     if (C.IsOverride)
411       S.OverrideSuffix = true;
412   }
413 
414   CodeCompletion build() {
415     Completion.ReturnType = summarizeReturnType();
416     Completion.Signature = summarizeSignature();
417     Completion.SnippetSuffix = summarizeSnippet();
418     Completion.BundleSize = Bundled.size();
419     if (summarizeOverride()) {
420       Completion.Name = Completion.ReturnType + ' ' +
421                         std::move(Completion.Name) +
422                         std::move(Completion.Signature) + " override";
423       Completion.Signature.clear();
424     }
425     return std::move(Completion);
426   }
427 
428 private:
429   struct BundledEntry {
430     std::string SnippetSuffix;
431     std::string Signature;
432     std::string ReturnType;
433     bool OverrideSuffix;
434   };
435 
436   // If all BundledEntrys have the same value for a property, return it.
437   template <std::string BundledEntry::*Member>
438   const std::string *onlyValue() const {
439     auto B = Bundled.begin(), E = Bundled.end();
440     for (auto I = B + 1; I != E; ++I)
441       if (I->*Member != B->*Member)
442         return nullptr;
443     return &(B->*Member);
444   }
445 
446   template <bool BundledEntry::*Member> const bool *onlyValue() const {
447     auto B = Bundled.begin(), E = Bundled.end();
448     for (auto I = B + 1; I != E; ++I)
449       if (I->*Member != B->*Member)
450         return nullptr;
451     return &(B->*Member);
452   }
453 
454   std::string summarizeReturnType() const {
455     if (auto *RT = onlyValue<&BundledEntry::ReturnType>())
456       return *RT;
457     return "";
458   }
459 
460   std::string summarizeSnippet() const {
461     auto *Snippet = onlyValue<&BundledEntry::SnippetSuffix>();
462     if (!Snippet)
463       // All bundles are function calls.
464       return "($0)";
465     if (!Snippet->empty() && !EnableFunctionArgSnippets &&
466         ((Completion.Kind == CompletionItemKind::Function) ||
467          (Completion.Kind == CompletionItemKind::Method)) &&
468         (Snippet->front() == '(') && (Snippet->back() == ')'))
469       // Check whether function has any parameters or not.
470       return Snippet->size() > 2 ? "($0)" : "()";
471     return *Snippet;
472   }
473 
474   std::string summarizeSignature() const {
475     if (auto *Signature = onlyValue<&BundledEntry::Signature>())
476       return *Signature;
477     // All bundles are function calls.
478     return "(…)";
479   }
480 
481   bool summarizeOverride() const {
482     if (auto *OverrideSuffix = onlyValue<&BundledEntry::OverrideSuffix>())
483       return *OverrideSuffix;
484     return false;
485   }
486 
487   ASTContext &ASTCtx;
488   CodeCompletion Completion;
489   SmallVector<BundledEntry, 1> Bundled;
490   bool ExtractDocumentation;
491   bool EnableFunctionArgSnippets;
492 };
493 
494 // Determine the symbol ID for a Sema code completion result, if possible.
495 llvm::Optional<SymbolID> getSymbolID(const CodeCompletionResult &R) {
496   switch (R.Kind) {
497   case CodeCompletionResult::RK_Declaration:
498   case CodeCompletionResult::RK_Pattern: {
499     return clang::clangd::getSymbolID(R.Declaration);
500   }
501   case CodeCompletionResult::RK_Macro:
502     // FIXME: Macros do have USRs, but the CCR doesn't contain enough info.
503   case CodeCompletionResult::RK_Keyword:
504     return None;
505   }
506   llvm_unreachable("unknown CodeCompletionResult kind");
507 }
508 
509 // Scopes of the paritial identifier we're trying to complete.
510 // It is used when we query the index for more completion results.
511 struct SpecifiedScope {
512   // The scopes we should look in, determined by Sema.
513   //
514   // If the qualifier was fully resolved, we look for completions in these
515   // scopes; if there is an unresolved part of the qualifier, it should be
516   // resolved within these scopes.
517   //
518   // Examples of qualified completion:
519   //
520   //   "::vec"                                      => {""}
521   //   "using namespace std; ::vec^"                => {"", "std::"}
522   //   "namespace ns {using namespace std;} ns::^"  => {"ns::", "std::"}
523   //   "std::vec^"                                  => {""}  // "std" unresolved
524   //
525   // Examples of unqualified completion:
526   //
527   //   "vec^"                                       => {""}
528   //   "using namespace std; vec^"                  => {"", "std::"}
529   //   "using namespace std; namespace ns { vec^ }" => {"ns::", "std::", ""}
530   //
531   // "" for global namespace, "ns::" for normal namespace.
532   std::vector<std::string> AccessibleScopes;
533   // The full scope qualifier as typed by the user (without the leading "::").
534   // Set if the qualifier is not fully resolved by Sema.
535   llvm::Optional<std::string> UnresolvedQualifier;
536 
537   // Construct scopes being queried in indexes.
538   // This method format the scopes to match the index request representation.
539   std::vector<std::string> scopesForIndexQuery() {
540     std::vector<std::string> Results;
541     for (llvm::StringRef AS : AccessibleScopes) {
542       Results.push_back(AS);
543       if (UnresolvedQualifier)
544         Results.back() += *UnresolvedQualifier;
545     }
546     return Results;
547   }
548 };
549 
550 // Get all scopes that will be queried in indexes.
551 std::vector<std::string> getQueryScopes(CodeCompletionContext &CCContext,
552                                         const SourceManager &SM) {
553   auto GetAllAccessibleScopes = [](CodeCompletionContext &CCContext) {
554     SpecifiedScope Info;
555     for (auto *Context : CCContext.getVisitedContexts()) {
556       if (isa<TranslationUnitDecl>(Context))
557         Info.AccessibleScopes.push_back(""); // global namespace
558       else if (const auto *NS = dyn_cast<NamespaceDecl>(Context))
559         Info.AccessibleScopes.push_back(NS->getQualifiedNameAsString() + "::");
560     }
561     return Info;
562   };
563 
564   auto SS = CCContext.getCXXScopeSpecifier();
565 
566   // Unqualified completion (e.g. "vec^").
567   if (!SS) {
568     // FIXME: Once we can insert namespace qualifiers and use the in-scope
569     //        namespaces for scoring, search in all namespaces.
570     // FIXME: Capture scopes and use for scoring, for example,
571     //        "using namespace std; namespace foo {v^}" =>
572     //        foo::value > std::vector > boost::variant
573     return GetAllAccessibleScopes(CCContext).scopesForIndexQuery();
574   }
575 
576   // Qualified completion ("std::vec^"), we have two cases depending on whether
577   // the qualifier can be resolved by Sema.
578   if ((*SS)->isValid()) { // Resolved qualifier.
579     return GetAllAccessibleScopes(CCContext).scopesForIndexQuery();
580   }
581 
582   // Unresolved qualifier.
583   // FIXME: When Sema can resolve part of a scope chain (e.g.
584   // "known::unknown::id"), we should expand the known part ("known::") rather
585   // than treating the whole thing as unknown.
586   SpecifiedScope Info;
587   Info.AccessibleScopes.push_back(""); // global namespace
588 
589   Info.UnresolvedQualifier =
590       Lexer::getSourceText(CharSourceRange::getCharRange((*SS)->getRange()), SM,
591                            clang::LangOptions())
592           .ltrim("::");
593   // Sema excludes the trailing "::".
594   if (!Info.UnresolvedQualifier->empty())
595     *Info.UnresolvedQualifier += "::";
596 
597   return Info.scopesForIndexQuery();
598 }
599 
600 // Should we perform index-based completion in a context of the specified kind?
601 // FIXME: consider allowing completion, but restricting the result types.
602 bool contextAllowsIndex(enum CodeCompletionContext::Kind K) {
603   switch (K) {
604   case CodeCompletionContext::CCC_TopLevel:
605   case CodeCompletionContext::CCC_ObjCInterface:
606   case CodeCompletionContext::CCC_ObjCImplementation:
607   case CodeCompletionContext::CCC_ObjCIvarList:
608   case CodeCompletionContext::CCC_ClassStructUnion:
609   case CodeCompletionContext::CCC_Statement:
610   case CodeCompletionContext::CCC_Expression:
611   case CodeCompletionContext::CCC_ObjCMessageReceiver:
612   case CodeCompletionContext::CCC_EnumTag:
613   case CodeCompletionContext::CCC_UnionTag:
614   case CodeCompletionContext::CCC_ClassOrStructTag:
615   case CodeCompletionContext::CCC_ObjCProtocolName:
616   case CodeCompletionContext::CCC_Namespace:
617   case CodeCompletionContext::CCC_Type:
618   case CodeCompletionContext::CCC_Name: // FIXME: why does ns::^ give this?
619   case CodeCompletionContext::CCC_PotentiallyQualifiedName:
620   case CodeCompletionContext::CCC_ParenthesizedExpression:
621   case CodeCompletionContext::CCC_ObjCInterfaceName:
622   case CodeCompletionContext::CCC_ObjCCategoryName:
623     return true;
624   case CodeCompletionContext::CCC_Other: // Be conservative.
625   case CodeCompletionContext::CCC_OtherWithMacros:
626   case CodeCompletionContext::CCC_DotMemberAccess:
627   case CodeCompletionContext::CCC_ArrowMemberAccess:
628   case CodeCompletionContext::CCC_ObjCPropertyAccess:
629   case CodeCompletionContext::CCC_MacroName:
630   case CodeCompletionContext::CCC_MacroNameUse:
631   case CodeCompletionContext::CCC_PreprocessorExpression:
632   case CodeCompletionContext::CCC_PreprocessorDirective:
633   case CodeCompletionContext::CCC_NaturalLanguage:
634   case CodeCompletionContext::CCC_SelectorName:
635   case CodeCompletionContext::CCC_TypeQualifiers:
636   case CodeCompletionContext::CCC_ObjCInstanceMessage:
637   case CodeCompletionContext::CCC_ObjCClassMessage:
638   case CodeCompletionContext::CCC_Recovery:
639     return false;
640   }
641   llvm_unreachable("unknown code completion context");
642 }
643 
644 // Some member calls are blacklisted because they're so rarely useful.
645 static bool isBlacklistedMember(const NamedDecl &D) {
646   // Destructor completion is rarely useful, and works inconsistently.
647   // (s.^ completes ~string, but s.~st^ is an error).
648   if (D.getKind() == Decl::CXXDestructor)
649     return true;
650   // Injected name may be useful for A::foo(), but who writes A::A::foo()?
651   if (auto *R = dyn_cast_or_null<RecordDecl>(&D))
652     if (R->isInjectedClassName())
653       return true;
654   // Explicit calls to operators are also rare.
655   auto NameKind = D.getDeclName().getNameKind();
656   if (NameKind == DeclarationName::CXXOperatorName ||
657       NameKind == DeclarationName::CXXLiteralOperatorName ||
658       NameKind == DeclarationName::CXXConversionFunctionName)
659     return true;
660   return false;
661 }
662 
663 // The CompletionRecorder captures Sema code-complete output, including context.
664 // It filters out ignored results (but doesn't apply fuzzy-filtering yet).
665 // It doesn't do scoring or conversion to CompletionItem yet, as we want to
666 // merge with index results first.
667 // Generally the fields and methods of this object should only be used from
668 // within the callback.
669 struct CompletionRecorder : public CodeCompleteConsumer {
670   CompletionRecorder(const CodeCompleteOptions &Opts,
671                      llvm::unique_function<void()> ResultsCallback)
672       : CodeCompleteConsumer(Opts.getClangCompleteOpts(),
673                              /*OutputIsBinary=*/false),
674         CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
675         CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
676         CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {
677     assert(this->ResultsCallback);
678   }
679 
680   std::vector<CodeCompletionResult> Results;
681   CodeCompletionContext CCContext;
682   Sema *CCSema = nullptr; // Sema that created the results.
683   // FIXME: Sema is scary. Can we store ASTContext and Preprocessor, instead?
684 
685   void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
686                                   CodeCompletionResult *InResults,
687                                   unsigned NumResults) override final {
688     // Results from recovery mode are generally useless, and the callback after
689     // recovery (if any) is usually more interesting. To make sure we handle the
690     // future callback from sema, we just ignore all callbacks in recovery mode,
691     // as taking only results from recovery mode results in poor completion
692     // results.
693     // FIXME: in case there is no future sema completion callback after the
694     // recovery mode, we might still want to provide some results (e.g. trivial
695     // identifier-based completion).
696     if (Context.getKind() == CodeCompletionContext::CCC_Recovery) {
697       log("Code complete: Ignoring sema code complete callback with Recovery "
698           "context.");
699       return;
700     }
701     // If a callback is called without any sema result and the context does not
702     // support index-based completion, we simply skip it to give way to
703     // potential future callbacks with results.
704     if (NumResults == 0 && !contextAllowsIndex(Context.getKind()))
705       return;
706     if (CCSema) {
707       log("Multiple code complete callbacks (parser backtracked?). "
708           "Dropping results from context {0}, keeping results from {1}.",
709           getCompletionKindString(Context.getKind()),
710           getCompletionKindString(this->CCContext.getKind()));
711       return;
712     }
713     // Record the completion context.
714     CCSema = &S;
715     CCContext = Context;
716 
717     // Retain the results we might want.
718     for (unsigned I = 0; I < NumResults; ++I) {
719       auto &Result = InResults[I];
720       // Drop hidden items which cannot be found by lookup after completion.
721       // Exception: some items can be named by using a qualifier.
722       if (Result.Hidden && (!Result.Qualifier || Result.QualifierIsInformative))
723         continue;
724       if (!Opts.IncludeIneligibleResults &&
725           (Result.Availability == CXAvailability_NotAvailable ||
726            Result.Availability == CXAvailability_NotAccessible))
727         continue;
728       if (Result.Declaration &&
729           !Context.getBaseType().isNull() // is this a member-access context?
730           && isBlacklistedMember(*Result.Declaration))
731         continue;
732       // We choose to never append '::' to completion results in clangd.
733       Result.StartsNestedNameSpecifier = false;
734       Results.push_back(Result);
735     }
736     ResultsCallback();
737   }
738 
739   CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
740   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
741 
742   // Returns the filtering/sorting name for Result, which must be from Results.
743   // Returned string is owned by this recorder (or the AST).
744   llvm::StringRef getName(const CodeCompletionResult &Result) {
745     switch (Result.Kind) {
746     case CodeCompletionResult::RK_Declaration:
747       if (auto *ID = Result.Declaration->getIdentifier())
748         return ID->getName();
749       break;
750     case CodeCompletionResult::RK_Keyword:
751       return Result.Keyword;
752     case CodeCompletionResult::RK_Macro:
753       return Result.Macro->getName();
754     case CodeCompletionResult::RK_Pattern:
755       return Result.Pattern->getTypedText();
756     }
757     auto *CCS = codeCompletionString(Result);
758     return CCS->getTypedText();
759   }
760 
761   // Build a CodeCompletion string for R, which must be from Results.
762   // The CCS will be owned by this recorder.
763   CodeCompletionString *codeCompletionString(const CodeCompletionResult &R) {
764     // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
765     return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
766         *CCSema, CCContext, *CCAllocator, CCTUInfo,
767         /*IncludeBriefComments=*/false);
768   }
769 
770 private:
771   CodeCompleteOptions Opts;
772   std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
773   CodeCompletionTUInfo CCTUInfo;
774   llvm::unique_function<void()> ResultsCallback;
775 };
776 
// A signature-help entry together with the data needed to rank it and to
// fill in its documentation.
struct ScoredSignature {
  // When set, requires documentation to be requested from the index with this
  // ID.
  llvm::Optional<SymbolID> IDForDoc;
  // The signature (label, parameters, documentation) that ends up in the
  // SignatureHelp response.
  SignatureInformation Signature;
  // Signals consulted when ordering signatures against each other.
  SignatureQualitySignals Quality;
};
784 
785 class SignatureHelpCollector final : public CodeCompleteConsumer {
786 public:
787   SignatureHelpCollector(const clang::CodeCompleteOptions &CodeCompleteOpts,
788                          SymbolIndex *Index, SignatureHelp &SigHelp)
789       : CodeCompleteConsumer(CodeCompleteOpts,
790                              /*OutputIsBinary=*/false),
791         SigHelp(SigHelp),
792         Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
793         CCTUInfo(Allocator), Index(Index) {}
794 
795   void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg,
796                                  OverloadCandidate *Candidates,
797                                  unsigned NumCandidates) override {
798     std::vector<ScoredSignature> ScoredSignatures;
799     SigHelp.signatures.reserve(NumCandidates);
800     ScoredSignatures.reserve(NumCandidates);
801     // FIXME(rwols): How can we determine the "active overload candidate"?
802     // Right now the overloaded candidates seem to be provided in a "best fit"
803     // order, so I'm not too worried about this.
804     SigHelp.activeSignature = 0;
805     assert(CurrentArg <= (unsigned)std::numeric_limits<int>::max() &&
806            "too many arguments");
807     SigHelp.activeParameter = static_cast<int>(CurrentArg);
808     for (unsigned I = 0; I < NumCandidates; ++I) {
809       OverloadCandidate Candidate = Candidates[I];
810       // We want to avoid showing instantiated signatures, because they may be
811       // long in some cases (e.g. when 'T' is substituted with 'std::string', we
812       // would get 'std::basic_string<char>').
813       if (auto *Func = Candidate.getFunction()) {
814         if (auto *Pattern = Func->getTemplateInstantiationPattern())
815           Candidate = OverloadCandidate(Pattern);
816       }
817 
818       const auto *CCS = Candidate.CreateSignatureString(
819           CurrentArg, S, *Allocator, CCTUInfo, true);
820       assert(CCS && "Expected the CodeCompletionString to be non-null");
821       ScoredSignatures.push_back(processOverloadCandidate(
822           Candidate, *CCS,
823           Candidate.getFunction()
824               ? getDeclComment(S.getASTContext(), *Candidate.getFunction())
825               : ""));
826     }
827 
828     // Sema does not load the docs from the preamble, so we need to fetch extra
829     // docs from the index instead.
830     llvm::DenseMap<SymbolID, std::string> FetchedDocs;
831     if (Index) {
832       LookupRequest IndexRequest;
833       for (const auto &S : ScoredSignatures) {
834         if (!S.IDForDoc)
835           continue;
836         IndexRequest.IDs.insert(*S.IDForDoc);
837       }
838       Index->lookup(IndexRequest, [&](const Symbol &S) {
839         if (!S.Detail || S.Detail->Documentation.empty())
840           return;
841         FetchedDocs[S.ID] = S.Detail->Documentation;
842       });
843       log("SigHelp: requested docs for {0} symbols from the index, got {1} "
844           "symbols with non-empty docs in the response",
845           IndexRequest.IDs.size(), FetchedDocs.size());
846     }
847 
848     std::sort(
849         ScoredSignatures.begin(), ScoredSignatures.end(),
850         [](const ScoredSignature &L, const ScoredSignature &R) {
851           // Ordering follows:
852           // - Less number of parameters is better.
853           // - Function is better than FunctionType which is better than
854           // Function Template.
855           // - High score is better.
856           // - Shorter signature is better.
857           // - Alphebatically smaller is better.
858           if (L.Quality.NumberOfParameters != R.Quality.NumberOfParameters)
859             return L.Quality.NumberOfParameters < R.Quality.NumberOfParameters;
860           if (L.Quality.NumberOfOptionalParameters !=
861               R.Quality.NumberOfOptionalParameters)
862             return L.Quality.NumberOfOptionalParameters <
863                    R.Quality.NumberOfOptionalParameters;
864           if (L.Quality.Kind != R.Quality.Kind) {
865             using OC = CodeCompleteConsumer::OverloadCandidate;
866             switch (L.Quality.Kind) {
867             case OC::CK_Function:
868               return true;
869             case OC::CK_FunctionType:
870               return R.Quality.Kind != OC::CK_Function;
871             case OC::CK_FunctionTemplate:
872               return false;
873             }
874             llvm_unreachable("Unknown overload candidate type.");
875           }
876           if (L.Signature.label.size() != R.Signature.label.size())
877             return L.Signature.label.size() < R.Signature.label.size();
878           return L.Signature.label < R.Signature.label;
879         });
880 
881     for (auto &SS : ScoredSignatures) {
882       auto IndexDocIt =
883           SS.IDForDoc ? FetchedDocs.find(*SS.IDForDoc) : FetchedDocs.end();
884       if (IndexDocIt != FetchedDocs.end())
885         SS.Signature.documentation = IndexDocIt->second;
886 
887       SigHelp.signatures.push_back(std::move(SS.Signature));
888     }
889   }
890 
891   GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; }
892 
893   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
894 
895 private:
896   // FIXME(ioeric): consider moving CodeCompletionString logic here to
897   // CompletionString.h.
898   ScoredSignature processOverloadCandidate(const OverloadCandidate &Candidate,
899                                            const CodeCompletionString &CCS,
900                                            llvm::StringRef DocComment) const {
901     SignatureInformation Signature;
902     SignatureQualitySignals Signal;
903     const char *ReturnType = nullptr;
904 
905     Signature.documentation = formatDocumentation(CCS, DocComment);
906     Signal.Kind = Candidate.getKind();
907 
908     for (const auto &Chunk : CCS) {
909       switch (Chunk.Kind) {
910       case CodeCompletionString::CK_ResultType:
911         // A piece of text that describes the type of an entity or,
912         // for functions and methods, the return type.
913         assert(!ReturnType && "Unexpected CK_ResultType");
914         ReturnType = Chunk.Text;
915         break;
916       case CodeCompletionString::CK_Placeholder:
917         // A string that acts as a placeholder for, e.g., a function call
918         // argument.
919         // Intentional fallthrough here.
920       case CodeCompletionString::CK_CurrentParameter: {
921         // A piece of text that describes the parameter that corresponds to
922         // the code-completion location within a function call, message send,
923         // macro invocation, etc.
924         Signature.label += Chunk.Text;
925         ParameterInformation Info;
926         Info.label = Chunk.Text;
927         Signature.parameters.push_back(std::move(Info));
928         Signal.NumberOfParameters++;
929         Signal.ContainsActiveParameter = true;
930         break;
931       }
932       case CodeCompletionString::CK_Optional: {
933         // The rest of the parameters are defaulted/optional.
934         assert(Chunk.Optional &&
935                "Expected the optional code completion string to be non-null.");
936         Signature.label += getOptionalParameters(*Chunk.Optional,
937                                                  Signature.parameters, Signal);
938         break;
939       }
940       case CodeCompletionString::CK_VerticalSpace:
941         break;
942       default:
943         Signature.label += Chunk.Text;
944         break;
945       }
946     }
947     if (ReturnType) {
948       Signature.label += " -> ";
949       Signature.label += ReturnType;
950     }
951     dlog("Signal for {0}: {1}", Signature, Signal);
952     ScoredSignature Result;
953     Result.Signature = std::move(Signature);
954     Result.Quality = Signal;
955     Result.IDForDoc =
956         Result.Signature.documentation.empty() && Candidate.getFunction()
957             ? clangd::getSymbolID(Candidate.getFunction())
958             : llvm::None;
959     return Result;
960   }
961 
962   SignatureHelp &SigHelp;
963   std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator;
964   CodeCompletionTUInfo CCTUInfo;
965   const SymbolIndex *Index;
966 }; // SignatureHelpCollector
967 
// Bundles everything semaCodeComplete needs to run Sema on one file.
struct SemaCompleteInput {
  // File in which completion is requested; also names the contents buffer.
  PathRef FileName;
  // Supplies the command line and the working directory for the invocation.
  const tooling::CompileCommand &Command;
  // Passed through to prepareCompilerInstance.
  PrecompiledPreamble const *Preamble;
  // Current text of the file, copied into a memory buffer for the compiler.
  StringRef Contents;
  // Completion position within Contents.
  Position Pos;
  // File system used for the invocation and the compiler instance.
  IntrusiveRefCntPtr<vfs::FileSystem> VFS;
  // Passed through to prepareCompilerInstance.
  std::shared_ptr<PCHContainerOperations> PCHs;
};
977 
978 // Invokes Sema code completion on a file.
979 // If \p Includes is set, it will be updated based on the compiler invocation.
980 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
981                       const clang::CodeCompleteOptions &Options,
982                       const SemaCompleteInput &Input,
983                       IncludeStructure *Includes = nullptr) {
984   trace::Span Tracer("Sema completion");
985   std::vector<const char *> ArgStrs;
986   for (const auto &S : Input.Command.CommandLine)
987     ArgStrs.push_back(S.c_str());
988 
989   if (Input.VFS->setCurrentWorkingDirectory(Input.Command.Directory)) {
990     log("Couldn't set working directory");
991     // We run parsing anyway, our lit-tests rely on results for non-existing
992     // working dirs.
993   }
994 
995   IgnoreDiagnostics DummyDiagsConsumer;
996   auto CI = createInvocationFromCommandLine(
997       ArgStrs,
998       CompilerInstance::createDiagnostics(new DiagnosticOptions,
999                                           &DummyDiagsConsumer, false),
1000       Input.VFS);
1001   if (!CI) {
1002     elog("Couldn't create CompilerInvocation");
1003     return false;
1004   }
1005   auto &FrontendOpts = CI->getFrontendOpts();
1006   FrontendOpts.DisableFree = false;
1007   FrontendOpts.SkipFunctionBodies = true;
1008   CI->getLangOpts()->CommentOpts.ParseAllComments = true;
1009   // Disable typo correction in Sema.
1010   CI->getLangOpts()->SpellChecking = false;
1011   // Setup code completion.
1012   FrontendOpts.CodeCompleteOpts = Options;
1013   FrontendOpts.CodeCompletionAt.FileName = Input.FileName;
1014   auto Offset = positionToOffset(Input.Contents, Input.Pos);
1015   if (!Offset) {
1016     elog("Code completion position was invalid {0}", Offset.takeError());
1017     return false;
1018   }
1019   std::tie(FrontendOpts.CodeCompletionAt.Line,
1020            FrontendOpts.CodeCompletionAt.Column) =
1021       offsetToClangLineColumn(Input.Contents, *Offset);
1022 
1023   std::unique_ptr<llvm::MemoryBuffer> ContentsBuffer =
1024       llvm::MemoryBuffer::getMemBufferCopy(Input.Contents, Input.FileName);
1025   // The diagnostic options must be set before creating a CompilerInstance.
1026   CI->getDiagnosticOpts().IgnoreWarnings = true;
1027   // We reuse the preamble whether it's valid or not. This is a
1028   // correctness/performance tradeoff: building without a preamble is slow, and
1029   // completion is latency-sensitive.
1030   // NOTE: we must call BeginSourceFile after prepareCompilerInstance. Otherwise
1031   // the remapped buffers do not get freed.
1032   auto Clang = prepareCompilerInstance(
1033       std::move(CI), Input.Preamble, std::move(ContentsBuffer),
1034       std::move(Input.PCHs), std::move(Input.VFS), DummyDiagsConsumer);
1035   Clang->setCodeCompletionConsumer(Consumer.release());
1036 
1037   SyntaxOnlyAction Action;
1038   if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) {
1039     log("BeginSourceFile() failed when running codeComplete for {0}",
1040         Input.FileName);
1041     return false;
1042   }
1043   if (Includes)
1044     Clang->getPreprocessor().addPPCallbacks(
1045         collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
1046   if (!Action.Execute()) {
1047     log("Execute() failed when running codeComplete for {0}", Input.FileName);
1048     return false;
1049   }
1050   Action.EndSourceFile();
1051 
1052   return true;
1053 }
1054 
1055 // Should we allow index completions in the specified context?
1056 bool allowIndex(CodeCompletionContext &CC) {
1057   if (!contextAllowsIndex(CC.getKind()))
1058     return false;
1059   // We also avoid ClassName::bar (but allow namespace::bar).
1060   auto Scope = CC.getCXXScopeSpecifier();
1061   if (!Scope)
1062     return true;
1063   NestedNameSpecifier *NameSpec = (*Scope)->getScopeRep();
1064   if (!NameSpec)
1065     return true;
1066   // We only query the index when qualifier is a namespace.
1067   // If it's a class, we rely solely on sema completions.
1068   switch (NameSpec->getKind()) {
1069   case NestedNameSpecifier::Global:
1070   case NestedNameSpecifier::Namespace:
1071   case NestedNameSpecifier::NamespaceAlias:
1072     return true;
1073   case NestedNameSpecifier::Super:
1074   case NestedNameSpecifier::TypeSpec:
1075   case NestedNameSpecifier::TypeSpecWithTemplate:
1076   // Unresolved inside a template.
1077   case NestedNameSpecifier::Identifier:
1078     return false;
1079   }
1080   llvm_unreachable("invalid NestedNameSpecifier kind");
1081 }
1082 
1083 std::future<SymbolSlab> startAsyncFuzzyFind(const SymbolIndex &Index,
1084                                             const FuzzyFindRequest &Req) {
1085   return runAsync<SymbolSlab>([&Index, Req]() {
1086     trace::Span Tracer("Async fuzzyFind");
1087     SymbolSlab::Builder Syms;
1088     Index.fuzzyFind(Req, [&Syms](const Symbol &Sym) { Syms.insert(Sym); });
1089     return std::move(Syms).build();
1090   });
1091 }
1092 
1093 // Creates a `FuzzyFindRequest` based on the cached index request from the
1094 // last completion, if any, and the speculated completion filter text in the
1095 // source code.
1096 llvm::Optional<FuzzyFindRequest> speculativeFuzzyFindRequestForCompletion(
1097     FuzzyFindRequest CachedReq, PathRef File, StringRef Content, Position Pos) {
1098   auto Filter = speculateCompletionFilter(Content, Pos);
1099   if (!Filter) {
1100     elog("Failed to speculate filter text for code completion at Pos "
1101          "{0}:{1}: {2}",
1102          Pos.line, Pos.character, Filter.takeError());
1103     return llvm::None;
1104   }
1105   CachedReq.Query = *Filter;
1106   return CachedReq;
1107 }
1108 
1109 } // namespace
1110 
1111 clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
1112   clang::CodeCompleteOptions Result;
1113   Result.IncludeCodePatterns = EnableSnippets && IncludeCodePatterns;
1114   Result.IncludeMacros = IncludeMacros;
1115   Result.IncludeGlobals = true;
1116   // We choose to include full comments and not do doxygen parsing in
1117   // completion.
1118   // FIXME: ideally, we should support doxygen in some form, e.g. do markdown
1119   // formatting of the comments.
1120   Result.IncludeBriefComments = false;
1121 
1122   // When an is used, Sema is responsible for completing the main file,
1123   // the index can provide results from the preamble.
1124   // Tell Sema not to deserialize the preamble to look for results.
1125   Result.LoadExternal = !Index;
1126   Result.IncludeFixIts = IncludeFixIts;
1127 
1128   return Result;
1129 }
1130 
1131 // Runs Sema-based (AST) and Index-based completion, returns merged results.
1132 //
1133 // There are a few tricky considerations:
1134 //   - the AST provides information needed for the index query (e.g. which
1135 //     namespaces to search in). So Sema must start first.
1136 //   - we only want to return the top results (Opts.Limit).
1137 //     Building CompletionItems for everything else is wasteful, so we want to
1138 //     preserve the "native" format until we're done with scoring.
1139 //   - the data underlying Sema completion items is owned by the AST and various
1140 //     other arenas, which must stay alive for us to build CompletionItems.
1141 //   - we may get duplicate results from Sema and the Index, we need to merge.
1142 //
1143 // So we start Sema completion first, and do all our work in its callback.
1144 // We use the Sema context information to query the index.
1145 // Then we merge the two result sets, producing items that are Sema/Index/Both.
1146 // These items are scored, and the top N are synthesized into the LSP response.
1147 // Finally, we can clean up the data structures created by Sema completion.
1148 //
1149 // Main collaborators are:
1150 //   - semaCodeComplete sets up the compiler machinery to run code completion.
1151 //   - CompletionRecorder captures Sema completion results, including context.
1152 //   - SymbolIndex (Opts.Index) provides index completion results as Symbols
1153 //   - CompletionCandidates are the result of merging Sema and Index results.
1154 //     Each candidate points to an underlying CodeCompletionResult (Sema), a
1155 //     Symbol (Index), or both. It computes the result quality score.
1156 //     CompletionCandidate also does conversion to CompletionItem (at the end).
1157 //   - FuzzyMatcher scores how the candidate matches the partial identifier.
1158 //     This score is combined with the result quality score for the final score.
1159 //   - TopN determines the results with the best score.
1160 class CodeCompleteFlow {
1161   PathRef FileName;
1162   IncludeStructure Includes; // Complete once the compiler runs.
1163   SpeculativeFuzzyFind *SpecFuzzyFind; // Can be nullptr.
1164   const CodeCompleteOptions &Opts;
1165 
1166   // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
1167   CompletionRecorder *Recorder = nullptr;
1168   int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging.
1169   bool Incomplete = false; // Would more be available with a higher limit?
1170   llvm::Optional<FuzzyMatcher> Filter;       // Initialized once Sema runs.
1171   std::vector<std::string> QueryScopes;      // Initialized once Sema runs.
1172   // Include-insertion and proximity scoring rely on the include structure.
1173   // This is available after Sema has run.
1174   llvm::Optional<IncludeInserter> Inserter;  // Available during runWithSema.
1175   llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs.
1176   /// Speculative request based on the cached request and the filter text before
1177   /// the cursor.
1178   /// Initialized right before sema run. This is only set if `SpecFuzzyFind` is
1179   /// set and contains a cached request.
1180   llvm::Optional<FuzzyFindRequest> SpecReq;
1181 
1182 public:
  // A CodeCompleteFlow object is only useful for calling run() exactly once.
  // Includes is copied into the flow; Opts is held by reference, and
  // SpecFuzzyFind may be null.
  CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes,
                   SpeculativeFuzzyFind *SpecFuzzyFind,
                   const CodeCompleteOptions &Opts)
      : FileName(FileName), Includes(Includes), SpecFuzzyFind(SpecFuzzyFind),
        Opts(Opts) {}
1189 
  // Runs the whole completion flow for SemaCCInput and returns the result.
  // Rvalue-qualified: it can only be invoked on an expiring flow object.
  //
  // Steps:
  //   1. If an index and a cached request are available, start a speculative
  //      index query in the background.
  //   2. Create the CompletionRecorder with a callback that runs once Sema
  //      delivers results; the callback does the actual work via
  //      runWithSema() and stores its result in Output.
  //   3. Hand the recorder to semaCodeComplete, then log and trace counters.
  // The return value of semaCodeComplete is not inspected. If the callback
  // never fires, Output is returned default-constructed.
  CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && {
    trace::Span Tracer("CodeCompleteFlow");
    if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) {
      assert(!SpecFuzzyFind->Result.valid());
      // Only start the async query if a speculative request could be built.
      if ((SpecReq = speculativeFuzzyFindRequestForCompletion(
               *SpecFuzzyFind->CachedReq, SemaCCInput.FileName,
               SemaCCInput.Contents, SemaCCInput.Pos)))
        SpecFuzzyFind->Result = startAsyncFuzzyFind(*Opts.Index, *SpecReq);
    }

    // We run Sema code completion first. It builds an AST and calculates:
    //   - completion results based on the AST.
    //   - partial identifier and context. We need these for the index query.
    CodeCompleteResult Output;
    // The callback captures by reference and uses Recorder, which is assigned
    // below before Sema is started.
    auto RecorderOwner = llvm::make_unique<CompletionRecorder>(Opts, [&]() {
      assert(Recorder && "Recorder is not set");
      auto Style =
          format::getStyle(format::DefaultFormatStyle, SemaCCInput.FileName,
                           format::DefaultFallbackStyle, SemaCCInput.Contents,
                           SemaCCInput.VFS.get());
      if (!Style) {
        log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.",
            SemaCCInput.FileName, Style.takeError());
        Style = format::getLLVMStyle();
      }
      // If preprocessor was run, inclusions from preprocessor callback should
      // already be added to Includes.
      Inserter.emplace(
          SemaCCInput.FileName, SemaCCInput.Contents, *Style,
          SemaCCInput.Command.Directory,
          Recorder->CCSema->getPreprocessor().getHeaderSearchInfo());
      for (const auto &Inc : Includes.MainFileIncludes)
        Inserter->addExisting(Inc);

      // Most of the cost of file proximity is in initializing the FileDistance
      // structures based on the observed includes, once per query. Conceptually
      // that happens here (though the per-URI-scheme initialization is lazy).
      // The per-result proximity scoring is (amortized) very cheap.
      FileDistanceOptions ProxOpts{}; // Use defaults.
      const auto &SM = Recorder->CCSema->getSourceManager();
      llvm::StringMap<SourceParams> ProxSources;
      // One proximity source per file reported by includeDepth() for the main
      // file; its cost is the reported depth times IncludeCost.
      for (auto &Entry : Includes.includeDepth(
               SM.getFileEntryForID(SM.getMainFileID())->getName())) {
        auto &Source = ProxSources[Entry.getKey()];
        Source.Cost = Entry.getValue() * ProxOpts.IncludeCost;
        // Symbols near our transitive includes are good, but only consider
        // things in the same directory or below it. Otherwise there can be
        // many false positives.
        if (Entry.getValue() > 0)
          Source.MaxUpTraversals = 1;
      }
      FileProximity.emplace(ProxSources, ProxOpts);

      Output = runWithSema();
      Inserter.reset(); // Make sure this doesn't out-live Clang.
      SPAN_ATTACH(Tracer, "sema_completion_kind",
                  getCompletionKindString(Recorder->CCContext.getKind()));
      log("Code complete: sema context {0}, query scopes [{1}]",
          getCompletionKindString(Recorder->CCContext.getKind()),
          llvm::join(QueryScopes.begin(), QueryScopes.end(), ","));
    });

    // Keep a non-owning pointer; ownership moves into semaCodeComplete below.
    Recorder = RecorderOwner.get();

    semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(),
                     SemaCCInput, &Includes);

    SPAN_ATTACH(Tracer, "sema_results", NSema);
    SPAN_ATTACH(Tracer, "index_results", NIndex);
    SPAN_ATTACH(Tracer, "merged_results", NBoth);
    SPAN_ATTACH(Tracer, "returned_results", int64_t(Output.Completions.size()));
    SPAN_ATTACH(Tracer, "incomplete", Output.HasMore);
    log("Code complete: {0} results from Sema, {1} from Index, "
        "{2} matched, {3} returned{4}.",
        NSema, NIndex, NBoth, Output.Completions.size(),
        Output.HasMore ? " (incomplete)" : "");
    assert(!Opts.Limit || Output.Completions.size() <= Opts.Limit);
    // We don't assert that isIncomplete means we hit a limit.
    // Indexes may choose to impose their own limits even if we don't have one.
    return Output;
  }
1271 
1272 private:
1273   // This is called by run() once Sema code completion is done, but before the
1274   // Sema data structures are torn down. It does all the real work.
1275   CodeCompleteResult runWithSema() {
1276     const auto &CodeCompletionRange = CharSourceRange::getCharRange(
1277         Recorder->CCSema->getPreprocessor().getCodeCompletionTokenRange());
1278     Range TextEditRange;
1279     // When we are getting completions with an empty identifier, for example
1280     //    std::vector<int> asdf;
1281     //    asdf.^;
1282     // Then the range will be invalid and we will be doing insertion, use
1283     // current cursor position in such cases as range.
1284     if (CodeCompletionRange.isValid()) {
1285       TextEditRange = halfOpenToRange(Recorder->CCSema->getSourceManager(),
1286                                       CodeCompletionRange);
1287     } else {
1288       const auto &Pos = sourceLocToPosition(
1289           Recorder->CCSema->getSourceManager(),
1290           Recorder->CCSema->getPreprocessor().getCodeCompletionLoc());
1291       TextEditRange.start = TextEditRange.end = Pos;
1292     }
1293     Filter = FuzzyMatcher(
1294         Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
1295     QueryScopes = getQueryScopes(Recorder->CCContext,
1296                                  Recorder->CCSema->getSourceManager());
1297     // Sema provides the needed context to query the index.
1298     // FIXME: in addition to querying for extra/overlapping symbols, we should
1299     //        explicitly request symbols corresponding to Sema results.
1300     //        We can use their signals even if the index can't suggest them.
1301     // We must copy index results to preserve them, but there are at most Limit.
1302     auto IndexResults = (Opts.Index && allowIndex(Recorder->CCContext))
1303                             ? queryIndex()
1304                             : SymbolSlab();
1305     trace::Span Tracer("Populate CodeCompleteResult");
1306     // Merge Sema, Index and Override results, score them, and pick the
1307     // winners.
1308     const auto Overrides = getNonOverridenMethodCompletionResults(
1309         Recorder->CCSema->CurContext, Recorder->CCSema);
1310     auto Top = mergeResults(Recorder->Results, IndexResults, Overrides);
1311     CodeCompleteResult Output;
1312 
1313     // Convert the results to final form, assembling the expensive strings.
1314     for (auto &C : Top) {
1315       Output.Completions.push_back(toCodeCompletion(C.first));
1316       Output.Completions.back().Score = C.second;
1317       Output.Completions.back().CompletionTokenRange = TextEditRange;
1318     }
1319     Output.HasMore = Incomplete;
1320     Output.Context = Recorder->CCContext.getKind();
1321 
1322     return Output;
1323   }
1324 
1325   SymbolSlab queryIndex() {
1326     trace::Span Tracer("Query index");
1327     SPAN_ATTACH(Tracer, "limit", int64_t(Opts.Limit));
1328 
1329     // Build the query.
1330     FuzzyFindRequest Req;
1331     if (Opts.Limit)
1332       Req.MaxCandidateCount = Opts.Limit;
1333     Req.Query = Filter->pattern();
1334     Req.RestrictForCodeCompletion = true;
1335     Req.Scopes = QueryScopes;
1336     // FIXME: we should send multiple weighted paths here.
1337     Req.ProximityPaths.push_back(FileName);
1338     vlog("Code complete: fuzzyFind(\"{0}\", scopes=[{1}])", Req.Query,
1339          llvm::join(Req.Scopes.begin(), Req.Scopes.end(), ","));
1340 
1341     if (SpecFuzzyFind)
1342       SpecFuzzyFind->NewReq = Req;
1343     if (SpecFuzzyFind && SpecFuzzyFind->Result.valid() && (*SpecReq == Req)) {
1344       vlog("Code complete: speculative fuzzy request matches the actual index "
1345            "request. Waiting for the speculative index results.");
1346       SPAN_ATTACH(Tracer, "Speculative results", true);
1347 
1348       trace::Span WaitSpec("Wait speculative results");
1349       return SpecFuzzyFind->Result.get();
1350     }
1351 
1352     SPAN_ATTACH(Tracer, "Speculative results", false);
1353 
1354     // Run the query against the index.
1355     SymbolSlab::Builder ResultsBuilder;
1356     if (Opts.Index->fuzzyFind(
1357             Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); }))
1358       Incomplete = true;
1359     return std::move(ResultsBuilder).build();
1360   }
1361 
1362   // Merges Sema, Index and Override results where possible, to form
1363   // CompletionCandidates. Groups overloads if desired, to form
1364   // CompletionCandidate::Bundles. The bundles are scored and top results are
1365   // returned, best to worst.
1366   std::vector<ScoredBundle>
1367   mergeResults(const std::vector<CodeCompletionResult> &SemaResults,
1368                const SymbolSlab &IndexResults,
1369                const std::vector<CodeCompletionResult> &OverrideResults) {
1370     trace::Span Tracer("Merge and score results");
1371     std::vector<CompletionCandidate::Bundle> Bundles;
1372     llvm::DenseMap<size_t, size_t> BundleLookup;
1373     auto AddToBundles = [&](const CodeCompletionResult *SemaResult,
1374                             const Symbol *IndexResult,
1375                             bool IsOverride = false) {
1376       CompletionCandidate C;
1377       C.SemaResult = SemaResult;
1378       C.IndexResult = IndexResult;
1379       C.IsOverride = IsOverride;
1380       C.Name = IndexResult ? IndexResult->Name : Recorder->getName(*SemaResult);
1381       if (auto OverloadSet = Opts.BundleOverloads ? C.overloadSet() : 0) {
1382         auto Ret = BundleLookup.try_emplace(OverloadSet, Bundles.size());
1383         if (Ret.second)
1384           Bundles.emplace_back();
1385         Bundles[Ret.first->second].push_back(std::move(C));
1386       } else {
1387         Bundles.emplace_back();
1388         Bundles.back().push_back(std::move(C));
1389       }
1390     };
1391     llvm::DenseSet<const Symbol *> UsedIndexResults;
1392     auto CorrespondingIndexResult =
1393         [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
1394       if (auto SymID = getSymbolID(SemaResult)) {
1395         auto I = IndexResults.find(*SymID);
1396         if (I != IndexResults.end()) {
1397           UsedIndexResults.insert(&*I);
1398           return &*I;
1399         }
1400       }
1401       return nullptr;
1402     };
1403     // Emit all Sema results, merging them with Index results if possible.
1404     for (auto &SemaResult : Recorder->Results)
1405       AddToBundles(&SemaResult, CorrespondingIndexResult(SemaResult));
1406     // Handle OverrideResults the same way we deal with SemaResults. Since these
1407     // results use the same structs as a SemaResult it is safe to do that, but
1408     // we need to make sure we dont' duplicate things in future if Sema starts
1409     // to provide them as well.
1410     for (auto &OverrideResult : OverrideResults)
1411       AddToBundles(&OverrideResult, CorrespondingIndexResult(OverrideResult),
1412                    true);
1413     // Now emit any Index-only results.
1414     for (const auto &IndexResult : IndexResults) {
1415       if (UsedIndexResults.count(&IndexResult))
1416         continue;
1417       AddToBundles(/*SemaResult=*/nullptr, &IndexResult);
1418     }
1419     // We only keep the best N results at any time, in "native" format.
1420     TopN<ScoredBundle, ScoredBundleGreater> Top(
1421         Opts.Limit == 0 ? std::numeric_limits<size_t>::max() : Opts.Limit);
1422     for (auto &Bundle : Bundles)
1423       addCandidate(Top, std::move(Bundle));
1424     return std::move(Top).items();
1425   }
1426 
1427   Optional<float> fuzzyScore(const CompletionCandidate &C) {
1428     // Macros can be very spammy, so we only support prefix completion.
1429     // We won't end up with underfull index results, as macros are sema-only.
1430     if (C.SemaResult && C.SemaResult->Kind == CodeCompletionResult::RK_Macro &&
1431         !C.Name.startswith_lower(Filter->pattern()))
1432       return None;
1433     return Filter->match(C.Name);
1434   }
1435 
1436   // Scores a candidate and adds it to the TopN structure.
1437   void addCandidate(TopN<ScoredBundle, ScoredBundleGreater> &Candidates,
1438                     CompletionCandidate::Bundle Bundle) {
1439     SymbolQualitySignals Quality;
1440     SymbolRelevanceSignals Relevance;
1441     Relevance.Context = Recorder->CCContext.getKind();
1442     Relevance.Query = SymbolRelevanceSignals::CodeComplete;
1443     Relevance.FileProximityMatch = FileProximity.getPointer();
1444     auto &First = Bundle.front();
1445     if (auto FuzzyScore = fuzzyScore(First))
1446       Relevance.NameMatch = *FuzzyScore;
1447     else
1448       return;
1449     SymbolOrigin Origin = SymbolOrigin::Unknown;
1450     bool FromIndex = false;
1451     for (const auto &Candidate : Bundle) {
1452       if (Candidate.IndexResult) {
1453         Quality.merge(*Candidate.IndexResult);
1454         Relevance.merge(*Candidate.IndexResult);
1455         Origin |= Candidate.IndexResult->Origin;
1456         FromIndex = true;
1457       }
1458       if (Candidate.SemaResult) {
1459         Quality.merge(*Candidate.SemaResult);
1460         Relevance.merge(*Candidate.SemaResult);
1461         Origin |= SymbolOrigin::AST;
1462       }
1463     }
1464 
1465     CodeCompletion::Scores Scores;
1466     Scores.Quality = Quality.evaluate();
1467     Scores.Relevance = Relevance.evaluate();
1468     Scores.Total = evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance);
1469     // NameMatch is in fact a multiplier on total score, so rescoring is sound.
1470     Scores.ExcludingName = Relevance.NameMatch
1471                                ? Scores.Total / Relevance.NameMatch
1472                                : Scores.Quality;
1473 
1474     dlog("CodeComplete: {0} ({1}) = {2}\n{3}{4}\n", First.Name,
1475          llvm::to_string(Origin), Scores.Total, llvm::to_string(Quality),
1476          llvm::to_string(Relevance));
1477 
1478     NSema += bool(Origin & SymbolOrigin::AST);
1479     NIndex += FromIndex;
1480     NBoth += bool(Origin & SymbolOrigin::AST) && FromIndex;
1481     if (Candidates.push({std::move(Bundle), Scores}))
1482       Incomplete = true;
1483   }
1484 
1485   CodeCompletion toCodeCompletion(const CompletionCandidate::Bundle &Bundle) {
1486     llvm::Optional<CodeCompletionBuilder> Builder;
1487     for (const auto &Item : Bundle) {
1488       CodeCompletionString *SemaCCS =
1489           Item.SemaResult ? Recorder->codeCompletionString(*Item.SemaResult)
1490                           : nullptr;
1491       if (!Builder)
1492         Builder.emplace(Recorder->CCSema->getASTContext(), Item, SemaCCS,
1493                         *Inserter, FileName, Opts);
1494       else
1495         Builder->add(Item, SemaCCS);
1496     }
1497     return Builder->build();
1498   }
1499 };
1500 
1501 llvm::Expected<llvm::StringRef>
1502 speculateCompletionFilter(llvm::StringRef Content, Position Pos) {
1503   auto Offset = positionToOffset(Content, Pos);
1504   if (!Offset)
1505     return llvm::make_error<llvm::StringError>(
1506         "Failed to convert position to offset in content.",
1507         llvm::inconvertibleErrorCode());
1508   if (*Offset == 0)
1509     return "";
1510 
1511   // Start from the character before the cursor.
1512   int St = *Offset - 1;
1513   // FIXME(ioeric): consider UTF characters?
1514   auto IsValidIdentifierChar = [](char c) {
1515     return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
1516             (c >= '0' && c <= '9') || (c == '_'));
1517   };
1518   size_t Len = 0;
1519   for (; (St >= 0) && IsValidIdentifierChar(Content[St]); --St, ++Len) {
1520   }
1521   if (Len > 0)
1522     St++; // Shift to the first valid character.
1523   return Content.substr(St, Len);
1524 }
1525 
1526 CodeCompleteResult
1527 codeComplete(PathRef FileName, const tooling::CompileCommand &Command,
1528              PrecompiledPreamble const *Preamble,
1529              const IncludeStructure &PreambleInclusions, StringRef Contents,
1530              Position Pos, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
1531              std::shared_ptr<PCHContainerOperations> PCHs,
1532              CodeCompleteOptions Opts, SpeculativeFuzzyFind *SpecFuzzyFind) {
1533   return CodeCompleteFlow(FileName, PreambleInclusions, SpecFuzzyFind, Opts)
1534       .run({FileName, Command, Preamble, Contents, Pos, VFS, PCHs});
1535 }
1536 
1537 SignatureHelp signatureHelp(PathRef FileName,
1538                             const tooling::CompileCommand &Command,
1539                             PrecompiledPreamble const *Preamble,
1540                             StringRef Contents, Position Pos,
1541                             IntrusiveRefCntPtr<vfs::FileSystem> VFS,
1542                             std::shared_ptr<PCHContainerOperations> PCHs,
1543                             SymbolIndex *Index) {
1544   SignatureHelp Result;
1545   clang::CodeCompleteOptions Options;
1546   Options.IncludeGlobals = false;
1547   Options.IncludeMacros = false;
1548   Options.IncludeCodePatterns = false;
1549   Options.IncludeBriefComments = false;
1550   IncludeStructure PreambleInclusions; // Unused for signatureHelp
1551   semaCodeComplete(
1552       llvm::make_unique<SignatureHelpCollector>(Options, Index, Result),
1553       Options,
1554       {FileName, Command, Preamble, Contents, Pos, std::move(VFS),
1555        std::move(PCHs)});
1556   return Result;
1557 }
1558 
1559 bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) {
1560   using namespace clang::ast_matchers;
1561   auto InTopLevelScope = hasDeclContext(
1562       anyOf(namespaceDecl(), translationUnitDecl(), linkageSpecDecl()));
1563   return !match(decl(anyOf(InTopLevelScope,
1564                            hasDeclContext(
1565                                enumDecl(InTopLevelScope, unless(isScoped()))))),
1566                 ND, ASTCtx)
1567               .empty();
1568 }
1569 
1570 CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const {
1571   CompletionItem LSP;
1572   LSP.label = (HeaderInsertion ? Opts.IncludeIndicator.Insert
1573                                : Opts.IncludeIndicator.NoInsert) +
1574               (Opts.ShowOrigins ? "[" + llvm::to_string(Origin) + "]" : "") +
1575               RequiredQualifier + Name + Signature;
1576 
1577   LSP.kind = Kind;
1578   LSP.detail = BundleSize > 1 ? llvm::formatv("[{0} overloads]", BundleSize)
1579                               : ReturnType;
1580   if (!Header.empty())
1581     LSP.detail += "\n" + Header;
1582   LSP.documentation = Documentation;
1583   LSP.sortText = sortText(Score.Total, Name);
1584   LSP.filterText = Name;
1585   LSP.textEdit = {CompletionTokenRange, RequiredQualifier + Name};
1586   // Merge continious additionalTextEdits into main edit. The main motivation
1587   // behind this is to help LSP clients, it seems most of them are confused when
1588   // they are provided with additionalTextEdits that are consecutive to main
1589   // edit.
1590   // Note that we store additional text edits from back to front in a line. That
1591   // is mainly to help LSP clients again, so that changes do not effect each
1592   // other.
1593   for (const auto &FixIt : FixIts) {
1594     if (IsRangeConsecutive(FixIt.range, LSP.textEdit->range)) {
1595       LSP.textEdit->newText = FixIt.newText + LSP.textEdit->newText;
1596       LSP.textEdit->range.start = FixIt.range.start;
1597     } else {
1598       LSP.additionalTextEdits.push_back(FixIt);
1599     }
1600   }
1601   if (Opts.EnableSnippets)
1602     LSP.textEdit->newText += SnippetSuffix;
1603 
1604   // FIXME(kadircet): Do not even fill insertText after making sure textEdit is
1605   // compatible with most of the editors.
1606   LSP.insertText = LSP.textEdit->newText;
1607   LSP.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet
1608                                              : InsertTextFormat::PlainText;
1609   if (HeaderInsertion)
1610     LSP.additionalTextEdits.push_back(*HeaderInsertion);
1611   return LSP;
1612 }
1613 
1614 raw_ostream &operator<<(raw_ostream &OS, const CodeCompletion &C) {
1615   // For now just lean on CompletionItem.
1616   return OS << C.render(CodeCompleteOptions());
1617 }
1618 
1619 raw_ostream &operator<<(raw_ostream &OS, const CodeCompleteResult &R) {
1620   OS << "CodeCompleteResult: " << R.Completions.size() << (R.HasMore ? "+" : "")
1621      << " (" << getCompletionKindString(R.Context) << ")"
1622      << " items:\n";
1623   for (const auto &C : R.Completions)
1624     OS << C << "\n";
1625   return OS;
1626 }
1627 
1628 } // namespace clangd
1629 } // namespace clang
1630