1 //===--- CodeComplete.cpp ---------------------------------------*- C++-*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===---------------------------------------------------------------------===//
9 //
10 // AST-based completions are provided using the completion hooks in Sema.
11 //
12 // Signature help works in a similar way as code completion, but it is simpler
13 // as there are typically fewer candidates.
14 //
15 //===---------------------------------------------------------------------===//
16 
17 #include "CodeComplete.h"
18 #include "CodeCompletionStrings.h"
19 #include "Compiler.h"
20 #include "FuzzyMatch.h"
21 #include "Logger.h"
22 #include "SourceCode.h"
23 #include "Trace.h"
24 #include "index/Index.h"
25 #include "clang/Format/Format.h"
26 #include "clang/Frontend/CompilerInstance.h"
27 #include "clang/Frontend/FrontendActions.h"
28 #include "clang/Index/USRGeneration.h"
29 #include "clang/Sema/CodeCompleteConsumer.h"
30 #include "clang/Sema/Sema.h"
31 #include "clang/Tooling/Core/Replacement.h"
32 #include "llvm/Support/Format.h"
33 #include <queue>
34 
35 namespace clang {
36 namespace clangd {
37 namespace {
38 
39 CompletionItemKind toCompletionItemKind(CXCursorKind CursorKind) {
40   switch (CursorKind) {
41   case CXCursor_MacroInstantiation:
42   case CXCursor_MacroDefinition:
43     return CompletionItemKind::Text;
44   case CXCursor_CXXMethod:
45   case CXCursor_Destructor:
46     return CompletionItemKind::Method;
47   case CXCursor_FunctionDecl:
48   case CXCursor_FunctionTemplate:
49     return CompletionItemKind::Function;
50   case CXCursor_Constructor:
51     return CompletionItemKind::Constructor;
52   case CXCursor_FieldDecl:
53     return CompletionItemKind::Field;
54   case CXCursor_VarDecl:
55   case CXCursor_ParmDecl:
56     return CompletionItemKind::Variable;
57   // FIXME(ioeric): use LSP struct instead of class when it is suppoted in the
58   // protocol.
59   case CXCursor_StructDecl:
60   case CXCursor_ClassDecl:
61   case CXCursor_UnionDecl:
62   case CXCursor_ClassTemplate:
63   case CXCursor_ClassTemplatePartialSpecialization:
64     return CompletionItemKind::Class;
65   case CXCursor_Namespace:
66   case CXCursor_NamespaceAlias:
67   case CXCursor_NamespaceRef:
68     return CompletionItemKind::Module;
69   case CXCursor_EnumConstantDecl:
70     return CompletionItemKind::Value;
71   case CXCursor_EnumDecl:
72     return CompletionItemKind::Enum;
73   // FIXME(ioeric): figure out whether reference is the right type for aliases.
74   case CXCursor_TypeAliasDecl:
75   case CXCursor_TypeAliasTemplateDecl:
76   case CXCursor_TypedefDecl:
77   case CXCursor_MemberRef:
78   case CXCursor_TypeRef:
79     return CompletionItemKind::Reference;
80   default:
81     return CompletionItemKind::Missing;
82   }
83 }
84 
85 CompletionItemKind
86 toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,
87                      CXCursorKind CursorKind) {
88   switch (ResKind) {
89   case CodeCompletionResult::RK_Declaration:
90     return toCompletionItemKind(CursorKind);
91   case CodeCompletionResult::RK_Keyword:
92     return CompletionItemKind::Keyword;
93   case CodeCompletionResult::RK_Macro:
94     return CompletionItemKind::Text; // unfortunately, there's no 'Macro'
95                                      // completion items in LSP.
96   case CodeCompletionResult::RK_Pattern:
97     return CompletionItemKind::Snippet;
98   }
99   llvm_unreachable("Unhandled CodeCompletionResult::ResultKind.");
100 }
101 
102 CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
103   using SK = index::SymbolKind;
104   switch (Kind) {
105   case SK::Unknown:
106     return CompletionItemKind::Missing;
107   case SK::Module:
108   case SK::Namespace:
109   case SK::NamespaceAlias:
110     return CompletionItemKind::Module;
111   case SK::Macro:
112     return CompletionItemKind::Text;
113   case SK::Enum:
114     return CompletionItemKind::Enum;
115   // FIXME(ioeric): use LSP struct instead of class when it is suppoted in the
116   // protocol.
117   case SK::Struct:
118   case SK::Class:
119   case SK::Protocol:
120   case SK::Extension:
121   case SK::Union:
122     return CompletionItemKind::Class;
123   // FIXME(ioeric): figure out whether reference is the right type for aliases.
124   case SK::TypeAlias:
125   case SK::Using:
126     return CompletionItemKind::Reference;
127   case SK::Function:
128   // FIXME(ioeric): this should probably be an operator. This should be fixed
129   // when `Operator` is support type in the protocol.
130   case SK::ConversionFunction:
131     return CompletionItemKind::Function;
132   case SK::Variable:
133   case SK::Parameter:
134     return CompletionItemKind::Variable;
135   case SK::Field:
136     return CompletionItemKind::Field;
137   // FIXME(ioeric): use LSP enum constant when it is supported in the protocol.
138   case SK::EnumConstant:
139     return CompletionItemKind::Value;
140   case SK::InstanceMethod:
141   case SK::ClassMethod:
142   case SK::StaticMethod:
143   case SK::Destructor:
144     return CompletionItemKind::Method;
145   case SK::InstanceProperty:
146   case SK::ClassProperty:
147   case SK::StaticProperty:
148     return CompletionItemKind::Property;
149   case SK::Constructor:
150     return CompletionItemKind::Constructor;
151   }
152   llvm_unreachable("Unhandled clang::index::SymbolKind.");
153 }
154 
155 /// Get the optional chunk as a string. This function is possibly recursive.
156 ///
157 /// The parameter info for each parameter is appended to the Parameters.
158 std::string
159 getOptionalParameters(const CodeCompletionString &CCS,
160                       std::vector<ParameterInformation> &Parameters) {
161   std::string Result;
162   for (const auto &Chunk : CCS) {
163     switch (Chunk.Kind) {
164     case CodeCompletionString::CK_Optional:
165       assert(Chunk.Optional &&
166              "Expected the optional code completion string to be non-null.");
167       Result += getOptionalParameters(*Chunk.Optional, Parameters);
168       break;
169     case CodeCompletionString::CK_VerticalSpace:
170       break;
171     case CodeCompletionString::CK_Placeholder:
172       // A string that acts as a placeholder for, e.g., a function call
173       // argument.
174       // Intentional fallthrough here.
175     case CodeCompletionString::CK_CurrentParameter: {
176       // A piece of text that describes the parameter that corresponds to
177       // the code-completion location within a function call, message send,
178       // macro invocation, etc.
179       Result += Chunk.Text;
180       ParameterInformation Info;
181       Info.label = Chunk.Text;
182       Parameters.push_back(std::move(Info));
183       break;
184     }
185     default:
186       Result += Chunk.Text;
187       break;
188     }
189   }
190   return Result;
191 }
192 
// Maps a float onto an unsigned integer such that integer comparison agrees
// with float comparison: a < b <==> encodeFloat(a) < encodeFloat(b).
uint32_t encodeFloat(float F) {
  static_assert(std::numeric_limits<float>::is_iec559, "");
  static_assert(sizeof(float) == sizeof(uint32_t), "");
  constexpr uint32_t SignBit = uint32_t{1} << 31;

  // Reinterpret the float's bits as an integer. Endianness is the same as for
  // integers.
  uint32_t Bits;
  memcpy(&Bits, &F, sizeof(Bits));

  // IEEE 754 floats compare like sign-magnitude integers:
  //  - negative floats land in the low half of the range, order reversed;
  //  - non-negative floats are shifted into the high half.
  const bool IsNegative = (Bits & SignBit) != 0;
  return IsNegative ? 0 - Bits : Bits + SignBit;
}
208 
209 // Returns a string that sorts in the same order as (-Score, Name), for LSP.
210 std::string sortText(float Score, llvm::StringRef Name) {
211   // We convert -Score to an integer, and hex-encode for readability.
212   // Example: [0.5, "foo"] -> "41000000foo"
213   std::string S;
214   llvm::raw_string_ostream OS(S);
215   write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
216             /*Width=*/2 * sizeof(Score));
217   OS << Name;
218   OS.flush();
219   return S;
220 }
221 
222 /// A code completion result, in clang-native form.
223 /// It may be promoted to a CompletionItem if it's among the top-ranked results.
224 struct CompletionCandidate {
225   llvm::StringRef Name; // Used for filtering and sorting.
226   // We may have a result from Sema, from the index, or both.
227   const CodeCompletionResult *SemaResult = nullptr;
228   const Symbol *IndexResult = nullptr;
229 
230   // Computes the "symbol quality" score for this completion. Higher is better.
231   float score() const {
232     // For now we just use the Sema priority, mapping it onto a 0-1 interval.
233     if (!SemaResult) // FIXME(sammccall): better scoring for index results.
234       return 0.3f;   // fixed mediocre score for index-only results.
235 
236     // Priority 80 is a really bad score.
237     float Score = 1 - std::min<float>(80, SemaResult->Priority) / 80;
238 
239     switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
240     case CXAvailability_Available:
241       // No penalty.
242       break;
243     case CXAvailability_Deprecated:
244       Score *= 0.1f;
245       break;
246     case CXAvailability_NotAccessible:
247     case CXAvailability_NotAvailable:
248       Score = 0;
249       break;
250     }
251     return Score;
252   }
253 
254   // Builds an LSP completion item.
255   CompletionItem build(llvm::StringRef FileName,
256                        const CompletionItemScores &Scores,
257                        const CodeCompleteOptions &Opts,
258                        CodeCompletionString *SemaCCS) const {
259     assert(bool(SemaResult) == bool(SemaCCS));
260     CompletionItem I;
261     if (SemaResult) {
262       I.kind = toCompletionItemKind(SemaResult->Kind, SemaResult->CursorKind);
263       getLabelAndInsertText(*SemaCCS, &I.label, &I.insertText,
264                             Opts.EnableSnippets);
265       I.filterText = getFilterText(*SemaCCS);
266       I.documentation = getDocumentation(*SemaCCS);
267       I.detail = getDetail(*SemaCCS);
268     }
269     if (IndexResult) {
270       if (I.kind == CompletionItemKind::Missing)
271         I.kind = toCompletionItemKind(IndexResult->SymInfo.Kind);
272       // FIXME: reintroduce a way to show the index source for debugging.
273       if (I.label.empty())
274         I.label = IndexResult->CompletionLabel;
275       if (I.filterText.empty())
276         I.filterText = IndexResult->Name;
277 
278       // FIXME(ioeric): support inserting/replacing scope qualifiers.
279       if (I.insertText.empty())
280         I.insertText = Opts.EnableSnippets
281                            ? IndexResult->CompletionSnippetInsertText
282                            : IndexResult->CompletionPlainInsertText;
283 
284       if (auto *D = IndexResult->Detail) {
285         if (I.documentation.empty())
286           I.documentation = D->Documentation;
287         if (I.detail.empty())
288           I.detail = D->CompletionDetail;
289         // FIXME: delay creating include insertion command to
290         // "completionItem/resolve", when it is supported
291         if (!D->IncludeHeader.empty()) {
292           // LSP favors additionalTextEdits over command. But we are still using
293           // command here because it would be expensive to calculate #include
294           // insertion edits for all candidates, and the include insertion edit
295           // is unlikely to conflict with the code completion edits.
296           Command Cmd;
297           // Command title is not added since this is not a user-facing command.
298           Cmd.command = ExecuteCommandParams::CLANGD_INSERT_HEADER_INCLUDE;
299           IncludeInsertion Insertion;
300           // Fallback to canonical header if declaration location is invalid.
301           Insertion.declaringHeader =
302               IndexResult->CanonicalDeclaration.FileURI.empty()
303                   ? D->IncludeHeader
304                   : IndexResult->CanonicalDeclaration.FileURI;
305           Insertion.preferredHeader = D->IncludeHeader;
306           Insertion.textDocument.uri = URIForFile(FileName);
307           Cmd.includeInsertion = std::move(Insertion);
308           I.command = std::move(Cmd);
309         }
310       }
311     }
312     I.scoreInfo = Scores;
313     I.sortText = sortText(Scores.finalScore, Name);
314     I.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet
315                                              : InsertTextFormat::PlainText;
316     return I;
317   }
318 };
319 
320 // Determine the symbol ID for a Sema code completion result, if possible.
321 llvm::Optional<SymbolID> getSymbolID(const CodeCompletionResult &R) {
322   switch (R.Kind) {
323   case CodeCompletionResult::RK_Declaration:
324   case CodeCompletionResult::RK_Pattern: {
325     llvm::SmallString<128> USR;
326     if (/*Ignore=*/clang::index::generateUSRForDecl(R.Declaration, USR))
327       return None;
328     return SymbolID(USR);
329   }
330   case CodeCompletionResult::RK_Macro:
331     // FIXME: Macros do have USRs, but the CCR doesn't contain enough info.
332   case CodeCompletionResult::RK_Keyword:
333     return None;
334   }
335   llvm_unreachable("unknown CodeCompletionResult kind");
336 }
337 
338 // Scopes of the paritial identifier we're trying to complete.
339 // It is used when we query the index for more completion results.
340 struct SpecifiedScope {
341   // The scopes we should look in, determined by Sema.
342   //
343   // If the qualifier was fully resolved, we look for completions in these
344   // scopes; if there is an unresolved part of the qualifier, it should be
345   // resolved within these scopes.
346   //
347   // Examples of qualified completion:
348   //
349   //   "::vec"                                      => {""}
350   //   "using namespace std; ::vec^"                => {"", "std::"}
351   //   "namespace ns {using namespace std;} ns::^"  => {"ns::", "std::"}
352   //   "std::vec^"                                  => {""}  // "std" unresolved
353   //
354   // Examples of unqualified completion:
355   //
356   //   "vec^"                                       => {""}
357   //   "using namespace std; vec^"                  => {"", "std::"}
358   //   "using namespace std; namespace ns { vec^ }" => {"ns::", "std::", ""}
359   //
360   // "" for global namespace, "ns::" for normal namespace.
361   std::vector<std::string> AccessibleScopes;
362   // The full scope qualifier as typed by the user (without the leading "::").
363   // Set if the qualifier is not fully resolved by Sema.
364   llvm::Optional<std::string> UnresolvedQualifier;
365 
366   // Construct scopes being queried in indexes.
367   // This method format the scopes to match the index request representation.
368   std::vector<std::string> scopesForIndexQuery() {
369     std::vector<std::string> Results;
370     for (llvm::StringRef AS : AccessibleScopes) {
371       Results.push_back(AS);
372       if (UnresolvedQualifier)
373         Results.back() += *UnresolvedQualifier;
374     }
375     return Results;
376   }
377 };
378 
379 // Get all scopes that will be queried in indexes.
380 std::vector<std::string> getQueryScopes(CodeCompletionContext &CCContext,
381                                         const SourceManager& SM) {
382   auto GetAllAccessibleScopes = [](CodeCompletionContext& CCContext) {
383     SpecifiedScope Info;
384     for (auto* Context : CCContext.getVisitedContexts()) {
385       if (isa<TranslationUnitDecl>(Context))
386         Info.AccessibleScopes.push_back(""); // global namespace
387       else if (const auto*NS = dyn_cast<NamespaceDecl>(Context))
388         Info.AccessibleScopes.push_back(NS->getQualifiedNameAsString() + "::");
389     }
390     return Info;
391   };
392 
393   auto SS = CCContext.getCXXScopeSpecifier();
394 
395   // Unqualified completion (e.g. "vec^").
396   if (!SS) {
397     // FIXME: Once we can insert namespace qualifiers and use the in-scope
398     //        namespaces for scoring, search in all namespaces.
399     // FIXME: Capture scopes and use for scoring, for example,
400     //        "using namespace std; namespace foo {v^}" =>
401     //        foo::value > std::vector > boost::variant
402     return GetAllAccessibleScopes(CCContext).scopesForIndexQuery();
403   }
404 
405   // Qualified completion ("std::vec^"), we have two cases depending on whether
406   // the qualifier can be resolved by Sema.
407   if ((*SS)->isValid()) { // Resolved qualifier.
408     return GetAllAccessibleScopes(CCContext).scopesForIndexQuery();
409   }
410 
411   // Unresolved qualifier.
412   // FIXME: When Sema can resolve part of a scope chain (e.g.
413   // "known::unknown::id"), we should expand the known part ("known::") rather
414   // than treating the whole thing as unknown.
415   SpecifiedScope Info;
416   Info.AccessibleScopes.push_back(""); // global namespace
417 
418   Info.UnresolvedQualifier =
419       Lexer::getSourceText(CharSourceRange::getCharRange((*SS)->getRange()),
420                            SM, clang::LangOptions()).ltrim("::");
421   // Sema excludes the trailing "::".
422   if (!Info.UnresolvedQualifier->empty())
423     *Info.UnresolvedQualifier += "::";
424 
425   return Info.scopesForIndexQuery();
426 }
427 
428 // The CompletionRecorder captures Sema code-complete output, including context.
429 // It filters out ignored results (but doesn't apply fuzzy-filtering yet).
430 // It doesn't do scoring or conversion to CompletionItem yet, as we want to
431 // merge with index results first.
432 // Generally the fields and methods of this object should only be used from
433 // within the callback.
434 struct CompletionRecorder : public CodeCompleteConsumer {
435   CompletionRecorder(const CodeCompleteOptions &Opts,
436                      UniqueFunction<void()> ResultsCallback)
437       : CodeCompleteConsumer(Opts.getClangCompleteOpts(),
438                              /*OutputIsBinary=*/false),
439         CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
440         CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
441         CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {
442     assert(this->ResultsCallback);
443   }
444 
445   std::vector<CodeCompletionResult> Results;
446   CodeCompletionContext CCContext;
447   Sema *CCSema = nullptr; // Sema that created the results.
448   // FIXME: Sema is scary. Can we store ASTContext and Preprocessor, instead?
449 
450   void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
451                                   CodeCompletionResult *InResults,
452                                   unsigned NumResults) override final {
453     if (CCSema) {
454       log(llvm::formatv(
455           "Multiple code complete callbacks (parser backtracked?). "
456           "Dropping results from context {0}, keeping results from {1}.",
457           getCompletionKindString(this->CCContext.getKind()),
458           getCompletionKindString(Context.getKind())));
459       return;
460     }
461     // Record the completion context.
462     CCSema = &S;
463     CCContext = Context;
464 
465     // Retain the results we might want.
466     for (unsigned I = 0; I < NumResults; ++I) {
467       auto &Result = InResults[I];
468       // Drop hidden items which cannot be found by lookup after completion.
469       // Exception: some items can be named by using a qualifier.
470       if (Result.Hidden && (!Result.Qualifier || Result.QualifierIsInformative))
471         continue;
472       if (!Opts.IncludeIneligibleResults &&
473           (Result.Availability == CXAvailability_NotAvailable ||
474            Result.Availability == CXAvailability_NotAccessible))
475         continue;
476       // Destructor completion is rarely useful, and works inconsistently.
477       // (s.^ completes ~string, but s.~st^ is an error).
478       if (dyn_cast_or_null<CXXDestructorDecl>(Result.Declaration))
479         continue;
480       // We choose to never append '::' to completion results in clangd.
481       Result.StartsNestedNameSpecifier = false;
482       Results.push_back(Result);
483     }
484     ResultsCallback();
485   }
486 
487   CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
488   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
489 
490   // Returns the filtering/sorting name for Result, which must be from Results.
491   // Returned string is owned by this recorder (or the AST).
492   llvm::StringRef getName(const CodeCompletionResult &Result) {
493     switch (Result.Kind) {
494     case CodeCompletionResult::RK_Declaration:
495       if (auto *ID = Result.Declaration->getIdentifier())
496         return ID->getName();
497       break;
498     case CodeCompletionResult::RK_Keyword:
499       return Result.Keyword;
500     case CodeCompletionResult::RK_Macro:
501       return Result.Macro->getName();
502     case CodeCompletionResult::RK_Pattern:
503       return Result.Pattern->getTypedText();
504     }
505     auto *CCS = codeCompletionString(Result, /*IncludeBriefComments=*/false);
506     return CCS->getTypedText();
507   }
508 
509   // Build a CodeCompletion string for R, which must be from Results.
510   // The CCS will be owned by this recorder.
511   CodeCompletionString *codeCompletionString(const CodeCompletionResult &R,
512                                              bool IncludeBriefComments) {
513     // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
514     return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
515         *CCSema, CCContext, *CCAllocator, CCTUInfo, IncludeBriefComments);
516   }
517 
518 private:
519   CodeCompleteOptions Opts;
520   std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
521   CodeCompletionTUInfo CCTUInfo;
522   UniqueFunction<void()> ResultsCallback;
523 };
524 
525 // Tracks a bounded number of candidates with the best scores.
526 class TopN {
527 public:
528   using value_type = std::pair<CompletionCandidate, CompletionItemScores>;
529   static constexpr size_t Unbounded = std::numeric_limits<size_t>::max();
530 
531   TopN(size_t N) : N(N) {}
532 
533   // Adds a candidate to the set.
534   // Returns true if a candidate was dropped to get back under N.
535   bool push(value_type &&V) {
536     bool Dropped = false;
537     if (Heap.size() >= N) {
538       Dropped = true;
539       if (N > 0 && greater(V, Heap.front())) {
540         std::pop_heap(Heap.begin(), Heap.end(), greater);
541         Heap.back() = std::move(V);
542         std::push_heap(Heap.begin(), Heap.end(), greater);
543       }
544     } else {
545       Heap.push_back(std::move(V));
546       std::push_heap(Heap.begin(), Heap.end(), greater);
547     }
548     assert(Heap.size() <= N);
549     assert(std::is_heap(Heap.begin(), Heap.end(), greater));
550     return Dropped;
551   }
552 
553   // Returns candidates from best to worst.
554   std::vector<value_type> items() && {
555     std::sort_heap(Heap.begin(), Heap.end(), greater);
556     assert(Heap.size() <= N);
557     return std::move(Heap);
558   }
559 
560 private:
561   static bool greater(const value_type &L, const value_type &R) {
562     if (L.second.finalScore != R.second.finalScore)
563       return L.second.finalScore > R.second.finalScore;
564     return L.first.Name < R.first.Name; // Earlier name is better.
565   }
566 
567   const size_t N;
568   std::vector<value_type> Heap; // Min-heap, comparator is greater().
569 };
570 
571 class SignatureHelpCollector final : public CodeCompleteConsumer {
572 
573 public:
574   SignatureHelpCollector(const clang::CodeCompleteOptions &CodeCompleteOpts,
575                          SignatureHelp &SigHelp)
576       : CodeCompleteConsumer(CodeCompleteOpts, /*OutputIsBinary=*/false),
577         SigHelp(SigHelp),
578         Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
579         CCTUInfo(Allocator) {}
580 
581   void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg,
582                                  OverloadCandidate *Candidates,
583                                  unsigned NumCandidates) override {
584     SigHelp.signatures.reserve(NumCandidates);
585     // FIXME(rwols): How can we determine the "active overload candidate"?
586     // Right now the overloaded candidates seem to be provided in a "best fit"
587     // order, so I'm not too worried about this.
588     SigHelp.activeSignature = 0;
589     assert(CurrentArg <= (unsigned)std::numeric_limits<int>::max() &&
590            "too many arguments");
591     SigHelp.activeParameter = static_cast<int>(CurrentArg);
592     for (unsigned I = 0; I < NumCandidates; ++I) {
593       const auto &Candidate = Candidates[I];
594       const auto *CCS = Candidate.CreateSignatureString(
595           CurrentArg, S, *Allocator, CCTUInfo, true);
596       assert(CCS && "Expected the CodeCompletionString to be non-null");
597       SigHelp.signatures.push_back(ProcessOverloadCandidate(Candidate, *CCS));
598     }
599   }
600 
601   GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; }
602 
603   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
604 
605 private:
606   // FIXME(ioeric): consider moving CodeCompletionString logic here to
607   // CompletionString.h.
608   SignatureInformation
609   ProcessOverloadCandidate(const OverloadCandidate &Candidate,
610                            const CodeCompletionString &CCS) const {
611     SignatureInformation Result;
612     const char *ReturnType = nullptr;
613 
614     Result.documentation = getDocumentation(CCS);
615 
616     for (const auto &Chunk : CCS) {
617       switch (Chunk.Kind) {
618       case CodeCompletionString::CK_ResultType:
619         // A piece of text that describes the type of an entity or,
620         // for functions and methods, the return type.
621         assert(!ReturnType && "Unexpected CK_ResultType");
622         ReturnType = Chunk.Text;
623         break;
624       case CodeCompletionString::CK_Placeholder:
625         // A string that acts as a placeholder for, e.g., a function call
626         // argument.
627         // Intentional fallthrough here.
628       case CodeCompletionString::CK_CurrentParameter: {
629         // A piece of text that describes the parameter that corresponds to
630         // the code-completion location within a function call, message send,
631         // macro invocation, etc.
632         Result.label += Chunk.Text;
633         ParameterInformation Info;
634         Info.label = Chunk.Text;
635         Result.parameters.push_back(std::move(Info));
636         break;
637       }
638       case CodeCompletionString::CK_Optional: {
639         // The rest of the parameters are defaulted/optional.
640         assert(Chunk.Optional &&
641                "Expected the optional code completion string to be non-null.");
642         Result.label +=
643             getOptionalParameters(*Chunk.Optional, Result.parameters);
644         break;
645       }
646       case CodeCompletionString::CK_VerticalSpace:
647         break;
648       default:
649         Result.label += Chunk.Text;
650         break;
651       }
652     }
653     if (ReturnType) {
654       Result.label += " -> ";
655       Result.label += ReturnType;
656     }
657     return Result;
658   }
659 
660   SignatureHelp &SigHelp;
661   std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator;
662   CodeCompletionTUInfo CCTUInfo;
663 
664 }; // SignatureHelpCollector
665 
666 struct SemaCompleteInput {
667   PathRef FileName;
668   const tooling::CompileCommand &Command;
669   PrecompiledPreamble const *Preamble;
670   StringRef Contents;
671   Position Pos;
672   IntrusiveRefCntPtr<vfs::FileSystem> VFS;
673   std::shared_ptr<PCHContainerOperations> PCHs;
674 };
675 
676 // Invokes Sema code completion on a file.
677 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
678                       const clang::CodeCompleteOptions &Options,
679                       const SemaCompleteInput &Input) {
680   trace::Span Tracer("Sema completion");
681   std::vector<const char *> ArgStrs;
682   for (const auto &S : Input.Command.CommandLine)
683     ArgStrs.push_back(S.c_str());
684 
685   if (Input.VFS->setCurrentWorkingDirectory(Input.Command.Directory)) {
686     log("Couldn't set working directory");
687     // We run parsing anyway, our lit-tests rely on results for non-existing
688     // working dirs.
689   }
690 
691   IgnoreDiagnostics DummyDiagsConsumer;
692   auto CI = createInvocationFromCommandLine(
693       ArgStrs,
694       CompilerInstance::createDiagnostics(new DiagnosticOptions,
695                                           &DummyDiagsConsumer, false),
696       Input.VFS);
697   if (!CI) {
698     log("Couldn't create CompilerInvocation");;
699     return false;
700   }
701   CI->getFrontendOpts().DisableFree = false;
702 
703   std::unique_ptr<llvm::MemoryBuffer> ContentsBuffer =
704       llvm::MemoryBuffer::getMemBufferCopy(Input.Contents, Input.FileName);
705 
706   // We reuse the preamble whether it's valid or not. This is a
707   // correctness/performance tradeoff: building without a preamble is slow, and
708   // completion is latency-sensitive.
709   if (Input.Preamble) {
710     auto Bounds =
711         ComputePreambleBounds(*CI->getLangOpts(), ContentsBuffer.get(), 0);
712     // FIXME(ibiryukov): Remove this call to CanReuse() after we'll fix
713     // clients relying on getting stats for preamble files during code
714     // completion.
715     // Note that results of CanReuse() are ignored, see the comment above.
716     Input.Preamble->CanReuse(*CI, ContentsBuffer.get(), Bounds,
717                              Input.VFS.get());
718   }
719   // The diagnostic options must be set before creating a CompilerInstance.
720   CI->getDiagnosticOpts().IgnoreWarnings = true;
721   auto Clang = prepareCompilerInstance(
722       std::move(CI), Input.Preamble, std::move(ContentsBuffer),
723       std::move(Input.PCHs), std::move(Input.VFS), DummyDiagsConsumer);
724 
725   // Disable typo correction in Sema.
726   Clang->getLangOpts().SpellChecking = false;
727 
728   auto &FrontendOpts = Clang->getFrontendOpts();
729   FrontendOpts.SkipFunctionBodies = true;
730   FrontendOpts.CodeCompleteOpts = Options;
731   FrontendOpts.CodeCompletionAt.FileName = Input.FileName;
732   FrontendOpts.CodeCompletionAt.Line = Input.Pos.line + 1;
733   FrontendOpts.CodeCompletionAt.Column = Input.Pos.character + 1;
734 
735   Clang->setCodeCompletionConsumer(Consumer.release());
736 
737   SyntaxOnlyAction Action;
738   if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) {
739     log("BeginSourceFile() failed when running codeComplete for " +
740         Input.FileName);
741     return false;
742   }
743   if (!Action.Execute()) {
744     log("Execute() failed when running codeComplete for " + Input.FileName);
745     return false;
746   }
747   Action.EndSourceFile();
748 
749   return true;
750 }
751 
752 // Should we perform index-based completion in this context?
753 // FIXME: consider allowing completion, but restricting the result types.
754 bool allowIndex(enum CodeCompletionContext::Kind K) {
755   switch (K) {
756   case CodeCompletionContext::CCC_TopLevel:
757   case CodeCompletionContext::CCC_ObjCInterface:
758   case CodeCompletionContext::CCC_ObjCImplementation:
759   case CodeCompletionContext::CCC_ObjCIvarList:
760   case CodeCompletionContext::CCC_ClassStructUnion:
761   case CodeCompletionContext::CCC_Statement:
762   case CodeCompletionContext::CCC_Expression:
763   case CodeCompletionContext::CCC_ObjCMessageReceiver:
764   case CodeCompletionContext::CCC_EnumTag:
765   case CodeCompletionContext::CCC_UnionTag:
766   case CodeCompletionContext::CCC_ClassOrStructTag:
767   case CodeCompletionContext::CCC_ObjCProtocolName:
768   case CodeCompletionContext::CCC_Namespace:
769   case CodeCompletionContext::CCC_Type:
770   case CodeCompletionContext::CCC_Name: // FIXME: why does ns::^ give this?
771   case CodeCompletionContext::CCC_PotentiallyQualifiedName:
772   case CodeCompletionContext::CCC_ParenthesizedExpression:
773   case CodeCompletionContext::CCC_ObjCInterfaceName:
774   case CodeCompletionContext::CCC_ObjCCategoryName:
775     return true;
776   case CodeCompletionContext::CCC_Other: // Be conservative.
777   case CodeCompletionContext::CCC_OtherWithMacros:
778   case CodeCompletionContext::CCC_DotMemberAccess:
779   case CodeCompletionContext::CCC_ArrowMemberAccess:
780   case CodeCompletionContext::CCC_ObjCPropertyAccess:
781   case CodeCompletionContext::CCC_MacroName:
782   case CodeCompletionContext::CCC_MacroNameUse:
783   case CodeCompletionContext::CCC_PreprocessorExpression:
784   case CodeCompletionContext::CCC_PreprocessorDirective:
785   case CodeCompletionContext::CCC_NaturalLanguage:
786   case CodeCompletionContext::CCC_SelectorName:
787   case CodeCompletionContext::CCC_TypeQualifiers:
788   case CodeCompletionContext::CCC_ObjCInstanceMessage:
789   case CodeCompletionContext::CCC_ObjCClassMessage:
790   case CodeCompletionContext::CCC_Recovery:
791     return false;
792   }
793   llvm_unreachable("unknown code completion context");
794 }
795 
796 } // namespace
797 
798 clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
799   clang::CodeCompleteOptions Result;
800   Result.IncludeCodePatterns = EnableSnippets && IncludeCodePatterns;
801   Result.IncludeMacros = IncludeMacros;
802   Result.IncludeGlobals = true;
803   Result.IncludeBriefComments = IncludeBriefComments;
804 
805   // When an is used, Sema is responsible for completing the main file,
806   // the index can provide results from the preamble.
807   // Tell Sema not to deserialize the preamble to look for results.
808   Result.LoadExternal = !Index;
809 
810   return Result;
811 }
812 
813 // Runs Sema-based (AST) and Index-based completion, returns merged results.
814 //
815 // There are a few tricky considerations:
816 //   - the AST provides information needed for the index query (e.g. which
817 //     namespaces to search in). So Sema must start first.
818 //   - we only want to return the top results (Opts.Limit).
819 //     Building CompletionItems for everything else is wasteful, so we want to
820 //     preserve the "native" format until we're done with scoring.
821 //   - the data underlying Sema completion items is owned by the AST and various
822 //     other arenas, which must stay alive for us to build CompletionItems.
823 //   - we may get duplicate results from Sema and the Index, we need to merge.
824 //
825 // So we start Sema completion first, and do all our work in its callback.
826 // We use the Sema context information to query the index.
827 // Then we merge the two result sets, producing items that are Sema/Index/Both.
828 // These items are scored, and the top N are synthesized into the LSP response.
829 // Finally, we can clean up the data structures created by Sema completion.
830 //
831 // Main collaborators are:
832 //   - semaCodeComplete sets up the compiler machinery to run code completion.
833 //   - CompletionRecorder captures Sema completion results, including context.
834 //   - SymbolIndex (Opts.Index) provides index completion results as Symbols
835 //   - CompletionCandidates are the result of merging Sema and Index results.
836 //     Each candidate points to an underlying CodeCompletionResult (Sema), a
837 //     Symbol (Index), or both. It computes the result quality score.
838 //     CompletionCandidate also does conversion to CompletionItem (at the end).
839 //   - FuzzyMatcher scores how the candidate matches the partial identifier.
840 //     This score is combined with the result quality score for the final score.
841 //   - TopN determines the results with the best score.
842 class CodeCompleteFlow {
843   PathRef FileName;
844   const CodeCompleteOptions &Opts;
845   // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
846   CompletionRecorder *Recorder = nullptr;
847   int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging.
848   bool Incomplete = false; // Would more be available with a higher limit?
849   llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
850 
851 public:
852   // A CodeCompleteFlow object is only useful for calling run() exactly once.
853   CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts)
854       : FileName(FileName), Opts(Opts) {}
855 
856   CompletionList run(const SemaCompleteInput &SemaCCInput) && {
857     trace::Span Tracer("CodeCompleteFlow");
858     // We run Sema code completion first. It builds an AST and calculates:
859     //   - completion results based on the AST.
860     //   - partial identifier and context. We need these for the index query.
861     CompletionList Output;
862     auto RecorderOwner = llvm::make_unique<CompletionRecorder>(Opts, [&]() {
863       assert(Recorder && "Recorder is not set");
864       Output = runWithSema();
865       SPAN_ATTACH(Tracer, "sema_completion_kind",
866                   getCompletionKindString(Recorder->CCContext.getKind()));
867     });
868 
869     Recorder = RecorderOwner.get();
870     semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(),
871                      SemaCCInput);
872 
873     SPAN_ATTACH(Tracer, "sema_results", NSema);
874     SPAN_ATTACH(Tracer, "index_results", NIndex);
875     SPAN_ATTACH(Tracer, "merged_results", NBoth);
876     SPAN_ATTACH(Tracer, "returned_results", Output.items.size());
877     SPAN_ATTACH(Tracer, "incomplete", Output.isIncomplete);
878     log(llvm::formatv("Code complete: {0} results from Sema, {1} from Index, "
879                       "{2} matched, {3} returned{4}.",
880                       NSema, NIndex, NBoth, Output.items.size(),
881                       Output.isIncomplete ? " (incomplete)" : ""));
882     assert(!Opts.Limit || Output.items.size() <= Opts.Limit);
883     // We don't assert that isIncomplete means we hit a limit.
884     // Indexes may choose to impose their own limits even if we don't have one.
885     return Output;
886   }
887 
888 private:
889   // This is called by run() once Sema code completion is done, but before the
890   // Sema data structures are torn down. It does all the real work.
891   CompletionList runWithSema() {
892     Filter = FuzzyMatcher(
893         Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
894     // Sema provides the needed context to query the index.
895     // FIXME: in addition to querying for extra/overlapping symbols, we should
896     //        explicitly request symbols corresponding to Sema results.
897     //        We can use their signals even if the index can't suggest them.
898     // We must copy index results to preserve them, but there are at most Limit.
899     auto IndexResults = queryIndex();
900     // Merge Sema and Index results, score them, and pick the winners.
901     auto Top = mergeResults(Recorder->Results, IndexResults);
902     // Convert the results to the desired LSP structs.
903     CompletionList Output;
904     for (auto &C : Top)
905       Output.items.push_back(toCompletionItem(C.first, C.second));
906     Output.isIncomplete = Incomplete;
907     return Output;
908   }
909 
910   SymbolSlab queryIndex() {
911     if (!Opts.Index || !allowIndex(Recorder->CCContext.getKind()))
912       return SymbolSlab();
913     trace::Span Tracer("Query index");
914     SPAN_ATTACH(Tracer, "limit", Opts.Limit);
915 
916     SymbolSlab::Builder ResultsBuilder;
917     // Build the query.
918     FuzzyFindRequest Req;
919     if (Opts.Limit)
920       Req.MaxCandidateCount = Opts.Limit;
921     Req.Query = Filter->pattern();
922     Req.Scopes = getQueryScopes(Recorder->CCContext,
923                                 Recorder->CCSema->getSourceManager());
924     log(llvm::formatv("Code complete: fuzzyFind(\"{0}\", scopes=[{1}])",
925                       Req.Query,
926                       llvm::join(Req.Scopes.begin(), Req.Scopes.end(), ",")));
927     // Run the query against the index.
928     if (Opts.Index->fuzzyFind(
929             Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); }))
930       Incomplete = true;
931     return std::move(ResultsBuilder).build();
932   }
933 
934   // Merges the Sema and Index results where possible, scores them, and
935   // returns the top results from best to worst.
936   std::vector<std::pair<CompletionCandidate, CompletionItemScores>>
937   mergeResults(const std::vector<CodeCompletionResult> &SemaResults,
938                const SymbolSlab &IndexResults) {
939     trace::Span Tracer("Merge and score results");
940     // We only keep the best N results at any time, in "native" format.
941     TopN Top(Opts.Limit == 0 ? TopN::Unbounded : Opts.Limit);
942     llvm::DenseSet<const Symbol *> UsedIndexResults;
943     auto CorrespondingIndexResult =
944         [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
945       if (auto SymID = getSymbolID(SemaResult)) {
946         auto I = IndexResults.find(*SymID);
947         if (I != IndexResults.end()) {
948           UsedIndexResults.insert(&*I);
949           return &*I;
950         }
951       }
952       return nullptr;
953     };
954     // Emit all Sema results, merging them with Index results if possible.
955     for (auto &SemaResult : Recorder->Results)
956       addCandidate(Top, &SemaResult, CorrespondingIndexResult(SemaResult));
957     // Now emit any Index-only results.
958     for (const auto &IndexResult : IndexResults) {
959       if (UsedIndexResults.count(&IndexResult))
960         continue;
961       addCandidate(Top, /*SemaResult=*/nullptr, &IndexResult);
962     }
963     return std::move(Top).items();
964   }
965 
966   // Scores a candidate and adds it to the TopN structure.
967   void addCandidate(TopN &Candidates, const CodeCompletionResult *SemaResult,
968                     const Symbol *IndexResult) {
969     CompletionCandidate C;
970     C.SemaResult = SemaResult;
971     C.IndexResult = IndexResult;
972     C.Name = IndexResult ? IndexResult->Name : Recorder->getName(*SemaResult);
973 
974     CompletionItemScores Scores;
975     if (auto FuzzyScore = Filter->match(C.Name))
976       Scores.filterScore = *FuzzyScore;
977     else
978       return;
979     Scores.symbolScore = C.score();
980     // We score candidates by multiplying symbolScore ("quality" of the result)
981     // with filterScore (how well it matched the query).
982     // This is sensitive to the distribution of both component scores!
983     Scores.finalScore = Scores.filterScore * Scores.symbolScore;
984 
985     NSema += bool(SemaResult);
986     NIndex += bool(IndexResult);
987     NBoth += SemaResult && IndexResult;
988     if (Candidates.push({C, Scores}))
989       Incomplete = true;
990   }
991 
992   CompletionItem toCompletionItem(const CompletionCandidate &Candidate,
993                                   const CompletionItemScores &Scores) {
994     CodeCompletionString *SemaCCS = nullptr;
995     if (auto *SR = Candidate.SemaResult)
996       SemaCCS = Recorder->codeCompletionString(*SR, Opts.IncludeBriefComments);
997     return Candidate.build(FileName, Scores, Opts, SemaCCS);
998   }
999 };
1000 
1001 CompletionList codeComplete(PathRef FileName,
1002                             const tooling::CompileCommand &Command,
1003                             PrecompiledPreamble const *Preamble,
1004                             StringRef Contents, Position Pos,
1005                             IntrusiveRefCntPtr<vfs::FileSystem> VFS,
1006                             std::shared_ptr<PCHContainerOperations> PCHs,
1007                             CodeCompleteOptions Opts) {
1008   return CodeCompleteFlow(FileName, Opts)
1009       .run({FileName, Command, Preamble, Contents, Pos, VFS, PCHs});
1010 }
1011 
1012 SignatureHelp signatureHelp(PathRef FileName,
1013                             const tooling::CompileCommand &Command,
1014                             PrecompiledPreamble const *Preamble,
1015                             StringRef Contents, Position Pos,
1016                             IntrusiveRefCntPtr<vfs::FileSystem> VFS,
1017                             std::shared_ptr<PCHContainerOperations> PCHs) {
1018   SignatureHelp Result;
1019   clang::CodeCompleteOptions Options;
1020   Options.IncludeGlobals = false;
1021   Options.IncludeMacros = false;
1022   Options.IncludeCodePatterns = false;
1023   Options.IncludeBriefComments = true;
1024   semaCodeComplete(llvm::make_unique<SignatureHelpCollector>(Options, Result),
1025                    Options,
1026                    {FileName, Command, Preamble, Contents, Pos, std::move(VFS),
1027                     std::move(PCHs)});
1028   return Result;
1029 }
1030 
1031 } // namespace clangd
1032 } // namespace clang
1033