1 //===--- CodeComplete.cpp ----------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Code completion has several moving parts: 10 // - AST-based completions are provided using the completion hooks in Sema. 11 // - external completions are retrieved from the index (using hints from Sema) 12 // - the two sources overlap, and must be merged and overloads bundled 13 // - results must be scored and ranked (see Quality.h) before rendering 14 // 15 // Signature help works in a similar way as code completion, but it is simpler: 16 // it's purely AST-based, and there are few candidates. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "CodeComplete.h" 21 #include "AST.h" 22 #include "CodeCompletionStrings.h" 23 #include "Compiler.h" 24 #include "Diagnostics.h" 25 #include "ExpectedTypes.h" 26 #include "FileDistance.h" 27 #include "FuzzyMatch.h" 28 #include "Headers.h" 29 #include "Hover.h" 30 #include "Preamble.h" 31 #include "Protocol.h" 32 #include "Quality.h" 33 #include "SourceCode.h" 34 #include "TUScheduler.h" 35 #include "URI.h" 36 #include "index/Index.h" 37 #include "index/Symbol.h" 38 #include "index/SymbolOrigin.h" 39 #include "support/Logger.h" 40 #include "support/Threading.h" 41 #include "support/ThreadsafeFS.h" 42 #include "support/Trace.h" 43 #include "clang/AST/Decl.h" 44 #include "clang/AST/DeclBase.h" 45 #include "clang/Basic/CharInfo.h" 46 #include "clang/Basic/LangOptions.h" 47 #include "clang/Basic/SourceLocation.h" 48 #include "clang/Basic/TokenKinds.h" 49 #include "clang/Format/Format.h" 50 #include "clang/Frontend/CompilerInstance.h" 51 #include "clang/Frontend/FrontendActions.h" 52 #include 
"clang/Lex/ExternalPreprocessorSource.h" 53 #include "clang/Lex/Lexer.h" 54 #include "clang/Lex/Preprocessor.h" 55 #include "clang/Lex/PreprocessorOptions.h" 56 #include "clang/Sema/CodeCompleteConsumer.h" 57 #include "clang/Sema/DeclSpec.h" 58 #include "clang/Sema/Sema.h" 59 #include "llvm/ADT/ArrayRef.h" 60 #include "llvm/ADT/None.h" 61 #include "llvm/ADT/Optional.h" 62 #include "llvm/ADT/SmallVector.h" 63 #include "llvm/ADT/StringExtras.h" 64 #include "llvm/ADT/StringRef.h" 65 #include "llvm/Support/Compiler.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/Error.h" 68 #include "llvm/Support/Format.h" 69 #include "llvm/Support/FormatVariadic.h" 70 #include "llvm/Support/ScopedPrinter.h" 71 #include <algorithm> 72 #include <iterator> 73 #include <limits> 74 75 // We log detailed candidate here if you run with -debug-only=codecomplete. 76 #define DEBUG_TYPE "CodeComplete" 77 78 namespace clang { 79 namespace clangd { 80 namespace { 81 82 CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) { 83 using SK = index::SymbolKind; 84 switch (Kind) { 85 case SK::Unknown: 86 return CompletionItemKind::Missing; 87 case SK::Module: 88 case SK::Namespace: 89 case SK::NamespaceAlias: 90 return CompletionItemKind::Module; 91 case SK::Macro: 92 return CompletionItemKind::Text; 93 case SK::Enum: 94 return CompletionItemKind::Enum; 95 case SK::Struct: 96 return CompletionItemKind::Struct; 97 case SK::Class: 98 case SK::Protocol: 99 case SK::Extension: 100 case SK::Union: 101 return CompletionItemKind::Class; 102 case SK::TypeAlias: 103 // We use the same kind as the VSCode C++ extension. 104 // FIXME: pick a better option when we have one. 
105 return CompletionItemKind::Interface; 106 case SK::Using: 107 return CompletionItemKind::Reference; 108 case SK::Function: 109 case SK::ConversionFunction: 110 return CompletionItemKind::Function; 111 case SK::Variable: 112 case SK::Parameter: 113 case SK::NonTypeTemplateParm: 114 return CompletionItemKind::Variable; 115 case SK::Field: 116 return CompletionItemKind::Field; 117 case SK::EnumConstant: 118 return CompletionItemKind::EnumMember; 119 case SK::InstanceMethod: 120 case SK::ClassMethod: 121 case SK::StaticMethod: 122 case SK::Destructor: 123 return CompletionItemKind::Method; 124 case SK::InstanceProperty: 125 case SK::ClassProperty: 126 case SK::StaticProperty: 127 return CompletionItemKind::Property; 128 case SK::Constructor: 129 return CompletionItemKind::Constructor; 130 case SK::TemplateTypeParm: 131 case SK::TemplateTemplateParm: 132 return CompletionItemKind::TypeParameter; 133 } 134 llvm_unreachable("Unhandled clang::index::SymbolKind."); 135 } 136 137 CompletionItemKind 138 toCompletionItemKind(CodeCompletionResult::ResultKind ResKind, 139 const NamedDecl *Decl, 140 CodeCompletionContext::Kind CtxKind) { 141 if (Decl) 142 return toCompletionItemKind(index::getSymbolInfo(Decl).Kind); 143 if (CtxKind == CodeCompletionContext::CCC_IncludedFile) 144 return CompletionItemKind::File; 145 switch (ResKind) { 146 case CodeCompletionResult::RK_Declaration: 147 llvm_unreachable("RK_Declaration without Decl"); 148 case CodeCompletionResult::RK_Keyword: 149 return CompletionItemKind::Keyword; 150 case CodeCompletionResult::RK_Macro: 151 return CompletionItemKind::Text; // unfortunately, there's no 'Macro' 152 // completion items in LSP. 153 case CodeCompletionResult::RK_Pattern: 154 return CompletionItemKind::Snippet; 155 } 156 llvm_unreachable("Unhandled CodeCompletionResult::ResultKind."); 157 } 158 159 // Identifier code completion result. 160 struct RawIdentifier { 161 llvm::StringRef Name; 162 unsigned References; // # of usages in file. 
163 }; 164 165 /// A code completion result, in clang-native form. 166 /// It may be promoted to a CompletionItem if it's among the top-ranked results. 167 struct CompletionCandidate { 168 llvm::StringRef Name; // Used for filtering and sorting. 169 // We may have a result from Sema, from the index, or both. 170 const CodeCompletionResult *SemaResult = nullptr; 171 const Symbol *IndexResult = nullptr; 172 const RawIdentifier *IdentifierResult = nullptr; 173 llvm::SmallVector<llvm::StringRef, 1> RankedIncludeHeaders; 174 175 // Returns a token identifying the overload set this is part of. 176 // 0 indicates it's not part of any overload set. 177 size_t overloadSet(const CodeCompleteOptions &Opts, llvm::StringRef FileName, 178 IncludeInserter *Inserter) const { 179 if (!Opts.BundleOverloads.getValueOr(false)) 180 return 0; 181 182 // Depending on the index implementation, we can see different header 183 // strings (literal or URI) mapping to the same file. We still want to 184 // bundle those, so we must resolve the header to be included here. 185 std::string HeaderForHash; 186 if (Inserter) { 187 if (auto Header = headerToInsertIfAllowed(Opts)) { 188 if (auto HeaderFile = toHeaderFile(*Header, FileName)) { 189 if (auto Spelled = 190 Inserter->calculateIncludePath(*HeaderFile, FileName)) 191 HeaderForHash = *Spelled; 192 } else { 193 vlog("Code completion header path manipulation failed {0}", 194 HeaderFile.takeError()); 195 } 196 } 197 } 198 199 llvm::SmallString<256> Scratch; 200 if (IndexResult) { 201 switch (IndexResult->SymInfo.Kind) { 202 case index::SymbolKind::ClassMethod: 203 case index::SymbolKind::InstanceMethod: 204 case index::SymbolKind::StaticMethod: 205 #ifndef NDEBUG 206 llvm_unreachable("Don't expect members from index in code completion"); 207 #else 208 LLVM_FALLTHROUGH; 209 #endif 210 case index::SymbolKind::Function: 211 // We can't group overloads together that need different #includes. 212 // This could break #include insertion. 
213 return llvm::hash_combine( 214 (IndexResult->Scope + IndexResult->Name).toStringRef(Scratch), 215 HeaderForHash); 216 default: 217 return 0; 218 } 219 } 220 if (SemaResult) { 221 // We need to make sure we're consistent with the IndexResult case! 222 const NamedDecl *D = SemaResult->Declaration; 223 if (!D || !D->isFunctionOrFunctionTemplate()) 224 return 0; 225 { 226 llvm::raw_svector_ostream OS(Scratch); 227 D->printQualifiedName(OS); 228 } 229 return llvm::hash_combine(Scratch, HeaderForHash); 230 } 231 assert(IdentifierResult); 232 return 0; 233 } 234 235 // The best header to include if include insertion is allowed. 236 llvm::Optional<llvm::StringRef> 237 headerToInsertIfAllowed(const CodeCompleteOptions &Opts) const { 238 if (Opts.InsertIncludes == CodeCompleteOptions::NeverInsert || 239 RankedIncludeHeaders.empty()) 240 return None; 241 if (SemaResult && SemaResult->Declaration) { 242 // Avoid inserting new #include if the declaration is found in the current 243 // file e.g. the symbol is forward declared. 244 auto &SM = SemaResult->Declaration->getASTContext().getSourceManager(); 245 for (const Decl *RD : SemaResult->Declaration->redecls()) 246 if (SM.isInMainFile(SM.getExpansionLoc(RD->getBeginLoc()))) 247 return None; 248 } 249 return RankedIncludeHeaders[0]; 250 } 251 252 using Bundle = llvm::SmallVector<CompletionCandidate, 4>; 253 }; 254 using ScoredBundle = 255 std::pair<CompletionCandidate::Bundle, CodeCompletion::Scores>; 256 struct ScoredBundleGreater { 257 bool operator()(const ScoredBundle &L, const ScoredBundle &R) { 258 if (L.second.Total != R.second.Total) 259 return L.second.Total > R.second.Total; 260 return L.first.front().Name < 261 R.first.front().Name; // Earlier name is better. 262 } 263 }; 264 265 // Assembles a code completion out of a bundle of >=1 completion candidates. 266 // Many of the expensive strings are only computed at this point, once we know 267 // the candidate bundle is going to be returned. 
268 // 269 // Many fields are the same for all candidates in a bundle (e.g. name), and are 270 // computed from the first candidate, in the constructor. 271 // Others vary per candidate, so add() must be called for remaining candidates. 272 struct CodeCompletionBuilder { 273 CodeCompletionBuilder(ASTContext *ASTCtx, const CompletionCandidate &C, 274 CodeCompletionString *SemaCCS, 275 llvm::ArrayRef<std::string> QueryScopes, 276 const IncludeInserter &Includes, 277 llvm::StringRef FileName, 278 CodeCompletionContext::Kind ContextKind, 279 const CodeCompleteOptions &Opts, 280 bool IsUsingDeclaration, tok::TokenKind NextTokenKind) 281 : ASTCtx(ASTCtx), 282 EnableFunctionArgSnippets(Opts.EnableFunctionArgSnippets), 283 IsUsingDeclaration(IsUsingDeclaration), NextTokenKind(NextTokenKind) { 284 add(C, SemaCCS); 285 if (C.SemaResult) { 286 assert(ASTCtx); 287 Completion.Origin |= SymbolOrigin::AST; 288 Completion.Name = std::string(llvm::StringRef(SemaCCS->getTypedText())); 289 if (Completion.Scope.empty()) { 290 if ((C.SemaResult->Kind == CodeCompletionResult::RK_Declaration) || 291 (C.SemaResult->Kind == CodeCompletionResult::RK_Pattern)) 292 if (const auto *D = C.SemaResult->getDeclaration()) 293 if (const auto *ND = dyn_cast<NamedDecl>(D)) 294 Completion.Scope = std::string( 295 splitQualifiedName(printQualifiedName(*ND)).first); 296 } 297 Completion.Kind = toCompletionItemKind( 298 C.SemaResult->Kind, C.SemaResult->Declaration, ContextKind); 299 // Sema could provide more info on whether the completion was a file or 300 // folder. 
301 if (Completion.Kind == CompletionItemKind::File && 302 Completion.Name.back() == '/') 303 Completion.Kind = CompletionItemKind::Folder; 304 for (const auto &FixIt : C.SemaResult->FixIts) { 305 Completion.FixIts.push_back(toTextEdit( 306 FixIt, ASTCtx->getSourceManager(), ASTCtx->getLangOpts())); 307 } 308 llvm::sort(Completion.FixIts, [](const TextEdit &X, const TextEdit &Y) { 309 return std::tie(X.range.start.line, X.range.start.character) < 310 std::tie(Y.range.start.line, Y.range.start.character); 311 }); 312 Completion.Deprecated |= 313 (C.SemaResult->Availability == CXAvailability_Deprecated); 314 } 315 if (C.IndexResult) { 316 Completion.Origin |= C.IndexResult->Origin; 317 if (Completion.Scope.empty()) 318 Completion.Scope = std::string(C.IndexResult->Scope); 319 if (Completion.Kind == CompletionItemKind::Missing) 320 Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind); 321 if (Completion.Name.empty()) 322 Completion.Name = std::string(C.IndexResult->Name); 323 // If the completion was visible to Sema, no qualifier is needed. This 324 // avoids unneeded qualifiers in cases like with `using ns::X`. 325 if (Completion.RequiredQualifier.empty() && !C.SemaResult) { 326 llvm::StringRef ShortestQualifier = C.IndexResult->Scope; 327 for (llvm::StringRef Scope : QueryScopes) { 328 llvm::StringRef Qualifier = C.IndexResult->Scope; 329 if (Qualifier.consume_front(Scope) && 330 Qualifier.size() < ShortestQualifier.size()) 331 ShortestQualifier = Qualifier; 332 } 333 Completion.RequiredQualifier = std::string(ShortestQualifier); 334 } 335 Completion.Deprecated |= (C.IndexResult->Flags & Symbol::Deprecated); 336 } 337 if (C.IdentifierResult) { 338 Completion.Origin |= SymbolOrigin::Identifier; 339 Completion.Kind = CompletionItemKind::Text; 340 Completion.Name = std::string(C.IdentifierResult->Name); 341 } 342 343 // Turn absolute path into a literal string that can be #included. 
344 auto Inserted = [&](llvm::StringRef Header) 345 -> llvm::Expected<std::pair<std::string, bool>> { 346 auto ResolvedDeclaring = 347 URI::resolve(C.IndexResult->CanonicalDeclaration.FileURI, FileName); 348 if (!ResolvedDeclaring) 349 return ResolvedDeclaring.takeError(); 350 auto ResolvedInserted = toHeaderFile(Header, FileName); 351 if (!ResolvedInserted) 352 return ResolvedInserted.takeError(); 353 auto Spelled = Includes.calculateIncludePath(*ResolvedInserted, FileName); 354 if (!Spelled) 355 return error("Header not on include path"); 356 return std::make_pair( 357 std::move(*Spelled), 358 Includes.shouldInsertInclude(*ResolvedDeclaring, *ResolvedInserted)); 359 }; 360 bool ShouldInsert = C.headerToInsertIfAllowed(Opts).hasValue(); 361 // Calculate include paths and edits for all possible headers. 362 for (const auto &Inc : C.RankedIncludeHeaders) { 363 if (auto ToInclude = Inserted(Inc)) { 364 CodeCompletion::IncludeCandidate Include; 365 Include.Header = ToInclude->first; 366 if (ToInclude->second && ShouldInsert) 367 Include.Insertion = Includes.insert(ToInclude->first); 368 Completion.Includes.push_back(std::move(Include)); 369 } else 370 log("Failed to generate include insertion edits for adding header " 371 "(FileURI='{0}', IncludeHeader='{1}') into {2}: {3}", 372 C.IndexResult->CanonicalDeclaration.FileURI, Inc, FileName, 373 ToInclude.takeError()); 374 } 375 // Prefer includes that do not need edits (i.e. already exist). 
376 std::stable_partition(Completion.Includes.begin(), 377 Completion.Includes.end(), 378 [](const CodeCompletion::IncludeCandidate &I) { 379 return !I.Insertion.hasValue(); 380 }); 381 } 382 383 void add(const CompletionCandidate &C, CodeCompletionString *SemaCCS) { 384 assert(bool(C.SemaResult) == bool(SemaCCS)); 385 Bundled.emplace_back(); 386 BundledEntry &S = Bundled.back(); 387 if (C.SemaResult) { 388 bool IsPattern = C.SemaResult->Kind == CodeCompletionResult::RK_Pattern; 389 getSignature(*SemaCCS, &S.Signature, &S.SnippetSuffix, 390 &Completion.RequiredQualifier, IsPattern); 391 S.ReturnType = getReturnType(*SemaCCS); 392 } else if (C.IndexResult) { 393 S.Signature = std::string(C.IndexResult->Signature); 394 S.SnippetSuffix = std::string(C.IndexResult->CompletionSnippetSuffix); 395 S.ReturnType = std::string(C.IndexResult->ReturnType); 396 } 397 if (!Completion.Documentation) { 398 auto SetDoc = [&](llvm::StringRef Doc) { 399 if (!Doc.empty()) { 400 Completion.Documentation.emplace(); 401 parseDocumentation(Doc, *Completion.Documentation); 402 } 403 }; 404 if (C.IndexResult) { 405 SetDoc(C.IndexResult->Documentation); 406 } else if (C.SemaResult) { 407 SetDoc(getDocComment(*ASTCtx, *C.SemaResult, 408 /*CommentsFromHeader=*/false)); 409 } 410 } 411 } 412 413 CodeCompletion build() { 414 Completion.ReturnType = summarizeReturnType(); 415 Completion.Signature = summarizeSignature(); 416 Completion.SnippetSuffix = summarizeSnippet(); 417 Completion.BundleSize = Bundled.size(); 418 return std::move(Completion); 419 } 420 421 private: 422 struct BundledEntry { 423 std::string SnippetSuffix; 424 std::string Signature; 425 std::string ReturnType; 426 }; 427 428 // If all BundledEntries have the same value for a property, return it. 
429 template <std::string BundledEntry::*Member> 430 const std::string *onlyValue() const { 431 auto B = Bundled.begin(), E = Bundled.end(); 432 for (auto I = B + 1; I != E; ++I) 433 if (I->*Member != B->*Member) 434 return nullptr; 435 return &(B->*Member); 436 } 437 438 template <bool BundledEntry::*Member> const bool *onlyValue() const { 439 auto B = Bundled.begin(), E = Bundled.end(); 440 for (auto I = B + 1; I != E; ++I) 441 if (I->*Member != B->*Member) 442 return nullptr; 443 return &(B->*Member); 444 } 445 446 std::string summarizeReturnType() const { 447 if (auto *RT = onlyValue<&BundledEntry::ReturnType>()) 448 return *RT; 449 return ""; 450 } 451 452 std::string summarizeSnippet() const { 453 if (IsUsingDeclaration) 454 return ""; 455 auto *Snippet = onlyValue<&BundledEntry::SnippetSuffix>(); 456 if (!Snippet) 457 // All bundles are function calls. 458 // FIXME(ibiryukov): sometimes add template arguments to a snippet, e.g. 459 // we need to complete 'forward<$1>($0)'. 460 return "($0)"; 461 // Suppress function argument snippets cursor is followed by left 462 // parenthesis (and potentially arguments) or if there are potentially 463 // template arguments. There are cases where it would be wrong (e.g. next 464 // '<' token is a comparison rather than template argument list start) but 465 // it is less common and suppressing snippet provides better UX. 466 if (Completion.Kind == CompletionItemKind::Function || 467 Completion.Kind == CompletionItemKind::Method || 468 Completion.Kind == CompletionItemKind::Constructor) { 469 // If there is a potential template argument list, drop snippet and just 470 // complete symbol name. Ideally, this could generate an edit that would 471 // paste function arguments after template argument list but it would be 472 // complicated. Example: 473 // 474 // fu^<int> -> function<int> 475 if (NextTokenKind == tok::less && Snippet->front() == '<') 476 return ""; 477 // Potentially followed by argument list. 
478 if (NextTokenKind == tok::l_paren) { 479 // If snippet contains template arguments we will emit them and drop 480 // function arguments. Example: 481 // 482 // fu^(42) -> function<int>(42); 483 if (Snippet->front() == '<') { 484 // Find matching '>'. Snippet->find('>') will not work in cases like 485 // template <typename T=std::vector<int>>. Hence, iterate through 486 // the snippet until the angle bracket balance reaches zero. 487 int Balance = 0; 488 size_t I = 0; 489 do { 490 if (Snippet->at(I) == '>') 491 --Balance; 492 else if (Snippet->at(I) == '<') 493 ++Balance; 494 ++I; 495 } while (Balance > 0); 496 return Snippet->substr(0, I); 497 } 498 return ""; 499 } 500 } 501 if (EnableFunctionArgSnippets) 502 return *Snippet; 503 504 // Replace argument snippets with a simplified pattern. 505 if (Snippet->empty()) 506 return ""; 507 if (Completion.Kind == CompletionItemKind::Function || 508 Completion.Kind == CompletionItemKind::Method) { 509 // Functions snippets can be of 2 types: 510 // - containing only function arguments, e.g. 511 // foo(${1:int p1}, ${2:int p2}); 512 // We transform this pattern to '($0)' or '()'. 513 // - template arguments and function arguments, e.g. 514 // foo<${1:class}>(${2:int p1}). 515 // We transform this pattern to '<$1>()$0' or '<$0>()'. 516 517 bool EmptyArgs = llvm::StringRef(*Snippet).endswith("()"); 518 if (Snippet->front() == '<') 519 return EmptyArgs ? "<$1>()$0" : "<$1>($0)"; 520 if (Snippet->front() == '(') 521 return EmptyArgs ? "()" : "($0)"; 522 return *Snippet; // Not an arg snippet? 523 } 524 // 'CompletionItemKind::Interface' matches template type aliases. 525 if (Completion.Kind == CompletionItemKind::Interface || 526 Completion.Kind == CompletionItemKind::Class) { 527 if (Snippet->front() != '<') 528 return *Snippet; // Not an arg snippet? 529 530 // Classes and template using aliases can only have template arguments, 531 // e.g. Foo<${1:class}>. 
532 if (llvm::StringRef(*Snippet).endswith("<>")) 533 return "<>"; // can happen with defaulted template arguments. 534 return "<$0>"; 535 } 536 return *Snippet; 537 } 538 539 std::string summarizeSignature() const { 540 if (auto *Signature = onlyValue<&BundledEntry::Signature>()) 541 return *Signature; 542 // All bundles are function calls. 543 return "(…)"; 544 } 545 546 // ASTCtx can be nullptr if not run with sema. 547 ASTContext *ASTCtx; 548 CodeCompletion Completion; 549 llvm::SmallVector<BundledEntry, 1> Bundled; 550 bool EnableFunctionArgSnippets; 551 // No snippets will be generated for using declarations and when the function 552 // arguments are already present. 553 bool IsUsingDeclaration; 554 tok::TokenKind NextTokenKind; 555 }; 556 557 // Determine the symbol ID for a Sema code completion result, if possible. 558 SymbolID getSymbolID(const CodeCompletionResult &R, const SourceManager &SM) { 559 switch (R.Kind) { 560 case CodeCompletionResult::RK_Declaration: 561 case CodeCompletionResult::RK_Pattern: { 562 // Computing USR caches linkage, which may change after code completion. 563 if (hasUnstableLinkage(R.Declaration)) 564 return {}; 565 return clang::clangd::getSymbolID(R.Declaration); 566 } 567 case CodeCompletionResult::RK_Macro: 568 return clang::clangd::getSymbolID(R.Macro->getName(), R.MacroDefInfo, SM); 569 case CodeCompletionResult::RK_Keyword: 570 return {}; 571 } 572 llvm_unreachable("unknown CodeCompletionResult kind"); 573 } 574 575 // Scopes of the partial identifier we're trying to complete. 576 // It is used when we query the index for more completion results. 577 struct SpecifiedScope { 578 // The scopes we should look in, determined by Sema. 579 // 580 // If the qualifier was fully resolved, we look for completions in these 581 // scopes; if there is an unresolved part of the qualifier, it should be 582 // resolved within these scopes. 
583 // 584 // Examples of qualified completion: 585 // 586 // "::vec" => {""} 587 // "using namespace std; ::vec^" => {"", "std::"} 588 // "namespace ns {using namespace std;} ns::^" => {"ns::", "std::"} 589 // "std::vec^" => {""} // "std" unresolved 590 // 591 // Examples of unqualified completion: 592 // 593 // "vec^" => {""} 594 // "using namespace std; vec^" => {"", "std::"} 595 // "using namespace std; namespace ns { vec^ }" => {"ns::", "std::", ""} 596 // 597 // "" for global namespace, "ns::" for normal namespace. 598 std::vector<std::string> AccessibleScopes; 599 // The full scope qualifier as typed by the user (without the leading "::"). 600 // Set if the qualifier is not fully resolved by Sema. 601 llvm::Optional<std::string> UnresolvedQualifier; 602 603 // Construct scopes being queried in indexes. The results are deduplicated. 604 // This method format the scopes to match the index request representation. 605 std::vector<std::string> scopesForIndexQuery() { 606 std::set<std::string> Results; 607 for (llvm::StringRef AS : AccessibleScopes) 608 Results.insert( 609 (AS + (UnresolvedQualifier ? *UnresolvedQualifier : "")).str()); 610 return {Results.begin(), Results.end()}; 611 } 612 }; 613 614 // Get all scopes that will be queried in indexes and whether symbols from 615 // any scope is allowed. The first scope in the list is the preferred scope 616 // (e.g. enclosing namespace). 
617 std::pair<std::vector<std::string>, bool> 618 getQueryScopes(CodeCompletionContext &CCContext, const Sema &CCSema, 619 const CompletionPrefix &HeuristicPrefix, 620 const CodeCompleteOptions &Opts) { 621 SpecifiedScope Scopes; 622 for (auto *Context : CCContext.getVisitedContexts()) { 623 if (isa<TranslationUnitDecl>(Context)) 624 Scopes.AccessibleScopes.push_back(""); // global namespace 625 else if (isa<NamespaceDecl>(Context)) 626 Scopes.AccessibleScopes.push_back(printNamespaceScope(*Context)); 627 } 628 629 const CXXScopeSpec *SemaSpecifier = 630 CCContext.getCXXScopeSpecifier().getValueOr(nullptr); 631 // Case 1: unqualified completion. 632 if (!SemaSpecifier) { 633 // Case 2 (exception): sema saw no qualifier, but there appears to be one! 634 // This can happen e.g. in incomplete macro expansions. Use heuristics. 635 if (!HeuristicPrefix.Qualifier.empty()) { 636 vlog("Sema said no scope specifier, but we saw {0} in the source code", 637 HeuristicPrefix.Qualifier); 638 StringRef SpelledSpecifier = HeuristicPrefix.Qualifier; 639 if (SpelledSpecifier.consume_front("::")) 640 Scopes.AccessibleScopes = {""}; 641 Scopes.UnresolvedQualifier = std::string(SpelledSpecifier); 642 return {Scopes.scopesForIndexQuery(), false}; 643 } 644 // The enclosing namespace must be first, it gets a quality boost. 645 std::vector<std::string> EnclosingAtFront; 646 std::string EnclosingScope = printNamespaceScope(*CCSema.CurContext); 647 EnclosingAtFront.push_back(EnclosingScope); 648 for (auto &S : Scopes.scopesForIndexQuery()) { 649 if (EnclosingScope != S) 650 EnclosingAtFront.push_back(std::move(S)); 651 } 652 // Allow AllScopes completion as there is no explicit scope qualifier. 653 return {EnclosingAtFront, Opts.AllScopes}; 654 } 655 // Case 3: sema saw and resolved a scope qualifier. 656 if (SemaSpecifier && SemaSpecifier->isValid()) 657 return {Scopes.scopesForIndexQuery(), false}; 658 659 // Case 4: There was a qualifier, and Sema didn't resolve it. 
660 Scopes.AccessibleScopes.push_back(""); // Make sure global scope is included. 661 llvm::StringRef SpelledSpecifier = Lexer::getSourceText( 662 CharSourceRange::getCharRange(SemaSpecifier->getRange()), 663 CCSema.SourceMgr, clang::LangOptions()); 664 if (SpelledSpecifier.consume_front("::")) 665 Scopes.AccessibleScopes = {""}; 666 Scopes.UnresolvedQualifier = std::string(SpelledSpecifier); 667 // Sema excludes the trailing "::". 668 if (!Scopes.UnresolvedQualifier->empty()) 669 *Scopes.UnresolvedQualifier += "::"; 670 671 return {Scopes.scopesForIndexQuery(), false}; 672 } 673 674 // Should we perform index-based completion in a context of the specified kind? 675 // FIXME: consider allowing completion, but restricting the result types. 676 bool contextAllowsIndex(enum CodeCompletionContext::Kind K) { 677 switch (K) { 678 case CodeCompletionContext::CCC_TopLevel: 679 case CodeCompletionContext::CCC_ObjCInterface: 680 case CodeCompletionContext::CCC_ObjCImplementation: 681 case CodeCompletionContext::CCC_ObjCIvarList: 682 case CodeCompletionContext::CCC_ClassStructUnion: 683 case CodeCompletionContext::CCC_Statement: 684 case CodeCompletionContext::CCC_Expression: 685 case CodeCompletionContext::CCC_ObjCMessageReceiver: 686 case CodeCompletionContext::CCC_EnumTag: 687 case CodeCompletionContext::CCC_UnionTag: 688 case CodeCompletionContext::CCC_ClassOrStructTag: 689 case CodeCompletionContext::CCC_ObjCProtocolName: 690 case CodeCompletionContext::CCC_Namespace: 691 case CodeCompletionContext::CCC_Type: 692 case CodeCompletionContext::CCC_ParenthesizedExpression: 693 case CodeCompletionContext::CCC_ObjCInterfaceName: 694 case CodeCompletionContext::CCC_ObjCCategoryName: 695 case CodeCompletionContext::CCC_Symbol: 696 case CodeCompletionContext::CCC_SymbolOrNewName: 697 return true; 698 case CodeCompletionContext::CCC_OtherWithMacros: 699 case CodeCompletionContext::CCC_DotMemberAccess: 700 case CodeCompletionContext::CCC_ArrowMemberAccess: 701 case 
CodeCompletionContext::CCC_ObjCPropertyAccess: 702 case CodeCompletionContext::CCC_MacroName: 703 case CodeCompletionContext::CCC_MacroNameUse: 704 case CodeCompletionContext::CCC_PreprocessorExpression: 705 case CodeCompletionContext::CCC_PreprocessorDirective: 706 case CodeCompletionContext::CCC_SelectorName: 707 case CodeCompletionContext::CCC_TypeQualifiers: 708 case CodeCompletionContext::CCC_ObjCInstanceMessage: 709 case CodeCompletionContext::CCC_ObjCClassMessage: 710 case CodeCompletionContext::CCC_IncludedFile: 711 // FIXME: Provide identifier based completions for the following contexts: 712 case CodeCompletionContext::CCC_Other: // Be conservative. 713 case CodeCompletionContext::CCC_NaturalLanguage: 714 case CodeCompletionContext::CCC_Recovery: 715 case CodeCompletionContext::CCC_NewName: 716 return false; 717 } 718 llvm_unreachable("unknown code completion context"); 719 } 720 721 static bool isInjectedClass(const NamedDecl &D) { 722 if (auto *R = dyn_cast_or_null<RecordDecl>(&D)) 723 if (R->isInjectedClassName()) 724 return true; 725 return false; 726 } 727 728 // Some member calls are excluded because they're so rarely useful. 729 static bool isExcludedMember(const NamedDecl &D) { 730 // Destructor completion is rarely useful, and works inconsistently. 731 // (s.^ completes ~string, but s.~st^ is an error). 732 if (D.getKind() == Decl::CXXDestructor) 733 return true; 734 // Injected name may be useful for A::foo(), but who writes A::A::foo()? 735 if (isInjectedClass(D)) 736 return true; 737 // Explicit calls to operators are also rare. 738 auto NameKind = D.getDeclName().getNameKind(); 739 if (NameKind == DeclarationName::CXXOperatorName || 740 NameKind == DeclarationName::CXXLiteralOperatorName || 741 NameKind == DeclarationName::CXXConversionFunctionName) 742 return true; 743 return false; 744 } 745 746 // The CompletionRecorder captures Sema code-complete output, including context. 
// It filters out ignored results (but doesn't apply fuzzy-filtering yet).
// It doesn't do scoring or conversion to CompletionItem yet, as we want to
// merge with index results first.
// Generally the fields and methods of this object should only be used from
// within the callback.
struct CompletionRecorder : public CodeCompleteConsumer {
  // ResultsCallback must be non-null; it is invoked at the end of the one
  // Sema callback whose results are recorded.
  CompletionRecorder(const CodeCompleteOptions &Opts,
                     llvm::unique_function<void()> ResultsCallback)
      : CodeCompleteConsumer(Opts.getClangCompleteOpts()),
        CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
        CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
        CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {
    assert(this->ResultsCallback);
  }

  // The Sema results that survived filtering.
  std::vector<CodeCompletionResult> Results;
  // The context of the recorded callback (CCC_Other until one is recorded).
  CodeCompletionContext CCContext;
  Sema *CCSema = nullptr; // Sema that created the results.
  // FIXME: Sema is scary. Can we store ASTContext and Preprocessor, instead?

  // Records the results of a single Sema callback.
  // Recovery-mode callbacks are ignored, as are empty callbacks in contexts
  // where the index is not consulted. Once one callback has been recorded,
  // any later callback is logged and dropped.
  void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
                                  CodeCompletionResult *InResults,
                                  unsigned NumResults) override final {
    // Results from recovery mode are generally useless, and the callback after
    // recovery (if any) is usually more interesting. To make sure we handle the
    // future callback from sema, we just ignore all callbacks in recovery mode,
    // as taking only results from recovery mode results in poor completion
    // results.
    // FIXME: in case there is no future sema completion callback after the
    // recovery mode, we might still want to provide some results (e.g. trivial
    // identifier-based completion).
    if (Context.getKind() == CodeCompletionContext::CCC_Recovery) {
      log("Code complete: Ignoring sema code complete callback with Recovery "
          "context.");
      return;
    }
    // If a callback is called without any sema result and the context does not
    // support index-based completion, we simply skip it to give way to
    // potential future callbacks with results.
    if (NumResults == 0 && !contextAllowsIndex(Context.getKind()))
      return;
    // CCSema doubles as the "already recorded" flag.
    if (CCSema) {
      log("Multiple code complete callbacks (parser backtracked?). "
          "Dropping results from context {0}, keeping results from {1}.",
          getCompletionKindString(Context.getKind()),
          getCompletionKindString(this->CCContext.getKind()));
      return;
    }
    // Record the completion context.
    CCSema = &S;
    CCContext = Context;

    // Retain the results we might want.
    for (unsigned I = 0; I < NumResults; ++I) {
      auto &Result = InResults[I];
      // Class members that are shadowed by subclasses are usually noise.
      if (Result.Hidden && Result.Declaration &&
          Result.Declaration->isCXXClassMember())
        continue;
      if (!Opts.IncludeIneligibleResults &&
          (Result.Availability == CXAvailability_NotAvailable ||
           Result.Availability == CXAvailability_NotAccessible))
        continue;
      if (Result.Declaration &&
          !Context.getBaseType().isNull() // is this a member-access context?
          && isExcludedMember(*Result.Declaration))
        continue;
      // Skip injected class name when no class scope is explicitly set.
      // E.g. show injected A::A in `using A::A^` but not in "A^".
      if (Result.Declaration && !Context.getCXXScopeSpecifier().hasValue() &&
          isInjectedClass(*Result.Declaration))
        continue;
      // We choose to never append '::' to completion results in clangd.
      // Note this modifies the entry in Sema's array before it is copied.
      Result.StartsNestedNameSpecifier = false;
      Results.push_back(Result);
    }
    ResultsCallback();
  }

  CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
  CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }

  // Returns the filtering/sorting name for Result, which must be from Results.
  // Returned string is owned by this recorder (or the AST).
  llvm::StringRef getName(const CodeCompletionResult &Result) {
    switch (Result.Kind) {
    case CodeCompletionResult::RK_Declaration:
      if (auto *ID = Result.Declaration->getIdentifier())
        return ID->getName();
      // No identifier: fall back to the typed text of the completion string.
      break;
    case CodeCompletionResult::RK_Keyword:
      return Result.Keyword;
    case CodeCompletionResult::RK_Macro:
      return Result.Macro->getName();
    case CodeCompletionResult::RK_Pattern:
      return Result.Pattern->getTypedText();
    }
    auto *CCS = codeCompletionString(Result);
    return CCS->getTypedText();
  }

  // Build a CodeCompletion string for R, which must be from Results.
  // The CCS will be owned by this recorder.
  // Dereferences CCSema, so a callback must have been recorded first.
  CodeCompletionString *codeCompletionString(const CodeCompletionResult &R) {
    // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
    return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
        *CCSema, CCContext, *CCAllocator, CCTUInfo,
        /*IncludeBriefComments=*/false);
  }

private:
  // A copy of the options passed to the constructor.
  CodeCompleteOptions Opts;
  std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
  CodeCompletionTUInfo CCTUInfo;
  llvm::unique_function<void()> ResultsCallback;
};

// A signature help candidate together with the signals used to rank it.
struct ScoredSignature {
  // When not null, requires documentation to be requested from the index with
  // this ID.
  SymbolID IDForDoc;
  SignatureInformation Signature;
  SignatureQualitySignals Quality;
};

// Code-complete consumer that collects overload candidates for signature help
// and writes into the SignatureHelp object passed to the constructor.
class SignatureHelpCollector final : public CodeCompleteConsumer {
public:
  SignatureHelpCollector(const clang::CodeCompleteOptions &CodeCompleteOpts,
                         const SymbolIndex *Index, SignatureHelp &SigHelp)
      : CodeCompleteConsumer(CodeCompleteOpts), SigHelp(SigHelp),
        Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
        CCTUInfo(Allocator), Index(Index) {}

  void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg,
                                 OverloadCandidate *Candidates,
                                 unsigned NumCandidates,
                                 SourceLocation OpenParLoc) override {
    assert(!OpenParLoc.isInvalid());
    SourceManager &SrcMgr = S.getSourceManager();
    OpenParLoc = SrcMgr.getFileLoc(OpenParLoc);
    // argListStart is only set when the location is in the main file.
    if (SrcMgr.isInMainFile(OpenParLoc))
      SigHelp.argListStart = sourceLocToPosition(SrcMgr, OpenParLoc);
    else
      elog("Location oustide main file in signature help: {0}",
           OpenParLoc.printToString(SrcMgr));

    std::vector<ScoredSignature> ScoredSignatures;
    SigHelp.signatures.reserve(NumCandidates);
    ScoredSignatures.reserve(NumCandidates);
    // FIXME(rwols): How can we determine the "active overload candidate"?
    // Right now the overloaded candidates seem to be provided in a "best fit"
    // order, so I'm not too worried about this.
    SigHelp.activeSignature = 0;
    assert(CurrentArg <= (unsigned)std::numeric_limits<int>::max() &&
           "too many arguments");
    SigHelp.activeParameter = static_cast<int>(CurrentArg);
    for (unsigned I = 0; I < NumCandidates; ++I) {
      OverloadCandidate Candidate = Candidates[I];
      // We want to avoid showing instantiated signatures, because they may be
      // long in some cases (e.g. when 'T' is substituted with 'std::string', we
      // would get 'std::basic_string<char>').
908 if (auto *Func = Candidate.getFunction()) { 909 if (auto *Pattern = Func->getTemplateInstantiationPattern()) 910 Candidate = OverloadCandidate(Pattern); 911 } 912 913 const auto *CCS = Candidate.CreateSignatureString( 914 CurrentArg, S, *Allocator, CCTUInfo, true); 915 assert(CCS && "Expected the CodeCompletionString to be non-null"); 916 ScoredSignatures.push_back(processOverloadCandidate( 917 Candidate, *CCS, 918 Candidate.getFunction() 919 ? getDeclComment(S.getASTContext(), *Candidate.getFunction()) 920 : "")); 921 } 922 923 // Sema does not load the docs from the preamble, so we need to fetch extra 924 // docs from the index instead. 925 llvm::DenseMap<SymbolID, std::string> FetchedDocs; 926 if (Index) { 927 LookupRequest IndexRequest; 928 for (const auto &S : ScoredSignatures) { 929 if (!S.IDForDoc) 930 continue; 931 IndexRequest.IDs.insert(S.IDForDoc); 932 } 933 Index->lookup(IndexRequest, [&](const Symbol &S) { 934 if (!S.Documentation.empty()) 935 FetchedDocs[S.ID] = std::string(S.Documentation); 936 }); 937 log("SigHelp: requested docs for {0} symbols from the index, got {1} " 938 "symbols with non-empty docs in the response", 939 IndexRequest.IDs.size(), FetchedDocs.size()); 940 } 941 942 llvm::sort(ScoredSignatures, [](const ScoredSignature &L, 943 const ScoredSignature &R) { 944 // Ordering follows: 945 // - Less number of parameters is better. 946 // - Function is better than FunctionType which is better than 947 // Function Template. 948 // - High score is better. 949 // - Shorter signature is better. 950 // - Alphabetically smaller is better. 
951 if (L.Quality.NumberOfParameters != R.Quality.NumberOfParameters) 952 return L.Quality.NumberOfParameters < R.Quality.NumberOfParameters; 953 if (L.Quality.NumberOfOptionalParameters != 954 R.Quality.NumberOfOptionalParameters) 955 return L.Quality.NumberOfOptionalParameters < 956 R.Quality.NumberOfOptionalParameters; 957 if (L.Quality.Kind != R.Quality.Kind) { 958 using OC = CodeCompleteConsumer::OverloadCandidate; 959 switch (L.Quality.Kind) { 960 case OC::CK_Function: 961 return true; 962 case OC::CK_FunctionType: 963 return R.Quality.Kind != OC::CK_Function; 964 case OC::CK_FunctionTemplate: 965 return false; 966 } 967 llvm_unreachable("Unknown overload candidate type."); 968 } 969 if (L.Signature.label.size() != R.Signature.label.size()) 970 return L.Signature.label.size() < R.Signature.label.size(); 971 return L.Signature.label < R.Signature.label; 972 }); 973 974 for (auto &SS : ScoredSignatures) { 975 auto IndexDocIt = 976 SS.IDForDoc ? FetchedDocs.find(SS.IDForDoc) : FetchedDocs.end(); 977 if (IndexDocIt != FetchedDocs.end()) 978 SS.Signature.documentation = IndexDocIt->second; 979 980 SigHelp.signatures.push_back(std::move(SS.Signature)); 981 } 982 } 983 984 GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; } 985 986 CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; } 987 988 private: 989 void processParameterChunk(llvm::StringRef ChunkText, 990 SignatureInformation &Signature) const { 991 // (!) this is O(n), should still be fast compared to building ASTs. 992 unsigned ParamStartOffset = lspLength(Signature.label); 993 unsigned ParamEndOffset = ParamStartOffset + lspLength(ChunkText); 994 // A piece of text that describes the parameter that corresponds to 995 // the code-completion location within a function call, message send, 996 // macro invocation, etc. 
997 Signature.label += ChunkText; 998 ParameterInformation Info; 999 Info.labelOffsets.emplace(ParamStartOffset, ParamEndOffset); 1000 // FIXME: only set 'labelOffsets' when all clients migrate out of it. 1001 Info.labelString = std::string(ChunkText); 1002 1003 Signature.parameters.push_back(std::move(Info)); 1004 } 1005 1006 void processOptionalChunk(const CodeCompletionString &CCS, 1007 SignatureInformation &Signature, 1008 SignatureQualitySignals &Signal) const { 1009 for (const auto &Chunk : CCS) { 1010 switch (Chunk.Kind) { 1011 case CodeCompletionString::CK_Optional: 1012 assert(Chunk.Optional && 1013 "Expected the optional code completion string to be non-null."); 1014 processOptionalChunk(*Chunk.Optional, Signature, Signal); 1015 break; 1016 case CodeCompletionString::CK_VerticalSpace: 1017 break; 1018 case CodeCompletionString::CK_CurrentParameter: 1019 case CodeCompletionString::CK_Placeholder: 1020 processParameterChunk(Chunk.Text, Signature); 1021 Signal.NumberOfOptionalParameters++; 1022 break; 1023 default: 1024 Signature.label += Chunk.Text; 1025 break; 1026 } 1027 } 1028 } 1029 1030 // FIXME(ioeric): consider moving CodeCompletionString logic here to 1031 // CompletionString.h. 1032 ScoredSignature processOverloadCandidate(const OverloadCandidate &Candidate, 1033 const CodeCompletionString &CCS, 1034 llvm::StringRef DocComment) const { 1035 SignatureInformation Signature; 1036 SignatureQualitySignals Signal; 1037 const char *ReturnType = nullptr; 1038 1039 Signature.documentation = formatDocumentation(CCS, DocComment); 1040 Signal.Kind = Candidate.getKind(); 1041 1042 for (const auto &Chunk : CCS) { 1043 switch (Chunk.Kind) { 1044 case CodeCompletionString::CK_ResultType: 1045 // A piece of text that describes the type of an entity or, 1046 // for functions and methods, the return type. 
1047 assert(!ReturnType && "Unexpected CK_ResultType"); 1048 ReturnType = Chunk.Text; 1049 break; 1050 case CodeCompletionString::CK_CurrentParameter: 1051 case CodeCompletionString::CK_Placeholder: 1052 processParameterChunk(Chunk.Text, Signature); 1053 Signal.NumberOfParameters++; 1054 break; 1055 case CodeCompletionString::CK_Optional: { 1056 // The rest of the parameters are defaulted/optional. 1057 assert(Chunk.Optional && 1058 "Expected the optional code completion string to be non-null."); 1059 processOptionalChunk(*Chunk.Optional, Signature, Signal); 1060 break; 1061 } 1062 case CodeCompletionString::CK_VerticalSpace: 1063 break; 1064 default: 1065 Signature.label += Chunk.Text; 1066 break; 1067 } 1068 } 1069 if (ReturnType) { 1070 Signature.label += " -> "; 1071 Signature.label += ReturnType; 1072 } 1073 dlog("Signal for {0}: {1}", Signature, Signal); 1074 ScoredSignature Result; 1075 Result.Signature = std::move(Signature); 1076 Result.Quality = Signal; 1077 const FunctionDecl *Func = Candidate.getFunction(); 1078 if (Func && Result.Signature.documentation.empty()) { 1079 // Computing USR caches linkage, which may change after code completion. 1080 if (!hasUnstableLinkage(Func)) 1081 Result.IDForDoc = clangd::getSymbolID(Func); 1082 } 1083 return Result; 1084 } 1085 1086 SignatureHelp &SigHelp; 1087 std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator; 1088 CodeCompletionTUInfo CCTUInfo; 1089 const SymbolIndex *Index; 1090 }; // SignatureHelpCollector 1091 1092 struct SemaCompleteInput { 1093 PathRef FileName; 1094 size_t Offset; 1095 const PreambleData &Preamble; 1096 const llvm::Optional<PreamblePatch> Patch; 1097 const ParseInputs &ParseInput; 1098 }; 1099 1100 void loadMainFilePreambleMacros(const Preprocessor &PP, 1101 const PreambleData &Preamble) { 1102 // The ExternalPreprocessorSource has our macros, if we know where to look. 
1103 // We can read all the macros using PreambleMacros->ReadDefinedMacros(), 1104 // but this includes transitively included files, so may deserialize a lot. 1105 ExternalPreprocessorSource *PreambleMacros = PP.getExternalSource(); 1106 // As we have the names of the macros, we can look up their IdentifierInfo 1107 // and then use this to load just the macros we want. 1108 IdentifierInfoLookup *PreambleIdentifiers = 1109 PP.getIdentifierTable().getExternalIdentifierLookup(); 1110 if (!PreambleIdentifiers || !PreambleMacros) 1111 return; 1112 for (const auto &MacroName : Preamble.Macros.Names) 1113 if (auto *II = PreambleIdentifiers->get(MacroName.getKey())) 1114 if (II->isOutOfDate()) 1115 PreambleMacros->updateOutOfDateIdentifier(*II); 1116 } 1117 1118 // Invokes Sema code completion on a file. 1119 // If \p Includes is set, it will be updated based on the compiler invocation. 1120 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer, 1121 const clang::CodeCompleteOptions &Options, 1122 const SemaCompleteInput &Input, 1123 IncludeStructure *Includes = nullptr) { 1124 trace::Span Tracer("Sema completion"); 1125 1126 IgnoreDiagnostics IgnoreDiags; 1127 auto CI = buildCompilerInvocation(Input.ParseInput, IgnoreDiags); 1128 if (!CI) { 1129 elog("Couldn't create CompilerInvocation"); 1130 return false; 1131 } 1132 auto &FrontendOpts = CI->getFrontendOpts(); 1133 FrontendOpts.SkipFunctionBodies = true; 1134 // Disable typo correction in Sema. 1135 CI->getLangOpts()->SpellChecking = false; 1136 // Code completion won't trigger in delayed template bodies. 1137 // This is on-by-default in windows to allow parsing SDK headers; we're only 1138 // disabling it for the main-file (not preamble). 1139 CI->getLangOpts()->DelayedTemplateParsing = false; 1140 // Setup code completion. 
1141 FrontendOpts.CodeCompleteOpts = Options; 1142 FrontendOpts.CodeCompletionAt.FileName = std::string(Input.FileName); 1143 std::tie(FrontendOpts.CodeCompletionAt.Line, 1144 FrontendOpts.CodeCompletionAt.Column) = 1145 offsetToClangLineColumn(Input.ParseInput.Contents, Input.Offset); 1146 1147 std::unique_ptr<llvm::MemoryBuffer> ContentsBuffer = 1148 llvm::MemoryBuffer::getMemBuffer(Input.ParseInput.Contents, 1149 Input.FileName); 1150 // The diagnostic options must be set before creating a CompilerInstance. 1151 CI->getDiagnosticOpts().IgnoreWarnings = true; 1152 // We reuse the preamble whether it's valid or not. This is a 1153 // correctness/performance tradeoff: building without a preamble is slow, and 1154 // completion is latency-sensitive. 1155 // However, if we're completing *inside* the preamble section of the draft, 1156 // overriding the preamble will break sema completion. Fortunately we can just 1157 // skip all includes in this case; these completions are really simple. 1158 PreambleBounds PreambleRegion = 1159 ComputePreambleBounds(*CI->getLangOpts(), *ContentsBuffer, 0); 1160 bool CompletingInPreamble = Input.Offset < PreambleRegion.Size || 1161 (!PreambleRegion.PreambleEndsAtStartOfLine && 1162 Input.Offset == PreambleRegion.Size); 1163 if (Input.Patch) 1164 Input.Patch->apply(*CI); 1165 // NOTE: we must call BeginSourceFile after prepareCompilerInstance. Otherwise 1166 // the remapped buffers do not get freed. 1167 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS = 1168 Input.ParseInput.TFS->view(Input.ParseInput.CompileCommand.Directory); 1169 if (Input.Preamble.StatCache) 1170 VFS = Input.Preamble.StatCache->getConsumingFS(std::move(VFS)); 1171 auto Clang = prepareCompilerInstance( 1172 std::move(CI), !CompletingInPreamble ? 
&Input.Preamble.Preamble : nullptr, 1173 std::move(ContentsBuffer), std::move(VFS), IgnoreDiags); 1174 Clang->getPreprocessorOpts().SingleFileParseMode = CompletingInPreamble; 1175 Clang->setCodeCompletionConsumer(Consumer.release()); 1176 1177 SyntaxOnlyAction Action; 1178 if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) { 1179 log("BeginSourceFile() failed when running codeComplete for {0}", 1180 Input.FileName); 1181 return false; 1182 } 1183 // Macros can be defined within the preamble region of the main file. 1184 // They don't fall nicely into our index/Sema dichotomy: 1185 // - they're not indexed for completion (they're not available across files) 1186 // - but Sema code complete won't see them: as part of the preamble, they're 1187 // deserialized only when mentioned. 1188 // Force them to be deserialized so SemaCodeComplete sees them. 1189 loadMainFilePreambleMacros(Clang->getPreprocessor(), Input.Preamble); 1190 if (Includes) 1191 Clang->getPreprocessor().addPPCallbacks( 1192 collectIncludeStructureCallback(Clang->getSourceManager(), Includes)); 1193 if (llvm::Error Err = Action.Execute()) { 1194 log("Execute() failed when running codeComplete for {0}: {1}", 1195 Input.FileName, toString(std::move(Err))); 1196 return false; 1197 } 1198 Action.EndSourceFile(); 1199 1200 return true; 1201 } 1202 1203 // Should we allow index completions in the specified context? 1204 bool allowIndex(CodeCompletionContext &CC) { 1205 if (!contextAllowsIndex(CC.getKind())) 1206 return false; 1207 // We also avoid ClassName::bar (but allow namespace::bar). 1208 auto Scope = CC.getCXXScopeSpecifier(); 1209 if (!Scope) 1210 return true; 1211 NestedNameSpecifier *NameSpec = (*Scope)->getScopeRep(); 1212 if (!NameSpec) 1213 return true; 1214 // We only query the index when qualifier is a namespace. 1215 // If it's a class, we rely solely on sema completions. 
1216 switch (NameSpec->getKind()) { 1217 case NestedNameSpecifier::Global: 1218 case NestedNameSpecifier::Namespace: 1219 case NestedNameSpecifier::NamespaceAlias: 1220 return true; 1221 case NestedNameSpecifier::Super: 1222 case NestedNameSpecifier::TypeSpec: 1223 case NestedNameSpecifier::TypeSpecWithTemplate: 1224 // Unresolved inside a template. 1225 case NestedNameSpecifier::Identifier: 1226 return false; 1227 } 1228 llvm_unreachable("invalid NestedNameSpecifier kind"); 1229 } 1230 1231 std::future<SymbolSlab> startAsyncFuzzyFind(const SymbolIndex &Index, 1232 const FuzzyFindRequest &Req) { 1233 return runAsync<SymbolSlab>([&Index, Req]() { 1234 trace::Span Tracer("Async fuzzyFind"); 1235 SymbolSlab::Builder Syms; 1236 Index.fuzzyFind(Req, [&Syms](const Symbol &Sym) { Syms.insert(Sym); }); 1237 return std::move(Syms).build(); 1238 }); 1239 } 1240 1241 // Creates a `FuzzyFindRequest` based on the cached index request from the 1242 // last completion, if any, and the speculated completion filter text in the 1243 // source code. 1244 FuzzyFindRequest speculativeFuzzyFindRequestForCompletion( 1245 FuzzyFindRequest CachedReq, const CompletionPrefix &HeuristicPrefix) { 1246 CachedReq.Query = std::string(HeuristicPrefix.Name); 1247 return CachedReq; 1248 } 1249 1250 // Runs Sema-based (AST) and Index-based completion, returns merged results. 1251 // 1252 // There are a few tricky considerations: 1253 // - the AST provides information needed for the index query (e.g. which 1254 // namespaces to search in). So Sema must start first. 1255 // - we only want to return the top results (Opts.Limit). 1256 // Building CompletionItems for everything else is wasteful, so we want to 1257 // preserve the "native" format until we're done with scoring. 1258 // - the data underlying Sema completion items is owned by the AST and various 1259 // other arenas, which must stay alive for us to build CompletionItems. 
1260 // - we may get duplicate results from Sema and the Index, we need to merge. 1261 // 1262 // So we start Sema completion first, and do all our work in its callback. 1263 // We use the Sema context information to query the index. 1264 // Then we merge the two result sets, producing items that are Sema/Index/Both. 1265 // These items are scored, and the top N are synthesized into the LSP response. 1266 // Finally, we can clean up the data structures created by Sema completion. 1267 // 1268 // Main collaborators are: 1269 // - semaCodeComplete sets up the compiler machinery to run code completion. 1270 // - CompletionRecorder captures Sema completion results, including context. 1271 // - SymbolIndex (Opts.Index) provides index completion results as Symbols 1272 // - CompletionCandidates are the result of merging Sema and Index results. 1273 // Each candidate points to an underlying CodeCompletionResult (Sema), a 1274 // Symbol (Index), or both. It computes the result quality score. 1275 // CompletionCandidate also does conversion to CompletionItem (at the end). 1276 // - FuzzyMatcher scores how the candidate matches the partial identifier. 1277 // This score is combined with the result quality score for the final score. 1278 // - TopN determines the results with the best score. 1279 class CodeCompleteFlow { 1280 PathRef FileName; 1281 IncludeStructure Includes; // Complete once the compiler runs. 1282 SpeculativeFuzzyFind *SpecFuzzyFind; // Can be nullptr. 1283 const CodeCompleteOptions &Opts; 1284 1285 // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup. 1286 CompletionRecorder *Recorder = nullptr; 1287 CodeCompletionContext::Kind CCContextKind = CodeCompletionContext::CCC_Other; 1288 bool IsUsingDeclaration = false; 1289 // The snippets will not be generated if the token following completion 1290 // location is an opening parenthesis (tok::l_paren) because this would add 1291 // extra parenthesis. 
1292 tok::TokenKind NextTokenKind = tok::eof; 1293 // Counters for logging. 1294 int NSema = 0, NIndex = 0, NSemaAndIndex = 0, NIdent = 0; 1295 bool Incomplete = false; // Would more be available with a higher limit? 1296 CompletionPrefix HeuristicPrefix; 1297 llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs. 1298 Range ReplacedRange; 1299 std::vector<std::string> QueryScopes; // Initialized once Sema runs. 1300 // Initialized once QueryScopes is initialized, if there are scopes. 1301 llvm::Optional<ScopeDistance> ScopeProximity; 1302 llvm::Optional<OpaqueType> PreferredType; // Initialized once Sema runs. 1303 // Whether to query symbols from any scope. Initialized once Sema runs. 1304 bool AllScopes = false; 1305 llvm::StringSet<> ContextWords; 1306 // Include-insertion and proximity scoring rely on the include structure. 1307 // This is available after Sema has run. 1308 llvm::Optional<IncludeInserter> Inserter; // Available during runWithSema. 1309 llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs. 1310 /// Speculative request based on the cached request and the filter text before 1311 /// the cursor. 1312 /// Initialized right before sema run. This is only set if `SpecFuzzyFind` is 1313 /// set and contains a cached request. 1314 llvm::Optional<FuzzyFindRequest> SpecReq; 1315 1316 public: 1317 // A CodeCompleteFlow object is only useful for calling run() exactly once. 
1318 CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes, 1319 SpeculativeFuzzyFind *SpecFuzzyFind, 1320 const CodeCompleteOptions &Opts) 1321 : FileName(FileName), Includes(Includes), SpecFuzzyFind(SpecFuzzyFind), 1322 Opts(Opts) {} 1323 1324 CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && { 1325 trace::Span Tracer("CodeCompleteFlow"); 1326 HeuristicPrefix = guessCompletionPrefix(SemaCCInput.ParseInput.Contents, 1327 SemaCCInput.Offset); 1328 populateContextWords(SemaCCInput.ParseInput.Contents); 1329 if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) { 1330 assert(!SpecFuzzyFind->Result.valid()); 1331 SpecReq = speculativeFuzzyFindRequestForCompletion( 1332 *SpecFuzzyFind->CachedReq, HeuristicPrefix); 1333 SpecFuzzyFind->Result = startAsyncFuzzyFind(*Opts.Index, *SpecReq); 1334 } 1335 1336 // We run Sema code completion first. It builds an AST and calculates: 1337 // - completion results based on the AST. 1338 // - partial identifier and context. We need these for the index query. 1339 CodeCompleteResult Output; 1340 auto RecorderOwner = std::make_unique<CompletionRecorder>(Opts, [&]() { 1341 assert(Recorder && "Recorder is not set"); 1342 CCContextKind = Recorder->CCContext.getKind(); 1343 IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration(); 1344 auto Style = getFormatStyleForFile(SemaCCInput.FileName, 1345 SemaCCInput.ParseInput.Contents, 1346 *SemaCCInput.ParseInput.TFS); 1347 const auto NextToken = Lexer::findNextToken( 1348 Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(), 1349 Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts); 1350 if (NextToken) 1351 NextTokenKind = NextToken->getKind(); 1352 // If preprocessor was run, inclusions from preprocessor callback should 1353 // already be added to Includes. 
1354 Inserter.emplace( 1355 SemaCCInput.FileName, SemaCCInput.ParseInput.Contents, Style, 1356 SemaCCInput.ParseInput.CompileCommand.Directory, 1357 &Recorder->CCSema->getPreprocessor().getHeaderSearchInfo()); 1358 for (const auto &Inc : Includes.MainFileIncludes) 1359 Inserter->addExisting(Inc); 1360 1361 // Most of the cost of file proximity is in initializing the FileDistance 1362 // structures based on the observed includes, once per query. Conceptually 1363 // that happens here (though the per-URI-scheme initialization is lazy). 1364 // The per-result proximity scoring is (amortized) very cheap. 1365 FileDistanceOptions ProxOpts{}; // Use defaults. 1366 const auto &SM = Recorder->CCSema->getSourceManager(); 1367 llvm::StringMap<SourceParams> ProxSources; 1368 for (auto &Entry : Includes.includeDepth( 1369 SM.getFileEntryForID(SM.getMainFileID())->getName())) { 1370 auto &Source = ProxSources[Entry.getKey()]; 1371 Source.Cost = Entry.getValue() * ProxOpts.IncludeCost; 1372 // Symbols near our transitive includes are good, but only consider 1373 // things in the same directory or below it. Otherwise there can be 1374 // many false positives. 1375 if (Entry.getValue() > 0) 1376 Source.MaxUpTraversals = 1; 1377 } 1378 FileProximity.emplace(ProxSources, ProxOpts); 1379 1380 Output = runWithSema(); 1381 Inserter.reset(); // Make sure this doesn't out-live Clang. 1382 SPAN_ATTACH(Tracer, "sema_completion_kind", 1383 getCompletionKindString(CCContextKind)); 1384 log("Code complete: sema context {0}, query scopes [{1}] (AnyScope={2}), " 1385 "expected type {3}{4}", 1386 getCompletionKindString(CCContextKind), 1387 llvm::join(QueryScopes.begin(), QueryScopes.end(), ","), AllScopes, 1388 PreferredType ? Recorder->CCContext.getPreferredType().getAsString() 1389 : "<none>", 1390 IsUsingDeclaration ? 
", inside using declaration" : ""); 1391 }); 1392 1393 Recorder = RecorderOwner.get(); 1394 1395 semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(), 1396 SemaCCInput, &Includes); 1397 logResults(Output, Tracer); 1398 return Output; 1399 } 1400 1401 void logResults(const CodeCompleteResult &Output, const trace::Span &Tracer) { 1402 SPAN_ATTACH(Tracer, "sema_results", NSema); 1403 SPAN_ATTACH(Tracer, "index_results", NIndex); 1404 SPAN_ATTACH(Tracer, "merged_results", NSemaAndIndex); 1405 SPAN_ATTACH(Tracer, "identifier_results", NIdent); 1406 SPAN_ATTACH(Tracer, "returned_results", int64_t(Output.Completions.size())); 1407 SPAN_ATTACH(Tracer, "incomplete", Output.HasMore); 1408 log("Code complete: {0} results from Sema, {1} from Index, " 1409 "{2} matched, {3} from identifiers, {4} returned{5}.", 1410 NSema, NIndex, NSemaAndIndex, NIdent, Output.Completions.size(), 1411 Output.HasMore ? " (incomplete)" : ""); 1412 assert(!Opts.Limit || Output.Completions.size() <= Opts.Limit); 1413 // We don't assert that isIncomplete means we hit a limit. 1414 // Indexes may choose to impose their own limits even if we don't have one. 
1415 } 1416 1417 CodeCompleteResult runWithoutSema(llvm::StringRef Content, size_t Offset, 1418 const ThreadsafeFS &TFS) && { 1419 trace::Span Tracer("CodeCompleteWithoutSema"); 1420 // Fill in fields normally set by runWithSema() 1421 HeuristicPrefix = guessCompletionPrefix(Content, Offset); 1422 populateContextWords(Content); 1423 CCContextKind = CodeCompletionContext::CCC_Recovery; 1424 IsUsingDeclaration = false; 1425 Filter = FuzzyMatcher(HeuristicPrefix.Name); 1426 auto Pos = offsetToPosition(Content, Offset); 1427 ReplacedRange.start = ReplacedRange.end = Pos; 1428 ReplacedRange.start.character -= HeuristicPrefix.Name.size(); 1429 1430 llvm::StringMap<SourceParams> ProxSources; 1431 ProxSources[FileName].Cost = 0; 1432 FileProximity.emplace(ProxSources); 1433 1434 auto Style = getFormatStyleForFile(FileName, Content, TFS); 1435 // This will only insert verbatim headers. 1436 Inserter.emplace(FileName, Content, Style, 1437 /*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr); 1438 1439 auto Identifiers = collectIdentifiers(Content, Style); 1440 std::vector<RawIdentifier> IdentifierResults; 1441 for (const auto &IDAndCount : Identifiers) { 1442 RawIdentifier ID; 1443 ID.Name = IDAndCount.first(); 1444 ID.References = IDAndCount.second; 1445 // Avoid treating typed filter as an identifier. 1446 if (ID.Name == HeuristicPrefix.Name) 1447 --ID.References; 1448 if (ID.References > 0) 1449 IdentifierResults.push_back(std::move(ID)); 1450 } 1451 1452 // Simplified version of getQueryScopes(): 1453 // - accessible scopes are determined heuristically. 1454 // - all-scopes query if no qualifier was typed (and it's allowed). 1455 SpecifiedScope Scopes; 1456 Scopes.AccessibleScopes = visibleNamespaces( 1457 Content.take_front(Offset), format::getFormattingLangOpts(Style)); 1458 for (std::string &S : Scopes.AccessibleScopes) 1459 if (!S.empty()) 1460 S.append("::"); // visibleNamespaces doesn't include trailing ::. 
1461 if (HeuristicPrefix.Qualifier.empty()) 1462 AllScopes = Opts.AllScopes; 1463 else if (HeuristicPrefix.Qualifier.startswith("::")) { 1464 Scopes.AccessibleScopes = {""}; 1465 Scopes.UnresolvedQualifier = 1466 std::string(HeuristicPrefix.Qualifier.drop_front(2)); 1467 } else 1468 Scopes.UnresolvedQualifier = std::string(HeuristicPrefix.Qualifier); 1469 // First scope is the (modified) enclosing scope. 1470 QueryScopes = Scopes.scopesForIndexQuery(); 1471 ScopeProximity.emplace(QueryScopes); 1472 1473 SymbolSlab IndexResults = Opts.Index ? queryIndex() : SymbolSlab(); 1474 1475 CodeCompleteResult Output = toCodeCompleteResult(mergeResults( 1476 /*SemaResults=*/{}, IndexResults, IdentifierResults)); 1477 Output.RanParser = false; 1478 logResults(Output, Tracer); 1479 return Output; 1480 } 1481 1482 private: 1483 void populateContextWords(llvm::StringRef Content) { 1484 // Take last 3 lines before the completion point. 1485 unsigned RangeEnd = HeuristicPrefix.Qualifier.begin() - Content.data(), 1486 RangeBegin = RangeEnd; 1487 for (size_t I = 0; I < 3 && RangeBegin > 0; ++I) { 1488 auto PrevNL = Content.rfind('\n', RangeBegin); 1489 if (PrevNL == StringRef::npos) { 1490 RangeBegin = 0; 1491 break; 1492 } 1493 RangeBegin = PrevNL; 1494 } 1495 1496 ContextWords = collectWords(Content.slice(RangeBegin, RangeEnd)); 1497 dlog("Completion context words: {0}", 1498 llvm::join(ContextWords.keys(), ", ")); 1499 } 1500 1501 // This is called by run() once Sema code completion is done, but before the 1502 // Sema data structures are torn down. It does all the real work. 
// Produces completions from the state Sema recorded in `Recorder`.
// Computes the range to replace, the fuzzy filter, the scopes to query and the
// preferred type, then merges the Sema results with (optional) index results
// and converts the winners into a CodeCompleteResult.
CodeCompleteResult runWithSema() {
  const auto &CodeCompletionRange = CharSourceRange::getCharRange(
      Recorder->CCSema->getPreprocessor().getCodeCompletionTokenRange());
  // When we are getting completions with an empty identifier, for example
  //    std::vector<int> asdf;
  //    asdf.^;
  // Then the range will be invalid and we will be doing insertion, use
  // current cursor position in such cases as range.
  if (CodeCompletionRange.isValid()) {
    ReplacedRange = halfOpenToRange(Recorder->CCSema->getSourceManager(),
                                    CodeCompletionRange);
  } else {
    const auto &Pos = sourceLocToPosition(
        Recorder->CCSema->getSourceManager(),
        Recorder->CCSema->getPreprocessor().getCodeCompletionLoc());
    ReplacedRange.start = ReplacedRange.end = Pos;
  }
  // Candidates are matched against the preprocessor's code-completion filter.
  Filter = FuzzyMatcher(
      Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
  std::tie(QueryScopes, AllScopes) = getQueryScopes(
      Recorder->CCContext, *Recorder->CCSema, HeuristicPrefix, Opts);
  // Scope proximity is only set up when there is at least one scope to query.
  if (!QueryScopes.empty())
    ScopeProximity.emplace(QueryScopes);
  PreferredType =
      OpaqueType::fromType(Recorder->CCSema->getASTContext(),
                           Recorder->CCContext.getPreferredType());
  // Sema provides the needed context to query the index.
  // FIXME: in addition to querying for extra/overlapping symbols, we should
  //        explicitly request symbols corresponding to Sema results.
  //        We can use their signals even if the index can't suggest them.
  // We must copy index results to preserve them, but there are at most Limit.
  // The index is consulted only when one is configured and allowIndex()
  // accepts this completion context; otherwise an empty slab is used.
  auto IndexResults = (Opts.Index && allowIndex(Recorder->CCContext))
                          ? queryIndex()
                          : SymbolSlab();
  trace::Span Tracer("Populate CodeCompleteResult");
  // Merge Sema and Index results, score them, and pick the winners.
  auto Top =
      mergeResults(Recorder->Results, IndexResults, /*Identifiers*/ {});
  return toCodeCompleteResult(Top);
}

// Renders scored bundles into the final CodeCompleteResult.
// Every completion carries its scores and the range it replaces
// (ReplacedRange); HasMore is copied from Incomplete and Context from
// CCContextKind.
CodeCompleteResult
toCodeCompleteResult(const std::vector<ScoredBundle> &Scored) {
  CodeCompleteResult Output;

  // Convert the results to final form, assembling the expensive strings.
  for (auto &C : Scored) {
    Output.Completions.push_back(toCodeCompletion(C.first));
    Output.Completions.back().Score = C.second;
    Output.Completions.back().CompletionTokenRange = ReplacedRange;
  }
  Output.HasMore = Incomplete;
  Output.Context = CCContextKind;
  Output.CompletionRange = ReplacedRange;
  return Output;
}

// Builds a FuzzyFindRequest from the current filter, query scopes, file name
// and preferred type, and returns the symbols it yields.
// If a speculative request is available and equal to the request built here,
// its result is returned instead of querying the index again. Otherwise
// Opts.Index is queried directly, and a true return from fuzzyFind marks the
// result set as Incomplete.
SymbolSlab queryIndex() {
  trace::Span Tracer("Query index");
  SPAN_ATTACH(Tracer, "limit", int64_t(Opts.Limit));

  // Build the query.
  FuzzyFindRequest Req;
  // A zero Limit leaves the request's own default limit untouched.
  if (Opts.Limit)
    Req.Limit = Opts.Limit;
  Req.Query = std::string(Filter->pattern());
  Req.RestrictForCodeCompletion = true;
  Req.Scopes = QueryScopes;
  Req.AnyScope = AllScopes;
  // FIXME: we should send multiple weighted paths here.
  Req.ProximityPaths.push_back(std::string(FileName));
  if (PreferredType)
    Req.PreferredTypes.push_back(std::string(PreferredType->raw()));
  vlog("Code complete: fuzzyFind({0:2})", toJSON(Req));

  // Record the request that was actually built.
  // NOTE(review): presumably read back by the caller to seed the next
  // speculative query - confirm against SpeculativeFuzzyFind's users.
  if (SpecFuzzyFind)
    SpecFuzzyFind->NewReq = Req;
  // NOTE(review): SpecReq is dereferenced whenever the speculative result is
  // valid; this assumes SpecReq is always set in that case - confirm.
  if (SpecFuzzyFind && SpecFuzzyFind->Result.valid() && (*SpecReq == Req)) {
    vlog("Code complete: speculative fuzzy request matches the actual index "
         "request. Waiting for the speculative index results.");
    SPAN_ATTACH(Tracer, "Speculative results", true);

    trace::Span WaitSpec("Wait speculative results");
    return SpecFuzzyFind->Result.get();
  }

  SPAN_ATTACH(Tracer, "Speculative results", false);

  // Run the query against the index.
  SymbolSlab::Builder ResultsBuilder;
  if (Opts.Index->fuzzyFind(
          Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); }))
    Incomplete = true;
  return std::move(ResultsBuilder).build();
}

// Merges Sema and Index results where possible, to form CompletionCandidates.
// \p IdentifierResults are raw identifiers that can also be completion
// candidates. Identifiers are not merged with results from index or sema.
// Groups overloads if desired, to form CompletionCandidate::Bundles. The
// bundles are scored and top results are returned, best to worst.
std::vector<ScoredBundle>
mergeResults(const std::vector<CodeCompletionResult> &SemaResults,
             const SymbolSlab &IndexResults,
             const std::vector<RawIdentifier> &IdentifierResults) {
  trace::Span Tracer("Merge and score results");
  std::vector<CompletionCandidate::Bundle> Bundles;
  // Maps an overload-set key to the position of its bundle in Bundles.
  llvm::DenseMap<size_t, size_t> BundleLookup;
  // Builds one CompletionCandidate from whichever sources are present and
  // files it into a bundle.
  auto AddToBundles = [&](const CodeCompletionResult *SemaResult,
                          const Symbol *IndexResult,
                          const RawIdentifier *IdentifierResult) {
    CompletionCandidate C;
    C.SemaResult = SemaResult;
    C.IndexResult = IndexResult;
    C.IdentifierResult = IdentifierResult;
    // Name preference: index result, then Sema result, then raw identifier.
    if (C.IndexResult) {
      C.Name = IndexResult->Name;
      C.RankedIncludeHeaders = getRankedIncludes(*C.IndexResult);
    } else if (C.SemaResult) {
      C.Name = Recorder->getName(*SemaResult);
    } else {
      assert(IdentifierResult);
      C.Name = IdentifierResult->Name;
    }
    // A non-zero overload-set key groups the candidate with the others that
    // share that key; otherwise the candidate gets a bundle of its own.
    if (auto OverloadSet = C.overloadSet(
            Opts, FileName, Inserter ? Inserter.getPointer() : nullptr)) {
      auto Ret = BundleLookup.try_emplace(OverloadSet, Bundles.size());
      if (Ret.second)
        Bundles.emplace_back();
      Bundles[Ret.first->second].push_back(std::move(C));
    } else {
      Bundles.emplace_back();
      Bundles.back().push_back(std::move(C));
    }
  };
  llvm::DenseSet<const Symbol *> UsedIndexResults;
  // Finds the index symbol with the same SymbolID as a Sema result (if any)
  // and remembers it, so it is not emitted again as an index-only result.
  auto CorrespondingIndexResult =
      [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
    if (auto SymID =
            getSymbolID(SemaResult, Recorder->CCSema->getSourceManager())) {
      auto I = IndexResults.find(SymID);
      if (I != IndexResults.end()) {
        UsedIndexResults.insert(&*I);
        return &*I;
      }
    }
    return nullptr;
  };
  // Emit all Sema results, merging them with Index results if possible.
  for (auto &SemaResult : SemaResults)
    AddToBundles(&SemaResult, CorrespondingIndexResult(SemaResult), nullptr);
  // Now emit any Index-only results.
  for (const auto &IndexResult : IndexResults) {
    if (UsedIndexResults.count(&IndexResult))
      continue;
    AddToBundles(/*SemaResult=*/nullptr, &IndexResult, nullptr);
  }
  // Emit identifier results.
  for (const auto &Ident : IdentifierResults)
    AddToBundles(/*SemaResult=*/nullptr, /*IndexResult=*/nullptr, &Ident);
  // We only keep the best N results at any time, in "native" format.
  // A Limit of 0 means "no limit".
  TopN<ScoredBundle, ScoredBundleGreater> Top(
      Opts.Limit == 0 ? std::numeric_limits<size_t>::max() : Opts.Limit);
  for (auto &Bundle : Bundles)
    addCandidate(Top, std::move(Bundle));
  return std::move(Top).items();
}

// Returns the result of matching \p C's name against the fuzzy filter, or
// None when \p C is a macro (per Sema or per the index) whose name does not
// start, case-insensitively, with the filter pattern.
llvm::Optional<float> fuzzyScore(const CompletionCandidate &C) {
  // Macros can be very spammy, so we only support prefix completion.
  if (((C.SemaResult &&
        C.SemaResult->Kind == CodeCompletionResult::RK_Macro) ||
       (C.IndexResult &&
        C.IndexResult->SymInfo.Kind == index::SymbolKind::Macro)) &&
      !C.Name.startswith_lower(Filter->pattern()))
    return None;
  return Filter->match(C.Name);
}

// Computes the scores of a candidate from its quality and relevance signals,
// using the ranking model selected by Opts.RankingModel.
CodeCompletion::Scores
evaluateCompletion(const SymbolQualitySignals &Quality,
                   const SymbolRelevanceSignals &Relevance) {
  using RM = CodeCompleteOptions::CodeCompletionRankingModel;
  CodeCompletion::Scores Scores;
  switch (Opts.RankingModel) {
  case RM::Heuristics:
    Scores.Quality = Quality.evaluateHeuristics();
    Scores.Relevance = Relevance.evaluateHeuristics();
    Scores.Total =
        evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance);
    // NameMatch is in fact a multiplier on total score, so rescoring is
    // sound.
    // Avoid dividing by a (near-)zero NameMatch; fall back to Quality then.
    Scores.ExcludingName =
        Relevance.NameMatch > std::numeric_limits<float>::epsilon()
            ? Scores.Total / Relevance.NameMatch
            : Scores.Quality;
    return Scores;

  case RM::DecisionForest:
    // Only ExcludingName and Total are assigned for this model; Quality and
    // Relevance are left as initialized by Scores' default construction.
    DecisionForestScores DFScores = Opts.DecisionForestScorer(
        Quality, Relevance, Opts.DecisionForestBase);
    Scores.ExcludingName = DFScores.ExcludingName;
    Scores.Total = DFScores.Total;
    return Scores;
  }
  llvm_unreachable("Unhandled CodeCompletion ranking model.");
}

// Scores a candidate and adds it to the TopN structure.
// A bundle whose first candidate is rejected by fuzzyScore() is dropped
// without being scored. Quality and relevance signals are merged across all
// candidates of the bundle. Incomplete is set when Candidates.push() returns
// true (NOTE(review): presumably meaning a result was discarded because the
// limit was reached - confirm against TopN).
void addCandidate(TopN<ScoredBundle, ScoredBundleGreater> &Candidates,
                  CompletionCandidate::Bundle Bundle) {
  SymbolQualitySignals Quality;
  SymbolRelevanceSignals Relevance;
  Relevance.Context = CCContextKind;
  Relevance.Name = Bundle.front().Name;
  Relevance.FilterLength = HeuristicPrefix.Name.size();
  Relevance.Query = SymbolRelevanceSignals::CodeComplete;
  Relevance.FileProximityMatch = FileProximity.getPointer();
  if (ScopeProximity)
    Relevance.ScopeProximityMatch = ScopeProximity.getPointer();
  if (PreferredType)
    Relevance.HadContextType = true;
  Relevance.ContextWords = &ContextWords;
  Relevance.MainFileSignals = Opts.MainFileSignals;

  // Only the first candidate of the bundle is fuzzy-matched.
  auto &First = Bundle.front();
  if (auto FuzzyScore = fuzzyScore(First))
    Relevance.NameMatch = *FuzzyScore;
  else
    return;
  SymbolOrigin Origin = SymbolOrigin::Unknown;
  bool FromIndex = false;
  for (const auto &Candidate : Bundle) {
    if (Candidate.IndexResult) {
      Quality.merge(*Candidate.IndexResult);
      Relevance.merge(*Candidate.IndexResult);
      Origin |= Candidate.IndexResult->Origin;
      FromIndex = true;
      if (!Candidate.IndexResult->Type.empty())
        Relevance.HadSymbolType |= true;
      // Index side: the preferred type is compared via its raw string form.
      if (PreferredType &&
          PreferredType->raw() == Candidate.IndexResult->Type) {
        Relevance.TypeMatchesPreferred = true;
      }
    }
    if (Candidate.SemaResult) {
      Quality.merge(*Candidate.SemaResult);
      Relevance.merge(*Candidate.SemaResult);
      // Sema side: the candidate's type is computed from the completion
      // result and compared as an OpaqueType.
      if (PreferredType) {
        if (auto CompletionType = OpaqueType::fromCompletionResult(
                Recorder->CCSema->getASTContext(), *Candidate.SemaResult)) {
          Relevance.HadSymbolType |= true;
          if (PreferredType == CompletionType)
            Relevance.TypeMatchesPreferred = true;
        }
      }
      Origin |= SymbolOrigin::AST;
    }
    // Identifier candidates take their reference count from the identifier
    // and are given file scope.
    if (Candidate.IdentifierResult) {
      Quality.References = Candidate.IdentifierResult->References;
      Relevance.Scope = SymbolRelevanceSignals::FileScope;
      Origin |= SymbolOrigin::Identifier;
    }
  }

  CodeCompletion::Scores Scores = evaluateCompletion(Quality, Relevance);
  // Optional hook: report the rendered completion together with its signals
  // and total score.
  if (Opts.RecordCCResult)
    Opts.RecordCCResult(toCodeCompletion(Bundle), Quality, Relevance,
                        Scores.Total);

  dlog("CodeComplete: {0} ({1}) = {2}\n{3}{4}\n", First.Name,
       llvm::to_string(Origin), Scores.Total, llvm::to_string(Quality),
       llvm::to_string(Relevance));

  // Per-origin counters of the candidates that were scored.
  NSema += bool(Origin & SymbolOrigin::AST);
  NIndex += FromIndex;
  NSemaAndIndex += bool(Origin & SymbolOrigin::AST) && FromIndex;
  NIdent += bool(Origin & SymbolOrigin::Identifier);
  if (Candidates.push({std::move(Bundle), Scores}))
    Incomplete = true;
}

// Builds a single CodeCompletion from a bundle: the first item seeds the
// builder and every further item is added to it.
// Recorder may be null, in which case no ASTContext is passed to the builder;
// items with a SemaResult still dereference Recorder.
// NOTE(review): Builder and Inserter are dereferenced unconditionally, so the
// bundle is assumed to be non-empty and Inserter to be set - confirm callers.
CodeCompletion toCodeCompletion(const CompletionCandidate::Bundle &Bundle) {
  llvm::Optional<CodeCompletionBuilder> Builder;
  for (const auto &Item : Bundle) {
    CodeCompletionString *SemaCCS =
        Item.SemaResult ? Recorder->codeCompletionString(*Item.SemaResult)
                        : nullptr;
    if (!Builder)
      Builder.emplace(Recorder ? &Recorder->CCSema->getASTContext() : nullptr,
                      Item, SemaCCS, QueryScopes, *Inserter, FileName,
                      CCContextKind, Opts, IsUsingDeclaration, NextTokenKind);
    else
      Builder->add(Item, SemaCCS);
  }
  return Builder->build();
}
};

} // namespace

// Translates these options into the clang::CodeCompleteOptions handed to
// Sema.
clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
  clang::CodeCompleteOptions Result;
  Result.IncludeCodePatterns = EnableSnippets;
  Result.IncludeMacros = true;
  Result.IncludeGlobals = true;
  // We choose to include full comments and not do doxygen parsing in
  // completion.
  // FIXME: ideally, we should support doxygen in some form, e.g. do markdown
  // formatting of the comments.
  Result.IncludeBriefComments = false;

  // When an index is used, Sema is responsible for completing the main file,
  // the index can provide results from the preamble.
  // Tell Sema not to deserialize the preamble to look for results.
  Result.LoadExternal = !Index;
  Result.IncludeFixIts = IncludeFixIts;

  return Result;
}

// Splits the text that ends at \p Offset into the trailing unqualified name
// and the `::`-separated qualifier directly in front of it, scanning
// backwards from \p Offset. For example, with the cursor after "ns::Cl" the
// result is Qualifier = "ns::" and Name = "Cl". Either part may be empty.
// \p Offset must not exceed Content.size().
CompletionPrefix guessCompletionPrefix(llvm::StringRef Content,
                                       unsigned Offset) {
  assert(Offset <= Content.size());
  StringRef Rest = Content.take_front(Offset);
  CompletionPrefix Result;

  // Consume the unqualified name. We only handle ASCII characters.
  // isIdentifierBody will let us match "0invalid", but we don't mind.
  while (!Rest.empty() && isIdentifierBody(Rest.back()))
    Rest = Rest.drop_back();
  Result.Name = Content.slice(Rest.size(), Offset);

  // Consume qualifiers.
  while (Rest.consume_back("::") && !Rest.endswith(":")) // reject ::::
    while (!Rest.empty() && isIdentifierBody(Rest.back()))
      Rest = Rest.drop_back();
  // The qualifier is everything between what is left of Rest and the name.
  Result.Qualifier =
      Content.slice(Rest.size(), Result.Name.begin() - Content.begin());

  return Result;
}

// Entry point for code completion at \p Pos in \p FileName.
// Returns an empty result (after logging) if \p Pos cannot be converted to an
// offset in ParseInput.Contents. Runs the flow without Sema when there is no
// preamble or when Opts.RunParser is NeverParse; otherwise runs the full flow
// against \p Preamble.
CodeCompleteResult codeComplete(PathRef FileName, Position Pos,
                                const PreambleData *Preamble,
                                const ParseInputs &ParseInput,
                                CodeCompleteOptions Opts,
                                SpeculativeFuzzyFind *SpecFuzzyFind) {
  auto Offset = positionToOffset(ParseInput.Contents, Pos);
  if (!Offset) {
    elog("Code completion position was invalid {0}", Offset.takeError());
    return CodeCompleteResult();
  }
  auto Flow = CodeCompleteFlow(
      FileName, Preamble ? Preamble->Includes : IncludeStructure(),
      SpecFuzzyFind, Opts);
  return (!Preamble || Opts.RunParser == CodeCompleteOptions::NeverParse)
             ? std::move(Flow).runWithoutSema(ParseInput.Contents, *Offset,
                                              *ParseInput.TFS)
             : std::move(Flow).run({FileName, *Offset, *Preamble,
                                    // We want to serve code completions with
                                    // low latency, so don't bother patching.
                                    /*PreamblePatch=*/llvm::None, ParseInput});
}

// Computes signature help at \p Pos by running Sema code completion with a
// SignatureHelpCollector that fills in the returned SignatureHelp.
// Returns an empty SignatureHelp (after logging) if \p Pos cannot be converted
// to an offset. Globals, macros, code patterns and brief comments are all
// disabled in the clang options, and a preamble patch is created for the run.
SignatureHelp signatureHelp(PathRef FileName, Position Pos,
                            const PreambleData &Preamble,
                            const ParseInputs &ParseInput) {
  auto Offset = positionToOffset(ParseInput.Contents, Pos);
  if (!Offset) {
    elog("Signature help position was invalid {0}", Offset.takeError());
    return SignatureHelp();
  }
  SignatureHelp Result;
  clang::CodeCompleteOptions Options;
  Options.IncludeGlobals = false;
  Options.IncludeMacros = false;
  Options.IncludeCodePatterns = false;
  Options.IncludeBriefComments = false;
  semaCodeComplete(
      std::make_unique<SignatureHelpCollector>(Options, ParseInput.Index,
                                               Result),
      Options,
      {FileName, *Offset, Preamble,
       PreamblePatch::create(FileName, ParseInput, Preamble), ParseInput});
  return Result;
}

// Returns true if \p ND is declared directly in a translation unit, namespace
// or linkage specification, or is a member of an unscoped enum that is itself
// declared in such a scope. Explicit template specializations always yield
// false. \p ASTCtx is not used by this implementation.
bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) {
  auto InTopLevelScope = [](const NamedDecl &ND) {
    switch (ND.getDeclContext()->getDeclKind()) {
    case Decl::TranslationUnit:
    case Decl::Namespace:
    case Decl::LinkageSpec:
      return true;
    default:
      break;
    };
    return false;
  };
  // We only complete symbol's name, which is the same as the name of the
  // *primary* template in case of template specializations.
  if (isExplicitTemplateSpecialization(&ND))
    return false;

  if (InTopLevelScope(ND))
    return true;

  // Members of an unscoped enum count when the enum itself is top-level.
  if (const auto *EnumDecl = dyn_cast<clang::EnumDecl>(ND.getDeclContext()))
    return InTopLevelScope(*EnumDecl) && !EnumDecl->isScoped();

  return false;
}

// Renders \p Doc as plain text or markdown, according to \p Kind, into a
// MarkupContent of that kind.
// FIXME: find a home for this (that can depend on both markup and Protocol).
static MarkupContent renderDoc(const markup::Document &Doc, MarkupKind Kind) {
  MarkupContent Result;
  Result.kind = Kind;
  switch (Kind) {
  case MarkupKind::PlainText:
    Result.value.append(Doc.asPlainText());
    break;
  case MarkupKind::Markdown:
    Result.value.append(Doc.asMarkdown());
    break;
  }
  return Result;
}

// Converts this completion into an LSP CompletionItem, honouring \p Opts
// (include indicators, origin display, documentation format, snippets).
CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const {
  CompletionItem LSP;
  // Only the first include candidate is considered.
  const auto *InsertInclude = Includes.empty() ? nullptr : &Includes[0];
  // Label: include indicator, optional "[origin]", then the qualified name
  // and signature.
  LSP.label = ((InsertInclude && InsertInclude->Insertion)
                   ? Opts.IncludeIndicator.Insert
                   : Opts.IncludeIndicator.NoInsert) +
              (Opts.ShowOrigins ? "[" + llvm::to_string(Origin) + "]" : "") +
              RequiredQualifier + Name + Signature;

  LSP.kind = Kind;
  // Bundled overloads show a count instead of a single return type.
  LSP.detail = BundleSize > 1
                   ? std::string(llvm::formatv("[{0} overloads]", BundleSize))
                   : ReturnType;
  LSP.deprecated = Deprecated;
  // Combine header information and documentation in LSP `documentation` field.
  // This is not quite right semantically, but tends to display well in editors.
  if (InsertInclude || Documentation) {
    markup::Document Doc;
    if (InsertInclude)
      Doc.addParagraph().appendText("From ").appendCode(InsertInclude->Header);
    if (Documentation)
      Doc.append(*Documentation);
    LSP.documentation = renderDoc(Doc, Opts.DocumentationFormat);
  }
  LSP.sortText = sortText(Score.Total, Name);
  LSP.filterText = Name;
  LSP.textEdit = {CompletionTokenRange, RequiredQualifier + Name};
  // Merge continuous additionalTextEdits into main edit. The main motivation
  // behind this is to help LSP clients, it seems most of them are confused when
  // they are provided with additionalTextEdits that are consecutive to main
  // edit.
  // Note that we store additional text edits from back to front in a line. That
  // is mainly to help LSP clients again, so that changes do not affect each
  // other.
  for (const auto &FixIt : FixIts) {
    // A fix-it ending exactly where the main edit starts is prepended to it.
    if (FixIt.range.end == LSP.textEdit->range.start) {
      LSP.textEdit->newText = FixIt.newText + LSP.textEdit->newText;
      LSP.textEdit->range.start = FixIt.range.start;
    } else {
      LSP.additionalTextEdits.push_back(FixIt);
    }
  }
  if (Opts.EnableSnippets)
    LSP.textEdit->newText += SnippetSuffix;

  // FIXME(kadircet): Do not even fill insertText after making sure textEdit is
  // compatible with most of the editors.
  LSP.insertText = LSP.textEdit->newText;
  LSP.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet
                                             : InsertTextFormat::PlainText;
  // The include insertion, when there is one, goes after any fix-its.
  if (InsertInclude && InsertInclude->Insertion)
    LSP.additionalTextEdits.push_back(*InsertInclude->Insertion);

  LSP.score = Score.ExcludingName;

  return LSP;
}

// Prints \p C as the CompletionItem rendered with default options.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const CodeCompletion &C) {
  // For now just lean on CompletionItem.
  return OS << C.render(CodeCompleteOptions());
}

// Prints a header line (number of completions, "+" if R.HasMore, and the
// context kind) followed by one completion per line.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                              const CodeCompleteResult &R) {
  OS << "CodeCompleteResult: " << R.Completions.size() << (R.HasMore ? "+" : "")
     << " (" << getCompletionKindString(R.Context) << ")"
     << " items:\n";
  for (const auto &C : R.Completions)
    OS << C << "\n";
  return OS;
}

// Heuristically detect whether the `Line` is an unterminated include filename.
// Accepts `#include`, `#include_next` and `#import` (whitespace allowed around
// the `#` and after the directive) followed by `<` or `"`; the filename counts
// as unterminated when no matching `>` or `"` follows the opener.
bool isIncludeFile(llvm::StringRef Line) {
  Line = Line.ltrim();
  if (!Line.consume_front("#"))
    return false;
  Line = Line.ltrim();
  // "include_next" must be tried before its prefix "include".
  if (!(Line.consume_front("include_next") || Line.consume_front("include") ||
        Line.consume_front("import")))
    return false;
  Line = Line.ltrim();
  if (Line.consume_front("<"))
    return Line.count('>') == 0;
  if (Line.consume_front("\""))
    return Line.count('"') == 0;
  return false;
}

// Returns true if the text on the current line before \p Offset ends in
// something worth completing: a `.`, `->` or `::`; a `<`, `"` or `/` inside
// an unterminated include directive; or an identifier character or non-ASCII
// byte.
bool allowImplicitCompletion(llvm::StringRef Content, unsigned Offset) {
  // Look at last line before completion point only.
  Content = Content.take_front(Offset);
  auto Pos = Content.rfind('\n');
  if (Pos != llvm::StringRef::npos)
    Content = Content.substr(Pos + 1);

  // Complete after scope operators.
  if (Content.endswith(".") || Content.endswith("->") || Content.endswith("::"))
    return true;
  // Complete after `#include <` and #include `<foo/`.
  if ((Content.endswith("<") || Content.endswith("\"") ||
       Content.endswith("/")) &&
      isIncludeFile(Content))
    return true;

  // Complete words. Give non-ascii characters the benefit of the doubt.
  return !Content.empty() &&
         (isIdentifierBody(Content.back()) || !llvm::isASCII(Content.back()));
}

} // namespace clangd
} // namespace clang