1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// Some operations such as code completion produce a set of candidates. 10 /// Usually the user can choose between them, but we should put the best options 11 /// at the top (they're easier to select, and more likely to be seen). 12 /// 13 /// This file defines building blocks for ranking candidates. 14 /// It's used by the features directly and also in the implementation of 15 /// indexes, as indexes also need to heuristically limit their results. 16 /// 17 /// The facilities here are: 18 /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString 19 /// These are structured in a way that they can be debugged, and are fairly 20 /// consistent regardless of the source. 21 /// - compute scores from scoring signals. These are suitable for sorting. 22 /// - sorting utilities like the TopN container. 23 /// These could be split up further to isolate dependencies if we care. 24 /// 25 //===----------------------------------------------------------------------===// 26 27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 29 30 #include "FileDistance.h" 31 #include "clang/Sema/CodeCompleteConsumer.h" 32 #include "llvm/ADT/StringRef.h" 33 #include "llvm/ADT/StringSet.h" 34 #include <algorithm> 35 #include <functional> 36 #include <vector> 37 38 namespace llvm { 39 class raw_ostream; 40 } // namespace llvm 41 42 namespace clang { 43 class CodeCompletionResult; 44 45 namespace clangd { 46 struct ASTSignals; 47 struct Symbol; 48 class URIDistance; 49 50 // Signals structs are designed to be aggregated from 0 or more sources. 51 // A default instance has neutral signals, and sources are merged into it. 52 // They can be dumped for debugging, and evaluate()d into a score. 53 54 /// Attributes of a symbol that affect how much we like it. 55 struct SymbolQualitySignals { 56 bool Deprecated = false; 57 bool ReservedName = false; // __foo, _Foo are usually implementation details. 58 // FIXME: make these findable once user types _. 59 bool ImplementationDetail = false; 60 unsigned References = 0; 61 62 enum SymbolCategory { 63 Unknown = 0, 64 Variable, 65 Macro, 66 Type, 67 Function, 68 Constructor, 69 Destructor, 70 Namespace, 71 Keyword, 72 Operator, 73 } Category = Unknown; 74 75 void merge(const CodeCompletionResult &SemaCCResult); 76 void merge(const Symbol &IndexResult); 77 78 // Condense these signals down to a single number, higher is better. 79 float evaluateHeuristics() const; 80 }; 81 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 82 const SymbolQualitySignals &); 83 84 /// Attributes of a symbol-query pair that affect how much we like it. 85 struct SymbolRelevanceSignals { 86 /// The name of the symbol (for ContextWords). Must be explicitly assigned. 87 llvm::StringRef Name; 88 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. 89 float NameMatch = 1; 90 /// Lowercase words relevant to the context (e.g. near the completion point). 91 llvm::StringSet<>* ContextWords = nullptr; 92 bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). 93 /// Whether fixits needs to be applied for that completion or not. 94 bool NeedsFixIts = false; 95 bool InBaseClass = false; // A member from base class of the accessed class. 96 97 URIDistance *FileProximityMatch = nullptr; 98 /// These are used to calculate proximity between the index symbol and the 99 /// query. 100 llvm::StringRef SymbolURI; 101 /// FIXME: unify with index proximity score - signals should be 102 /// source-independent. 103 /// Proximity between best declaration and the query. [0-1], 1 is closest. 104 float SemaFileProximityScore = 0; 105 106 // Scope proximity is only considered (both index and sema) when this is set. 107 ScopeDistance *ScopeProximityMatch = nullptr; 108 llvm::Optional<llvm::StringRef> SymbolScope; 109 // A symbol from sema should be accessible from the current scope. 110 bool SemaSaysInScope = false; 111 112 // An approximate measure of where we expect the symbol to be used. 113 enum AccessibleScope { 114 FunctionScope, 115 ClassScope, 116 FileScope, 117 GlobalScope, 118 } Scope = GlobalScope; 119 120 enum QueryType { 121 CodeComplete, 122 Generic, 123 } Query = Generic; 124 125 CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other; 126 127 // Whether symbol is an instance member of a class. 128 bool IsInstanceMember = false; 129 130 // Whether clang provided a preferred type in the completion context. 131 bool HadContextType = false; 132 // Whether a source completion item or a symbol had a type information. 133 bool HadSymbolType = false; 134 // Whether the item matches the type expected in the completion context. 135 bool TypeMatchesPreferred = false; 136 137 /// Length of the unqualified partial name of Symbol typed in 138 /// CompletionPrefix. 139 unsigned FilterLength = 0; 140 141 const ASTSignals *MainFileSignals = nullptr; 142 /// Number of references to the candidate in the main file. 143 unsigned MainFileRefs = 0; 144 /// Number of unique symbols in the main file which belongs to candidate's 145 /// namespace. This indicates how relevant the namespace is in the current 146 /// file. 147 unsigned ScopeRefsInFile = 0; 148 149 /// Set of derived signals computed by calculateDerivedSignals(). Must not be 150 /// set explicitly. 151 struct DerivedSignals { 152 /// Whether Name contains some word from context. 153 bool NameMatchesContext = false; 154 /// Min distance between SymbolURI and all the headers included by the TU. 155 unsigned FileProximityDistance = FileDistance::Unreachable; 156 /// Min distance between SymbolScope and all the available scopes. 157 unsigned ScopeProximityDistance = FileDistance::Unreachable; 158 }; 159 160 DerivedSignals calculateDerivedSignals() const; 161 162 void merge(const CodeCompletionResult &SemaResult); 163 void merge(const Symbol &IndexResult); 164 void computeASTSignals(const CodeCompletionResult &SemaResult); 165 166 // Condense these signals down to a single number, higher is better. 167 float evaluateHeuristics() const; 168 }; 169 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 170 const SymbolRelevanceSignals &); 171 172 /// Combine symbol quality and relevance into a single score. 173 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); 174 175 /// Same semantics as CodeComplete::Score. Quality score and Relevance score 176 /// have been removed since DecisionForest cannot assign individual scores to 177 /// Quality and Relevance signals. 178 struct DecisionForestScores { 179 float Total = 0.f; 180 float ExcludingName = 0.f; 181 }; 182 183 DecisionForestScores 184 evaluateDecisionForest(const SymbolQualitySignals &Quality, 185 const SymbolRelevanceSignals &Relevance, float Base); 186 187 /// TopN<T> is a lossy container that preserves only the "best" N elements. 188 template <typename T, typename Compare = std::greater<T>> class TopN { 189 public: 190 using value_type = T; 191 TopN(size_t N, Compare Greater = Compare()) N(N)192 : N(N), Greater(std::move(Greater)) {} 193 194 // Adds a candidate to the set. 195 // Returns true if a candidate was dropped to get back under N. push(value_type && V)196 bool push(value_type &&V) { 197 bool Dropped = false; 198 if (Heap.size() >= N) { 199 Dropped = true; 200 if (N > 0 && Greater(V, Heap.front())) { 201 std::pop_heap(Heap.begin(), Heap.end(), Greater); 202 Heap.back() = std::move(V); 203 std::push_heap(Heap.begin(), Heap.end(), Greater); 204 } 205 } else { 206 Heap.push_back(std::move(V)); 207 std::push_heap(Heap.begin(), Heap.end(), Greater); 208 } 209 assert(Heap.size() <= N); 210 assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); 211 return Dropped; 212 } 213 214 // Returns candidates from best to worst. items()215 std::vector<value_type> items() && { 216 std::sort_heap(Heap.begin(), Heap.end(), Greater); 217 assert(Heap.size() <= N); 218 return std::move(Heap); 219 } 220 221 private: 222 const size_t N; 223 std::vector<value_type> Heap; // Min-heap, comparator is Greater. 224 Compare Greater; 225 }; 226 227 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for 228 /// LSP. (The highest score compares smallest so it sorts at the top). 229 std::string sortText(float Score, llvm::StringRef Tiebreak = ""); 230 231 struct SignatureQualitySignals { 232 uint32_t NumberOfParameters = 0; 233 uint32_t NumberOfOptionalParameters = 0; 234 CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind = 235 CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function; 236 }; 237 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 238 const SignatureQualitySignals &); 239 240 } // namespace clangd 241 } // namespace clang 242 243 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 244