1 //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The FileDistance structure allows calculating the minimum distance to paths 10 // in a single tree. 11 // We simply walk up the path's ancestors until we find a node whose cost is 12 // known, and add the cost of walking back down. Initialization ensures this 13 // gives the correct path to the roots. 14 // We cache the results, so that the runtime is O(|A|), where A is the set of 15 // all distinct ancestors of visited paths. 16 // 17 // Example after initialization with /=2, /bar=0, DownCost = 1: 18 // / = 2 19 // /bar = 0 20 // 21 // After querying /foo/bar and /bar/foo: 22 // / = 2 23 // /bar = 0 24 // /bar/foo = 1 25 // /foo = 3 26 // /foo/bar = 4 27 // 28 // URIDistance creates FileDistance lazily for each URI scheme encountered. In 29 // practice this is a small constant factor. 30 // 31 //===-------------------------------------------------------------------------// 32 33 #include "FileDistance.h" 34 #include "URI.h" 35 #include "support/Logger.h" 36 #include "llvm/ADT/STLExtras.h" 37 #include "llvm/Support/Path.h" 38 #include <queue> 39 40 namespace clang { 41 namespace clangd { 42 43 // Convert a path into the canonical form. 44 // Canonical form is either "/", or "/segment" * N: 45 // C:\foo\bar --> /c:/foo/bar 46 // /foo/ --> /foo 47 // a/b/c --> /a/b/c 48 static llvm::SmallString<128> canonicalize(llvm::StringRef Path) { 49 llvm::SmallString<128> Result = Path.rtrim('/'); 50 native(Result, llvm::sys::path::Style::posix); 51 if (Result.empty() || Result.front() != '/') 52 Result.insert(Result.begin(), '/'); 53 return Result; 54 } 55 56 constexpr const unsigned FileDistance::Unreachable; 57 const llvm::hash_code FileDistance::RootHash = 58 llvm::hash_value(llvm::StringRef("/")); 59 60 FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources, 61 const FileDistanceOptions &Opts) 62 : Opts(Opts) { 63 llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code>> DownEdges; 64 // Compute the best distance following only up edges. 65 // Keep track of down edges, in case we can use them to improve on this. 66 for (const auto &S : Sources) { 67 auto Canonical = canonicalize(S.getKey()); 68 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost, 69 S.second.MaxUpTraversals); 70 // Walk up to ancestors of this source, assigning cost. 71 llvm::StringRef Rest = Canonical; 72 llvm::hash_code Hash = llvm::hash_value(Rest); 73 for (unsigned I = 0; !Rest.empty(); ++I) { 74 Rest = parent_path(Rest, llvm::sys::path::Style::posix); 75 auto NextHash = llvm::hash_value(Rest); 76 auto &Down = DownEdges[NextHash]; 77 if (!llvm::is_contained(Down, Hash)) 78 Down.push_back(Hash); 79 // We can't just break after MaxUpTraversals, must still set DownEdges. 80 if (I > S.getValue().MaxUpTraversals) { 81 if (Cache.find(Hash) != Cache.end()) 82 break; 83 } else { 84 unsigned Cost = S.getValue().Cost + I * Opts.UpCost; 85 auto R = Cache.try_emplace(Hash, Cost); 86 if (!R.second) { 87 if (Cost < R.first->second) { 88 R.first->second = Cost; 89 } else { 90 // If we're not the best way to get to this path, stop assigning. 91 break; 92 } 93 } 94 } 95 Hash = NextHash; 96 } 97 } 98 // Now propagate scores parent -> child if that's an improvement. 99 // BFS ensures we propagate down chains (must visit parents before children). 100 std::queue<llvm::hash_code> Next; 101 for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef("")))) 102 Next.push(Child); 103 while (!Next.empty()) { 104 auto Parent = Next.front(); 105 Next.pop(); 106 auto ParentCost = Cache.lookup(Parent); 107 for (auto Child : DownEdges.lookup(Parent)) { 108 if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) { 109 auto &ChildCost = 110 Cache.try_emplace(Child, Unreachable).first->getSecond(); 111 if (ParentCost + Opts.DownCost < ChildCost) 112 ChildCost = ParentCost + Opts.DownCost; 113 } 114 Next.push(Child); 115 } 116 } 117 } 118 119 unsigned FileDistance::distance(llvm::StringRef Path) { 120 auto Canonical = canonicalize(Path); 121 unsigned Cost = Unreachable; 122 llvm::SmallVector<llvm::hash_code> Ancestors; 123 // Walk up ancestors until we find a path we know the distance for. 124 for (llvm::StringRef Rest = Canonical; !Rest.empty(); 125 Rest = parent_path(Rest, llvm::sys::path::Style::posix)) { 126 auto Hash = llvm::hash_value(Rest); 127 if (Hash == RootHash && !Ancestors.empty() && 128 !Opts.AllowDownTraversalFromRoot) { 129 Cost = Unreachable; 130 break; 131 } 132 auto It = Cache.find(Hash); 133 if (It != Cache.end()) { 134 Cost = It->second; 135 break; 136 } 137 Ancestors.push_back(Hash); 138 } 139 // Now we know the costs for (known node, queried node]. 140 // Fill these in, walking down the directory tree. 141 for (llvm::hash_code Hash : llvm::reverse(Ancestors)) { 142 if (Cost != Unreachable) 143 Cost += Opts.DownCost; 144 Cache.try_emplace(Hash, Cost); 145 } 146 dlog("distance({0} = {1})", Path, Cost); 147 return Cost; 148 } 149 150 unsigned URIDistance::distance(llvm::StringRef URI) { 151 auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable); 152 if (!R.second) 153 return R.first->getSecond(); 154 if (auto U = clangd::URI::parse(URI)) { 155 dlog("distance({0} = {1})", URI, U->body()); 156 R.first->second = forScheme(U->scheme()).distance(U->body()); 157 } else { 158 log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError()); 159 } 160 return R.first->second; 161 } 162 163 FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { 164 auto &Delegate = ByScheme[Scheme]; 165 if (!Delegate) { 166 llvm::StringMap<SourceParams> SchemeSources; 167 for (const auto &Source : Sources) { 168 if (auto U = clangd::URI::create(Source.getKey(), Scheme)) 169 SchemeSources.try_emplace(U->body(), Source.getValue()); 170 else 171 llvm::consumeError(U.takeError()); 172 } 173 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme, 174 SchemeSources.size(), Sources.size()); 175 Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); 176 } 177 return *Delegate; 178 } 179 180 static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) { 181 llvm::SmallVector<llvm::StringRef> Split; 182 Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); 183 return {"/" + llvm::join(Split, "/"), Split.size()}; 184 } 185 186 static FileDistance 187 createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) { 188 FileDistanceOptions Opts; 189 Opts.UpCost = 2; 190 Opts.DownCost = 4; 191 Opts.AllowDownTraversalFromRoot = false; 192 193 llvm::StringMap<SourceParams> Sources; 194 llvm::StringRef Preferred = 195 QueryScopes.empty() ? "" : QueryScopes.front().c_str(); 196 for (llvm::StringRef S : QueryScopes) { 197 SourceParams Param; 198 // Penalize the global scope even it's preferred, as all projects can define 199 // symbols in it, and there is pattern where using-namespace is used in 200 // place of enclosing namespaces (e.g. in implementation files). 201 if (S == Preferred) 202 Param.Cost = S == "" ? 4 : 0; 203 else if (Preferred.startswith(S) && !S.empty()) 204 continue; // just rely on up-traversals. 205 else 206 Param.Cost = S == "" ? 6 : 2; 207 auto Path = scopeToPath(S); 208 // The global namespace is not 'near' its children. 209 Param.MaxUpTraversals = std::max(Path.second - 1, 0); 210 Sources[Path.first] = std::move(Param); 211 } 212 return FileDistance(std::move(Sources), Opts); 213 } 214 215 ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes) 216 : Distance(createScopeFileDistance(QueryScopes)) {} 217 218 unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) { 219 return Distance.distance(scopeToPath(SymbolScope).first); 220 } 221 222 } // namespace clangd 223 } // namespace clang 224