1 //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The FileDistance structure allows calculating the minimum distance to paths 10 // in a single tree. 11 // We simply walk up the path's ancestors until we find a node whose cost is 12 // known, and add the cost of walking back down. Initialization ensures this 13 // gives the correct path to the roots. 14 // We cache the results, so that the runtime is O(|A|), where A is the set of 15 // all distinct ancestors of visited paths. 16 // 17 // Example after initialization with /=2, /bar=0, DownCost = 1: 18 // / = 2 19 // /bar = 0 20 // 21 // After querying /foo/bar and /bar/foo: 22 // / = 2 23 // /bar = 0 24 // /bar/foo = 1 25 // /foo = 3 26 // /foo/bar = 4 27 // 28 // URIDistance creates FileDistance lazily for each URI scheme encountered. In 29 // practice this is a small constant factor. 30 // 31 //===-------------------------------------------------------------------------// 32 33 #include "FileDistance.h" 34 #include "support/Logger.h" 35 #include "llvm/ADT/STLExtras.h" 36 #include <queue> 37 38 namespace clang { 39 namespace clangd { 40 41 // Convert a path into the canonical form. 42 // Canonical form is either "/", or "/segment" * N: 43 // C:\foo\bar --> /c:/foo/bar 44 // /foo/ --> /foo 45 // a/b/c --> /a/b/c 46 static llvm::SmallString<128> canonicalize(llvm::StringRef Path) { 47 llvm::SmallString<128> Result = Path.rtrim('/'); 48 native(Result, llvm::sys::path::Style::posix); 49 if (Result.empty() || Result.front() != '/') 50 Result.insert(Result.begin(), '/'); 51 return Result; 52 } 53 54 constexpr const unsigned FileDistance::Unreachable; 55 const llvm::hash_code FileDistance::RootHash = 56 llvm::hash_value(llvm::StringRef("/")); 57 58 FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources, 59 const FileDistanceOptions &Opts) 60 : Opts(Opts) { 61 llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code, 4>> 62 DownEdges; 63 // Compute the best distance following only up edges. 64 // Keep track of down edges, in case we can use them to improve on this. 65 for (const auto &S : Sources) { 66 auto Canonical = canonicalize(S.getKey()); 67 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost, 68 S.second.MaxUpTraversals); 69 // Walk up to ancestors of this source, assigning cost. 70 llvm::StringRef Rest = Canonical; 71 llvm::hash_code Hash = llvm::hash_value(Rest); 72 for (unsigned I = 0; !Rest.empty(); ++I) { 73 Rest = parent_path(Rest, llvm::sys::path::Style::posix); 74 auto NextHash = llvm::hash_value(Rest); 75 auto &Down = DownEdges[NextHash]; 76 if (!llvm::is_contained(Down, Hash)) 77 Down.push_back(Hash); 78 // We can't just break after MaxUpTraversals, must still set DownEdges. 79 if (I > S.getValue().MaxUpTraversals) { 80 if (Cache.find(Hash) != Cache.end()) 81 break; 82 } else { 83 unsigned Cost = S.getValue().Cost + I * Opts.UpCost; 84 auto R = Cache.try_emplace(Hash, Cost); 85 if (!R.second) { 86 if (Cost < R.first->second) { 87 R.first->second = Cost; 88 } else { 89 // If we're not the best way to get to this path, stop assigning. 90 break; 91 } 92 } 93 } 94 Hash = NextHash; 95 } 96 } 97 // Now propagate scores parent -> child if that's an improvement. 98 // BFS ensures we propagate down chains (must visit parents before children). 99 std::queue<llvm::hash_code> Next; 100 for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef("")))) 101 Next.push(Child); 102 while (!Next.empty()) { 103 auto Parent = Next.front(); 104 Next.pop(); 105 auto ParentCost = Cache.lookup(Parent); 106 for (auto Child : DownEdges.lookup(Parent)) { 107 if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) { 108 auto &ChildCost = 109 Cache.try_emplace(Child, Unreachable).first->getSecond(); 110 if (ParentCost + Opts.DownCost < ChildCost) 111 ChildCost = ParentCost + Opts.DownCost; 112 } 113 Next.push(Child); 114 } 115 } 116 } 117 118 unsigned FileDistance::distance(llvm::StringRef Path) { 119 auto Canonical = canonicalize(Path); 120 unsigned Cost = Unreachable; 121 llvm::SmallVector<llvm::hash_code, 16> Ancestors; 122 // Walk up ancestors until we find a path we know the distance for. 123 for (llvm::StringRef Rest = Canonical; !Rest.empty(); 124 Rest = parent_path(Rest, llvm::sys::path::Style::posix)) { 125 auto Hash = llvm::hash_value(Rest); 126 if (Hash == RootHash && !Ancestors.empty() && 127 !Opts.AllowDownTraversalFromRoot) { 128 Cost = Unreachable; 129 break; 130 } 131 auto It = Cache.find(Hash); 132 if (It != Cache.end()) { 133 Cost = It->second; 134 break; 135 } 136 Ancestors.push_back(Hash); 137 } 138 // Now we know the costs for (known node, queried node]. 139 // Fill these in, walking down the directory tree. 140 for (llvm::hash_code Hash : llvm::reverse(Ancestors)) { 141 if (Cost != Unreachable) 142 Cost += Opts.DownCost; 143 Cache.try_emplace(Hash, Cost); 144 } 145 dlog("distance({0} = {1})", Path, Cost); 146 return Cost; 147 } 148 149 unsigned URIDistance::distance(llvm::StringRef URI) { 150 auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable); 151 if (!R.second) 152 return R.first->getSecond(); 153 if (auto U = clangd::URI::parse(URI)) { 154 dlog("distance({0} = {1})", URI, U->body()); 155 R.first->second = forScheme(U->scheme()).distance(U->body()); 156 } else { 157 log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError()); 158 } 159 return R.first->second; 160 } 161 162 FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { 163 auto &Delegate = ByScheme[Scheme]; 164 if (!Delegate) { 165 llvm::StringMap<SourceParams> SchemeSources; 166 for (const auto &Source : Sources) { 167 if (auto U = clangd::URI::create(Source.getKey(), Scheme)) 168 SchemeSources.try_emplace(U->body(), Source.getValue()); 169 else 170 llvm::consumeError(U.takeError()); 171 } 172 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme, 173 SchemeSources.size(), Sources.size()); 174 Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); 175 } 176 return *Delegate; 177 } 178 179 static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) { 180 llvm::SmallVector<llvm::StringRef, 4> Split; 181 Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); 182 return {"/" + llvm::join(Split, "/"), Split.size()}; 183 } 184 185 static FileDistance 186 createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) { 187 FileDistanceOptions Opts; 188 Opts.UpCost = 2; 189 Opts.DownCost = 4; 190 Opts.AllowDownTraversalFromRoot = false; 191 192 llvm::StringMap<SourceParams> Sources; 193 llvm::StringRef Preferred = 194 QueryScopes.empty() ? "" : QueryScopes.front().c_str(); 195 for (llvm::StringRef S : QueryScopes) { 196 SourceParams Param; 197 // Penalize the global scope even it's preferred, as all projects can define 198 // symbols in it, and there is pattern where using-namespace is used in 199 // place of enclosing namespaces (e.g. in implementation files). 200 if (S == Preferred) 201 Param.Cost = S == "" ? 4 : 0; 202 else if (Preferred.startswith(S) && !S.empty()) 203 continue; // just rely on up-traversals. 204 else 205 Param.Cost = S == "" ? 6 : 2; 206 auto Path = scopeToPath(S); 207 // The global namespace is not 'near' its children. 208 Param.MaxUpTraversals = std::max(Path.second - 1, 0); 209 Sources[Path.first] = std::move(Param); 210 } 211 return FileDistance(std::move(Sources), Opts); 212 } 213 214 ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes) 215 : Distance(createScopeFileDistance(QueryScopes)) {} 216 217 unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) { 218 return Distance.distance(scopeToPath(SymbolScope).first); 219 } 220 221 } // namespace clangd 222 } // namespace clang 223