1 //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The FileDistance structure allows calculating the minimum distance to paths 10 // in a single tree. 11 // We simply walk up the path's ancestors until we find a node whose cost is 12 // known, and add the cost of walking back down. Initialization ensures this 13 // gives the correct path to the roots. 14 // We cache the results, so that the runtime is O(|A|), where A is the set of 15 // all distinct ancestors of visited paths. 16 // 17 // Example after initialization with /=2, /bar=0, DownCost = 1: 18 // / = 2 19 // /bar = 0 20 // 21 // After querying /foo/bar and /bar/foo: 22 // / = 2 23 // /bar = 0 24 // /bar/foo = 1 25 // /foo = 3 26 // /foo/bar = 4 27 // 28 // URIDistance creates FileDistance lazily for each URI scheme encountered. In 29 // practice this is a small constant factor. 30 // 31 //===-------------------------------------------------------------------------// 32 33 #include "FileDistance.h" 34 #include "support/Logger.h" 35 #include "llvm/ADT/STLExtras.h" 36 #include <queue> 37 38 namespace clang { 39 namespace clangd { 40 41 // Convert a path into the canonical form. 42 // Canonical form is either "/", or "/segment" * N: 43 // C:\foo\bar --> /c:/foo/bar 44 // /foo/ --> /foo 45 // a/b/c --> /a/b/c 46 static llvm::SmallString<128> canonicalize(llvm::StringRef Path) { 47 llvm::SmallString<128> Result = Path.rtrim('/'); 48 native(Result, llvm::sys::path::Style::posix); 49 if (Result.empty() || Result.front() != '/') 50 Result.insert(Result.begin(), '/'); 51 return Result; 52 } 53 54 constexpr const unsigned FileDistance::Unreachable; 55 const llvm::hash_code FileDistance::RootHash = 56 llvm::hash_value(llvm::StringRef("/")); 57 58 FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources, 59 const FileDistanceOptions &Opts) 60 : Opts(Opts) { 61 llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code>> DownEdges; 62 // Compute the best distance following only up edges. 63 // Keep track of down edges, in case we can use them to improve on this. 64 for (const auto &S : Sources) { 65 auto Canonical = canonicalize(S.getKey()); 66 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost, 67 S.second.MaxUpTraversals); 68 // Walk up to ancestors of this source, assigning cost. 69 llvm::StringRef Rest = Canonical; 70 llvm::hash_code Hash = llvm::hash_value(Rest); 71 for (unsigned I = 0; !Rest.empty(); ++I) { 72 Rest = parent_path(Rest, llvm::sys::path::Style::posix); 73 auto NextHash = llvm::hash_value(Rest); 74 auto &Down = DownEdges[NextHash]; 75 if (!llvm::is_contained(Down, Hash)) 76 Down.push_back(Hash); 77 // We can't just break after MaxUpTraversals, must still set DownEdges. 78 if (I > S.getValue().MaxUpTraversals) { 79 if (Cache.find(Hash) != Cache.end()) 80 break; 81 } else { 82 unsigned Cost = S.getValue().Cost + I * Opts.UpCost; 83 auto R = Cache.try_emplace(Hash, Cost); 84 if (!R.second) { 85 if (Cost < R.first->second) { 86 R.first->second = Cost; 87 } else { 88 // If we're not the best way to get to this path, stop assigning. 89 break; 90 } 91 } 92 } 93 Hash = NextHash; 94 } 95 } 96 // Now propagate scores parent -> child if that's an improvement. 97 // BFS ensures we propagate down chains (must visit parents before children). 98 std::queue<llvm::hash_code> Next; 99 for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef("")))) 100 Next.push(Child); 101 while (!Next.empty()) { 102 auto Parent = Next.front(); 103 Next.pop(); 104 auto ParentCost = Cache.lookup(Parent); 105 for (auto Child : DownEdges.lookup(Parent)) { 106 if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) { 107 auto &ChildCost = 108 Cache.try_emplace(Child, Unreachable).first->getSecond(); 109 if (ParentCost + Opts.DownCost < ChildCost) 110 ChildCost = ParentCost + Opts.DownCost; 111 } 112 Next.push(Child); 113 } 114 } 115 } 116 117 unsigned FileDistance::distance(llvm::StringRef Path) { 118 auto Canonical = canonicalize(Path); 119 unsigned Cost = Unreachable; 120 llvm::SmallVector<llvm::hash_code> Ancestors; 121 // Walk up ancestors until we find a path we know the distance for. 122 for (llvm::StringRef Rest = Canonical; !Rest.empty(); 123 Rest = parent_path(Rest, llvm::sys::path::Style::posix)) { 124 auto Hash = llvm::hash_value(Rest); 125 if (Hash == RootHash && !Ancestors.empty() && 126 !Opts.AllowDownTraversalFromRoot) { 127 Cost = Unreachable; 128 break; 129 } 130 auto It = Cache.find(Hash); 131 if (It != Cache.end()) { 132 Cost = It->second; 133 break; 134 } 135 Ancestors.push_back(Hash); 136 } 137 // Now we know the costs for (known node, queried node]. 138 // Fill these in, walking down the directory tree. 139 for (llvm::hash_code Hash : llvm::reverse(Ancestors)) { 140 if (Cost != Unreachable) 141 Cost += Opts.DownCost; 142 Cache.try_emplace(Hash, Cost); 143 } 144 dlog("distance({0} = {1})", Path, Cost); 145 return Cost; 146 } 147 148 unsigned URIDistance::distance(llvm::StringRef URI) { 149 auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable); 150 if (!R.second) 151 return R.first->getSecond(); 152 if (auto U = clangd::URI::parse(URI)) { 153 dlog("distance({0} = {1})", URI, U->body()); 154 R.first->second = forScheme(U->scheme()).distance(U->body()); 155 } else { 156 log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError()); 157 } 158 return R.first->second; 159 } 160 161 FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { 162 auto &Delegate = ByScheme[Scheme]; 163 if (!Delegate) { 164 llvm::StringMap<SourceParams> SchemeSources; 165 for (const auto &Source : Sources) { 166 if (auto U = clangd::URI::create(Source.getKey(), Scheme)) 167 SchemeSources.try_emplace(U->body(), Source.getValue()); 168 else 169 llvm::consumeError(U.takeError()); 170 } 171 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme, 172 SchemeSources.size(), Sources.size()); 173 Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); 174 } 175 return *Delegate; 176 } 177 178 static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) { 179 llvm::SmallVector<llvm::StringRef> Split; 180 Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); 181 return {"/" + llvm::join(Split, "/"), Split.size()}; 182 } 183 184 static FileDistance 185 createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) { 186 FileDistanceOptions Opts; 187 Opts.UpCost = 2; 188 Opts.DownCost = 4; 189 Opts.AllowDownTraversalFromRoot = false; 190 191 llvm::StringMap<SourceParams> Sources; 192 llvm::StringRef Preferred = 193 QueryScopes.empty() ? "" : QueryScopes.front().c_str(); 194 for (llvm::StringRef S : QueryScopes) { 195 SourceParams Param; 196 // Penalize the global scope even it's preferred, as all projects can define 197 // symbols in it, and there is pattern where using-namespace is used in 198 // place of enclosing namespaces (e.g. in implementation files). 199 if (S == Preferred) 200 Param.Cost = S == "" ? 4 : 0; 201 else if (Preferred.startswith(S) && !S.empty()) 202 continue; // just rely on up-traversals. 203 else 204 Param.Cost = S == "" ? 6 : 2; 205 auto Path = scopeToPath(S); 206 // The global namespace is not 'near' its children. 207 Param.MaxUpTraversals = std::max(Path.second - 1, 0); 208 Sources[Path.first] = std::move(Param); 209 } 210 return FileDistance(std::move(Sources), Opts); 211 } 212 213 ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes) 214 : Distance(createScopeFileDistance(QueryScopes)) {} 215 216 unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) { 217 return Distance.distance(scopeToPath(SymbolScope).first); 218 } 219 220 } // namespace clangd 221 } // namespace clang 222