1 //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The FileDistance structure allows calculating the minimum distance to paths 11 // in a single tree. 12 // We simply walk up the path's ancestors until we find a node whose cost is 13 // known, and add the cost of walking back down. Initialization ensures this 14 // gives the correct path to the roots. 15 // We cache the results, so that the runtime is O(|A|), where A is the set of 16 // all distinct ancestors of visited paths. 17 // 18 // Example after initialization with /=2, /bar=0, DownCost = 1: 19 // / = 2 20 // /bar = 0 21 // 22 // After querying /foo/bar and /bar/foo: 23 // / = 2 24 // /bar = 0 25 // /bar/foo = 1 26 // /foo = 3 27 // /foo/bar = 4 28 // 29 // URIDistance creates FileDistance lazily for each URI scheme encountered. In 30 // practice this is a small constant factor. 31 // 32 //===-------------------------------------------------------------------------// 33 34 #include "FileDistance.h" 35 #include "Logger.h" 36 #include "llvm/ADT/STLExtras.h" 37 #include <queue> 38 39 namespace clang { 40 namespace clangd { 41 using namespace llvm; 42 43 // Convert a path into the canonical form. 44 // Canonical form is either "/", or "/segment" * N: 45 // C:\foo\bar --> /c:/foo/bar 46 // /foo/ --> /foo 47 // a/b/c --> /a/b/c 48 static SmallString<128> canonicalize(StringRef Path) { 49 SmallString<128> Result = Path.rtrim('/'); 50 native(Result, sys::path::Style::posix); 51 if (Result.empty() || Result.front() != '/') 52 Result.insert(Result.begin(), '/'); 53 return Result; 54 } 55 56 constexpr const unsigned FileDistance::Unreachable; 57 58 FileDistance::FileDistance(StringMap<SourceParams> Sources, 59 const FileDistanceOptions &Opts) 60 : Opts(Opts) { 61 llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges; 62 // Compute the best distance following only up edges. 63 // Keep track of down edges, in case we can use them to improve on this. 64 for (const auto &S : Sources) { 65 auto Canonical = canonicalize(S.getKey()); 66 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost, 67 S.second.MaxUpTraversals); 68 // Walk up to ancestors of this source, assigning cost. 69 StringRef Rest = Canonical; 70 llvm::hash_code Hash = hash_value(Rest); 71 for (unsigned I = 0; !Rest.empty(); ++I) { 72 Rest = parent_path(Rest, sys::path::Style::posix); 73 auto NextHash = hash_value(Rest); 74 auto &Down = DownEdges[NextHash]; 75 if (std::find(Down.begin(), Down.end(), Hash) == Down.end()) 76 DownEdges[NextHash].push_back(Hash); 77 // We can't just break after MaxUpTraversals, must still set DownEdges. 78 if (I > S.getValue().MaxUpTraversals) { 79 if (Cache.find(Hash) != Cache.end()) 80 break; 81 } else { 82 unsigned Cost = S.getValue().Cost + I * Opts.UpCost; 83 auto R = Cache.try_emplace(Hash, Cost); 84 if (!R.second) { 85 if (Cost < R.first->second) { 86 R.first->second = Cost; 87 } else { 88 // If we're not the best way to get to this path, stop assigning. 89 break; 90 } 91 } 92 } 93 Hash = NextHash; 94 } 95 } 96 // Now propagate scores parent -> child if that's an improvement. 97 // BFS ensures we propagate down chains (must visit parents before children). 98 std::queue<hash_code> Next; 99 for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef("")))) 100 Next.push(Child); 101 while (!Next.empty()) { 102 auto ParentCost = Cache.lookup(Next.front()); 103 for (auto Child : DownEdges.lookup(Next.front())) { 104 auto &ChildCost = 105 Cache.try_emplace(Child, Unreachable).first->getSecond(); 106 if (ParentCost + Opts.DownCost < ChildCost) 107 ChildCost = ParentCost + Opts.DownCost; 108 Next.push(Child); 109 } 110 Next.pop(); 111 } 112 } 113 114 unsigned FileDistance::distance(StringRef Path) { 115 auto Canonical = canonicalize(Path); 116 unsigned Cost = Unreachable; 117 SmallVector<hash_code, 16> Ancestors; 118 // Walk up ancestors until we find a path we know the distance for. 119 for (StringRef Rest = Canonical; !Rest.empty(); 120 Rest = parent_path(Rest, sys::path::Style::posix)) { 121 auto Hash = hash_value(Rest); 122 auto It = Cache.find(Hash); 123 if (It != Cache.end()) { 124 Cost = It->second; 125 break; 126 } 127 Ancestors.push_back(Hash); 128 } 129 // Now we know the costs for (known node, queried node]. 130 // Fill these in, walking down the directory tree. 131 for (hash_code Hash : reverse(Ancestors)) { 132 if (Cost != Unreachable) 133 Cost += Opts.DownCost; 134 Cache.try_emplace(Hash, Cost); 135 } 136 dlog("distance({0} = {1})", Path, Cost); 137 return Cost; 138 } 139 140 unsigned URIDistance::distance(llvm::StringRef URI) { 141 auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable); 142 if (!R.second) 143 return R.first->getSecond(); 144 if (auto U = clangd::URI::parse(URI)) { 145 dlog("distance({0} = {1})", URI, U->body()); 146 R.first->second = forScheme(U->scheme()).distance(U->body()); 147 } else { 148 log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError()); 149 } 150 return R.first->second; 151 } 152 153 FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { 154 auto &Delegate = ByScheme[Scheme]; 155 if (!Delegate) { 156 llvm::StringMap<SourceParams> SchemeSources; 157 for (const auto &Source : Sources) { 158 if (auto U = clangd::URI::create(Source.getKey(), Scheme)) 159 SchemeSources.try_emplace(U->body(), Source.getValue()); 160 else 161 consumeError(U.takeError()); 162 } 163 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme, 164 SchemeSources.size(), Sources.size()); 165 Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); 166 } 167 return *Delegate; 168 } 169 170 } // namespace clangd 171 } // namespace clang 172