//===--- QueryDriverDatabase.cpp ---------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Some compiler drivers have an implicit search mechanism for system headers.
// This compilation database implementation tries to extract that information by
// executing the driver in verbose mode. gcc-compatible drivers print something
// like:
// ....
// ....
// #include <...> search starts here:
//  /usr/lib/gcc/x86_64-linux-gnu/7/include
//  /usr/local/include
//  /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed
//  /usr/include/x86_64-linux-gnu
//  /usr/include
// End of search list.
// ....
// ....
// This component parses that output and adds each path to the command line
// args provided by Base, after prepending them with -isystem. Therefore the
// current implementation would not work with a driver that is not
// gcc-compatible.
//
// The first argument of the command line received from the underlying
// compilation database is used as the compiler driver path. Due to this
// arbitrary binary execution, this mechanism is not used by default and only
// executes binaries in the paths that are explicitly whitelisted by the user.
31 32 #include "GlobalCompilationDatabase.h" 33 #include "Logger.h" 34 #include "Path.h" 35 #include "Trace.h" 36 #include "clang/Driver/Types.h" 37 #include "clang/Tooling/CompilationDatabase.h" 38 #include "llvm/ADT/DenseMap.h" 39 #include "llvm/ADT/ScopeExit.h" 40 #include "llvm/ADT/SmallString.h" 41 #include "llvm/ADT/StringExtras.h" 42 #include "llvm/ADT/StringRef.h" 43 #include "llvm/ADT/iterator_range.h" 44 #include "llvm/Support/FileSystem.h" 45 #include "llvm/Support/MemoryBuffer.h" 46 #include "llvm/Support/Path.h" 47 #include "llvm/Support/Program.h" 48 #include "llvm/Support/Regex.h" 49 #include "llvm/Support/ScopedPrinter.h" 50 #include <algorithm> 51 #include <map> 52 #include <string> 53 #include <vector> 54 55 namespace clang { 56 namespace clangd { 57 namespace { 58 59 std::vector<std::string> parseDriverOutput(llvm::StringRef Output) { 60 std::vector<std::string> SystemIncludes; 61 const char SIS[] = "#include <...> search starts here:"; 62 const char SIE[] = "End of search list."; 63 llvm::SmallVector<llvm::StringRef, 8> Lines; 64 Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 65 66 auto StartIt = llvm::find_if( 67 Lines, [SIS](llvm::StringRef Line) { return Line.trim() == SIS; }); 68 if (StartIt == Lines.end()) { 69 elog("System include extraction: start marker not found: {0}", Output); 70 return {}; 71 } 72 ++StartIt; 73 const auto EndIt = 74 llvm::find_if(llvm::make_range(StartIt, Lines.end()), 75 [SIE](llvm::StringRef Line) { return Line.trim() == SIE; }); 76 if (EndIt == Lines.end()) { 77 elog("System include extraction: end marker missing: {0}", Output); 78 return {}; 79 } 80 81 for (llvm::StringRef Line : llvm::make_range(StartIt, EndIt)) { 82 SystemIncludes.push_back(Line.trim().str()); 83 vlog("System include extraction: adding {0}", Line); 84 } 85 return SystemIncludes; 86 } 87 88 std::vector<std::string> extractSystemIncludes(PathRef Driver, 89 llvm::StringRef Lang, 90 llvm::Regex &QueryDriverRegex) { 91 trace::Span 
Tracer("Extract system includes"); 92 SPAN_ATTACH(Tracer, "driver", Driver); 93 SPAN_ATTACH(Tracer, "lang", Lang); 94 95 if (!QueryDriverRegex.match(Driver)) { 96 vlog("System include extraction: not whitelisted driver {0}", Driver); 97 return {}; 98 } 99 100 if (!llvm::sys::fs::exists(Driver)) { 101 elog("System include extraction: {0} does not exist.", Driver); 102 return {}; 103 } 104 if (!llvm::sys::fs::can_execute(Driver)) { 105 elog("System include extraction: {0} is not executable.", Driver); 106 return {}; 107 } 108 109 llvm::SmallString<128> StdErrPath; 110 if (auto EC = llvm::sys::fs::createTemporaryFile("system-includes", "clangd", 111 StdErrPath)) { 112 elog("System include extraction: failed to create temporary file with " 113 "error {0}", 114 EC.message()); 115 return {}; 116 } 117 auto CleanUp = llvm::make_scope_exit( 118 [&StdErrPath]() { llvm::sys::fs::remove(StdErrPath); }); 119 120 llvm::Optional<llvm::StringRef> Redirects[] = { 121 {""}, {""}, llvm::StringRef(StdErrPath)}; 122 123 // Should we also preserve flags like "-sysroot", "-nostdinc" ? 
124 const llvm::StringRef Args[] = {Driver, "-E", "-x", Lang, "-", "-v"}; 125 126 if (int RC = llvm::sys::ExecuteAndWait(Driver, Args, /*Env=*/llvm::None, 127 Redirects)) { 128 elog("System include extraction: driver execution failed with return code: " 129 "{0}", 130 llvm::to_string(RC)); 131 return {}; 132 } 133 134 auto BufOrError = llvm::MemoryBuffer::getFile(StdErrPath); 135 if (!BufOrError) { 136 elog("System include extraction: failed to read {0} with error {1}", 137 StdErrPath, BufOrError.getError().message()); 138 return {}; 139 } 140 141 auto Includes = parseDriverOutput(BufOrError->get()->getBuffer()); 142 log("System include extractor: succesfully executed {0}, got includes: " 143 "\"{1}\"", 144 Driver, llvm::join(Includes, ", ")); 145 return Includes; 146 } 147 148 tooling::CompileCommand & 149 addSystemIncludes(tooling::CompileCommand &Cmd, 150 llvm::ArrayRef<std::string> SystemIncludes) { 151 for (llvm::StringRef Include : SystemIncludes) { 152 // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl" 153 Cmd.CommandLine.push_back("-isystem"); 154 Cmd.CommandLine.push_back(Include.str()); 155 } 156 return Cmd; 157 } 158 159 /// Converts a glob containing only ** or * into a regex. 160 std::string convertGlobToRegex(llvm::StringRef Glob) { 161 std::string RegText; 162 llvm::raw_string_ostream RegStream(RegText); 163 RegStream << '^'; 164 for (size_t I = 0, E = Glob.size(); I < E; ++I) { 165 if (Glob[I] == '*') { 166 if (I + 1 < E && Glob[I + 1] == '*') { 167 // Double star, accept any sequence. 168 RegStream << ".*"; 169 // Also skip the second star. 170 ++I; 171 } else { 172 // Single star, accept any sequence without a slash. 173 RegStream << "[^/]*"; 174 } 175 } else { 176 RegStream << llvm::Regex::escape(Glob.substr(I, 1)); 177 } 178 } 179 RegStream << '$'; 180 RegStream.flush(); 181 return RegText; 182 } 183 184 /// Converts a glob containing only ** or * into a regex. 
185 llvm::Regex convertGlobsToRegex(llvm::ArrayRef<std::string> Globs) { 186 assert(!Globs.empty() && "Globs cannot be empty!"); 187 std::vector<std::string> RegTexts; 188 RegTexts.reserve(Globs.size()); 189 for (llvm::StringRef Glob : Globs) 190 RegTexts.push_back(convertGlobToRegex(Glob)); 191 192 llvm::Regex Reg(llvm::join(RegTexts, "|")); 193 assert(Reg.isValid(RegTexts.front()) && 194 "Created an invalid regex from globs"); 195 return Reg; 196 } 197 198 /// Extracts system includes from a trusted driver by parsing the output of 199 /// include search path and appends them to the commands coming from underlying 200 /// compilation database. 201 class QueryDriverDatabase : public GlobalCompilationDatabase { 202 public: 203 QueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs, 204 std::unique_ptr<GlobalCompilationDatabase> Base) 205 : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)), 206 Base(std::move(Base)) { 207 assert(this->Base); 208 BaseChanged = 209 this->Base->watch([this](const std::vector<std::string> &Changes) { 210 OnCommandChanged.broadcast(Changes); 211 }); 212 } 213 214 llvm::Optional<tooling::CompileCommand> 215 getCompileCommand(PathRef File) const override { 216 auto Cmd = Base->getCompileCommand(File); 217 if (!Cmd || Cmd->CommandLine.empty()) 218 return Cmd; 219 220 llvm::StringRef Lang; 221 for (size_t I = 0, E = Cmd->CommandLine.size(); I < E; ++I) { 222 llvm::StringRef Arg = Cmd->CommandLine[I]; 223 if (Arg == "-x" && I + 1 < E) 224 Lang = Cmd->CommandLine[I + 1]; 225 else if (Arg.startswith("-x")) 226 Lang = Arg.drop_front(2).trim(); 227 } 228 if (Lang.empty()) { 229 llvm::StringRef Ext = llvm::sys::path::extension(File).trim('.'); 230 auto Type = driver::types::lookupTypeForExtension(Ext); 231 if (Type == driver::types::TY_INVALID) { 232 elog("System include extraction: invalid file type for {0}", Ext); 233 return {}; 234 } 235 Lang = driver::types::getTypeName(Type); 236 } 237 238 llvm::SmallString<128> 
Driver(Cmd->CommandLine.front()); 239 llvm::sys::fs::make_absolute(Cmd->Directory, Driver); 240 auto Key = std::make_pair(Driver.str(), Lang); 241 242 std::vector<std::string> SystemIncludes; 243 { 244 std::lock_guard<std::mutex> Lock(Mu); 245 246 auto It = DriverToIncludesCache.find(Key); 247 if (It != DriverToIncludesCache.end()) 248 SystemIncludes = It->second; 249 else 250 DriverToIncludesCache[Key] = SystemIncludes = 251 extractSystemIncludes(Key.first, Key.second, QueryDriverRegex); 252 } 253 254 return addSystemIncludes(*Cmd, SystemIncludes); 255 } 256 257 llvm::Optional<ProjectInfo> getProjectInfo(PathRef File) const override { 258 return Base->getProjectInfo(File); 259 } 260 261 private: 262 mutable std::mutex Mu; 263 // Caches includes extracted from a driver. 264 mutable std::map<std::pair<std::string, std::string>, 265 std::vector<std::string>> 266 DriverToIncludesCache; 267 mutable llvm::Regex QueryDriverRegex; 268 269 std::unique_ptr<GlobalCompilationDatabase> Base; 270 CommandChanged::Subscription BaseChanged; 271 }; 272 } // namespace 273 274 std::unique_ptr<GlobalCompilationDatabase> 275 getQueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs, 276 std::unique_ptr<GlobalCompilationDatabase> Base) { 277 assert(Base && "Null base to SystemIncludeExtractor"); 278 if (QueryDriverGlobs.empty()) 279 return Base; 280 return llvm::make_unique<QueryDriverDatabase>(QueryDriverGlobs, 281 std::move(Base)); 282 } 283 284 } // namespace clangd 285 } // namespace clang 286