//===--- QueryDriverDatabase.cpp ---------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Some compiler drivers have an implicit search mechanism for system headers.
// This compilation database implementation tries to extract that information by
// executing the driver in verbose mode. gcc-compatible drivers print something
// like:
// ....
// ....
// #include <...> search starts here:
//  /usr/lib/gcc/x86_64-linux-gnu/7/include
//  /usr/local/include
//  /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed
//  /usr/include/x86_64-linux-gnu
//  /usr/include
// End of search list.
// ....
// ....
// This component parses that output and adds each path to the command line args
// provided by Base, after prepending them with -isystem. Therefore the current
// implementation will not work with a driver that is not gcc-compatible.
//
// The first argument of the command line received from the underlying
// compilation database is used as the compiler driver path. Due to this
// arbitrary binary execution, this mechanism is not used by default and only
// executes binaries in the paths that are explicitly included by the user.
31 32 #include "CompileCommands.h" 33 #include "GlobalCompilationDatabase.h" 34 #include "support/Logger.h" 35 #include "support/Path.h" 36 #include "support/Trace.h" 37 #include "clang/Basic/Diagnostic.h" 38 #include "clang/Basic/TargetInfo.h" 39 #include "clang/Basic/TargetOptions.h" 40 #include "clang/Driver/Types.h" 41 #include "clang/Tooling/CompilationDatabase.h" 42 #include "llvm/ADT/ScopeExit.h" 43 #include "llvm/ADT/SmallString.h" 44 #include "llvm/ADT/StringExtras.h" 45 #include "llvm/ADT/StringRef.h" 46 #include "llvm/Support/FileSystem.h" 47 #include "llvm/Support/MemoryBuffer.h" 48 #include "llvm/Support/Path.h" 49 #include "llvm/Support/Program.h" 50 #include "llvm/Support/Regex.h" 51 #include "llvm/Support/ScopedPrinter.h" 52 #include <algorithm> 53 #include <map> 54 #include <string> 55 #include <vector> 56 57 namespace clang { 58 namespace clangd { 59 namespace { 60 61 struct DriverInfo { 62 std::vector<std::string> SystemIncludes; 63 std::string Target; 64 }; 65 66 bool isValidTarget(llvm::StringRef Triple) { 67 std::shared_ptr<TargetOptions> TargetOpts(new TargetOptions); 68 TargetOpts->Triple = Triple.str(); 69 DiagnosticsEngine Diags(new DiagnosticIDs, new DiagnosticOptions, 70 new IgnoringDiagConsumer); 71 IntrusiveRefCntPtr<TargetInfo> Target = 72 TargetInfo::CreateTargetInfo(Diags, TargetOpts); 73 return bool(Target); 74 } 75 76 llvm::Optional<DriverInfo> parseDriverOutput(llvm::StringRef Output) { 77 DriverInfo Info; 78 const char SIS[] = "#include <...> search starts here:"; 79 const char SIE[] = "End of search list."; 80 const char TS[] = "Target: "; 81 llvm::SmallVector<llvm::StringRef> Lines; 82 Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 83 84 enum { 85 Initial, // Initial state: searching for target or includes list. 86 IncludesExtracting, // Includes extracting. 87 Done // Includes and target extraction done. 
88 } State = Initial; 89 bool SeenIncludes = false; 90 bool SeenTarget = false; 91 for (auto *It = Lines.begin(); State != Done && It != Lines.end(); ++It) { 92 auto Line = *It; 93 switch (State) { 94 case Initial: 95 if (!SeenIncludes && Line.trim() == SIS) { 96 SeenIncludes = true; 97 State = IncludesExtracting; 98 } else if (!SeenTarget && Line.trim().startswith(TS)) { 99 SeenTarget = true; 100 llvm::StringRef TargetLine = Line.trim(); 101 TargetLine.consume_front(TS); 102 // Only detect targets that clang understands 103 if (!isValidTarget(TargetLine)) { 104 elog("System include extraction: invalid target \"{0}\", ignoring", 105 TargetLine); 106 } else { 107 Info.Target = TargetLine.str(); 108 vlog("System include extraction: target extracted: \"{0}\"", 109 TargetLine); 110 } 111 } 112 break; 113 case IncludesExtracting: 114 if (Line.trim() == SIE) { 115 State = SeenTarget ? Done : Initial; 116 } else { 117 Info.SystemIncludes.push_back(Line.trim().str()); 118 vlog("System include extraction: adding {0}", Line); 119 } 120 break; 121 default: 122 llvm_unreachable("Impossible state of the driver output parser"); 123 break; 124 } 125 } 126 if (!SeenIncludes) { 127 elog("System include extraction: start marker not found: {0}", Output); 128 return llvm::None; 129 } 130 if (State == IncludesExtracting) { 131 elog("System include extraction: end marker missing: {0}", Output); 132 return llvm::None; 133 } 134 return std::move(Info); 135 } 136 137 llvm::Optional<DriverInfo> 138 extractSystemIncludesAndTarget(llvm::SmallString<128> Driver, 139 llvm::StringRef Lang, 140 llvm::ArrayRef<std::string> CommandLine, 141 const llvm::Regex &QueryDriverRegex) { 142 trace::Span Tracer("Extract system includes and target"); 143 144 if (!llvm::sys::path::is_absolute(Driver)) { 145 assert(llvm::none_of( 146 Driver, [](char C) { return llvm::sys::path::is_separator(C); })); 147 auto DriverProgram = llvm::sys::findProgramByName(Driver); 148 if (DriverProgram) { 149 vlog("System include 
extraction: driver {0} expanded to {1}", Driver, 150 *DriverProgram); 151 Driver = *DriverProgram; 152 } else { 153 elog("System include extraction: driver {0} not found in PATH", Driver); 154 return llvm::None; 155 } 156 } 157 158 SPAN_ATTACH(Tracer, "driver", Driver); 159 SPAN_ATTACH(Tracer, "lang", Lang); 160 161 if (!QueryDriverRegex.match(Driver)) { 162 vlog("System include extraction: not allowed driver {0}", Driver); 163 return llvm::None; 164 } 165 166 llvm::SmallString<128> StdErrPath; 167 if (auto EC = llvm::sys::fs::createTemporaryFile("system-includes", "clangd", 168 StdErrPath)) { 169 elog("System include extraction: failed to create temporary file with " 170 "error {0}", 171 EC.message()); 172 return llvm::None; 173 } 174 auto CleanUp = llvm::make_scope_exit( 175 [&StdErrPath]() { llvm::sys::fs::remove(StdErrPath); }); 176 177 llvm::Optional<llvm::StringRef> Redirects[] = {{""}, {""}, StdErrPath.str()}; 178 179 llvm::SmallVector<llvm::StringRef> Args = {Driver, "-E", "-x", 180 Lang, "-", "-v"}; 181 182 // These flags will be preserved 183 const llvm::StringRef FlagsToPreserve[] = { 184 "-nostdinc", "--no-standard-includes", "-nostdinc++", "-nobuiltininc"}; 185 // Preserves these flags and their values, either as separate args or with an 186 // equalsbetween them 187 const llvm::StringRef ArgsToPreserve[] = {"--sysroot", "-isysroot"}; 188 189 for (size_t I = 0, E = CommandLine.size(); I < E; ++I) { 190 llvm::StringRef Arg = CommandLine[I]; 191 if (llvm::any_of(FlagsToPreserve, 192 [&Arg](llvm::StringRef S) { return S == Arg; })) { 193 Args.push_back(Arg); 194 } else { 195 const auto *Found = 196 llvm::find_if(ArgsToPreserve, [&Arg](llvm::StringRef S) { 197 return Arg.startswith(S); 198 }); 199 if (Found == std::end(ArgsToPreserve)) 200 continue; 201 Arg = Arg.drop_front(Found->size()); 202 if (Arg.empty() && I + 1 < E) { 203 Args.push_back(CommandLine[I]); 204 Args.push_back(CommandLine[++I]); 205 } else if (Arg.startswith("=")) { 206 
Args.push_back(CommandLine[I]); 207 } 208 } 209 } 210 211 std::string ErrMsg; 212 if (int RC = llvm::sys::ExecuteAndWait(Driver, Args, /*Env=*/llvm::None, 213 Redirects, /*SecondsToWait=*/0, 214 /*MemoryLimit=*/0, &ErrMsg)) { 215 elog("System include extraction: driver execution failed with return code: " 216 "{0} - '{1}'. Args: [{2}]", 217 llvm::to_string(RC), ErrMsg, printArgv(Args)); 218 return llvm::None; 219 } 220 221 auto BufOrError = llvm::MemoryBuffer::getFile(StdErrPath); 222 if (!BufOrError) { 223 elog("System include extraction: failed to read {0} with error {1}", 224 StdErrPath, BufOrError.getError().message()); 225 return llvm::None; 226 } 227 228 llvm::Optional<DriverInfo> Info = 229 parseDriverOutput(BufOrError->get()->getBuffer()); 230 if (!Info) 231 return llvm::None; 232 log("System includes extractor: successfully executed {0}\n\tgot includes: " 233 "\"{1}\"\n\tgot target: \"{2}\"", 234 Driver, llvm::join(Info->SystemIncludes, ", "), Info->Target); 235 return Info; 236 } 237 238 tooling::CompileCommand & 239 addSystemIncludes(tooling::CompileCommand &Cmd, 240 llvm::ArrayRef<std::string> SystemIncludes) { 241 for (llvm::StringRef Include : SystemIncludes) { 242 // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl" 243 Cmd.CommandLine.push_back("-isystem"); 244 Cmd.CommandLine.push_back(Include.str()); 245 } 246 return Cmd; 247 } 248 249 tooling::CompileCommand &setTarget(tooling::CompileCommand &Cmd, 250 const std::string &Target) { 251 if (!Target.empty()) { 252 // We do not want to override existing target with extracted one. 253 for (llvm::StringRef Arg : Cmd.CommandLine) { 254 if (Arg == "-target" || Arg.startswith("--target=")) 255 return Cmd; 256 } 257 Cmd.CommandLine.push_back("--target=" + Target); 258 } 259 return Cmd; 260 } 261 262 /// Converts a glob containing only ** or * into a regex. 
263 std::string convertGlobToRegex(llvm::StringRef Glob) { 264 std::string RegText; 265 llvm::raw_string_ostream RegStream(RegText); 266 RegStream << '^'; 267 for (size_t I = 0, E = Glob.size(); I < E; ++I) { 268 if (Glob[I] == '*') { 269 if (I + 1 < E && Glob[I + 1] == '*') { 270 // Double star, accept any sequence. 271 RegStream << ".*"; 272 // Also skip the second star. 273 ++I; 274 } else { 275 // Single star, accept any sequence without a slash. 276 RegStream << "[^/]*"; 277 } 278 } else if (llvm::sys::path::is_separator(Glob[I]) && 279 llvm::sys::path::is_separator('/') && 280 llvm::sys::path::is_separator('\\')) { 281 RegStream << R"([/\\])"; // Accept either slash on windows. 282 } else { 283 RegStream << llvm::Regex::escape(Glob.substr(I, 1)); 284 } 285 } 286 RegStream << '$'; 287 RegStream.flush(); 288 return RegText; 289 } 290 291 /// Converts a glob containing only ** or * into a regex. 292 llvm::Regex convertGlobsToRegex(llvm::ArrayRef<std::string> Globs) { 293 assert(!Globs.empty() && "Globs cannot be empty!"); 294 std::vector<std::string> RegTexts; 295 RegTexts.reserve(Globs.size()); 296 for (llvm::StringRef Glob : Globs) 297 RegTexts.push_back(convertGlobToRegex(Glob)); 298 299 // Tempting to pass IgnoreCase, but we don't know the FS sensitivity. 300 llvm::Regex Reg(llvm::join(RegTexts, "|")); 301 assert(Reg.isValid(RegTexts.front()) && 302 "Created an invalid regex from globs"); 303 return Reg; 304 } 305 306 /// Extracts system includes from a trusted driver by parsing the output of 307 /// include search path and appends them to the commands coming from underlying 308 /// compilation database. 
309 class QueryDriverDatabase : public DelegatingCDB { 310 public: 311 QueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs, 312 std::unique_ptr<GlobalCompilationDatabase> Base) 313 : DelegatingCDB(std::move(Base)), 314 QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)) {} 315 316 llvm::Optional<tooling::CompileCommand> 317 getCompileCommand(PathRef File) const override { 318 auto Cmd = DelegatingCDB::getCompileCommand(File); 319 if (!Cmd || Cmd->CommandLine.empty()) 320 return Cmd; 321 322 llvm::StringRef Lang; 323 for (size_t I = 0, E = Cmd->CommandLine.size(); I < E; ++I) { 324 llvm::StringRef Arg = Cmd->CommandLine[I]; 325 if (Arg == "-x" && I + 1 < E) 326 Lang = Cmd->CommandLine[I + 1]; 327 else if (Arg.startswith("-x")) 328 Lang = Arg.drop_front(2).trim(); 329 } 330 if (Lang.empty()) { 331 llvm::StringRef Ext = llvm::sys::path::extension(File).trim('.'); 332 auto Type = driver::types::lookupTypeForExtension(Ext); 333 if (Type == driver::types::TY_INVALID) { 334 elog("System include extraction: invalid file type for {0}", Ext); 335 return {}; 336 } 337 Lang = driver::types::getTypeName(Type); 338 } 339 340 llvm::SmallString<128> Driver(Cmd->CommandLine.front()); 341 if (llvm::any_of(Driver, 342 [](char C) { return llvm::sys::path::is_separator(C); })) 343 // Driver is a not a single executable name but instead a path (either 344 // relative or absolute). 345 llvm::sys::fs::make_absolute(Cmd->Directory, Driver); 346 347 if (auto Info = 348 QueriedDrivers.get(/*Key=*/(Driver + ":" + Lang).str(), [&] { 349 return extractSystemIncludesAndTarget( 350 Driver, Lang, Cmd->CommandLine, QueryDriverRegex); 351 })) { 352 setTarget(addSystemIncludes(*Cmd, Info->SystemIncludes), Info->Target); 353 } 354 return Cmd; 355 } 356 357 private: 358 // Caches includes extracted from a driver. Key is driver:lang. 
359 Memoize<llvm::StringMap<llvm::Optional<DriverInfo>>> QueriedDrivers; 360 llvm::Regex QueryDriverRegex; 361 }; 362 } // namespace 363 364 std::unique_ptr<GlobalCompilationDatabase> 365 getQueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs, 366 std::unique_ptr<GlobalCompilationDatabase> Base) { 367 assert(Base && "Null base to SystemIncludeExtractor"); 368 if (QueryDriverGlobs.empty()) 369 return Base; 370 return std::make_unique<QueryDriverDatabase>(QueryDriverGlobs, 371 std::move(Base)); 372 } 373 374 } // namespace clangd 375 } // namespace clang 376