1ea9773acSSam McCall //===- InterpolatingCompilationDatabase.cpp ---------------------*- C++ -*-===//
2ea9773acSSam McCall //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ea9773acSSam McCall //
7ea9773acSSam McCall //===----------------------------------------------------------------------===//
8ea9773acSSam McCall //
9ea9773acSSam McCall // InterpolatingCompilationDatabase wraps another CompilationDatabase and
10ea9773acSSam McCall // attempts to heuristically determine appropriate compile commands for files
11ea9773acSSam McCall // that are not included, such as headers or newly created files.
12ea9773acSSam McCall //
13ea9773acSSam McCall // Motivating cases include:
14ea9773acSSam McCall //   Header files that live next to their implementation files. These typically
15ea9773acSSam McCall // share a base filename. (libclang/CXString.h, libclang/CXString.cpp).
16ea9773acSSam McCall //   Some projects separate headers from includes. Filenames still typically
17ea9773acSSam McCall // match, maybe other path segments too. (include/llvm/IR/Use.h, lib/IR/Use.cc).
18ea9773acSSam McCall //   Matches are sometimes only approximate (Sema.h, SemaDecl.cpp). This goes
19ea9773acSSam McCall // for directories too (Support/Unix/Process.inc, lib/Support/Process.cpp).
20ea9773acSSam McCall //   Even if we can't find a "right" compile command, even a random one from
21ea9773acSSam McCall // the project will tend to get important flags like -I and -x right.
22ea9773acSSam McCall //
23ea9773acSSam McCall // We "borrow" the compile command for the closest available file:
24ea9773acSSam McCall //   - points are awarded if the filename matches (ignoring extension)
25ea9773acSSam McCall //   - points are awarded if the directory structure matches
26ea9773acSSam McCall //   - ties are broken by length of path prefix match
27ea9773acSSam McCall //
28ea9773acSSam McCall // The compile command is adjusted, replacing the filename and removing output
29ea9773acSSam McCall // file arguments. The -x and -std flags may be affected too.
30ea9773acSSam McCall //
31ea9773acSSam McCall // Source language is a tricky issue: is it OK to use a .c file's command
32ea9773acSSam McCall // for building a .cc file? What language is a .h file in?
33ea9773acSSam McCall //   - We only consider compile commands for c-family languages as candidates.
34ea9773acSSam McCall //   - For files whose language is implied by the filename (e.g. .m, .hpp)
35ea9773acSSam McCall //     we prefer candidates from the same language.
36ea9773acSSam McCall //     If we must cross languages, we drop any -x and -std flags.
37ea9773acSSam McCall //   - For .h files, candidates from any c-family language are acceptable.
38ea9773acSSam McCall //     We use the candidate's language, inserting  e.g. -x c++-header.
39ea9773acSSam McCall //
40ea9773acSSam McCall // This class is only useful when wrapping databases that can enumerate all
41ea9773acSSam McCall // their compile commands. If getAllFilenames() is empty, no inference occurs.
42ea9773acSSam McCall //
43ea9773acSSam McCall //===----------------------------------------------------------------------===//
44ea9773acSSam McCall 
4509d890d7SRainer Orth #include "clang/Basic/LangStandard.h"
46ea9773acSSam McCall #include "clang/Driver/Options.h"
47ea9773acSSam McCall #include "clang/Driver/Types.h"
48ea9773acSSam McCall #include "clang/Tooling/CompilationDatabase.h"
49ea9773acSSam McCall #include "llvm/ADT/DenseMap.h"
50e05bf606SHamza Sood #include "llvm/ADT/Optional.h"
51ea9773acSSam McCall #include "llvm/ADT/StringExtras.h"
52ea9773acSSam McCall #include "llvm/ADT/StringSwitch.h"
53ea9773acSSam McCall #include "llvm/Option/ArgList.h"
54ea9773acSSam McCall #include "llvm/Option/OptTable.h"
55ea9773acSSam McCall #include "llvm/Support/Debug.h"
56ea9773acSSam McCall #include "llvm/Support/Path.h"
57ea9773acSSam McCall #include "llvm/Support/StringSaver.h"
58ea9773acSSam McCall #include "llvm/Support/raw_ostream.h"
59ea9773acSSam McCall #include <memory>
60ea9773acSSam McCall 
61ea9773acSSam McCall namespace clang {
62ea9773acSSam McCall namespace tooling {
63ea9773acSSam McCall namespace {
64ea9773acSSam McCall using namespace llvm;
65ea9773acSSam McCall namespace types = clang::driver::types;
66ea9773acSSam McCall namespace path = llvm::sys::path;
67ea9773acSSam McCall 
68ea9773acSSam McCall // The length of the prefix these two strings have in common.
69ea9773acSSam McCall size_t matchingPrefix(StringRef L, StringRef R) {
70ea9773acSSam McCall   size_t Limit = std::min(L.size(), R.size());
71ea9773acSSam McCall   for (size_t I = 0; I < Limit; ++I)
72ea9773acSSam McCall     if (L[I] != R[I])
73ea9773acSSam McCall       return I;
74ea9773acSSam McCall   return Limit;
75ea9773acSSam McCall }
76ea9773acSSam McCall 
77ea9773acSSam McCall // A comparator for searching SubstringWithIndexes with std::equal_range etc.
78ea9773acSSam McCall // Optionaly prefix semantics: compares equal if the key is a prefix.
79ea9773acSSam McCall template <bool Prefix> struct Less {
80ea9773acSSam McCall   bool operator()(StringRef Key, std::pair<StringRef, size_t> Value) const {
81ea9773acSSam McCall     StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
82ea9773acSSam McCall     return Key < V;
83ea9773acSSam McCall   }
84ea9773acSSam McCall   bool operator()(std::pair<StringRef, size_t> Value, StringRef Key) const {
85ea9773acSSam McCall     StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
86ea9773acSSam McCall     return V < Key;
87ea9773acSSam McCall   }
88ea9773acSSam McCall };
89ea9773acSSam McCall 
90ea9773acSSam McCall // Infer type from filename. If we might have gotten it wrong, set *Certain.
91ea9773acSSam McCall // *.h will be inferred as a C header, but not certain.
92ea9773acSSam McCall types::ID guessType(StringRef Filename, bool *Certain = nullptr) {
93ea9773acSSam McCall   // path::extension is ".cpp", lookupTypeForExtension wants "cpp".
94ea9773acSSam McCall   auto Lang =
95ea9773acSSam McCall       types::lookupTypeForExtension(path::extension(Filename).substr(1));
96ea9773acSSam McCall   if (Certain)
97ea9773acSSam McCall     *Certain = Lang != types::TY_CHeader && Lang != types::TY_INVALID;
98ea9773acSSam McCall   return Lang;
99ea9773acSSam McCall }
100ea9773acSSam McCall 
101ea9773acSSam McCall // Return Lang as one of the canonical supported types.
102ea9773acSSam McCall // e.g. c-header --> c; fortran --> TY_INVALID
103ea9773acSSam McCall static types::ID foldType(types::ID Lang) {
104ea9773acSSam McCall   switch (Lang) {
105ea9773acSSam McCall   case types::TY_C:
106ea9773acSSam McCall   case types::TY_CHeader:
107ea9773acSSam McCall     return types::TY_C;
108ea9773acSSam McCall   case types::TY_ObjC:
109ea9773acSSam McCall   case types::TY_ObjCHeader:
110ea9773acSSam McCall     return types::TY_ObjC;
111ea9773acSSam McCall   case types::TY_CXX:
112ea9773acSSam McCall   case types::TY_CXXHeader:
113ea9773acSSam McCall     return types::TY_CXX;
114ea9773acSSam McCall   case types::TY_ObjCXX:
115ea9773acSSam McCall   case types::TY_ObjCXXHeader:
116ea9773acSSam McCall     return types::TY_ObjCXX;
1176ed88afdSADRA   case types::TY_CUDA:
1186ed88afdSADRA   case types::TY_CUDA_DEVICE:
1192a1418f2SChristopher Tetreault     return types::TY_CUDA;
120ea9773acSSam McCall   default:
121ea9773acSSam McCall     return types::TY_INVALID;
122ea9773acSSam McCall   }
123ea9773acSSam McCall }
124ea9773acSSam McCall 
125ea9773acSSam McCall // A CompileCommand that can be applied to another file.
126ea9773acSSam McCall struct TransferableCommand {
127ea9773acSSam McCall   // Flags that should not apply to all files are stripped from CommandLine.
128ea9773acSSam McCall   CompileCommand Cmd;
1295167e2d1SIlya Biryukov   // Language detected from -x or the filename. Never TY_INVALID.
1305167e2d1SIlya Biryukov   Optional<types::ID> Type;
131ea9773acSSam McCall   // Standard specified by -std.
132ea9773acSSam McCall   LangStandard::Kind Std = LangStandard::lang_unspecified;
133e05bf606SHamza Sood   // Whether the command line is for the cl-compatible driver.
134e05bf606SHamza Sood   bool ClangCLMode;
135ea9773acSSam McCall 
136ea9773acSSam McCall   TransferableCommand(CompileCommand C)
137e05bf606SHamza Sood       : Cmd(std::move(C)), Type(guessType(Cmd.Filename)),
138e05bf606SHamza Sood         ClangCLMode(checkIsCLMode(Cmd.CommandLine)) {
139e05bf606SHamza Sood     std::vector<std::string> OldArgs = std::move(Cmd.CommandLine);
140e05bf606SHamza Sood     Cmd.CommandLine.clear();
141e05bf606SHamza Sood 
142e05bf606SHamza Sood     // Wrap the old arguments in an InputArgList.
143e05bf606SHamza Sood     llvm::opt::InputArgList ArgList;
144e05bf606SHamza Sood     {
145e05bf606SHamza Sood       SmallVector<const char *, 16> TmpArgv;
146e05bf606SHamza Sood       for (const std::string &S : OldArgs)
147e05bf606SHamza Sood         TmpArgv.push_back(S.c_str());
148e05bf606SHamza Sood       ArgList = {TmpArgv.begin(), TmpArgv.end()};
149e05bf606SHamza Sood     }
150e05bf606SHamza Sood 
151ea9773acSSam McCall     // Parse the old args in order to strip out and record unwanted flags.
152e05bf606SHamza Sood     // We parse each argument individually so that we can retain the exact
153e05bf606SHamza Sood     // spelling of each argument; re-rendering is lossy for aliased flags.
154e05bf606SHamza Sood     // E.g. in CL mode, /W4 maps to -Wall.
15543392759SIlya Biryukov     auto &OptTable = clang::driver::getDriverOptTable();
15660ca31a7SSerge Guelton     if (!OldArgs.empty())
157e05bf606SHamza Sood       Cmd.CommandLine.emplace_back(OldArgs.front());
158e05bf606SHamza Sood     for (unsigned Pos = 1; Pos < OldArgs.size();) {
159e05bf606SHamza Sood       using namespace driver::options;
160e05bf606SHamza Sood 
161e05bf606SHamza Sood       const unsigned OldPos = Pos;
16243392759SIlya Biryukov       std::unique_ptr<llvm::opt::Arg> Arg(OptTable.ParseOneArg(
163e05bf606SHamza Sood           ArgList, Pos,
164e05bf606SHamza Sood           /* Include */ ClangCLMode ? CoreOption | CLOption : 0,
165e05bf606SHamza Sood           /* Exclude */ ClangCLMode ? 0 : CLOption));
166e05bf606SHamza Sood 
167e05bf606SHamza Sood       if (!Arg)
168e05bf606SHamza Sood         continue;
169e05bf606SHamza Sood 
170e05bf606SHamza Sood       const llvm::opt::Option &Opt = Arg->getOption();
171e05bf606SHamza Sood 
172ea9773acSSam McCall       // Strip input and output files.
173e05bf606SHamza Sood       if (Opt.matches(OPT_INPUT) || Opt.matches(OPT_o) ||
174e05bf606SHamza Sood           (ClangCLMode && (Opt.matches(OPT__SLASH_Fa) ||
175e05bf606SHamza Sood                            Opt.matches(OPT__SLASH_Fe) ||
176e05bf606SHamza Sood                            Opt.matches(OPT__SLASH_Fi) ||
177e05bf606SHamza Sood                            Opt.matches(OPT__SLASH_Fo))))
178ea9773acSSam McCall         continue;
179e05bf606SHamza Sood 
180*e030ce3eSJanusz Nykiel       // ...including when the inputs are passed after --.
181*e030ce3eSJanusz Nykiel       if (Opt.matches(OPT__DASH_DASH))
182*e030ce3eSJanusz Nykiel         break;
183*e030ce3eSJanusz Nykiel 
184ea9773acSSam McCall       // Strip -x, but record the overridden language.
185e05bf606SHamza Sood       if (const auto GivenType = tryParseTypeArg(*Arg)) {
186e05bf606SHamza Sood         Type = *GivenType;
187ea9773acSSam McCall         continue;
188ea9773acSSam McCall       }
189e05bf606SHamza Sood 
190e05bf606SHamza Sood       // Strip -std, but record the value.
191e05bf606SHamza Sood       if (const auto GivenStd = tryParseStdArg(*Arg)) {
192e05bf606SHamza Sood         if (*GivenStd != LangStandard::lang_unspecified)
193e05bf606SHamza Sood           Std = *GivenStd;
194ea9773acSSam McCall         continue;
195ea9773acSSam McCall       }
196e05bf606SHamza Sood 
197e05bf606SHamza Sood       Cmd.CommandLine.insert(Cmd.CommandLine.end(),
198fd1dc75bSHamza Sood                              OldArgs.data() + OldPos, OldArgs.data() + Pos);
199ea9773acSSam McCall     }
200ea9773acSSam McCall 
201c2377eaeSKadir Cetinkaya     // Make use of -std iff -x was missing.
202c2377eaeSKadir Cetinkaya     if (Type == types::TY_INVALID && Std != LangStandard::lang_unspecified)
203ea9773acSSam McCall       Type = toType(LangStandard::getLangStandardForKind(Std).getLanguage());
2045167e2d1SIlya Biryukov     Type = foldType(*Type);
2055167e2d1SIlya Biryukov     // The contract is to store None instead of TY_INVALID.
2065167e2d1SIlya Biryukov     if (Type == types::TY_INVALID)
2075167e2d1SIlya Biryukov       Type = llvm::None;
208ea9773acSSam McCall   }
209ea9773acSSam McCall 
210ea9773acSSam McCall   // Produce a CompileCommand for \p filename, based on this one.
211588db1ccSSam McCall   // (This consumes the TransferableCommand just to avoid copying Cmd).
212588db1ccSSam McCall   CompileCommand transferTo(StringRef Filename) && {
213588db1ccSSam McCall     CompileCommand Result = std::move(Cmd);
214588db1ccSSam McCall     Result.Heuristic = "inferred from " + Result.Filename;
215adcd0268SBenjamin Kramer     Result.Filename = std::string(Filename);
216ea9773acSSam McCall     bool TypeCertain;
217ea9773acSSam McCall     auto TargetType = guessType(Filename, &TypeCertain);
218ea9773acSSam McCall     // If the filename doesn't determine the language (.h), transfer with -x.
21987ad30beSSam McCall     if ((!TargetType || !TypeCertain) && Type) {
22087ad30beSSam McCall       // Use *Type, or its header variant if the file is a header.
22187ad30beSSam McCall       // Treat no/invalid extension as header (e.g. C++ standard library).
22287ad30beSSam McCall       TargetType =
22387ad30beSSam McCall           (!TargetType || types::onlyPrecompileType(TargetType)) // header?
2245167e2d1SIlya Biryukov               ? types::lookupHeaderTypeForSourceType(*Type)
2255167e2d1SIlya Biryukov               : *Type;
226e05bf606SHamza Sood       if (ClangCLMode) {
227e05bf606SHamza Sood         const StringRef Flag = toCLFlag(TargetType);
228e05bf606SHamza Sood         if (!Flag.empty())
229adcd0268SBenjamin Kramer           Result.CommandLine.push_back(std::string(Flag));
230e05bf606SHamza Sood       } else {
231ea9773acSSam McCall         Result.CommandLine.push_back("-x");
232ea9773acSSam McCall         Result.CommandLine.push_back(types::getTypeName(TargetType));
233ea9773acSSam McCall       }
234e05bf606SHamza Sood     }
235ea9773acSSam McCall     // --std flag may only be transferred if the language is the same.
236ea9773acSSam McCall     // We may consider "translating" these, e.g. c++11 -> c11.
237ea9773acSSam McCall     if (Std != LangStandard::lang_unspecified && foldType(TargetType) == Type) {
238e05bf606SHamza Sood       Result.CommandLine.emplace_back((
239e05bf606SHamza Sood           llvm::Twine(ClangCLMode ? "/std:" : "-std=") +
240e05bf606SHamza Sood           LangStandard::getLangStandardForKind(Std).getName()).str());
241ea9773acSSam McCall     }
242*e030ce3eSJanusz Nykiel     if (Filename.startswith("-") || (ClangCLMode && Filename.startswith("/")))
243*e030ce3eSJanusz Nykiel       Result.CommandLine.push_back("--");
244adcd0268SBenjamin Kramer     Result.CommandLine.push_back(std::string(Filename));
245ea9773acSSam McCall     return Result;
246ea9773acSSam McCall   }
247ea9773acSSam McCall 
248ea9773acSSam McCall private:
249e05bf606SHamza Sood   // Determine whether the given command line is intended for the CL driver.
250e05bf606SHamza Sood   static bool checkIsCLMode(ArrayRef<std::string> CmdLine) {
251e05bf606SHamza Sood     // First look for --driver-mode.
252e05bf606SHamza Sood     for (StringRef S : llvm::reverse(CmdLine)) {
253e05bf606SHamza Sood       if (S.consume_front("--driver-mode="))
254e05bf606SHamza Sood         return S == "cl";
255e05bf606SHamza Sood     }
256e05bf606SHamza Sood 
257e05bf606SHamza Sood     // Otherwise just check the clang executable file name.
25860ca31a7SSerge Guelton     return !CmdLine.empty() &&
25960ca31a7SSerge Guelton            llvm::sys::path::stem(CmdLine.front()).endswith_lower("cl");
260e05bf606SHamza Sood   }
261e05bf606SHamza Sood 
262ea9773acSSam McCall   // Map the language from the --std flag to that of the -x flag.
26309d890d7SRainer Orth   static types::ID toType(Language Lang) {
264ea9773acSSam McCall     switch (Lang) {
26509d890d7SRainer Orth     case Language::C:
266ea9773acSSam McCall       return types::TY_C;
26709d890d7SRainer Orth     case Language::CXX:
268ea9773acSSam McCall       return types::TY_CXX;
26909d890d7SRainer Orth     case Language::ObjC:
270ea9773acSSam McCall       return types::TY_ObjC;
27109d890d7SRainer Orth     case Language::ObjCXX:
272ea9773acSSam McCall       return types::TY_ObjCXX;
273ea9773acSSam McCall     default:
274ea9773acSSam McCall       return types::TY_INVALID;
275ea9773acSSam McCall     }
276ea9773acSSam McCall   }
277e05bf606SHamza Sood 
278e05bf606SHamza Sood   // Convert a file type to the matching CL-style type flag.
279e05bf606SHamza Sood   static StringRef toCLFlag(types::ID Type) {
280e05bf606SHamza Sood     switch (Type) {
281e05bf606SHamza Sood     case types::TY_C:
282e05bf606SHamza Sood     case types::TY_CHeader:
283e05bf606SHamza Sood       return "/TC";
284e05bf606SHamza Sood     case types::TY_CXX:
285e05bf606SHamza Sood     case types::TY_CXXHeader:
286e05bf606SHamza Sood       return "/TP";
287e05bf606SHamza Sood     default:
288e05bf606SHamza Sood       return StringRef();
289e05bf606SHamza Sood     }
290e05bf606SHamza Sood   }
291e05bf606SHamza Sood 
292e05bf606SHamza Sood   // Try to interpret the argument as a type specifier, e.g. '-x'.
293e05bf606SHamza Sood   Optional<types::ID> tryParseTypeArg(const llvm::opt::Arg &Arg) {
294e05bf606SHamza Sood     const llvm::opt::Option &Opt = Arg.getOption();
295e05bf606SHamza Sood     using namespace driver::options;
296e05bf606SHamza Sood     if (ClangCLMode) {
297e05bf606SHamza Sood       if (Opt.matches(OPT__SLASH_TC) || Opt.matches(OPT__SLASH_Tc))
298e05bf606SHamza Sood         return types::TY_C;
299e05bf606SHamza Sood       if (Opt.matches(OPT__SLASH_TP) || Opt.matches(OPT__SLASH_Tp))
300e05bf606SHamza Sood         return types::TY_CXX;
301e05bf606SHamza Sood     } else {
302e05bf606SHamza Sood       if (Opt.matches(driver::options::OPT_x))
303e05bf606SHamza Sood         return types::lookupTypeForTypeSpecifier(Arg.getValue());
304e05bf606SHamza Sood     }
305e05bf606SHamza Sood     return None;
306e05bf606SHamza Sood   }
307e05bf606SHamza Sood 
308e05bf606SHamza Sood   // Try to interpret the argument as '-std='.
309e05bf606SHamza Sood   Optional<LangStandard::Kind> tryParseStdArg(const llvm::opt::Arg &Arg) {
310e05bf606SHamza Sood     using namespace driver::options;
31109d890d7SRainer Orth     if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ))
31209d890d7SRainer Orth       return LangStandard::getLangKind(Arg.getValue());
313e05bf606SHamza Sood     return None;
314e05bf606SHamza Sood   }
315ea9773acSSam McCall };
316ea9773acSSam McCall 
3175167e2d1SIlya Biryukov // Given a filename, FileIndex picks the best matching file from the underlying
3185167e2d1SIlya Biryukov // DB. This is the proxy file whose CompileCommand will be reused. The
3195167e2d1SIlya Biryukov // heuristics incorporate file name, extension, and directory structure.
3205167e2d1SIlya Biryukov // Strategy:
321ea9773acSSam McCall // - Build indexes of each of the substrings we want to look up by.
322ea9773acSSam McCall //   These indexes are just sorted lists of the substrings.
323ea9773acSSam McCall // - Each criterion corresponds to a range lookup into the index, so we only
324ea9773acSSam McCall //   need O(log N) string comparisons to determine scores.
3255167e2d1SIlya Biryukov //
3265167e2d1SIlya Biryukov // Apart from path proximity signals, also takes file extensions into account
3275167e2d1SIlya Biryukov // when scoring the candidates.
3285167e2d1SIlya Biryukov class FileIndex {
329ea9773acSSam McCall public:
3305167e2d1SIlya Biryukov   FileIndex(std::vector<std::string> Files)
3315167e2d1SIlya Biryukov       : OriginalPaths(std::move(Files)), Strings(Arena) {
332ea9773acSSam McCall     // Sort commands by filename for determinism (index is a tiebreaker later).
33355fab260SFangrui Song     llvm::sort(OriginalPaths);
3345167e2d1SIlya Biryukov     Paths.reserve(OriginalPaths.size());
3355167e2d1SIlya Biryukov     Types.reserve(OriginalPaths.size());
3365167e2d1SIlya Biryukov     Stems.reserve(OriginalPaths.size());
3375167e2d1SIlya Biryukov     for (size_t I = 0; I < OriginalPaths.size(); ++I) {
3385167e2d1SIlya Biryukov       StringRef Path = Strings.save(StringRef(OriginalPaths[I]).lower());
3395167e2d1SIlya Biryukov 
3405167e2d1SIlya Biryukov       Paths.emplace_back(Path, I);
3415167e2d1SIlya Biryukov       Types.push_back(foldType(guessType(Path)));
342ea9773acSSam McCall       Stems.emplace_back(sys::path::stem(Path), I);
343ea9773acSSam McCall       auto Dir = ++sys::path::rbegin(Path), DirEnd = sys::path::rend(Path);
344ea9773acSSam McCall       for (int J = 0; J < DirectorySegmentsIndexed && Dir != DirEnd; ++J, ++Dir)
345ea9773acSSam McCall         if (Dir->size() > ShortDirectorySegment) // not trivial ones
346ea9773acSSam McCall           Components.emplace_back(*Dir, I);
347ea9773acSSam McCall     }
34855fab260SFangrui Song     llvm::sort(Paths);
34955fab260SFangrui Song     llvm::sort(Stems);
35055fab260SFangrui Song     llvm::sort(Components);
351ea9773acSSam McCall   }
352ea9773acSSam McCall 
3535167e2d1SIlya Biryukov   bool empty() const { return Paths.empty(); }
354ea9773acSSam McCall 
3555167e2d1SIlya Biryukov   // Returns the path for the file that best fits OriginalFilename.
3565167e2d1SIlya Biryukov   // Candidates with extensions matching PreferLanguage will be chosen over
3575167e2d1SIlya Biryukov   // others (unless it's TY_INVALID, or all candidates are bad).
3585167e2d1SIlya Biryukov   StringRef chooseProxy(StringRef OriginalFilename,
359ea9773acSSam McCall                         types::ID PreferLanguage) const {
360ea9773acSSam McCall     assert(!empty() && "need at least one candidate!");
361ea9773acSSam McCall     std::string Filename = OriginalFilename.lower();
362ea9773acSSam McCall     auto Candidates = scoreCandidates(Filename);
363ea9773acSSam McCall     std::pair<size_t, int> Best =
364ea9773acSSam McCall         pickWinner(Candidates, Filename, PreferLanguage);
365ea9773acSSam McCall 
3665167e2d1SIlya Biryukov     DEBUG_WITH_TYPE(
3675167e2d1SIlya Biryukov         "interpolate",
3685167e2d1SIlya Biryukov         llvm::dbgs() << "interpolate: chose " << OriginalPaths[Best.first]
3695167e2d1SIlya Biryukov                      << " as proxy for " << OriginalFilename << " preferring "
370ea9773acSSam McCall                      << (PreferLanguage == types::TY_INVALID
371ea9773acSSam McCall                              ? "none"
372ea9773acSSam McCall                              : types::getTypeName(PreferLanguage))
373ea9773acSSam McCall                      << " score=" << Best.second << "\n");
3745167e2d1SIlya Biryukov     return OriginalPaths[Best.first];
375ea9773acSSam McCall   }
376ea9773acSSam McCall 
377ea9773acSSam McCall private:
378ea9773acSSam McCall   using SubstringAndIndex = std::pair<StringRef, size_t>;
379ea9773acSSam McCall   // Directory matching parameters: we look at the last two segments of the
380ea9773acSSam McCall   // parent directory (usually the semantically significant ones in practice).
381ea9773acSSam McCall   // We search only the last four of each candidate (for efficiency).
382ea9773acSSam McCall   constexpr static int DirectorySegmentsIndexed = 4;
383ea9773acSSam McCall   constexpr static int DirectorySegmentsQueried = 2;
384ea9773acSSam McCall   constexpr static int ShortDirectorySegment = 1; // Only look at longer names.
385ea9773acSSam McCall 
386ea9773acSSam McCall   // Award points to candidate entries that should be considered for the file.
387ea9773acSSam McCall   // Returned keys are indexes into paths, and the values are (nonzero) scores.
388ea9773acSSam McCall   DenseMap<size_t, int> scoreCandidates(StringRef Filename) const {
389ea9773acSSam McCall     // Decompose Filename into the parts we care about.
390ea9773acSSam McCall     // /some/path/complicated/project/Interesting.h
391ea9773acSSam McCall     // [-prefix--][---dir---] [-dir-] [--stem---]
392ea9773acSSam McCall     StringRef Stem = sys::path::stem(Filename);
393ea9773acSSam McCall     llvm::SmallVector<StringRef, DirectorySegmentsQueried> Dirs;
394ea9773acSSam McCall     llvm::StringRef Prefix;
395ea9773acSSam McCall     auto Dir = ++sys::path::rbegin(Filename),
396ea9773acSSam McCall          DirEnd = sys::path::rend(Filename);
397ea9773acSSam McCall     for (int I = 0; I < DirectorySegmentsQueried && Dir != DirEnd; ++I, ++Dir) {
398ea9773acSSam McCall       if (Dir->size() > ShortDirectorySegment)
399ea9773acSSam McCall         Dirs.push_back(*Dir);
400ea9773acSSam McCall       Prefix = Filename.substr(0, Dir - DirEnd);
401ea9773acSSam McCall     }
402ea9773acSSam McCall 
403ea9773acSSam McCall     // Now award points based on lookups into our various indexes.
404ea9773acSSam McCall     DenseMap<size_t, int> Candidates; // Index -> score.
405ea9773acSSam McCall     auto Award = [&](int Points, ArrayRef<SubstringAndIndex> Range) {
406ea9773acSSam McCall       for (const auto &Entry : Range)
407ea9773acSSam McCall         Candidates[Entry.second] += Points;
408ea9773acSSam McCall     };
409ea9773acSSam McCall     // Award one point if the file's basename is a prefix of the candidate,
410ea9773acSSam McCall     // and another if it's an exact match (so exact matches get two points).
411ea9773acSSam McCall     Award(1, indexLookup</*Prefix=*/true>(Stem, Stems));
412ea9773acSSam McCall     Award(1, indexLookup</*Prefix=*/false>(Stem, Stems));
413ea9773acSSam McCall     // For each of the last few directories in the Filename, award a point
414ea9773acSSam McCall     // if it's present in the candidate.
415ea9773acSSam McCall     for (StringRef Dir : Dirs)
416ea9773acSSam McCall       Award(1, indexLookup</*Prefix=*/false>(Dir, Components));
417ea9773acSSam McCall     // Award one more point if the whole rest of the path matches.
418ea9773acSSam McCall     if (sys::path::root_directory(Prefix) != Prefix)
419ea9773acSSam McCall       Award(1, indexLookup</*Prefix=*/true>(Prefix, Paths));
420ea9773acSSam McCall     return Candidates;
421ea9773acSSam McCall   }
422ea9773acSSam McCall 
423ea9773acSSam McCall   // Pick a single winner from the set of scored candidates.
424ea9773acSSam McCall   // Returns (index, score).
425ea9773acSSam McCall   std::pair<size_t, int> pickWinner(const DenseMap<size_t, int> &Candidates,
426ea9773acSSam McCall                                     StringRef Filename,
427ea9773acSSam McCall                                     types::ID PreferredLanguage) const {
428ea9773acSSam McCall     struct ScoredCandidate {
429ea9773acSSam McCall       size_t Index;
430ea9773acSSam McCall       bool Preferred;
431ea9773acSSam McCall       int Points;
432ea9773acSSam McCall       size_t PrefixLength;
433ea9773acSSam McCall     };
434ea9773acSSam McCall     // Choose the best candidate by (preferred, points, prefix length, alpha).
435ea9773acSSam McCall     ScoredCandidate Best = {size_t(-1), false, 0, 0};
436ea9773acSSam McCall     for (const auto &Candidate : Candidates) {
437ea9773acSSam McCall       ScoredCandidate S;
438ea9773acSSam McCall       S.Index = Candidate.first;
439ea9773acSSam McCall       S.Preferred = PreferredLanguage == types::TY_INVALID ||
4405167e2d1SIlya Biryukov                     PreferredLanguage == Types[S.Index];
441ea9773acSSam McCall       S.Points = Candidate.second;
442ea9773acSSam McCall       if (!S.Preferred && Best.Preferred)
443ea9773acSSam McCall         continue;
444ea9773acSSam McCall       if (S.Preferred == Best.Preferred) {
445ea9773acSSam McCall         if (S.Points < Best.Points)
446ea9773acSSam McCall           continue;
447ea9773acSSam McCall         if (S.Points == Best.Points) {
448ea9773acSSam McCall           S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
449ea9773acSSam McCall           if (S.PrefixLength < Best.PrefixLength)
450ea9773acSSam McCall             continue;
451ea9773acSSam McCall           // hidden heuristics should at least be deterministic!
452ea9773acSSam McCall           if (S.PrefixLength == Best.PrefixLength)
453ea9773acSSam McCall             if (S.Index > Best.Index)
454ea9773acSSam McCall               continue;
455ea9773acSSam McCall         }
456ea9773acSSam McCall       }
457ea9773acSSam McCall       // PrefixLength was only set above if actually needed for a tiebreak.
458ea9773acSSam McCall       // But it definitely needs to be set to break ties in the future.
459ea9773acSSam McCall       S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
460ea9773acSSam McCall       Best = S;
461ea9773acSSam McCall     }
462ea9773acSSam McCall     // Edge case: no candidate got any points.
463ea9773acSSam McCall     // We ignore PreferredLanguage at this point (not ideal).
464ea9773acSSam McCall     if (Best.Index == size_t(-1))
465ea9773acSSam McCall       return {longestMatch(Filename, Paths).second, 0};
466ea9773acSSam McCall     return {Best.Index, Best.Points};
467ea9773acSSam McCall   }
468ea9773acSSam McCall 
469ea9773acSSam McCall   // Returns the range within a sorted index that compares equal to Key.
470ea9773acSSam McCall   // If Prefix is true, it's instead the range starting with Key.
471ea9773acSSam McCall   template <bool Prefix>
472ea9773acSSam McCall   ArrayRef<SubstringAndIndex>
4735167e2d1SIlya Biryukov   indexLookup(StringRef Key, ArrayRef<SubstringAndIndex> Idx) const {
474ea9773acSSam McCall     // Use pointers as iteratiors to ease conversion of result to ArrayRef.
47544f2f4ecSSam McCall     auto Range = std::equal_range(Idx.data(), Idx.data() + Idx.size(), Key,
47644f2f4ecSSam McCall                                   Less<Prefix>());
477ea9773acSSam McCall     return {Range.first, Range.second};
478ea9773acSSam McCall   }
479ea9773acSSam McCall 
480ea9773acSSam McCall   // Performs a point lookup into a nonempty index, returning a longest match.
4815167e2d1SIlya Biryukov   SubstringAndIndex longestMatch(StringRef Key,
4825167e2d1SIlya Biryukov                                  ArrayRef<SubstringAndIndex> Idx) const {
483ea9773acSSam McCall     assert(!Idx.empty());
484ea9773acSSam McCall     // Longest substring match will be adjacent to a direct lookup.
4857264a474SFangrui Song     auto It = llvm::lower_bound(Idx, SubstringAndIndex{Key, 0});
486ea9773acSSam McCall     if (It == Idx.begin())
487ea9773acSSam McCall       return *It;
488ea9773acSSam McCall     if (It == Idx.end())
489ea9773acSSam McCall       return *--It;
490ea9773acSSam McCall     // Have to choose between It and It-1
491ea9773acSSam McCall     size_t Prefix = matchingPrefix(Key, It->first);
492ea9773acSSam McCall     size_t PrevPrefix = matchingPrefix(Key, (It - 1)->first);
493ea9773acSSam McCall     return Prefix > PrevPrefix ? *It : *--It;
494ea9773acSSam McCall   }
495ea9773acSSam McCall 
4965167e2d1SIlya Biryukov   // Original paths, everything else is in lowercase.
4975167e2d1SIlya Biryukov   std::vector<std::string> OriginalPaths;
498ea9773acSSam McCall   BumpPtrAllocator Arena;
499ea9773acSSam McCall   StringSaver Strings;
500ea9773acSSam McCall   // Indexes of candidates by certain substrings.
501ea9773acSSam McCall   // String is lowercase and sorted, index points into OriginalPaths.
502ea9773acSSam McCall   std::vector<SubstringAndIndex> Paths;      // Full path.
5035167e2d1SIlya Biryukov   // Lang types obtained by guessing on the corresponding path. I-th element is
5045167e2d1SIlya Biryukov   // a type for the I-th path.
5055167e2d1SIlya Biryukov   std::vector<types::ID> Types;
506ea9773acSSam McCall   std::vector<SubstringAndIndex> Stems;      // Basename, without extension.
507ea9773acSSam McCall   std::vector<SubstringAndIndex> Components; // Last path components.
508ea9773acSSam McCall };
509ea9773acSSam McCall 
510ea9773acSSam McCall // The actual CompilationDatabase wrapper delegates to its inner database.
5115167e2d1SIlya Biryukov // If no match, looks up a proxy file in FileIndex and transfers its
5125167e2d1SIlya Biryukov // command to the requested file.
513ea9773acSSam McCall class InterpolatingCompilationDatabase : public CompilationDatabase {
514ea9773acSSam McCall public:
515ea9773acSSam McCall   InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)
5165167e2d1SIlya Biryukov       : Inner(std::move(Inner)), Index(this->Inner->getAllFiles()) {}
517ea9773acSSam McCall 
518ea9773acSSam McCall   std::vector<CompileCommand>
519ea9773acSSam McCall   getCompileCommands(StringRef Filename) const override {
520ea9773acSSam McCall     auto Known = Inner->getCompileCommands(Filename);
521ea9773acSSam McCall     if (Index.empty() || !Known.empty())
522ea9773acSSam McCall       return Known;
523ea9773acSSam McCall     bool TypeCertain;
524ea9773acSSam McCall     auto Lang = guessType(Filename, &TypeCertain);
525ea9773acSSam McCall     if (!TypeCertain)
526ea9773acSSam McCall       Lang = types::TY_INVALID;
5275167e2d1SIlya Biryukov     auto ProxyCommands =
5285167e2d1SIlya Biryukov         Inner->getCompileCommands(Index.chooseProxy(Filename, foldType(Lang)));
5295167e2d1SIlya Biryukov     if (ProxyCommands.empty())
5305167e2d1SIlya Biryukov       return {};
531588db1ccSSam McCall     return {transferCompileCommand(std::move(ProxyCommands.front()), Filename)};
532ea9773acSSam McCall   }
533ea9773acSSam McCall 
534ea9773acSSam McCall   std::vector<std::string> getAllFiles() const override {
535ea9773acSSam McCall     return Inner->getAllFiles();
536ea9773acSSam McCall   }
537ea9773acSSam McCall 
538ea9773acSSam McCall   std::vector<CompileCommand> getAllCompileCommands() const override {
539ea9773acSSam McCall     return Inner->getAllCompileCommands();
540ea9773acSSam McCall   }
541ea9773acSSam McCall 
542ea9773acSSam McCall private:
543ea9773acSSam McCall   std::unique_ptr<CompilationDatabase> Inner;
5445167e2d1SIlya Biryukov   FileIndex Index;
545ea9773acSSam McCall };
546ea9773acSSam McCall 
547ea9773acSSam McCall } // namespace
548ea9773acSSam McCall 
549ea9773acSSam McCall std::unique_ptr<CompilationDatabase>
550ea9773acSSam McCall inferMissingCompileCommands(std::unique_ptr<CompilationDatabase> Inner) {
5512b3d49b6SJonas Devlieghere   return std::make_unique<InterpolatingCompilationDatabase>(std::move(Inner));
552ea9773acSSam McCall }
553ea9773acSSam McCall 
554588db1ccSSam McCall tooling::CompileCommand transferCompileCommand(CompileCommand Cmd,
555588db1ccSSam McCall                                                StringRef Filename) {
556588db1ccSSam McCall   return TransferableCommand(std::move(Cmd)).transferTo(Filename);
557588db1ccSSam McCall }
558588db1ccSSam McCall 
559ea9773acSSam McCall } // namespace tooling
560ea9773acSSam McCall } // namespace clang
561