1 //===--- ConfigYAML.cpp - Loading configuration fragments from YAML files -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ConfigFragment.h"
9 #include "llvm/ADT/Optional.h"
10 #include "llvm/ADT/SmallSet.h"
11 #include "llvm/ADT/SmallString.h"
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/Support/MemoryBuffer.h"
14 #include "llvm/Support/SourceMgr.h"
15 #include "llvm/Support/YAMLParser.h"
16 #include <string>
17 #include <system_error>
18 
19 namespace clang {
20 namespace clangd {
21 namespace config {
22 namespace {
23 using llvm::yaml::BlockScalarNode;
24 using llvm::yaml::MappingNode;
25 using llvm::yaml::Node;
26 using llvm::yaml::ScalarNode;
27 using llvm::yaml::SequenceNode;
28 
29 llvm::Optional<llvm::StringRef>
bestGuess(llvm::StringRef Search,llvm::ArrayRef<llvm::StringRef> AllowedValues)30 bestGuess(llvm::StringRef Search,
31           llvm::ArrayRef<llvm::StringRef> AllowedValues) {
32   unsigned MaxEdit = (Search.size() + 1) / 3;
33   if (!MaxEdit)
34     return llvm::None;
35   llvm::Optional<llvm::StringRef> Result;
36   for (const auto &AllowedValue : AllowedValues) {
37     unsigned EditDistance = Search.edit_distance(AllowedValue, true, MaxEdit);
38     // We can't do better than an edit distance of 1, so just return this and
39     // save computing other values.
40     if (EditDistance == 1U)
41       return AllowedValue;
42     if (EditDistance == MaxEdit && !Result) {
43       Result = AllowedValue;
44     } else if (EditDistance < MaxEdit) {
45       Result = AllowedValue;
46       MaxEdit = EditDistance;
47     }
48   }
49   return Result;
50 }
51 
52 class Parser {
53   llvm::SourceMgr &SM;
54   bool HadError = false;
55 
56 public:
Parser(llvm::SourceMgr & SM)57   Parser(llvm::SourceMgr &SM) : SM(SM) {}
58 
59   // Tries to parse N into F, returning false if it failed and we couldn't
60   // meaningfully recover (YAML syntax error, or hard semantic error).
parse(Fragment & F,Node & N)61   bool parse(Fragment &F, Node &N) {
62     DictParser Dict("Config", this);
63     Dict.handle("If", [&](Node &N) { parse(F.If, N); });
64     Dict.handle("CompileFlags", [&](Node &N) { parse(F.CompileFlags, N); });
65     Dict.handle("Index", [&](Node &N) { parse(F.Index, N); });
66     Dict.handle("Style", [&](Node &N) { parse(F.Style, N); });
67     Dict.handle("Diagnostics", [&](Node &N) { parse(F.Diagnostics, N); });
68     Dict.handle("Completion", [&](Node &N) { parse(F.Completion, N); });
69     Dict.handle("Hover", [&](Node &N) { parse(F.Hover, N); });
70     Dict.handle("InlayHints", [&](Node &N) { parse(F.InlayHints, N); });
71     Dict.parse(N);
72     return !(N.failed() || HadError);
73   }
74 
75 private:
parse(Fragment::IfBlock & F,Node & N)76   void parse(Fragment::IfBlock &F, Node &N) {
77     DictParser Dict("If", this);
78     Dict.unrecognized([&](Located<std::string>, Node &) {
79       F.HasUnrecognizedCondition = true;
80       return true; // Emit a warning for the unrecognized key.
81     });
82     Dict.handle("PathMatch", [&](Node &N) {
83       if (auto Values = scalarValues(N))
84         F.PathMatch = std::move(*Values);
85     });
86     Dict.handle("PathExclude", [&](Node &N) {
87       if (auto Values = scalarValues(N))
88         F.PathExclude = std::move(*Values);
89     });
90     Dict.parse(N);
91   }
92 
parse(Fragment::CompileFlagsBlock & F,Node & N)93   void parse(Fragment::CompileFlagsBlock &F, Node &N) {
94     DictParser Dict("CompileFlags", this);
95     Dict.handle("Compiler", [&](Node &N) {
96       if (auto Value = scalarValue(N, "Compiler"))
97         F.Compiler = std::move(*Value);
98     });
99     Dict.handle("Add", [&](Node &N) {
100       if (auto Values = scalarValues(N))
101         F.Add = std::move(*Values);
102     });
103     Dict.handle("Remove", [&](Node &N) {
104       if (auto Values = scalarValues(N))
105         F.Remove = std::move(*Values);
106     });
107     Dict.handle("CompilationDatabase", [&](Node &N) {
108       F.CompilationDatabase = scalarValue(N, "CompilationDatabase");
109     });
110     Dict.parse(N);
111   }
112 
parse(Fragment::StyleBlock & F,Node & N)113   void parse(Fragment::StyleBlock &F, Node &N) {
114     DictParser Dict("Style", this);
115     Dict.handle("FullyQualifiedNamespaces", [&](Node &N) {
116       if (auto Values = scalarValues(N))
117         F.FullyQualifiedNamespaces = std::move(*Values);
118     });
119     Dict.parse(N);
120   }
121 
parse(Fragment::DiagnosticsBlock & F,Node & N)122   void parse(Fragment::DiagnosticsBlock &F, Node &N) {
123     DictParser Dict("Diagnostics", this);
124     Dict.handle("Suppress", [&](Node &N) {
125       if (auto Values = scalarValues(N))
126         F.Suppress = std::move(*Values);
127     });
128     Dict.handle("UnusedIncludes", [&](Node &N) {
129       F.UnusedIncludes = scalarValue(N, "UnusedIncludes");
130     });
131     Dict.handle("Includes", [&](Node &N) { parse(F.Includes, N); });
132     Dict.handle("ClangTidy", [&](Node &N) { parse(F.ClangTidy, N); });
133     Dict.parse(N);
134   }
135 
parse(Fragment::DiagnosticsBlock::ClangTidyBlock & F,Node & N)136   void parse(Fragment::DiagnosticsBlock::ClangTidyBlock &F, Node &N) {
137     DictParser Dict("ClangTidy", this);
138     Dict.handle("Add", [&](Node &N) {
139       if (auto Values = scalarValues(N))
140         F.Add = std::move(*Values);
141     });
142     Dict.handle("Remove", [&](Node &N) {
143       if (auto Values = scalarValues(N))
144         F.Remove = std::move(*Values);
145     });
146     Dict.handle("CheckOptions", [&](Node &N) {
147       DictParser CheckOptDict("CheckOptions", this);
148       CheckOptDict.unrecognized([&](Located<std::string> &&Key, Node &Val) {
149         if (auto Value = scalarValue(Val, *Key))
150           F.CheckOptions.emplace_back(std::move(Key), std::move(*Value));
151         return false; // Don't emit a warning
152       });
153       CheckOptDict.parse(N);
154     });
155     Dict.parse(N);
156   }
157 
parse(Fragment::DiagnosticsBlock::IncludesBlock & F,Node & N)158   void parse(Fragment::DiagnosticsBlock::IncludesBlock &F, Node &N) {
159     DictParser Dict("Includes", this);
160     Dict.handle("IgnoreHeader", [&](Node &N) {
161       if (auto Values = scalarValues(N))
162         F.IgnoreHeader = std::move(*Values);
163     });
164     Dict.parse(N);
165   }
166 
parse(Fragment::IndexBlock & F,Node & N)167   void parse(Fragment::IndexBlock &F, Node &N) {
168     DictParser Dict("Index", this);
169     Dict.handle("Background",
170                 [&](Node &N) { F.Background = scalarValue(N, "Background"); });
171     Dict.handle("External", [&](Node &N) {
172       Fragment::IndexBlock::ExternalBlock External;
173       // External block can either be a mapping or a scalar value. Dispatch
174       // accordingly.
175       if (N.getType() == Node::NK_Mapping) {
176         parse(External, N);
177       } else if (N.getType() == Node::NK_Scalar ||
178                  N.getType() == Node::NK_BlockScalar) {
179         parse(External, *scalarValue(N, "External"));
180       } else {
181         error("External must be either a scalar or a mapping.", N);
182         return;
183       }
184       F.External.emplace(std::move(External));
185       F.External->Range = N.getSourceRange();
186     });
187     Dict.handle("StandardLibrary", [&](Node &N) {
188       if (auto StandardLibrary = boolValue(N, "StandardLibrary"))
189         F.StandardLibrary = *StandardLibrary;
190     });
191     Dict.parse(N);
192   }
193 
parse(Fragment::IndexBlock::ExternalBlock & F,Located<std::string> ExternalVal)194   void parse(Fragment::IndexBlock::ExternalBlock &F,
195              Located<std::string> ExternalVal) {
196     if (!llvm::StringRef(*ExternalVal).equals_insensitive("none")) {
197       error("Only scalar value supported for External is 'None'",
198             ExternalVal.Range);
199       return;
200     }
201     F.IsNone = true;
202     F.IsNone.Range = ExternalVal.Range;
203   }
204 
parse(Fragment::IndexBlock::ExternalBlock & F,Node & N)205   void parse(Fragment::IndexBlock::ExternalBlock &F, Node &N) {
206     DictParser Dict("External", this);
207     Dict.handle("File", [&](Node &N) { F.File = scalarValue(N, "File"); });
208     Dict.handle("Server",
209                 [&](Node &N) { F.Server = scalarValue(N, "Server"); });
210     Dict.handle("MountPoint",
211                 [&](Node &N) { F.MountPoint = scalarValue(N, "MountPoint"); });
212     Dict.parse(N);
213   }
214 
parse(Fragment::CompletionBlock & F,Node & N)215   void parse(Fragment::CompletionBlock &F, Node &N) {
216     DictParser Dict("Completion", this);
217     Dict.handle("AllScopes", [&](Node &N) {
218       if (auto AllScopes = boolValue(N, "AllScopes"))
219         F.AllScopes = *AllScopes;
220     });
221     Dict.parse(N);
222   }
223 
parse(Fragment::HoverBlock & F,Node & N)224   void parse(Fragment::HoverBlock &F, Node &N) {
225     DictParser Dict("Hover", this);
226     Dict.handle("ShowAKA", [&](Node &N) {
227       if (auto ShowAKA = boolValue(N, "ShowAKA"))
228         F.ShowAKA = *ShowAKA;
229     });
230     Dict.parse(N);
231   }
232 
parse(Fragment::InlayHintsBlock & F,Node & N)233   void parse(Fragment::InlayHintsBlock &F, Node &N) {
234     DictParser Dict("InlayHints", this);
235     Dict.handle("Enabled", [&](Node &N) {
236       if (auto Value = boolValue(N, "Enabled"))
237         F.Enabled = *Value;
238     });
239     Dict.handle("ParameterNames", [&](Node &N) {
240       if (auto Value = boolValue(N, "ParameterNames"))
241         F.ParameterNames = *Value;
242     });
243     Dict.handle("DeducedTypes", [&](Node &N) {
244       if (auto Value = boolValue(N, "DeducedTypes"))
245         F.DeducedTypes = *Value;
246     });
247     Dict.handle("Designators", [&](Node &N) {
248       if (auto Value = boolValue(N, "Designators"))
249         F.Designators = *Value;
250     });
251     Dict.parse(N);
252   }
253 
254   // Helper for parsing mapping nodes (dictionaries).
255   // We don't use YamlIO as we want to control over unknown keys.
256   class DictParser {
257     llvm::StringRef Description;
258     std::vector<std::pair<llvm::StringRef, std::function<void(Node &)>>> Keys;
259     std::function<bool(Located<std::string>, Node &)> UnknownHandler;
260     Parser *Outer;
261 
262   public:
DictParser(llvm::StringRef Description,Parser * Outer)263     DictParser(llvm::StringRef Description, Parser *Outer)
264         : Description(Description), Outer(Outer) {}
265 
266     // Parse is called when Key is encountered, and passed the associated value.
267     // It should emit diagnostics if the value is invalid (e.g. wrong type).
268     // If Key is seen twice, Parse runs only once and an error is reported.
handle(llvm::StringLiteral Key,std::function<void (Node &)> Parse)269     void handle(llvm::StringLiteral Key, std::function<void(Node &)> Parse) {
270       for (const auto &Entry : Keys) {
271         (void) Entry;
272         assert(Entry.first != Key && "duplicate key handler");
273       }
274       Keys.emplace_back(Key, std::move(Parse));
275     }
276 
277     // Handler is called when a Key is not matched by any handle().
278     // If this is unset or the Handler returns true, a warning is emitted for
279     // the unknown key.
280     void
unrecognized(std::function<bool (Located<std::string>,Node &)> Handler)281     unrecognized(std::function<bool(Located<std::string>, Node &)> Handler) {
282       UnknownHandler = std::move(Handler);
283     }
284 
285     // Process a mapping node and call handlers for each key/value pair.
parse(Node & N) const286     void parse(Node &N) const {
287       if (N.getType() != Node::NK_Mapping) {
288         Outer->error(Description + " should be a dictionary", N);
289         return;
290       }
291       llvm::SmallSet<std::string, 8> Seen;
292       llvm::SmallVector<Located<std::string>, 0> UnknownKeys;
293       // We *must* consume all items, even on error, or the parser will assert.
294       for (auto &KV : llvm::cast<MappingNode>(N)) {
295         auto *K = KV.getKey();
296         if (!K) // YAMLParser emitted an error.
297           continue;
298         auto Key = Outer->scalarValue(*K, "Dictionary key");
299         if (!Key)
300           continue;
301         if (!Seen.insert(**Key).second) {
302           Outer->warning("Duplicate key " + **Key + " is ignored", *K);
303           if (auto *Value = KV.getValue())
304             Value->skip();
305           continue;
306         }
307         auto *Value = KV.getValue();
308         if (!Value) // YAMLParser emitted an error.
309           continue;
310         bool Matched = false;
311         for (const auto &Handler : Keys) {
312           if (Handler.first == **Key) {
313             Matched = true;
314             Handler.second(*Value);
315             break;
316           }
317         }
318         if (!Matched) {
319           bool Warn = !UnknownHandler;
320           if (UnknownHandler)
321             Warn = UnknownHandler(
322                 Located<std::string>(**Key, K->getSourceRange()), *Value);
323           if (Warn)
324             UnknownKeys.push_back(std::move(*Key));
325         }
326       }
327       if (!UnknownKeys.empty())
328         warnUnknownKeys(UnknownKeys, Seen);
329     }
330 
331   private:
warnUnknownKeys(llvm::ArrayRef<Located<std::string>> UnknownKeys,const llvm::SmallSet<std::string,8> & SeenKeys) const332     void warnUnknownKeys(llvm::ArrayRef<Located<std::string>> UnknownKeys,
333                          const llvm::SmallSet<std::string, 8> &SeenKeys) const {
334       llvm::SmallVector<llvm::StringRef> UnseenKeys;
335       for (const auto &KeyAndHandler : Keys)
336         if (!SeenKeys.count(KeyAndHandler.first.str()))
337           UnseenKeys.push_back(KeyAndHandler.first);
338 
339       for (const Located<std::string> &UnknownKey : UnknownKeys)
340         if (auto BestGuess = bestGuess(*UnknownKey, UnseenKeys))
341           Outer->warning("Unknown " + Description + " key '" + *UnknownKey +
342                              "'; did you mean '" + *BestGuess + "'?",
343                          UnknownKey.Range);
344         else
345           Outer->warning("Unknown " + Description + " key '" + *UnknownKey +
346                              "'",
347                          UnknownKey.Range);
348     }
349   };
350 
351   // Try to parse a single scalar value from the node, warn on failure.
scalarValue(Node & N,llvm::StringRef Desc)352   llvm::Optional<Located<std::string>> scalarValue(Node &N,
353                                                    llvm::StringRef Desc) {
354     llvm::SmallString<256> Buf;
355     if (auto *S = llvm::dyn_cast<ScalarNode>(&N))
356       return Located<std::string>(S->getValue(Buf).str(), N.getSourceRange());
357     if (auto *BS = llvm::dyn_cast<BlockScalarNode>(&N))
358       return Located<std::string>(BS->getValue().str(), N.getSourceRange());
359     warning(Desc + " should be scalar", N);
360     return llvm::None;
361   }
362 
boolValue(Node & N,llvm::StringRef Desc)363   llvm::Optional<Located<bool>> boolValue(Node &N, llvm::StringRef Desc) {
364     if (auto Scalar = scalarValue(N, Desc)) {
365       if (auto Bool = llvm::yaml::parseBool(**Scalar))
366         return Located<bool>(*Bool, Scalar->Range);
367       warning(Desc + " should be a boolean", N);
368     }
369     return llvm::None;
370   }
371 
372   // Try to parse a list of single scalar values, or just a single value.
scalarValues(Node & N)373   llvm::Optional<std::vector<Located<std::string>>> scalarValues(Node &N) {
374     std::vector<Located<std::string>> Result;
375     if (auto *S = llvm::dyn_cast<ScalarNode>(&N)) {
376       llvm::SmallString<256> Buf;
377       Result.emplace_back(S->getValue(Buf).str(), N.getSourceRange());
378     } else if (auto *S = llvm::dyn_cast<BlockScalarNode>(&N)) {
379       Result.emplace_back(S->getValue().str(), N.getSourceRange());
380     } else if (auto *S = llvm::dyn_cast<SequenceNode>(&N)) {
381       // We *must* consume all items, even on error, or the parser will assert.
382       for (auto &Child : *S) {
383         if (auto Value = scalarValue(Child, "List item"))
384           Result.push_back(std::move(*Value));
385       }
386     } else {
387       warning("Expected scalar or list of scalars", N);
388       return llvm::None;
389     }
390     return Result;
391   }
392 
393   // Report a "hard" error, reflecting a config file that can never be valid.
error(const llvm::Twine & Msg,llvm::SMRange Range)394   void error(const llvm::Twine &Msg, llvm::SMRange Range) {
395     HadError = true;
396     SM.PrintMessage(Range.Start, llvm::SourceMgr::DK_Error, Msg, Range);
397   }
error(const llvm::Twine & Msg,const Node & N)398   void error(const llvm::Twine &Msg, const Node &N) {
399     return error(Msg, N.getSourceRange());
400   }
401 
402   // Report a "soft" error that could be caused by e.g. version skew.
warning(const llvm::Twine & Msg,llvm::SMRange Range)403   void warning(const llvm::Twine &Msg, llvm::SMRange Range) {
404     SM.PrintMessage(Range.Start, llvm::SourceMgr::DK_Warning, Msg, Range);
405   }
warning(const llvm::Twine & Msg,const Node & N)406   void warning(const llvm::Twine &Msg, const Node &N) {
407     return warning(Msg, N.getSourceRange());
408   }
409 };
410 
411 } // namespace
412 
parseYAML(llvm::StringRef YAML,llvm::StringRef BufferName,DiagnosticCallback Diags)413 std::vector<Fragment> Fragment::parseYAML(llvm::StringRef YAML,
414                                           llvm::StringRef BufferName,
415                                           DiagnosticCallback Diags) {
416   // The YAML document may contain multiple conditional fragments.
417   // The SourceManager is shared for all of them.
418   auto SM = std::make_shared<llvm::SourceMgr>();
419   auto Buf = llvm::MemoryBuffer::getMemBufferCopy(YAML, BufferName);
420   // Adapt DiagnosticCallback to function-pointer interface.
421   // Callback receives both errors we emit and those from the YAML parser.
422   SM->setDiagHandler(
423       [](const llvm::SMDiagnostic &Diag, void *Ctx) {
424         (*reinterpret_cast<DiagnosticCallback *>(Ctx))(Diag);
425       },
426       &Diags);
427   std::vector<Fragment> Result;
428   for (auto &Doc : llvm::yaml::Stream(*Buf, *SM)) {
429     if (Node *N = Doc.getRoot()) {
430       Fragment Fragment;
431       Fragment.Source.Manager = SM;
432       Fragment.Source.Location = N->getSourceRange().Start;
433       SM->PrintMessage(Fragment.Source.Location, llvm::SourceMgr::DK_Note,
434                        "Parsing config fragment");
435       if (Parser(*SM).parse(Fragment, *N))
436         Result.push_back(std::move(Fragment));
437     }
438   }
439   SM->PrintMessage(SM->FindLocForLineAndColumn(SM->getMainFileID(), 0, 0),
440                    llvm::SourceMgr::DK_Note,
441                    "Parsed " + llvm::Twine(Result.size()) +
442                        " fragments from file");
443   // Hack: stash the buffer in the SourceMgr to keep it alive.
444   // SM has two entries: "main" non-owning buffer, and ignored owning buffer.
445   SM->AddNewSourceBuffer(std::move(Buf), llvm::SMLoc());
446   return Result;
447 }
448 
449 } // namespace config
450 } // namespace clangd
451 } // namespace clang
452