1 //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file supports working with JSON data.
11 ///
12 /// It comprises:
13 ///
14 /// - classes which hold dynamically-typed parsed JSON structures
15 /// These are value types that can be composed, inspected, and modified.
16 /// See json::Value, and the related types json::Object and json::Array.
17 ///
18 /// - functions to parse JSON text into Values, and to serialize Values to text.
19 /// See parse(), operator<<, and format_provider.
20 ///
21 /// - a convention and helpers for mapping between json::Value and user-defined
22 /// types. See fromJSON(), ObjectMapper, and the class comment on Value.
23 ///
24 /// - an output API json::OStream which can emit JSON without materializing
25 /// all structures as json::Value.
26 ///
27 /// Typically, JSON data would be read from an external source, parsed into
28 /// a Value, and then converted into some native data structure before doing
29 /// real work on it. (And vice versa when writing).
30 ///
31 /// Other serialization mechanisms you may consider:
32 ///
33 /// - YAML is also text-based, and more human-readable than JSON. It's a more
34 /// complex format and data model, and YAML parsers aren't ubiquitous.
35 /// YAMLParser.h is a streaming parser suitable for parsing large documents
36 /// (including JSON, as YAML is a superset). It can be awkward to use
37 /// directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
38 /// declarative than the toJSON/fromJSON conventions here.
39 ///
40 /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
41 /// encodes LLVM IR ("bitcode"), but it can be a container for other data.
42 /// Low-level reader/writer libraries are in Bitstream/Bitstream*.h
43 ///
44 //===---------------------------------------------------------------------===//
45
46 #ifndef LLVM_SUPPORT_JSON_H
47 #define LLVM_SUPPORT_JSON_H
48
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <memory>
57
58 namespace llvm {
59 namespace json {
60
61 // === String encodings ===
62 //
63 // JSON strings are character sequences (not byte sequences like std::string).
64 // We need to know the encoding, and for simplicity only support UTF-8.
65 //
66 // - When parsing, invalid UTF-8 is a syntax error like any other
67 //
68 // - When creating Values from strings, callers must ensure they are UTF-8.
69 // with asserts on, invalid UTF-8 will crash the program
70 // with asserts off, we'll substitute the replacement character (U+FFFD)
71 // Callers can use json::isUTF8() and json::fixUTF8() for validation.
72 //
73 // - When retrieving strings from Values (e.g. asString()), the result will
74 // always be valid UTF-8.
75
76 /// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset is set to a byte offset near the first error.
78 bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
79 /// Replaces invalid UTF-8 sequences in \p S with the replacement character
80 /// (U+FFFD). The returned string is valid UTF-8.
81 /// This is much slower than isUTF8, so test that first.
82 std::string fixUTF8(llvm::StringRef S);
83
84 class Array;
85 class ObjectKey;
86 class Value;
87 template <typename T> Value toJSON(const llvm::Optional<T> &Opt);
88
/// An Object is a JSON object, which maps strings to heterogeneous JSON values.
90 /// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
91 class Object {
92 using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
93 Storage M;
94
95 public:
96 using key_type = ObjectKey;
97 using mapped_type = Value;
98 using value_type = Storage::value_type;
99 using iterator = Storage::iterator;
100 using const_iterator = Storage::const_iterator;
101
102 Object() = default;
103 // KV is a trivial key-value struct for list-initialization.
104 // (using std::pair forces extra copies).
105 struct KV;
106 explicit Object(std::initializer_list<KV> Properties);
107
begin()108 iterator begin() { return M.begin(); }
begin()109 const_iterator begin() const { return M.begin(); }
end()110 iterator end() { return M.end(); }
end()111 const_iterator end() const { return M.end(); }
112
empty()113 bool empty() const { return M.empty(); }
size()114 size_t size() const { return M.size(); }
115
clear()116 void clear() { M.clear(); }
117 std::pair<iterator, bool> insert(KV E);
118 template <typename... Ts>
try_emplace(const ObjectKey & K,Ts &&...Args)119 std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
120 return M.try_emplace(K, std::forward<Ts>(Args)...);
121 }
122 template <typename... Ts>
try_emplace(ObjectKey && K,Ts &&...Args)123 std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
124 return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
125 }
126 bool erase(StringRef K);
erase(iterator I)127 void erase(iterator I) { M.erase(I); }
128
find(StringRef K)129 iterator find(StringRef K) { return M.find_as(K); }
find(StringRef K)130 const_iterator find(StringRef K) const { return M.find_as(K); }
131 // operator[] acts as if Value was default-constructible as null.
132 Value &operator[](const ObjectKey &K);
133 Value &operator[](ObjectKey &&K);
134 // Look up a property, returning nullptr if it doesn't exist.
135 Value *get(StringRef K);
136 const Value *get(StringRef K) const;
137 // Typed accessors return None/nullptr if
138 // - the property doesn't exist
139 // - or it has the wrong type
140 llvm::Optional<std::nullptr_t> getNull(StringRef K) const;
141 llvm::Optional<bool> getBoolean(StringRef K) const;
142 llvm::Optional<double> getNumber(StringRef K) const;
143 llvm::Optional<int64_t> getInteger(StringRef K) const;
144 llvm::Optional<llvm::StringRef> getString(StringRef K) const;
145 const json::Object *getObject(StringRef K) const;
146 json::Object *getObject(StringRef K);
147 const json::Array *getArray(StringRef K) const;
148 json::Array *getArray(StringRef K);
149 };
150 bool operator==(const Object &LHS, const Object &RHS);
151 inline bool operator!=(const Object &LHS, const Object &RHS) {
152 return !(LHS == RHS);
153 }
154
155 /// An Array is a JSON array, which contains heterogeneous JSON values.
156 /// It simulates std::vector<Value>.
157 class Array {
158 std::vector<Value> V;
159
160 public:
161 using value_type = Value;
162 using iterator = std::vector<Value>::iterator;
163 using const_iterator = std::vector<Value>::const_iterator;
164
165 Array() = default;
166 explicit Array(std::initializer_list<Value> Elements);
Array(const Collection & C)167 template <typename Collection> explicit Array(const Collection &C) {
168 for (const auto &V : C)
169 emplace_back(V);
170 }
171
172 Value &operator[](size_t I);
173 const Value &operator[](size_t I) const;
174 Value &front();
175 const Value &front() const;
176 Value &back();
177 const Value &back() const;
178 Value *data();
179 const Value *data() const;
180
181 iterator begin();
182 const_iterator begin() const;
183 iterator end();
184 const_iterator end() const;
185
186 bool empty() const;
187 size_t size() const;
188 void reserve(size_t S);
189
190 void clear();
191 void push_back(const Value &E);
192 void push_back(Value &&E);
193 template <typename... Args> void emplace_back(Args &&...A);
194 void pop_back();
195 // FIXME: insert() takes const_iterator since C++11, old libstdc++ disagrees.
196 iterator insert(iterator P, const Value &E);
197 iterator insert(iterator P, Value &&E);
198 template <typename It> iterator insert(iterator P, It A, It Z);
199 template <typename... Args> iterator emplace(const_iterator P, Args &&...A);
200
201 friend bool operator==(const Array &L, const Array &R);
202 };
203 inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
204
/// A Value is a JSON value of unknown type.
206 /// They can be copied, but should generally be moved.
207 ///
208 /// === Composing values ===
209 ///
210 /// You can implicitly construct Values from:
211 /// - strings: std::string, SmallString, formatv, StringRef, char*
212 /// (char*, and StringRef are references, not copies!)
213 /// - numbers
214 /// - booleans
215 /// - null: nullptr
216 /// - arrays: {"foo", 42.0, false}
217 /// - serializable things: types with toJSON(const T&)->Value, found by ADL
218 ///
219 /// They can also be constructed from object/array helpers:
220 /// - json::Object is a type like map<ObjectKey, Value>
221 /// - json::Array is a type like vector<Value>
222 /// These can be list-initialized, or used to build up collections in a loop.
/// json::Array(Collection) converts all items in a collection to Values.
224 ///
225 /// === Inspecting values ===
226 ///
227 /// Each Value is one of the JSON kinds:
228 /// null (nullptr_t)
229 /// boolean (bool)
230 /// number (double, int64 or uint64)
231 /// string (StringRef)
232 /// array (json::Array)
233 /// object (json::Object)
234 ///
235 /// The kind can be queried directly, or implicitly via the typed accessors:
///   if (Optional<StringRef> S = E.getAsString())
237 /// assert(E.kind() == Value::String);
238 ///
239 /// Array and Object also have typed indexing accessors for easy traversal:
240 /// Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
241 /// if (Object* O = E->getAsObject())
242 /// if (Object* Opts = O->getObject("options"))
243 /// if (Optional<StringRef> Font = Opts->getString("font"))
244 /// assert(Opts->at("font").kind() == Value::String);
245 ///
246 /// === Converting JSON values to C++ types ===
247 ///
248 /// The convention is to have a deserializer function findable via ADL:
249 /// fromJSON(const json::Value&, T&, Path) -> bool
250 ///
251 /// The return value indicates overall success, and Path is used for precise
252 /// error reporting. (The Path::Root passed in at the top level fromJSON call
253 /// captures any nested error and can render it in context).
254 /// If conversion fails, fromJSON calls Path::report() and immediately returns.
255 /// This ensures that the first fatal error survives.
256 ///
257 /// Deserializers are provided for:
258 /// - bool
259 /// - int and int64_t
260 /// - double
261 /// - std::string
262 /// - vector<T>, where T is deserializable
263 /// - map<string, T>, where T is deserializable
264 /// - Optional<T>, where T is deserializable
265 /// ObjectMapper can help writing fromJSON() functions for object types.
266 ///
267 /// For conversion in the other direction, the serializer function is:
268 /// toJSON(const T&) -> json::Value
269 /// If this exists, then it also allows constructing Value from T, and can
270 /// be used to serialize vector<T>, map<string, T>, and Optional<T>.
271 ///
272 /// === Serialization ===
273 ///
274 /// Values can be serialized to JSON:
275 /// 1) raw_ostream << Value // Basic formatting.
276 /// 2) raw_ostream << formatv("{0}", Value) // Basic formatting.
277 /// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2.
278 ///
279 /// And parsed:
280 /// Expected<Value> E = json::parse("[1, 2, null]");
281 /// assert(E && E->kind() == Value::Array);
282 class Value {
283 public:
284 enum Kind {
285 Null,
286 Boolean,
287 /// Number values can store both int64s and doubles at full precision,
288 /// depending on what they were constructed/parsed from.
289 Number,
290 String,
291 Array,
292 Object,
293 };
294
295 // It would be nice to have Value() be null. But that would make {} null too.
Value(const Value & M)296 Value(const Value &M) { copyFrom(M); }
Value(Value && M)297 Value(Value &&M) { moveFrom(std::move(M)); }
298 Value(std::initializer_list<Value> Elements);
Value(json::Array && Elements)299 Value(json::Array &&Elements) : Type(T_Array) {
300 create<json::Array>(std::move(Elements));
301 }
302 template <typename Elt>
Value(const std::vector<Elt> & C)303 Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
Value(json::Object && Properties)304 Value(json::Object &&Properties) : Type(T_Object) {
305 create<json::Object>(std::move(Properties));
306 }
307 template <typename Elt>
Value(const std::map<std::string,Elt> & C)308 Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
309 // Strings: types with value semantics. Must be valid UTF-8.
Value(std::string V)310 Value(std::string V) : Type(T_String) {
311 if (LLVM_UNLIKELY(!isUTF8(V))) {
312 assert(false && "Invalid UTF-8 in value used as JSON");
313 V = fixUTF8(std::move(V));
314 }
315 create<std::string>(std::move(V));
316 }
Value(const llvm::SmallVectorImpl<char> & V)317 Value(const llvm::SmallVectorImpl<char> &V)
318 : Value(std::string(V.begin(), V.end())) {}
Value(const llvm::formatv_object_base & V)319 Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
320 // Strings: types with reference semantics. Must be valid UTF-8.
Value(StringRef V)321 Value(StringRef V) : Type(T_StringRef) {
322 create<llvm::StringRef>(V);
323 if (LLVM_UNLIKELY(!isUTF8(V))) {
324 assert(false && "Invalid UTF-8 in value used as JSON");
325 *this = Value(fixUTF8(V));
326 }
327 }
Value(const char * V)328 Value(const char *V) : Value(StringRef(V)) {}
Value(std::nullptr_t)329 Value(std::nullptr_t) : Type(T_Null) {}
330 // Boolean (disallow implicit conversions).
331 // (The last template parameter is a dummy to keep templates distinct.)
332 template <typename T,
333 typename = std::enable_if_t<std::is_same<T, bool>::value>,
334 bool = false>
Value(T B)335 Value(T B) : Type(T_Boolean) {
336 create<bool>(B);
337 }
338
339 // Unsigned 64-bit long integers.
340 template <typename T,
341 typename = std::enable_if_t<std::is_same<T, uint64_t>::value>,
342 bool = false, bool = false>
Value(T V)343 Value(T V) : Type(T_UINT64) {
344 create<uint64_t>(uint64_t{V});
345 }
346
347 // Integers (except boolean and uint64_t).
348 // Must be non-narrowing convertible to int64_t.
349 template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>,
350 typename = std::enable_if_t<!std::is_same<T, bool>::value>,
351 typename = std::enable_if_t<!std::is_same<T, uint64_t>::value>>
Value(T I)352 Value(T I) : Type(T_Integer) {
353 create<int64_t>(int64_t{I});
354 }
355 // Floating point. Must be non-narrowing convertible to double.
356 template <typename T,
357 typename = std::enable_if_t<std::is_floating_point<T>::value>,
358 double * = nullptr>
Value(T D)359 Value(T D) : Type(T_Double) {
360 create<double>(double{D});
361 }
362 // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
363 template <typename T,
364 typename = std::enable_if_t<std::is_same<
365 Value, decltype(toJSON(*(const T *)nullptr))>::value>,
366 Value * = nullptr>
Value(const T & V)367 Value(const T &V) : Value(toJSON(V)) {}
368
369 Value &operator=(const Value &M) {
370 destroy();
371 copyFrom(M);
372 return *this;
373 }
374 Value &operator=(Value &&M) {
375 destroy();
376 moveFrom(std::move(M));
377 return *this;
378 }
~Value()379 ~Value() { destroy(); }
380
kind()381 Kind kind() const {
382 switch (Type) {
383 case T_Null:
384 return Null;
385 case T_Boolean:
386 return Boolean;
387 case T_Double:
388 case T_Integer:
389 case T_UINT64:
390 return Number;
391 case T_String:
392 case T_StringRef:
393 return String;
394 case T_Object:
395 return Object;
396 case T_Array:
397 return Array;
398 }
399 llvm_unreachable("Unknown kind");
400 }
401
402 // Typed accessors return None/nullptr if the Value is not of this type.
getAsNull()403 llvm::Optional<std::nullptr_t> getAsNull() const {
404 if (LLVM_LIKELY(Type == T_Null))
405 return nullptr;
406 return llvm::None;
407 }
getAsBoolean()408 llvm::Optional<bool> getAsBoolean() const {
409 if (LLVM_LIKELY(Type == T_Boolean))
410 return as<bool>();
411 return llvm::None;
412 }
getAsNumber()413 llvm::Optional<double> getAsNumber() const {
414 if (LLVM_LIKELY(Type == T_Double))
415 return as<double>();
416 if (LLVM_LIKELY(Type == T_Integer))
417 return as<int64_t>();
418 if (LLVM_LIKELY(Type == T_UINT64))
419 return as<uint64_t>();
420 return llvm::None;
421 }
422 // Succeeds if the Value is a Number, and exactly representable as int64_t.
getAsInteger()423 llvm::Optional<int64_t> getAsInteger() const {
424 if (LLVM_LIKELY(Type == T_Integer))
425 return as<int64_t>();
426 if (LLVM_LIKELY(Type == T_Double)) {
427 double D = as<double>();
428 if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
429 D >= double(std::numeric_limits<int64_t>::min()) &&
430 D <= double(std::numeric_limits<int64_t>::max())))
431 return D;
432 }
433 return llvm::None;
434 }
getAsUINT64()435 llvm::Optional<uint64_t> getAsUINT64() const {
436 if (Type == T_UINT64)
437 return as<uint64_t>();
438 else if (Type == T_Integer) {
439 int64_t N = as<int64_t>();
440 if (N >= 0)
441 return as<uint64_t>();
442 }
443 return llvm::None;
444 }
getAsString()445 llvm::Optional<llvm::StringRef> getAsString() const {
446 if (Type == T_String)
447 return llvm::StringRef(as<std::string>());
448 if (LLVM_LIKELY(Type == T_StringRef))
449 return as<llvm::StringRef>();
450 return llvm::None;
451 }
getAsObject()452 const json::Object *getAsObject() const {
453 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
454 }
getAsObject()455 json::Object *getAsObject() {
456 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
457 }
getAsArray()458 const json::Array *getAsArray() const {
459 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
460 }
getAsArray()461 json::Array *getAsArray() {
462 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
463 }
464
465 private:
466 void destroy();
467 void copyFrom(const Value &M);
468 // We allow moving from *const* Values, by marking all members as mutable!
469 // This hack is needed to support initializer-list syntax efficiently.
470 // (std::initializer_list<T> is a container of const T).
471 void moveFrom(const Value &&M);
472 friend class Array;
473 friend class Object;
474
create(U &&...V)475 template <typename T, typename... U> void create(U &&... V) {
476 new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...);
477 }
as()478 template <typename T> T &as() const {
479 // Using this two-step static_cast via void * instead of reinterpret_cast
480 // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
481 void *Storage = static_cast<void *>(&Union);
482 return *static_cast<T *>(Storage);
483 }
484
485 friend class OStream;
486
487 enum ValueType : char16_t {
488 T_Null,
489 T_Boolean,
490 T_Double,
491 T_Integer,
492 T_UINT64,
493 T_StringRef,
494 T_String,
495 T_Object,
496 T_Array,
497 };
498 // All members mutable, see moveFrom().
499 mutable ValueType Type;
500 mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t,
501 llvm::StringRef, std::string, json::Array,
502 json::Object>
503 Union;
504 friend bool operator==(const Value &, const Value &);
505 };
506
507 bool operator==(const Value &, const Value &);
508 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
509
510 // Array Methods
511 inline Value &Array::operator[](size_t I) { return V[I]; }
512 inline const Value &Array::operator[](size_t I) const { return V[I]; }
front()513 inline Value &Array::front() { return V.front(); }
front()514 inline const Value &Array::front() const { return V.front(); }
back()515 inline Value &Array::back() { return V.back(); }
back()516 inline const Value &Array::back() const { return V.back(); }
data()517 inline Value *Array::data() { return V.data(); }
data()518 inline const Value *Array::data() const { return V.data(); }
519
begin()520 inline typename Array::iterator Array::begin() { return V.begin(); }
begin()521 inline typename Array::const_iterator Array::begin() const { return V.begin(); }
end()522 inline typename Array::iterator Array::end() { return V.end(); }
end()523 inline typename Array::const_iterator Array::end() const { return V.end(); }
524
empty()525 inline bool Array::empty() const { return V.empty(); }
size()526 inline size_t Array::size() const { return V.size(); }
reserve(size_t S)527 inline void Array::reserve(size_t S) { V.reserve(S); }
528
clear()529 inline void Array::clear() { V.clear(); }
push_back(const Value & E)530 inline void Array::push_back(const Value &E) { V.push_back(E); }
push_back(Value && E)531 inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); }
emplace_back(Args &&...A)532 template <typename... Args> inline void Array::emplace_back(Args &&...A) {
533 V.emplace_back(std::forward<Args>(A)...);
534 }
pop_back()535 inline void Array::pop_back() { V.pop_back(); }
insert(iterator P,const Value & E)536 inline typename Array::iterator Array::insert(iterator P, const Value &E) {
537 return V.insert(P, E);
538 }
insert(iterator P,Value && E)539 inline typename Array::iterator Array::insert(iterator P, Value &&E) {
540 return V.insert(P, std::move(E));
541 }
542 template <typename It>
insert(iterator P,It A,It Z)543 inline typename Array::iterator Array::insert(iterator P, It A, It Z) {
544 return V.insert(P, A, Z);
545 }
546 template <typename... Args>
emplace(const_iterator P,Args &&...A)547 inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) {
548 return V.emplace(P, std::forward<Args>(A)...);
549 }
550 inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
551
/// ObjectKey is used to capture keys in Object. Like Value but:
553 /// - only strings are allowed
554 /// - it's optimized for the string literal case (Owned == nullptr)
555 /// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
556 class ObjectKey {
557 public:
ObjectKey(const char * S)558 ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
ObjectKey(std::string S)559 ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
560 if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
561 assert(false && "Invalid UTF-8 in value used as JSON");
562 *Owned = fixUTF8(std::move(*Owned));
563 }
564 Data = *Owned;
565 }
ObjectKey(llvm::StringRef S)566 ObjectKey(llvm::StringRef S) : Data(S) {
567 if (LLVM_UNLIKELY(!isUTF8(Data))) {
568 assert(false && "Invalid UTF-8 in value used as JSON");
569 *this = ObjectKey(fixUTF8(S));
570 }
571 }
ObjectKey(const llvm::SmallVectorImpl<char> & V)572 ObjectKey(const llvm::SmallVectorImpl<char> &V)
573 : ObjectKey(std::string(V.begin(), V.end())) {}
ObjectKey(const llvm::formatv_object_base & V)574 ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
575
ObjectKey(const ObjectKey & C)576 ObjectKey(const ObjectKey &C) { *this = C; }
ObjectKey(ObjectKey && C)577 ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
578 ObjectKey &operator=(const ObjectKey &C) {
579 if (C.Owned) {
580 Owned.reset(new std::string(*C.Owned));
581 Data = *Owned;
582 } else {
583 Data = C.Data;
584 }
585 return *this;
586 }
587 ObjectKey &operator=(ObjectKey &&) = default;
588
StringRef()589 operator llvm::StringRef() const { return Data; }
str()590 std::string str() const { return Data.str(); }
591
592 private:
593 // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned
594 // could be 2 pointers at most.
595 std::unique_ptr<std::string> Owned;
596 llvm::StringRef Data;
597 };
598
599 inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
600 return llvm::StringRef(L) == llvm::StringRef(R);
601 }
602 inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
603 return !(L == R);
604 }
605 inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
606 return StringRef(L) < StringRef(R);
607 }
608
/// A key/value pair. This is the element type of the list-initializing
/// Object constructor and the argument type of Object::insert().
struct Object::KV {
  ObjectKey K; // Property name.
  Value V;     // Property value.
};
613
Object(std::initializer_list<KV> Properties)614 inline Object::Object(std::initializer_list<KV> Properties) {
615 for (const auto &P : Properties) {
616 auto R = try_emplace(P.K, nullptr);
617 if (R.second)
618 R.first->getSecond().moveFrom(std::move(P.V));
619 }
620 }
insert(KV E)621 inline std::pair<Object::iterator, bool> Object::insert(KV E) {
622 return try_emplace(std::move(E.K), std::move(E.V));
623 }
erase(StringRef K)624 inline bool Object::erase(StringRef K) {
625 return M.erase(ObjectKey(K));
626 }
627
628 /// A "cursor" marking a position within a Value.
629 /// The Value is a tree, and this is the path from the root to the current node.
630 /// This is used to associate errors with particular subobjects.
631 class Path {
632 public:
633 class Root;
634
635 /// Records that the value at the current path is invalid.
636 /// Message is e.g. "expected number" and becomes part of the final error.
637 /// This overwrites any previously written error message in the root.
638 void report(llvm::StringLiteral Message);
639
640 /// The root may be treated as a Path.
Path(Root & R)641 Path(Root &R) : Parent(nullptr), Seg(&R) {}
642 /// Derives a path for an array element: this[Index]
index(unsigned Index)643 Path index(unsigned Index) const { return Path(this, Segment(Index)); }
644 /// Derives a path for an object field: this.Field
field(StringRef Field)645 Path field(StringRef Field) const { return Path(this, Segment(Field)); }
646
647 private:
648 /// One element in a JSON path: an object field (.foo) or array index [27].
649 /// Exception: the root Path encodes a pointer to the Path::Root.
650 class Segment {
651 uintptr_t Pointer;
652 unsigned Offset;
653
654 public:
655 Segment() = default;
Segment(Root * R)656 Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
Segment(llvm::StringRef Field)657 Segment(llvm::StringRef Field)
658 : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
659 Offset(static_cast<unsigned>(Field.size())) {}
Segment(unsigned Index)660 Segment(unsigned Index) : Pointer(0), Offset(Index) {}
661
isField()662 bool isField() const { return Pointer != 0; }
field()663 StringRef field() const {
664 return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
665 }
index()666 unsigned index() const { return Offset; }
root()667 Root *root() const { return reinterpret_cast<Root *>(Pointer); }
668 };
669
670 const Path *Parent;
671 Segment Seg;
672
Path(const Path * Parent,Segment S)673 Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
674 };
675
676 /// The root is the trivial Path to the root value.
677 /// It also stores the latest reported error and the path where it occurred.
678 class Path::Root {
679 llvm::StringRef Name;
680 llvm::StringLiteral ErrorMessage;
681 std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
682
683 friend void Path::report(llvm::StringLiteral Message);
684
685 public:
Name(Name)686 Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
687 // No copy/move allowed as there are incoming pointers.
688 Root(Root &&) = delete;
689 Root &operator=(Root &&) = delete;
690 Root(const Root &) = delete;
691 Root &operator=(const Root &) = delete;
692
693 /// Returns the last error reported, or else a generic error.
694 Error getError() const;
695 /// Print the root value with the error shown inline as a comment.
696 /// Unrelated parts of the value are elided for brevity, e.g.
697 /// {
698 /// "id": 42,
699 /// "name": /* expected string */ null,
700 /// "properties": { ... }
701 /// }
702 void printErrorContext(const Value &, llvm::raw_ostream &) const;
703 };
704
705 // Standard deserializers are provided for primitive types.
706 // See comments on Value.
fromJSON(const Value & E,std::string & Out,Path P)707 inline bool fromJSON(const Value &E, std::string &Out, Path P) {
708 if (auto S = E.getAsString()) {
709 Out = std::string(*S);
710 return true;
711 }
712 P.report("expected string");
713 return false;
714 }
fromJSON(const Value & E,int & Out,Path P)715 inline bool fromJSON(const Value &E, int &Out, Path P) {
716 if (auto S = E.getAsInteger()) {
717 Out = *S;
718 return true;
719 }
720 P.report("expected integer");
721 return false;
722 }
fromJSON(const Value & E,int64_t & Out,Path P)723 inline bool fromJSON(const Value &E, int64_t &Out, Path P) {
724 if (auto S = E.getAsInteger()) {
725 Out = *S;
726 return true;
727 }
728 P.report("expected integer");
729 return false;
730 }
fromJSON(const Value & E,double & Out,Path P)731 inline bool fromJSON(const Value &E, double &Out, Path P) {
732 if (auto S = E.getAsNumber()) {
733 Out = *S;
734 return true;
735 }
736 P.report("expected number");
737 return false;
738 }
fromJSON(const Value & E,bool & Out,Path P)739 inline bool fromJSON(const Value &E, bool &Out, Path P) {
740 if (auto S = E.getAsBoolean()) {
741 Out = *S;
742 return true;
743 }
744 P.report("expected boolean");
745 return false;
746 }
fromJSON(const Value & E,uint64_t & Out,Path P)747 inline bool fromJSON(const Value &E, uint64_t &Out, Path P) {
748 if (auto S = E.getAsUINT64()) {
749 Out = *S;
750 return true;
751 }
752 P.report("expected uint64_t");
753 return false;
754 }
fromJSON(const Value & E,std::nullptr_t & Out,Path P)755 inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
756 if (auto S = E.getAsNull()) {
757 Out = *S;
758 return true;
759 }
760 P.report("expected null");
761 return false;
762 }
763 template <typename T>
fromJSON(const Value & E,llvm::Optional<T> & Out,Path P)764 bool fromJSON(const Value &E, llvm::Optional<T> &Out, Path P) {
765 if (E.getAsNull()) {
766 Out = llvm::None;
767 return true;
768 }
769 T Result;
770 if (!fromJSON(E, Result, P))
771 return false;
772 Out = std::move(Result);
773 return true;
774 }
775 template <typename T>
fromJSON(const Value & E,std::vector<T> & Out,Path P)776 bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {
777 if (auto *A = E.getAsArray()) {
778 Out.clear();
779 Out.resize(A->size());
780 for (size_t I = 0; I < A->size(); ++I)
781 if (!fromJSON((*A)[I], Out[I], P.index(I)))
782 return false;
783 return true;
784 }
785 P.report("expected array");
786 return false;
787 }
788 template <typename T>
fromJSON(const Value & E,std::map<std::string,T> & Out,Path P)789 bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {
790 if (auto *O = E.getAsObject()) {
791 Out.clear();
792 for (const auto &KV : *O)
793 if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))],
794 P.field(KV.first)))
795 return false;
796 return true;
797 }
798 P.report("expected object");
799 return false;
800 }
801
802 // Allow serialization of Optional<T> for supported T.
toJSON(const llvm::Optional<T> & Opt)803 template <typename T> Value toJSON(const llvm::Optional<T> &Opt) {
804 return Opt ? Value(*Opt) : Value(nullptr);
805 }
806
807 /// Helper for mapping JSON objects onto protocol structs.
808 ///
809 /// Example:
810 /// \code
811 /// bool fromJSON(const Value &E, MyStruct &R, Path P) {
812 /// ObjectMapper O(E, P);
813 /// // When returning false, error details were already reported.
814 /// return O && O.map("mandatory_field", R.MandatoryField) &&
815 /// O.mapOptional("optional_field", R.OptionalField);
816 /// }
817 /// \endcode
818 class ObjectMapper {
819 public:
820 /// If O is not an object, this mapper is invalid and an error is reported.
ObjectMapper(const Value & E,Path P)821 ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) {
822 if (!O)
823 P.report("expected object");
824 }
825
826 /// True if the expression is an object.
827 /// Must be checked before calling map().
828 operator bool() const { return O; }
829
830 /// Maps a property to a field.
831 /// If the property is missing or invalid, reports an error.
map(StringLiteral Prop,T & Out)832 template <typename T> bool map(StringLiteral Prop, T &Out) {
833 assert(*this && "Must check this is an object before calling map()");
834 if (const Value *E = O->get(Prop))
835 return fromJSON(*E, Out, P.field(Prop));
836 P.field(Prop).report("missing value");
837 return false;
838 }
839
840 /// Maps a property to a field, if it exists.
841 /// If the property exists and is invalid, reports an error.
842 /// (Optional requires special handling, because missing keys are OK).
map(StringLiteral Prop,llvm::Optional<T> & Out)843 template <typename T> bool map(StringLiteral Prop, llvm::Optional<T> &Out) {
844 assert(*this && "Must check this is an object before calling map()");
845 if (const Value *E = O->get(Prop))
846 return fromJSON(*E, Out, P.field(Prop));
847 Out = llvm::None;
848 return true;
849 }
850
851 /// Maps a property to a field, if it exists.
852 /// If the property exists and is invalid, reports an error.
853 /// If the property does not exist, Out is unchanged.
mapOptional(StringLiteral Prop,T & Out)854 template <typename T> bool mapOptional(StringLiteral Prop, T &Out) {
855 assert(*this && "Must check this is an object before calling map()");
856 if (const Value *E = O->get(Prop))
857 return fromJSON(*E, Out, P.field(Prop));
858 return true;
859 }
860
861 private:
862 const Object *O;
863 Path P;
864 };
865
866 /// Parses the provided JSON source, or returns a ParseError.
867 /// The returned Value is self-contained and owns its strings (they do not refer
868 /// to the original source).
869 llvm::Expected<Value> parse(llvm::StringRef JSON);
870
871 class ParseError : public llvm::ErrorInfo<ParseError> {
872 const char *Msg;
873 unsigned Line, Column, Offset;
874
875 public:
876 static char ID;
ParseError(const char * Msg,unsigned Line,unsigned Column,unsigned Offset)877 ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
878 : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
log(llvm::raw_ostream & OS)879 void log(llvm::raw_ostream &OS) const override {
880 OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
881 }
convertToErrorCode()882 std::error_code convertToErrorCode() const override {
883 return llvm::inconvertibleErrorCode();
884 }
885 };
886
887 /// Version of parse() that converts the parsed value to the type T.
888 /// RootName describes the root object and is used in error messages.
889 template <typename T>
890 Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {
891 auto V = parse(JSON);
892 if (!V)
893 return V.takeError();
894 Path::Root R(RootName);
895 T Result;
896 if (fromJSON(*V, Result, R))
897 return std::move(Result);
898 return R.getError();
899 }
900
901 /// json::OStream allows writing well-formed JSON without materializing
902 /// all structures as json::Value ahead of time.
903 /// It's faster, lower-level, and less safe than OS << json::Value.
904 /// It also allows emitting more constructs, such as comments.
905 ///
906 /// Only one "top-level" object can be written to a stream.
907 /// Simplest usage involves passing lambdas (Blocks) to fill in containers:
908 ///
909 /// json::OStream J(OS);
910 /// J.array([&]{
911 /// for (const Event &E : Events)
912 /// J.object([&] {
913 /// J.attribute("timestamp", int64_t(E.Time));
914 /// J.attributeArray("participants", [&] {
915 /// for (const Participant &P : E.Participants)
916 /// J.value(P.toString());
917 /// });
918 /// });
919 /// });
920 ///
921 /// This would produce JSON like:
922 ///
923 /// [
924 /// {
925 /// "timestamp": 19287398741,
926 /// "participants": [
927 /// "King Kong",
928 /// "Miley Cyrus",
929 /// "Cleopatra"
930 /// ]
931 /// },
932 /// ...
933 /// ]
934 ///
935 /// The lower level begin/end methods (arrayBegin()) are more flexible but
936 /// care must be taken to pair them correctly:
937 ///
938 /// json::OStream J(OS);
///   J.arrayBegin();
940 /// for (const Event &E : Events) {
941 /// J.objectBegin();
942 /// J.attribute("timestamp", int64_t(E.Time));
943 /// J.attributeBegin("participants");
944 /// for (const Participant &P : E.Participants)
945 /// J.value(P.toString());
946 /// J.attributeEnd();
947 /// J.objectEnd();
948 /// }
949 /// J.arrayEnd();
950 ///
951 /// If the call sequence isn't valid JSON, asserts will fire in debug mode.
952 /// This can be mismatched begin()/end() pairs, trying to emit attributes inside
953 /// an array, and so on.
954 /// With asserts disabled, this is undefined behavior.
955 class OStream {
956 public:
957 using Block = llvm::function_ref<void()>;
958 // If IndentSize is nonzero, output is pretty-printed.
959 explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
OS(OS)960 : OS(OS), IndentSize(IndentSize) {
961 Stack.emplace_back();
962 }
~OStream()963 ~OStream() {
964 assert(Stack.size() == 1 && "Unmatched begin()/end()");
965 assert(Stack.back().Ctx == Singleton);
966 assert(Stack.back().HasValue && "Did not write top-level value");
967 }
968
969 /// Flushes the underlying ostream. OStream does not buffer internally.
flush()970 void flush() { OS.flush(); }
971
972 // High level functions to output a value.
973 // Valid at top-level (exactly once), in an attribute value (exactly once),
974 // or in an array (any number of times).
975
976 /// Emit a self-contained value (number, string, vector<string> etc).
977 void value(const Value &V);
978 /// Emit an array whose elements are emitted in the provided Block.
array(Block Contents)979 void array(Block Contents) {
980 arrayBegin();
981 Contents();
982 arrayEnd();
983 }
984 /// Emit an object whose elements are emitted in the provided Block.
object(Block Contents)985 void object(Block Contents) {
986 objectBegin();
987 Contents();
988 objectEnd();
989 }
990 /// Emit an externally-serialized value.
991 /// The caller must write exactly one valid JSON value to the provided stream.
992 /// No validation or formatting of this value occurs.
rawValue(llvm::function_ref<void (raw_ostream &)> Contents)993 void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) {
994 rawValueBegin();
995 Contents(OS);
996 rawValueEnd();
997 }
rawValue(llvm::StringRef Contents)998 void rawValue(llvm::StringRef Contents) {
999 rawValue([&](raw_ostream &OS) { OS << Contents; });
1000 }
1001 /// Emit a JavaScript comment associated with the next printed value.
1002 /// The string must be valid until the next attribute or value is emitted.
1003 /// Comments are not part of standard JSON, and many parsers reject them!
1004 void comment(llvm::StringRef);
1005
1006 // High level functions to output object attributes.
1007 // Valid only within an object (any number of times).
1008
1009 /// Emit an attribute whose value is self-contained (number, vector<int> etc).
attribute(llvm::StringRef Key,const Value & Contents)1010 void attribute(llvm::StringRef Key, const Value& Contents) {
1011 attributeImpl(Key, [&] { value(Contents); });
1012 }
1013 /// Emit an attribute whose value is an array with elements from the Block.
attributeArray(llvm::StringRef Key,Block Contents)1014 void attributeArray(llvm::StringRef Key, Block Contents) {
1015 attributeImpl(Key, [&] { array(Contents); });
1016 }
1017 /// Emit an attribute whose value is an object with attributes from the Block.
attributeObject(llvm::StringRef Key,Block Contents)1018 void attributeObject(llvm::StringRef Key, Block Contents) {
1019 attributeImpl(Key, [&] { object(Contents); });
1020 }
1021
1022 // Low-level begin/end functions to output arrays, objects, and attributes.
1023 // Must be correctly paired. Allowed contexts are as above.
1024
1025 void arrayBegin();
1026 void arrayEnd();
1027 void objectBegin();
1028 void objectEnd();
1029 void attributeBegin(llvm::StringRef Key);
1030 void attributeEnd();
1031 raw_ostream &rawValueBegin();
1032 void rawValueEnd();
1033
1034 private:
attributeImpl(llvm::StringRef Key,Block Contents)1035 void attributeImpl(llvm::StringRef Key, Block Contents) {
1036 attributeBegin(Key);
1037 Contents();
1038 attributeEnd();
1039 }
1040
1041 void valueBegin();
1042 void flushComment();
1043 void newline();
1044
1045 enum Context {
1046 Singleton, // Top level, or object attribute.
1047 Array,
1048 Object,
1049 RawValue, // External code writing a value to OS directly.
1050 };
1051 struct State {
1052 Context Ctx = Singleton;
1053 bool HasValue = false;
1054 };
1055 llvm::SmallVector<State, 16> Stack; // Never empty.
1056 llvm::StringRef PendingComment;
1057 llvm::raw_ostream &OS;
1058 unsigned IndentSize;
1059 unsigned Indent = 0;
1060 };
1061
1062 /// Serializes this Value to JSON, writing it to the provided stream.
1063 /// The formatting is compact (no extra whitespace) and deterministic.
1064 /// For pretty-printing, use the formatv() format_provider below.
1065 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
1066 OStream(OS).value(V);
1067 return OS;
1068 }
1069 } // namespace json
1070
1071 /// Allow printing json::Value with formatv().
1072 /// The default style is basic/compact formatting, like operator<<.
1073 /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
1074 template <> struct format_provider<llvm::json::Value> {
1075 static void format(const llvm::json::Value &, raw_ostream &, StringRef);
1076 };
1077 } // namespace llvm
1078
1079 #endif
1080