10b57cec5SDimitry Andric //=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #include "llvm/Support/JSON.h"
10e8d8bef9SDimitry Andric #include "llvm/ADT/STLExtras.h"
11*fe013be4SDimitry Andric #include "llvm/ADT/StringExtras.h"
120b57cec5SDimitry Andric #include "llvm/Support/ConvertUTF.h"
13e8d8bef9SDimitry Andric #include "llvm/Support/Error.h"
140b57cec5SDimitry Andric #include "llvm/Support/Format.h"
1504eeddc0SDimitry Andric #include "llvm/Support/NativeFormatting.h"
16*fe013be4SDimitry Andric #include "llvm/Support/raw_ostream.h"
170b57cec5SDimitry Andric #include <cctype>
18*fe013be4SDimitry Andric #include <cerrno>
19bdd1243dSDimitry Andric #include <optional>
200b57cec5SDimitry Andric
210b57cec5SDimitry Andric namespace llvm {
220b57cec5SDimitry Andric namespace json {
230b57cec5SDimitry Andric
operator [](const ObjectKey & K)240b57cec5SDimitry Andric Value &Object::operator[](const ObjectKey &K) {
250b57cec5SDimitry Andric return try_emplace(K, nullptr).first->getSecond();
260b57cec5SDimitry Andric }
operator [](ObjectKey && K)270b57cec5SDimitry Andric Value &Object::operator[](ObjectKey &&K) {
280b57cec5SDimitry Andric return try_emplace(std::move(K), nullptr).first->getSecond();
290b57cec5SDimitry Andric }
get(StringRef K)300b57cec5SDimitry Andric Value *Object::get(StringRef K) {
310b57cec5SDimitry Andric auto I = find(K);
320b57cec5SDimitry Andric if (I == end())
330b57cec5SDimitry Andric return nullptr;
340b57cec5SDimitry Andric return &I->second;
350b57cec5SDimitry Andric }
get(StringRef K) const360b57cec5SDimitry Andric const Value *Object::get(StringRef K) const {
370b57cec5SDimitry Andric auto I = find(K);
380b57cec5SDimitry Andric if (I == end())
390b57cec5SDimitry Andric return nullptr;
400b57cec5SDimitry Andric return &I->second;
410b57cec5SDimitry Andric }
getNull(StringRef K) const42bdd1243dSDimitry Andric std::optional<std::nullptr_t> Object::getNull(StringRef K) const {
430b57cec5SDimitry Andric if (auto *V = get(K))
440b57cec5SDimitry Andric return V->getAsNull();
45bdd1243dSDimitry Andric return std::nullopt;
460b57cec5SDimitry Andric }
getBoolean(StringRef K) const47bdd1243dSDimitry Andric std::optional<bool> Object::getBoolean(StringRef K) const {
480b57cec5SDimitry Andric if (auto *V = get(K))
490b57cec5SDimitry Andric return V->getAsBoolean();
50bdd1243dSDimitry Andric return std::nullopt;
510b57cec5SDimitry Andric }
getNumber(StringRef K) const52bdd1243dSDimitry Andric std::optional<double> Object::getNumber(StringRef K) const {
530b57cec5SDimitry Andric if (auto *V = get(K))
540b57cec5SDimitry Andric return V->getAsNumber();
55bdd1243dSDimitry Andric return std::nullopt;
560b57cec5SDimitry Andric }
getInteger(StringRef K) const57bdd1243dSDimitry Andric std::optional<int64_t> Object::getInteger(StringRef K) const {
580b57cec5SDimitry Andric if (auto *V = get(K))
590b57cec5SDimitry Andric return V->getAsInteger();
60bdd1243dSDimitry Andric return std::nullopt;
610b57cec5SDimitry Andric }
getString(StringRef K) const62bdd1243dSDimitry Andric std::optional<llvm::StringRef> Object::getString(StringRef K) const {
630b57cec5SDimitry Andric if (auto *V = get(K))
640b57cec5SDimitry Andric return V->getAsString();
65bdd1243dSDimitry Andric return std::nullopt;
660b57cec5SDimitry Andric }
getObject(StringRef K) const670b57cec5SDimitry Andric const json::Object *Object::getObject(StringRef K) const {
680b57cec5SDimitry Andric if (auto *V = get(K))
690b57cec5SDimitry Andric return V->getAsObject();
700b57cec5SDimitry Andric return nullptr;
710b57cec5SDimitry Andric }
getObject(StringRef K)720b57cec5SDimitry Andric json::Object *Object::getObject(StringRef K) {
730b57cec5SDimitry Andric if (auto *V = get(K))
740b57cec5SDimitry Andric return V->getAsObject();
750b57cec5SDimitry Andric return nullptr;
760b57cec5SDimitry Andric }
getArray(StringRef K) const770b57cec5SDimitry Andric const json::Array *Object::getArray(StringRef K) const {
780b57cec5SDimitry Andric if (auto *V = get(K))
790b57cec5SDimitry Andric return V->getAsArray();
800b57cec5SDimitry Andric return nullptr;
810b57cec5SDimitry Andric }
getArray(StringRef K)820b57cec5SDimitry Andric json::Array *Object::getArray(StringRef K) {
830b57cec5SDimitry Andric if (auto *V = get(K))
840b57cec5SDimitry Andric return V->getAsArray();
850b57cec5SDimitry Andric return nullptr;
860b57cec5SDimitry Andric }
operator ==(const Object & LHS,const Object & RHS)870b57cec5SDimitry Andric bool operator==(const Object &LHS, const Object &RHS) {
880b57cec5SDimitry Andric if (LHS.size() != RHS.size())
890b57cec5SDimitry Andric return false;
900b57cec5SDimitry Andric for (const auto &L : LHS) {
910b57cec5SDimitry Andric auto R = RHS.find(L.first);
920b57cec5SDimitry Andric if (R == RHS.end() || L.second != R->second)
930b57cec5SDimitry Andric return false;
940b57cec5SDimitry Andric }
950b57cec5SDimitry Andric return true;
960b57cec5SDimitry Andric }
970b57cec5SDimitry Andric
Array(std::initializer_list<Value> Elements)980b57cec5SDimitry Andric Array::Array(std::initializer_list<Value> Elements) {
990b57cec5SDimitry Andric V.reserve(Elements.size());
1000b57cec5SDimitry Andric for (const Value &V : Elements) {
1010b57cec5SDimitry Andric emplace_back(nullptr);
1020b57cec5SDimitry Andric back().moveFrom(std::move(V));
1030b57cec5SDimitry Andric }
1040b57cec5SDimitry Andric }
1050b57cec5SDimitry Andric
Value(std::initializer_list<Value> Elements)1060b57cec5SDimitry Andric Value::Value(std::initializer_list<Value> Elements)
1070b57cec5SDimitry Andric : Value(json::Array(Elements)) {}
1080b57cec5SDimitry Andric
copyFrom(const Value & M)1090b57cec5SDimitry Andric void Value::copyFrom(const Value &M) {
1100b57cec5SDimitry Andric Type = M.Type;
1110b57cec5SDimitry Andric switch (Type) {
1120b57cec5SDimitry Andric case T_Null:
1130b57cec5SDimitry Andric case T_Boolean:
1140b57cec5SDimitry Andric case T_Double:
1150b57cec5SDimitry Andric case T_Integer:
116349cc55cSDimitry Andric case T_UINT64:
117e8d8bef9SDimitry Andric memcpy(&Union, &M.Union, sizeof(Union));
1180b57cec5SDimitry Andric break;
1190b57cec5SDimitry Andric case T_StringRef:
1200b57cec5SDimitry Andric create<StringRef>(M.as<StringRef>());
1210b57cec5SDimitry Andric break;
1220b57cec5SDimitry Andric case T_String:
1230b57cec5SDimitry Andric create<std::string>(M.as<std::string>());
1240b57cec5SDimitry Andric break;
1250b57cec5SDimitry Andric case T_Object:
1260b57cec5SDimitry Andric create<json::Object>(M.as<json::Object>());
1270b57cec5SDimitry Andric break;
1280b57cec5SDimitry Andric case T_Array:
1290b57cec5SDimitry Andric create<json::Array>(M.as<json::Array>());
1300b57cec5SDimitry Andric break;
1310b57cec5SDimitry Andric }
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric
moveFrom(const Value && M)1340b57cec5SDimitry Andric void Value::moveFrom(const Value &&M) {
1350b57cec5SDimitry Andric Type = M.Type;
1360b57cec5SDimitry Andric switch (Type) {
1370b57cec5SDimitry Andric case T_Null:
1380b57cec5SDimitry Andric case T_Boolean:
1390b57cec5SDimitry Andric case T_Double:
1400b57cec5SDimitry Andric case T_Integer:
141349cc55cSDimitry Andric case T_UINT64:
142e8d8bef9SDimitry Andric memcpy(&Union, &M.Union, sizeof(Union));
1430b57cec5SDimitry Andric break;
1440b57cec5SDimitry Andric case T_StringRef:
1450b57cec5SDimitry Andric create<StringRef>(M.as<StringRef>());
1460b57cec5SDimitry Andric break;
1470b57cec5SDimitry Andric case T_String:
1480b57cec5SDimitry Andric create<std::string>(std::move(M.as<std::string>()));
1490b57cec5SDimitry Andric M.Type = T_Null;
1500b57cec5SDimitry Andric break;
1510b57cec5SDimitry Andric case T_Object:
1520b57cec5SDimitry Andric create<json::Object>(std::move(M.as<json::Object>()));
1530b57cec5SDimitry Andric M.Type = T_Null;
1540b57cec5SDimitry Andric break;
1550b57cec5SDimitry Andric case T_Array:
1560b57cec5SDimitry Andric create<json::Array>(std::move(M.as<json::Array>()));
1570b57cec5SDimitry Andric M.Type = T_Null;
1580b57cec5SDimitry Andric break;
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric }
1610b57cec5SDimitry Andric
destroy()1620b57cec5SDimitry Andric void Value::destroy() {
1630b57cec5SDimitry Andric switch (Type) {
1640b57cec5SDimitry Andric case T_Null:
1650b57cec5SDimitry Andric case T_Boolean:
1660b57cec5SDimitry Andric case T_Double:
1670b57cec5SDimitry Andric case T_Integer:
168349cc55cSDimitry Andric case T_UINT64:
1690b57cec5SDimitry Andric break;
1700b57cec5SDimitry Andric case T_StringRef:
1710b57cec5SDimitry Andric as<StringRef>().~StringRef();
1720b57cec5SDimitry Andric break;
1730b57cec5SDimitry Andric case T_String:
1740b57cec5SDimitry Andric as<std::string>().~basic_string();
1750b57cec5SDimitry Andric break;
1760b57cec5SDimitry Andric case T_Object:
1770b57cec5SDimitry Andric as<json::Object>().~Object();
1780b57cec5SDimitry Andric break;
1790b57cec5SDimitry Andric case T_Array:
1800b57cec5SDimitry Andric as<json::Array>().~Array();
1810b57cec5SDimitry Andric break;
1820b57cec5SDimitry Andric }
1830b57cec5SDimitry Andric }
1840b57cec5SDimitry Andric
operator ==(const Value & L,const Value & R)1850b57cec5SDimitry Andric bool operator==(const Value &L, const Value &R) {
1860b57cec5SDimitry Andric if (L.kind() != R.kind())
1870b57cec5SDimitry Andric return false;
1880b57cec5SDimitry Andric switch (L.kind()) {
1890b57cec5SDimitry Andric case Value::Null:
1900b57cec5SDimitry Andric return *L.getAsNull() == *R.getAsNull();
1910b57cec5SDimitry Andric case Value::Boolean:
1920b57cec5SDimitry Andric return *L.getAsBoolean() == *R.getAsBoolean();
1930b57cec5SDimitry Andric case Value::Number:
1940b57cec5SDimitry Andric // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
1950b57cec5SDimitry Andric // The same integer must convert to the same double, per the standard.
1960b57cec5SDimitry Andric // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
1970b57cec5SDimitry Andric // So we avoid floating point promotion for exact comparisons.
1980b57cec5SDimitry Andric if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
1990b57cec5SDimitry Andric return L.getAsInteger() == R.getAsInteger();
2000b57cec5SDimitry Andric return *L.getAsNumber() == *R.getAsNumber();
2010b57cec5SDimitry Andric case Value::String:
2020b57cec5SDimitry Andric return *L.getAsString() == *R.getAsString();
2030b57cec5SDimitry Andric case Value::Array:
2040b57cec5SDimitry Andric return *L.getAsArray() == *R.getAsArray();
2050b57cec5SDimitry Andric case Value::Object:
2060b57cec5SDimitry Andric return *L.getAsObject() == *R.getAsObject();
2070b57cec5SDimitry Andric }
2080b57cec5SDimitry Andric llvm_unreachable("Unknown value kind");
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric
report(llvm::StringLiteral Msg)211e8d8bef9SDimitry Andric void Path::report(llvm::StringLiteral Msg) {
212e8d8bef9SDimitry Andric // Walk up to the root context, and count the number of segments.
213e8d8bef9SDimitry Andric unsigned Count = 0;
214e8d8bef9SDimitry Andric const Path *P;
215e8d8bef9SDimitry Andric for (P = this; P->Parent != nullptr; P = P->Parent)
216e8d8bef9SDimitry Andric ++Count;
217e8d8bef9SDimitry Andric Path::Root *R = P->Seg.root();
218e8d8bef9SDimitry Andric // Fill in the error message and copy the path (in reverse order).
219e8d8bef9SDimitry Andric R->ErrorMessage = Msg;
220e8d8bef9SDimitry Andric R->ErrorPath.resize(Count);
221e8d8bef9SDimitry Andric auto It = R->ErrorPath.begin();
222e8d8bef9SDimitry Andric for (P = this; P->Parent != nullptr; P = P->Parent)
223e8d8bef9SDimitry Andric *It++ = P->Seg;
224e8d8bef9SDimitry Andric }
225e8d8bef9SDimitry Andric
getError() const226e8d8bef9SDimitry Andric Error Path::Root::getError() const {
227e8d8bef9SDimitry Andric std::string S;
228e8d8bef9SDimitry Andric raw_string_ostream OS(S);
229e8d8bef9SDimitry Andric OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
230e8d8bef9SDimitry Andric if (ErrorPath.empty()) {
231e8d8bef9SDimitry Andric if (!Name.empty())
232e8d8bef9SDimitry Andric OS << " when parsing " << Name;
233e8d8bef9SDimitry Andric } else {
234e8d8bef9SDimitry Andric OS << " at " << (Name.empty() ? "(root)" : Name);
235e8d8bef9SDimitry Andric for (const Path::Segment &S : llvm::reverse(ErrorPath)) {
236e8d8bef9SDimitry Andric if (S.isField())
237e8d8bef9SDimitry Andric OS << '.' << S.field();
238e8d8bef9SDimitry Andric else
239e8d8bef9SDimitry Andric OS << '[' << S.index() << ']';
240e8d8bef9SDimitry Andric }
241e8d8bef9SDimitry Andric }
242e8d8bef9SDimitry Andric return createStringError(llvm::inconvertibleErrorCode(), OS.str());
243e8d8bef9SDimitry Andric }
244e8d8bef9SDimitry Andric
245e8d8bef9SDimitry Andric namespace {
246e8d8bef9SDimitry Andric
sortedElements(const Object & O)247e8d8bef9SDimitry Andric std::vector<const Object::value_type *> sortedElements(const Object &O) {
248e8d8bef9SDimitry Andric std::vector<const Object::value_type *> Elements;
249e8d8bef9SDimitry Andric for (const auto &E : O)
250e8d8bef9SDimitry Andric Elements.push_back(&E);
251e8d8bef9SDimitry Andric llvm::sort(Elements,
252e8d8bef9SDimitry Andric [](const Object::value_type *L, const Object::value_type *R) {
253e8d8bef9SDimitry Andric return L->first < R->first;
254e8d8bef9SDimitry Andric });
255e8d8bef9SDimitry Andric return Elements;
256e8d8bef9SDimitry Andric }
257e8d8bef9SDimitry Andric
258e8d8bef9SDimitry Andric // Prints a one-line version of a value that isn't our main focus.
259e8d8bef9SDimitry Andric // We interleave writes to OS and JOS, exploiting the lack of extra buffering.
260e8d8bef9SDimitry Andric // This is OK as we own the implementation.
abbreviate(const Value & V,OStream & JOS)261e8d8bef9SDimitry Andric void abbreviate(const Value &V, OStream &JOS) {
262e8d8bef9SDimitry Andric switch (V.kind()) {
263e8d8bef9SDimitry Andric case Value::Array:
264e8d8bef9SDimitry Andric JOS.rawValue(V.getAsArray()->empty() ? "[]" : "[ ... ]");
265e8d8bef9SDimitry Andric break;
266e8d8bef9SDimitry Andric case Value::Object:
267e8d8bef9SDimitry Andric JOS.rawValue(V.getAsObject()->empty() ? "{}" : "{ ... }");
268e8d8bef9SDimitry Andric break;
269e8d8bef9SDimitry Andric case Value::String: {
270e8d8bef9SDimitry Andric llvm::StringRef S = *V.getAsString();
271e8d8bef9SDimitry Andric if (S.size() < 40) {
272e8d8bef9SDimitry Andric JOS.value(V);
273e8d8bef9SDimitry Andric } else {
274e8d8bef9SDimitry Andric std::string Truncated = fixUTF8(S.take_front(37));
275e8d8bef9SDimitry Andric Truncated.append("...");
276e8d8bef9SDimitry Andric JOS.value(Truncated);
277e8d8bef9SDimitry Andric }
278e8d8bef9SDimitry Andric break;
279e8d8bef9SDimitry Andric }
280e8d8bef9SDimitry Andric default:
281e8d8bef9SDimitry Andric JOS.value(V);
282e8d8bef9SDimitry Andric }
283e8d8bef9SDimitry Andric }
284e8d8bef9SDimitry Andric
285e8d8bef9SDimitry Andric // Prints a semi-expanded version of a value that is our main focus.
286e8d8bef9SDimitry Andric // Array/Object entries are printed, but not recursively as they may be huge.
abbreviateChildren(const Value & V,OStream & JOS)287e8d8bef9SDimitry Andric void abbreviateChildren(const Value &V, OStream &JOS) {
288e8d8bef9SDimitry Andric switch (V.kind()) {
289e8d8bef9SDimitry Andric case Value::Array:
290e8d8bef9SDimitry Andric JOS.array([&] {
291e8d8bef9SDimitry Andric for (const auto &I : *V.getAsArray())
292e8d8bef9SDimitry Andric abbreviate(I, JOS);
293e8d8bef9SDimitry Andric });
294e8d8bef9SDimitry Andric break;
295e8d8bef9SDimitry Andric case Value::Object:
296e8d8bef9SDimitry Andric JOS.object([&] {
297e8d8bef9SDimitry Andric for (const auto *KV : sortedElements(*V.getAsObject())) {
298e8d8bef9SDimitry Andric JOS.attributeBegin(KV->first);
299e8d8bef9SDimitry Andric abbreviate(KV->second, JOS);
300e8d8bef9SDimitry Andric JOS.attributeEnd();
301e8d8bef9SDimitry Andric }
302e8d8bef9SDimitry Andric });
303e8d8bef9SDimitry Andric break;
304e8d8bef9SDimitry Andric default:
305e8d8bef9SDimitry Andric JOS.value(V);
306e8d8bef9SDimitry Andric }
307e8d8bef9SDimitry Andric }
308e8d8bef9SDimitry Andric
309e8d8bef9SDimitry Andric } // namespace
310e8d8bef9SDimitry Andric
printErrorContext(const Value & R,raw_ostream & OS) const311e8d8bef9SDimitry Andric void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
312e8d8bef9SDimitry Andric OStream JOS(OS, /*IndentSize=*/2);
313e8d8bef9SDimitry Andric // PrintValue recurses down the path, printing the ancestors of our target.
314e8d8bef9SDimitry Andric // Siblings of nodes along the path are printed with abbreviate(), and the
315e8d8bef9SDimitry Andric // target itself is printed with the somewhat richer abbreviateChildren().
316e8d8bef9SDimitry Andric // 'Recurse' is the lambda itself, to allow recursive calls.
317e8d8bef9SDimitry Andric auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
318e8d8bef9SDimitry Andric // Print the target node itself, with the error as a comment.
319e8d8bef9SDimitry Andric // Also used if we can't follow our path, e.g. it names a field that
320e8d8bef9SDimitry Andric // *should* exist but doesn't.
321e8d8bef9SDimitry Andric auto HighlightCurrent = [&] {
322e8d8bef9SDimitry Andric std::string Comment = "error: ";
323e8d8bef9SDimitry Andric Comment.append(ErrorMessage.data(), ErrorMessage.size());
324e8d8bef9SDimitry Andric JOS.comment(Comment);
325e8d8bef9SDimitry Andric abbreviateChildren(V, JOS);
326e8d8bef9SDimitry Andric };
327e8d8bef9SDimitry Andric if (Path.empty()) // We reached our target.
328e8d8bef9SDimitry Andric return HighlightCurrent();
329e8d8bef9SDimitry Andric const Segment &S = Path.back(); // Path is in reverse order.
330e8d8bef9SDimitry Andric if (S.isField()) {
331e8d8bef9SDimitry Andric // Current node is an object, path names a field.
332e8d8bef9SDimitry Andric llvm::StringRef FieldName = S.field();
333e8d8bef9SDimitry Andric const Object *O = V.getAsObject();
334e8d8bef9SDimitry Andric if (!O || !O->get(FieldName))
335e8d8bef9SDimitry Andric return HighlightCurrent();
336e8d8bef9SDimitry Andric JOS.object([&] {
337e8d8bef9SDimitry Andric for (const auto *KV : sortedElements(*O)) {
338e8d8bef9SDimitry Andric JOS.attributeBegin(KV->first);
339e8d8bef9SDimitry Andric if (FieldName.equals(KV->first))
340e8d8bef9SDimitry Andric Recurse(KV->second, Path.drop_back(), Recurse);
341e8d8bef9SDimitry Andric else
342e8d8bef9SDimitry Andric abbreviate(KV->second, JOS);
343e8d8bef9SDimitry Andric JOS.attributeEnd();
344e8d8bef9SDimitry Andric }
345e8d8bef9SDimitry Andric });
346e8d8bef9SDimitry Andric } else {
347e8d8bef9SDimitry Andric // Current node is an array, path names an element.
348e8d8bef9SDimitry Andric const Array *A = V.getAsArray();
349e8d8bef9SDimitry Andric if (!A || S.index() >= A->size())
350e8d8bef9SDimitry Andric return HighlightCurrent();
351e8d8bef9SDimitry Andric JOS.array([&] {
352e8d8bef9SDimitry Andric unsigned Current = 0;
353e8d8bef9SDimitry Andric for (const auto &V : *A) {
354e8d8bef9SDimitry Andric if (Current++ == S.index())
355e8d8bef9SDimitry Andric Recurse(V, Path.drop_back(), Recurse);
356e8d8bef9SDimitry Andric else
357e8d8bef9SDimitry Andric abbreviate(V, JOS);
358e8d8bef9SDimitry Andric }
359e8d8bef9SDimitry Andric });
360e8d8bef9SDimitry Andric }
361e8d8bef9SDimitry Andric };
362e8d8bef9SDimitry Andric PrintValue(R, ErrorPath, PrintValue);
363e8d8bef9SDimitry Andric }
364e8d8bef9SDimitry Andric
3650b57cec5SDimitry Andric namespace {
3660b57cec5SDimitry Andric // Simple recursive-descent JSON parser.
3670b57cec5SDimitry Andric class Parser {
3680b57cec5SDimitry Andric public:
Parser(StringRef JSON)3690b57cec5SDimitry Andric Parser(StringRef JSON)
3700b57cec5SDimitry Andric : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
3710b57cec5SDimitry Andric
checkUTF8()3720b57cec5SDimitry Andric bool checkUTF8() {
3730b57cec5SDimitry Andric size_t ErrOffset;
3740b57cec5SDimitry Andric if (isUTF8(StringRef(Start, End - Start), &ErrOffset))
3750b57cec5SDimitry Andric return true;
3760b57cec5SDimitry Andric P = Start + ErrOffset; // For line/column calculation.
3770b57cec5SDimitry Andric return parseError("Invalid UTF-8 sequence");
3780b57cec5SDimitry Andric }
3790b57cec5SDimitry Andric
3800b57cec5SDimitry Andric bool parseValue(Value &Out);
3810b57cec5SDimitry Andric
assertEnd()3820b57cec5SDimitry Andric bool assertEnd() {
3830b57cec5SDimitry Andric eatWhitespace();
3840b57cec5SDimitry Andric if (P == End)
3850b57cec5SDimitry Andric return true;
3860b57cec5SDimitry Andric return parseError("Text after end of document");
3870b57cec5SDimitry Andric }
3880b57cec5SDimitry Andric
takeError()3890b57cec5SDimitry Andric Error takeError() {
3900b57cec5SDimitry Andric assert(Err);
3910b57cec5SDimitry Andric return std::move(*Err);
3920b57cec5SDimitry Andric }
3930b57cec5SDimitry Andric
3940b57cec5SDimitry Andric private:
eatWhitespace()3950b57cec5SDimitry Andric void eatWhitespace() {
3960b57cec5SDimitry Andric while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t'))
3970b57cec5SDimitry Andric ++P;
3980b57cec5SDimitry Andric }
3990b57cec5SDimitry Andric
4000b57cec5SDimitry Andric // On invalid syntax, parseX() functions return false and set Err.
4010b57cec5SDimitry Andric bool parseNumber(char First, Value &Out);
4020b57cec5SDimitry Andric bool parseString(std::string &Out);
4030b57cec5SDimitry Andric bool parseUnicode(std::string &Out);
4040b57cec5SDimitry Andric bool parseError(const char *Msg); // always returns false
4050b57cec5SDimitry Andric
next()4060b57cec5SDimitry Andric char next() { return P == End ? 0 : *P++; }
peek()4070b57cec5SDimitry Andric char peek() { return P == End ? 0 : *P; }
isNumber(char C)4080b57cec5SDimitry Andric static bool isNumber(char C) {
4090b57cec5SDimitry Andric return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' ||
4100b57cec5SDimitry Andric C == '5' || C == '6' || C == '7' || C == '8' || C == '9' ||
4110b57cec5SDimitry Andric C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.';
4120b57cec5SDimitry Andric }
4130b57cec5SDimitry Andric
414bdd1243dSDimitry Andric std::optional<Error> Err;
4150b57cec5SDimitry Andric const char *Start, *P, *End;
4160b57cec5SDimitry Andric };
4170b57cec5SDimitry Andric
parseValue(Value & Out)4180b57cec5SDimitry Andric bool Parser::parseValue(Value &Out) {
4190b57cec5SDimitry Andric eatWhitespace();
4200b57cec5SDimitry Andric if (P == End)
4210b57cec5SDimitry Andric return parseError("Unexpected EOF");
4220b57cec5SDimitry Andric switch (char C = next()) {
4230b57cec5SDimitry Andric // Bare null/true/false are easy - first char identifies them.
4240b57cec5SDimitry Andric case 'n':
4250b57cec5SDimitry Andric Out = nullptr;
4260b57cec5SDimitry Andric return (next() == 'u' && next() == 'l' && next() == 'l') ||
4270b57cec5SDimitry Andric parseError("Invalid JSON value (null?)");
4280b57cec5SDimitry Andric case 't':
4290b57cec5SDimitry Andric Out = true;
4300b57cec5SDimitry Andric return (next() == 'r' && next() == 'u' && next() == 'e') ||
4310b57cec5SDimitry Andric parseError("Invalid JSON value (true?)");
4320b57cec5SDimitry Andric case 'f':
4330b57cec5SDimitry Andric Out = false;
4340b57cec5SDimitry Andric return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') ||
4350b57cec5SDimitry Andric parseError("Invalid JSON value (false?)");
4360b57cec5SDimitry Andric case '"': {
4370b57cec5SDimitry Andric std::string S;
4380b57cec5SDimitry Andric if (parseString(S)) {
4390b57cec5SDimitry Andric Out = std::move(S);
4400b57cec5SDimitry Andric return true;
4410b57cec5SDimitry Andric }
4420b57cec5SDimitry Andric return false;
4430b57cec5SDimitry Andric }
4440b57cec5SDimitry Andric case '[': {
4450b57cec5SDimitry Andric Out = Array{};
4460b57cec5SDimitry Andric Array &A = *Out.getAsArray();
4470b57cec5SDimitry Andric eatWhitespace();
4480b57cec5SDimitry Andric if (peek() == ']') {
4490b57cec5SDimitry Andric ++P;
4500b57cec5SDimitry Andric return true;
4510b57cec5SDimitry Andric }
4520b57cec5SDimitry Andric for (;;) {
4530b57cec5SDimitry Andric A.emplace_back(nullptr);
4540b57cec5SDimitry Andric if (!parseValue(A.back()))
4550b57cec5SDimitry Andric return false;
4560b57cec5SDimitry Andric eatWhitespace();
4570b57cec5SDimitry Andric switch (next()) {
4580b57cec5SDimitry Andric case ',':
4590b57cec5SDimitry Andric eatWhitespace();
4600b57cec5SDimitry Andric continue;
4610b57cec5SDimitry Andric case ']':
4620b57cec5SDimitry Andric return true;
4630b57cec5SDimitry Andric default:
4640b57cec5SDimitry Andric return parseError("Expected , or ] after array element");
4650b57cec5SDimitry Andric }
4660b57cec5SDimitry Andric }
4670b57cec5SDimitry Andric }
4680b57cec5SDimitry Andric case '{': {
4690b57cec5SDimitry Andric Out = Object{};
4700b57cec5SDimitry Andric Object &O = *Out.getAsObject();
4710b57cec5SDimitry Andric eatWhitespace();
4720b57cec5SDimitry Andric if (peek() == '}') {
4730b57cec5SDimitry Andric ++P;
4740b57cec5SDimitry Andric return true;
4750b57cec5SDimitry Andric }
4760b57cec5SDimitry Andric for (;;) {
4770b57cec5SDimitry Andric if (next() != '"')
4780b57cec5SDimitry Andric return parseError("Expected object key");
4790b57cec5SDimitry Andric std::string K;
4800b57cec5SDimitry Andric if (!parseString(K))
4810b57cec5SDimitry Andric return false;
4820b57cec5SDimitry Andric eatWhitespace();
4830b57cec5SDimitry Andric if (next() != ':')
4840b57cec5SDimitry Andric return parseError("Expected : after object key");
4850b57cec5SDimitry Andric eatWhitespace();
4860b57cec5SDimitry Andric if (!parseValue(O[std::move(K)]))
4870b57cec5SDimitry Andric return false;
4880b57cec5SDimitry Andric eatWhitespace();
4890b57cec5SDimitry Andric switch (next()) {
4900b57cec5SDimitry Andric case ',':
4910b57cec5SDimitry Andric eatWhitespace();
4920b57cec5SDimitry Andric continue;
4930b57cec5SDimitry Andric case '}':
4940b57cec5SDimitry Andric return true;
4950b57cec5SDimitry Andric default:
4960b57cec5SDimitry Andric return parseError("Expected , or } after object property");
4970b57cec5SDimitry Andric }
4980b57cec5SDimitry Andric }
4990b57cec5SDimitry Andric }
5000b57cec5SDimitry Andric default:
5010b57cec5SDimitry Andric if (isNumber(C))
5020b57cec5SDimitry Andric return parseNumber(C, Out);
5030b57cec5SDimitry Andric return parseError("Invalid JSON value");
5040b57cec5SDimitry Andric }
5050b57cec5SDimitry Andric }
5060b57cec5SDimitry Andric
parseNumber(char First,Value & Out)5070b57cec5SDimitry Andric bool Parser::parseNumber(char First, Value &Out) {
5080b57cec5SDimitry Andric // Read the number into a string. (Must be null-terminated for strto*).
5090b57cec5SDimitry Andric SmallString<24> S;
5100b57cec5SDimitry Andric S.push_back(First);
5110b57cec5SDimitry Andric while (isNumber(peek()))
5120b57cec5SDimitry Andric S.push_back(next());
5130b57cec5SDimitry Andric char *End;
5140b57cec5SDimitry Andric // Try first to parse as integer, and if so preserve full 64 bits.
51581ad6265SDimitry Andric // We check for errno for out of bounds errors and for End == S.end()
51681ad6265SDimitry Andric // to make sure that the numeric string is not malformed.
51781ad6265SDimitry Andric errno = 0;
51881ad6265SDimitry Andric int64_t I = std::strtoll(S.c_str(), &End, 10);
51981ad6265SDimitry Andric if (End == S.end() && errno != ERANGE) {
5200b57cec5SDimitry Andric Out = int64_t(I);
5210b57cec5SDimitry Andric return true;
5220b57cec5SDimitry Andric }
52381ad6265SDimitry Andric // strtroull has a special handling for negative numbers, but in this
52481ad6265SDimitry Andric // case we don't want to do that because negative numbers were already
52581ad6265SDimitry Andric // handled in the previous block.
52681ad6265SDimitry Andric if (First != '-') {
52781ad6265SDimitry Andric errno = 0;
52881ad6265SDimitry Andric uint64_t UI = std::strtoull(S.c_str(), &End, 10);
52981ad6265SDimitry Andric if (End == S.end() && errno != ERANGE) {
53081ad6265SDimitry Andric Out = UI;
53181ad6265SDimitry Andric return true;
53281ad6265SDimitry Andric }
53381ad6265SDimitry Andric }
5340b57cec5SDimitry Andric // If it's not an integer
5350b57cec5SDimitry Andric Out = std::strtod(S.c_str(), &End);
5360b57cec5SDimitry Andric return End == S.end() || parseError("Invalid JSON value (number?)");
5370b57cec5SDimitry Andric }
5380b57cec5SDimitry Andric
parseString(std::string & Out)5390b57cec5SDimitry Andric bool Parser::parseString(std::string &Out) {
5400b57cec5SDimitry Andric // leading quote was already consumed.
5410b57cec5SDimitry Andric for (char C = next(); C != '"'; C = next()) {
5420b57cec5SDimitry Andric if (LLVM_UNLIKELY(P == End))
5430b57cec5SDimitry Andric return parseError("Unterminated string");
5440b57cec5SDimitry Andric if (LLVM_UNLIKELY((C & 0x1f) == C))
5450b57cec5SDimitry Andric return parseError("Control character in string");
5460b57cec5SDimitry Andric if (LLVM_LIKELY(C != '\\')) {
5470b57cec5SDimitry Andric Out.push_back(C);
5480b57cec5SDimitry Andric continue;
5490b57cec5SDimitry Andric }
5500b57cec5SDimitry Andric // Handle escape sequence.
5510b57cec5SDimitry Andric switch (C = next()) {
5520b57cec5SDimitry Andric case '"':
5530b57cec5SDimitry Andric case '\\':
5540b57cec5SDimitry Andric case '/':
5550b57cec5SDimitry Andric Out.push_back(C);
5560b57cec5SDimitry Andric break;
5570b57cec5SDimitry Andric case 'b':
5580b57cec5SDimitry Andric Out.push_back('\b');
5590b57cec5SDimitry Andric break;
5600b57cec5SDimitry Andric case 'f':
5610b57cec5SDimitry Andric Out.push_back('\f');
5620b57cec5SDimitry Andric break;
5630b57cec5SDimitry Andric case 'n':
5640b57cec5SDimitry Andric Out.push_back('\n');
5650b57cec5SDimitry Andric break;
5660b57cec5SDimitry Andric case 'r':
5670b57cec5SDimitry Andric Out.push_back('\r');
5680b57cec5SDimitry Andric break;
5690b57cec5SDimitry Andric case 't':
5700b57cec5SDimitry Andric Out.push_back('\t');
5710b57cec5SDimitry Andric break;
5720b57cec5SDimitry Andric case 'u':
5730b57cec5SDimitry Andric if (!parseUnicode(Out))
5740b57cec5SDimitry Andric return false;
5750b57cec5SDimitry Andric break;
5760b57cec5SDimitry Andric default:
5770b57cec5SDimitry Andric return parseError("Invalid escape sequence");
5780b57cec5SDimitry Andric }
5790b57cec5SDimitry Andric }
5800b57cec5SDimitry Andric return true;
5810b57cec5SDimitry Andric }
5820b57cec5SDimitry Andric
encodeUtf8(uint32_t Rune,std::string & Out)5830b57cec5SDimitry Andric static void encodeUtf8(uint32_t Rune, std::string &Out) {
5840b57cec5SDimitry Andric if (Rune < 0x80) {
5850b57cec5SDimitry Andric Out.push_back(Rune & 0x7F);
5860b57cec5SDimitry Andric } else if (Rune < 0x800) {
5870b57cec5SDimitry Andric uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6);
5880b57cec5SDimitry Andric uint8_t SecondByte = 0x80 | (Rune & 0x3F);
5890b57cec5SDimitry Andric Out.push_back(FirstByte);
5900b57cec5SDimitry Andric Out.push_back(SecondByte);
5910b57cec5SDimitry Andric } else if (Rune < 0x10000) {
5920b57cec5SDimitry Andric uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12);
5930b57cec5SDimitry Andric uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6);
5940b57cec5SDimitry Andric uint8_t ThirdByte = 0x80 | (Rune & 0x3F);
5950b57cec5SDimitry Andric Out.push_back(FirstByte);
5960b57cec5SDimitry Andric Out.push_back(SecondByte);
5970b57cec5SDimitry Andric Out.push_back(ThirdByte);
5980b57cec5SDimitry Andric } else if (Rune < 0x110000) {
5990b57cec5SDimitry Andric uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18);
6000b57cec5SDimitry Andric uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12);
6010b57cec5SDimitry Andric uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6);
6020b57cec5SDimitry Andric uint8_t FourthByte = 0x80 | (Rune & 0x3F);
6030b57cec5SDimitry Andric Out.push_back(FirstByte);
6040b57cec5SDimitry Andric Out.push_back(SecondByte);
6050b57cec5SDimitry Andric Out.push_back(ThirdByte);
6060b57cec5SDimitry Andric Out.push_back(FourthByte);
6070b57cec5SDimitry Andric } else {
6080b57cec5SDimitry Andric llvm_unreachable("Invalid codepoint");
6090b57cec5SDimitry Andric }
6100b57cec5SDimitry Andric }
6110b57cec5SDimitry Andric
6120b57cec5SDimitry Andric // Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
6130b57cec5SDimitry Andric // May parse several sequential escapes to ensure proper surrogate handling.
6140b57cec5SDimitry Andric // We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
6150b57cec5SDimitry Andric // These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
parseUnicode(std::string & Out)6160b57cec5SDimitry Andric bool Parser::parseUnicode(std::string &Out) {
6170b57cec5SDimitry Andric // Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
6180b57cec5SDimitry Andric auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); };
6190b57cec5SDimitry Andric // Decodes 4 hex digits from the stream into Out, returns false on error.
6200b57cec5SDimitry Andric auto Parse4Hex = [this](uint16_t &Out) -> bool {
6210b57cec5SDimitry Andric Out = 0;
6220b57cec5SDimitry Andric char Bytes[] = {next(), next(), next(), next()};
6230b57cec5SDimitry Andric for (unsigned char C : Bytes) {
6240b57cec5SDimitry Andric if (!std::isxdigit(C))
6250b57cec5SDimitry Andric return parseError("Invalid \\u escape sequence");
6260b57cec5SDimitry Andric Out <<= 4;
6270b57cec5SDimitry Andric Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0');
6280b57cec5SDimitry Andric }
6290b57cec5SDimitry Andric return true;
6300b57cec5SDimitry Andric };
6310b57cec5SDimitry Andric uint16_t First; // UTF-16 code unit from the first \u escape.
6320b57cec5SDimitry Andric if (!Parse4Hex(First))
6330b57cec5SDimitry Andric return false;
6340b57cec5SDimitry Andric
6350b57cec5SDimitry Andric // We loop to allow proper surrogate-pair error handling.
6360b57cec5SDimitry Andric while (true) {
6370b57cec5SDimitry Andric // Case 1: the UTF-16 code unit is already a codepoint in the BMP.
6380b57cec5SDimitry Andric if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) {
6390b57cec5SDimitry Andric encodeUtf8(First, Out);
6400b57cec5SDimitry Andric return true;
6410b57cec5SDimitry Andric }
6420b57cec5SDimitry Andric
6430b57cec5SDimitry Andric // Case 2: it's an (unpaired) trailing surrogate.
6440b57cec5SDimitry Andric if (LLVM_UNLIKELY(First >= 0xDC00)) {
6450b57cec5SDimitry Andric Invalid();
6460b57cec5SDimitry Andric return true;
6470b57cec5SDimitry Andric }
6480b57cec5SDimitry Andric
6490b57cec5SDimitry Andric // Case 3: it's a leading surrogate. We expect a trailing one next.
6500b57cec5SDimitry Andric // Case 3a: there's no trailing \u escape. Don't advance in the stream.
6510b57cec5SDimitry Andric if (LLVM_UNLIKELY(P + 2 > End || *P != '\\' || *(P + 1) != 'u')) {
6520b57cec5SDimitry Andric Invalid(); // Leading surrogate was unpaired.
6530b57cec5SDimitry Andric return true;
6540b57cec5SDimitry Andric }
6550b57cec5SDimitry Andric P += 2;
6560b57cec5SDimitry Andric uint16_t Second;
6570b57cec5SDimitry Andric if (!Parse4Hex(Second))
6580b57cec5SDimitry Andric return false;
6590b57cec5SDimitry Andric // Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
6600b57cec5SDimitry Andric if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) {
6610b57cec5SDimitry Andric Invalid(); // Leading surrogate was unpaired.
6620b57cec5SDimitry Andric First = Second; // Second escape still needs to be processed.
6630b57cec5SDimitry Andric continue;
6640b57cec5SDimitry Andric }
6650b57cec5SDimitry Andric // Case 3c: a valid surrogate pair encoding an astral codepoint.
6660b57cec5SDimitry Andric encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out);
6670b57cec5SDimitry Andric return true;
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric }
6700b57cec5SDimitry Andric
parseError(const char * Msg)6710b57cec5SDimitry Andric bool Parser::parseError(const char *Msg) {
6720b57cec5SDimitry Andric int Line = 1;
6730b57cec5SDimitry Andric const char *StartOfLine = Start;
6740b57cec5SDimitry Andric for (const char *X = Start; X < P; ++X) {
6750b57cec5SDimitry Andric if (*X == 0x0A) {
6760b57cec5SDimitry Andric ++Line;
6770b57cec5SDimitry Andric StartOfLine = X + 1;
6780b57cec5SDimitry Andric }
6790b57cec5SDimitry Andric }
6800b57cec5SDimitry Andric Err.emplace(
6818bcb0991SDimitry Andric std::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
6820b57cec5SDimitry Andric return false;
6830b57cec5SDimitry Andric }
6840b57cec5SDimitry Andric } // namespace
6850b57cec5SDimitry Andric
parse(StringRef JSON)6860b57cec5SDimitry Andric Expected<Value> parse(StringRef JSON) {
6870b57cec5SDimitry Andric Parser P(JSON);
6880b57cec5SDimitry Andric Value E = nullptr;
6890b57cec5SDimitry Andric if (P.checkUTF8())
6900b57cec5SDimitry Andric if (P.parseValue(E))
6910b57cec5SDimitry Andric if (P.assertEnd())
6920b57cec5SDimitry Andric return std::move(E);
6930b57cec5SDimitry Andric return P.takeError();
6940b57cec5SDimitry Andric }
6950b57cec5SDimitry Andric char ParseError::ID = 0;
6960b57cec5SDimitry Andric
isUTF8(llvm::StringRef S,size_t * ErrOffset)6970b57cec5SDimitry Andric bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
6980b57cec5SDimitry Andric // Fast-path for ASCII, which is valid UTF-8.
6990b57cec5SDimitry Andric if (LLVM_LIKELY(isASCII(S)))
7000b57cec5SDimitry Andric return true;
7010b57cec5SDimitry Andric
7020b57cec5SDimitry Andric const UTF8 *Data = reinterpret_cast<const UTF8 *>(S.data()), *Rest = Data;
7030b57cec5SDimitry Andric if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
7040b57cec5SDimitry Andric return true;
7050b57cec5SDimitry Andric
7060b57cec5SDimitry Andric if (ErrOffset)
7070b57cec5SDimitry Andric *ErrOffset = Rest - Data;
7080b57cec5SDimitry Andric return false;
7090b57cec5SDimitry Andric }
7100b57cec5SDimitry Andric
fixUTF8(llvm::StringRef S)7110b57cec5SDimitry Andric std::string fixUTF8(llvm::StringRef S) {
7120b57cec5SDimitry Andric // This isn't particularly efficient, but is only for error-recovery.
7130b57cec5SDimitry Andric std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
7140b57cec5SDimitry Andric const UTF8 *In8 = reinterpret_cast<const UTF8 *>(S.data());
7150b57cec5SDimitry Andric UTF32 *Out32 = Codepoints.data();
7160b57cec5SDimitry Andric ConvertUTF8toUTF32(&In8, In8 + S.size(), &Out32, Out32 + Codepoints.size(),
7170b57cec5SDimitry Andric lenientConversion);
7180b57cec5SDimitry Andric Codepoints.resize(Out32 - Codepoints.data());
7190b57cec5SDimitry Andric std::string Res(4 * Codepoints.size(), 0); // 4 bytes per codepoint suffice
7200b57cec5SDimitry Andric const UTF32 *In32 = Codepoints.data();
7210b57cec5SDimitry Andric UTF8 *Out8 = reinterpret_cast<UTF8 *>(&Res[0]);
7220b57cec5SDimitry Andric ConvertUTF32toUTF8(&In32, In32 + Codepoints.size(), &Out8, Out8 + Res.size(),
7230b57cec5SDimitry Andric strictConversion);
7240b57cec5SDimitry Andric Res.resize(reinterpret_cast<char *>(Out8) - Res.data());
7250b57cec5SDimitry Andric return Res;
7260b57cec5SDimitry Andric }
7270b57cec5SDimitry Andric
quote(llvm::raw_ostream & OS,llvm::StringRef S)7280b57cec5SDimitry Andric static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
7290b57cec5SDimitry Andric OS << '\"';
7300b57cec5SDimitry Andric for (unsigned char C : S) {
7310b57cec5SDimitry Andric if (C == 0x22 || C == 0x5C)
7320b57cec5SDimitry Andric OS << '\\';
7330b57cec5SDimitry Andric if (C >= 0x20) {
7340b57cec5SDimitry Andric OS << C;
7350b57cec5SDimitry Andric continue;
7360b57cec5SDimitry Andric }
7370b57cec5SDimitry Andric OS << '\\';
7380b57cec5SDimitry Andric switch (C) {
7390b57cec5SDimitry Andric // A few characters are common enough to make short escapes worthwhile.
7400b57cec5SDimitry Andric case '\t':
7410b57cec5SDimitry Andric OS << 't';
7420b57cec5SDimitry Andric break;
7430b57cec5SDimitry Andric case '\n':
7440b57cec5SDimitry Andric OS << 'n';
7450b57cec5SDimitry Andric break;
7460b57cec5SDimitry Andric case '\r':
7470b57cec5SDimitry Andric OS << 'r';
7480b57cec5SDimitry Andric break;
7490b57cec5SDimitry Andric default:
7500b57cec5SDimitry Andric OS << 'u';
7510b57cec5SDimitry Andric llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4);
7520b57cec5SDimitry Andric break;
7530b57cec5SDimitry Andric }
7540b57cec5SDimitry Andric }
7550b57cec5SDimitry Andric OS << '\"';
7560b57cec5SDimitry Andric }
7570b57cec5SDimitry Andric
value(const Value & V)7580b57cec5SDimitry Andric void llvm::json::OStream::value(const Value &V) {
7590b57cec5SDimitry Andric switch (V.kind()) {
7600b57cec5SDimitry Andric case Value::Null:
7610b57cec5SDimitry Andric valueBegin();
7620b57cec5SDimitry Andric OS << "null";
7630b57cec5SDimitry Andric return;
7640b57cec5SDimitry Andric case Value::Boolean:
7650b57cec5SDimitry Andric valueBegin();
7660b57cec5SDimitry Andric OS << (*V.getAsBoolean() ? "true" : "false");
7670b57cec5SDimitry Andric return;
7680b57cec5SDimitry Andric case Value::Number:
7690b57cec5SDimitry Andric valueBegin();
7700b57cec5SDimitry Andric if (V.Type == Value::T_Integer)
7710b57cec5SDimitry Andric OS << *V.getAsInteger();
772349cc55cSDimitry Andric else if (V.Type == Value::T_UINT64)
773349cc55cSDimitry Andric OS << *V.getAsUINT64();
7740b57cec5SDimitry Andric else
7750b57cec5SDimitry Andric OS << format("%.*g", std::numeric_limits<double>::max_digits10,
7760b57cec5SDimitry Andric *V.getAsNumber());
7770b57cec5SDimitry Andric return;
7780b57cec5SDimitry Andric case Value::String:
7790b57cec5SDimitry Andric valueBegin();
7800b57cec5SDimitry Andric quote(OS, *V.getAsString());
7810b57cec5SDimitry Andric return;
7820b57cec5SDimitry Andric case Value::Array:
7830b57cec5SDimitry Andric return array([&] {
7840b57cec5SDimitry Andric for (const Value &E : *V.getAsArray())
7850b57cec5SDimitry Andric value(E);
7860b57cec5SDimitry Andric });
7870b57cec5SDimitry Andric case Value::Object:
7880b57cec5SDimitry Andric return object([&] {
7890b57cec5SDimitry Andric for (const Object::value_type *E : sortedElements(*V.getAsObject()))
7900b57cec5SDimitry Andric attribute(E->first, E->second);
7910b57cec5SDimitry Andric });
7920b57cec5SDimitry Andric }
7930b57cec5SDimitry Andric }
7940b57cec5SDimitry Andric
valueBegin()7950b57cec5SDimitry Andric void llvm::json::OStream::valueBegin() {
7960b57cec5SDimitry Andric assert(Stack.back().Ctx != Object && "Only attributes allowed here");
7970b57cec5SDimitry Andric if (Stack.back().HasValue) {
7980b57cec5SDimitry Andric assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
7990b57cec5SDimitry Andric OS << ',';
8000b57cec5SDimitry Andric }
8010b57cec5SDimitry Andric if (Stack.back().Ctx == Array)
8020b57cec5SDimitry Andric newline();
803e8d8bef9SDimitry Andric flushComment();
8040b57cec5SDimitry Andric Stack.back().HasValue = true;
8050b57cec5SDimitry Andric }
8060b57cec5SDimitry Andric
comment(llvm::StringRef Comment)807e8d8bef9SDimitry Andric void OStream::comment(llvm::StringRef Comment) {
808e8d8bef9SDimitry Andric assert(PendingComment.empty() && "Only one comment per value!");
809e8d8bef9SDimitry Andric PendingComment = Comment;
810e8d8bef9SDimitry Andric }
811e8d8bef9SDimitry Andric
flushComment()812e8d8bef9SDimitry Andric void OStream::flushComment() {
813e8d8bef9SDimitry Andric if (PendingComment.empty())
814e8d8bef9SDimitry Andric return;
815e8d8bef9SDimitry Andric OS << (IndentSize ? "/* " : "/*");
816e8d8bef9SDimitry Andric // Be sure not to accidentally emit "*/". Transform to "* /".
817e8d8bef9SDimitry Andric while (!PendingComment.empty()) {
818e8d8bef9SDimitry Andric auto Pos = PendingComment.find("*/");
819e8d8bef9SDimitry Andric if (Pos == StringRef::npos) {
820e8d8bef9SDimitry Andric OS << PendingComment;
821e8d8bef9SDimitry Andric PendingComment = "";
822e8d8bef9SDimitry Andric } else {
823e8d8bef9SDimitry Andric OS << PendingComment.take_front(Pos) << "* /";
824e8d8bef9SDimitry Andric PendingComment = PendingComment.drop_front(Pos + 2);
825e8d8bef9SDimitry Andric }
826e8d8bef9SDimitry Andric }
827e8d8bef9SDimitry Andric OS << (IndentSize ? " */" : "*/");
828e8d8bef9SDimitry Andric // Comments are on their own line unless attached to an attribute value.
829e8d8bef9SDimitry Andric if (Stack.size() > 1 && Stack.back().Ctx == Singleton) {
830e8d8bef9SDimitry Andric if (IndentSize)
831e8d8bef9SDimitry Andric OS << ' ';
832e8d8bef9SDimitry Andric } else {
833e8d8bef9SDimitry Andric newline();
834e8d8bef9SDimitry Andric }
835e8d8bef9SDimitry Andric }
836e8d8bef9SDimitry Andric
newline()8370b57cec5SDimitry Andric void llvm::json::OStream::newline() {
8380b57cec5SDimitry Andric if (IndentSize) {
8390b57cec5SDimitry Andric OS.write('\n');
8400b57cec5SDimitry Andric OS.indent(Indent);
8410b57cec5SDimitry Andric }
8420b57cec5SDimitry Andric }
8430b57cec5SDimitry Andric
arrayBegin()8440b57cec5SDimitry Andric void llvm::json::OStream::arrayBegin() {
8450b57cec5SDimitry Andric valueBegin();
8460b57cec5SDimitry Andric Stack.emplace_back();
8470b57cec5SDimitry Andric Stack.back().Ctx = Array;
8480b57cec5SDimitry Andric Indent += IndentSize;
8490b57cec5SDimitry Andric OS << '[';
8500b57cec5SDimitry Andric }
8510b57cec5SDimitry Andric
arrayEnd()8520b57cec5SDimitry Andric void llvm::json::OStream::arrayEnd() {
8530b57cec5SDimitry Andric assert(Stack.back().Ctx == Array);
8540b57cec5SDimitry Andric Indent -= IndentSize;
8550b57cec5SDimitry Andric if (Stack.back().HasValue)
8560b57cec5SDimitry Andric newline();
8570b57cec5SDimitry Andric OS << ']';
858e8d8bef9SDimitry Andric assert(PendingComment.empty());
8590b57cec5SDimitry Andric Stack.pop_back();
8600b57cec5SDimitry Andric assert(!Stack.empty());
8610b57cec5SDimitry Andric }
8620b57cec5SDimitry Andric
objectBegin()8630b57cec5SDimitry Andric void llvm::json::OStream::objectBegin() {
8640b57cec5SDimitry Andric valueBegin();
8650b57cec5SDimitry Andric Stack.emplace_back();
8660b57cec5SDimitry Andric Stack.back().Ctx = Object;
8670b57cec5SDimitry Andric Indent += IndentSize;
8680b57cec5SDimitry Andric OS << '{';
8690b57cec5SDimitry Andric }
8700b57cec5SDimitry Andric
objectEnd()8710b57cec5SDimitry Andric void llvm::json::OStream::objectEnd() {
8720b57cec5SDimitry Andric assert(Stack.back().Ctx == Object);
8730b57cec5SDimitry Andric Indent -= IndentSize;
8740b57cec5SDimitry Andric if (Stack.back().HasValue)
8750b57cec5SDimitry Andric newline();
8760b57cec5SDimitry Andric OS << '}';
877e8d8bef9SDimitry Andric assert(PendingComment.empty());
8780b57cec5SDimitry Andric Stack.pop_back();
8790b57cec5SDimitry Andric assert(!Stack.empty());
8800b57cec5SDimitry Andric }
8810b57cec5SDimitry Andric
attributeBegin(llvm::StringRef Key)8820b57cec5SDimitry Andric void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
8830b57cec5SDimitry Andric assert(Stack.back().Ctx == Object);
8840b57cec5SDimitry Andric if (Stack.back().HasValue)
8850b57cec5SDimitry Andric OS << ',';
8860b57cec5SDimitry Andric newline();
887e8d8bef9SDimitry Andric flushComment();
8880b57cec5SDimitry Andric Stack.back().HasValue = true;
8890b57cec5SDimitry Andric Stack.emplace_back();
8900b57cec5SDimitry Andric Stack.back().Ctx = Singleton;
8910b57cec5SDimitry Andric if (LLVM_LIKELY(isUTF8(Key))) {
8920b57cec5SDimitry Andric quote(OS, Key);
8930b57cec5SDimitry Andric } else {
8940b57cec5SDimitry Andric assert(false && "Invalid UTF-8 in attribute key");
8950b57cec5SDimitry Andric quote(OS, fixUTF8(Key));
8960b57cec5SDimitry Andric }
8970b57cec5SDimitry Andric OS.write(':');
8980b57cec5SDimitry Andric if (IndentSize)
8990b57cec5SDimitry Andric OS.write(' ');
9000b57cec5SDimitry Andric }
9010b57cec5SDimitry Andric
attributeEnd()9020b57cec5SDimitry Andric void llvm::json::OStream::attributeEnd() {
9030b57cec5SDimitry Andric assert(Stack.back().Ctx == Singleton);
9040b57cec5SDimitry Andric assert(Stack.back().HasValue && "Attribute must have a value");
905e8d8bef9SDimitry Andric assert(PendingComment.empty());
9060b57cec5SDimitry Andric Stack.pop_back();
9070b57cec5SDimitry Andric assert(Stack.back().Ctx == Object);
9080b57cec5SDimitry Andric }
9090b57cec5SDimitry Andric
rawValueBegin()910e8d8bef9SDimitry Andric raw_ostream &llvm::json::OStream::rawValueBegin() {
911e8d8bef9SDimitry Andric valueBegin();
912e8d8bef9SDimitry Andric Stack.emplace_back();
913e8d8bef9SDimitry Andric Stack.back().Ctx = RawValue;
914e8d8bef9SDimitry Andric return OS;
915e8d8bef9SDimitry Andric }
916e8d8bef9SDimitry Andric
rawValueEnd()917e8d8bef9SDimitry Andric void llvm::json::OStream::rawValueEnd() {
918e8d8bef9SDimitry Andric assert(Stack.back().Ctx == RawValue);
919e8d8bef9SDimitry Andric Stack.pop_back();
920e8d8bef9SDimitry Andric }
921e8d8bef9SDimitry Andric
9220b57cec5SDimitry Andric } // namespace json
9230b57cec5SDimitry Andric } // namespace llvm
9240b57cec5SDimitry Andric
format(const llvm::json::Value & E,raw_ostream & OS,StringRef Options)9250b57cec5SDimitry Andric void llvm::format_provider<llvm::json::Value>::format(
9260b57cec5SDimitry Andric const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
9270b57cec5SDimitry Andric unsigned IndentAmount = 0;
9280b57cec5SDimitry Andric if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount))
9290b57cec5SDimitry Andric llvm_unreachable("json::Value format options should be an integer");
9300b57cec5SDimitry Andric json::OStream(OS, IndentAmount).value(E);
9310b57cec5SDimitry Andric }
9320b57cec5SDimitry Andric
933