//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "llvm/Support/JSON.h"
10e8d8bef9SDimitry Andric #include "llvm/ADT/STLExtras.h"
11*fe013be4SDimitry Andric #include "llvm/ADT/StringExtras.h"
120b57cec5SDimitry Andric #include "llvm/Support/ConvertUTF.h"
13e8d8bef9SDimitry Andric #include "llvm/Support/Error.h"
140b57cec5SDimitry Andric #include "llvm/Support/Format.h"
1504eeddc0SDimitry Andric #include "llvm/Support/NativeFormatting.h"
16*fe013be4SDimitry Andric #include "llvm/Support/raw_ostream.h"
170b57cec5SDimitry Andric #include <cctype>
18*fe013be4SDimitry Andric #include <cerrno>
19bdd1243dSDimitry Andric #include <optional>
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric namespace llvm {
220b57cec5SDimitry Andric namespace json {
230b57cec5SDimitry Andric 
operator [](const ObjectKey & K)240b57cec5SDimitry Andric Value &Object::operator[](const ObjectKey &K) {
250b57cec5SDimitry Andric   return try_emplace(K, nullptr).first->getSecond();
260b57cec5SDimitry Andric }
operator [](ObjectKey && K)270b57cec5SDimitry Andric Value &Object::operator[](ObjectKey &&K) {
280b57cec5SDimitry Andric   return try_emplace(std::move(K), nullptr).first->getSecond();
290b57cec5SDimitry Andric }
get(StringRef K)300b57cec5SDimitry Andric Value *Object::get(StringRef K) {
310b57cec5SDimitry Andric   auto I = find(K);
320b57cec5SDimitry Andric   if (I == end())
330b57cec5SDimitry Andric     return nullptr;
340b57cec5SDimitry Andric   return &I->second;
350b57cec5SDimitry Andric }
get(StringRef K) const360b57cec5SDimitry Andric const Value *Object::get(StringRef K) const {
370b57cec5SDimitry Andric   auto I = find(K);
380b57cec5SDimitry Andric   if (I == end())
390b57cec5SDimitry Andric     return nullptr;
400b57cec5SDimitry Andric   return &I->second;
410b57cec5SDimitry Andric }
getNull(StringRef K) const42bdd1243dSDimitry Andric std::optional<std::nullptr_t> Object::getNull(StringRef K) const {
430b57cec5SDimitry Andric   if (auto *V = get(K))
440b57cec5SDimitry Andric     return V->getAsNull();
45bdd1243dSDimitry Andric   return std::nullopt;
460b57cec5SDimitry Andric }
getBoolean(StringRef K) const47bdd1243dSDimitry Andric std::optional<bool> Object::getBoolean(StringRef K) const {
480b57cec5SDimitry Andric   if (auto *V = get(K))
490b57cec5SDimitry Andric     return V->getAsBoolean();
50bdd1243dSDimitry Andric   return std::nullopt;
510b57cec5SDimitry Andric }
getNumber(StringRef K) const52bdd1243dSDimitry Andric std::optional<double> Object::getNumber(StringRef K) const {
530b57cec5SDimitry Andric   if (auto *V = get(K))
540b57cec5SDimitry Andric     return V->getAsNumber();
55bdd1243dSDimitry Andric   return std::nullopt;
560b57cec5SDimitry Andric }
getInteger(StringRef K) const57bdd1243dSDimitry Andric std::optional<int64_t> Object::getInteger(StringRef K) const {
580b57cec5SDimitry Andric   if (auto *V = get(K))
590b57cec5SDimitry Andric     return V->getAsInteger();
60bdd1243dSDimitry Andric   return std::nullopt;
610b57cec5SDimitry Andric }
getString(StringRef K) const62bdd1243dSDimitry Andric std::optional<llvm::StringRef> Object::getString(StringRef K) const {
630b57cec5SDimitry Andric   if (auto *V = get(K))
640b57cec5SDimitry Andric     return V->getAsString();
65bdd1243dSDimitry Andric   return std::nullopt;
660b57cec5SDimitry Andric }
getObject(StringRef K) const670b57cec5SDimitry Andric const json::Object *Object::getObject(StringRef K) const {
680b57cec5SDimitry Andric   if (auto *V = get(K))
690b57cec5SDimitry Andric     return V->getAsObject();
700b57cec5SDimitry Andric   return nullptr;
710b57cec5SDimitry Andric }
getObject(StringRef K)720b57cec5SDimitry Andric json::Object *Object::getObject(StringRef K) {
730b57cec5SDimitry Andric   if (auto *V = get(K))
740b57cec5SDimitry Andric     return V->getAsObject();
750b57cec5SDimitry Andric   return nullptr;
760b57cec5SDimitry Andric }
getArray(StringRef K) const770b57cec5SDimitry Andric const json::Array *Object::getArray(StringRef K) const {
780b57cec5SDimitry Andric   if (auto *V = get(K))
790b57cec5SDimitry Andric     return V->getAsArray();
800b57cec5SDimitry Andric   return nullptr;
810b57cec5SDimitry Andric }
getArray(StringRef K)820b57cec5SDimitry Andric json::Array *Object::getArray(StringRef K) {
830b57cec5SDimitry Andric   if (auto *V = get(K))
840b57cec5SDimitry Andric     return V->getAsArray();
850b57cec5SDimitry Andric   return nullptr;
860b57cec5SDimitry Andric }
operator ==(const Object & LHS,const Object & RHS)870b57cec5SDimitry Andric bool operator==(const Object &LHS, const Object &RHS) {
880b57cec5SDimitry Andric   if (LHS.size() != RHS.size())
890b57cec5SDimitry Andric     return false;
900b57cec5SDimitry Andric   for (const auto &L : LHS) {
910b57cec5SDimitry Andric     auto R = RHS.find(L.first);
920b57cec5SDimitry Andric     if (R == RHS.end() || L.second != R->second)
930b57cec5SDimitry Andric       return false;
940b57cec5SDimitry Andric   }
950b57cec5SDimitry Andric   return true;
960b57cec5SDimitry Andric }
970b57cec5SDimitry Andric 
Array(std::initializer_list<Value> Elements)980b57cec5SDimitry Andric Array::Array(std::initializer_list<Value> Elements) {
990b57cec5SDimitry Andric   V.reserve(Elements.size());
1000b57cec5SDimitry Andric   for (const Value &V : Elements) {
1010b57cec5SDimitry Andric     emplace_back(nullptr);
1020b57cec5SDimitry Andric     back().moveFrom(std::move(V));
1030b57cec5SDimitry Andric   }
1040b57cec5SDimitry Andric }
1050b57cec5SDimitry Andric 
Value(std::initializer_list<Value> Elements)1060b57cec5SDimitry Andric Value::Value(std::initializer_list<Value> Elements)
1070b57cec5SDimitry Andric     : Value(json::Array(Elements)) {}
1080b57cec5SDimitry Andric 
copyFrom(const Value & M)1090b57cec5SDimitry Andric void Value::copyFrom(const Value &M) {
1100b57cec5SDimitry Andric   Type = M.Type;
1110b57cec5SDimitry Andric   switch (Type) {
1120b57cec5SDimitry Andric   case T_Null:
1130b57cec5SDimitry Andric   case T_Boolean:
1140b57cec5SDimitry Andric   case T_Double:
1150b57cec5SDimitry Andric   case T_Integer:
116349cc55cSDimitry Andric   case T_UINT64:
117e8d8bef9SDimitry Andric     memcpy(&Union, &M.Union, sizeof(Union));
1180b57cec5SDimitry Andric     break;
1190b57cec5SDimitry Andric   case T_StringRef:
1200b57cec5SDimitry Andric     create<StringRef>(M.as<StringRef>());
1210b57cec5SDimitry Andric     break;
1220b57cec5SDimitry Andric   case T_String:
1230b57cec5SDimitry Andric     create<std::string>(M.as<std::string>());
1240b57cec5SDimitry Andric     break;
1250b57cec5SDimitry Andric   case T_Object:
1260b57cec5SDimitry Andric     create<json::Object>(M.as<json::Object>());
1270b57cec5SDimitry Andric     break;
1280b57cec5SDimitry Andric   case T_Array:
1290b57cec5SDimitry Andric     create<json::Array>(M.as<json::Array>());
1300b57cec5SDimitry Andric     break;
1310b57cec5SDimitry Andric   }
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric 
moveFrom(const Value && M)1340b57cec5SDimitry Andric void Value::moveFrom(const Value &&M) {
1350b57cec5SDimitry Andric   Type = M.Type;
1360b57cec5SDimitry Andric   switch (Type) {
1370b57cec5SDimitry Andric   case T_Null:
1380b57cec5SDimitry Andric   case T_Boolean:
1390b57cec5SDimitry Andric   case T_Double:
1400b57cec5SDimitry Andric   case T_Integer:
141349cc55cSDimitry Andric   case T_UINT64:
142e8d8bef9SDimitry Andric     memcpy(&Union, &M.Union, sizeof(Union));
1430b57cec5SDimitry Andric     break;
1440b57cec5SDimitry Andric   case T_StringRef:
1450b57cec5SDimitry Andric     create<StringRef>(M.as<StringRef>());
1460b57cec5SDimitry Andric     break;
1470b57cec5SDimitry Andric   case T_String:
1480b57cec5SDimitry Andric     create<std::string>(std::move(M.as<std::string>()));
1490b57cec5SDimitry Andric     M.Type = T_Null;
1500b57cec5SDimitry Andric     break;
1510b57cec5SDimitry Andric   case T_Object:
1520b57cec5SDimitry Andric     create<json::Object>(std::move(M.as<json::Object>()));
1530b57cec5SDimitry Andric     M.Type = T_Null;
1540b57cec5SDimitry Andric     break;
1550b57cec5SDimitry Andric   case T_Array:
1560b57cec5SDimitry Andric     create<json::Array>(std::move(M.as<json::Array>()));
1570b57cec5SDimitry Andric     M.Type = T_Null;
1580b57cec5SDimitry Andric     break;
1590b57cec5SDimitry Andric   }
1600b57cec5SDimitry Andric }
1610b57cec5SDimitry Andric 
destroy()1620b57cec5SDimitry Andric void Value::destroy() {
1630b57cec5SDimitry Andric   switch (Type) {
1640b57cec5SDimitry Andric   case T_Null:
1650b57cec5SDimitry Andric   case T_Boolean:
1660b57cec5SDimitry Andric   case T_Double:
1670b57cec5SDimitry Andric   case T_Integer:
168349cc55cSDimitry Andric   case T_UINT64:
1690b57cec5SDimitry Andric     break;
1700b57cec5SDimitry Andric   case T_StringRef:
1710b57cec5SDimitry Andric     as<StringRef>().~StringRef();
1720b57cec5SDimitry Andric     break;
1730b57cec5SDimitry Andric   case T_String:
1740b57cec5SDimitry Andric     as<std::string>().~basic_string();
1750b57cec5SDimitry Andric     break;
1760b57cec5SDimitry Andric   case T_Object:
1770b57cec5SDimitry Andric     as<json::Object>().~Object();
1780b57cec5SDimitry Andric     break;
1790b57cec5SDimitry Andric   case T_Array:
1800b57cec5SDimitry Andric     as<json::Array>().~Array();
1810b57cec5SDimitry Andric     break;
1820b57cec5SDimitry Andric   }
1830b57cec5SDimitry Andric }
1840b57cec5SDimitry Andric 
operator ==(const Value & L,const Value & R)1850b57cec5SDimitry Andric bool operator==(const Value &L, const Value &R) {
1860b57cec5SDimitry Andric   if (L.kind() != R.kind())
1870b57cec5SDimitry Andric     return false;
1880b57cec5SDimitry Andric   switch (L.kind()) {
1890b57cec5SDimitry Andric   case Value::Null:
1900b57cec5SDimitry Andric     return *L.getAsNull() == *R.getAsNull();
1910b57cec5SDimitry Andric   case Value::Boolean:
1920b57cec5SDimitry Andric     return *L.getAsBoolean() == *R.getAsBoolean();
1930b57cec5SDimitry Andric   case Value::Number:
1940b57cec5SDimitry Andric     // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
1950b57cec5SDimitry Andric     // The same integer must convert to the same double, per the standard.
1960b57cec5SDimitry Andric     // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
1970b57cec5SDimitry Andric     // So we avoid floating point promotion for exact comparisons.
1980b57cec5SDimitry Andric     if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
1990b57cec5SDimitry Andric       return L.getAsInteger() == R.getAsInteger();
2000b57cec5SDimitry Andric     return *L.getAsNumber() == *R.getAsNumber();
2010b57cec5SDimitry Andric   case Value::String:
2020b57cec5SDimitry Andric     return *L.getAsString() == *R.getAsString();
2030b57cec5SDimitry Andric   case Value::Array:
2040b57cec5SDimitry Andric     return *L.getAsArray() == *R.getAsArray();
2050b57cec5SDimitry Andric   case Value::Object:
2060b57cec5SDimitry Andric     return *L.getAsObject() == *R.getAsObject();
2070b57cec5SDimitry Andric   }
2080b57cec5SDimitry Andric   llvm_unreachable("Unknown value kind");
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric 
report(llvm::StringLiteral Msg)211e8d8bef9SDimitry Andric void Path::report(llvm::StringLiteral Msg) {
212e8d8bef9SDimitry Andric   // Walk up to the root context, and count the number of segments.
213e8d8bef9SDimitry Andric   unsigned Count = 0;
214e8d8bef9SDimitry Andric   const Path *P;
215e8d8bef9SDimitry Andric   for (P = this; P->Parent != nullptr; P = P->Parent)
216e8d8bef9SDimitry Andric     ++Count;
217e8d8bef9SDimitry Andric   Path::Root *R = P->Seg.root();
218e8d8bef9SDimitry Andric   // Fill in the error message and copy the path (in reverse order).
219e8d8bef9SDimitry Andric   R->ErrorMessage = Msg;
220e8d8bef9SDimitry Andric   R->ErrorPath.resize(Count);
221e8d8bef9SDimitry Andric   auto It = R->ErrorPath.begin();
222e8d8bef9SDimitry Andric   for (P = this; P->Parent != nullptr; P = P->Parent)
223e8d8bef9SDimitry Andric     *It++ = P->Seg;
224e8d8bef9SDimitry Andric }
225e8d8bef9SDimitry Andric 
getError() const226e8d8bef9SDimitry Andric Error Path::Root::getError() const {
227e8d8bef9SDimitry Andric   std::string S;
228e8d8bef9SDimitry Andric   raw_string_ostream OS(S);
229e8d8bef9SDimitry Andric   OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
230e8d8bef9SDimitry Andric   if (ErrorPath.empty()) {
231e8d8bef9SDimitry Andric     if (!Name.empty())
232e8d8bef9SDimitry Andric       OS << " when parsing " << Name;
233e8d8bef9SDimitry Andric   } else {
234e8d8bef9SDimitry Andric     OS << " at " << (Name.empty() ? "(root)" : Name);
235e8d8bef9SDimitry Andric     for (const Path::Segment &S : llvm::reverse(ErrorPath)) {
236e8d8bef9SDimitry Andric       if (S.isField())
237e8d8bef9SDimitry Andric         OS << '.' << S.field();
238e8d8bef9SDimitry Andric       else
239e8d8bef9SDimitry Andric         OS << '[' << S.index() << ']';
240e8d8bef9SDimitry Andric     }
241e8d8bef9SDimitry Andric   }
242e8d8bef9SDimitry Andric   return createStringError(llvm::inconvertibleErrorCode(), OS.str());
243e8d8bef9SDimitry Andric }
244e8d8bef9SDimitry Andric 
245e8d8bef9SDimitry Andric namespace {
246e8d8bef9SDimitry Andric 
sortedElements(const Object & O)247e8d8bef9SDimitry Andric std::vector<const Object::value_type *> sortedElements(const Object &O) {
248e8d8bef9SDimitry Andric   std::vector<const Object::value_type *> Elements;
249e8d8bef9SDimitry Andric   for (const auto &E : O)
250e8d8bef9SDimitry Andric     Elements.push_back(&E);
251e8d8bef9SDimitry Andric   llvm::sort(Elements,
252e8d8bef9SDimitry Andric              [](const Object::value_type *L, const Object::value_type *R) {
253e8d8bef9SDimitry Andric                return L->first < R->first;
254e8d8bef9SDimitry Andric              });
255e8d8bef9SDimitry Andric   return Elements;
256e8d8bef9SDimitry Andric }
257e8d8bef9SDimitry Andric 
258e8d8bef9SDimitry Andric // Prints a one-line version of a value that isn't our main focus.
259e8d8bef9SDimitry Andric // We interleave writes to OS and JOS, exploiting the lack of extra buffering.
260e8d8bef9SDimitry Andric // This is OK as we own the implementation.
abbreviate(const Value & V,OStream & JOS)261e8d8bef9SDimitry Andric void abbreviate(const Value &V, OStream &JOS) {
262e8d8bef9SDimitry Andric   switch (V.kind()) {
263e8d8bef9SDimitry Andric   case Value::Array:
264e8d8bef9SDimitry Andric     JOS.rawValue(V.getAsArray()->empty() ? "[]" : "[ ... ]");
265e8d8bef9SDimitry Andric     break;
266e8d8bef9SDimitry Andric   case Value::Object:
267e8d8bef9SDimitry Andric     JOS.rawValue(V.getAsObject()->empty() ? "{}" : "{ ... }");
268e8d8bef9SDimitry Andric     break;
269e8d8bef9SDimitry Andric   case Value::String: {
270e8d8bef9SDimitry Andric     llvm::StringRef S = *V.getAsString();
271e8d8bef9SDimitry Andric     if (S.size() < 40) {
272e8d8bef9SDimitry Andric       JOS.value(V);
273e8d8bef9SDimitry Andric     } else {
274e8d8bef9SDimitry Andric       std::string Truncated = fixUTF8(S.take_front(37));
275e8d8bef9SDimitry Andric       Truncated.append("...");
276e8d8bef9SDimitry Andric       JOS.value(Truncated);
277e8d8bef9SDimitry Andric     }
278e8d8bef9SDimitry Andric     break;
279e8d8bef9SDimitry Andric   }
280e8d8bef9SDimitry Andric   default:
281e8d8bef9SDimitry Andric     JOS.value(V);
282e8d8bef9SDimitry Andric   }
283e8d8bef9SDimitry Andric }
284e8d8bef9SDimitry Andric 
285e8d8bef9SDimitry Andric // Prints a semi-expanded version of a value that is our main focus.
286e8d8bef9SDimitry Andric // Array/Object entries are printed, but not recursively as they may be huge.
abbreviateChildren(const Value & V,OStream & JOS)287e8d8bef9SDimitry Andric void abbreviateChildren(const Value &V, OStream &JOS) {
288e8d8bef9SDimitry Andric   switch (V.kind()) {
289e8d8bef9SDimitry Andric   case Value::Array:
290e8d8bef9SDimitry Andric     JOS.array([&] {
291e8d8bef9SDimitry Andric       for (const auto &I : *V.getAsArray())
292e8d8bef9SDimitry Andric         abbreviate(I, JOS);
293e8d8bef9SDimitry Andric     });
294e8d8bef9SDimitry Andric     break;
295e8d8bef9SDimitry Andric   case Value::Object:
296e8d8bef9SDimitry Andric     JOS.object([&] {
297e8d8bef9SDimitry Andric       for (const auto *KV : sortedElements(*V.getAsObject())) {
298e8d8bef9SDimitry Andric         JOS.attributeBegin(KV->first);
299e8d8bef9SDimitry Andric         abbreviate(KV->second, JOS);
300e8d8bef9SDimitry Andric         JOS.attributeEnd();
301e8d8bef9SDimitry Andric       }
302e8d8bef9SDimitry Andric     });
303e8d8bef9SDimitry Andric     break;
304e8d8bef9SDimitry Andric   default:
305e8d8bef9SDimitry Andric     JOS.value(V);
306e8d8bef9SDimitry Andric   }
307e8d8bef9SDimitry Andric }
308e8d8bef9SDimitry Andric 
309e8d8bef9SDimitry Andric } // namespace
310e8d8bef9SDimitry Andric 
printErrorContext(const Value & R,raw_ostream & OS) const311e8d8bef9SDimitry Andric void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
312e8d8bef9SDimitry Andric   OStream JOS(OS, /*IndentSize=*/2);
313e8d8bef9SDimitry Andric   // PrintValue recurses down the path, printing the ancestors of our target.
314e8d8bef9SDimitry Andric   // Siblings of nodes along the path are printed with abbreviate(), and the
315e8d8bef9SDimitry Andric   // target itself is printed with the somewhat richer abbreviateChildren().
316e8d8bef9SDimitry Andric   // 'Recurse' is the lambda itself, to allow recursive calls.
317e8d8bef9SDimitry Andric   auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
318e8d8bef9SDimitry Andric     // Print the target node itself, with the error as a comment.
319e8d8bef9SDimitry Andric     // Also used if we can't follow our path, e.g. it names a field that
320e8d8bef9SDimitry Andric     // *should* exist but doesn't.
321e8d8bef9SDimitry Andric     auto HighlightCurrent = [&] {
322e8d8bef9SDimitry Andric       std::string Comment = "error: ";
323e8d8bef9SDimitry Andric       Comment.append(ErrorMessage.data(), ErrorMessage.size());
324e8d8bef9SDimitry Andric       JOS.comment(Comment);
325e8d8bef9SDimitry Andric       abbreviateChildren(V, JOS);
326e8d8bef9SDimitry Andric     };
327e8d8bef9SDimitry Andric     if (Path.empty()) // We reached our target.
328e8d8bef9SDimitry Andric       return HighlightCurrent();
329e8d8bef9SDimitry Andric     const Segment &S = Path.back(); // Path is in reverse order.
330e8d8bef9SDimitry Andric     if (S.isField()) {
331e8d8bef9SDimitry Andric       // Current node is an object, path names a field.
332e8d8bef9SDimitry Andric       llvm::StringRef FieldName = S.field();
333e8d8bef9SDimitry Andric       const Object *O = V.getAsObject();
334e8d8bef9SDimitry Andric       if (!O || !O->get(FieldName))
335e8d8bef9SDimitry Andric         return HighlightCurrent();
336e8d8bef9SDimitry Andric       JOS.object([&] {
337e8d8bef9SDimitry Andric         for (const auto *KV : sortedElements(*O)) {
338e8d8bef9SDimitry Andric           JOS.attributeBegin(KV->first);
339e8d8bef9SDimitry Andric           if (FieldName.equals(KV->first))
340e8d8bef9SDimitry Andric             Recurse(KV->second, Path.drop_back(), Recurse);
341e8d8bef9SDimitry Andric           else
342e8d8bef9SDimitry Andric             abbreviate(KV->second, JOS);
343e8d8bef9SDimitry Andric           JOS.attributeEnd();
344e8d8bef9SDimitry Andric         }
345e8d8bef9SDimitry Andric       });
346e8d8bef9SDimitry Andric     } else {
347e8d8bef9SDimitry Andric       // Current node is an array, path names an element.
348e8d8bef9SDimitry Andric       const Array *A = V.getAsArray();
349e8d8bef9SDimitry Andric       if (!A || S.index() >= A->size())
350e8d8bef9SDimitry Andric         return HighlightCurrent();
351e8d8bef9SDimitry Andric       JOS.array([&] {
352e8d8bef9SDimitry Andric         unsigned Current = 0;
353e8d8bef9SDimitry Andric         for (const auto &V : *A) {
354e8d8bef9SDimitry Andric           if (Current++ == S.index())
355e8d8bef9SDimitry Andric             Recurse(V, Path.drop_back(), Recurse);
356e8d8bef9SDimitry Andric           else
357e8d8bef9SDimitry Andric             abbreviate(V, JOS);
358e8d8bef9SDimitry Andric         }
359e8d8bef9SDimitry Andric       });
360e8d8bef9SDimitry Andric     }
361e8d8bef9SDimitry Andric   };
362e8d8bef9SDimitry Andric   PrintValue(R, ErrorPath, PrintValue);
363e8d8bef9SDimitry Andric }
364e8d8bef9SDimitry Andric 
3650b57cec5SDimitry Andric namespace {
3660b57cec5SDimitry Andric // Simple recursive-descent JSON parser.
3670b57cec5SDimitry Andric class Parser {
3680b57cec5SDimitry Andric public:
Parser(StringRef JSON)3690b57cec5SDimitry Andric   Parser(StringRef JSON)
3700b57cec5SDimitry Andric       : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
3710b57cec5SDimitry Andric 
checkUTF8()3720b57cec5SDimitry Andric   bool checkUTF8() {
3730b57cec5SDimitry Andric     size_t ErrOffset;
3740b57cec5SDimitry Andric     if (isUTF8(StringRef(Start, End - Start), &ErrOffset))
3750b57cec5SDimitry Andric       return true;
3760b57cec5SDimitry Andric     P = Start + ErrOffset; // For line/column calculation.
3770b57cec5SDimitry Andric     return parseError("Invalid UTF-8 sequence");
3780b57cec5SDimitry Andric   }
3790b57cec5SDimitry Andric 
3800b57cec5SDimitry Andric   bool parseValue(Value &Out);
3810b57cec5SDimitry Andric 
assertEnd()3820b57cec5SDimitry Andric   bool assertEnd() {
3830b57cec5SDimitry Andric     eatWhitespace();
3840b57cec5SDimitry Andric     if (P == End)
3850b57cec5SDimitry Andric       return true;
3860b57cec5SDimitry Andric     return parseError("Text after end of document");
3870b57cec5SDimitry Andric   }
3880b57cec5SDimitry Andric 
takeError()3890b57cec5SDimitry Andric   Error takeError() {
3900b57cec5SDimitry Andric     assert(Err);
3910b57cec5SDimitry Andric     return std::move(*Err);
3920b57cec5SDimitry Andric   }
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric private:
eatWhitespace()3950b57cec5SDimitry Andric   void eatWhitespace() {
3960b57cec5SDimitry Andric     while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t'))
3970b57cec5SDimitry Andric       ++P;
3980b57cec5SDimitry Andric   }
3990b57cec5SDimitry Andric 
4000b57cec5SDimitry Andric   // On invalid syntax, parseX() functions return false and set Err.
4010b57cec5SDimitry Andric   bool parseNumber(char First, Value &Out);
4020b57cec5SDimitry Andric   bool parseString(std::string &Out);
4030b57cec5SDimitry Andric   bool parseUnicode(std::string &Out);
4040b57cec5SDimitry Andric   bool parseError(const char *Msg); // always returns false
4050b57cec5SDimitry Andric 
next()4060b57cec5SDimitry Andric   char next() { return P == End ? 0 : *P++; }
peek()4070b57cec5SDimitry Andric   char peek() { return P == End ? 0 : *P; }
isNumber(char C)4080b57cec5SDimitry Andric   static bool isNumber(char C) {
4090b57cec5SDimitry Andric     return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' ||
4100b57cec5SDimitry Andric            C == '5' || C == '6' || C == '7' || C == '8' || C == '9' ||
4110b57cec5SDimitry Andric            C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.';
4120b57cec5SDimitry Andric   }
4130b57cec5SDimitry Andric 
414bdd1243dSDimitry Andric   std::optional<Error> Err;
4150b57cec5SDimitry Andric   const char *Start, *P, *End;
4160b57cec5SDimitry Andric };
4170b57cec5SDimitry Andric 
parseValue(Value & Out)4180b57cec5SDimitry Andric bool Parser::parseValue(Value &Out) {
4190b57cec5SDimitry Andric   eatWhitespace();
4200b57cec5SDimitry Andric   if (P == End)
4210b57cec5SDimitry Andric     return parseError("Unexpected EOF");
4220b57cec5SDimitry Andric   switch (char C = next()) {
4230b57cec5SDimitry Andric   // Bare null/true/false are easy - first char identifies them.
4240b57cec5SDimitry Andric   case 'n':
4250b57cec5SDimitry Andric     Out = nullptr;
4260b57cec5SDimitry Andric     return (next() == 'u' && next() == 'l' && next() == 'l') ||
4270b57cec5SDimitry Andric            parseError("Invalid JSON value (null?)");
4280b57cec5SDimitry Andric   case 't':
4290b57cec5SDimitry Andric     Out = true;
4300b57cec5SDimitry Andric     return (next() == 'r' && next() == 'u' && next() == 'e') ||
4310b57cec5SDimitry Andric            parseError("Invalid JSON value (true?)");
4320b57cec5SDimitry Andric   case 'f':
4330b57cec5SDimitry Andric     Out = false;
4340b57cec5SDimitry Andric     return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') ||
4350b57cec5SDimitry Andric            parseError("Invalid JSON value (false?)");
4360b57cec5SDimitry Andric   case '"': {
4370b57cec5SDimitry Andric     std::string S;
4380b57cec5SDimitry Andric     if (parseString(S)) {
4390b57cec5SDimitry Andric       Out = std::move(S);
4400b57cec5SDimitry Andric       return true;
4410b57cec5SDimitry Andric     }
4420b57cec5SDimitry Andric     return false;
4430b57cec5SDimitry Andric   }
4440b57cec5SDimitry Andric   case '[': {
4450b57cec5SDimitry Andric     Out = Array{};
4460b57cec5SDimitry Andric     Array &A = *Out.getAsArray();
4470b57cec5SDimitry Andric     eatWhitespace();
4480b57cec5SDimitry Andric     if (peek() == ']') {
4490b57cec5SDimitry Andric       ++P;
4500b57cec5SDimitry Andric       return true;
4510b57cec5SDimitry Andric     }
4520b57cec5SDimitry Andric     for (;;) {
4530b57cec5SDimitry Andric       A.emplace_back(nullptr);
4540b57cec5SDimitry Andric       if (!parseValue(A.back()))
4550b57cec5SDimitry Andric         return false;
4560b57cec5SDimitry Andric       eatWhitespace();
4570b57cec5SDimitry Andric       switch (next()) {
4580b57cec5SDimitry Andric       case ',':
4590b57cec5SDimitry Andric         eatWhitespace();
4600b57cec5SDimitry Andric         continue;
4610b57cec5SDimitry Andric       case ']':
4620b57cec5SDimitry Andric         return true;
4630b57cec5SDimitry Andric       default:
4640b57cec5SDimitry Andric         return parseError("Expected , or ] after array element");
4650b57cec5SDimitry Andric       }
4660b57cec5SDimitry Andric     }
4670b57cec5SDimitry Andric   }
4680b57cec5SDimitry Andric   case '{': {
4690b57cec5SDimitry Andric     Out = Object{};
4700b57cec5SDimitry Andric     Object &O = *Out.getAsObject();
4710b57cec5SDimitry Andric     eatWhitespace();
4720b57cec5SDimitry Andric     if (peek() == '}') {
4730b57cec5SDimitry Andric       ++P;
4740b57cec5SDimitry Andric       return true;
4750b57cec5SDimitry Andric     }
4760b57cec5SDimitry Andric     for (;;) {
4770b57cec5SDimitry Andric       if (next() != '"')
4780b57cec5SDimitry Andric         return parseError("Expected object key");
4790b57cec5SDimitry Andric       std::string K;
4800b57cec5SDimitry Andric       if (!parseString(K))
4810b57cec5SDimitry Andric         return false;
4820b57cec5SDimitry Andric       eatWhitespace();
4830b57cec5SDimitry Andric       if (next() != ':')
4840b57cec5SDimitry Andric         return parseError("Expected : after object key");
4850b57cec5SDimitry Andric       eatWhitespace();
4860b57cec5SDimitry Andric       if (!parseValue(O[std::move(K)]))
4870b57cec5SDimitry Andric         return false;
4880b57cec5SDimitry Andric       eatWhitespace();
4890b57cec5SDimitry Andric       switch (next()) {
4900b57cec5SDimitry Andric       case ',':
4910b57cec5SDimitry Andric         eatWhitespace();
4920b57cec5SDimitry Andric         continue;
4930b57cec5SDimitry Andric       case '}':
4940b57cec5SDimitry Andric         return true;
4950b57cec5SDimitry Andric       default:
4960b57cec5SDimitry Andric         return parseError("Expected , or } after object property");
4970b57cec5SDimitry Andric       }
4980b57cec5SDimitry Andric     }
4990b57cec5SDimitry Andric   }
5000b57cec5SDimitry Andric   default:
5010b57cec5SDimitry Andric     if (isNumber(C))
5020b57cec5SDimitry Andric       return parseNumber(C, Out);
5030b57cec5SDimitry Andric     return parseError("Invalid JSON value");
5040b57cec5SDimitry Andric   }
5050b57cec5SDimitry Andric }
5060b57cec5SDimitry Andric 
parseNumber(char First,Value & Out)5070b57cec5SDimitry Andric bool Parser::parseNumber(char First, Value &Out) {
5080b57cec5SDimitry Andric   // Read the number into a string. (Must be null-terminated for strto*).
5090b57cec5SDimitry Andric   SmallString<24> S;
5100b57cec5SDimitry Andric   S.push_back(First);
5110b57cec5SDimitry Andric   while (isNumber(peek()))
5120b57cec5SDimitry Andric     S.push_back(next());
5130b57cec5SDimitry Andric   char *End;
5140b57cec5SDimitry Andric   // Try first to parse as integer, and if so preserve full 64 bits.
51581ad6265SDimitry Andric   // We check for errno for out of bounds errors and for End == S.end()
51681ad6265SDimitry Andric   // to make sure that the numeric string is not malformed.
51781ad6265SDimitry Andric   errno = 0;
51881ad6265SDimitry Andric   int64_t I = std::strtoll(S.c_str(), &End, 10);
51981ad6265SDimitry Andric   if (End == S.end() && errno != ERANGE) {
5200b57cec5SDimitry Andric     Out = int64_t(I);
5210b57cec5SDimitry Andric     return true;
5220b57cec5SDimitry Andric   }
52381ad6265SDimitry Andric   // strtroull has a special handling for negative numbers, but in this
52481ad6265SDimitry Andric   // case we don't want to do that because negative numbers were already
52581ad6265SDimitry Andric   // handled in the previous block.
52681ad6265SDimitry Andric   if (First != '-') {
52781ad6265SDimitry Andric     errno = 0;
52881ad6265SDimitry Andric     uint64_t UI = std::strtoull(S.c_str(), &End, 10);
52981ad6265SDimitry Andric     if (End == S.end() && errno != ERANGE) {
53081ad6265SDimitry Andric       Out = UI;
53181ad6265SDimitry Andric       return true;
53281ad6265SDimitry Andric     }
53381ad6265SDimitry Andric   }
5340b57cec5SDimitry Andric   // If it's not an integer
5350b57cec5SDimitry Andric   Out = std::strtod(S.c_str(), &End);
5360b57cec5SDimitry Andric   return End == S.end() || parseError("Invalid JSON value (number?)");
5370b57cec5SDimitry Andric }
5380b57cec5SDimitry Andric 
parseString(std::string & Out)5390b57cec5SDimitry Andric bool Parser::parseString(std::string &Out) {
5400b57cec5SDimitry Andric   // leading quote was already consumed.
5410b57cec5SDimitry Andric   for (char C = next(); C != '"'; C = next()) {
5420b57cec5SDimitry Andric     if (LLVM_UNLIKELY(P == End))
5430b57cec5SDimitry Andric       return parseError("Unterminated string");
5440b57cec5SDimitry Andric     if (LLVM_UNLIKELY((C & 0x1f) == C))
5450b57cec5SDimitry Andric       return parseError("Control character in string");
5460b57cec5SDimitry Andric     if (LLVM_LIKELY(C != '\\')) {
5470b57cec5SDimitry Andric       Out.push_back(C);
5480b57cec5SDimitry Andric       continue;
5490b57cec5SDimitry Andric     }
5500b57cec5SDimitry Andric     // Handle escape sequence.
5510b57cec5SDimitry Andric     switch (C = next()) {
5520b57cec5SDimitry Andric     case '"':
5530b57cec5SDimitry Andric     case '\\':
5540b57cec5SDimitry Andric     case '/':
5550b57cec5SDimitry Andric       Out.push_back(C);
5560b57cec5SDimitry Andric       break;
5570b57cec5SDimitry Andric     case 'b':
5580b57cec5SDimitry Andric       Out.push_back('\b');
5590b57cec5SDimitry Andric       break;
5600b57cec5SDimitry Andric     case 'f':
5610b57cec5SDimitry Andric       Out.push_back('\f');
5620b57cec5SDimitry Andric       break;
5630b57cec5SDimitry Andric     case 'n':
5640b57cec5SDimitry Andric       Out.push_back('\n');
5650b57cec5SDimitry Andric       break;
5660b57cec5SDimitry Andric     case 'r':
5670b57cec5SDimitry Andric       Out.push_back('\r');
5680b57cec5SDimitry Andric       break;
5690b57cec5SDimitry Andric     case 't':
5700b57cec5SDimitry Andric       Out.push_back('\t');
5710b57cec5SDimitry Andric       break;
5720b57cec5SDimitry Andric     case 'u':
5730b57cec5SDimitry Andric       if (!parseUnicode(Out))
5740b57cec5SDimitry Andric         return false;
5750b57cec5SDimitry Andric       break;
5760b57cec5SDimitry Andric     default:
5770b57cec5SDimitry Andric       return parseError("Invalid escape sequence");
5780b57cec5SDimitry Andric     }
5790b57cec5SDimitry Andric   }
5800b57cec5SDimitry Andric   return true;
5810b57cec5SDimitry Andric }
5820b57cec5SDimitry Andric 
encodeUtf8(uint32_t Rune,std::string & Out)5830b57cec5SDimitry Andric static void encodeUtf8(uint32_t Rune, std::string &Out) {
5840b57cec5SDimitry Andric   if (Rune < 0x80) {
5850b57cec5SDimitry Andric     Out.push_back(Rune & 0x7F);
5860b57cec5SDimitry Andric   } else if (Rune < 0x800) {
5870b57cec5SDimitry Andric     uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6);
5880b57cec5SDimitry Andric     uint8_t SecondByte = 0x80 | (Rune & 0x3F);
5890b57cec5SDimitry Andric     Out.push_back(FirstByte);
5900b57cec5SDimitry Andric     Out.push_back(SecondByte);
5910b57cec5SDimitry Andric   } else if (Rune < 0x10000) {
5920b57cec5SDimitry Andric     uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12);
5930b57cec5SDimitry Andric     uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6);
5940b57cec5SDimitry Andric     uint8_t ThirdByte = 0x80 | (Rune & 0x3F);
5950b57cec5SDimitry Andric     Out.push_back(FirstByte);
5960b57cec5SDimitry Andric     Out.push_back(SecondByte);
5970b57cec5SDimitry Andric     Out.push_back(ThirdByte);
5980b57cec5SDimitry Andric   } else if (Rune < 0x110000) {
5990b57cec5SDimitry Andric     uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18);
6000b57cec5SDimitry Andric     uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12);
6010b57cec5SDimitry Andric     uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6);
6020b57cec5SDimitry Andric     uint8_t FourthByte = 0x80 | (Rune & 0x3F);
6030b57cec5SDimitry Andric     Out.push_back(FirstByte);
6040b57cec5SDimitry Andric     Out.push_back(SecondByte);
6050b57cec5SDimitry Andric     Out.push_back(ThirdByte);
6060b57cec5SDimitry Andric     Out.push_back(FourthByte);
6070b57cec5SDimitry Andric   } else {
6080b57cec5SDimitry Andric     llvm_unreachable("Invalid codepoint");
6090b57cec5SDimitry Andric   }
6100b57cec5SDimitry Andric }
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric // Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
6130b57cec5SDimitry Andric // May parse several sequential escapes to ensure proper surrogate handling.
6140b57cec5SDimitry Andric // We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
6150b57cec5SDimitry Andric // These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
parseUnicode(std::string & Out)6160b57cec5SDimitry Andric bool Parser::parseUnicode(std::string &Out) {
6170b57cec5SDimitry Andric   // Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
6180b57cec5SDimitry Andric   auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); };
6190b57cec5SDimitry Andric   // Decodes 4 hex digits from the stream into Out, returns false on error.
6200b57cec5SDimitry Andric   auto Parse4Hex = [this](uint16_t &Out) -> bool {
6210b57cec5SDimitry Andric     Out = 0;
6220b57cec5SDimitry Andric     char Bytes[] = {next(), next(), next(), next()};
6230b57cec5SDimitry Andric     for (unsigned char C : Bytes) {
6240b57cec5SDimitry Andric       if (!std::isxdigit(C))
6250b57cec5SDimitry Andric         return parseError("Invalid \\u escape sequence");
6260b57cec5SDimitry Andric       Out <<= 4;
6270b57cec5SDimitry Andric       Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0');
6280b57cec5SDimitry Andric     }
6290b57cec5SDimitry Andric     return true;
6300b57cec5SDimitry Andric   };
6310b57cec5SDimitry Andric   uint16_t First; // UTF-16 code unit from the first \u escape.
6320b57cec5SDimitry Andric   if (!Parse4Hex(First))
6330b57cec5SDimitry Andric     return false;
6340b57cec5SDimitry Andric 
6350b57cec5SDimitry Andric   // We loop to allow proper surrogate-pair error handling.
6360b57cec5SDimitry Andric   while (true) {
6370b57cec5SDimitry Andric     // Case 1: the UTF-16 code unit is already a codepoint in the BMP.
6380b57cec5SDimitry Andric     if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) {
6390b57cec5SDimitry Andric       encodeUtf8(First, Out);
6400b57cec5SDimitry Andric       return true;
6410b57cec5SDimitry Andric     }
6420b57cec5SDimitry Andric 
6430b57cec5SDimitry Andric     // Case 2: it's an (unpaired) trailing surrogate.
6440b57cec5SDimitry Andric     if (LLVM_UNLIKELY(First >= 0xDC00)) {
6450b57cec5SDimitry Andric       Invalid();
6460b57cec5SDimitry Andric       return true;
6470b57cec5SDimitry Andric     }
6480b57cec5SDimitry Andric 
6490b57cec5SDimitry Andric     // Case 3: it's a leading surrogate. We expect a trailing one next.
6500b57cec5SDimitry Andric     // Case 3a: there's no trailing \u escape. Don't advance in the stream.
6510b57cec5SDimitry Andric     if (LLVM_UNLIKELY(P + 2 > End || *P != '\\' || *(P + 1) != 'u')) {
6520b57cec5SDimitry Andric       Invalid(); // Leading surrogate was unpaired.
6530b57cec5SDimitry Andric       return true;
6540b57cec5SDimitry Andric     }
6550b57cec5SDimitry Andric     P += 2;
6560b57cec5SDimitry Andric     uint16_t Second;
6570b57cec5SDimitry Andric     if (!Parse4Hex(Second))
6580b57cec5SDimitry Andric       return false;
6590b57cec5SDimitry Andric     // Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
6600b57cec5SDimitry Andric     if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) {
6610b57cec5SDimitry Andric       Invalid();      // Leading surrogate was unpaired.
6620b57cec5SDimitry Andric       First = Second; // Second escape still needs to be processed.
6630b57cec5SDimitry Andric       continue;
6640b57cec5SDimitry Andric     }
6650b57cec5SDimitry Andric     // Case 3c: a valid surrogate pair encoding an astral codepoint.
6660b57cec5SDimitry Andric     encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out);
6670b57cec5SDimitry Andric     return true;
6680b57cec5SDimitry Andric   }
6690b57cec5SDimitry Andric }
6700b57cec5SDimitry Andric 
parseError(const char * Msg)6710b57cec5SDimitry Andric bool Parser::parseError(const char *Msg) {
6720b57cec5SDimitry Andric   int Line = 1;
6730b57cec5SDimitry Andric   const char *StartOfLine = Start;
6740b57cec5SDimitry Andric   for (const char *X = Start; X < P; ++X) {
6750b57cec5SDimitry Andric     if (*X == 0x0A) {
6760b57cec5SDimitry Andric       ++Line;
6770b57cec5SDimitry Andric       StartOfLine = X + 1;
6780b57cec5SDimitry Andric     }
6790b57cec5SDimitry Andric   }
6800b57cec5SDimitry Andric   Err.emplace(
6818bcb0991SDimitry Andric       std::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
6820b57cec5SDimitry Andric   return false;
6830b57cec5SDimitry Andric }
6840b57cec5SDimitry Andric } // namespace
6850b57cec5SDimitry Andric 
parse(StringRef JSON)6860b57cec5SDimitry Andric Expected<Value> parse(StringRef JSON) {
6870b57cec5SDimitry Andric   Parser P(JSON);
6880b57cec5SDimitry Andric   Value E = nullptr;
6890b57cec5SDimitry Andric   if (P.checkUTF8())
6900b57cec5SDimitry Andric     if (P.parseValue(E))
6910b57cec5SDimitry Andric       if (P.assertEnd())
6920b57cec5SDimitry Andric         return std::move(E);
6930b57cec5SDimitry Andric   return P.takeError();
6940b57cec5SDimitry Andric }
6950b57cec5SDimitry Andric char ParseError::ID = 0;
6960b57cec5SDimitry Andric 
isUTF8(llvm::StringRef S,size_t * ErrOffset)6970b57cec5SDimitry Andric bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
6980b57cec5SDimitry Andric   // Fast-path for ASCII, which is valid UTF-8.
6990b57cec5SDimitry Andric   if (LLVM_LIKELY(isASCII(S)))
7000b57cec5SDimitry Andric     return true;
7010b57cec5SDimitry Andric 
7020b57cec5SDimitry Andric   const UTF8 *Data = reinterpret_cast<const UTF8 *>(S.data()), *Rest = Data;
7030b57cec5SDimitry Andric   if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
7040b57cec5SDimitry Andric     return true;
7050b57cec5SDimitry Andric 
7060b57cec5SDimitry Andric   if (ErrOffset)
7070b57cec5SDimitry Andric     *ErrOffset = Rest - Data;
7080b57cec5SDimitry Andric   return false;
7090b57cec5SDimitry Andric }
7100b57cec5SDimitry Andric 
fixUTF8(llvm::StringRef S)7110b57cec5SDimitry Andric std::string fixUTF8(llvm::StringRef S) {
7120b57cec5SDimitry Andric   // This isn't particularly efficient, but is only for error-recovery.
7130b57cec5SDimitry Andric   std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
7140b57cec5SDimitry Andric   const UTF8 *In8 = reinterpret_cast<const UTF8 *>(S.data());
7150b57cec5SDimitry Andric   UTF32 *Out32 = Codepoints.data();
7160b57cec5SDimitry Andric   ConvertUTF8toUTF32(&In8, In8 + S.size(), &Out32, Out32 + Codepoints.size(),
7170b57cec5SDimitry Andric                      lenientConversion);
7180b57cec5SDimitry Andric   Codepoints.resize(Out32 - Codepoints.data());
7190b57cec5SDimitry Andric   std::string Res(4 * Codepoints.size(), 0); // 4 bytes per codepoint suffice
7200b57cec5SDimitry Andric   const UTF32 *In32 = Codepoints.data();
7210b57cec5SDimitry Andric   UTF8 *Out8 = reinterpret_cast<UTF8 *>(&Res[0]);
7220b57cec5SDimitry Andric   ConvertUTF32toUTF8(&In32, In32 + Codepoints.size(), &Out8, Out8 + Res.size(),
7230b57cec5SDimitry Andric                      strictConversion);
7240b57cec5SDimitry Andric   Res.resize(reinterpret_cast<char *>(Out8) - Res.data());
7250b57cec5SDimitry Andric   return Res;
7260b57cec5SDimitry Andric }
7270b57cec5SDimitry Andric 
quote(llvm::raw_ostream & OS,llvm::StringRef S)7280b57cec5SDimitry Andric static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
7290b57cec5SDimitry Andric   OS << '\"';
7300b57cec5SDimitry Andric   for (unsigned char C : S) {
7310b57cec5SDimitry Andric     if (C == 0x22 || C == 0x5C)
7320b57cec5SDimitry Andric       OS << '\\';
7330b57cec5SDimitry Andric     if (C >= 0x20) {
7340b57cec5SDimitry Andric       OS << C;
7350b57cec5SDimitry Andric       continue;
7360b57cec5SDimitry Andric     }
7370b57cec5SDimitry Andric     OS << '\\';
7380b57cec5SDimitry Andric     switch (C) {
7390b57cec5SDimitry Andric     // A few characters are common enough to make short escapes worthwhile.
7400b57cec5SDimitry Andric     case '\t':
7410b57cec5SDimitry Andric       OS << 't';
7420b57cec5SDimitry Andric       break;
7430b57cec5SDimitry Andric     case '\n':
7440b57cec5SDimitry Andric       OS << 'n';
7450b57cec5SDimitry Andric       break;
7460b57cec5SDimitry Andric     case '\r':
7470b57cec5SDimitry Andric       OS << 'r';
7480b57cec5SDimitry Andric       break;
7490b57cec5SDimitry Andric     default:
7500b57cec5SDimitry Andric       OS << 'u';
7510b57cec5SDimitry Andric       llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4);
7520b57cec5SDimitry Andric       break;
7530b57cec5SDimitry Andric     }
7540b57cec5SDimitry Andric   }
7550b57cec5SDimitry Andric   OS << '\"';
7560b57cec5SDimitry Andric }
7570b57cec5SDimitry Andric 
value(const Value & V)7580b57cec5SDimitry Andric void llvm::json::OStream::value(const Value &V) {
7590b57cec5SDimitry Andric   switch (V.kind()) {
7600b57cec5SDimitry Andric   case Value::Null:
7610b57cec5SDimitry Andric     valueBegin();
7620b57cec5SDimitry Andric     OS << "null";
7630b57cec5SDimitry Andric     return;
7640b57cec5SDimitry Andric   case Value::Boolean:
7650b57cec5SDimitry Andric     valueBegin();
7660b57cec5SDimitry Andric     OS << (*V.getAsBoolean() ? "true" : "false");
7670b57cec5SDimitry Andric     return;
7680b57cec5SDimitry Andric   case Value::Number:
7690b57cec5SDimitry Andric     valueBegin();
7700b57cec5SDimitry Andric     if (V.Type == Value::T_Integer)
7710b57cec5SDimitry Andric       OS << *V.getAsInteger();
772349cc55cSDimitry Andric     else if (V.Type == Value::T_UINT64)
773349cc55cSDimitry Andric       OS << *V.getAsUINT64();
7740b57cec5SDimitry Andric     else
7750b57cec5SDimitry Andric       OS << format("%.*g", std::numeric_limits<double>::max_digits10,
7760b57cec5SDimitry Andric                    *V.getAsNumber());
7770b57cec5SDimitry Andric     return;
7780b57cec5SDimitry Andric   case Value::String:
7790b57cec5SDimitry Andric     valueBegin();
7800b57cec5SDimitry Andric     quote(OS, *V.getAsString());
7810b57cec5SDimitry Andric     return;
7820b57cec5SDimitry Andric   case Value::Array:
7830b57cec5SDimitry Andric     return array([&] {
7840b57cec5SDimitry Andric       for (const Value &E : *V.getAsArray())
7850b57cec5SDimitry Andric         value(E);
7860b57cec5SDimitry Andric     });
7870b57cec5SDimitry Andric   case Value::Object:
7880b57cec5SDimitry Andric     return object([&] {
7890b57cec5SDimitry Andric       for (const Object::value_type *E : sortedElements(*V.getAsObject()))
7900b57cec5SDimitry Andric         attribute(E->first, E->second);
7910b57cec5SDimitry Andric     });
7920b57cec5SDimitry Andric   }
7930b57cec5SDimitry Andric }
7940b57cec5SDimitry Andric 
valueBegin()7950b57cec5SDimitry Andric void llvm::json::OStream::valueBegin() {
7960b57cec5SDimitry Andric   assert(Stack.back().Ctx != Object && "Only attributes allowed here");
7970b57cec5SDimitry Andric   if (Stack.back().HasValue) {
7980b57cec5SDimitry Andric     assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
7990b57cec5SDimitry Andric     OS << ',';
8000b57cec5SDimitry Andric   }
8010b57cec5SDimitry Andric   if (Stack.back().Ctx == Array)
8020b57cec5SDimitry Andric     newline();
803e8d8bef9SDimitry Andric   flushComment();
8040b57cec5SDimitry Andric   Stack.back().HasValue = true;
8050b57cec5SDimitry Andric }
8060b57cec5SDimitry Andric 
comment(llvm::StringRef Comment)807e8d8bef9SDimitry Andric void OStream::comment(llvm::StringRef Comment) {
808e8d8bef9SDimitry Andric   assert(PendingComment.empty() && "Only one comment per value!");
809e8d8bef9SDimitry Andric   PendingComment = Comment;
810e8d8bef9SDimitry Andric }
811e8d8bef9SDimitry Andric 
flushComment()812e8d8bef9SDimitry Andric void OStream::flushComment() {
813e8d8bef9SDimitry Andric   if (PendingComment.empty())
814e8d8bef9SDimitry Andric     return;
815e8d8bef9SDimitry Andric   OS << (IndentSize ? "/* " : "/*");
816e8d8bef9SDimitry Andric   // Be sure not to accidentally emit "*/". Transform to "* /".
817e8d8bef9SDimitry Andric   while (!PendingComment.empty()) {
818e8d8bef9SDimitry Andric     auto Pos = PendingComment.find("*/");
819e8d8bef9SDimitry Andric     if (Pos == StringRef::npos) {
820e8d8bef9SDimitry Andric       OS << PendingComment;
821e8d8bef9SDimitry Andric       PendingComment = "";
822e8d8bef9SDimitry Andric     } else {
823e8d8bef9SDimitry Andric       OS << PendingComment.take_front(Pos) << "* /";
824e8d8bef9SDimitry Andric       PendingComment = PendingComment.drop_front(Pos + 2);
825e8d8bef9SDimitry Andric     }
826e8d8bef9SDimitry Andric   }
827e8d8bef9SDimitry Andric   OS << (IndentSize ? " */" : "*/");
828e8d8bef9SDimitry Andric   // Comments are on their own line unless attached to an attribute value.
829e8d8bef9SDimitry Andric   if (Stack.size() > 1 && Stack.back().Ctx == Singleton) {
830e8d8bef9SDimitry Andric     if (IndentSize)
831e8d8bef9SDimitry Andric       OS << ' ';
832e8d8bef9SDimitry Andric   } else {
833e8d8bef9SDimitry Andric     newline();
834e8d8bef9SDimitry Andric   }
835e8d8bef9SDimitry Andric }
836e8d8bef9SDimitry Andric 
newline()8370b57cec5SDimitry Andric void llvm::json::OStream::newline() {
8380b57cec5SDimitry Andric   if (IndentSize) {
8390b57cec5SDimitry Andric     OS.write('\n');
8400b57cec5SDimitry Andric     OS.indent(Indent);
8410b57cec5SDimitry Andric   }
8420b57cec5SDimitry Andric }
8430b57cec5SDimitry Andric 
arrayBegin()8440b57cec5SDimitry Andric void llvm::json::OStream::arrayBegin() {
8450b57cec5SDimitry Andric   valueBegin();
8460b57cec5SDimitry Andric   Stack.emplace_back();
8470b57cec5SDimitry Andric   Stack.back().Ctx = Array;
8480b57cec5SDimitry Andric   Indent += IndentSize;
8490b57cec5SDimitry Andric   OS << '[';
8500b57cec5SDimitry Andric }
8510b57cec5SDimitry Andric 
arrayEnd()8520b57cec5SDimitry Andric void llvm::json::OStream::arrayEnd() {
8530b57cec5SDimitry Andric   assert(Stack.back().Ctx == Array);
8540b57cec5SDimitry Andric   Indent -= IndentSize;
8550b57cec5SDimitry Andric   if (Stack.back().HasValue)
8560b57cec5SDimitry Andric     newline();
8570b57cec5SDimitry Andric   OS << ']';
858e8d8bef9SDimitry Andric   assert(PendingComment.empty());
8590b57cec5SDimitry Andric   Stack.pop_back();
8600b57cec5SDimitry Andric   assert(!Stack.empty());
8610b57cec5SDimitry Andric }
8620b57cec5SDimitry Andric 
objectBegin()8630b57cec5SDimitry Andric void llvm::json::OStream::objectBegin() {
8640b57cec5SDimitry Andric   valueBegin();
8650b57cec5SDimitry Andric   Stack.emplace_back();
8660b57cec5SDimitry Andric   Stack.back().Ctx = Object;
8670b57cec5SDimitry Andric   Indent += IndentSize;
8680b57cec5SDimitry Andric   OS << '{';
8690b57cec5SDimitry Andric }
8700b57cec5SDimitry Andric 
objectEnd()8710b57cec5SDimitry Andric void llvm::json::OStream::objectEnd() {
8720b57cec5SDimitry Andric   assert(Stack.back().Ctx == Object);
8730b57cec5SDimitry Andric   Indent -= IndentSize;
8740b57cec5SDimitry Andric   if (Stack.back().HasValue)
8750b57cec5SDimitry Andric     newline();
8760b57cec5SDimitry Andric   OS << '}';
877e8d8bef9SDimitry Andric   assert(PendingComment.empty());
8780b57cec5SDimitry Andric   Stack.pop_back();
8790b57cec5SDimitry Andric   assert(!Stack.empty());
8800b57cec5SDimitry Andric }
8810b57cec5SDimitry Andric 
attributeBegin(llvm::StringRef Key)8820b57cec5SDimitry Andric void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
8830b57cec5SDimitry Andric   assert(Stack.back().Ctx == Object);
8840b57cec5SDimitry Andric   if (Stack.back().HasValue)
8850b57cec5SDimitry Andric     OS << ',';
8860b57cec5SDimitry Andric   newline();
887e8d8bef9SDimitry Andric   flushComment();
8880b57cec5SDimitry Andric   Stack.back().HasValue = true;
8890b57cec5SDimitry Andric   Stack.emplace_back();
8900b57cec5SDimitry Andric   Stack.back().Ctx = Singleton;
8910b57cec5SDimitry Andric   if (LLVM_LIKELY(isUTF8(Key))) {
8920b57cec5SDimitry Andric     quote(OS, Key);
8930b57cec5SDimitry Andric   } else {
8940b57cec5SDimitry Andric     assert(false && "Invalid UTF-8 in attribute key");
8950b57cec5SDimitry Andric     quote(OS, fixUTF8(Key));
8960b57cec5SDimitry Andric   }
8970b57cec5SDimitry Andric   OS.write(':');
8980b57cec5SDimitry Andric   if (IndentSize)
8990b57cec5SDimitry Andric     OS.write(' ');
9000b57cec5SDimitry Andric }
9010b57cec5SDimitry Andric 
attributeEnd()9020b57cec5SDimitry Andric void llvm::json::OStream::attributeEnd() {
9030b57cec5SDimitry Andric   assert(Stack.back().Ctx == Singleton);
9040b57cec5SDimitry Andric   assert(Stack.back().HasValue && "Attribute must have a value");
905e8d8bef9SDimitry Andric   assert(PendingComment.empty());
9060b57cec5SDimitry Andric   Stack.pop_back();
9070b57cec5SDimitry Andric   assert(Stack.back().Ctx == Object);
9080b57cec5SDimitry Andric }
9090b57cec5SDimitry Andric 
rawValueBegin()910e8d8bef9SDimitry Andric raw_ostream &llvm::json::OStream::rawValueBegin() {
911e8d8bef9SDimitry Andric   valueBegin();
912e8d8bef9SDimitry Andric   Stack.emplace_back();
913e8d8bef9SDimitry Andric   Stack.back().Ctx = RawValue;
914e8d8bef9SDimitry Andric   return OS;
915e8d8bef9SDimitry Andric }
916e8d8bef9SDimitry Andric 
rawValueEnd()917e8d8bef9SDimitry Andric void llvm::json::OStream::rawValueEnd() {
918e8d8bef9SDimitry Andric   assert(Stack.back().Ctx == RawValue);
919e8d8bef9SDimitry Andric   Stack.pop_back();
920e8d8bef9SDimitry Andric }
921e8d8bef9SDimitry Andric 
9220b57cec5SDimitry Andric } // namespace json
9230b57cec5SDimitry Andric } // namespace llvm
9240b57cec5SDimitry Andric 
format(const llvm::json::Value & E,raw_ostream & OS,StringRef Options)9250b57cec5SDimitry Andric void llvm::format_provider<llvm::json::Value>::format(
9260b57cec5SDimitry Andric     const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
9270b57cec5SDimitry Andric   unsigned IndentAmount = 0;
9280b57cec5SDimitry Andric   if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount))
9290b57cec5SDimitry Andric     llvm_unreachable("json::Value format options should be an integer");
9300b57cec5SDimitry Andric   json::OStream(OS, IndentAmount).value(E);
9310b57cec5SDimitry Andric }
9320b57cec5SDimitry Andric 
933