//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
94ba319b5SDimitry Andric
#include "llvm/Support/JSON.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Format.h"
#include <cctype>
#include <cerrno>
144ba319b5SDimitry Andric
154ba319b5SDimitry Andric namespace llvm {
164ba319b5SDimitry Andric namespace json {
174ba319b5SDimitry Andric
operator [](const ObjectKey & K)184ba319b5SDimitry Andric Value &Object::operator[](const ObjectKey &K) {
194ba319b5SDimitry Andric return try_emplace(K, nullptr).first->getSecond();
204ba319b5SDimitry Andric }
operator [](ObjectKey && K)214ba319b5SDimitry Andric Value &Object::operator[](ObjectKey &&K) {
224ba319b5SDimitry Andric return try_emplace(std::move(K), nullptr).first->getSecond();
234ba319b5SDimitry Andric }
get(StringRef K)244ba319b5SDimitry Andric Value *Object::get(StringRef K) {
254ba319b5SDimitry Andric auto I = find(K);
264ba319b5SDimitry Andric if (I == end())
274ba319b5SDimitry Andric return nullptr;
284ba319b5SDimitry Andric return &I->second;
294ba319b5SDimitry Andric }
get(StringRef K) const304ba319b5SDimitry Andric const Value *Object::get(StringRef K) const {
314ba319b5SDimitry Andric auto I = find(K);
324ba319b5SDimitry Andric if (I == end())
334ba319b5SDimitry Andric return nullptr;
344ba319b5SDimitry Andric return &I->second;
354ba319b5SDimitry Andric }
getNull(StringRef K) const364ba319b5SDimitry Andric llvm::Optional<std::nullptr_t> Object::getNull(StringRef K) const {
374ba319b5SDimitry Andric if (auto *V = get(K))
384ba319b5SDimitry Andric return V->getAsNull();
394ba319b5SDimitry Andric return llvm::None;
404ba319b5SDimitry Andric }
getBoolean(StringRef K) const414ba319b5SDimitry Andric llvm::Optional<bool> Object::getBoolean(StringRef K) const {
424ba319b5SDimitry Andric if (auto *V = get(K))
434ba319b5SDimitry Andric return V->getAsBoolean();
444ba319b5SDimitry Andric return llvm::None;
454ba319b5SDimitry Andric }
getNumber(StringRef K) const464ba319b5SDimitry Andric llvm::Optional<double> Object::getNumber(StringRef K) const {
474ba319b5SDimitry Andric if (auto *V = get(K))
484ba319b5SDimitry Andric return V->getAsNumber();
494ba319b5SDimitry Andric return llvm::None;
504ba319b5SDimitry Andric }
getInteger(StringRef K) const514ba319b5SDimitry Andric llvm::Optional<int64_t> Object::getInteger(StringRef K) const {
524ba319b5SDimitry Andric if (auto *V = get(K))
534ba319b5SDimitry Andric return V->getAsInteger();
544ba319b5SDimitry Andric return llvm::None;
554ba319b5SDimitry Andric }
getString(StringRef K) const564ba319b5SDimitry Andric llvm::Optional<llvm::StringRef> Object::getString(StringRef K) const {
574ba319b5SDimitry Andric if (auto *V = get(K))
584ba319b5SDimitry Andric return V->getAsString();
594ba319b5SDimitry Andric return llvm::None;
604ba319b5SDimitry Andric }
getObject(StringRef K) const614ba319b5SDimitry Andric const json::Object *Object::getObject(StringRef K) const {
624ba319b5SDimitry Andric if (auto *V = get(K))
634ba319b5SDimitry Andric return V->getAsObject();
644ba319b5SDimitry Andric return nullptr;
654ba319b5SDimitry Andric }
getObject(StringRef K)664ba319b5SDimitry Andric json::Object *Object::getObject(StringRef K) {
674ba319b5SDimitry Andric if (auto *V = get(K))
684ba319b5SDimitry Andric return V->getAsObject();
694ba319b5SDimitry Andric return nullptr;
704ba319b5SDimitry Andric }
getArray(StringRef K) const714ba319b5SDimitry Andric const json::Array *Object::getArray(StringRef K) const {
724ba319b5SDimitry Andric if (auto *V = get(K))
734ba319b5SDimitry Andric return V->getAsArray();
744ba319b5SDimitry Andric return nullptr;
754ba319b5SDimitry Andric }
getArray(StringRef K)764ba319b5SDimitry Andric json::Array *Object::getArray(StringRef K) {
774ba319b5SDimitry Andric if (auto *V = get(K))
784ba319b5SDimitry Andric return V->getAsArray();
794ba319b5SDimitry Andric return nullptr;
804ba319b5SDimitry Andric }
operator ==(const Object & LHS,const Object & RHS)814ba319b5SDimitry Andric bool operator==(const Object &LHS, const Object &RHS) {
824ba319b5SDimitry Andric if (LHS.size() != RHS.size())
834ba319b5SDimitry Andric return false;
844ba319b5SDimitry Andric for (const auto &L : LHS) {
854ba319b5SDimitry Andric auto R = RHS.find(L.first);
864ba319b5SDimitry Andric if (R == RHS.end() || L.second != R->second)
874ba319b5SDimitry Andric return false;
884ba319b5SDimitry Andric }
894ba319b5SDimitry Andric return true;
904ba319b5SDimitry Andric }
914ba319b5SDimitry Andric
Array(std::initializer_list<Value> Elements)924ba319b5SDimitry Andric Array::Array(std::initializer_list<Value> Elements) {
934ba319b5SDimitry Andric V.reserve(Elements.size());
944ba319b5SDimitry Andric for (const Value &V : Elements) {
954ba319b5SDimitry Andric emplace_back(nullptr);
964ba319b5SDimitry Andric back().moveFrom(std::move(V));
974ba319b5SDimitry Andric }
984ba319b5SDimitry Andric }
994ba319b5SDimitry Andric
Value(std::initializer_list<Value> Elements)1004ba319b5SDimitry Andric Value::Value(std::initializer_list<Value> Elements)
1014ba319b5SDimitry Andric : Value(json::Array(Elements)) {}
1024ba319b5SDimitry Andric
copyFrom(const Value & M)1034ba319b5SDimitry Andric void Value::copyFrom(const Value &M) {
1044ba319b5SDimitry Andric Type = M.Type;
1054ba319b5SDimitry Andric switch (Type) {
1064ba319b5SDimitry Andric case T_Null:
1074ba319b5SDimitry Andric case T_Boolean:
1084ba319b5SDimitry Andric case T_Double:
1094ba319b5SDimitry Andric case T_Integer:
1104ba319b5SDimitry Andric memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer));
1114ba319b5SDimitry Andric break;
1124ba319b5SDimitry Andric case T_StringRef:
1134ba319b5SDimitry Andric create<StringRef>(M.as<StringRef>());
1144ba319b5SDimitry Andric break;
1154ba319b5SDimitry Andric case T_String:
1164ba319b5SDimitry Andric create<std::string>(M.as<std::string>());
1174ba319b5SDimitry Andric break;
1184ba319b5SDimitry Andric case T_Object:
1194ba319b5SDimitry Andric create<json::Object>(M.as<json::Object>());
1204ba319b5SDimitry Andric break;
1214ba319b5SDimitry Andric case T_Array:
1224ba319b5SDimitry Andric create<json::Array>(M.as<json::Array>());
1234ba319b5SDimitry Andric break;
1244ba319b5SDimitry Andric }
1254ba319b5SDimitry Andric }
1264ba319b5SDimitry Andric
moveFrom(const Value && M)1274ba319b5SDimitry Andric void Value::moveFrom(const Value &&M) {
1284ba319b5SDimitry Andric Type = M.Type;
1294ba319b5SDimitry Andric switch (Type) {
1304ba319b5SDimitry Andric case T_Null:
1314ba319b5SDimitry Andric case T_Boolean:
1324ba319b5SDimitry Andric case T_Double:
1334ba319b5SDimitry Andric case T_Integer:
1344ba319b5SDimitry Andric memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer));
1354ba319b5SDimitry Andric break;
1364ba319b5SDimitry Andric case T_StringRef:
1374ba319b5SDimitry Andric create<StringRef>(M.as<StringRef>());
1384ba319b5SDimitry Andric break;
1394ba319b5SDimitry Andric case T_String:
1404ba319b5SDimitry Andric create<std::string>(std::move(M.as<std::string>()));
1414ba319b5SDimitry Andric M.Type = T_Null;
1424ba319b5SDimitry Andric break;
1434ba319b5SDimitry Andric case T_Object:
1444ba319b5SDimitry Andric create<json::Object>(std::move(M.as<json::Object>()));
1454ba319b5SDimitry Andric M.Type = T_Null;
1464ba319b5SDimitry Andric break;
1474ba319b5SDimitry Andric case T_Array:
1484ba319b5SDimitry Andric create<json::Array>(std::move(M.as<json::Array>()));
1494ba319b5SDimitry Andric M.Type = T_Null;
1504ba319b5SDimitry Andric break;
1514ba319b5SDimitry Andric }
1524ba319b5SDimitry Andric }
1534ba319b5SDimitry Andric
destroy()1544ba319b5SDimitry Andric void Value::destroy() {
1554ba319b5SDimitry Andric switch (Type) {
1564ba319b5SDimitry Andric case T_Null:
1574ba319b5SDimitry Andric case T_Boolean:
1584ba319b5SDimitry Andric case T_Double:
1594ba319b5SDimitry Andric case T_Integer:
1604ba319b5SDimitry Andric break;
1614ba319b5SDimitry Andric case T_StringRef:
1624ba319b5SDimitry Andric as<StringRef>().~StringRef();
1634ba319b5SDimitry Andric break;
1644ba319b5SDimitry Andric case T_String:
1654ba319b5SDimitry Andric as<std::string>().~basic_string();
1664ba319b5SDimitry Andric break;
1674ba319b5SDimitry Andric case T_Object:
1684ba319b5SDimitry Andric as<json::Object>().~Object();
1694ba319b5SDimitry Andric break;
1704ba319b5SDimitry Andric case T_Array:
1714ba319b5SDimitry Andric as<json::Array>().~Array();
1724ba319b5SDimitry Andric break;
1734ba319b5SDimitry Andric }
1744ba319b5SDimitry Andric }
1754ba319b5SDimitry Andric
operator ==(const Value & L,const Value & R)1764ba319b5SDimitry Andric bool operator==(const Value &L, const Value &R) {
1774ba319b5SDimitry Andric if (L.kind() != R.kind())
1784ba319b5SDimitry Andric return false;
1794ba319b5SDimitry Andric switch (L.kind()) {
1804ba319b5SDimitry Andric case Value::Null:
1814ba319b5SDimitry Andric return *L.getAsNull() == *R.getAsNull();
1824ba319b5SDimitry Andric case Value::Boolean:
1834ba319b5SDimitry Andric return *L.getAsBoolean() == *R.getAsBoolean();
1844ba319b5SDimitry Andric case Value::Number:
185*b5893f02SDimitry Andric // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
186*b5893f02SDimitry Andric // The same integer must convert to the same double, per the standard.
187*b5893f02SDimitry Andric // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
188*b5893f02SDimitry Andric // So we avoid floating point promotion for exact comparisons.
189*b5893f02SDimitry Andric if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
190*b5893f02SDimitry Andric return L.getAsInteger() == R.getAsInteger();
1914ba319b5SDimitry Andric return *L.getAsNumber() == *R.getAsNumber();
1924ba319b5SDimitry Andric case Value::String:
1934ba319b5SDimitry Andric return *L.getAsString() == *R.getAsString();
1944ba319b5SDimitry Andric case Value::Array:
1954ba319b5SDimitry Andric return *L.getAsArray() == *R.getAsArray();
1964ba319b5SDimitry Andric case Value::Object:
1974ba319b5SDimitry Andric return *L.getAsObject() == *R.getAsObject();
1984ba319b5SDimitry Andric }
1994ba319b5SDimitry Andric llvm_unreachable("Unknown value kind");
2004ba319b5SDimitry Andric }
2014ba319b5SDimitry Andric
2024ba319b5SDimitry Andric namespace {
2034ba319b5SDimitry Andric // Simple recursive-descent JSON parser.
2044ba319b5SDimitry Andric class Parser {
2054ba319b5SDimitry Andric public:
Parser(StringRef JSON)2064ba319b5SDimitry Andric Parser(StringRef JSON)
2074ba319b5SDimitry Andric : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
2084ba319b5SDimitry Andric
checkUTF8()2094ba319b5SDimitry Andric bool checkUTF8() {
2104ba319b5SDimitry Andric size_t ErrOffset;
2114ba319b5SDimitry Andric if (isUTF8(StringRef(Start, End - Start), &ErrOffset))
2124ba319b5SDimitry Andric return true;
2134ba319b5SDimitry Andric P = Start + ErrOffset; // For line/column calculation.
2144ba319b5SDimitry Andric return parseError("Invalid UTF-8 sequence");
2154ba319b5SDimitry Andric }
2164ba319b5SDimitry Andric
2174ba319b5SDimitry Andric bool parseValue(Value &Out);
2184ba319b5SDimitry Andric
assertEnd()2194ba319b5SDimitry Andric bool assertEnd() {
2204ba319b5SDimitry Andric eatWhitespace();
2214ba319b5SDimitry Andric if (P == End)
2224ba319b5SDimitry Andric return true;
2234ba319b5SDimitry Andric return parseError("Text after end of document");
2244ba319b5SDimitry Andric }
2254ba319b5SDimitry Andric
takeError()2264ba319b5SDimitry Andric Error takeError() {
2274ba319b5SDimitry Andric assert(Err);
2284ba319b5SDimitry Andric return std::move(*Err);
2294ba319b5SDimitry Andric }
2304ba319b5SDimitry Andric
2314ba319b5SDimitry Andric private:
eatWhitespace()2324ba319b5SDimitry Andric void eatWhitespace() {
2334ba319b5SDimitry Andric while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t'))
2344ba319b5SDimitry Andric ++P;
2354ba319b5SDimitry Andric }
2364ba319b5SDimitry Andric
2374ba319b5SDimitry Andric // On invalid syntax, parseX() functions return false and set Err.
2384ba319b5SDimitry Andric bool parseNumber(char First, Value &Out);
2394ba319b5SDimitry Andric bool parseString(std::string &Out);
2404ba319b5SDimitry Andric bool parseUnicode(std::string &Out);
2414ba319b5SDimitry Andric bool parseError(const char *Msg); // always returns false
2424ba319b5SDimitry Andric
next()2434ba319b5SDimitry Andric char next() { return P == End ? 0 : *P++; }
peek()2444ba319b5SDimitry Andric char peek() { return P == End ? 0 : *P; }
isNumber(char C)2454ba319b5SDimitry Andric static bool isNumber(char C) {
2464ba319b5SDimitry Andric return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' ||
2474ba319b5SDimitry Andric C == '5' || C == '6' || C == '7' || C == '8' || C == '9' ||
2484ba319b5SDimitry Andric C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.';
2494ba319b5SDimitry Andric }
2504ba319b5SDimitry Andric
2514ba319b5SDimitry Andric Optional<Error> Err;
2524ba319b5SDimitry Andric const char *Start, *P, *End;
2534ba319b5SDimitry Andric };
2544ba319b5SDimitry Andric
parseValue(Value & Out)2554ba319b5SDimitry Andric bool Parser::parseValue(Value &Out) {
2564ba319b5SDimitry Andric eatWhitespace();
2574ba319b5SDimitry Andric if (P == End)
2584ba319b5SDimitry Andric return parseError("Unexpected EOF");
2594ba319b5SDimitry Andric switch (char C = next()) {
2604ba319b5SDimitry Andric // Bare null/true/false are easy - first char identifies them.
2614ba319b5SDimitry Andric case 'n':
2624ba319b5SDimitry Andric Out = nullptr;
2634ba319b5SDimitry Andric return (next() == 'u' && next() == 'l' && next() == 'l') ||
2644ba319b5SDimitry Andric parseError("Invalid JSON value (null?)");
2654ba319b5SDimitry Andric case 't':
2664ba319b5SDimitry Andric Out = true;
2674ba319b5SDimitry Andric return (next() == 'r' && next() == 'u' && next() == 'e') ||
2684ba319b5SDimitry Andric parseError("Invalid JSON value (true?)");
2694ba319b5SDimitry Andric case 'f':
2704ba319b5SDimitry Andric Out = false;
2714ba319b5SDimitry Andric return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') ||
2724ba319b5SDimitry Andric parseError("Invalid JSON value (false?)");
2734ba319b5SDimitry Andric case '"': {
2744ba319b5SDimitry Andric std::string S;
2754ba319b5SDimitry Andric if (parseString(S)) {
2764ba319b5SDimitry Andric Out = std::move(S);
2774ba319b5SDimitry Andric return true;
2784ba319b5SDimitry Andric }
2794ba319b5SDimitry Andric return false;
2804ba319b5SDimitry Andric }
2814ba319b5SDimitry Andric case '[': {
2824ba319b5SDimitry Andric Out = Array{};
2834ba319b5SDimitry Andric Array &A = *Out.getAsArray();
2844ba319b5SDimitry Andric eatWhitespace();
2854ba319b5SDimitry Andric if (peek() == ']') {
2864ba319b5SDimitry Andric ++P;
2874ba319b5SDimitry Andric return true;
2884ba319b5SDimitry Andric }
2894ba319b5SDimitry Andric for (;;) {
2904ba319b5SDimitry Andric A.emplace_back(nullptr);
2914ba319b5SDimitry Andric if (!parseValue(A.back()))
2924ba319b5SDimitry Andric return false;
2934ba319b5SDimitry Andric eatWhitespace();
2944ba319b5SDimitry Andric switch (next()) {
2954ba319b5SDimitry Andric case ',':
2964ba319b5SDimitry Andric eatWhitespace();
2974ba319b5SDimitry Andric continue;
2984ba319b5SDimitry Andric case ']':
2994ba319b5SDimitry Andric return true;
3004ba319b5SDimitry Andric default:
3014ba319b5SDimitry Andric return parseError("Expected , or ] after array element");
3024ba319b5SDimitry Andric }
3034ba319b5SDimitry Andric }
3044ba319b5SDimitry Andric }
3054ba319b5SDimitry Andric case '{': {
3064ba319b5SDimitry Andric Out = Object{};
3074ba319b5SDimitry Andric Object &O = *Out.getAsObject();
3084ba319b5SDimitry Andric eatWhitespace();
3094ba319b5SDimitry Andric if (peek() == '}') {
3104ba319b5SDimitry Andric ++P;
3114ba319b5SDimitry Andric return true;
3124ba319b5SDimitry Andric }
3134ba319b5SDimitry Andric for (;;) {
3144ba319b5SDimitry Andric if (next() != '"')
3154ba319b5SDimitry Andric return parseError("Expected object key");
3164ba319b5SDimitry Andric std::string K;
3174ba319b5SDimitry Andric if (!parseString(K))
3184ba319b5SDimitry Andric return false;
3194ba319b5SDimitry Andric eatWhitespace();
3204ba319b5SDimitry Andric if (next() != ':')
3214ba319b5SDimitry Andric return parseError("Expected : after object key");
3224ba319b5SDimitry Andric eatWhitespace();
3234ba319b5SDimitry Andric if (!parseValue(O[std::move(K)]))
3244ba319b5SDimitry Andric return false;
3254ba319b5SDimitry Andric eatWhitespace();
3264ba319b5SDimitry Andric switch (next()) {
3274ba319b5SDimitry Andric case ',':
3284ba319b5SDimitry Andric eatWhitespace();
3294ba319b5SDimitry Andric continue;
3304ba319b5SDimitry Andric case '}':
3314ba319b5SDimitry Andric return true;
3324ba319b5SDimitry Andric default:
3334ba319b5SDimitry Andric return parseError("Expected , or } after object property");
3344ba319b5SDimitry Andric }
3354ba319b5SDimitry Andric }
3364ba319b5SDimitry Andric }
3374ba319b5SDimitry Andric default:
3384ba319b5SDimitry Andric if (isNumber(C))
3394ba319b5SDimitry Andric return parseNumber(C, Out);
3404ba319b5SDimitry Andric return parseError("Invalid JSON value");
3414ba319b5SDimitry Andric }
3424ba319b5SDimitry Andric }
3434ba319b5SDimitry Andric
parseNumber(char First,Value & Out)3444ba319b5SDimitry Andric bool Parser::parseNumber(char First, Value &Out) {
3454ba319b5SDimitry Andric // Read the number into a string. (Must be null-terminated for strto*).
3464ba319b5SDimitry Andric SmallString<24> S;
3474ba319b5SDimitry Andric S.push_back(First);
3484ba319b5SDimitry Andric while (isNumber(peek()))
3494ba319b5SDimitry Andric S.push_back(next());
3504ba319b5SDimitry Andric char *End;
3514ba319b5SDimitry Andric // Try first to parse as integer, and if so preserve full 64 bits.
3524ba319b5SDimitry Andric // strtoll returns long long >= 64 bits, so check it's in range too.
3534ba319b5SDimitry Andric auto I = std::strtoll(S.c_str(), &End, 10);
3544ba319b5SDimitry Andric if (End == S.end() && I >= std::numeric_limits<int64_t>::min() &&
3554ba319b5SDimitry Andric I <= std::numeric_limits<int64_t>::max()) {
3564ba319b5SDimitry Andric Out = int64_t(I);
3574ba319b5SDimitry Andric return true;
3584ba319b5SDimitry Andric }
3594ba319b5SDimitry Andric // If it's not an integer
3604ba319b5SDimitry Andric Out = std::strtod(S.c_str(), &End);
3614ba319b5SDimitry Andric return End == S.end() || parseError("Invalid JSON value (number?)");
3624ba319b5SDimitry Andric }
3634ba319b5SDimitry Andric
parseString(std::string & Out)3644ba319b5SDimitry Andric bool Parser::parseString(std::string &Out) {
3654ba319b5SDimitry Andric // leading quote was already consumed.
3664ba319b5SDimitry Andric for (char C = next(); C != '"'; C = next()) {
3674ba319b5SDimitry Andric if (LLVM_UNLIKELY(P == End))
3684ba319b5SDimitry Andric return parseError("Unterminated string");
3694ba319b5SDimitry Andric if (LLVM_UNLIKELY((C & 0x1f) == C))
3704ba319b5SDimitry Andric return parseError("Control character in string");
3714ba319b5SDimitry Andric if (LLVM_LIKELY(C != '\\')) {
3724ba319b5SDimitry Andric Out.push_back(C);
3734ba319b5SDimitry Andric continue;
3744ba319b5SDimitry Andric }
3754ba319b5SDimitry Andric // Handle escape sequence.
3764ba319b5SDimitry Andric switch (C = next()) {
3774ba319b5SDimitry Andric case '"':
3784ba319b5SDimitry Andric case '\\':
3794ba319b5SDimitry Andric case '/':
3804ba319b5SDimitry Andric Out.push_back(C);
3814ba319b5SDimitry Andric break;
3824ba319b5SDimitry Andric case 'b':
3834ba319b5SDimitry Andric Out.push_back('\b');
3844ba319b5SDimitry Andric break;
3854ba319b5SDimitry Andric case 'f':
3864ba319b5SDimitry Andric Out.push_back('\f');
3874ba319b5SDimitry Andric break;
3884ba319b5SDimitry Andric case 'n':
3894ba319b5SDimitry Andric Out.push_back('\n');
3904ba319b5SDimitry Andric break;
3914ba319b5SDimitry Andric case 'r':
3924ba319b5SDimitry Andric Out.push_back('\r');
3934ba319b5SDimitry Andric break;
3944ba319b5SDimitry Andric case 't':
3954ba319b5SDimitry Andric Out.push_back('\t');
3964ba319b5SDimitry Andric break;
3974ba319b5SDimitry Andric case 'u':
3984ba319b5SDimitry Andric if (!parseUnicode(Out))
3994ba319b5SDimitry Andric return false;
4004ba319b5SDimitry Andric break;
4014ba319b5SDimitry Andric default:
4024ba319b5SDimitry Andric return parseError("Invalid escape sequence");
4034ba319b5SDimitry Andric }
4044ba319b5SDimitry Andric }
4054ba319b5SDimitry Andric return true;
4064ba319b5SDimitry Andric }
4074ba319b5SDimitry Andric
encodeUtf8(uint32_t Rune,std::string & Out)4084ba319b5SDimitry Andric static void encodeUtf8(uint32_t Rune, std::string &Out) {
4094ba319b5SDimitry Andric if (Rune < 0x80) {
4104ba319b5SDimitry Andric Out.push_back(Rune & 0x7F);
4114ba319b5SDimitry Andric } else if (Rune < 0x800) {
4124ba319b5SDimitry Andric uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6);
4134ba319b5SDimitry Andric uint8_t SecondByte = 0x80 | (Rune & 0x3F);
4144ba319b5SDimitry Andric Out.push_back(FirstByte);
4154ba319b5SDimitry Andric Out.push_back(SecondByte);
4164ba319b5SDimitry Andric } else if (Rune < 0x10000) {
4174ba319b5SDimitry Andric uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12);
4184ba319b5SDimitry Andric uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6);
4194ba319b5SDimitry Andric uint8_t ThirdByte = 0x80 | (Rune & 0x3F);
4204ba319b5SDimitry Andric Out.push_back(FirstByte);
4214ba319b5SDimitry Andric Out.push_back(SecondByte);
4224ba319b5SDimitry Andric Out.push_back(ThirdByte);
4234ba319b5SDimitry Andric } else if (Rune < 0x110000) {
4244ba319b5SDimitry Andric uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18);
4254ba319b5SDimitry Andric uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12);
4264ba319b5SDimitry Andric uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6);
4274ba319b5SDimitry Andric uint8_t FourthByte = 0x80 | (Rune & 0x3F);
4284ba319b5SDimitry Andric Out.push_back(FirstByte);
4294ba319b5SDimitry Andric Out.push_back(SecondByte);
4304ba319b5SDimitry Andric Out.push_back(ThirdByte);
4314ba319b5SDimitry Andric Out.push_back(FourthByte);
4324ba319b5SDimitry Andric } else {
4334ba319b5SDimitry Andric llvm_unreachable("Invalid codepoint");
4344ba319b5SDimitry Andric }
4354ba319b5SDimitry Andric }
4364ba319b5SDimitry Andric
4374ba319b5SDimitry Andric // Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
4384ba319b5SDimitry Andric // May parse several sequential escapes to ensure proper surrogate handling.
4394ba319b5SDimitry Andric // We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
4404ba319b5SDimitry Andric // These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
parseUnicode(std::string & Out)4414ba319b5SDimitry Andric bool Parser::parseUnicode(std::string &Out) {
4424ba319b5SDimitry Andric // Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
4434ba319b5SDimitry Andric auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); };
4444ba319b5SDimitry Andric // Decodes 4 hex digits from the stream into Out, returns false on error.
4454ba319b5SDimitry Andric auto Parse4Hex = [this](uint16_t &Out) -> bool {
4464ba319b5SDimitry Andric Out = 0;
4474ba319b5SDimitry Andric char Bytes[] = {next(), next(), next(), next()};
4484ba319b5SDimitry Andric for (unsigned char C : Bytes) {
4494ba319b5SDimitry Andric if (!std::isxdigit(C))
4504ba319b5SDimitry Andric return parseError("Invalid \\u escape sequence");
4514ba319b5SDimitry Andric Out <<= 4;
4524ba319b5SDimitry Andric Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0');
4534ba319b5SDimitry Andric }
4544ba319b5SDimitry Andric return true;
4554ba319b5SDimitry Andric };
4564ba319b5SDimitry Andric uint16_t First; // UTF-16 code unit from the first \u escape.
4574ba319b5SDimitry Andric if (!Parse4Hex(First))
4584ba319b5SDimitry Andric return false;
4594ba319b5SDimitry Andric
4604ba319b5SDimitry Andric // We loop to allow proper surrogate-pair error handling.
4614ba319b5SDimitry Andric while (true) {
4624ba319b5SDimitry Andric // Case 1: the UTF-16 code unit is already a codepoint in the BMP.
4634ba319b5SDimitry Andric if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) {
4644ba319b5SDimitry Andric encodeUtf8(First, Out);
4654ba319b5SDimitry Andric return true;
4664ba319b5SDimitry Andric }
4674ba319b5SDimitry Andric
4684ba319b5SDimitry Andric // Case 2: it's an (unpaired) trailing surrogate.
4694ba319b5SDimitry Andric if (LLVM_UNLIKELY(First >= 0xDC00)) {
4704ba319b5SDimitry Andric Invalid();
4714ba319b5SDimitry Andric return true;
4724ba319b5SDimitry Andric }
4734ba319b5SDimitry Andric
4744ba319b5SDimitry Andric // Case 3: it's a leading surrogate. We expect a trailing one next.
4754ba319b5SDimitry Andric // Case 3a: there's no trailing \u escape. Don't advance in the stream.
4764ba319b5SDimitry Andric if (LLVM_UNLIKELY(P + 2 > End || *P != '\\' || *(P + 1) != 'u')) {
4774ba319b5SDimitry Andric Invalid(); // Leading surrogate was unpaired.
4784ba319b5SDimitry Andric return true;
4794ba319b5SDimitry Andric }
4804ba319b5SDimitry Andric P += 2;
4814ba319b5SDimitry Andric uint16_t Second;
4824ba319b5SDimitry Andric if (!Parse4Hex(Second))
4834ba319b5SDimitry Andric return false;
4844ba319b5SDimitry Andric // Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
4854ba319b5SDimitry Andric if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) {
4864ba319b5SDimitry Andric Invalid(); // Leading surrogate was unpaired.
4874ba319b5SDimitry Andric First = Second; // Second escape still needs to be processed.
4884ba319b5SDimitry Andric continue;
4894ba319b5SDimitry Andric }
4904ba319b5SDimitry Andric // Case 3c: a valid surrogate pair encoding an astral codepoint.
4914ba319b5SDimitry Andric encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out);
4924ba319b5SDimitry Andric return true;
4934ba319b5SDimitry Andric }
4944ba319b5SDimitry Andric }
4954ba319b5SDimitry Andric
parseError(const char * Msg)4964ba319b5SDimitry Andric bool Parser::parseError(const char *Msg) {
4974ba319b5SDimitry Andric int Line = 1;
4984ba319b5SDimitry Andric const char *StartOfLine = Start;
4994ba319b5SDimitry Andric for (const char *X = Start; X < P; ++X) {
5004ba319b5SDimitry Andric if (*X == 0x0A) {
5014ba319b5SDimitry Andric ++Line;
5024ba319b5SDimitry Andric StartOfLine = X + 1;
5034ba319b5SDimitry Andric }
5044ba319b5SDimitry Andric }
5054ba319b5SDimitry Andric Err.emplace(
5064ba319b5SDimitry Andric llvm::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
5074ba319b5SDimitry Andric return false;
5084ba319b5SDimitry Andric }
5094ba319b5SDimitry Andric } // namespace
5104ba319b5SDimitry Andric
parse(StringRef JSON)5114ba319b5SDimitry Andric Expected<Value> parse(StringRef JSON) {
5124ba319b5SDimitry Andric Parser P(JSON);
5134ba319b5SDimitry Andric Value E = nullptr;
5144ba319b5SDimitry Andric if (P.checkUTF8())
5154ba319b5SDimitry Andric if (P.parseValue(E))
5164ba319b5SDimitry Andric if (P.assertEnd())
5174ba319b5SDimitry Andric return std::move(E);
5184ba319b5SDimitry Andric return P.takeError();
5194ba319b5SDimitry Andric }
5204ba319b5SDimitry Andric char ParseError::ID = 0;
5214ba319b5SDimitry Andric
sortedElements(const Object & O)5224ba319b5SDimitry Andric static std::vector<const Object::value_type *> sortedElements(const Object &O) {
5234ba319b5SDimitry Andric std::vector<const Object::value_type *> Elements;
5244ba319b5SDimitry Andric for (const auto &E : O)
5254ba319b5SDimitry Andric Elements.push_back(&E);
526*b5893f02SDimitry Andric llvm::sort(Elements,
5274ba319b5SDimitry Andric [](const Object::value_type *L, const Object::value_type *R) {
5284ba319b5SDimitry Andric return L->first < R->first;
5294ba319b5SDimitry Andric });
5304ba319b5SDimitry Andric return Elements;
5314ba319b5SDimitry Andric }
5324ba319b5SDimitry Andric
isUTF8(llvm::StringRef S,size_t * ErrOffset)5334ba319b5SDimitry Andric bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
5344ba319b5SDimitry Andric // Fast-path for ASCII, which is valid UTF-8.
5354ba319b5SDimitry Andric if (LLVM_LIKELY(isASCII(S)))
5364ba319b5SDimitry Andric return true;
5374ba319b5SDimitry Andric
5384ba319b5SDimitry Andric const UTF8 *Data = reinterpret_cast<const UTF8 *>(S.data()), *Rest = Data;
5394ba319b5SDimitry Andric if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
5404ba319b5SDimitry Andric return true;
5414ba319b5SDimitry Andric
5424ba319b5SDimitry Andric if (ErrOffset)
5434ba319b5SDimitry Andric *ErrOffset = Rest - Data;
5444ba319b5SDimitry Andric return false;
5454ba319b5SDimitry Andric }
5464ba319b5SDimitry Andric
fixUTF8(llvm::StringRef S)5474ba319b5SDimitry Andric std::string fixUTF8(llvm::StringRef S) {
5484ba319b5SDimitry Andric // This isn't particularly efficient, but is only for error-recovery.
5494ba319b5SDimitry Andric std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
5504ba319b5SDimitry Andric const UTF8 *In8 = reinterpret_cast<const UTF8 *>(S.data());
5514ba319b5SDimitry Andric UTF32 *Out32 = Codepoints.data();
5524ba319b5SDimitry Andric ConvertUTF8toUTF32(&In8, In8 + S.size(), &Out32, Out32 + Codepoints.size(),
5534ba319b5SDimitry Andric lenientConversion);
5544ba319b5SDimitry Andric Codepoints.resize(Out32 - Codepoints.data());
5554ba319b5SDimitry Andric std::string Res(4 * Codepoints.size(), 0); // 4 bytes per codepoint suffice
5564ba319b5SDimitry Andric const UTF32 *In32 = Codepoints.data();
5574ba319b5SDimitry Andric UTF8 *Out8 = reinterpret_cast<UTF8 *>(&Res[0]);
5584ba319b5SDimitry Andric ConvertUTF32toUTF8(&In32, In32 + Codepoints.size(), &Out8, Out8 + Res.size(),
5594ba319b5SDimitry Andric strictConversion);
5604ba319b5SDimitry Andric Res.resize(reinterpret_cast<char *>(Out8) - Res.data());
5614ba319b5SDimitry Andric return Res;
5624ba319b5SDimitry Andric }
5634ba319b5SDimitry Andric
5644ba319b5SDimitry Andric } // namespace json
5654ba319b5SDimitry Andric } // namespace llvm
5664ba319b5SDimitry Andric
quote(llvm::raw_ostream & OS,llvm::StringRef S)5674ba319b5SDimitry Andric static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
5684ba319b5SDimitry Andric OS << '\"';
5694ba319b5SDimitry Andric for (unsigned char C : S) {
5704ba319b5SDimitry Andric if (C == 0x22 || C == 0x5C)
5714ba319b5SDimitry Andric OS << '\\';
5724ba319b5SDimitry Andric if (C >= 0x20) {
5734ba319b5SDimitry Andric OS << C;
5744ba319b5SDimitry Andric continue;
5754ba319b5SDimitry Andric }
5764ba319b5SDimitry Andric OS << '\\';
5774ba319b5SDimitry Andric switch (C) {
5784ba319b5SDimitry Andric // A few characters are common enough to make short escapes worthwhile.
5794ba319b5SDimitry Andric case '\t':
5804ba319b5SDimitry Andric OS << 't';
5814ba319b5SDimitry Andric break;
5824ba319b5SDimitry Andric case '\n':
5834ba319b5SDimitry Andric OS << 'n';
5844ba319b5SDimitry Andric break;
5854ba319b5SDimitry Andric case '\r':
5864ba319b5SDimitry Andric OS << 'r';
5874ba319b5SDimitry Andric break;
5884ba319b5SDimitry Andric default:
5894ba319b5SDimitry Andric OS << 'u';
5904ba319b5SDimitry Andric llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4);
5914ba319b5SDimitry Andric break;
5924ba319b5SDimitry Andric }
5934ba319b5SDimitry Andric }
5944ba319b5SDimitry Andric OS << '\"';
5954ba319b5SDimitry Andric }
5964ba319b5SDimitry Andric
// Formatting events that Value::print() passes to its indenter callback.
// The descriptions below reflect the object-printing code in this file.
enum IndenterAction {
  // Sent right after the opening '{' of an object has been written.
  Indent,
  // Sent after the last member of an object, before its closing '}'.
  Outdent,
  // Sent before each object member, and before the '}' of a non-empty object.
  Newline,
  // Sent after the ':' that separates a member's key from its value.
  Space,
};
6034ba319b5SDimitry Andric
6044ba319b5SDimitry Andric // Prints JSON. The indenter can be used to control formatting.
6054ba319b5SDimitry Andric template <typename Indenter>
print(raw_ostream & OS,const Indenter & I) const6064ba319b5SDimitry Andric void llvm::json::Value::print(raw_ostream &OS, const Indenter &I) const {
6074ba319b5SDimitry Andric switch (Type) {
6084ba319b5SDimitry Andric case T_Null:
6094ba319b5SDimitry Andric OS << "null";
6104ba319b5SDimitry Andric break;
6114ba319b5SDimitry Andric case T_Boolean:
6124ba319b5SDimitry Andric OS << (as<bool>() ? "true" : "false");
6134ba319b5SDimitry Andric break;
6144ba319b5SDimitry Andric case T_Double:
6154ba319b5SDimitry Andric OS << format("%.*g", std::numeric_limits<double>::max_digits10,
6164ba319b5SDimitry Andric as<double>());
6174ba319b5SDimitry Andric break;
6184ba319b5SDimitry Andric case T_Integer:
6194ba319b5SDimitry Andric OS << as<int64_t>();
6204ba319b5SDimitry Andric break;
6214ba319b5SDimitry Andric case T_StringRef:
6224ba319b5SDimitry Andric quote(OS, as<StringRef>());
6234ba319b5SDimitry Andric break;
6244ba319b5SDimitry Andric case T_String:
6254ba319b5SDimitry Andric quote(OS, as<std::string>());
6264ba319b5SDimitry Andric break;
6274ba319b5SDimitry Andric case T_Object: {
6284ba319b5SDimitry Andric bool Comma = false;
6294ba319b5SDimitry Andric OS << '{';
6304ba319b5SDimitry Andric I(Indent);
6314ba319b5SDimitry Andric for (const auto *P : sortedElements(as<json::Object>())) {
6324ba319b5SDimitry Andric if (Comma)
6334ba319b5SDimitry Andric OS << ',';
6344ba319b5SDimitry Andric Comma = true;
6354ba319b5SDimitry Andric I(Newline);
6364ba319b5SDimitry Andric quote(OS, P->first);
6374ba319b5SDimitry Andric OS << ':';
6384ba319b5SDimitry Andric I(Space);
6394ba319b5SDimitry Andric P->second.print(OS, I);
6404ba319b5SDimitry Andric }
6414ba319b5SDimitry Andric I(Outdent);
6424ba319b5SDimitry Andric if (Comma)
6434ba319b5SDimitry Andric I(Newline);
6444ba319b5SDimitry Andric OS << '}';
6454ba319b5SDimitry Andric break;
6464ba319b5SDimitry Andric }
6474ba319b5SDimitry Andric case T_Array: {
6484ba319b5SDimitry Andric bool Comma = false;
6494ba319b5SDimitry Andric OS << '[';
6504ba319b5SDimitry Andric I(Indent);
6514ba319b5SDimitry Andric for (const auto &E : as<json::Array>()) {
6524ba319b5SDimitry Andric if (Comma)
6534ba319b5SDimitry Andric OS << ',';
6544ba319b5SDimitry Andric Comma = true;
6554ba319b5SDimitry Andric I(Newline);
6564ba319b5SDimitry Andric E.print(OS, I);
6574ba319b5SDimitry Andric }
6584ba319b5SDimitry Andric I(Outdent);
6594ba319b5SDimitry Andric if (Comma)
6604ba319b5SDimitry Andric I(Newline);
6614ba319b5SDimitry Andric OS << ']';
6624ba319b5SDimitry Andric break;
6634ba319b5SDimitry Andric }
6644ba319b5SDimitry Andric }
6654ba319b5SDimitry Andric }
6664ba319b5SDimitry Andric
format(const llvm::json::Value & E,raw_ostream & OS,StringRef Options)6674ba319b5SDimitry Andric void llvm::format_provider<llvm::json::Value>::format(
6684ba319b5SDimitry Andric const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
6694ba319b5SDimitry Andric if (Options.empty()) {
6704ba319b5SDimitry Andric OS << E;
6714ba319b5SDimitry Andric return;
6724ba319b5SDimitry Andric }
6734ba319b5SDimitry Andric unsigned IndentAmount = 0;
6744ba319b5SDimitry Andric if (Options.getAsInteger(/*Radix=*/10, IndentAmount))
6754ba319b5SDimitry Andric llvm_unreachable("json::Value format options should be an integer");
6764ba319b5SDimitry Andric unsigned IndentLevel = 0;
6774ba319b5SDimitry Andric E.print(OS, [&](IndenterAction A) {
6784ba319b5SDimitry Andric switch (A) {
6794ba319b5SDimitry Andric case Newline:
6804ba319b5SDimitry Andric OS << '\n';
6814ba319b5SDimitry Andric OS.indent(IndentLevel);
6824ba319b5SDimitry Andric break;
6834ba319b5SDimitry Andric case Space:
6844ba319b5SDimitry Andric OS << ' ';
6854ba319b5SDimitry Andric break;
6864ba319b5SDimitry Andric case Indent:
6874ba319b5SDimitry Andric IndentLevel += IndentAmount;
6884ba319b5SDimitry Andric break;
6894ba319b5SDimitry Andric case Outdent:
6904ba319b5SDimitry Andric IndentLevel -= IndentAmount;
6914ba319b5SDimitry Andric break;
6924ba319b5SDimitry Andric };
6934ba319b5SDimitry Andric });
6944ba319b5SDimitry Andric }
6954ba319b5SDimitry Andric
operator <<(raw_ostream & OS,const Value & E)6964ba319b5SDimitry Andric llvm::raw_ostream &llvm::json::operator<<(raw_ostream &OS, const Value &E) {
6974ba319b5SDimitry Andric E.print(OS, [](IndenterAction A) { /*ignore*/ });
6984ba319b5SDimitry Andric return OS;
6994ba319b5SDimitry Andric }
700