16be38247SSam McCall //=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===// 26be38247SSam McCall // 36be38247SSam McCall // The LLVM Compiler Infrastructure 46be38247SSam McCall // 56be38247SSam McCall // This file is distributed under the University of Illinois Open Source 66be38247SSam McCall // License. See LICENSE.TXT for details. 76be38247SSam McCall // 86be38247SSam McCall //===---------------------------------------------------------------------===// 96be38247SSam McCall 106be38247SSam McCall #include "llvm/Support/JSON.h" 116be38247SSam McCall #include "llvm/Support/Format.h" 126be38247SSam McCall #include <cctype> 136be38247SSam McCall 146be38247SSam McCall namespace llvm { 156be38247SSam McCall namespace json { 166be38247SSam McCall 176be38247SSam McCall Value &Object::operator[](const ObjectKey &K) { 186be38247SSam McCall return try_emplace(K, nullptr).first->getSecond(); 196be38247SSam McCall } 206be38247SSam McCall Value &Object::operator[](ObjectKey &&K) { 216be38247SSam McCall return try_emplace(std::move(K), nullptr).first->getSecond(); 226be38247SSam McCall } 236be38247SSam McCall Value *Object::get(StringRef K) { 246be38247SSam McCall auto I = find(K); 256be38247SSam McCall if (I == end()) 266be38247SSam McCall return nullptr; 276be38247SSam McCall return &I->second; 286be38247SSam McCall } 296be38247SSam McCall const Value *Object::get(StringRef K) const { 306be38247SSam McCall auto I = find(K); 316be38247SSam McCall if (I == end()) 326be38247SSam McCall return nullptr; 336be38247SSam McCall return &I->second; 346be38247SSam McCall } 356be38247SSam McCall llvm::Optional<std::nullptr_t> Object::getNull(StringRef K) const { 366be38247SSam McCall if (auto *V = get(K)) 376be38247SSam McCall return V->getAsNull(); 386be38247SSam McCall return llvm::None; 396be38247SSam McCall } 406be38247SSam McCall llvm::Optional<bool> Object::getBoolean(StringRef K) const { 416be38247SSam McCall if (auto *V = get(K)) 426be38247SSam McCall return V->getAsBoolean(); 436be38247SSam McCall return llvm::None; 446be38247SSam McCall } 456be38247SSam McCall llvm::Optional<double> Object::getNumber(StringRef K) const { 466be38247SSam McCall if (auto *V = get(K)) 476be38247SSam McCall return V->getAsNumber(); 486be38247SSam McCall return llvm::None; 496be38247SSam McCall } 506be38247SSam McCall llvm::Optional<int64_t> Object::getInteger(StringRef K) const { 516be38247SSam McCall if (auto *V = get(K)) 526be38247SSam McCall return V->getAsInteger(); 536be38247SSam McCall return llvm::None; 546be38247SSam McCall } 556be38247SSam McCall llvm::Optional<llvm::StringRef> Object::getString(StringRef K) const { 566be38247SSam McCall if (auto *V = get(K)) 576be38247SSam McCall return V->getAsString(); 586be38247SSam McCall return llvm::None; 596be38247SSam McCall } 606be38247SSam McCall const json::Object *Object::getObject(StringRef K) const { 616be38247SSam McCall if (auto *V = get(K)) 626be38247SSam McCall return V->getAsObject(); 636be38247SSam McCall return nullptr; 646be38247SSam McCall } 656be38247SSam McCall json::Object *Object::getObject(StringRef K) { 666be38247SSam McCall if (auto *V = get(K)) 676be38247SSam McCall return V->getAsObject(); 686be38247SSam McCall return nullptr; 696be38247SSam McCall } 706be38247SSam McCall const json::Array *Object::getArray(StringRef K) const { 716be38247SSam McCall if (auto *V = get(K)) 726be38247SSam McCall return V->getAsArray(); 736be38247SSam McCall return nullptr; 746be38247SSam McCall } 756be38247SSam McCall json::Array *Object::getArray(StringRef K) { 766be38247SSam McCall if (auto *V = get(K)) 776be38247SSam McCall return V->getAsArray(); 786be38247SSam McCall return nullptr; 796be38247SSam McCall } 806be38247SSam McCall bool operator==(const Object &LHS, const Object &RHS) { 816be38247SSam McCall if (LHS.size() != RHS.size()) 826be38247SSam McCall return false; 836be38247SSam McCall for (const auto &L : LHS) { 846be38247SSam McCall auto R = RHS.find(L.first); 856be38247SSam McCall if (R == RHS.end() || L.second != R->second) 866be38247SSam McCall return false; 876be38247SSam McCall } 886be38247SSam McCall return true; 896be38247SSam McCall } 906be38247SSam McCall 916be38247SSam McCall Array::Array(std::initializer_list<Value> Elements) { 926be38247SSam McCall V.reserve(Elements.size()); 936be38247SSam McCall for (const Value &V : Elements) { 946be38247SSam McCall emplace_back(nullptr); 956be38247SSam McCall back().moveFrom(std::move(V)); 966be38247SSam McCall } 976be38247SSam McCall } 986be38247SSam McCall 996be38247SSam McCall Value::Value(std::initializer_list<Value> Elements) 1006be38247SSam McCall : Value(json::Array(Elements)) {} 1016be38247SSam McCall 1026be38247SSam McCall void Value::copyFrom(const Value &M) { 1036be38247SSam McCall Type = M.Type; 1046be38247SSam McCall switch (Type) { 1056be38247SSam McCall case T_Null: 1066be38247SSam McCall case T_Boolean: 107*d93eaeb7SSam McCall case T_Double: 108*d93eaeb7SSam McCall case T_Integer: 1096be38247SSam McCall memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer)); 1106be38247SSam McCall break; 1116be38247SSam McCall case T_StringRef: 1126be38247SSam McCall create<StringRef>(M.as<StringRef>()); 1136be38247SSam McCall break; 1146be38247SSam McCall case T_String: 1156be38247SSam McCall create<std::string>(M.as<std::string>()); 1166be38247SSam McCall break; 1176be38247SSam McCall case T_Object: 1186be38247SSam McCall create<json::Object>(M.as<json::Object>()); 1196be38247SSam McCall break; 1206be38247SSam McCall case T_Array: 1216be38247SSam McCall create<json::Array>(M.as<json::Array>()); 1226be38247SSam McCall break; 1236be38247SSam McCall } 1246be38247SSam McCall } 1256be38247SSam McCall 1266be38247SSam McCall void Value::moveFrom(const Value &&M) { 1276be38247SSam McCall Type = M.Type; 1286be38247SSam McCall switch (Type) { 1296be38247SSam McCall case T_Null: 1306be38247SSam McCall case T_Boolean: 131*d93eaeb7SSam McCall case T_Double: 132*d93eaeb7SSam McCall case T_Integer: 1336be38247SSam McCall memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer)); 1346be38247SSam McCall break; 1356be38247SSam McCall case T_StringRef: 1366be38247SSam McCall create<StringRef>(M.as<StringRef>()); 1376be38247SSam McCall break; 1386be38247SSam McCall case T_String: 1396be38247SSam McCall create<std::string>(std::move(M.as<std::string>())); 1406be38247SSam McCall M.Type = T_Null; 1416be38247SSam McCall break; 1426be38247SSam McCall case T_Object: 1436be38247SSam McCall create<json::Object>(std::move(M.as<json::Object>())); 1446be38247SSam McCall M.Type = T_Null; 1456be38247SSam McCall break; 1466be38247SSam McCall case T_Array: 1476be38247SSam McCall create<json::Array>(std::move(M.as<json::Array>())); 1486be38247SSam McCall M.Type = T_Null; 1496be38247SSam McCall break; 1506be38247SSam McCall } 1516be38247SSam McCall } 1526be38247SSam McCall 1536be38247SSam McCall void Value::destroy() { 1546be38247SSam McCall switch (Type) { 1556be38247SSam McCall case T_Null: 1566be38247SSam McCall case T_Boolean: 157*d93eaeb7SSam McCall case T_Double: 158*d93eaeb7SSam McCall case T_Integer: 1596be38247SSam McCall break; 1606be38247SSam McCall case T_StringRef: 1616be38247SSam McCall as<StringRef>().~StringRef(); 1626be38247SSam McCall break; 1636be38247SSam McCall case T_String: 1646be38247SSam McCall as<std::string>().~basic_string(); 1656be38247SSam McCall break; 1666be38247SSam McCall case T_Object: 1676be38247SSam McCall as<json::Object>().~Object(); 1686be38247SSam McCall break; 1696be38247SSam McCall case T_Array: 1706be38247SSam McCall as<json::Array>().~Array(); 1716be38247SSam McCall break; 1726be38247SSam McCall } 1736be38247SSam McCall } 1746be38247SSam McCall 1756be38247SSam McCall bool operator==(const Value &L, const Value &R) { 1766be38247SSam McCall if (L.kind() != R.kind()) 1776be38247SSam McCall return false; 1786be38247SSam McCall switch (L.kind()) { 1796be38247SSam McCall case Value::Null: 1806be38247SSam McCall return *L.getAsNull() == *R.getAsNull(); 1816be38247SSam McCall case Value::Boolean: 1826be38247SSam McCall return *L.getAsBoolean() == *R.getAsBoolean(); 1836be38247SSam McCall case Value::Number: 1846be38247SSam McCall return *L.getAsNumber() == *R.getAsNumber(); 1856be38247SSam McCall case Value::String: 1866be38247SSam McCall return *L.getAsString() == *R.getAsString(); 1876be38247SSam McCall case Value::Array: 1886be38247SSam McCall return *L.getAsArray() == *R.getAsArray(); 1896be38247SSam McCall case Value::Object: 1906be38247SSam McCall return *L.getAsObject() == *R.getAsObject(); 1916be38247SSam McCall } 1926be38247SSam McCall llvm_unreachable("Unknown value kind"); 1936be38247SSam McCall } 1946be38247SSam McCall 1956be38247SSam McCall namespace { 1966be38247SSam McCall // Simple recursive-descent JSON parser. 1976be38247SSam McCall class Parser { 1986be38247SSam McCall public: 1996be38247SSam McCall Parser(StringRef JSON) 2006be38247SSam McCall : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {} 2016be38247SSam McCall 2026be38247SSam McCall bool parseValue(Value &Out); 2036be38247SSam McCall 2046be38247SSam McCall bool assertEnd() { 2056be38247SSam McCall eatWhitespace(); 2066be38247SSam McCall if (P == End) 2076be38247SSam McCall return true; 2086be38247SSam McCall return parseError("Text after end of document"); 2096be38247SSam McCall } 2106be38247SSam McCall 2116be38247SSam McCall Error takeError() { 2126be38247SSam McCall assert(Err); 2136be38247SSam McCall return std::move(*Err); 2146be38247SSam McCall } 2156be38247SSam McCall 2166be38247SSam McCall private: 2176be38247SSam McCall void eatWhitespace() { 2186be38247SSam McCall while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t')) 2196be38247SSam McCall ++P; 2206be38247SSam McCall } 2216be38247SSam McCall 2226be38247SSam McCall // On invalid syntax, parseX() functions return false and set Err. 223*d93eaeb7SSam McCall bool parseNumber(char First, Value &Out); 2246be38247SSam McCall bool parseString(std::string &Out); 2256be38247SSam McCall bool parseUnicode(std::string &Out); 2266be38247SSam McCall bool parseError(const char *Msg); // always returns false 2276be38247SSam McCall 2286be38247SSam McCall char next() { return P == End ? 0 : *P++; } 2296be38247SSam McCall char peek() { return P == End ? 0 : *P; } 2306be38247SSam McCall static bool isNumber(char C) { 2316be38247SSam McCall return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' || 2326be38247SSam McCall C == '5' || C == '6' || C == '7' || C == '8' || C == '9' || 2336be38247SSam McCall C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.'; 2346be38247SSam McCall } 2356be38247SSam McCall 2366be38247SSam McCall Optional<Error> Err; 2376be38247SSam McCall const char *Start, *P, *End; 2386be38247SSam McCall }; 2396be38247SSam McCall 2406be38247SSam McCall bool Parser::parseValue(Value &Out) { 2416be38247SSam McCall eatWhitespace(); 2426be38247SSam McCall if (P == End) 2436be38247SSam McCall return parseError("Unexpected EOF"); 2446be38247SSam McCall switch (char C = next()) { 2456be38247SSam McCall // Bare null/true/false are easy - first char identifies them. 2466be38247SSam McCall case 'n': 2476be38247SSam McCall Out = nullptr; 2486be38247SSam McCall return (next() == 'u' && next() == 'l' && next() == 'l') || 2496be38247SSam McCall parseError("Invalid JSON value (null?)"); 2506be38247SSam McCall case 't': 2516be38247SSam McCall Out = true; 2526be38247SSam McCall return (next() == 'r' && next() == 'u' && next() == 'e') || 2536be38247SSam McCall parseError("Invalid JSON value (true?)"); 2546be38247SSam McCall case 'f': 2556be38247SSam McCall Out = false; 2566be38247SSam McCall return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') || 2576be38247SSam McCall parseError("Invalid JSON value (false?)"); 2586be38247SSam McCall case '"': { 2596be38247SSam McCall std::string S; 2606be38247SSam McCall if (parseString(S)) { 2616be38247SSam McCall Out = std::move(S); 2626be38247SSam McCall return true; 2636be38247SSam McCall } 2646be38247SSam McCall return false; 2656be38247SSam McCall } 2666be38247SSam McCall case '[': { 2676be38247SSam McCall Out = Array{}; 2686be38247SSam McCall Array &A = *Out.getAsArray(); 2696be38247SSam McCall eatWhitespace(); 2706be38247SSam McCall if (peek() == ']') { 2716be38247SSam McCall ++P; 2726be38247SSam McCall return true; 2736be38247SSam McCall } 2746be38247SSam McCall for (;;) { 2756be38247SSam McCall A.emplace_back(nullptr); 2766be38247SSam McCall if (!parseValue(A.back())) 2776be38247SSam McCall return false; 2786be38247SSam McCall eatWhitespace(); 2796be38247SSam McCall switch (next()) { 2806be38247SSam McCall case ',': 2816be38247SSam McCall eatWhitespace(); 2826be38247SSam McCall continue; 2836be38247SSam McCall case ']': 2846be38247SSam McCall return true; 2856be38247SSam McCall default: 2866be38247SSam McCall return parseError("Expected , or ] after array element"); 2876be38247SSam McCall } 2886be38247SSam McCall } 2896be38247SSam McCall } 2906be38247SSam McCall case '{': { 2916be38247SSam McCall Out = Object{}; 2926be38247SSam McCall Object &O = *Out.getAsObject(); 2936be38247SSam McCall eatWhitespace(); 2946be38247SSam McCall if (peek() == '}') { 2956be38247SSam McCall ++P; 2966be38247SSam McCall return true; 2976be38247SSam McCall } 2986be38247SSam McCall for (;;) { 2996be38247SSam McCall if (next() != '"') 3006be38247SSam McCall return parseError("Expected object key"); 3016be38247SSam McCall std::string K; 3026be38247SSam McCall if (!parseString(K)) 3036be38247SSam McCall return false; 3046be38247SSam McCall eatWhitespace(); 3056be38247SSam McCall if (next() != ':') 3066be38247SSam McCall return parseError("Expected : after object key"); 3076be38247SSam McCall eatWhitespace(); 3086be38247SSam McCall if (!parseValue(O[std::move(K)])) 3096be38247SSam McCall return false; 3106be38247SSam McCall eatWhitespace(); 3116be38247SSam McCall switch (next()) { 3126be38247SSam McCall case ',': 3136be38247SSam McCall eatWhitespace(); 3146be38247SSam McCall continue; 3156be38247SSam McCall case '}': 3166be38247SSam McCall return true; 3176be38247SSam McCall default: 3186be38247SSam McCall return parseError("Expected , or } after object property"); 3196be38247SSam McCall } 3206be38247SSam McCall } 3216be38247SSam McCall } 3226be38247SSam McCall default: 323*d93eaeb7SSam McCall if (isNumber(C)) 324*d93eaeb7SSam McCall return parseNumber(C, Out); 3256be38247SSam McCall return parseError("Invalid JSON value"); 3266be38247SSam McCall } 3276be38247SSam McCall } 3286be38247SSam McCall 329*d93eaeb7SSam McCall bool Parser::parseNumber(char First, Value &Out) { 330*d93eaeb7SSam McCall // Read the number into a string. (Must be null-terminated for strto*). 3316be38247SSam McCall SmallString<24> S; 3326be38247SSam McCall S.push_back(First); 3336be38247SSam McCall while (isNumber(peek())) 3346be38247SSam McCall S.push_back(next()); 3356be38247SSam McCall char *End; 336*d93eaeb7SSam McCall // Try first to parse as integer, and if so preserve full 64 bits. 337*d93eaeb7SSam McCall // strtoll returns long long >= 64 bits, so check it's in range too. 338*d93eaeb7SSam McCall auto I = std::strtoll(S.c_str(), &End, 10); 339*d93eaeb7SSam McCall if (End == S.end() && I >= std::numeric_limits<int64_t>::min() && 340*d93eaeb7SSam McCall I <= std::numeric_limits<int64_t>::max()) { 341*d93eaeb7SSam McCall Out = int64_t(I); 342*d93eaeb7SSam McCall return true; 343*d93eaeb7SSam McCall } 344*d93eaeb7SSam McCall // If it's not an integer 3456be38247SSam McCall Out = std::strtod(S.c_str(), &End); 3466be38247SSam McCall return End == S.end() || parseError("Invalid JSON value (number?)"); 3476be38247SSam McCall } 3486be38247SSam McCall 3496be38247SSam McCall bool Parser::parseString(std::string &Out) { 3506be38247SSam McCall // leading quote was already consumed. 3516be38247SSam McCall for (char C = next(); C != '"'; C = next()) { 3526be38247SSam McCall if (LLVM_UNLIKELY(P == End)) 3536be38247SSam McCall return parseError("Unterminated string"); 3546be38247SSam McCall if (LLVM_UNLIKELY((C & 0x1f) == C)) 3556be38247SSam McCall return parseError("Control character in string"); 3566be38247SSam McCall if (LLVM_LIKELY(C != '\\')) { 3576be38247SSam McCall Out.push_back(C); 3586be38247SSam McCall continue; 3596be38247SSam McCall } 3606be38247SSam McCall // Handle escape sequence. 3616be38247SSam McCall switch (C = next()) { 3626be38247SSam McCall case '"': 3636be38247SSam McCall case '\\': 3646be38247SSam McCall case '/': 3656be38247SSam McCall Out.push_back(C); 3666be38247SSam McCall break; 3676be38247SSam McCall case 'b': 3686be38247SSam McCall Out.push_back('\b'); 3696be38247SSam McCall break; 3706be38247SSam McCall case 'f': 3716be38247SSam McCall Out.push_back('\f'); 3726be38247SSam McCall break; 3736be38247SSam McCall case 'n': 3746be38247SSam McCall Out.push_back('\n'); 3756be38247SSam McCall break; 3766be38247SSam McCall case 'r': 3776be38247SSam McCall Out.push_back('\r'); 3786be38247SSam McCall break; 3796be38247SSam McCall case 't': 3806be38247SSam McCall Out.push_back('\t'); 3816be38247SSam McCall break; 3826be38247SSam McCall case 'u': 3836be38247SSam McCall if (!parseUnicode(Out)) 3846be38247SSam McCall return false; 3856be38247SSam McCall break; 3866be38247SSam McCall default: 3876be38247SSam McCall return parseError("Invalid escape sequence"); 3886be38247SSam McCall } 3896be38247SSam McCall } 3906be38247SSam McCall return true; 3916be38247SSam McCall } 3926be38247SSam McCall 3936be38247SSam McCall static void encodeUtf8(uint32_t Rune, std::string &Out) { 3946be38247SSam McCall if (Rune < 0x80) { 3956be38247SSam McCall Out.push_back(Rune & 0x7F); 3966be38247SSam McCall } else if (Rune < 0x800) { 3976be38247SSam McCall uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6); 3986be38247SSam McCall uint8_t SecondByte = 0x80 | (Rune & 0x3F); 3996be38247SSam McCall Out.push_back(FirstByte); 4006be38247SSam McCall Out.push_back(SecondByte); 4016be38247SSam McCall } else if (Rune < 0x10000) { 4026be38247SSam McCall uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12); 4036be38247SSam McCall uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6); 4046be38247SSam McCall uint8_t ThirdByte = 0x80 | (Rune & 0x3F); 4056be38247SSam McCall Out.push_back(FirstByte); 4066be38247SSam McCall Out.push_back(SecondByte); 4076be38247SSam McCall Out.push_back(ThirdByte); 4086be38247SSam McCall } else if (Rune < 0x110000) { 4096be38247SSam McCall uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18); 4106be38247SSam McCall uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12); 4116be38247SSam McCall uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6); 4126be38247SSam McCall uint8_t FourthByte = 0x80 | (Rune & 0x3F); 4136be38247SSam McCall Out.push_back(FirstByte); 4146be38247SSam McCall Out.push_back(SecondByte); 4156be38247SSam McCall Out.push_back(ThirdByte); 4166be38247SSam McCall Out.push_back(FourthByte); 4176be38247SSam McCall } else { 4186be38247SSam McCall llvm_unreachable("Invalid codepoint"); 4196be38247SSam McCall } 4206be38247SSam McCall } 4216be38247SSam McCall 4226be38247SSam McCall // Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed. 4236be38247SSam McCall // May parse several sequential escapes to ensure proper surrogate handling. 4246be38247SSam McCall // We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates. 4256be38247SSam McCall // These are invalid Unicode but valid JSON (RFC 8259, section 8.2). 4266be38247SSam McCall bool Parser::parseUnicode(std::string &Out) { 4276be38247SSam McCall // Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD. 4286be38247SSam McCall auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); }; 4296be38247SSam McCall // Decodes 4 hex digits from the stream into Out, returns false on error. 4306be38247SSam McCall auto Parse4Hex = [this](uint16_t &Out) -> bool { 4316be38247SSam McCall Out = 0; 4326be38247SSam McCall char Bytes[] = {next(), next(), next(), next()}; 4336be38247SSam McCall for (unsigned char C : Bytes) { 4346be38247SSam McCall if (!std::isxdigit(C)) 4356be38247SSam McCall return parseError("Invalid \\u escape sequence"); 4366be38247SSam McCall Out <<= 4; 4376be38247SSam McCall Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0'); 4386be38247SSam McCall } 4396be38247SSam McCall return true; 4406be38247SSam McCall }; 4416be38247SSam McCall uint16_t First; // UTF-16 code unit from the first \u escape. 4426be38247SSam McCall if (!Parse4Hex(First)) 4436be38247SSam McCall return false; 4446be38247SSam McCall 4456be38247SSam McCall // We loop to allow proper surrogate-pair error handling. 4466be38247SSam McCall while (true) { 4476be38247SSam McCall // Case 1: the UTF-16 code unit is already a codepoint in the BMP. 4486be38247SSam McCall if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) { 4496be38247SSam McCall encodeUtf8(First, Out); 4506be38247SSam McCall return true; 4516be38247SSam McCall } 4526be38247SSam McCall 4536be38247SSam McCall // Case 2: it's an (unpaired) trailing surrogate. 4546be38247SSam McCall if (LLVM_UNLIKELY(First >= 0xDC00)) { 4556be38247SSam McCall Invalid(); 4566be38247SSam McCall return true; 4576be38247SSam McCall } 4586be38247SSam McCall 4596be38247SSam McCall // Case 3: it's a leading surrogate. We expect a trailing one next. 4606be38247SSam McCall // Case 3a: there's no trailing \u escape. Don't advance in the stream. 4616be38247SSam McCall if (!LLVM_LIKELY(P + 2 <= End && *P == '\\' && *(P + 1) == 'u')) { 4626be38247SSam McCall Invalid(); // Leading surrogate was unpaired. 4636be38247SSam McCall return true; 4646be38247SSam McCall } 4656be38247SSam McCall P += 2; 4666be38247SSam McCall uint16_t Second; 4676be38247SSam McCall if (!Parse4Hex(Second)) 4686be38247SSam McCall return false; 4696be38247SSam McCall // Case 3b: there was another \u escape, but it wasn't a trailing surrogate. 4706be38247SSam McCall if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) { 4716be38247SSam McCall Invalid(); // Leading surrogate was unpaired. 4726be38247SSam McCall First = Second; // Second escape still needs to be processed. 4736be38247SSam McCall continue; 4746be38247SSam McCall } 4756be38247SSam McCall // Case 3c: a valid surrogate pair encoding an astral codepoint. 4766be38247SSam McCall encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out); 4776be38247SSam McCall return true; 4786be38247SSam McCall } 4796be38247SSam McCall } 4806be38247SSam McCall 4816be38247SSam McCall bool Parser::parseError(const char *Msg) { 4826be38247SSam McCall int Line = 1; 4836be38247SSam McCall const char *StartOfLine = Start; 4846be38247SSam McCall for (const char *X = Start; X < P; ++X) { 4856be38247SSam McCall if (*X == 0x0A) { 4866be38247SSam McCall ++Line; 4876be38247SSam McCall StartOfLine = X + 1; 4886be38247SSam McCall } 4896be38247SSam McCall } 4906be38247SSam McCall Err.emplace( 4916be38247SSam McCall llvm::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start)); 4926be38247SSam McCall return false; 4936be38247SSam McCall } 4946be38247SSam McCall } // namespace 4956be38247SSam McCall 4966be38247SSam McCall Expected<Value> parse(StringRef JSON) { 4976be38247SSam McCall Parser P(JSON); 4986be38247SSam McCall Value E = nullptr; 4996be38247SSam McCall if (P.parseValue(E)) 5006be38247SSam McCall if (P.assertEnd()) 5016be38247SSam McCall return std::move(E); 5026be38247SSam McCall return P.takeError(); 5036be38247SSam McCall } 5046be38247SSam McCall char ParseError::ID = 0; 5056be38247SSam McCall 5066be38247SSam McCall static std::vector<const Object::value_type *> sortedElements(const Object &O) { 5076be38247SSam McCall std::vector<const Object::value_type *> Elements; 5086be38247SSam McCall for (const auto &E : O) 5096be38247SSam McCall Elements.push_back(&E); 5106be38247SSam McCall llvm::sort(Elements.begin(), Elements.end(), 5116be38247SSam McCall [](const Object::value_type *L, const Object::value_type *R) { 5126be38247SSam McCall return L->first < R->first; 5136be38247SSam McCall }); 5146be38247SSam McCall return Elements; 5156be38247SSam McCall } 5166be38247SSam McCall 5176be38247SSam McCall } // namespace json 5186be38247SSam McCall } // namespace llvm 5196be38247SSam McCall 5206be38247SSam McCall static void quote(llvm::raw_ostream &OS, llvm::StringRef S) { 5216be38247SSam McCall OS << '\"'; 5226be38247SSam McCall for (unsigned char C : S) { 5236be38247SSam McCall if (C == 0x22 || C == 0x5C) 5246be38247SSam McCall OS << '\\'; 5256be38247SSam McCall if (C >= 0x20) { 5266be38247SSam McCall OS << C; 5276be38247SSam McCall continue; 5286be38247SSam McCall } 5296be38247SSam McCall OS << '\\'; 5306be38247SSam McCall switch (C) { 5316be38247SSam McCall // A few characters are common enough to make short escapes worthwhile. 5326be38247SSam McCall case '\t': 5336be38247SSam McCall OS << 't'; 5346be38247SSam McCall break; 5356be38247SSam McCall case '\n': 5366be38247SSam McCall OS << 'n'; 5376be38247SSam McCall break; 5386be38247SSam McCall case '\r': 5396be38247SSam McCall OS << 'r'; 5406be38247SSam McCall break; 5416be38247SSam McCall default: 5426be38247SSam McCall OS << 'u'; 5436be38247SSam McCall llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4); 5446be38247SSam McCall break; 5456be38247SSam McCall } 5466be38247SSam McCall } 5476be38247SSam McCall OS << '\"'; 5486be38247SSam McCall } 5496be38247SSam McCall 5506be38247SSam McCall enum IndenterAction { 5516be38247SSam McCall Indent, 5526be38247SSam McCall Outdent, 5536be38247SSam McCall Newline, 5546be38247SSam McCall Space, 5556be38247SSam McCall }; 5566be38247SSam McCall 5576be38247SSam McCall // Prints JSON. The indenter can be used to control formatting. 5586be38247SSam McCall template <typename Indenter> 5596be38247SSam McCall void llvm::json::Value::print(raw_ostream &OS, const Indenter &I) const { 5606be38247SSam McCall switch (Type) { 5616be38247SSam McCall case T_Null: 5626be38247SSam McCall OS << "null"; 5636be38247SSam McCall break; 5646be38247SSam McCall case T_Boolean: 5656be38247SSam McCall OS << (as<bool>() ? "true" : "false"); 5666be38247SSam McCall break; 567*d93eaeb7SSam McCall case T_Double: 568*d93eaeb7SSam McCall OS << format("%.*g", std::numeric_limits<double>::max_digits10, 569*d93eaeb7SSam McCall as<double>()); 570*d93eaeb7SSam McCall break; 571*d93eaeb7SSam McCall case T_Integer: 572*d93eaeb7SSam McCall OS << as<int64_t>(); 5736be38247SSam McCall break; 5746be38247SSam McCall case T_StringRef: 5756be38247SSam McCall quote(OS, as<StringRef>()); 5766be38247SSam McCall break; 5776be38247SSam McCall case T_String: 5786be38247SSam McCall quote(OS, as<std::string>()); 5796be38247SSam McCall break; 5806be38247SSam McCall case T_Object: { 5816be38247SSam McCall bool Comma = false; 5826be38247SSam McCall OS << '{'; 5836be38247SSam McCall I(Indent); 5846be38247SSam McCall for (const auto *P : sortedElements(as<json::Object>())) { 5856be38247SSam McCall if (Comma) 5866be38247SSam McCall OS << ','; 5876be38247SSam McCall Comma = true; 5886be38247SSam McCall I(Newline); 5896be38247SSam McCall quote(OS, P->first); 5906be38247SSam McCall OS << ':'; 5916be38247SSam McCall I(Space); 5926be38247SSam McCall P->second.print(OS, I); 5936be38247SSam McCall } 5946be38247SSam McCall I(Outdent); 5956be38247SSam McCall if (Comma) 5966be38247SSam McCall I(Newline); 5976be38247SSam McCall OS << '}'; 5986be38247SSam McCall break; 5996be38247SSam McCall } 6006be38247SSam McCall case T_Array: { 6016be38247SSam McCall bool Comma = false; 6026be38247SSam McCall OS << '['; 6036be38247SSam McCall I(Indent); 6046be38247SSam McCall for (const auto &E : as<json::Array>()) { 6056be38247SSam McCall if (Comma) 6066be38247SSam McCall OS << ','; 6076be38247SSam McCall Comma = true; 6086be38247SSam McCall I(Newline); 6096be38247SSam McCall E.print(OS, I); 6106be38247SSam McCall } 6116be38247SSam McCall I(Outdent); 6126be38247SSam McCall if (Comma) 6136be38247SSam McCall I(Newline); 6146be38247SSam McCall OS << ']'; 6156be38247SSam McCall break; 6166be38247SSam McCall } 6176be38247SSam McCall } 6186be38247SSam McCall } 6196be38247SSam McCall 6206be38247SSam McCall void llvm::format_provider<llvm::json::Value>::format( 6216be38247SSam McCall const llvm::json::Value &E, raw_ostream &OS, StringRef Options) { 6226be38247SSam McCall if (Options.empty()) { 6236be38247SSam McCall OS << E; 6246be38247SSam McCall return; 6256be38247SSam McCall } 6266be38247SSam McCall unsigned IndentAmount = 0; 6276be38247SSam McCall if (Options.getAsInteger(/*Radix=*/10, IndentAmount)) 6286be38247SSam McCall llvm_unreachable("json::Value format options should be an integer"); 6296be38247SSam McCall unsigned IndentLevel = 0; 6306be38247SSam McCall E.print(OS, [&](IndenterAction A) { 6316be38247SSam McCall switch (A) { 6326be38247SSam McCall case Newline: 6336be38247SSam McCall OS << '\n'; 6346be38247SSam McCall OS.indent(IndentLevel); 6356be38247SSam McCall break; 6366be38247SSam McCall case Space: 6376be38247SSam McCall OS << ' '; 6386be38247SSam McCall break; 6396be38247SSam McCall case Indent: 6406be38247SSam McCall IndentLevel += IndentAmount; 6416be38247SSam McCall break; 6426be38247SSam McCall case Outdent: 6436be38247SSam McCall IndentLevel -= IndentAmount; 6446be38247SSam McCall break; 6456be38247SSam McCall }; 6466be38247SSam McCall }); 6476be38247SSam McCall } 6486be38247SSam McCall 6496be38247SSam McCall llvm::raw_ostream &llvm::json::operator<<(raw_ostream &OS, const Value &E) { 6506be38247SSam McCall E.print(OS, [](IndenterAction A) { /*ignore*/ }); 6516be38247SSam McCall return OS; 6526be38247SSam McCall } 653