1 #define VERSION "1.0.3" 2 3 /* CJSON - JSON support for Lua 4 * 5 * Copyright (c) 2010-2011 Mark Pulford <[email protected]> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be 16 * included in all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27 /* Caveats: 28 * - JSON "null" values are represented as lightuserdata since Lua 29 * tables cannot contain "nil". Compare with cjson.null. 30 * - Invalid UTF-8 characters are not detected and will be passed 31 * untouched. If required, UTF-8 error checking should be done 32 * outside this library. 33 * - Javascript comments are not part of the JSON spec, and are not 34 * currently supported. 35 * 36 * Note: Decoding is slower than encoding. Lua spends significant 37 * time (30%) managing tables when parsing JSON since it is 38 * difficult to know object/array sizes ahead of time. 39 */ 40 41 #include <assert.h> 42 #include <string.h> 43 #include <math.h> 44 #include "lua.h" 45 #include "lauxlib.h" 46 47 #include "strbuf.h" 48 49 #ifdef MISSING_ISINF 50 #define isinf(x) (!isnan(x) && isnan((x) - (x))) 51 #endif 52 53 #define DEFAULT_SPARSE_CONVERT 0 54 #define DEFAULT_SPARSE_RATIO 2 55 #define DEFAULT_SPARSE_SAFE 10 56 #define DEFAULT_MAX_DEPTH 20 57 #define DEFAULT_ENCODE_REFUSE_BADNUM 1 58 #define DEFAULT_DECODE_REFUSE_BADNUM 0 59 #define DEFAULT_ENCODE_KEEP_BUFFER 1 60 61 typedef enum { 62 T_OBJ_BEGIN, 63 T_OBJ_END, 64 T_ARR_BEGIN, 65 T_ARR_END, 66 T_STRING, 67 T_NUMBER, 68 T_BOOLEAN, 69 T_NULL, 70 T_COLON, 71 T_COMMA, 72 T_END, 73 T_WHITESPACE, 74 T_ERROR, 75 T_UNKNOWN 76 } json_token_type_t; 77 78 static const char *json_token_type_name[] = { 79 "T_OBJ_BEGIN", 80 "T_OBJ_END", 81 "T_ARR_BEGIN", 82 "T_ARR_END", 83 "T_STRING", 84 "T_NUMBER", 85 "T_BOOLEAN", 86 "T_NULL", 87 "T_COLON", 88 "T_COMMA", 89 "T_END", 90 "T_WHITESPACE", 91 "T_ERROR", 92 "T_UNKNOWN", 93 NULL 94 }; 95 96 typedef struct { 97 json_token_type_t ch2token[256]; 98 char escape2char[256]; /* Decoding */ 99 #if 0 100 char escapes[35][8]; /* Pre-generated escape string buffer */ 101 char *char2escape[256]; /* Encoding */ 102 #endif 103 strbuf_t encode_buf; 104 char number_fmt[8]; /* "%.XXg\0" */ 105 int current_depth; 106 107 int encode_sparse_convert; 108 int encode_sparse_ratio; 109 int encode_sparse_safe; 110 int encode_max_depth; 111 int encode_refuse_badnum; 112 int decode_refuse_badnum; 113 int encode_keep_buffer; 114 int encode_number_precision; 115 } json_config_t; 116 117 typedef struct { 118 const char *data; 119 int index; 120 strbuf_t *tmp; /* Temporary storage for strings */ 121 json_config_t *cfg; 122 } json_parse_t; 123 124 typedef struct { 125 json_token_type_t type; 126 int index; 127 union { 128 const char *string; 129 double number; 130 int boolean; 131 } value; 132 int string_len; 133 } json_token_t; 134 135 static const char *char2escape[256] = { 136 "\\u0000", "\\u0001", "\\u0002", "\\u0003", 137 "\\u0004", "\\u0005", "\\u0006", "\\u0007", 138 "\\b", "\\t", "\\n", "\\u000b", 139 "\\f", "\\r", "\\u000e", "\\u000f", 140 "\\u0010", "\\u0011", "\\u0012", "\\u0013", 141 "\\u0014", "\\u0015", "\\u0016", "\\u0017", 142 "\\u0018", "\\u0019", "\\u001a", "\\u001b", 143 "\\u001c", "\\u001d", "\\u001e", "\\u001f", 144 NULL, NULL, "\\\"", NULL, NULL, NULL, NULL, NULL, 145 NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\/", 146 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 147 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 148 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 149 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 150 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 151 NULL, NULL, NULL, NULL, "\\\\", NULL, NULL, NULL, 152 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 153 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 154 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 155 NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f", 156 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 157 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 158 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 159 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 160 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 161 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 162 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 163 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 164 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 165 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 166 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 167 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 168 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 169 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 170 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 171 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 172 }; 173 174 static int json_config_key; 175 176 /* ===== CONFIGURATION ===== */ 177 178 static json_config_t *json_fetch_config(lua_State *l) 179 { 180 json_config_t *cfg; 181 182 lua_pushlightuserdata(l, &json_config_key); 183 lua_gettable(l, LUA_REGISTRYINDEX); 184 cfg = lua_touserdata(l, -1); 185 if (!cfg) 186 luaL_error(l, "BUG: Unable to fetch CJSON configuration"); 187 188 lua_pop(l, 1); 189 190 return cfg; 191 } 192 193 static void json_verify_arg_count(lua_State *l, int args) 194 { 195 luaL_argcheck(l, lua_gettop(l) <= args, args + 1, 196 "found too many arguments"); 197 } 198 199 /* Configures handling of extremely sparse arrays: 200 * convert: Convert extremely sparse arrays into objects? Otherwise error. 201 * ratio: 0: always allow sparse; 1: never allow sparse; >1: use ratio 202 * safe: Always use an array when the max index <= safe */ 203 static int json_cfg_encode_sparse_array(lua_State *l) 204 { 205 json_config_t *cfg; 206 int val; 207 208 json_verify_arg_count(l, 3); 209 cfg = json_fetch_config(l); 210 211 switch (lua_gettop(l)) { 212 case 3: 213 val = luaL_checkinteger(l, 3); 214 luaL_argcheck(l, val >= 0, 3, "expected integer >= 0"); 215 cfg->encode_sparse_safe = val; 216 case 2: 217 val = luaL_checkinteger(l, 2); 218 luaL_argcheck(l, val >= 0, 2, "expected integer >= 0"); 219 cfg->encode_sparse_ratio = val; 220 case 1: 221 luaL_argcheck(l, lua_isboolean(l, 1), 1, "expected boolean"); 222 cfg->encode_sparse_convert = lua_toboolean(l, 1); 223 } 224 225 lua_pushboolean(l, cfg->encode_sparse_convert); 226 lua_pushinteger(l, cfg->encode_sparse_ratio); 227 lua_pushinteger(l, cfg->encode_sparse_safe); 228 229 return 3; 230 } 231 232 /* Configures the maximum number of nested arrays/objects allowed when 233 * encoding */ 234 static int json_cfg_encode_max_depth(lua_State *l) 235 { 236 json_config_t *cfg; 237 int depth; 238 239 json_verify_arg_count(l, 1); 240 cfg = json_fetch_config(l); 241 242 if (lua_gettop(l)) { 243 depth = luaL_checkinteger(l, 1); 244 luaL_argcheck(l, depth > 0, 1, "expected positive integer"); 245 cfg->encode_max_depth = depth; 246 } 247 248 lua_pushinteger(l, cfg->encode_max_depth); 249 250 return 1; 251 } 252 253 static void json_set_number_precision(json_config_t *cfg, int prec) 254 { 255 cfg->encode_number_precision = prec; 256 sprintf(cfg->number_fmt, "%%.%dg", prec); 257 } 258 259 /* Configures number precision when converting doubles to text */ 260 static int json_cfg_encode_number_precision(lua_State *l) 261 { 262 json_config_t *cfg; 263 int precision; 264 265 json_verify_arg_count(l, 1); 266 cfg = json_fetch_config(l); 267 268 if (lua_gettop(l)) { 269 precision = luaL_checkinteger(l, 1); 270 luaL_argcheck(l, 1 <= precision && precision <= 14, 1, 271 "expected integer between 1 and 14"); 272 json_set_number_precision(cfg, precision); 273 } 274 275 lua_pushinteger(l, cfg->encode_number_precision); 276 277 return 1; 278 } 279 280 /* Configures JSON encoding buffer persistence */ 281 static int json_cfg_encode_keep_buffer(lua_State *l) 282 { 283 json_config_t *cfg; 284 285 json_verify_arg_count(l, 1); 286 cfg = json_fetch_config(l); 287 288 if (lua_gettop(l)) { 289 luaL_checktype(l, 1, LUA_TBOOLEAN); 290 cfg->encode_keep_buffer = lua_toboolean(l, 1); 291 } 292 293 lua_pushboolean(l, cfg->encode_keep_buffer); 294 295 return 1; 296 } 297 298 /* On argument: decode enum and set config variables 299 * **options must point to a NULL terminated array of 4 enums 300 * Returns: current enum value */ 301 static void json_enum_option(lua_State *l, const char **options, 302 int *opt1, int *opt2) 303 { 304 int setting; 305 306 if (lua_gettop(l)) { 307 if (lua_isboolean(l, 1)) 308 setting = lua_toboolean(l, 1) * 3; 309 else 310 setting = luaL_checkoption(l, 1, NULL, options); 311 312 *opt1 = setting & 1 ? 1 : 0; 313 *opt2 = setting & 2 ? 1 : 0; 314 } else { 315 setting = *opt1 | (*opt2 << 1); 316 } 317 318 if (setting) 319 lua_pushstring(l, options[setting]); 320 else 321 lua_pushboolean(l, 0); 322 } 323 324 325 /* When enabled, rejects: NaN, Infinity, hexidecimal numbers */ 326 static int json_cfg_refuse_invalid_numbers(lua_State *l) 327 { 328 static const char *options_enc_dec[] = { "none", "encode", "decode", 329 "both", NULL }; 330 json_config_t *cfg; 331 332 json_verify_arg_count(l, 1); 333 cfg = json_fetch_config(l); 334 335 json_enum_option(l, options_enc_dec, 336 &cfg->encode_refuse_badnum, 337 &cfg->decode_refuse_badnum); 338 339 return 1; 340 } 341 342 static int json_destroy_config(lua_State *l) 343 { 344 json_config_t *cfg; 345 346 cfg = lua_touserdata(l, 1); 347 if (cfg) 348 strbuf_free(&cfg->encode_buf); 349 cfg = NULL; 350 351 return 0; 352 } 353 354 static void json_create_config(lua_State *l) 355 { 356 json_config_t *cfg; 357 int i; 358 359 cfg = lua_newuserdata(l, sizeof(*cfg)); 360 361 /* Create GC method to clean up strbuf */ 362 lua_newtable(l); 363 lua_pushcfunction(l, json_destroy_config); 364 lua_setfield(l, -2, "__gc"); 365 lua_setmetatable(l, -2); 366 367 strbuf_init(&cfg->encode_buf, 0); 368 369 cfg->encode_sparse_convert = DEFAULT_SPARSE_CONVERT; 370 cfg->encode_sparse_ratio = DEFAULT_SPARSE_RATIO; 371 cfg->encode_sparse_safe = DEFAULT_SPARSE_SAFE; 372 cfg->encode_max_depth = DEFAULT_MAX_DEPTH; 373 cfg->encode_refuse_badnum = DEFAULT_ENCODE_REFUSE_BADNUM; 374 cfg->decode_refuse_badnum = DEFAULT_DECODE_REFUSE_BADNUM; 375 cfg->encode_keep_buffer = DEFAULT_ENCODE_KEEP_BUFFER; 376 json_set_number_precision(cfg, 14); 377 378 /* Decoding init */ 379 380 /* Tag all characters as an error */ 381 for (i = 0; i < 256; i++) 382 cfg->ch2token[i] = T_ERROR; 383 384 /* Set tokens that require no further processing */ 385 cfg->ch2token['{'] = T_OBJ_BEGIN; 386 cfg->ch2token['}'] = T_OBJ_END; 387 cfg->ch2token['['] = T_ARR_BEGIN; 388 cfg->ch2token[']'] = T_ARR_END; 389 cfg->ch2token[','] = T_COMMA; 390 cfg->ch2token[':'] = T_COLON; 391 cfg->ch2token['\0'] = T_END; 392 cfg->ch2token[' '] = T_WHITESPACE; 393 cfg->ch2token['\t'] = T_WHITESPACE; 394 cfg->ch2token['\n'] = T_WHITESPACE; 395 cfg->ch2token['\r'] = T_WHITESPACE; 396 397 /* Update characters that require further processing */ 398 cfg->ch2token['f'] = T_UNKNOWN; /* false? */ 399 cfg->ch2token['i'] = T_UNKNOWN; /* inf, ininity? */ 400 cfg->ch2token['I'] = T_UNKNOWN; 401 cfg->ch2token['n'] = T_UNKNOWN; /* null, nan? */ 402 cfg->ch2token['N'] = T_UNKNOWN; 403 cfg->ch2token['t'] = T_UNKNOWN; /* true? */ 404 cfg->ch2token['"'] = T_UNKNOWN; /* string? */ 405 cfg->ch2token['+'] = T_UNKNOWN; /* number? */ 406 cfg->ch2token['-'] = T_UNKNOWN; 407 for (i = 0; i < 10; i++) 408 cfg->ch2token['0' + i] = T_UNKNOWN; 409 410 /* Lookup table for parsing escape characters */ 411 for (i = 0; i < 256; i++) 412 cfg->escape2char[i] = 0; /* String error */ 413 cfg->escape2char['"'] = '"'; 414 cfg->escape2char['\\'] = '\\'; 415 cfg->escape2char['/'] = '/'; 416 cfg->escape2char['b'] = '\b'; 417 cfg->escape2char['t'] = '\t'; 418 cfg->escape2char['n'] = '\n'; 419 cfg->escape2char['f'] = '\f'; 420 cfg->escape2char['r'] = '\r'; 421 cfg->escape2char['u'] = 'u'; /* Unicode parsing required */ 422 423 424 #if 0 425 /* Initialise separate storage for pre-generated escape codes. 426 * Escapes 0-31 map directly, 34, 92, 127 follow afterwards to 427 * save memory. */ 428 for (i = 0 ; i < 32; i++) 429 sprintf(cfg->escapes[i], "\\u%04x", i); 430 strcpy(cfg->escapes[8], "\b"); /* Override simpler escapes */ 431 strcpy(cfg->escapes[9], "\t"); 432 strcpy(cfg->escapes[10], "\n"); 433 strcpy(cfg->escapes[12], "\f"); 434 strcpy(cfg->escapes[13], "\r"); 435 strcpy(cfg->escapes[32], "\\\""); /* chr(34) */ 436 strcpy(cfg->escapes[33], "\\\\"); /* chr(92) */ 437 sprintf(cfg->escapes[34], "\\u%04x", 127); /* char(127) */ 438 439 /* Initialise encoding escape lookup table */ 440 for (i = 0; i < 32; i++) 441 cfg->char2escape[i] = cfg->escapes[i]; 442 for (i = 32; i < 256; i++) 443 cfg->char2escape[i] = NULL; 444 cfg->char2escape[34] = cfg->escapes[32]; 445 cfg->char2escape[92] = cfg->escapes[33]; 446 cfg->char2escape[127] = cfg->escapes[34]; 447 #endif 448 } 449 450 /* ===== ENCODING ===== */ 451 452 static void json_encode_exception(lua_State *l, json_config_t *cfg, int lindex, 453 const char *reason) 454 { 455 if (!cfg->encode_keep_buffer) 456 strbuf_free(&cfg->encode_buf); 457 luaL_error(l, "Cannot serialise %s: %s", 458 lua_typename(l, lua_type(l, lindex)), reason); 459 } 460 461 /* json_append_string args: 462 * - lua_State 463 * - JSON strbuf 464 * - String (Lua stack index) 465 * 466 * Returns nothing. Doesn't remove string from Lua stack */ 467 static void json_append_string(lua_State *l, strbuf_t *json, int lindex) 468 { 469 const char *escstr; 470 int i; 471 const char *str; 472 size_t len; 473 474 str = lua_tolstring(l, lindex, &len); 475 476 /* Worst case is len * 6 (all unicode escapes). 477 * This buffer is reused constantly for small strings 478 * If there are any excess pages, they won't be hit anyway. 479 * This gains ~5% speedup. */ 480 strbuf_ensure_empty_length(json, len * 6 + 2); 481 482 strbuf_append_char_unsafe(json, '\"'); 483 for (i = 0; i < len; i++) { 484 escstr = char2escape[(unsigned char)str[i]]; 485 if (escstr) 486 strbuf_append_string(json, escstr); 487 else 488 strbuf_append_char_unsafe(json, str[i]); 489 } 490 strbuf_append_char_unsafe(json, '\"'); 491 } 492 493 /* Find the size of the array on the top of the Lua stack 494 * -1 object (not a pure array) 495 * >=0 elements in array 496 */ 497 static int lua_array_length(lua_State *l, json_config_t *cfg) 498 { 499 double k; 500 int max; 501 int items; 502 503 max = 0; 504 items = 0; 505 506 lua_pushnil(l); 507 /* table, startkey */ 508 while (lua_next(l, -2) != 0) { 509 /* table, key, value */ 510 if (lua_type(l, -2) == LUA_TNUMBER && 511 (k = lua_tonumber(l, -2))) { 512 /* Integer >= 1 ? */ 513 if (floor(k) == k && k >= 1) { 514 if (k > max) 515 max = k; 516 items++; 517 lua_pop(l, 1); 518 continue; 519 } 520 } 521 522 /* Must not be an array (non integer key) */ 523 lua_pop(l, 2); 524 return -1; 525 } 526 527 /* Encode excessively sparse arrays as objects (if enabled) */ 528 if (cfg->encode_sparse_ratio > 0 && 529 max > items * cfg->encode_sparse_ratio && 530 max > cfg->encode_sparse_safe) { 531 if (!cfg->encode_sparse_convert) 532 json_encode_exception(l, cfg, -1, "excessively sparse array"); 533 534 return -1; 535 } 536 537 return max; 538 } 539 540 static void json_encode_descend(lua_State *l, json_config_t *cfg) 541 { 542 cfg->current_depth++; 543 544 if (cfg->current_depth > cfg->encode_max_depth) { 545 if (!cfg->encode_keep_buffer) 546 strbuf_free(&cfg->encode_buf); 547 luaL_error(l, "Cannot serialise, excessive nesting (%d)", 548 cfg->current_depth); 549 } 550 } 551 552 static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json); 553 554 /* json_append_array args: 555 * - lua_State 556 * - JSON strbuf 557 * - Size of passwd Lua array (top of stack) */ 558 static void json_append_array(lua_State *l, json_config_t *cfg, strbuf_t *json, 559 int array_length) 560 { 561 int comma, i; 562 563 json_encode_descend(l, cfg); 564 565 strbuf_append_char(json, '['); 566 567 comma = 0; 568 for (i = 1; i <= array_length; i++) { 569 if (comma) 570 strbuf_append_char(json, ','); 571 else 572 comma = 1; 573 574 lua_rawgeti(l, -1, i); 575 json_append_data(l, cfg, json); 576 lua_pop(l, 1); 577 } 578 579 strbuf_append_char(json, ']'); 580 581 cfg->current_depth--; 582 } 583 584 static void json_append_number(lua_State *l, strbuf_t *json, int index, 585 json_config_t *cfg) 586 { 587 double num = lua_tonumber(l, index); 588 589 if (cfg->encode_refuse_badnum && (isinf(num) || isnan(num))) 590 json_encode_exception(l, cfg, index, "must not be NaN or Inf"); 591 592 /* Lowest double printed with %.14g is 21 characters long: 593 * -1.7976931348623e+308 594 * 595 * Use 32 to include the \0, and a few extra just in case.. 596 */ 597 strbuf_append_fmt(json, 32, cfg->number_fmt, num); 598 } 599 600 static void json_append_object(lua_State *l, json_config_t *cfg, 601 strbuf_t *json) 602 { 603 int comma, keytype; 604 605 json_encode_descend(l, cfg); 606 607 /* Object */ 608 strbuf_append_char(json, '{'); 609 610 lua_pushnil(l); 611 /* table, startkey */ 612 comma = 0; 613 while (lua_next(l, -2) != 0) { 614 if (comma) 615 strbuf_append_char(json, ','); 616 else 617 comma = 1; 618 619 /* table, key, value */ 620 keytype = lua_type(l, -2); 621 if (keytype == LUA_TNUMBER) { 622 strbuf_append_char(json, '"'); 623 json_append_number(l, json, -2, cfg); 624 strbuf_append_mem(json, "\":", 2); 625 } else if (keytype == LUA_TSTRING) { 626 json_append_string(l, json, -2); 627 strbuf_append_char(json, ':'); 628 } else { 629 json_encode_exception(l, cfg, -2, 630 "table key must be a number or string"); 631 /* never returns */ 632 } 633 634 /* table, key, value */ 635 json_append_data(l, cfg, json); 636 lua_pop(l, 1); 637 /* table, key */ 638 } 639 640 strbuf_append_char(json, '}'); 641 642 cfg->current_depth--; 643 } 644 645 /* Serialise Lua data into JSON string. */ 646 static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json) 647 { 648 int len; 649 650 switch (lua_type(l, -1)) { 651 case LUA_TSTRING: 652 json_append_string(l, json, -1); 653 break; 654 case LUA_TNUMBER: 655 json_append_number(l, json, -1, cfg); 656 break; 657 case LUA_TBOOLEAN: 658 if (lua_toboolean(l, -1)) 659 strbuf_append_mem(json, "true", 4); 660 else 661 strbuf_append_mem(json, "false", 5); 662 break; 663 case LUA_TTABLE: 664 len = lua_array_length(l, cfg); 665 if (len > 0) 666 json_append_array(l, cfg, json, len); 667 else 668 json_append_object(l, cfg, json); 669 break; 670 case LUA_TNIL: 671 strbuf_append_mem(json, "null", 4); 672 break; 673 case LUA_TLIGHTUSERDATA: 674 if (lua_touserdata(l, -1) == NULL) { 675 strbuf_append_mem(json, "null", 4); 676 break; 677 } 678 default: 679 /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD, 680 * and LUA_TLIGHTUSERDATA) cannot be serialised */ 681 json_encode_exception(l, cfg, -1, "type not supported"); 682 /* never returns */ 683 } 684 } 685 686 static int json_encode(lua_State *l) 687 { 688 json_config_t *cfg; 689 char *json; 690 int len; 691 692 /* Can't use json_verify_arg_count() since we need to ensure 693 * there is only 1 argument */ 694 luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument"); 695 696 cfg = json_fetch_config(l); 697 cfg->current_depth = 0; 698 699 /* Reset the persistent buffer if it exists. 700 * Otherwise allocate a new buffer. */ 701 if (strbuf_allocated(&cfg->encode_buf)) 702 strbuf_reset(&cfg->encode_buf); 703 else 704 strbuf_init(&cfg->encode_buf, 0); 705 706 json_append_data(l, cfg, &cfg->encode_buf); 707 json = strbuf_string(&cfg->encode_buf, &len); 708 709 lua_pushlstring(l, json, len); 710 711 if (!cfg->encode_keep_buffer) 712 strbuf_free(&cfg->encode_buf); 713 714 return 1; 715 } 716 717 /* ===== DECODING ===== */ 718 719 static void json_process_value(lua_State *l, json_parse_t *json, 720 json_token_t *token); 721 722 static int hexdigit2int(char hex) 723 { 724 if ('0' <= hex && hex <= '9') 725 return hex - '0'; 726 727 /* Force lowercase */ 728 hex |= 0x20; 729 if ('a' <= hex && hex <= 'f') 730 return 10 + hex - 'a'; 731 732 return -1; 733 } 734 735 static int decode_hex4(const char *hex) 736 { 737 int digit[4]; 738 int i; 739 740 /* Convert ASCII hex digit to numeric digit 741 * Note: this returns an error for invalid hex digits, including 742 * NULL */ 743 for (i = 0; i < 4; i++) { 744 digit[i] = hexdigit2int(hex[i]); 745 if (digit[i] < 0) { 746 return -1; 747 } 748 } 749 750 return (digit[0] << 12) + 751 (digit[1] << 8) + 752 (digit[2] << 4) + 753 digit[3]; 754 } 755 756 /* Converts a Unicode codepoint to UTF-8. 757 * Returns UTF-8 string length, and up to 4 bytes in *utf8 */ 758 static int codepoint_to_utf8(char *utf8, int codepoint) 759 { 760 /* 0xxxxxxx */ 761 if (codepoint <= 0x7F) { 762 utf8[0] = codepoint; 763 return 1; 764 } 765 766 /* 110xxxxx 10xxxxxx */ 767 if (codepoint <= 0x7FF) { 768 utf8[0] = (codepoint >> 6) | 0xC0; 769 utf8[1] = (codepoint & 0x3F) | 0x80; 770 return 2; 771 } 772 773 /* 1110xxxx 10xxxxxx 10xxxxxx */ 774 if (codepoint <= 0xFFFF) { 775 utf8[0] = (codepoint >> 12) | 0xE0; 776 utf8[1] = ((codepoint >> 6) & 0x3F) | 0x80; 777 utf8[2] = (codepoint & 0x3F) | 0x80; 778 return 3; 779 } 780 781 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 782 if (codepoint <= 0x1FFFFF) { 783 utf8[0] = (codepoint >> 18) | 0xF0; 784 utf8[1] = ((codepoint >> 12) & 0x3F) | 0x80; 785 utf8[2] = ((codepoint >> 6) & 0x3F) | 0x80; 786 utf8[3] = (codepoint & 0x3F) | 0x80; 787 return 4; 788 } 789 790 return 0; 791 } 792 793 794 /* Called when index pointing to beginning of UTF-16 code escape: \uXXXX 795 * \u is guaranteed to exist, but the remaining hex characters may be 796 * missing. 797 * Translate to UTF-8 and append to temporary token string. 798 * Must advance index to the next character to be processed. 799 * Returns: 0 success 800 * -1 error 801 */ 802 static int json_append_unicode_escape(json_parse_t *json) 803 { 804 char utf8[4]; /* Surrogate pairs require 4 UTF-8 bytes */ 805 int codepoint; 806 int surrogate_low; 807 int len; 808 int escape_len = 6; 809 810 /* Fetch UTF-16 code unit */ 811 codepoint = decode_hex4(&json->data[json->index + 2]); 812 if (codepoint < 0) 813 return -1; 814 815 /* UTF-16 surrogate pairs take the following 2 byte form: 816 * 11011 x yyyyyyyyyy 817 * When x = 0: y is the high 10 bits of the codepoint 818 * x = 1: y is the low 10 bits of the codepoint 819 * 820 * Check for a surrogate pair (high or low) */ 821 if ((codepoint & 0xF800) == 0xD800) { 822 /* Error if the 1st surrogate is not high */ 823 if (codepoint & 0x400) 824 return -1; 825 826 /* Ensure the next code is a unicode escape */ 827 if (json->data[json->index + escape_len] != '\\' || 828 json->data[json->index + escape_len + 1] != 'u') { 829 return -1; 830 } 831 832 /* Fetch the next codepoint */ 833 surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]); 834 if (surrogate_low < 0) 835 return -1; 836 837 /* Error if the 2nd code is not a low surrogate */ 838 if ((surrogate_low & 0xFC00) != 0xDC00) 839 return -1; 840 841 /* Calculate Unicode codepoint */ 842 codepoint = (codepoint & 0x3FF) << 10; 843 surrogate_low &= 0x3FF; 844 codepoint = (codepoint | surrogate_low) + 0x10000; 845 escape_len = 12; 846 } 847 848 /* Convert codepoint to UTF-8 */ 849 len = codepoint_to_utf8(utf8, codepoint); 850 if (!len) 851 return -1; 852 853 /* Append bytes and advance parse index */ 854 strbuf_append_mem_unsafe(json->tmp, utf8, len); 855 json->index += escape_len; 856 857 return 0; 858 } 859 860 static void json_set_token_error(json_token_t *token, json_parse_t *json, 861 const char *errtype) 862 { 863 token->type = T_ERROR; 864 token->index = json->index; 865 token->value.string = errtype; 866 } 867 868 static void json_next_string_token(json_parse_t *json, json_token_t *token) 869 { 870 char *escape2char = json->cfg->escape2char; 871 char ch; 872 873 /* Caller must ensure a string is next */ 874 assert(json->data[json->index] == '"'); 875 876 /* Skip " */ 877 json->index++; 878 879 /* json->tmp is the temporary strbuf used to accumulate the 880 * decoded string value. */ 881 strbuf_reset(json->tmp); 882 while ((ch = json->data[json->index]) != '"') { 883 if (!ch) { 884 /* Premature end of the string */ 885 json_set_token_error(token, json, "unexpected end of string"); 886 return; 887 } 888 889 /* Handle escapes */ 890 if (ch == '\\') { 891 /* Fetch escape character */ 892 ch = json->data[json->index + 1]; 893 894 /* Translate escape code and append to tmp string */ 895 ch = escape2char[(unsigned char)ch]; 896 if (ch == 'u') { 897 if (json_append_unicode_escape(json) == 0) 898 continue; 899 900 json_set_token_error(token, json, 901 "invalid unicode escape code"); 902 return; 903 } 904 if (!ch) { 905 json_set_token_error(token, json, "invalid escape code"); 906 return; 907 } 908 909 /* Skip '\' */ 910 json->index++; 911 } 912 /* Append normal character or translated single character 913 * Unicode escapes are handled above */ 914 strbuf_append_char_unsafe(json->tmp, ch); 915 json->index++; 916 } 917 json->index++; /* Eat final quote (") */ 918 919 strbuf_ensure_null(json->tmp); 920 921 token->type = T_STRING; 922 token->value.string = strbuf_string(json->tmp, &token->string_len); 923 } 924 925 /* JSON numbers should take the following form: 926 * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)? 927 * 928 * json_next_number_token() uses strtod() which allows other forms: 929 * - numbers starting with '+' 930 * - NaN, -NaN, infinity, -infinity 931 * - hexidecimal numbers 932 * - numbers with leading zeros 933 * 934 * json_is_invalid_number() detects "numbers" which may pass strtod()'s 935 * error checking, but should not be allowed with strict JSON. 936 * 937 * json_is_invalid_number() may pass numbers which cause strtod() 938 * to generate an error. 939 */ 940 static int json_is_invalid_number(json_parse_t *json) 941 { 942 int i = json->index; 943 944 /* Reject numbers starting with + */ 945 if (json->data[i] == '+') 946 return 1; 947 948 /* Skip minus sign if it exists */ 949 if (json->data[i] == '-') 950 i++; 951 952 /* Reject numbers starting with 0x, or leading zeros */ 953 if (json->data[i] == '0') { 954 int ch2 = json->data[i + 1]; 955 956 if ((ch2 | 0x20) == 'x' || /* Hex */ 957 ('0' <= ch2 && ch2 <= '9')) /* Leading zero */ 958 return 1; 959 960 return 0; 961 } else if (json->data[i] <= '9') { 962 return 0; /* Ordinary number */ 963 } 964 965 966 /* Reject inf/nan */ 967 if (!strncasecmp(&json->data[i], "inf", 3)) 968 return 1; 969 if (!strncasecmp(&json->data[i], "nan", 3)) 970 return 1; 971 972 /* Pass all other numbers which may still be invalid, but 973 * strtod() will catch them. */ 974 return 0; 975 } 976 977 static void json_next_number_token(json_parse_t *json, json_token_t *token) 978 { 979 const char *startptr; 980 char *endptr; 981 982 token->type = T_NUMBER; 983 startptr = &json->data[json->index]; 984 token->value.number = strtod(&json->data[json->index], &endptr); 985 if (startptr == endptr) 986 json_set_token_error(token, json, "invalid number"); 987 else 988 json->index += endptr - startptr; /* Skip the processed number */ 989 990 return; 991 } 992 993 /* Fills in the token struct. 994 * T_STRING will return a pointer to the json_parse_t temporary string 995 * T_ERROR will leave the json->index pointer at the error. 996 */ 997 static void json_next_token(json_parse_t *json, json_token_t *token) 998 { 999 json_token_type_t *ch2token = json->cfg->ch2token; 1000 int ch; 1001 1002 /* Eat whitespace. FIXME: UGLY */ 1003 token->type = ch2token[(unsigned char)json->data[json->index]]; 1004 while (token->type == T_WHITESPACE) 1005 token->type = ch2token[(unsigned char)json->data[++json->index]]; 1006 1007 token->index = json->index; 1008 1009 /* Don't advance the pointer for an error or the end */ 1010 if (token->type == T_ERROR) { 1011 json_set_token_error(token, json, "invalid token"); 1012 return; 1013 } 1014 1015 if (token->type == T_END) { 1016 return; 1017 } 1018 1019 /* Found a known single character token, advance index and return */ 1020 if (token->type != T_UNKNOWN) { 1021 json->index++; 1022 return; 1023 } 1024 1025 /* Process characters which triggered T_UNKNOWN */ 1026 ch = json->data[json->index]; 1027 1028 /* Must use strncmp() to match the front of the JSON string. 1029 * JSON identifier must be lowercase. 1030 * When strict_numbers if disabled, either case is allowed for 1031 * Infinity/NaN (since we are no longer following the spec..) */ 1032 if (ch == '"') { 1033 json_next_string_token(json, token); 1034 return; 1035 } else if (ch == '-' || ('0' <= ch && ch <= '9')) { 1036 if (json->cfg->decode_refuse_badnum && json_is_invalid_number(json)) { 1037 json_set_token_error(token, json, "invalid number"); 1038 return; 1039 } 1040 json_next_number_token(json, token); 1041 return; 1042 } else if (!strncmp(&json->data[json->index], "true", 4)) { 1043 token->type = T_BOOLEAN; 1044 token->value.boolean = 1; 1045 json->index += 4; 1046 return; 1047 } else if (!strncmp(&json->data[json->index], "false", 5)) { 1048 token->type = T_BOOLEAN; 1049 token->value.boolean = 0; 1050 json->index += 5; 1051 return; 1052 } else if (!strncmp(&json->data[json->index], "null", 4)) { 1053 token->type = T_NULL; 1054 json->index += 4; 1055 return; 1056 } else if (!json->cfg->decode_refuse_badnum && 1057 json_is_invalid_number(json)) { 1058 /* When refuse_badnum is disabled, only attempt to process 1059 * numbers we know are invalid JSON (Inf, NaN, hex) 1060 * This is required to generate an appropriate token error, 1061 * otherwise all bad tokens will register as "invalid number" 1062 */ 1063 json_next_number_token(json, token); 1064 return; 1065 } 1066 1067 /* Token starts with t/f/n but isn't recognised above. */ 1068 json_set_token_error(token, json, "invalid token"); 1069 } 1070 1071 /* This function does not return. 1072 * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED. 1073 * The only supported exception is the temporary parser string 1074 * json->tmp struct. 1075 * json and token should exist on the stack somewhere. 1076 * luaL_error() will long_jmp and release the stack */ 1077 static void json_throw_parse_error(lua_State *l, json_parse_t *json, 1078 const char *exp, json_token_t *token) 1079 { 1080 const char *found; 1081 1082 strbuf_free(json->tmp); 1083 1084 if (token->type == T_ERROR) 1085 found = token->value.string; 1086 else 1087 found = json_token_type_name[token->type]; 1088 1089 /* Note: token->index is 0 based, display starting from 1 */ 1090 luaL_error(l, "Expected %s but found %s at character %d", 1091 exp, found, token->index + 1); 1092 } 1093 1094 static void json_decode_checkstack(lua_State *l, json_parse_t *json, int n) 1095 { 1096 if (lua_checkstack(l, n)) 1097 return; 1098 1099 strbuf_free(json->tmp); 1100 luaL_error(l, "Too many nested data structures"); 1101 } 1102 1103 static void json_parse_object_context(lua_State *l, json_parse_t *json) 1104 { 1105 json_token_t token; 1106 1107 /* 3 slots required: 1108 * .., table, key, value */ 1109 json_decode_checkstack(l, json, 3); 1110 1111 lua_newtable(l); 1112 1113 json_next_token(json, &token); 1114 1115 /* Handle empty objects */ 1116 if (token.type == T_OBJ_END) { 1117 return; 1118 } 1119 1120 while (1) { 1121 if (token.type != T_STRING) 1122 json_throw_parse_error(l, json, "object key string", &token); 1123 1124 /* Push key */ 1125 lua_pushlstring(l, token.value.string, token.string_len); 1126 1127 json_next_token(json, &token); 1128 if (token.type != T_COLON) 1129 json_throw_parse_error(l, json, "colon", &token); 1130 1131 /* Fetch value */ 1132 json_next_token(json, &token); 1133 json_process_value(l, json, &token); 1134 1135 /* Set key = value */ 1136 lua_rawset(l, -3); 1137 1138 json_next_token(json, &token); 1139 1140 if (token.type == T_OBJ_END) 1141 return; 1142 1143 if (token.type != T_COMMA) 1144 json_throw_parse_error(l, json, "comma or object end", &token); 1145 1146 json_next_token(json, &token); 1147 } 1148 } 1149 1150 /* Handle the array context */ 1151 static void json_parse_array_context(lua_State *l, json_parse_t *json) 1152 { 1153 json_token_t token; 1154 int i; 1155 1156 /* 2 slots required: 1157 * .., table, value */ 1158 json_decode_checkstack(l, json, 2); 1159 1160 lua_newtable(l); 1161 1162 json_next_token(json, &token); 1163 1164 /* Handle empty arrays */ 1165 if (token.type == T_ARR_END) 1166 return; 1167 1168 for (i = 1; ; i++) { 1169 json_process_value(l, json, &token); 1170 lua_rawseti(l, -2, i); /* arr[i] = value */ 1171 1172 json_next_token(json, &token); 1173 1174 if (token.type == T_ARR_END) 1175 return; 1176 1177 if (token.type != T_COMMA) 1178 json_throw_parse_error(l, json, "comma or array end", &token); 1179 1180 json_next_token(json, &token); 1181 } 1182 } 1183 1184 /* Handle the "value" context */ 1185 static void json_process_value(lua_State *l, json_parse_t *json, 1186 json_token_t *token) 1187 { 1188 switch (token->type) { 1189 case T_STRING: 1190 lua_pushlstring(l, token->value.string, token->string_len); 1191 break;; 1192 case T_NUMBER: 1193 lua_pushnumber(l, token->value.number); 1194 break;; 1195 case T_BOOLEAN: 1196 lua_pushboolean(l, token->value.boolean); 1197 break;; 1198 case T_OBJ_BEGIN: 1199 json_parse_object_context(l, json); 1200 break;; 1201 case T_ARR_BEGIN: 1202 json_parse_array_context(l, json); 1203 break;; 1204 case T_NULL: 1205 /* In Lua, setting "t[k] = nil" will delete k from the table. 1206 * Hence a NULL pointer lightuserdata object is used instead */ 1207 lua_pushlightuserdata(l, NULL); 1208 break;; 1209 default: 1210 json_throw_parse_error(l, json, "value", token); 1211 } 1212 } 1213 1214 /* json_text must be null terminated string */ 1215 static void lua_json_decode(lua_State *l, const char *json_text, int json_len) 1216 { 1217 json_parse_t json; 1218 json_token_t token; 1219 1220 json.cfg = json_fetch_config(l); 1221 json.data = json_text; 1222 json.index = 0; 1223 1224 /* Ensure the temporary buffer can hold the entire string. 1225 * This means we no longer need to do length checks since the decoded 1226 * string must be smaller than the entire json string */ 1227 json.tmp = strbuf_new(json_len); 1228 1229 json_next_token(&json, &token); 1230 json_process_value(l, &json, &token); 1231 1232 /* Ensure there is no more input left */ 1233 json_next_token(&json, &token); 1234 1235 if (token.type != T_END) 1236 json_throw_parse_error(l, &json, "the end", &token); 1237 1238 strbuf_free(json.tmp); 1239 } 1240 1241 static int json_decode(lua_State *l) 1242 { 1243 const char *json; 1244 size_t len; 1245 1246 json_verify_arg_count(l, 1); 1247 1248 json = luaL_checklstring(l, 1, &len); 1249 1250 /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3) 1251 * 1252 * CJSON can support any simple data type, hence only the first 1253 * character is guaranteed to be ASCII (at worst: '"'). This is 1254 * still enough to detect whether the wrong encoding is in use. */ 1255 if (len >= 2 && (!json[0] || !json[1])) 1256 luaL_error(l, "JSON parser does not support UTF-16 or UTF-32"); 1257 1258 lua_json_decode(l, json, len); 1259 1260 return 1; 1261 } 1262 1263 /* ===== INITIALISATION ===== */ 1264 1265 int luaopen_cjson(lua_State *l) 1266 { 1267 luaL_Reg reg[] = { 1268 { "encode", json_encode }, 1269 { "decode", json_decode }, 1270 { "encode_sparse_array", json_cfg_encode_sparse_array }, 1271 { "encode_max_depth", json_cfg_encode_max_depth }, 1272 { "encode_number_precision", json_cfg_encode_number_precision }, 1273 { "encode_keep_buffer", json_cfg_encode_keep_buffer }, 1274 { "refuse_invalid_numbers", json_cfg_refuse_invalid_numbers }, 1275 { NULL, NULL } 1276 }; 1277 1278 /* Use json_fetch_config as a pointer. 1279 * It's faster than using a config string, and more unique */ 1280 lua_pushlightuserdata(l, &json_config_key); 1281 json_create_config(l); 1282 lua_settable(l, LUA_REGISTRYINDEX); 1283 1284 luaL_register(l, "cjson", reg); 1285 1286 /* Set cjson.null */ 1287 lua_pushlightuserdata(l, NULL); 1288 lua_setfield(l, -2, "null"); 1289 1290 /* Set cjson.version */ 1291 lua_pushliteral(l, VERSION); 1292 lua_setfield(l, -2, "version"); 1293 1294 /* Return cjson table */ 1295 return 1; 1296 } 1297 1298 /* vi:ai et sw=4 ts=4: 1299 */ 1300