xref: /redis-3.2.3/deps/lua/src/lua_cjson.c (revision fceef8e0)
/* Version string of this CJSON source. */
#define VERSION "1.0.3"
2 
3 /* CJSON - JSON support for Lua
4  *
5  * Copyright (c) 2010-2011  Mark Pulford <[email protected]>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be
16  * included in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 /* Caveats:
28  * - JSON "null" values are represented as lightuserdata since Lua
29  *   tables cannot contain "nil". Compare with cjson.null.
30  * - Invalid UTF-8 characters are not detected and will be passed
31  *   untouched. If required, UTF-8 error checking should be done
32  *   outside this library.
33  * - Javascript comments are not part of the JSON spec, and are not
34  *   currently supported.
35  *
36  * Note: Decoding is slower than encoding. Lua spends significant
37  *       time (30%) managing tables when parsing JSON since it is
38  *       difficult to know object/array sizes ahead of time.
39  */
40 
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "lua.h"
#include "lauxlib.h"

#include "strbuf.h"
48 
/* Fallback isinf() used when the build defines MISSING_ISINF.
 * For a finite x, x - x is 0; for +/-Inf it is NaN. NaN itself is
 * excluded by the first test.
 * Note: the argument is evaluated more than once. */
#ifdef MISSING_ISINF
#define isinf(x) (!isnan(x) && isnan((x) - (x)))
#endif
52 
/* Initial values copied into json_config_t by json_create_config().
 * Each one seeds the json_config_t field of the corresponding name
 * (DEFAULT_MAX_DEPTH seeds encode_max_depth). */
#define DEFAULT_SPARSE_CONVERT 0
#define DEFAULT_SPARSE_RATIO 2
#define DEFAULT_SPARSE_SAFE 10
#define DEFAULT_MAX_DEPTH 20
#define DEFAULT_ENCODE_REFUSE_BADNUM 1
#define DEFAULT_DECODE_REFUSE_BADNUM 0
#define DEFAULT_ENCODE_KEEP_BUFFER 1
60 
/* Token classes produced by the decoder.
 *
 * The ch2token[] table built in json_create_config() maps the first
 * byte of a token to one of these values:
 * - T_WHITESPACE is assigned to space, tab, newline and carriage return
 * - T_END is assigned to the terminating '\0'
 * - T_ERROR is the default for every byte not explicitly mapped
 * - T_UNKNOWN marks bytes that need further processing (strings,
 *   numbers, true/false/null, inf/nan)
 *
 * The order must match json_token_type_name[]. */
typedef enum {
    T_OBJ_BEGIN,
    T_OBJ_END,
    T_ARR_BEGIN,
    T_ARR_END,
    T_STRING,
    T_NUMBER,
    T_BOOLEAN,
    T_NULL,
    T_COLON,
    T_COMMA,
    T_END,
    T_WHITESPACE,
    T_ERROR,
    T_UNKNOWN
} json_token_type_t;
77 
/* Printable names for json_token_type_t values, listed in the same
 * order as the enum so the enum value can be used as the index.
 * The array is NULL terminated. */
static const char *json_token_type_name[] = {
    "T_OBJ_BEGIN",
    "T_OBJ_END",
    "T_ARR_BEGIN",
    "T_ARR_END",
    "T_STRING",
    "T_NUMBER",
    "T_BOOLEAN",
    "T_NULL",
    "T_COLON",
    "T_COMMA",
    "T_END",
    "T_WHITESPACE",
    "T_ERROR",
    "T_UNKNOWN",
    NULL
};
95 
/* Module-wide settings and working state.
 * Allocated as Lua userdata by json_create_config() and looked up
 * through the registry by json_fetch_config(). */
typedef struct {
    /* First byte of a token -> token class (decoding) */
    json_token_type_t ch2token[256];
    /* Escape letter following '\' -> decoded character; 0 marks an
     * invalid escape and 'u' requests unicode escape parsing */
    char escape2char[256];  /* Decoding */
#if 0
    char escapes[35][8];    /* Pre-generated escape string buffer */
    char *char2escape[256]; /* Encoding */
#endif
    /* Output buffer used by json_encode(); kept between calls when
     * encode_keep_buffer is set */
    strbuf_t encode_buf;
    /* printf format built by json_set_number_precision() */
    char number_fmt[8];     /* "%.XXg\0" */
    /* Nesting level of the table currently being encoded; reset to 0
     * at the start of json_encode() */
    int current_depth;

    /* Sparse array handling, see json_cfg_encode_sparse_array() */
    int encode_sparse_convert;
    int encode_sparse_ratio;
    int encode_sparse_safe;
    /* Maximum nesting allowed by json_encode_descend() */
    int encode_max_depth;
    /* Non-zero: refuse NaN/Inf when encoding / decoding */
    int encode_refuse_badnum;
    int decode_refuse_badnum;
    /* Non-zero: keep encode_buf allocated after encoding finishes */
    int encode_keep_buffer;
    /* Precision last passed to json_set_number_precision() */
    int encode_number_precision;
} json_config_t;
116 
/* Decoder cursor: the input text plus the current read position. */
typedef struct {
    /* JSON text being parsed; the tokeniser stops at a '\0' byte */
    const char *data;
    /* Offset into data of the next byte to examine */
    int index;
    strbuf_t *tmp;    /* Temporary storage for strings */
    /* Configuration providing the ch2token / escape2char tables */
    json_config_t *cfg;
} json_parse_t;
123 
/* A single token produced by the tokeniser. */
typedef struct {
    json_token_type_t type;
    /* Offset in the input where the token (or the error) was found */
    int index;
    union {
        /* T_STRING: decoded text; T_ERROR: error description
         * (see json_set_token_error()) */
        const char *string;
        double number;
        int boolean;
    } value;
    /* Length of value.string for T_STRING tokens */
    int string_len;
} json_token_t;
134 
/* Encoding lookup table, indexed by the unsigned value of a byte.
 * A non-NULL entry is the JSON escape sequence written in place of
 * that byte; NULL means the byte is copied through unchanged.
 *
 * Escaped bytes: control characters 0x00-0x1f, '"' (34), '/' (47),
 * '\' (92) and DEL (127). */
static const char *char2escape[256] = {
    "\\u0000", "\\u0001", "\\u0002", "\\u0003",
    "\\u0004", "\\u0005", "\\u0006", "\\u0007",
    "\\b", "\\t", "\\n", "\\u000b",
    "\\f", "\\r", "\\u000e", "\\u000f",
    "\\u0010", "\\u0011", "\\u0012", "\\u0013",
    "\\u0014", "\\u0015", "\\u0016", "\\u0017",
    "\\u0018", "\\u0019", "\\u001a", "\\u001b",
    "\\u001c", "\\u001d", "\\u001e", "\\u001f",
    NULL, NULL, "\\\"", NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\/",
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, "\\\\", NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f",
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
173 
/* Only the address of this variable is used: json_fetch_config()
 * pushes it as a light userdata key to look up the configuration in
 * the Lua registry. */
static int json_config_key;
175 
176 /* ===== CONFIGURATION ===== */
177 
178 static json_config_t *json_fetch_config(lua_State *l)
179 {
180     json_config_t *cfg;
181 
182     lua_pushlightuserdata(l, &json_config_key);
183     lua_gettable(l, LUA_REGISTRYINDEX);
184     cfg = lua_touserdata(l, -1);
185     if (!cfg)
186         luaL_error(l, "BUG: Unable to fetch CJSON configuration");
187 
188     lua_pop(l, 1);
189 
190     return cfg;
191 }
192 
193 static void json_verify_arg_count(lua_State *l, int args)
194 {
195     luaL_argcheck(l, lua_gettop(l) <= args, args + 1,
196                   "found too many arguments");
197 }
198 
199 /* Configures handling of extremely sparse arrays:
200  * convert: Convert extremely sparse arrays into objects? Otherwise error.
201  * ratio: 0: always allow sparse; 1: never allow sparse; >1: use ratio
202  * safe: Always use an array when the max index <= safe */
203 static int json_cfg_encode_sparse_array(lua_State *l)
204 {
205     json_config_t *cfg;
206     int val;
207 
208     json_verify_arg_count(l, 3);
209     cfg = json_fetch_config(l);
210 
211     switch (lua_gettop(l)) {
212     case 3:
213         val = luaL_checkinteger(l, 3);
214         luaL_argcheck(l, val >= 0, 3, "expected integer >= 0");
215         cfg->encode_sparse_safe = val;
216     case 2:
217         val = luaL_checkinteger(l, 2);
218         luaL_argcheck(l, val >= 0, 2, "expected integer >= 0");
219         cfg->encode_sparse_ratio = val;
220     case 1:
221         luaL_argcheck(l, lua_isboolean(l, 1), 1, "expected boolean");
222         cfg->encode_sparse_convert = lua_toboolean(l, 1);
223     }
224 
225     lua_pushboolean(l, cfg->encode_sparse_convert);
226     lua_pushinteger(l, cfg->encode_sparse_ratio);
227     lua_pushinteger(l, cfg->encode_sparse_safe);
228 
229     return 3;
230 }
231 
232 /* Configures the maximum number of nested arrays/objects allowed when
233  * encoding */
234 static int json_cfg_encode_max_depth(lua_State *l)
235 {
236     json_config_t *cfg;
237     int depth;
238 
239     json_verify_arg_count(l, 1);
240     cfg = json_fetch_config(l);
241 
242     if (lua_gettop(l)) {
243         depth = luaL_checkinteger(l, 1);
244         luaL_argcheck(l, depth > 0, 1, "expected positive integer");
245         cfg->encode_max_depth = depth;
246     }
247 
248     lua_pushinteger(l, cfg->encode_max_depth);
249 
250     return 1;
251 }
252 
253 static void json_set_number_precision(json_config_t *cfg, int prec)
254 {
255     cfg->encode_number_precision = prec;
256     sprintf(cfg->number_fmt, "%%.%dg", prec);
257 }
258 
259 /* Configures number precision when converting doubles to text */
260 static int json_cfg_encode_number_precision(lua_State *l)
261 {
262     json_config_t *cfg;
263     int precision;
264 
265     json_verify_arg_count(l, 1);
266     cfg = json_fetch_config(l);
267 
268     if (lua_gettop(l)) {
269         precision = luaL_checkinteger(l, 1);
270         luaL_argcheck(l, 1 <= precision && precision <= 14, 1,
271                       "expected integer between 1 and 14");
272         json_set_number_precision(cfg, precision);
273     }
274 
275     lua_pushinteger(l, cfg->encode_number_precision);
276 
277     return 1;
278 }
279 
280 /* Configures JSON encoding buffer persistence */
281 static int json_cfg_encode_keep_buffer(lua_State *l)
282 {
283     json_config_t *cfg;
284 
285     json_verify_arg_count(l, 1);
286     cfg = json_fetch_config(l);
287 
288     if (lua_gettop(l)) {
289         luaL_checktype(l, 1, LUA_TBOOLEAN);
290         cfg->encode_keep_buffer = lua_toboolean(l, 1);
291     }
292 
293     lua_pushboolean(l, cfg->encode_keep_buffer);
294 
295     return 1;
296 }
297 
298 /* On argument: decode enum and set config variables
299  * **options must point to a NULL terminated array of 4 enums
300  * Returns: current enum value */
301 static void json_enum_option(lua_State *l, const char **options,
302                              int *opt1, int *opt2)
303 {
304     int setting;
305 
306     if (lua_gettop(l)) {
307         if (lua_isboolean(l, 1))
308             setting = lua_toboolean(l, 1) * 3;
309         else
310             setting = luaL_checkoption(l, 1, NULL, options);
311 
312         *opt1 = setting & 1 ? 1 : 0;
313         *opt2 = setting & 2 ? 1 : 0;
314     } else {
315         setting = *opt1 | (*opt2 << 1);
316     }
317 
318     if (setting)
319         lua_pushstring(l, options[setting]);
320     else
321         lua_pushboolean(l, 0);
322 }
323 
324 
325 /* When enabled, rejects: NaN, Infinity, hexidecimal numbers */
326 static int json_cfg_refuse_invalid_numbers(lua_State *l)
327 {
328     static const char *options_enc_dec[] = { "none", "encode", "decode",
329                                              "both", NULL };
330     json_config_t *cfg;
331 
332     json_verify_arg_count(l, 1);
333     cfg = json_fetch_config(l);
334 
335     json_enum_option(l, options_enc_dec,
336                      &cfg->encode_refuse_badnum,
337                      &cfg->decode_refuse_badnum);
338 
339     return 1;
340 }
341 
342 static int json_destroy_config(lua_State *l)
343 {
344     json_config_t *cfg;
345 
346     cfg = lua_touserdata(l, 1);
347     if (cfg)
348         strbuf_free(&cfg->encode_buf);
349     cfg = NULL;
350 
351     return 0;
352 }
353 
354 static void json_create_config(lua_State *l)
355 {
356     json_config_t *cfg;
357     int i;
358 
359     cfg = lua_newuserdata(l, sizeof(*cfg));
360 
361     /* Create GC method to clean up strbuf */
362     lua_newtable(l);
363     lua_pushcfunction(l, json_destroy_config);
364     lua_setfield(l, -2, "__gc");
365     lua_setmetatable(l, -2);
366 
367     strbuf_init(&cfg->encode_buf, 0);
368 
369     cfg->encode_sparse_convert = DEFAULT_SPARSE_CONVERT;
370     cfg->encode_sparse_ratio = DEFAULT_SPARSE_RATIO;
371     cfg->encode_sparse_safe = DEFAULT_SPARSE_SAFE;
372     cfg->encode_max_depth = DEFAULT_MAX_DEPTH;
373     cfg->encode_refuse_badnum = DEFAULT_ENCODE_REFUSE_BADNUM;
374     cfg->decode_refuse_badnum = DEFAULT_DECODE_REFUSE_BADNUM;
375     cfg->encode_keep_buffer = DEFAULT_ENCODE_KEEP_BUFFER;
376     json_set_number_precision(cfg, 14);
377 
378     /* Decoding init */
379 
380     /* Tag all characters as an error */
381     for (i = 0; i < 256; i++)
382         cfg->ch2token[i] = T_ERROR;
383 
384     /* Set tokens that require no further processing */
385     cfg->ch2token['{'] = T_OBJ_BEGIN;
386     cfg->ch2token['}'] = T_OBJ_END;
387     cfg->ch2token['['] = T_ARR_BEGIN;
388     cfg->ch2token[']'] = T_ARR_END;
389     cfg->ch2token[','] = T_COMMA;
390     cfg->ch2token[':'] = T_COLON;
391     cfg->ch2token['\0'] = T_END;
392     cfg->ch2token[' '] = T_WHITESPACE;
393     cfg->ch2token['\t'] = T_WHITESPACE;
394     cfg->ch2token['\n'] = T_WHITESPACE;
395     cfg->ch2token['\r'] = T_WHITESPACE;
396 
397     /* Update characters that require further processing */
398     cfg->ch2token['f'] = T_UNKNOWN;     /* false? */
399     cfg->ch2token['i'] = T_UNKNOWN;     /* inf, ininity? */
400     cfg->ch2token['I'] = T_UNKNOWN;
401     cfg->ch2token['n'] = T_UNKNOWN;     /* null, nan? */
402     cfg->ch2token['N'] = T_UNKNOWN;
403     cfg->ch2token['t'] = T_UNKNOWN;     /* true? */
404     cfg->ch2token['"'] = T_UNKNOWN;     /* string? */
405     cfg->ch2token['+'] = T_UNKNOWN;     /* number? */
406     cfg->ch2token['-'] = T_UNKNOWN;
407     for (i = 0; i < 10; i++)
408         cfg->ch2token['0' + i] = T_UNKNOWN;
409 
410     /* Lookup table for parsing escape characters */
411     for (i = 0; i < 256; i++)
412         cfg->escape2char[i] = 0;          /* String error */
413     cfg->escape2char['"'] = '"';
414     cfg->escape2char['\\'] = '\\';
415     cfg->escape2char['/'] = '/';
416     cfg->escape2char['b'] = '\b';
417     cfg->escape2char['t'] = '\t';
418     cfg->escape2char['n'] = '\n';
419     cfg->escape2char['f'] = '\f';
420     cfg->escape2char['r'] = '\r';
421     cfg->escape2char['u'] = 'u';          /* Unicode parsing required */
422 
423 
424 #if 0
425     /* Initialise separate storage for pre-generated escape codes.
426      * Escapes 0-31 map directly, 34, 92, 127 follow afterwards to
427      * save memory. */
428     for (i = 0 ; i < 32; i++)
429         sprintf(cfg->escapes[i], "\\u%04x", i);
430     strcpy(cfg->escapes[8], "\b");              /* Override simpler escapes */
431     strcpy(cfg->escapes[9], "\t");
432     strcpy(cfg->escapes[10], "\n");
433     strcpy(cfg->escapes[12], "\f");
434     strcpy(cfg->escapes[13], "\r");
435     strcpy(cfg->escapes[32], "\\\"");           /* chr(34) */
436     strcpy(cfg->escapes[33], "\\\\");           /* chr(92) */
437     sprintf(cfg->escapes[34], "\\u%04x", 127);  /* char(127) */
438 
439     /* Initialise encoding escape lookup table */
440     for (i = 0; i < 32; i++)
441         cfg->char2escape[i] = cfg->escapes[i];
442     for (i = 32; i < 256; i++)
443         cfg->char2escape[i] = NULL;
444     cfg->char2escape[34] = cfg->escapes[32];
445     cfg->char2escape[92] = cfg->escapes[33];
446     cfg->char2escape[127] = cfg->escapes[34];
447 #endif
448 }
449 
450 /* ===== ENCODING ===== */
451 
452 static void json_encode_exception(lua_State *l, json_config_t *cfg, int lindex,
453                                   const char *reason)
454 {
455     if (!cfg->encode_keep_buffer)
456         strbuf_free(&cfg->encode_buf);
457     luaL_error(l, "Cannot serialise %s: %s",
458                   lua_typename(l, lua_type(l, lindex)), reason);
459 }
460 
461 /* json_append_string args:
462  * - lua_State
463  * - JSON strbuf
464  * - String (Lua stack index)
465  *
466  * Returns nothing. Doesn't remove string from Lua stack */
467 static void json_append_string(lua_State *l, strbuf_t *json, int lindex)
468 {
469     const char *escstr;
470     int i;
471     const char *str;
472     size_t len;
473 
474     str = lua_tolstring(l, lindex, &len);
475 
476     /* Worst case is len * 6 (all unicode escapes).
477      * This buffer is reused constantly for small strings
478      * If there are any excess pages, they won't be hit anyway.
479      * This gains ~5% speedup. */
480     strbuf_ensure_empty_length(json, len * 6 + 2);
481 
482     strbuf_append_char_unsafe(json, '\"');
483     for (i = 0; i < len; i++) {
484         escstr = char2escape[(unsigned char)str[i]];
485         if (escstr)
486             strbuf_append_string(json, escstr);
487         else
488             strbuf_append_char_unsafe(json, str[i]);
489     }
490     strbuf_append_char_unsafe(json, '\"');
491 }
492 
493 /* Find the size of the array on the top of the Lua stack
494  * -1   object (not a pure array)
495  * >=0  elements in array
496  */
497 static int lua_array_length(lua_State *l, json_config_t *cfg)
498 {
499     double k;
500     int max;
501     int items;
502 
503     max = 0;
504     items = 0;
505 
506     lua_pushnil(l);
507     /* table, startkey */
508     while (lua_next(l, -2) != 0) {
509         /* table, key, value */
510         if (lua_type(l, -2) == LUA_TNUMBER &&
511             (k = lua_tonumber(l, -2))) {
512             /* Integer >= 1 ? */
513             if (floor(k) == k && k >= 1) {
514                 if (k > max)
515                     max = k;
516                 items++;
517                 lua_pop(l, 1);
518                 continue;
519             }
520         }
521 
522         /* Must not be an array (non integer key) */
523         lua_pop(l, 2);
524         return -1;
525     }
526 
527     /* Encode excessively sparse arrays as objects (if enabled) */
528     if (cfg->encode_sparse_ratio > 0 &&
529         max > items * cfg->encode_sparse_ratio &&
530         max > cfg->encode_sparse_safe) {
531         if (!cfg->encode_sparse_convert)
532             json_encode_exception(l, cfg, -1, "excessively sparse array");
533 
534         return -1;
535     }
536 
537     return max;
538 }
539 
540 static void json_encode_descend(lua_State *l, json_config_t *cfg)
541 {
542     cfg->current_depth++;
543 
544     if (cfg->current_depth > cfg->encode_max_depth) {
545         if (!cfg->encode_keep_buffer)
546             strbuf_free(&cfg->encode_buf);
547         luaL_error(l, "Cannot serialise, excessive nesting (%d)",
548                    cfg->current_depth);
549     }
550 }
551 
552 static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json);
553 
554 /* json_append_array args:
555  * - lua_State
556  * - JSON strbuf
557  * - Size of passwd Lua array (top of stack) */
558 static void json_append_array(lua_State *l, json_config_t *cfg, strbuf_t *json,
559                               int array_length)
560 {
561     int comma, i;
562 
563     json_encode_descend(l, cfg);
564 
565     strbuf_append_char(json, '[');
566 
567     comma = 0;
568     for (i = 1; i <= array_length; i++) {
569         if (comma)
570             strbuf_append_char(json, ',');
571         else
572             comma = 1;
573 
574         lua_rawgeti(l, -1, i);
575         json_append_data(l, cfg, json);
576         lua_pop(l, 1);
577     }
578 
579     strbuf_append_char(json, ']');
580 
581     cfg->current_depth--;
582 }
583 
584 static void json_append_number(lua_State *l, strbuf_t *json, int index,
585                                json_config_t *cfg)
586 {
587     double num = lua_tonumber(l, index);
588 
589     if (cfg->encode_refuse_badnum && (isinf(num) || isnan(num)))
590         json_encode_exception(l, cfg, index, "must not be NaN or Inf");
591 
592     /* Lowest double printed with %.14g is 21 characters long:
593      * -1.7976931348623e+308
594      *
595      * Use 32 to include the \0, and a few extra just in case..
596      */
597     strbuf_append_fmt(json, 32, cfg->number_fmt, num);
598 }
599 
600 static void json_append_object(lua_State *l, json_config_t *cfg,
601                                strbuf_t *json)
602 {
603     int comma, keytype;
604 
605     json_encode_descend(l, cfg);
606 
607     /* Object */
608     strbuf_append_char(json, '{');
609 
610     lua_pushnil(l);
611     /* table, startkey */
612     comma = 0;
613     while (lua_next(l, -2) != 0) {
614         if (comma)
615             strbuf_append_char(json, ',');
616         else
617             comma = 1;
618 
619         /* table, key, value */
620         keytype = lua_type(l, -2);
621         if (keytype == LUA_TNUMBER) {
622             strbuf_append_char(json, '"');
623             json_append_number(l, json, -2, cfg);
624             strbuf_append_mem(json, "\":", 2);
625         } else if (keytype == LUA_TSTRING) {
626             json_append_string(l, json, -2);
627             strbuf_append_char(json, ':');
628         } else {
629             json_encode_exception(l, cfg, -2,
630                                   "table key must be a number or string");
631             /* never returns */
632         }
633 
634         /* table, key, value */
635         json_append_data(l, cfg, json);
636         lua_pop(l, 1);
637         /* table, key */
638     }
639 
640     strbuf_append_char(json, '}');
641 
642     cfg->current_depth--;
643 }
644 
645 /* Serialise Lua data into JSON string. */
646 static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json)
647 {
648     int len;
649 
650     switch (lua_type(l, -1)) {
651     case LUA_TSTRING:
652         json_append_string(l, json, -1);
653         break;
654     case LUA_TNUMBER:
655         json_append_number(l, json, -1, cfg);
656         break;
657     case LUA_TBOOLEAN:
658         if (lua_toboolean(l, -1))
659             strbuf_append_mem(json, "true", 4);
660         else
661             strbuf_append_mem(json, "false", 5);
662         break;
663     case LUA_TTABLE:
664         len = lua_array_length(l, cfg);
665         if (len > 0)
666             json_append_array(l, cfg, json, len);
667         else
668             json_append_object(l, cfg, json);
669         break;
670     case LUA_TNIL:
671         strbuf_append_mem(json, "null", 4);
672         break;
673     case LUA_TLIGHTUSERDATA:
674         if (lua_touserdata(l, -1) == NULL) {
675             strbuf_append_mem(json, "null", 4);
676             break;
677         }
678     default:
679         /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD,
680          * and LUA_TLIGHTUSERDATA) cannot be serialised */
681         json_encode_exception(l, cfg, -1, "type not supported");
682         /* never returns */
683     }
684 }
685 
686 static int json_encode(lua_State *l)
687 {
688     json_config_t *cfg;
689     char *json;
690     int len;
691 
692     /* Can't use json_verify_arg_count() since we need to ensure
693      * there is only 1 argument */
694     luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
695 
696     cfg = json_fetch_config(l);
697     cfg->current_depth = 0;
698 
699     /* Reset the persistent buffer if it exists.
700      * Otherwise allocate a new buffer. */
701     if (strbuf_allocated(&cfg->encode_buf))
702         strbuf_reset(&cfg->encode_buf);
703     else
704         strbuf_init(&cfg->encode_buf, 0);
705 
706     json_append_data(l, cfg, &cfg->encode_buf);
707     json = strbuf_string(&cfg->encode_buf, &len);
708 
709     lua_pushlstring(l, json, len);
710 
711     if (!cfg->encode_keep_buffer)
712         strbuf_free(&cfg->encode_buf);
713 
714     return 1;
715 }
716 
717 /* ===== DECODING ===== */
718 
719 static void json_process_value(lua_State *l, json_parse_t *json,
720                                json_token_t *token);
721 
/* Convert one ASCII hexadecimal digit (either case) to its value.
 * Returns 0-15, or -1 when the character is not a hex digit. */
static int hexdigit2int(char hex)
{
    int lower;

    if (hex >= '0' && hex <= '9')
        return hex - '0';

    /* Setting bit 0x20 folds 'A'-'F' onto 'a'-'f' */
    lower = hex | 0x20;
    if (lower >= 'a' && lower <= 'f')
        return lower - 'a' + 10;

    return -1;
}
734 
/* Decode the 4 hexadecimal digits starting at "hex".
 * Returns the value (0 - 0xFFFF), or -1 if any of the 4 characters is
 * not a hex digit. A '\0' byte counts as invalid, so reading stops at
 * the end of a short string. */
static int decode_hex4(const char *hex)
{
    int value = 0;
    int pos;

    for (pos = 0; pos < 4; pos++) {
        int digit = hexdigit2int(hex[pos]);

        if (digit < 0)
            return -1;

        /* Shift in the next 4 bits */
        value = (value << 4) | digit;
    }

    return value;
}
755 
/* Converts a Unicode codepoint to UTF-8.
 * Writes up to 4 bytes to *utf8 and returns the number written.
 * Returns 0 (writing nothing) for codepoints above 0x1FFFFF.
 *
 * Layouts:
 *   <= 0x7F      0xxxxxxx
 *   <= 0x7FF     110xxxxx 10xxxxxx
 *   <= 0xFFFF    1110xxxx 10xxxxxx 10xxxxxx
 *   <= 0x1FFFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
static int codepoint_to_utf8(char *utf8, int codepoint)
{
    int nbytes;
    int lead;
    int pos;

    if (codepoint <= 0x7F) {
        utf8[0] = codepoint;
        return 1;
    }

    if (codepoint <= 0x7FF) {
        nbytes = 2;
        lead = 0xC0;
    } else if (codepoint <= 0xFFFF) {
        nbytes = 3;
        lead = 0xE0;
    } else if (codepoint <= 0x1FFFFF) {
        nbytes = 4;
        lead = 0xF0;
    } else {
        return 0;
    }

    /* Emit continuation bytes from last to first, 6 bits each */
    for (pos = nbytes - 1; pos > 0; pos--) {
        utf8[pos] = (codepoint & 0x3F) | 0x80;
        codepoint >>= 6;
    }
    /* The remaining high bits go into the lead byte */
    utf8[0] = codepoint | lead;

    return nbytes;
}
792 
793 
794 /* Called when index pointing to beginning of UTF-16 code escape: \uXXXX
795  * \u is guaranteed to exist, but the remaining hex characters may be
796  * missing.
797  * Translate to UTF-8 and append to temporary token string.
798  * Must advance index to the next character to be processed.
799  * Returns: 0   success
800  *          -1  error
801  */
802 static int json_append_unicode_escape(json_parse_t *json)
803 {
804     char utf8[4];       /* Surrogate pairs require 4 UTF-8 bytes */
805     int codepoint;
806     int surrogate_low;
807     int len;
808     int escape_len = 6;
809 
810     /* Fetch UTF-16 code unit */
811     codepoint = decode_hex4(&json->data[json->index + 2]);
812     if (codepoint < 0)
813         return -1;
814 
815     /* UTF-16 surrogate pairs take the following 2 byte form:
816      *      11011 x yyyyyyyyyy
817      * When x = 0: y is the high 10 bits of the codepoint
818      *      x = 1: y is the low 10 bits of the codepoint
819      *
820      * Check for a surrogate pair (high or low) */
821     if ((codepoint & 0xF800) == 0xD800) {
822         /* Error if the 1st surrogate is not high */
823         if (codepoint & 0x400)
824             return -1;
825 
826         /* Ensure the next code is a unicode escape */
827         if (json->data[json->index + escape_len] != '\\' ||
828             json->data[json->index + escape_len + 1] != 'u') {
829             return -1;
830         }
831 
832         /* Fetch the next codepoint */
833         surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]);
834         if (surrogate_low < 0)
835             return -1;
836 
837         /* Error if the 2nd code is not a low surrogate */
838         if ((surrogate_low & 0xFC00) != 0xDC00)
839             return -1;
840 
841         /* Calculate Unicode codepoint */
842         codepoint = (codepoint & 0x3FF) << 10;
843         surrogate_low &= 0x3FF;
844         codepoint = (codepoint | surrogate_low) + 0x10000;
845         escape_len = 12;
846     }
847 
848     /* Convert codepoint to UTF-8 */
849     len = codepoint_to_utf8(utf8, codepoint);
850     if (!len)
851         return -1;
852 
853     /* Append bytes and advance parse index */
854     strbuf_append_mem_unsafe(json->tmp, utf8, len);
855     json->index += escape_len;
856 
857     return 0;
858 }
859 
860 static void json_set_token_error(json_token_t *token, json_parse_t *json,
861                                  const char *errtype)
862 {
863     token->type = T_ERROR;
864     token->index = json->index;
865     token->value.string = errtype;
866 }
867 
868 static void json_next_string_token(json_parse_t *json, json_token_t *token)
869 {
870     char *escape2char = json->cfg->escape2char;
871     char ch;
872 
873     /* Caller must ensure a string is next */
874     assert(json->data[json->index] == '"');
875 
876     /* Skip " */
877     json->index++;
878 
879     /* json->tmp is the temporary strbuf used to accumulate the
880      * decoded string value. */
881     strbuf_reset(json->tmp);
882     while ((ch = json->data[json->index]) != '"') {
883         if (!ch) {
884             /* Premature end of the string */
885             json_set_token_error(token, json, "unexpected end of string");
886             return;
887         }
888 
889         /* Handle escapes */
890         if (ch == '\\') {
891             /* Fetch escape character */
892             ch = json->data[json->index + 1];
893 
894             /* Translate escape code and append to tmp string */
895             ch = escape2char[(unsigned char)ch];
896             if (ch == 'u') {
897                 if (json_append_unicode_escape(json) == 0)
898                     continue;
899 
900                 json_set_token_error(token, json,
901                                      "invalid unicode escape code");
902                 return;
903             }
904             if (!ch) {
905                 json_set_token_error(token, json, "invalid escape code");
906                 return;
907             }
908 
909             /* Skip '\' */
910             json->index++;
911         }
912         /* Append normal character or translated single character
913          * Unicode escapes are handled above */
914         strbuf_append_char_unsafe(json->tmp, ch);
915         json->index++;
916     }
917     json->index++;  /* Eat final quote (") */
918 
919     strbuf_ensure_null(json->tmp);
920 
921     token->type = T_STRING;
922     token->value.string = strbuf_string(json->tmp, &token->string_len);
923 }
924 
925 /* JSON numbers should take the following form:
926  *      -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
927  *
928  * json_next_number_token() uses strtod() which allows other forms:
929  * - numbers starting with '+'
930  * - NaN, -NaN, infinity, -infinity
931  * - hexidecimal numbers
932  * - numbers with leading zeros
933  *
934  * json_is_invalid_number() detects "numbers" which may pass strtod()'s
935  * error checking, but should not be allowed with strict JSON.
936  *
937  * json_is_invalid_number() may pass numbers which cause strtod()
938  * to generate an error.
939  */
940 static int json_is_invalid_number(json_parse_t *json)
941 {
942     int i = json->index;
943 
944     /* Reject numbers starting with + */
945     if (json->data[i] == '+')
946         return 1;
947 
948     /* Skip minus sign if it exists */
949     if (json->data[i] == '-')
950         i++;
951 
952     /* Reject numbers starting with 0x, or leading zeros */
953     if (json->data[i] == '0') {
954         int ch2 = json->data[i + 1];
955 
956         if ((ch2 | 0x20) == 'x' ||          /* Hex */
957             ('0' <= ch2 && ch2 <= '9'))     /* Leading zero */
958             return 1;
959 
960         return 0;
961     } else if (json->data[i] <= '9') {
962         return 0;                           /* Ordinary number */
963     }
964 
965 
966     /* Reject inf/nan */
967     if (!strncasecmp(&json->data[i], "inf", 3))
968         return 1;
969     if (!strncasecmp(&json->data[i], "nan", 3))
970         return 1;
971 
972     /* Pass all other numbers which may still be invalid, but
973      * strtod() will catch them. */
974     return 0;
975 }
976 
977 static void json_next_number_token(json_parse_t *json, json_token_t *token)
978 {
979     const char *startptr;
980     char *endptr;
981 
982     token->type = T_NUMBER;
983     startptr = &json->data[json->index];
984     token->value.number = strtod(&json->data[json->index], &endptr);
985     if (startptr == endptr)
986         json_set_token_error(token, json, "invalid number");
987     else
988         json->index += endptr - startptr;   /* Skip the processed number */
989 
990     return;
991 }
992 
993 /* Fills in the token struct.
994  * T_STRING will return a pointer to the json_parse_t temporary string
995  * T_ERROR will leave the json->index pointer at the error.
996  */
997 static void json_next_token(json_parse_t *json, json_token_t *token)
998 {
999     json_token_type_t *ch2token = json->cfg->ch2token;
1000     int ch;
1001 
1002     /* Eat whitespace. FIXME: UGLY */
1003     token->type = ch2token[(unsigned char)json->data[json->index]];
1004     while (token->type == T_WHITESPACE)
1005         token->type = ch2token[(unsigned char)json->data[++json->index]];
1006 
1007     token->index = json->index;
1008 
1009     /* Don't advance the pointer for an error or the end */
1010     if (token->type == T_ERROR) {
1011         json_set_token_error(token, json, "invalid token");
1012         return;
1013     }
1014 
1015     if (token->type == T_END) {
1016         return;
1017     }
1018 
1019     /* Found a known single character token, advance index and return */
1020     if (token->type != T_UNKNOWN) {
1021         json->index++;
1022         return;
1023     }
1024 
1025     /* Process characters which triggered T_UNKNOWN */
1026     ch = json->data[json->index];
1027 
1028     /* Must use strncmp() to match the front of the JSON string.
1029      * JSON identifier must be lowercase.
1030      * When strict_numbers if disabled, either case is allowed for
1031      * Infinity/NaN (since we are no longer following the spec..) */
1032     if (ch == '"') {
1033         json_next_string_token(json, token);
1034         return;
1035     } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
1036         if (json->cfg->decode_refuse_badnum && json_is_invalid_number(json)) {
1037             json_set_token_error(token, json, "invalid number");
1038             return;
1039         }
1040         json_next_number_token(json, token);
1041         return;
1042     } else if (!strncmp(&json->data[json->index], "true", 4)) {
1043         token->type = T_BOOLEAN;
1044         token->value.boolean = 1;
1045         json->index += 4;
1046         return;
1047     } else if (!strncmp(&json->data[json->index], "false", 5)) {
1048         token->type = T_BOOLEAN;
1049         token->value.boolean = 0;
1050         json->index += 5;
1051         return;
1052     } else if (!strncmp(&json->data[json->index], "null", 4)) {
1053         token->type = T_NULL;
1054         json->index += 4;
1055         return;
1056     } else if (!json->cfg->decode_refuse_badnum &&
1057                json_is_invalid_number(json)) {
1058         /* When refuse_badnum is disabled, only attempt to process
1059          * numbers we know are invalid JSON (Inf, NaN, hex)
1060          * This is required to generate an appropriate token error,
1061          * otherwise all bad tokens will register as "invalid number"
1062          */
1063         json_next_number_token(json, token);
1064         return;
1065     }
1066 
1067     /* Token starts with t/f/n but isn't recognised above. */
1068     json_set_token_error(token, json, "invalid token");
1069 }
1070 
1071 /* This function does not return.
1072  * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
1073  * The only supported exception is the temporary parser string
1074  * json->tmp struct.
1075  * json and token should exist on the stack somewhere.
1076  * luaL_error() will long_jmp and release the stack */
1077 static void json_throw_parse_error(lua_State *l, json_parse_t *json,
1078                                    const char *exp, json_token_t *token)
1079 {
1080     const char *found;
1081 
1082     strbuf_free(json->tmp);
1083 
1084     if (token->type == T_ERROR)
1085         found = token->value.string;
1086     else
1087         found = json_token_type_name[token->type];
1088 
1089     /* Note: token->index is 0 based, display starting from 1 */
1090     luaL_error(l, "Expected %s but found %s at character %d",
1091                exp, found, token->index + 1);
1092 }
1093 
1094 static void json_decode_checkstack(lua_State *l, json_parse_t *json, int n)
1095 {
1096     if (lua_checkstack(l, n))
1097         return;
1098 
1099     strbuf_free(json->tmp);
1100     luaL_error(l, "Too many nested data structures");
1101 }
1102 
1103 static void json_parse_object_context(lua_State *l, json_parse_t *json)
1104 {
1105     json_token_t token;
1106 
1107     /* 3 slots required:
1108      * .., table, key, value */
1109     json_decode_checkstack(l, json, 3);
1110 
1111     lua_newtable(l);
1112 
1113     json_next_token(json, &token);
1114 
1115     /* Handle empty objects */
1116     if (token.type == T_OBJ_END) {
1117         return;
1118     }
1119 
1120     while (1) {
1121         if (token.type != T_STRING)
1122             json_throw_parse_error(l, json, "object key string", &token);
1123 
1124         /* Push key */
1125         lua_pushlstring(l, token.value.string, token.string_len);
1126 
1127         json_next_token(json, &token);
1128         if (token.type != T_COLON)
1129             json_throw_parse_error(l, json, "colon", &token);
1130 
1131         /* Fetch value */
1132         json_next_token(json, &token);
1133         json_process_value(l, json, &token);
1134 
1135         /* Set key = value */
1136         lua_rawset(l, -3);
1137 
1138         json_next_token(json, &token);
1139 
1140         if (token.type == T_OBJ_END)
1141             return;
1142 
1143         if (token.type != T_COMMA)
1144             json_throw_parse_error(l, json, "comma or object end", &token);
1145 
1146         json_next_token(json, &token);
1147     }
1148 }
1149 
1150 /* Handle the array context */
1151 static void json_parse_array_context(lua_State *l, json_parse_t *json)
1152 {
1153     json_token_t token;
1154     int i;
1155 
1156     /* 2 slots required:
1157      * .., table, value */
1158     json_decode_checkstack(l, json, 2);
1159 
1160     lua_newtable(l);
1161 
1162     json_next_token(json, &token);
1163 
1164     /* Handle empty arrays */
1165     if (token.type == T_ARR_END)
1166         return;
1167 
1168     for (i = 1; ; i++) {
1169         json_process_value(l, json, &token);
1170         lua_rawseti(l, -2, i);            /* arr[i] = value */
1171 
1172         json_next_token(json, &token);
1173 
1174         if (token.type == T_ARR_END)
1175             return;
1176 
1177         if (token.type != T_COMMA)
1178             json_throw_parse_error(l, json, "comma or array end", &token);
1179 
1180         json_next_token(json, &token);
1181     }
1182 }
1183 
1184 /* Handle the "value" context */
1185 static void json_process_value(lua_State *l, json_parse_t *json,
1186                                json_token_t *token)
1187 {
1188     switch (token->type) {
1189     case T_STRING:
1190         lua_pushlstring(l, token->value.string, token->string_len);
1191         break;;
1192     case T_NUMBER:
1193         lua_pushnumber(l, token->value.number);
1194         break;;
1195     case T_BOOLEAN:
1196         lua_pushboolean(l, token->value.boolean);
1197         break;;
1198     case T_OBJ_BEGIN:
1199         json_parse_object_context(l, json);
1200         break;;
1201     case T_ARR_BEGIN:
1202         json_parse_array_context(l, json);
1203         break;;
1204     case T_NULL:
1205         /* In Lua, setting "t[k] = nil" will delete k from the table.
1206          * Hence a NULL pointer lightuserdata object is used instead */
1207         lua_pushlightuserdata(l, NULL);
1208         break;;
1209     default:
1210         json_throw_parse_error(l, json, "value", token);
1211     }
1212 }
1213 
1214 /* json_text must be null terminated string */
1215 static void lua_json_decode(lua_State *l, const char *json_text, int json_len)
1216 {
1217     json_parse_t json;
1218     json_token_t token;
1219 
1220     json.cfg = json_fetch_config(l);
1221     json.data = json_text;
1222     json.index = 0;
1223 
1224     /* Ensure the temporary buffer can hold the entire string.
1225      * This means we no longer need to do length checks since the decoded
1226      * string must be smaller than the entire json string */
1227     json.tmp = strbuf_new(json_len);
1228 
1229     json_next_token(&json, &token);
1230     json_process_value(l, &json, &token);
1231 
1232     /* Ensure there is no more input left */
1233     json_next_token(&json, &token);
1234 
1235     if (token.type != T_END)
1236         json_throw_parse_error(l, &json, "the end", &token);
1237 
1238     strbuf_free(json.tmp);
1239 }
1240 
1241 static int json_decode(lua_State *l)
1242 {
1243     const char *json;
1244     size_t len;
1245 
1246     json_verify_arg_count(l, 1);
1247 
1248     json = luaL_checklstring(l, 1, &len);
1249 
1250     /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
1251      *
1252      * CJSON can support any simple data type, hence only the first
1253      * character is guaranteed to be ASCII (at worst: '"'). This is
1254      * still enough to detect whether the wrong encoding is in use. */
1255     if (len >= 2 && (!json[0] || !json[1]))
1256         luaL_error(l, "JSON parser does not support UTF-16 or UTF-32");
1257 
1258     lua_json_decode(l, json, len);
1259 
1260     return 1;
1261 }
1262 
1263 /* ===== INITIALISATION ===== */
1264 
1265 int luaopen_cjson(lua_State *l)
1266 {
1267     luaL_Reg reg[] = {
1268         { "encode", json_encode },
1269         { "decode", json_decode },
1270         { "encode_sparse_array", json_cfg_encode_sparse_array },
1271         { "encode_max_depth", json_cfg_encode_max_depth },
1272         { "encode_number_precision", json_cfg_encode_number_precision },
1273         { "encode_keep_buffer", json_cfg_encode_keep_buffer },
1274         { "refuse_invalid_numbers", json_cfg_refuse_invalid_numbers },
1275         { NULL, NULL }
1276     };
1277 
1278     /* Use json_fetch_config as a pointer.
1279      * It's faster than using a config string, and more unique */
1280     lua_pushlightuserdata(l, &json_config_key);
1281     json_create_config(l);
1282     lua_settable(l, LUA_REGISTRYINDEX);
1283 
1284     luaL_register(l, "cjson", reg);
1285 
1286     /* Set cjson.null */
1287     lua_pushlightuserdata(l, NULL);
1288     lua_setfield(l, -2, "null");
1289 
1290     /* Set cjson.version */
1291     lua_pushliteral(l, VERSION);
1292     lua_setfield(l, -2, "version");
1293 
1294     /* Return cjson table */
1295     return 1;
1296 }
1297 
1298 /* vi:ai et sw=4 ts=4:
1299  */
1300