1 /* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24 #include "ucl.h"
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
27
28 /**
29 * @file ucl_parser.c
30 * The implementation of ucl parser
31 */
32
/*
 * Snapshot of a position inside a chunk of input.
 *
 * NOTE(review): this structure is not referenced in the visible part of the
 * file; the field descriptions are inferred from the identically named
 * cursor fields used by ucl_chunk_skipc — confirm against the actual users.
 */
struct ucl_parser_saved_state {
	unsigned int line;		/* presumably the line counter at the saved position */
	unsigned int column;		/* presumably the column counter at the saved position */
	size_t remain;			/* presumably the number of bytes left in the chunk */
	const unsigned char *pos;	/* presumably the saved read cursor */
};
39
/**
 * Advance the chunk cursor by exactly one character.
 *
 * A '\n' under the cursor increments chunk->line and resets chunk->column to
 * zero; any other character increments chunk->column. Afterwards both the
 * caller's cursor and chunk->pos move forward by one and chunk->remain is
 * decremented.
 *
 * No bounds checking is performed: the caller must make sure that `p` points
 * at a readable character and that chunk->remain is not zero.
 *
 * @param chunk pointer to the chunk being parsed (evaluated more than once)
 * @param p caller's cursor, must be a modifiable lvalue (evaluated more than once)
 */
#define ucl_chunk_skipc(chunk, p) do {	\
	if (*(p) == '\n') {	\
		(chunk)->line ++;	\
		(chunk)->column = 0;	\
	}	\
	else {	\
		(chunk)->column ++;	\
	}	\
	(p) ++;	\
	(chunk)->pos ++;	\
	(chunk)->remain --;	\
} while (0)
57
58 static inline void
ucl_set_err(struct ucl_parser * parser,int code,const char * str,UT_string ** err)59 ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err)
60 {
61 const char *fmt_string, *filename;
62 struct ucl_chunk *chunk = parser->chunks;
63
64 if (parser->cur_file) {
65 filename = parser->cur_file;
66 }
67 else {
68 filename = "<unknown>";
69 }
70
71 if (chunk->pos < chunk->end) {
72 if (isgraph (*chunk->pos)) {
73 fmt_string = "error while parsing %s: "
74 "line: %d, column: %d - '%s', character: '%c'";
75 }
76 else {
77 fmt_string = "error while parsing %s: "
78 "line: %d, column: %d - '%s', character: '0x%02x'";
79 }
80 ucl_create_err (err, fmt_string,
81 filename, chunk->line, chunk->column,
82 str, *chunk->pos);
83 }
84 else {
85 ucl_create_err (err, "error while parsing %s: at the end of chunk: %s",
86 filename, str);
87 }
88
89 parser->err_code = code;
90 }
91
92 static void
ucl_save_comment(struct ucl_parser * parser,const char * begin,size_t len)93 ucl_save_comment (struct ucl_parser *parser, const char *begin, size_t len)
94 {
95 ucl_object_t *nobj;
96
97 if (len > 0 && begin != NULL) {
98 nobj = ucl_object_fromstring_common (begin, len, 0);
99
100 if (parser->last_comment) {
101 /* We need to append data to an existing object */
102 DL_APPEND (parser->last_comment, nobj);
103 }
104 else {
105 parser->last_comment = nobj;
106 }
107 }
108 }
109
110 static void
ucl_attach_comment(struct ucl_parser * parser,ucl_object_t * obj,bool before)111 ucl_attach_comment (struct ucl_parser *parser, ucl_object_t *obj, bool before)
112 {
113 if (parser->last_comment) {
114 ucl_object_insert_key (parser->comments, parser->last_comment,
115 (const char *)&obj, sizeof (void *), true);
116
117 if (before) {
118 parser->last_comment->flags |= UCL_OBJECT_INHERITED;
119 }
120
121 parser->last_comment = NULL;
122 }
123 }
124
125 /**
126 * Skip all comments from the current pos resolving nested and multiline comments
127 * @param parser
128 * @return
129 */
130 static bool
ucl_skip_comments(struct ucl_parser * parser)131 ucl_skip_comments (struct ucl_parser *parser)
132 {
133 struct ucl_chunk *chunk = parser->chunks;
134 const unsigned char *p, *beg = NULL;
135 int comments_nested = 0;
136 bool quoted = false;
137
138 p = chunk->pos;
139
140 start:
141 if (chunk->remain > 0 && *p == '#') {
142 if (parser->state != UCL_STATE_SCOMMENT &&
143 parser->state != UCL_STATE_MCOMMENT) {
144 beg = p;
145
146 while (p < chunk->end) {
147 if (*p == '\n') {
148 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
149 ucl_save_comment (parser, beg, p - beg);
150 beg = NULL;
151 }
152
153 ucl_chunk_skipc (chunk, p);
154
155 goto start;
156 }
157 ucl_chunk_skipc (chunk, p);
158 }
159 }
160 }
161 else if (chunk->remain >= 2 && *p == '/') {
162 if (p[1] == '*') {
163 beg = p;
164 ucl_chunk_skipc (chunk, p);
165 comments_nested ++;
166 ucl_chunk_skipc (chunk, p);
167
168 while (p < chunk->end) {
169 if (*p == '"' && *(p - 1) != '\\') {
170 quoted = !quoted;
171 }
172
173 if (!quoted) {
174 if (*p == '*') {
175 ucl_chunk_skipc (chunk, p);
176 if (*p == '/') {
177 comments_nested --;
178 if (comments_nested == 0) {
179 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
180 ucl_save_comment (parser, beg, p - beg + 1);
181 beg = NULL;
182 }
183
184 ucl_chunk_skipc (chunk, p);
185 goto start;
186 }
187 }
188 ucl_chunk_skipc (chunk, p);
189 }
190 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
191 comments_nested ++;
192 ucl_chunk_skipc (chunk, p);
193 ucl_chunk_skipc (chunk, p);
194 continue;
195 }
196 }
197
198 ucl_chunk_skipc (chunk, p);
199 }
200 if (comments_nested != 0) {
201 ucl_set_err (parser, UCL_ENESTED,
202 "unfinished multiline comment", &parser->err);
203 return false;
204 }
205 }
206 }
207
208 if (beg && p > beg && (parser->flags & UCL_PARSER_SAVE_COMMENTS)) {
209 ucl_save_comment (parser, beg, p - beg);
210 }
211
212 return true;
213 }
214
/**
 * Translate a size suffix character into a numeric multiplier.
 *
 * The suffix is matched case-insensitively: 'k', 'm' and 'g' are known.
 *
 * @param c suffix character
 * @param is_bytes if true use powers of 1024, otherwise powers of 1000
 * @return the multiplier, or 1 if the character is not a known suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes)
{
	switch (tolower (c)) {
	case 'k':
		return is_bytes ? 1024L : 1000L;
	case 'm':
		return is_bytes ? 1024L * 1024L : 1000L * 1000L;
	case 'g':
		return is_bytes ? 1024L * 1024L * 1024L : 1000L * 1000L * 1000L;
	default:
		break;
	}

	return 1;
}
245
246
/**
 * Translate a time suffix character into the number of seconds it stands for.
 *
 * The suffix is matched case-insensitively: 'm' (minute), 'h' (hour),
 * 'd' (day), 'w' (week of 7 days) and 'y' (year of 365 days) are known.
 *
 * @param c suffix character
 * @return number of seconds per unit, or 1 if the character is not a known suffix
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c)
{
	switch (tolower (c)) {
	case 'm':
		return 60;
	case 'h':
		return 60 * 60;
	case 'd':
		return 60 * 60 * 24;
	case 'w':
		return 60 * 60 * 24 * 7;
	case 'y':
		return 60 * 60 * 24 * 365;
	default:
		break;
	}

	return 1;
}
274
275 /**
276 * Return true if a character is a end of an atom
277 * @param c
278 * @return
279 */
280 static inline bool
ucl_lex_is_atom_end(const unsigned char c)281 ucl_lex_is_atom_end (const unsigned char c)
282 {
283 return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
284 }
285
/**
 * Tell whether two consecutive characters open a comment.
 *
 * @param c1 first character
 * @param c2 character that follows c1 (only relevant when c1 is '/')
 * @return true for '#' followed by anything and for '/' followed by '*'
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	return c1 == '#' || (c1 == '/' && c2 == '*');
}
299
300 /**
301 * Check variable found
302 * @param parser
303 * @param ptr
304 * @param remain
305 * @param out_len
306 * @param strict
307 * @param found
308 * @return
309 */
310 static inline const char *
ucl_check_variable_safe(struct ucl_parser * parser,const char * ptr,size_t remain,size_t * out_len,bool strict,bool * found)311 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
312 size_t *out_len, bool strict, bool *found)
313 {
314 struct ucl_variable *var;
315 unsigned char *dst;
316 size_t dstlen;
317 bool need_free = false;
318
319 LL_FOREACH (parser->variables, var) {
320 if (strict) {
321 if (remain == var->var_len) {
322 if (memcmp (ptr, var->var, var->var_len) == 0) {
323 *out_len += var->value_len;
324 *found = true;
325 return (ptr + var->var_len);
326 }
327 }
328 }
329 else {
330 if (remain >= var->var_len) {
331 if (memcmp (ptr, var->var, var->var_len) == 0) {
332 *out_len += var->value_len;
333 *found = true;
334 return (ptr + var->var_len);
335 }
336 }
337 }
338 }
339
340 /* XXX: can only handle ${VAR} */
341 if (!(*found) && parser->var_handler != NULL && strict) {
342 /* Call generic handler */
343 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
344 parser->var_data)) {
345 *out_len += dstlen;
346 *found = true;
347 if (need_free) {
348 free (dst);
349 }
350 return (ptr + remain);
351 }
352 }
353
354 return ptr;
355 }
356
/**
 * Sizing pass for a single '$' sequence: work out how many output bytes the
 * sequence will produce and where scanning must continue.
 *
 * `ptr` points at the character right after the '$'. Three forms are handled:
 *  - "${NAME}": the name between the braces is looked up in strict mode; when
 *    it is unknown, 2 bytes are counted for "${" and scanning continues after
 *    the '{'. When the closing brace is missing, 1 byte is counted for the '$'
 *    and scanning continues at the '{' (which the caller counts itself).
 *  - "$$": counts a single byte and skips the second '$'.
 *  - "$NAME": the name is looked up by prefix; when it is unknown, 1 byte is
 *    counted for the '$'.
 *
 * @param parser parser object
 * @param ptr character following the '$'
 * @param remain number of bytes available at ptr
 * @param out_len accumulator for the size of the expanded string
 * @param vars_found set to true when a variable is recognised
 * @return position from which the caller must continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr,
		size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false;

	if (remain == 0) {
		/* '$' is the last character of the input: it is a literal */
		(*out_len) ++;
		return ret;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end && *p != '}') {
			p ++;
		}

		if (p == end) {
			/*
			 * No closing brace: the '$' is copied verbatim by the expansion
			 * pass, so it has to be counted; the '{' is counted by the
			 * caller because ret still points at it.
			 */
			(*out_len) ++;
		}
		else {
			ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
					out_len, true, &found);
			if (found) {
				/* {} must be excluded actually */
				ret ++;
				*vars_found = true;
			}
			else {
				/* Unknown variable: "${" is kept as is */
				*out_len += 2;
			}
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found) {
			*vars_found = true;
		}
		else {
			(*out_len) ++;
		}
	}
	else {
		/* "$$" stands for a single literal '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
413
414 /**
415 * Expand a single variable
416 * @param parser
417 * @param ptr
418 * @param remain
419 * @param dest
420 * @return
421 */
422 static const char *
ucl_expand_single_variable(struct ucl_parser * parser,const char * ptr,size_t remain,unsigned char ** dest)423 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
424 size_t remain, unsigned char **dest)
425 {
426 unsigned char *d = *dest, *dst;
427 const char *p = ptr + 1, *ret;
428 struct ucl_variable *var;
429 size_t dstlen;
430 bool need_free = false;
431 bool found = false;
432 bool strict = false;
433
434 ret = ptr + 1;
435 remain --;
436
437 if (*p == '$') {
438 *d++ = *p++;
439 *dest = d;
440 return p;
441 }
442 else if (*p == '{') {
443 p ++;
444 strict = true;
445 ret += 2;
446 remain -= 2;
447 }
448
449 LL_FOREACH (parser->variables, var) {
450 if (remain >= var->var_len) {
451 if (memcmp (p, var->var, var->var_len) == 0) {
452 memcpy (d, var->value, var->value_len);
453 ret += var->var_len;
454 d += var->value_len;
455 found = true;
456 break;
457 }
458 }
459 }
460 if (!found) {
461 if (strict && parser->var_handler != NULL) {
462 size_t var_len = 0;
463 while (var_len < remain && p[var_len] != '}')
464 var_len ++;
465
466 if (parser->var_handler (p, var_len, &dst, &dstlen, &need_free,
467 parser->var_data)) {
468 memcpy (d, dst, dstlen);
469 ret += var_len;
470 d += dstlen;
471 if (need_free) {
472 free (dst);
473 }
474 found = true;
475 }
476 }
477
478 /* Leave variable as is */
479 if (!found) {
480 if (strict) {
481 /* Copy '${' */
482 memcpy (d, ptr, 2);
483 d += 2;
484 ret --;
485 }
486 else {
487 memcpy (d, ptr, 1);
488 d ++;
489 }
490 }
491 }
492
493 *dest = d;
494 return ret;
495 }
496
497 /**
498 * Expand variables in string
499 * @param parser
500 * @param dst
501 * @param src
502 * @param in_len
503 * @return
504 */
505 static ssize_t
ucl_expand_variable(struct ucl_parser * parser,unsigned char ** dst,const char * src,size_t in_len)506 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
507 const char *src, size_t in_len)
508 {
509 const char *p, *end = src + in_len;
510 unsigned char *d;
511 size_t out_len = 0;
512 bool vars_found = false;
513
514 if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
515 *dst = NULL;
516 return in_len;
517 }
518
519 p = src;
520 while (p != end) {
521 if (*p == '$') {
522 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
523 }
524 else {
525 p ++;
526 out_len ++;
527 }
528 }
529
530 if (!vars_found) {
531 /* Trivial case */
532 *dst = NULL;
533 return in_len;
534 }
535
536 *dst = UCL_ALLOC (out_len + 1);
537 if (*dst == NULL) {
538 return in_len;
539 }
540
541 d = *dst;
542 p = src;
543 while (p != end) {
544 if (*p == '$') {
545 p = ucl_expand_single_variable (parser, p, end - p, &d);
546 }
547 else {
548 *d++ = *p++;
549 }
550 }
551
552 *d = '\0';
553
554 return out_len;
555 }
556
557 /**
558 * Store or copy pointer to the trash stack
559 * @param parser parser object
560 * @param src src string
561 * @param dst destination buffer (trash stack pointer)
562 * @param dst_const const destination pointer (e.g. value of object)
563 * @param in_len input length
564 * @param need_unescape need to unescape source (and copy it)
565 * @param need_lowercase need to lowercase value (and copy)
566 * @param need_expand need to expand variables (and copy as well)
567 * @return output length (excluding \0 symbol)
568 */
569 static inline ssize_t
ucl_copy_or_store_ptr(struct ucl_parser * parser,const unsigned char * src,unsigned char ** dst,const char ** dst_const,size_t in_len,bool need_unescape,bool need_lowercase,bool need_expand)570 ucl_copy_or_store_ptr (struct ucl_parser *parser,
571 const unsigned char *src, unsigned char **dst,
572 const char **dst_const, size_t in_len,
573 bool need_unescape, bool need_lowercase, bool need_expand)
574 {
575 ssize_t ret = -1, tret;
576 unsigned char *tmp;
577
578 if (need_unescape || need_lowercase ||
579 (need_expand && parser->variables != NULL) ||
580 !(parser->flags & UCL_PARSER_ZEROCOPY)) {
581 /* Copy string */
582 *dst = UCL_ALLOC (in_len + 1);
583 if (*dst == NULL) {
584 ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string",
585 &parser->err);
586 return false;
587 }
588 if (need_lowercase) {
589 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
590 }
591 else {
592 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
593 }
594
595 if (need_unescape) {
596 ret = ucl_unescape_json_string (*dst, ret);
597 }
598 if (need_expand) {
599 tmp = *dst;
600 tret = ret;
601 ret = ucl_expand_variable (parser, dst, tmp, ret);
602 if (*dst == NULL) {
603 /* Nothing to expand */
604 *dst = tmp;
605 ret = tret;
606 }
607 else {
608 /* Free unexpanded value */
609 UCL_FREE (in_len + 1, tmp);
610 }
611 }
612 *dst_const = *dst;
613 }
614 else {
615 *dst_const = src;
616 ret = in_len;
617 }
618
619 return ret;
620 }
621
622 /**
623 * Create and append an object at the specified level
624 * @param parser
625 * @param is_array
626 * @param level
627 * @return
628 */
629 static inline ucl_object_t *
ucl_parser_add_container(ucl_object_t * obj,struct ucl_parser * parser,bool is_array,int level)630 ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser,
631 bool is_array, int level)
632 {
633 struct ucl_stack *st;
634
635 if (!is_array) {
636 if (obj == NULL) {
637 obj = ucl_object_new_full (UCL_OBJECT, parser->chunks->priority);
638 }
639 else {
640 obj->type = UCL_OBJECT;
641 }
642 if (obj->value.ov == NULL) {
643 obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE);
644 }
645 parser->state = UCL_STATE_KEY;
646 }
647 else {
648 if (obj == NULL) {
649 obj = ucl_object_new_full (UCL_ARRAY, parser->chunks->priority);
650 }
651 else {
652 obj->type = UCL_ARRAY;
653 }
654 parser->state = UCL_STATE_VALUE;
655 }
656
657 st = UCL_ALLOC (sizeof (struct ucl_stack));
658
659 if (st == NULL) {
660 ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object",
661 &parser->err);
662 ucl_object_unref (obj);
663 return NULL;
664 }
665
666 st->obj = obj;
667 st->level = level;
668 LL_PREPEND (parser->stack, st);
669 parser->cur_obj = obj;
670
671 return obj;
672 }
673
674 int
ucl_maybe_parse_number(ucl_object_t * obj,const char * start,const char * end,const char ** pos,bool allow_double,bool number_bytes,bool allow_time)675 ucl_maybe_parse_number (ucl_object_t *obj,
676 const char *start, const char *end, const char **pos,
677 bool allow_double, bool number_bytes, bool allow_time)
678 {
679 const char *p = start, *c = start;
680 char *endptr;
681 bool got_dot = false, got_exp = false, need_double = false,
682 is_time = false, valid_start = false, is_hex = false,
683 is_neg = false;
684 double dv = 0;
685 int64_t lv = 0;
686
687 if (*p == '-') {
688 is_neg = true;
689 c ++;
690 p ++;
691 }
692 while (p < end) {
693 if (is_hex && isxdigit (*p)) {
694 p ++;
695 }
696 else if (isdigit (*p)) {
697 valid_start = true;
698 p ++;
699 }
700 else if (!is_hex && (*p == 'x' || *p == 'X')) {
701 is_hex = true;
702 allow_double = false;
703 c = p + 1;
704 }
705 else if (allow_double) {
706 if (p == c) {
707 /* Empty digits sequence, not a number */
708 *pos = start;
709 return EINVAL;
710 }
711 else if (*p == '.') {
712 if (got_dot) {
713 /* Double dots, not a number */
714 *pos = start;
715 return EINVAL;
716 }
717 else {
718 got_dot = true;
719 need_double = true;
720 p ++;
721 }
722 }
723 else if (*p == 'e' || *p == 'E') {
724 if (got_exp) {
725 /* Double exp, not a number */
726 *pos = start;
727 return EINVAL;
728 }
729 else {
730 got_exp = true;
731 need_double = true;
732 p ++;
733 if (p >= end) {
734 *pos = start;
735 return EINVAL;
736 }
737 if (!isdigit (*p) && *p != '+' && *p != '-') {
738 /* Wrong exponent sign */
739 *pos = start;
740 return EINVAL;
741 }
742 else {
743 p ++;
744 }
745 }
746 }
747 else {
748 /* Got the end of the number, need to check */
749 break;
750 }
751 }
752 else {
753 break;
754 }
755 }
756
757 if (!valid_start) {
758 *pos = start;
759 return EINVAL;
760 }
761
762 errno = 0;
763 if (need_double) {
764 dv = strtod (c, &endptr);
765 }
766 else {
767 if (is_hex) {
768 lv = strtoimax (c, &endptr, 16);
769 }
770 else {
771 lv = strtoimax (c, &endptr, 10);
772 }
773 }
774 if (errno == ERANGE) {
775 *pos = start;
776 return ERANGE;
777 }
778
779 /* Now check endptr */
780 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
781 p = endptr;
782 goto set_obj;
783 }
784
785 if (endptr < end && endptr != start) {
786 p = endptr;
787 switch (*p) {
788 case 'm':
789 case 'M':
790 case 'g':
791 case 'G':
792 case 'k':
793 case 'K':
794 if (end - p >= 2) {
795 if (p[1] == 's' || p[1] == 'S') {
796 /* Milliseconds */
797 if (!need_double) {
798 need_double = true;
799 dv = lv;
800 }
801 is_time = true;
802 if (p[0] == 'm' || p[0] == 'M') {
803 dv /= 1000.;
804 }
805 else {
806 dv *= ucl_lex_num_multiplier (*p, false);
807 }
808 p += 2;
809 goto set_obj;
810 }
811 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
812 /* Bytes */
813 if (need_double) {
814 need_double = false;
815 lv = dv;
816 }
817 lv *= ucl_lex_num_multiplier (*p, true);
818 p += 2;
819 goto set_obj;
820 }
821 else if (ucl_lex_is_atom_end (p[1])) {
822 if (need_double) {
823 dv *= ucl_lex_num_multiplier (*p, false);
824 }
825 else {
826 lv *= ucl_lex_num_multiplier (*p, number_bytes);
827 }
828 p ++;
829 goto set_obj;
830 }
831 else if (allow_time && end - p >= 3) {
832 if (tolower (p[0]) == 'm' &&
833 tolower (p[1]) == 'i' &&
834 tolower (p[2]) == 'n') {
835 /* Minutes */
836 if (!need_double) {
837 need_double = true;
838 dv = lv;
839 }
840 is_time = true;
841 dv *= 60.;
842 p += 3;
843 goto set_obj;
844 }
845 }
846 }
847 else {
848 if (need_double) {
849 dv *= ucl_lex_num_multiplier (*p, false);
850 }
851 else {
852 lv *= ucl_lex_num_multiplier (*p, number_bytes);
853 }
854 p ++;
855 goto set_obj;
856 }
857 break;
858 case 'S':
859 case 's':
860 if (allow_time &&
861 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
862 if (!need_double) {
863 need_double = true;
864 dv = lv;
865 }
866 p ++;
867 is_time = true;
868 goto set_obj;
869 }
870 break;
871 case 'h':
872 case 'H':
873 case 'd':
874 case 'D':
875 case 'w':
876 case 'W':
877 case 'Y':
878 case 'y':
879 if (allow_time &&
880 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
881 if (!need_double) {
882 need_double = true;
883 dv = lv;
884 }
885 is_time = true;
886 dv *= ucl_lex_time_multiplier (*p);
887 p ++;
888 goto set_obj;
889 }
890 break;
891 case '\t':
892 case ' ':
893 while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
894 p++;
895 }
896 if (ucl_lex_is_atom_end(*p))
897 goto set_obj;
898 break;
899 }
900 }
901 else if (endptr == end) {
902 /* Just a number at the end of chunk */
903 p = endptr;
904 goto set_obj;
905 }
906
907 *pos = c;
908 return EINVAL;
909
910 set_obj:
911 if (obj != NULL) {
912 if (allow_double && (need_double || is_time)) {
913 if (!is_time) {
914 obj->type = UCL_FLOAT;
915 }
916 else {
917 obj->type = UCL_TIME;
918 }
919 obj->value.dv = is_neg ? (-dv) : dv;
920 }
921 else {
922 obj->type = UCL_INT;
923 obj->value.iv = is_neg ? (-lv) : lv;
924 }
925 }
926 *pos = p;
927 return 0;
928 }
929
930 /**
931 * Parse possible number
932 * @param parser
933 * @param chunk
934 * @param obj
935 * @return true if a number has been parsed
936 */
937 static bool
ucl_lex_number(struct ucl_parser * parser,struct ucl_chunk * chunk,ucl_object_t * obj)938 ucl_lex_number (struct ucl_parser *parser,
939 struct ucl_chunk *chunk, ucl_object_t *obj)
940 {
941 const unsigned char *pos;
942 int ret;
943
944 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
945 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
946
947 if (ret == 0) {
948 chunk->remain -= pos - chunk->pos;
949 chunk->column += pos - chunk->pos;
950 chunk->pos = pos;
951 return true;
952 }
953 else if (ret == ERANGE) {
954 ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range",
955 &parser->err);
956 }
957
958 return false;
959 }
960
961 /**
962 * Parse quoted string with possible escapes
963 * @param parser
964 * @param chunk
965 * @param need_unescape
966 * @param ucl_escape
967 * @param var_expand
968 * @return true if a string has been parsed
969 */
970 static bool
ucl_lex_json_string(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * need_unescape,bool * ucl_escape,bool * var_expand)971 ucl_lex_json_string (struct ucl_parser *parser,
972 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
973 {
974 const unsigned char *p = chunk->pos;
975 unsigned char c;
976 int i;
977
978 while (p < chunk->end) {
979 c = *p;
980 if (c < 0x1F) {
981 /* Unmasked control character */
982 if (c == '\n') {
983 ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline",
984 &parser->err);
985 }
986 else {
987 ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character",
988 &parser->err);
989 }
990 return false;
991 }
992 else if (c == '\\') {
993 ucl_chunk_skipc (chunk, p);
994 c = *p;
995 if (p >= chunk->end) {
996 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
997 &parser->err);
998 return false;
999 }
1000 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
1001 if (c == 'u') {
1002 ucl_chunk_skipc (chunk, p);
1003 for (i = 0; i < 4 && p < chunk->end; i ++) {
1004 if (!isxdigit (*p)) {
1005 ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape",
1006 &parser->err);
1007 return false;
1008 }
1009 ucl_chunk_skipc (chunk, p);
1010 }
1011 if (p >= chunk->end) {
1012 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
1013 &parser->err);
1014 return false;
1015 }
1016 }
1017 else {
1018 ucl_chunk_skipc (chunk, p);
1019 }
1020 }
1021 *need_unescape = true;
1022 *ucl_escape = true;
1023 continue;
1024 }
1025 else if (c == '"') {
1026 ucl_chunk_skipc (chunk, p);
1027 return true;
1028 }
1029 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
1030 *ucl_escape = true;
1031 }
1032 else if (c == '$') {
1033 *var_expand = true;
1034 }
1035 ucl_chunk_skipc (chunk, p);
1036 }
1037
1038 ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of json string",
1039 &parser->err);
1040 return false;
1041 }
1042
1043 static void
ucl_parser_append_elt(struct ucl_parser * parser,ucl_hash_t * cont,ucl_object_t * top,ucl_object_t * elt)1044 ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont,
1045 ucl_object_t *top,
1046 ucl_object_t *elt)
1047 {
1048 ucl_object_t *nobj;
1049
1050 if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) {
1051 /* Implicit array */
1052 top->flags |= UCL_OBJECT_MULTIVALUE;
1053 DL_APPEND (top, elt);
1054 parser->stack->obj->len ++;
1055 }
1056 else {
1057 if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) {
1058 /* Just add to the explicit array */
1059 ucl_array_append (top, elt);
1060 }
1061 else {
1062 /* Convert to an array */
1063 nobj = ucl_object_typed_new (UCL_ARRAY);
1064 nobj->key = top->key;
1065 nobj->keylen = top->keylen;
1066 nobj->flags |= UCL_OBJECT_MULTIVALUE;
1067 ucl_array_append (nobj, top);
1068 ucl_array_append (nobj, elt);
1069 ucl_hash_replace (cont, top, nobj);
1070 }
1071 }
1072 }
1073
1074 bool
ucl_parser_process_object_element(struct ucl_parser * parser,ucl_object_t * nobj)1075 ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj)
1076 {
1077 ucl_hash_t *container;
1078 ucl_object_t *tobj;
1079 char errmsg[256];
1080
1081 container = parser->stack->obj->value.ov;
1082
1083 tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1084 if (tobj == NULL) {
1085 container = ucl_hash_insert_object (container, nobj,
1086 parser->flags & UCL_PARSER_KEY_LOWERCASE);
1087 nobj->prev = nobj;
1088 nobj->next = NULL;
1089 parser->stack->obj->len ++;
1090 }
1091 else {
1092 unsigned priold = ucl_object_get_priority (tobj),
1093 prinew = ucl_object_get_priority (nobj);
1094 switch (parser->chunks->strategy) {
1095
1096 case UCL_DUPLICATE_APPEND:
1097 /*
1098 * The logic here is the following:
1099 *
1100 * - if we have two objects with the same priority, then we form an
1101 * implicit or explicit array
1102 * - if a new object has bigger priority, then we overwrite an old one
1103 * - if a new object has lower priority, then we ignore it
1104 */
1105
1106
1107 /* Special case for inherited objects */
1108 if (tobj->flags & UCL_OBJECT_INHERITED) {
1109 prinew = priold + 1;
1110 }
1111
1112 if (priold == prinew) {
1113 ucl_parser_append_elt (parser, container, tobj, nobj);
1114 }
1115 else if (priold > prinew) {
1116 /*
1117 * We add this new object to a list of trash objects just to ensure
1118 * that it won't come to any real object
1119 * XXX: rather inefficient approach
1120 */
1121 DL_APPEND (parser->trash_objs, nobj);
1122 }
1123 else {
1124 ucl_hash_replace (container, tobj, nobj);
1125 ucl_object_unref (tobj);
1126 }
1127
1128 break;
1129
1130 case UCL_DUPLICATE_REWRITE:
1131 /* We just rewrite old values regardless of priority */
1132 ucl_hash_replace (container, tobj, nobj);
1133 ucl_object_unref (tobj);
1134
1135 break;
1136
1137 case UCL_DUPLICATE_ERROR:
1138 snprintf(errmsg, sizeof(errmsg),
1139 "duplicate element for key '%s' found",
1140 nobj->key);
1141 ucl_set_err (parser, UCL_EMERGE, errmsg, &parser->err);
1142 return false;
1143
1144 case UCL_DUPLICATE_MERGE:
1145 /*
1146 * Here we do have some old object so we just push it on top of objects stack
1147 * Check priority and then perform the merge on the remaining objects
1148 */
1149 if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) {
1150 ucl_object_unref (nobj);
1151 nobj = tobj;
1152 }
1153 else if (priold == prinew) {
1154 ucl_parser_append_elt (parser, container, tobj, nobj);
1155 }
1156 else if (priold > prinew) {
1157 /*
1158 * We add this new object to a list of trash objects just to ensure
1159 * that it won't come to any real object
1160 * XXX: rather inefficient approach
1161 */
1162 DL_APPEND (parser->trash_objs, nobj);
1163 }
1164 else {
1165 ucl_hash_replace (container, tobj, nobj);
1166 ucl_object_unref (tobj);
1167 }
1168 break;
1169 }
1170 }
1171
1172 parser->stack->obj->value.ov = container;
1173 parser->cur_obj = nobj;
1174 ucl_attach_comment (parser, nobj, false);
1175
1176 return true;
1177 }
1178
1179 /**
1180 * Parse a key in an object
1181 * @param parser
1182 * @param chunk
1183 * @param next_key
1184 * @param end_of_object
1185 * @return true if a key has been parsed
1186 */
1187 static bool
ucl_parse_key(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * next_key,bool * end_of_object)1188 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk,
1189 bool *next_key, bool *end_of_object)
1190 {
1191 const unsigned char *p, *c = NULL, *end, *t;
1192 const char *key = NULL;
1193 bool got_quote = false, got_eq = false, got_semicolon = false,
1194 need_unescape = false, ucl_escape = false, var_expand = false,
1195 got_content = false, got_sep = false;
1196 ucl_object_t *nobj;
1197 ssize_t keylen;
1198
1199 p = chunk->pos;
1200
1201 if (*p == '.') {
1202 /* It is macro actually */
1203 if (!(parser->flags & UCL_PARSER_DISABLE_MACRO)) {
1204 ucl_chunk_skipc (chunk, p);
1205 }
1206
1207 parser->prev_state = parser->state;
1208 parser->state = UCL_STATE_MACRO_NAME;
1209 *end_of_object = false;
1210 return true;
1211 }
1212 while (p < chunk->end) {
1213 /*
1214 * A key must start with alpha, number, '/' or '_' and end with space character
1215 */
1216 if (c == NULL) {
1217 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1218 if (!ucl_skip_comments (parser)) {
1219 return false;
1220 }
1221 p = chunk->pos;
1222 }
1223 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1224 ucl_chunk_skipc (chunk, p);
1225 }
1226 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
1227 /* The first symbol */
1228 c = p;
1229 ucl_chunk_skipc (chunk, p);
1230 got_content = true;
1231 }
1232 else if (*p == '"') {
1233 /* JSON style key */
1234 c = p + 1;
1235 got_quote = true;
1236 got_content = true;
1237 ucl_chunk_skipc (chunk, p);
1238 }
1239 else if (*p == '}') {
1240 /* We have actually end of an object */
1241 *end_of_object = true;
1242 return true;
1243 }
1244 else if (*p == '.') {
1245 ucl_chunk_skipc (chunk, p);
1246 parser->prev_state = parser->state;
1247 parser->state = UCL_STATE_MACRO_NAME;
1248 return true;
1249 }
1250 else {
1251 /* Invalid identifier */
1252 ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter",
1253 &parser->err);
1254 return false;
1255 }
1256 }
1257 else {
1258 /* Parse the body of a key */
1259 if (!got_quote) {
1260 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
1261 got_content = true;
1262 ucl_chunk_skipc (chunk, p);
1263 }
1264 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
1265 end = p;
1266 break;
1267 }
1268 else {
1269 ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key",
1270 &parser->err);
1271 return false;
1272 }
1273 }
1274 else {
1275 /* We need to parse json like quoted string */
1276 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1277 return false;
1278 }
1279 /* Always escape keys obtained via json */
1280 end = chunk->pos - 1;
1281 p = chunk->pos;
1282 break;
1283 }
1284 }
1285 }
1286
1287 if (p >= chunk->end && got_content) {
1288 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1289 return false;
1290 }
1291 else if (!got_content) {
1292 return true;
1293 }
1294 *end_of_object = false;
1295 /* We are now at the end of the key, need to parse the rest */
1296 while (p < chunk->end) {
1297 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1298 ucl_chunk_skipc (chunk, p);
1299 }
1300 else if (*p == '=') {
1301 if (!got_eq && !got_semicolon) {
1302 ucl_chunk_skipc (chunk, p);
1303 got_eq = true;
1304 }
1305 else {
1306 ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character",
1307 &parser->err);
1308 return false;
1309 }
1310 }
1311 else if (*p == ':') {
1312 if (!got_eq && !got_semicolon) {
1313 ucl_chunk_skipc (chunk, p);
1314 got_semicolon = true;
1315 }
1316 else {
1317 ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character",
1318 &parser->err);
1319 return false;
1320 }
1321 }
1322 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1323 /* Check for comment */
1324 if (!ucl_skip_comments (parser)) {
1325 return false;
1326 }
1327 p = chunk->pos;
1328 }
1329 else {
1330 /* Start value */
1331 break;
1332 }
1333 }
1334
1335 if (p >= chunk->end && got_content) {
1336 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1337 return false;
1338 }
1339
1340 got_sep = got_semicolon || got_eq;
1341
1342 if (!got_sep) {
1343 /*
1344 * Maybe we have more keys nested, so search for termination character.
1345 * Possible choices:
1346 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1347 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1348 * 3) key1 value[;,\n] <- we treat that as linear object
1349 */
1350 t = p;
1351 *next_key = false;
1352 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1353 t ++;
1354 }
1355 /* Check first non-space character after a key */
1356 if (*t != '{' && *t != '[') {
1357 while (t < chunk->end) {
1358 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1359 break;
1360 }
1361 else if (*t == '{' || *t == '[') {
1362 *next_key = true;
1363 break;
1364 }
1365 t ++;
1366 }
1367 }
1368 }
1369
1370 /* Create a new object */
1371 nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1372 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1373 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1374 if (keylen == -1) {
1375 ucl_object_unref (nobj);
1376 return false;
1377 }
1378 else if (keylen == 0) {
1379 ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1380 ucl_object_unref (nobj);
1381 return false;
1382 }
1383
1384 nobj->key = key;
1385 nobj->keylen = keylen;
1386
1387 if (!ucl_parser_process_object_element (parser, nobj)) {
1388 return false;
1389 }
1390
1391 if (ucl_escape) {
1392 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1393 }
1394
1395
1396 return true;
1397 }
1398
1399 /**
1400 * Parse a cl string
1401 * @param parser
1402 * @param chunk
1403 * @param var_expand
1404 * @param need_unescape
1405 * @return true if a key has been parsed
1406 */
1407 static bool
ucl_parse_string_value(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * var_expand,bool * need_unescape)1408 ucl_parse_string_value (struct ucl_parser *parser,
1409 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1410 {
1411 const unsigned char *p;
1412 enum {
1413 UCL_BRACE_ROUND = 0,
1414 UCL_BRACE_SQUARE,
1415 UCL_BRACE_FIGURE
1416 };
1417 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1418
1419 p = chunk->pos;
1420
1421 while (p < chunk->end) {
1422
1423 /* Skip pairs of figure braces */
1424 if (*p == '{') {
1425 braces[UCL_BRACE_FIGURE][0] ++;
1426 }
1427 else if (*p == '}') {
1428 braces[UCL_BRACE_FIGURE][1] ++;
1429 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1430 /* This is not a termination symbol, continue */
1431 ucl_chunk_skipc (chunk, p);
1432 continue;
1433 }
1434 }
1435 /* Skip pairs of square braces */
1436 else if (*p == '[') {
1437 braces[UCL_BRACE_SQUARE][0] ++;
1438 }
1439 else if (*p == ']') {
1440 braces[UCL_BRACE_SQUARE][1] ++;
1441 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1442 /* This is not a termination symbol, continue */
1443 ucl_chunk_skipc (chunk, p);
1444 continue;
1445 }
1446 }
1447 else if (*p == '$') {
1448 *var_expand = true;
1449 }
1450 else if (*p == '\\') {
1451 *need_unescape = true;
1452 ucl_chunk_skipc (chunk, p);
1453 if (p < chunk->end) {
1454 ucl_chunk_skipc (chunk, p);
1455 }
1456 continue;
1457 }
1458
1459 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1460 break;
1461 }
1462 ucl_chunk_skipc (chunk, p);
1463 }
1464
1465 return true;
1466 }
1467
1468 /**
1469 * Parse multiline string ending with \n{term}\n
1470 * @param parser
1471 * @param chunk
1472 * @param term
1473 * @param term_len
1474 * @param beg
1475 * @param var_expand
1476 * @return size of multiline string or 0 in case of error
1477 */
1478 static int
ucl_parse_multiline_string(struct ucl_parser * parser,struct ucl_chunk * chunk,const unsigned char * term,int term_len,unsigned char const ** beg,bool * var_expand)1479 ucl_parse_multiline_string (struct ucl_parser *parser,
1480 struct ucl_chunk *chunk, const unsigned char *term,
1481 int term_len, unsigned char const **beg,
1482 bool *var_expand)
1483 {
1484 const unsigned char *p, *c, *tend;
1485 bool newline = false;
1486 int len = 0;
1487
1488 p = chunk->pos;
1489
1490 c = p;
1491
1492 while (p < chunk->end) {
1493 if (newline) {
1494 if (chunk->end - p < term_len) {
1495 return 0;
1496 }
1497 else if (memcmp (p, term, term_len) == 0) {
1498 tend = p + term_len;
1499 if (*tend != '\n' && *tend != ';' && *tend != ',') {
1500 /* Incomplete terminator */
1501 ucl_chunk_skipc (chunk, p);
1502 continue;
1503 }
1504 len = p - c;
1505 chunk->remain -= term_len;
1506 chunk->pos = p + term_len;
1507 chunk->column = term_len;
1508 *beg = c;
1509 break;
1510 }
1511 }
1512 if (*p == '\n') {
1513 newline = true;
1514 }
1515 else {
1516 if (*p == '$') {
1517 *var_expand = true;
1518 }
1519 newline = false;
1520 }
1521 ucl_chunk_skipc (chunk, p);
1522 }
1523
1524 return len;
1525 }
1526
1527 static inline ucl_object_t*
ucl_parser_get_container(struct ucl_parser * parser)1528 ucl_parser_get_container (struct ucl_parser *parser)
1529 {
1530 ucl_object_t *t, *obj = NULL;
1531
1532 if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) {
1533 return NULL;
1534 }
1535
1536 if (parser->stack->obj->type == UCL_ARRAY) {
1537 /* Object must be allocated */
1538 obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1539 t = parser->stack->obj;
1540
1541 if (!ucl_array_append (t, obj)) {
1542 ucl_object_unref (obj);
1543 return NULL;
1544 }
1545
1546 parser->cur_obj = obj;
1547 ucl_attach_comment (parser, obj, false);
1548 }
1549 else {
1550 /* Object has been already allocated */
1551 obj = parser->cur_obj;
1552 }
1553
1554 return obj;
1555 }
1556
1557 /**
1558 * Handle value data
1559 * @param parser
1560 * @param chunk
1561 * @return
1562 */
1563 static bool
ucl_parse_value(struct ucl_parser * parser,struct ucl_chunk * chunk)1564 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1565 {
1566 const unsigned char *p, *c;
1567 ucl_object_t *obj = NULL;
1568 unsigned int stripped_spaces;
1569 int str_len;
1570 bool need_unescape = false, ucl_escape = false, var_expand = false;
1571
1572 p = chunk->pos;
1573
1574 /* Skip any spaces and comments */
1575 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1576 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1577 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1578 ucl_chunk_skipc (chunk, p);
1579 }
1580 if (!ucl_skip_comments (parser)) {
1581 return false;
1582 }
1583 p = chunk->pos;
1584 }
1585
1586 while (p < chunk->end) {
1587 c = p;
1588 switch (*p) {
1589 case '"':
1590 ucl_chunk_skipc (chunk, p);
1591
1592 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape,
1593 &var_expand)) {
1594 return false;
1595 }
1596
1597 obj = ucl_parser_get_container (parser);
1598 if (!obj) {
1599 return false;
1600 }
1601
1602 str_len = chunk->pos - c - 2;
1603 obj->type = UCL_STRING;
1604 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1,
1605 &obj->trash_stack[UCL_TRASH_VALUE],
1606 &obj->value.sv, str_len, need_unescape, false,
1607 var_expand)) == -1) {
1608 return false;
1609 }
1610 obj->len = str_len;
1611
1612 parser->state = UCL_STATE_AFTER_VALUE;
1613 p = chunk->pos;
1614
1615 return true;
1616 break;
1617 case '{':
1618 obj = ucl_parser_get_container (parser);
1619 /* We have a new object */
1620 obj = ucl_parser_add_container (obj, parser, false, parser->stack->level);
1621 if (obj == NULL) {
1622 return false;
1623 }
1624
1625 ucl_chunk_skipc (chunk, p);
1626
1627 return true;
1628 break;
1629 case '[':
1630 obj = ucl_parser_get_container (parser);
1631 /* We have a new array */
1632 obj = ucl_parser_add_container (obj, parser, true, parser->stack->level);
1633 if (obj == NULL) {
1634 return false;
1635 }
1636
1637 ucl_chunk_skipc (chunk, p);
1638
1639 return true;
1640 break;
1641 case ']':
1642 /* We have the array ending */
1643 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1644 parser->state = UCL_STATE_AFTER_VALUE;
1645 return true;
1646 }
1647 else {
1648 goto parse_string;
1649 }
1650 break;
1651 case '<':
1652 obj = ucl_parser_get_container (parser);
1653 /* We have something like multiline value, which must be <<[A-Z]+\n */
1654 if (chunk->end - p > 3) {
1655 if (memcmp (p, "<<", 2) == 0) {
1656 p += 2;
1657 /* We allow only uppercase characters in multiline definitions */
1658 while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1659 p ++;
1660 }
1661 if (*p =='\n') {
1662 /* Set chunk positions and start multiline parsing */
1663 c += 2;
1664 chunk->remain -= p - c;
1665 chunk->pos = p + 1;
1666 chunk->column = 0;
1667 chunk->line ++;
1668 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1669 p - c, &c, &var_expand)) == 0) {
1670 ucl_set_err (parser, UCL_ESYNTAX,
1671 "unterminated multiline value", &parser->err);
1672 return false;
1673 }
1674
1675 obj->type = UCL_STRING;
1676 obj->flags |= UCL_OBJECT_MULTILINE;
1677 if ((str_len = ucl_copy_or_store_ptr (parser, c,
1678 &obj->trash_stack[UCL_TRASH_VALUE],
1679 &obj->value.sv, str_len - 1, false,
1680 false, var_expand)) == -1) {
1681 return false;
1682 }
1683 obj->len = str_len;
1684
1685 parser->state = UCL_STATE_AFTER_VALUE;
1686
1687 return true;
1688 }
1689 }
1690 }
1691 /* Fallback to ordinary strings */
1692 default:
1693 parse_string:
1694 if (obj == NULL) {
1695 obj = ucl_parser_get_container (parser);
1696 }
1697
1698 /* Parse atom */
1699 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1700 if (!ucl_lex_number (parser, chunk, obj)) {
1701 if (parser->state == UCL_STATE_ERROR) {
1702 return false;
1703 }
1704 }
1705 else {
1706 parser->state = UCL_STATE_AFTER_VALUE;
1707 return true;
1708 }
1709 /* Fallback to normal string */
1710 }
1711
1712 if (!ucl_parse_string_value (parser, chunk, &var_expand,
1713 &need_unescape)) {
1714 return false;
1715 }
1716 /* Cut trailing spaces */
1717 stripped_spaces = 0;
1718 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1719 UCL_CHARACTER_WHITESPACE)) {
1720 stripped_spaces ++;
1721 }
1722 str_len = chunk->pos - c - stripped_spaces;
1723 if (str_len <= 0) {
1724 ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty",
1725 &parser->err);
1726 return false;
1727 }
1728 else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1729 obj->len = 0;
1730 obj->type = UCL_NULL;
1731 }
1732 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1733 obj->type = UCL_STRING;
1734 if ((str_len = ucl_copy_or_store_ptr (parser, c,
1735 &obj->trash_stack[UCL_TRASH_VALUE],
1736 &obj->value.sv, str_len, need_unescape,
1737 false, var_expand)) == -1) {
1738 return false;
1739 }
1740 obj->len = str_len;
1741 }
1742 parser->state = UCL_STATE_AFTER_VALUE;
1743 p = chunk->pos;
1744
1745 return true;
1746 break;
1747 }
1748 }
1749
1750 return true;
1751 }
1752
1753 /**
1754 * Handle after value data
1755 * @param parser
1756 * @param chunk
1757 * @return
1758 */
1759 static bool
ucl_parse_after_value(struct ucl_parser * parser,struct ucl_chunk * chunk)1760 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1761 {
1762 const unsigned char *p;
1763 bool got_sep = false;
1764 struct ucl_stack *st;
1765
1766 p = chunk->pos;
1767
1768 while (p < chunk->end) {
1769 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1770 /* Skip whitespaces */
1771 ucl_chunk_skipc (chunk, p);
1772 }
1773 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1774 /* Skip comment */
1775 if (!ucl_skip_comments (parser)) {
1776 return false;
1777 }
1778 /* Treat comment as a separator */
1779 got_sep = true;
1780 p = chunk->pos;
1781 }
1782 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1783 if (*p == '}' || *p == ']') {
1784 if (parser->stack == NULL) {
1785 ucl_set_err (parser, UCL_ESYNTAX,
1786 "end of array or object detected without corresponding start",
1787 &parser->err);
1788 return false;
1789 }
1790 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1791 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1792
1793 /* Pop all nested objects from a stack */
1794 st = parser->stack;
1795 parser->stack = st->next;
1796 UCL_FREE (sizeof (struct ucl_stack), st);
1797
1798 if (parser->cur_obj) {
1799 ucl_attach_comment (parser, parser->cur_obj, true);
1800 }
1801
1802 while (parser->stack != NULL) {
1803 st = parser->stack;
1804
1805 if (st->next == NULL || st->next->level == st->level) {
1806 break;
1807 }
1808
1809 parser->stack = st->next;
1810 parser->cur_obj = st->obj;
1811 UCL_FREE (sizeof (struct ucl_stack), st);
1812 }
1813 }
1814 else {
1815 ucl_set_err (parser, UCL_ESYNTAX,
1816 "unexpected terminating symbol detected",
1817 &parser->err);
1818 return false;
1819 }
1820
1821 if (parser->stack == NULL) {
1822 /* Ignore everything after a top object */
1823 return true;
1824 }
1825 else {
1826 ucl_chunk_skipc (chunk, p);
1827 }
1828 got_sep = true;
1829 }
1830 else {
1831 /* Got a separator */
1832 got_sep = true;
1833 ucl_chunk_skipc (chunk, p);
1834 }
1835 }
1836 else {
1837 /* Anything else */
1838 if (!got_sep) {
1839 ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing",
1840 &parser->err);
1841 return false;
1842 }
1843 return true;
1844 }
1845 }
1846
1847 return true;
1848 }
1849
1850 static bool
ucl_skip_macro_as_comment(struct ucl_parser * parser,struct ucl_chunk * chunk)1851 ucl_skip_macro_as_comment (struct ucl_parser *parser,
1852 struct ucl_chunk *chunk)
1853 {
1854 const unsigned char *p, *c;
1855 enum {
1856 macro_skip_start = 0,
1857 macro_has_symbols,
1858 macro_has_obrace,
1859 macro_has_quote,
1860 macro_has_backslash,
1861 macro_has_sqbrace,
1862 macro_save
1863 } state = macro_skip_start, prev_state = macro_skip_start;
1864
1865 p = chunk->pos;
1866 c = chunk->pos;
1867
1868 while (p < chunk->end) {
1869 switch (state) {
1870 case macro_skip_start:
1871 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1872 state = macro_has_symbols;
1873 }
1874 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1875 state = macro_save;
1876 continue;
1877 }
1878
1879 ucl_chunk_skipc (chunk, p);
1880 break;
1881
1882 case macro_has_symbols:
1883 if (*p == '{') {
1884 state = macro_has_sqbrace;
1885 }
1886 else if (*p == '(') {
1887 state = macro_has_obrace;
1888 }
1889 else if (*p == '"') {
1890 state = macro_has_quote;
1891 }
1892 else if (*p == '\n') {
1893 state = macro_save;
1894 continue;
1895 }
1896
1897 ucl_chunk_skipc (chunk, p);
1898 break;
1899
1900 case macro_has_obrace:
1901 if (*p == '\\') {
1902 prev_state = state;
1903 state = macro_has_backslash;
1904 }
1905 else if (*p == ')') {
1906 state = macro_has_symbols;
1907 }
1908
1909 ucl_chunk_skipc (chunk, p);
1910 break;
1911
1912 case macro_has_sqbrace:
1913 if (*p == '\\') {
1914 prev_state = state;
1915 state = macro_has_backslash;
1916 }
1917 else if (*p == '}') {
1918 state = macro_save;
1919 }
1920
1921 ucl_chunk_skipc (chunk, p);
1922 break;
1923
1924 case macro_has_quote:
1925 if (*p == '\\') {
1926 prev_state = state;
1927 state = macro_has_backslash;
1928 }
1929 else if (*p == '"') {
1930 state = macro_save;
1931 }
1932
1933 ucl_chunk_skipc (chunk, p);
1934 break;
1935
1936 case macro_has_backslash:
1937 state = prev_state;
1938 ucl_chunk_skipc (chunk, p);
1939 break;
1940
1941 case macro_save:
1942 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
1943 ucl_save_comment (parser, c, p - c);
1944 }
1945
1946 return true;
1947 }
1948 }
1949
1950 return false;
1951 }
1952
1953 /**
1954 * Handle macro data
1955 * @param parser
1956 * @param chunk
1957 * @param marco
1958 * @param macro_start
1959 * @param macro_len
1960 * @return
1961 */
1962 static bool
ucl_parse_macro_value(struct ucl_parser * parser,struct ucl_chunk * chunk,struct ucl_macro * macro,unsigned char const ** macro_start,size_t * macro_len)1963 ucl_parse_macro_value (struct ucl_parser *parser,
1964 struct ucl_chunk *chunk, struct ucl_macro *macro,
1965 unsigned char const **macro_start, size_t *macro_len)
1966 {
1967 const unsigned char *p, *c;
1968 bool need_unescape = false, ucl_escape = false, var_expand = false;
1969
1970 p = chunk->pos;
1971
1972 switch (*p) {
1973 case '"':
1974 /* We have macro value encoded in quotes */
1975 c = p;
1976 ucl_chunk_skipc (chunk, p);
1977 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1978 return false;
1979 }
1980
1981 *macro_start = c + 1;
1982 *macro_len = chunk->pos - c - 2;
1983 p = chunk->pos;
1984 break;
1985 case '{':
1986 /* We got a multiline macro body */
1987 ucl_chunk_skipc (chunk, p);
1988 /* Skip spaces at the beginning */
1989 while (p < chunk->end) {
1990 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1991 ucl_chunk_skipc (chunk, p);
1992 }
1993 else {
1994 break;
1995 }
1996 }
1997 c = p;
1998 while (p < chunk->end) {
1999 if (*p == '}') {
2000 break;
2001 }
2002 ucl_chunk_skipc (chunk, p);
2003 }
2004 *macro_start = c;
2005 *macro_len = p - c;
2006 ucl_chunk_skipc (chunk, p);
2007 break;
2008 default:
2009 /* Macro is not enclosed in quotes or braces */
2010 c = p;
2011 while (p < chunk->end) {
2012 if (ucl_lex_is_atom_end (*p)) {
2013 break;
2014 }
2015 ucl_chunk_skipc (chunk, p);
2016 }
2017 *macro_start = c;
2018 *macro_len = p - c;
2019 break;
2020 }
2021
2022 /* We are at the end of a macro */
2023 /* Skip ';' and space characters and return to previous state */
2024 while (p < chunk->end) {
2025 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
2026 break;
2027 }
2028 ucl_chunk_skipc (chunk, p);
2029 }
2030 return true;
2031 }
2032
2033 /**
2034 * Parse macro arguments as UCL object
2035 * @param parser parser structure
2036 * @param chunk the current data chunk
2037 * @return
2038 */
2039 static ucl_object_t *
ucl_parse_macro_arguments(struct ucl_parser * parser,struct ucl_chunk * chunk)2040 ucl_parse_macro_arguments (struct ucl_parser *parser,
2041 struct ucl_chunk *chunk)
2042 {
2043 ucl_object_t *res = NULL;
2044 struct ucl_parser *params_parser;
2045 int obraces = 1, ebraces = 0, state = 0;
2046 const unsigned char *p, *c;
2047 size_t args_len = 0;
2048 struct ucl_parser_saved_state saved;
2049
2050 saved.column = chunk->column;
2051 saved.line = chunk->line;
2052 saved.pos = chunk->pos;
2053 saved.remain = chunk->remain;
2054 p = chunk->pos;
2055
2056 if (*p != '(' || chunk->remain < 2) {
2057 return NULL;
2058 }
2059
2060 /* Set begin and start */
2061 ucl_chunk_skipc (chunk, p);
2062 c = p;
2063
2064 while ((p) < (chunk)->end) {
2065 switch (state) {
2066 case 0:
2067 /* Parse symbols and check for '(', ')' and '"' */
2068 if (*p == '(') {
2069 obraces ++;
2070 }
2071 else if (*p == ')') {
2072 ebraces ++;
2073 }
2074 else if (*p == '"') {
2075 state = 1;
2076 }
2077 /* Check pairing */
2078 if (obraces == ebraces) {
2079 state = 99;
2080 }
2081 else {
2082 args_len ++;
2083 }
2084 /* Check overflow */
2085 if (chunk->remain == 0) {
2086 goto restore_chunk;
2087 }
2088 ucl_chunk_skipc (chunk, p);
2089 break;
2090 case 1:
2091 /* We have quote character, so skip all but quotes */
2092 if (*p == '"' && *(p - 1) != '\\') {
2093 state = 0;
2094 }
2095 if (chunk->remain == 0) {
2096 goto restore_chunk;
2097 }
2098 args_len ++;
2099 ucl_chunk_skipc (chunk, p);
2100 break;
2101 case 99:
2102 /*
2103 * We have read the full body of arguments, so we need to parse and set
2104 * object from that
2105 */
2106 params_parser = ucl_parser_new (parser->flags);
2107 if (!ucl_parser_add_chunk (params_parser, c, args_len)) {
2108 ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error",
2109 &parser->err);
2110 }
2111 else {
2112 res = ucl_parser_get_object (params_parser);
2113 }
2114 ucl_parser_free (params_parser);
2115
2116 return res;
2117
2118 break;
2119 }
2120 }
2121
2122 return res;
2123
2124 restore_chunk:
2125 chunk->column = saved.column;
2126 chunk->line = saved.line;
2127 chunk->pos = saved.pos;
2128 chunk->remain = saved.remain;
2129
2130 return NULL;
2131 }
2132
/*
 * Advance p over whitespace and then, if a comment starts at the reached
 * position, over that comment as well (p is reloaded from the chunk).
 * NOTE: this macro executes `return false' in the calling function if
 * ucl_skip_comments () fails, so it can be used only in functions
 * returning bool.
 */
#define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \
	while ((p) < (chunk)->end && \
			ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \
		ucl_chunk_skipc (chunk, p); \
	} \
	if ((p) < (chunk)->end && (chunk)->remain >= 2 && \
			ucl_lex_is_comment ((p)[0], (p)[1])) { \
		if (!ucl_skip_comments (parser)) { \
			return false; \
		} \
		p = (chunk)->pos; \
	} \
} while(0)
2147
2148 /**
2149 * Handle the main states of rcl parser
2150 * @param parser parser structure
2151 * @return true if chunk has been parsed and false in case of error
2152 */
2153 static bool
ucl_state_machine(struct ucl_parser * parser)2154 ucl_state_machine (struct ucl_parser *parser)
2155 {
2156 ucl_object_t *obj, *macro_args;
2157 struct ucl_chunk *chunk = parser->chunks;
2158 const unsigned char *p, *c = NULL, *macro_start = NULL;
2159 unsigned char *macro_escaped;
2160 size_t macro_len = 0;
2161 struct ucl_macro *macro = NULL;
2162 bool next_key = false, end_of_object = false, ret;
2163
2164 if (parser->top_obj == NULL) {
2165 parser->state = UCL_STATE_INIT;
2166 }
2167
2168 p = chunk->pos;
2169 while (chunk->pos < chunk->end) {
2170 switch (parser->state) {
2171 case UCL_STATE_INIT:
2172 /*
2173 * At the init state we can either go to the parse array or object
2174 * if we got [ or { correspondingly or can just treat new data as
2175 * a key of newly created object
2176 */
2177 if (!ucl_skip_comments (parser)) {
2178 parser->prev_state = parser->state;
2179 parser->state = UCL_STATE_ERROR;
2180 return false;
2181 }
2182 else {
2183 /* Skip any spaces */
2184 while (p < chunk->end && ucl_test_character (*p,
2185 UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2186 ucl_chunk_skipc (chunk, p);
2187 }
2188
2189 p = chunk->pos;
2190
2191 if (*p == '[') {
2192 parser->state = UCL_STATE_VALUE;
2193 ucl_chunk_skipc (chunk, p);
2194 }
2195 else {
2196 parser->state = UCL_STATE_KEY;
2197 if (*p == '{') {
2198 ucl_chunk_skipc (chunk, p);
2199 }
2200 }
2201
2202 if (parser->top_obj == NULL) {
2203 if (parser->state == UCL_STATE_VALUE) {
2204 obj = ucl_parser_add_container (NULL, parser, true, 0);
2205 }
2206 else {
2207 obj = ucl_parser_add_container (NULL, parser, false, 0);
2208 }
2209
2210 if (obj == NULL) {
2211 return false;
2212 }
2213
2214 parser->top_obj = obj;
2215 parser->cur_obj = obj;
2216 }
2217
2218 }
2219 break;
2220 case UCL_STATE_KEY:
2221 /* Skip any spaces */
2222 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2223 ucl_chunk_skipc (chunk, p);
2224 }
2225 if (p == chunk->end || *p == '}') {
2226 /* We have the end of an object */
2227 parser->state = UCL_STATE_AFTER_VALUE;
2228 continue;
2229 }
2230 if (parser->stack == NULL) {
2231 /* No objects are on stack, but we want to parse a key */
2232 ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser "
2233 "expects a key", &parser->err);
2234 parser->prev_state = parser->state;
2235 parser->state = UCL_STATE_ERROR;
2236 return false;
2237 }
2238 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
2239 parser->prev_state = parser->state;
2240 parser->state = UCL_STATE_ERROR;
2241 return false;
2242 }
2243 if (end_of_object) {
2244 p = chunk->pos;
2245 parser->state = UCL_STATE_AFTER_VALUE;
2246 continue;
2247 }
2248 else if (parser->state != UCL_STATE_MACRO_NAME) {
2249 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
2250 /* Parse more keys and nest objects accordingly */
2251 obj = ucl_parser_add_container (parser->cur_obj, parser, false,
2252 parser->stack->level + 1);
2253 if (obj == NULL) {
2254 return false;
2255 }
2256 }
2257 else {
2258 parser->state = UCL_STATE_VALUE;
2259 }
2260 }
2261 else {
2262 c = chunk->pos;
2263 }
2264 p = chunk->pos;
2265 break;
2266 case UCL_STATE_VALUE:
2267 /* We need to check what we do have */
2268 if (!parser->cur_obj || !ucl_parse_value (parser, chunk)) {
2269 parser->prev_state = parser->state;
2270 parser->state = UCL_STATE_ERROR;
2271 return false;
2272 }
2273 /* State is set in ucl_parse_value call */
2274 p = chunk->pos;
2275 break;
2276 case UCL_STATE_AFTER_VALUE:
2277 if (!ucl_parse_after_value (parser, chunk)) {
2278 parser->prev_state = parser->state;
2279 parser->state = UCL_STATE_ERROR;
2280 return false;
2281 }
2282
2283 if (parser->stack != NULL) {
2284 if (parser->stack->obj->type == UCL_OBJECT) {
2285 parser->state = UCL_STATE_KEY;
2286 }
2287 else {
2288 /* Array */
2289 parser->state = UCL_STATE_VALUE;
2290 }
2291 }
2292 else {
2293 /* Skip everything at the end */
2294 return true;
2295 }
2296
2297 p = chunk->pos;
2298 break;
2299 case UCL_STATE_MACRO_NAME:
2300 if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
2301 if (!ucl_skip_macro_as_comment (parser, chunk)) {
2302 /* We have invalid macro */
2303 ucl_create_err (&parser->err,
2304 "error on line %d at column %d: invalid macro",
2305 chunk->line,
2306 chunk->column);
2307 parser->state = UCL_STATE_ERROR;
2308 return false;
2309 }
2310 else {
2311 p = chunk->pos;
2312 parser->state = parser->prev_state;
2313 }
2314 }
2315 else {
2316 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) &&
2317 *p != '(') {
2318 ucl_chunk_skipc (chunk, p);
2319 }
2320 else {
2321 if (c != NULL && p - c > 0) {
2322 /* We got macro name */
2323 macro_len = (size_t) (p - c);
2324 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
2325 if (macro == NULL) {
2326 ucl_create_err (&parser->err,
2327 "error on line %d at column %d: "
2328 "unknown macro: '%.*s', character: '%c'",
2329 chunk->line,
2330 chunk->column,
2331 (int) (p - c),
2332 c,
2333 *chunk->pos);
2334 parser->state = UCL_STATE_ERROR;
2335 return false;
2336 }
2337 /* Now we need to skip all spaces */
2338 SKIP_SPACES_COMMENTS(parser, chunk, p);
2339 parser->state = UCL_STATE_MACRO;
2340 }
2341 else {
2342 /* We have invalid macro name */
2343 ucl_create_err (&parser->err,
2344 "error on line %d at column %d: invalid macro name",
2345 chunk->line,
2346 chunk->column);
2347 parser->state = UCL_STATE_ERROR;
2348 return false;
2349 }
2350 }
2351 }
2352 break;
2353 case UCL_STATE_MACRO:
2354 if (*chunk->pos == '(') {
2355 macro_args = ucl_parse_macro_arguments (parser, chunk);
2356 p = chunk->pos;
2357 if (macro_args) {
2358 SKIP_SPACES_COMMENTS(parser, chunk, p);
2359 }
2360 }
2361 else {
2362 macro_args = NULL;
2363 }
2364 if (!ucl_parse_macro_value (parser, chunk, macro,
2365 ¯o_start, ¯o_len)) {
2366 parser->prev_state = parser->state;
2367 parser->state = UCL_STATE_ERROR;
2368 return false;
2369 }
2370 macro_len = ucl_expand_variable (parser, ¯o_escaped,
2371 macro_start, macro_len);
2372 parser->state = parser->prev_state;
2373
2374 if (macro_escaped == NULL && macro != NULL) {
2375 if (macro->is_context) {
2376 ret = macro->h.context_handler (macro_start, macro_len,
2377 macro_args,
2378 parser->top_obj,
2379 macro->ud);
2380 }
2381 else {
2382 ret = macro->h.handler (macro_start, macro_len, macro_args,
2383 macro->ud);
2384 }
2385 }
2386 else if (macro != NULL) {
2387 if (macro->is_context) {
2388 ret = macro->h.context_handler (macro_escaped, macro_len,
2389 macro_args,
2390 parser->top_obj,
2391 macro->ud);
2392 }
2393 else {
2394 ret = macro->h.handler (macro_escaped, macro_len, macro_args,
2395 macro->ud);
2396 }
2397
2398 UCL_FREE (macro_len + 1, macro_escaped);
2399 }
2400 else {
2401 ret = false;
2402 ucl_set_err (parser, UCL_EINTERNAL,
2403 "internal error: parser has macro undefined", &parser->err);
2404 }
2405
2406 /*
2407 * Chunk can be modified within macro handler
2408 */
2409 chunk = parser->chunks;
2410 p = chunk->pos;
2411
2412 if (macro_args) {
2413 ucl_object_unref (macro_args);
2414 }
2415
2416 if (!ret) {
2417 return false;
2418 }
2419 break;
2420 default:
2421 ucl_set_err (parser, UCL_EINTERNAL,
2422 "internal error: parser is in an unknown state", &parser->err);
2423 parser->state = UCL_STATE_ERROR;
2424 return false;
2425 }
2426 }
2427
2428 if (parser->last_comment) {
2429 if (parser->cur_obj) {
2430 ucl_attach_comment (parser, parser->cur_obj, true);
2431 }
2432 else if (parser->stack && parser->stack->obj) {
2433 ucl_attach_comment (parser, parser->stack->obj, true);
2434 }
2435 else if (parser->top_obj) {
2436 ucl_attach_comment (parser, parser->top_obj, true);
2437 }
2438 else {
2439 ucl_object_unref (parser->last_comment);
2440 }
2441 }
2442
2443 return true;
2444 }
2445
2446 struct ucl_parser*
ucl_parser_new(int flags)2447 ucl_parser_new (int flags)
2448 {
2449 struct ucl_parser *parser;
2450
2451 parser = UCL_ALLOC (sizeof (struct ucl_parser));
2452 if (parser == NULL) {
2453 return NULL;
2454 }
2455
2456 memset (parser, 0, sizeof (struct ucl_parser));
2457
2458 ucl_parser_register_macro (parser, "include", ucl_include_handler, parser);
2459 ucl_parser_register_macro (parser, "try_include", ucl_try_include_handler, parser);
2460 ucl_parser_register_macro (parser, "includes", ucl_includes_handler, parser);
2461 ucl_parser_register_macro (parser, "priority", ucl_priority_handler, parser);
2462 ucl_parser_register_macro (parser, "load", ucl_load_handler, parser);
2463 ucl_parser_register_context_macro (parser, "inherit", ucl_inherit_handler, parser);
2464
2465 parser->flags = flags;
2466 parser->includepaths = NULL;
2467
2468 if (flags & UCL_PARSER_SAVE_COMMENTS) {
2469 parser->comments = ucl_object_typed_new (UCL_OBJECT);
2470 }
2471
2472 if (!(flags & UCL_PARSER_NO_FILEVARS)) {
2473 /* Initial assumption about filevars */
2474 ucl_parser_set_filevars (parser, NULL, false);
2475 }
2476
2477 return parser;
2478 }
2479
2480 bool
ucl_parser_set_default_priority(struct ucl_parser * parser,unsigned prio)2481 ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio)
2482 {
2483 if (parser == NULL) {
2484 return false;
2485 }
2486
2487 parser->default_priority = prio;
2488
2489 return true;
2490 }
2491
2492 void
ucl_parser_register_macro(struct ucl_parser * parser,const char * macro,ucl_macro_handler handler,void * ud)2493 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
2494 ucl_macro_handler handler, void* ud)
2495 {
2496 struct ucl_macro *new;
2497
2498 if (macro == NULL || handler == NULL) {
2499 return;
2500 }
2501
2502 new = UCL_ALLOC (sizeof (struct ucl_macro));
2503 if (new == NULL) {
2504 return;
2505 }
2506
2507 memset (new, 0, sizeof (struct ucl_macro));
2508 new->h.handler = handler;
2509 new->name = strdup (macro);
2510 new->ud = ud;
2511 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
2512 }
2513
2514 void
ucl_parser_register_context_macro(struct ucl_parser * parser,const char * macro,ucl_context_macro_handler handler,void * ud)2515 ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro,
2516 ucl_context_macro_handler handler, void* ud)
2517 {
2518 struct ucl_macro *new;
2519
2520 if (macro == NULL || handler == NULL) {
2521 return;
2522 }
2523
2524 new = UCL_ALLOC (sizeof (struct ucl_macro));
2525 if (new == NULL) {
2526 return;
2527 }
2528
2529 memset (new, 0, sizeof (struct ucl_macro));
2530 new->h.context_handler = handler;
2531 new->name = strdup (macro);
2532 new->ud = ud;
2533 new->is_context = true;
2534 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
2535 }
2536
2537 void
ucl_parser_register_variable(struct ucl_parser * parser,const char * var,const char * value)2538 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
2539 const char *value)
2540 {
2541 struct ucl_variable *new = NULL, *cur;
2542
2543 if (var == NULL) {
2544 return;
2545 }
2546
2547 /* Find whether a variable already exists */
2548 LL_FOREACH (parser->variables, cur) {
2549 if (strcmp (cur->var, var) == 0) {
2550 new = cur;
2551 break;
2552 }
2553 }
2554
2555 if (value == NULL) {
2556
2557 if (new != NULL) {
2558 /* Remove variable */
2559 DL_DELETE (parser->variables, new);
2560 free (new->var);
2561 free (new->value);
2562 UCL_FREE (sizeof (struct ucl_variable), new);
2563 }
2564 else {
2565 /* Do nothing */
2566 return;
2567 }
2568 }
2569 else {
2570 if (new == NULL) {
2571 new = UCL_ALLOC (sizeof (struct ucl_variable));
2572 if (new == NULL) {
2573 return;
2574 }
2575 memset (new, 0, sizeof (struct ucl_variable));
2576 new->var = strdup (var);
2577 new->var_len = strlen (var);
2578 new->value = strdup (value);
2579 new->value_len = strlen (value);
2580
2581 DL_APPEND (parser->variables, new);
2582 }
2583 else {
2584 free (new->value);
2585 new->value = strdup (value);
2586 new->value_len = strlen (value);
2587 }
2588 }
2589 }
2590
2591 void
ucl_parser_set_variables_handler(struct ucl_parser * parser,ucl_variable_handler handler,void * ud)2592 ucl_parser_set_variables_handler (struct ucl_parser *parser,
2593 ucl_variable_handler handler, void *ud)
2594 {
2595 parser->var_handler = handler;
2596 parser->var_data = ud;
2597 }
2598
2599 bool
ucl_parser_add_chunk_full(struct ucl_parser * parser,const unsigned char * data,size_t len,unsigned priority,enum ucl_duplicate_strategy strat,enum ucl_parse_type parse_type)2600 ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
2601 size_t len, unsigned priority, enum ucl_duplicate_strategy strat,
2602 enum ucl_parse_type parse_type)
2603 {
2604 struct ucl_chunk *chunk;
2605
2606 if (parser == NULL) {
2607 return false;
2608 }
2609
2610 if (data == NULL && len != 0) {
2611 ucl_create_err (&parser->err, "invalid chunk added");
2612 return false;
2613 }
2614
2615 if (parser->state != UCL_STATE_ERROR) {
2616 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
2617 if (chunk == NULL) {
2618 ucl_create_err (&parser->err, "cannot allocate chunk structure");
2619 return false;
2620 }
2621
2622 if (parse_type == UCL_PARSE_AUTO && len > 0) {
2623 /* We need to detect parse type by the first symbol */
2624 if ((*data & 0x80) == 0x80 && (*data >= 0xdc && *data <= 0xdf)) {
2625 parse_type = UCL_PARSE_MSGPACK;
2626 }
2627 else if (*data == '(') {
2628 parse_type = UCL_PARSE_CSEXP;
2629 }
2630 else {
2631 parse_type = UCL_PARSE_UCL;
2632 }
2633 }
2634
2635 chunk->begin = data;
2636 chunk->remain = len;
2637 chunk->pos = chunk->begin;
2638 chunk->end = chunk->begin + len;
2639 chunk->line = 1;
2640 chunk->column = 0;
2641 chunk->priority = priority;
2642 chunk->strategy = strat;
2643 chunk->parse_type = parse_type;
2644 LL_PREPEND (parser->chunks, chunk);
2645 parser->recursion ++;
2646
2647 if (parser->recursion > UCL_MAX_RECURSION) {
2648 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
2649 parser->recursion);
2650 return false;
2651 }
2652
2653 if (len > 0) {
2654 /* Need to parse something */
2655 switch (parse_type) {
2656 default:
2657 case UCL_PARSE_UCL:
2658 return ucl_state_machine (parser);
2659 case UCL_PARSE_MSGPACK:
2660 return ucl_parse_msgpack (parser);
2661 case UCL_PARSE_CSEXP:
2662 return ucl_parse_csexp (parser);
2663 }
2664 }
2665 else {
2666 /* Just add empty chunk and go forward */
2667 if (parser->top_obj == NULL) {
2668 /*
2669 * In case of empty object, create one to indicate that we've
2670 * read something
2671 */
2672 parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
2673 }
2674
2675 return true;
2676 }
2677 }
2678
2679 ucl_create_err (&parser->err, "a parser is in an invalid state");
2680
2681 return false;
2682 }
2683
2684 bool
ucl_parser_add_chunk_priority(struct ucl_parser * parser,const unsigned char * data,size_t len,unsigned priority)2685 ucl_parser_add_chunk_priority (struct ucl_parser *parser,
2686 const unsigned char *data, size_t len, unsigned priority)
2687 {
2688 /* We dereference parser, so this check is essential */
2689 if (parser == NULL) {
2690 return false;
2691 }
2692
2693 return ucl_parser_add_chunk_full (parser, data, len,
2694 priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
2695 }
2696
2697 bool
ucl_parser_add_chunk(struct ucl_parser * parser,const unsigned char * data,size_t len)2698 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
2699 size_t len)
2700 {
2701 if (parser == NULL) {
2702 return false;
2703 }
2704
2705 return ucl_parser_add_chunk_full (parser, data, len,
2706 parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
2707 }
2708
2709 bool
ucl_parser_add_string_priority(struct ucl_parser * parser,const char * data,size_t len,unsigned priority)2710 ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data,
2711 size_t len, unsigned priority)
2712 {
2713 if (data == NULL) {
2714 ucl_create_err (&parser->err, "invalid string added");
2715 return false;
2716 }
2717 if (len == 0) {
2718 len = strlen (data);
2719 }
2720
2721 return ucl_parser_add_chunk_priority (parser,
2722 (const unsigned char *)data, len, priority);
2723 }
2724
2725 bool
ucl_parser_add_string(struct ucl_parser * parser,const char * data,size_t len)2726 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
2727 size_t len)
2728 {
2729 if (parser == NULL) {
2730 return false;
2731 }
2732
2733 return ucl_parser_add_string_priority (parser,
2734 (const unsigned char *)data, len, parser->default_priority);
2735 }
2736
2737 bool
ucl_set_include_path(struct ucl_parser * parser,ucl_object_t * paths)2738 ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths)
2739 {
2740 if (parser == NULL || paths == NULL) {
2741 return false;
2742 }
2743
2744 if (parser->includepaths == NULL) {
2745 parser->includepaths = ucl_object_copy (paths);
2746 }
2747 else {
2748 ucl_object_unref (parser->includepaths);
2749 parser->includepaths = ucl_object_copy (paths);
2750 }
2751
2752 if (parser->includepaths == NULL) {
2753 return false;
2754 }
2755
2756 return true;
2757 }
2758