1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Copyright (c) 2011 The FreeBSD Foundation
8 *
9 * Copyright (c) 2023 Dag-Erling Smørgrav
10 *
11 * Portions of this software were developed by David Chisnall
12 * under sponsorship from the FreeBSD Foundation.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Chris Torek.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 */
41
42 #if defined(LIBC_SCCS) && !defined(lint)
43 static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93";
44 #endif /* LIBC_SCCS and not lint */
45 #include "namespace.h"
46 #include <ctype.h>
47 #include <inttypes.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <stddef.h>
51 #include <stdarg.h>
52 #include <string.h>
53 #include <wchar.h>
54 #include <wctype.h>
55 #include "un-namespace.h"
56
57 #include "collate.h"
58 #include "libc_private.h"
59 #include "local.h"
60 #include "xlocale_private.h"
61
62 #ifndef NO_FLOATING_POINT
63 #include <locale.h>
64 #endif
65
/* Capacity of the scratch buffer that holds the text of one number. */
#define	BUF		513

/*
 * Modifier and bookkeeping bits collected in `flags' while one
 * conversion specification is being read.
 */
#define	LONG		0x0001	/* l: long or double */
#define	LONGDBL		0x0002	/* L: long double */
#define	SHORT		0x0004	/* h: short */
#define	SUPPRESS	0x0008	/* *: suppress assignment */
#define	POINTER		0x0010	/* p: void * (as hex) */
#define	NOSKIP		0x0020	/* [ or c: do not skip blanks */
#define	FASTINT		0x0200	/* wfN: int_fastN_t */
#define	LONGLONG	0x0400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x0800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * Conversion classes; the format character is replaced by one of these
 * once the specification has been fully read.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */
94 static const u_char *__sccl(char *, const u_char *);
95 #ifndef NO_FLOATING_POINT
96 static int parsefloat(FILE *, char *, char *, locale_t);
97 #endif
98
99 __weak_reference(__vfscanf, vfscanf);
100
101 /*
102 * Conversion functions are passed a pointer to this object instead of
103 * a real parameter to indicate that the assignment-suppression (*)
104 * flag was specified. We could use a NULL pointer to indicate this,
105 * but that would mask bugs in applications that call scanf() with a
106 * NULL pointer.
107 */
108 static const int suppress;
109 #define SUPPRESS_PTR ((void *)&suppress)
110
111 static const mbstate_t initial_mbs;
112
/*
 * The following conversion functions return the number of characters
 * consumed, or -1 on input failure.  Character class conversion returns
 * 0 on match failure.
 */
118
119 static __inline int
convert_char(FILE * fp,char * p,int width)120 convert_char(FILE *fp, char * p, int width)
121 {
122 int n;
123
124 if (p == SUPPRESS_PTR) {
125 size_t sum = 0;
126 for (;;) {
127 if ((n = fp->_r) < width) {
128 sum += n;
129 width -= n;
130 fp->_p += n;
131 if (__srefill(fp)) {
132 if (sum == 0)
133 return (-1);
134 break;
135 }
136 } else {
137 sum += width;
138 fp->_r -= width;
139 fp->_p += width;
140 break;
141 }
142 }
143 return (sum);
144 } else {
145 size_t r = __fread(p, 1, width, fp);
146
147 if (r == 0)
148 return (-1);
149 return (r);
150 }
151 }
152
153 static __inline int
convert_wchar(FILE * fp,wchar_t * wcp,int width,locale_t locale)154 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
155 {
156 mbstate_t mbs;
157 int n, nread;
158 wint_t wi;
159
160 mbs = initial_mbs;
161 n = 0;
162 while (width-- != 0 &&
163 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
164 if (wcp != SUPPRESS_PTR)
165 *wcp++ = (wchar_t)wi;
166 n += nread;
167 }
168 if (n == 0)
169 return (-1);
170 return (n);
171 }
172
173 static __inline int
convert_ccl(FILE * fp,char * p,int width,const char * ccltab)174 convert_ccl(FILE *fp, char * p, int width, const char *ccltab)
175 {
176 char *p0;
177 int n;
178
179 if (p == SUPPRESS_PTR) {
180 n = 0;
181 while (ccltab[*fp->_p]) {
182 n++, fp->_r--, fp->_p++;
183 if (--width == 0)
184 break;
185 if (fp->_r <= 0 && __srefill(fp)) {
186 if (n == 0)
187 return (-1);
188 break;
189 }
190 }
191 } else {
192 p0 = p;
193 while (ccltab[*fp->_p]) {
194 fp->_r--;
195 *p++ = *fp->_p++;
196 if (--width == 0)
197 break;
198 if (fp->_r <= 0 && __srefill(fp)) {
199 if (p == p0)
200 return (-1);
201 break;
202 }
203 }
204 n = p - p0;
205 if (n == 0)
206 return (0);
207 *p = 0;
208 }
209 return (n);
210 }
211
212 static __inline int
convert_wccl(FILE * fp,wchar_t * wcp,int width,const char * ccltab,locale_t locale)213 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
214 locale_t locale)
215 {
216 mbstate_t mbs;
217 wint_t wi;
218 int n, nread;
219
220 mbs = initial_mbs;
221 n = 0;
222 if (wcp == SUPPRESS_PTR) {
223 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
224 width-- != 0 && ccltab[wctob(wi)])
225 n += nread;
226 if (wi != WEOF)
227 __ungetwc(wi, fp, __get_locale());
228 } else {
229 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
230 width-- != 0 && ccltab[wctob(wi)]) {
231 *wcp++ = (wchar_t)wi;
232 n += nread;
233 }
234 if (wi != WEOF)
235 __ungetwc(wi, fp, __get_locale());
236 if (n == 0)
237 return (0);
238 *wcp = 0;
239 }
240 return (n);
241 }
242
243 static __inline int
convert_string(FILE * fp,char * p,int width)244 convert_string(FILE *fp, char * p, int width)
245 {
246 char *p0;
247 int n;
248
249 if (p == SUPPRESS_PTR) {
250 n = 0;
251 while (!isspace(*fp->_p)) {
252 n++, fp->_r--, fp->_p++;
253 if (--width == 0)
254 break;
255 if (fp->_r <= 0 && __srefill(fp))
256 break;
257 }
258 } else {
259 p0 = p;
260 while (!isspace(*fp->_p)) {
261 fp->_r--;
262 *p++ = *fp->_p++;
263 if (--width == 0)
264 break;
265 if (fp->_r <= 0 && __srefill(fp))
266 break;
267 }
268 *p = 0;
269 n = p - p0;
270 }
271 return (n);
272 }
273
274 static __inline int
convert_wstring(FILE * fp,wchar_t * wcp,int width,locale_t locale)275 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
276 {
277 mbstate_t mbs;
278 wint_t wi;
279 int n, nread;
280
281 mbs = initial_mbs;
282 n = 0;
283 if (wcp == SUPPRESS_PTR) {
284 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
285 width-- != 0 && !iswspace(wi))
286 n += nread;
287 if (wi != WEOF)
288 __ungetwc(wi, fp, __get_locale());
289 } else {
290 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
291 width-- != 0 && !iswspace(wi)) {
292 *wcp++ = (wchar_t)wi;
293 n += nread;
294 }
295 if (wi != WEOF)
296 __ungetwc(wi, fp, __get_locale());
297 *wcp = '\0';
298 }
299 return (n);
300 }
301
/*
 * Progress of the integer recogniser:
 *   begin      - nothing accepted yet
 *   havesign   - only a '+' or '-' accepted
 *   havezero   - a single leading '0' accepted (after an optional sign)
 *   haveprefix - "0x"/"0X" or "0b"/"0B" accepted, no digit after it yet
 *   any        - at least one digit accepted and no prefix is pending
 */
enum parseint_state {
	begin,
	havesign,
	havezero,
	haveprefix,
	any,
};

/*
 * Feed one character to the integer recogniser.
 *
 * c      next input character
 * state  recogniser state, updated when c is accepted
 * base   radix of the conversion; 0 means "not yet known" and is
 *        replaced by 2, 8, 10 or 16 as soon as the input decides it
 *
 * Returns 1 if c extends the number and 0 if it does not.  On a 0
 * return *state is left untouched (*base may have been resolved).
 *
 * A leading zero followed by further digits selects octal when the
 * base is undetermined.  This includes a second '0', so "007" is
 * accepted in full rather than being cut off after "00".
 */
static __inline int
parseint_fsm(int c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		/* A sign is only valid as the very first character. */
		if (*state == begin) {
			*state = havesign;
			return (1);
		}
		return (0);
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/* "00...": the leading zero was an octal prefix. */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return (1);
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALLTHROUGH */
	case '8':
	case '9':
		/* A number that starts with a non-zero digit is decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'b':
	case 'B':
		/* Binary prefix right after a leading zero ... */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return (1);
		}
		/* ... otherwise an ordinary digit with value 11. */
		digit = 11;
		break;
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		digit = c - 'a' + 10;
		break;
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = c - 'A' + 10;
		break;
	case 'x':
	case 'X':
		/* Only meaningful as a hexadecimal prefix. */
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return (1);
		}
		return (0);
	default:
		return (0);
	}

	/* Accept the digit only if it is valid in the current base. */
	if (*base > digit) {
		*state = any;
		return (1);
	}
	return (0);
}
421
422 /*
423 * Read an integer, storing it in buf.
424 *
425 * Return 0 on a match failure, and the number of characters read
426 * otherwise.
427 */
428 static __inline int
parseint(FILE * fp,char * __restrict buf,int width,int base)429 parseint(FILE *fp, char * __restrict buf, int width, int base)
430 {
431 enum parseint_state state = begin;
432 char *p;
433 int c;
434
435 for (p = buf; width; width--) {
436 c = __sgetc(fp);
437 if (c == EOF)
438 break;
439 if (!parseint_fsm(c, &state, &base))
440 break;
441 *p++ = c;
442 }
443 /*
444 * If we only had a sign, push it back. If we only had a 0b or 0x
445 * prefix (possibly preceded by a sign), we view it as "0" and
446 * push back the letter. In all other cases, if we stopped
447 * because we read a non-number character, push it back.
448 */
449 if (state == havesign) {
450 p--;
451 (void) __ungetc(*(u_char *)p, fp);
452 } else if (state == haveprefix) {
453 p--;
454 (void) __ungetc(c, fp);
455 } else if (width && c != EOF) {
456 (void) __ungetc(c, fp);
457 }
458 return (p - buf);
459 }
460
/*
 * __vfscanf - MT-safe entry point.
 *
 * Runs __svfscanf() with the locale returned by __get_locale(), between
 * FLOCKFILE_CANCELSAFE() and FUNLOCKFILE_CANCELSAFE() on fp, and returns
 * its result.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int rv;

	FLOCKFILE_CANCELSAFE(fp);
	rv = __svfscanf(fp, __get_locale(), fmt0, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (rv);
}
474 int
vfscanf_l(FILE * fp,locale_t locale,char const * fmt0,va_list ap)475 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
476 {
477 int ret;
478 FIX_LOCALE(locale);
479
480 FLOCKFILE_CANCELSAFE(fp);
481 ret = __svfscanf(fp, locale, fmt0, ap);
482 FUNLOCKFILE_CANCELSAFE();
483 return (ret);
484 }
485
486 /*
487 * __svfscanf - non-MT-safe version of __vfscanf
488 */
489 int
__svfscanf(FILE * fp,locale_t locale,const char * fmt0,va_list ap)490 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
491 {
492 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
493 const u_char *fmt = (const u_char *)fmt0;
494 int c; /* character from format, or conversion */
495 size_t width; /* field width, or 0 */
496 int flags; /* flags as defined above */
497 int nassigned; /* number of fields assigned */
498 int nconversions; /* number of conversions */
499 int nr; /* characters read by the current conversion */
500 int nread; /* number of characters consumed from fp */
501 int base; /* base argument to conversion function */
502 char ccltab[256]; /* character class table for %[...] */
503 char buf[BUF]; /* buffer for numeric conversions */
504
505 ORIENT(fp, -1);
506
507 nassigned = 0;
508 nconversions = 0;
509 nread = 0;
510 for (;;) {
511 c = *fmt++;
512 if (c == 0)
513 return (nassigned);
514 if (isspace(c)) {
515 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
516 nread++, fp->_r--, fp->_p++;
517 continue;
518 }
519 if (c != '%')
520 goto literal;
521 width = 0;
522 flags = 0;
523 /*
524 * switch on the format. continue if done;
525 * break once format type is derived.
526 */
527 again: c = *fmt++;
528 switch (c) {
529 case '%':
530 literal:
531 if (fp->_r <= 0 && __srefill(fp))
532 goto input_failure;
533 if (*fp->_p != c)
534 goto match_failure;
535 fp->_r--, fp->_p++;
536 nread++;
537 continue;
538
539 case '*':
540 flags |= SUPPRESS;
541 goto again;
542 case 'j':
543 flags |= INTMAXT;
544 goto again;
545 case 'l':
546 if (flags & LONG) {
547 flags &= ~LONG;
548 flags |= LONGLONG;
549 } else
550 flags |= LONG;
551 goto again;
552 case 'q':
553 flags |= LONGLONG; /* not quite */
554 goto again;
555 case 't':
556 flags |= PTRDIFFT;
557 goto again;
558 case 'w':
559 /*
560 * Fixed-width integer types. On all platforms we
561 * support, int8_t is equivalent to char, int16_t
562 * is equivalent to short, int32_t is equivalent
563 * to int, int64_t is equivalent to long long int.
564 * Furthermore, int_fast8_t, int_fast16_t and
565 * int_fast32_t are equivalent to int, and
566 * int_fast64_t is equivalent to long long int.
567 */
568 flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
569 if (fmt[0] == 'f') {
570 flags |= FASTINT;
571 fmt++;
572 } else {
573 flags &= ~FASTINT;
574 }
575 if (fmt[0] == '8') {
576 if (!(flags & FASTINT))
577 flags |= SHORTSHORT;
578 else
579 /* no flag set = 32 */ ;
580 fmt += 1;
581 } else if (fmt[0] == '1' && fmt[1] == '6') {
582 if (!(flags & FASTINT))
583 flags |= SHORT;
584 else
585 /* no flag set = 32 */ ;
586 fmt += 2;
587 } else if (fmt[0] == '3' && fmt[1] == '2') {
588 /* no flag set = 32 */ ;
589 fmt += 2;
590 } else if (fmt[0] == '6' && fmt[1] == '4') {
591 flags |= LONGLONG;
592 fmt += 2;
593 } else {
594 goto match_failure;
595 }
596 goto again;
597 case 'z':
598 flags |= SIZET;
599 goto again;
600 case 'L':
601 flags |= LONGDBL;
602 goto again;
603 case 'h':
604 if (flags & SHORT) {
605 flags &= ~SHORT;
606 flags |= SHORTSHORT;
607 } else
608 flags |= SHORT;
609 goto again;
610
611 case '0': case '1': case '2': case '3': case '4':
612 case '5': case '6': case '7': case '8': case '9':
613 width = width * 10 + c - '0';
614 goto again;
615
616 /*
617 * Conversions.
618 */
619 case 'B':
620 case 'b':
621 c = CT_INT;
622 flags |= UNSIGNED;
623 base = 2;
624 break;
625
626 case 'd':
627 c = CT_INT;
628 base = 10;
629 break;
630
631 case 'i':
632 c = CT_INT;
633 base = 0;
634 break;
635
636 case 'o':
637 c = CT_INT;
638 flags |= UNSIGNED;
639 base = 8;
640 break;
641
642 case 'u':
643 c = CT_INT;
644 flags |= UNSIGNED;
645 base = 10;
646 break;
647
648 case 'X':
649 case 'x':
650 c = CT_INT;
651 flags |= UNSIGNED;
652 base = 16;
653 break;
654
655 #ifndef NO_FLOATING_POINT
656 case 'A': case 'E': case 'F': case 'G':
657 case 'a': case 'e': case 'f': case 'g':
658 c = CT_FLOAT;
659 break;
660 #endif
661
662 case 'S':
663 flags |= LONG;
664 /* FALLTHROUGH */
665 case 's':
666 c = CT_STRING;
667 break;
668
669 case '[':
670 fmt = __sccl(ccltab, fmt);
671 flags |= NOSKIP;
672 c = CT_CCL;
673 break;
674
675 case 'C':
676 flags |= LONG;
677 /* FALLTHROUGH */
678 case 'c':
679 flags |= NOSKIP;
680 c = CT_CHAR;
681 break;
682
683 case 'p': /* pointer format is like hex */
684 flags |= POINTER;
685 c = CT_INT; /* assumes sizeof(uintmax_t) */
686 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */
687 base = 16;
688 break;
689
690 case 'n':
691 if (flags & SUPPRESS) /* ??? */
692 continue;
693 if (flags & SHORTSHORT)
694 *va_arg(ap, char *) = nread;
695 else if (flags & SHORT)
696 *va_arg(ap, short *) = nread;
697 else if (flags & LONG)
698 *va_arg(ap, long *) = nread;
699 else if (flags & LONGLONG)
700 *va_arg(ap, long long *) = nread;
701 else if (flags & INTMAXT)
702 *va_arg(ap, intmax_t *) = nread;
703 else if (flags & SIZET)
704 *va_arg(ap, size_t *) = nread;
705 else if (flags & PTRDIFFT)
706 *va_arg(ap, ptrdiff_t *) = nread;
707 else
708 *va_arg(ap, int *) = nread;
709 continue;
710
711 default:
712 goto match_failure;
713
714 /*
715 * Disgusting backwards compatibility hack. XXX
716 */
717 case '\0': /* compat */
718 return (EOF);
719 }
720
721 /*
722 * We have a conversion that requires input.
723 */
724 if (fp->_r <= 0 && __srefill(fp))
725 goto input_failure;
726
727 /*
728 * Consume leading white space, except for formats
729 * that suppress this.
730 */
731 if ((flags & NOSKIP) == 0) {
732 while (isspace(*fp->_p)) {
733 nread++;
734 if (--fp->_r > 0)
735 fp->_p++;
736 else if (__srefill(fp))
737 goto input_failure;
738 }
739 /*
740 * Note that there is at least one character in
741 * the buffer, so conversions that do not set NOSKIP
742 * ca no longer result in an input failure.
743 */
744 }
745
746 /*
747 * Do the conversion.
748 */
749 switch (c) {
750
751 case CT_CHAR:
752 /* scan arbitrary characters (sets NOSKIP) */
753 if (width == 0)
754 width = 1;
755 if (flags & LONG) {
756 nr = convert_wchar(fp, GETARG(wchar_t *),
757 width, locale);
758 } else {
759 nr = convert_char(fp, GETARG(char *), width);
760 }
761 if (nr < 0)
762 goto input_failure;
763 break;
764
765 case CT_CCL:
766 /* scan a (nonempty) character class (sets NOSKIP) */
767 if (width == 0)
768 width = (size_t)~0; /* `infinity' */
769 if (flags & LONG) {
770 nr = convert_wccl(fp, GETARG(wchar_t *), width,
771 ccltab, locale);
772 } else {
773 nr = convert_ccl(fp, GETARG(char *), width,
774 ccltab);
775 }
776 if (nr <= 0) {
777 if (nr < 0)
778 goto input_failure;
779 else /* nr == 0 */
780 goto match_failure;
781 }
782 break;
783
784 case CT_STRING:
785 /* like CCL, but zero-length string OK, & no NOSKIP */
786 if (width == 0)
787 width = (size_t)~0;
788 if (flags & LONG) {
789 nr = convert_wstring(fp, GETARG(wchar_t *),
790 width, locale);
791 } else {
792 nr = convert_string(fp, GETARG(char *), width);
793 }
794 if (nr < 0)
795 goto input_failure;
796 break;
797
798 case CT_INT:
799 /* scan an integer as if by the conversion function */
800 #ifdef hardway
801 if (width == 0 || width > sizeof(buf) - 1)
802 width = sizeof(buf) - 1;
803 #else
804 /* size_t is unsigned, hence this optimisation */
805 if (--width > sizeof(buf) - 2)
806 width = sizeof(buf) - 2;
807 width++;
808 #endif
809 nr = parseint(fp, buf, width, base);
810 if (nr == 0)
811 goto match_failure;
812 if ((flags & SUPPRESS) == 0) {
813 uintmax_t res;
814
815 buf[nr] = '\0';
816 if ((flags & UNSIGNED) == 0)
817 res = strtoimax_l(buf, (char **)NULL, base, locale);
818 else
819 res = strtoumax_l(buf, (char **)NULL, base, locale);
820 if (flags & POINTER)
821 *va_arg(ap, void **) =
822 (void *)(uintptr_t)res;
823 else if (flags & SHORTSHORT)
824 *va_arg(ap, char *) = res;
825 else if (flags & SHORT)
826 *va_arg(ap, short *) = res;
827 else if (flags & LONG)
828 *va_arg(ap, long *) = res;
829 else if (flags & LONGLONG)
830 *va_arg(ap, long long *) = res;
831 else if (flags & INTMAXT)
832 *va_arg(ap, intmax_t *) = res;
833 else if (flags & PTRDIFFT)
834 *va_arg(ap, ptrdiff_t *) = res;
835 else if (flags & SIZET)
836 *va_arg(ap, size_t *) = res;
837 else
838 *va_arg(ap, int *) = res;
839 }
840 break;
841
842 #ifndef NO_FLOATING_POINT
843 case CT_FLOAT:
844 /* scan a floating point number as if by strtod */
845 if (width == 0 || width > sizeof(buf) - 1)
846 width = sizeof(buf) - 1;
847 nr = parsefloat(fp, buf, buf + width, locale);
848 if (nr == 0)
849 goto match_failure;
850 if ((flags & SUPPRESS) == 0) {
851 if (flags & LONGDBL) {
852 long double res = strtold_l(buf, NULL,
853 locale);
854 *va_arg(ap, long double *) = res;
855 } else if (flags & LONG) {
856 double res = strtod_l(buf, NULL,
857 locale);
858 *va_arg(ap, double *) = res;
859 } else {
860 float res = strtof_l(buf, NULL, locale);
861 *va_arg(ap, float *) = res;
862 }
863 }
864 break;
865 #endif /* !NO_FLOATING_POINT */
866 }
867 if (!(flags & SUPPRESS))
868 nassigned++;
869 nread += nr;
870 nconversions++;
871 }
872 input_failure:
873 return (nconversions != 0 ? nassigned : EOF);
874 match_failure:
875 return (nassigned);
876 }
877
878 /*
879 * Fill in the given table from the scanset at the given format
880 * (just after `['). Return a pointer to the character past the
881 * closing `]'. The table has a 1 wherever characters should be
882 * considered part of the scanset.
883 */
884 static const u_char *
__sccl(char * tab,const u_char * fmt)885 __sccl(char *tab, const u_char *fmt)
886 {
887 int c, n, v, i;
888 struct xlocale_collate *table =
889 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
890
891 /* first `clear' the whole table */
892 c = *fmt++; /* first char hat => negated scanset */
893 if (c == '^') {
894 v = 1; /* default => accept */
895 c = *fmt++; /* get new first char */
896 } else
897 v = 0; /* default => reject */
898
899 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
900 (void) memset(tab, v, 256);
901
902 if (c == 0)
903 return (fmt - 1);/* format ended before closing ] */
904
905 /*
906 * Now set the entries corresponding to the actual scanset
907 * to the opposite of the above.
908 *
909 * The first character may be ']' (or '-') without being special;
910 * the last character may be '-'.
911 */
912 v = 1 - v;
913 for (;;) {
914 tab[c] = v; /* take character c */
915 doswitch:
916 n = *fmt++; /* and examine the next */
917 switch (n) {
918
919 case 0: /* format ended too soon */
920 return (fmt - 1);
921
922 case '-':
923 /*
924 * A scanset of the form
925 * [01+-]
926 * is defined as `the digit 0, the digit 1,
927 * the character +, the character -', but
928 * the effect of a scanset such as
929 * [a-zA-Z0-9]
930 * is implementation defined. The V7 Unix
931 * scanf treats `a-z' as `the letters a through
932 * z', but treats `a-a' as `the letter a, the
933 * character -, and the letter a'.
934 *
935 * For compatibility, the `-' is not considered
936 * to define a range if the character following
937 * it is either a close bracket (required by ANSI)
938 * or is not numerically greater than the character
939 * we just stored in the table (c).
940 */
941 n = *fmt;
942 if (n == ']'
943 || (table->__collate_load_error ? n < c :
944 __collate_range_cmp(n, c) < 0
945 )
946 ) {
947 c = '-';
948 break; /* resume the for(;;) */
949 }
950 fmt++;
951 /* fill in the range */
952 if (table->__collate_load_error) {
953 do {
954 tab[++c] = v;
955 } while (c < n);
956 } else {
957 for (i = 0; i < 256; i ++)
958 if (__collate_range_cmp(c, i) <= 0 &&
959 __collate_range_cmp(i, n) <= 0
960 )
961 tab[i] = v;
962 }
963 #if 1 /* XXX another disgusting compatibility hack */
964 c = n;
965 /*
966 * Alas, the V7 Unix scanf also treats formats
967 * such as [a-c-e] as `the letters a through e'.
968 * This too is permitted by the standard....
969 */
970 goto doswitch;
971 #else
972 c = *fmt++;
973 if (c == 0)
974 return (fmt - 1);
975 if (c == ']')
976 return (fmt);
977 #endif
978 break;
979
980 case ']': /* end of scanset */
981 return (fmt);
982
983 default: /* just another character */
984 c = n;
985 break;
986 }
987 }
988 /* NOTREACHED */
989 }
990
991 #ifndef NO_FLOATING_POINT
992 static int
parsefloat(FILE * fp,char * buf,char * end,locale_t locale)993 parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
994 {
995 char *commit, *p;
996 int infnanpos = 0, decptpos = 0;
997 enum {
998 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
999 S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
1000 } state = S_START;
1001 unsigned char c;
1002 const char *decpt = localeconv_l(locale)->decimal_point;
1003 _Bool gotmantdig = 0, ishex = 0;
1004
1005 /*
1006 * We set commit = p whenever the string we have read so far
1007 * constitutes a valid representation of a floating point
1008 * number by itself. At some point, the parse will complete
1009 * or fail, and we will ungetc() back to the last commit point.
1010 * To ensure that the file offset gets updated properly, it is
1011 * always necessary to read at least one character that doesn't
1012 * match; thus, we can't short-circuit "infinity" or "nan(...)".
1013 */
1014 commit = buf - 1;
1015 for (p = buf; p < end; ) {
1016 c = *fp->_p;
1017 reswitch:
1018 switch (state) {
1019 case S_START:
1020 state = S_GOTSIGN;
1021 if (c == '-' || c == '+')
1022 break;
1023 else
1024 goto reswitch;
1025 case S_GOTSIGN:
1026 switch (c) {
1027 case '0':
1028 state = S_MAYBEHEX;
1029 commit = p;
1030 break;
1031 case 'I':
1032 case 'i':
1033 state = S_INF;
1034 break;
1035 case 'N':
1036 case 'n':
1037 state = S_NAN;
1038 break;
1039 default:
1040 state = S_DIGITS;
1041 goto reswitch;
1042 }
1043 break;
1044 case S_INF:
1045 if (infnanpos > 6 ||
1046 (c != "nfinity"[infnanpos] &&
1047 c != "NFINITY"[infnanpos]))
1048 goto parsedone;
1049 if (infnanpos == 1 || infnanpos == 6)
1050 commit = p; /* inf or infinity */
1051 infnanpos++;
1052 break;
1053 case S_NAN:
1054 switch (infnanpos) {
1055 case 0:
1056 if (c != 'A' && c != 'a')
1057 goto parsedone;
1058 break;
1059 case 1:
1060 if (c != 'N' && c != 'n')
1061 goto parsedone;
1062 else
1063 commit = p;
1064 break;
1065 case 2:
1066 if (c != '(')
1067 goto parsedone;
1068 break;
1069 default:
1070 if (c == ')') {
1071 commit = p;
1072 state = S_DONE;
1073 } else if (!isalnum(c) && c != '_')
1074 goto parsedone;
1075 break;
1076 }
1077 infnanpos++;
1078 break;
1079 case S_DONE:
1080 goto parsedone;
1081 case S_MAYBEHEX:
1082 state = S_DIGITS;
1083 if (c == 'X' || c == 'x') {
1084 ishex = 1;
1085 break;
1086 } else { /* we saw a '0', but no 'x' */
1087 gotmantdig = 1;
1088 goto reswitch;
1089 }
1090 case S_DIGITS:
1091 if ((ishex && isxdigit(c)) || isdigit(c)) {
1092 gotmantdig = 1;
1093 commit = p;
1094 break;
1095 } else {
1096 state = S_DECPT;
1097 goto reswitch;
1098 }
1099 case S_DECPT:
1100 if (c == decpt[decptpos]) {
1101 if (decpt[++decptpos] == '\0') {
1102 /* We read the complete decpt seq. */
1103 state = S_FRAC;
1104 if (gotmantdig)
1105 commit = p;
1106 }
1107 break;
1108 } else if (!decptpos) {
1109 /* We didn't read any decpt characters. */
1110 state = S_FRAC;
1111 goto reswitch;
1112 } else {
1113 /*
1114 * We read part of a multibyte decimal point,
1115 * but the rest is invalid, so bail.
1116 */
1117 goto parsedone;
1118 }
1119 case S_FRAC:
1120 if (((c == 'E' || c == 'e') && !ishex) ||
1121 ((c == 'P' || c == 'p') && ishex)) {
1122 if (!gotmantdig)
1123 goto parsedone;
1124 else
1125 state = S_EXP;
1126 } else if ((ishex && isxdigit(c)) || isdigit(c)) {
1127 commit = p;
1128 gotmantdig = 1;
1129 } else
1130 goto parsedone;
1131 break;
1132 case S_EXP:
1133 state = S_EXPDIGITS;
1134 if (c == '-' || c == '+')
1135 break;
1136 else
1137 goto reswitch;
1138 case S_EXPDIGITS:
1139 if (isdigit(c))
1140 commit = p;
1141 else
1142 goto parsedone;
1143 break;
1144 default:
1145 abort();
1146 }
1147 *p++ = c;
1148 if (--fp->_r > 0)
1149 fp->_p++;
1150 else if (__srefill(fp))
1151 break; /* EOF */
1152 }
1153
1154 parsedone:
1155 while (commit < --p)
1156 __ungetc(*(u_char *)p, fp);
1157 *++commit = '\0';
1158 return (commit - buf);
1159 }
1160 #endif
1161