1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Torek.
9 *
10 * Copyright (c) 2011 The FreeBSD Foundation
11 *
12 * Copyright (c) 2023 Dag-Erling Smørgrav
13 *
14 * Portions of this software were developed by David Chisnall
15 * under sponsorship from the FreeBSD Foundation.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 */
41
42 #if 0
43 #if defined(LIBC_SCCS) && !defined(lint)
44 static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93";
45 #endif /* LIBC_SCCS and not lint */
46 #endif
47 #include "namespace.h"
48 #include <ctype.h>
49 #include <inttypes.h>
50 #include <limits.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <stddef.h>
54 #include <stdarg.h>
55 #include <string.h>
56 #include <wchar.h>
57 #include <wctype.h>
58 #include "un-namespace.h"
59
60 #include "libc_private.h"
61 #include "local.h"
62 #include "xlocale_private.h"
63
#define	BUF		513	/* Capacity of the numeric conversion buffer. */

/*
 * Modifier and behaviour bits collected in `flags' while a conversion
 * specification is being parsed.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	FASTINT		0x200	/* wfN: int_fastN_t */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * Conversion classes; once a specification has been parsed, the
 * conversion character is replaced by one of these.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */
91
92 #ifndef NO_FLOATING_POINT
93 static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t);
94 #endif
95
/*
 * A scanset from a %[...] directive: the characters in [start, end)
 * plus a flag saying whether the set is complemented.
 */
struct ccl {
	const wchar_t *start;	/* character class start */
	const wchar_t *end;	/* character class end */
	int compl;		/* ccl is complemented? */
};

/*
 * Tell whether wi is accepted by the scanset.  For a plain set that
 * means wi occurs in [start, end); for a complemented set it means wi
 * does not occur there.  Returns 1 if accepted, 0 otherwise.
 */
static __inline int
inccl(const struct ccl *ccl, wint_t wi)
{
	const int listed =
	    wmemchr(ccl->start, wi, ccl->end - ccl->start) != NULL;

	return (ccl->compl ? !listed : listed);
}
113
/*
 * When the assignment-suppression flag (*) is given, the conversion
 * functions receive the address of this object in place of a real
 * destination.  A NULL pointer could carry the same meaning, but it
 * would hide bugs in applications that pass NULL to scanf().
 */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/* Copied into local mbstate_t objects to start them in a known state. */
static const mbstate_t initial_mbs;
125
126 /*
127 * The following conversion functions return the number of characters consumed,
128 * or -1 on input failure. Character class conversion returns 0 on match
129 * failure.
130 */
131
132 static __inline int
convert_char(FILE * fp,char * mbp,int width,locale_t locale)133 convert_char(FILE *fp, char * mbp, int width, locale_t locale)
134 {
135 mbstate_t mbs;
136 size_t nconv;
137 wint_t wi;
138 int n;
139
140 n = 0;
141 mbs = initial_mbs;
142 while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
143 if (mbp != SUPPRESS_PTR) {
144 nconv = wcrtomb(mbp, wi, &mbs);
145 if (nconv == (size_t)-1)
146 return (-1);
147 mbp += nconv;
148 }
149 n++;
150 }
151 if (n == 0)
152 return (-1);
153 return (n);
154 }
155
156 static __inline int
convert_wchar(FILE * fp,wchar_t * wcp,int width,locale_t locale)157 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
158 {
159 wint_t wi;
160 int n;
161
162 n = 0;
163 while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
164 if (wcp != SUPPRESS_PTR)
165 *wcp++ = (wchar_t)wi;
166 n++;
167 }
168 if (n == 0)
169 return (-1);
170 return (n);
171 }
172
173 static __inline int
convert_ccl(FILE * fp,char * mbp,int width,const struct ccl * ccl,locale_t locale)174 convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl,
175 locale_t locale)
176 {
177 mbstate_t mbs;
178 size_t nconv;
179 wint_t wi;
180 int n;
181
182 n = 0;
183 mbs = initial_mbs;
184 while ((wi = __fgetwc(fp, locale)) != WEOF &&
185 width-- != 0 && inccl(ccl, wi)) {
186 if (mbp != SUPPRESS_PTR) {
187 nconv = wcrtomb(mbp, wi, &mbs);
188 if (nconv == (size_t)-1)
189 return (-1);
190 mbp += nconv;
191 }
192 n++;
193 }
194 if (wi != WEOF)
195 __ungetwc(wi, fp, locale);
196 if (mbp != SUPPRESS_PTR)
197 *mbp = 0;
198 return (n);
199 }
200
201 static __inline int
convert_wccl(FILE * fp,wchar_t * wcp,int width,const struct ccl * ccl,locale_t locale)202 convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl,
203 locale_t locale)
204 {
205 wchar_t *wcp0;
206 wint_t wi;
207 int n;
208
209 if (wcp == SUPPRESS_PTR) {
210 n = 0;
211 while ((wi = __fgetwc(fp, locale)) != WEOF &&
212 width-- != 0 && inccl(ccl, wi))
213 n++;
214 if (wi != WEOF)
215 __ungetwc(wi, fp, locale);
216 } else {
217 wcp0 = wcp;
218 while ((wi = __fgetwc(fp, locale)) != WEOF &&
219 width-- != 0 && inccl(ccl, wi))
220 *wcp++ = (wchar_t)wi;
221 if (wi != WEOF)
222 __ungetwc(wi, fp, locale);
223 n = wcp - wcp0;
224 if (n == 0)
225 return (0);
226 *wcp = 0;
227 }
228 return (n);
229 }
230
231 static __inline int
convert_string(FILE * fp,char * mbp,int width,locale_t locale)232 convert_string(FILE *fp, char * mbp, int width, locale_t locale)
233 {
234 mbstate_t mbs;
235 size_t nconv;
236 wint_t wi;
237 int nread;
238
239 mbs = initial_mbs;
240 nread = 0;
241 while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 &&
242 !iswspace(wi)) {
243 if (mbp != SUPPRESS_PTR) {
244 nconv = wcrtomb(mbp, wi, &mbs);
245 if (nconv == (size_t)-1)
246 return (-1);
247 mbp += nconv;
248 }
249 nread++;
250 }
251 if (wi != WEOF)
252 __ungetwc(wi, fp, locale);
253 if (mbp != SUPPRESS_PTR)
254 *mbp = 0;
255 return (nread);
256 }
257
258 static __inline int
convert_wstring(FILE * fp,wchar_t * wcp,int width,locale_t locale)259 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
260 {
261 wint_t wi;
262 int nread;
263
264 nread = 0;
265 if (wcp == SUPPRESS_PTR) {
266 while ((wi = __fgetwc(fp, locale)) != WEOF &&
267 width-- != 0 && !iswspace(wi))
268 nread++;
269 if (wi != WEOF)
270 __ungetwc(wi, fp, locale);
271 } else {
272 while ((wi = __fgetwc(fp, locale)) != WEOF &&
273 width-- != 0 && !iswspace(wi)) {
274 *wcp++ = (wchar_t)wi;
275 nread++;
276 }
277 if (wi != WEOF)
278 __ungetwc(wi, fp, locale);
279 *wcp = '\0';
280 }
281 return (nread);
282 }
283
/*
 * States of the integer recognizer driven by parseint_fsm().
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* only a '+' or '-' accepted */
	havezero,	/* only a leading '0' (after an optional sign) */
	haveprefix,	/* "0b" or "0x" accepted, no digit after it yet */
	any,		/* at least one digit of the number proper */
};

/*
 * Feed one character to the integer recognizer.
 *
 * c      the next input character
 * state  recognizer state, updated when c is accepted
 * base   conversion base; 0 means "detect from the prefix" and is
 *        replaced by 2, 8, 10 or 16 as soon as the input decides it
 *
 * Returns 1 if c extends the number read so far, 0 if it does not.  On
 * a 0 return *state is left unchanged.
 */
static __inline int
parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
{
	switch (c) {
	case '+':
	case '-':
		/* A sign is only valid as the very first character. */
		if (*state == begin) {
			*state = havesign;
			return 1;
		}
		break;
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/*
			 * A second zero right after the leading one means
			 * the number is octal.  Fix the base now: leaving
			 * it at 0 would make every following digit fail
			 * the "*base > digit" test, cutting "007" short
			 * after "00".
			 */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return 1;
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* A leading zero followed by an octal digit selects base 8. */
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALL THROUGH */
	case '8':
	case '9':
		/* A number that starts with 1-9 is decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		if (*base > c - '0') {
			*state = any;
			return 1;
		}
		break;
	case 'b':
		/* "0b" is a binary prefix if the base allows it... */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return 1;
		}
		/* ...otherwise 'b' may still be a digit. */
		/* FALL THROUGH */
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		if (*base > c - 'a' + 10) {
			*state = any;
			return 1;
		}
		break;
	case 'B':
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return 1;
		}
		/* FALL THROUGH */
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		if (*base > c - 'A' + 10) {
			*state = any;
			return 1;
		}
		break;
	case 'x':
	case 'X':
		/* "0x" is a hexadecimal prefix if the base allows it. */
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return 1;
		}
		break;
	}
	return 0;
}
403
404 /*
405 * Read an integer, storing it in buf.
406 *
407 * Return 0 on a match failure, and the number of characters read
408 * otherwise.
409 */
410 static __inline int
parseint(FILE * fp,wchar_t * __restrict buf,int width,int base,locale_t locale)411 parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
412 locale_t locale)
413 {
414 enum parseint_state state = begin;
415 wchar_t *wcp;
416 int c;
417
418 for (wcp = buf; width; width--) {
419 c = __fgetwc(fp, locale);
420 if (c == WEOF)
421 break;
422 if (!parseint_fsm(c, &state, &base))
423 break;
424 *wcp++ = (wchar_t)c;
425 }
426 /*
427 * If we only had a sign, push it back. If we only had a 0b or 0x
428 * prefix (possibly preceded by a sign), we view it as "0" and
429 * push back the letter. In all other cases, if we stopped
430 * because we read a non-number character, push it back.
431 */
432 if (state == havesign) {
433 wcp--;
434 __ungetwc(*wcp, fp, locale);
435 } else if (state == haveprefix) {
436 wcp--;
437 __ungetwc(c, fp, locale);
438 } else if (width && c != WEOF) {
439 __ungetwc(c, fp, locale);
440 }
441 return (wcp - buf);
442 }
443
444 /*
445 * MT-safe version.
446 */
447 int
vfwscanf_l(FILE * __restrict fp,locale_t locale,const wchar_t * __restrict fmt,va_list ap)448 vfwscanf_l(FILE * __restrict fp, locale_t locale,
449 const wchar_t * __restrict fmt, va_list ap)
450 {
451 int ret;
452 FIX_LOCALE(locale);
453
454 FLOCKFILE_CANCELSAFE(fp);
455 ORIENT(fp, 1);
456 ret = __vfwscanf(fp, locale, fmt, ap);
457 FUNLOCKFILE_CANCELSAFE();
458 return (ret);
459 }
460 int
vfwscanf(FILE * __restrict fp,const wchar_t * __restrict fmt,va_list ap)461 vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
462 {
463 return vfwscanf_l(fp, __get_locale(), fmt, ap);
464 }
465
466 /*
467 * Non-MT-safe version.
468 */
469 int
__vfwscanf(FILE * __restrict fp,locale_t locale,const wchar_t * __restrict fmt,va_list ap)470 __vfwscanf(FILE * __restrict fp, locale_t locale,
471 const wchar_t * __restrict fmt, va_list ap)
472 {
473 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
474 wint_t c; /* character from format, or conversion */
475 size_t width; /* field width, or 0 */
476 int flags; /* flags as defined above */
477 int nassigned; /* number of fields assigned */
478 int nconversions; /* number of conversions */
479 int nr; /* characters read by the current conversion */
480 int nread; /* number of characters consumed from fp */
481 int base; /* base argument to conversion function */
482 struct ccl ccl; /* character class info */
483 wchar_t buf[BUF]; /* buffer for numeric conversions */
484 wint_t wi; /* handy wint_t */
485
486 nassigned = 0;
487 nconversions = 0;
488 nread = 0;
489 ccl.start = ccl.end = NULL;
490 for (;;) {
491 c = *fmt++;
492 if (c == 0)
493 return (nassigned);
494 if (iswspace(c)) {
495 while ((c = __fgetwc(fp, locale)) != WEOF &&
496 iswspace_l(c, locale))
497 nread++;
498 if (c != WEOF)
499 __ungetwc(c, fp, locale);
500 continue;
501 }
502 if (c != '%')
503 goto literal;
504 width = 0;
505 flags = 0;
506 /*
507 * switch on the format. continue if done;
508 * break once format type is derived.
509 */
510 again: c = *fmt++;
511 switch (c) {
512 case '%':
513 literal:
514 if ((wi = __fgetwc(fp, locale)) == WEOF)
515 goto input_failure;
516 if (wi != c) {
517 __ungetwc(wi, fp, locale);
518 goto match_failure;
519 }
520 nread++;
521 continue;
522
523 case '*':
524 flags |= SUPPRESS;
525 goto again;
526 case 'j':
527 flags |= INTMAXT;
528 goto again;
529 case 'l':
530 if (flags & LONG) {
531 flags &= ~LONG;
532 flags |= LONGLONG;
533 } else
534 flags |= LONG;
535 goto again;
536 case 'q':
537 flags |= LONGLONG; /* not quite */
538 goto again;
539 case 't':
540 flags |= PTRDIFFT;
541 goto again;
542 case 'w':
543 /*
544 * Fixed-width integer types. On all platforms we
545 * support, int8_t is equivalent to char, int16_t
546 * is equivalent to short, int32_t is equivalent
547 * to int, int64_t is equivalent to long long int.
548 * Furthermore, int_fast8_t, int_fast16_t and
549 * int_fast32_t are equivalent to int, and
550 * int_fast64_t is equivalent to long long int.
551 */
552 flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
553 if (fmt[0] == 'f') {
554 flags |= FASTINT;
555 fmt++;
556 } else {
557 flags &= ~FASTINT;
558 }
559 if (fmt[0] == '8') {
560 if (!(flags & FASTINT))
561 flags |= SHORTSHORT;
562 else
563 /* no flag set = 32 */ ;
564 fmt += 1;
565 } else if (fmt[0] == '1' && fmt[1] == '6') {
566 if (!(flags & FASTINT))
567 flags |= SHORT;
568 else
569 /* no flag set = 32 */ ;
570 fmt += 2;
571 } else if (fmt[0] == '3' && fmt[1] == '2') {
572 /* no flag set = 32 */ ;
573 fmt += 2;
574 } else if (fmt[0] == '6' && fmt[1] == '4') {
575 flags |= LONGLONG;
576 fmt += 2;
577 } else {
578 goto match_failure;
579 }
580 goto again;
581 case 'z':
582 flags |= SIZET;
583 goto again;
584 case 'L':
585 flags |= LONGDBL;
586 goto again;
587 case 'h':
588 if (flags & SHORT) {
589 flags &= ~SHORT;
590 flags |= SHORTSHORT;
591 } else
592 flags |= SHORT;
593 goto again;
594
595 case '0': case '1': case '2': case '3': case '4':
596 case '5': case '6': case '7': case '8': case '9':
597 width = width * 10 + c - '0';
598 goto again;
599
600 /*
601 * Conversions.
602 */
603 case 'B':
604 case 'b':
605 c = CT_INT;
606 flags |= UNSIGNED;
607 base = 2;
608 break;
609
610 case 'd':
611 c = CT_INT;
612 base = 10;
613 break;
614
615 case 'i':
616 c = CT_INT;
617 base = 0;
618 break;
619
620 case 'o':
621 c = CT_INT;
622 flags |= UNSIGNED;
623 base = 8;
624 break;
625
626 case 'u':
627 c = CT_INT;
628 flags |= UNSIGNED;
629 base = 10;
630 break;
631
632 case 'X':
633 case 'x':
634 c = CT_INT;
635 flags |= UNSIGNED;
636 base = 16;
637 break;
638
639 #ifndef NO_FLOATING_POINT
640 case 'A': case 'E': case 'F': case 'G':
641 case 'a': case 'e': case 'f': case 'g':
642 c = CT_FLOAT;
643 break;
644 #endif
645
646 case 'S':
647 flags |= LONG;
648 /* FALLTHROUGH */
649 case 's':
650 c = CT_STRING;
651 break;
652
653 case '[':
654 ccl.start = fmt;
655 if (*fmt == '^') {
656 ccl.compl = 1;
657 fmt++;
658 } else
659 ccl.compl = 0;
660 if (*fmt == ']')
661 fmt++;
662 while (*fmt != '\0' && *fmt != ']')
663 fmt++;
664 ccl.end = fmt;
665 fmt++;
666 flags |= NOSKIP;
667 c = CT_CCL;
668 break;
669
670 case 'C':
671 flags |= LONG;
672 /* FALLTHROUGH */
673 case 'c':
674 flags |= NOSKIP;
675 c = CT_CHAR;
676 break;
677
678 case 'p': /* pointer format is like hex */
679 flags |= POINTER;
680 c = CT_INT; /* assumes sizeof(uintmax_t) */
681 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */
682 base = 16;
683 break;
684
685 case 'n':
686 if (flags & SUPPRESS) /* ??? */
687 continue;
688 if (flags & SHORTSHORT)
689 *va_arg(ap, char *) = nread;
690 else if (flags & SHORT)
691 *va_arg(ap, short *) = nread;
692 else if (flags & LONG)
693 *va_arg(ap, long *) = nread;
694 else if (flags & LONGLONG)
695 *va_arg(ap, long long *) = nread;
696 else if (flags & INTMAXT)
697 *va_arg(ap, intmax_t *) = nread;
698 else if (flags & SIZET)
699 *va_arg(ap, size_t *) = nread;
700 else if (flags & PTRDIFFT)
701 *va_arg(ap, ptrdiff_t *) = nread;
702 else
703 *va_arg(ap, int *) = nread;
704 continue;
705
706 default:
707 goto match_failure;
708
709 /*
710 * Disgusting backwards compatibility hack. XXX
711 */
712 case '\0': /* compat */
713 return (EOF);
714 }
715
716 /*
717 * Consume leading white space, except for formats
718 * that suppress this.
719 */
720 if ((flags & NOSKIP) == 0) {
721 while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi))
722 nread++;
723 if (wi == WEOF)
724 goto input_failure;
725 __ungetwc(wi, fp, locale);
726 }
727
728 /*
729 * Do the conversion.
730 */
731 switch (c) {
732
733 case CT_CHAR:
734 /* scan arbitrary characters (sets NOSKIP) */
735 if (width == 0)
736 width = 1;
737 if (flags & LONG) {
738 nr = convert_wchar(fp, GETARG(wchar_t *), width,
739 locale);
740 } else {
741 nr = convert_char(fp, GETARG(char *), width,
742 locale);
743 }
744 if (nr < 0)
745 goto input_failure;
746 break;
747
748 case CT_CCL:
749 /* scan a (nonempty) character class (sets NOSKIP) */
750 if (width == 0)
751 width = (size_t)~0; /* `infinity' */
752 /* take only those things in the class */
753 if (flags & LONG) {
754 nr = convert_wccl(fp, GETARG(wchar_t *), width,
755 &ccl, locale);
756 } else {
757 nr = convert_ccl(fp, GETARG(char *), width,
758 &ccl, locale);
759 }
760 if (nr <= 0) {
761 if (nr < 0)
762 goto input_failure;
763 else /* nr == 0 */
764 goto match_failure;
765 }
766 break;
767
768 case CT_STRING:
769 /* like CCL, but zero-length string OK, & no NOSKIP */
770 if (width == 0)
771 width = (size_t)~0;
772 if (flags & LONG) {
773 nr = convert_wstring(fp, GETARG(wchar_t *),
774 width, locale);
775 } else {
776 nr = convert_string(fp, GETARG(char *), width,
777 locale);
778 }
779 if (nr < 0)
780 goto input_failure;
781 break;
782
783 case CT_INT:
784 /* scan an integer as if by the conversion function */
785 if (width == 0 || width > sizeof(buf) /
786 sizeof(*buf) - 1)
787 width = sizeof(buf) / sizeof(*buf) - 1;
788
789 nr = parseint(fp, buf, width, base, locale);
790 if (nr == 0)
791 goto match_failure;
792 if ((flags & SUPPRESS) == 0) {
793 uintmax_t res;
794
795 buf[nr] = L'\0';
796 if ((flags & UNSIGNED) == 0)
797 res = wcstoimax(buf, NULL, base);
798 else
799 res = wcstoumax(buf, NULL, base);
800 if (flags & POINTER)
801 *va_arg(ap, void **) =
802 (void *)(uintptr_t)res;
803 else if (flags & SHORTSHORT)
804 *va_arg(ap, char *) = res;
805 else if (flags & SHORT)
806 *va_arg(ap, short *) = res;
807 else if (flags & LONG)
808 *va_arg(ap, long *) = res;
809 else if (flags & LONGLONG)
810 *va_arg(ap, long long *) = res;
811 else if (flags & INTMAXT)
812 *va_arg(ap, intmax_t *) = res;
813 else if (flags & PTRDIFFT)
814 *va_arg(ap, ptrdiff_t *) = res;
815 else if (flags & SIZET)
816 *va_arg(ap, size_t *) = res;
817 else
818 *va_arg(ap, int *) = res;
819 }
820 break;
821
822 #ifndef NO_FLOATING_POINT
823 case CT_FLOAT:
824 /* scan a floating point number as if by strtod */
825 if (width == 0 || width > sizeof(buf) /
826 sizeof(*buf) - 1)
827 width = sizeof(buf) / sizeof(*buf) - 1;
828 nr = parsefloat(fp, buf, buf + width, locale);
829 if (nr == 0)
830 goto match_failure;
831 if ((flags & SUPPRESS) == 0) {
832 if (flags & LONGDBL) {
833 long double res = wcstold(buf, NULL);
834 *va_arg(ap, long double *) = res;
835 } else if (flags & LONG) {
836 double res = wcstod(buf, NULL);
837 *va_arg(ap, double *) = res;
838 } else {
839 float res = wcstof(buf, NULL);
840 *va_arg(ap, float *) = res;
841 }
842 }
843 break;
844 #endif /* !NO_FLOATING_POINT */
845 }
846 if (!(flags & SUPPRESS))
847 nassigned++;
848 nread += nr;
849 nconversions++;
850 }
851 input_failure:
852 return (nconversions != 0 ? nassigned : EOF);
853 match_failure:
854 return (nassigned);
855 }
856
857 #ifndef NO_FLOATING_POINT
858 static int
parsefloat(FILE * fp,wchar_t * buf,wchar_t * end,locale_t locale)859 parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale)
860 {
861 mbstate_t mbs;
862 size_t nconv;
863 wchar_t *commit, *p;
864 int infnanpos = 0;
865 enum {
866 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
867 S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
868 } state = S_START;
869 wchar_t c;
870 wchar_t decpt;
871 _Bool gotmantdig = 0, ishex = 0;
872
873 mbs = initial_mbs;
874 nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
875 if (nconv == (size_t)-1 || nconv == (size_t)-2)
876 decpt = '.'; /* failsafe */
877
878 /*
879 * We set commit = p whenever the string we have read so far
880 * constitutes a valid representation of a floating point
881 * number by itself. At some point, the parse will complete
882 * or fail, and we will ungetc() back to the last commit point.
883 * To ensure that the file offset gets updated properly, it is
884 * always necessary to read at least one character that doesn't
885 * match; thus, we can't short-circuit "infinity" or "nan(...)".
886 */
887 commit = buf - 1;
888 c = WEOF;
889 for (p = buf; p < end; ) {
890 if ((c = __fgetwc(fp, locale)) == WEOF)
891 break;
892 reswitch:
893 switch (state) {
894 case S_START:
895 state = S_GOTSIGN;
896 if (c == '-' || c == '+')
897 break;
898 else
899 goto reswitch;
900 case S_GOTSIGN:
901 switch (c) {
902 case '0':
903 state = S_MAYBEHEX;
904 commit = p;
905 break;
906 case 'I':
907 case 'i':
908 state = S_INF;
909 break;
910 case 'N':
911 case 'n':
912 state = S_NAN;
913 break;
914 default:
915 state = S_DIGITS;
916 goto reswitch;
917 }
918 break;
919 case S_INF:
920 if (infnanpos > 6 ||
921 (c != "nfinity"[infnanpos] &&
922 c != "NFINITY"[infnanpos]))
923 goto parsedone;
924 if (infnanpos == 1 || infnanpos == 6)
925 commit = p; /* inf or infinity */
926 infnanpos++;
927 break;
928 case S_NAN:
929 switch (infnanpos) {
930 case 0:
931 if (c != 'A' && c != 'a')
932 goto parsedone;
933 break;
934 case 1:
935 if (c != 'N' && c != 'n')
936 goto parsedone;
937 else
938 commit = p;
939 break;
940 case 2:
941 if (c != '(')
942 goto parsedone;
943 break;
944 default:
945 if (c == ')') {
946 commit = p;
947 state = S_DONE;
948 } else if (!iswalnum(c) && c != '_')
949 goto parsedone;
950 break;
951 }
952 infnanpos++;
953 break;
954 case S_DONE:
955 goto parsedone;
956 case S_MAYBEHEX:
957 state = S_DIGITS;
958 if (c == 'X' || c == 'x') {
959 ishex = 1;
960 break;
961 } else { /* we saw a '0', but no 'x' */
962 gotmantdig = 1;
963 goto reswitch;
964 }
965 case S_DIGITS:
966 if ((ishex && iswxdigit(c)) || iswdigit(c))
967 gotmantdig = 1;
968 else {
969 state = S_FRAC;
970 if (c != decpt)
971 goto reswitch;
972 }
973 if (gotmantdig)
974 commit = p;
975 break;
976 case S_FRAC:
977 if (((c == 'E' || c == 'e') && !ishex) ||
978 ((c == 'P' || c == 'p') && ishex)) {
979 if (!gotmantdig)
980 goto parsedone;
981 else
982 state = S_EXP;
983 } else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
984 commit = p;
985 gotmantdig = 1;
986 } else
987 goto parsedone;
988 break;
989 case S_EXP:
990 state = S_EXPDIGITS;
991 if (c == '-' || c == '+')
992 break;
993 else
994 goto reswitch;
995 case S_EXPDIGITS:
996 if (iswdigit(c))
997 commit = p;
998 else
999 goto parsedone;
1000 break;
1001 default:
1002 abort();
1003 }
1004 *p++ = c;
1005 c = WEOF;
1006 }
1007
1008 parsedone:
1009 if (c != WEOF)
1010 __ungetwc(c, fp, locale);
1011 while (commit < --p)
1012 __ungetwc(*p, fp, locale);
1013 *++commit = '\0';
1014 return (commit - buf);
1015 }
1016 #endif
1017