xref: /freebsd-13.1/usr.bin/printf/printf.c (revision ed062a30)
1 /*-
2  * Copyright 2014 Garrett D'Amore <[email protected]>
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 4. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Important: This file is used both as a standalone program /usr/bin/printf
33  * and as a builtin for /bin/sh (#define SHELL).
34  */
35 
#ifndef SHELL
#ifndef lint
/*
 * Copyright string compiled into the standalone program only; it is left
 * out when the file is built with SHELL defined or under lint.
 */
static char const copyright[] =
"@(#) Copyright (c) 1989, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */
#endif

#ifndef lint
#if 0
static char const sccsid[] = "@(#)printf.c	8.1 (Berkeley) 7/20/93";
#endif
/* Revision-control identification string (the SCCS id above is disabled). */
static const char rcsid[] =
  "$FreeBSD$";
#endif /* not lint */
51 
52 #include <sys/types.h>
53 
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <inttypes.h>
58 #include <limits.h>
59 #include <locale.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63 #include <unistd.h>
64 #include <wchar.h>
65 
66 #ifdef SHELL
67 #define	main printfcmd
68 #include "bltin/bltin.h"
69 #include "error.h"
70 #include "options.h"
71 #endif
72 
/*
 * PF(f, func): format one value and write it to stdout.
 *
 * `f' is the conversion specification and `func' the value to convert.
 * The macro reads the caller's locals `havewidth', `haveprec',
 * `fieldwidth' and `precision' to decide whether a '*' width and/or
 * precision must be passed ahead of the value.
 *
 * The text is built with asprintf() and written with fwrite() using the
 * length asprintf() returned, so a NUL byte in the result (for example
 * from %c) is emitted instead of silently truncating the output.  The
 * return value, not the buffer pointer, decides whether the call failed,
 * because the buffer contents are unspecified after a failure on some C
 * libraries.  A failed conversion writes nothing.
 */
#define	PF(f, func) do {						\
	char *pf_buf_ = NULL;						\
	int pf_len_;							\
									\
	if (havewidth) {						\
		if (haveprec)						\
			pf_len_ = asprintf(&pf_buf_, (f), fieldwidth,	\
			    precision, (func));				\
		else							\
			pf_len_ = asprintf(&pf_buf_, (f), fieldwidth,	\
			    (func));					\
	} else if (haveprec) {						\
		pf_len_ = asprintf(&pf_buf_, (f), precision, (func));	\
	} else {							\
		pf_len_ = asprintf(&pf_buf_, (f), (func));		\
	}								\
	if (pf_len_ >= 0 && pf_buf_ != NULL) {				\
		(void)fwrite(pf_buf_, 1, (size_t)pf_len_, stdout);	\
		free(pf_buf_);						\
	}								\
} while (0)
89 
/* Helpers that consume command-line arguments through gargv. */
static int	 asciicode(void);
static char	*printf_doformat(char *, int *);
static int	 escape(char *, int, size_t *);
static int	 getchr(void);
static int	 getfloating(long double *, int);
static int	 getint(int *);
static int	 getnum(intmax_t *, uintmax_t *, int);
static const char
		*getstr(void);
static char	*mknum(char *, char);
static void	 usage(void);

/* Accepted by strspn() when scanning an "n$" argument index. */
static const char digits[] = "0123456789";

/*
 * Argument bookkeeping shared by main() and the conversion helpers:
 *   myargv  - first argument available to the current pass over the format
 *   myargc  - number of arguments counted from myargv up to the NULL entry
 *   gargv   - next argument to be consumed
 *   maxargv - furthest position gargv has reached during the current pass
 */
static int  myargc;
static char **myargv;
static char **gargv;
static char **maxargv;
108 
109 int
110 main(int argc, char *argv[])
111 {
112 	size_t len;
113 	int chopped, end, rval;
114 	char *format, *fmt, *start;
115 #ifndef SHELL
116 	int ch;
117 
118 	(void) setlocale(LC_ALL, "");
119 #endif
120 
121 #ifdef SHELL
122 	nextopt("");
123 	argc -= argptr - argv;
124 	argv = argptr;
125 #else
126 	while ((ch = getopt(argc, argv, "")) != -1)
127 		switch (ch) {
128 		case '?':
129 		default:
130 			usage();
131 			return (1);
132 		}
133 	argc -= optind;
134 	argv += optind;
135 #endif
136 
137 	if (argc < 1) {
138 		usage();
139 		return (1);
140 	}
141 
142 #ifdef SHELL
143 	INTOFF;
144 #endif
145 	/*
146 	 * Basic algorithm is to scan the format string for conversion
147 	 * specifications -- once one is found, find out if the field
148 	 * width or precision is a '*'; if it is, gather up value.  Note,
149 	 * format strings are reused as necessary to use up the provided
150 	 * arguments, arguments of zero/null string are provided to use
151 	 * up the format string.
152 	 */
153 	fmt = format = *argv;
154 	chopped = escape(fmt, 1, &len);		/* backslash interpretation */
155 	rval = end = 0;
156 	gargv = ++argv;
157 
158 	for (;;) {
159 		maxargv = gargv;
160 
161 		myargv = gargv;
162 		for (myargc = 0; gargv[myargc]; myargc++)
163 			/* nop */;
164 		start = fmt;
165 		while (fmt < format + len) {
166 			if (fmt[0] == '%') {
167 				fwrite(start, 1, fmt - start, stdout);
168 				if (fmt[1] == '%') {
169 					/* %% prints a % */
170 					putchar('%');
171 					fmt += 2;
172 				} else {
173 					fmt = printf_doformat(fmt, &rval);
174 					if (fmt == NULL) {
175 #ifdef SHELL
176 						INTON;
177 #endif
178 						return (1);
179 					}
180 					end = 0;
181 				}
182 				start = fmt;
183 			} else
184 				fmt++;
185 			if (gargv > maxargv)
186 				maxargv = gargv;
187 		}
188 		gargv = maxargv;
189 
190 		if (end == 1) {
191 			warnx("missing format character");
192 #ifdef SHELL
193 			INTON;
194 #endif
195 			return (1);
196 		}
197 		fwrite(start, 1, fmt - start, stdout);
198 		if (chopped || !*gargv) {
199 #ifdef SHELL
200 			INTON;
201 #endif
202 			return (rval);
203 		}
204 		/* Restart at the beginning of the format string. */
205 		fmt = format;
206 		end = 1;
207 	}
208 	/* NOTREACHED */
209 }
210 
211 
212 static char *
213 printf_doformat(char *fmt, int *rval)
214 {
215 	static const char skip1[] = "#'-+ 0";
216 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
217 	char convch, nextch;
218 	char *start;
219 	char **fargv;
220 	char *dptr;
221 	int l;
222 
223 	start = alloca(strlen(fmt) + 1);
224 
225 	dptr = start;
226 	*dptr++ = '%';
227 	*dptr = 0;
228 
229 	fmt++;
230 
231 	/* look for "n$" field index specifier */
232 	l = strspn(fmt, digits);
233 	if ((l > 0) && (fmt[l] == '$')) {
234 		int idx = atoi(fmt);
235 		if (idx <= myargc) {
236 			gargv = &myargv[idx - 1];
237 		} else {
238 			gargv = &myargv[myargc];
239 		}
240 		if (gargv > maxargv)
241 			maxargv = gargv;
242 		fmt += l + 1;
243 
244 	/* save format argument */
245 	fargv = gargv;
246 	} else {
247 	fargv = NULL;
248 	}
249 
250 	/* skip to field width */
251 	while (strchr(skip1, *fmt) != NULL) {
252 		*dptr++ = *fmt++;
253 		*dptr = 0;
254 	}
255 
256 	if (*fmt == '*') {
257 
258 		fmt++;
259 		l = strspn(fmt, digits);
260 		if ((l > 0) && (fmt[l] == '$')) {
261 			int idx = atoi(fmt);
262 			if (idx <= myargc) {
263 				gargv = &myargv[idx - 1];
264 			} else {
265 				gargv = &myargv[myargc];
266 			}
267 			fmt += l + 1;
268 		}
269 
270 		if (getint(&fieldwidth))
271 			return (NULL);
272 		if (gargv > maxargv)
273 			maxargv = gargv;
274 		havewidth = 1;
275 
276 		*dptr++ = '*';
277 		*dptr = 0;
278 	} else {
279 		havewidth = 0;
280 
281 		/* skip to possible '.', get following precision */
282 		while (isdigit(*fmt)) {
283 			*dptr++ = *fmt++;
284 			*dptr = 0;
285 		}
286 	}
287 
288 	if (*fmt == '.') {
289 		/* precision present? */
290 		fmt++;
291 		*dptr++ = '.';
292 
293 		if (*fmt == '*') {
294 
295 			fmt++;
296 			l = strspn(fmt, digits);
297 			if ((l > 0) && (fmt[l] == '$')) {
298 				int idx = atoi(fmt);
299 				if (idx <= myargc) {
300 					gargv = &myargv[idx - 1];
301 				} else {
302 					gargv = &myargv[myargc];
303 				}
304 				fmt += l + 1;
305 			}
306 
307 			if (getint(&precision))
308 				return (NULL);
309 			if (gargv > maxargv)
310 				maxargv = gargv;
311 			haveprec = 1;
312 			*dptr++ = '*';
313 			*dptr = 0;
314 		} else {
315 			haveprec = 0;
316 
317 			/* skip to conversion char */
318 			while (isdigit(*fmt)) {
319 				*dptr++ = *fmt++;
320 				*dptr = 0;
321 			}
322 		}
323 	} else
324 		haveprec = 0;
325 	if (!*fmt) {
326 		warnx("missing format character");
327 		return (NULL);
328 	}
329 	*dptr++ = *fmt;
330 	*dptr = 0;
331 
332 	/*
333 	 * Look for a length modifier.  POSIX doesn't have these, so
334 	 * we only support them for floating-point conversions, which
335 	 * are extensions.  This is useful because the L modifier can
336 	 * be used to gain extra range and precision, while omitting
337 	 * it is more likely to produce consistent results on different
338 	 * architectures.  This is not so important for integers
339 	 * because overflow is the only bad thing that can happen to
340 	 * them, but consider the command  printf %a 1.1
341 	 */
342 	if (*fmt == 'L') {
343 		mod_ldbl = 1;
344 		fmt++;
345 		if (!strchr("aAeEfFgG", *fmt)) {
346 			warnx("bad modifier L for %%%c", *fmt);
347 			return (NULL);
348 		}
349 	} else {
350 		mod_ldbl = 0;
351 	}
352 
353 	/* save the current arg offset, and set to the format arg */
354 	if (fargv != NULL) {
355 	gargv = fargv;
356 	}
357 
358 	convch = *fmt;
359 	nextch = *++fmt;
360 
361 	*fmt = '\0';
362 	switch (convch) {
363 	case 'b': {
364 		size_t len;
365 		char *p;
366 		int getout;
367 
368 		p = strdup(getstr());
369 		if (p == NULL) {
370 			warnx("%s", strerror(ENOMEM));
371 			return (NULL);
372 		}
373 		getout = escape(p, 0, &len);
374 		*(fmt - 1) = 's';
375 		PF(start, p);
376 		*(fmt - 1) = 'b';
377 		free(p);
378 		if (getout)
379 			return (fmt);
380 		break;
381 	}
382 	case 'c': {
383 		char p;
384 
385 		p = getchr();
386 		PF(start, p);
387 		break;
388 	}
389 	case 's': {
390 		const char *p;
391 
392 		p = getstr();
393 		PF(start, p);
394 		break;
395 	}
396 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
397 		char *f;
398 		intmax_t val;
399 		uintmax_t uval;
400 		int signedconv;
401 
402 		signedconv = (convch == 'd' || convch == 'i');
403 		if ((f = mknum(start, convch)) == NULL)
404 			return (NULL);
405 		if (getnum(&val, &uval, signedconv))
406 			*rval = 1;
407 		if (signedconv)
408 			PF(f, val);
409 		else
410 			PF(f, uval);
411 		break;
412 	}
413 	case 'e': case 'E':
414 	case 'f': case 'F':
415 	case 'g': case 'G':
416 	case 'a': case 'A': {
417 		long double p;
418 
419 		if (getfloating(&p, mod_ldbl))
420 			*rval = 1;
421 		if (mod_ldbl)
422 			PF(start, p);
423 		else
424 			PF(start, (double)p);
425 		break;
426 	}
427 	default:
428 		warnx("illegal format character %c", convch);
429 		return (NULL);
430 	}
431 	*fmt = nextch;
432 	/* return the gargv to the next element */
433 	return (fmt);
434 }
435 
/*
 * Build a copy of the conversion specification `str' with the 'j'
 * length modifier inserted in front of its final character, which is
 * replaced by `ch' (for example "%5d" becomes "%5jd").
 *
 * The result lives in a buffer owned by this function that is reused
 * and grown in 1 KiB steps, so it is only valid until the next call.
 * Returns NULL after printing a diagnostic if memory cannot be obtained.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	size_t prefix, need;

	prefix = strlen(str) - 1;	/* all but the conversion char */
	need = prefix + 3;		/* plus 'j', ch and the NUL */

	if (need > copy_size) {
		size_t rounded = (need + 1023) & ~(size_t)1023;
		char *grown = realloc(copy, rounded);

		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = grown;
		copy_size = rounded;
	}

	memmove(copy, str, prefix);
	copy[prefix] = 'j';
	copy[prefix + 1] = ch;
	copy[prefix + 2] = '\0';
	return (copy);
}
462 
/*
 * Expand backslash escapes in `fmt' in place.
 *
 * `percent' is non-zero when the string is the format itself: an octal
 * escape that yields '%' is then stored as "%%", and octal escapes are
 * limited to three digits.  Otherwise a leading '0' allows up to four
 * digits.  The number of bytes stored is returned through `len'
 * because the result may contain NUL bytes.
 *
 * Returns 1 if "\c" was seen (the string is cut there), 0 otherwise.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	static const char names[] = "abfnrtv";
	static const char values[] = "\a\b\f\n\r\t\v";
	char *rd, *wr;
	const char *hit;

	rd = wr = fmt;
	while (*rd != '\0') {
		if (*rd != '\\') {
			*wr++ = *rd++;
			continue;
		}

		/* Step over the backslash and look at what it introduces. */
		rd++;

		if (*rd == '\0') {
			/* Trailing backslash: user error, keep it literally. */
			*wr++ = '\\';
			*wr = '\0';
			*len = wr - fmt;
			return (0);
		}

		if (*rd == 'c') {
			/* Discard everything from here on. */
			*wr = '\0';
			*len = wr - fmt;
			return (1);
		}

		if (*rd >= '0' && *rd <= '7') {
			/* octal constant */
			int remaining = (!percent && *rd == '0') ? 4 : 3;
			int value = 0;

			while (remaining-- > 0 && *rd >= '0' && *rd <= '7') {
				value = (value << 3) + (*rd - '0');
				rd++;
			}
			if (percent && value == '%') {
				/* Keep a literal percent sign literal. */
				*wr++ = '%';
				*wr++ = '%';
			} else {
				*wr++ = (char)value;
			}
			continue;
		}

		hit = strchr(names, *rd);
		if (hit != NULL)
			*wr++ = values[hit - names];
		else
			*wr++ = *rd;	/* \\, \' and unknown escapes */
		rd++;
	}

	*wr = '\0';
	*len = wr - fmt;
	return (0);
}
534 
535 static int
536 getchr(void)
537 {
538 	if (!*gargv)
539 		return ('\0');
540 	return ((int)**gargv++);
541 }
542 
543 static const char *
544 getstr(void)
545 {
546 	if (!*gargv)
547 		return ("");
548 	return (*gargv++);
549 }
550 
551 static int
552 getint(int *ip)
553 {
554 	intmax_t val;
555 	uintmax_t uval;
556 	int rval;
557 
558 	if (getnum(&val, &uval, 1))
559 		return (1);
560 	rval = 0;
561 	if (val < INT_MIN || val > INT_MAX) {
562 		warnx("%s: %s", *gargv, strerror(ERANGE));
563 		rval = 1;
564 	}
565 	*ip = (int)val;
566 	return (rval);
567 }
568 
569 static int
570 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
571 {
572 	char *ep;
573 	int rval;
574 
575 	if (!*gargv) {
576 		*ip = 0;
577 		return (0);
578 	}
579 	if (**gargv == '"' || **gargv == '\'') {
580 		if (signedconv)
581 			*ip = asciicode();
582 		else
583 			*uip = asciicode();
584 		return (0);
585 	}
586 	rval = 0;
587 	errno = 0;
588 	if (signedconv)
589 		*ip = strtoimax(*gargv, &ep, 0);
590 	else
591 		*uip = strtoumax(*gargv, &ep, 0);
592 	if (ep == *gargv) {
593 		warnx("%s: expected numeric value", *gargv);
594 		rval = 1;
595 	}
596 	else if (*ep != '\0') {
597 		warnx("%s: not completely converted", *gargv);
598 		rval = 1;
599 	}
600 	if (errno == ERANGE) {
601 		warnx("%s: %s", *gargv, strerror(ERANGE));
602 		rval = 1;
603 	}
604 	++gargv;
605 	return (rval);
606 }
607 
608 static int
609 getfloating(long double *dp, int mod_ldbl)
610 {
611 	char *ep;
612 	int rval;
613 
614 	if (!*gargv) {
615 		*dp = 0.0;
616 		return (0);
617 	}
618 	if (**gargv == '"' || **gargv == '\'') {
619 		*dp = asciicode();
620 		return (0);
621 	}
622 	rval = 0;
623 	errno = 0;
624 	if (mod_ldbl)
625 		*dp = strtold(*gargv, &ep);
626 	else
627 		*dp = strtod(*gargv, &ep);
628 	if (ep == *gargv) {
629 		warnx("%s: expected numeric value", *gargv);
630 		rval = 1;
631 	} else if (*ep != '\0') {
632 		warnx("%s: not completely converted", *gargv);
633 		rval = 1;
634 	}
635 	if (errno == ERANGE) {
636 		warnx("%s: %s", *gargv, strerror(ERANGE));
637 		rval = 1;
638 	}
639 	++gargv;
640 	return (rval);
641 }
642 
643 static int
644 asciicode(void)
645 {
646 	int ch;
647 	wchar_t wch;
648 	mbstate_t mbs;
649 
650 	ch = (unsigned char)**gargv;
651 	if (ch == '\'' || ch == '"') {
652 		memset(&mbs, 0, sizeof(mbs));
653 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
654 		case (size_t)-2:
655 		case (size_t)-1:
656 			wch = (unsigned char)gargv[0][1];
657 			break;
658 		case 0:
659 			wch = 0;
660 			break;
661 		}
662 		ch = wch;
663 	}
664 	++gargv;
665 	return (ch);
666 }
667 
/* Print the one-line synopsis to standard error. */
static void
usage(void)
{
	static const char synopsis[] = "usage: printf format [arguments ...]\n";

	(void)fputs(synopsis, stderr);
}
673