xref: /freebsd-13.1/usr.bin/unifdef/unifdef.c (revision 61287be1)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2002 - 2015 Tony Finch <[email protected]>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * unifdef - remove ifdef'ed lines
30  *
31  * This code was derived from software contributed to Berkeley by Dave Yost.
32  * It was rewritten to support ANSI C by Tony Finch. The original version
33  * of unifdef carried the 4-clause BSD copyright licence. None of its code
34  * remains in this version (though some of the names remain) so it now
35  * carries a more liberal licence.
36  *
37  *  Wishlist:
38  *      provide an option which will append the name of the
39  *        appropriate symbol after #else's and #endif's
40  *      provide an option which will check symbols after
41  *        #else's and #endif's to see that they match their
42  *        corresponding #ifdef or #ifndef
43  *
44  *   These require better buffer handling, which would also make
45  *   it possible to handle all "dodgy" directives correctly.
46  */
47 
48 #include <sys/param.h>
49 #include <sys/stat.h>
50 #include <sys/tree.h>
51 
52 #include <assert.h>
53 #include <ctype.h>
54 #include <err.h>
55 #include <stdarg.h>
56 #include <stdbool.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61 
62 static const char copyright[] =
63     "@(#) $Version: unifdef-2.11 $\n"
64     "@(#) $FreeBSD$\n"
65     "@(#) $Author: Tony Finch ([email protected]) $\n"
66     "@(#) $URL: https://dotat.at/prog/unifdef $\n"
67 ;
68 
/*
 * Types of input lines, as returned by parseline().
 *
 * The first ten values (LT_TRUEI .. LT_ENDIF) are the directive types.
 * Adding LT_DODGY to one of them gives the "dodgy" variant of the same
 * directive (one that is not on a single line), so the values
 * LT_DODGY .. LT_DODGY_LAST mirror the first ten in the same order.
 * The order matters: it is relied on by the conversion macros below and
 * by the columns of trans_table[][].
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_ERROR,		/* unevaluable #if */
	LT_COUNT
} Linetype;

/*
 * Printable names for debugging output, indexed by Linetype. Designated
 * initializers tie each name to its enumerator instead of to its
 * position in the list.
 */
static char const * const linetype_name[] = {
	[LT_TRUEI]		= "TRUEI",
	[LT_FALSEI]		= "FALSEI",
	[LT_IF]			= "IF",
	[LT_TRUE]		= "TRUE",
	[LT_FALSE]		= "FALSE",
	[LT_ELIF]		= "ELIF",
	[LT_ELTRUE]		= "ELTRUE",
	[LT_ELFALSE]		= "ELFALSE",
	[LT_ELSE]		= "ELSE",
	[LT_ENDIF]		= "ENDIF",
	[LT_DODGY + LT_TRUEI]	= "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI]	= "DODGY FALSEI",
	[LT_DODGY + LT_IF]	= "DODGY IF",
	[LT_DODGY + LT_TRUE]	= "DODGY TRUE",
	[LT_DODGY + LT_FALSE]	= "DODGY FALSE",
	[LT_DODGY + LT_ELIF]	= "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE]	= "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE]	= "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE]	= "DODGY ELSE",
	[LT_DODGY + LT_ENDIF]	= "DODGY ENDIF",
	[LT_PLAIN]		= "PLAIN",
	[LT_EOF]		= "EOF",
	[LT_ERROR]		= "ERROR"
};

/*
 * Map an #if line type (LT_IF, LT_TRUE, LT_FALSE) to the matching #elif
 * type, and any directive type to its dodgy variant. The argument is
 * parenthesized so that compound expressions (e.g. a conditional
 * expression) are converted as a whole.
 */
#define linetype_if2elif(lt) ((Linetype)((lt) - LT_IF + LT_ELIF))
#define linetype_2dodgy(lt) ((Linetype)((lt) + LT_DODGY))
101 
/*
 * State of #if processing. One value is kept per nesting level in
 * ifstate[], and it selects the row of trans_table[][].
 */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable names for debugging output, indexed by Ifstate. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]		= "OUTSIDE",
	[IS_FALSE_PREFIX]	= "FALSE_PREFIX",
	[IS_TRUE_PREFIX]	= "TRUE_PREFIX",
	[IS_PASS_MIDDLE]	= "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]	= "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]	= "TRUE_MIDDLE",
	[IS_PASS_ELSE]		= "PASS_ELSE",
	[IS_FALSE_ELSE]		= "FALSE_ELSE",
	[IS_TRUE_ELSE]		= "TRUE_ELSE",
	[IS_FALSE_TRAILER]	= "FALSE_TRAILER"
};
123 
/*
 * State of the comment parser. NO_COMMENT is deliberately equal to
 * false so that the state can be tested as a boolean.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;

/* Printable names for debugging output, indexed by Comment_state. */
static char const * const comment_name[] = {
	[NO_COMMENT]		= "NO",
	[C_COMMENT]		= "C",
	[CXX_COMMENT]		= "CXX",
	[STARTING_COMMENT]	= "STARTING",
	[FINISHING_COMMENT]	= "FINISHING",
	[CHAR_LITERAL]		= "CHAR",
	[STRING_LITERAL]	= "STRING"
};
138 
/*
 * State of the preprocessor line parser: how much of the current line
 * could still turn out to be a preprocessor directive.
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debugging output, indexed by Line_state. */
static char const * const linestate_name[] = {
	[LS_START]	= "START",
	[LS_HASH]	= "HASH",
	[LS_DIRTY]	= "DIRTY"
};
149 
/*
 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 *
 * MAXDEPTH sizes the per-nesting-level arrays ifstate[], ignoring[] and
 * stifline[]; MAXLINE (plus EDITSLOP) sizes the line buffer tline[].
 */
#define	MAXDEPTH        64			/* maximum #if nesting */
#define	MAXLINE         4096			/* maximum length of line */

/*
 * Sometimes when editing a keyword the replacement text is longer, so
 * we leave some space at the end of the tline buffer to accommodate this.
 */
#define	EDITSLOP        10
161 
/*
 * One symbol known to unifdef, stored as a node of macro_tree.
 *
 * C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large
 * parameters.
 */
struct macro {
	RB_ENTRY(macro)	entry;		/* tree linkage (sys/tree.h) */
	const char	*name;		/* symbol name; the tree's sort key */
	const char	*value;		/* NULL is treated by parseline() as
					   "not defined" for #ifdef/#ifndef */
	bool		ignore;		/* -iDsym or -iUsym */
};
172 
173 static int
macro_cmp(struct macro * a,struct macro * b)174 macro_cmp(struct macro *a, struct macro *b)
175 {
176 	return (strcmp(a->name, b->name));
177 }
178 
/*
 * The set of known macros, kept in a tree built with the RB_* macros
 * from <sys/tree.h> and ordered by macro_cmp() (i.e. by name).
 */
static RB_HEAD(MACROMAP, macro) macro_tree = RB_INITIALIZER(&macro_tree);
RB_GENERATE_STATIC(MACROMAP, macro, entry, macro_cmp);
181 
/*
 * Globals.
 */

/* Command-line option flags, set by main(). */
static bool             compblank;		/* -B: compress blank lines */
static bool             lnblank;		/* -b (or -l): blank deleted lines */
static bool             complement;		/* -c: do the complement */
static bool             debugging;		/* -d: debugging reports */
static bool             inplace;		/* -m (or -M): modify in place */
static bool             iocccok;		/* -e: fewer IOCCC errors */
static bool             strictlogic;		/* -K: keep ambiguous #ifs */
static bool             killconsts;		/* -k: eval constant #ifs */
static bool             lnnum;			/* -n: add #line directives */
static bool             symlist;		/* -s: output symbol list */
static bool             symdepth;		/* -S: output symbol depth */
static bool             text;			/* -t: this is a text file */

/* Per-file I/O state, set up by processinout(). */
static FILE            *input;			/* input file pointer */
static const char      *filename;		/* input file name ("[stdin]" for stdin) */
static int              linenum;		/* current line number */
static const char      *linefile;		/* file name for #line; NULL for stdin */
static FILE            *output;			/* output file pointer */
static const char      *ofilename;		/* -o: output file name */
static const char      *backext;		/* -M: backup extension */
static char            *tempname;		/* avoid splatting input */

static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
static char            *keyword;		/* used for editing #elif's;
						   points into tline */

/*
 * When processing a file, the output's newline style will match the
 * input's, and unifdef correctly handles CRLF or LF endings whatever
 * the platform's native style. The stdio streams are opened in binary
 * mode to accommodate platforms whose native newline style is CRLF.
 * When the output isn't a processed input file (when it is error /
 * debug / diagnostic messages) then unifdef uses native line endings.
 */

static const char      *newline;		/* input file format; NULL until
						   the first line has been read */
static const char       newline_unix[] = "\n";
static const char       newline_crlf[] = "\r\n";

/* Parser and state machine state. */
static Comment_state    incomment;		/* comment parser state */
static Line_state       linestate;		/* #if line parser state */
static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
static bool             ignoring[MAXDEPTH];	/* ignore comments state */
static int              stifline[MAXDEPTH];	/* start of current #if */
static int              depth;			/* current #if nesting */
static int              delcount;		/* count of deleted lines */
static unsigned         blankcount;		/* count of blank lines */
static unsigned         blankmax;		/* maximum recent blankcount */
/* NOTE(review): `constexpr` is a keyword in C23; this identifier will
   have to be renamed before the file can be built as C23. */
static bool             constexpr;		/* constant #if expression */
static bool             zerosyms;		/* to format symdepth output */
static bool             firstsym;		/* ditto */

static int              exitmode;		/* -x: exit status mode (0, 1 or 2) */
static int              exitstat;		/* program exit status */
static bool             altered;		/* was this file modified? */
240 
/*
 * Forward declarations of the file-local functions, in alphabetical
 * order.
 */
static void             addsym1(bool, bool, char *);
static void             addsym2(bool, const char *, const char *);
static char            *astrcat(const char *, const char *);
static void             cleantemp(void);
static void             closeio(void);
static void             debug(const char *, ...);
static void             debugsym(const char *, const struct macro *);
static bool             defundef(void);
static void             defundefile(const char *);
static void             done(void);
static void             error(const char *);
static struct macro    *findsym(const char **);
static void             flushline(bool);
static void             hashline(void);
static void             help(void);
static Linetype         ifeval(const char **);
static void             ignoreoff(void);
static void             ignoreon(void);
static void             indirectsym(void);
static void             keywordedit(const char *);
static const char      *matchsym(const char *, const char *);
static void             nest(void);
static Linetype         parseline(void);
static void             process(void);
static void             processinout(const char *, const char *);
static const char      *skipargs(const char *);
static const char      *skipcomment(const char *);
static const char      *skiphash(void);
static const char      *skipline(const char *);
static const char      *skipsym(const char *);
static void             state(Ifstate);
static void             unnest(void);
static void             usage(void);
static void             version(void);
static const char      *xstrdup(const char *, const char *);
276 
/*
 * True if character c cannot be part of an identifier, i.e. it is
 * neither alphanumeric nor an underscore. The argument is parenthesized
 * so that compound expressions are tested as a whole; note that it is
 * still evaluated twice, so it must not have side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
278 
/*
 * Create a temporary file from the mkstemp() template in tmp, give it
 * the permission bits of mode (anything outside the user/group/other
 * rwx bits is masked off), and return it as a stream opened for binary
 * writing.
 *
 * Exits with status 2 if the file cannot be created or its mode cannot
 * be set. Returns whatever fdopen() returns, so the result may be NULL.
 */
static FILE *
mktempmode(char *tmp, int mode)
{
	int fd;

	mode &= (S_IRWXU|S_IRWXG|S_IRWXO);

	if ((fd = mkstemp(tmp)) < 0)
		err(2, "can't create %s", tmp);
	if (fchmod(fd, mode) < 0)
		err(2, "can't fchmod %s mode=0o%o", tmp, mode);

	return (fdopen(fd, "wb"));
}
293 
294 /*
295  * The main program.
296  */
297 int
main(int argc,char * argv[])298 main(int argc, char *argv[])
299 {
300 	int opt;
301 
302 	while ((opt = getopt(argc, argv, "i:D:U:f:I:M:o:x:bBcdehKklmnsStV")) != -1)
303 		switch (opt) {
304 		case 'i': /* treat stuff controlled by these symbols as text */
305 			/*
306 			 * For strict backwards-compatibility the U or D
307 			 * should be immediately after the -i but it doesn't
308 			 * matter much if we relax that requirement.
309 			 */
310 			opt = *optarg++;
311 			if (opt == 'D')
312 				addsym1(true, true, optarg);
313 			else if (opt == 'U')
314 				addsym1(true, false, optarg);
315 			else
316 				usage();
317 			break;
318 		case 'D': /* define a symbol */
319 			addsym1(false, true, optarg);
320 			break;
321 		case 'U': /* undef a symbol */
322 			addsym1(false, false, optarg);
323 			break;
324 		case 'I': /* no-op for compatibility with cpp */
325 			break;
326 		case 'b': /* blank deleted lines instead of omitting them */
327 		case 'l': /* backwards compatibility */
328 			lnblank = true;
329 			break;
330 		case 'B': /* compress blank lines around removed section */
331 			compblank = true;
332 			break;
333 		case 'c': /* treat -D as -U and vice versa */
334 			complement = true;
335 			break;
336 		case 'd':
337 			debugging = true;
338 			break;
339 		case 'e': /* fewer errors from dodgy lines */
340 			iocccok = true;
341 			break;
342 		case 'f': /* definitions file */
343 			defundefile(optarg);
344 			break;
345 		case 'h':
346 			help();
347 			break;
348 		case 'K': /* keep ambiguous #ifs */
349 			strictlogic = true;
350 			break;
351 		case 'k': /* process constant #ifs */
352 			killconsts = true;
353 			break;
354 		case 'm': /* modify in place */
355 			inplace = true;
356 			break;
357 		case 'M': /* modify in place and keep backup */
358 			inplace = true;
359 			if (strlen(optarg) > 0)
360 				backext = optarg;
361 			break;
362 		case 'n': /* add #line directive after deleted lines */
363 			lnnum = true;
364 			break;
365 		case 'o': /* output to a file */
366 			ofilename = optarg;
367 			break;
368 		case 's': /* only output list of symbols that control #ifs */
369 			symlist = true;
370 			break;
371 		case 'S': /* list symbols with their nesting depth */
372 			symlist = symdepth = true;
373 			break;
374 		case 't': /* don't parse C comments */
375 			text = true;
376 			break;
377 		case 'V':
378 			version();
379 			break;
380 		case 'x':
381 			exitmode = atoi(optarg);
382 			if(exitmode < 0 || exitmode > 2)
383 				usage();
384 			break;
385 		default:
386 			usage();
387 		}
388 	argc -= optind;
389 	argv += optind;
390 	if (compblank && lnblank)
391 		errx(2, "-B and -b are mutually exclusive");
392 	if (symlist && (ofilename != NULL || inplace || argc > 1))
393 		errx(2, "-s only works with one input file");
394 	if (argc > 1 && ofilename != NULL)
395 		errx(2, "-o cannot be used with multiple input files");
396 	if (argc > 1 && !inplace)
397 		errx(2, "multiple input files require -m or -M");
398 	if (argc == 0 && inplace)
399 		errx(2, "-m requires an input file");
400 	if (argc == 0)
401 		argc = 1;
402 	if (argc == 1 && !inplace && ofilename == NULL)
403 		ofilename = "-";
404 	indirectsym();
405 
406 	atexit(cleantemp);
407 	if (ofilename != NULL)
408 		processinout(*argv, ofilename);
409 	else while (argc-- > 0) {
410 		processinout(*argv, *argv);
411 		argv++;
412 	}
413 	switch(exitmode) {
414 	case(0): exit(exitstat);
415 	case(1): exit(!exitstat);
416 	case(2): exit(0);
417 	default: abort(); /* bug */
418 	}
419 }
420 
421 /*
422  * File logistics.
423  */
424 static void
processinout(const char * ifn,const char * ofn)425 processinout(const char *ifn, const char *ofn)
426 {
427 	struct stat st;
428 
429 	if (ifn == NULL || strcmp(ifn, "-") == 0) {
430 		filename = "[stdin]";
431 		linefile = NULL;
432 		input = stdin;
433 	} else {
434 		filename = ifn;
435 		linefile = ifn;
436 		input = fopen(ifn, "rb");
437 		if (input == NULL)
438 			err(2, "can't open %s", ifn);
439 	}
440 	if (strcmp(ofn, "-") == 0) {
441 		output = stdout;
442 		process();
443 		return;
444 	}
445 	if (stat(ofn, &st) < 0) {
446 		output = fopen(ofn, "wb");
447 		if (output == NULL)
448 			err(2, "can't create %s", ofn);
449 		process();
450 		return;
451 	}
452 
453 	tempname = astrcat(ofn, ".XXXXXX");
454 	output = mktempmode(tempname, st.st_mode);
455 	if (output == NULL)
456 		err(2, "can't create %s", tempname);
457 
458 	process();
459 
460 	if (backext != NULL) {
461 		char *backname = astrcat(ofn, backext);
462 		if (rename(ofn, backname) < 0)
463 			err(2, "can't rename \"%s\" to \"%s\"", ofn, backname);
464 		free(backname);
465 	}
466 	/* leave file unmodified if unifdef made no changes */
467 	if (!altered && backext == NULL) {
468 		if (remove(tempname) < 0)
469 			warn("can't remove \"%s\"", tempname);
470 	} else if (rename(tempname, ofn) < 0)
471 		err(2, "can't rename \"%s\" to \"%s\"", tempname, ofn);
472 	free(tempname);
473 	tempname = NULL;
474 }
475 
476 /*
477  * For cleaning up if there is an error.
478  */
479 static void
cleantemp(void)480 cleantemp(void)
481 {
482 	if (tempname != NULL)
483 		remove(tempname);
484 }
485 
486 /*
487  * Self-identification functions.
488  */
489 
490 static void
version(void)491 version(void)
492 {
493 	const char *c = copyright;
494 	for (;;) {
495 		while (*++c != '$')
496 			if (*c == '\0')
497 				exit(0);
498 		while (*++c != '$')
499 			putc(*c, stderr);
500 		putc('\n', stderr);
501 	}
502 }
503 
/*
 * Write the two-line usage synopsis to the stream fp.
 */
static void
synopsis(FILE *fp)
{
	static const char usage_text[] =
	    "usage:	unifdef [-bBcdehKkmnsStV] [-x{012}] [-Mext] [-opath] \\\n"
	    "		[-[i]Dsym[=val]] [-[i]Usym] [-fpath] ... [file] ...\n";

	fputs(usage_text, fp);
}
511 
/*
 * Report a command line error: print the synopsis to stderr and exit
 * with status 2.
 */
static void
usage(void)
{
	FILE *const where = stderr;

	synopsis(where);
	exit(2);
}
518 
/*
 * Handle -h: print the synopsis followed by a one-line description of
 * every option to stdout, then exit with status 0.
 */
static void
help(void)
{
	static const char option_text[] =
	    "	-Dsym=val  define preprocessor symbol with given value\n"
	    "	-Dsym      define preprocessor symbol with value 1\n"
	    "	-Usym	   preprocessor symbol is undefined\n"
	    "	-iDsym=val \\  ignore C strings and comments\n"
	    "	-iDsym      ) in sections controlled by these\n"
	    "	-iUsym	   /  preprocessor symbols\n"
	    "	-fpath	file containing #define and #undef directives\n"
	    "	-b	blank lines instead of deleting them\n"
	    "	-B	compress blank lines around deleted section\n"
	    "	-c	complement (invert) keep vs. delete\n"
	    "	-d	debugging mode\n"
	    "	-e	ignore multiline preprocessor directives\n"
	    "	-h	print help\n"
	    "	-Ipath	extra include file path (ignored)\n"
	    "	-K	disable && and || short-circuiting\n"
	    "	-k	process constant #if expressions\n"
	    "	-Mext	modify in place and keep backups\n"
	    "	-m	modify input files in place\n"
	    "	-n	add #line directives to output\n"
	    "	-opath	output file name\n"
	    "	-S	list #if control symbols with nesting\n"
	    "	-s	list #if control symbols\n"
	    "	-t	ignore C strings and comments\n"
	    "	-V	print version\n"
	    "	-x{012}	exit status mode\n";

	synopsis(stdout);
	fputs(option_text, stdout);
	exit(0);
}
552 
553 /*
554  * A state transition function alters the global #if processing state
555  * in a particular way. The table below is indexed by the current
556  * processing state and the type of the current line.
557  *
558  * Nesting is handled by keeping a stack of states; some transition
559  * functions increase or decrease the depth. They also maintain the
560  * ignore state on a stack. In some complicated cases they have to
561  * alter the preprocessor directive, as follows.
562  *
563  * When we have processed a group that starts off with a known-false
564  * #if/#elif sequence (which has therefore been deleted) followed by a
565  * #elif that we don't understand and therefore must keep, we edit the
566  * latter into a #if to keep the nesting correct. We use memcpy() to
567  * overwrite the 4 byte token "elif" with "if  " without a '\0' byte.
568  *
569  * When we find a true #elif in a group, the following block will
570  * always be kept and the rest of the sequence after the next #elif or
571  * #else will be discarded. We edit the #elif into a #else and the
572  * following directive to #endif since this has the desired behaviour.
573  *
574  * "Dodgy" directives are split across multiple lines, the most common
575  * example being a multi-line comment hanging off the right of the
576  * directive. We can handle them correctly only if there is no change
577  * from printing to dropping (or vice versa) caused by that directive.
578  * If the directive is the first of a group we have a choice between
579  * failing with an error, or passing it through unchanged instead of
580  * evaluating it. The latter is not the default to avoid questions from
581  * users about unifdef unexpectedly leaving behind preprocessor directives.
582  */
/*
 * Type of a state transition function. Each one takes no arguments and
 * works purely on the global state; trans_table[][] holds pointers to
 * them.
 *
 * The first letter of a function's name says what kind of action it is:
 * E = report an error, S = start of output (the group's own start line
 * is dropped), P = print/pass, D = discard, F = first line of a group
 * (nests one level deeper), O = obfuscated ("dodgy") directive,
 * I = ignore comments in the block, M = modify the directive.
 */
typedef void state_fn(void);

/* report an error */
static void Eelif (void) { error("Inappropriate #elif"); }
static void Eelse (void) { error("Inappropriate #else"); }
static void Eendif(void) { error("Inappropriate #endif"); }
static void Eeof  (void) { error("Premature EOF"); }
static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
/* plain line handling */
static void print (void) { flushline(true); }
static void drop  (void) { flushline(false); }
/* output lacks group's start line */
static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
/* print/pass this block */
static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
static void Pelse (void) { print();              state(IS_PASS_ELSE); }
static void Pendif(void) { print(); unnest(); }
/* discard this block */
static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
static void Dendif(void) { drop();  unnest(); }
/* first line of group: nest() must come first so that the following
   ignoreoff()/state() calls act on the new nesting level */
static void Fdrop (void) { nest();  Dfalse(); }
static void Fpass (void) { nest();  Pelif(); }
static void Ftrue (void) { nest();  Strue(); }
static void Ffalse(void) { nest();  Sfalse(); }
/* variable pedantry for obfuscated lines: an error unless -e was given,
   otherwise the directive is passed through */
static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
static void Oif   (void) { if (!iocccok) Eioccc(); Fpass(); }
static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
/* ignore comments in this block: ignoreon() has to follow the F*
   function because that ends up calling ignoreoff() */
static void Idrop (void) { Fdrop();  ignoreon(); }
static void Itrue (void) { Ftrue();  ignoreon(); }
static void Ifalse(void) { Ffalse(); ignoreon(); }
/* modify this line: Mpass overwrites the four bytes at keyword in
   place; the others rewrite the rest of the line via keywordedit(),
   which also prints it */
static void Mpass (void) { memcpy(keyword, "if  ", 4); Pelif(); }
static void Mtrue (void) { keywordedit("else");  state(IS_TRUE_MIDDLE); }
static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
625 
626 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
627 /* IS_OUTSIDE */
628 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
629   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
630   print, done,  abort },
631 /* IS_FALSE_PREFIX */
632 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
633   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
634   drop,  Eeof,  abort },
635 /* IS_TRUE_PREFIX */
636 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
637   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
638   print, Eeof,  abort },
639 /* IS_PASS_MIDDLE */
640 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
641   Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
642   print, Eeof,  abort },
643 /* IS_FALSE_MIDDLE */
644 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
645   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
646   drop,  Eeof,  abort },
647 /* IS_TRUE_MIDDLE */
648 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
649   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
650   print, Eeof,  abort },
651 /* IS_PASS_ELSE */
652 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
653   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
654   print, Eeof,  abort },
655 /* IS_FALSE_ELSE */
656 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
657   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
658   drop,  Eeof,  abort },
659 /* IS_TRUE_ELSE */
660 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
661   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
662   print, Eeof,  abort },
663 /* IS_FALSE_TRAILER */
664 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
665   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
666   drop,  Eeof,  abort }
667 /*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
668   TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
669   PLAIN  EOF    ERROR */
670 };
671 
672 /*
673  * State machine utility functions
674  */
675 static void
ignoreoff(void)676 ignoreoff(void)
677 {
678 	if (depth == 0)
679 		abort(); /* bug */
680 	ignoring[depth] = ignoring[depth-1];
681 }
682 static void
ignoreon(void)683 ignoreon(void)
684 {
685 	ignoring[depth] = true;
686 }
687 static void
keywordedit(const char * replacement)688 keywordedit(const char *replacement)
689 {
690 	snprintf(keyword, tline + sizeof(tline) - keyword,
691 	    "%s%s", replacement, newline);
692 	altered = true;
693 	print();
694 }
695 static void
nest(void)696 nest(void)
697 {
698 	if (depth > MAXDEPTH-1)
699 		abort(); /* bug */
700 	if (depth == MAXDEPTH-1)
701 		error("Too many levels of nesting");
702 	depth += 1;
703 	stifline[depth] = linenum;
704 }
705 static void
unnest(void)706 unnest(void)
707 {
708 	if (depth == 0)
709 		abort(); /* bug */
710 	depth -= 1;
711 }
712 static void
state(Ifstate is)713 state(Ifstate is)
714 {
715 	ifstate[depth] = is;
716 }
717 
718 /*
719  * The last state transition function. When this is called,
720  * lineval == LT_EOF, so the process() loop will terminate.
721  */
722 static void
done(void)723 done(void)
724 {
725 	if (incomment)
726 		error("EOF in comment");
727 	closeio();
728 }
729 
730 /*
731  * Write a line to the output or not, according to command line options.
732  * If writing fails, closeio() will print the error and exit.
733  */
734 static void
flushline(bool keep)735 flushline(bool keep)
736 {
737 	if (symlist)
738 		return;
739 	if (keep ^ complement) {
740 		bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
741 		if (blankline && compblank && blankcount != blankmax) {
742 			delcount += 1;
743 			blankcount += 1;
744 		} else {
745 			if (lnnum && delcount > 0)
746 				hashline();
747 			if (fputs(tline, output) == EOF)
748 				closeio();
749 			delcount = 0;
750 			blankmax = blankcount = blankline ? blankcount + 1 : 0;
751 		}
752 	} else {
753 		if (lnblank && fputs(newline, output) == EOF)
754 			closeio();
755 		altered = true;
756 		delcount += 1;
757 		blankcount = 0;
758 	}
759 	if (debugging && fflush(output) == EOF)
760 		closeio();
761 }
762 
763 /*
764  * Format of #line directives depends on whether we know the input filename.
765  */
766 static void
hashline(void)767 hashline(void)
768 {
769 	int e;
770 
771 	if (linefile == NULL)
772 		e = fprintf(output, "#line %d%s", linenum, newline);
773 	else
774 		e = fprintf(output, "#line %d \"%s\"%s",
775 		    linenum, linefile, newline);
776 	if (e < 0)
777 		closeio();
778 }
779 
780 /*
781  * Flush the output and handle errors.
782  */
783 static void
closeio(void)784 closeio(void)
785 {
786 	/* Tidy up after findsym(). */
787 	if (symdepth && !zerosyms)
788 		printf("\n");
789 	if (output != NULL && (ferror(output) || fclose(output) == EOF))
790 			err(2, "%s: can't write to output", filename);
791 	fclose(input);
792 }
793 
794 /*
795  * The driver for the state machine.
796  */
797 static void
process(void)798 process(void)
799 {
800 	Linetype lineval = LT_PLAIN;
801 	/* When compressing blank lines, act as if the file
802 	   is preceded by a large number of blank lines. */
803 	blankmax = blankcount = 1000;
804 	zerosyms = true;
805 	newline = NULL;
806 	linenum = 0;
807 	altered = false;
808 	while (lineval != LT_EOF) {
809 		lineval = parseline();
810 		trans_table[ifstate[depth]][lineval]();
811 		debug("process line %d %s -> %s depth %d",
812 		    linenum, linetype_name[lineval],
813 		    ifstate_name[ifstate[depth]], depth);
814 	}
815 	exitstat |= altered;
816 }
817 
818 /*
819  * Parse a line and determine its type. We keep the preprocessor line
820  * parser state between calls in the global variable linestate, with
821  * help from skipcomment().
822  */
823 static Linetype
parseline(void)824 parseline(void)
825 {
826 	const char *cp;
827 	struct macro *cursym;
828 	Linetype retval;
829 	Comment_state wascomment;
830 
831 	wascomment = incomment;
832 	cp = skiphash();
833 	if (cp == NULL)
834 		return (LT_EOF);
835 	if (newline == NULL) {
836 		if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
837 			newline = newline_crlf;
838 		else
839 			newline = newline_unix;
840 	}
841 	if (*cp == '\0') {
842 		retval = LT_PLAIN;
843 		goto done;
844 	}
845 	keyword = tline + (cp - tline);
846 	if ((cp = matchsym("ifdef", keyword)) != NULL ||
847 	    (cp = matchsym("ifndef", keyword)) != NULL) {
848 		cp = skipcomment(cp);
849 		if ((cursym = findsym(&cp)) == NULL)
850 			retval = LT_IF;
851 		else {
852 			retval = (keyword[2] == 'n')
853 			    ? LT_FALSE : LT_TRUE;
854 			if (cursym->value == NULL)
855 				retval = (retval == LT_TRUE)
856 				    ? LT_FALSE : LT_TRUE;
857 			if (cursym->ignore)
858 				retval = (retval == LT_TRUE)
859 				    ? LT_TRUEI : LT_FALSEI;
860 		}
861 	} else if ((cp = matchsym("if", keyword)) != NULL)
862 		retval = ifeval(&cp);
863 	else if ((cp = matchsym("elif", keyword)) != NULL)
864 		retval = linetype_if2elif(ifeval(&cp));
865 	else if ((cp = matchsym("else", keyword)) != NULL)
866 		retval = LT_ELSE;
867 	else if ((cp = matchsym("endif", keyword)) != NULL)
868 		retval = LT_ENDIF;
869 	else {
870 		cp = skipsym(keyword);
871 		/* no way can we deal with a continuation inside a keyword */
872 		if (strncmp(cp, "\\\r\n", 3) == 0 ||
873 		    strncmp(cp, "\\\n", 2) == 0)
874 			Eioccc();
875 		cp = skipline(cp);
876 		retval = LT_PLAIN;
877 		goto done;
878 	}
879 	cp = skipcomment(cp);
880 	if (*cp != '\0') {
881 		cp = skipline(cp);
882 		if (retval == LT_TRUE || retval == LT_FALSE ||
883 		    retval == LT_TRUEI || retval == LT_FALSEI)
884 			retval = LT_IF;
885 		if (retval == LT_ELTRUE || retval == LT_ELFALSE)
886 			retval = LT_ELIF;
887 	}
888 	/* the following can happen if the last line of the file lacks a
889 	   newline or if there is too much whitespace in a directive */
890 	if (linestate == LS_HASH) {
891 		long len = cp - tline;
892 		if (fgets(tline + len, MAXLINE - len, input) == NULL) {
893 			if (ferror(input))
894 				err(2, "can't read %s", filename);
895 			/* append the missing newline at eof */
896 			strcpy(tline + len, newline);
897 			cp += strlen(newline);
898 			linestate = LS_START;
899 		} else {
900 			linestate = LS_DIRTY;
901 		}
902 	}
903 	if (retval != LT_PLAIN && (wascomment || linestate != LS_START)) {
904 		retval = linetype_2dodgy(retval);
905 		linestate = LS_DIRTY;
906 	}
907 done:
908 	debug("parser line %d state %s comment %s line", linenum,
909 	    comment_name[incomment], linestate_name[linestate]);
910 	return (retval);
911 }
912 
913 /*
914  * These are the binary operators that are supported by the expression
915  * evaluator.
916  */
/*
 * Shared tail of the binary operators: the result is unknown (LT_IF) when
 * either operand is unknown; otherwise v is stored through p and classified
 * as true or false.
 */
static Linetype
op_strict(long *p, long v, Linetype at, Linetype bt)
{
	if (at != LT_IF && bt != LT_IF) {
		*p = v;
		return (v ? LT_TRUE : LT_FALSE);
	}
	return (LT_IF);
}
/* Binary "<": a less than b. */
static Linetype
op_lt(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = (a < b);

	return (op_strict(p, v, at, bt));
}
/* Binary ">": a greater than b. */
static Linetype
op_gt(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = (a > b);

	return (op_strict(p, v, at, bt));
}
/* Binary "<=": a less than or equal to b. */
static Linetype
op_le(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = (a <= b);

	return (op_strict(p, v, at, bt));
}
/* Binary ">=": a greater than or equal to b. */
static Linetype
op_ge(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = (a >= b);

	return (op_strict(p, v, at, bt));
}
/* Binary "==": a equal to b. */
static Linetype
op_eq(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = (a == b);

	return (op_strict(p, v, at, bt));
}
/* Binary "!=": a different from b. */
static Linetype
op_ne(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = (a != b);

	return (op_strict(p, v, at, bt));
}
/*
 * Binary "||".  Unless strictlogic is set, one operand known to be true
 * decides the result even when the other operand is unknown.
 */
static Linetype
op_or(long *p, Linetype at, long a, Linetype bt, long b)
{
	const bool either_true = (at == LT_TRUE || bt == LT_TRUE);

	if (either_true && !strictlogic) {
		*p = 1;
		return (LT_TRUE);
	}
	return (op_strict(p, a || b, at, bt));
}
/*
 * Binary "&&".  Unless strictlogic is set, one operand known to be false
 * decides the result even when the other operand is unknown.
 */
static Linetype
op_and(long *p, Linetype at, long a, Linetype bt, long b)
{
	const bool either_false = (at == LT_FALSE || bt == LT_FALSE);

	if (either_false && !strictlogic) {
		*p = 0;
		return (LT_FALSE);
	}
	return (op_strict(p, a && b, at, bt));
}
/* Binary "<<": a shifted left by b bits. */
static Linetype
op_blsh(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = a << b;

	return (op_strict(p, v, at, bt));
}
/* Binary ">>": a shifted right by b bits. */
static Linetype
op_brsh(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = a >> b;

	return (op_strict(p, v, at, bt));
}
/* Binary "+": sum of a and b. */
static Linetype
op_add(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = a + b;

	return (op_strict(p, v, at, bt));
}
/* Binary "-": a minus b. */
static Linetype
op_sub(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = a - b;

	return (op_strict(p, v, at, bt));
}
/* Binary "*": product of a and b. */
static Linetype
op_mul(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = a * b;

	return (op_strict(p, v, at, bt));
}
/*
 * Binary "/".  The division is only attempted when the right operand is
 * known to be true; any other right operand is reported as LT_ERROR.
 */
static Linetype
op_div(long *p, Linetype at, long a, Linetype bt, long b)
{
	if (bt == LT_TRUE)
		return (op_strict(p, a / b, at, bt));
	debug("eval division by zero");
	return (LT_ERROR);
}
/*
 * Binary "%".  As in op_div, the remainder is only computed when the right
 * operand is known to be true; an unknown or false right operand would
 * otherwise mean evaluating a % 0.  Returns LT_ERROR in that case.
 */
static Linetype
op_mod(long *p, Linetype at, long a, Linetype bt, long b)
{
	if (bt != LT_TRUE) {
		debug("eval modulo by zero");
		return (LT_ERROR);
	}
	/* x % -1 is always 0; computing it directly overflows for LONG_MIN */
	return (op_strict(p, b == -1 ? 0 : a % b, at, bt));
}
/* Binary "|": bitwise OR of a and b. */
static Linetype
op_bor(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = a | b;

	return (op_strict(p, v, at, bt));
}
/* Binary "^": bitwise exclusive OR of a and b. */
static Linetype
op_bxor(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = a ^ b;

	return (op_strict(p, v, at, bt));
}
/* Binary "&": bitwise AND of a and b. */
static Linetype
op_band(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long v = a & b;

	return (op_strict(p, v, at, bt));
}
983 
984 /*
985  * An evaluation function takes three arguments, as follows: (1) a pointer to
986  * an element of the precedence table which lists the operators at the current
987  * level of precedence; (2) a pointer to an integer which will receive the
988  * value of the expression; and (3) a pointer to a char* that points to the
989  * expression to be evaluated and that is updated to the end of the expression
990  * when evaluation is complete. The function returns LT_FALSE if the value of
991  * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
992  * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
993  */
994 struct ops;
995 
996 typedef Linetype eval_fn(const struct ops *, long *, const char **);
997 
998 static eval_fn eval_table, eval_unary;
999 
1000 /*
1001  * The precedence table. Expressions involving binary operators are evaluated
1002  * in a table-driven way by eval_table. When it evaluates a subexpression it
1003  * calls the inner function with its first argument pointing to the next
1004  * element of the table. Innermost expressions have special non-table-driven
1005  * handling.
1006  *
1007  * The stop characters help with lexical analysis: an operator is not
1008  * recognized if it is followed by one of the stop characters because
1009  * that would make it a different operator.
1010  */
1011 struct op {
1012 	const char *str;
1013 	Linetype (*fn)(long *, Linetype, long, Linetype, long);
1014 	const char *stop;
1015 };
1016 struct ops {
1017 	eval_fn *inner;
1018 	struct op op[5];
1019 };
1020 static const struct ops eval_ops[] = {
1021 	{ eval_table, { { "||", op_or, NULL } } },
1022 	{ eval_table, { { "&&", op_and, NULL } } },
1023 	{ eval_table, { { "|", op_bor, "|" } } },
1024 	{ eval_table, { { "^", op_bxor, NULL } } },
1025 	{ eval_table, { { "&", op_band, "&" } } },
1026 	{ eval_table, { { "==", op_eq, NULL },
1027 			{ "!=", op_ne, NULL } } },
1028 	{ eval_table, { { "<=", op_le, NULL },
1029 			{ ">=", op_ge, NULL },
1030 			{ "<", op_lt, "<=" },
1031 			{ ">", op_gt, ">=" } } },
1032 	{ eval_table, { { "<<", op_blsh, NULL },
1033 			{ ">>", op_brsh, NULL } } },
1034 	{ eval_table, { { "+", op_add, NULL },
1035 			{ "-", op_sub, NULL } } },
1036 	{ eval_unary, { { "*", op_mul, NULL },
1037 			{ "/", op_div, NULL },
1038 			{ "%", op_mod, NULL } } },
1039 };
1040 
1041 /* Current operator precedence level */
prec(const struct ops * ops)1042 static long prec(const struct ops *ops)
1043 {
1044 	return (ops - eval_ops);
1045 }
1046 
1047 /*
1048  * Function for evaluating the innermost parts of expressions,
1049  * viz. !expr (expr) number defined(symbol) symbol
1050  * We reset the constexpr flag in the last two cases.
1051  */
1052 static Linetype
eval_unary(const struct ops * ops,long * valp,const char ** cpp)1053 eval_unary(const struct ops *ops, long *valp, const char **cpp)
1054 {
1055 	const char *cp;
1056 	char *ep;
1057 	struct macro *sym;
1058 	bool defparen;
1059 	Linetype lt;
1060 
1061 	cp = skipcomment(*cpp);
1062 	if (*cp == '!') {
1063 		debug("eval%d !", prec(ops));
1064 		cp++;
1065 		lt = eval_unary(ops, valp, &cp);
1066 		if (lt == LT_ERROR)
1067 			return (LT_ERROR);
1068 		if (lt != LT_IF) {
1069 			*valp = !*valp;
1070 			lt = *valp ? LT_TRUE : LT_FALSE;
1071 		}
1072 	} else if (*cp == '~') {
1073 		debug("eval%d ~", prec(ops));
1074 		cp++;
1075 		lt = eval_unary(ops, valp, &cp);
1076 		if (lt == LT_ERROR)
1077 			return (LT_ERROR);
1078 		if (lt != LT_IF) {
1079 			*valp = ~(*valp);
1080 			lt = *valp ? LT_TRUE : LT_FALSE;
1081 		}
1082 	} else if (*cp == '-') {
1083 		debug("eval%d -", prec(ops));
1084 		cp++;
1085 		lt = eval_unary(ops, valp, &cp);
1086 		if (lt == LT_ERROR)
1087 			return (LT_ERROR);
1088 		if (lt != LT_IF) {
1089 			*valp = -(*valp);
1090 			lt = *valp ? LT_TRUE : LT_FALSE;
1091 		}
1092 	} else if (*cp == '(') {
1093 		cp++;
1094 		debug("eval%d (", prec(ops));
1095 		lt = eval_table(eval_ops, valp, &cp);
1096 		if (lt == LT_ERROR)
1097 			return (LT_ERROR);
1098 		cp = skipcomment(cp);
1099 		if (*cp++ != ')')
1100 			return (LT_ERROR);
1101 	} else if (isdigit((unsigned char)*cp)) {
1102 		debug("eval%d number", prec(ops));
1103 		*valp = strtol(cp, &ep, 0);
1104 		if (ep == cp)
1105 			return (LT_ERROR);
1106 		lt = *valp ? LT_TRUE : LT_FALSE;
1107 		cp = ep;
1108 	} else if (matchsym("defined", cp) != NULL) {
1109 		cp = skipcomment(cp+7);
1110 		if (*cp == '(') {
1111 			cp = skipcomment(cp+1);
1112 			defparen = true;
1113 		} else {
1114 			defparen = false;
1115 		}
1116 		sym = findsym(&cp);
1117 		cp = skipcomment(cp);
1118 		if (defparen && *cp++ != ')') {
1119 			debug("eval%d defined missing ')'", prec(ops));
1120 			return (LT_ERROR);
1121 		}
1122 		if (sym == NULL) {
1123 			debug("eval%d defined unknown", prec(ops));
1124 			lt = LT_IF;
1125 		} else {
1126 			debug("eval%d defined %s", prec(ops), sym->name);
1127 			*valp = (sym->value != NULL);
1128 			lt = *valp ? LT_TRUE : LT_FALSE;
1129 		}
1130 		constexpr = false;
1131 	} else if (!endsym(*cp)) {
1132 		debug("eval%d symbol", prec(ops));
1133 		sym = findsym(&cp);
1134 		if (sym == NULL) {
1135 			lt = LT_IF;
1136 			cp = skipargs(cp);
1137 		} else if (sym->value == NULL) {
1138 			*valp = 0;
1139 			lt = LT_FALSE;
1140 		} else {
1141 			*valp = strtol(sym->value, &ep, 0);
1142 			if (*ep != '\0' || ep == sym->value)
1143 				return (LT_ERROR);
1144 			lt = *valp ? LT_TRUE : LT_FALSE;
1145 			cp = skipargs(cp);
1146 		}
1147 		constexpr = false;
1148 	} else {
1149 		debug("eval%d bad expr", prec(ops));
1150 		return (LT_ERROR);
1151 	}
1152 
1153 	*cpp = cp;
1154 	debug("eval%d = %d", prec(ops), *valp);
1155 	return (lt);
1156 }
1157 
1158 /*
1159  * Table-driven evaluation of binary operators.
1160  */
1161 static Linetype
eval_table(const struct ops * ops,long * valp,const char ** cpp)1162 eval_table(const struct ops *ops, long *valp, const char **cpp)
1163 {
1164 	const struct op *op;
1165 	const char *cp;
1166 	long val = 0;
1167 	Linetype lt, rt;
1168 
1169 	debug("eval%d", prec(ops));
1170 	cp = *cpp;
1171 	lt = ops->inner(ops+1, valp, &cp);
1172 	if (lt == LT_ERROR)
1173 		return (LT_ERROR);
1174 	for (;;) {
1175 		cp = skipcomment(cp);
1176 		for (op = ops->op; op->str != NULL; op++) {
1177 			if (strncmp(cp, op->str, strlen(op->str)) == 0) {
1178 				/* assume only one-char operators have stop chars */
1179 				if (op->stop != NULL && cp[1] != '\0' &&
1180 				    strchr(op->stop, cp[1]) != NULL)
1181 					continue;
1182 				else
1183 					break;
1184 			}
1185 		}
1186 		if (op->str == NULL)
1187 			break;
1188 		cp += strlen(op->str);
1189 		debug("eval%d %s", prec(ops), op->str);
1190 		rt = ops->inner(ops+1, &val, &cp);
1191 		if (rt == LT_ERROR)
1192 			return (LT_ERROR);
1193 		lt = op->fn(valp, lt, *valp, rt, val);
1194 	}
1195 
1196 	*cpp = cp;
1197 	debug("eval%d = %d", prec(ops), *valp);
1198 	debug("eval%d lt = %s", prec(ops), linetype_name[lt]);
1199 	return (lt);
1200 }
1201 
1202 /*
1203  * Evaluate the expression on a #if or #elif line. If we can work out
1204  * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1205  * return just a generic LT_IF.
1206  */
1207 static Linetype
ifeval(const char ** cpp)1208 ifeval(const char **cpp)
1209 {
1210 	Linetype ret;
1211 	long val = 0;
1212 
1213 	debug("eval %s", *cpp);
1214 	constexpr = killconsts ? false : true;
1215 	ret = eval_table(eval_ops, &val, cpp);
1216 	debug("eval = %d", val);
1217 	return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
1218 }
1219 
1220 /*
1221  * Read a line and examine its initial part to determine if it is a
1222  * preprocessor directive. Returns NULL on EOF, or a pointer to a
1223  * preprocessor directive name, or a pointer to the zero byte at the
1224  * end of the line.
1225  */
1226 static const char *
skiphash(void)1227 skiphash(void)
1228 {
1229 	const char *cp;
1230 
1231 	linenum++;
1232 	if (fgets(tline, MAXLINE, input) == NULL) {
1233 		if (ferror(input))
1234 			err(2, "can't read %s", filename);
1235 		else
1236 			return (NULL);
1237 	}
1238 	cp = skipcomment(tline);
1239 	if (linestate == LS_START && *cp == '#') {
1240 		linestate = LS_HASH;
1241 		return (skipcomment(cp + 1));
1242 	} else if (*cp == '\0') {
1243 		return (cp);
1244 	} else {
1245 		return (skipline(cp));
1246 	}
1247 }
1248 
1249 /*
1250  * Mark a line dirty and consume the rest of it, keeping track of the
1251  * lexical state.
1252  */
1253 static const char *
skipline(const char * cp)1254 skipline(const char *cp)
1255 {
1256 	const char *pcp;
1257 	if (*cp != '\0')
1258 		linestate = LS_DIRTY;
1259 	while (*cp != '\0') {
1260 		cp = skipcomment(pcp = cp);
1261 		if (pcp == cp)
1262 			cp++;
1263 	}
1264 	return (cp);
1265 }
1266 
1267 /*
1268  * Skip over comments, strings, and character literals and stop at the
1269  * next character position that is not whitespace. Between calls we keep
1270  * the comment state in the global variable incomment, and we also adjust
1271  * the global variable linestate when we see a newline.
1272  * XXX: doesn't cope with the buffer splitting inside a state transition.
1273  */
1274 static const char *
skipcomment(const char * cp)1275 skipcomment(const char *cp)
1276 {
1277 	if (text || ignoring[depth]) {
1278 		for (; isspace((unsigned char)*cp); cp++)
1279 			if (*cp == '\n')
1280 				linestate = LS_START;
1281 		return (cp);
1282 	}
1283 	while (*cp != '\0')
1284 		/* don't reset to LS_START after a line continuation */
1285 		if (strncmp(cp, "\\\r\n", 3) == 0)
1286 			cp += 3;
1287 		else if (strncmp(cp, "\\\n", 2) == 0)
1288 			cp += 2;
1289 		else switch (incomment) {
1290 		case NO_COMMENT:
1291 			if (strncmp(cp, "/\\\r\n", 4) == 0) {
1292 				incomment = STARTING_COMMENT;
1293 				cp += 4;
1294 			} else if (strncmp(cp, "/\\\n", 3) == 0) {
1295 				incomment = STARTING_COMMENT;
1296 				cp += 3;
1297 			} else if (strncmp(cp, "/*", 2) == 0) {
1298 				incomment = C_COMMENT;
1299 				cp += 2;
1300 			} else if (strncmp(cp, "//", 2) == 0) {
1301 				incomment = CXX_COMMENT;
1302 				cp += 2;
1303 			} else if (strncmp(cp, "\'", 1) == 0) {
1304 				incomment = CHAR_LITERAL;
1305 				linestate = LS_DIRTY;
1306 				cp += 1;
1307 			} else if (strncmp(cp, "\"", 1) == 0) {
1308 				incomment = STRING_LITERAL;
1309 				linestate = LS_DIRTY;
1310 				cp += 1;
1311 			} else if (strncmp(cp, "\n", 1) == 0) {
1312 				linestate = LS_START;
1313 				cp += 1;
1314 			} else if (strchr(" \r\t", *cp) != NULL) {
1315 				cp += 1;
1316 			} else
1317 				return (cp);
1318 			continue;
1319 		case CXX_COMMENT:
1320 			if (strncmp(cp, "\n", 1) == 0) {
1321 				incomment = NO_COMMENT;
1322 				linestate = LS_START;
1323 			}
1324 			cp += 1;
1325 			continue;
1326 		case CHAR_LITERAL:
1327 		case STRING_LITERAL:
1328 			if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
1329 			    (incomment == STRING_LITERAL && cp[0] == '\"')) {
1330 				incomment = NO_COMMENT;
1331 				cp += 1;
1332 			} else if (cp[0] == '\\') {
1333 				if (cp[1] == '\0')
1334 					cp += 1;
1335 				else
1336 					cp += 2;
1337 			} else if (strncmp(cp, "\n", 1) == 0) {
1338 				if (incomment == CHAR_LITERAL)
1339 					error("Unterminated char literal");
1340 				else
1341 					error("Unterminated string literal");
1342 			} else
1343 				cp += 1;
1344 			continue;
1345 		case C_COMMENT:
1346 			if (strncmp(cp, "*\\\r\n", 4) == 0) {
1347 				incomment = FINISHING_COMMENT;
1348 				cp += 4;
1349 			} else if (strncmp(cp, "*\\\n", 3) == 0) {
1350 				incomment = FINISHING_COMMENT;
1351 				cp += 3;
1352 			} else if (strncmp(cp, "*/", 2) == 0) {
1353 				incomment = NO_COMMENT;
1354 				cp += 2;
1355 			} else
1356 				cp += 1;
1357 			continue;
1358 		case STARTING_COMMENT:
1359 			if (*cp == '*') {
1360 				incomment = C_COMMENT;
1361 				cp += 1;
1362 			} else if (*cp == '/') {
1363 				incomment = CXX_COMMENT;
1364 				cp += 1;
1365 			} else {
1366 				incomment = NO_COMMENT;
1367 				linestate = LS_DIRTY;
1368 			}
1369 			continue;
1370 		case FINISHING_COMMENT:
1371 			if (*cp == '/') {
1372 				incomment = NO_COMMENT;
1373 				cp += 1;
1374 			} else
1375 				incomment = C_COMMENT;
1376 			continue;
1377 		default:
1378 			abort(); /* bug */
1379 		}
1380 	return (cp);
1381 }
1382 
/*
 * Skip a parenthesized macro argument list, if one follows.  Returns the
 * position after the balanced list, or the original position when the
 * parentheses do not balance before the end of the text.
 */
static const char *
skipargs(const char *cp)
{
	const char *const start = cp;
	int nesting = 0;

	cp = skipcomment(cp);
	if (*cp != '(')
		return (cp);
	for (;;) {
		if (*cp == '(')
			nesting++;
		else if (*cp == ')')
			nesting--;
		cp = skipcomment(cp + 1);
		if (nesting == 0 || *cp == '\0')
			break;
	}
	/* Rewind and re-detect the syntax error later. */
	return (nesting == 0 ? cp : start);
}
1407 
/*
 * Return the position of the first character at or after cp for which
 * endsym() holds, i.e. the end of the identifier starting at cp.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
1418 
/*
 * Skip whitespace and comments, then copy the identifier that follows.
 * On success *cpp is advanced past the identifier and a newly allocated
 * copy is returned; if no identifier is present, NULL is returned and
 * *cpp is left unchanged.
 */
static const char *
getsym(const char **cpp)
{
	const char *begin, *finish;

	begin = skipcomment(*cpp);
	finish = skipsym(begin);
	if (finish == begin)
		return NULL;
	*cpp = finish;
	return (xstrdup(begin, finish));
}
1434 
/*
 * Test whether t begins with the symbol s as a whole word, that is, s is
 * a prefix of t and the character of t that follows is not a symbol
 * character.  Returns a pointer to that following character on a match,
 * otherwise NULL.
 */
static const char *
matchsym(const char *s, const char *t)
{
	for (; *s != '\0' && *t != '\0'; s++, t++) {
		if (*s != *t)
			return (NULL);
	}
	if (*s != '\0' || !endsym(*t))
		return(NULL);
	return(t);
}
1454 
1455 /*
1456  * Look for the symbol in the symbol table. If it is found, we return
1457  * the symbol table index, else we return -1.
1458  */
1459 static struct macro *
findsym(const char ** strp)1460 findsym(const char **strp)
1461 {
1462 	const char *str;
1463 	char *strkey;
1464 	struct macro key, *res;
1465 
1466 	str = *strp;
1467 	*strp = skipsym(str);
1468 	if (symlist) {
1469 		if (*strp == str)
1470 			return (NULL);
1471 		if (symdepth && firstsym)
1472 			printf("%s%3d", zerosyms ? "" : "\n", depth);
1473 		firstsym = zerosyms = false;
1474 		printf("%s%.*s%s",
1475 		       symdepth ? " " : "",
1476 		       (int)(*strp-str), str,
1477 		       symdepth ? "" : "\n");
1478 		/* we don't care about the value of the symbol */
1479 		return (NULL);
1480 	}
1481 
1482 	/*
1483 	 * 'str' just points into the current mid-parse input and is not
1484 	 * nul-terminated.  We know the length of the symbol, *strp - str, but
1485 	 * need to provide a nul-terminated lookup key for RB_FIND's comparison
1486 	 * function.  Create one here.
1487 	 */
1488 	strkey = malloc(*strp - str + 1);
1489 	memcpy(strkey, str, *strp - str);
1490 	strkey[*strp - str] = 0;
1491 
1492 	key.name = strkey;
1493 	res = RB_FIND(MACROMAP, &macro_tree, &key);
1494 	if (res != NULL)
1495 		debugsym("findsym", res);
1496 
1497 	free(strkey);
1498 	return (res);
1499 }
1500 
1501 /*
1502  * Resolve indirect symbol values to their final definitions.
1503  */
1504 static void
indirectsym(void)1505 indirectsym(void)
1506 {
1507 	const char *cp;
1508 	int changed;
1509 	struct macro *sym, *ind;
1510 
1511 	do {
1512 		changed = 0;
1513 		RB_FOREACH(sym, MACROMAP, &macro_tree) {
1514 			if (sym->value == NULL)
1515 				continue;
1516 			cp = sym->value;
1517 			ind = findsym(&cp);
1518 			if (ind == NULL || ind == sym ||
1519 			    *cp != '\0' ||
1520 			    ind->value == NULL ||
1521 			    ind->value == sym->value)
1522 				continue;
1523 			debugsym("indir...", sym);
1524 			sym->value = ind->value;
1525 			debugsym("...ectsym", sym);
1526 			changed++;
1527 		}
1528 	} while (changed);
1529 }
1530 
/*
 * Add a symbol given as "sym" or, when definethis is set, "sym=val".
 * A bare name gets the value "1" when defining and no value otherwise.
 * The '=' in symval is overwritten to terminate the name.  Any other
 * trailing text is a usage error.
 */
static void
addsym1(bool ignorethis, bool definethis, char *symval)
{
	const char *name = symval;
	const char *value = skipsym(name);

	if (*value == '\0') {
		value = definethis ? "1" : NULL;
	} else if (definethis && *value == '=') {
		symval[value - name] = '\0';
		value++;
	} else {
		usage();
	}
	addsym2(ignorethis, name, value);
}
1551 
1552 /*
1553  * Add a symbol to the symbol table.
1554  */
1555 static void
addsym2(bool ignorethis,const char * symname,const char * val)1556 addsym2(bool ignorethis, const char *symname, const char *val)
1557 {
1558 	const char *cp = symname;
1559 	struct macro *sym, *r;
1560 
1561 	sym = findsym(&cp);
1562 	if (sym == NULL) {
1563 		sym = calloc(1, sizeof(*sym));
1564 		sym->ignore = ignorethis;
1565 		sym->name = symname;
1566 		sym->value = val;
1567 		r = RB_INSERT(MACROMAP, &macro_tree, sym);
1568 		assert(r == NULL);
1569 	}
1570 	debugsym("addsym", sym);
1571 }
1572 
1573 static void
debugsym(const char * why,const struct macro * sym)1574 debugsym(const char *why, const struct macro *sym)
1575 {
1576 	debug("%s %s%c%s", why, sym->name,
1577 	    sym->value ? '=' : ' ',
1578 	    sym->value ? sym->value : "undef");
1579 }
1580 
1581 /*
1582  * Add symbols to the symbol table from a file containing
1583  * #define and #undef preprocessor directives.
1584  */
1585 static void
defundefile(const char * fn)1586 defundefile(const char *fn)
1587 {
1588 	filename = fn;
1589 	input = fopen(fn, "rb");
1590 	if (input == NULL)
1591 		err(2, "can't open %s", fn);
1592 	linenum = 0;
1593 	while (defundef())
1594 		;
1595 	if (ferror(input))
1596 		err(2, "can't read %s", filename);
1597 	else
1598 		fclose(input);
1599 	if (incomment)
1600 		error("EOF in comment");
1601 }
1602 
1603 /*
1604  * Read and process one #define or #undef directive
1605  */
1606 static bool
defundef(void)1607 defundef(void)
1608 {
1609 	const char *cp, *kw, *sym, *val, *end;
1610 
1611 	cp = skiphash();
1612 	if (cp == NULL)
1613 		return (false);
1614 	if (*cp == '\0')
1615 		goto done;
1616 	/* strip trailing whitespace, and do a fairly rough check to
1617 	   avoid unsupported multi-line preprocessor directives */
1618 	end = cp + strlen(cp);
1619 	while (end > tline && strchr(" \t\n\r", end[-1]) != NULL)
1620 		--end;
1621 	if (end > tline && end[-1] == '\\')
1622 		Eioccc();
1623 
1624 	kw = cp;
1625 	if ((cp = matchsym("define", kw)) != NULL) {
1626 		sym = getsym(&cp);
1627 		if (sym == NULL)
1628 			error("Missing macro name in #define");
1629 		if (*cp == '(') {
1630 			val = "1";
1631 		} else {
1632 			cp = skipcomment(cp);
1633 			val = (cp < end) ? xstrdup(cp, end) : "";
1634 		}
1635 		debug("#define");
1636 		addsym2(false, sym, val);
1637 	} else if ((cp = matchsym("undef", kw)) != NULL) {
1638 		sym = getsym(&cp);
1639 		if (sym == NULL)
1640 			error("Missing macro name in #undef");
1641 		cp = skipcomment(cp);
1642 		debug("#undef");
1643 		addsym2(false, sym, NULL);
1644 	} else {
1645 		error("Unrecognized preprocessor directive");
1646 	}
1647 	skipline(cp);
1648 done:
1649 	debug("parser line %d state %s comment %s line", linenum,
1650 	    comment_name[incomment], linestate_name[linestate]);
1651 	return (true);
1652 }
1653 
/*
 * Return a newly allocated string holding s1 followed by s2.  Exits the
 * program if the length cannot be computed or memory cannot be obtained.
 */
static char *
astrcat(const char *s1, const char *s2)
{
	/* a first pass with no buffer measures the combined length */
	const int joined = snprintf(NULL, 0, "%s%s", s1, s2);
	size_t bytes;
	char *out;

	if (joined < 0)
		err(2, "snprintf");
	bytes = (size_t)joined + 1;
	out = malloc(bytes);
	if (out == NULL)
		err(2, "malloc");
	snprintf(out, bytes, "%s%s", s1, s2);
	return (out);
}
1674 
/*
 * Return a newly allocated copy of the text in [start, end).  Aborts if
 * end precedes start and exits the program if memory cannot be obtained.
 */
static const char *
xstrdup(const char *start, const char *end)
{
	size_t room;
	char *copy;

	if (end < start)
		abort(); /* bug */
	/* one extra byte for the terminating zero */
	room = (size_t)(end - start) + 1;
	copy = malloc(room);
	if (copy == NULL)
		err(2, "malloc");
	snprintf(copy, room, "%s", start);
	return (copy);
}
1692 
1693 /*
1694  * Diagnostics.
1695  */
1696 static void
debug(const char * msg,...)1697 debug(const char *msg, ...)
1698 {
1699 	va_list ap;
1700 
1701 	if (debugging) {
1702 		va_start(ap, msg);
1703 		vwarnx(msg, ap);
1704 		va_end(ap);
1705 	}
1706 }
1707 
1708 static void
error(const char * msg)1709 error(const char *msg)
1710 {
1711 	if (depth == 0)
1712 		warnx("%s: %d: %s", filename, linenum, msg);
1713 	else
1714 		warnx("%s: %d: %s (#if line %d depth %d)",
1715 		    filename, linenum, msg, stifline[depth], depth);
1716 	closeio();
1717 	errx(2, "Output may be truncated");
1718 }
1719