1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2002 - 2015 Tony Finch <[email protected]>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * unifdef - remove ifdef'ed lines
30 *
31 * This code was derived from software contributed to Berkeley by Dave Yost.
32 * It was rewritten to support ANSI C by Tony Finch. The original version
33 * of unifdef carried the 4-clause BSD copyright licence. None of its code
34 * remains in this version (though some of the names remain) so it now
35 * carries a more liberal licence.
36 *
37 * Wishlist:
38 * provide an option which will append the name of the
39 * appropriate symbol after #else's and #endif's
40 * provide an option which will check symbols after
41 * #else's and #endif's to see that they match their
42 * corresponding #ifdef or #ifndef
43 *
44 * These require better buffer handling, which would also make
45 * it possible to handle all "dodgy" directives correctly.
46 */
47
48 #include <sys/param.h>
49 #include <sys/stat.h>
50 #include <sys/tree.h>
51
52 #include <assert.h>
53 #include <ctype.h>
54 #include <err.h>
55 #include <stdarg.h>
56 #include <stdbool.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61
62 static const char copyright[] =
63 "@(#) $Version: unifdef-2.11 $\n"
64 "@(#) $FreeBSD$\n"
65 "@(#) $Author: Tony Finch ([email protected]) $\n"
66 "@(#) $URL: https://dotat.at/prog/unifdef $\n"
67 ;
68
/*
 * Classification of an input line.  The first ten values describe a
 * preprocessor directive; adding LT_DODGY to one of them gives the
 * "dodgy" variant of the same directive.  The order of the values is
 * significant: it is the column order of the state transition table
 * and the index order of linetype_name[].
 */
typedef enum {
	LT_TRUEI,		/* #if known to be true, ignore flag set */
	LT_FALSEI,		/* #if known to be false, ignore flag set */
	LT_IF,			/* #if whose value is unknown */
	LT_TRUE,		/* #if known to be true */
	LT_FALSE,		/* #if known to be false */
	LT_ELIF,		/* #elif whose value is unknown */
	LT_ELTRUE,		/* #elif known to be true */
	LT_ELFALSE,		/* #elif known to be false */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_ERROR,		/* unevaluable #if */
	LT_COUNT
} Linetype;

/* Printable name of each Linetype, indexed by the enum value. */
static char const * const linetype_name[] = {
	[LT_TRUEI]		= "TRUEI",
	[LT_FALSEI]		= "FALSEI",
	[LT_IF]			= "IF",
	[LT_TRUE]		= "TRUE",
	[LT_FALSE]		= "FALSE",
	[LT_ELIF]		= "ELIF",
	[LT_ELTRUE]		= "ELTRUE",
	[LT_ELFALSE]		= "ELFALSE",
	[LT_ELSE]		= "ELSE",
	[LT_ENDIF]		= "ENDIF",
	[LT_DODGY + LT_TRUEI]	= "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI]	= "DODGY FALSEI",
	[LT_DODGY + LT_IF]	= "DODGY IF",
	[LT_DODGY + LT_TRUE]	= "DODGY TRUE",
	[LT_DODGY + LT_FALSE]	= "DODGY FALSE",
	[LT_DODGY + LT_ELIF]	= "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE]	= "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE]	= "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE]	= "DODGY ELSE",
	[LT_DODGY + LT_ENDIF]	= "DODGY ENDIF",
	[LT_PLAIN]		= "PLAIN",
	[LT_EOF]		= "EOF",
	[LT_ERROR]		= "ERROR"
};
98
/*
 * Convert the line type of an #if (LT_IF, LT_TRUE, LT_FALSE) to the
 * corresponding #elif line type (LT_ELIF, LT_ELTRUE, LT_ELFALSE).
 * The argument is parenthesized so that any expression may be passed.
 */
#define linetype_if2elif(lt) ((Linetype)((lt) - LT_IF + LT_ELIF))
/*
 * Convert a directive line type (LT_TRUEI .. LT_ENDIF) to its "dodgy"
 * counterpart by adding the LT_DODGY offset.
 */
#define linetype_2dodgy(lt) ((Linetype)((lt) + LT_DODGY))
101
/*
 * State of #if processing for one nesting level.  The order of the
 * values is the row order of the state transition table and the index
 * order of ifstate_name[].
 */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable name of each Ifstate, indexed by the enum value. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]		= "OUTSIDE",
	[IS_FALSE_PREFIX]	= "FALSE_PREFIX",
	[IS_TRUE_PREFIX]	= "TRUE_PREFIX",
	[IS_PASS_MIDDLE]	= "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]	= "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]	= "TRUE_MIDDLE",
	[IS_PASS_ELSE]		= "PASS_ELSE",
	[IS_FALSE_ELSE]		= "FALSE_ELSE",
	[IS_TRUE_ELSE]		= "TRUE_ELSE",
	[IS_FALSE_TRAILER]	= "FALSE_TRAILER"
};
123
/*
 * State of the comment parser.  NO_COMMENT is zero, so a value of
 * this type can be tested directly as a truth value.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;

/* Printable name of each Comment_state, indexed by the enum value. */
static char const * const comment_name[] = {
	[NO_COMMENT]		= "NO",
	[C_COMMENT]		= "C",
	[CXX_COMMENT]		= "CXX",
	[STARTING_COMMENT]	= "STARTING",
	[FINISHING_COMMENT]	= "FINISHING",
	[CHAR_LITERAL]		= "CHAR",
	[STRING_LITERAL]	= "STRING"
};
138
/*
 * State of the preprocessor line parser: how much of a possible
 * directive has been seen so far on the current line.
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable name of each Line_state, indexed by the enum value. */
static char const * const linestate_name[] = {
	[LS_START]	= "START",
	[LS_HASH]	= "HASH",
	[LS_DIRTY]	= "DIRTY"
};
149
/*
 * Size limits, taken from the minimum translation limits in
 * ISO/IEC 9899:1999 5.2.4.1.
 */
#define	MAXDEPTH	64	/* maximum #if nesting */
#define	MAXLINE		4096	/* maximum length of line */

/*
 * Editing a keyword can make the line longer (the replacement text
 * may be longer than the keyword it replaces), so the tline buffer is
 * given this many spare bytes beyond MAXLINE.
 */
#define	EDITSLOP	10
161
162 /*
163 * C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large
164 * parameters.
165 */
166 struct macro {
167 RB_ENTRY(macro) entry;
168 const char *name;
169 const char *value;
170 bool ignore; /* -iDsym or -iUsym */
171 };
172
173 static int
macro_cmp(struct macro * a,struct macro * b)174 macro_cmp(struct macro *a, struct macro *b)
175 {
176 return (strcmp(a->name, b->name));
177 }
178
179 static RB_HEAD(MACROMAP, macro) macro_tree = RB_INITIALIZER(¯o_tree);
180 RB_GENERATE_STATIC(MACROMAP, macro, entry, macro_cmp);
181
182 /*
183 * Globals.
184 */
185
186 static bool compblank; /* -B: compress blank lines */
187 static bool lnblank; /* -b: blank deleted lines */
188 static bool complement; /* -c: do the complement */
189 static bool debugging; /* -d: debugging reports */
190 static bool inplace; /* -m: modify in place */
191 static bool iocccok; /* -e: fewer IOCCC errors */
192 static bool strictlogic; /* -K: keep ambiguous #ifs */
193 static bool killconsts; /* -k: eval constant #ifs */
194 static bool lnnum; /* -n: add #line directives */
195 static bool symlist; /* -s: output symbol list */
196 static bool symdepth; /* -S: output symbol depth */
197 static bool text; /* -t: this is a text file */
198
199 static FILE *input; /* input file pointer */
200 static const char *filename; /* input file name */
201 static int linenum; /* current line number */
202 static const char *linefile; /* file name for #line */
203 static FILE *output; /* output file pointer */
204 static const char *ofilename; /* output file name */
205 static const char *backext; /* backup extension */
206 static char *tempname; /* avoid splatting input */
207
208 static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
209 static char *keyword; /* used for editing #elif's */
210
211 /*
212 * When processing a file, the output's newline style will match the
213 * input's, and unifdef correctly handles CRLF or LF endings whatever
214 * the platform's native style. The stdio streams are opened in binary
215 * mode to accommodate platforms whose native newline style is CRLF.
216 * When the output isn't a processed input file (when it is error /
217 * debug / diagnostic messages) then unifdef uses native line endings.
218 */
219
220 static const char *newline; /* input file format */
221 static const char newline_unix[] = "\n";
222 static const char newline_crlf[] = "\r\n";
223
224 static Comment_state incomment; /* comment parser state */
225 static Line_state linestate; /* #if line parser state */
226 static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
227 static bool ignoring[MAXDEPTH]; /* ignore comments state */
228 static int stifline[MAXDEPTH]; /* start of current #if */
229 static int depth; /* current #if nesting */
230 static int delcount; /* count of deleted lines */
231 static unsigned blankcount; /* count of blank lines */
232 static unsigned blankmax; /* maximum recent blankcount */
233 static bool constexpr; /* constant #if expression */
234 static bool zerosyms; /* to format symdepth output */
235 static bool firstsym; /* ditto */
236
237 static int exitmode; /* exit status mode */
238 static int exitstat; /* program exit status */
239 static bool altered; /* was this file modified? */
240
241 static void addsym1(bool, bool, char *);
242 static void addsym2(bool, const char *, const char *);
243 static char *astrcat(const char *, const char *);
244 static void cleantemp(void);
245 static void closeio(void);
246 static void debug(const char *, ...);
247 static void debugsym(const char *, const struct macro *);
248 static bool defundef(void);
249 static void defundefile(const char *);
250 static void done(void);
251 static void error(const char *);
252 static struct macro *findsym(const char **);
253 static void flushline(bool);
254 static void hashline(void);
255 static void help(void);
256 static Linetype ifeval(const char **);
257 static void ignoreoff(void);
258 static void ignoreon(void);
259 static void indirectsym(void);
260 static void keywordedit(const char *);
261 static const char *matchsym(const char *, const char *);
262 static void nest(void);
263 static Linetype parseline(void);
264 static void process(void);
265 static void processinout(const char *, const char *);
266 static const char *skipargs(const char *);
267 static const char *skipcomment(const char *);
268 static const char *skiphash(void);
269 static const char *skipline(const char *);
270 static const char *skipsym(const char *);
271 static void state(Ifstate);
272 static void unnest(void);
273 static void usage(void);
274 static void version(void);
275 static const char *xstrdup(const char *, const char *);
276
/*
 * True if the character c cannot be part of an identifier, i.e. it is
 * neither alphanumeric nor an underscore.  The argument is fully
 * parenthesized so that an expression may be passed; note that it is
 * still evaluated twice, so it must not have side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
278
/*
 * Create a temporary file from the mkstemp() template tmp, give it
 * the permission bits of mode (anything outside the rwx bits for
 * user, group and other is masked off), and return a binary write
 * stream for it.
 *
 * Failure of mkstemp() or fchmod() is fatal (exit status 2).  The
 * result of fdopen() is returned as is, so it may be NULL.
 */
static FILE *
mktempmode(char *tmp, int mode)
{
	int fd;

	mode &= (S_IRWXU|S_IRWXG|S_IRWXO);
	if ((fd = mkstemp(tmp)) < 0)
		err(2, "can't create %s", tmp);
	if (fchmod(fd, mode) < 0)
		err(2, "can't fchmod %s mode=0o%o", tmp, mode);
	return (fdopen(fd, "wb"));
}
293
294 /*
295 * The main program.
296 */
297 int
main(int argc,char * argv[])298 main(int argc, char *argv[])
299 {
300 int opt;
301
302 while ((opt = getopt(argc, argv, "i:D:U:f:I:M:o:x:bBcdehKklmnsStV")) != -1)
303 switch (opt) {
304 case 'i': /* treat stuff controlled by these symbols as text */
305 /*
306 * For strict backwards-compatibility the U or D
307 * should be immediately after the -i but it doesn't
308 * matter much if we relax that requirement.
309 */
310 opt = *optarg++;
311 if (opt == 'D')
312 addsym1(true, true, optarg);
313 else if (opt == 'U')
314 addsym1(true, false, optarg);
315 else
316 usage();
317 break;
318 case 'D': /* define a symbol */
319 addsym1(false, true, optarg);
320 break;
321 case 'U': /* undef a symbol */
322 addsym1(false, false, optarg);
323 break;
324 case 'I': /* no-op for compatibility with cpp */
325 break;
326 case 'b': /* blank deleted lines instead of omitting them */
327 case 'l': /* backwards compatibility */
328 lnblank = true;
329 break;
330 case 'B': /* compress blank lines around removed section */
331 compblank = true;
332 break;
333 case 'c': /* treat -D as -U and vice versa */
334 complement = true;
335 break;
336 case 'd':
337 debugging = true;
338 break;
339 case 'e': /* fewer errors from dodgy lines */
340 iocccok = true;
341 break;
342 case 'f': /* definitions file */
343 defundefile(optarg);
344 break;
345 case 'h':
346 help();
347 break;
348 case 'K': /* keep ambiguous #ifs */
349 strictlogic = true;
350 break;
351 case 'k': /* process constant #ifs */
352 killconsts = true;
353 break;
354 case 'm': /* modify in place */
355 inplace = true;
356 break;
357 case 'M': /* modify in place and keep backup */
358 inplace = true;
359 if (strlen(optarg) > 0)
360 backext = optarg;
361 break;
362 case 'n': /* add #line directive after deleted lines */
363 lnnum = true;
364 break;
365 case 'o': /* output to a file */
366 ofilename = optarg;
367 break;
368 case 's': /* only output list of symbols that control #ifs */
369 symlist = true;
370 break;
371 case 'S': /* list symbols with their nesting depth */
372 symlist = symdepth = true;
373 break;
374 case 't': /* don't parse C comments */
375 text = true;
376 break;
377 case 'V':
378 version();
379 break;
380 case 'x':
381 exitmode = atoi(optarg);
382 if(exitmode < 0 || exitmode > 2)
383 usage();
384 break;
385 default:
386 usage();
387 }
388 argc -= optind;
389 argv += optind;
390 if (compblank && lnblank)
391 errx(2, "-B and -b are mutually exclusive");
392 if (symlist && (ofilename != NULL || inplace || argc > 1))
393 errx(2, "-s only works with one input file");
394 if (argc > 1 && ofilename != NULL)
395 errx(2, "-o cannot be used with multiple input files");
396 if (argc > 1 && !inplace)
397 errx(2, "multiple input files require -m or -M");
398 if (argc == 0 && inplace)
399 errx(2, "-m requires an input file");
400 if (argc == 0)
401 argc = 1;
402 if (argc == 1 && !inplace && ofilename == NULL)
403 ofilename = "-";
404 indirectsym();
405
406 atexit(cleantemp);
407 if (ofilename != NULL)
408 processinout(*argv, ofilename);
409 else while (argc-- > 0) {
410 processinout(*argv, *argv);
411 argv++;
412 }
413 switch(exitmode) {
414 case(0): exit(exitstat);
415 case(1): exit(!exitstat);
416 case(2): exit(0);
417 default: abort(); /* bug */
418 }
419 }
420
421 /*
422 * File logistics.
423 */
424 static void
processinout(const char * ifn,const char * ofn)425 processinout(const char *ifn, const char *ofn)
426 {
427 struct stat st;
428
429 if (ifn == NULL || strcmp(ifn, "-") == 0) {
430 filename = "[stdin]";
431 linefile = NULL;
432 input = stdin;
433 } else {
434 filename = ifn;
435 linefile = ifn;
436 input = fopen(ifn, "rb");
437 if (input == NULL)
438 err(2, "can't open %s", ifn);
439 }
440 if (strcmp(ofn, "-") == 0) {
441 output = stdout;
442 process();
443 return;
444 }
445 if (stat(ofn, &st) < 0) {
446 output = fopen(ofn, "wb");
447 if (output == NULL)
448 err(2, "can't create %s", ofn);
449 process();
450 return;
451 }
452
453 tempname = astrcat(ofn, ".XXXXXX");
454 output = mktempmode(tempname, st.st_mode);
455 if (output == NULL)
456 err(2, "can't create %s", tempname);
457
458 process();
459
460 if (backext != NULL) {
461 char *backname = astrcat(ofn, backext);
462 if (rename(ofn, backname) < 0)
463 err(2, "can't rename \"%s\" to \"%s\"", ofn, backname);
464 free(backname);
465 }
466 /* leave file unmodified if unifdef made no changes */
467 if (!altered && backext == NULL) {
468 if (remove(tempname) < 0)
469 warn("can't remove \"%s\"", tempname);
470 } else if (rename(tempname, ofn) < 0)
471 err(2, "can't rename \"%s\" to \"%s\"", tempname, ofn);
472 free(tempname);
473 tempname = NULL;
474 }
475
476 /*
477 * For cleaning up if there is an error.
478 */
479 static void
cleantemp(void)480 cleantemp(void)
481 {
482 if (tempname != NULL)
483 remove(tempname);
484 }
485
486 /*
487 * Self-identification functions.
488 */
489
490 static void
version(void)491 version(void)
492 {
493 const char *c = copyright;
494 for (;;) {
495 while (*++c != '$')
496 if (*c == '\0')
497 exit(0);
498 while (*++c != '$')
499 putc(*c, stderr);
500 putc('\n', stderr);
501 }
502 }
503
/*
 * Write the two-line usage summary to the stream fp.
 */
static void
synopsis(FILE *fp)
{
	static const char summary[] =
	    "usage: unifdef [-bBcdehKkmnsStV] [-x{012}] [-Mext] [-opath] \\\n"
	    " [-[i]Dsym[=val]] [-[i]Usym] [-fpath] ... [file] ...\n";

	fputs(summary, fp);
}
511
/*
 * Report a command line error: print the usage summary on stderr and
 * exit with status 2.
 */
static void
usage(void)
{
	FILE *where = stderr;

	synopsis(where);
	exit(2);
}
518
/*
 * Print the usage summary followed by a description of every option
 * on stdout, then exit with status 0.
 */
static void
help(void)
{
	static const char options[] =
	    " -Dsym=val define preprocessor symbol with given value\n"
	    " -Dsym define preprocessor symbol with value 1\n"
	    " -Usym preprocessor symbol is undefined\n"
	    " -iDsym=val \\ ignore C strings and comments\n"
	    " -iDsym ) in sections controlled by these\n"
	    " -iUsym / preprocessor symbols\n"
	    " -fpath file containing #define and #undef directives\n"
	    " -b blank lines instead of deleting them\n"
	    " -B compress blank lines around deleted section\n"
	    " -c complement (invert) keep vs. delete\n"
	    " -d debugging mode\n"
	    " -e ignore multiline preprocessor directives\n"
	    " -h print help\n"
	    " -Ipath extra include file path (ignored)\n"
	    " -K disable && and || short-circuiting\n"
	    " -k process constant #if expressions\n"
	    " -Mext modify in place and keep backups\n"
	    " -m modify input files in place\n"
	    " -n add #line directives to output\n"
	    " -opath output file name\n"
	    " -S list #if control symbols with nesting\n"
	    " -s list #if control symbols\n"
	    " -t ignore C strings and comments\n"
	    " -V print version\n"
	    " -x{012} exit status mode\n";

	synopsis(stdout);
	/* the text contains no conversion specifications */
	fputs(options, stdout);
	exit(0);
}
552
553 /*
554 * A state transition function alters the global #if processing state
555 * in a particular way. The table below is indexed by the current
556 * processing state and the type of the current line.
557 *
558 * Nesting is handled by keeping a stack of states; some transition
559 * functions increase or decrease the depth. They also maintain the
560 * ignore state on a stack. In some complicated cases they have to
561 * alter the preprocessor directive, as follows.
562 *
563 * When we have processed a group that starts off with a known-false
564 * #if/#elif sequence (which has therefore been deleted) followed by a
565 * #elif that we don't understand and therefore must keep, we edit the
 * latter into a #if to keep the nesting correct. We use memcpy() to
 * overwrite the 4 byte token "elif" with "if" followed by two spaces,
 * so that no '\0' byte is written into the line.
568 *
569 * When we find a true #elif in a group, the following block will
570 * always be kept and the rest of the sequence after the next #elif or
571 * #else will be discarded. We edit the #elif into a #else and the
572 * following directive to #endif since this has the desired behaviour.
573 *
574 * "Dodgy" directives are split across multiple lines, the most common
575 * example being a multi-line comment hanging off the right of the
576 * directive. We can handle them correctly only if there is no change
577 * from printing to dropping (or vice versa) caused by that directive.
578 * If the directive is the first of a group we have a choice between
579 * failing with an error, or passing it through unchanged instead of
580 * evaluating it. The latter is not the default to avoid questions from
581 * users about unifdef unexpectedly leaving behind preprocessor directives.
582 */
/* The type of a state transition function. */
typedef void state_fn(void);

/* report an error */
static void
Eelif(void)
{
	error("Inappropriate #elif");
}

static void
Eelse(void)
{
	error("Inappropriate #else");
}

static void
Eendif(void)
{
	error("Inappropriate #endif");
}

static void
Eeof(void)
{
	error("Premature EOF");
}

static void
Eioccc(void)
{
	error("Obfuscated preprocessor control line");
}

/* plain line handling: flush the current line as kept or as dropped */
static void
print(void)
{
	flushline(true);
}

static void
drop(void)
{
	flushline(false);
}
594 /* output lacks group's start line */
Strue(void)595 static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); }
Sfalse(void)596 static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); }
Selse(void)597 static void Selse (void) { drop(); state(IS_TRUE_ELSE); }
598 /* print/pass this block */
Pelif(void)599 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
Pelse(void)600 static void Pelse (void) { print(); state(IS_PASS_ELSE); }
Pendif(void)601 static void Pendif(void) { print(); unnest(); }
602 /* discard this block */
Dfalse(void)603 static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); }
Delif(void)604 static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); }
Delse(void)605 static void Delse (void) { drop(); state(IS_FALSE_ELSE); }
Dendif(void)606 static void Dendif(void) { drop(); unnest(); }
607 /* first line of group */
Fdrop(void)608 static void Fdrop (void) { nest(); Dfalse(); }
Fpass(void)609 static void Fpass (void) { nest(); Pelif(); }
Ftrue(void)610 static void Ftrue (void) { nest(); Strue(); }
Ffalse(void)611 static void Ffalse(void) { nest(); Sfalse(); }
612 /* variable pedantry for obfuscated lines */
Oiffy(void)613 static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
Oif(void)614 static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); }
Oelif(void)615 static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
616 /* ignore comments in this block */
Idrop(void)617 static void Idrop (void) { Fdrop(); ignoreon(); }
Itrue(void)618 static void Itrue (void) { Ftrue(); ignoreon(); }
Ifalse(void)619 static void Ifalse(void) { Ffalse(); ignoreon(); }
620 /* modify this line */
Mpass(void)621 static void Mpass (void) { memcpy(keyword, "if ", 4); Pelif(); }
Mtrue(void)622 static void Mtrue (void) { keywordedit("else"); state(IS_TRUE_MIDDLE); }
Melif(void)623 static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
Melse(void)624 static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
625
626 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
627 /* IS_OUTSIDE */
628 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
629 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
630 print, done, abort },
631 /* IS_FALSE_PREFIX */
632 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
633 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
634 drop, Eeof, abort },
635 /* IS_TRUE_PREFIX */
636 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
637 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
638 print, Eeof, abort },
639 /* IS_PASS_MIDDLE */
640 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
641 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
642 print, Eeof, abort },
643 /* IS_FALSE_MIDDLE */
644 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
645 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
646 drop, Eeof, abort },
647 /* IS_TRUE_MIDDLE */
648 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
649 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
650 print, Eeof, abort },
651 /* IS_PASS_ELSE */
652 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
653 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
654 print, Eeof, abort },
655 /* IS_FALSE_ELSE */
656 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
657 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
658 drop, Eeof, abort },
659 /* IS_TRUE_ELSE */
660 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
661 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
662 print, Eeof, abort },
663 /* IS_FALSE_TRAILER */
664 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
665 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
666 drop, Eeof, abort }
667 /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
668 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
669 PLAIN EOF ERROR */
670 };
671
672 /*
673 * State machine utility functions
674 */
675 static void
ignoreoff(void)676 ignoreoff(void)
677 {
678 if (depth == 0)
679 abort(); /* bug */
680 ignoring[depth] = ignoring[depth-1];
681 }
682 static void
ignoreon(void)683 ignoreon(void)
684 {
685 ignoring[depth] = true;
686 }
687 static void
keywordedit(const char * replacement)688 keywordedit(const char *replacement)
689 {
690 snprintf(keyword, tline + sizeof(tline) - keyword,
691 "%s%s", replacement, newline);
692 altered = true;
693 print();
694 }
695 static void
nest(void)696 nest(void)
697 {
698 if (depth > MAXDEPTH-1)
699 abort(); /* bug */
700 if (depth == MAXDEPTH-1)
701 error("Too many levels of nesting");
702 depth += 1;
703 stifline[depth] = linenum;
704 }
705 static void
unnest(void)706 unnest(void)
707 {
708 if (depth == 0)
709 abort(); /* bug */
710 depth -= 1;
711 }
712 static void
state(Ifstate is)713 state(Ifstate is)
714 {
715 ifstate[depth] = is;
716 }
717
718 /*
719 * The last state transition function. When this is called,
720 * lineval == LT_EOF, so the process() loop will terminate.
721 */
722 static void
done(void)723 done(void)
724 {
725 if (incomment)
726 error("EOF in comment");
727 closeio();
728 }
729
730 /*
731 * Write a line to the output or not, according to command line options.
732 * If writing fails, closeio() will print the error and exit.
733 */
734 static void
flushline(bool keep)735 flushline(bool keep)
736 {
737 if (symlist)
738 return;
739 if (keep ^ complement) {
740 bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
741 if (blankline && compblank && blankcount != blankmax) {
742 delcount += 1;
743 blankcount += 1;
744 } else {
745 if (lnnum && delcount > 0)
746 hashline();
747 if (fputs(tline, output) == EOF)
748 closeio();
749 delcount = 0;
750 blankmax = blankcount = blankline ? blankcount + 1 : 0;
751 }
752 } else {
753 if (lnblank && fputs(newline, output) == EOF)
754 closeio();
755 altered = true;
756 delcount += 1;
757 blankcount = 0;
758 }
759 if (debugging && fflush(output) == EOF)
760 closeio();
761 }
762
763 /*
764 * Format of #line directives depends on whether we know the input filename.
765 */
766 static void
hashline(void)767 hashline(void)
768 {
769 int e;
770
771 if (linefile == NULL)
772 e = fprintf(output, "#line %d%s", linenum, newline);
773 else
774 e = fprintf(output, "#line %d \"%s\"%s",
775 linenum, linefile, newline);
776 if (e < 0)
777 closeio();
778 }
779
780 /*
781 * Flush the output and handle errors.
782 */
783 static void
closeio(void)784 closeio(void)
785 {
786 /* Tidy up after findsym(). */
787 if (symdepth && !zerosyms)
788 printf("\n");
789 if (output != NULL && (ferror(output) || fclose(output) == EOF))
790 err(2, "%s: can't write to output", filename);
791 fclose(input);
792 }
793
794 /*
795 * The driver for the state machine.
796 */
797 static void
process(void)798 process(void)
799 {
800 Linetype lineval = LT_PLAIN;
801 /* When compressing blank lines, act as if the file
802 is preceded by a large number of blank lines. */
803 blankmax = blankcount = 1000;
804 zerosyms = true;
805 newline = NULL;
806 linenum = 0;
807 altered = false;
808 while (lineval != LT_EOF) {
809 lineval = parseline();
810 trans_table[ifstate[depth]][lineval]();
811 debug("process line %d %s -> %s depth %d",
812 linenum, linetype_name[lineval],
813 ifstate_name[ifstate[depth]], depth);
814 }
815 exitstat |= altered;
816 }
817
818 /*
819 * Parse a line and determine its type. We keep the preprocessor line
820 * parser state between calls in the global variable linestate, with
821 * help from skipcomment().
822 */
823 static Linetype
parseline(void)824 parseline(void)
825 {
826 const char *cp;
827 struct macro *cursym;
828 Linetype retval;
829 Comment_state wascomment;
830
831 wascomment = incomment;
832 cp = skiphash();
833 if (cp == NULL)
834 return (LT_EOF);
835 if (newline == NULL) {
836 if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
837 newline = newline_crlf;
838 else
839 newline = newline_unix;
840 }
841 if (*cp == '\0') {
842 retval = LT_PLAIN;
843 goto done;
844 }
845 keyword = tline + (cp - tline);
846 if ((cp = matchsym("ifdef", keyword)) != NULL ||
847 (cp = matchsym("ifndef", keyword)) != NULL) {
848 cp = skipcomment(cp);
849 if ((cursym = findsym(&cp)) == NULL)
850 retval = LT_IF;
851 else {
852 retval = (keyword[2] == 'n')
853 ? LT_FALSE : LT_TRUE;
854 if (cursym->value == NULL)
855 retval = (retval == LT_TRUE)
856 ? LT_FALSE : LT_TRUE;
857 if (cursym->ignore)
858 retval = (retval == LT_TRUE)
859 ? LT_TRUEI : LT_FALSEI;
860 }
861 } else if ((cp = matchsym("if", keyword)) != NULL)
862 retval = ifeval(&cp);
863 else if ((cp = matchsym("elif", keyword)) != NULL)
864 retval = linetype_if2elif(ifeval(&cp));
865 else if ((cp = matchsym("else", keyword)) != NULL)
866 retval = LT_ELSE;
867 else if ((cp = matchsym("endif", keyword)) != NULL)
868 retval = LT_ENDIF;
869 else {
870 cp = skipsym(keyword);
871 /* no way can we deal with a continuation inside a keyword */
872 if (strncmp(cp, "\\\r\n", 3) == 0 ||
873 strncmp(cp, "\\\n", 2) == 0)
874 Eioccc();
875 cp = skipline(cp);
876 retval = LT_PLAIN;
877 goto done;
878 }
879 cp = skipcomment(cp);
880 if (*cp != '\0') {
881 cp = skipline(cp);
882 if (retval == LT_TRUE || retval == LT_FALSE ||
883 retval == LT_TRUEI || retval == LT_FALSEI)
884 retval = LT_IF;
885 if (retval == LT_ELTRUE || retval == LT_ELFALSE)
886 retval = LT_ELIF;
887 }
888 /* the following can happen if the last line of the file lacks a
889 newline or if there is too much whitespace in a directive */
890 if (linestate == LS_HASH) {
891 long len = cp - tline;
892 if (fgets(tline + len, MAXLINE - len, input) == NULL) {
893 if (ferror(input))
894 err(2, "can't read %s", filename);
895 /* append the missing newline at eof */
896 strcpy(tline + len, newline);
897 cp += strlen(newline);
898 linestate = LS_START;
899 } else {
900 linestate = LS_DIRTY;
901 }
902 }
903 if (retval != LT_PLAIN && (wascomment || linestate != LS_START)) {
904 retval = linetype_2dodgy(retval);
905 linestate = LS_DIRTY;
906 }
907 done:
908 debug("parser line %d state %s comment %s line", linenum,
909 comment_name[incomment], linestate_name[linestate]);
910 return (retval);
911 }
912
913 /*
914 * These are the binary operators that are supported by the expression
915 * evaluator.
916 */
op_strict(long * p,long v,Linetype at,Linetype bt)917 static Linetype op_strict(long *p, long v, Linetype at, Linetype bt) {
918 if(at == LT_IF || bt == LT_IF) return (LT_IF);
919 return (*p = v, v ? LT_TRUE : LT_FALSE);
920 }
/*
 * Relational and equality operators.  Each one computes the C result
 * of the comparison (0 or 1) and hands it to op_strict().
 */
static Linetype
op_lt(long *p, Linetype at, long a, Linetype bt, long b)
{
	long v = a < b;

	return (op_strict(p, v, at, bt));
}

static Linetype
op_gt(long *p, Linetype at, long a, Linetype bt, long b)
{
	long v = a > b;

	return (op_strict(p, v, at, bt));
}

static Linetype
op_le(long *p, Linetype at, long a, Linetype bt, long b)
{
	long v = a <= b;

	return (op_strict(p, v, at, bt));
}

static Linetype
op_ge(long *p, Linetype at, long a, Linetype bt, long b)
{
	long v = a >= b;

	return (op_strict(p, v, at, bt));
}

static Linetype
op_eq(long *p, Linetype at, long a, Linetype bt, long b)
{
	long v = a == b;

	return (op_strict(p, v, at, bt));
}

static Linetype
op_ne(long *p, Linetype at, long a, Linetype bt, long b)
{
	long v = a != b;

	return (op_strict(p, v, at, bt));
}
/*
 * Binary "||".  Unless strictlogic is set, one operand known to be true
 * decides the result even when the other operand is unknown.
 */
static Linetype
op_or(long *p, Linetype at, long a, Linetype bt, long b)
{
	if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE)) {
		*p = 1;
		return (LT_TRUE);
	}
	return (op_strict(p, a || b, at, bt));
}
/*
 * Binary "&&".  Unless strictlogic is set, one operand known to be false
 * decides the result even when the other operand is unknown.
 */
static Linetype
op_and(long *p, Linetype at, long a, Linetype bt, long b)
{
	if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE)) {
		*p = 0;
		return (LT_FALSE);
	}
	return (op_strict(p, a && b, at, bt));
}
/* Binary "<<": hand a << b to op_strict (the shift count is not checked). */
static Linetype
op_blsh(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long result = a << b;

	return (op_strict(p, result, at, bt));
}
/* Binary ">>": hand a >> b to op_strict (the shift count is not checked). */
static Linetype
op_brsh(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long result = a >> b;

	return (op_strict(p, result, at, bt));
}
/* Binary "+": hand a + b to op_strict. */
static Linetype
op_add(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long result = a + b;

	return (op_strict(p, result, at, bt));
}
/* Binary "-": hand a - b to op_strict. */
static Linetype
op_sub(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long result = a - b;

	return (op_strict(p, result, at, bt));
}
/* Binary "*": hand a * b to op_strict. */
static Linetype
op_mul(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long result = a * b;

	return (op_strict(p, result, at, bt));
}
/*
 * Binary "/".  The division is only attempted when the divisor's type is
 * LT_TRUE; any other divisor type yields LT_ERROR.
 */
static Linetype
op_div(long *p, Linetype at, long a, Linetype bt, long b)
{
	if (bt == LT_TRUE)
		return (op_strict(p, a / b, at, bt));
	debug("eval division by zero");
	return (LT_ERROR);
}
/*
 * Binary "%".  As in op_div, the remainder is only computed when the
 * divisor's type is LT_TRUE (known and non-zero); any other divisor type
 * yields LT_ERROR rather than evaluating a % 0, which is undefined.
 */
static Linetype
op_mod(long *p, Linetype at, long a, Linetype bt, long b)
{
	if (bt != LT_TRUE) {
		debug("eval remainder by zero");
		return (LT_ERROR);
	}
	/* x % -1 is always 0; computing it would overflow for the minimum long */
	return (op_strict(p, b == -1 ? 0 : a % b, at, bt));
}
/* Binary "|": hand a | b to op_strict. */
static Linetype
op_bor(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long result = a | b;

	return (op_strict(p, result, at, bt));
}
/* Binary "^": hand a ^ b to op_strict. */
static Linetype
op_bxor(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long result = a ^ b;

	return (op_strict(p, result, at, bt));
}
/* Binary "&": hand a & b to op_strict. */
static Linetype
op_band(long *p, Linetype at, long a, Linetype bt, long b)
{
	const long result = a & b;

	return (op_strict(p, result, at, bt));
}
983
984 /*
985 * An evaluation function takes three arguments, as follows: (1) a pointer to
986 * an element of the precedence table which lists the operators at the current
987 * level of precedence; (2) a pointer to an integer which will receive the
988 * value of the expression; and (3) a pointer to a char* that points to the
989 * expression to be evaluated and that is updated to the end of the expression
990 * when evaluation is complete. The function returns LT_FALSE if the value of
991 * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
992 * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
993 */
994 struct ops;
995
996 typedef Linetype eval_fn(const struct ops *, long *, const char **);
997
998 static eval_fn eval_table, eval_unary;
999
1000 /*
1001 * The precedence table. Expressions involving binary operators are evaluated
1002 * in a table-driven way by eval_table. When it evaluates a subexpression it
1003 * calls the inner function with its first argument pointing to the next
1004 * element of the table. Innermost expressions have special non-table-driven
1005 * handling.
1006 *
1007 * The stop characters help with lexical analysis: an operator is not
1008 * recognized if it is followed by one of the stop characters because
1009 * that would make it a different operator.
1010 */
1011 struct op {
1012 const char *str;
1013 Linetype (*fn)(long *, Linetype, long, Linetype, long);
1014 const char *stop;
1015 };
1016 struct ops {
1017 eval_fn *inner;
1018 struct op op[5];
1019 };
1020 static const struct ops eval_ops[] = {
1021 { eval_table, { { "||", op_or, NULL } } },
1022 { eval_table, { { "&&", op_and, NULL } } },
1023 { eval_table, { { "|", op_bor, "|" } } },
1024 { eval_table, { { "^", op_bxor, NULL } } },
1025 { eval_table, { { "&", op_band, "&" } } },
1026 { eval_table, { { "==", op_eq, NULL },
1027 { "!=", op_ne, NULL } } },
1028 { eval_table, { { "<=", op_le, NULL },
1029 { ">=", op_ge, NULL },
1030 { "<", op_lt, "<=" },
1031 { ">", op_gt, ">=" } } },
1032 { eval_table, { { "<<", op_blsh, NULL },
1033 { ">>", op_brsh, NULL } } },
1034 { eval_table, { { "+", op_add, NULL },
1035 { "-", op_sub, NULL } } },
1036 { eval_unary, { { "*", op_mul, NULL },
1037 { "/", op_div, NULL },
1038 { "%", op_mod, NULL } } },
1039 };
1040
1041 /* Current operator precedence level */
prec(const struct ops * ops)1042 static long prec(const struct ops *ops)
1043 {
1044 return (ops - eval_ops);
1045 }
1046
1047 /*
1048 * Function for evaluating the innermost parts of expressions,
1049 * viz. !expr (expr) number defined(symbol) symbol
1050 * We reset the constexpr flag in the last two cases.
1051 */
1052 static Linetype
eval_unary(const struct ops * ops,long * valp,const char ** cpp)1053 eval_unary(const struct ops *ops, long *valp, const char **cpp)
1054 {
1055 const char *cp;
1056 char *ep;
1057 struct macro *sym;
1058 bool defparen;
1059 Linetype lt;
1060
1061 cp = skipcomment(*cpp);
1062 if (*cp == '!') {
1063 debug("eval%d !", prec(ops));
1064 cp++;
1065 lt = eval_unary(ops, valp, &cp);
1066 if (lt == LT_ERROR)
1067 return (LT_ERROR);
1068 if (lt != LT_IF) {
1069 *valp = !*valp;
1070 lt = *valp ? LT_TRUE : LT_FALSE;
1071 }
1072 } else if (*cp == '~') {
1073 debug("eval%d ~", prec(ops));
1074 cp++;
1075 lt = eval_unary(ops, valp, &cp);
1076 if (lt == LT_ERROR)
1077 return (LT_ERROR);
1078 if (lt != LT_IF) {
1079 *valp = ~(*valp);
1080 lt = *valp ? LT_TRUE : LT_FALSE;
1081 }
1082 } else if (*cp == '-') {
1083 debug("eval%d -", prec(ops));
1084 cp++;
1085 lt = eval_unary(ops, valp, &cp);
1086 if (lt == LT_ERROR)
1087 return (LT_ERROR);
1088 if (lt != LT_IF) {
1089 *valp = -(*valp);
1090 lt = *valp ? LT_TRUE : LT_FALSE;
1091 }
1092 } else if (*cp == '(') {
1093 cp++;
1094 debug("eval%d (", prec(ops));
1095 lt = eval_table(eval_ops, valp, &cp);
1096 if (lt == LT_ERROR)
1097 return (LT_ERROR);
1098 cp = skipcomment(cp);
1099 if (*cp++ != ')')
1100 return (LT_ERROR);
1101 } else if (isdigit((unsigned char)*cp)) {
1102 debug("eval%d number", prec(ops));
1103 *valp = strtol(cp, &ep, 0);
1104 if (ep == cp)
1105 return (LT_ERROR);
1106 lt = *valp ? LT_TRUE : LT_FALSE;
1107 cp = ep;
1108 } else if (matchsym("defined", cp) != NULL) {
1109 cp = skipcomment(cp+7);
1110 if (*cp == '(') {
1111 cp = skipcomment(cp+1);
1112 defparen = true;
1113 } else {
1114 defparen = false;
1115 }
1116 sym = findsym(&cp);
1117 cp = skipcomment(cp);
1118 if (defparen && *cp++ != ')') {
1119 debug("eval%d defined missing ')'", prec(ops));
1120 return (LT_ERROR);
1121 }
1122 if (sym == NULL) {
1123 debug("eval%d defined unknown", prec(ops));
1124 lt = LT_IF;
1125 } else {
1126 debug("eval%d defined %s", prec(ops), sym->name);
1127 *valp = (sym->value != NULL);
1128 lt = *valp ? LT_TRUE : LT_FALSE;
1129 }
1130 constexpr = false;
1131 } else if (!endsym(*cp)) {
1132 debug("eval%d symbol", prec(ops));
1133 sym = findsym(&cp);
1134 if (sym == NULL) {
1135 lt = LT_IF;
1136 cp = skipargs(cp);
1137 } else if (sym->value == NULL) {
1138 *valp = 0;
1139 lt = LT_FALSE;
1140 } else {
1141 *valp = strtol(sym->value, &ep, 0);
1142 if (*ep != '\0' || ep == sym->value)
1143 return (LT_ERROR);
1144 lt = *valp ? LT_TRUE : LT_FALSE;
1145 cp = skipargs(cp);
1146 }
1147 constexpr = false;
1148 } else {
1149 debug("eval%d bad expr", prec(ops));
1150 return (LT_ERROR);
1151 }
1152
1153 *cpp = cp;
1154 debug("eval%d = %d", prec(ops), *valp);
1155 return (lt);
1156 }
1157
1158 /*
1159 * Table-driven evaluation of binary operators.
1160 */
1161 static Linetype
eval_table(const struct ops * ops,long * valp,const char ** cpp)1162 eval_table(const struct ops *ops, long *valp, const char **cpp)
1163 {
1164 const struct op *op;
1165 const char *cp;
1166 long val = 0;
1167 Linetype lt, rt;
1168
1169 debug("eval%d", prec(ops));
1170 cp = *cpp;
1171 lt = ops->inner(ops+1, valp, &cp);
1172 if (lt == LT_ERROR)
1173 return (LT_ERROR);
1174 for (;;) {
1175 cp = skipcomment(cp);
1176 for (op = ops->op; op->str != NULL; op++) {
1177 if (strncmp(cp, op->str, strlen(op->str)) == 0) {
1178 /* assume only one-char operators have stop chars */
1179 if (op->stop != NULL && cp[1] != '\0' &&
1180 strchr(op->stop, cp[1]) != NULL)
1181 continue;
1182 else
1183 break;
1184 }
1185 }
1186 if (op->str == NULL)
1187 break;
1188 cp += strlen(op->str);
1189 debug("eval%d %s", prec(ops), op->str);
1190 rt = ops->inner(ops+1, &val, &cp);
1191 if (rt == LT_ERROR)
1192 return (LT_ERROR);
1193 lt = op->fn(valp, lt, *valp, rt, val);
1194 }
1195
1196 *cpp = cp;
1197 debug("eval%d = %d", prec(ops), *valp);
1198 debug("eval%d lt = %s", prec(ops), linetype_name[lt]);
1199 return (lt);
1200 }
1201
1202 /*
1203 * Evaluate the expression on a #if or #elif line. If we can work out
1204 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1205 * return just a generic LT_IF.
1206 */
1207 static Linetype
ifeval(const char ** cpp)1208 ifeval(const char **cpp)
1209 {
1210 Linetype ret;
1211 long val = 0;
1212
1213 debug("eval %s", *cpp);
1214 constexpr = killconsts ? false : true;
1215 ret = eval_table(eval_ops, &val, cpp);
1216 debug("eval = %d", val);
1217 return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
1218 }
1219
1220 /*
1221 * Read a line and examine its initial part to determine if it is a
1222 * preprocessor directive. Returns NULL on EOF, or a pointer to a
1223 * preprocessor directive name, or a pointer to the zero byte at the
1224 * end of the line.
1225 */
1226 static const char *
skiphash(void)1227 skiphash(void)
1228 {
1229 const char *cp;
1230
1231 linenum++;
1232 if (fgets(tline, MAXLINE, input) == NULL) {
1233 if (ferror(input))
1234 err(2, "can't read %s", filename);
1235 else
1236 return (NULL);
1237 }
1238 cp = skipcomment(tline);
1239 if (linestate == LS_START && *cp == '#') {
1240 linestate = LS_HASH;
1241 return (skipcomment(cp + 1));
1242 } else if (*cp == '\0') {
1243 return (cp);
1244 } else {
1245 return (skipline(cp));
1246 }
1247 }
1248
1249 /*
1250 * Mark a line dirty and consume the rest of it, keeping track of the
1251 * lexical state.
1252 */
1253 static const char *
skipline(const char * cp)1254 skipline(const char *cp)
1255 {
1256 const char *pcp;
1257 if (*cp != '\0')
1258 linestate = LS_DIRTY;
1259 while (*cp != '\0') {
1260 cp = skipcomment(pcp = cp);
1261 if (pcp == cp)
1262 cp++;
1263 }
1264 return (cp);
1265 }
1266
1267 /*
1268 * Skip over comments, strings, and character literals and stop at the
1269 * next character position that is not whitespace. Between calls we keep
1270 * the comment state in the global variable incomment, and we also adjust
1271 * the global variable linestate when we see a newline.
1272 * XXX: doesn't cope with the buffer splitting inside a state transition.
1273 */
1274 static const char *
skipcomment(const char * cp)1275 skipcomment(const char *cp)
1276 {
1277 if (text || ignoring[depth]) {
1278 for (; isspace((unsigned char)*cp); cp++)
1279 if (*cp == '\n')
1280 linestate = LS_START;
1281 return (cp);
1282 }
1283 while (*cp != '\0')
1284 /* don't reset to LS_START after a line continuation */
1285 if (strncmp(cp, "\\\r\n", 3) == 0)
1286 cp += 3;
1287 else if (strncmp(cp, "\\\n", 2) == 0)
1288 cp += 2;
1289 else switch (incomment) {
1290 case NO_COMMENT:
1291 if (strncmp(cp, "/\\\r\n", 4) == 0) {
1292 incomment = STARTING_COMMENT;
1293 cp += 4;
1294 } else if (strncmp(cp, "/\\\n", 3) == 0) {
1295 incomment = STARTING_COMMENT;
1296 cp += 3;
1297 } else if (strncmp(cp, "/*", 2) == 0) {
1298 incomment = C_COMMENT;
1299 cp += 2;
1300 } else if (strncmp(cp, "//", 2) == 0) {
1301 incomment = CXX_COMMENT;
1302 cp += 2;
1303 } else if (strncmp(cp, "\'", 1) == 0) {
1304 incomment = CHAR_LITERAL;
1305 linestate = LS_DIRTY;
1306 cp += 1;
1307 } else if (strncmp(cp, "\"", 1) == 0) {
1308 incomment = STRING_LITERAL;
1309 linestate = LS_DIRTY;
1310 cp += 1;
1311 } else if (strncmp(cp, "\n", 1) == 0) {
1312 linestate = LS_START;
1313 cp += 1;
1314 } else if (strchr(" \r\t", *cp) != NULL) {
1315 cp += 1;
1316 } else
1317 return (cp);
1318 continue;
1319 case CXX_COMMENT:
1320 if (strncmp(cp, "\n", 1) == 0) {
1321 incomment = NO_COMMENT;
1322 linestate = LS_START;
1323 }
1324 cp += 1;
1325 continue;
1326 case CHAR_LITERAL:
1327 case STRING_LITERAL:
1328 if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
1329 (incomment == STRING_LITERAL && cp[0] == '\"')) {
1330 incomment = NO_COMMENT;
1331 cp += 1;
1332 } else if (cp[0] == '\\') {
1333 if (cp[1] == '\0')
1334 cp += 1;
1335 else
1336 cp += 2;
1337 } else if (strncmp(cp, "\n", 1) == 0) {
1338 if (incomment == CHAR_LITERAL)
1339 error("Unterminated char literal");
1340 else
1341 error("Unterminated string literal");
1342 } else
1343 cp += 1;
1344 continue;
1345 case C_COMMENT:
1346 if (strncmp(cp, "*\\\r\n", 4) == 0) {
1347 incomment = FINISHING_COMMENT;
1348 cp += 4;
1349 } else if (strncmp(cp, "*\\\n", 3) == 0) {
1350 incomment = FINISHING_COMMENT;
1351 cp += 3;
1352 } else if (strncmp(cp, "*/", 2) == 0) {
1353 incomment = NO_COMMENT;
1354 cp += 2;
1355 } else
1356 cp += 1;
1357 continue;
1358 case STARTING_COMMENT:
1359 if (*cp == '*') {
1360 incomment = C_COMMENT;
1361 cp += 1;
1362 } else if (*cp == '/') {
1363 incomment = CXX_COMMENT;
1364 cp += 1;
1365 } else {
1366 incomment = NO_COMMENT;
1367 linestate = LS_DIRTY;
1368 }
1369 continue;
1370 case FINISHING_COMMENT:
1371 if (*cp == '/') {
1372 incomment = NO_COMMENT;
1373 cp += 1;
1374 } else
1375 incomment = C_COMMENT;
1376 continue;
1377 default:
1378 abort(); /* bug */
1379 }
1380 return (cp);
1381 }
1382
/*
 * Skip macro arguments: if the next token is '(', skip to just past the
 * matching ')'.  If there is no argument list the result points at the
 * next token; if the parentheses never balance the original position is
 * returned.
 */
static const char *
skipargs(const char *cp)
{
	const char *start = cp;
	int nesting = 0;

	cp = skipcomment(cp);
	if (*cp != '(')
		return (cp);
	do {
		switch (*cp) {
		case '(':
			nesting++;
			break;
		case ')':
			nesting--;
			break;
		default:
			break;
		}
		cp = skipcomment(cp+1);
	} while (nesting != 0 && *cp != '\0');
	/* Unbalanced: rewind and re-detect the syntax error later. */
	return (nesting == 0 ? cp : start);
}
1407
/*
 * Skip over an identifier: advance to the first character for which
 * endsym() is true and return a pointer to it.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
1418
1419 /*
1420 * Skip whitespace and take a copy of any following identifier.
1421 */
1422 static const char *
getsym(const char ** cpp)1423 getsym(const char **cpp)
1424 {
1425 const char *cp = *cpp, *sym;
1426
1427 cp = skipcomment(cp);
1428 cp = skipsym(sym = cp);
1429 if (cp == sym)
1430 return NULL;
1431 *cpp = cp;
1432 return (xstrdup(sym, cp));
1433 }
1434
1435 /*
1436 * Check that s (a symbol) matches the start of t, and that the
1437 * following character in t is not a symbol character. Returns a
1438 * pointer to the following character in t if there is a match,
1439 * otherwise NULL.
1440 */
1441 static const char *
matchsym(const char * s,const char * t)1442 matchsym(const char *s, const char *t)
1443 {
1444 while (*s != '\0' && *t != '\0')
1445 if (*s != *t)
1446 return (NULL);
1447 else
1448 ++s, ++t;
1449 if (*s == '\0' && endsym(*t))
1450 return(t);
1451 else
1452 return(NULL);
1453 }
1454
1455 /*
1456 * Look for the symbol in the symbol table. If it is found, we return
1457 * the symbol table index, else we return -1.
1458 */
1459 static struct macro *
findsym(const char ** strp)1460 findsym(const char **strp)
1461 {
1462 const char *str;
1463 char *strkey;
1464 struct macro key, *res;
1465
1466 str = *strp;
1467 *strp = skipsym(str);
1468 if (symlist) {
1469 if (*strp == str)
1470 return (NULL);
1471 if (symdepth && firstsym)
1472 printf("%s%3d", zerosyms ? "" : "\n", depth);
1473 firstsym = zerosyms = false;
1474 printf("%s%.*s%s",
1475 symdepth ? " " : "",
1476 (int)(*strp-str), str,
1477 symdepth ? "" : "\n");
1478 /* we don't care about the value of the symbol */
1479 return (NULL);
1480 }
1481
1482 /*
1483 * 'str' just points into the current mid-parse input and is not
1484 * nul-terminated. We know the length of the symbol, *strp - str, but
1485 * need to provide a nul-terminated lookup key for RB_FIND's comparison
1486 * function. Create one here.
1487 */
1488 strkey = malloc(*strp - str + 1);
1489 memcpy(strkey, str, *strp - str);
1490 strkey[*strp - str] = 0;
1491
1492 key.name = strkey;
1493 res = RB_FIND(MACROMAP, ¯o_tree, &key);
1494 if (res != NULL)
1495 debugsym("findsym", res);
1496
1497 free(strkey);
1498 return (res);
1499 }
1500
1501 /*
1502 * Resolve indirect symbol values to their final definitions.
1503 */
1504 static void
indirectsym(void)1505 indirectsym(void)
1506 {
1507 const char *cp;
1508 int changed;
1509 struct macro *sym, *ind;
1510
1511 do {
1512 changed = 0;
1513 RB_FOREACH(sym, MACROMAP, ¯o_tree) {
1514 if (sym->value == NULL)
1515 continue;
1516 cp = sym->value;
1517 ind = findsym(&cp);
1518 if (ind == NULL || ind == sym ||
1519 *cp != '\0' ||
1520 ind->value == NULL ||
1521 ind->value == sym->value)
1522 continue;
1523 debugsym("indir...", sym);
1524 sym->value = ind->value;
1525 debugsym("...ectsym", sym);
1526 changed++;
1527 }
1528 } while (changed);
1529 }
1530
/*
 * Add a symbol to the symbol table, specified with the format sym=val.
 * "sym=val" is only accepted when definethis is set; symval is then
 * split in place at the '='.  A bare "sym" gets the value "1" when
 * defining and NULL otherwise.  Anything else is handed to usage().
 */
static void
addsym1(bool ignorethis, bool definethis, char *symval)
{
	const char *sym = symval;
	const char *val = skipsym(sym);

	if (*val == '\0') {
		val = definethis ? "1" : NULL;
	} else if (definethis && *val == '=') {
		/* terminate the name where the '=' was */
		symval[val - sym] = '\0';
		val++;
	} else {
		usage();
	}
	addsym2(ignorethis, sym, val);
}
1551
1552 /*
1553 * Add a symbol to the symbol table.
1554 */
1555 static void
addsym2(bool ignorethis,const char * symname,const char * val)1556 addsym2(bool ignorethis, const char *symname, const char *val)
1557 {
1558 const char *cp = symname;
1559 struct macro *sym, *r;
1560
1561 sym = findsym(&cp);
1562 if (sym == NULL) {
1563 sym = calloc(1, sizeof(*sym));
1564 sym->ignore = ignorethis;
1565 sym->name = symname;
1566 sym->value = val;
1567 r = RB_INSERT(MACROMAP, ¯o_tree, sym);
1568 assert(r == NULL);
1569 }
1570 debugsym("addsym", sym);
1571 }
1572
1573 static void
debugsym(const char * why,const struct macro * sym)1574 debugsym(const char *why, const struct macro *sym)
1575 {
1576 debug("%s %s%c%s", why, sym->name,
1577 sym->value ? '=' : ' ',
1578 sym->value ? sym->value : "undef");
1579 }
1580
1581 /*
1582 * Add symbols to the symbol table from a file containing
1583 * #define and #undef preprocessor directives.
1584 */
1585 static void
defundefile(const char * fn)1586 defundefile(const char *fn)
1587 {
1588 filename = fn;
1589 input = fopen(fn, "rb");
1590 if (input == NULL)
1591 err(2, "can't open %s", fn);
1592 linenum = 0;
1593 while (defundef())
1594 ;
1595 if (ferror(input))
1596 err(2, "can't read %s", filename);
1597 else
1598 fclose(input);
1599 if (incomment)
1600 error("EOF in comment");
1601 }
1602
1603 /*
1604 * Read and process one #define or #undef directive
1605 */
1606 static bool
defundef(void)1607 defundef(void)
1608 {
1609 const char *cp, *kw, *sym, *val, *end;
1610
1611 cp = skiphash();
1612 if (cp == NULL)
1613 return (false);
1614 if (*cp == '\0')
1615 goto done;
1616 /* strip trailing whitespace, and do a fairly rough check to
1617 avoid unsupported multi-line preprocessor directives */
1618 end = cp + strlen(cp);
1619 while (end > tline && strchr(" \t\n\r", end[-1]) != NULL)
1620 --end;
1621 if (end > tline && end[-1] == '\\')
1622 Eioccc();
1623
1624 kw = cp;
1625 if ((cp = matchsym("define", kw)) != NULL) {
1626 sym = getsym(&cp);
1627 if (sym == NULL)
1628 error("Missing macro name in #define");
1629 if (*cp == '(') {
1630 val = "1";
1631 } else {
1632 cp = skipcomment(cp);
1633 val = (cp < end) ? xstrdup(cp, end) : "";
1634 }
1635 debug("#define");
1636 addsym2(false, sym, val);
1637 } else if ((cp = matchsym("undef", kw)) != NULL) {
1638 sym = getsym(&cp);
1639 if (sym == NULL)
1640 error("Missing macro name in #undef");
1641 cp = skipcomment(cp);
1642 debug("#undef");
1643 addsym2(false, sym, NULL);
1644 } else {
1645 error("Unrecognized preprocessor directive");
1646 }
1647 skipline(cp);
1648 done:
1649 debug("parser line %d state %s comment %s line", linenum,
1650 comment_name[incomment], linestate_name[linestate]);
1651 return (true);
1652 }
1653
/*
 * Concatenate two strings into newly allocated memory.  Failure to
 * format or to allocate is fatal (reported with err()).
 */
static char *
astrcat(const char *s1, const char *s2)
{
	char *joined;
	size_t size;
	int len;

	/* first pass only measures the result */
	len = snprintf(NULL, 0, "%s%s", s1, s2);
	if (len < 0)
		err(2, "snprintf");
	size = (size_t)len + 1;
	joined = malloc(size);
	if (joined == NULL)
		err(2, "malloc");
	snprintf(joined, size, "%s%s", s1, s2);
	return (joined);
}
1674
/*
 * Duplicate the segment [start, end) of a string into newly allocated,
 * nul-terminated memory.  Allocation failure is fatal (reported with
 * err()); end before start is treated as a bug and aborts.
 */
static const char *
xstrdup(const char *start, const char *end)
{
	size_t len;
	char *copy;

	if (end < start)
		abort(); /* bug */
	len = (size_t)(end - start);
	copy = malloc(len + 1);
	if (copy == NULL)
		err(2, "malloc");
	memcpy(copy, start, len);
	copy[len] = '\0';
	return (copy);
}
1692
1693 /*
1694 * Diagnostics.
1695 */
1696 static void
debug(const char * msg,...)1697 debug(const char *msg, ...)
1698 {
1699 va_list ap;
1700
1701 if (debugging) {
1702 va_start(ap, msg);
1703 vwarnx(msg, ap);
1704 va_end(ap);
1705 }
1706 }
1707
1708 static void
error(const char * msg)1709 error(const char *msg)
1710 {
1711 if (depth == 0)
1712 warnx("%s: %d: %s", filename, linenum, msg);
1713 else
1714 warnx("%s: %d: %s (#if line %d depth %d)",
1715 filename, linenum, msg, stifline[depth], depth);
1716 closeio();
1717 errx(2, "Output may be truncated");
1718 }
1719