xref: /freebsd-12.1/contrib/flex/parse.y (revision e1fc1971)
1 /* parse.y - parser for flex input */
2 
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
5 %token OPT_TABLES
6 
7 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
8 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
9 
10 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
11 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
12 
13 %left CCL_OP_DIFF CCL_OP_UNION
14 
/*
 * POSIX and AT&T lex place the precedence of the repeat operator, {},
 * below that of concatenation.  Thus, ab{3} is ababab.  Most other POSIX
 * utilities use an Extended Regular Expression (ERE) precedence that
 * places the repeat operator higher than concatenation, which causes
 * ab{3} to yield abbb.
 *
 * In order to support both the POSIX/AT&T precedence and the flex
 * precedence, we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses which
 * tokens to return based on whether posix_compat or lex_compat is
 * specified.  Specifying either posix_compat or lex_compat causes flex
 * to parse scanner files as per the AT&T and POSIX-mandated behavior.
 */
30 
31 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
32 
33 
34 %{
35 /*  Copyright (c) 1990 The Regents of the University of California. */
36 /*  All rights reserved. */
37 
38 /*  This code is derived from software contributed to Berkeley by */
39 /*  Vern Paxson. */
40 
41 /*  The United States Government has rights in this work pursuant */
42 /*  to contract no. DE-AC03-76SF00098 between the United States */
43 /*  Department of Energy and the University of California. */
44 
45 /*  This file is part of flex. */
46 
47 /*  Redistribution and use in source and binary forms, with or without */
48 /*  modification, are permitted provided that the following conditions */
49 /*  are met: */
50 
51 /*  1. Redistributions of source code must retain the above copyright */
52 /*     notice, this list of conditions and the following disclaimer. */
53 /*  2. Redistributions in binary form must reproduce the above copyright */
54 /*     notice, this list of conditions and the following disclaimer in the */
55 /*     documentation and/or other materials provided with the distribution. */
56 
57 /*  Neither the name of the University nor the names of its contributors */
58 /*  may be used to endorse or promote products derived from this software */
59 /*  without specific prior written permission. */
60 
61 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
62 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
63 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
64 /*  PURPOSE. */
65 
66 #include "flexdef.h"
67 #include "tables.h"
68 
69 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
70 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
71 
72 int *scon_stk;
73 int scon_stk_ptr;
74 
75 static int madeany = false;  /* whether we've made the '.' character class */
76 static int ccldot, cclany;
77 int previous_continued_action;	/* whether the previous rule's action was '|' */
78 
79 #define format_warn3(fmt, a1, a2) \
80 	do{ \
81         char fw3_msg[MAXLINE];\
82         snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
83         warn( fw3_msg );\
84 	}while(0)
85 
86 /* Expand a POSIX character class expression. */
87 #define CCL_EXPR(func) \
88 	do{ \
89 	int c; \
90 	for ( c = 0; c < csize; ++c ) \
91 		if ( isascii(c) && func(c) ) \
92 			ccladd( currccl, c ); \
93 	}while(0)
94 
95 /* negated class */
96 #define CCL_NEG_EXPR(func) \
97 	do{ \
98 	int c; \
99 	for ( c = 0; c < csize; ++c ) \
100 		if ( !func(c) ) \
101 			ccladd( currccl, c ); \
102 	}while(0)
103 
104 /* While POSIX defines isblank(), it's not ANSI C. */
105 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
106 
107 /* On some over-ambitious machines, such as DEC Alpha's, the default
108  * token type is "long" instead of "int"; this leads to problems with
109  * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
110  * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
111  * following should ensure that the default token type is "int".
112  */
113 #define YYSTYPE int
114 
115 %}
116 
117 %%
118 goal		:  initlex sect1 sect1end sect2 initforrule
119 			{ /* add default rule */
120 			int def_rule;
121 
122 			pat = cclinit();
123 			cclnegate( pat );
124 
125 			def_rule = mkstate( -pat );
126 
127 			/* Remember the number of the default rule so we
128 			 * don't generate "can't match" warnings for it.
129 			 */
130 			default_rule = num_rules;
131 
132 			finish_rule( def_rule, false, 0, 0, 0);
133 
134 			for ( i = 1; i <= lastsc; ++i )
135 				scset[i] = mkbranch( scset[i], def_rule );
136 
137 			if ( spprdflt )
138 				add_action(
139 				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
140 			else
141 				add_action( "ECHO" );
142 
143 			add_action( ";\n\tYY_BREAK\n" );
144 			}
145 		;
146 
147 initlex		:
148 			{ /* initialize for processing rules */
149 
150 			/* Create default DFA start condition. */
151 			scinstal( "INITIAL", false );
152 			}
153 		;
154 
155 sect1		:  sect1 startconddecl namelist1
156 		|  sect1 options
157 		|
158 		|  error
159 			{ synerr( _("unknown error processing section 1") ); }
160 		;
161 
162 sect1end	:  SECTEND
163 			{
164 			check_options();
165 			scon_stk = allocate_integer_array( lastsc + 1 );
166 			scon_stk_ptr = 0;
167 			}
168 		;
169 
170 startconddecl	:  SCDECL
171 			{ xcluflg = false; }
172 
173 		|  XSCDECL
174 			{ xcluflg = true; }
175 		;
176 
177 namelist1	:  namelist1 NAME
178 			{ scinstal( nmstr, xcluflg ); }
179 
180 		|  NAME
181 			{ scinstal( nmstr, xcluflg ); }
182 
183 		|  error
184 			{ synerr( _("bad start condition list") ); }
185 		;
186 
187 options		:  OPTION_OP optionlist
188 		;
189 
190 optionlist	:  optionlist option
191 		|
192 		;
193 
194 option		:  OPT_OUTFILE '=' NAME
195 			{
196 			outfilename = copy_string( nmstr );
197 			did_outfilename = 1;
198 			}
199 		|  OPT_EXTRA_TYPE '=' NAME
200 			{ extra_type = copy_string( nmstr ); }
201 		|  OPT_PREFIX '=' NAME
202 			{ prefix = copy_string( nmstr ); }
203 		|  OPT_YYCLASS '=' NAME
204 			{ yyclass = copy_string( nmstr ); }
205 		|  OPT_HEADER '=' NAME
206 			{ headerfilename = copy_string( nmstr ); }
207 	    |  OPT_TABLES '=' NAME
208             { tablesext = true; tablesfilename = copy_string( nmstr ); }
209 		;
210 
211 sect2		:  sect2 scon initforrule flexrule '\n'
212 			{ scon_stk_ptr = $2; }
213 		|  sect2 scon '{' sect2 '}'
214 			{ scon_stk_ptr = $2; }
215 		|
216 		;
217 
218 initforrule	:
219 			{
220 			/* Initialize for a parse of one rule. */
221 			trlcontxt = variable_trail_rule = varlength = false;
222 			trailcnt = headcnt = rulelen = 0;
223 			current_state_type = STATE_NORMAL;
224 			previous_continued_action = continued_action;
225 			in_rule = true;
226 
227 			new_rule();
228 			}
229 		;
230 
231 flexrule	:  '^' rule
232 			{
233 			pat = $2;
234 			finish_rule( pat, variable_trail_rule,
235 				headcnt, trailcnt , previous_continued_action);
236 
237 			if ( scon_stk_ptr > 0 )
238 				{
239 				for ( i = 1; i <= scon_stk_ptr; ++i )
240 					scbol[scon_stk[i]] =
241 						mkbranch( scbol[scon_stk[i]],
242 								pat );
243 				}
244 
245 			else
246 				{
247 				/* Add to all non-exclusive start conditions,
248 				 * including the default (0) start condition.
249 				 */
250 
251 				for ( i = 1; i <= lastsc; ++i )
252 					if ( ! scxclu[i] )
253 						scbol[i] = mkbranch( scbol[i],
254 									pat );
255 				}
256 
257 			if ( ! bol_needed )
258 				{
259 				bol_needed = true;
260 
261 				if ( performance_report > 1 )
262 					pinpoint_message(
263 			"'^' operator results in sub-optimal performance" );
264 				}
265 			}
266 
267 		|  rule
268 			{
269 			pat = $1;
270 			finish_rule( pat, variable_trail_rule,
271 				headcnt, trailcnt , previous_continued_action);
272 
273 			if ( scon_stk_ptr > 0 )
274 				{
275 				for ( i = 1; i <= scon_stk_ptr; ++i )
276 					scset[scon_stk[i]] =
277 						mkbranch( scset[scon_stk[i]],
278 								pat );
279 				}
280 
281 			else
282 				{
283 				for ( i = 1; i <= lastsc; ++i )
284 					if ( ! scxclu[i] )
285 						scset[i] =
286 							mkbranch( scset[i],
287 								pat );
288 				}
289 			}
290 
291 		|  EOF_OP
292 			{
293 			if ( scon_stk_ptr > 0 )
294 				build_eof_action();
295 
296 			else
297 				{
298 				/* This EOF applies to all start conditions
299 				 * which don't already have EOF actions.
300 				 */
301 				for ( i = 1; i <= lastsc; ++i )
302 					if ( ! sceof[i] )
303 						scon_stk[++scon_stk_ptr] = i;
304 
305 				if ( scon_stk_ptr == 0 )
306 					warn(
307 			"all start conditions already have <<EOF>> rules" );
308 
309 				else
310 					build_eof_action();
311 				}
312 			}
313 
314 		|  error
315 			{ synerr( _("unrecognized rule") ); }
316 		;
317 
318 scon_stk_ptr	:
319 			{ $$ = scon_stk_ptr; }
320 		;
321 
322 scon		:  '<' scon_stk_ptr namelist2 '>'
323 			{ $$ = $2; }
324 
325 		|  '<' '*' '>'
326 			{
327 			$$ = scon_stk_ptr;
328 
329 			for ( i = 1; i <= lastsc; ++i )
330 				{
331 				int j;
332 
333 				for ( j = 1; j <= scon_stk_ptr; ++j )
334 					if ( scon_stk[j] == i )
335 						break;
336 
337 				if ( j > scon_stk_ptr )
338 					scon_stk[++scon_stk_ptr] = i;
339 				}
340 			}
341 
342 		|
343 			{ $$ = scon_stk_ptr; }
344 		;
345 
346 namelist2	:  namelist2 ',' sconname
347 
348 		|  sconname
349 
350 		|  error
351 			{ synerr( _("bad start condition list") ); }
352 		;
353 
354 sconname	:  NAME
355 			{
356 			if ( (scnum = sclookup( nmstr )) == 0 )
357 				format_pinpoint_message(
358 					"undeclared start condition %s",
359 					nmstr );
360 			else
361 				{
362 				for ( i = 1; i <= scon_stk_ptr; ++i )
363 					if ( scon_stk[i] == scnum )
364 						{
365 						format_warn(
366 							"<%s> specified twice",
367 							scname[scnum] );
368 						break;
369 						}
370 
371 				if ( i > scon_stk_ptr )
372 					scon_stk[++scon_stk_ptr] = scnum;
373 				}
374 			}
375 		;
376 
377 rule		:  re2 re
378 			{
379 			if ( transchar[lastst[$2]] != SYM_EPSILON )
380 				/* Provide final transition \now/ so it
381 				 * will be marked as a trailing context
382 				 * state.
383 				 */
384 				$2 = link_machines( $2,
385 						mkstate( SYM_EPSILON ) );
386 
387 			mark_beginning_as_normal( $2 );
388 			current_state_type = STATE_NORMAL;
389 
390 			if ( previous_continued_action )
391 				{
392 				/* We need to treat this as variable trailing
393 				 * context so that the backup does not happen
394 				 * in the action but before the action switch
395 				 * statement.  If the backup happens in the
396 				 * action, then the rules "falling into" this
397 				 * one's action will *also* do the backup,
398 				 * erroneously.
399 				 */
400 				if ( ! varlength || headcnt != 0 )
401 					warn(
402 		"trailing context made variable due to preceding '|' action" );
403 
404 				/* Mark as variable. */
405 				varlength = true;
406 				headcnt = 0;
407 
408 				}
409 
410 			if ( lex_compat || (varlength && headcnt == 0) )
411 				{ /* variable trailing context rule */
412 				/* Mark the first part of the rule as the
413 				 * accepting "head" part of a trailing
414 				 * context rule.
415 				 *
416 				 * By the way, we didn't do this at the
417 				 * beginning of this production because back
418 				 * then current_state_type was set up for a
419 				 * trail rule, and add_accept() can create
420 				 * a new state ...
421 				 */
422 				add_accept( $1,
423 					num_rules | YY_TRAILING_HEAD_MASK );
424 				variable_trail_rule = true;
425 				}
426 
427 			else
428 				trailcnt = rulelen;
429 
430 			$$ = link_machines( $1, $2 );
431 			}
432 
433 		|  re2 re '$'
434 			{ synerr( _("trailing context used twice") ); }
435 
436 		|  re '$'
437 			{
438 			headcnt = 0;
439 			trailcnt = 1;
440 			rulelen = 1;
441 			varlength = false;
442 
443 			current_state_type = STATE_TRAILING_CONTEXT;
444 
445 			if ( trlcontxt )
446 				{
447 				synerr( _("trailing context used twice") );
448 				$$ = mkstate( SYM_EPSILON );
449 				}
450 
451 			else if ( previous_continued_action )
452 				{
453 				/* See the comment in the rule for "re2 re"
454 				 * above.
455 				 */
456 				warn(
457 		"trailing context made variable due to preceding '|' action" );
458 
459 				varlength = true;
460 				}
461 
462 			if ( lex_compat || varlength )
463 				{
464 				/* Again, see the comment in the rule for
465 				 * "re2 re" above.
466 				 */
467 				add_accept( $1,
468 					num_rules | YY_TRAILING_HEAD_MASK );
469 				variable_trail_rule = true;
470 				}
471 
472 			trlcontxt = true;
473 
474 			eps = mkstate( SYM_EPSILON );
475 			$$ = link_machines( $1,
476 				link_machines( eps, mkstate( '\n' ) ) );
477 			}
478 
479 		|  re
480 			{
481 			$$ = $1;
482 
483 			if ( trlcontxt )
484 				{
485 				if ( lex_compat || (varlength && headcnt == 0) )
486 					/* Both head and trail are
487 					 * variable-length.
488 					 */
489 					variable_trail_rule = true;
490 				else
491 					trailcnt = rulelen;
492 				}
493 			}
494 		;
495 
496 
497 re		:  re '|' series
498 			{
499 			varlength = true;
500 			$$ = mkor( $1, $3 );
501 			}
502 
503 		|  series
504 			{ $$ = $1; }
505 		;
506 
507 
508 re2		:  re '/'
509 			{
510 			/* This rule is written separately so the
511 			 * reduction will occur before the trailing
512 			 * series is parsed.
513 			 */
514 
515 			if ( trlcontxt )
516 				synerr( _("trailing context used twice") );
517 			else
518 				trlcontxt = true;
519 
520 			if ( varlength )
521 				/* We hope the trailing context is
522 				 * fixed-length.
523 				 */
524 				varlength = false;
525 			else
526 				headcnt = rulelen;
527 
528 			rulelen = 0;
529 
530 			current_state_type = STATE_TRAILING_CONTEXT;
531 			$$ = $1;
532 			}
533 		;
534 
535 series		:  series singleton
536 			{
537 			/* This is where concatenation of adjacent patterns
538 			 * gets done.
539 			 */
540 			$$ = link_machines( $1, $2 );
541 			}
542 
543 		|  singleton
544 			{ $$ = $1; }
545 
546 		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
547 			{
548 			varlength = true;
549 
550 			if ( $3 > $5 || $3 < 0 )
551 				{
552 				synerr( _("bad iteration values") );
553 				$$ = $1;
554 				}
555 			else
556 				{
557 				if ( $3 == 0 )
558 					{
559 					if ( $5 <= 0 )
560 						{
561 						synerr(
562 						_("bad iteration values") );
563 						$$ = $1;
564 						}
565 					else
566 						$$ = mkopt(
567 							mkrep( $1, 1, $5 ) );
568 					}
569 				else
570 					$$ = mkrep( $1, $3, $5 );
571 				}
572 			}
573 
574 		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
575 			{
576 			varlength = true;
577 
578 			if ( $3 <= 0 )
579 				{
580 				synerr( _("iteration value must be positive") );
581 				$$ = $1;
582 				}
583 
584 			else
585 				$$ = mkrep( $1, $3, INFINITE_REPEAT );
586 			}
587 
588 		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
589 			{
590 			/* The series could be something like "(foo)",
591 			 * in which case we have no idea what its length
592 			 * is, so we punt here.
593 			 */
594 			varlength = true;
595 
596 			if ( $3 <= 0 )
597 				{
598 				  synerr( _("iteration value must be positive")
599 					  );
600 				$$ = $1;
601 				}
602 
603 			else
604 				$$ = link_machines( $1,
605 						copysingl( $1, $3 - 1 ) );
606 			}
607 
608 		;
609 
610 singleton	:  singleton '*'
611 			{
612 			varlength = true;
613 
614 			$$ = mkclos( $1 );
615 			}
616 
617 		|  singleton '+'
618 			{
619 			varlength = true;
620 			$$ = mkposcl( $1 );
621 			}
622 
623 		|  singleton '?'
624 			{
625 			varlength = true;
626 			$$ = mkopt( $1 );
627 			}
628 
629 		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
630 			{
631 			varlength = true;
632 
633 			if ( $3 > $5 || $3 < 0 )
634 				{
635 				synerr( _("bad iteration values") );
636 				$$ = $1;
637 				}
638 			else
639 				{
640 				if ( $3 == 0 )
641 					{
642 					if ( $5 <= 0 )
643 						{
644 						synerr(
645 						_("bad iteration values") );
646 						$$ = $1;
647 						}
648 					else
649 						$$ = mkopt(
650 							mkrep( $1, 1, $5 ) );
651 					}
652 				else
653 					$$ = mkrep( $1, $3, $5 );
654 				}
655 			}
656 
657 		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
658 			{
659 			varlength = true;
660 
661 			if ( $3 <= 0 )
662 				{
663 				synerr( _("iteration value must be positive") );
664 				$$ = $1;
665 				}
666 
667 			else
668 				$$ = mkrep( $1, $3, INFINITE_REPEAT );
669 			}
670 
671 		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
672 			{
673 			/* The singleton could be something like "(foo)",
674 			 * in which case we have no idea what its length
675 			 * is, so we punt here.
676 			 */
677 			varlength = true;
678 
679 			if ( $3 <= 0 )
680 				{
681 				synerr( _("iteration value must be positive") );
682 				$$ = $1;
683 				}
684 
685 			else
686 				$$ = link_machines( $1,
687 						copysingl( $1, $3 - 1 ) );
688 			}
689 
690 		|  '.'
691 			{
692 			if ( ! madeany )
693 				{
694 				/* Create the '.' character class. */
695                     ccldot = cclinit();
696                     ccladd( ccldot, '\n' );
697                     cclnegate( ccldot );
698 
699                     if ( useecs )
700                         mkeccl( ccltbl + cclmap[ccldot],
701                             ccllen[ccldot], nextecm,
702                             ecgroup, csize, csize );
703 
704 				/* Create the (?s:'.') character class. */
705                     cclany = cclinit();
706                     cclnegate( cclany );
707 
708                     if ( useecs )
709                         mkeccl( ccltbl + cclmap[cclany],
710                             ccllen[cclany], nextecm,
711                             ecgroup, csize, csize );
712 
713 				madeany = true;
714 				}
715 
716 			++rulelen;
717 
718             if (sf_dot_all())
719                 $$ = mkstate( -cclany );
720             else
721                 $$ = mkstate( -ccldot );
722 			}
723 
724 		|  fullccl
725 			{
726 				/* Sort characters for fast searching.
727 				 */
728 				qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
729 
730 			if ( useecs )
731 				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
732 					nextecm, ecgroup, csize, csize );
733 
734 			++rulelen;
735 
736 			if (ccl_has_nl[$1])
737 				rule_has_nl[num_rules] = true;
738 
739 			$$ = mkstate( -$1 );
740 			}
741 
742 		|  PREVCCL
743 			{
744 			++rulelen;
745 
746 			if (ccl_has_nl[$1])
747 				rule_has_nl[num_rules] = true;
748 
749 			$$ = mkstate( -$1 );
750 			}
751 
752 		|  '"' string '"'
753 			{ $$ = $2; }
754 
755 		|  '(' re ')'
756 			{ $$ = $2; }
757 
758 		|  CHAR
759 			{
760 			++rulelen;
761 
762 			if ($1 == nlch)
763 				rule_has_nl[num_rules] = true;
764 
765             if (sf_case_ins() && has_case($1))
766                 /* create an alternation, as in (a|A) */
767                 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
768             else
769                 $$ = mkstate( $1 );
770 			}
771 		;
772 fullccl:
773         fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
774     |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
775     |   braceccl
776     ;
777 
778 braceccl:
779 
780             '[' ccl ']' { $$ = $2; }
781 
782 		|  '[' '^' ccl ']'
783 			{
784 			cclnegate( $3 );
785 			$$ = $3;
786 			}
787 		;
788 
789 ccl		:  ccl CHAR '-' CHAR
790 			{
791 
792 			if (sf_case_ins())
793 			  {
794 
795 			    /* If one end of the range has case and the other
796 			     * does not, or the cases are different, then we're not
797 			     * sure what range the user is trying to express.
798 			     * Examples: [@-z] or [S-t]
799 			     */
800 			    if (has_case ($2) != has_case ($4)
801 				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
802 				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
803 			      format_warn3 (
804 			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
805 					    $2, $4);
806 
807 			    /* If the range spans uppercase characters but not
808 			     * lowercase (or vice-versa), then should we automatically
809 			     * include lowercase characters in the range?
810 			     * Example: [@-_] spans [a-z] but not [A-Z]
811 			     */
812 			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
813 			      format_warn3 (
814 			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
815 					    $2, $4);
816 			  }
817 
818 			if ( $2 > $4 )
819 				synerr( _("negative range in character class") );
820 
821 			else
822 				{
823 				for ( i = $2; i <= $4; ++i )
824 					ccladd( $1, i );
825 
826 				/* Keep track if this ccl is staying in
827 				 * alphabetical order.
828 				 */
829 				cclsorted = cclsorted && ($2 > lastchar);
830 				lastchar = $4;
831 
832                 /* Do it again for upper/lowercase */
833                 if (sf_case_ins() && has_case($2) && has_case($4)){
834                     $2 = reverse_case ($2);
835                     $4 = reverse_case ($4);
836 
837                     for ( i = $2; i <= $4; ++i )
838                         ccladd( $1, i );
839 
840                     cclsorted = cclsorted && ($2 > lastchar);
841                     lastchar = $4;
842                 }
843 
844 				}
845 
846 			$$ = $1;
847 			}
848 
849 		|  ccl CHAR
850 			{
851 			ccladd( $1, $2 );
852 			cclsorted = cclsorted && ($2 > lastchar);
853 			lastchar = $2;
854 
855             /* Do it again for upper/lowercase */
856             if (sf_case_ins() && has_case($2)){
857                 $2 = reverse_case ($2);
858                 ccladd ($1, $2);
859 
860                 cclsorted = cclsorted && ($2 > lastchar);
861                 lastchar = $2;
862             }
863 
864 			$$ = $1;
865 			}
866 
867 		|  ccl ccl_expr
868 			{
869 			/* Too hard to properly maintain cclsorted. */
870 			cclsorted = false;
871 			$$ = $1;
872 			}
873 
874 		|
875 			{
876 			cclsorted = true;
877 			lastchar = 0;
878 			currccl = $$ = cclinit();
879 			}
880 		;
881 
882 ccl_expr:
883            CCE_ALNUM	{ CCL_EXPR(isalnum); }
884 		|  CCE_ALPHA	{ CCL_EXPR(isalpha); }
885 		|  CCE_BLANK	{ CCL_EXPR(IS_BLANK); }
886 		|  CCE_CNTRL	{ CCL_EXPR(iscntrl); }
887 		|  CCE_DIGIT	{ CCL_EXPR(isdigit); }
888 		|  CCE_GRAPH	{ CCL_EXPR(isgraph); }
889 		|  CCE_LOWER	{
890                           CCL_EXPR(islower);
891                           if (sf_case_ins())
892                               CCL_EXPR(isupper);
893                         }
894 		|  CCE_PRINT	{ CCL_EXPR(isprint); }
895 		|  CCE_PUNCT	{ CCL_EXPR(ispunct); }
896 		|  CCE_SPACE	{ CCL_EXPR(isspace); }
897 		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
898 		|  CCE_UPPER	{
899                     CCL_EXPR(isupper);
900                     if (sf_case_ins())
901                         CCL_EXPR(islower);
902 				}
903 
904         |  CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
905 		|  CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
906 		|  CCE_NEG_BLANK	{ CCL_NEG_EXPR(IS_BLANK); }
907 		|  CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
908 		|  CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
909 		|  CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
910 		|  CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
911 		|  CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
912 		|  CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
913 		|  CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
914 		|  CCE_NEG_LOWER	{
915 				if ( sf_case_ins() )
916 					warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
917 				else
918 					CCL_NEG_EXPR(islower);
919 				}
920 		|  CCE_NEG_UPPER	{
921 				if ( sf_case_ins() )
922 					warn(_("[:^upper:] ambiguous in case insensitive scanner"));
923 				else
924 					CCL_NEG_EXPR(isupper);
925 				}
926 		;
927 
928 string		:  string CHAR
929 			{
930 			if ( $2 == nlch )
931 				rule_has_nl[num_rules] = true;
932 
933 			++rulelen;
934 
935             if (sf_case_ins() && has_case($2))
936                 $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
937             else
938                 $$ = mkstate ($2);
939 
940 			$$ = link_machines( $1, $$);
941 			}
942 
943 		|
944 			{ $$ = mkstate( SYM_EPSILON ); }
945 		;
946 
947 %%
948 
949 
950 /* build_eof_action - build the "<<EOF>>" action for the active start
951  *                    conditions
952  */
953 
954 void build_eof_action()
955 	{
956 	int i;
957 	char action_text[MAXLINE];
958 
959 	for ( i = 1; i <= scon_stk_ptr; ++i )
960 		{
961 		if ( sceof[scon_stk[i]] )
962 			format_pinpoint_message(
963 				"multiple <<EOF>> rules for start condition %s",
964 				scname[scon_stk[i]] );
965 
966 		else
967 			{
968 			sceof[scon_stk[i]] = true;
969 
970 			if (previous_continued_action /* && previous action was regular */)
971 				add_action("YY_RULE_SETUP\n");
972 
973 			snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
974 				scname[scon_stk[i]] );
975 			add_action( action_text );
976 			}
977 		}
978 
979 	line_directive_out( (FILE *) 0, 1 );
980 
981 	/* This isn't a normal rule after all - don't count it as
982 	 * such, so we don't have any holes in the rule numbering
983 	 * (which make generating "rule can never match" warnings
984 	 * more difficult.
985 	 */
986 	--num_rules;
987 	++num_eof_rules;
988 	}
989 
990 
991 /* format_synerr - write out formatted syntax error */
992 
format_synerr(msg,arg)993 void format_synerr( msg, arg )
994 const char *msg, arg[];
995 	{
996 	char errmsg[MAXLINE];
997 
998 	(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
999 	synerr( errmsg );
1000 	}
1001 
1002 
1003 /* synerr - report a syntax error */
1004 
synerr(str)1005 void synerr( str )
1006 const char *str;
1007 	{
1008 	syntaxerror = true;
1009 	pinpoint_message( str );
1010 	}
1011 
1012 
1013 /* format_warn - write out formatted warning */
1014 
format_warn(msg,arg)1015 void format_warn( msg, arg )
1016 const char *msg, arg[];
1017 	{
1018 	char warn_msg[MAXLINE];
1019 
1020 	snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1021 	warn( warn_msg );
1022 	}
1023 
1024 
1025 /* warn - report a warning, unless -w was given */
1026 
warn(str)1027 void warn( str )
1028 const char *str;
1029 	{
1030 	line_warning( str, linenum );
1031 	}
1032 
1033 /* format_pinpoint_message - write out a message formatted with one string,
1034  *			     pinpointing its location
1035  */
1036 
format_pinpoint_message(msg,arg)1037 void format_pinpoint_message( msg, arg )
1038 const char *msg, arg[];
1039 	{
1040 	char errmsg[MAXLINE];
1041 
1042 	snprintf( errmsg, sizeof(errmsg), msg, arg );
1043 	pinpoint_message( errmsg );
1044 	}
1045 
1046 
1047 /* pinpoint_message - write out a message, pinpointing its location */
1048 
pinpoint_message(str)1049 void pinpoint_message( str )
1050 const char *str;
1051 	{
1052 	line_pinpoint( str, linenum );
1053 	}
1054 
1055 
1056 /* line_warning - report a warning at a given line, unless -w was given */
1057 
line_warning(str,line)1058 void line_warning( str, line )
1059 const char *str;
1060 int line;
1061 	{
1062 	char warning[MAXLINE];
1063 
1064 	if ( ! nowarn )
1065 		{
1066 		snprintf( warning, sizeof(warning), "warning, %s", str );
1067 		line_pinpoint( warning, line );
1068 		}
1069 	}
1070 
1071 
1072 /* line_pinpoint - write out a message, pinpointing it at the given line */
1073 
line_pinpoint(str,line)1074 void line_pinpoint( str, line )
1075 const char *str;
1076 int line;
1077 	{
1078 	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1079 	}
1080 
1081 
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 *
 * msg is the parser's error text; nothing is done with it here.
 */

void yyerror( const char *msg )
	{
	(void) msg;	/* intentionally unused */
	}
1090