1 /* parse.y - parser for flex input */
/* Terminal symbols used by the grammar: basic tokens (CHAR, NUMBER,
 * NAME, ...), the option keywords (OPTION_OP, OPT_*), and the character
 * class expression tokens CCE_* together with their negated CCE_NEG_*
 * forms.  CCL_OP_DIFF and CCL_OP_UNION are declared left-associative;
 * they are the set-difference and set-union operators of fullccl.
 */
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
7 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
8 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
10 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
11 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
13 %left CCL_OP_DIFF CCL_OP_UNION
/*
 * POSIX and AT&T lex place the precedence of the repeat operator, {},
 * below that of concatenation.  Thus, ab{3} is ababab.  Most other POSIX
 * utilities use an Extended Regular Expression (ERE) precedence that has
 * the repeat operator higher than concatenation.  This causes ab{3} to
 * yield abbb.
 *
 * In order to support both the POSIX and AT&T precedence and the flex
 * precedence, we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses which
 * tokens to return based on whether posix_compat or lex_compat is
 * specified.  Specifying either posix_compat or lex_compat will cause
 * flex to parse scanner files as per the AT&T and POSIX-mandated behavior.
 */
31 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
35 /* Copyright (c) 1990 The Regents of the University of California. */
36 /* All rights reserved. */
38 /* This code is derived from software contributed to Berkeley by */
41 /* The United States Government has rights in this work pursuant */
42 /* to contract no. DE-AC03-76SF00098 between the United States */
43 /* Department of Energy and the University of California. */
45 /* This file is part of flex. */
47 /* Redistribution and use in source and binary forms, with or without */
48 /* modification, are permitted provided that the following conditions */
51 /* 1. Redistributions of source code must retain the above copyright */
52 /* notice, this list of conditions and the following disclaimer. */
53 /* 2. Redistributions in binary form must reproduce the above copyright */
54 /* notice, this list of conditions and the following disclaimer in the */
55 /* documentation and/or other materials provided with the distribution. */
57 /* Neither the name of the University nor the names of its contributors */
58 /* may be used to endorse or promote products derived from this software */
59 /* without specific prior written permission. */
61 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
62 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
63 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* Scratch state shared by the grammar actions. */
int pat;		/* pattern machine handed to finish_rule() */
int scnum;		/* start condition number returned by sclookup() */
int eps;		/* epsilon state made with mkstate( SYM_EPSILON ) */
int headcnt;		/* reset to 0 for each rule; passed to finish_rule() */
int trailcnt;		/* reset to 0 for each rule; passed to finish_rule() */
int lastchar;		/* compared with new ccl members to maintain cclsorted */
int i;			/* general-purpose loop index */
int rulelen;		/* reset to 0 for each rule */

int trlcontxt;		/* reset to false for each rule */
int xcluflg;		/* passed to scinstal() with each start condition name */
int currccl;		/* character class being built; target of ccladd() */
int cclsorted;		/* whether currccl is still in alphabetical order */
int varlength;		/* reset to false for each rule */
int variable_trail_rule;	/* reset to false for each rule; passed to finish_rule() */

static int madeany = false;	/* whether we've made the '.' character class */
static int ccldot;		/* class used for '.' */
static int cclany;		/* class used for (?s:'.') */

int previous_continued_action;	/* whether the previous rule's action was '|' */
/* format_warn3 - format a message that takes two arguments.
 * The visible part of the macro declares a MAXLINE-byte buffer,
 * fw3_msg, and fills it with snprintf() from fmt, a1 and a2.
 * NOTE(review): the lines that open the macro body and that consume
 * fw3_msg afterwards are not present in this copy; presumably the
 * buffer is handed to the warning routine - confirm against the
 * original source.
 */
79 #define format_warn3(fmt, a1, a2) \
81 char fw3_msg
[MAXLINE
];\
82 snprintf
( fw3_msg
, MAXLINE
,(fmt
), (a1
), (a2
) );\
/* CCL_EXPR(func) - add to the current class (currccl) every character
 * code c in [0, csize) that is ASCII and for which func(c) is true.
 * func is a ctype-style predicate such as isalnum, or IS_BLANK.
 * NOTE(review): the declaration of c and the statement wrapper around
 * the loop are not visible in this copy.
 */
86 /* Expand a POSIX character class expression. */
87 #define CCL_EXPR(func) \
90 for
( c
= 0; c
< csize
; ++c
) \
91 if
( isascii
(c
) && func
(c
) ) \
92 ccladd
( currccl
, c
); \
/* CCL_NEG_EXPR(func) - counterpart of CCL_EXPR used by the CCE_NEG_*
 * alternatives: walks c over [0, csize) and adds characters to currccl.
 * NOTE(review): as shown here every character would be added; the
 * filtering test between the loop header and ccladd() (presumably
 * "not func(c)") is missing from this copy - confirm against the
 * original source.
 */
96 #define CCL_NEG_EXPR(func) \
99 for
( c
= 0; c
< csize
; ++c
) \
101 ccladd
( currccl
, c
); \
/* IS_BLANK(c) - true when c is a space or a horizontal tab.
 * Hand-rolled because isblank(), although specified by POSIX, is not
 * part of ANSI C.  The argument may be evaluated twice, so it must not
 * have side effects.
 */
#define IS_BLANK(c) \
	(((c) == ' ') || ((c) == '\t'))
/* On some over-ambitious machines, such as DEC Alphas, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yaccs I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE", so the
 * following should ensure that the default token type is "int".
 */
/* goal - start symbol: a complete scanner description.
 * Once every section has been parsed, the action installs the default
 * rule: it builds the machine def_rule, records its rule number in
 * default_rule, finishes it with finish_rule(), and branches the rule
 * set (scset) of every start condition 1..lastsc to it.  The default
 * action text is a "flex scanner jammed" fatal error or ECHO, followed
 * by YY_BREAK.
 * NOTE(review): the test choosing between the two default actions is
 * not visible in this copy.
 */
118 goal
: initlex sect1 sect1end sect2 initforrule
119 { /* add default rule */
125 def_rule
= mkstate
( -pat
);
127 /* Remember the number of the default rule so we
128 * don't generate "can't match" warnings for it.
130 default_rule
= num_rules
;
132 finish_rule
( def_rule
, false
, 0, 0, 0);
134 for
( i
= 1; i
<= lastsc
; ++i
)
135 scset
[i
] = mkbranch
( scset
[i
], def_rule
);
139 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
141 add_action
( "ECHO" );
143 add_action
( ";\n\tYY_BREAK\n" );
/* Initialization action: installs the default start condition
 * "INITIAL" through scinstal(), passing false for the flag that
 * namelist1 fills from xcluflg.
 * NOTE(review): the production header is missing from this copy;
 * presumably this is the action of initlex, the first symbol of goal.
 */
148 { /* initialize for processing rules */
150 /* Create default DFA start condition. */
151 scinstal
( "INITIAL", false
);
/* sect1 - section 1 of the input, built left-recursively from
 * start-condition declarations and their name lists.  One visible
 * action reports "unknown error processing section 1" via synerr().
 * The final statement allocates scon_stk with lastsc + 1 entries
 * (the stack is indexed from 1 elsewhere in this file).
 * NOTE(review): the remaining alternatives and the header of the
 * production owning the allocation (presumably sect1end) are not
 * visible in this copy.
 */
155 sect1
: sect1 startconddecl namelist1
159 { synerr
( _
("unknown error processing section 1") ); }
165 scon_stk
= allocate_integer_array
( lastsc
+ 1 );
/* startconddecl - keyword that introduces a start-condition declaration.
 * NOTE(review): only the SCDECL alternative is visible here; XSCDECL is
 * declared as a token, so a second alternative and the actions that set
 * xcluflg presumably follow - confirm against the original source.
 */
170 startconddecl
: SCDECL
/* namelist1 - list of start-condition names in a declaration.
 * Every NAME is installed with scinstal( nmstr, xcluflg ); a malformed
 * list is reported as "bad start condition list".
 */
177 namelist1
: namelist1 NAME
178 { scinstal
( nmstr
, xcluflg
); }
181 { scinstal
( nmstr
, xcluflg
); }
184 { synerr
( _
("bad start condition list") ); }
/* options / optionlist - an OPTION_OP token followed by a
 * left-recursive list of individual option settings.
 */
187 options
: OPTION_OP optionlist
190 optionlist
: optionlist option
/* option - one "keyword = NAME" setting.
 * Each alternative stores a copy of nmstr (made with copy_string()) in
 * the matching global: outfilename, extra_type, prefix, yyclass,
 * headerfilename or tablesfilename.  The OPT_TABLES alternative also
 * sets tablesext to true.
 */
194 option
: OPT_OUTFILE
'=' NAME
196 outfilename
= copy_string
( nmstr
);
199 | OPT_EXTRA_TYPE
'=' NAME
200 { extra_type
= copy_string
( nmstr
); }
201 | OPT_PREFIX
'=' NAME
202 { prefix
= copy_string
( nmstr
); }
203 | OPT_YYCLASS
'=' NAME
204 { yyclass
= copy_string
( nmstr
); }
205 | OPT_HEADER
'=' NAME
206 { headerfilename
= copy_string
( nmstr
); }
207 | OPT_TABLES
'=' NAME
208 { tablesext
= true
; tablesfilename
= copy_string
( nmstr
); }
/* sect2 - the rules section, built left-recursively.
 * Either a single rule qualified by scon and ended by a newline, or a
 * braced group of rules sharing one scon.  After each of them
 * scon_stk_ptr is set back to $2, the semantic value of scon.
 */
211 sect2
: sect2 scon initforrule flexrule
'\n'
212 { scon_stk_ptr
= $2; }
213 | sect2 scon
'{' sect2
'}'
214 { scon_stk_ptr
= $2; }
/* Per-rule reset: clears the trailing-context flags (trlcontxt,
 * variable_trail_rule, varlength) and the counters (trailcnt, headcnt,
 * rulelen), sets current_state_type to STATE_NORMAL, and saves
 * continued_action into previous_continued_action.
 * NOTE(review): the production header is missing from this copy;
 * presumably this is the action of initforrule.
 */
220 /* Initialize for a parse of one rule. */
221 trlcontxt
= variable_trail_rule
= varlength
= false
;
222 trailcnt
= headcnt
= rulelen
= 0;
223 current_state_type
= STATE_NORMAL
;
224 previous_continued_action
= continued_action
;
/* Actions of the rule-level production (referred to as flexrule by
 * sect2).  Visible fragments, in order:
 *  - a rule whose pattern is finished with finish_rule() and branched
 *    into scbol[] - for each start condition on scon_stk when the stack
 *    is non-empty, otherwise over 1..lastsc; with performance_report
 *    above 1 a message about the '^' operator is issued;
 *  - the same sequence using scset[] instead of scbol[];
 *  - an EOF rule: the visible loop pushes start conditions onto
 *    scon_stk, and a message is issued when the stack is still empty
 *    ("all start conditions already have <<EOF>> rules");
 *  - an error alternative reporting "unrecognized rule".
 * NOTE(review): the production heads, several conditions and the
 * bodies of some loops are missing from this copy.
 */
234 finish_rule
( pat
, variable_trail_rule
,
235 headcnt
, trailcnt
, previous_continued_action
);
237 if
( scon_stk_ptr
> 0 )
239 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
241 mkbranch
( scbol
[scon_stk
[i
]],
247 /* Add to all non-exclusive start conditions,
248 * including the default (0) start condition.
251 for
( i
= 1; i
<= lastsc
; ++i
)
253 scbol
[i
] = mkbranch
( scbol
[i
],
261 if
( performance_report
> 1 )
263 "'^' operator results in sub-optimal performance" );
270 finish_rule
( pat
, variable_trail_rule
,
271 headcnt
, trailcnt
, previous_continued_action
);
273 if
( scon_stk_ptr
> 0 )
275 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
277 mkbranch
( scset
[scon_stk
[i
]],
283 for
( i
= 1; i
<= lastsc
; ++i
)
293 if
( scon_stk_ptr
> 0 )
298 /* This EOF applies to all start conditions
299 * which don't already have EOF actions.
301 for
( i
= 1; i
<= lastsc
; ++i
)
303 scon_stk
[++scon_stk_ptr
] = i
;
305 if
( scon_stk_ptr
== 0 )
307 "all start conditions already have <<EOF>> rules" );
315 { synerr
( _
("unrecognized rule") ); }
/* Marker action: yields the current value of scon_stk_ptr as its
 * semantic value.
 * NOTE(review): the production header is missing; presumably this is
 * the nonterminal scon_stk_ptr that scon references.
 */
319 { $$
= scon_stk_ptr
; }
/* scon - start-condition qualifier of a rule, written between '<' and
 * '>'.  The visible loop walks every start condition 1..lastsc and
 * pushes onto scon_stk those not already on it; the last alternative
 * yields the current scon_stk_ptr.
 * NOTE(review): the head of the alternative owning the loop and the
 * action of the first alternative are missing from this copy.
 */
322 scon
: '<' scon_stk_ptr namelist2
'>'
329 for
( i
= 1; i
<= lastsc
; ++i
)
333 for
( j
= 1; j
<= scon_stk_ptr
; ++j
)
334 if
( scon_stk
[j
] == i
)
337 if
( j
> scon_stk_ptr
)
338 scon_stk
[++scon_stk_ptr
] = i
;
343 { $$
= scon_stk_ptr
; }
/* namelist2 - comma-separated list of start-condition names inside a
 * scon qualifier; a malformed list is reported as
 * "bad start condition list".
 */
346 namelist2
: namelist2
',' sconname
351 { synerr
( _
("bad start condition list") ); }
/* Action for one start-condition name inside a scon qualifier.
 * The name in nmstr is resolved with sclookup(); a result of 0 is
 * reported as an undeclared start condition.  Otherwise scon_stk is
 * searched: a name already present is reported as specified twice, and
 * a new one is pushed.
 * NOTE(review): the production header (presumably sconname) and the
 * else/brace structure are missing from this copy.
 */
356 if
( (scnum
= sclookup
( nmstr
)) == 0 )
357 format_pinpoint_message
(
358 "undeclared start condition %s",
362 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
363 if
( scon_stk
[i
] == scnum
)
366 "<%s> specified twice",
371 if
( i
> scon_stk_ptr
)
372 scon_stk
[++scon_stk_ptr
] = scnum
;
/* Actions that combine a pattern with its trailing context.
 * Visible fragments, in order:
 *  - head followed by trail: the trail ($2) is given a final epsilon
 *    transition if it lacks one, its beginning is marked normal and
 *    current_state_type returns to STATE_NORMAL.  The rule is flagged
 *    variable_trail_rule when the previous action was '|', or when
 *    lex_compat is set, or when varlength is set with headcnt == 0.
 *    The result is link_machines( $1, $2 );
 *  - error alternatives reporting "trailing context used twice";
 *  - a form whose trail is a newline: an epsilon state followed by
 *    mkstate( '\n' ) is linked after $1;
 *  - a form without trailing context that may still set
 *    variable_trail_rule.
 * NOTE(review): production heads, several conditions and the call
 * receiving "num_rules | YY_TRAILING_HEAD_MASK" are missing from this
 * copy.
 */
379 if
( transchar
[lastst
[$2]] != SYM_EPSILON
)
380 /* Provide final transition \now/ so it
381 * will be marked as a trailing context
384 $2 = link_machines
( $2,
385 mkstate
( SYM_EPSILON
) );
387 mark_beginning_as_normal
( $2 );
388 current_state_type
= STATE_NORMAL
;
390 if
( previous_continued_action
)
392 /* We need to treat this as variable trailing
393 * context so that the backup does not happen
394 * in the action but before the action switch
395 * statement. If the backup happens in the
396 * action, then the rules "falling into" this
397 * one's action will *also* do the backup,
400 if
( ! varlength || headcnt
!= 0 )
402 "trailing context made variable due to preceding '|' action" );
404 /* Mark as variable. */
410 if
( lex_compat ||
(varlength
&& headcnt
== 0) )
411 { /* variable trailing context rule */
412 /* Mark the first part of the rule as the
413 * accepting "head" part of a trailing
416 * By the way, we didn't do this at the
417 * beginning of this production because back
418 * then current_state_type was set up for a
419 * trail rule, and add_accept() can create
423 num_rules | YY_TRAILING_HEAD_MASK
);
424 variable_trail_rule
= true
;
430 $$
= link_machines
( $1, $2 );
434 { synerr
( _
("trailing context used twice") ); }
443 current_state_type
= STATE_TRAILING_CONTEXT
;
447 synerr
( _
("trailing context used twice") );
448 $$
= mkstate
( SYM_EPSILON
);
451 else if
( previous_continued_action
)
453 /* See the comment in the rule for "re2 re"
457 "trailing context made variable due to preceding '|' action" );
462 if
( lex_compat || varlength
)
464 /* Again, see the comment in the rule for
468 num_rules | YY_TRAILING_HEAD_MASK
);
469 variable_trail_rule
= true
;
474 eps
= mkstate
( SYM_EPSILON
);
475 $$
= link_machines
( $1,
476 link_machines
( eps
, mkstate
( '\n' ) ) );
485 if
( lex_compat ||
(varlength
&& headcnt
== 0) )
486 /* Both head and trail are
489 variable_trail_rule
= true
;
/* Fragment of the production that recognizes the head of a
 * trailing-context rule.  It reports "trailing context used twice" and
 * switches current_state_type to STATE_TRAILING_CONTEXT.
 * NOTE(review): the production header (presumably re2, as named by a
 * comment in the rule actions) and the surrounding conditions are
 * missing from this copy.
 */
510 /* This rule is written separately so the
511 * reduction will occur before the trailing
516 synerr
( _
("trailing context used twice") );
521 /* We hope the trailing context is
530 current_state_type
= STATE_TRAILING_CONTEXT
;
/* series - concatenation of singletons, plus the repeat operator in
 * its POSIX-precedence form (BEGIN_REPEAT_POSIX / END_REPEAT_POSIX),
 * which applies to the whole series.
 *  - series singleton: link_machines( $1, $2 );
 *  - {n,m}: "bad iteration values" when $3 > $5 or $3 < 0; otherwise
 *    built with mkrep();
 *  - {n,}: mkrep( $1, $3, INFINITE_REPEAT );
 *  - {n}: $1 linked with $3 - 1 copies made by copysingl().
 * Non-positive counts are reported as "iteration value must be
 * positive".
 * NOTE(review): the conditions guarding some of these branches are
 * missing from this copy.
 */
535 series
: series singleton
537 /* This is where concatenation of adjacent patterns
540 $$
= link_machines
( $1, $2 );
546 | series BEGIN_REPEAT_POSIX NUMBER
',' NUMBER END_REPEAT_POSIX
550 if
( $3 > $5 ||
$3 < 0 )
552 synerr
( _
("bad iteration values") );
562 _
("bad iteration values") );
567 mkrep
( $1, 1, $5 ) );
570 $$
= mkrep
( $1, $3, $5 );
574 | series BEGIN_REPEAT_POSIX NUMBER
',' END_REPEAT_POSIX
580 synerr
( _
("iteration value must be positive") );
585 $$
= mkrep
( $1, $3, INFINITE_REPEAT
);
588 | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
590 /* The series could be something like "(foo)",
591 * in which case we have no idea what its length
592 * is, so we punt here.
598 synerr
( _
("iteration value must be positive")
604 $$
= link_machines
( $1,
605 copysingl
( $1, $3 - 1 ) );
/* singleton - a single pattern element with its postfix operators.
 * Shown here: the '*' alternative (action not visible) and the repeat
 * operator in its flex-precedence form (BEGIN_REPEAT_FLEX /
 * END_REPEAT_FLEX), which binds to the singleton only.  The handling
 * of {n,m}, {n,} and {n} mirrors the series production: values are
 * checked, then mkrep() or link_machines()/copysingl() build the
 * result.
 * NOTE(review): the conditions guarding some branches are missing from
 * this copy.
 */
610 singleton
: singleton
'*'
629 | singleton BEGIN_REPEAT_FLEX NUMBER
',' NUMBER END_REPEAT_FLEX
633 if
( $3 > $5 ||
$3 < 0 )
635 synerr
( _
("bad iteration values") );
645 _
("bad iteration values") );
650 mkrep
( $1, 1, $5 ) );
653 $$
= mkrep
( $1, $3, $5 );
657 | singleton BEGIN_REPEAT_FLEX NUMBER
',' END_REPEAT_FLEX
663 synerr
( _
("iteration value must be positive") );
668 $$
= mkrep
( $1, $3, INFINITE_REPEAT
);
671 | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
673 /* The singleton could be something like "(foo)",
674 * in which case we have no idea what its length
675 * is, so we punt here.
681 synerr
( _
("iteration value must be positive") );
686 $$
= link_machines
( $1,
687 copysingl
( $1, $3 - 1 ) );
/* Actions for the atomic singleton alternatives.  Visible fragments:
 *  - '.': newline is added to ccldot and both ccldot and cclany are
 *    registered with mkeccl(); the state is then mkstate( -cclany ) or
 *    mkstate( -ccldot );
 *  - a character class ($1): its members are sorted with qsort() using
 *    cclcmp and the class is registered with mkeccl();
 *  - rule_has_nl[num_rules] is set in three places;
 *  - a single character in a case-insensitive scanner becomes an
 *    alternation of both cases via mkor().
 * NOTE(review): the alternative heads, the code creating the two
 * classes, and the conditions selecting between branches are missing
 * from this copy.
 */
694 /* Create the '.' character class. */
696 ccladd
( ccldot
, '\n' );
700 mkeccl
( ccltbl
+ cclmap
[ccldot
],
701 ccllen
[ccldot
], nextecm
,
702 ecgroup
, csize
, csize
);
704 /* Create the (?s:'.') character class. */
709 mkeccl
( ccltbl
+ cclmap
[cclany
],
710 ccllen
[cclany
], nextecm
,
711 ecgroup
, csize
, csize
);
719 $$
= mkstate
( -cclany
);
721 $$
= mkstate
( -ccldot
);
726 /* Sort characters for fast searching.
728 qsort
( ccltbl
+ cclmap
[$1], ccllen
[$1], sizeof
(*ccltbl
), cclcmp
);
731 mkeccl
( ccltbl
+ cclmap
[$1], ccllen
[$1],
732 nextecm
, ecgroup
, csize
, csize
);
737 rule_has_nl
[num_rules
] = true
;
747 rule_has_nl
[num_rules
] = true
;
763 rule_has_nl
[num_rules
] = true
;
765 if
(sf_case_ins
() && has_case
($1))
766 /* create an alternation, as in (a|A) */
767 $$
= mkor
(mkstate
($1), mkstate
(reverse_case
($1)));
/* fullccl - character-class set arithmetic: the difference or union of
 * a class with a bracketed class, computed by ccl_set_diff() and
 * ccl_set_union().  The last line shown yields the class built by ccl
 * between '[' and ']'.
 * NOTE(review): the production heads (fullccl, and presumably braceccl
 * for the bracketed form) are missing from this copy.
 */
773 fullccl CCL_OP_DIFF braceccl
{ $$
= ccl_set_diff
($1, $3); }
774 | fullccl CCL_OP_UNION braceccl
{ $$
= ccl_set_union
($1, $3); }
780 '[' ccl
']' { $$
= $2; }
/* ccl - contents of a bracket expression, built left-recursively.
 *  - Range "CHAR - CHAR": the visible checks produce the "ambiguous in
 *    a case-insensitive scanner" message when the two ends differ in
 *    case, or when a caseless range does not satisfy
 *    range_covers_case(); a "negative range" is a syntax error.
 *    Characters $2..$4 are then visited in a loop, cclsorted is kept
 *    true only while each new start exceeds lastchar, and for a
 *    case-insensitive scanner the same is repeated with both ends
 *    case-reversed.
 *  - Later fragments update cclsorted the same way for a single
 *    character, again repeated with the case reversed.
 *  - The final statement starts a fresh class: currccl = $$ = cclinit().
 * NOTE(review): the loop bodies, the argument lists of the messages,
 * the test guarding the range error and the other alternative heads
 * are missing from this copy.
 */
789 ccl
: ccl CHAR
'-' CHAR
795 /* If one end of the range has case and the other
796 * does not, or the cases are different, then we're not
797 * sure what range the user is trying to express.
798 * Examples: [@-z] or [S-t]
800 if
(has_case
($2) != has_case
($4)
801 ||
(has_case
($2) && (b_islower
($2) != b_islower
($4)))
802 ||
(has_case
($2) && (b_isupper
($2) != b_isupper
($4))))
804 _
("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
807 /* If the range spans uppercase characters but not
808 * lowercase (or vice-versa), then should we automatically
809 * include lowercase characters in the range?
810 * Example: [@-_] spans [a-z] but not [A-Z]
812 else if
(!has_case
($2) && !has_case
($4) && !range_covers_case
($2, $4))
814 _
("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
819 synerr
( _
("negative range in character class") );
823 for
( i
= $2; i
<= $4; ++i
)
826 /* Keep track if this ccl is staying in
827 * alphabetical order.
829 cclsorted
= cclsorted
&& ($2 > lastchar
);
832 /* Do it again for upper/lowercase */
833 if
(sf_case_ins
() && has_case
($2) && has_case
($4)){
834 $2 = reverse_case
($2);
835 $4 = reverse_case
($4);
837 for
( i
= $2; i
<= $4; ++i
)
840 cclsorted
= cclsorted
&& ($2 > lastchar
);
852 cclsorted
= cclsorted
&& ($2 > lastchar
);
855 /* Do it again for upper/lowercase */
856 if
(sf_case_ins
() && has_case
($2)){
857 $2 = reverse_case
($2);
860 cclsorted
= cclsorted
&& ($2 > lastchar
);
869 /* Too hard to properly maintain cclsorted. */
878 currccl
= $$
= cclinit
();
/* Character class expression alternatives.  Each CCE_* token expands
 * through CCL_EXPR with the matching ctype predicate (IS_BLANK for
 * CCE_BLANK), and each CCE_NEG_* token expands through CCL_NEG_EXPR.
 * The trailing fragments warn that [:^lower:] and [:^upper:] are
 * ambiguous in a case-insensitive scanner and expand them with islower
 * and isupper.
 * NOTE(review): the production header, the CCE_LOWER / CCE_UPPER
 * alternatives and the heads and conditions of the negated lower/upper
 * alternatives are missing from this copy.
 */
883 CCE_ALNUM
{ CCL_EXPR
(isalnum
); }
884 | CCE_ALPHA
{ CCL_EXPR
(isalpha
); }
885 | CCE_BLANK
{ CCL_EXPR
(IS_BLANK
); }
886 | CCE_CNTRL
{ CCL_EXPR
(iscntrl
); }
887 | CCE_DIGIT
{ CCL_EXPR
(isdigit
); }
888 | CCE_GRAPH
{ CCL_EXPR
(isgraph
); }
894 | CCE_PRINT
{ CCL_EXPR
(isprint
); }
895 | CCE_PUNCT
{ CCL_EXPR
(ispunct
); }
896 | CCE_SPACE
{ CCL_EXPR
(isspace
); }
897 | CCE_XDIGIT
{ CCL_EXPR
(isxdigit
); }
904 | CCE_NEG_ALNUM
{ CCL_NEG_EXPR
(isalnum
); }
905 | CCE_NEG_ALPHA
{ CCL_NEG_EXPR
(isalpha
); }
906 | CCE_NEG_BLANK
{ CCL_NEG_EXPR
(IS_BLANK
); }
907 | CCE_NEG_CNTRL
{ CCL_NEG_EXPR
(iscntrl
); }
908 | CCE_NEG_DIGIT
{ CCL_NEG_EXPR
(isdigit
); }
909 | CCE_NEG_GRAPH
{ CCL_NEG_EXPR
(isgraph
); }
910 | CCE_NEG_PRINT
{ CCL_NEG_EXPR
(isprint
); }
911 | CCE_NEG_PUNCT
{ CCL_NEG_EXPR
(ispunct
); }
912 | CCE_NEG_SPACE
{ CCL_NEG_EXPR
(isspace
); }
913 | CCE_NEG_XDIGIT
{ CCL_NEG_EXPR
(isxdigit
); }
916 warn
(_
("[:^lower:] is ambiguous in case insensitive scanner"));
918 CCL_NEG_EXPR
(islower
);
922 warn
(_
("[:^upper:] ambiguous in case insensitive scanner"));
924 CCL_NEG_EXPR
(isupper
);
/* Actions that build a machine for a run of characters, one character
 * ($2) at a time: rule_has_nl[num_rules] is set, a cased character in
 * a case-insensitive scanner becomes an alternation of both cases, and
 * the new state is linked after the machine built so far ($1).  The
 * final alternative yields a lone epsilon state.
 * NOTE(review): the production header (presumably string), the
 * condition guarding rule_has_nl and the non-case-insensitive branch
 * are missing from this copy.
 */
931 rule_has_nl
[num_rules
] = true
;
935 if
(sf_case_ins
() && has_case
($2))
936 $$
= mkor
(mkstate
($2), mkstate
(reverse_case
($2)));
940 $$
= link_machines
( $1, $$
);
944 { $$
= mkstate
( SYM_EPSILON
); }
/* build_eof_action - emit the EOF action for every start condition
 * currently on scon_stk (entries 1..scon_stk_ptr).
 * For an entry whose sceof[] flag is already set, a "multiple EOF
 * rules" message is issued through format_pinpoint_message().  The
 * remaining visible statements set sceof[] to true, add
 * "YY_RULE_SETUP" when previous_continued_action is set, and add a
 * "case YY_STATE_EOF(name):" label formatted into action_text.  After
 * the loop a line directive is written with line_directive_out().
 * NOTE(review): the braces, the declaration of i, the branch structure
 * separating the error case from the normal case (presumably an else)
 * and the end of the function are missing from this copy.
 */
950 /* build_eof_action - build the "<<EOF>>" action for the active start
954 void build_eof_action
()
957 char action_text
[MAXLINE
];
959 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
961 if
( sceof
[scon_stk
[i
]] )
962 format_pinpoint_message
(
963 "multiple <<EOF>> rules for start condition %s",
964 scname
[scon_stk
[i
]] );
968 sceof
[scon_stk
[i
]] = true
;
970 if
(previous_continued_action
/* && previous action was regular */)
971 add_action
("YY_RULE_SETUP\n");
973 snprintf
( action_text
, sizeof
(action_text
), "case YY_STATE_EOF(%s):\n",
974 scname
[scon_stk
[i
]] );
975 add_action
( action_text
);
979 line_directive_out
( (FILE *) 0, 1 );
981 /* This isn't a normal rule after all - don't count it as
982 * such, so we don't have any holes in the rule numbering
983 * (which make generating "rule can never match" warnings
/* msg is used as the snprintf() format and arg as its single argument;
 * the result is written to a MAXLINE-byte buffer, so longer text is
 * truncated.
 * NOTE(review): the braces and the call that reports errmsg
 * (presumably synerr()) are missing from this copy.
 */
991 /* format_synerr - write out formatted syntax error */
993 void format_synerr
( msg
, arg
)
994 const char *msg
, arg
[];
996 char errmsg
[MAXLINE
];
998 (void) snprintf
( errmsg
, sizeof
(errmsg
), msg
, arg
);
/* Visible body fragment: forwards str to pinpoint_message().
 * NOTE(review): the function header, parameter declaration and braces
 * are missing from this copy, and further statements may be as well.
 */
1003 /* synerr - report a syntax error */
1009 pinpoint_message
( str
);
/* msg is used as the snprintf() format and arg as its single argument;
 * the result is written to a MAXLINE-byte buffer, so longer text is
 * truncated.
 * NOTE(review): the braces and the call that reports warn_msg
 * (presumably warn()) are missing from this copy.
 */
1013 /* format_warn - write out formatted warning */
1015 void format_warn
( msg
, arg
)
1016 const char *msg
, arg
[];
1018 char warn_msg
[MAXLINE
];
1020 snprintf
( warn_msg
, sizeof
(warn_msg
), msg
, arg
);
/* Visible body fragment: forwards str to line_warning() together with
 * the current line number, linenum.
 * NOTE(review): the function header, parameter declaration and braces
 * are missing from this copy.
 */
1025 /* warn - report a warning, unless -w was given */
1030 line_warning
( str
, linenum
);
1033 /* format_pinpoint_message - write out a message formatted with one string,
1034 * pinpointing its location
1037 void format_pinpoint_message
( msg
, arg
)
1038 const char *msg
, arg
[];
1040 char errmsg
[MAXLINE
];
1042 snprintf
( errmsg
, sizeof
(errmsg
), msg
, arg
);
1043 pinpoint_message
( errmsg
);
1047 /* pinpoint_message - write out a message, pinpointing its location */
1049 void pinpoint_message
( str
)
1052 line_pinpoint
( str
, linenum
);
/* Prefixes str with "warning, " in a MAXLINE-byte buffer (longer text
 * is truncated by snprintf()) and reports it through line_pinpoint()
 * at the given line.
 * NOTE(review): the parameter declarations, the braces and the test
 * implementing "unless -w was given" are missing from this copy.
 */
1056 /* line_warning - report a warning at a given line, unless -w was given */
1058 void line_warning
( str
, line
)
1062 char warning
[MAXLINE
];
1066 snprintf
( warning
, sizeof
(warning
), "warning, %s", str
);
1067 line_pinpoint
( warning
, line
);
1072 /* line_pinpoint - write out a message, pinpointing it at the given line */
1074 void line_pinpoint
( str
, line
)
1078 fprintf
( stderr
, "%s:%d: %s\n", infilename
, line
, str
);
1082 /* yyerror - eat up an error message from the parser;
1083 * currently, messages are ignore