3 * The Regina Rexx Interpreter
4 * Copyright (C) 1992-1994 Anders Christensen <anders@pvv.unit.no>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the Free
18 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 /* Define ASCII_0_TERMINATES_STRING if you want that ASCII-0 terminates
28 * an input string. Normally this should not happen. Input strings are
29 * terminated by a length encoding. The string {"", length=1} is invalid for
30 * the lexer (ASCII-0 is not allowed) while {"", length=0} is allowed (this
32 * ASCII_0_TERMINATES_STRING is only(!) for backward compatibility and
33 * shouldn't be used under normal circumstances.
36 #define ASCII_0_TERMINATES_STRING
41 #define YYLMAX BUFFERSIZE
45 #define YY_CHAR YY_CHAR_TYPE
47 #define YY_INPUT(buf,result,max_size) result=fill_buffer(buf,max_size)
50 /* NOTE: Every comment is replaced by a '`' character in the lower input
51 * routines. These should check for such (illegal) characters.
53 #define MY_ISBLANK(c) (((c)==' ')||((c)=='\t')||((c)=='\v')||((c)=='\f')|| \
54 ((c)=='\r')||((c)=='`'))
56 PROTECTION_VAR(regina_parser)
57 /* externals which are protected by regina_parser */
58 internal_parser_type parser_data = {NULL, };
60 char retvalue[BUFFERSIZE] ;
61 unsigned SymbolDetect = 0;
62 /* end of externals protected by regina_parser */
64 /* locals, they are protected by regina_parser, too */
65 static int nextline = 1;
66 static int nextstart = 1;
67 static int do_level = 0 ;
68 static int in_numform=0, next_numform=0 ;
69 static int obs_with=0, in_do=0, in_then=0;
70 static int in_parse=0 ;
71 static int in_trace=0, itflag=0 ;
72 static int in_signal=0, in_call=0 ;
73 static enum { not_in_address = 0,
74 in_address_keyword, /* ADDRESS just seen */
75 in_address_main, /* after the first word */
76 in_address_value, /* like main but VALUE was seen */
77 in_address_with} in_address = not_in_address,
78 last_in_address = not_in_address ;
79 static enum {no_seek_with = 0,
81 seek_with_from_address} seek_with = no_seek_with ;
83 * expression_ended is set if a typical expression has ended and the next
84 * token has to be preceeded by a CONCATENATE (abuttal) operator in most
87 static int expression_ended=0;
90 * insert_abuttal is set if the last token was a possible end of an expression.
91 * The next token may need to be preceeded by an additional CONCATENATE
92 * operator under some conditions like not being "in_parse".
94 static int insert_abuttal=0;
97 * A symbol may be pending after a CONTATENATE (abuttal) operator has been
98 * returned. This symbol will be stored here. 0 indicates no pending symbol.
99 * ASCII 0 will never been returned, there is no need for another indicator.
101 static int delayed_symbol=0;
104 * inhibit_delayed_abuttal can be set only if delayed_symbol has been set,
105 * but not always. An opening parenthesis must have been detected, too.
106 * It inhibits the use of a CONCATENATE (abuttal) operator after the staring
107 * parenthesis. Examples:
108 * CONCATENATE {var_symbol} <-- needs a CONCATENATE, another {var_symbol}
110 * CONCATENATE {var_symbol} "(" <-- needs a CONCATENATE, another {var_symbol}
111 * is an expression's start.
113 static int inhibit_delayed_abuttal=0;
116 static int kill_this_space=0, kill_next_space=1 ;
117 static int extnextline = -1, extnextstart; /* for a delayed line increment */
118 static int linenr=1 ;
119 static int contline = 0;
121 static int singlequote=0, doblequote=0 ;
122 static int firstln=0 ;
123 static int bufptr=0 ;
124 /* Previous bug. 8-bits clean combined with EOF ==> need an int */
125 static int chbuffer[LOOKAHEAD] ;
126 static int ipretflag=0, cch=0 ;
127 static const char *interptr=NULL ;
128 static const char *last_interptr=NULL ;
129 static const char *interptrmax ;
130 static int cchmax = 0 ;
132 static YY_CHAR_TYPE *rmspc( YY_CHAR_TYPE *instr ) ;
133 static void set_assignment( void );
134 static int process_number_or_const( const char *text, int len );
135 static int process_hex_or_bin_string( char *text, int len, int base );
136 static void compress_string( char *dest, const char *src );
137 static int fill_buffer( char *buf, int max_size ) ;
138 #define SET_NEXTSTART() (nextstart += yyleng)
140 #define YY_FATAL_ERROR(s) exiterror( ERR_PROG_UNREADABLE, 1, s )
144 %start comm signal sgtype procd parse then with
145 %start numeric do1 other value1 ifcont signame nmform
159 * ANSI 6.2.2 definitions (partially with extensions)
165 general [_!?A-Za-z]|{extra}
169 * sigh, we need the reversed classes. flex isn't smart
170 * enough to provide it.
171 * Added "0-9" to notGeneral... to fix bug 724390
173 notGeneralDot [^_!?A-Za-z0-9#$@.]
174 notGeneralParen [^_!?A-Za-z0-9#$@.(]
177 blank [\f\v\r]|{space}
181 * A comment is converted to a sequence of ` signs in the
185 between ({blank}|{comment})*
187 var_symbol_c {dot}|{digit}|{general}
188 var_symbol {general}{var_symbol_c}*
190 * A number is a little bit different to detect signs in the
193 const_symbol ({dot}|{digit}){var_symbol_c}*
194 reserved_symbol {dot}[a-zA-Z]+
195 symbol {var_symbol}|{const_symbol}
197 mantissa ({dot}?{digit}+)|({digit}+{dot}{digit}*)
198 exponent [eE][-+]?{digit}+
200 * read ANSI 6.2.1.1 carefully. We have to check the chars
201 * after the exponent for notGeneralDot below.
203 number {mantissa}{exponent}?
205 hex_digit {digit}|[a-fA-F]
207 hex_digit_tuple {blank}*{hex_digit}{hex_digit}
208 bin_digit_tuple {blank}*{bin_digit}{bin_digit}{bin_digit}{bin_digit}
210 hex_string_start {hex_digit}{hex_digit}?
211 hex_string ({hex_string_start}{hex_digit_tuple}*)?
212 bin_string_start {bin_digit}({bin_digit}({bin_digit}{bin_digit}?)?)?
213 bin_string ({bin_string_start}{bin_digit_tuple}*)?
215 text_string '([^']|'')*'|\"([^"]|\"\")*\"
216 mintext_string '([^']|'')+'|\"([^"]|\"\")+\"
221 * The list of keywords.
223 key_address [aA][dD][dD][rR][eE][sS][sS]
226 key_call [cC][aA][lL][lL]
227 key_caseless [cC][aA][sS][eE][lL][eE][sS][sS]
228 key_digits [dD][iI][gG][iI][tT][sS]
230 key_dotline {dot}[lL][iI][nN][eE]
231 key_drop [dD][rR][oO][pP]
232 key_else [eE][lL][sS][eE]
234 key_engineering [eE][nN][gG][iI][nN][eE][eE][rR][iI][nN][gG]
235 key_error [eE][rR][rR][oO][rR]
236 key_exit [eE][xX][iI][tT]
237 key_expose [eE][xX][pP][oO][sS][eE]
238 key_external [eE][xX][tT][eE][rR][nN][aA][lL]
239 key_failure [fF][aA][iI][lL][uU][rR][eE]
241 key_forever [fF][oO][rR][eE][vV][eE][rR]
242 key_form [fF][oO][rR][mM]
243 key_fuzz [fF][uU][zZ][zZ]
244 key_halt [hH][aA][lL][tT]
246 key_interpret [iI][nN][tT][eE][rR][pP][rR][eE][tT]
247 key_iterate [iI][tT][eE][rR][aA][tT][eE]
248 key_leave [lL][eE][aA][vV][eE]
249 key_linein [lL][iI][nN][eE][iI][nN]
250 key_lostdigits [lL][oO][sS][tT][dD][iI][gG][iI][tT][sS]
251 key_lower [lL][oO][wW][eE][rR]
252 key_name [nN][aA][mM][eE]
254 key_notready [nN][oO][tT][rR][eE][aA][dD][yY]
255 key_novalue [nN][oO][vV][aA][lL][uU][eE]
256 key_numeric [nN][uU][mM][eE][rR][iI][cC]
259 key_options [oO][pP][tT][iI][oO][nN][sS]
260 key_otherwise [oO][tT][hH][eE][rR][wW][iI][sS][eE]
261 key_parse [pP][aA][rR][sS][eE]
262 key_procedure [pP][rR][oO][cC][eE][dD][uU][rR][eE]
263 key_pull [pP][uU][lL][lL]
264 key_push [pP][uU][sS][hH]
265 key_queue [qQ][uU][eE][uU][eE]
266 key_return [rR][eE][tT][uU][rR][nN]
268 key_scientific [sS][cC][iI][eE][nN][tT][iI][fF][iI][cC]
269 key_select [sS][eE][lL][eE][cC][tT]
270 key_signal [sS][iI][gG][nN][aA][lL]
271 key_source [sS][oO][uU][rR][cC][eE]
272 key_syntax [sS][yY][nN][tT][aA][xX]
273 key_then [tT][hH][eE][nN]
275 key_trace [tT][rR][aA][cC][eE]
276 key_until [uU][nN][tT][iI][lL]
277 key_upper [uU][pP][pP][eE][rR]
278 key_value [vV][aA][lL][uU][eE]
280 key_version [vV][eE][rR][sS][iI][oO][nN]
281 key_when [wW][hH][eE][nN]
282 key_while [wW][hH][iI][lL][eE]
283 key_with [wW][iI][tT][hH]
289 if ( delayed_symbol )
292 retval = delayed_symbol;
295 * Pass the meaning of inhibit_delayed_abuttal to the normal algorithm
296 * for automatic abuttal detection.
298 expression_ended = !inhibit_delayed_abuttal;
299 inhibit_delayed_abuttal = 0;
311 last_in_address = in_address ; /* just for the "Environment" */
312 /* there can't be an intermediate SPACE between ADDRESS and the next word*/
313 if ( in_address == in_address_keyword )
314 in_address = in_address_main ;
316 kill_this_space = kill_next_space ;
317 kill_next_space = 0 ;
323 seek_with = no_seek_with ;
325 itflag = (in_trace) ;
327 if (extnextline != -1)
329 parser_data.tstart = nextstart = extnextstart;
330 parser_data.tline = nextline = extnextline;
335 parser_data.tstart = nextstart ;
336 parser_data.tline = nextline ;
338 insert_abuttal = expression_ended ;
339 expression_ended = 0 ;
345 <ifcont>{between}{EOL}{between} {
347 * Fixes bug 579597, "[;\r?\n]" instead of "{EOL}" causes the bug.
350 if ((ptr = strchr(yytext, '\n')) != NULL)
352 nextstart = yyleng - (int) (ptr - (char *) yytext) ;
354 if (extnextline != -1)
357 extnextstart = nextstart; /* fixes bug 938204 */
364 {between}{EOL}{between} {
369 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
370 exiterror( ERR_INVALID_TEMPLATE, 1, yytext ) ;
372 obs_with = in_do = 0 ;
373 in_signal = in_call = 0 ;
374 in_address = not_in_address;
376 if ( seek_with == seek_with_from_address )
377 seek_with = no_seek_with ;
378 if ((ptr = strchr(yytext, '\n')) != NULL)
380 nextstart = yyleng - (int) (ptr - (char *) yytext) ;
382 if (extnextline != -1)
385 extnextstart = nextstart; /* fixes bug 938204 */
392 {key_dotline}/{notGeneralDot} {
396 /* support C-like __LINE__ value */
397 /* determine current source line, and create a numeric literal */
399 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
401 sprintf (work, "%d", parser_data.tline);
403 /* copy back work, right-to-left */
405 for (i = strlen (work) - 1; i >= 0; i--)
411 <comm>{key_address}{between} {
412 BEGIN value1 ; /* Allow a following VALUE keyword */
413 seek_with = seek_with_from_address ;
414 in_address = in_address_keyword ;
415 in_call = 1 ; /* Allow the next words to be given as in CALL. */
419 <comm>{key_arg}{between} {
425 <comm>{key_call}{between} {
431 <comm>{key_do}{between} {
433 assert( do_level >=0 ) ;
439 <comm>{key_drop}{between} {
445 <comm>{key_else}{between} {
450 <comm>{key_exit}{between} {
455 <comm>{key_if}{between} {
458 parser_data.if_linenr = linenr - 1;
462 <comm>{key_interpret}{between} {
467 <comm>{key_iterate}{between} {
472 <comm>{key_leave}{between} {
477 <comm>{key_options}{between} {
482 <comm>{key_nop}{between} {
487 <comm>{key_numeric}{between} {
492 <comm>{key_parse}{between} {
498 <comm>{key_procedure}{between} {
503 <comm>{key_pull}{between} {
509 <comm>{key_push}{between} {
514 <comm>{key_queue}{between} {
519 <comm>{key_return}{between} {
524 <comm>{key_say}{between} {
529 <comm>{key_select}{between} {
531 assert( do_level >= 0 ) ;
536 <comm>{key_signal}{between} {
542 <comm>{key_trace}{between} {
548 <comm>{key_upper}{between} {
554 <comm>{key_when}{between} {
557 parser_data.when_linenr = linenr - 1;
561 <comm>{key_otherwise}{between} {
566 <comm>{key_end}{between} {
568 assert( do_level >= 0 ) ;
571 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
572 exiterror( ERR_UNMATCHED_END, 1 ) ;
599 <comm>{const_symbol}{between}={between} {
600 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
601 if ( yytext[0] == '.' )
605 * FIXME, FGC: ANSI 6.2.3.1 forces an error 50.1 if yytext[0]=='.'
606 * ANSI 6.3.2.8 forces this error 31.3 which is wrong
610 if ( !KNOWN_RESERVED( retvalue, retlength ) )
612 yytext[retlength] = '\0';
613 exiterror( ERR_RESERVED_SYMBOL, 1, yytext );
617 yytext[retlength] = '\0';
618 exiterror( ERR_INVALID_START, 3, yytext );
623 yytext[retlength] = '\0';
624 exiterror( ERR_INVALID_START, 2, yytext );
626 /* known reserved variable */
628 return ASSIGNMENTVARIABLE ; }
630 <comm>{var_symbol}{between}={between} {
636 return ASSIGNMENTVARIABLE ; }
638 <nmform,signal,value1>{between}{key_value}{between} {
641 if ( ( last_in_address == in_address_keyword )
642 && ( in_address == in_address_main ) )
644 BEGIN other ; /* the next useful expression will set it to "other"
645 * in either case. The BEGIN-states aren't very
646 * handy in most cases; they are not flexible enough.
648 in_address = in_address_value ;
656 if ((!in_trace)&&(!in_address)&&(!in_signal)&&(!in_call)&&(!in_numform))
658 in_trace = in_signal = in_call = 0 ;
662 <signal>{key_on}{between} {
667 <signal>{key_off}{between} {
672 <signame>{key_name}{between} {
677 <sgtype>{key_error}{between} {
682 <sgtype>{key_halt}{between} {
687 <sgtype>{key_novalue}{between} {
692 <sgtype>{key_notready}{between} {
697 <sgtype>{key_failure}{between} {
702 <sgtype>{key_syntax}{between} {
707 <sgtype>{key_lostdigits}{between} {
710 return LOSTDIGITS ; }
712 <value1>{between}{trace_char}+{between} {
713 if (!in_trace) REJECT ;
714 strcpy(retvalue,rmspc( yytext )) ;
718 <procd>{key_expose}{between} {
724 <parse>{key_upper}{between} {
728 <parse>{key_lower}{between} {
732 <parse>{key_caseless}{between} {
736 <parse>{key_arg}{between} {
741 <parse>{key_numeric}{between} {
746 <parse>{key_pull}{between} {
751 <parse>{key_source}{between} {
756 <parse>{key_external}{between} {
761 <parse>{key_linein}{between} {
766 <parse>{key_version}{between} {
771 <parse>{key_var}{between} {
773 in_parse = 2; /* accept a variable and treat func(a) as func (a) */
777 <parse>{key_value}{between} {
778 seek_with = seek_with_from_parse ;
781 BEGIN with ; /* in fact this works as a "not comm" */
785 <comm>{between}{key_then}{between} {
790 <other,ifcont>{between}{key_then}{between} {
791 if (in_then!=1) REJECT ;
797 {between}{key_with}{between} {
801 if ((in_do)||(!seek_with))
804 if ( seek_with == seek_with_from_parse )
806 seek_with = no_seek_with ;
807 if (in_address) /* any address state */
808 in_address = in_address_with ; /* WITH seen */
813 <numeric>{key_digits}{between} {
818 <numeric>{key_form}{between} {
824 <nmform>{key_scientific}{between} {
826 return SCIENTIFIC ; }
828 <nmform>{key_engineering}{between} {
830 return ENGINEERING ; }
832 <numeric>{key_fuzz}{between} {
837 <do1>{key_forever}{between} {
851 else if ( in_do == 1 )
853 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
854 exiterror( ERR_INVALID_DO_SYNTAX, 1, "TO" ) ;
865 else if ( in_do == 1 )
867 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
868 exiterror( ERR_INVALID_DO_SYNTAX, 1, "BY" ) ;
879 else if ( in_do == 1 )
881 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
882 exiterror( ERR_INVALID_DO_SYNTAX, 1, "FOR" ) ;
886 {key_while}{between} {
891 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
892 exiterror( ERR_INVALID_DO_SYNTAX, 1, "WHILE" ) ;
901 {key_until}{between} {
905 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
906 exiterror( ERR_INVALID_DO_SYNTAX, 1, "UNTIL" ) ;
916 <do1>{reserved_symbol}{between}/= {
920 if ( !KNOWN_RESERVED( retvalue, retlength ) )
921 exiterror( ERR_RESERVED_SYMBOL, 1, yytext ) ;
923 return DOVARIABLE ; }
925 <do1>{var_symbol}{between}/= {
928 strcpy(retvalue,rmspc( yytext )) ;
930 return DOVARIABLE ; }
932 <comm>{symbol}{between}:{between} {
936 for( i = 0; ( ch = yytext[i] ) != '\0'; i++ )
939 * A blank or ':' can't occur in the normal text. They are terminators.
943 || ( MY_ISBLANK( ch ) ) )
945 retvalue[i] = (char) rx_toupper( ch );
952 <comm>{mintext_string}{between}:{between} {
955 compress_string( retvalue, yytext );
961 {mintext_string}{comment}*\( {
964 compress_string( retvalue, yytext );
966 kill_next_space = 1 ;
967 if ( insert_abuttal ) {
968 inhibit_delayed_abuttal = 1 ;
969 delayed_symbol = EXFUNCNAME ;
971 return CONCATENATE ; }
973 expression_ended = 0 ;
975 return EXFUNCNAME ; }
979 ('{hex_string}'|\"{hex_string}\")[xX]/{notGeneralDot} {
983 return process_hex_or_bin_string( yytext, yyleng, 16 ); }
987 ('{bin_string}'|\"{bin_string}\")[bB]/{notGeneralDot} {
988 return process_hex_or_bin_string( yytext, yyleng, 2 ); }
990 {text_string}[xXbB]/{notGeneralParen} {
992 int i,base,charcount,first,tuple;
994 base = ( rx_toupper( yytext[yyleng - 1] ) == 'X' ) ? 16 : 2;
995 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
997 * We are sure to have an invalid string since the above patterns won't
998 * match. We can either have invalid characters or misplaced spaces.
1000 * Blanks as the first characters are forbidden.
1002 if ( MY_ISBLANK( yytext[1] ) )
1003 exiterror( ERR_INVALID_HEX_CONST, ( base == 16 ) ? 1 : 2, 1 );
1005 tuple = ( base == 16 ) ? 2 : 4;
1006 for ( i = 1, first = 1, charcount = 0; i < yyleng - 2; i++ )
1009 if ( MY_ISBLANK( c ) )
1012 * The first tuple may have less than tuple chars
1014 if ( ( ( charcount % tuple ) == 0 ) || first )
1020 exiterror( ERR_INVALID_HEX_CONST, ( base == 16 ) ? 1 : 2, i );
1025 if ( ( c == '0' ) || ( c == '1' ) )
1030 if ( rx_isxdigit( c ) )
1033 exiterror( ERR_INVALID_HEX_CONST, ( base == 16 ) ? 3 : 4, c );
1036 * We didn't match something like "1 12 34 "X Assume this as the error.
1037 * Look back to the first blank in the last sequence.
1039 for ( i = yyleng - 2; i > 1; i-- )
1040 if ( !MY_ISBLANK( yytext[i - 1] ) )
1042 exiterror( ERR_INVALID_HEX_CONST, ( base == 16 ) ? 1 : 2, i ); }
1047 compress_string( retvalue, yytext );
1049 strcpy( yytext, retvalue ); /* proper error display */
1053 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1054 exiterror( ERR_INV_SUBKEYWORD, 11, "ENGINEERING SCIENTIFIC", retvalue ) ;
1058 if ( insert_abuttal && !in_parse && !in_signal && !in_call ) {
1059 delayed_symbol = STRING ;
1061 return CONCATENATE ; }
1066 kill_next_space = 1 ;
1069 expression_ended = 1 ;
1078 strcpy(retvalue,yytext) ;
1082 {digit}+/{notGeneralDot} {
1084 * This is the same as of the "{digit}+" rule above. flex is very stupid.
1085 * (Or is it a feature?)
1086 * The number below will take precedence instead of a plain "{digit}+",
1087 * even of the fact that the above rule may match the same length of
1088 * characters. flex seems to count the next "expect characters" for the
1089 * comparison which rule shall be used.
1093 strcpy(retvalue,yytext) ;
1097 {number}/{notGeneralDot} {
1099 * must split two rule parts because of the "/" rule-part.
1100 * This fixes bug 602283.
1102 return process_number_or_const( yytext, yyleng );
1106 return process_number_or_const( yytext, yyleng );
1112 * this might be a symbol in front of a function, but only if next
1113 * char in input stream is "(".
1116 if (in_trace) REJECT ;
1118 memcpy( retvalue, yytext, yyleng + 1 ); /* include terminating '\0' */
1119 mem_upper( retvalue, yyleng );
1123 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1124 exiterror( ERR_INV_SUBKEYWORD, 11, "ENGINEERING SCIENTIFIC", retvalue ) ;
1127 if ( ( last_in_address == in_address_keyword )
1128 && ( in_address == in_address_main ) )
1130 kill_next_space = 1 ;
1141 * This has precedence over checking the parenthesis below.
1145 kill_next_space = 1;
1150 if ( in_parse == 2 )
1153 * This has precedence over checking the parenthesis below.
1154 * Fixes bug 1109335.
1157 kill_next_space = 1;
1162 /* We must check if a '(' follows. Remember the number of eaten chars. */
1165 for (; ( i = yyinput() ) == '`';)
1167 for (; ( i = input() ) == '`';)
1175 /* input() has destroyed the yytext-terminator re-set it */
1176 yytext[yyleng] = '\0';
1182 kill_next_space = 1 ;
1183 if ( insert_abuttal )
1185 inhibit_delayed_abuttal = 1 ;
1186 delayed_symbol = INFUNCNAME ;
1187 return CONCATENATE ;
1189 expression_ended = 0 ;
1193 if ( insert_abuttal && !in_parse ) {
1194 delayed_symbol = SIMSYMBOL ;
1195 return CONCATENATE ; }
1197 expression_ended = 1 ;
1199 if ( in_address == in_address_with )
1200 kill_next_space = 1 ;
1201 if (SymbolDetect) /* allow a fast breakout */
1203 /* We define a tricky preprocessor directive. This will give us
1204 * maximum performance without the loss of control or errors produced
1207 #define RET_IF(s) if ((SymbolDetect & SD_##s) && \
1208 (yyleng == sizeof(#s) - 1) && \
1209 (strncmp(retvalue, \
1211 sizeof(#s) - 1) == 0)) \
1213 /* e.g. RET_IF(INPUT); is replaced by:
1214 * if ((SymbolDetect & SD_INPUT) &&
1215 * (yyleng == sizeof("INPUT") - 1) &&
1216 * (strncmp(retvalue,
1218 * sizeof("INPUT") - 1) == 0))
1233 return SIMSYMBOL ; }
1238 * this is mainly the content of var_symbol but we do a REJECT for all but
1239 * normal variable names.
1242 if ( in_trace || in_numform || in_call )
1245 if ( ( last_in_address == in_address_keyword )
1246 && ( in_address == in_address_main ) )
1250 if ( !KNOWN_RESERVED( retvalue, retlength ) )
1253 /* We must check if a '(' follows. Remember the number of eaten chars. */
1255 for ( j = 1; ( i = yyinput() ) == '`'; )
1257 for ( j = 1; ( i = input() ) == '`'; )
1265 /* input() has destroyed the yytext-terminator re-set it */
1266 yytext[yyleng] = '\0';
1271 REJECT; /* function name can't start with '.' */
1274 if ( insert_abuttal && !in_parse )
1276 delayed_symbol = SIMSYMBOL;
1280 expression_ended = 1;
1285 expression_ended = 1 ;
1291 if ( insert_abuttal )
1293 inhibit_delayed_abuttal = 1 ;
1294 delayed_symbol = '(' ;
1296 return CONCATENATE ;
1301 {between}\,{between} {
1305 {between}\-{between} {
1310 {between}\+{between} {
1315 {between}\/{between} {
1319 {between}%{between} {
1323 {between}\*{between} {
1327 {between}\|{between} {
1331 {between}&{between} {
1335 {between}={between} {
1340 /* why don't I have a {between} in the beginning of this re? bug? */
1345 {between}\>{blank}*\>{between} {
1349 {between}\<{blank}*\<{between} {
1353 {between}{not}{blank}*\>{blank}*\>{between} {
1357 {between}{not}{blank}*\<{blank}*\<{between} {
1361 {between}\>{blank}*\>{blank}*={between} {
1365 {between}\<{blank}*\<{blank}*={between} {
1369 {between}(\>|{not}{blank}*(\<{blank}*=|={blank}*\<)){between} {
1373 {between}({not}{blank}*\<|={blank}*\>|\>{blank}*=){between} {
1377 {between}(\<|{not}{blank}*(\>{blank}*=|={blank}*\>)){between} {
1381 {between}({not}{blank}*\>|={blank}*\<|\<{blank}*=){between} {
1385 {between}({not}{blank}*=|\<{blank}*\>|\>{blank}*\<){between} {
1387 return DIFFERENT ; }
1389 {between}={blank}*={between} {
1391 return EQUALEQUAL ; }
1393 {between}{not}{blank}*={blank}*={between} {
1395 return NOTEQUALEQUAL ; }
1397 {between}\/{blank}*\/{between} {
1401 {between}&{blank}*&{between} {
1405 {between}\|{blank}*\|{between} {
1407 return CONCATENATE ; }
1409 {between}\*{blank}*\*{between} {
1413 {between}{blank}{between} {
1414 if ( in_address == in_address_value ) /* Always allow spaces in the VALUE */
1415 { /* part of the ADDRESS stmt. */
1419 if (kill_this_space)
1425 return (((in_parse)&&(!seek_with)) ? yylex() : SPACE) ; }
1429 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1430 exiterror( ERR_UNMATCHED_QUOTE, 0 ) ; }
1435 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1436 exiterror( ERR_SYMBOL_EXPECTED, 1, yytext ) ;}
1440 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1441 exiterror( ERR_INVALID_CHAR, 1, yytext[0], yytext[0] ); }
1446 #define NORMALSTAT 0
1447 #define COMMENTSTAT 1
1448 #define SINGLEQUOTE 2
1449 #define DOUBLEQUOTE 3
1452 * set_assignment sets an assignment variable in retvalue from yytext with
1453 * a trailing {between}[={between}] stripped.
1454 * retlength is set, too.
1456 static void set_assignment( void )
1460 for( retlength = 0; ( ch = yytext[retlength] ) != 0; retlength++ )
1463 * A blank or '=' can't occur in the normal text. They are terminators.
1467 || ( MY_ISBLANK( ch ) ) )
1469 retvalue[retlength] = (char) rx_toupper( ch );
1471 retvalue[retlength] = '\0' ;
1474 /* rmspc uppercases all characters and removes blanks from a string.
1475 * Returns the input string.
1477 static YY_CHAR_TYPE *rmspc( YY_CHAR_TYPE *instr )
1479 YY_CHAR_TYPE *retval=instr ,
1483 while ((c = *instr++) != '\0')
1485 c = (YY_CHAR_TYPE) rx_toupper(c);
1486 /* These characters are treated as blanks: */
1487 if ((c!='`') && (c!=' ') && (c!=',') && (c!='\t') && (c!='\n'))
1496 * process_number_or_const does the processing of a decimal number or a const.
1497 * The detection was to complicated to put it into one routine.
1498 * We have to read some more characters to decide whether or not we have
1500 * Returns a lexical code.
1502 static int process_number_or_const( const char *text, int len )
1507 * Copy and uppercase the return value.
1509 memcpy( retvalue, text, len + 1 ); /* include terminating '\0' */
1510 mem_upper( retvalue, len );
1514 parser_data.tline = linenr - 1; /* set tline for exiterror */
1515 exiterror( ERR_INV_SUBKEYWORD, 11, "ENGINEERING SCIENTIFIC", retvalue );
1522 * Plain number or const_symbol.
1524 if ( KNOWN_RESERVED( retvalue, len ) )
1532 * This has precedence over checking the parenthesis below.
1536 kill_next_space = 1;
1542 * We must check if a '(' follows. Remember the number of eaten chars.
1546 while ( ( c = yyinput() ) == '`' )
1548 while ( ( c = input() ) == '`' )
1559 * input() has destroyed the yytext-terminator re-set it
1561 yytext[yyleng] = '\0';
1565 kill_next_space = 1;
1566 if ( insert_abuttal )
1568 inhibit_delayed_abuttal = 1;
1569 delayed_symbol = INFUNCNAME;
1572 expression_ended = 0;
1576 if ( insert_abuttal && !in_parse )
1578 delayed_symbol = symbol;
1582 expression_ended = 1;
1587 * compress_string removes quotes or double quotes from the src and changes
1588 * double delimiter to one. The delimiter should be either ' or ".
1590 static void compress_string( char *dest, const char *src )
1592 char c, delim = *src++;
1596 if ( ( c = *src++ ) == delim )
1611 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1612 exiterror( ERR_UNMATCHED_QUOTE, 0 ) ;
1621 * process_hex_or_bin_string convert the string in text which is given as
1622 * a hexstring or a binstring to a "normal" string. base must be either 2 or
1624 * We rely on having a well-formed string. This must be ensured. It is
1625 * something of the form "content"x or 'content'b or similar.
1626 * Returns a lexical code. retvalue and retlength will be filled.
1628 static int process_hex_or_bin_string( char *text, int len, int base )
1632 unsigned char *dest,sum;
1641 * First, count the number of valid chars to determine the missed leading
1642 * zeros of the first final character.
1644 for ( i = 0, left = 0 ; i < len; i++ )
1647 if ( !MY_ISBLANK( c ) )
1652 * left shall be the count of input char left to process one dest char.
1653 * Accessing one character after the content's end is allowed.
1655 left %= ( base == 2 ) ? 8 : 2;
1657 dest = (unsigned char *) retvalue;
1664 while ( len && MY_ISBLANK( c ) )
1677 sum |= (unsigned char) ( c - '0' );
1691 dest[retlength++] = sum;
1695 dest[retlength] = '\0';
1697 /* We must check if a '(' follows. Remember the number of eaten chars. */
1700 for (; ( i = yyinput() ) == '`';)
1702 for (; ( i = input() ) == '`';)
1710 /* input() has destroyed the yytext-terminator re-set it */
1717 kill_next_space = 1;
1720 inhibit_delayed_abuttal = 1;
1721 delayed_symbol = INFUNCNAME;
1724 expression_ended = 0;
1728 if ( insert_abuttal && !in_parse && !in_call )
1730 delayed_symbol = STRING;
1737 kill_next_space = 1;
1740 expression_ended = 1;
1742 return ( base == 2 ) ? BINSTRING : HEXSTRING;
1745 /* get_next_line: Lower level input fetcher.
1746 * Reads exactly one line from the input stream (file or string).
1747 * All EOL characters are removed and the result is stored in
1748 * last_source_line. A check for line overflow occurred here.
1749 * A special check is done for CP/M ^Z (DOS and friends use this for
1750 * "backward" compatibility, too).
1751 * line is filled with valid values on success.
1752 * max must reflect size of line and should be at least BUFFERSIZE + 2;
1753 * Returns -1 (no input) or the number of valid chars in line.
1755 static int get_next_line( char *line, int max, FILE *stream )
1757 lineboxptr newline ;
1758 offsrcline *incore_newline;
1762 if (inEOF) /* You can't use myunputc if EOF is reached! */
1765 while (pos <= max - 2)
1769 c = chbuffer[--bufptr] ;
1772 if (interptr>=interptrmax)
1775 #ifdef ASCII_0_TERMINATES_STRING
1776 if ((c = *interptr++) == '\0')
1786 if ( parser_data.TSD->HaltRaised )
1787 halt_raised( parser_data.TSD );
1790 if ((c=='\r') || (c=='\n') || (c==EOF))
1792 line[pos++] = (char) (unsigned char) c ;
1795 /* first, check for overflow */
1796 if ((c!='\r') && (c!='\n') && (c!=EOF))
1798 parser_data.tline = linenr ; /* set tline for exiterror */
1799 exiterror( ERR_TOO_LONG_LINE, 0 ) ;
1802 /* We have either a (first) line terminator or EOF */
1805 if ((pos==1) && (line[0]=='\x1A')) /* CP/M ^Z EOF? */
1812 chbuffer[bufptr++] = EOF; /* push back EOF for reuse */
1816 /* Maybe we have CRLF or LFCR. Check for the pair character. */
1817 char pairChar = (c == '\r') ? '\n' : '\r';
1820 /* get one more char and consume it if it is the pair of the EOL */
1823 if (chbuffer[bufptr - 1] == (int) pairChar)
1828 if ((interptr < interptrmax) && (*interptr == pairChar))
1833 int next = getc(stream);
1834 if (next != pairChar)
1836 /* ungetc may break some runtime stuff. Use the internal lookahead*/
1837 chbuffer[bufptr++] = next;
1839 if ( parser_data.TSD->HaltRaised )
1840 halt_raised( parser_data.TSD );
1845 cch = 0 ; /* not needed ? */
1848 if (parser_data.incore_source)
1851 * We can use the incore string to describe a source line, but we
1852 * MUST incement linenr otherwise .LINE doesn't work in instore macros.
1853 * This will probably also allow errors to be reported for the correct line number.
1856 incore_newline = FreshLine() ;
1857 incore_newline->length = pos - 1 ;
1858 incore_newline->offset = last_interptr - parser_data.incore_source ;
1859 last_interptr = interptr;
1863 newline = (lineboxptr)Malloc(sizeof(linebox)) ;
1864 newline->line = Str_make_TSD( parser_data.TSD, pos - 1 ) ;
1865 newline->line->len = pos - 1 ;
1866 memcpy(newline->line->value, line, pos - 1 ) ;
1867 newline->prev = parser_data.last_source_line ;
1868 newline->next = NULL ;
1869 newline->lineno = linenr++ ;
1871 if (parser_data.first_source_line==NULL)
1872 parser_data.first_source_line = newline ;
1874 parser_data.last_source_line->next = newline ;
1875 parser_data.last_source_line = newline ;
1880 /* fill_buffer: Higher level input fetcher.
1881 * (To allow the C-file to compile, all Rexx comments in this comment
1882 * are written as "{*" "*}" instead of the normal, C-like manner.)
1883 * Reads lines from the input stream (yyin or string) with get_next_line.
1884 * Only one line is returned to allow the saving of the line number.
1885 * This routine replaces all comments by '`' signs. This allows
1886 * the detection of a "pseudo" blank: The fragment "say x{* *}y" uses two
1887 * variables, not one called "xy". The parsing of comments must be done
1888 * here to check for the actual numbers of open and closes ("{*" and "*}").
1889 * While doing this we must always check for strings since "'{*'" is not part
1891 * Here is a problem: Is this a nested valid comment: "{* '{*' *} *}"?
1892 * I think so although you cannot remove the outer comment signs without an
1893 * error. Everything within a comment is a comment (per def.). Counting
1894 * opens and closes of comment signs is an ugly trick to help the user.
1895 * He/she must know what he/she is doing if nesting comments!
1897 * max_size gives the maximum size of buf. This is filled up with input.
1898 * We never return less than one character until EOF is reached. Thus, we
1899 * read more than one true input line if a comment spans over more than one
1901 * A line will either be terminated by a single '\n' or by a blank. The
1902 * later one replaces a line continuation (',' [spaces] EOL).
1903 * Errors in this low
1905 * Conclusion: We have to fight very hard to set the expected line number.
1906 * * Comments spanning over lines set them on getting the
1908 * * Concatenated lines set
1910 static int fill_buffer( char *buf, int max_size )
1912 /* statics protected by regina_parser */
1913 static char line[BUFFERSIZE+2] ; /* special buffer to allow max_size */
1914 static int pos = 0, max = 0 ; /* being smaller than BUFFERSIZE+1 */
1915 static int nesting = 0; /* nesting level of comments */
1916 int nesting_start_line = 0; /* start line of comment for errortext() */
1918 int i, squote, dquote;
1927 max = get_next_line( line, sizeof(line), yyin ) ;
1928 if (max < 0) /* empty input file */
1930 /* test for both #! - fixes bug 1230639 */
1934 { /* Ignore first line beginning this way for unix compat */
1936 memcpy( line, "/**/\n", 5 );
1939 else if (pos < max) /* Are there still characters to transmit? */
1941 /* Buffer already checked for correctness */
1942 if (max_size > max - pos)
1943 max_size = max - pos;
1944 memcpy(buf, line + pos, max_size);
1948 else /* Need next line */
1950 if (contline && !nesting)
1952 extnextline = ++nextline ;
1957 max = get_next_line( line, sizeof(line), yyin ) ;
1958 if (max < 0) /* empty input file */
1962 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1963 exiterror( ERR_UNMATCHED_QUOTE, 1 ) ;
1969 /* A new line is available, check first for an ending comment */
1970 dest = line; /* we change comments in line */
1971 if (nesting) /* This may lead to more line reading */
1974 * The first time extnextline is non-zero, we have the comment
1975 * starting sequence line. This is saved for use if no matching
1976 * ending comment sequence is found, so that the error message
1977 * reflects the start of the comment.
1978 * Regina feature request: #508788
1980 if ( extnextline < 0 )
1981 nesting_start_line = nextline+1;
1982 extnextline = ++nextline ;
1983 extnextstart = 1; /* See Reference (*) below */
1988 if (c == '*') /* start of comment end? */
1990 if (line[pos+1] == '/')
1991 { /* pos+1 always exists, at least '\n' or '\0' */
2003 else if (c == '/') /* start of new begin? */
2005 if (line[pos+1] == '*')
2018 max = get_next_line( line, sizeof(line), yyin ) ;
2019 if (max < 0) /* empty input file */
2021 if ( nesting_start_line )
2022 parser_data.tline = nesting_start_line ; /* set tline for exiterror */
2024 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
2025 exiterror( ERR_UNMATCHED_QUOTE, 1 ) ;
2028 /* This is a comment continuation. If the lexer will return
2029 * something it already has a valid tline/tstart pair.
2030 * The lexer will return the current token and on the NEXT
2031 * call it expects a valid nextline/nextstart pair.
2033 extnextline = ++nextline; extnextstart = 1;
2034 dest = line; /* we change comments in line */
2035 goto repeated_nesting;
2037 extnextstart = pos + 1;
2039 { /* Exception! Have a look at: "x='y',{*\n\n*}\n'z'". This should
2040 * result in "x = 'y' 'z'".
2041 * We must parse until EOL and check for whitespaces and comments...
2050 if (line[pos+1] == '*')
2054 goto repeated_nesting;
2057 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
2058 exiterror( ERR_YACC_SYNTAX, 1, parser_data.tline ) ; /* standard error */
2062 /* All done, it was a continuation line. */
2063 /* contline will be resetted by: */
2064 return fill_buffer( buf, max_size ) ;
2067 /* We have something to play with. Run through the input and check for
2068 * strings including comments.
2070 squote = dquote = 0;
2074 /* We use selective loops to reduce comparisons */
2079 if (c == '*') /* start of comment end? */
2081 if (line[pos+1] == '/')
2082 { /* pos+1 always exists, at least '\n' or '\0' */
2093 else if (c == '/') /* start of new begin? */
2095 if (line[pos+1] == '*')
2104 } while (pos < max);
2107 while ((c = line[pos]) != '\'')
2112 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
2113 exiterror( ERR_UNMATCHED_QUOTE, 2 ) ;
2122 while ((c = line[pos]) != '\"')
2127 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
2128 exiterror( ERR_UNMATCHED_QUOTE, 3 ) ;
2135 else if (line_comment)
2137 while ((c = line[pos]) >= ' ') /* not at end of line yet */
2142 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
2143 exiterror( ERR_UNMATCHED_QUOTE, 3 ) ;
2146 *dest++ = c; /* line terminator */
2152 switch (c = line[pos])
2167 if (line[pos + 1] == '*')
2182 case '-': /* line "--" comments */
2183 if (line[pos + 1] == '-')
2199 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
2200 exiterror( ERR_INVALID_CHAR, 1, c, c ) ;
2210 max = (int) (dest - line);
2212 /* Now we can replace a ',' [spaces|comments] '\n' with the line
2213 * continuation, but check for nesting first
2216 { /* Don't leave ANY spaces at EOL. That would confuse the lexer. */
2218 while ((i >= 0) && rx_isspace(line[i]))
2221 /* Of course, there is one exception: line continuation */
2222 while ((i >= 0) && (line[i] == '`'))
2224 if ((i >= 0) && (line[i] == ','))
2231 * At this point the lexer can't determine the nextline since we eat up
2232 * the \n. This leads to an incorrect count. But either the '`'-signs
2233 * are ignored or they are follows of a "token", a valid word.
2234 * Look at "say x;say y ``". This will cause the lexer to
2235 * return at least 4 tokens (SAY "x" ";" SAY) before "y" will be
2236 * returned. We can only set nextline/nextstart at "y".
2237 * Result: We set this pair at the start of the next call to
2238 * fill_buffer such that the next call to yylex will set the correct
2244 i = max - 1; /* on last valid char */
2247 if (!MY_ISBLANK(line[i]) && (line[i] != '\n'))
2251 /* i now -1 or on last nonblank */
2252 if ((i >= 0) && (line[i] == ','))
2253 { /* FIXME: What shall be do with "," followed by EOF? */
2262 memcpy(buf, line, max_size);
2268 /* yywrap MAY be called by the lexer is EOF encounters, see (f)lex docu */
2271 assert( do_level>= 0 ) ;
2274 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
2275 exiterror( ERR_INCOMPLETE_STRUCT, 0 ) ;
2280 /******************************************************************************
2281 ******************************************************************************
2282 * global interface ***********************************************************
2283 ******************************************************************************
2284 *****************************************************************************/
2286 /* initalize all local and global values */
2287 static void init_it_all( tsd_t *TSD )
2289 #if defined(FLEX_SCANNER) && defined(FLEX_DEBUG)
2298 expression_ended = 0 ;
2299 insert_abuttal = 0 ;
2303 inhibit_delayed_abuttal = 0 ;
2310 in_address = not_in_address ;
2311 seek_with = no_seek_with ;
2312 kill_this_space = 0 ;
2325 interptrmax = NULL ;
2326 /* non-zero values */
2330 kill_next_space = 1 ;
2334 memset(&parser_data, 0, sizeof(internal_parser_type));
2335 parser_data.TSD = TSD;
2338 /* fetch may only be called by fetch_protected. The parser and lexer are
2339 * already protected by regina_parser by fetch_protected.
2340 * This function prepares the lexer and parser and call them. The
2341 * result and all generated values are stored in result. The parser
2342 * tree isn't executed here.
2343 * Exactly fptr xor str must be non-null.
2345 static void fetch(tsd_t *TSD, FILE *fptr, const streng *str,
2346 internal_parser_type *result)
2348 init_it_all( TSD ) ;
2352 yy_delete_buffer(YY_CURRENT_BUFFER) ;
2363 interptr = str->value ;
2364 last_interptr = interptr ;
2365 interptrmax = interptr + cchmax ;
2366 parser_data.incore_source = str->value; /* fixes bug 972796 */
2367 result->incore_source = str->value;
2372 parser_data.result = __reginaparse();
2375 yy_delete_buffer(YY_CURRENT_BUFFER) ;
2381 *result = parser_data;
2382 /* Some functions assume null values if parsing isn't running: */
2383 memset(&parser_data, 0, sizeof(internal_parser_type));
2386 /* This function serializes the parser/lexer requests of the process and
2387 * call fetch which will make the work. Look there.
2389 static void fetch_protected( tsd_t * volatile TSD, FILE *fptr,
2390 const streng *str, internal_parser_type *result )
2392 volatile int panicked = 0;
2393 tsd_t * volatile saved_TSD;
2395 THREAD_PROTECT( regina_parser )
2396 TSD->in_protected = 1;
2397 memset(&parser_data, 0, sizeof(internal_parser_type));
2399 saved_TSD = TSD; /* vars used until here */
2400 if ( setjmp( TSD->protect_return ) )
2402 TSD = saved_TSD; /* prevents bugs like 592393 */
2406 fetch( TSD, fptr, str, result );
2408 TSD->in_protected = 0;
2409 THREAD_UNPROTECT( regina_parser )
2415 * We got a fatal condition while fetching the input.
2417 memset(result, 0, sizeof(internal_parser_type));
2420 * FIXME: Currently no time to investigate it, but we have to do
2421 * a cleanup of the node and source lines of parser_data
2422 * here. Test this with
2425 * interpret 'nop;"a='
2427 if ( TSD->delayed_error_type == PROTECTED_DelayedInterpreterExit )
2428 jump_interpreter_exit( TSD, TSD->expected_exit_error );
2429 if ( TSD->delayed_error_type == PROTECTED_DelayedRexxSignal )
2430 jump_rexx_signal( TSD );
2431 jump_script_exit( TSD, TSD->systeminfo->result );
2434 /* fetch_file reads in a REXX file from disk (or a pipe). It returns without
2435 * executing the program. The parsed tree with all needed values including
2436 * the result of the parsing is copied to result.
2437 * fptr remains open after this call.
2438 * type is either PARSE_ONLY or PARSE_AND_TIN. In the later case a tinned variant of the
2439 * parsing tree is created, too.
2441 void fetch_file(tsd_t *TSD, FILE *fptr, internal_parser_type *result)
2443 fetch_protected(TSD, fptr, NULL, result);
2446 /* fetch_string reads in a REXX macro from a streng. It returns without
2447 * executing the program. The parsed tree with all needed values including
2448 * the result of the parsing is copied to result.
2449 * type is either PARSE_ONLY or PARSE_AND_TIN. In the later case a tinned variant of the
2450 * parsing tree is created, too.
2451 * The function is typically called by an "INTERPRET" instruction.
2453 void fetch_string(tsd_t *TSD, const streng *str, internal_parser_type *result)
2455 fetch_protected(TSD, NULL, str, result);