3 static char *RCSid = "$Id$";
7 * The Regina Rexx Interpreter
8 * Copyright (C) 1992-1994 Anders Christensen <anders@pvv.unit.no>
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
20 * You should have received a copy of the GNU Library General Public
21 * License along with this library; if not, write to the Free
22 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
31 /* Define ASCII_0_TERMINATES_STRING if you want that ASCII-0 terminates
32 * an input string. Normally this should not happen. Input strings are
33 * terminated by a length encoding. The string {"", length=1} is invalid for
34 * the lexer (ASCII-0 is not allowed) while {"", length=0} is allowed (this
36 * ASCII_0_TERMINATES_STRING is only(!) for backward compatibility and
37 * shouldn't be used under normal circumstances.
40 #define ASCII_0_TERMINATES_STRING
45 #define YYLMAX BUFFERSIZE
49 #define YY_CHAR YY_CHAR_TYPE
51 #define YY_INPUT(buf,result,max_size) result=fill_buffer(buf,max_size)
54 /* NOTE: Every comment is replaced by a '`' character in the lower input
55 * routines. These should check for such (illegal) characters.
57 #define MY_ISBLANK(c) (((c)==' ')||((c)=='\t')||((c)=='`'))
59 PROTECTION_VAR(regina_parser)
60 /* externals which are protected by regina_parser */
61 internal_parser_type parser_data = {NULL, };
63 char retvalue[BUFFERSIZE] ;
64 unsigned SymbolDetect = 0;
65 /* end of externals protected by regina_parser */
67 /* locals, they are protected by regina_parser, too */
68 static int nextline = 1;
69 static int nextstart = 1;
70 static int do_level = 0 ;
71 static int in_numform=0, next_numform=0 ;
72 static int obs_with=0, in_do=0, in_then=0, dontlast=0 ;
74 static int code=0, nexta=0, in_parse=0, in_trace=0, itflag=0 ;
75 static int in_signal=0, in_call=0 ;
76 static enum { not_in_address = 0,
77 in_address_keyword, /* ADDRESS just seen */
78 in_address_main, /* after the first word */
79 in_address_value, /* like main but VALUE was seen */
80 in_address_with} in_address = not_in_address,
81 last_in_address = not_in_address ;
82 static enum {no_seek_with = 0,
84 seek_with_from_address} seek_with = no_seek_with ;
85 static int preva=0, lasta=0 ;
87 static int kill_this_space=0, kill_next_space=1 ;
88 static int extnextline = -1, extnextstart; /* for a delayed line increment */
90 static int contline = 0;
92 static int singlequote=0, doblequote=0 ;
93 static int firstln=0 ;
95 /* Previous bug. 8-bits clean combined with EOF ==> need at least short */
96 static short chbuffer[LOOKAHEAD] ;
97 static int ipretflag=0, cch=0 ;
98 static const char *interptr=NULL ;
99 static const char *interptrmax ;
100 static int cchmax = 0 ;
102 static YY_CHAR_TYPE *rmspc( YY_CHAR_TYPE *instr ) ;
103 static int fill_buffer( char *buf, int max_size ) ;
104 #define SET_NEXTSTART() (nextstart += yyleng)
106 #define YY_FATAL_ERROR(s) exiterror( ERR_PROG_UNREADABLE, 1, s )
110 %start comm signal sgtype procd parse then with
111 %start numeric do1 other value1 ifcont signame nmform
121 /* int yy_startcond=comm ; FGC: Needless? */
126 csym [0-9.][a-zA-Z0-9.@#$!?_]*
127 ssym [a-zA-Z@#$!?_][a-zA-Z0-9.@#$!?_]*
128 strs ('([^']|'')*'|\"([^"]|\"\")*\")
130 sym [a-zA-Z0-9.@#$!?_]+
133 hex {bl}*{hsym}*({bl}+({hsym}{hsym})+)*{bl}*
134 bin {bl}*{bsym}*({bl}+({bsym}{bsym}{bsym}{bsym})+)*{bl}*
136 vtail [a-zA-Z@#$!?_][a-zA-Z0-9@#$!?_]*
137 ctail [0-9][a-zA-Z0-9@#$!?_]*
175 lasta = (dontlast==0) ;
187 last_in_address = in_address ; /* just for the "Environment" */
188 /* there can't be an intermediate SPACE between ADDRESS and the next word*/
189 if (in_address == in_address_keyword)
190 in_address = in_address_main ;
192 kill_this_space = kill_next_space ;
193 kill_next_space = 0 ;
197 in_trace = seek_with = 0 ;
198 itflag = (in_trace) ;
200 if (extnextline != -1)
202 parser_data.tstart = nextstart = extnextstart;
203 parser_data.tline = nextline = extnextline;
208 parser_data.tstart = nextstart ;
209 parser_data.tline = nextline ;
215 \`* SET_NEXTSTART() ;
217 <ifcont>{bl}[;\r?\n]{bl} {
219 if ((ptr = strchr(yytext, '\n')) != NULL)
221 nextstart = yyleng - (int) (ptr - (char *) yytext) ;
223 if (extnextline != -1)
235 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
236 exiterror( ERR_INVALID_TEMPLATE, 1, yytext ) ;
238 obs_with = in_do = 0 ;
239 in_signal = in_address = in_call = 0 ;
241 if (seek_with == seek_with_from_address)
242 seek_with = no_seek_with ;
243 if ((ptr = strchr(yytext, '\n')) != NULL)
245 nextstart = yyleng - (int) (ptr - (char *) yytext) ;
247 if (extnextline != -1)
254 <comm>{a}{d}{d}{r}{e}{s}{s}{bl} {
255 BEGIN value1 ; /* Allow a following VALUE keyword */
256 seek_with = seek_with_from_address ;
257 in_address = in_address_keyword ;
258 in_call = 1 ; /* Allow the next words to be given as in CALL. */
262 <comm>{a}{r}{g}{bl} {
268 <comm>{c}{a}{l}{l}{bl} {
276 assert( do_level >=0 ) ;
282 <comm>{d}{r}{o}{p}{bl} {
288 <comm>{e}{l}{s}{e}{bl} {
293 <comm>{e}{x}{i}{t}{bl} {
301 parser_data.if_linenr = linenr - 1;
305 <comm>{i}{n}{t}{e}{r}{p}{r}{e}{t}{bl} {
310 <comm>{i}{t}{e}{r}{a}{t}{e}{bl} {
315 <comm>{l}{e}{a}{v}{e}{bl} {
320 <comm>{o}{p}{t}{i}{o}{n}{s}{bl} {
325 <comm>{n}{o}{p}{bl} {
330 <comm>{n}{u}{m}{e}{r}{i}{c}{bl} {
335 <comm>{p}{a}{r}{s}{e}{bl} {
341 <comm>{p}{r}{o}{c}{e}{d}{u}{r}{e}{bl} {
346 <comm>{p}{u}{l}{l}{bl} {
352 <comm>{p}{u}{s}{h}{bl} {
357 <comm>{q}{u}{e}{u}{e}{bl} {
362 <comm>{r}{e}{t}{u}{r}{n}{bl} {
367 <comm>{s}{a}{y}{bl} {
372 <comm>{s}{e}{l}{e}{c}{t}{bl} {
374 assert( do_level >= 0 ) ;
379 <comm>{s}{i}{g}{n}{a}{l}{bl} {
385 <comm>{t}{r}{a}{c}{e}{bl} {
391 <comm>{u}{p}{p}{e}{r}{bl} {
397 <comm>{w}{h}{e}{n}{bl} {
400 parser_data.when_linenr = linenr - 1;
404 <comm>{o}{t}{h}{e}{r}{w}{i}{s}{e}{bl} {
409 <comm>{e}{n}{d}{bl} {
411 assert( do_level >= 0 ) ;
414 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
415 exiterror( ERR_UNMATCHED_END, 1 ) ;
440 /* FGC: What should the following mean after a REJECT? */
448 <comm>{csym}{bl}={bl} {
449 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
450 if (yytext[0] == '.')
451 exiterror( ERR_INVALID_START, 3, yytext ) ;
453 exiterror( ERR_INVALID_START, 2, yytext ) ;
456 <comm>{ssym}{bl}={bl} {
461 for (i=0;yytext[i];i++)
462 if ('a' <= yytext[i] && yytext[i] <= 'z')
463 retvalue[j++] = (char) (yytext[i] & 0xDF) ; /* ASCII only */
464 else if (yytext[i]!='=' && yytext[i]!='\n' && !MY_ISBLANK(yytext[i]))
465 retvalue[j++] = yytext[i] ;
469 return ASSIGNMENTVARIABLE ; }
471 <nmform,signal,value1>{bl}{v}{a}{l}{u}{e}{bl} {
474 if ((last_in_address == in_address_keyword) &&
475 (in_address == in_address_main))
477 BEGIN other ; /* the next useful expression will set it to "other"
478 * in either case. The BEGIN-states aren't very
479 * handy in most cases; they are not flexible enough.
481 in_address = in_address_value ;
488 if ((!in_trace)&&(!in_address)&&(!in_signal)&&(!in_call)&&(!in_numform))
490 in_trace = in_signal = in_call = 0 ;
499 <signal>{o}{f}{f}{bl} {
504 <signame>{n}{a}{m}{e}{bl} {
509 <sgtype>{e}{r}{r}{o}{r}{bl} {
514 <sgtype>{h}{a}{l}{t}{bl} {
519 <sgtype>{n}{o}{v}{a}{l}{u}{e}{bl} {
524 <sgtype>{n}{o}{t}{r}{e}{a}{d}{y}{bl} {
529 <sgtype>{f}{a}{i}{l}{u}{r}{e}{bl} {
534 <sgtype>{s}{y}{n}{t}{a}{x}{bl} {
539 <sgtype>{l}{o}{s}{t}{d}{i}{g}{i}{t}{s}{bl} {
542 return LOSTDIGITS ; }
544 <value1>{bl}[a-zA-Z?]+{bl} {
545 if (!in_trace) REJECT ;
546 strcpy(retvalue,rmspc( yytext )) ;
550 <procd>{e}{x}{p}{o}{s}{e}{bl} {
556 <parse>{u}{p}{p}{e}{r}{bl} {
560 <parse>{a}{r}{g}{bl} {
565 <parse>{n}{u}{m}{e}{r}{i}{c}{bl} {
570 <parse>{p}{u}{l}{l}{bl} {
575 <parse>{s}{o}{u}{r}{c}{e}{bl} {
580 <parse>{e}{x}{t}{e}{r}{n}{a}{l}{bl} {
585 <parse>{l}{i}{n}{e}{i}{n}{bl} {
590 <parse>{v}{e}{r}{s}{i}{o}{n}{bl} {
595 <parse>{v}{a}{r}{bl} {
600 <parse>{v}{a}{l}{u}{e}{bl} {
601 seek_with = seek_with_from_parse ;
604 BEGIN with ; /* in fact this works as a "not comm" */
608 <comm>{bl}{t}{h}{e}{n}{bl} {
613 <other,ifcont>{bl}{t}{h}{e}{n}{bl} {
614 if (in_then!=1) REJECT ;
620 {bl}{w}{i}{t}{h}{bl} {
622 if ((in_do)||(!seek_with))
624 if (seek_with == seek_with_from_parse)
627 if (in_address) /* any address state */
628 in_address = in_address_with ; /* WITH seen */
633 <numeric>{d}{i}{g}{i}{t}{s}{bl} {
638 <numeric>{f}{o}{r}{m}{bl} {
644 <nmform>{s}{c}{i}{e}{n}{t}{i}{f}{i}{c}{bl} {
646 return SCIENTIFIC ; }
648 <nmform>{e}{n}{g}{i}{n}{e}{e}{r}{i}{n}{g}{bl} {
650 return ENGINEERING ; }
652 <numeric>{f}{u}{z}{z}{bl} {
657 <do1>{f}{o}{r}{e}{v}{e}{r}{bl} {
673 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
674 exiterror( ERR_INVALID_DO_SYNTAX, 1, "TO" ) ;
687 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
688 exiterror( ERR_INVALID_DO_SYNTAX, 1, "BY" ) ;
701 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
702 exiterror( ERR_INVALID_DO_SYNTAX, 1, "FOR" ) ;
706 {bl}{w}{h}{i}{l}{e}{bl} {
711 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
712 exiterror( ERR_INVALID_DO_SYNTAX, 1, "WHILE" ) ;
721 {bl}{u}{n}{t}{i}{l}{bl} {
725 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
726 exiterror( ERR_INVALID_DO_SYNTAX, 1, "UNTIL" ) ;
739 strcpy(retvalue,rmspc( yytext )) ;
741 return DOVARIABLE ; }
743 <comm>{sym}{bl}:{bl} { /* set labl to sym for other kind or vice versa*/
747 for (i=j=0;(ch=yytext[i])!=0;i++) {
748 if ('a' <= ch && ch <= 'z')
749 retvalue[j++] = (char) (ch & 0xDF) ; /* ASCII only */
750 /* FIXME: the following is WRONG, must first cut off {bl}:{bl} and
751 * then fixup strings, FGC */
752 else if ((ch!=',')&&(ch!='\n')&&(ch!=':')&&!MY_ISBLANK(ch))
753 retvalue[j++] = ch ; }
759 ('([^']|'')+'|\"([^"]|\"\")+\")`*\( {
763 strcpy(retvalue,&yytext[1]) ;
764 for (i=3; i<=yyleng && retvalue[yyleng-i]=='`'; i++) ;
765 retvalue[yyleng-i] = '\0' ;
767 kill_next_space = 1 ;
769 nexta = dontlast = 1 ;
772 return CONCATENATE ; }
776 return EXFUNCNAME ; }
780 ('{hex}'|\"{hex}\")[xX]/[^a-zA-Z0-9.@#$!?_(] {
785 /* first group can be large and odd-numbered; find # of zeros to pad */
786 for (i=1; (yytext[i]!=ech) && isxdigit(yytext[i]); i++) ;
788 /* j is the number of digits processed */
792 for (i=1;(ech!=(ch=yytext[i]));i++)
796 if ((i==1)||(j)) /* leading space or space within a byte */
798 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
799 exiterror( ERR_INVALID_HEX_CONST, 1, i ) ;
802 else if (isxdigit(ch))
804 sum = sum *16 + (HEXVAL(ch)) ;
807 retvalue[k++] = (char) sum ;
813 if ((i>1) && isspace(yytext[i-1]))
815 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
816 exiterror( ERR_INVALID_HEX_CONST, 1, i ) ;
822 if ((preva==1)&&(!in_parse)&&(!in_call))
833 kill_next_space = 1 ;
844 ('{bin}'|\"{bin}\")[bB]/[^a-zA-Z0-9.@#$!?_(] {
849 /* first group can be large and odd-numbered; find # of zeros to pad */
850 for (i=1; (yytext[i]!=ech) && isdigit(yytext[i]); i++) ;
852 /* j is the number of digits processed */
856 for (i=1;(ech!=(ch=yytext[i]));i++)
860 if ((i==1)||(j)) /* leading space or space within a byte */
862 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
863 exiterror( ERR_INVALID_HEX_CONST, 2, i ) ;
866 else if ((ch=='0')||(ch=='1'))
868 sum = sum *2 + (ch-'0') ;
871 retvalue[k++] = (char) sum ;
877 if ((i>1) && isspace(yytext[i-1]))
879 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
880 exiterror( ERR_INVALID_HEX_CONST, 2, i ) ;
884 /* then pack the nibbles */
885 for (i=j=(k%2); i<=k; i++)
888 retvalue[i/2] = (char)((retvalue[i/2]&0xf0) + retvalue[i-j]) ;
890 retvalue[i/2] = (char)((retvalue[i-j]&0x0f)<<4) ;
893 retvalue[retlength=i/2] = '\0' ;
895 if ((preva==1)&&(!in_parse)&&(!in_call))
906 kill_next_space = 1 ;
915 ('([^']|'')*'|\"([^"]|"")*\")[xXbB]/[^a-zA-Z0-9.@#$!?_(] {
919 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
920 for (i=0;yytext[i]; i++)
922 if ( !isxdigit(yytext[i]) && !(yytext[i] == '\"' && yytext[0] == '\"') && !(yytext[i] == '\'' && yytext[0] == '\'') && yytext[i] != ' ' )
928 exiterror( ERR_INVALID_HEX_CONST, 3, bad ) ;
932 ('([^']|'')*'|\"([^"]|\"\")*\") {
935 for (i=1; yytext[i+1]; i++)
939 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
940 exiterror( ERR_UNMATCHED_QUOTE, 0 ) ;
943 if (yytext[i]==yytext[0] && yytext[i+1]==yytext[0])
944 for (j=i+1; yytext[j]; j++)
945 yytext[j] = yytext[j+1] ;
948 yytext[strlen(yytext)-1] = '\0' ;
949 strcpy(retvalue,&yytext[1]) ;
953 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
954 exiterror( ERR_INV_SUBKEYWORD, 11, "ENGINEERING SCIENTIFIC", retvalue ) ;
957 if ((preva==1)&&(!in_parse)&&(!in_call)) {
961 return CONCATENATE ; }
966 kill_next_space = 1 ;
978 strcpy(retvalue,yytext) ;
982 (((([0-9]+\.|\.?[0-9])[0-9]*{e}(\-|\+)[0-9]+))|([.0-9][a-zA-Z0-9.$!?@#_]*))\`*\( {
985 for (i=0; i<yyleng-1 && yytext[i]!='`'; i++) /* Copy value only */
986 retvalue[i] = (char) toupper(yytext[i]) ;
991 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
992 exiterror( ERR_INV_SUBKEYWORD, 11, "ENGINEERING SCIENTIFIC", retvalue ) ;
996 kill_next_space = 1 ;
999 nexta = dontlast = 1 ;
1002 return CONCATENATE ;
1006 return INFUNCNAME ; }
1008 ((([0-9]+\.|\.?[0-9])[0-9]*{e}(\-|\+)[0-9]+))|([.0-9][a-zA-Z0-9.$!?@#_]*) {
1011 for (i=0; i<=yyleng; i++) /* Copy '\0', too */
1012 retvalue[i] = (char) toupper(yytext[i]) ;
1016 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1017 exiterror( ERR_INV_SUBKEYWORD, 11, "ENGINEERING SCIENTIFIC", retvalue ) ;
1024 kill_next_space = 1 ;
1030 if ((preva==1)&&(!in_parse)) {
1034 return CONCATENATE ; }
1038 return CONSYMBOL ; }
1043 * this might be a symbol in front of a function, but only if next
1044 * char in input stream is "(".
1047 if (in_trace) REJECT ;
1049 for (i=0; i<=yyleng; i++) /* include terminating '\0' */
1050 retvalue[i] = (char) toupper(yytext[i]) ;
1054 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1055 exiterror( ERR_INV_SUBKEYWORD, 11, "ENGINEERING SCIENTIFIC", retvalue ) ;
1058 if ((last_in_address == in_address_keyword) &&
1059 (in_address == in_address_main))
1061 kill_next_space = 1 ;
1067 /* We must check if a '(' follows. Remember the number of eaten chars. */
1069 for (;(i=input())=='`';)
1076 /* input() has destroyed the yytext-terminator re-set it */
1077 yytext[yyleng] = '\0';
1084 kill_next_space = 1 ;
1087 nexta = dontlast = 1 ;
1089 return CONCATENATE ;
1097 kill_next_space = 1 ;
1105 if ((preva==1)&&(!in_parse)) {
1108 return CONCATENATE ; }
1112 if (in_address == in_address_with)
1113 kill_next_space = 1 ;
1114 if (SymbolDetect) /* allow a fast breakout */
1116 /* We define a tricky preprocessor directive. This will give us
1117 * maximum performance without the loss of control or errors produced
1120 #define RET_IF(s) if ((SymbolDetect & SD_##s) && \
1121 (yyleng == sizeof(#s) - 1) && \
1122 (strncmp(retvalue, \
1124 sizeof(#s) - 1) == 0)) \
1126 /* e.g. RET_IF(INPUT); is replaced by:
1127 * if ((SymbolDetect & SD_INPUT) &&
1128 * (yyleng == sizeof("INPUT") - 1) &&
1129 * (strncmp(retvalue,
1131 * sizeof("INPUT") - 1) == 0))
1146 return SIMSYMBOL ; }
1157 nexta = dontlast = 1 ;
1160 return CONCATENATE ;
1204 /* why don't I have a {bl} in the beginning of this re? bug? */
1217 {bl}{not}{bl}\>{bl}\>{bl} {
1221 {bl}{not}{bl}\<{bl}\<{bl} {
1225 {bl}\>{bl}\>{bl}={bl} {
1229 {bl}\<{bl}\<{bl}={bl} {
1233 {bl}(\>|{not}{bl}(\<{bl}=|={bl}\<)){bl} {
1237 {bl}({not}{bl}\<|={bl}\>|\>{bl}=){bl} {
1241 {bl}(\<|{not}{bl}(\>{bl}=|={bl}\>)){bl} {
1245 {bl}({not}{bl}\>|={bl}\<|\<{bl}=){bl} {
1249 {bl}({not}{bl}=|\<{bl}\>|\>{bl}\<){bl} {
1251 return DIFFERENT ; }
1255 return EQUALEQUAL ; }
1257 {bl}{not}{bl}={bl}={bl} {
1259 return NOTEQUALEQUAL ; }
1271 return CONCATENATE ; }
1278 if (in_address == in_address_value) /* Always allow spaces in the VALUE */
1279 { /* part of the ADDRESS stmt. */
1283 if (kill_this_space)
1289 return (((in_parse)&&(!seek_with)) ? yylex() : SPACE) ; }
1293 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1294 exiterror( ERR_UNMATCHED_QUOTE, 0 ) ; }
1297 [^A-Za-z0-9 \t\n@#$&|.?!_*()+=%\\^'";:<,>/-] {
1299 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1300 exiterror( ERR_INVALID_CHAR, 1, yytext[0], yytext[0] ) ; }
1304 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1305 exiterror( ERR_SYMBOL_EXPECTED, 1, yytext ) ;}
1309 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1310 exiterror( ERR_INTERPRETER_FAILURE, 1, __FILE__, __LINE__, "" ) ;}
1315 #define NORMALSTAT 0
1316 #define COMMENTSTAT 1
1317 #define SINGLEQUOTE 2
1318 #define DOUBLEQUOTE 3
1321 /* rmspc uppercases all characters and removes blanks from a string.
1322 * Returns the input string.
1324 static YY_CHAR_TYPE *rmspc( YY_CHAR_TYPE *instr )
1326 YY_CHAR_TYPE *retval=instr ,
1330 while ((c = *instr++) != '\0')
1332 c = (YY_CHAR_TYPE) toupper(c);
1333 /* These characters are treated as blanks: */
1334 if ((c!='`') && (c!=' ') && (c!=',') && (c!='\t') && (c!='\n'))
1343 /* get_next_line: Lower level input fetcher.
1344 * Reads exactly one line from the input stream (file or string).
1345 * All EOL characters are removed and the result is stored in
1346 * last_source_line. A check for line overflow occurred here.
1347 * A special check is done for CP/M ^Z (DOS and friends use this for
1348 * "backward" compatibility, too).
1349 * line is filled with valid values on success.
1350 * max must reflect size of line and should be at least BUFFERSIZE + 2;
1351 * Returns -1 (no input) or the number of valid chars in line.
1353 static int get_next_line( char *line, int max, FILE *stream )
1355 lineboxptr newline ;
1356 offsrcline *incore_newline;
1358 int c = 0, nextEOL ;
1360 if (inEOF) /* You can't use myunputc if EOF is reached! */
1363 while (pos <= max - 2)
1367 c = chbuffer[--bufptr] ;
1370 if (interptr>=interptrmax)
1373 #ifdef ASCII_0_TERMINATES_STRING
1374 if ((c = *interptr++) == '\0')
1383 if ((c=='\r') || (c=='\n') || (c==EOF))
1385 line[pos++] = (char) (unsigned char) c ;
1388 /* first, check for overflow */
1389 if ((c!='\r') && (c!='\n') && (c!=EOF))
1391 parser_data.tline = linenr ; /* set tline for exiterror */
1392 exiterror( ERR_TOO_LONG_LINE, 0 ) ;
1395 /* We have either a (first) line terminator or EOF */
1398 if ((pos==1) && (line[0]=='\x1A')) /* CP/M ^Z EOF? */
1409 /* get one more char */
1411 nextEOL = chbuffer[--bufptr] ;
1414 if (interptr>=interptrmax)
1417 #ifdef ASCII_0_TERMINATES_STRING
1418 if ((nextEOL = *interptr++) == '\0')
1421 nextEOL = *interptr++ ;
1425 nextEOL = getc(stream) ;
1428 /* Decide if the next character is the last char of a EOL pair.
1429 * Valid pairs are CR/LF or LF/CR. Put nextEOL back if there is no pair.
1431 if (((c!='\n') || (nextEOL!='\r')) &&
1432 ((c!='\r') || (nextEOL!='\n')))
1433 chbuffer[bufptr++] = (short) nextEOL ;
1435 cch = 0 ; /* not needed ? */
1438 if (parser_data.incore_source)
1439 { /* We can use the incore string to describe a source line. */
1440 incore_newline = FreshLine() ;
1441 incore_newline->length = pos - 1 ;
1442 /* FIXME: What happens on the second attempt to read EOF or with CRLF? */
1443 incore_newline->offset = interptr - parser_data.incore_source ;
1447 newline = (lineboxptr)Malloc(sizeof(linebox)) ;
1448 newline->line = Str_make_TSD( parser_data.TSD, pos - 1 ) ;
1449 newline->line->len = pos - 1 ;
1450 memcpy(newline->line->value, line, pos - 1 ) ;
1451 newline->prev = parser_data.last_source_line ;
1452 newline->next = NULL ;
1453 newline->lineno = linenr++ ;
1455 if (parser_data.first_source_line==NULL)
1456 parser_data.first_source_line = newline ;
1458 parser_data.last_source_line->next = newline ;
1459 parser_data.last_source_line = newline ;
1464 /* fill_buffer: Higher level input fetcher.
1465 * (To allow the C-file to compile, all Rexx comments in this comment
1466 * are written as "{*" "*}" instead of the normal, C-like manner.)
1467 * Reads lines from the input stream (yyin or string) with get_next_line.
1468 * Only one line is returned to allow the saving of the line number.
1469 * This routine replaces all comments by '`' signs. This allows
1470 * the detection of a "pseudo" blank: The fragment "say x{* *}y" uses two
1471 * variables, not one called "xy". The parsing of comments must be done
1472 * here to check for the actual numbers of open and closes ("{*" and "*}").
1473 * While doing this we must always check for strings since "'{*'" is not part
1475 * Here is a problem: Is this a nested valid comment: "{* '{*' *} *}"?
1476 * I think so although you cannot remove the outer comment signs without an
1477 * error. Everything within a comment is a comment (per def.). Counting
1478 * opens and closes of comment signs is an ugly trick to help the user.
1479 * He/she must know what he/she is doing if nesting comments!
1481 * max_size gives the maximum size of buf. This is filled up with input.
1482 * We never return less than one character until EOF is reached. Thus, we
1483 * read more than one true input line if a comment spans over more than one
1485 * A line will either be terminated by a single '\n' or by a blank. The
1486 * later one replaces a line continuation (',' [spaces] EOL).
1487 * Errors in this low
1489 * Conclusion: We have to fight very hard to set the expected line number.
1490 * * Comments spanning over lines set them on getting the
1492 * * Concatenated lines set
1494 static int fill_buffer( char *buf, int max_size )
1496 /* statics protected by regina_parser */
1497 static char line[BUFFERSIZE+2] ; /* special buffer to allow max_size */
1498 static int pos = 0, max = 0 ; /* being smaller than BUFFERSIZE+1 */
1499 static int nesting = 0; /* nesting level of comments */
1500 int nesting_start_line = 0; /* start line of comment for errortext() */
1502 int i, squote, dquote;
1510 max = get_next_line( line, sizeof(line), yyin ) ;
1511 if (max < 0) /* empty input file */
1515 { /* Ignore first line beginning this way for unix compat */
1517 return fill_buffer( buf, max_size ) ;
1521 { /* Ignore first line beginning this way for unix compat */
1523 memcpy( line, "/**/\n", 5 );
1527 else if (pos < max) /* Are there still characters to transmit? */
1529 /* Buffer already checked for correctness */
1530 if (max_size > max - pos)
1531 max_size = max - pos;
1532 memcpy(buf, line + pos, max_size);
1536 else /* Need next line */
1538 if (contline && !nesting)
1540 extnextline = ++nextline ;
1545 max = get_next_line( line, sizeof(line), yyin ) ;
1546 if (max < 0) /* empty input file */
1550 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1551 exiterror( ERR_UNMATCHED_QUOTE, 1 ) ;
1557 /* A new line is available, check first for an ending comment */
1558 dest = line; /* we change comments in line */
1559 if (nesting) /* This may lead to more line reading */
1562 * The first time extnextline is non-zero, we have the comment
1563 * starting sequence line. This is saved for use if no matching
1564 * ending comment sequence is found, so that the error message
1565 * reflects the start of the comment.
1566 * Regina feature request: #508788
1568 if ( extnextline < 0 )
1569 nesting_start_line = nextline+1;
1570 extnextline = ++nextline ;
1571 extnextstart = 1; /* See Reference (*) below */
1576 if (c == '*') /* start of comment end? */
1578 if (line[pos+1] == '/')
1579 { /* pos+1 always exists, at least '\n' or '\0' */
1591 else if (c == '/') /* start of new begin? */
1593 if (line[pos+1] == '*')
1606 max = get_next_line( line, sizeof(line), yyin ) ;
1607 if (max < 0) /* empty input file */
1609 if ( nesting_start_line )
1610 parser_data.tline = nesting_start_line ; /* set tline for exiterror */
1612 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1613 exiterror( ERR_UNMATCHED_QUOTE, 1 ) ;
1616 /* This is a comment continuation. If the lexer will return
1617 * something it already has a valid tline/tstart pair.
1618 * The lexer will return the current token and on the NEXT
1619 * call it expects a valid nextline/nextstart pair.
1621 extnextline = ++nextline; extnextstart = 1;
1622 dest = line; /* we change comments in line */
1623 goto repeated_nesting;
1625 extnextstart = pos + 1;
1627 { /* Exception! Have a look at: "x='y',{*\n\n*}\n'z'". This should
1628 * result in "x = 'y' 'z'".
1629 * We must parse until EOL and check for whitespaces and comments...
1638 if (line[pos+1] == '*')
1642 goto repeated_nesting;
1645 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1646 exiterror( ERR_YACC_SYNTAX, 1, parser_data.tline ) ; /* standard error */
1650 /* All done, it was a continuation line. */
1651 /* contline will be resetted by: */
1652 return fill_buffer( buf, max_size ) ;
1655 /* We have something to play with. Run through the input and check for
1656 * strings including comments.
1658 squote = dquote = 0;
1661 /* We use selective loops to reduce comparisons */
1666 if (c == '*') /* start of comment end? */
1668 if (line[pos+1] == '/')
1669 { /* pos+1 always exists, at least '\n' or '\0' */
1680 else if (c == '/') /* start of new begin? */
1682 if (line[pos+1] == '*')
1691 } while (pos < max);
1694 while ((c = line[pos]) != '\'')
1699 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1700 exiterror( ERR_UNMATCHED_QUOTE, 2 ) ;
1709 while ((c = line[pos]) != '\"')
1714 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1715 exiterror( ERR_UNMATCHED_QUOTE, 3 ) ;
1724 switch (c = line[pos])
1739 if (line[pos + 1] == '*')
1755 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1756 exiterror( ERR_INVALID_CHAR, 1, c, c ) ;
1766 max = (int) (dest - line);
1768 /* Now we can replace a ',' [spaces|comments] '\n' with the line
1769 * continuation, but check for nesting first
1772 { /* Don't leave ANY spaces at EOL. That would confuse the lexer. */
1774 while ((i >= 0) && isspace(line[i]))
1777 /* Of course, there is one exception: line continuation */
1778 while ((i >= 0) && (line[i] == '`'))
1780 if ((i >= 0) && (line[i] == ','))
1787 * At this point the lexer can't determine the nextline since we eat up
1788 * the \n. This leads to an incorrect count. But either the '`'-signs
1789 * are ignored or they are follows of a "token", a valid word.
1790 * Look at "say x;say y ``". This will cause the lexer to
1791 * return at least 4 tokens (SAY "x" ";" SAY) before "y" will be
1792 * returned. We can only set nextline/nextstart at "y".
1793 * Result: We set this pair at the start of the next call to
1794 * fill_buffer such that the next call to yylex will set the correct
1800 i = max - 1; /* on last valid char */
1803 if (!MY_ISBLANK(line[i]) && (line[i] != '\n'))
1807 /* i now -1 or on last nonblank */
1808 if ((i >= 0) && (line[i] == ','))
1809 { /* FIXME: What shall be do with "," followed by EOF? */
1818 memcpy(buf, line, max_size);
1824 /* yywrap MAY be called by the lexer is EOF encounters, see (f)lex docu */
1827 assert( do_level>= 0 ) ;
1830 parser_data.tline = linenr - 1 ; /* set tline for exiterror */
1831 exiterror( ERR_INCOMPLETE_STRUCT, 0 ) ;
1836 /******************************************************************************
1837 ******************************************************************************
1838 * global interface ***********************************************************
1839 ******************************************************************************
1840 *****************************************************************************/
1842 /* initalize all local and global values */
1843 static void init_it_all( tsd_t *TSD )
1845 #if defined(FLEX_SCANNER) && defined(FLEX_DEBUG)
1870 kill_this_space = 0 ;
1884 interptrmax = NULL ;
1885 /* non-zero values */
1889 kill_next_space = 1 ;
1893 memset(&parser_data, 0, sizeof(internal_parser_type));
1894 parser_data.TSD = TSD;
1897 /* fetch may only be called by fetch_protected. The parser and lexer are
1898 * already protected by regina_parser by fetch_protected.
1899 * This function prepares the lexer and parser and call them. The
1900 * result and all generated values are stored in result. The parser
1901 * tree isn't executed here.
1902 * Exactly fptr xor str must be non-null.
1904 static void fetch(tsd_t *TSD, FILE *fptr, const streng *str,
1905 internal_parser_type *result)
1907 init_it_all( TSD ) ;
1911 yy_delete_buffer(YY_CURRENT_BUFFER) ;
1922 interptr = str->value ;
1923 interptrmax = interptr + cchmax ;
1924 result->incore_source = str->value;
1929 parser_data.result = __reginaparse();
1932 yy_delete_buffer(YY_CURRENT_BUFFER) ;
1938 *result = parser_data;
1939 /* Some functions assume null values if parsing isn't running: */
1940 memset(&parser_data, 0, sizeof(internal_parser_type));
1943 /* This function serializes the parser/lexer requests of the process and
1944 * call fetch which will make the work. Look there.
1946 static void fetch_protected(tsd_t *TSD, FILE *fptr, const streng *str,
1947 internal_parser_type *result)
1949 volatile int panicked = 0;
1951 THREAD_PROTECT(regina_parser)
1952 TSD->in_protected = 1;
1954 if ( setjmp( TSD->protect_return ) )
1957 fetch(TSD, fptr, str, result);
1959 TSD->in_protected = 0;
1960 THREAD_UNPROTECT(regina_parser)
1965 /* We got a fatal condition while fetching the input. */
1966 if (TSD->delayed_error_type == PROTECTED_DelayedExit)
1967 TSD->MTExit(TSD->expected_exit_error);
1968 if (TSD->delayed_error_type == PROTECTED_DelayedSetjmpBuf)
1969 longjmp( *(TSD->currlevel->buf), 1 ) ;
1970 longjmp( *(TSD->systeminfo->panic), 1 ) ;
1973 /* fetch_file reads in a REXX file from disk (or a pipe). It returns without
1974 * executing the program. The parsed tree with all needed values including
1975 * the result of the parsing is copied to result.
1976 * fptr remains open after this call.
1977 * type is either PARSE_ONLY or PARSE_AND_TIN. In the later case a tinned variant of the
1978 * parsing tree is created, too.
1980 void fetch_file(tsd_t *TSD, FILE *fptr, internal_parser_type *result)
1982 fetch_protected(TSD, fptr, NULL, result);
1985 /* fetch_string reads in a REXX macro from a streng. It returns without
1986 * executing the program. The parsed tree with all needed values including
1987 * the result of the parsing is copied to result.
1988 * type is either PARSE_ONLY or PARSE_AND_TIN. In the later case a tinned variant of the
1989 * parsing tree is created, too.
1990 * The function is typically called by an "INTERPRET" instruction.
1992 void fetch_string(tsd_t *TSD, const streng *str, internal_parser_type *result)
1994 fetch_protected(TSD, NULL, str, result);