1 /* $Id: parse.y,v 1.23 2002/12/12 17:25:59 slobasso Exp $ */
11 #include "interpret.h"
16 #include <X11/Intrinsic.h>
19 #include "../util/VMSparam.h"
22 #include <sys/param.h>
/* Macros to add error processing to AddOp and AddSym calls.  Each ADD_*
   macro invokes the corresponding Add* routine, passing &ErrMsg, and makes
   the enclosing function return 1 when that routine returns zero.  They are
   wrapped in do { ... } while (0) so that an invocation followed by ';' forms
   exactly one statement and cannot capture the "else" of a surrounding "if". */
#define ADD_OP(op) \
    do { if (!AddOp((op), &ErrMsg)) return 1; } while (0)
#define ADD_SYM(sym) \
    do { if (!AddSym((sym), &ErrMsg)) return 1; } while (0)
#define ADD_IMMED(val) \
    do { if (!AddImmediate((val), &ErrMsg)) return 1; } while (0)
#define ADD_BR_OFF(to) \
    do { if (!AddBranchOffset((to), &ErrMsg)) return 1; } while (0)

/* Store, in the int located at "from", the distance from "from" to "to"
   measured in Inst units.  The whole expansion is parenthesized so it behaves
   as a single expression; the cast only makes explicit the narrowing of the
   pointer difference that the assignment already performed. */
#define SET_BR_OFF(from, to) \
    (*((int *)(from)) = (int)(((Inst *)(to)) - ((Inst *)(from))))
37 /* Max. length for a string constant (... there shouldn't be a maximum) */
38 #define MAX_STRING_CONST_LEN 5000
40 static const char CVSID
[] = "$Id: parse.y,v 1.23 2002/12/12 17:25:59 slobasso Exp $";
/* Forward declarations of the file-local routines defined in the user
   subroutines section at the end of this file */

/* error hook called by yacc to report a parse error message */
41 static int yyerror(char *s
);
/* lexical analyzer: returns the next token read from the input */
42 static int yylex(void);
/* look-ahead helpers used by yylex to recognize multi-character tokens
   (>=, etc.): each returns one of the "yes" token codes when the expected
   character follows, otherwise "no" */
44 static int follow
(char expect
, int yes
, int no
);
45 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
);
46 static int follow_non_whitespace
(char expect
, int yes
, int no
);
/* look ahead for hyphenated action routine names beginning at *inPtr */
47 static Symbol
*matchesActionRoutine
(char **inPtr
);
51 extern Inst
*LoopStack
[]; /* addresses of break, cont stmts */
52 extern Inst
**LoopStackPtr
; /* to fill at the end of a loop */
61 %token
<sym
> NUMBER STRING SYMBOL
62 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
64 %type
<inst
> cond comastmts for while else and or arrayexpr
71 %right
'=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
77 %left GT GE LT LE EQ NE IN
80 %nonassoc UNARY_MINUS NOT
89 program: blank stmts
{
90 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
92 | blank
'{' blank stmts
'}' {
93 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
95 | blank
'{' blank
'}' {
96 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
102 block: '{' blank stmts
'}' blank
103 |
'{' blank
'}' blank
109 stmt: simpstmt
'\n' blank
110 | IF
'(' cond
')' blank block %prec IF_NO_ELSE
{
111 SET_BR_OFF
($3, GetPC
());
113 | IF
'(' cond
')' blank block else blank block %prec ELSE
{
114 SET_BR_OFF
($3, ($7+1)); SET_BR_OFF
($7, GetPC
());
116 | while
'(' cond
')' blank block
{
117 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($1);
118 SET_BR_OFF
($3, GetPC
()); FillLoopAddrs
(GetPC
(), $1);
120 | for
'(' comastmts
';' cond
';' comastmts
')' blank block
{
121 FillLoopAddrs
(GetPC
()+2+($7-($5+1)), GetPC
());
122 SwapCode
($5+1, $7, GetPC
());
123 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($3); SET_BR_OFF
($5, GetPC
());
125 | for
'(' SYMBOL IN arrayexpr
')' {
126 Symbol
*iterSym
= InstallIteratorSymbol
();
127 ADD_OP
(OP_BEGIN_ARRAY_ITER
); ADD_SYM
(iterSym
);
128 ADD_OP
(OP_ARRAY_ITER
); ADD_SYM
($3); ADD_SYM
(iterSym
); ADD_BR_OFF
(0);
131 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($5+2);
132 SET_BR_OFF
($5+5, GetPC
());
133 FillLoopAddrs
(GetPC
(), $5+2);
136 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
137 if
(AddBreakAddr
(GetPC
()-1)) {
138 yyerror("break outside loop"); YYERROR;
141 | CONTINUE
'\n' blank
{
142 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
143 if
(AddContinueAddr
(GetPC
()-1)) {
144 yyerror("continue outside loop"); YYERROR;
147 | RETURN expr
'\n' blank
{
150 | RETURN
'\n' blank
{
151 ADD_OP
(OP_RETURN_NO_VAL
);
154 simpstmt: SYMBOL
'=' expr
{
155 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
157 | evalsym ADDEQ expr
{
158 ADD_OP
(OP_ADD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
160 | evalsym SUBEQ expr
{
161 ADD_OP
(OP_SUB
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
163 | evalsym MULEQ expr
{
164 ADD_OP
(OP_MUL
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
166 | evalsym DIVEQ expr
{
167 ADD_OP
(OP_DIV
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
169 | evalsym MODEQ expr
{
170 ADD_OP
(OP_MOD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
172 | evalsym ANDEQ expr
{
173 ADD_OP
(OP_BIT_AND
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
175 | evalsym OREQ expr
{
176 ADD_OP
(OP_BIT_OR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
178 | DELETE arraylv
'[' arglist
']' {
179 ADD_OP
(OP_ARRAY_DELETE
); ADD_IMMED
((void *)$4);
181 | initarraylv
'[' arglist
']' '=' expr
{
182 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
184 | initarraylv
'[' arglist
']' ADDEQ expr
{
185 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
187 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
189 | initarraylv
'[' arglist
']' SUBEQ expr
{
190 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
192 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
194 | initarraylv
'[' arglist
']' MULEQ expr
{
195 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
197 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
199 | initarraylv
'[' arglist
']' DIVEQ expr
{
200 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
202 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
204 | initarraylv
'[' arglist
']' MODEQ expr
{
205 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
207 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
209 | initarraylv
'[' arglist
']' ANDEQ expr
{
210 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
212 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
214 | initarraylv
'[' arglist
']' OREQ expr
{
215 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
217 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
219 | initarraylv
'[' arglist
']' INCR
{
220 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$3);
222 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
224 | initarraylv
'[' arglist
']' DECR
{
225 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$3);
227 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
229 | INCR initarraylv
'[' arglist
']' {
230 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$4);
232 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$4);
234 | DECR initarraylv
'[' arglist
']' {
235 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$4);
237 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$4);
239 | SYMBOL
'(' arglist
')' {
240 ADD_OP
(OP_SUBR_CALL
);
241 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3);
244 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
245 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
248 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_INCR
);
249 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
252 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
253 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
256 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DECR
);
257 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
261 $$
= $1; ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
264 comastmts: /* nothing */ {
270 | comastmts
',' simpstmt
{
274 arglist: /* nothing */ {
284 expr: numexpr %prec CONCAT
285 | expr numexpr %prec CONCAT
{
289 initarraylv: SYMBOL
{
290 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
((void *)1);
292 | initarraylv
'[' arglist
']' {
293 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
297 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
((void *)0);
299 | arraylv
'[' arglist
']' {
300 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
308 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
311 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
314 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
316 | SYMBOL
'(' arglist
')' {
317 ADD_OP
(OP_SUBR_CALL
);
318 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3);
319 ADD_OP
(OP_FETCH_RET_VAL
);
322 | numexpr
'[' arglist
']' {
323 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
325 | numexpr
'+' numexpr
{
328 | numexpr
'-' numexpr
{
331 | numexpr
'*' numexpr
{
334 | numexpr
'/' numexpr
{
337 | numexpr
'%' numexpr
{
340 | numexpr POW numexpr
{
343 |
'-' numexpr %prec UNARY_MINUS
{
346 | numexpr GT numexpr
{
349 | numexpr GE numexpr
{
352 | numexpr LT numexpr
{
355 | numexpr LE numexpr
{
358 | numexpr EQ numexpr
{
361 | numexpr NE numexpr
{
364 | numexpr
'&' numexpr
{
367 | numexpr
'|' numexpr
{
370 | numexpr and numexpr %prec AND
{
371 ADD_OP
(OP_AND
); SET_BR_OFF
($2, GetPC
());
373 | numexpr or numexpr %prec OR
{
374 ADD_OP
(OP_OR
); SET_BR_OFF
($2, GetPC
());
380 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
381 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
384 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
385 ADD_OP
(OP_INCR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
388 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
389 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
392 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
393 ADD_OP
(OP_DECR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
395 | numexpr IN numexpr
{
400 $$
= GetPC
(); StartLoopAddrList
();
404 StartLoopAddrList
(); $$
= GetPC
();
408 ADD_OP
(OP_BRANCH
); $$
= GetPC
(); ADD_BR_OFF
(0);
411 cond: /* nothing */ {
412 ADD_OP
(OP_BRANCH_NEVER
); $$
= GetPC
(); ADD_BR_OFF
(0);
415 ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
(); ADD_BR_OFF
(0);
419 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
();
424 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_TRUE
); $$
= GetPC
();
432 %%
/* User Subroutines Section */
436 ** Parse a null-terminated string and create a program from it (this is the
437 ** parser entry point). The program created by this routine can be
438 ** executed using ExecuteProgram. Returns the program on success, or NULL
439 ** on failure. If parsing failed, the error message is returned
440 ** as a pointer to a static string in msg, and the location in the string
441 ** where parsing failed in stoppedAt.
443 Program
*ParseMacro
(char *expr
, char **msg
, char **stoppedAt
)
447 BeginCreatingProgram
();
449 /* call yyparse to parse the string and check for success. If the parse
450 failed, return the error message and string index (the grammar aborts
451 parsing at the first error) */
456 FreeProgram
(FinishCreatingProgram
());
460 /* get the newly created program */
461 prog
= FinishCreatingProgram
();
463 /* parse succeeded */
470 static int yylex(void)
474 static DataValue value
= {0, {0}};
475 static char escape
[] = "\\\"ntbrfav";
476 static char replace
[] = "\\\"\n\t\b\r\f\a\v";
478 /* skip whitespace and backslash-newline combinations which are
479 also considered whitespace */
481 if
(*InPtr
== '\\' && *(InPtr
+ 1) == '\n')
483 else if
(*InPtr
== ' ' ||
*InPtr
== '\t')
491 while
(*InPtr
!= '\n' && *InPtr
!= '\0') InPtr
++;
493 /* return end of input at the end of the string */
494 if
(*InPtr
== '\0') {
498 /* process number tokens */
499 if
(isdigit
((unsigned char)*InPtr
)) { /* number */
501 sscanf
(InPtr
, "%d%n", &value.val.n
, &len
);
502 sprintf
(name
, "const %d", value.val.n
);
505 if
((yylval.sym
=LookupSymbol
(name
)) == NULL
)
506 yylval.sym
= InstallSymbol
(name
, CONST_SYM
, value
);
510 /* process symbol tokens. "define" is a special case not handled
511 by this parser, considered end of input. Another special case
512 is action routine names which are allowed to contain '-' despite
513 the ambiguity, handled in matchesActionRoutine. */
514 if
(isalpha
((unsigned char)*InPtr
) ||
*InPtr
== '$') {
515 if
((s
=matchesActionRoutine
(&InPtr
)) == NULL
) {
516 char symName
[MAX_SYM_LEN
+1], *p
= symName
;
518 while
(isalnum
((unsigned char)*InPtr
) ||
*InPtr
=='_') {
519 if
(p
>= symName
+ MAX_SYM_LEN
)
525 if
(!strcmp
(symName
, "while")) return WHILE
;
526 if
(!strcmp
(symName
, "if")) return IF
;
527 if
(!strcmp
(symName
, "else")) return ELSE
;
528 if
(!strcmp
(symName
, "for")) return FOR
;
529 if
(!strcmp
(symName
, "break")) return BREAK
;
530 if
(!strcmp
(symName
, "continue")) return CONTINUE
;
531 if
(!strcmp
(symName
, "return")) return RETURN
;
532 if
(!strcmp
(symName
, "in")) return IN
;
533 if
(!strcmp
(symName
, "delete") && follow_non_whitespace
('(', SYMBOL
, DELETE
) == DELETE
) return DELETE
;
534 if
(!strcmp
(symName
, "define")) {
538 if
((s
=LookupSymbol
(symName
)) == NULL
) {
539 s
= InstallSymbol
(symName
, symName
[0]=='$' ?
540 (isdigit
((unsigned char)symName
[1]) ?
541 ARG_SYM
: GLOBAL_SYM
) : LOCAL_SYM
, value
);
542 s
->value.tag
= NO_TAG
;
549 /* process quoted strings w/ embedded escape sequences */
550 if
(*InPtr
== '\"') {
551 char string[MAX_STRING_CONST_LEN
], *p
= string;
553 while
(*InPtr
!= '\0' && *InPtr
!= '\"' && *InPtr
!= '\n') {
554 if
(p
>= string + MAX_STRING_CONST_LEN
) {
558 if
(*InPtr
== '\\') {
560 if
(*InPtr
== '\n') {
564 for
(i
=0; escape
[i
]!='\0'; i
++) {
565 if
(escape
[i
] == '\0') {
568 } else if
(escape
[i
] == *InPtr
) {
579 yylval.sym
= InstallStringConstSymbol
(string);
583 /* process remaining two character tokens or return single char as token */
585 case
'>': return follow
('=', GE
, GT
);
586 case
'<': return follow
('=', LE
, LT
);
587 case
'=': return follow
('=', EQ
, '=');
588 case
'!': return follow
('=', NE
, NOT
);
589 case
'+': return follow2
('+', INCR
, '=', ADDEQ
, '+');
590 case
'-': return follow2
('-', DECR
, '=', SUBEQ
, '-');
591 case
'|': return follow2
('|', OR
, '=', OREQ
, '|');
592 case
'&': return follow2
('&', AND
, '=', ANDEQ
, '&');
593 case
'*': return follow2
('*', POW
, '=', MULEQ
, '*');
594 case
'/': return follow
('=', DIVEQ
, '/');
595 case
'%': return follow
('=', MODEQ
, '%');
596 case
'^': return POW
;
597 default
: return
*(InPtr
-1);
602 ** look ahead for >=, etc.
604 static int follow
(char expect
, int yes
, int no
)
606 if
(*InPtr
++ == expect
)
611 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
)
613 char next
= *InPtr
++;
622 static int follow_non_whitespace
(char expect
, int yes
, int no
)
624 char *localInPtr
= InPtr
;
627 if
(*localInPtr
== ' ' ||
*localInPtr
== '\t') {
630 else if
(*localInPtr
== '\\' && *(localInPtr
+ 1) == '\n') {
633 else if
(*localInPtr
== expect
) {
643 ** Look (way) ahead for hyphenated routine names which begin at inPtr. A
644 ** hyphenated name is allowed if it is pre-defined in the global symbol
645 ** table. If a matching name exists, returns the symbol and updates "inPtr".
647 ** I know this is horrible language design, but existing nedit action routine
648 ** names contain hyphens. Handling them here in the lexical analysis process
649 ** is much easier than trying to deal with it in the parser itself. (sorry)
651 static Symbol
*matchesActionRoutine
(char **inPtr
)
655 char symbolName
[MAX_SYM_LEN
+1];
659 for
(c
= *inPtr
; isalnum
((unsigned char)*c
) ||
*c
=='_' ||
660 ( *c
=='-' && isalnum
((unsigned char)(*(c
+1)))); c
++) {
668 s
= LookupSymbol
(symbolName
);
675 ** Called by yacc to report errors (just stores for returning when
676 ** parsing is aborted. The error token action is to immediately abort
677 ** parsing, so this message is immediately reported to the caller
680 static int yyerror(char *s
)