1 /* $Id: parse.y,v 1.27 2003/12/19 23:23:31 slobasso Exp $ */
11 #include "interpret.h"
17 #include <X11/Intrinsic.h>
20 #include "../util/VMSparam.h"
23 #include <sys/param.h>
31 /* Macros to add error processing to AddOp and AddSym calls */
/* Each ADD_* macro passes its argument, together with &ErrMsg, to the
   matching Add* routine and makes the enclosing function return 1 when that
   routine yields zero.  They expand to a bare `if` statement, so use them
   only as complete statements; written directly before an `else` they would
   capture that `else`. */
32 #define ADD_OP(op) if (!AddOp(op, &ErrMsg)) return 1
33 #define ADD_SYM(sym) if (!AddSym(sym, &ErrMsg)) return 1
34 #define ADD_IMMED(val) if (!AddImmediate(val, &ErrMsg)) return 1
35 #define ADD_BR_OFF(to) if (!AddBranchOffset(to, &ErrMsg)) return 1
/* Store at `from` (written through an int pointer) the distance, counted in
   Inst elements, from `from` to `to`. */
36 #define SET_BR_OFF(from, to) *((int *)(from)) = ((Inst *)(to)) - ((Inst *)(from))
38 /* Max. length for a string constant (... there shouldn't be a maximum) */
39 #define MAX_STRING_CONST_LEN 5000
41 static const char CVSID
[] = "$Id: parse.y,v 1.27 2003/12/19 23:23:31 slobasso Exp $";
42 static int yyerror(char *s
);
43 static int yylex(void);
45 static int follow
(char expect
, int yes
, int no
);
46 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
);
47 static int follow_non_whitespace
(char expect
, int yes
, int no
);
48 static Symbol
*matchesActionRoutine
(char **inPtr
);
52 extern Inst
*LoopStack
[]; /* addresses of break, cont stmts */
53 extern Inst
**LoopStackPtr
; /* to fill at the end of a loop */
62 %token
<sym
> NUMBER STRING SYMBOL
63 %token DELETE ARG_LOOKUP
64 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
66 %type
<inst
> cond comastmts for while else and or arrayexpr
72 %nonassoc SYMBOL ARG_LOOKUP
73 %right
'=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
79 %left GT GE LT LE EQ NE IN
82 %nonassoc UNARY_MINUS NOT
91 program: blank stmts
{
92 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
94 | blank
'{' blank stmts
'}' {
95 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
97 | blank
'{' blank
'}' {
98 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
104 block: '{' blank stmts
'}' blank
105 |
'{' blank
'}' blank
111 stmt: simpstmt
'\n' blank
112 | IF
'(' cond
')' blank block %prec IF_NO_ELSE
{
113 SET_BR_OFF
($3, GetPC
());
115 | IF
'(' cond
')' blank block else blank block %prec ELSE
{
116 SET_BR_OFF
($3, ($7+1)); SET_BR_OFF
($7, GetPC
());
118 | while
'(' cond
')' blank block
{
119 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($1);
120 SET_BR_OFF
($3, GetPC
()); FillLoopAddrs
(GetPC
(), $1);
122 | for
'(' comastmts
';' cond
';' comastmts
')' blank block
{
123 FillLoopAddrs
(GetPC
()+2+($7-($5+1)), GetPC
());
124 SwapCode
($5+1, $7, GetPC
());
125 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($3); SET_BR_OFF
($5, GetPC
());
127 | for
'(' SYMBOL IN arrayexpr
')' {
128 Symbol
*iterSym
= InstallIteratorSymbol
();
129 ADD_OP
(OP_BEGIN_ARRAY_ITER
); ADD_SYM
(iterSym
);
130 ADD_OP
(OP_ARRAY_ITER
); ADD_SYM
($3); ADD_SYM
(iterSym
); ADD_BR_OFF
(0);
133 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($5+2);
134 SET_BR_OFF
($5+5, GetPC
());
135 FillLoopAddrs
(GetPC
(), $5+2);
138 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
139 if
(AddBreakAddr
(GetPC
()-1)) {
140 yyerror("break outside loop"); YYERROR;
143 | CONTINUE
'\n' blank
{
144 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
145 if
(AddContinueAddr
(GetPC
()-1)) {
146 yyerror("continue outside loop"); YYERROR;
149 | RETURN expr
'\n' blank
{
152 | RETURN
'\n' blank
{
153 ADD_OP
(OP_RETURN_NO_VAL
);
156 simpstmt: SYMBOL
'=' expr
{
157 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
159 | evalsym ADDEQ expr
{
160 ADD_OP
(OP_ADD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
162 | evalsym SUBEQ expr
{
163 ADD_OP
(OP_SUB
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
165 | evalsym MULEQ expr
{
166 ADD_OP
(OP_MUL
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
168 | evalsym DIVEQ expr
{
169 ADD_OP
(OP_DIV
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
171 | evalsym MODEQ expr
{
172 ADD_OP
(OP_MOD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
174 | evalsym ANDEQ expr
{
175 ADD_OP
(OP_BIT_AND
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
177 | evalsym OREQ expr
{
178 ADD_OP
(OP_BIT_OR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
180 | DELETE arraylv
'[' arglist
']' {
181 ADD_OP
(OP_ARRAY_DELETE
); ADD_IMMED
((void *)$4);
183 | initarraylv
'[' arglist
']' '=' expr
{
184 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
186 | initarraylv
'[' arglist
']' ADDEQ expr
{
187 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
189 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
191 | initarraylv
'[' arglist
']' SUBEQ expr
{
192 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
194 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
196 | initarraylv
'[' arglist
']' MULEQ expr
{
197 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
199 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
201 | initarraylv
'[' arglist
']' DIVEQ expr
{
202 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
204 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
206 | initarraylv
'[' arglist
']' MODEQ expr
{
207 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
209 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
211 | initarraylv
'[' arglist
']' ANDEQ expr
{
212 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
214 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
216 | initarraylv
'[' arglist
']' OREQ expr
{
217 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
219 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
221 | initarraylv
'[' arglist
']' INCR
{
222 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$3);
224 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
226 | initarraylv
'[' arglist
']' DECR
{
227 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$3);
229 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
231 | INCR initarraylv
'[' arglist
']' {
232 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$4);
234 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$4);
236 | DECR initarraylv
'[' arglist
']' {
237 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$4);
239 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$4);
241 | SYMBOL
'(' arglist
')' {
242 ADD_OP
(OP_SUBR_CALL
);
243 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3);
246 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
247 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
250 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_INCR
);
251 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
254 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
255 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
258 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DECR
);
259 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
263 $$
= $1; ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
266 comastmts: /* nothing */ {
272 | comastmts
',' simpstmt
{
276 arglist: /* nothing */ {
286 expr: numexpr %prec CONCAT
287 | expr numexpr %prec CONCAT
{
291 initarraylv: SYMBOL
{
292 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
((void *)1);
294 | initarraylv
'[' arglist
']' {
295 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
299 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
((void *)0);
301 | arraylv
'[' arglist
']' {
302 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
310 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
313 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
316 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
318 | SYMBOL
'(' arglist
')' {
319 ADD_OP
(OP_SUBR_CALL
);
320 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3);
321 ADD_OP
(OP_FETCH_RET_VAL
);
324 | ARG_LOOKUP
'[' numexpr
']' {
327 | ARG_LOOKUP
'[' ']' {
328 ADD_OP
(OP_PUSH_ARG_COUNT
);
331 ADD_OP
(OP_PUSH_ARG_ARRAY
);
333 | numexpr
'[' arglist
']' {
334 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
336 | numexpr
'+' numexpr
{
339 | numexpr
'-' numexpr
{
342 | numexpr
'*' numexpr
{
345 | numexpr
'/' numexpr
{
348 | numexpr
'%' numexpr
{
351 | numexpr POW numexpr
{
354 |
'-' numexpr %prec UNARY_MINUS
{
357 | numexpr GT numexpr
{
360 | numexpr GE numexpr
{
363 | numexpr LT numexpr
{
366 | numexpr LE numexpr
{
369 | numexpr EQ numexpr
{
372 | numexpr NE numexpr
{
375 | numexpr
'&' numexpr
{
378 | numexpr
'|' numexpr
{
381 | numexpr and numexpr %prec AND
{
382 ADD_OP
(OP_AND
); SET_BR_OFF
($2, GetPC
());
384 | numexpr or numexpr %prec OR
{
385 ADD_OP
(OP_OR
); SET_BR_OFF
($2, GetPC
());
391 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
392 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
395 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
396 ADD_OP
(OP_INCR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
399 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
400 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
403 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
404 ADD_OP
(OP_DECR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
406 | numexpr IN numexpr
{
411 $$
= GetPC
(); StartLoopAddrList
();
415 StartLoopAddrList
(); $$
= GetPC
();
419 ADD_OP
(OP_BRANCH
); $$
= GetPC
(); ADD_BR_OFF
(0);
422 cond: /* nothing */ {
423 ADD_OP
(OP_BRANCH_NEVER
); $$
= GetPC
(); ADD_BR_OFF
(0);
426 ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
(); ADD_BR_OFF
(0);
430 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
();
435 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_TRUE
); $$
= GetPC
();
443 %%
/* User Subroutines Section */
447 ** Parse a null terminated string and create a program from it (this is the
448 ** parser entry point). The program created by this routine can be
449 ** executed using ExecuteProgram. Returns program on success, or NULL
450 ** on failure. If the command failed, the error message is returned
451 ** as a pointer to a static string in msg, and the position in the string
452 ** where parsing failed is returned in stoppedAt.
454 Program
*ParseMacro
(char *expr
, char **msg
, char **stoppedAt
)
458 BeginCreatingProgram
();
460 /* call yyparse to parse the string and check for success. If the parse
461 failed, return the error message and string index (the grammar aborts
462 parsing at the first error) */
467 FreeProgram
(FinishCreatingProgram
());
471 /* get the newly created program */
472 prog
= FinishCreatingProgram
();
474 /* parse succeeded */
481 static int yylex(void)
485 static DataValue value
= {NO_TAG
, {0}};
486 static char escape
[] = "\\\"ntbrfav";
487 static char replace
[] = "\\\"\n\t\b\r\f\a\v";
489 /* skip whitespace, backslash-newline combinations, and comments, which are
490 all considered whitespace */
492 if
(*InPtr
== '\\' && *(InPtr
+ 1) == '\n')
494 else if
(*InPtr
== ' ' ||
*InPtr
== '\t')
496 else if
(*InPtr
== '#')
497 while
(*InPtr
!= '\n' && *InPtr
!= '\0') {
498 /* Comments stop at escaped newlines */
499 if
(*InPtr
== '\\' && *(InPtr
+ 1) == '\n') {
509 /* return end of input at the end of the string */
510 if
(*InPtr
== '\0') {
514 /* process number tokens */
515 if
(isdigit
((unsigned char)*InPtr
)) { /* number */
517 sscanf
(InPtr
, "%d%n", &value.val.n
, &len
);
518 sprintf
(name
, "const %d", value.val.n
);
521 if
((yylval.sym
=LookupSymbol
(name
)) == NULL
)
522 yylval.sym
= InstallSymbol
(name
, CONST_SYM
, value
);
526 /* process symbol tokens. "define" is a special case not handled
527 by this parser, considered end of input. Another special case
528 is action routine names which are allowed to contain '-' despite
529 the ambiguity, handled in matchesActionRoutine. */
530 if
(isalpha
((unsigned char)*InPtr
) ||
*InPtr
== '$') {
531 if
((s
=matchesActionRoutine
(&InPtr
)) == NULL
) {
532 char symName
[MAX_SYM_LEN
+1], *p
= symName
;
534 while
(isalnum
((unsigned char)*InPtr
) ||
*InPtr
=='_') {
535 if
(p
>= symName
+ MAX_SYM_LEN
)
541 if
(!strcmp
(symName
, "while")) return WHILE
;
542 if
(!strcmp
(symName
, "if")) return IF
;
543 if
(!strcmp
(symName
, "else")) return ELSE
;
544 if
(!strcmp
(symName
, "for")) return FOR
;
545 if
(!strcmp
(symName
, "break")) return BREAK
;
546 if
(!strcmp
(symName
, "continue")) return CONTINUE
;
547 if
(!strcmp
(symName
, "return")) return RETURN
;
548 if
(!strcmp
(symName
, "in")) return IN
;
549 if
(!strcmp
(symName
, "$args")) return ARG_LOOKUP
;
550 if
(!strcmp
(symName
, "delete") && follow_non_whitespace
('(', SYMBOL
, DELETE
) == DELETE
) return DELETE
;
551 if
(!strcmp
(symName
, "define")) {
555 if
((s
=LookupSymbol
(symName
)) == NULL
) {
556 s
= InstallSymbol
(symName
, symName
[0]=='$' ?
557 (((symName
[1] > '0' && symName
[1] <= '9') && symName
[2] == 0) ?
558 ARG_SYM
: GLOBAL_SYM
) : LOCAL_SYM
, value
);
559 s
->value.tag
= NO_TAG
;
566 /* process quoted strings w/ embedded escape sequences */
567 if
(*InPtr
== '\"') {
568 char string[MAX_STRING_CONST_LEN
], *p
= string;
570 while
(*InPtr
!= '\0' && *InPtr
!= '\"' && *InPtr
!= '\n') {
571 if
(p
>= string + MAX_STRING_CONST_LEN
) {
575 if
(*InPtr
== '\\') {
577 if
(*InPtr
== '\n') {
581 for
(i
=0; escape
[i
]!='\0'; i
++) {
582 if
(escape
[i
] == '\0') {
585 } else if
(escape
[i
] == *InPtr
) {
596 yylval.sym
= InstallStringConstSymbol
(string);
600 /* process remaining two character tokens or return single char as token */
602 case
'>': return follow
('=', GE
, GT
);
603 case
'<': return follow
('=', LE
, LT
);
604 case
'=': return follow
('=', EQ
, '=');
605 case
'!': return follow
('=', NE
, NOT
);
606 case
'+': return follow2
('+', INCR
, '=', ADDEQ
, '+');
607 case
'-': return follow2
('-', DECR
, '=', SUBEQ
, '-');
608 case
'|': return follow2
('|', OR
, '=', OREQ
, '|');
609 case
'&': return follow2
('&', AND
, '=', ANDEQ
, '&');
610 case
'*': return follow2
('*', POW
, '=', MULEQ
, '*');
611 case
'/': return follow
('=', DIVEQ
, '/');
612 case
'%': return follow
('=', MODEQ
, '%');
613 case
'^': return POW
;
614 default
: return
*(InPtr
-1);
619 ** look ahead for >=, etc.
621 static int follow
(char expect
, int yes
, int no
)
623 if
(*InPtr
++ == expect
)
628 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
)
630 char next
= *InPtr
++;
639 static int follow_non_whitespace
(char expect
, int yes
, int no
)
641 char *localInPtr
= InPtr
;
644 if
(*localInPtr
== ' ' ||
*localInPtr
== '\t') {
647 else if
(*localInPtr
== '\\' && *(localInPtr
+ 1) == '\n') {
650 else if
(*localInPtr
== expect
) {
660 ** Look (way) ahead for hyphenated routine names which begin at inPtr. A
661 ** hyphenated name is allowed if it is pre-defined in the global symbol
662 ** table. If a matching name exists, returns the symbol, and update "inPtr".
664 ** I know this is horrible language design, but existing nedit action routine
665 ** names contain hyphens. Handling them here in the lexical analysis process
666 ** is much easier than trying to deal with it in the parser itself. (sorry)
668 static Symbol
*matchesActionRoutine
(char **inPtr
)
672 char symbolName
[MAX_SYM_LEN
+1];
676 for
(c
= *inPtr
; isalnum
((unsigned char)*c
) ||
*c
=='_' ||
677 ( *c
=='-' && isalnum
((unsigned char)(*(c
+1)))); c
++) {
685 s
= LookupSymbol
(symbolName
);
692 ** Called by yacc to report errors (just stores for returning when
693 ** parsing is aborted). The error token action is to immediately abort
694 ** parsing, so this message is immediately reported to the caller
697 static int yyerror(char *s
)