1 /* $Id: parse.y,v 1.22 2002/09/26 12:37:39 ajhood Exp $ */
11 #include "interpret.h"
16 #include <X11/Intrinsic.h>
19 #include "../util/VMSparam.h"
22 #include <sys/param.h>
30 /* Macros to add error processing to AddOp and AddSym calls.
   Each ADD_* macro invokes the matching Add* routine with &ErrMsg and, when
   that routine returns zero, makes the ENCLOSING function return 1.  They
   expand to a bare "if" statement, so they must be used as complete
   statements (never directly in front of an "else").
   SET_BR_OFF stores, in the int located at "from", the distance from "from"
   to "to" measured in Inst units (a pointer difference truncated to int). */
31 #define ADD_OP(op) if (!AddOp(op, &ErrMsg)) return 1
32 #define ADD_SYM(sym) if (!AddSym(sym, &ErrMsg)) return 1
33 #define ADD_IMMED(val) if (!AddImmediate(val, &ErrMsg)) return 1
34 #define ADD_BR_OFF(to) if (!AddBranchOffset(to, &ErrMsg)) return 1
35 #define SET_BR_OFF(from, to) *((int *)(from)) = ((Inst *)(to)) - ((Inst *)(from))
37 /* Max. length for a string constant (... there shouldn't be a maximum) */
38 #define MAX_STRING_CONST_LEN 5000
40 static const char CVSID
[] = "$Id: parse.y,v 1.22 2002/09/26 12:37:39 ajhood Exp $";
41 static int yyerror(char *s
);
42 static int yylex(void);
44 static int follow
(char expect
, int yes
, int no
);
45 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
);
46 static int follow_non_whitespace
(char expect
, int yes
, int no
);
47 static Symbol
*matchesActionRoutine
(char **inPtr
);
51 extern Inst
*LoopStack
[]; /* addresses of break, cont stmts */
52 extern Inst
**LoopStackPtr
; /* to fill at the end of a loop */
61 %token
<sym
> NUMBER STRING SYMBOL
62 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
64 %type
<inst
> cond comastmts for while else and or arrayexpr
71 %right
'=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
77 %left GT GE LT LE EQ NE IN
80 %nonassoc UNARY_MINUS NOT
89 program: blank stmts
{
90 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
92 | blank
'{' blank stmts
'}' {
93 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
95 | blank
'{' blank
'}' {
96 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
102 block: '{' blank stmts
'}' blank
103 |
'{' blank
'}' blank
109 stmt: simpstmt
'\n' blank
110 | IF
'(' cond
')' blank block %prec IF_NO_ELSE
{
111 SET_BR_OFF
($3, GetPC
());
113 | IF
'(' cond
')' blank block else blank block %prec ELSE
{
114 SET_BR_OFF
($3, ($7+1)); SET_BR_OFF
($7, GetPC
());
116 | while
'(' cond
')' blank block
{
117 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($1);
118 SET_BR_OFF
($3, GetPC
()); FillLoopAddrs
(GetPC
(), $1);
120 | for
'(' comastmts
';' cond
';' comastmts
')' blank block
{
121 FillLoopAddrs
(GetPC
()+2+($7-($5+1)), GetPC
());
122 SwapCode
($5+1, $7, GetPC
());
123 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($3); SET_BR_OFF
($5, GetPC
());
125 | for
'(' SYMBOL IN arrayexpr
')' {
126 Symbol
*iterSym
= InstallIteratorSymbol
();
127 ADD_OP
(OP_BEGIN_ARRAY_ITER
); ADD_SYM
(iterSym
);
128 ADD_OP
(OP_ARRAY_ITER
); ADD_SYM
($3); ADD_SYM
(iterSym
); ADD_BR_OFF
(0);
131 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($5+2);
132 SET_BR_OFF
($5+5, GetPC
());
133 FillLoopAddrs
(GetPC
(), $5+2);
136 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
137 if
(AddBreakAddr
(GetPC
()-1)) {
138 yyerror("break outside loop"); YYERROR;
141 | CONTINUE
'\n' blank
{
142 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
143 if
(AddContinueAddr
(GetPC
()-1)) {
144 yyerror("continue outside loop"); YYERROR;
147 | RETURN expr
'\n' blank
{
150 | RETURN
'\n' blank
{
151 ADD_OP
(OP_RETURN_NO_VAL
);
154 simpstmt: SYMBOL
'=' expr
{
155 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
157 | evalsym ADDEQ expr
{
158 ADD_OP
(OP_ADD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
160 | evalsym SUBEQ expr
{
161 ADD_OP
(OP_SUB
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
163 | evalsym MULEQ expr
{
164 ADD_OP
(OP_MUL
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
166 | evalsym DIVEQ expr
{
167 ADD_OP
(OP_DIV
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
169 | evalsym MODEQ expr
{
170 ADD_OP
(OP_MOD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
172 | evalsym ANDEQ expr
{
173 ADD_OP
(OP_BIT_AND
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
175 | evalsym OREQ expr
{
176 ADD_OP
(OP_BIT_OR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
178 | DELETE arraylv
'[' arglist
']' {
179 ADD_OP
(OP_ARRAY_DELETE
); ADD_IMMED
((void *)$4);
181 | initarraylv
'[' arglist
']' '=' expr
{
182 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
184 | initarraylv
'[' arglist
']' ADDEQ expr
{
185 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
187 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
189 | initarraylv
'[' arglist
']' SUBEQ expr
{
190 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
192 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
194 | initarraylv
'[' arglist
']' MULEQ expr
{
195 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
197 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
199 | initarraylv
'[' arglist
']' DIVEQ expr
{
200 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
202 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
204 | initarraylv
'[' arglist
']' MODEQ expr
{
205 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
207 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
209 | initarraylv
'[' arglist
']' ANDEQ expr
{
210 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
212 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
214 | initarraylv
'[' arglist
']' OREQ expr
{
215 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
217 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
219 | initarraylv
'[' arglist
']' INCR
{
220 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$3);
222 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
224 | initarraylv
'[' arglist
']' DECR
{
225 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$3);
227 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
229 | INCR initarraylv
'[' arglist
']' {
230 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$4);
232 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$4);
234 | DECR initarraylv
'[' arglist
']' {
235 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$4);
237 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$4);
239 | SYMBOL
'(' arglist
')' {
240 ADD_OP
(OP_SUBR_CALL
);
241 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3);
244 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
245 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
248 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_INCR
);
249 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
252 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
253 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
256 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DECR
);
257 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
261 $$
= $1; ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
264 comastmts: /* nothing */ {
270 | comastmts
',' simpstmt
{
274 arglist: /* nothing */ {
284 expr: numexpr %prec CONCAT
285 | expr numexpr %prec CONCAT
{
289 initarraylv: SYMBOL
{
290 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
((void *)1);
292 | initarraylv
'[' arglist
']' {
293 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
297 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
((void *)0);
299 | arraylv
'[' arglist
']' {
300 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
308 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
311 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
314 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
316 | SYMBOL
'(' arglist
')' {
317 ADD_OP
(OP_SUBR_CALL
);
318 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3);
319 ADD_OP
(OP_FETCH_RET_VAL
);
322 | numexpr
'[' arglist
']' {
323 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
325 | numexpr
'+' numexpr
{
328 | numexpr
'-' numexpr
{
331 | numexpr
'*' numexpr
{
334 | numexpr
'/' numexpr
{
337 | numexpr
'%' numexpr
{
340 | numexpr POW numexpr
{
343 |
'-' numexpr %prec UNARY_MINUS
{
346 | numexpr GT numexpr
{
349 | numexpr GE numexpr
{
352 | numexpr LT numexpr
{
355 | numexpr LE numexpr
{
358 | numexpr EQ numexpr
{
361 | numexpr NE numexpr
{
364 | numexpr
'&' numexpr
{
367 | numexpr
'|' numexpr
{
370 | numexpr and numexpr %prec AND
{
371 ADD_OP
(OP_AND
); SET_BR_OFF
($2, GetPC
());
373 | numexpr or numexpr %prec OR
{
374 ADD_OP
(OP_OR
); SET_BR_OFF
($2, GetPC
());
380 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
381 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
384 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
385 ADD_OP
(OP_INCR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
388 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
389 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
392 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
393 ADD_OP
(OP_DECR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
395 | numexpr IN numexpr
{
400 $$
= GetPC
(); StartLoopAddrList
();
404 StartLoopAddrList
(); $$
= GetPC
();
408 ADD_OP
(OP_BRANCH
); $$
= GetPC
(); ADD_BR_OFF
(0);
411 cond: /* nothing */ {
412 ADD_OP
(OP_BRANCH_NEVER
); $$
= GetPC
(); ADD_BR_OFF
(0);
415 ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
(); ADD_BR_OFF
(0);
419 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
();
424 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_TRUE
); $$
= GetPC
();
432 %%
/* User Subroutines Section */
436 ** Parse a null terminated string and create a program from it (this is the
437 ** parser entry point). The program created by this routine can be
438 ** executed using ExecuteProgram. Returns program on success, or NULL
439 ** on failure. If the command failed, the error message is returned
440 ** as a pointer to a static string in msg, and a pointer to the position in
441 ** the input string where parsing stopped is returned in stoppedAt.
443 Program
*ParseMacro
(char *expr
, char **msg
, char **stoppedAt
)
447 BeginCreatingProgram
();
449 /* call yyparse to parse the string and check for success. If the parse
450 failed, return the error message and string index (the grammar aborts
451 parsing at the first error) */
456 FreeProgram
(FinishCreatingProgram
());
460 /* get the newly created program */
461 prog
= FinishCreatingProgram
();
463 /* parse succeeded */
470 static int yylex(void)
474 static int stringConstIndex
= 0;
475 static DataValue value
= {0, {0}};
476 static char escape
[] = "\\\"ntbrfav";
477 static char replace
[] = "\\\"\n\t\b\r\f\a\v";
479 /* skip whitespace and backslash-newline combinations which are
480 also considered whitespace */
482 if
(*InPtr
== '\\' && *(InPtr
+ 1) == '\n')
484 else if
(*InPtr
== ' ' ||
*InPtr
== '\t')
492 while
(*InPtr
!= '\n' && *InPtr
!= '\0') InPtr
++;
494 /* return end of input at the end of the string */
495 if
(*InPtr
== '\0') {
499 /* process number tokens */
500 if
(isdigit
((unsigned char)*InPtr
)) { /* number */
502 sscanf
(InPtr
, "%d%n", &value.val.n
, &len
);
503 sprintf
(name
, "const %d", value.val.n
);
506 if
((yylval.sym
=LookupSymbol
(name
)) == NULL
)
507 yylval.sym
= InstallSymbol
(name
, CONST_SYM
, value
);
511 /* process symbol tokens. "define" is a special case not handled
512 by this parser, considered end of input. Another special case
513 is action routine names which are allowed to contain '-' despite
514 the ambiguity, handled in matchesActionRoutine. */
515 if
(isalpha
((unsigned char)*InPtr
) ||
*InPtr
== '$') {
516 if
((s
=matchesActionRoutine
(&InPtr
)) == NULL
) {
517 char symName
[MAX_SYM_LEN
+1], *p
= symName
;
519 while
(isalnum
((unsigned char)*InPtr
) ||
*InPtr
=='_') {
520 if
(p
>= symName
+ MAX_SYM_LEN
)
526 if
(!strcmp
(symName
, "while")) return WHILE
;
527 if
(!strcmp
(symName
, "if")) return IF
;
528 if
(!strcmp
(symName
, "else")) return ELSE
;
529 if
(!strcmp
(symName
, "for")) return FOR
;
530 if
(!strcmp
(symName
, "break")) return BREAK
;
531 if
(!strcmp
(symName
, "continue")) return CONTINUE
;
532 if
(!strcmp
(symName
, "return")) return RETURN
;
533 if
(!strcmp
(symName
, "in")) return IN
;
534 if
(!strcmp
(symName
, "delete") && follow_non_whitespace
('(', SYMBOL
, DELETE
) == DELETE
) return DELETE
;
535 if
(!strcmp
(symName
, "define")) {
539 if
((s
=LookupSymbol
(symName
)) == NULL
) {
540 s
= InstallSymbol
(symName
, symName
[0]=='$' ?
541 (isdigit
((unsigned char)symName
[1]) ?
542 ARG_SYM
: GLOBAL_SYM
) : LOCAL_SYM
, value
);
543 s
->value.tag
= NO_TAG
;
550 /* process quoted strings w/ embedded escape sequences */
551 if
(*InPtr
== '\"') {
552 char string[MAX_STRING_CONST_LEN
], *p
= string;
555 while
(*InPtr
!= '\0' && *InPtr
!= '\"' && *InPtr
!= '\n') {
556 if
(p
>= string + MAX_STRING_CONST_LEN
) {
560 if
(*InPtr
== '\\') {
562 if
(*InPtr
== '\n') {
566 for
(i
=0; escape
[i
]!='\0'; i
++) {
567 if
(escape
[i
] == '\0') {
570 } else if
(escape
[i
] == *InPtr
) {
581 if
((yylval.sym
= LookupStringConstSymbol
(string)) == NULL
) {
582 value.val.str
= AllocString
(p
-string+1);
583 strcpy
(value.val.str
, string);
584 value.tag
= STRING_TAG
;
585 sprintf
(stringName
, "string #%d", stringConstIndex
++);
586 yylval.sym
= InstallSymbol
(stringName
, CONST_SYM
, value
);
591 /* process remaining two character tokens or return single char as token */
593 case
'>': return follow
('=', GE
, GT
);
594 case
'<': return follow
('=', LE
, LT
);
595 case
'=': return follow
('=', EQ
, '=');
596 case
'!': return follow
('=', NE
, NOT
);
597 case
'+': return follow2
('+', INCR
, '=', ADDEQ
, '+');
598 case
'-': return follow2
('-', DECR
, '=', SUBEQ
, '-');
599 case
'|': return follow2
('|', OR
, '=', OREQ
, '|');
600 case
'&': return follow2
('&', AND
, '=', ANDEQ
, '&');
601 case
'*': return follow2
('*', POW
, '=', MULEQ
, '*');
602 case
'/': return follow
('=', DIVEQ
, '/');
603 case
'%': return follow
('=', MODEQ
, '%');
604 case
'^': return POW
;
605 default
: return
*(InPtr
-1);
610 ** look ahead for >=, etc.
612 static int follow
(char expect
, int yes
, int no
)
614 if
(*InPtr
++ == expect
)
619 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
)
621 char next
= *InPtr
++;
630 static int follow_non_whitespace
(char expect
, int yes
, int no
)
632 char *localInPtr
= InPtr
;
635 if
(*localInPtr
== ' ' ||
*localInPtr
== '\t') {
638 else if
(*localInPtr
== '\\' && *(localInPtr
+ 1) == '\n') {
641 else if
(*localInPtr
== expect
) {
651 ** Look (way) ahead for hyphenated routine names which begin at inPtr. A
652 ** hyphenated name is allowed if it is pre-defined in the global symbol
653 ** table. If a matching name exists, returns the symbol and updates "inPtr".
655 ** I know this is horrible language design, but existing nedit action routine
656 ** names contain hyphens. Handling them here in the lexical analysis process
657 ** is much easier than trying to deal with it in the parser itself. (sorry)
659 static Symbol
*matchesActionRoutine
(char **inPtr
)
663 char symbolName
[MAX_SYM_LEN
+1];
667 for
(c
= *inPtr
; isalnum
((unsigned char)*c
) ||
*c
=='_' ||
668 ( *c
=='-' && isalnum
((unsigned char)(*(c
+1)))); c
++) {
676 s
= LookupSymbol
(symbolName
);
683 ** Called by yacc to report errors (just stores for returning when
684 ** parsing is aborted. The error token action is to immediately abort
685 ** parsing, so this message is immediately reported to the caller
688 static int yyerror(char *s
)