1 /* $Id: parse.y,v 1.25 2003/05/07 10:51:52 edg Exp $ */
11 #include "interpret.h"
17 #include <X11/Intrinsic.h>
20 #include "../util/VMSparam.h"
23 #include <sys/param.h>
/*
** Macros to add error processing to the AddOp, AddSym, AddImmediate and
** AddBranchOffset calls.  Each one makes the call, passing &ErrMsg, and if
** the call returns zero, makes the enclosing function return 1.
**
** The bodies are wrapped in do { ... } while (0) so that every macro expands
** to exactly one statement: a bare "if (...) return 1" would silently
** capture an "else" that follows the macro invocation.  Invocations must be
** terminated with a semicolon, as they already are throughout the grammar.
*/
#define ADD_OP(op) \
    do { if (!AddOp(op, &ErrMsg)) return 1; } while (0)
#define ADD_SYM(sym) \
    do { if (!AddSym(sym, &ErrMsg)) return 1; } while (0)
#define ADD_IMMED(val) \
    do { if (!AddImmediate(val, &ErrMsg)) return 1; } while (0)
#define ADD_BR_OFF(to) \
    do { if (!AddBranchOffset(to, &ErrMsg)) return 1; } while (0)

/*
** Store in the cell at "from" the distance, counted in Inst elements, from
** "from" to "to".  The expansion is fully parenthesized so that it remains a
** single expression wherever it is used.
*/
#define SET_BR_OFF(from, to) \
    (*((int *)(from)) = ((Inst *)(to)) - ((Inst *)(from)))
38 /* Max. length for a string constant (... there shouldn't be a maximum) */
39 #define MAX_STRING_CONST_LEN 5000
41 static const char CVSID
[] = "$Id: parse.y,v 1.25 2003/05/07 10:51:52 edg Exp $";
/* Called by yacc to report a parse error */
42 static int yyerror(char *s
);
/* Lexical analyzer: returns the next token read from the input string */
43 static int yylex(void);
/* One-character look-ahead used for two-character tokens such as ">=" */
45 static int follow
(char expect
, int yes
, int no
);
/* Like follow, but with two candidate second characters (e.g. "++" or "+=") */
46 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
);
/* Look-ahead for "expect" that first skips spaces, tabs and backslash-newline */
47 static int follow_non_whitespace
(char expect
, int yes
, int no
);
/* Look ahead for a (possibly hyphenated) name that is already a known symbol */
48 static Symbol
*matchesActionRoutine
(char **inPtr
);
52 extern Inst
*LoopStack
[]; /* addresses of break, cont stmts */
53 extern Inst
**LoopStackPtr
; /* to fill at the end of a loop */
62 %token
<sym
> NUMBER STRING SYMBOL
63 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
65 %type
<inst
> cond comastmts for while else and or arrayexpr
72 %right
'=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
78 %left GT GE LT LE EQ NE IN
81 %nonassoc UNARY_MINUS NOT
90 program: blank stmts
{
91 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
93 | blank
'{' blank stmts
'}' {
94 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
96 | blank
'{' blank
'}' {
97 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
103 block: '{' blank stmts
'}' blank
104 |
'{' blank
'}' blank
110 stmt: simpstmt
'\n' blank
111 | IF
'(' cond
')' blank block %prec IF_NO_ELSE
{
112 SET_BR_OFF
($3, GetPC
());
114 | IF
'(' cond
')' blank block else blank block %prec ELSE
{
115 SET_BR_OFF
($3, ($7+1)); SET_BR_OFF
($7, GetPC
());
117 | while
'(' cond
')' blank block
{
118 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($1);
119 SET_BR_OFF
($3, GetPC
()); FillLoopAddrs
(GetPC
(), $1);
121 | for
'(' comastmts
';' cond
';' comastmts
')' blank block
{
122 FillLoopAddrs
(GetPC
()+2+($7-($5+1)), GetPC
());
123 SwapCode
($5+1, $7, GetPC
());
124 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($3); SET_BR_OFF
($5, GetPC
());
126 | for
'(' SYMBOL IN arrayexpr
')' {
127 Symbol
*iterSym
= InstallIteratorSymbol
();
128 ADD_OP
(OP_BEGIN_ARRAY_ITER
); ADD_SYM
(iterSym
);
129 ADD_OP
(OP_ARRAY_ITER
); ADD_SYM
($3); ADD_SYM
(iterSym
); ADD_BR_OFF
(0);
132 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($5+2);
133 SET_BR_OFF
($5+5, GetPC
());
134 FillLoopAddrs
(GetPC
(), $5+2);
137 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
138 if
(AddBreakAddr
(GetPC
()-1)) {
139 yyerror("break outside loop"); YYERROR;
142 | CONTINUE
'\n' blank
{
143 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
144 if
(AddContinueAddr
(GetPC
()-1)) {
145 yyerror("continue outside loop"); YYERROR;
148 | RETURN expr
'\n' blank
{
151 | RETURN
'\n' blank
{
152 ADD_OP
(OP_RETURN_NO_VAL
);
155 simpstmt: SYMBOL
'=' expr
{
156 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
158 | evalsym ADDEQ expr
{
159 ADD_OP
(OP_ADD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
161 | evalsym SUBEQ expr
{
162 ADD_OP
(OP_SUB
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
164 | evalsym MULEQ expr
{
165 ADD_OP
(OP_MUL
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
167 | evalsym DIVEQ expr
{
168 ADD_OP
(OP_DIV
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
170 | evalsym MODEQ expr
{
171 ADD_OP
(OP_MOD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
173 | evalsym ANDEQ expr
{
174 ADD_OP
(OP_BIT_AND
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
176 | evalsym OREQ expr
{
177 ADD_OP
(OP_BIT_OR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
179 | DELETE arraylv
'[' arglist
']' {
180 ADD_OP
(OP_ARRAY_DELETE
); ADD_IMMED
((void *)$4);
182 | initarraylv
'[' arglist
']' '=' expr
{
183 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
185 | initarraylv
'[' arglist
']' ADDEQ expr
{
186 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
188 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
190 | initarraylv
'[' arglist
']' SUBEQ expr
{
191 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
193 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
195 | initarraylv
'[' arglist
']' MULEQ expr
{
196 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
198 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
200 | initarraylv
'[' arglist
']' DIVEQ expr
{
201 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
203 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
205 | initarraylv
'[' arglist
']' MODEQ expr
{
206 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
208 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
210 | initarraylv
'[' arglist
']' ANDEQ expr
{
211 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
213 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
215 | initarraylv
'[' arglist
']' OREQ expr
{
216 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)1); ADD_IMMED
((void *)$3);
218 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
220 | initarraylv
'[' arglist
']' INCR
{
221 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$3);
223 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
225 | initarraylv
'[' arglist
']' DECR
{
226 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$3);
228 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$3);
230 | INCR initarraylv
'[' arglist
']' {
231 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$4);
233 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$4);
235 | DECR initarraylv
'[' arglist
']' {
236 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
((void *)0); ADD_IMMED
((void *)$4);
238 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
((void *)$4);
240 | SYMBOL
'(' arglist
')' {
241 ADD_OP
(OP_SUBR_CALL
);
242 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3);
245 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
246 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
249 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_INCR
);
250 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
253 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
254 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
257 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DECR
);
258 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
262 $$
= $1; ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
265 comastmts: /* nothing */ {
271 | comastmts
',' simpstmt
{
275 arglist: /* nothing */ {
285 expr: numexpr %prec CONCAT
286 | expr numexpr %prec CONCAT
{
290 initarraylv: SYMBOL
{
291 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
((void *)1);
293 | initarraylv
'[' arglist
']' {
294 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
298 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
((void *)0);
300 | arraylv
'[' arglist
']' {
301 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
309 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
312 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
315 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
317 | SYMBOL
'(' arglist
')' {
318 ADD_OP
(OP_SUBR_CALL
);
319 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3);
320 ADD_OP
(OP_FETCH_RET_VAL
);
323 | numexpr
'[' arglist
']' {
324 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3);
326 | numexpr
'+' numexpr
{
329 | numexpr
'-' numexpr
{
332 | numexpr
'*' numexpr
{
335 | numexpr
'/' numexpr
{
338 | numexpr
'%' numexpr
{
341 | numexpr POW numexpr
{
344 |
'-' numexpr %prec UNARY_MINUS
{
347 | numexpr GT numexpr
{
350 | numexpr GE numexpr
{
353 | numexpr LT numexpr
{
356 | numexpr LE numexpr
{
359 | numexpr EQ numexpr
{
362 | numexpr NE numexpr
{
365 | numexpr
'&' numexpr
{
368 | numexpr
'|' numexpr
{
371 | numexpr and numexpr %prec AND
{
372 ADD_OP
(OP_AND
); SET_BR_OFF
($2, GetPC
());
374 | numexpr or numexpr %prec OR
{
375 ADD_OP
(OP_OR
); SET_BR_OFF
($2, GetPC
());
381 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
382 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
385 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
386 ADD_OP
(OP_INCR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
389 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
390 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
393 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
394 ADD_OP
(OP_DECR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
396 | numexpr IN numexpr
{
401 $$
= GetPC
(); StartLoopAddrList
();
405 StartLoopAddrList
(); $$
= GetPC
();
409 ADD_OP
(OP_BRANCH
); $$
= GetPC
(); ADD_BR_OFF
(0);
412 cond: /* nothing */ {
413 ADD_OP
(OP_BRANCH_NEVER
); $$
= GetPC
(); ADD_BR_OFF
(0);
416 ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
(); ADD_BR_OFF
(0);
420 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
();
425 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_TRUE
); $$
= GetPC
();
433 %%
/* User Subroutines Section */
437 ** Parse a null-terminated string and create a program from it (this is the
438 ** parser entry point).  The program created by this routine can be
439 ** executed using ExecuteProgram.  Returns the program on success, or NULL
440 ** on failure.  On failure, the error message is returned as a pointer to
441 ** a static string in msg, and a pointer to the position in the string
442 ** where parsing failed is returned in stoppedAt.
444 Program
*ParseMacro
(char *expr
, char **msg
, char **stoppedAt
)
448 BeginCreatingProgram
();
450 /* call yyparse to parse the string and check for success. If the parse
451 failed, return the error message and string index (the grammar aborts
452 parsing at the first error) */
457 FreeProgram
(FinishCreatingProgram
());
461 /* get the newly created program */
462 prog
= FinishCreatingProgram
();
464 /* parse succeeded */
471 static int yylex(void)
475 static DataValue value
= {NO_TAG
, {0}};
476 static char escape
[] = "\\\"ntbrfav";
477 static char replace
[] = "\\\"\n\t\b\r\f\a\v";
479 /* skip whitespace and backslash-newline combinations which are
480 also considered whitespace */
482 if
(*InPtr
== '\\' && *(InPtr
+ 1) == '\n')
484 else if
(*InPtr
== ' ' ||
*InPtr
== '\t')
492 while
(*InPtr
!= '\n' && *InPtr
!= '\0') InPtr
++;
494 /* return end of input at the end of the string */
495 if
(*InPtr
== '\0') {
499 /* process number tokens */
500 if
(isdigit
((unsigned char)*InPtr
)) { /* number */
502 sscanf
(InPtr
, "%d%n", &value.val.n
, &len
);
503 sprintf
(name
, "const %d", value.val.n
);
506 if
((yylval.sym
=LookupSymbol
(name
)) == NULL
)
507 yylval.sym
= InstallSymbol
(name
, CONST_SYM
, value
);
511 /* process symbol tokens. "define" is a special case not handled
512 by this parser, considered end of input. Another special case
513 is action routine names which are allowed to contain '-' despite
514 the ambiguity, handled in matchesActionRoutine. */
515 if
(isalpha
((unsigned char)*InPtr
) ||
*InPtr
== '$') {
516 if
((s
=matchesActionRoutine
(&InPtr
)) == NULL
) {
517 char symName
[MAX_SYM_LEN
+1], *p
= symName
;
519 while
(isalnum
((unsigned char)*InPtr
) ||
*InPtr
=='_') {
520 if
(p
>= symName
+ MAX_SYM_LEN
)
526 if
(!strcmp
(symName
, "while")) return WHILE
;
527 if
(!strcmp
(symName
, "if")) return IF
;
528 if
(!strcmp
(symName
, "else")) return ELSE
;
529 if
(!strcmp
(symName
, "for")) return FOR
;
530 if
(!strcmp
(symName
, "break")) return BREAK
;
531 if
(!strcmp
(symName
, "continue")) return CONTINUE
;
532 if
(!strcmp
(symName
, "return")) return RETURN
;
533 if
(!strcmp
(symName
, "in")) return IN
;
534 if
(!strcmp
(symName
, "delete") && follow_non_whitespace
('(', SYMBOL
, DELETE
) == DELETE
) return DELETE
;
535 if
(!strcmp
(symName
, "define")) {
539 if
((s
=LookupSymbol
(symName
)) == NULL
) {
540 s
= InstallSymbol
(symName
, symName
[0]=='$' ?
541 (isdigit
((unsigned char)symName
[1]) ?
542 ARG_SYM
: GLOBAL_SYM
) : LOCAL_SYM
, value
);
543 s
->value.tag
= NO_TAG
;
550 /* process quoted strings w/ embedded escape sequences */
551 if
(*InPtr
== '\"') {
552 char string[MAX_STRING_CONST_LEN
], *p
= string;
554 while
(*InPtr
!= '\0' && *InPtr
!= '\"' && *InPtr
!= '\n') {
555 if
(p
>= string + MAX_STRING_CONST_LEN
) {
559 if
(*InPtr
== '\\') {
561 if
(*InPtr
== '\n') {
565 for
(i
=0; escape
[i
]!='\0'; i
++) {
566 if
(escape
[i
] == '\0') {
569 } else if
(escape
[i
] == *InPtr
) {
580 yylval.sym
= InstallStringConstSymbol
(string);
584 /* process remaining two character tokens or return single char as token */
586 case
'>': return follow
('=', GE
, GT
);
587 case
'<': return follow
('=', LE
, LT
);
588 case
'=': return follow
('=', EQ
, '=');
589 case
'!': return follow
('=', NE
, NOT
);
590 case
'+': return follow2
('+', INCR
, '=', ADDEQ
, '+');
591 case
'-': return follow2
('-', DECR
, '=', SUBEQ
, '-');
592 case
'|': return follow2
('|', OR
, '=', OREQ
, '|');
593 case
'&': return follow2
('&', AND
, '=', ANDEQ
, '&');
594 case
'*': return follow2
('*', POW
, '=', MULEQ
, '*');
595 case
'/': return follow
('=', DIVEQ
, '/');
596 case
'%': return follow
('=', MODEQ
, '%');
597 case
'^': return POW
;
598 default
: return
*(InPtr
-1);
603 ** look ahead for >=, etc.
605 static int follow
(char expect
, int yes
, int no
)
607 if
(*InPtr
++ == expect
)
612 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
)
614 char next
= *InPtr
++;
623 static int follow_non_whitespace
(char expect
, int yes
, int no
)
625 char *localInPtr
= InPtr
;
628 if
(*localInPtr
== ' ' ||
*localInPtr
== '\t') {
631 else if
(*localInPtr
== '\\' && *(localInPtr
+ 1) == '\n') {
634 else if
(*localInPtr
== expect
) {
644 ** Look (way) ahead for hyphenated routine names which begin at inPtr.  A
645 ** hyphenated name is allowed if it is pre-defined in the global symbol
646 ** table.  If a matching name exists, returns the symbol and updates "inPtr".
648 ** I know this is horrible language design, but existing nedit action routine
649 ** names contain hyphens. Handling them here in the lexical analysis process
650 ** is much easier than trying to deal with it in the parser itself. (sorry)
652 static Symbol
*matchesActionRoutine
(char **inPtr
)
656 char symbolName
[MAX_SYM_LEN
+1];
660 for
(c
= *inPtr
; isalnum
((unsigned char)*c
) ||
*c
=='_' ||
661 ( *c
=='-' && isalnum
((unsigned char)(*(c
+1)))); c
++) {
669 s
= LookupSymbol
(symbolName
);
676 ** Called by yacc to report errors (just stores the message for returning
677 ** when parsing is aborted).  The error token action is to immediately abort
678 ** parsing, so this message is immediately reported to the caller.
681 static int yyerror(char *s
)