1 /* $Id: parse.y,v 1.16 2001/08/08 08:34:06 amai Exp $ */
6 #include <X11/Intrinsic.h>
9 #include "../util/VMSparam.h"
12 #include <sys/param.h>
19 #include "interpret.h"
22 /* Macros to add error processing to AddOp and AddSym calls */
/*
** Code-generation helpers used inside the grammar actions.
**
** Each ADD_* macro forwards its argument to the corresponding Add* routine
** together with the address of ErrMsg.  If that routine returns zero
** (failure), the macro executes "return 1" in whatever function the macro
** was expanded in (for a grammar action, the parser function generated by
** yacc).
**
** NOTE: each ADD_* macro expands to a bare "if" statement that is neither
** braced nor wrapped in do { } while (0); do not use one as the body of an
** if that is followed by an else, or the else will bind to the macro's if.
*/
/* Append opcode "op" to the program being built */
23 #define ADD_OP(op) if (!AddOp(op, &ErrMsg)) return 1
/* Append a reference to symbol "sym" */
24 #define ADD_SYM(sym) if (!AddSym(sym, &ErrMsg)) return 1
/* Append the immediate value "val" */
25 #define ADD_IMMED(val) if (!AddImmediate(val, &ErrMsg)) return 1
/* Append a branch offset to location "to" (the grammar passes 0 as a
   placeholder and patches it later with SET_BR_OFF) */
26 #define ADD_BR_OFF(to) if (!AddBranchOffset(to, &ErrMsg)) return 1
/* Patch a previously added branch offset: store at "from" (accessed as an
   int) the distance, counted in Inst elements, from "from" to "to".  The
   expansion is an unparenthesized assignment expression, so use it only as
   a complete statement. */
27 #define SET_BR_OFF(from, to) *((int *)(from)) = ((Inst *)(to)) - ((Inst *)(from))
29 /* Max. length for a string constant (... there shouldn't be a maximum) */
30 #define MAX_STRING_CONST_LEN 5000
32 static const char CVSID
[] = "$Id: parse.y,v 1.16 2001/08/08 08:34:06 amai Exp $";
33 static int yyerror(char *s
);
34 static int yylex(void);
36 static int follow
(char expect
, int yes
, int no
);
37 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
);
38 static int follow_non_whitespace
(char expect
, int yes
, int no
);
39 static Symbol
*matchesActionRoutine
(char **inPtr
);
51 %token
<sym
> NUMBER STRING SYMBOL
52 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
54 %type
<inst
> cond comastmts for while else and or
61 %right
'=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
67 %left GT GE LT LE EQ NE IN
70 %nonassoc UNARY_MINUS NOT
79 program: blank stmts
{ ADD_OP
(OP_RETURN_NO_VAL
); return
0; }
80 | blank
'{' blank stmts
'}' { ADD_OP
(OP_RETURN_NO_VAL
); return
0; }
81 | blank
'{' blank
'}' { ADD_OP
(OP_RETURN_NO_VAL
); return
0; }
84 block: '{' blank stmts
'}' blank
91 stmt: simpstmt
'\n' blank
92 | IF
'(' cond
')' blank block %prec IF_NO_ELSE
93 { SET_BR_OFF
($3, GetPC
()); }
94 | IF
'(' cond
')' blank block else blank block %prec ELSE
95 { SET_BR_OFF
($3, ($7+1)); SET_BR_OFF
($7, GetPC
()); }
96 | while
'(' cond
')' blank block
{ ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($1);
97 SET_BR_OFF
($3, GetPC
()); FillLoopAddrs
(GetPC
(), $1); }
98 | for
'(' comastmts
';' cond
';' comastmts
')' blank block
99 { FillLoopAddrs
(GetPC
()+2+($7-($5+1)), GetPC
());
100 SwapCode
($5+1, $7, GetPC
());
101 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($3); SET_BR_OFF
($5, GetPC
()); }
102 | for
'(' SYMBOL IN SYMBOL
')'
103 { Symbol
*iterSym
= InstallIteratorSymbol
();
104 ADD_OP
(OP_BEGIN_ARRAY_ITER
); ADD_SYM
($5); ADD_SYM
(iterSym
);
105 ADD_OP
(OP_ARRAY_ITER
); ADD_SYM
($3); ADD_SYM
(iterSym
);
108 { FillLoopAddrs
(GetPC
()+2, GetPC
());
109 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($1+3);
110 SET_BR_OFF
($1+6, GetPC
());}
112 { ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0); AddBreakAddr
(GetPC
()-1); }
113 | CONTINUE
'\n' blank
114 { ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0); AddContinueAddr
(GetPC
()-1); }
115 | RETURN expr
'\n' blank
{ ADD_OP
(OP_RETURN
); }
116 | RETURN
'\n' blank
{ ADD_OP
(OP_RETURN_NO_VAL
); }
118 simpstmt: SYMBOL
'=' expr
{ ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
119 | evalsym ADDEQ expr
{ ADD_OP
(OP_ADD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
120 | evalsym SUBEQ expr
{ ADD_OP
(OP_SUB
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
121 | evalsym MULEQ expr
{ ADD_OP
(OP_MUL
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
122 | evalsym DIVEQ expr
{ ADD_OP
(OP_DIV
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
123 | evalsym MODEQ expr
{ ADD_OP
(OP_MOD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
124 | evalsym ANDEQ expr
{ ADD_OP
(OP_BIT_AND
); ADD_OP
(OP_ASSIGN
);
126 | evalsym OREQ expr
{ ADD_OP
(OP_BIT_OR
); ADD_OP
(OP_ASSIGN
);
128 | DELETE SYMBOL
'[' arglist
']'
129 { ADD_OP
(OP_ARRAY_DELETE
); ADD_SYM
($2); ADD_IMMED
((void *)$4); }
130 | SYMBOL
'[' arglist
']' '=' expr
131 { ADD_OP
(OP_ARRAY_ASSIGN
); ADD_SYM
($1); ADD_IMMED
((void *)$3); }
132 | SYMBOL
'[' arglist
']' { ADD_OP
(OP_PUSH_SYM
);
133 ADD_SYM
($1); ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3); }
135 { ADD_OP
(OP_ADD
); ADD_OP
(OP_ARRAY_ASSIGN
); ADD_SYM
($1);
136 ADD_IMMED
((void *)$3);}
137 | SYMBOL
'[' arglist
']' { ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
138 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3); }
140 { ADD_OP
(OP_SUB
); ADD_OP
(OP_ARRAY_ASSIGN
); ADD_SYM
($1);
141 ADD_IMMED
((void *)$3); }
142 | SYMBOL
'[' arglist
']'
143 { ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_ARRAY_REF
);
144 ADD_IMMED
((void *)$3); }
146 { ADD_OP
(OP_MUL
); ADD_OP
(OP_ARRAY_ASSIGN
); ADD_SYM
($1);
147 ADD_IMMED
((void *)$3); }
148 | SYMBOL
'[' arglist
']'
149 { ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_ARRAY_REF
);
150 ADD_IMMED
((void *)$3); }
152 { ADD_OP
(OP_DIV
); ADD_OP
(OP_ARRAY_ASSIGN
); ADD_SYM
($1);
153 ADD_IMMED
((void *)$3); }
154 | SYMBOL
'[' arglist
']'
155 { ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_ARRAY_REF
);
156 ADD_IMMED
((void *)$3); }
158 { ADD_OP
(OP_MOD
); ADD_OP
(OP_ARRAY_ASSIGN
); ADD_SYM
($1);
159 ADD_IMMED
((void *)$3); }
160 | SYMBOL
'[' arglist
']'
161 { ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_ARRAY_REF
);
162 ADD_IMMED
((void *)$3); }
164 { ADD_OP
(OP_BIT_AND
); ADD_OP
(OP_ARRAY_ASSIGN
);
165 ADD_SYM
($1); ADD_IMMED
((void *)$3); }
166 | SYMBOL
'[' arglist
']'
167 { ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_ARRAY_REF
);
168 ADD_IMMED
((void *)$3); }
170 { ADD_OP
(OP_BIT_OR
); ADD_OP
(OP_ARRAY_ASSIGN
);
171 ADD_SYM
($1); ADD_IMMED
((void *)$3); }
172 | SYMBOL
'(' arglist
')' { ADD_OP
(OP_SUBR_CALL
);
173 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3); }
174 | INCR SYMBOL
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
175 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2); }
176 | SYMBOL INCR
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_INCR
);
177 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
178 | DECR SYMBOL
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
179 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2); }
180 | SYMBOL DECR
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DECR
);
181 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
/* evalsym: a SYMBOL whose current value is pushed for evaluation.  Emits
   OP_PUSH_SYM followed by the symbol, and yields the symbol itself as the
   rule's value so that the compound-assignment alternatives of simpstmt
   (evalsym ADDEQ expr, evalsym SUBEQ expr, ...) can name it again as the
   target of their final OP_ASSIGN via ADD_SYM($1). */
183 evalsym: SYMBOL
{ $$
= $1; ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); }
/* comastmts: a possibly empty, comma-separated list of simple statements,
   used for the first and third clauses of the C-style "for" statement.
   Every alternative yields GetPC() taken when the alternative is reduced,
   i.e. the program counter just past whatever code the list has generated
   so far (or the current program counter for an empty list).  The "for"
   rule uses these values ($3 and $7 there) as code addresses. */
185 comastmts: /* nothing */ { $$
= GetPC
(); }
186 | simpstmt
{ $$
= GetPC
(); }
187 | comastmts
',' simpstmt
{ $$
= GetPC
(); }
189 arglist: /* nothing */ { $$
= 0;}
191 | arglist
',' expr
{ $$
= $1 + 1; }
/* expr: one or more numexprs written next to each other.  A single numexpr
   generates no additional code; each further adjacent numexpr emits
   OP_CONCAT after the code for both operands.  Both alternatives take their
   precedence from the CONCAT token (%prec CONCAT). */
193 expr: numexpr %prec CONCAT
194 | expr numexpr %prec CONCAT
{ ADD_OP
(OP_CONCAT
); }
196 numexpr: NUMBER
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); }
197 | STRING
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); }
198 | SYMBOL
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); }
199 | SYMBOL
'(' arglist
')' { ADD_OP
(OP_SUBR_CALL
);
200 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
((void *)$3);
201 ADD_OP
(OP_FETCH_RET_VAL
);}
203 | numexpr
'[' arglist
']'
204 { ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
((void *)$3); }
205 | numexpr
'+' numexpr
{ ADD_OP
(OP_ADD
); }
206 | numexpr
'-' numexpr
{ ADD_OP
(OP_SUB
); }
207 | numexpr
'*' numexpr
{ ADD_OP
(OP_MUL
); }
208 | numexpr
'/' numexpr
{ ADD_OP
(OP_DIV
); }
209 | numexpr
'%' numexpr
{ ADD_OP
(OP_MOD
); }
210 | numexpr POW numexpr
{ ADD_OP
(OP_POWER
); }
211 |
'-' numexpr %prec UNARY_MINUS
{ ADD_OP
(OP_NEGATE
); }
212 | numexpr GT numexpr
{ ADD_OP
(OP_GT
); }
213 | numexpr GE numexpr
{ ADD_OP
(OP_GE
); }
214 | numexpr LT numexpr
{ ADD_OP
(OP_LT
); }
215 | numexpr LE numexpr
{ ADD_OP
(OP_LE
); }
216 | numexpr EQ numexpr
{ ADD_OP
(OP_EQ
); }
217 | numexpr NE numexpr
{ ADD_OP
(OP_NE
); }
218 | numexpr
'&' numexpr
{ ADD_OP
(OP_BIT_AND
); }
219 | numexpr
'|' numexpr
{ ADD_OP
(OP_BIT_OR
); }
220 | numexpr and numexpr %prec AND
221 { ADD_OP
(OP_AND
); SET_BR_OFF
($2, GetPC
()); }
222 | numexpr or numexpr %prec OR
223 { ADD_OP
(OP_OR
); SET_BR_OFF
($2, GetPC
()); }
224 | NOT numexpr
{ ADD_OP
(OP_NOT
); }
225 | INCR SYMBOL
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
226 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2); }
227 | SYMBOL INCR
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
228 ADD_OP
(OP_INCR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
229 | DECR SYMBOL
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
230 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2); }
231 | SYMBOL DECR
{ ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
232 ADD_OP
(OP_DECR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1); }
233 | numexpr IN numexpr
{ ADD_OP
(OP_IN_ARRAY
); }
/* while: marker for the WHILE keyword.  Yields the program counter at the
   point the keyword is seen, before any code for the loop condition has
   been generated; the "while" statement rule branches back to this address
   (ADD_BR_OFF($1)) and passes it to FillLoopAddrs.  Also calls
   StartLoopAddrList() (presumably opens the list of break/continue
   addresses for this loop -- confirm against its definition). */
235 while: WHILE
{ $$
= GetPC
(); StartLoopAddrList
(); }
/* for: marker for the FOR keyword.  Calls StartLoopAddrList() (presumably
   opens the list of break/continue addresses for this loop -- confirm
   against its definition) and yields the program counter at the point the
   keyword is seen.  The "for (SYMBOL in SYMBOL)" statement rule addresses
   its generated instructions relative to this value ($1+3, $1+6). */
237 for: FOR
{ StartLoopAddrList
(); $$
= GetPC
(); }
/* else: marker for the ELSE keyword.  Emits OP_BRANCH followed by a
   placeholder branch offset of 0.  The rule's value is the program counter
   taken between the two, i.e. the location at which the placeholder is then
   added.  The if/else statement rule later patches that location with
   SET_BR_OFF($7, GetPC()) and uses $7+1 as the target for a false
   condition. */
239 else: ELSE
{ ADD_OP
(OP_BRANCH
); $$
= GetPC
(); ADD_BR_OFF
(0); }
/* cond: the controlling condition of an if/while/for statement.
     - empty:   emits OP_BRANCH_NEVER;
     - numexpr: emits OP_BRANCH_FALSE after the expression's code.
   In both cases a placeholder branch offset of 0 follows the opcode, and
   the rule's value is the program counter taken just before the placeholder
   is added.  The statement rules patch that location afterwards with
   SET_BR_OFF once the branch target is known. */
241 cond: /* nothing */ { ADD_OP
(OP_BRANCH_NEVER
); $$
= GetPC
(); ADD_BR_OFF
(0); }
242 | numexpr
{ ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
(); ADD_BR_OFF
(0); }
244 and: AND
{ ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
();
247 or: OR
{ ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_TRUE
); $$
= GetPC
();
254 %%
/* User Subroutines Section */
258 ** Parse a null terminated string and create a program from it (this is the
259 ** parser entry point). The program created by this routine can be
260 ** executed using ExecuteProgram. Returns program on success, or NULL
261 ** on failure. If the command failed, the error message is returned
262 ** as a pointer to a static string in msg, and the length of the string up
263 ** to where parsing failed in stoppedAt.
265 Program
*ParseMacro
(char *expr
, char **msg
, char **stoppedAt
)
269 BeginCreatingProgram
();
271 /* call yyparse to parse the string and check for success. If the parse
272 failed, return the error message and string index (the grammar aborts
273 parsing at the first error) */
278 FreeProgram
(FinishCreatingProgram
());
282 /* get the newly created program */
283 prog
= FinishCreatingProgram
();
285 /* parse succeeded */
292 static int yylex(void)
296 static int stringConstIndex
= 0;
297 static DataValue value
= {0, {0}};
298 static char escape
[] = "\\\"ntbrfav";
299 static char replace
[] = "\\\"\n\t\b\r\f\a\v";
301 /* skip whitespace and backslash-newline combinations which are
302 also considered whitespace */
304 if
(*InPtr
== '\\' && *(InPtr
+ 1) == '\n')
306 else if
(*InPtr
== ' ' ||
*InPtr
== '\t')
314 while
(*InPtr
!= '\n' && *InPtr
!= '\0') InPtr
++;
316 /* return end of input at the end of the string */
317 if
(*InPtr
== '\0') {
321 /* process number tokens */
322 if
(isdigit
(*InPtr
)) { /* number */
324 sscanf
(InPtr
, "%d%n", &value.val.n
, &len
);
325 sprintf
(name
, "const %d", value.val.n
);
328 if
((yylval.sym
=LookupSymbol
(name
)) == NULL
)
329 yylval.sym
= InstallSymbol
(name
, CONST_SYM
, value
);
333 /* process symbol tokens. "define" is a special case not handled
334 by this parser, considered end of input. Another special case
335 is action routine names which are allowed to contain '-' despite
336 the ambiguity, handled in matchesActionRoutine. */
337 if
(isalpha
(*InPtr
) ||
*InPtr
== '$') {
338 if
((s
=matchesActionRoutine
(&InPtr
)) == NULL
) {
339 char symName
[MAX_SYM_LEN
+1], *p
= symName
;
341 while
(isalnum
(*InPtr
) ||
*InPtr
=='_') {
342 if
(p
>= symName
+ MAX_SYM_LEN
)
348 if
(!strcmp
(symName
, "while")) return WHILE
;
349 if
(!strcmp
(symName
, "if")) return IF
;
350 if
(!strcmp
(symName
, "else")) return ELSE
;
351 if
(!strcmp
(symName
, "for")) return FOR
;
352 if
(!strcmp
(symName
, "break")) return BREAK
;
353 if
(!strcmp
(symName
, "continue")) return CONTINUE
;
354 if
(!strcmp
(symName
, "return")) return RETURN
;
355 if
(!strcmp
(symName
, "in")) return IN
;
356 if
(!strcmp
(symName
, "delete") && follow_non_whitespace
('(', SYMBOL
, DELETE
) == DELETE
) return DELETE
;
357 if
(!strcmp
(symName
, "define")) {
361 if
((s
=LookupSymbol
(symName
)) == NULL
) {
362 s
= InstallSymbol
(symName
, symName
[0]=='$' ?
(isdigit
(symName
[1]) ?
363 ARG_SYM
: GLOBAL_SYM
) : LOCAL_SYM
, value
);
364 s
->value.tag
= NO_TAG
;
371 /* process quoted strings w/ embedded escape sequences */
372 if
(*InPtr
== '\"') {
373 char string[MAX_STRING_CONST_LEN
], *p
= string;
376 while
(*InPtr
!= '\0' && *InPtr
!= '\"' && *InPtr
!= '\n') {
377 if
(p
>= string + MAX_STRING_CONST_LEN
) {
381 if
(*InPtr
== '\\') {
383 if
(*InPtr
== '\n') {
387 for
(i
=0; escape
[i
]!='\0'; i
++) {
388 if
(escape
[i
] == '\0') {
391 } else if
(escape
[i
] == *InPtr
) {
402 if
((yylval.sym
= LookupStringConstSymbol
(string)) == NULL
) {
403 value.val.str
= AllocString
(p
-string+1);
404 strcpy
(value.val.str
, string);
405 value.tag
= STRING_TAG
;
406 sprintf
(stringName
, "string #%d", stringConstIndex
++);
407 yylval.sym
= InstallSymbol
(stringName
, CONST_SYM
, value
);
412 /* process remaining two character tokens or return single char as token */
414 case
'>': return follow
('=', GE
, GT
);
415 case
'<': return follow
('=', LE
, LT
);
416 case
'=': return follow
('=', EQ
, '=');
417 case
'!': return follow
('=', NE
, NOT
);
418 case
'+': return follow2
('+', INCR
, '=', ADDEQ
, '+');
419 case
'-': return follow2
('-', DECR
, '=', SUBEQ
, '-');
420 case
'|': return follow2
('|', OR
, '=', OREQ
, '|');
421 case
'&': return follow2
('&', AND
, '=', ANDEQ
, '&');
422 case
'*': return follow2
('*', POW
, '=', MULEQ
, '*');
423 case
'/': return follow
('=', DIVEQ
, '/');
424 case
'%': return follow
('=', MODEQ
, '%');
425 case
'^': return POW
;
426 default
: return
*(InPtr
-1);
431 ** look ahead for >=, etc.
433 static int follow
(char expect
, int yes
, int no
)
435 if
(*InPtr
++ == expect
)
440 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
)
442 char next
= *InPtr
++;
451 static int follow_non_whitespace
(char expect
, int yes
, int no
)
453 char *localInPtr
= InPtr
;
456 if
(*localInPtr
== ' ' ||
*localInPtr
== '\t') {
459 else if
(*localInPtr
== '\\' && *(localInPtr
+ 1) == '\n') {
462 else if
(*localInPtr
== expect
) {
472 ** Look (way) ahead for hyphenated routine names which begin at inPtr. A
473 ** hyphenated name is allowed if it is pre-defined in the global symbol
474 ** table. If a matching name exists, returns the symbol and updates "inPtr".
476 ** I know this is horrible language design, but existing nedit action routine
477 ** names contain hyphens. Handling them here in the lexical analysis process
478 ** is much easier than trying to deal with it in the parser itself. (sorry)
480 static Symbol
*matchesActionRoutine
(char **inPtr
)
484 char symbolName
[MAX_SYM_LEN
+1];
488 for
(c
= *inPtr
; isalnum
(*c
) ||
*c
=='_' ||
(*c
=='-'&&isalnum
(*(c
+1))); c
++){
496 s
= LookupSymbol
(symbolName
);
503 ** Called by yacc to report errors (just stores the message for returning
504 ** when parsing is aborted. The error token action is to immediately abort
505 ** parsing, so this message is immediately reported to the caller
508 static int yyerror(char *s
)