482504: Bad CapsLock grab on certain keyboard configurations
[nedit.git] / source / parse.y
blob05c35ed88ff26abf60ecc94608139f6dc46c794e
1 /* $Id: parse.y,v 1.16 2001/08/08 08:34:06 amai Exp $ */
2 %{
3 #include <string.h>
4 #include <stdio.h>
5 #include <ctype.h>
6 #include <X11/Intrinsic.h>
7 #include <Xm/Xm.h>
8 #ifdef VMS
9 #include "../util/VMSparam.h"
10 #else
11 #ifndef __MVS__
12 #include <sys/param.h>
13 #endif
14 #endif /*VMS*/
16 #include "textBuf.h"
17 #include "nedit.h"
18 #include "rbTree.h"
19 #include "interpret.h"
20 #include "parse.h"
/* Macros to add error processing to AddOp and AddSym calls.  Each ADD_*
   macro calls the matching Add* routine, passing &ErrMsg, and makes the
   enclosing function (the generated yyparse) return 1 when that routine
   returns zero.  They are wrapped in do { } while (0) so that every use
   expands to exactly one statement and cannot capture a following "else". */
#define ADD_OP(op) do { if (!AddOp(op, &ErrMsg)) return 1; } while (0)
#define ADD_SYM(sym) do { if (!AddSym(sym, &ErrMsg)) return 1; } while (0)
#define ADD_IMMED(val) do { if (!AddImmediate(val, &ErrMsg)) return 1; } while (0)
#define ADD_BR_OFF(to) do { if (!AddBranchOffset(to, &ErrMsg)) return 1; } while (0)
/* Store, in the int located at "from", the distance from "from" to "to"
   measured in Inst elements.  The expansion is fully parenthesized so it
   behaves as a single expression wherever it is used. */
#define SET_BR_OFF(from, to) \
    (*((int *)(from)) = ((Inst *)(to)) - ((Inst *)(from)))
29 /* Max. length for a string constant (... there shouldn't be a maximum) */
30 #define MAX_STRING_CONST_LEN 5000
32 static const char CVSID[] = "$Id: parse.y,v 1.16 2001/08/08 08:34:06 amai Exp $";
33 static int yyerror(char *s);
34 static int yylex(void);
35 int yyparse(void);
36 static int follow(char expect, int yes, int no);
37 static int follow2(char expect1, int yes1, char expect2, int yes2, int no);
38 static int follow_non_whitespace(char expect, int yes, int no);
39 static Symbol *matchesActionRoutine(char **inPtr);
41 static char *ErrMsg;
42 static char *InPtr;
46 %union {
47 Symbol *sym;
48 Inst *inst;
49 int nArgs;
51 %token <sym> NUMBER STRING SYMBOL
52 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
53 %type <nArgs> arglist
54 %type <inst> cond comastmts for while else and or
55 %type <sym> evalsym
57 %nonassoc IF_NO_ELSE
58 %nonassoc ELSE
60 %nonassoc SYMBOL
61 %right '=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
62 %left CONCAT
63 %left OR
64 %left AND
65 %left '|'
66 %left '&'
67 %left GT GE LT LE EQ NE IN
68 %left '+' '-'
69 %left '*' '/' '%'
70 %nonassoc UNARY_MINUS NOT
71 %nonassoc DELETE
72 %nonassoc INCR DECR
73 %right POW
74 %nonassoc '['
75 %nonassoc '('
77 %% /* Rules */
/* program: the whole macro text, optionally wrapped in one pair of braces.
   A successful parse appends OP_RETURN_NO_VAL and makes yyparse return 0;
   the error rule makes yyparse return 1. */
79 program: blank stmts { ADD_OP(OP_RETURN_NO_VAL); return 0; }
80 | blank '{' blank stmts '}' { ADD_OP(OP_RETURN_NO_VAL); return 0; }
81 | blank '{' blank '}' { ADD_OP(OP_RETURN_NO_VAL); return 0; }
82 | error { return 1; }
/* block: a braced statement list (possibly empty) or a single statement. */
84 block: '{' blank stmts '}' blank
85 | '{' blank '}' blank
86 | stmt
/* stmts: one or more statements. */
88 stmts: stmt
89 | stmts stmt
/* stmt: a newline-terminated simple statement or a control construct. */
91 stmt: simpstmt '\n' blank
/* if without else: $3 is the branch-offset slot recorded by cond; point it
   at the PC reached after the block. */
92 | IF '(' cond ')' blank block %prec IF_NO_ELSE
93 { SET_BR_OFF($3, GetPC()); }
/* if/else: $7 is the offset slot of the branch emitted by "else".  The cond
   branch is pointed just past that slot ($7+1), and the else branch is
   pointed at the PC reached after the second block. */
94 | IF '(' cond ')' blank block else blank block %prec ELSE
95 { SET_BR_OFF($3, ($7+1)); SET_BR_OFF($7, GetPC()); }
/* while: $1 is the PC saved when WHILE was seen.  Emit a branch back to it,
   point the cond branch at the PC after the loop, then call FillLoopAddrs
   (presumably resolving pending break/continue branches -- confirm against
   its definition). */
96 | while '(' cond ')' blank block { ADD_OP(OP_BRANCH); ADD_BR_OFF($1);
97 SET_BR_OFF($3, GetPC()); FillLoopAddrs(GetPC(), $1); }
/* C-style for: $3, $5 and $7 are positions recorded by the first comastmts,
   cond and the second comastmts.  SwapCode is called on the ranges bounded
   by $5+1, $7 and GetPC() (presumably moving the increment code after the
   loop body -- confirm against SwapCode); a branch back to $3 is emitted
   and the cond branch is pointed at the PC after the loop. */
98 | for '(' comastmts ';' cond ';' comastmts ')' blank block
99 { FillLoopAddrs(GetPC()+2+($7-($5+1)), GetPC());
100 SwapCode($5+1, $7, GetPC());
101 ADD_OP(OP_BRANCH); ADD_BR_OFF($3); SET_BR_OFF($5, GetPC()); }
/* for (key in array): the mid-rule action emits OP_BEGIN_ARRAY_ITER and
   OP_ARRAY_ITER, each followed by two symbols, and a zero branch offset.
   The final action emits a branch to $1+3 and patches the slot at $1+6;
   both indices are relative to the PC saved by "for". */
102 | for '(' SYMBOL IN SYMBOL ')'
103 { Symbol *iterSym = InstallIteratorSymbol();
104 ADD_OP(OP_BEGIN_ARRAY_ITER); ADD_SYM($5); ADD_SYM(iterSym);
105 ADD_OP(OP_ARRAY_ITER); ADD_SYM($3); ADD_SYM(iterSym);
106 ADD_BR_OFF(0); }
107 blank block
108 { FillLoopAddrs(GetPC()+2, GetPC());
109 ADD_OP(OP_BRANCH); ADD_BR_OFF($1+3);
110 SET_BR_OFF($1+6, GetPC());}
/* break / continue: emit a branch with a zero placeholder offset and
   register the placeholder's address (GetPC()-1) with AddBreakAddr or
   AddContinueAddr. */
111 | BREAK '\n' blank
112 { ADD_OP(OP_BRANCH); ADD_BR_OFF(0); AddBreakAddr(GetPC()-1); }
113 | CONTINUE '\n' blank
114 { ADD_OP(OP_BRANCH); ADD_BR_OFF(0); AddContinueAddr(GetPC()-1); }
/* return, with or without a value expression. */
115 | RETURN expr '\n' blank { ADD_OP(OP_RETURN); }
116 | RETURN '\n' blank { ADD_OP(OP_RETURN_NO_VAL); }
/* simpstmt: statements that may appear on a line of their own or inside the
   comma lists of a for header: assignments, compound assignments, array
   element operations, subroutine calls and increment/decrement. */
/* Plain assignment: the expression code is emitted first, then OP_ASSIGN
   followed by the target symbol. */
118 simpstmt: SYMBOL '=' expr { ADD_OP(OP_ASSIGN); ADD_SYM($1); }
/* Compound assignment to a variable: evalsym has already emitted a push of
   the target, so the arithmetic op combines it with the expression before
   OP_ASSIGN stores the result. */
119 | evalsym ADDEQ expr { ADD_OP(OP_ADD); ADD_OP(OP_ASSIGN); ADD_SYM($1); }
120 | evalsym SUBEQ expr { ADD_OP(OP_SUB); ADD_OP(OP_ASSIGN); ADD_SYM($1); }
121 | evalsym MULEQ expr { ADD_OP(OP_MUL); ADD_OP(OP_ASSIGN); ADD_SYM($1); }
122 | evalsym DIVEQ expr { ADD_OP(OP_DIV); ADD_OP(OP_ASSIGN); ADD_SYM($1); }
123 | evalsym MODEQ expr { ADD_OP(OP_MOD); ADD_OP(OP_ASSIGN); ADD_SYM($1); }
124 | evalsym ANDEQ expr { ADD_OP(OP_BIT_AND); ADD_OP(OP_ASSIGN);
125 ADD_SYM($1); }
126 | evalsym OREQ expr { ADD_OP(OP_BIT_OR); ADD_OP(OP_ASSIGN);
127 ADD_SYM($1); }
/* Array element delete and assignment: the immediate operand is the number
   of subscript expressions counted by arglist. */
128 | DELETE SYMBOL '[' arglist ']'
129 { ADD_OP(OP_ARRAY_DELETE); ADD_SYM($2); ADD_IMMED((void *)$4); }
130 | SYMBOL '[' arglist ']' '=' expr
131 { ADD_OP(OP_ARRAY_ASSIGN); ADD_SYM($1); ADD_IMMED((void *)$3); }
/* Compound assignment to an array element: the mid-rule action (run after
   ']' and before the operator is reduced) emits a push of the array and an
   OP_ARRAY_REF; the final action emits the arithmetic op followed by
   OP_ARRAY_ASSIGN.  The same shape is repeated for each operator. */
132 | SYMBOL '[' arglist ']' { ADD_OP(OP_PUSH_SYM);
133 ADD_SYM($1); ADD_OP(OP_ARRAY_REF); ADD_IMMED((void *)$3); }
134 ADDEQ expr
135 { ADD_OP(OP_ADD); ADD_OP(OP_ARRAY_ASSIGN); ADD_SYM($1);
136 ADD_IMMED((void *)$3);}
137 | SYMBOL '[' arglist ']' { ADD_OP(OP_PUSH_SYM); ADD_SYM($1);
138 ADD_OP(OP_ARRAY_REF); ADD_IMMED((void *)$3); }
139 SUBEQ expr
140 { ADD_OP(OP_SUB); ADD_OP(OP_ARRAY_ASSIGN); ADD_SYM($1);
141 ADD_IMMED((void *)$3); }
142 | SYMBOL '[' arglist ']'
143 { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_ARRAY_REF);
144 ADD_IMMED((void *)$3); }
145 MULEQ expr
146 { ADD_OP(OP_MUL); ADD_OP(OP_ARRAY_ASSIGN); ADD_SYM($1);
147 ADD_IMMED((void *)$3); }
148 | SYMBOL '[' arglist ']'
149 { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_ARRAY_REF);
150 ADD_IMMED((void *)$3); }
151 DIVEQ expr
152 { ADD_OP(OP_DIV); ADD_OP(OP_ARRAY_ASSIGN); ADD_SYM($1);
153 ADD_IMMED((void *)$3); }
154 | SYMBOL '[' arglist ']'
155 { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_ARRAY_REF);
156 ADD_IMMED((void *)$3); }
157 MODEQ expr
158 { ADD_OP(OP_MOD); ADD_OP(OP_ARRAY_ASSIGN); ADD_SYM($1);
159 ADD_IMMED((void *)$3); }
160 | SYMBOL '[' arglist ']'
161 { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_ARRAY_REF);
162 ADD_IMMED((void *)$3); }
163 ANDEQ expr
164 { ADD_OP(OP_BIT_AND); ADD_OP(OP_ARRAY_ASSIGN);
165 ADD_SYM($1); ADD_IMMED((void *)$3); }
166 | SYMBOL '[' arglist ']'
167 { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_ARRAY_REF);
168 ADD_IMMED((void *)$3); }
169 OREQ expr
170 { ADD_OP(OP_BIT_OR); ADD_OP(OP_ARRAY_ASSIGN);
171 ADD_SYM($1); ADD_IMMED((void *)$3); }
/* Subroutine call used as a statement: the symbol is passed through
   PromoteToGlobal and the immediate operand is the argument count.  Unlike
   the call form in numexpr, no OP_FETCH_RET_VAL is emitted here. */
172 | SYMBOL '(' arglist ')' { ADD_OP(OP_SUBR_CALL);
173 ADD_SYM(PromoteToGlobal($1)); ADD_IMMED((void *)$3); }
/* Statement-level ++/--: prefix and postfix forms emit the same code,
   because no value is left for a surrounding expression. */
174 | INCR SYMBOL { ADD_OP(OP_PUSH_SYM); ADD_SYM($2); ADD_OP(OP_INCR);
175 ADD_OP(OP_ASSIGN); ADD_SYM($2); }
176 | SYMBOL INCR { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_INCR);
177 ADD_OP(OP_ASSIGN); ADD_SYM($1); }
178 | DECR SYMBOL { ADD_OP(OP_PUSH_SYM); ADD_SYM($2); ADD_OP(OP_DECR);
179 ADD_OP(OP_ASSIGN); ADD_SYM($2); }
180 | SYMBOL DECR { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_DECR);
181 ADD_OP(OP_ASSIGN); ADD_SYM($1); }
/* evalsym: a symbol whose push is emitted immediately; the symbol itself is
   passed up so the compound-assignment rules can store back into it. */
183 evalsym: SYMBOL { $$ = $1; ADD_OP(OP_PUSH_SYM); ADD_SYM($1); }
/* comastmts: a possibly empty, comma-separated list of simple statements;
   its value is the PC reached after the code emitted so far. */
185 comastmts: /* nothing */ { $$ = GetPC(); }
186 | simpstmt { $$ = GetPC(); }
187 | comastmts ',' simpstmt { $$ = GetPC(); }
/* arglist: a possibly empty, comma-separated list of expressions; its value
   is the number of expressions in the list. */
189 arglist: /* nothing */ { $$ = 0;}
190 | expr { $$ = 1; }
191 | arglist ',' expr { $$ = $1 + 1; }
/* expr: one numexpr, or several written side by side; each additional
   numexpr emits OP_CONCAT. */
193 expr: numexpr %prec CONCAT
194 | expr numexpr %prec CONCAT { ADD_OP(OP_CONCAT); }
/* numexpr: operands and operators.  Operand code is emitted as the operands
   are reduced, so each operator action only appends its own opcode. */
196 numexpr: NUMBER { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); }
197 | STRING { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); }
198 | SYMBOL { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); }
/* Subroutine call used as a value: same code as the statement form, plus
   OP_FETCH_RET_VAL. */
199 | SYMBOL '(' arglist ')' { ADD_OP(OP_SUBR_CALL);
200 ADD_SYM(PromoteToGlobal($1)); ADD_IMMED((void *)$3);
201 ADD_OP(OP_FETCH_RET_VAL);}
202 | '(' expr ')'
/* Array subscript: the immediate operand is the number of subscripts. */
203 | numexpr '[' arglist ']'
204 { ADD_OP(OP_ARRAY_REF); ADD_IMMED((void *)$3); }
205 | numexpr '+' numexpr { ADD_OP(OP_ADD); }
206 | numexpr '-' numexpr { ADD_OP(OP_SUB); }
207 | numexpr '*' numexpr { ADD_OP(OP_MUL); }
208 | numexpr '/' numexpr { ADD_OP(OP_DIV); }
209 | numexpr '%' numexpr { ADD_OP(OP_MOD); }
210 | numexpr POW numexpr { ADD_OP(OP_POWER); }
211 | '-' numexpr %prec UNARY_MINUS { ADD_OP(OP_NEGATE); }
212 | numexpr GT numexpr { ADD_OP(OP_GT); }
213 | numexpr GE numexpr { ADD_OP(OP_GE); }
214 | numexpr LT numexpr { ADD_OP(OP_LT); }
215 | numexpr LE numexpr { ADD_OP(OP_LE); }
216 | numexpr EQ numexpr { ADD_OP(OP_EQ); }
217 | numexpr NE numexpr { ADD_OP(OP_NE); }
218 | numexpr '&' numexpr { ADD_OP(OP_BIT_AND); }
219 | numexpr '|' numexpr { ADD_OP(OP_BIT_OR); }
/* Logical and/or: the "and"/"or" nonterminals emit OP_DUP and a conditional
   branch between the two operands; $2 is that branch's offset slot, pointed
   here at the PC after the operator so the branch skips the right operand. */
220 | numexpr and numexpr %prec AND
221 { ADD_OP(OP_AND); SET_BR_OFF($2, GetPC()); }
222 | numexpr or numexpr %prec OR
223 { ADD_OP(OP_OR); SET_BR_OFF($2, GetPC()); }
224 | NOT numexpr { ADD_OP(OP_NOT); }
/* Expression-level ++/--: OP_DUP is emitted after the increment/decrement
   for the prefix forms and before it for the postfix forms. */
225 | INCR SYMBOL { ADD_OP(OP_PUSH_SYM); ADD_SYM($2); ADD_OP(OP_INCR);
226 ADD_OP(OP_DUP); ADD_OP(OP_ASSIGN); ADD_SYM($2); }
227 | SYMBOL INCR { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_DUP);
228 ADD_OP(OP_INCR); ADD_OP(OP_ASSIGN); ADD_SYM($1); }
229 | DECR SYMBOL { ADD_OP(OP_PUSH_SYM); ADD_SYM($2); ADD_OP(OP_DECR);
230 ADD_OP(OP_DUP); ADD_OP(OP_ASSIGN); ADD_SYM($2); }
231 | SYMBOL DECR { ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_DUP);
232 ADD_OP(OP_DECR); ADD_OP(OP_ASSIGN); ADD_SYM($1); }
233 | numexpr IN numexpr { ADD_OP(OP_IN_ARRAY); }
/* while / for: record the PC at the loop keyword and call
   StartLoopAddrList(); note the two rules do these in opposite order. */
235 while: WHILE { $$ = GetPC(); StartLoopAddrList(); }
237 for: FOR { StartLoopAddrList(); $$ = GetPC(); }
/* else: emit an unconditional branch with a placeholder offset; the value
   is the address of that placeholder. */
239 else: ELSE { ADD_OP(OP_BRANCH); $$ = GetPC(); ADD_BR_OFF(0); }
/* cond: emit a branch with a placeholder offset and yield the placeholder's
   address.  An empty condition emits OP_BRANCH_NEVER, otherwise
   OP_BRANCH_FALSE follows the condition code. */
241 cond: /* nothing */ { ADD_OP(OP_BRANCH_NEVER); $$ = GetPC(); ADD_BR_OFF(0); }
242 | numexpr { ADD_OP(OP_BRANCH_FALSE); $$ = GetPC(); ADD_BR_OFF(0); }
/* and / or: duplicate the left operand's value and emit a conditional
   branch with a placeholder offset; the value is the placeholder's address. */
244 and: AND { ADD_OP(OP_DUP); ADD_OP(OP_BRANCH_FALSE); $$ = GetPC();
245 ADD_BR_OFF(0); }
247 or: OR { ADD_OP(OP_DUP); ADD_OP(OP_BRANCH_TRUE); $$ = GetPC();
248 ADD_BR_OFF(0); }
/* blank: zero or more newline tokens. */
250 blank: /* nothing */
251 | blank '\n'
254 %% /* User Subroutines Section */
258 ** Parse a null terminated string and create a program from it (this is the
259 ** parser entry point). The program created by this routine can be
260 ** executed using ExecuteProgram. Returns program on success, or NULL
261 ** on failure. If the command failed, the error message is returned
262 ** as a pointer to a static string in msg, and the length of the string up
263 ** to where parsing failed in stoppedAt.
265 Program *ParseMacro(char *expr, char **msg, char **stoppedAt)
267 Program *prog;
269 BeginCreatingProgram();
271 /* call yyparse to parse the string and check for success. If the parse
272 failed, return the error message and string index (the grammar aborts
273 parsing at the first error) */
274 InPtr = expr;
275 if (yyparse()) {
276 *msg = ErrMsg;
277 *stoppedAt = InPtr;
278 FreeProgram(FinishCreatingProgram());
279 return NULL;
282 /* get the newly created program */
283 prog = FinishCreatingProgram();
285 /* parse succeeded */
286 *msg = "";
287 *stoppedAt = InPtr;
288 return prog;
292 static int yylex(void)
294 int i, len;
295 Symbol *s;
296 static int stringConstIndex = 0;
297 static DataValue value = {0, {0}};
298 static char escape[] = "\\\"ntbrfav";
299 static char replace[] = "\\\"\n\t\b\r\f\a\v";
301 /* skip whitespace and backslash-newline combinations which are
302 also considered whitespace */
303 for (;;) {
304 if (*InPtr == '\\' && *(InPtr + 1) == '\n')
305 InPtr += 2;
306 else if (*InPtr == ' ' || *InPtr == '\t')
307 InPtr++;
308 else
309 break;
312 /* skip comments */
313 if (*InPtr == '#')
314 while (*InPtr != '\n' && *InPtr != '\0') InPtr++;
316 /* return end of input at the end of the string */
317 if (*InPtr == '\0') {
318 return 0;
321 /* process number tokens */
322 if (isdigit(*InPtr)) { /* number */
323 char name[28];
324 sscanf(InPtr, "%d%n", &value.val.n, &len);
325 sprintf(name, "const %d", value.val.n);
326 InPtr += len;
327 value.tag = INT_TAG;
328 if ((yylval.sym=LookupSymbol(name)) == NULL)
329 yylval.sym = InstallSymbol(name, CONST_SYM, value);
330 return NUMBER;
333 /* process symbol tokens. "define" is a special case not handled
334 by this parser, considered end of input. Another special case
335 is action routine names which are allowed to contain '-' despite
336 the ambiguity, handled in matchesActionRoutine. */
337 if (isalpha(*InPtr) || *InPtr == '$') {
338 if ((s=matchesActionRoutine(&InPtr)) == NULL) {
339 char symName[MAX_SYM_LEN+1], *p = symName;
340 *p++ = *InPtr++;
341 while (isalnum(*InPtr) || *InPtr=='_') {
342 if (p >= symName + MAX_SYM_LEN)
343 InPtr++;
344 else
345 *p++ = *InPtr++;
347 *p = '\0';
348 if (!strcmp(symName, "while")) return WHILE;
349 if (!strcmp(symName, "if")) return IF;
350 if (!strcmp(symName, "else")) return ELSE;
351 if (!strcmp(symName, "for")) return FOR;
352 if (!strcmp(symName, "break")) return BREAK;
353 if (!strcmp(symName, "continue")) return CONTINUE;
354 if (!strcmp(symName, "return")) return RETURN;
355 if (!strcmp(symName, "in")) return IN;
356 if (!strcmp(symName, "delete") && follow_non_whitespace('(', SYMBOL, DELETE) == DELETE) return DELETE;
357 if (!strcmp(symName, "define")) {
358 InPtr -= 6;
359 return 0;
361 if ((s=LookupSymbol(symName)) == NULL) {
362 s = InstallSymbol(symName, symName[0]=='$' ? (isdigit(symName[1]) ?
363 ARG_SYM : GLOBAL_SYM) : LOCAL_SYM, value);
364 s->value.tag = NO_TAG;
367 yylval.sym = s;
368 return SYMBOL;
371 /* process quoted strings w/ embedded escape sequences */
372 if (*InPtr == '\"') {
373 char string[MAX_STRING_CONST_LEN], *p = string;
374 char stringName[25];
375 InPtr++;
376 while (*InPtr != '\0' && *InPtr != '\"' && *InPtr != '\n') {
377 if (p >= string + MAX_STRING_CONST_LEN) {
378 InPtr++;
379 continue;
381 if (*InPtr == '\\') {
382 InPtr++;
383 if (*InPtr == '\n') {
384 InPtr++;
385 continue;
387 for (i=0; escape[i]!='\0'; i++) {
388 if (escape[i] == '\0') {
389 *p++= *InPtr++;
390 break;
391 } else if (escape[i] == *InPtr) {
392 *p++ = replace[i];
393 InPtr++;
394 break;
397 } else
398 *p++= *InPtr++;
400 *p = '\0';
401 InPtr++;
402 if ((yylval.sym = LookupStringConstSymbol(string)) == NULL) {
403 value.val.str = AllocString(p-string+1);
404 strcpy(value.val.str, string);
405 value.tag = STRING_TAG;
406 sprintf(stringName, "string #%d", stringConstIndex++);
407 yylval.sym = InstallSymbol(stringName, CONST_SYM, value);
409 return STRING;
412 /* process remaining two character tokens or return single char as token */
413 switch(*InPtr++) {
414 case '>': return follow('=', GE, GT);
415 case '<': return follow('=', LE, LT);
416 case '=': return follow('=', EQ, '=');
417 case '!': return follow('=', NE, NOT);
418 case '+': return follow2('+', INCR, '=', ADDEQ, '+');
419 case '-': return follow2('-', DECR, '=', SUBEQ, '-');
420 case '|': return follow2('|', OR, '=', OREQ, '|');
421 case '&': return follow2('&', AND, '=', ANDEQ, '&');
422 case '*': return follow2('*', POW, '=', MULEQ, '*');
423 case '/': return follow('=', DIVEQ, '/');
424 case '%': return follow('=', MODEQ, '%');
425 case '^': return POW;
426 default: return *(InPtr-1);
431 ** look ahead for >=, etc.
433 static int follow(char expect, int yes, int no)
435 if (*InPtr++ == expect)
436 return yes;
437 InPtr--;
438 return no;
440 static int follow2(char expect1, int yes1, char expect2, int yes2, int no)
442 char next = *InPtr++;
443 if (next == expect1)
444 return yes1;
445 if (next == expect2)
446 return yes2;
447 InPtr--;
448 return no;
451 static int follow_non_whitespace(char expect, int yes, int no)
453 char *localInPtr = InPtr;
455 while (1) {
456 if (*localInPtr == ' ' || *localInPtr == '\t') {
457 ++localInPtr;
459 else if (*localInPtr == '\\' && *(localInPtr + 1) == '\n') {
460 localInPtr += 2;
462 else if (*localInPtr == expect) {
463 return(yes);
465 else {
466 return(no);
472 ** Look (way) ahead for hyphenated routine names which begin at inPtr. A
473 ** hyphenated name is allowed if it is pre-defined in the global symbol
474 ** table. If a matching name exists, returns the symbol, and update "inPtr".
476 ** I know this is horrible language design, but existing nedit action routine
477 ** names contain hyphens. Handling them here in the lexical analysis process
478 ** is much easier than trying to deal with it in the parser itself. (sorry)
480 static Symbol *matchesActionRoutine(char **inPtr)
482 char *c, *symPtr;
483 int hasDash = False;
484 char symbolName[MAX_SYM_LEN+1];
485 Symbol *s;
487 symPtr = symbolName;
488 for (c = *inPtr; isalnum(*c) || *c=='_' || (*c=='-'&&isalnum(*(c+1))); c++){
489 if (*c == '-')
490 hasDash = True;
491 *symPtr++ = *c;
493 if (!hasDash)
494 return NULL;
495 *symPtr = '\0';
496 s = LookupSymbol(symbolName);
497 if (s != NULL)
498 *inPtr = c;
499 return s;
503 ** Called by yacc to report errors (just stores for returning when
504 ** parsing is aborted. The error token action is to immediate abort
505 ** parsing, so this message is immediately reported to the caller
506 ** of ParseExpr)
508 static int yyerror(char *s)
510 ErrMsg = s;
511 return 0;