push 553a83299288f61678d62ade87a3a2a5489a8ad8
[wine/hacks.git] / dlls / jscript / lex.c
blob8ae15d3fc6d7ad88a2d32c05bafe762f4e066f8f
1 /*
2 * Copyright 2008 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 #include <math.h>
21 #include "jscript.h"
22 #include "activscp.h"
23 #include "objsafe.h"
24 #include "engine.h"
26 #include "parser.tab.h"
28 #include "wine/debug.h"
29 #include "wine/unicode.h"
31 WINE_DEFAULT_DEBUG_CHANNEL(jscript);
33 static const WCHAR breakW[] = {'b','r','e','a','k',0};
34 static const WCHAR caseW[] = {'c','a','s','e',0};
35 static const WCHAR catchW[] = {'c','a','t','c','h',0};
36 static const WCHAR continueW[] = {'c','o','n','t','i','n','u','e',0};
37 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
38 static const WCHAR deleteW[] = {'d','e','l','e','t','e',0};
39 static const WCHAR doW[] = {'d','o',0};
40 static const WCHAR elseW[] = {'e','l','s','e',0};
41 static const WCHAR falseW[] = {'f','a','l','s','e',0};
42 static const WCHAR finallyW[] = {'f','i','n','a','l','l','y',0};
43 static const WCHAR forW[] = {'f','o','r',0};
44 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
45 static const WCHAR ifW[] = {'i','f',0};
46 static const WCHAR inW[] = {'i','n',0};
47 static const WCHAR instanceofW[] = {'i','n','s','t','a','n','c','e','o','f',0};
48 static const WCHAR newW[] = {'n','e','w',0};
49 static const WCHAR nullW[] = {'n','u','l','l',0};
50 static const WCHAR returnW[] = {'r','e','t','u','r','n',0};
51 static const WCHAR switchW[] = {'s','w','i','t','c','h',0};
52 static const WCHAR thisW[] = {'t','h','i','s',0};
53 static const WCHAR throwW[] = {'t','h','r','o','w',0};
54 static const WCHAR trueW[] = {'t','r','u','e',0};
55 static const WCHAR tryW[] = {'t','r','y',0};
56 static const WCHAR typeofW[] = {'t','y','p','e','o','f',0};
57 static const WCHAR undefinedW[] = {'u','n','d','e','f','i','n','e','d',0};
58 static const WCHAR varW[] = {'v','a','r',0};
59 static const WCHAR voidW[] = {'v','o','i','d',0};
60 static const WCHAR whileW[] = {'w','h','i','l','e',0};
61 static const WCHAR withW[] = {'w','i','t','h',0};
63 static const struct {
64 const WCHAR *word;
65 int token;
66 } keywords[] = {
67 {breakW, kBREAK},
68 {caseW, kCASE},
69 {catchW, kCATCH},
70 {continueW, kCONTINUE},
71 {defaultW, kDEFAULT},
72 {deleteW, kDELETE},
73 {doW, kDO},
74 {elseW, kELSE},
75 {falseW, kFALSE},
76 {finallyW, kFINALLY},
77 {forW, kFOR},
78 {functionW, kFUNCTION},
79 {ifW, kIF},
80 {inW, kIN},
81 {instanceofW, kINSTANCEOF},
82 {newW, kNEW},
83 {nullW, kNULL},
84 {returnW, kRETURN},
85 {switchW, kSWITCH},
86 {thisW, kTHIS},
87 {throwW, kTHROW},
88 {trueW, kTRUE},
89 {tryW, kTRY},
90 {typeofW, kTYPEOF},
91 {undefinedW, kUNDEFINED},
92 {varW, kVAR},
93 {voidW, kVOID},
94 {whileW, kWHILE},
95 {withW, kWITH}
98 static int lex_error(parser_ctx_t *ctx, HRESULT hres)
100 ctx->hres = hres;
101 return -1;
104 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word)
106 const WCHAR *p1 = ctx->ptr;
107 const WCHAR *p2 = word;
109 while(p1 < ctx->end && *p2) {
110 if(*p1 != *p2)
111 return *p1 - *p2;
112 p1++;
113 p2++;
116 if(*p2 || (p1 < ctx->end && isalnumW(*p1)))
117 return 1;
119 ctx->ptr = p1;
120 return 0;
123 /* ECMA-262 3rd Edition 7.3 */
124 static BOOL is_endline(WCHAR c)
126 return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
129 static BOOL is_identifier_char(WCHAR c)
131 return isalnumW(c) || c == '$' || c == '_' || c == '\\';
134 static int hex_to_int(WCHAR c)
136 if('0' <= c && c <= '9')
137 return c-'0';
139 if('a' <= c && c <= 'f')
140 return c-'a'+10;
142 if('A' <= c && c <= 'F')
143 return c-'A'+10;
145 return -1;
148 static int check_keywords(parser_ctx_t *ctx)
150 int min = 0, max = sizeof(keywords)/sizeof(keywords[0])-1, r, i;
152 while(min <= max) {
153 i = (min+max)/2;
155 r = check_keyword(ctx, keywords[i].word);
156 if(!r)
157 return keywords[i].token;
159 if(r > 0)
160 min = i+1;
161 else
162 max = i-1;
165 return 0;
168 static void skip_spaces(parser_ctx_t *ctx)
170 while(ctx->ptr < ctx->end && isspaceW(*ctx->ptr)) {
171 if(is_endline(*ctx->ptr++))
172 ctx->nl = TRUE;
176 static BOOL skip_comment(parser_ctx_t *ctx)
178 if(ctx->ptr+1 >= ctx->end || *ctx->ptr != '/')
179 return FALSE;
181 switch(ctx->ptr[1]) {
182 case '*':
183 ctx->ptr += 2;
184 while(ctx->ptr+1 < ctx->end && (ctx->ptr[0] != '*' || ctx->ptr[1] != '/'))
185 ctx->ptr++;
187 if(ctx->ptr[0] == '*' && ctx->ptr[1] == '/') {
188 ctx->ptr += 2;
189 }else {
190 WARN("unexpected end of file (missing end of comment)\n");
191 ctx->ptr = ctx->end;
193 break;
194 case '/':
195 ctx->ptr += 2;
196 while(ctx->ptr < ctx->end && !is_endline(*ctx->ptr))
197 ctx->ptr++;
198 break;
199 default:
200 return FALSE;
203 return TRUE;
206 static BOOL unescape(WCHAR *str)
208 WCHAR *pd, *p, c;
209 int i;
211 pd = p = str;
212 while(*p) {
213 if(*p != '\\') {
214 *pd++ = *p++;
215 continue;
218 p++;
219 c = 0;
221 switch(*p) {
222 case '\'':
223 case '\"':
224 case '\\':
225 c = *p;
226 break;
227 case 'b':
228 c = '\b';
229 break;
230 case 't':
231 c = '\t';
232 break;
233 case 'n':
234 c = '\n';
235 break;
236 case 'v':
237 c = '\v';
238 break;
239 case 'f':
240 c = '\f';
241 break;
242 case 'r':
243 c = '\r';
244 break;
245 case '0':
246 break;
247 case 'x':
248 i = hex_to_int(*++p);
249 if(i == -1)
250 return FALSE;
251 c = i << 16;
253 i = hex_to_int(*++p);
254 if(i == -1)
255 return FALSE;
256 c += i;
257 break;
258 case 'u':
259 i = hex_to_int(*++p);
260 if(i == -1)
261 return FALSE;
262 c = i << 24;
264 i = hex_to_int(*++p);
265 if(i == -1)
266 return FALSE;
267 c += i << 16;
269 i = hex_to_int(*++p);
270 if(i == -1)
271 return FALSE;
272 c += 1 << 8;
274 i = hex_to_int(*++p);
275 if(i == -1)
276 return FALSE;
277 c += i;
278 break;
279 default:
280 c = *p;
283 *pd++ = c;
284 p++;
287 *pd = 0;
288 return TRUE;
291 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
293 const WCHAR *ptr = ctx->ptr++;
294 WCHAR *wstr;
295 int len;
297 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
298 ctx->ptr++;
300 len = ctx->ptr-ptr;
302 *ret = wstr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
303 memcpy(wstr, ptr, (len+1)*sizeof(WCHAR));
304 wstr[len] = 0;
306 /* FIXME: unescape */
307 return tIdentifier;
310 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret, WCHAR endch)
312 const WCHAR *ptr = ++ctx->ptr;
313 WCHAR *wstr;
314 int len;
316 while(ctx->ptr < ctx->end && *ctx->ptr != endch) {
317 if(*ctx->ptr++ == '\\')
318 ctx->ptr++;
321 if(ctx->ptr == ctx->end) {
322 WARN("unexpected end of file\n");
323 return lex_error(ctx, E_FAIL);
326 len = ctx->ptr-ptr;
328 *ret = wstr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
329 memcpy(wstr, ptr, (len+1)*sizeof(WCHAR));
330 wstr[len] = 0;
332 ctx->ptr++;
334 if(!unescape(wstr)) {
335 WARN("unescape failed\n");
336 return lex_error(ctx, E_FAIL);
339 return tStringLiteral;
342 static literal_t *alloc_int_literal(parser_ctx_t *ctx, LONG l)
344 literal_t *ret = parser_alloc(ctx, sizeof(literal_t));
346 ret->vt = VT_I4;
347 ret->u.lval = l;
349 return ret;
352 static int parse_double_literal(parser_ctx_t *ctx, LONG int_part, literal_t **literal)
354 double d, tmp = 1.0;
356 if(ctx->ptr == ctx->end || !isdigitW(*ctx->ptr)) {
357 ERR("No digit after point\n");
358 return 0;
361 d = int_part;
362 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr))
363 d += (tmp /= 10.0)*(*ctx->ptr++ - '0');
365 if(ctx->ptr < ctx->end && (*ctx->ptr == 'e' || *ctx->ptr == 'E')) {
366 int sign = 1, e = 0;
368 ctx->ptr++;
369 if(ctx->ptr < ctx->end) {
370 if(*ctx->ptr == '+') {
371 ctx->ptr++;
372 }else if(*ctx->ptr == '-') {
373 sign = -1;
374 ctx->ptr++;
375 }else if(!isdigitW(*ctx->ptr)) {
376 WARN("Expected exponent part\n");
377 return lex_error(ctx, E_FAIL);
381 if(ctx->ptr == ctx->end) {
382 WARN("unexpected end of file\n");
383 return lex_error(ctx, E_FAIL);
386 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr))
387 e = e*10 + *ctx->ptr++ - '0';
388 e *= sign;
390 d = pow(d, e);
393 *literal = parser_alloc(ctx, sizeof(literal_t));
394 (*literal)->vt = VT_R8;
395 (*literal)->u.dval = d;
397 return tNumericLiteral;
400 static int parse_numeric_literal(parser_ctx_t *ctx, literal_t **literal)
402 LONG l, d;
404 l = *ctx->ptr++ - '0';
405 if(ctx->ptr == ctx->end) {
406 *literal = alloc_int_literal(ctx, l);
407 return tNumericLiteral;
410 if(!l) {
411 if(*ctx->ptr == 'x' || *ctx->ptr == 'X') {
412 if(++ctx->ptr == ctx->end) {
413 ERR("unexpexted end of file\n");
414 return 0;
417 while(ctx->ptr < ctx->end && (d = hex_to_int(*ctx->ptr)) != -1) {
418 l = l*16 + d;
419 ctx->ptr++;
422 if(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr)) {
423 WARN("unexpected identifier char\n");
424 return lex_error(ctx, E_FAIL);
427 *literal = alloc_int_literal(ctx, l);
428 return tNumericLiteral;
431 if(isdigitW(*ctx->ptr) || is_identifier_char(*ctx->ptr)) {
432 WARN("wrong char after zero\n");
433 return lex_error(ctx, E_FAIL);
436 *literal = alloc_int_literal(ctx, 0);
439 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr))
440 l = l*10 + *(ctx->ptr++)-'0';
442 if(ctx->ptr < ctx->end) {
443 if(*ctx->ptr == '.') {
444 ctx->ptr++;
445 return parse_double_literal(ctx, l, literal);
448 if(is_identifier_char(*ctx->ptr)) {
449 WARN("unexpected identifier char\n");
450 return lex_error(ctx, E_FAIL);
454 *literal = alloc_int_literal(ctx, l);
455 return tNumericLiteral;
458 int parser_lex(void *lval, parser_ctx_t *ctx)
460 int ret;
462 ctx->nl = FALSE;
464 do {
465 skip_spaces(ctx);
466 if(ctx->ptr == ctx->end)
467 return 0;
468 }while(skip_comment(ctx));
470 if(isalphaW(*ctx->ptr)) {
471 ret = check_keywords(ctx);
472 if(ret)
473 return ret;
475 return parse_identifier(ctx, (const WCHAR**)lval);
478 if(isdigitW(*ctx->ptr))
479 return parse_numeric_literal(ctx, lval);
481 switch(*ctx->ptr) {
482 case '{':
483 case '}':
484 case '(':
485 case ')':
486 case '[':
487 case ']':
488 case ';':
489 case ',':
490 case '~':
491 case '?':
492 case ':':
493 return *ctx->ptr++;
495 case '.':
496 if(++ctx->ptr < ctx->end && isdigitW(*ctx->ptr))
497 return parse_double_literal(ctx, 0, lval);
498 return '.';
500 case '<':
501 if(++ctx->ptr == ctx->end) {
502 *(int*)lval = EXPR_LESS;
503 return tRelOper;
506 switch(*ctx->ptr) {
507 case '=': /* <= */
508 ctx->ptr++;
509 *(int*)lval = EXPR_LESSEQ;
510 return tRelOper;
511 case '<': /* << */
512 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* <<= */
513 ctx->ptr++;
514 *(int*)lval = EXPR_ASSIGNLSHIFT;
515 return tAssignOper;
517 *(int*)lval = EXPR_LSHIFT;
518 return tShiftOper;
519 default: /* < */
520 *(int*)lval = EXPR_LESS;
521 return tRelOper;
524 case '>':
525 if(++ctx->ptr == ctx->end) { /* > */
526 *(int*)lval = EXPR_GREATER;
527 return tRelOper;
530 switch(*ctx->ptr) {
531 case '=': /* >= */
532 ctx->ptr++;
533 *(int*)lval = EXPR_GREATEREQ;
534 return tRelOper;
535 case '>': /* >> */
536 if(++ctx->ptr < ctx->end) {
537 if(*ctx->ptr == '=') { /* >>= */
538 ctx->ptr++;
539 *(int*)lval = EXPR_ASSIGNRSHIFT;
540 return tAssignOper;
542 if(*ctx->ptr == '>') { /* >>> */
543 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* >>>= */
544 ctx->ptr++;
545 *(int*)lval = EXPR_ASSIGNRRSHIFT;
546 return tAssignOper;
548 *(int*)lval = EXPR_RRSHIFT;
549 return tRelOper;
552 *(int*)lval = EXPR_RSHIFT;
553 return tShiftOper;
554 default:
555 *(int*)lval = EXPR_GREATER;
556 return tRelOper;
559 case '+':
560 ctx->ptr++;
561 if(ctx->ptr < ctx->end) {
562 switch(*ctx->ptr) {
563 case '+': /* ++ */
564 ctx->ptr++;
565 return tINC;
566 case '=': /* += */
567 ctx->ptr++;
568 *(int*)lval = EXPR_ASSIGNADD;
569 return tAssignOper;
572 return '+';
574 case '-':
575 ctx->ptr++;
576 if(ctx->ptr < ctx->end) {
577 switch(*ctx->ptr) {
578 case '-': /* -- */
579 ctx->ptr++;
580 return tDEC;
581 case '=': /* -= */
582 ctx->ptr++;
583 *(int*)lval = EXPR_ASSIGNSUB;
584 return tAssignOper;
587 return '-';
589 case '*':
590 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* *= */
591 ctx->ptr++;
592 *(int*)lval = EXPR_ASSIGNMUL;
593 return tAssignOper;
595 return '*';
597 case '%':
598 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* %= */
599 ctx->ptr++;
600 *(int*)lval = EXPR_ASSIGNMOD;
601 return tAssignOper;
603 return '%';
605 case '&':
606 if(++ctx->ptr < ctx->end) {
607 switch(*ctx->ptr) {
608 case '=': /* &= */
609 ctx->ptr++;
610 *(int*)lval = EXPR_ASSIGNAND;
611 return tAssignOper;
612 case '&': /* && */
613 ctx->ptr++;
614 return tANDAND;
617 return '&';
619 case '|':
620 if(++ctx->ptr < ctx->end) {
621 switch(*ctx->ptr) {
622 case '=': /* |= */
623 ctx->ptr++;
624 *(int*)lval = EXPR_ASSIGNOR;
625 return tAssignOper;
626 case '|': /* || */
627 ctx->ptr++;
628 return tOROR;
631 return '|';
633 case '^':
634 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* ^= */
635 ctx->ptr++;
636 *(int*)lval = EXPR_ASSIGNXOR;
637 return tAssignOper;
639 return '^';
641 case '!':
642 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* != */
643 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* !== */
644 ctx->ptr++;
645 *(int*)lval = EXPR_NOTEQEQ;
646 return tEqOper;
648 *(int*)lval = EXPR_NOTEQ;
649 return tEqOper;
651 return '!';
653 case '=':
654 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* == */
655 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* === */
656 ctx->ptr++;
657 *(int*)lval = EXPR_EQEQ;
658 return tEqOper;
660 *(int*)lval = EXPR_EQ;
661 return tEqOper;
663 return '=';
665 case '/':
666 if(++ctx->ptr < ctx->end) {
667 if(*ctx->ptr == '=') { /* /= */
668 ctx->ptr++;
669 *(int*)lval = EXPR_ASSIGNMUL;
670 return tAssignOper;
673 return '/';
675 case '\"':
676 case '\'':
677 return parse_string_literal(ctx, (const WCHAR**)lval, *ctx->ptr);
679 case '_':
680 case '$':
681 return parse_identifier(ctx, lval);
684 WARN("unexpected char '%c' %d\n", *ctx->ptr, *ctx->ptr);
685 return 0;