vbscript: Rename OP_long expression to OP_int.
[wine.git] / dlls / vbscript / lex.c
blobc628934610b6e041313c5c7926339ed6ff63c090
/*
 * Copyright 2011 Jacek Caban for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
19 #include <assert.h>
20 #include <limits.h>
21 #include <math.h>
23 #include "vbscript.h"
24 #include "parse.h"
25 #include "parser.tab.h"
27 #include "wine/debug.h"
29 WINE_DEFAULT_DEBUG_CHANNEL(vbscript);
31 static const WCHAR andW[] = {'a','n','d',0};
32 static const WCHAR byrefW[] = {'b','y','r','e','f',0};
33 static const WCHAR byvalW[] = {'b','y','v','a','l',0};
34 static const WCHAR callW[] = {'c','a','l','l',0};
35 static const WCHAR caseW[] = {'c','a','s','e',0};
36 static const WCHAR classW[] = {'c','l','a','s','s',0};
37 static const WCHAR constW[] = {'c','o','n','s','t',0};
38 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
39 static const WCHAR dimW[] = {'d','i','m',0};
40 static const WCHAR doW[] = {'d','o',0};
41 static const WCHAR eachW[] = {'e','a','c','h',0};
42 static const WCHAR elseW[] = {'e','l','s','e',0};
43 static const WCHAR elseifW[] = {'e','l','s','e','i','f',0};
44 static const WCHAR emptyW[] = {'e','m','p','t','y',0};
45 static const WCHAR endW[] = {'e','n','d',0};
46 static const WCHAR eqvW[] = {'e','q','v',0};
47 static const WCHAR errorW[] = {'e','r','r','o','r',0};
48 static const WCHAR exitW[] = {'e','x','i','t',0};
49 static const WCHAR explicitW[] = {'e','x','p','l','i','c','i','t',0};
50 static const WCHAR falseW[] = {'f','a','l','s','e',0};
51 static const WCHAR forW[] = {'f','o','r',0};
52 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
53 static const WCHAR getW[] = {'g','e','t',0};
54 static const WCHAR gotoW[] = {'g','o','t','o',0};
55 static const WCHAR ifW[] = {'i','f',0};
56 static const WCHAR impW[] = {'i','m','p',0};
57 static const WCHAR inW[] = {'i','n',0};
58 static const WCHAR isW[] = {'i','s',0};
59 static const WCHAR letW[] = {'l','e','t',0};
60 static const WCHAR loopW[] = {'l','o','o','p',0};
61 static const WCHAR meW[] = {'m','e',0};
62 static const WCHAR modW[] = {'m','o','d',0};
63 static const WCHAR newW[] = {'n','e','w',0};
64 static const WCHAR nextW[] = {'n','e','x','t',0};
65 static const WCHAR notW[] = {'n','o','t',0};
66 static const WCHAR nothingW[] = {'n','o','t','h','i','n','g',0};
67 static const WCHAR nullW[] = {'n','u','l','l',0};
68 static const WCHAR onW[] = {'o','n',0};
69 static const WCHAR optionW[] = {'o','p','t','i','o','n',0};
70 static const WCHAR orW[] = {'o','r',0};
71 static const WCHAR privateW[] = {'p','r','i','v','a','t','e',0};
72 static const WCHAR propertyW[] = {'p','r','o','p','e','r','t','y',0};
73 static const WCHAR publicW[] = {'p','u','b','l','i','c',0};
74 static const WCHAR remW[] = {'r','e','m',0};
75 static const WCHAR resumeW[] = {'r','e','s','u','m','e',0};
76 static const WCHAR selectW[] = {'s','e','l','e','c','t',0};
77 static const WCHAR setW[] = {'s','e','t',0};
78 static const WCHAR stepW[] = {'s','t','e','p',0};
79 static const WCHAR stopW[] = {'s','t','o','p',0};
80 static const WCHAR subW[] = {'s','u','b',0};
81 static const WCHAR thenW[] = {'t','h','e','n',0};
82 static const WCHAR toW[] = {'t','o',0};
83 static const WCHAR trueW[] = {'t','r','u','e',0};
84 static const WCHAR untilW[] = {'u','n','t','i','l',0};
85 static const WCHAR wendW[] = {'w','e','n','d',0};
86 static const WCHAR whileW[] = {'w','h','i','l','e',0};
87 static const WCHAR xorW[] = {'x','o','r',0};
89 static const struct {
90 const WCHAR *word;
91 int token;
92 } keywords[] = {
93 {andW, tAND},
94 {byrefW, tBYREF},
95 {byvalW, tBYVAL},
96 {callW, tCALL},
97 {caseW, tCASE},
98 {classW, tCLASS},
99 {constW, tCONST},
100 {defaultW, tDEFAULT},
101 {dimW, tDIM},
102 {doW, tDO},
103 {eachW, tEACH},
104 {elseW, tELSE},
105 {elseifW, tELSEIF},
106 {emptyW, tEMPTY},
107 {endW, tEND},
108 {eqvW, tEQV},
109 {errorW, tERROR},
110 {exitW, tEXIT},
111 {explicitW, tEXPLICIT},
112 {falseW, tFALSE},
113 {forW, tFOR},
114 {functionW, tFUNCTION},
115 {getW, tGET},
116 {gotoW, tGOTO},
117 {ifW, tIF},
118 {impW, tIMP},
119 {inW, tIN},
120 {isW, tIS},
121 {letW, tLET},
122 {loopW, tLOOP},
123 {meW, tME},
124 {modW, tMOD},
125 {newW, tNEW},
126 {nextW, tNEXT},
127 {notW, tNOT},
128 {nothingW, tNOTHING},
129 {nullW, tNULL},
130 {onW, tON},
131 {optionW, tOPTION},
132 {orW, tOR},
133 {privateW, tPRIVATE},
134 {propertyW, tPROPERTY},
135 {publicW, tPUBLIC},
136 {remW, tREM},
137 {resumeW, tRESUME},
138 {selectW, tSELECT},
139 {setW, tSET},
140 {stepW, tSTEP},
141 {stopW, tSTOP},
142 {subW, tSUB},
143 {thenW, tTHEN},
144 {toW, tTO},
145 {trueW, tTRUE},
146 {untilW, tUNTIL},
147 {wendW, tWEND},
148 {whileW, tWHILE},
149 {xorW, tXOR}
152 static inline BOOL is_identifier_char(WCHAR c)
154 return iswalnum(c) || c == '_';
157 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
159 const WCHAR *p1 = ctx->ptr;
160 const WCHAR *p2 = word;
161 WCHAR c;
163 while(p1 < ctx->end && *p2) {
164 c = towlower(*p1);
165 if(c != *p2)
166 return c - *p2;
167 p1++;
168 p2++;
171 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
172 return 1;
174 ctx->ptr = p1;
175 *lval = word;
176 return 0;
179 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
181 int min = 0, max = ARRAY_SIZE(keywords)-1, r, i;
183 while(min <= max) {
184 i = (min+max)/2;
186 r = check_keyword(ctx, keywords[i].word, lval);
187 if(!r)
188 return keywords[i].token;
190 if(r > 0)
191 min = i+1;
192 else
193 max = i-1;
196 return 0;
199 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
201 const WCHAR *ptr = ctx->ptr++;
202 WCHAR *str;
203 int len;
205 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
206 ctx->ptr++;
207 len = ctx->ptr-ptr;
209 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
210 if(!str)
211 return 0;
213 memcpy(str, ptr, (len+1)*sizeof(WCHAR));
214 str[len] = 0;
215 *ret = str;
216 return tIdentifier;
219 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret)
221 const WCHAR *ptr = ++ctx->ptr;
222 WCHAR *rptr;
223 int len = 0;
225 while(ctx->ptr < ctx->end) {
226 if(*ctx->ptr == '\n' || *ctx->ptr == '\r') {
227 FIXME("newline inside string literal\n");
228 return 0;
231 if(*ctx->ptr == '"') {
232 if(ctx->ptr[1] != '"')
233 break;
234 len--;
235 ctx->ptr++;
237 ctx->ptr++;
240 if(ctx->ptr == ctx->end) {
241 FIXME("unterminated string literal\n");
242 return 0;
245 len += ctx->ptr-ptr;
247 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
248 if(!rptr)
249 return 0;
251 while(ptr < ctx->ptr) {
252 if(*ptr == '"')
253 ptr++;
254 *rptr++ = *ptr++;
257 *rptr = 0;
258 ctx->ptr++;
259 return tString;
262 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret)
264 BOOL use_int = TRUE;
265 LONGLONG d = 0, hlp;
266 int exp = 0;
267 double r;
269 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
270 return *ctx->ptr++;
272 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) {
273 hlp = d*10 + *(ctx->ptr++) - '0';
274 if(d>MAXLONGLONG/10 || hlp<0) {
275 exp++;
276 break;
278 else
279 d = hlp;
281 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) {
282 exp++;
283 ctx->ptr++;
286 if(*ctx->ptr == '.') {
287 use_int = FALSE;
288 ctx->ptr++;
290 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) {
291 hlp = d*10 + *(ctx->ptr++) - '0';
292 if(d>MAXLONGLONG/10 || hlp<0)
293 break;
295 d = hlp;
296 exp--;
298 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr))
299 ctx->ptr++;
302 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') {
303 int e = 0, sign = 1;
305 if(*++ctx->ptr == '-') {
306 ctx->ptr++;
307 sign = -1;
310 if(!iswdigit(*ctx->ptr)) {
311 FIXME("Invalid numeric literal\n");
312 return 0;
315 use_int = FALSE;
317 do {
318 e = e*10 + *(ctx->ptr++) - '0';
319 if(sign == -1 && -e+exp < -(INT_MAX/100)) {
320 /* The literal will be rounded to 0 anyway. */
321 while(iswdigit(*ctx->ptr))
322 ctx->ptr++;
323 *(double*)ret = 0;
324 return tDouble;
327 if(sign*e + exp > INT_MAX/100) {
328 FIXME("Invalid numeric literal\n");
329 return 0;
331 } while(iswdigit(*ctx->ptr));
333 exp += sign*e;
336 if(use_int && (LONG)d == d) {
337 *(LONG*)ret = d;
338 return tInt;
341 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
342 if(isinf(r)) {
343 FIXME("Invalid numeric literal\n");
344 return 0;
347 *(double*)ret = r;
348 return tDouble;
351 static int hex_to_int(WCHAR c)
353 if('0' <= c && c <= '9')
354 return c-'0';
355 if('a' <= c && c <= 'f')
356 return c+10-'a';
357 if('A' <= c && c <= 'F')
358 return c+10-'A';
359 return -1;
362 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
364 const WCHAR *begin = ctx->ptr;
365 LONG l = 0, d;
367 while((d = hex_to_int(*++ctx->ptr)) != -1)
368 l = l*16 + d;
370 if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) {
371 FIXME("invalid literal\n");
372 return 0;
375 if(*ctx->ptr == '&')
376 ctx->ptr++;
378 *ret = l;
379 return tInt;
382 static void skip_spaces(parser_ctx_t *ctx)
384 while(*ctx->ptr == ' ' || *ctx->ptr == '\t')
385 ctx->ptr++;
388 static int comment_line(parser_ctx_t *ctx)
390 static const WCHAR newlineW[] = {'\n','\r',0};
391 ctx->ptr = wcspbrk(ctx->ptr, newlineW);
392 if(ctx->ptr)
393 ctx->ptr++;
394 else
395 ctx->ptr = ctx->end;
396 return tNL;
399 static int parse_next_token(void *lval, parser_ctx_t *ctx)
401 WCHAR c;
403 skip_spaces(ctx);
404 if(ctx->ptr == ctx->end)
405 return ctx->last_token == tNL ? tEOF : tNL;
407 c = *ctx->ptr;
409 if('0' <= c && c <= '9')
410 return parse_numeric_literal(ctx, lval);
412 if(iswalpha(c)) {
413 int ret = check_keywords(ctx, lval);
414 if(!ret)
415 return parse_identifier(ctx, lval);
416 if(ret != tREM)
417 return ret;
418 c = '\'';
421 switch(c) {
422 case '\n':
423 case '\r':
424 ctx->ptr++;
425 return tNL;
426 case '\'':
427 return comment_line(ctx);
428 case ':':
429 case ')':
430 case ',':
431 case '=':
432 case '+':
433 case '*':
434 case '/':
435 case '^':
436 case '\\':
437 case '.':
438 case '_':
439 return *ctx->ptr++;
440 case '-':
441 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>')
442 return comment_line(ctx);
443 ctx->ptr++;
444 return '-';
445 case '(':
446 /* NOTE:
447 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
448 * in call statement special case |f()| without 'call' keyword
450 ctx->ptr++;
451 skip_spaces(ctx);
452 if(*ctx->ptr == ')') {
453 ctx->ptr++;
454 return tEMPTYBRACKETS;
456 return '(';
457 case '"':
458 return parse_string_literal(ctx, lval);
459 case '&':
460 if(*++ctx->ptr == 'h' || *ctx->ptr == 'H')
461 return parse_hex_literal(ctx, lval);
462 return '&';
463 case '<':
464 switch(*++ctx->ptr) {
465 case '>':
466 ctx->ptr++;
467 return tNEQ;
468 case '=':
469 ctx->ptr++;
470 return tLTEQ;
471 case '!':
472 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-')
473 return comment_line(ctx);
475 return '<';
476 case '>':
477 if(*++ctx->ptr == '=') {
478 ctx->ptr++;
479 return tGTEQ;
481 return '>';
482 default:
483 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
486 return 0;
489 int parser_lex(void *lval, parser_ctx_t *ctx)
491 int ret;
493 while(1) {
494 ret = parse_next_token(lval, ctx);
495 if(ret == '_') {
496 skip_spaces(ctx);
497 if(*ctx->ptr != '\n' && *ctx->ptr != '\r') {
498 FIXME("'_' not followed by newline\n");
499 return 0;
501 if(*ctx->ptr == '\r')
502 ctx->ptr++;
503 if(*ctx->ptr == '\n')
504 ctx->ptr++;
505 continue;
507 if(ret != tNL || ctx->last_token != tNL)
508 break;
510 ctx->last_nl = ctx->ptr-ctx->code;
513 return (ctx->last_token = ret);