mshtml: Implement MarkupServices_ParseString.
[wine.git] / dlls / vbscript / lex.c
blob8c5c69ea429ce4b9e04060fd4f8fd465e5690650
1 /*
2 * Copyright 2011 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 #include <assert.h>
20 #include <limits.h>
21 #include <math.h>
23 #include "vbscript.h"
24 #include "parse.h"
25 #include "parser.tab.h"
27 #include "wine/debug.h"
29 WINE_DEFAULT_DEBUG_CHANNEL(vbscript);
31 static const struct {
32 const WCHAR *word;
33 int token;
34 } keywords[] = {
35 {L"and", tAND},
36 {L"byref", tBYREF},
37 {L"byval", tBYVAL},
38 {L"call", tCALL},
39 {L"case", tCASE},
40 {L"class", tCLASS},
41 {L"const", tCONST},
42 {L"default", tDEFAULT},
43 {L"dim", tDIM},
44 {L"do", tDO},
45 {L"each", tEACH},
46 {L"else", tELSE},
47 {L"elseif", tELSEIF},
48 {L"empty", tEMPTY},
49 {L"end", tEND},
50 {L"eqv", tEQV},
51 {L"error", tERROR},
52 {L"exit", tEXIT},
53 {L"explicit", tEXPLICIT},
54 {L"false", tFALSE},
55 {L"for", tFOR},
56 {L"function", tFUNCTION},
57 {L"get", tGET},
58 {L"goto", tGOTO},
59 {L"if", tIF},
60 {L"imp", tIMP},
61 {L"in", tIN},
62 {L"is", tIS},
63 {L"let", tLET},
64 {L"loop", tLOOP},
65 {L"me", tME},
66 {L"mod", tMOD},
67 {L"new", tNEW},
68 {L"next", tNEXT},
69 {L"not", tNOT},
70 {L"nothing", tNOTHING},
71 {L"null", tNULL},
72 {L"on", tON},
73 {L"option", tOPTION},
74 {L"or", tOR},
75 {L"preserve", tPRESERVE},
76 {L"private", tPRIVATE},
77 {L"property", tPROPERTY},
78 {L"public", tPUBLIC},
79 {L"redim", tREDIM},
80 {L"rem", tREM},
81 {L"resume", tRESUME},
82 {L"select", tSELECT},
83 {L"set", tSET},
84 {L"step", tSTEP},
85 {L"stop", tSTOP},
86 {L"sub", tSUB},
87 {L"then", tTHEN},
88 {L"to", tTO},
89 {L"true", tTRUE},
90 {L"until", tUNTIL},
91 {L"wend", tWEND},
92 {L"while", tWHILE},
93 {L"with", tWITH},
94 {L"xor", tXOR}
97 static inline BOOL is_identifier_char(WCHAR c)
99 return iswalnum(c) || c == '_';
102 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
104 const WCHAR *p1 = ctx->ptr;
105 const WCHAR *p2 = word;
106 WCHAR c;
108 while(p1 < ctx->end && *p2) {
109 c = towlower(*p1);
110 if(c != *p2)
111 return c - *p2;
112 p1++;
113 p2++;
116 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
117 return 1;
119 ctx->ptr = p1;
120 *lval = word;
121 return 0;
124 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
126 int min = 0, max = ARRAY_SIZE(keywords)-1, r, i;
128 while(min <= max) {
129 i = (min+max)/2;
131 r = check_keyword(ctx, keywords[i].word, lval);
132 if(!r)
133 return keywords[i].token;
135 if(r > 0)
136 min = i+1;
137 else
138 max = i-1;
141 return 0;
144 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
146 const WCHAR *ptr = ctx->ptr++;
147 WCHAR *str;
148 int len;
150 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
151 ctx->ptr++;
152 len = ctx->ptr-ptr;
154 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
155 if(!str)
156 return 0;
158 memcpy(str, ptr, (len+1)*sizeof(WCHAR));
159 str[len] = 0;
160 *ret = str;
161 return tIdentifier;
164 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret)
166 const WCHAR *ptr = ++ctx->ptr;
167 WCHAR *rptr;
168 int len = 0;
170 while(ctx->ptr < ctx->end) {
171 if(*ctx->ptr == '\n' || *ctx->ptr == '\r') {
172 FIXME("newline inside string literal\n");
173 return 0;
176 if(*ctx->ptr == '"') {
177 if(ctx->ptr[1] != '"')
178 break;
179 len--;
180 ctx->ptr++;
182 ctx->ptr++;
185 if(ctx->ptr == ctx->end) {
186 FIXME("unterminated string literal\n");
187 return 0;
190 len += ctx->ptr-ptr;
192 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
193 if(!rptr)
194 return 0;
196 while(ptr < ctx->ptr) {
197 if(*ptr == '"')
198 ptr++;
199 *rptr++ = *ptr++;
202 *rptr = 0;
203 ctx->ptr++;
204 return tString;
207 static int parse_date_literal(parser_ctx_t *ctx, DATE *ret)
209 const WCHAR *ptr = ++ctx->ptr;
210 WCHAR *rptr;
211 int len = 0;
212 HRESULT res;
214 while(ctx->ptr < ctx->end) {
215 if(*ctx->ptr == '\n' || *ctx->ptr == '\r') {
216 FIXME("newline inside date literal\n");
217 return 0;
220 if(*ctx->ptr == '#')
221 break;
222 ctx->ptr++;
225 if(ctx->ptr == ctx->end) {
226 FIXME("unterminated date literal\n");
227 return 0;
230 len += ctx->ptr-ptr;
232 rptr = malloc((len+1)*sizeof(WCHAR));
233 if(!rptr)
234 return 0;
236 memcpy( rptr, ptr, len * sizeof(WCHAR));
237 rptr[len] = 0;
238 res = VarDateFromStr(rptr, ctx->lcid, 0, ret);
239 free(rptr);
240 if (FAILED(res)) {
241 FIXME("Invalid date literal\n");
242 return 0;
245 ctx->ptr++;
246 return tDate;
249 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret)
251 BOOL use_int = TRUE;
252 LONGLONG d = 0, hlp;
253 int exp = 0;
254 double r;
256 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
257 return *ctx->ptr++;
259 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
260 hlp = d*10 + *(ctx->ptr++) - '0';
261 if(d>MAXLONGLONG/10 || hlp<0) {
262 exp++;
263 break;
265 else
266 d = hlp;
268 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
269 exp++;
270 ctx->ptr++;
273 if(*ctx->ptr == '.') {
274 use_int = FALSE;
275 ctx->ptr++;
277 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
278 hlp = d*10 + *(ctx->ptr++) - '0';
279 if(d>MAXLONGLONG/10 || hlp<0)
280 break;
282 d = hlp;
283 exp--;
285 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr))
286 ctx->ptr++;
289 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') {
290 int e = 0, sign = 1;
292 ctx->ptr++;
293 if(*ctx->ptr == '-') {
294 ctx->ptr++;
295 sign = -1;
296 }else if(*ctx->ptr == '+') {
297 ctx->ptr++;
300 if(!is_digit(*ctx->ptr)) {
301 FIXME("Invalid numeric literal\n");
302 return 0;
305 use_int = FALSE;
307 do {
308 e = e*10 + *(ctx->ptr++) - '0';
309 if(sign == -1 && -e+exp < -(INT_MAX/100)) {
310 /* The literal will be rounded to 0 anyway. */
311 while(is_digit(*ctx->ptr))
312 ctx->ptr++;
313 *(double*)ret = 0;
314 return tDouble;
317 if(sign*e + exp > INT_MAX/100) {
318 FIXME("Invalid numeric literal\n");
319 return 0;
321 } while(is_digit(*ctx->ptr));
323 exp += sign*e;
326 if(use_int && (LONG)d == d) {
327 *(LONG*)ret = d;
328 return tInt;
331 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
332 if(isinf(r)) {
333 FIXME("Invalid numeric literal\n");
334 return 0;
337 *(double*)ret = r;
338 return tDouble;
341 static int hex_to_int(WCHAR c)
343 if('0' <= c && c <= '9')
344 return c-'0';
345 if('a' <= c && c <= 'f')
346 return c+10-'a';
347 if('A' <= c && c <= 'F')
348 return c+10-'A';
349 return -1;
352 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
354 const WCHAR *begin = ctx->ptr;
355 unsigned l = 0, d;
357 while((d = hex_to_int(*++ctx->ptr)) != -1)
358 l = l*16 + d;
360 if(begin + 9 /* max digits+1 */ < ctx->ptr) {
361 FIXME("invalid literal\n");
362 return 0;
365 if(*ctx->ptr == '&') {
366 ctx->ptr++;
367 *ret = l;
368 }else {
369 *ret = l == (UINT16)l ? (INT16)l : l;
371 return tInt;
374 static void skip_spaces(parser_ctx_t *ctx)
376 while(*ctx->ptr == ' ' || *ctx->ptr == '\t')
377 ctx->ptr++;
380 static int comment_line(parser_ctx_t *ctx)
382 ctx->ptr = wcspbrk(ctx->ptr, L"\n\r");
383 if(ctx->ptr)
384 ctx->ptr++;
385 else
386 ctx->ptr = ctx->end;
387 return tNL;
390 static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx)
392 WCHAR c;
394 skip_spaces(ctx);
395 *loc = ctx->ptr - ctx->code;
396 if(ctx->ptr == ctx->end)
397 return ctx->last_token == tNL ? 0 : tNL;
399 c = *ctx->ptr;
401 if('0' <= c && c <= '9')
402 return parse_numeric_literal(ctx, lval);
404 if(iswalpha(c)) {
405 int ret = 0;
406 if(ctx->last_token != '.' && ctx->last_token != tDOT)
407 ret = check_keywords(ctx, lval);
408 if(!ret)
409 return parse_identifier(ctx, lval);
410 if(ret != tREM)
411 return ret;
412 c = '\'';
415 switch(c) {
416 case '\n':
417 case '\r':
418 ctx->ptr++;
419 return tNL;
420 case '\'':
421 return comment_line(ctx);
422 case ':':
423 case ')':
424 case ',':
425 case '+':
426 case '*':
427 case '/':
428 case '^':
429 case '\\':
430 case '_':
431 return *ctx->ptr++;
432 case '.':
434 * We need to distinguish between '.' used as part of a member expression and
435 * a beginning of a dot expression (a member expression accessing with statement
436 * expression) and a floating point number like ".2" .
438 c = ctx->ptr > ctx->code ? ctx->ptr[-1] : '\n';
439 if (is_identifier_char(c) || c == ')') {
440 ctx->ptr++;
441 return '.';
443 c = ctx->ptr[1];
444 if('0' <= c && c <= '9')
445 return parse_numeric_literal(ctx, lval);
446 ctx->ptr++;
447 return tDOT;
448 case '-':
449 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>')
450 return comment_line(ctx);
451 ctx->ptr++;
452 return '-';
453 case '(':
454 /* NOTE:
455 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
456 * in call statement special case |f()| without 'call' keyword
458 ctx->ptr++;
459 skip_spaces(ctx);
460 if(*ctx->ptr == ')') {
461 ctx->ptr++;
462 return tEMPTYBRACKETS;
465 * Parser can't predict if bracket is part of argument expression or an argument
466 * in call expression. We predict it here instead.
468 if(ctx->last_token == tIdentifier || ctx->last_token == ')')
469 return '(';
470 return tEXPRLBRACKET;
471 case '"':
472 return parse_string_literal(ctx, lval);
473 case '#':
474 return parse_date_literal(ctx, lval);
475 case '&':
476 if((*++ctx->ptr == 'h' || *ctx->ptr == 'H') && hex_to_int(ctx->ptr[1]) != -1)
477 return parse_hex_literal(ctx, lval);
478 return '&';
479 case '=':
480 switch(*++ctx->ptr) {
481 case '<':
482 ctx->ptr++;
483 return tLTEQ;
484 case '>':
485 ctx->ptr++;
486 return tGTEQ;
488 return '=';
489 case '<':
490 switch(*++ctx->ptr) {
491 case '>':
492 ctx->ptr++;
493 return tNEQ;
494 case '=':
495 ctx->ptr++;
496 return tLTEQ;
497 case '!':
498 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-')
499 return comment_line(ctx);
501 return '<';
502 case '>':
503 switch(*++ctx->ptr) {
504 case '=':
505 ctx->ptr++;
506 return tGTEQ;
507 case '<':
508 ctx->ptr++;
509 return tNEQ;
511 return '>';
512 default:
513 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
516 return 0;
519 int parser_lex(void *lval, unsigned *loc, parser_ctx_t *ctx)
521 int ret;
523 if (ctx->last_token == tEXPRESSION)
525 ctx->last_token = tNL;
526 return tEXPRESSION;
529 while(1) {
530 ret = parse_next_token(lval, loc, ctx);
531 if(ret == '_') {
532 skip_spaces(ctx);
533 if(*ctx->ptr != '\n' && *ctx->ptr != '\r') {
534 FIXME("'_' not followed by newline\n");
535 return 0;
537 if(*ctx->ptr == '\r')
538 ctx->ptr++;
539 if(*ctx->ptr == '\n')
540 ctx->ptr++;
541 continue;
543 if(ret != tNL || ctx->last_token != tNL)
544 break;
546 ctx->last_nl = ctx->ptr-ctx->code;
549 return (ctx->last_token = ret);