riched20: Merge the richole object with the text services object.
[wine.git] / dlls / vbscript / lex.c
blob357cad7158c03a58c57410412a264f73b3691684
/*
 * Copyright 2011 Jacek Caban for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
19 #include <assert.h>
20 #include <limits.h>
21 #include <math.h>
23 #include "vbscript.h"
24 #include "parse.h"
25 #include "parser.tab.h"
27 #include "wine/debug.h"
29 WINE_DEFAULT_DEBUG_CHANNEL(vbscript);
31 static const struct {
32 const WCHAR *word;
33 int token;
34 } keywords[] = {
35 {L"and", tAND},
36 {L"byref", tBYREF},
37 {L"byval", tBYVAL},
38 {L"call", tCALL},
39 {L"case", tCASE},
40 {L"class", tCLASS},
41 {L"const", tCONST},
42 {L"default", tDEFAULT},
43 {L"dim", tDIM},
44 {L"do", tDO},
45 {L"each", tEACH},
46 {L"else", tELSE},
47 {L"elseif", tELSEIF},
48 {L"empty", tEMPTY},
49 {L"end", tEND},
50 {L"eqv", tEQV},
51 {L"error", tERROR},
52 {L"exit", tEXIT},
53 {L"explicit", tEXPLICIT},
54 {L"false", tFALSE},
55 {L"for", tFOR},
56 {L"function", tFUNCTION},
57 {L"get", tGET},
58 {L"goto", tGOTO},
59 {L"if", tIF},
60 {L"imp", tIMP},
61 {L"in", tIN},
62 {L"is", tIS},
63 {L"let", tLET},
64 {L"loop", tLOOP},
65 {L"me", tME},
66 {L"mod", tMOD},
67 {L"new", tNEW},
68 {L"next", tNEXT},
69 {L"not", tNOT},
70 {L"nothing", tNOTHING},
71 {L"null", tNULL},
72 {L"on", tON},
73 {L"option", tOPTION},
74 {L"or", tOR},
75 {L"preserve", tPRESERVE},
76 {L"private", tPRIVATE},
77 {L"property", tPROPERTY},
78 {L"public", tPUBLIC},
79 {L"redim", tREDIM},
80 {L"rem", tREM},
81 {L"resume", tRESUME},
82 {L"select", tSELECT},
83 {L"set", tSET},
84 {L"step", tSTEP},
85 {L"stop", tSTOP},
86 {L"sub", tSUB},
87 {L"then", tTHEN},
88 {L"to", tTO},
89 {L"true", tTRUE},
90 {L"until", tUNTIL},
91 {L"wend", tWEND},
92 {L"while", tWHILE},
93 {L"with", tWITH},
94 {L"xor", tXOR}
97 static inline BOOL is_identifier_char(WCHAR c)
99 return iswalnum(c) || c == '_';
102 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
104 const WCHAR *p1 = ctx->ptr;
105 const WCHAR *p2 = word;
106 WCHAR c;
108 while(p1 < ctx->end && *p2) {
109 c = towlower(*p1);
110 if(c != *p2)
111 return c - *p2;
112 p1++;
113 p2++;
116 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
117 return 1;
119 ctx->ptr = p1;
120 *lval = word;
121 return 0;
124 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
126 int min = 0, max = ARRAY_SIZE(keywords)-1, r, i;
128 while(min <= max) {
129 i = (min+max)/2;
131 r = check_keyword(ctx, keywords[i].word, lval);
132 if(!r)
133 return keywords[i].token;
135 if(r > 0)
136 min = i+1;
137 else
138 max = i-1;
141 return 0;
144 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
146 const WCHAR *ptr = ctx->ptr++;
147 WCHAR *str;
148 int len;
150 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
151 ctx->ptr++;
152 len = ctx->ptr-ptr;
154 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
155 if(!str)
156 return 0;
158 memcpy(str, ptr, (len+1)*sizeof(WCHAR));
159 str[len] = 0;
160 *ret = str;
161 return tIdentifier;
164 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret)
166 const WCHAR *ptr = ++ctx->ptr;
167 WCHAR *rptr;
168 int len = 0;
170 while(ctx->ptr < ctx->end) {
171 if(*ctx->ptr == '\n' || *ctx->ptr == '\r') {
172 FIXME("newline inside string literal\n");
173 return 0;
176 if(*ctx->ptr == '"') {
177 if(ctx->ptr[1] != '"')
178 break;
179 len--;
180 ctx->ptr++;
182 ctx->ptr++;
185 if(ctx->ptr == ctx->end) {
186 FIXME("unterminated string literal\n");
187 return 0;
190 len += ctx->ptr-ptr;
192 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
193 if(!rptr)
194 return 0;
196 while(ptr < ctx->ptr) {
197 if(*ptr == '"')
198 ptr++;
199 *rptr++ = *ptr++;
202 *rptr = 0;
203 ctx->ptr++;
204 return tString;
207 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret)
209 BOOL use_int = TRUE;
210 LONGLONG d = 0, hlp;
211 int exp = 0;
212 double r;
214 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
215 return *ctx->ptr++;
217 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
218 hlp = d*10 + *(ctx->ptr++) - '0';
219 if(d>MAXLONGLONG/10 || hlp<0) {
220 exp++;
221 break;
223 else
224 d = hlp;
226 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
227 exp++;
228 ctx->ptr++;
231 if(*ctx->ptr == '.') {
232 use_int = FALSE;
233 ctx->ptr++;
235 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
236 hlp = d*10 + *(ctx->ptr++) - '0';
237 if(d>MAXLONGLONG/10 || hlp<0)
238 break;
240 d = hlp;
241 exp--;
243 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr))
244 ctx->ptr++;
247 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') {
248 int e = 0, sign = 1;
250 ctx->ptr++;
251 if(*ctx->ptr == '-') {
252 ctx->ptr++;
253 sign = -1;
254 }else if(*ctx->ptr == '+') {
255 ctx->ptr++;
258 if(!is_digit(*ctx->ptr)) {
259 FIXME("Invalid numeric literal\n");
260 return 0;
263 use_int = FALSE;
265 do {
266 e = e*10 + *(ctx->ptr++) - '0';
267 if(sign == -1 && -e+exp < -(INT_MAX/100)) {
268 /* The literal will be rounded to 0 anyway. */
269 while(is_digit(*ctx->ptr))
270 ctx->ptr++;
271 *(double*)ret = 0;
272 return tDouble;
275 if(sign*e + exp > INT_MAX/100) {
276 FIXME("Invalid numeric literal\n");
277 return 0;
279 } while(is_digit(*ctx->ptr));
281 exp += sign*e;
284 if(use_int && (LONG)d == d) {
285 *(LONG*)ret = d;
286 return tInt;
289 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
290 if(isinf(r)) {
291 FIXME("Invalid numeric literal\n");
292 return 0;
295 *(double*)ret = r;
296 return tDouble;
299 static int hex_to_int(WCHAR c)
301 if('0' <= c && c <= '9')
302 return c-'0';
303 if('a' <= c && c <= 'f')
304 return c+10-'a';
305 if('A' <= c && c <= 'F')
306 return c+10-'A';
307 return -1;
310 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
312 const WCHAR *begin = ctx->ptr;
313 unsigned l = 0, d;
315 while((d = hex_to_int(*++ctx->ptr)) != -1)
316 l = l*16 + d;
318 if(begin + 9 /* max digits+1 */ < ctx->ptr) {
319 FIXME("invalid literal\n");
320 return 0;
323 if(*ctx->ptr == '&') {
324 ctx->ptr++;
325 *ret = l;
326 }else {
327 *ret = l == (UINT16)l ? (INT16)l : l;
329 return tInt;
332 static void skip_spaces(parser_ctx_t *ctx)
334 while(*ctx->ptr == ' ' || *ctx->ptr == '\t')
335 ctx->ptr++;
338 static int comment_line(parser_ctx_t *ctx)
340 ctx->ptr = wcspbrk(ctx->ptr, L"\n\r");
341 if(ctx->ptr)
342 ctx->ptr++;
343 else
344 ctx->ptr = ctx->end;
345 return tNL;
348 static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx)
350 WCHAR c;
352 skip_spaces(ctx);
353 *loc = ctx->ptr - ctx->code;
354 if(ctx->ptr == ctx->end)
355 return ctx->last_token == tNL ? 0 : tNL;
357 c = *ctx->ptr;
359 if('0' <= c && c <= '9')
360 return parse_numeric_literal(ctx, lval);
362 if(iswalpha(c)) {
363 int ret = 0;
364 if(ctx->last_token != '.' && ctx->last_token != tDOT)
365 ret = check_keywords(ctx, lval);
366 if(!ret)
367 return parse_identifier(ctx, lval);
368 if(ret != tREM)
369 return ret;
370 c = '\'';
373 switch(c) {
374 case '\n':
375 case '\r':
376 ctx->ptr++;
377 return tNL;
378 case '\'':
379 return comment_line(ctx);
380 case ':':
381 case ')':
382 case ',':
383 case '=':
384 case '+':
385 case '*':
386 case '/':
387 case '^':
388 case '\\':
389 case '_':
390 return *ctx->ptr++;
391 case '.':
393 * We need to distinguish between '.' used as part of a member expression and
394 * a beginning of a dot expression (a member expression accessing with statement
395 * expression) and a floating point number like ".2" .
397 c = ctx->ptr > ctx->code ? ctx->ptr[-1] : '\n';
398 if (is_identifier_char(c) || c == ')') {
399 ctx->ptr++;
400 return '.';
402 c = ctx->ptr[1];
403 if('0' <= c && c <= '9')
404 return parse_numeric_literal(ctx, lval);
405 ctx->ptr++;
406 return tDOT;
407 case '-':
408 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>')
409 return comment_line(ctx);
410 ctx->ptr++;
411 return '-';
412 case '(':
413 /* NOTE:
414 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
415 * in call statement special case |f()| without 'call' keyword
417 ctx->ptr++;
418 skip_spaces(ctx);
419 if(*ctx->ptr == ')') {
420 ctx->ptr++;
421 return tEMPTYBRACKETS;
424 * Parser can't predict if bracket is part of argument expression or an argument
425 * in call expression. We predict it here instead.
427 if(ctx->last_token == tIdentifier || ctx->last_token == ')')
428 return '(';
429 return tEXPRLBRACKET;
430 case '"':
431 return parse_string_literal(ctx, lval);
432 case '&':
433 if(*++ctx->ptr == 'h' || *ctx->ptr == 'H')
434 return parse_hex_literal(ctx, lval);
435 return '&';
436 case '<':
437 switch(*++ctx->ptr) {
438 case '>':
439 ctx->ptr++;
440 return tNEQ;
441 case '=':
442 ctx->ptr++;
443 return tLTEQ;
444 case '!':
445 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-')
446 return comment_line(ctx);
448 return '<';
449 case '>':
450 if(*++ctx->ptr == '=') {
451 ctx->ptr++;
452 return tGTEQ;
454 return '>';
455 default:
456 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
459 return 0;
462 int parser_lex(void *lval, unsigned *loc, parser_ctx_t *ctx)
464 int ret;
466 if (ctx->last_token == tEXPRESSION)
468 ctx->last_token = tNL;
469 return tEXPRESSION;
472 while(1) {
473 ret = parse_next_token(lval, loc, ctx);
474 if(ret == '_') {
475 skip_spaces(ctx);
476 if(*ctx->ptr != '\n' && *ctx->ptr != '\r') {
477 FIXME("'_' not followed by newline\n");
478 return 0;
480 if(*ctx->ptr == '\r')
481 ctx->ptr++;
482 if(*ctx->ptr == '\n')
483 ctx->ptr++;
484 continue;
486 if(ret != tNL || ctx->last_token != tNL)
487 break;
489 ctx->last_nl = ctx->ptr-ctx->code;
492 return (ctx->last_token = ret);