dpnet/tests: Add a trailing '\n' to some ok() calls.
[wine.git] / dlls / vbscript / lex.c
blob98b4cbb842cd7a16b10f7ff53522fd7e175ab8e7
1 /*
2 * Copyright 2011 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 #include "config.h"
20 #include "wine/port.h"
22 #include <assert.h>
23 #include <limits.h>
25 #include "vbscript.h"
26 #include "parse.h"
27 #include "parser.tab.h"
29 #include "wine/debug.h"
31 WINE_DEFAULT_DEBUG_CHANNEL(vbscript);
/*
 * Spellings of the VBScript reserved words as NUL-terminated WCHAR arrays.
 * They are stored in lower case: check_keyword() lower-cases each input
 * character before comparing it with these strings.
 */
33 static const WCHAR andW[] = {'a','n','d',0};
34 static const WCHAR byrefW[] = {'b','y','r','e','f',0};
35 static const WCHAR byvalW[] = {'b','y','v','a','l',0};
36 static const WCHAR callW[] = {'c','a','l','l',0};
37 static const WCHAR caseW[] = {'c','a','s','e',0};
38 static const WCHAR classW[] = {'c','l','a','s','s',0};
39 static const WCHAR constW[] = {'c','o','n','s','t',0};
40 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
41 static const WCHAR dimW[] = {'d','i','m',0};
42 static const WCHAR doW[] = {'d','o',0};
43 static const WCHAR eachW[] = {'e','a','c','h',0};
44 static const WCHAR elseW[] = {'e','l','s','e',0};
45 static const WCHAR elseifW[] = {'e','l','s','e','i','f',0};
46 static const WCHAR emptyW[] = {'e','m','p','t','y',0};
47 static const WCHAR endW[] = {'e','n','d',0};
48 static const WCHAR eqvW[] = {'e','q','v',0};
49 static const WCHAR errorW[] = {'e','r','r','o','r',0};
50 static const WCHAR exitW[] = {'e','x','i','t',0};
51 static const WCHAR explicitW[] = {'e','x','p','l','i','c','i','t',0};
52 static const WCHAR falseW[] = {'f','a','l','s','e',0};
53 static const WCHAR forW[] = {'f','o','r',0};
54 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
55 static const WCHAR getW[] = {'g','e','t',0};
56 static const WCHAR gotoW[] = {'g','o','t','o',0};
57 static const WCHAR ifW[] = {'i','f',0};
58 static const WCHAR impW[] = {'i','m','p',0};
59 static const WCHAR inW[] = {'i','n',0};
60 static const WCHAR isW[] = {'i','s',0};
61 static const WCHAR letW[] = {'l','e','t',0};
62 static const WCHAR loopW[] = {'l','o','o','p',0};
63 static const WCHAR meW[] = {'m','e',0};
64 static const WCHAR modW[] = {'m','o','d',0};
65 static const WCHAR newW[] = {'n','e','w',0};
66 static const WCHAR nextW[] = {'n','e','x','t',0};
67 static const WCHAR notW[] = {'n','o','t',0};
68 static const WCHAR nothingW[] = {'n','o','t','h','i','n','g',0};
69 static const WCHAR nullW[] = {'n','u','l','l',0};
70 static const WCHAR onW[] = {'o','n',0};
71 static const WCHAR optionW[] = {'o','p','t','i','o','n',0};
72 static const WCHAR orW[] = {'o','r',0};
73 static const WCHAR privateW[] = {'p','r','i','v','a','t','e',0};
74 static const WCHAR propertyW[] = {'p','r','o','p','e','r','t','y',0};
75 static const WCHAR publicW[] = {'p','u','b','l','i','c',0};
76 static const WCHAR remW[] = {'r','e','m',0};
77 static const WCHAR resumeW[] = {'r','e','s','u','m','e',0};
78 static const WCHAR selectW[] = {'s','e','l','e','c','t',0};
79 static const WCHAR setW[] = {'s','e','t',0};
80 static const WCHAR stepW[] = {'s','t','e','p',0};
81 static const WCHAR stopW[] = {'s','t','o','p',0};
82 static const WCHAR subW[] = {'s','u','b',0};
83 static const WCHAR thenW[] = {'t','h','e','n',0};
84 static const WCHAR toW[] = {'t','o',0};
85 static const WCHAR trueW[] = {'t','r','u','e',0};
86 static const WCHAR untilW[] = {'u','n','t','i','l',0};
87 static const WCHAR wendW[] = {'w','e','n','d',0};
88 static const WCHAR whileW[] = {'w','h','i','l','e',0};
89 static const WCHAR xorW[] = {'x','o','r',0};
/*
 * Table pairing each reserved word with the parser token returned for it.
 *
 * The entries MUST stay sorted by word in ascending character order:
 * check_keywords() performs a binary search over this array and relies on
 * the sign of check_keyword()'s result to pick the half to continue in.
 */
91 static const struct {
92 const WCHAR *word;
93 int token;
94 } keywords[] = {
95 {andW, tAND},
96 {byrefW, tBYREF},
97 {byvalW, tBYVAL},
98 {callW, tCALL},
99 {caseW, tCASE},
100 {classW, tCLASS},
101 {constW, tCONST},
102 {defaultW, tDEFAULT},
103 {dimW, tDIM},
104 {doW, tDO},
105 {eachW, tEACH},
106 {elseW, tELSE},
107 {elseifW, tELSEIF},
108 {emptyW, tEMPTY},
109 {endW, tEND},
110 {eqvW, tEQV},
111 {errorW, tERROR},
112 {exitW, tEXIT},
113 {explicitW, tEXPLICIT},
114 {falseW, tFALSE},
115 {forW, tFOR},
116 {functionW, tFUNCTION},
117 {getW, tGET},
118 {gotoW, tGOTO},
119 {ifW, tIF},
120 {impW, tIMP},
121 {inW, tIN},
122 {isW, tIS},
123 {letW, tLET},
124 {loopW, tLOOP},
125 {meW, tME},
126 {modW, tMOD},
127 {newW, tNEW},
128 {nextW, tNEXT},
129 {notW, tNOT},
130 {nothingW, tNOTHING},
131 {nullW, tNULL},
132 {onW, tON},
133 {optionW, tOPTION},
134 {orW, tOR},
135 {privateW, tPRIVATE},
136 {propertyW, tPROPERTY},
137 {publicW, tPUBLIC},
138 {remW, tREM},
139 {resumeW, tRESUME},
140 {selectW, tSELECT},
141 {setW, tSET},
142 {stepW, tSTEP},
143 {stopW, tSTOP},
144 {subW, tSUB},
145 {thenW, tTHEN},
146 {toW, tTO},
147 {trueW, tTRUE},
148 {untilW, tUNTIL},
149 {wendW, tWEND},
150 {whileW, tWHILE},
151 {xorW, tXOR}
154 static inline BOOL is_identifier_char(WCHAR c)
156 return isalnumW(c) || c == '_';
159 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word)
161 const WCHAR *p1 = ctx->ptr;
162 const WCHAR *p2 = word;
163 WCHAR c;
165 while(p1 < ctx->end && *p2) {
166 c = tolowerW(*p1);
167 if(c != *p2)
168 return c - *p2;
169 p1++;
170 p2++;
173 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
174 return 1;
176 ctx->ptr = p1;
177 return 0;
180 static int check_keywords(parser_ctx_t *ctx)
182 int min = 0, max = sizeof(keywords)/sizeof(keywords[0])-1, r, i;
184 while(min <= max) {
185 i = (min+max)/2;
187 r = check_keyword(ctx, keywords[i].word);
188 if(!r)
189 return keywords[i].token;
191 if(r > 0)
192 min = i+1;
193 else
194 max = i-1;
197 return 0;
200 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
202 const WCHAR *ptr = ctx->ptr++;
203 WCHAR *str;
204 int len;
206 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
207 ctx->ptr++;
208 len = ctx->ptr-ptr;
210 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
211 if(!str)
212 return 0;
214 memcpy(str, ptr, (len+1)*sizeof(WCHAR));
215 str[len] = 0;
216 *ret = str;
217 return tIdentifier;
220 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret)
222 const WCHAR *ptr = ++ctx->ptr;
223 WCHAR *rptr;
224 int len = 0;
226 while(ctx->ptr < ctx->end) {
227 if(*ctx->ptr == '\n') {
228 FIXME("newline inside string literal\n");
229 return 0;
232 if(*ctx->ptr == '"') {
233 if(ctx->ptr[1] != '"')
234 break;
235 len--;
236 ctx->ptr++;
238 ctx->ptr++;
241 if(ctx->ptr == ctx->end) {
242 FIXME("unterminated string literal\n");
243 return 0;
246 len += ctx->ptr-ptr;
248 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
249 if(!rptr)
250 return 0;
252 while(ptr < ctx->ptr) {
253 if(*ptr == '"')
254 ptr++;
255 *rptr++ = *ptr++;
258 *rptr = 0;
259 ctx->ptr++;
260 return tString;
263 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret)
265 BOOL use_int = TRUE;
266 LONGLONG d = 0, hlp;
267 int exp = 0;
268 double r;
270 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
271 return *ctx->ptr++;
273 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) {
274 hlp = d*10 + *(ctx->ptr++) - '0';
275 if(d>MAXLONGLONG/10 || hlp<0) {
276 exp++;
277 break;
279 else
280 d = hlp;
282 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) {
283 exp++;
284 ctx->ptr++;
287 if(*ctx->ptr == '.') {
288 use_int = FALSE;
289 ctx->ptr++;
291 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) {
292 hlp = d*10 + *(ctx->ptr++) - '0';
293 if(d>MAXLONGLONG/10 || hlp<0)
294 break;
296 d = hlp;
297 exp--;
299 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr))
300 ctx->ptr++;
303 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') {
304 int e = 0, sign = 1;
306 if(*++ctx->ptr == '-') {
307 ctx->ptr++;
308 sign = -1;
311 if(!isdigitW(*ctx->ptr)) {
312 FIXME("Invalid numeric literal\n");
313 return 0;
316 use_int = FALSE;
318 do {
319 e = e*10 + *(ctx->ptr++) - '0';
320 if(sign == -1 && -e+exp < -(INT_MAX/100)) {
321 /* The literal will be rounded to 0 anyway. */
322 while(isdigitW(*ctx->ptr))
323 ctx->ptr++;
324 *(double*)ret = 0;
325 return tDouble;
328 if(sign*e + exp > INT_MAX/100) {
329 FIXME("Invalid numeric literal\n");
330 return 0;
332 } while(isdigitW(*ctx->ptr));
334 exp += sign*e;
337 if(use_int && (LONG)d == d) {
338 LONG l = d;
339 *(LONG*)ret = l;
340 return (short)l == l ? tShort : tLong;
343 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
344 if(isinf(r)) {
345 FIXME("Invalid numeric literal\n");
346 return 0;
349 *(double*)ret = r;
350 return tDouble;
353 static int hex_to_int(WCHAR c)
355 if('0' <= c && c <= '9')
356 return c-'0';
357 if('a' <= c && c <= 'f')
358 return c+10-'a';
359 if('A' <= c && c <= 'F')
360 return c+10-'A';
361 return -1;
364 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
366 const WCHAR *begin = ctx->ptr;
367 LONG l = 0, d;
369 while((d = hex_to_int(*++ctx->ptr)) != -1)
370 l = l*16 + d;
372 if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) {
373 FIXME("invalid literal\n");
374 return 0;
377 if(*ctx->ptr == '&')
378 ctx->ptr++;
380 *ret = l;
381 return (short)l == l ? tShort : tLong;
384 static void skip_spaces(parser_ctx_t *ctx)
386 while(*ctx->ptr == ' ' || *ctx->ptr == '\t' || *ctx->ptr == '\r')
387 ctx->ptr++;
390 static int comment_line(parser_ctx_t *ctx)
392 ctx->ptr = strchrW(ctx->ptr, '\n');
393 if(ctx->ptr)
394 ctx->ptr++;
395 else
396 ctx->ptr = ctx->end;
397 return tNL;
400 static int parse_next_token(void *lval, parser_ctx_t *ctx)
402 WCHAR c;
404 skip_spaces(ctx);
405 if(ctx->ptr == ctx->end)
406 return ctx->last_token == tNL ? tEOF : tNL;
408 c = *ctx->ptr;
410 if('0' <= c && c <= '9')
411 return parse_numeric_literal(ctx, lval);
413 if(isalphaW(c)) {
414 int ret = check_keywords(ctx);
415 if(!ret)
416 return parse_identifier(ctx, lval);
417 if(ret != tREM)
418 return ret;
419 c = '\'';
422 switch(c) {
423 case '\n':
424 ctx->ptr++;
425 return tNL;
426 case '\'':
427 return comment_line(ctx);
428 case ':':
429 case ')':
430 case ',':
431 case '=':
432 case '+':
433 case '*':
434 case '/':
435 case '^':
436 case '\\':
437 case '.':
438 case '_':
439 return *ctx->ptr++;
440 case '-':
441 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>')
442 return comment_line(ctx);
443 ctx->ptr++;
444 return '-';
445 case '(':
446 /* NOTE:
447 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
448 * in call statement special case |f()| without 'call' keyword
450 ctx->ptr++;
451 skip_spaces(ctx);
452 if(*ctx->ptr == ')') {
453 ctx->ptr++;
454 return tEMPTYBRACKETS;
456 return '(';
457 case '"':
458 return parse_string_literal(ctx, lval);
459 case '&':
460 if(*++ctx->ptr == 'h' || *ctx->ptr == 'H')
461 return parse_hex_literal(ctx, lval);
462 return '&';
463 case '<':
464 switch(*++ctx->ptr) {
465 case '>':
466 ctx->ptr++;
467 return tNEQ;
468 case '=':
469 ctx->ptr++;
470 return tLTEQ;
471 case '!':
472 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-')
473 return comment_line(ctx);
475 return '<';
476 case '>':
477 if(*++ctx->ptr == '=') {
478 ctx->ptr++;
479 return tGTEQ;
481 return '>';
482 default:
483 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
486 return 0;
489 int parser_lex(void *lval, parser_ctx_t *ctx)
491 int ret;
493 while(1) {
494 ret = parse_next_token(lval, ctx);
495 if(ret == '_') {
496 skip_spaces(ctx);
497 if(*ctx->ptr != '\n') {
498 FIXME("'_' not followed by newline\n");
499 return 0;
501 ctx->ptr++;
502 continue;
504 if(ret != tNL || ctx->last_token != tNL)
505 break;
507 ctx->last_nl = ctx->ptr-ctx->code;
510 return (ctx->last_token = ret);