(DISTFILES): Comment out a few missing files.
[mono-project.git] / mcs / class / Microsoft.JScript / Microsoft.JScript / TokenStream.cs
blobfa2c302e7dd07ca608b43c4d0e67eb962e25cc96
1 //
2 // TokenStream.cs: Port of Mozilla's Rhino TokenStream
3 // This class implements the JScript scanner
4 //
5 // Author:
6 // Cesar Lopez Nataren (cesar@ciencias.unam.mx)
7 //
8 // (C) 2004, Cesar Lopez Nataren
9 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 using System;
33 using System.IO;
34 using System.Collections;
35 using System.Globalization;
37 namespace Microsoft.JScript {
38 public class TokenStream {
//
// Source identification and position bookkeeping.
//

// Name of the script being scanned; printed by the ReportCurrentLine* helpers.
string source_name;

public string SourceName {
	get {
		return source_name;
	}
	set {
		source_name = value;
	}
}

// Current line counter; advanced by GetChar after a line terminator has been consumed.
int line_number;

public int LineNumber {
	get {
		return line_number;
	}
	set {
		line_number = value;
	}
}

// Set by GetChar once the input has been exhausted.
bool hit_eof;

public bool EOF {
	get {
		return hit_eof;
	}
}

// Incremented by GetToken and decremented whenever a token is pushed back.
int token_number;

public int TokenNumber {
	get {
		return token_number;
	}
	set {
		token_number = value;
	}
}
// Single-token pushback slot; Token.EOF means "empty".
int pushback_token;

// When true, GetToken returns Token.EOL for newlines instead of skipping them.
bool significant_eol;

// Accumulator for the text of the token being scanned (see AddToString).
int string_buffer_top;
char [] string_buffer = new char [128];

// Room to backtrack from the second '-' to '<' when matching "<!--" fails
// on its last character; GetChar drains it before reading new input.
int [] unget_buffer = new int [3];
int unget_cursor;

// Offset in the source buffer where the current line starts, and the raw
// terminator that ended the previous line (negative when none is pending).
// NOTE(review): line_end_char has no initializer, so it starts at 0, and
// GetChar treats any value >= 0 as "terminator pending" -- the very first
// character read therefore bumps LineNumber. Confirm whether callers
// compensate before changing this to -1.
int line_start;
int line_end_char;

// Input: either an in-memory string or a growable buffer fed by source_reader.
string source_string;
char [] source_buffer;
int source_end;
int source_cursor;

// Sentinel returned by GetChar at end of input.
const int EOF_CHAR = -1;
// Quick pre-filter applied by GetChar before calling IsJSLineTerminator on non-ASCII characters.
const int EOL_HINT_MASK = 0xdfd0;

StreamReader source_reader;

// True once something other than whitespace (or a leading '-') has been seen on
// the current line; used to decide whether "-->" starts an HTML end-comment.
bool dirty_line;
// Text of the most recent NAME, STRING or REGEXP token (assigned in GetToken).
string _string;

public string GetString {
	get {
		return _string;
	}
}

// When true, future reserved words are accepted as plain names with a warning.
// Nothing in this file assigns it, so it keeps its default value (false) here.
static bool reserved_keyword_as_identifier;

// Value of the most recent NUMBER token.
double number;

public double GetNumber {
	get {
		return number;
	}
}

// Operator carried by the most recent ASSIGNOP token (e.g. Token.ADD for "+=").
int op;

public int GetOp ()
{
	return op;
}

// When set, GetToken scans a '/' as the start of a regular expression literal.
internal bool allow_reg_exp;

// Flags (any of g, i, m) of the most recent REGEXP token.
internal string reg_exp_flags;
117 // methods
// Creates a scanner over either a reader or an in-memory string.
//
// source_reader: stream to scan; when non-null it takes precedence and
//                source_string is ignored.
// source_string: script text to scan when no reader is supplied.
// source_name:   name printed in diagnostics.
// line_number:   initial value of LineNumber.
//
// Throws ArgumentNullException when both source_reader and source_string are
// null (previously this surfaced as a NullReferenceException on
// source_string.Length).
public TokenStream (StreamReader source_reader, string source_string, string source_name, int line_number)
{
	pushback_token = Token.EOF;
	SourceName = source_name;
	this.line_number = line_number;

	if (source_reader != null) {
		// Reader mode: characters are pulled in on demand by FillSourceBuffer.
		this.source_reader = source_reader;
		source_buffer = new char [512];
		source_end = 0;
	} else {
		if (source_string == null)
			throw new ArgumentNullException ("source_string",
				"Either source_reader or source_string must be supplied.");
		this.source_string = source_string;
		source_end = source_string.Length;
	}
	source_cursor = 0;
}
// True when s is a keyword or reserved word, i.e. StringToKeyword maps it
// to something other than Token.EOF.
static bool IsKeyword (string s)
{
	int keyword_token = StringToKeyword (s);
	return keyword_token != Token.EOF;
}
// Maps an identifier to its keyword token.
//
// Returns the Token.* value for a keyword, Token.RESERVED for a future
// reserved word, and Token.EOF when name is not a keyword at all (a null
// name also yields Token.EOF).
//
// This replaces the hand-unrolled length/character decoder with a plain
// string switch. Two defects of the old decoder are fixed on the way:
//   * "debugger" was compared against the misspelt literal "debbuger",
//     so the real word was never recognised;
//   * "null" had no branch at all even though a Token.NULL mapping was
//     declared for it.
static int StringToKeyword (string name)
{
	int id;

	switch (name) {
	// keywords with a dedicated token
	case "break":      id = Token.BREAK;      break;
	case "case":       id = Token.CASE;       break;
	case "catch":      id = Token.CATCH;      break;
	case "continue":   id = Token.CONTINUE;   break;
	case "default":    id = Token.DEFAULT;    break;
	case "delete":     id = Token.DELPROP;    break;
	case "do":         id = Token.DO;         break;
	case "else":       id = Token.ELSE;       break;
	case "export":     id = Token.EXPORT;     break;
	case "false":      id = Token.FALSE;      break;
	case "finally":    id = Token.FINALLY;    break;
	case "for":        id = Token.FOR;        break;
	case "function":   id = Token.FUNCTION;   break;
	case "if":         id = Token.IF;         break;
	case "import":     id = Token.IMPORT;     break;
	case "in":         id = Token.IN;         break;
	case "instanceof": id = Token.INSTANCEOF; break;
	case "new":        id = Token.NEW;        break;
	case "null":       id = Token.NULL;       break;
	case "return":     id = Token.RETURN;     break;
	case "switch":     id = Token.SWITCH;     break;
	case "this":       id = Token.THIS;       break;
	case "throw":      id = Token.THROW;      break;
	case "true":       id = Token.TRUE;       break;
	case "try":        id = Token.TRY;        break;
	case "typeof":     id = Token.TYPEOF;     break;
	case "var":        id = Token.VAR;        break;
	case "void":       id = Token.VOID;       break;
	case "while":      id = Token.WHILE;      break;
	case "with":       id = Token.WITH;       break;

	// future reserved words (#ifdef RESERVE_JAVA_KEYWORDS in jsscan.c)
	case "abstract":
	case "boolean":
	case "byte":
	case "char":
	case "class":
	case "const":
	case "debugger":
	case "double":
	case "enum":
	case "extends":
	case "final":
	case "float":
	case "goto":
	case "implements":
	case "int":
	case "interface":
	case "long":
	case "native":
	case "package":
	case "private":
	case "protected":
	case "public":
	case "short":
	case "static":
	case "super":
	case "synchronized":
	case "throws":
	case "transient":
	case "volatile":
		id = Token.RESERVED;
		break;

	default:
		id = 0;
		break;
	}

	// The following assumes that Token.EOF == 0
	if (id == 0)
		return Token.EOF;
	return id & 0xff;
}
// Consumes the next token when it equals to_match and returns true;
// otherwise pushes the token back and returns false.
public bool MatchToken (int to_match)
{
	int next = GetToken ();
	if (next != to_match) {
		// did not match: undo the read so the caller sees the token again
		pushback_token = next;
		TokenNumber--;
		return false;
	}
	return true;
}
// Pushes tt back so that the next GetToken call returns it again.
public void UnGetToken (int tt)
{
	// Cannot unread more than one token. No action for this condition is
	// visible in the source, so the check is kept without an action.
	bool slot_occupied = pushback_token != Token.EOF;
	if (slot_occupied && tt != Token.ERROR) {
	}

	TokenNumber--;
	pushback_token = tt;
}
// Returns the next token without consuming it: the token is read and
// immediately stored in the pushback slot.
public int PeekToken ()
{
	int upcoming = GetToken ();
	TokenNumber--;
	pushback_token = upcoming;
	return upcoming;
}
// Like PeekToken, but newlines are significant while peeking, so the
// result is Token.EOL when the current line ends before another token.
public int PeekTokenSameLine ()
{
	int upcoming;

	significant_eol = true;
	upcoming = GetToken ();
	significant_eol = false;

	TokenNumber--;
	pushback_token = upcoming;
	return upcoming;
}
// Scans and returns the next token.
//
// Returns a Token.* code. Side results are left in fields: the text of a
// NAME / STRING / REGEXP in _string (GetString), the value of a NUMBER in
// number (GetNumber), the operator of an ASSIGNOP in op (GetOp) and the
// flags of a REGEXP in reg_exp_flags. Lexical errors are reported through
// ReportCurrentLineError and yield Token.ERROR.
//
// Changes relative to the previous revision:
//   * the octal-escape bound is written as 31 (octal 037). The literal
//     "037" is decimal 37 in C#, which let escapes such as "\450" produce
//     values above 0377;
//   * floating point literals are parsed with the invariant culture, so
//     "1.5" no longer depends on the current culture's decimal separator;
//   * an OverflowException from Double.Parse (thrown by some runtimes for
//     out-of-range literals) yields +Infinity instead of escaping;
//   * RegExp source and flags are taken directly from the token buffer
//     instead of converting the entire buffer character by character.
public int GetToken ()
{
	int c;
	TokenNumber++;

	// Check for pushed-back token
	if (pushback_token != Token.EOF) {
		int result = pushback_token;
		pushback_token = Token.EOF;
		// a pushed-back EOL is only meaningful while newlines are significant
		if (result != Token.EOL || significant_eol)
			return result;
	}

retry:
	for (;;) {
		// Eat whitespace, possibly sensitive to newlines
		for (;;) {
			c = GetChar ();
			if (c == EOF_CHAR)
				return Token.EOF;
			else if (c == '\n') {
				dirty_line = false;
				if (significant_eol)
					return Token.EOL;
			} else if (!IsJSSpace (c)) {
				// a leading '-' does not dirty the line, so that "-->" can
				// still be recognised as an HTML end-comment below
				if (c != '-')
					dirty_line = true;
				break;
			}
		}

		// identifier/keyword/instanceof?
		// watch out for starting with a <backslash>
		bool identifier_start;
		bool is_unicode_escape_start = false;

		if (c == '\\') {
			c = GetChar ();
			if (c == 'u') {
				identifier_start = true;
				is_unicode_escape_start = true;
				string_buffer_top = 0;
			} else {
				identifier_start = false;
				UnGetChar (c);
				c = '\\';
			}
		} else {
			identifier_start = IsJavaIdentifierStart ((char) c);
			if (identifier_start) {
				string_buffer_top = 0;
				AddToString (c);
			}
		}

		if (identifier_start) {
			bool contains_escape = is_unicode_escape_start;
			for (;;) {
				if (is_unicode_escape_start) {
					// strictly speaking we should probably push-back
					// all the bad characters if the <backslash>uXXXX
					// sequence is malformed. But since there isn't a
					// correct context(is there?) for a bad Unicode
					// escape sequence in an identifier, we can report
					// an error here.
					int escape_val = 0;
					for (int i = 0; i != 4; ++i) {
						c = GetChar ();
						escape_val = (escape_val << 4) | xDigitToInt (c);
						// Next check takes care about c < 0 and bad escape
						if (escape_val < 0)
							break;
					}
					if (escape_val < 0) {
						ReportCurrentLineError ("msg.invalid.escape");
						return Token.ERROR;
					}
					AddToString (escape_val);
					is_unicode_escape_start = false;
				} else {
					c = GetChar ();
					if (c == '\\') {
						c = GetChar ();
						if (c == 'u') {
							is_unicode_escape_start = true;
							contains_escape = true;
						} else {
							ReportCurrentLineError ("msg.illegal.character");
							return Token.ERROR;
						}
					} else {
						if (c == EOF_CHAR || !IsJavaIdentifierPart ((char) c))
							break;
						AddToString (c);
					}
				}
			}
			UnGetChar (c);

			string str = GetStringFromBuffer ();
			if (!contains_escape) {
				// OPT we shouldn't have to make a string (object!) to
				// check if it's a keyword.

				// Return the corresponding token if it's a keyword
				int result = StringToKeyword (str);
				if (result != Token.EOF) {
					if (result != Token.RESERVED)
						return result;
					else if (!reserved_keyword_as_identifier)
						return result;
					else {
						// If implementation permits to use future reserved
						// keywords in violation with the EcmaScript,
						// treat it as name but issue warning
						ReportCurrentLineWarning ("msg.reserved.keyword", str);
						Console.WriteLine ("Warning: using future reserved keyword as name");
					}
				}
			}
			_string = String.Intern (str);
			return Token.NAME;
		}

		// is it a number?
		if (IsDigit (c) || (c == '.' && IsDigit (PeekChar ()))) {
			string_buffer_top = 0;
			int _base = 10;

			if (c == '0') {
				c = GetChar ();
				if (c == 'x' || c == 'X') {
					_base = 16;
					c = GetChar ();
				} else if (IsDigit (c))
					_base = 8;
				else
					AddToString ('0');
			}

			if (_base == 16) {
				while (0 <= xDigitToInt (c)) {
					AddToString (c);
					c = GetChar ();
				}
			} else {
				while ('0' <= c && c <= '9') {
					/*
					 * We permit 08 and 09 as decimal numbers, which
					 * makes our behavior a superset of the ECMA
					 * numeric grammar. We might not always be so
					 * permissive, so we warn about it.
					 */
					if (_base == 8 && c >= '8') {
						ReportCurrentLineWarning ("msg.bad.octal.literal", c == '8' ? "8" : "9");
						_base = 10;
					}
					AddToString (c);
					c = GetChar ();
				}
			}

			bool is_integer = true;

			if (_base == 10 && (c == '.' || c == 'e' || c == 'E')) {
				is_integer = false;
				if (c == '.') {
					do {
						AddToString (c);
						c = GetChar ();
					} while (IsDigit (c));
				}
				if (c == 'e' || c == 'E') {
					AddToString (c);
					c = GetChar ();
					if (c == '+' || c == '-') {
						AddToString (c);
						c = GetChar ();
					}
					if (!IsDigit (c)) {
						ReportCurrentLineError ("msg.missing.exponent");
						return Token.ERROR;
					}
					do {
						AddToString (c);
						c = GetChar ();
					} while (IsDigit (c));
				}
			}
			UnGetChar (c);
			string num_string = GetStringFromBuffer ();

			double dval;
			if (_base == 10 && !is_integer) {
				try {
					// Script literals always use '.', independent of the
					// culture of the machine running the compiler.
					dval = Double.Parse (num_string, CultureInfo.InvariantCulture);
				} catch (FormatException) {
					ReportCurrentLineError ("msg.caught.nfe");
					return Token.ERROR;
				} catch (OverflowException) {
					// literal too large for a double (the scanned text has no sign)
					dval = Double.PositiveInfinity;
				}
			} else
				dval = StringToNumber (num_string, 0, _base);

			number = dval;
			return Token.NUMBER;
		}

		// is it a string?
		if (c == '"' || c == '\'') {
			// The literal is accumulated in string_buffer, with escape
			// sequences replaced by the characters they denote.

			int quote_char = c;
			string_buffer_top = 0;
			c = GetChar ();

		strLoop:
			while (c != quote_char) {
				if (c == '\n' || c == EOF_CHAR) {
					UnGetChar (c);
					ReportCurrentLineError ("msg.unterminated.string.lit");
					return Token.ERROR;
				}

				if (c == '\\') {
					// We've hit an escaped character
					int escape_val;

					c = GetChar ();
					switch (c) {
					case 'b': c = '\b'; break;
					case 'f': c = '\f'; break;
					case 'n': c = '\n'; break;
					case 'r': c = '\r'; break;
					case 't': c = '\t'; break;

					// \v a late addition to the ECMA spec,
					// it is not in Java, so use 0xb
					case 'v': c = 0xb; break;

					case 'u':
						// Get 4 hex digits; if the u escape is not
						// followed by 4 hex digits, use 'u' + the
						// literal character sequence that follows.
						int escape_start = string_buffer_top;
						AddToString ('u');
						escape_val = 0;
						for (int i = 0; i != 4; ++i) {
							c = GetChar ();
							escape_val = (escape_val << 4) | xDigitToInt (c);
							if (escape_val < 0)
								goto strLoop;
							AddToString (c);
						}
						// prepare for replace of stored 'u' sequence
						// by escape value
						string_buffer_top = escape_start;
						c = escape_val;
						break;
					case 'x':
						// Get 2 hex digits, defaulting to 'x'+literal
						// sequence, as above.
						c = GetChar ();
						escape_val = xDigitToInt (c);
						if (escape_val < 0) {
							AddToString ('x');
							goto strLoop;
						} else {
							int c1 = c;
							c = GetChar ();
							escape_val = (escape_val << 4) | xDigitToInt (c);
							if (escape_val < 0) {
								AddToString ('x');
								AddToString (c1);
								goto strLoop;
							} else // got 2 hex digits
								c = escape_val;
						}
						break;
					default:
						if ('0' <= c && c < '8') {
							int val = c - '0';
							c = GetChar ();
							if ('0' <= c && c < '8') {
								val = 8 * val + c - '0';
								c = GetChar ();
								// 31 == octal 037 (C# has no octal literals)
								if ('0' <= c && c < '8' && val <= 31) {
									// c is 3rd char of octal sequence only
									// if the resulting val <= 0377
									val = 8 * val + c - '0';
									c = GetChar ();
								}
							}
							UnGetChar (c);
							c = val;
						}
						break;
					}
				}
				AddToString (c);
				c = GetChar ();
			}
			string str = GetStringFromBuffer ();
			_string = String.Intern (str);
			return Token.STRING;
		}

		switch (c) {
		case ';': return Token.SEMI;
		case '[': return Token.LB;
		case ']': return Token.RB;
		case '{': return Token.LC;
		case '}': return Token.RC;
		case '(': return Token.LP;
		case ')': return Token.RP;
		case ',': return Token.COMMA;
		case '?': return Token.HOOK;
		case ':': return Token.COLON;
		case '.': return Token.DOT;

		case '|':
			if (MatchChar ('|'))
				return Token.OR;
			else if (MatchChar ('=')) {
				op = Token.BITOR;
				return Token.ASSIGNOP;
			} else
				return Token.BITOR;

		case '^':
			if (MatchChar ('=')) {
				op = Token.BITXOR;
				return Token.ASSIGNOP;
			} else
				return Token.BITXOR;

		case '&':
			if (MatchChar ('&'))
				return Token.AND;
			else if (MatchChar ('=')) {
				op = Token.BITAND;
				return Token.ASSIGNOP;
			} else
				return Token.BITAND;

		case '=':
			if (MatchChar ('=')) {
				if (MatchChar ('='))
					return Token.SHEQ;
				else
					return Token.EQ;
			} else
				return Token.ASSIGN;

		case '!':
			if (MatchChar ('=')) {
				if (MatchChar ('='))
					return Token.SHNE;
				else
					return Token.NE;
			} else
				return Token.NOT;

		case '<':
			/* NB:treat HTML begin-comment as comment-till-eol */
			if (MatchChar ('!')) {
				if (MatchChar ('-')) {
					if (MatchChar ('-')) {
						SkipLine ();
						goto retry;
					}
					UnGetChar ('-');
				}
				UnGetChar ('!');
			}
			if (MatchChar ('<')) {
				if (MatchChar ('=')) {
					op = Token.LSH;
					return Token.ASSIGNOP;
				} else
					return Token.LSH;
			} else {
				if (MatchChar ('='))
					return Token.LE;
				else
					return Token.LT;
			}

		case '>':
			if (MatchChar ('>')) {
				if (MatchChar ('>')) {
					if (MatchChar ('=')) {
						op = Token.URSH;
						return Token.ASSIGNOP;
					} else
						return Token.URSH;
				} else {
					if (MatchChar ('=')) {
						op = Token.RSH;
						return Token.ASSIGNOP;
					} else
						return Token.RSH;
				}
			} else {
				if (MatchChar ('='))
					return Token.GE;
				else
					return Token.GT;
			}

		case '*':
			if (MatchChar ('=')) {
				op = Token.MUL;
				return Token.ASSIGNOP;
			} else
				return Token.MUL;

		case '/':
			// is it a // comment?
			if (MatchChar ('/')) {
				SkipLine ();
				goto retry;
			}
			if (MatchChar ('*')) {
				bool look_for_slash = false;
				for (;;) {
					c = GetChar ();
					if (c == EOF_CHAR) {
						ReportCurrentLineError ("msg.unterminated.comment");
						return Token.ERROR;
					} else if (c == '*')
						look_for_slash = true;
					else if (c == '/') {
						if (look_for_slash)
							goto retry;
					} else
						look_for_slash = false;
				}
			}

			// is it a RegExp?
			if (allow_reg_exp) {
				string_buffer_top = 0;
				while ((c = GetChar ()) != '/') {
					if (c == '\n' || c == EOF_CHAR) {
						UnGetChar (c);
						ReportCurrentLineError ("msg.unterminated.re.lit");
						return Token.ERROR;
					}
					if (c == '\\') {
						// keep the backslash and take the next character verbatim
						AddToString (c);
						c = GetChar ();
					}
					AddToString (c);
				}
				int re_end = string_buffer_top;

				while (true) {
					if (MatchChar ('g'))
						AddToString ('g');
					else if (MatchChar ('i'))
						AddToString ('i');
					else if (MatchChar ('m'))
						AddToString ('m');
					else
						break;
				}

				if (IsAlpha (PeekChar ())) {
					ReportCurrentLineError ("msg.invalid.re.flag");
					return Token.ERROR;
				}

				// [0, re_end) is the pattern, [re_end, string_buffer_top) the flags
				_string = new string (string_buffer, 0, re_end);
				reg_exp_flags = new string (string_buffer, re_end, string_buffer_top - re_end);
				return Token.REGEXP;
			}

			if (MatchChar ('=')) {
				op = Token.DIV;
				return Token.ASSIGNOP;
			} else
				return Token.DIV;

		case '%':
			if (MatchChar ('=')) {
				op = Token.MOD;
				return Token.ASSIGNOP;
			} else
				return Token.MOD;

		case '~':
			return Token.BITNOT;

		case '+':
			if (MatchChar ('=')) {
				op = Token.ADD;
				return Token.ASSIGNOP;
			} else if (MatchChar ('+'))
				return Token.INC;
			else
				return Token.ADD;

		case '-':
			if (MatchChar ('=')) {
				op = Token.SUB;
				c = Token.ASSIGNOP;
			} else if (MatchChar ('-')) {
				if (!dirty_line) {
					// treat HTML end-comment after possible whitespace
					// after line start as comment-until-eol
					if (MatchChar ('>')) {
						SkipLine ();
						goto retry;
					}
				}
				c = Token.DEC;
			} else
				c = Token.SUB;
			dirty_line = true;
			return c;

		default:
			ReportCurrentLineError ("msg.illegal.character");
			return Token.ERROR;
		}
	}
}
// True for the ASCII letters A-Z and a-z only.
static bool IsAlpha (int c)
{
	bool is_upper = 'A' <= c && c <= 'Z';
	bool is_lower = 'a' <= c && c <= 'z';
	return is_upper || is_lower;
}
// Converts the run of digits of base `radix` that starts at s[start]
// into a double.
//
// Scanning stops at the first character that is not a digit of the given
// radix. Returns Double.NaN when no digit is found at `start`.
//
// Values of 2^53 and above cannot be accumulated exactly with repeated
// multiply-add, so they get special treatment: decimal input is handed
// to Double.Parse, and power-of-two radices are re-read bit by bit with
// explicit round-half-to-even.
//
// Changes relative to the previous revision:
//   * for radix < 10 the largest accepted digit is now radix - 1
//     (previously '8' and '9' were accepted as octal digits);
//   * the decimal fallback parses only the scanned digits s[start, end)
//     with the invariant culture, instead of the whole string with the
//     current culture;
//   * an OverflowException from Double.Parse yields +Infinity.
double StringToNumber (string s, int start, int radix)
{
	char digit_max = '9';
	char lower_case_bound = 'a';
	char upper_case_bound = 'A';
	int len = s.Length;

	if (radix < 10)
		digit_max = (char) ('0' + radix - 1);
	if (radix > 10) {
		lower_case_bound = (char) ('a' + radix - 10);
		upper_case_bound = (char) ('A' + radix - 10);
	}

	int end;
	double sum = 0.0;

	for (end = start; end < len; end++) {
		char c = s [end];
		int new_digit;
		if ('0' <= c && c <= digit_max)
			new_digit = c - '0';
		else if ('a' <= c && c < lower_case_bound)
			new_digit = c - 'a' + 10;
		else if ('A' <= c && c < upper_case_bound)
			new_digit = c - 'A' + 10;
		else
			break;
		sum = sum * radix + new_digit;
	}

	if (start == end)
		return Double.NaN;

	if (sum >= 9007199254740992.0) {
		if (radix == 10) {
			/* If we're accumulating a decimal number and the number
			 * is >= 2^53, then the result from the repeated multiply-add
			 * above may be inaccurate. Let the runtime library compute
			 * the correctly rounded answer.
			 */
			try {
				return Double.Parse (s.Substring (start, end - start), CultureInfo.InvariantCulture);
			} catch (FormatException) {
				return Double.NaN;
			} catch (OverflowException) {
				// only digits were scanned, so the value is positive
				return Double.PositiveInfinity;
			}
		} else if (radix == 2 || radix == 4 || radix == 8 ||
			   radix == 16 || radix == 32) {
			/* The number may also be inaccurate for one of these bases.
			 * This happens if the addition in value*radix + digit causes
			 * a round-down to an even least significant mantissa bit
			 * when the first dropped bit is a one. If any of the
			 * following digits in the number (which haven't been added
			 * in yet) are nonzero then the correct action would have
			 * been to round up instead of down. An example of this
			 * occurs when reading the number 0x1000000000000081, which
			 * rounds to 0x1000000000000000 instead of 0x1000000000000100.
			 */
			int bit_shift_in_char = 1;
			int digit = 0;

			const int SKIP_LEADING_ZEROS = 0;
			const int FIRST_EXACT_53_BITS = 1;
			const int AFTER_BIT_53 = 2;
			const int ZEROS_AFTER_54 = 3;
			const int MIXED_AFTER_54 = 4;

			int state = SKIP_LEADING_ZEROS;
			int exact_bits_limit = 53;
			double factor = 0.0;
			bool bit53 = false;
			// bit54 is the 54th bit (the first dropped from the mantissa)
			bool bit54 = false;

			for (;;) {
				if (bit_shift_in_char == 1) {
					// current digit exhausted: fetch the next one
					if (start == end)
						break;
					digit = s [start++];
					if ('0' <= digit && digit <= '9')
						digit -= '0';
					else if ('a' <= digit && digit <= 'z')
						digit -= 'a' - 10;
					else
						digit -= 'A' - 10;
					bit_shift_in_char = radix;
				}
				bit_shift_in_char >>= 1;
				bool bit = (digit & bit_shift_in_char) != 0;

				switch (state) {
				case SKIP_LEADING_ZEROS:
					if (bit) {
						--exact_bits_limit;
						sum = 1.0;
						state = FIRST_EXACT_53_BITS;
					}
					break;
				case FIRST_EXACT_53_BITS:
					sum *= 2.0;
					if (bit)
						sum += 1.0;
					--exact_bits_limit;
					if (exact_bits_limit == 0) {
						bit53 = bit;
						state = AFTER_BIT_53;
					}
					break;
				case AFTER_BIT_53:
					bit54 = bit;
					factor = 2.0;
					state = ZEROS_AFTER_54;
					break;
				// The two states share the scaling step; C# has no implicit
				// fall-through, so they are merged into one section.
				case ZEROS_AFTER_54:
				case MIXED_AFTER_54:
					if (state == ZEROS_AFTER_54 && bit) {
						state = MIXED_AFTER_54;
					}
					factor *= 2;
					break;
				}
			}

			switch (state) {
			case SKIP_LEADING_ZEROS:
				sum = 0.0;
				break;
			case FIRST_EXACT_53_BITS:
			case AFTER_BIT_53:
				// do nothing
				break;
			case ZEROS_AFTER_54:
				// x1.1 -> x1 + 1 (round up)
				// x0.1 -> x0 (round down)
				if (bit54 & bit53)
					sum += 1.0;
				sum *= factor;
				break;
			case MIXED_AFTER_54:
				// x.100...1.. -> x + 1 (round up)
				// x.0anything -> x (round down)
				if (bit54)
					sum += 1.0;
				sum *= factor;
				break;
			}
		}
		/* We don't worry about inaccurate numbers for any other base. */
	}
	return sum;
}
// True for the ASCII decimal digits '0'..'9'.
bool IsDigit (int c)
{
	if (c < '0')
		return false;
	return c <= '9';
}
// Value (0..15) of the hexadecimal digit c, or -1 when c is not one.
static int xDigitToInt (int c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('A' <= c && c <= 'F')
		return c - 'A' + 10;
	if ('a' <= c && c <= 'f')
		return c - 'a' + 10;
	return -1;
}
// True when c is treated as whitespace by the scanner: space, tab, form feed
// and vertical tab below 127; above that, no-break space (0xA0) and any
// character in the Unicode SpaceSeparator category. Line terminators are
// not included.
public static bool IsJSSpace (int c)
{
	if (c >= 127) {
		if (c == 0xA0)
			return true;
		return Char.GetUnicodeCategory ((char) c) == UnicodeCategory.SpaceSeparator;
	}

	switch (c) {
	case 0x20:
	case 0x9:
	case 0xC:
	case 0xB:
		return true;
	default:
		return false;
	}
}
// True for LF, CR, U+2028 (line separator) and U+2029 (paragraph separator).
public static bool IsJSLineTerminator (int c)
{
	switch (c) {
	case '\n':
	case '\r':
	case 0x2028:
	case 0x2029:
		return true;
	default:
		return false;
	}
}
// True for non-ASCII characters in the Unicode Format category;
// GetChar drops such characters from the input.
static bool IsJSFormatChar (int c)
{
	if (c <= 127)
		return false;
	UnicodeCategory category = Char.GetUnicodeCategory ((char) c);
	return category == UnicodeCategory.Format;
}
// Snapshot of the characters accumulated so far in string_buffer.
string GetStringFromBuffer ()
{
	int length = string_buffer_top;
	return new string (string_buffer, 0, length);
}
// Appends c (truncated to a char) to string_buffer, doubling the buffer
// first when it is full.
void AddToString (int c)
{
	int top = string_buffer_top;

	if (top == string_buffer.Length) {
		char [] grown = new char [string_buffer.Length * 2];
		Array.Copy (string_buffer, grown, top);
		string_buffer = grown;
	}

	string_buffer [top] = (char) c;
	string_buffer_top = top + 1;
}
// Pushes c back so that the next GetChar call returns it. Pushed-back
// characters are returned in last-in, first-out order.
void UnGetChar (int c)
{
	// Cannot unread across a line boundary. No action for this condition
	// is visible in the source, so the check is kept without an action.
	bool has_pending = unget_cursor != 0;
	if (has_pending && unget_buffer [unget_cursor - 1] == '\n') {
	}

	unget_buffer [unget_cursor++] = c;
}
// Consumes the next character when it equals test and returns true;
// otherwise leaves the input untouched and returns false.
bool MatchChar (int test)
{
	int next = GetChar ();
	if (next != test) {
		UnGetChar (next);
		return false;
	}
	return true;
}
// Returns the next character (or EOF_CHAR) without consuming it.
int PeekChar ()
{
	int upcoming = GetChar ();
	UnGetChar (upcoming);
	return upcoming;
}
// Returns the next input character, or EOF_CHAR at end of input.
//
// Pushed-back characters are served first. Otherwise a character is read
// from the string or the reader buffer and normalised: every line
// terminator (CR, LF, CRLF, U+2028, U+2029) is delivered as a single
// '\n', and non-ASCII Unicode format characters are dropped. Line
// bookkeeping (line_start, LineNumber) is updated when the first character
// after a terminator is read.
int GetChar ()
{
	if (unget_cursor != 0) {
		unget_cursor--;
		return unget_buffer [unget_cursor];
	}

	while (true) {
		int ch;

		bool at_end = source_cursor == source_end;
		if (source_string == null) {
			// reader mode: try to refill before giving up
			if (at_end && !FillSourceBuffer ()) {
				hit_eof = true;
				return EOF_CHAR;
			}
			ch = source_buffer [source_cursor++];
		} else {
			if (at_end) {
				hit_eof = true;
				return EOF_CHAR;
			}
			ch = source_string [source_cursor++];
		}

		if (line_end_char >= 0) {
			// A terminator was delivered on the previous call.
			if (line_end_char == '\r' && ch == '\n') {
				// second half of CRLF: already reported as one '\n'
				line_end_char = '\n';
				continue;
			}
			line_end_char = -1;
			line_start = source_cursor - 1;
			LineNumber++;
		}

		if (ch > 127) {
			if (IsJSFormatChar (ch))
				continue;
			// the mask is a cheap pre-test before the exact comparison
			if ((ch & EOL_HINT_MASK) == 0 && IsJSLineTerminator (ch)) {
				line_end_char = ch;
				ch = '\n';
			}
		} else if (ch == '\n' || ch == '\r') {
			line_end_char = ch;
			ch = '\n';
		}
		return ch;
	}
}
// Discards input up to the end of the current line. The terminating
// '\n' (or EOF_CHAR) is pushed back so that the caller still sees it.
void SkipLine ()
{
	int ch;
	do {
		ch = GetChar ();
	} while (ch != EOF_CHAR && ch != '\n');
	UnGetChar (ch);
}
// Reads more characters from source_reader into source_buffer.
//
// When the buffer is full it is first compacted, dropping everything
// before the start of the current line; if the current line already
// starts at offset 0 the buffer is doubled instead. Returns false when
// the reader has no more data.
bool FillSourceBuffer ()
{
	// No action for this condition is visible in the source, so the
	// check is kept without an action.
	if (source_string == null) {
	}

	bool buffer_full = source_end == source_buffer.Length;
	if (buffer_full) {
		if (line_start == 0) {
			char [] bigger = new char [source_buffer.Length * 2];
			Array.Copy (source_buffer, 0, bigger, 0, source_end);
			source_buffer = bigger;
		} else {
			int kept = source_end - line_start;
			Array.Copy (source_buffer, line_start, source_buffer, 0, kept);
			source_end -= line_start;
			source_cursor -= line_start;
			line_start = 0;
		}
	}

	int free_space = source_buffer.Length - source_end;
	int read = source_reader.Read (source_buffer, source_end, free_space);
	if (read != 0) {
		source_end += read;
		return true;
	}
	return false;
}
// Writes a warning to the console, naming the message id, the source
// name, the current line number and an extra detail string.
public void ReportCurrentLineWarning (string message, string str)
{
	string text = String.Format ("warning: {0}, {1}, {2}, {3}", message, SourceName, LineNumber, str);
	Console.WriteLine (text);
}
// Writes an error to the console, naming the message id, the source
// name and the current line number.
public void ReportCurrentLineError (string message)
{
	string text = String.Format ("error: {0}, {1}, {2}", message, SourceName, LineNumber);
	Console.WriteLine (text);
}
// True when c may appear after the first character of an identifier:
// letters, digits, letter numbers, currency symbols, connector
// punctuation, non-spacing marks and "ignorable" characters.
// FIXME: we don't check for combining mark yet
static bool IsJavaIdentifierPart (char c)
{
	if (Char.IsLetter (c))
		return true;

	switch (Char.GetUnicodeCategory (c)) {
	case UnicodeCategory.CurrencySymbol:
	case UnicodeCategory.ConnectorPunctuation:
	case UnicodeCategory.LetterNumber:
	case UnicodeCategory.NonSpacingMark:
		return true;
	}

	return Char.IsDigit (c) || IsIdentifierIgnorable (c);
}
// True for the control ranges U+0000-0008, U+000E-001B and U+007F-009F,
// and for any character in the Unicode Format category.
static bool IsIdentifierIgnorable (char c)
{
	if (c <= '\u0008')
		return true;
	if ('\u000E' <= c && c <= '\u001B')
		return true;
	if ('\u007F' <= c && c <= '\u009F')
		return true;
	return Char.GetUnicodeCategory (c) == UnicodeCategory.Format;
}
// True when c may begin an identifier: a letter, a letter number, a
// currency symbol (such as '$') or connector punctuation (such as '_').
static bool IsJavaIdentifierStart (char c)
{
	if (Char.IsLetter (c))
		return true;

	switch (Char.GetUnicodeCategory (c)) {
	case UnicodeCategory.LetterNumber:
	case UnicodeCategory.CurrencySymbol:
	case UnicodeCategory.ConnectorPunctuation:
		return true;
	default:
		return false;
	}
}
// Concatenates the elements of a, each converted to char, into one string.
// Every element is included, so passing a partially filled buffer also
// yields its unused tail. Uses a StringBuilder so that the cost is linear
// in the array length rather than quadratic.
internal static string to_string (Array a)
{
	System.Text.StringBuilder sb = new System.Text.StringBuilder (a.Length);
	foreach (char c in a)
		sb.Append (c);
	return sb.ToString ();
}