gtk+-4.0: Update to 3.93.0+f4c1a404
[vala-gnome.git] / vala / valascanner.vala
blob063b261bece1d89e659b759d3bf366707cefe195
1 /* valascanner.vala
3 * Copyright (C) 2008-2012 Jürg Billeter
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 * Author:
20 * Jürg Billeter <j@bitron.ch>
21 * Jukka-Pekka Iivonen <jp0409@jippii.fi>
24 using GLib;
26 /**
27 * Lexical scanner for Vala source files.
29 public class Vala.Scanner {
/**
 * Source file whose mapped contents are being tokenized.
 */
public SourceFile source_file { get; private set; }

// Type of the token most recently returned by the generic branch of
// read_token (); consulted there to decide whether '/' starts a regular
// expression literal or is a division operator.
TokenType previous;

// Read cursor into the mapped file contents.
char* current;

// One past the last byte of the mapped file contents.
char* end;

// Line and column that belong to `current` (both start at 1).
int line;
int column;

// NOTE(review): presumably the comment collected by comment (); its use is
// not visible in this part of the file.
Comment _comment;
// Stack of the preprocessor conditionals that are currently open; the
// innermost #if is the last element.
Conditional[] conditional_stack;

/**
 * Book-keeping for one open #if ... #endif group.
 */
struct Conditional {
	// true once one branch of this group has been selected for processing
	public bool matched;
	// tested by #elif and #else to reject directives following an #else
	public bool else_found;
	// true while the lines of the current branch have to be skipped
	public bool skip_section;
}
// Stack of lexical contexts; the last element decides how the next token
// is read.
State[] state_stack;

/**
 * Lexical contexts tracked on the state stack.
 */
enum State {
	// pushed for '(' (also for the "$(" of a template expression)
	PARENS,
	// pushed for '{'
	BRACE,
	// pushed for '['
	BRACKET,
	// inside a string template opened with @"
	TEMPLATE,
	// a template piece was just returned; the next token is a synthetic comma
	TEMPLATE_PART,
	// inside a regular expression literal opened with '/'
	REGEX_LITERAL
}
/**
 * Creates a scanner positioned at the first byte of the given file.
 *
 * @param source_file file whose mapped contents are tokenized
 */
public Scanner (SourceFile source_file) {
	this.source_file = source_file;

	// the cursor starts at the first mapped byte, `end` is one past the last
	current = source_file.get_mapped_contents ();
	end = current + source_file.get_mapped_length ();

	column = 1;
	line = 1;
}
/**
 * Repositions the scanner at a previously recorded location.
 *
 * Both the preprocessor conditional stack and the lexical state stack are
 * discarded.
 *
 * @param location position, line and column to continue scanning from
 */
public void seek (SourceLocation location) {
	state_stack = null;
	conditional_stack = null;

	column = location.column;
	line = location.line;
	current = location.pos;
}
// Whether the innermost lexical state is a string template.
bool in_template () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE;
}
// Whether a piece of a string template has just been returned.
bool in_template_part () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE_PART;
}
// Whether the innermost lexical state is a regular expression literal.
bool in_regex_literal () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.REGEX_LITERAL;
}
// Whether c may appear inside an identifier: an underscore or any character
// accepted by char.isalnum ().
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/**
 * Builds a source reference on the current line.
 *
 * @param offset column offset relative to the current column
 * @param length extent of the reference, added to both the position and the column
 * @return a new reference into source_file
 */
SourceReference get_source_reference (int offset, int length = 0) {
	int first_column = column + offset;
	var start = SourceLocation (current, line, first_column);
	var stop = SourceLocation (current + length, line, first_column + length);
	return new SourceReference (source_file, start, stop);
}
/**
 * Reads the next token while the scanner is inside a regular expression
 * literal.
 *
 * Returns either the body of the literal (REGEX_LITERAL) or the closing
 * slash together with any trailing i/s/m/x modifiers (CLOSE_REGEX_LITERAL).
 * If the literal is not terminated on the current line, an error is
 * reported, the regex state is popped and scanning continues with
 * read_token ().
 *
 * @param token_begin location of the first byte of the token
 * @param token_end position just past the token, with the column of its last character
 * @return the type of the token that was read
 */
public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// stays negative for pure ASCII tokens, whose width is the byte count
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '/':
			type = TokenType.CLOSE_REGEX_LITERAL;
			current++;
			state_stack.length--;
			var fl_i = false;
			var fl_s = false;
			var fl_m = false;
			var fl_x = false;
			// The closing slash and its modifiers are plain ASCII, so the
			// column is advanced by the byte count below; the character
			// counter is deliberately left untouched.
			while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
				bool duplicate = false;
				switch (current[0]) {
				case 'i':
					duplicate = fl_i;
					fl_i = true;
					break;
				case 's':
					duplicate = fl_s;
					fl_s = true;
					break;
				case 'm':
					duplicate = fl_m;
					fl_m = true;
					break;
				case 'x':
					duplicate = fl_x;
					fl_x = true;
					break;
				}
				if (duplicate) {
					// point at the repeated modifier itself
					Report.error (get_source_reference ((int) (current - begin)), "modifier '%c' used more than once".printf (current[0]));
				}
				current++;
			}
			break;
		default:
			type = TokenType.REGEX_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '/') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '/':
					case '^':
					case '$':
					case '.':
					case '[':
					case ']':
					case '{':
					case '}':
					case '(':
					case ')':
					case '?':
					case '*':
					case '+':
					case '-':
					case '#':
					case '&':
					case '~':
					case ':':
					case ';':
					case '<':
					case '>':
					case '|':
					case '%':
					case '=':
					case '@':
					case '0':
					case 'b':
					case 'B':
					case 'f':
					case 'n':
					case 'N':
					case 'r':
					case 'R':
					case 't':
					case 'v':
					case 'a':
					case 'A':
					case 'p':
					case 'P':
					case 'e':
					case 'd':
					case 'D':
					case 's':
					case 'S':
					case 'w':
					case 'W':
					case 'G':
					case 'z':
					case 'Z':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires at least one hex digit
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length < 1) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires at least one hex digit");
						}
						break;
					default:
						// back references \1 through \99
						if (current[0].isdigit ()) {
							current++;
							token_length_in_chars++;
							if (current < end && current[0].isdigit ()) {
								current++;
								token_length_in_chars++;
							}
						} else {
							Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						}
						break;
					}
				} else if (current[0] == '\n') {
					// regular expression literals may not span lines
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				// NOTE(review): the message names '"' although the missing
				// terminator is '/'; left unchanged here.
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Classifies a run of identifier characters.
 *
 * @param begin first character of the run
 * @param len number of characters in the run
 * @return the keyword token type if the run spells a keyword, otherwise TokenType.IDENTIFIER
 */
public static TokenType get_identifier_or_keyword (char* begin, int len) {
	// Keywords are grouped by length, so only candidates of exactly `len`
	// characters are ever compared against the input.
	switch (len) {
	case 2:
		if (matches (begin, "as")) return TokenType.AS;
		if (matches (begin, "do")) return TokenType.DO;
		if (matches (begin, "if")) return TokenType.IF;
		if (matches (begin, "in")) return TokenType.IN;
		if (matches (begin, "is")) return TokenType.IS;
		break;
	case 3:
		if (matches (begin, "for")) return TokenType.FOR;
		if (matches (begin, "get")) return TokenType.GET;
		if (matches (begin, "new")) return TokenType.NEW;
		if (matches (begin, "out")) return TokenType.OUT;
		if (matches (begin, "ref")) return TokenType.REF;
		if (matches (begin, "set")) return TokenType.SET;
		if (matches (begin, "try")) return TokenType.TRY;
		if (matches (begin, "var")) return TokenType.VAR;
		break;
	case 4:
		if (matches (begin, "base")) return TokenType.BASE;
		if (matches (begin, "case")) return TokenType.CASE;
		if (matches (begin, "else")) return TokenType.ELSE;
		if (matches (begin, "enum")) return TokenType.ENUM;
		if (matches (begin, "lock")) return TokenType.LOCK;
		if (matches (begin, "null")) return TokenType.NULL;
		if (matches (begin, "this")) return TokenType.THIS;
		if (matches (begin, "true")) return TokenType.TRUE;
		if (matches (begin, "void")) return TokenType.VOID;
		if (matches (begin, "weak")) return TokenType.WEAK;
		break;
	case 5:
		if (matches (begin, "async")) return TokenType.ASYNC;
		if (matches (begin, "break")) return TokenType.BREAK;
		if (matches (begin, "catch")) return TokenType.CATCH;
		if (matches (begin, "class")) return TokenType.CLASS;
		if (matches (begin, "const")) return TokenType.CONST;
		if (matches (begin, "false")) return TokenType.FALSE;
		if (matches (begin, "owned")) return TokenType.OWNED;
		if (matches (begin, "throw")) return TokenType.THROW;
		if (matches (begin, "using")) return TokenType.USING;
		if (matches (begin, "while")) return TokenType.WHILE;
		if (matches (begin, "yield")) return TokenType.YIELD;
		break;
	case 6:
		if (matches (begin, "delete")) return TokenType.DELETE;
		if (matches (begin, "extern")) return TokenType.EXTERN;
		if (matches (begin, "inline")) return TokenType.INLINE;
		if (matches (begin, "params")) return TokenType.PARAMS;
		if (matches (begin, "public")) return TokenType.PUBLIC;
		if (matches (begin, "return")) return TokenType.RETURN;
		if (matches (begin, "sealed")) return TokenType.SEALED;
		if (matches (begin, "signal")) return TokenType.SIGNAL;
		if (matches (begin, "sizeof")) return TokenType.SIZEOF;
		if (matches (begin, "static")) return TokenType.STATIC;
		if (matches (begin, "struct")) return TokenType.STRUCT;
		if (matches (begin, "switch")) return TokenType.SWITCH;
		if (matches (begin, "throws")) return TokenType.THROWS;
		if (matches (begin, "typeof")) return TokenType.TYPEOF;
		break;
	case 7:
		if (matches (begin, "default")) return TokenType.DEFAULT;
		if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
		if (matches (begin, "ensures")) return TokenType.ENSURES;
		if (matches (begin, "finally")) return TokenType.FINALLY;
		if (matches (begin, "foreach")) return TokenType.FOREACH;
		if (matches (begin, "private")) return TokenType.PRIVATE;
		if (matches (begin, "unowned")) return TokenType.UNOWNED;
		if (matches (begin, "virtual")) return TokenType.VIRTUAL;
		break;
	case 8:
		if (matches (begin, "abstract")) return TokenType.ABSTRACT;
		if (matches (begin, "continue")) return TokenType.CONTINUE;
		if (matches (begin, "delegate")) return TokenType.DELEGATE;
		if (matches (begin, "internal")) return TokenType.INTERNAL;
		if (matches (begin, "override")) return TokenType.OVERRIDE;
		if (matches (begin, "requires")) return TokenType.REQUIRES;
		if (matches (begin, "volatile")) return TokenType.VOLATILE;
		break;
	case 9:
		if (matches (begin, "construct")) return TokenType.CONSTRUCT;
		if (matches (begin, "interface")) return TokenType.INTERFACE;
		if (matches (begin, "namespace")) return TokenType.NAMESPACE;
		if (matches (begin, "protected")) return TokenType.PROTECTED;
		break;
	case 11:
		if (matches (begin, "errordomain")) return TokenType.ERRORDOMAIN;
		break;
	}

	return TokenType.IDENTIFIER;
}
/**
 * Reads a numeric literal starting at the current position.
 *
 * Accepts a hexadecimal (0x...) or decimal integer part, an optional
 * fractional part, an optional exponent and an optional type suffix. A
 * non-real literal that is directly followed by identifier characters is
 * consumed together with them and reported as an identifier.
 *
 * @return INTEGER_LITERAL, REAL_LITERAL or IDENTIFIER
 */
TokenType read_number () {
	var type = TokenType.INTEGER_LITERAL;

	// integer part
	if (current < end - 2 && current[0] == '0'
	    && current[1] == 'x' && current[2].isxdigit ()) {
		// hexadecimal integer literal
		current += 2;
		while (current < end && current[0].isxdigit ()) {
			current++;
		}
	} else {
		// decimal number
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// fractional part
	if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
		type = TokenType.REAL_LITERAL;
		current++;
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// exponent part
	if (current < end && current[0].tolower () == 'e') {
		type = TokenType.REAL_LITERAL;
		current++;
		if (current < end && (current[0] == '+' || current[0] == '-')) {
			current++;
		}
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// type suffix
	if (current < end) {
		// remembered before the suffix is applied: an f/d suffix turns an
		// integer into a real literal but must not disable the check below
		bool real_literal = (type == TokenType.REAL_LITERAL);

		switch (current[0]) {
		case 'l':
		case 'L':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
			break;
		case 'u':
		case 'U':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
					if (current < end && current[0].tolower () == 'l') {
						current++;
					}
				}
			}
			break;
		case 'f':
		case 'F':
		case 'd':
		case 'D':
			type = TokenType.REAL_LITERAL;
			current++;
			break;
		}

		// The suffix may have consumed the last byte of the file, so the
		// cursor has to be re-checked before it is dereferenced.
		if (!real_literal && current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	}

	return type;
}
/**
 * Reads the next token while the scanner is inside a string template.
 *
 * Possible results are the closing quote (CLOSE_TEMPLATE), an embedded
 * `$identifier` (IDENTIFIER), a literal piece of text
 * (TEMPLATE_STRING_LITERAL, also used for the escaped `$$`) or, for `$(`,
 * whatever read_token () returns for the first token of the embedded
 * expression.
 *
 * @param token_begin location of the first byte of the token
 * @param token_end position just past the token, with the column of its last character
 * @return the type of the token that was read
 */
public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// stays negative for pure ASCII tokens, whose width is the byte count
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '"':
			type = TokenType.CLOSE_TEMPLATE;
			current++;
			state_stack.length--;
			break;
		case '$':
			token_begin.pos++; // $ is not part of following token
			current++;
			// '\0' stands in for "end of input" so that the byte after the
			// mapped contents is never read; it matches none of the
			// branches below and ends up in the error case
			char next = (current < end) ? current[0] : '\0';
			if (next.isalpha () || next == '_') {
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
				state_stack += State.TEMPLATE_PART;
			} else if (next == '(') {
				current++;
				column += 2;
				state_stack += State.PARENS;
				return read_token (out token_begin, out token_end);
			} else if (next == '$') {
				type = TokenType.TEMPLATE_STRING_LITERAL;
				current++;
				state_stack += State.TEMPLATE_PART;
			} else {
				Report.error (get_source_reference (1), "unexpected character");
				// account for the '$' that was skipped before retrying
				column++;
				return read_template_token (out token_begin, out token_end);
			}
			break;
		default:
			type = TokenType.TEMPLATE_STRING_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '"' && current[0] != '$') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case 'v':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires at least one hex digit
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length < 1) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires at least one hex digit");
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					current++;
					line++;
					column = 1;
					// A template piece carries no quote characters, so the
					// count of characters on the new line restarts at zero.
					token_length_in_chars = 0;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end) {
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			state_stack += State.TEMPLATE_PART;
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Reads the next token from the source file.
 *
 * Delegates to read_template_token () or read_regex_token () while inside
 * a string template or regular expression literal, and emits a synthetic
 * COMMA after each piece of a template. Otherwise whitespace is skipped
 * via space () and one identifier, keyword, number, operator, character
 * literal or string literal is read. Unexpected characters are reported
 * and skipped.
 *
 * @param token_begin location of the first byte of the token
 * @param token_end position just past the token, with the column of its last character
 * @return the type of the token that was read
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	if (in_template ()) {
		return read_template_token (out token_begin, out token_end);
	} else if (in_template_part ()) {
		state_stack.length--;

		// zero-width separator between the pieces of a template
		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column - 1);

		return TokenType.COMMA;
	} else if (in_regex_literal ()) {
		return read_regex_token (out token_begin, out token_end);
	}

	space ();

	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// stays negative for pure ASCII tokens, whose width is the byte count
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@') {
		if (current < end - 1 && current[1] == '"') {
			type = TokenType.OPEN_TEMPLATE;
			current += 2;
			state_stack += State.TEMPLATE;
		} else {
			token_begin.pos++; // @ is not part of the identifier
			current++;
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else if (current[0].isdigit ()) {
		type = read_number ();
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			current++;
			state_stack += State.BRACE;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			current++;
			state_stack += State.PARENS;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			if (in_template ()) {
				// this parenthesis closes a "$(" expression of a template
				type = TokenType.COMMA;
			}
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			current++;
			state_stack += State.BRACKET;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			if (current < end && current[0] == ':') {
				type = TokenType.DOUBLE_COLON;
				current++;
			}
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			if (current < end && current[0] == '?') {
				type = TokenType.OP_COALESCING;
				current++;
			}
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			// after one of these tokens a slash opens a regular
			// expression literal, otherwise it is a division
			switch (previous) {
			case TokenType.ASSIGN:
			case TokenType.COMMA:
			case TokenType.MINUS:
			case TokenType.OP_AND:
			case TokenType.OP_COALESCING:
			case TokenType.OP_EQ:
			case TokenType.OP_GE:
			case TokenType.OP_GT:
			case TokenType.OP_LE:
			case TokenType.OP_LT:
			case TokenType.OP_NE:
			case TokenType.OP_NEG:
			case TokenType.OP_OR:
			case TokenType.OPEN_BRACE:
			case TokenType.OPEN_PARENS:
			case TokenType.PLUS:
			case TokenType.RETURN:
				type = TokenType.OPEN_REGEX_LITERAL;
				state_stack += State.REGEX_LITERAL;
				current++;
				break;
			default:
				type = TokenType.DIV;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_DIV;
					current++;
				}
				break;
			}
			break;
		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
				type = TokenType.VERBATIM_STRING_LITERAL;
				// both triple quotes
				token_length_in_chars = 6;
				current += 3;
				while (current < end - 4) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						// only the closing triple quote is left on this line
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							// skip the offending byte, otherwise this loop
							// would never make progress
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				// a multi-byte character may have moved the cursor to fewer
				// than three bytes before the end of the input
				if (current <= end - 3 && current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}
			// opening and closing quote
			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case 'v':
					case '$':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires at least one hex digit
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length < 1) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires at least one hex digit");
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					current++;
					line++;
					column = 1;
					// only the closing quote is left on this line
					token_length_in_chars = 1;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
				if (current < end && begin[0] == '\'' && current[0] != '\'') {
					// multiple characters in single character literal
					Report.error (get_source_reference (token_length_in_chars), "invalid character literal");
				}
			}
			if (current < end) {
				current++;
			} else {
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u != (unichar) (-1)) {
				current += u.to_utf8 (null);
				Report.error (get_source_reference (0), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (get_source_reference (0), "invalid UTF-8 character");
			}
			// skip the offending character and try again
			column++;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);
	previous = type;

	return type;
}
/**
 * Checks whether the bytes at begin start with keyword.
 *
 * Exactly keyword.length bytes are compared; the caller has to make sure
 * that many bytes are readable at begin.
 *
 * @param begin first byte of the text to test
 * @param keyword text to compare against
 * @return true if every byte of keyword equals the corresponding byte at begin
 */
static bool matches (char* begin, string keyword) {
	char* expected = (char*) keyword;
	long count = keyword.length;
	int index = 0;
	while (index < count) {
		if (begin[index] != expected[index]) {
			return false;
		}
		index++;
	}
	return true;
}
// Skips whitespace on the current line, never crossing a newline.
// Returns true if at least one character was skipped.
bool pp_whitespace () {
	char* start = current;
	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		current++;
		column++;
	}
	return current > start;
}
// Skips any mix of same-line whitespace and comments.
void pp_space () {
	bool skipped;
	do {
		skipped = pp_whitespace () || comment ();
	} while (skipped);
}
/**
 * Handles a preprocessing directive; the cursor is on the '#'.
 *
 * A "#!" at the very start of the file is skipped up to the end of the
 * line. Otherwise the directive name is read and dispatched to the
 * matching parse_pp_* method. If the innermost conditional then asks for
 * its section to be skipped, input is consumed up to the next line that
 * starts with '#' (leading whitespace allowed), leaving the cursor at the
 * beginning of that line.
 */
void pp_directive () {
	// hash sign
	current++;
	column++;

	bool hash_bang = (line == 1 && column == 2 && current < end && current[0] == '!');
	if (hash_bang) {
		// hash bang: #!
		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}
		return;
	}

	pp_space ();

	// read the directive name
	char* name = current;
	int name_length = 0;
	while (current < end && current[0].isalnum ()) {
		current++;
		column++;
		name_length++;
	}

	bool known = true;
	switch (name_length) {
	case 2:
		if (matches (name, "if")) {
			parse_pp_if ();
		} else {
			known = false;
		}
		break;
	case 4:
		if (matches (name, "elif")) {
			parse_pp_elif ();
		} else if (matches (name, "else")) {
			parse_pp_else ();
		} else {
			known = false;
		}
		break;
	case 5:
		if (matches (name, "endif")) {
			parse_pp_endif ();
		} else {
			known = false;
		}
		break;
	default:
		known = false;
		break;
	}
	if (!known) {
		Report.error (get_source_reference (-name_length, name_length), "syntax error, invalid preprocessing directive");
	}

	int depth = conditional_stack.length;
	if (depth == 0 || !conditional_stack[depth - 1].skip_section) {
		return;
	}

	// skip lines until next preprocessing directive
	bool at_line_start = false;
	while (current < end) {
		char c = current[0];
		if (at_line_start && c == '#') {
			// go back to begin of line
			current -= (column - 1);
			column = 1;
			return;
		}
		if (c == '\n') {
			line++;
			// becomes 1 through the increment below
			column = 0;
			at_line_start = true;
		} else if (!c.isspace ()) {
			at_line_start = false;
		}
		current++;
		column++;
	}
}
// Skips trailing whitespace and comments and reports an error unless the
// directive is then followed by a newline.
void pp_eol () {
	pp_space ();

	bool at_newline = (current < end && current[0] == '\n');
	if (!at_newline) {
		Report.error (get_source_reference (0), "syntax error, expected newline");
	}
}
// Parses the remainder of an #if directive and opens a new conditional.
void parse_pp_if () {
	pp_space ();
	bool condition = parse_pp_expression ();
	pp_eol ();

	conditional_stack += Conditional ();

	int top = conditional_stack.length - 1;
	// a nested conditional can only be active if its parent is not skipped
	bool parent_active = (top == 0 || !conditional_stack[top - 1].skip_section);

	if (condition && parent_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an #elif directive.
 *
 * The condition is always parsed. An error is reported when there is no
 * open conditional or when the innermost conditional has its else_found
 * flag set.
 */
void parse_pp_elif () {
	pp_space ();
	bool condition = parse_pp_expression ();
	pp_eol ();

	int depth = conditional_stack.length;
	if (depth == 0 || conditional_stack[depth - 1].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #elif");
		return;
	}

	bool already_matched = conditional_stack[depth - 1].matched;
	bool parent_active = (depth == 1 || !conditional_stack[depth - 2].skip_section);

	if (condition && !already_matched && parent_active) {
		// first branch of this conditional whose condition holds
		conditional_stack[depth - 1].matched = true;
		conditional_stack[depth - 1].skip_section = false;
	} else {
		// ignore everything up to the next preprocessing directive
		conditional_stack[depth - 1].skip_section = true;
	}
}
/**
 * Handles an #else directive.
 *
 * Reports an error when there is no open conditional or when the
 * innermost conditional has already seen an #else. Otherwise the
 * section following #else is processed only if no earlier branch of the
 * same conditional matched and the enclosing conditional (if any) is
 * not being skipped.
 */
void parse_pp_else () {
	pp_eol ();

	if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #else");
		return;
	}

	// Remember that this conditional has had its #else, so that the
	// else_found checks in parse_pp_elif () and in this method reject a
	// following #elif or a second #else instead of silently skipping it.
	conditional_stack[conditional_stack.length - 1].else_found = true;

	if (!conditional_stack[conditional_stack.length - 1].matched
	    && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
		// no earlier branch matched => process code within else
		conditional_stack[conditional_stack.length - 1].matched = true;
		conditional_stack[conditional_stack.length - 1].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[conditional_stack.length - 1].skip_section = true;
	}
}
/**
 * Handles an #endif directive by dropping the innermost entry of the
 * conditional stack, or reports an error if the stack is empty.
 */
void parse_pp_endif () {
	pp_eol ();

	if (conditional_stack.length > 0) {
		// close the innermost conditional
		conditional_stack.length--;
		return;
	}

	Report.error (get_source_reference (0), "syntax error, unexpected #endif");
}
/**
 * Reads an identifier at the current position and evaluates it.
 *
 * @return true for the literal "true", false for the literal "false",
 *         otherwise the result of asking the code context whether the
 *         identifier is defined; false (after reporting an error) when
 *         no identifier character is found
 */
bool parse_pp_symbol () {
	char* start = current;
	while (current < end && is_ident_char (current[0])) {
		current++;
		column++;
	}

	long len = (long) (current - start);
	if (len == 0) {
		Report.error (get_source_reference (0), "syntax error, expected identifier");
		return false;
	}

	string identifier = ((string) start).substring (0, len);
	switch (identifier) {
	case "true":
		return true;
	case "false":
		return false;
	default:
		return source_file.context.is_defined (identifier);
	}
}
/**
 * Parses a primary preprocessor expression: either a symbol or a
 * parenthesized expression.
 *
 * @return the value of the expression, or false after reporting an
 *         error when neither form is found
 */
bool parse_pp_primary_expression () {
	if (current < end && is_ident_char (current[0])) {
		return parse_pp_symbol ();
	}

	if (current < end && current[0] == '(') {
		// step over the opening parenthesis
		current++;
		column++;
		pp_space ();
		bool inner = parse_pp_expression ();
		pp_space ();
		if (current >= end || current[0] != ')') {
			Report.error (get_source_reference (0), "syntax error, expected `)'");
		} else {
			current++;
			column++;
		}
		return inner;
	}

	// end of input or a character that cannot start a primary expression
	Report.error (get_source_reference (0), "syntax error, expected identifier");
	return false;
}
/**
 * Parses a primary expression preceded by any number of '!' operators.
 *
 * @return the operand's value, negated once per leading '!'
 */
bool parse_pp_unary_expression () {
	bool negate = false;

	// consume every leading '!' and track the parity of negations
	while (current < end && current[0] == '!') {
		current++;
		column++;
		pp_space ();
		negate = !negate;
	}

	bool operand = parse_pp_primary_expression ();
	return negate ? !operand : operand;
}
/**
 * Parses a chain of unary expressions joined by "==" or "!=",
 * evaluated from left to right.
 */
bool parse_pp_equality_expression () {
	bool result = parse_pp_unary_expression ();
	pp_space ();

	// both operators are two characters long and end in '='
	while (current < end - 1 && current[1] == '=' && (current[0] == '=' || current[0] == '!')) {
		bool is_equal_op = (current[0] == '=');
		current += 2;
		column += 2;
		pp_space ();
		bool operand = parse_pp_unary_expression ();
		result = is_equal_op ? (result == operand) : (result != operand);
	}

	return result;
}
/**
 * Parses a chain of equality expressions joined by "&&".
 *
 * Every operand is parsed even when the result is already false.
 */
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_space ();

	while (true) {
		bool at_operator = (current < end - 1 && current[0] == '&' && current[1] == '&');
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();
		bool operand = parse_pp_equality_expression ();
		if (!operand) {
			result = false;
		}
	}

	return result;
}
/**
 * Parses a chain of and-expressions joined by "||".
 *
 * Every operand is parsed even when the result is already true.
 */
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_space ();

	while (true) {
		bool at_operator = (current < end - 1 && current[0] == '|' && current[1] == '|');
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();
		bool operand = parse_pp_and_expression ();
		if (operand) {
			result = true;
		}
	}

	return result;
}
/**
 * Entry point for preprocessor expressions; delegates to the
 * or-expression level.
 */
bool parse_pp_expression () {
	bool value = parse_pp_or_expression ();
	return value;
}
/**
 * Skips whitespace and hands over to pp_directive () when a '#' is
 * found at the beginning of a line.
 *
 * @return true if at least one whitespace character was consumed or a
 *         preprocessing directive was handled
 */
bool whitespace () {
	char* start = current;
	bool at_line_start = (column == 1);

	while (current < end && current[0].isspace ()) {
		if (current[0] == '\n') {
			line++;
			// becomes 1 again through the increment below
			column = 0;
			at_line_start = true;
		}
		current++;
		column++;
	}

	if (at_line_start && current < end && current[0] == '#') {
		pp_directive ();
		return true;
	}

	// something was skipped exactly when the position moved
	return current != start;
}
/**
 * Skips a single-line or multi-line comment at the current position.
 *
 * A comment text is handed to push_comment () when a source reference
 * was taken for it: for single-line comments only if file_comment is
 * set, for multi-line comments if file_comment is set or the comment
 * starts with a third '*' character.
 *
 * @param file_comment whether comments are collected as file comments;
 *                     in this mode a multi-line comment whose third
 *                     character is '*' is left untouched
 * @return true if a comment was consumed (including an unterminated
 *         multi-line comment, for which an error is reported), false
 *         if there is no comment here or it was left untouched
 */
bool comment (bool file_comment = false) {
	if (current == null
	    || current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		SourceReference source_reference = null;
		if (file_comment) {
			source_reference = get_source_reference (0);
		}

		// single-line comment
		current += 2;
		char* begin = current;

		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}

		if (source_reference != null) {
			push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
		}
	} else {
		SourceReference source_reference = null;

		// Only two characters are guaranteed to be available at this
		// point, so the third one may be inspected only if it still lies
		// inside the buffer (the input may end right after the opener).
		bool third_is_star = (current + 2 < end && current[2] == '*');

		if (file_comment && third_is_star) {
			return false;
		}

		if (third_is_star || file_comment) {
			source_reference = get_source_reference (0);
		}

		current += 2;
		column += 2;

		char* begin = current;
		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}

		// No terminator was found. The position is end - 1 when the loop
		// ran out of input and end when the opener was the very last
		// thing in the input; both cases must be reported, and the
		// position must not be moved beyond the end of the buffer.
		if (current >= end - 1) {
			Report.error (get_source_reference (0), "syntax error, expected */");
			return true;
		}

		if (source_reference != null) {
			push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
		}

		// step over the closing "*/"
		current += 2;
		column += 2;
	}

	return true;
}
/**
 * Repeatedly skips whitespace and comments until neither is found at
 * the current position.
 */
void space () {
	while (true) {
		if (whitespace ()) {
			continue;
		}
		if (comment ()) {
			continue;
		}
		break;
	}
}
/**
 * Skips whitespace and comments at the current position, with comments
 * processed in file-comment mode, until neither is found.
 */
public void parse_file_comments () {
	bool progressed;
	do {
		progressed = whitespace () || comment (true);
	} while (progressed);
}
/**
 * Records a scanned comment.
 *
 * A comment whose text starts with '*' becomes the pending comment; a
 * pending comment that is replaced this way is added to the source
 * file. In file-comment mode the comment is always added to the source
 * file and the pending comment is cleared.
 *
 * @param comment_item     comment text without the comment delimiters
 * @param source_reference location of the comment
 * @param file_comment     whether the comment is collected as a file comment
 */
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool starts_with_star = (comment_item[0] == '*');
	if (starts_with_star) {
		if (_comment != null) {
			// extra doc comment, keep it with the source file comments
			source_file.add_comment (_comment);
		}
		_comment = new Comment (comment_item, source_reference);
	}

	if (!file_comment) {
		return;
	}

	source_file.add_comment (new Comment (comment_item, source_reference));
	_comment = null;
}
/**
 * Hands out the pending comment and forgets it.
 *
 * @return the saved comment, or null if there is none
 */
public Comment? pop_comment () {
	Comment? pending = _comment;
	_comment = null;
	return pending;
}