vapi: Update GIR-based bindings
[vala-gnome.git] / vala / valascanner.vala
blob8b50a59d4eb94c90b80796c7a316b886f334e867
/* valascanner.vala
 *
 * Copyright (C) 2008-2012  Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Author:
 * 	Jürg Billeter <j@bitron.ch>
 * 	Jukka-Pekka Iivonen <jp0409@jippii.fi>
 */
24 using GLib;
/**
 * Lexical scanner for Vala source files.
 */
29 public class Vala.Scanner {
/**
 * Kinds of nesting context recorded on state_stack while tokenizing.
 */
enum State {
	PARENS,
	BRACE,
	BRACKET,
	TEMPLATE,
	TEMPLATE_PART,
	REGEX_LITERAL
}

/**
 * Bookkeeping for one open conditional-compilation block.
 */
struct Conditional {
	// set once a branch of this conditional has been selected
	public bool matched;
	// consulted when validating a following #elif / #else
	public bool else_found;
	// set while the lines of the current branch are to be skipped
	public bool skip_section;
}

/**
 * Source file whose mapped contents are being scanned.
 */
public SourceFile source_file { get; private set; }

// type of the token most recently produced by read_token ()
TokenType previous;

// scan cursor and one-past-the-end pointer into the mapped contents
char* current;
char* end;

// position of the cursor; both start at 1
int line;
int column;

Comment _comment;

// innermost entry is the last element of each stack
Conditional[] conditional_stack;
State[] state_stack;
/**
 * Creates a scanner positioned at the first byte of the mapped
 * contents of the given source file, at line 1, column 1.
 *
 * @param source_file the file to tokenize
 */
public Scanner (SourceFile source_file) {
	this.source_file = source_file;

	// the cursor starts at the beginning of the mapping, end is one past its last byte
	current = source_file.get_mapped_contents ();
	end = current + source_file.get_mapped_length ();

	line = 1;
	column = 1;
}
/**
 * Moves the scan cursor to a previously recorded location and discards
 * all conditional-compilation and nesting state.
 *
 * @param location position, line and column to continue scanning from
 */
public void seek (SourceLocation location) {
	// both stacks describe context of the old position and are dropped
	state_stack = null;
	conditional_stack = null;

	current = location.pos;
	line = location.line;
	column = location.column;
}
/**
 * Returns whether the innermost entry of state_stack is State.TEMPLATE.
 */
bool in_template () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE;
}
/**
 * Returns whether the innermost entry of state_stack is State.TEMPLATE_PART.
 */
bool in_template_part () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE_PART;
}
/**
 * Returns whether the innermost entry of state_stack is State.REGEX_LITERAL.
 */
bool in_regex_literal () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.REGEX_LITERAL;
}
/**
 * Returns whether the given byte may appear inside an identifier:
 * an underscore or anything accepted by char.isalnum ().
 */
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/**
 * Builds a source reference on the current line, relative to the
 * current cursor position.
 *
 * @param offset value added to the current column for the start location
 * @param length number of bytes/columns covered by the reference
 * @return a new reference into source_file
 */
SourceReference get_source_reference (int offset, int length = 0) {
	int first_column = column + offset;
	var first = SourceLocation (current, line, first_column);
	var last = SourceLocation (current + length, line, first_column + length);
	return new SourceReference (source_file, first, last);
}
/**
 * Reads the next token while inside a regular expression literal.
 *
 * At the closing '/' the delimiter and any trailing modifiers
 * (i, s, m, x) form one CLOSE_REGEX_LITERAL token and the regex state is
 * popped. Otherwise the literal body up to the next unescaped '/' is
 * returned as REGEX_LITERAL. If the body runs into a newline or the end
 * of input, an error is reported, the regex state is popped and scanning
 * continues with read_token ().
 *
 * @param token_begin set to the location of the first byte of the token
 * @param token_end set to the location of the end of the token
 * @return the type of the token that was read
 */
public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// stays negative when the token is pure ASCII and its byte length
	// can be used as the column advance
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '/':
			type = TokenType.CLOSE_REGEX_LITERAL;
			current++;
			state_stack.length--;
			var fl_i = false;
			var fl_s = false;
			var fl_m = false;
			var fl_x = false;
			// The delimiter and the modifiers are ASCII, so token_length_in_chars
			// is deliberately left at -1: the column then advances by the byte
			// length of the whole token.
			while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
				// column offset of this modifier relative to the token start
				int modifier_offset = (int) (current - begin);
				switch (current[0]) {
				case 'i':
					if (fl_i) {
						Report.error (get_source_reference (modifier_offset), "modifier 'i' used more than once");
					}
					fl_i = true;
					break;
				case 's':
					if (fl_s) {
						Report.error (get_source_reference (modifier_offset), "modifier 's' used more than once");
					}
					fl_s = true;
					break;
				case 'm':
					if (fl_m) {
						Report.error (get_source_reference (modifier_offset), "modifier 'm' used more than once");
					}
					fl_m = true;
					break;
				case 'x':
					if (fl_x) {
						Report.error (get_source_reference (modifier_offset), "modifier 'x' used more than once");
					}
					fl_x = true;
					break;
				}
				current++;
			}
			break;
		default:
			type = TokenType.REGEX_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '/') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'': case '"': case '\\': case '/':
					case '^': case '$': case '.':
					case '[': case ']': case '{': case '}': case '(': case ')':
					case '?': case '*': case '+': case '-':
					case '#': case '&': case '~': case ':': case ';':
					case '<': case '>': case '|': case '%': case '=': case '@':
					case '0':
					case 'b': case 'B': case 'f': case 'n': case 'r': case 't': case 'v':
					case 'a': case 'A': case 'p': case 'P': case 'e':
					case 'd': case 'D': case 's': case 'S': case 'w': case 'W':
					case 'G': case 'z': case 'Z':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires at least one hex digit
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length < 1) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires at least one hex digit");
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					// regex literals may not span lines
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				// NOTE(review): the message names '"' although a regex literal is
				// closed by '/'; text kept unchanged pending confirmation
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Classifies an identifier-like byte sequence as a keyword or a plain
 * identifier.
 *
 * Candidate keywords are grouped by length, so every comparison covers
 * exactly len bytes of the input.
 *
 * @param begin pointer to the first byte of the sequence
 * @param len number of bytes in the sequence
 * @return the keyword token type on an exact match, TokenType.IDENTIFIER otherwise
 */
public static TokenType get_identifier_or_keyword (char* begin, int len) {
	switch (len) {
	case 2:
		if (matches (begin, "as")) return TokenType.AS;
		if (matches (begin, "do")) return TokenType.DO;
		if (matches (begin, "if")) return TokenType.IF;
		if (matches (begin, "in")) return TokenType.IN;
		if (matches (begin, "is")) return TokenType.IS;
		break;
	case 3:
		if (matches (begin, "for")) return TokenType.FOR;
		if (matches (begin, "get")) return TokenType.GET;
		if (matches (begin, "new")) return TokenType.NEW;
		if (matches (begin, "out")) return TokenType.OUT;
		if (matches (begin, "ref")) return TokenType.REF;
		if (matches (begin, "set")) return TokenType.SET;
		if (matches (begin, "try")) return TokenType.TRY;
		if (matches (begin, "var")) return TokenType.VAR;
		break;
	case 4:
		if (matches (begin, "base")) return TokenType.BASE;
		if (matches (begin, "case")) return TokenType.CASE;
		if (matches (begin, "else")) return TokenType.ELSE;
		if (matches (begin, "enum")) return TokenType.ENUM;
		if (matches (begin, "lock")) return TokenType.LOCK;
		if (matches (begin, "null")) return TokenType.NULL;
		if (matches (begin, "this")) return TokenType.THIS;
		if (matches (begin, "true")) return TokenType.TRUE;
		if (matches (begin, "void")) return TokenType.VOID;
		if (matches (begin, "weak")) return TokenType.WEAK;
		break;
	case 5:
		if (matches (begin, "async")) return TokenType.ASYNC;
		if (matches (begin, "break")) return TokenType.BREAK;
		if (matches (begin, "catch")) return TokenType.CATCH;
		if (matches (begin, "class")) return TokenType.CLASS;
		if (matches (begin, "const")) return TokenType.CONST;
		if (matches (begin, "false")) return TokenType.FALSE;
		if (matches (begin, "owned")) return TokenType.OWNED;
		if (matches (begin, "throw")) return TokenType.THROW;
		if (matches (begin, "using")) return TokenType.USING;
		if (matches (begin, "while")) return TokenType.WHILE;
		if (matches (begin, "yield")) return TokenType.YIELD;
		break;
	case 6:
		if (matches (begin, "delete")) return TokenType.DELETE;
		if (matches (begin, "extern")) return TokenType.EXTERN;
		if (matches (begin, "inline")) return TokenType.INLINE;
		if (matches (begin, "params")) return TokenType.PARAMS;
		if (matches (begin, "public")) return TokenType.PUBLIC;
		if (matches (begin, "return")) return TokenType.RETURN;
		if (matches (begin, "sealed")) return TokenType.SEALED;
		if (matches (begin, "signal")) return TokenType.SIGNAL;
		if (matches (begin, "sizeof")) return TokenType.SIZEOF;
		if (matches (begin, "static")) return TokenType.STATIC;
		if (matches (begin, "struct")) return TokenType.STRUCT;
		if (matches (begin, "switch")) return TokenType.SWITCH;
		if (matches (begin, "throws")) return TokenType.THROWS;
		if (matches (begin, "typeof")) return TokenType.TYPEOF;
		break;
	case 7:
		if (matches (begin, "default")) return TokenType.DEFAULT;
		if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
		if (matches (begin, "ensures")) return TokenType.ENSURES;
		if (matches (begin, "finally")) return TokenType.FINALLY;
		if (matches (begin, "foreach")) return TokenType.FOREACH;
		if (matches (begin, "private")) return TokenType.PRIVATE;
		if (matches (begin, "unowned")) return TokenType.UNOWNED;
		if (matches (begin, "virtual")) return TokenType.VIRTUAL;
		break;
	case 8:
		if (matches (begin, "abstract")) return TokenType.ABSTRACT;
		if (matches (begin, "continue")) return TokenType.CONTINUE;
		if (matches (begin, "delegate")) return TokenType.DELEGATE;
		if (matches (begin, "internal")) return TokenType.INTERNAL;
		if (matches (begin, "override")) return TokenType.OVERRIDE;
		if (matches (begin, "requires")) return TokenType.REQUIRES;
		if (matches (begin, "volatile")) return TokenType.VOLATILE;
		break;
	case 9:
		if (matches (begin, "construct")) return TokenType.CONSTRUCT;
		if (matches (begin, "interface")) return TokenType.INTERFACE;
		if (matches (begin, "namespace")) return TokenType.NAMESPACE;
		if (matches (begin, "protected")) return TokenType.PROTECTED;
		break;
	case 11:
		if (matches (begin, "errordomain")) return TokenType.ERRORDOMAIN;
		break;
	}

	// no keyword of this length matched
	return TokenType.IDENTIFIER;
}
/**
 * Consumes a numeric literal starting at the cursor.
 *
 * Handles hexadecimal and decimal integers, an optional fractional part,
 * an optional exponent and the type suffixes l/ll, u/ul/ull, f and d.
 * A sequence that started as an integer but continues with identifier
 * characters is consumed as a whole and reported as an identifier.
 *
 * @return INTEGER_LITERAL, REAL_LITERAL or IDENTIFIER
 */
TokenType read_number () {
	var type = TokenType.INTEGER_LITERAL;

	// integer part
	if (current < end - 2 && current[0] == '0'
	    && current[1] == 'x' && current[2].isxdigit ()) {
		// hexadecimal integer literal
		current += 2;
		while (current < end && current[0].isxdigit ()) {
			current++;
		}
	} else {
		// decimal number
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// fractional part
	if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
		type = TokenType.REAL_LITERAL;
		current++;
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// exponent part
	if (current < end && current[0].tolower () == 'e') {
		type = TokenType.REAL_LITERAL;
		current++;
		if (current < end && (current[0] == '+' || current[0] == '-')) {
			current++;
		}
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// type suffix
	if (current < end) {
		// remember the classification before an f/d suffix may change it
		bool real_literal = (type == TokenType.REAL_LITERAL);

		switch (current[0]) {
		case 'l':
		case 'L':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
			break;
		case 'u':
		case 'U':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
					if (current < end && current[0].tolower () == 'l') {
						current++;
					}
				}
			}
			break;
		case 'f':
		case 'F':
		case 'd':
		case 'D':
			type = TokenType.REAL_LITERAL;
			current++;
			break;
		}

		// the suffix may have consumed the last byte of the input, so the
		// cursor has to be re-checked before it is dereferenced again
		if (!real_literal && current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	}

	return type;
}
/**
 * Reads the next token while inside a string template (@"...").
 *
 * A '"' closes the template. A '$' introduces an embedded identifier,
 * a parenthesized expression (scanning is handed to read_token ()) or,
 * when doubled, a literal '$'. Anything else is consumed up to the next
 * '"' or '$' as TEMPLATE_STRING_LITERAL. Reaching the end of input inside
 * literal text reports an error, pops the template state and continues
 * with read_token ().
 *
 * @param token_begin set to the location of the first byte of the token
 * @param token_end set to the location of the end of the token
 * @return the type of the token that was read
 */
public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// stays negative when the byte length of the token equals its column width
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '"':
			type = TokenType.CLOSE_TEMPLATE;
			current++;
			state_stack.length--;
			break;
		case '$':
			token_begin.pos++; // $ is not part of following token
			current++;
			// '$' may be the last byte of the input, so every look at the
			// following byte is guarded by current < end
			if (current < end && (current[0].isalpha () || current[0] == '_')) {
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
				state_stack += State.TEMPLATE_PART;
			} else if (current < end && current[0] == '(') {
				current++;
				column += 2;
				state_stack += State.PARENS;
				return read_token (out token_begin, out token_end);
			} else if (current < end && current[0] == '$') {
				type = TokenType.TEMPLATE_STRING_LITERAL;
				current++;
				state_stack += State.TEMPLATE_PART;
			} else {
				Report.error (get_source_reference (1), "unexpected character");
				return read_template_token (out token_begin, out token_end);
			}
			break;
		default:
			type = TokenType.TEMPLATE_STRING_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '"' && current[0] != '$') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case 'v':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires at least one hex digit
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length < 1) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires at least one hex digit");
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					// template text may span lines: restart column counting
					current++;
					line++;
					column = 1;
					token_length_in_chars = 1;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end) {
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			state_stack += State.TEMPLATE_PART;
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Reads the next token from the source file.
 *
 * Inside a template or regex literal the work is delegated to
 * read_template_token () / read_regex_token (); after a template part an
 * empty COMMA token is emitted. Otherwise space () is called first and
 * one identifier, keyword, number, operator, punctuation mark or
 * string/character literal is consumed. Unexpected or invalid bytes are
 * reported and skipped, and scanning continues with the following token.
 *
 * The type of the returned token is remembered in previous; it decides
 * whether a later '/' opens a regex literal or is a division operator.
 *
 * @param token_begin set to the location of the first byte of the token
 * @param token_end set to the location of the end of the token
 * @return the type of the token that was read
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	if (in_template ()) {
		return read_template_token (out token_begin, out token_end);
	} else if (in_template_part ()) {
		state_stack.length--;

		// empty token separating two parts of a template
		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column - 1);

		return TokenType.COMMA;
	} else if (in_regex_literal ()) {
		return read_regex_token (out token_begin, out token_end);
	}

	space ();

	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// stays negative when the byte length of the token equals its column width
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@') {
		if (current < end - 1 && current[1] == '"') {
			type = TokenType.OPEN_TEMPLATE;
			current += 2;
			state_stack += State.TEMPLATE;
		} else {
			token_begin.pos++; // @ is not part of the identifier
			current++;
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else if (current[0].isdigit ()) {
		type = read_number ();
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			current++;
			state_stack += State.BRACE;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			current++;
			state_stack += State.PARENS;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			if (in_template ()) {
				// closing parenthesis of a $(...) expression inside a template
				type = TokenType.COMMA;
			}
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			current++;
			state_stack += State.BRACKET;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			if (current < end && current[0] == ':') {
				type = TokenType.DOUBLE_COLON;
				current++;
			}
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			if (current < end && current[0] == '?') {
				type = TokenType.OP_COALESCING;
				current++;
			}
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			// after these tokens a '/' starts a regex literal,
			// anywhere else it is the division operator
			switch (previous) {
			case TokenType.ASSIGN:
			case TokenType.COMMA:
			case TokenType.MINUS:
			case TokenType.OP_AND:
			case TokenType.OP_COALESCING:
			case TokenType.OP_EQ:
			case TokenType.OP_GE:
			case TokenType.OP_GT:
			case TokenType.OP_LE:
			case TokenType.OP_LT:
			case TokenType.OP_NE:
			case TokenType.OP_NEG:
			case TokenType.OP_OR:
			case TokenType.OPEN_BRACE:
			case TokenType.OPEN_PARENS:
			case TokenType.PLUS:
			case TokenType.RETURN:
				type = TokenType.OPEN_REGEX_LITERAL;
				state_stack += State.REGEX_LITERAL;
				current++;
				break;
			default:
				type = TokenType.DIV;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_DIV;
					current++;
				}
				break;
			}
			break;
		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
				type = TokenType.VERBATIM_STRING_LITERAL;
				token_length_in_chars = 6;
				current += 3;
				while (current < end - 4) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							// step over the offending byte; without this the loop
							// would never make progress on invalid input
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}

			// character literal or regular string literal;
			// begin[0] is the delimiter that closes it
			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case 'v':
					case '$':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires at least one hex digit
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length < 1) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires at least one hex digit");
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					current++;
					line++;
					column = 1;
					token_length_in_chars = 1;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
				if (current < end && begin[0] == '\'' && current[0] != '\'') {
					// multiple characters in single character literal
					Report.error (get_source_reference (token_length_in_chars), "invalid character literal");
				}
			}
			if (current < end) {
				// consume the closing delimiter
				current++;
			} else {
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u != (unichar) (-1)) {
				current += u.to_utf8 (null);
				Report.error (get_source_reference (0), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (get_source_reference (0), "invalid UTF-8 character");
			}
			// skip the offending character and try again
			column++;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);
	previous = type;

	return type;
}
/**
 * Returns whether the bytes at begin start with the given keyword.
 *
 * Exactly keyword.length bytes are compared, stopping at the first
 * mismatch; the caller has to make sure that many bytes are readable.
 *
 * @param begin pointer to the candidate bytes
 * @param keyword text to compare against
 */
static bool matches (char* begin, string keyword) {
	char* expected = (char*) keyword;
	long count = keyword.length;

	int pos = 0;
	while (pos < count) {
		if (begin[pos] != expected[pos]) {
			return false;
		}
		pos++;
	}

	return true;
}
/**
 * Skips whitespace on the current line, stopping in front of a newline.
 *
 * @return true if at least one byte was skipped
 */
bool pp_whitespace () {
	char* start = current;

	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		current++;
		column++;
	}

	return current != start;
}
/**
 * Repeatedly calls pp_whitespace () and comment () until neither of
 * them reports progress.
 */
void pp_space () {
	bool consumed;
	do {
		// comment () is only tried when no whitespace was skipped
		consumed = pp_whitespace () || comment ();
	} while (consumed);
}
/**
 * Handles a preprocessing directive; the cursor is on the '#'.
 *
 * A "#!" at the very start of the file is skipped up to the end of its
 * line. Otherwise the directive name is read and dispatched to the
 * matching parse_pp_* method. If the innermost conditional then requests
 * skipping, input is consumed up to the start of the next line whose
 * first non-whitespace byte is '#'.
 */
void pp_directive () {
	// hash sign
	current++;
	column++;

	bool hash_bang = line == 1 && column == 2 && current < end && current[0] == '!';
	if (hash_bang) {
		// hash bang: #!
		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}
		return;
	}

	pp_space ();

	// directive name
	char* name = current;
	int name_len = 0;
	while (current < end && current[0].isalnum ()) {
		current++;
		column++;
		name_len++;
	}

	if (name_len == 2 && matches (name, "if")) {
		parse_pp_if ();
	} else if (name_len == 4 && matches (name, "elif")) {
		parse_pp_elif ();
	} else if (name_len == 4 && matches (name, "else")) {
		parse_pp_else ();
	} else if (name_len == 5 && matches (name, "endif")) {
		parse_pp_endif ();
	} else {
		Report.error (get_source_reference (-name_len, name_len), "syntax error, invalid preprocessing directive");
	}

	int depth = conditional_stack.length;
	if (depth == 0 || !conditional_stack[depth - 1].skip_section) {
		return;
	}

	// skip lines until next preprocessing directive
	bool at_line_start = false;
	while (current < end) {
		if (at_line_start && current[0] == '#') {
			// go back to begin of line
			current -= (column - 1);
			column = 1;
			return;
		}

		if (current[0] == '\n') {
			line++;
			// the increment below turns this into column 1 of the new line
			column = 0;
			at_line_start = true;
		} else if (!current[0].isspace ()) {
			at_line_start = false;
		}

		current++;
		column++;
	}
}
/**
 * Skips trailing whitespace and comments of a directive line and reports
 * an error unless the cursor then stands on a newline.
 */
void pp_eol () {
	pp_space ();

	bool at_newline = current < end && current[0] == '\n';
	if (!at_newline) {
		Report.error (get_source_reference (0), "syntax error, expected newline");
	}
}
/**
 * Handles "#if": evaluates the condition and pushes a new entry onto
 * conditional_stack that is either marked as matched or set to skip.
 */
void parse_pp_if () {
	pp_space ();
	bool condition = parse_pp_expression ();
	pp_eol ();

	conditional_stack += Conditional ();
	int top = conditional_stack.length - 1;

	// the branch is only taken if the enclosing conditional is not being skipped
	bool parent_active = top == 0 || !conditional_stack[top - 1].skip_section;

	if (condition && parent_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles "#elif": evaluates the condition and selects this branch only
 * if no earlier branch of the innermost conditional matched and the
 * enclosing conditional is not being skipped.
 *
 * An error is reported when no conditional is open or its else_found
 * flag is set.
 */
void parse_pp_elif () {
	pp_space ();
	bool condition = parse_pp_expression ();
	pp_eol ();

	int top = conditional_stack.length - 1;
	if (top < 0 || conditional_stack[top].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #elif");
		return;
	}

	bool parent_active = top == 0 || !conditional_stack[top - 1].skip_section;

	if (condition && !conditional_stack[top].matched && parent_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an #else directive.
 *
 * The #else branch is processed only if no earlier branch of the same
 * conditional matched and the enclosing conditional, if any, is not
 * being skipped. An #else without an open #if, or a second #else for
 * the same #if, is reported as a syntax error.
 */
void parse_pp_else () {
	pp_eol ();

	if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #else");
		return;
	}

	int top = conditional_stack.length - 1;

	// record the #else; the else_found checks in the #elif and #else
	// handlers rely on this flag to reject directives that follow it
	conditional_stack[top].else_found = true;

	if (!conditional_stack[top].matched
	    && (conditional_stack.length == 1 || !conditional_stack[top - 1].skip_section)) {
		// nothing matched so far => process code within else
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an #endif directive by dropping the innermost entry of the
 * conditional stack. An #endif without an open #if is an error.
 */
void parse_pp_endif () {
	pp_eol ();

	if (conditional_stack.length > 0) {
		// close the innermost conditional
		conditional_stack.length--;
	} else {
		Report.error (get_source_reference (0), "syntax error, unexpected #endif");
	}
}
/**
 * Reads a symbol of a preprocessing expression and evaluates it.
 *
 * The names "true" and "false" evaluate to themselves; every other
 * name is looked up with source_file.context.is_defined ().
 *
 * @return value of the symbol, or false if no identifier was found
 */
bool parse_pp_symbol () {
	char* start = current;
	int length = 0;

	while (current < end && is_ident_char (current[0])) {
		current++;
		column++;
		length++;
	}

	if (length == 0) {
		Report.error (get_source_reference (0), "syntax error, expected identifier");
		return false;
	}

	string identifier = ((string) start).substring (0, length);

	switch (identifier) {
	case "true":
		return true;
	case "false":
		return false;
	default:
		return source_file.context.is_defined (identifier);
	}
}
/**
 * Parses a primary preprocessing expression: either a symbol or a
 * parenthesized expression.
 *
 * @return value of the expression, or false after a syntax error where
 *         no expression could be read
 */
bool parse_pp_primary_expression () {
	if (current < end && is_ident_char (current[0])) {
		return parse_pp_symbol ();
	}

	if (current >= end || current[0] != '(') {
		Report.error (get_source_reference (0), "syntax error, expected identifier");
		return false;
	}

	// parenthesized sub-expression
	current++;
	column++;
	pp_space ();
	bool result = parse_pp_expression ();
	pp_space ();

	bool closed = current < end && current[0] == ')';
	if (!closed) {
		// keep the value of the inner expression even though the
		// closing parenthesis is missing
		Report.error (get_source_reference (0), "syntax error, expected `)'");
		return result;
	}

	current++;
	column++;
	return result;
}
/**
 * Parses a primary expression preceded by any number of `!' operators.
 *
 * @return value of the operand, negated once per `!' read
 */
bool parse_pp_unary_expression () {
	bool negate = false;

	// consume all leading negations before reading the operand
	while (current < end && current[0] == '!') {
		current++;
		column++;
		pp_space ();
		negate = !negate;
	}

	bool operand = parse_pp_primary_expression ();
	return negate ? !operand : operand;
}
/**
 * Parses a chain of unary expressions joined by `==' and `!='.
 *
 * The chain is evaluated from left to right.
 *
 * @return value of the whole chain
 */
bool parse_pp_equality_expression () {
	bool left = parse_pp_unary_expression ();

	while (true) {
		// pp_space () is called before every operator check, not only
		// after the first operand, so that anything it skips between an
		// operand and a following operator cannot end the chain early
		// (NOTE(review): assumes pp_space () skips blanks inside the
		// directive line, as its other uses here suggest)
		pp_space ();

		if (current < end - 1 && current[0] == '=' && current[1] == '=') {
			current += 2;
			column += 2;
			pp_space ();
			bool right = parse_pp_unary_expression ();
			left = (left == right);
		} else if (current < end - 1 && current[0] == '!' && current[1] == '=') {
			current += 2;
			column += 2;
			pp_space ();
			bool right = parse_pp_unary_expression ();
			left = (left != right);
		} else {
			break;
		}
	}

	return left;
}
/**
 * Parses a chain of equality expressions joined by `&&'.
 *
 * @return true if every operand of the chain is true
 */
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_space ();

	while (true) {
		bool at_operator = current < end - 1 && current[0] == '&' && current[1] == '&';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		// the right operand is always parsed, even if the result is
		// already known to be false, so that the input is consumed
		bool right = parse_pp_equality_expression ();
		if (!right) {
			result = false;
		}
	}

	return result;
}
/**
 * Parses a chain of and-expressions joined by `||'.
 *
 * @return true if at least one operand of the chain is true
 */
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_space ();

	while (true) {
		bool at_operator = current < end - 1 && current[0] == '|' && current[1] == '|';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		// the right operand is always parsed, even if the result is
		// already known to be true, so that the input is consumed
		bool right = parse_pp_and_expression ();
		if (right) {
			result = true;
		}
	}

	return result;
}
/**
 * Parses a complete preprocessing expression.
 *
 * Entry point of the expression parser; it starts at the or-expression
 * level.
 *
 * @return value of the expression
 */
bool parse_pp_expression () {
	bool result = parse_pp_or_expression ();
	return result;
}
/**
 * Skips whitespace and keeps line and column up to date.
 *
 * If a `#' is found at the beginning of a line afterwards, the
 * preprocessing directive is handled with pp_directive ().
 *
 * @return true if whitespace was skipped or a directive was handled
 */
bool whitespace () {
	char* start = current;
	bool at_line_start = (column == 1);

	while (current < end && current[0].isspace ()) {
		if (current[0] == '\n') {
			// the character after a newline is in column 1
			line++;
			column = 1;
			at_line_start = true;
		} else {
			column++;
		}
		current++;
	}

	bool directive_follows = at_line_start && current < end && current[0] == '#';
	if (directive_follows) {
		pp_directive ();
		return true;
	}

	return current != start;
}
// Skips one comment starting at the current position.
//
// Single-line comments end before the next newline or at the end of the
// input. Multi-line comments end at the closing marker; if it is missing
// a syntax error is reported. Comments are handed to push_comment () when
// file_comment is set, and multi-line comments also when they start with
// an extra `*' (doc comment).
//
// file_comment: true while collecting the comments at the top of a file;
//               in this mode a doc comment is not consumed and false is
//               returned (NOTE(review): presumably so that it stays
//               available for the declaration that follows -- confirm)
//
// Returns true if a comment was consumed or an unterminated multi-line
// comment was reported, false if there is no comment to consume here.
bool comment (bool file_comment = false) {
	if (current == null
	    || current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		SourceReference? source_reference = null;
		if (file_comment) {
			source_reference = get_source_reference (0);
		}

		// single-line comment
		// column is not advanced here; whitespace () resets it at the
		// newline that ends the comment
		current += 2;
		char* begin = current;

		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}

		if (source_reference != null) {
			push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
		}
	} else {
		SourceReference? source_reference = null;

		// the character after the opener only exists if the input does
		// not end right behind it; never read past the end of the buffer
		bool doc_comment = current < end - 2 && current[2] == '*';

		if (file_comment && doc_comment) {
			return false;
		}

		if (doc_comment || file_comment) {
			source_reference = get_source_reference (0);
		}

		current += 2;
		column += 2;

		char* begin = current;
		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}

		// fewer than two characters left: the closing marker is missing.
		// This also covers input that ends directly after the opener,
		// where current is already equal to end.
		if (current >= end - 1) {
			Report.error (get_source_reference (0), "syntax error, expected */");
			return true;
		}

		if (source_reference != null) {
			push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
		}

		// step over the closing marker
		current += 2;
		column += 2;
	}

	return true;
}
/**
 * Skips any sequence of whitespace and comments.
 */
void space () {
	bool skipped;
	do {
		// comment () is only tried if no whitespace was skipped
		skipped = whitespace () || comment ();
	} while (skipped);
}
/**
 * Skips whitespace and comments, calling comment () in file comment
 * mode, until neither whitespace () nor comment () makes progress.
 */
public void parse_file_comments () {
	bool skipped;
	do {
		// comment () is only tried if no whitespace was skipped
		skipped = whitespace () || comment (true);
	} while (skipped);
}
/**
 * Stores a comment that was read by the scanner.
 *
 * A comment whose text starts with `*' becomes the pending comment; a
 * pending comment it replaces is added to the source file. A file
 * comment is added to the source file directly and clears the pending
 * comment.
 *
 * @param comment_item     text of the comment without its delimiters
 * @param source_reference location of the comment
 * @param file_comment     whether the comment belongs to the file itself
 */
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool is_doc_comment = (comment_item[0] == '*');

	if (is_doc_comment && _comment != null) {
		// extra doc comment, add it to source file comments
		source_file.add_comment (_comment);
	}

	if (is_doc_comment) {
		_comment = new Comment (comment_item, source_reference);
	}

	if (!file_comment) {
		return;
	}

	source_file.add_comment (new Comment (comment_item, source_reference));
	_comment = null;
}
/**
 * Clears and returns the content of the comment stack.
 *
 * @return saved comment, or null if no comment is pending
 */
public Comment? pop_comment () {
	// take over the pending comment (possibly null) and reset the field
	Comment? pending = _comment;
	_comment = null;
	return pending;
}