codegen: Custom abstract methods of GLib.Source are handled differently
[vala-gnome.git] / vala / valageniescanner.vala
blobc72bb45c1bf627466521741bbacfb9c7524ac0e3
1 /* valageniescanner.vala
3 * Copyright (C) 2008-2012 Jamie McCracken, Jürg Billeter
4 * Based on code by Jürg Billeter
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Author:
21 * Jamie McCracken jamiemcc gnome org
24 using GLib;
26 /**
27 * Lexical scanner for Genie source files.
29 public class Vala.Genie.Scanner {
// The file being scanned; assigned once by the constructor.
30 public SourceFile source_file { get; private set; }
// Indentation width setting. The scanner compares it against 0 in read_token ()
// and count_tabs (); with 0, count_tabs () counts tab characters.
32 public int indent_spaces { get; set;}
// Scan window over the mapped file contents: begin/end delimit it and
// current is the read position.
34 char* begin;
35 char* current;
36 char* end;
// Line and column of the read position; the constructor starts both at 1.
38 int line;
39 int column;
// Indentation measured by count_tabs () for the line being entered.
41 int current_indent_level;
// Indentation level in effect; read_token () raises it when emitting INDENT
// and lowers it when emitting DEDENT.
42 int indent_level;
// DEDENT tokens still owed; read_token () emits one per call while positive.
43 int pending_dedents;
45 /* track open parens and braces for automatic line continuations */
46 int open_parens_count;
47 int open_brace_count;
// Token type most recently recorded by read_token ().
49 TokenType last_token;
// Set to true by read_token () once it reaches its first regular token;
// EOL tokens are only produced after that.
50 bool parse_started;
// NOTE(review): not referenced in the visible part of this file.
52 Comment _comment;
// Cleared by seek (); NOTE(review): its other uses are outside this view.
54 Conditional[] conditional_stack;
// Element type of conditional_stack: three independent flags.
// NOTE(review): presumably one entry per open conditional-compilation
// section; the code that reads and writes these flags is outside this view.
56 struct Conditional {
57 public bool matched;
58 public bool else_found;
59 public bool skip_section;
// Stack of lexical contexts; the top entry decides which reader
// read_token () dispatches to. Cleared by seek ().
62 State[] state_stack;
// Lexical contexts pushed by the token readers:
//   PARENS / BRACE / BRACKET - pushed on '(' / '{' / '[' and popped on the closer
//   REGEX_LITERAL            - pushed when '/' opens a regex literal
//   TEMPLATE                 - pushed when @" opens a string template
//   TEMPLATE_PART            - pushed after a template piece; the next
//                              read_token () call pops it and returns COMMA
64 enum State {
65 PARENS,
66 BRACE,
67 BRACKET,
68 REGEX_LITERAL,
69 TEMPLATE,
70 TEMPLATE_PART
/**
 * Creates a scanner positioned at the first character of the given file.
 *
 * @param source_file the file whose mapped contents will be tokenised
 */
public Scanner (SourceFile source_file) {
	this.source_file = source_file;

	// Scan window: [begin, end) over the mapped file, reading from the start.
	begin = source_file.get_mapped_contents ();
	end = begin + source_file.get_mapped_length ();
	current = begin;

	// Positions are 1-based.
	line = 1;
	column = 1;

	// No indentation seen yet; an indent width of 0 is the initial setting.
	_indent_spaces = 0;
	indent_level = 0;
	current_indent_level = 0;
	pending_dedents = 0;

	// No open brackets, hence no automatic line continuation.
	open_brace_count = 0;
	open_parens_count = 0;

	// Nothing has been tokenised so far.
	last_token = TokenType.NONE;
	parse_started = false;
}
/* Returns true when the innermost lexical state is State.TEMPLATE. */
bool in_template () {
	int depth = state_stack.length;
	if (depth <= 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE;
}
/* Returns true when the innermost lexical state is State.TEMPLATE_PART. */
bool in_template_part () {
	int depth = state_stack.length;
	if (depth <= 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE_PART;
}
/* Returns true for characters allowed inside an identifier: alphanumerics and '_'. */
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/* Returns true when the innermost lexical state is State.REGEX_LITERAL. */
bool in_regex_literal () {
	int depth = state_stack.length;
	if (depth <= 0) {
		return false;
	}
	return state_stack[depth - 1] == State.REGEX_LITERAL;
}
/**
 * Builds a source reference on the current line, relative to the current
 * read position and column.
 *
 * @param offset column offset added to the current column for the start
 * @param length extent of the reference; added to both pointer and column
 *               to form the end location
 */
SourceReference get_source_reference (int offset, int length = 0) {
	int start_column = column + offset;
	var start = SourceLocation (current, line, start_column);
	var stop = SourceLocation (current + length, line, start_column + length);
	return new SourceReference (source_file, start, stop);
}
/**
 * Reads the next token while inside a regular expression literal.
 *
 * Returns REGEX_LITERAL for the pattern text up to (not including) the
 * closing '/', CLOSE_REGEX_LITERAL for the closing '/' together with its
 * trailing i/s/m/x modifiers, or EOF at the end of input. If the literal is
 * not terminated on the current line an error is reported, the regex state
 * is popped and scanning continues with read_token ().
 *
 * @param token_begin receives the location of the first character of the token
 * @param token_end receives the location of the last character of the token
 * @return the type of the token that was read
 */
public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// -1 means "derive the column advance from the byte count" (ASCII-only token)
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '/':
			type = TokenType.CLOSE_REGEX_LITERAL;
			current++;
			state_stack.length--;
			// Count the '/' itself so that the column advance below covers the
			// delimiter plus every modifier, and so that the error offsets
			// point at the offending modifier.
			token_length_in_chars = 1;
			var fl_i = false;
			var fl_s = false;
			var fl_m = false;
			var fl_x = false;
			while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
				switch (current[0]) {
				case 'i':
					if (fl_i) {
						Report.error (get_source_reference (token_length_in_chars), "modifier 'i' used more than once");
					}
					fl_i = true;
					break;
				case 's':
					if (fl_s) {
						Report.error (get_source_reference (token_length_in_chars), "modifier 's' used more than once");
					}
					fl_s = true;
					break;
				case 'm':
					if (fl_m) {
						Report.error (get_source_reference (token_length_in_chars), "modifier 'm' used more than once");
					}
					fl_m = true;
					break;
				case 'x':
					if (fl_x) {
						Report.error (get_source_reference (token_length_in_chars), "modifier 'x' used more than once");
					}
					fl_x = true;
					break;
				}
				current++;
				token_length_in_chars++;
			}
			break;
		default:
			type = TokenType.REGEX_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '/') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '/':
					case '^':
					case '$':
					case '.':
					case '[':
					case ']':
					case '{':
					case '}':
					case '(':
					case ')':
					case '?':
					case '*':
					case '+':
					case '-':
					case '#':
					case '&':
					case '~':
					case ':':
					case ';':
					case '<':
					case '>':
					case '|':
					case '%':
					case '=':
					case '@':
					case '0':
					case 'b':
					case 'B':
					case 'f':
					case 'n':
					case 'N':
					case 'r':
					case 'R':
					case 't':
					case 'v':
					case 'a':
					case 'A':
					case 'p':
					case 'P':
					case 'e':
					case 'd':
					case 'D':
					case 's':
					case 'S':
					case 'w':
					case 'W':
					case 'G':
					case 'z':
					case 'Z':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires two hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 2 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 2) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires two hex digits");
						}
						break;
					default:
						// back references \1 through \99
						if (current[0].isdigit ()) {
							current++;
							token_length_in_chars++;
							// the first digit may have been the last byte of the input
							if (current < end && current[0].isdigit ()) {
								current++;
								token_length_in_chars++;
							}
						} else {
							Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						}
						break;
					}
				} else if (current[0] == '\n') {
					// regex literals may not span lines
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				// unterminated literal: leave regex mode and resume normal scanning
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected /");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Repositions the scanner at a previously recorded location.
 *
 * The conditional and lexical-state stacks are discarded, so scanning
 * resumes in the default state.
 *
 * @param location position, line and column to continue from
 */
public void seek (SourceLocation location) {
	// drop any context that belonged to the old position
	state_stack = null;
	conditional_stack = null;

	column = location.column;
	line = location.line;
	current = location.pos;
}
/**
 * Classifies an identifier-shaped lexeme as a Genie keyword or a plain
 * identifier.
 *
 * Candidates are bucketed by length and then by first character. Within a
 * bucket each remaining keyword is tested with matches ().
 * NOTE(review): matches () is defined outside this view; this relies on it
 * comparing every character of the keyword against the lexeme.
 *
 * @param begin pointer to the first character of the lexeme
 * @param len number of characters in the lexeme
 * @return the keyword's token type, or TokenType.IDENTIFIER if none matches
 */
public static TokenType get_identifier_or_keyword (char* begin, int len) {
	switch (len) {
	case 2:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "as")) return TokenType.AS;
			break;
		case 'd':
			if (matches (begin, "do")) return TokenType.DO;
			break;
		case 'i':
			// with two characters the second one alone decides
			if (begin[1] == 'f') return TokenType.IF;
			if (begin[1] == 'n') return TokenType.IN;
			if (begin[1] == 's') return TokenType.IS;
			break;
		case 'o':
			if (matches (begin, "of")) return TokenType.OF;
			if (matches (begin, "or")) return TokenType.OP_OR;
			break;
		case 't':
			if (matches (begin, "to")) return TokenType.TO;
			break;
		}
		break;
	case 3:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "and")) return TokenType.OP_AND;
			break;
		case 'd':
			if (matches (begin, "def")) return TokenType.DEF;
			break;
		case 'f':
			if (matches (begin, "for")) return TokenType.FOR;
			break;
		case 'g':
			if (matches (begin, "get")) return TokenType.GET;
			break;
		case 'i':
			if (matches (begin, "isa")) return TokenType.ISA;
			break;
		case 'n':
			if (matches (begin, "new")) return TokenType.NEW;
			if (matches (begin, "not")) return TokenType.OP_NEG;
			break;
		case 'o':
			if (matches (begin, "out")) return TokenType.OUT;
			break;
		case 'r':
			if (matches (begin, "ref")) return TokenType.REF;
			break;
		case 's':
			if (matches (begin, "set")) return TokenType.SET;
			break;
		case 't':
			if (matches (begin, "try")) return TokenType.TRY;
			break;
		case 'v':
			if (matches (begin, "var")) return TokenType.VAR;
			break;
		}
		break;
	case 4:
		switch (begin[0]) {
		case 'c':
			if (matches (begin, "case")) return TokenType.CASE;
			break;
		case 'd':
			if (matches (begin, "dict")) return TokenType.DICT;
			break;
		case 'e':
			if (matches (begin, "else")) return TokenType.ELSE;
			if (matches (begin, "enum")) return TokenType.ENUM;
			break;
		case 'i':
			if (matches (begin, "init")) return TokenType.INIT;
			break;
		case 'l':
			if (matches (begin, "list")) return TokenType.LIST;
			if (matches (begin, "lock")) return TokenType.LOCK;
			break;
		case 'n':
			if (matches (begin, "null")) return TokenType.NULL;
			break;
		case 'p':
			if (matches (begin, "pass")) return TokenType.PASS;
			if (matches (begin, "prop")) return TokenType.PROP;
			break;
		case 's':
			// Genie spells "this" as "self"
			if (matches (begin, "self")) return TokenType.THIS;
			break;
		case 't':
			if (matches (begin, "true")) return TokenType.TRUE;
			break;
		case 'u':
			if (matches (begin, "uses")) return TokenType.USES;
			break;
		case 'v':
			if (matches (begin, "void")) return TokenType.VOID;
			break;
		case 'w':
			if (matches (begin, "weak")) return TokenType.WEAK;
			if (matches (begin, "when")) return TokenType.WHEN;
			break;
		}
		break;
	case 5:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "array")) return TokenType.ARRAY;
			if (matches (begin, "async")) return TokenType.ASYNC;
			break;
		case 'b':
			if (matches (begin, "break")) return TokenType.BREAK;
			break;
		case 'c':
			if (matches (begin, "class")) return TokenType.CLASS;
			if (matches (begin, "const")) return TokenType.CONST;
			break;
		case 'e':
			if (matches (begin, "event")) return TokenType.EVENT;
			break;
		case 'f':
			if (matches (begin, "false")) return TokenType.FALSE;
			if (matches (begin, "final")) return TokenType.FINAL;
			break;
		case 'o':
			if (matches (begin, "owned")) return TokenType.OWNED;
			break;
		case 'p':
			if (matches (begin, "print")) return TokenType.PRINT;
			break;
		case 's':
			if (matches (begin, "super")) return TokenType.SUPER;
			break;
		case 'r':
			if (matches (begin, "raise")) return TokenType.RAISE;
			break;
		case 'w':
			if (matches (begin, "while")) return TokenType.WHILE;
			break;
		case 'y':
			if (matches (begin, "yield")) return TokenType.YIELD;
			break;
		}
		break;
	case 6:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "assert")) return TokenType.ASSERT;
			break;
		case 'd':
			if (matches (begin, "delete")) return TokenType.DELETE;
			if (matches (begin, "downto")) return TokenType.DOWNTO;
			break;
		case 'e':
			if (matches (begin, "except")) return TokenType.EXCEPT;
			if (matches (begin, "extern")) return TokenType.EXTERN;
			break;
		case 'i':
			if (matches (begin, "inline")) return TokenType.INLINE;
			break;
		case 'p':
			if (matches (begin, "params")) return TokenType.PARAMS;
			if (matches (begin, "public")) return TokenType.PUBLIC;
			break;
		case 'r':
			if (matches (begin, "raises")) return TokenType.RAISES;
			if (matches (begin, "return")) return TokenType.RETURN;
			break;
		case 's':
			if (matches (begin, "sealed")) return TokenType.SEALED;
			if (matches (begin, "sizeof")) return TokenType.SIZEOF;
			if (matches (begin, "static")) return TokenType.STATIC;
			if (matches (begin, "struct")) return TokenType.STRUCT;
			break;
		case 't':
			if (matches (begin, "typeof")) return TokenType.TYPEOF;
			break;
		}
		break;
	case 7:
		switch (begin[0]) {
		case 'd':
			if (matches (begin, "default")) return TokenType.DEFAULT;
			if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
			break;
		case 'e':
			if (matches (begin, "ensures")) return TokenType.ENSURES;
			break;
		case 'f':
			if (matches (begin, "finally")) return TokenType.FINALLY;
			break;
		case 'p':
			if (matches (begin, "private")) return TokenType.PRIVATE;
			break;
		case 'u':
			if (matches (begin, "unowned")) return TokenType.UNOWNED;
			break;
		case 'v':
			if (matches (begin, "virtual")) return TokenType.VIRTUAL;
			break;
		}
		break;
	case 8:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "abstract")) return TokenType.ABSTRACT;
			break;
		case 'c':
			if (matches (begin, "continue")) return TokenType.CONTINUE;
			break;
		case 'd':
			if (matches (begin, "delegate")) return TokenType.DELEGATE;
			break;
		case 'i':
			if (matches (begin, "internal")) return TokenType.INTERNAL;
			break;
		case 'o':
			if (matches (begin, "override")) return TokenType.OVERRIDE;
			break;
		case 'r':
			if (matches (begin, "readonly")) return TokenType.READONLY;
			if (matches (begin, "requires")) return TokenType.REQUIRES;
			break;
		case 'v':
			if (matches (begin, "volatile")) return TokenType.VOLATILE;
			break;
		}
		break;
	case 9:
		switch (begin[0]) {
		case 'c':
			if (matches (begin, "construct")) return TokenType.CONSTRUCT;
			break;
		case 'e':
			// Genie's "exception" declares an error domain
			if (matches (begin, "exception")) return TokenType.ERRORDOMAIN;
			break;
		case 'i':
			if (matches (begin, "interface")) return TokenType.INTERFACE;
			break;
		case 'n':
			if (matches (begin, "namespace")) return TokenType.NAMESPACE;
			break;
		case 'p':
			if (matches (begin, "protected")) return TokenType.PROTECTED;
			break;
		case 'w':
			if (matches (begin, "writeonly")) return TokenType.WRITEONLY;
			break;
		}
		break;
	case 10:
		if (begin[0] == 'i' && matches (begin, "implements")) return TokenType.IMPLEMENTS;
		break;
	}

	// no keyword of this length and spelling
	return TokenType.IDENTIFIER;
}
/**
 * Reads the next token while inside a string template (@"...").
 *
 * Possible results:
 *  - CLOSE_TEMPLATE for the closing '"' (pops the template state),
 *  - IDENTIFIER for "$name" (the '$' is excluded from the token),
 *  - whatever read_token () yields after "$(", which opens an embedded
 *    expression,
 *  - TEMPLATE_STRING_LITERAL for "$$" or for a run of literal text,
 *  - EOF at the end of input.
 * After an identifier or literal piece, State.TEMPLATE_PART is pushed so that
 * the next read_token () call emits the separating COMMA.
 *
 * @param token_begin receives the location of the first character of the token
 * @param token_end receives the location of the last character of the token
 * @return the type of the token that was read
 */
public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// -1 means "derive the column advance from the byte count"
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '"':
			type = TokenType.CLOSE_TEMPLATE;
			current++;
			state_stack.length--;
			break;
		case '$':
			token_begin.pos++; // $ is not part of following token
			current++;
			// every look at current[0] is guarded: '$' may be the last byte
			if (current < end && (current[0].isalpha () || current[0] == '_')) {
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
				state_stack += State.TEMPLATE_PART;
			} else if (current < end && current[0] == '(') {
				current++;
				column += 2;
				state_stack += State.PARENS;
				return read_token (out token_begin, out token_end);
			} else if (current < end && current[0] == '$') {
				type = TokenType.TEMPLATE_STRING_LITERAL;
				current++;
				state_stack += State.TEMPLATE_PART;
			} else {
				Report.error (get_source_reference (1), "unexpected character");
				// account for the '$' consumed above before rescanning
				column++;
				return read_template_token (out token_begin, out token_end);
			}
			break;
		default:
			type = TokenType.TEMPLATE_STRING_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '"' && current[0] != '$') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case 'v':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires two hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 2 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 2) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires two hex digits");
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					current++;
					line++;
					column = 1;
					// A template piece has no closing quote of its own, so
					// nothing has been consumed on the new line yet.
					token_length_in_chars = 0;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end) {
				// unterminated template: leave template mode and resume normal scanning
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			state_stack += State.TEMPLATE_PART;
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Reads the next token from the source file.
 *
 * Processing order:
 *  1. Delegate to read_template_token () / read_regex_token () when the
 *     innermost lexical state asks for it; a pending TEMPLATE_PART state is
 *     popped and reported as COMMA.
 *  2. Emit outstanding DEDENT tokens, one per call.
 *  3. Skip whitespace and comments, then explicit ("\" at end of line) and
 *     automatic (inside parens or braces) line continuations.
 *  4. Emit EOL for the first newline after a token, then compare the
 *     indentation of the next non-empty line with the current level and
 *     emit INDENT or DEDENT as needed.
 *  5. Scan one regular token: identifier/keyword, number, operator,
 *     punctuation, character/string/verbatim string literal, or the opener
 *     of a template or regex literal.
 *
 * @param token_begin receives the location of the first character of the token
 * @param token_end receives the location of the last character of the token
 * @return the type of the token that was read
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	if (current == null) {
		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column);
		return TokenType.EOF;
	}

	if (in_template ()) {
		return read_template_token (out token_begin, out token_end);
	} else if (in_template_part ()) {
		state_stack.length--;

		// zero-width separator between the pieces of a template
		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column - 1);

		return TokenType.COMMA;
	} else if (in_regex_literal ()) {
		return read_regex_token (out token_begin, out token_end);
	}

	/* emit dedents if outstanding before checking any other chars */

	if (pending_dedents > 0) {
		pending_dedents--;
		indent_level--;

		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column);

		last_token = TokenType.DEDENT;

		return TokenType.DEDENT;
	}

	if ((_indent_spaces == 0 ) || (last_token != TokenType.EOL)) {
		/* scrub whitespace (excluding newlines) and comments */
		space ();
	}

	/* handle explicit line continuation (lines ending with "\") */
	// "end - 1" keeps the look-ahead at current[1] inside the buffer
	while (current < end - 1 && current[0] == '\\' && current[1] == '\n') {
		current += 2;
		line++;
		skip_space_tabs ();
	}

	/* handle automatic line continuations (when inside parens or braces) */
	while (current < end && current[0] == '\n' && (open_parens_count > 0 || open_brace_count > 0)) {
		current++;
		line++;
		skip_space_tabs ();
	}

	/* handle non-consecutive new line once parsing is underway - EOL */
	if (newline () && parse_started && last_token != TokenType.EOL && last_token != TokenType.SEMICOLON) {
		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column);

		last_token = TokenType.EOL;

		return TokenType.EOL;
	}

	while (skip_newlines ()) {
		token_begin = SourceLocation (current, line, column);

		current_indent_level = count_tabs ();

		/* if its an empty new line then ignore */
		if (current_indent_level == -1) {
			continue;
		}

		if (current_indent_level > indent_level) {
			indent_level = current_indent_level;

			token_end = SourceLocation (current, line, column);

			last_token = TokenType.INDENT;

			return TokenType.INDENT;
		} else if (current_indent_level < indent_level) {
			indent_level--;

			// this call reports one level; the rest follow on later calls
			pending_dedents = (indent_level - current_indent_level);
			token_end = SourceLocation (current, line, column);

			last_token = TokenType.DEDENT;

			return TokenType.DEDENT;
		}
	}

	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// -1 means "derive the column advance from the byte count"
	int token_length_in_chars = -1;

	parse_started = true;

	if (current >= end) {
		// close every block that is still open before reporting EOF
		if (indent_level > 0) {
			indent_level--;

			pending_dedents = indent_level;

			type = TokenType.DEDENT;
		} else {
			type = TokenType.EOF;
		}
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@') {
		if (current < end - 1 && current[1] == '"') {
			type = TokenType.OPEN_TEMPLATE;
			current += 2;
			state_stack += State.TEMPLATE;
		} else {
			token_begin.pos++; // @ is not part of the identifier
			current++;
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else if (current[0].isdigit ()) {
		while (current < end && current[0].isdigit ()) {
			current++;
		}
		type = TokenType.INTEGER_LITERAL;
		if (current < end && current[0].tolower () == 'l') {
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
			}
		} else if (current < end && current[0].tolower () == 'u') {
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
		} else if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
			current++;
			while (current < end && current[0].isdigit ()) {
				current++;
			}
			if (current < end && current[0].tolower () == 'e') {
				current++;
				if (current < end && (current[0] == '+' || current[0] == '-')) {
					current++;
				}
				while (current < end && current[0].isdigit ()) {
					current++;
				}
			}
			if (current < end && current[0].tolower () == 'f') {
				current++;
			}
			type = TokenType.REAL_LITERAL;
		} else if (current < end - 1 && current == begin + 1
		           && begin[0] == '0' && begin[1] == 'x' && begin[2].isxdigit ()) {
			// hexadecimal integer literal
			// ("end - 1" guarantees that begin[2] is still inside the buffer)
			current++;
			while (current < end && current[0].isxdigit ()) {
				current++;
			}
		} else if (current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			open_brace_count++;
			state_stack += State.BRACE;
			current++;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			open_brace_count--;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			current++;
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			open_parens_count++;
			state_stack += State.PARENS;
			current++;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			open_parens_count--;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			if (in_template ()) {
				// end of a "$(...)" expression: reported as the separator
				type = TokenType.COMMA;
			}
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			state_stack += State.BRACKET;
			current++;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			current++;
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			// After a token that cannot end an operand, '/' opens a regex
			// literal; otherwise it is the division operator.
			switch (last_token) {
			case TokenType.ASSIGN:
			case TokenType.COMMA:
			case TokenType.MINUS:
			case TokenType.OP_AND:
			case TokenType.OP_EQ:
			case TokenType.OP_GE:
			case TokenType.OP_GT:
			case TokenType.OP_INC:
			case TokenType.OP_LE:
			case TokenType.OP_LT:
			case TokenType.OP_NE:
			case TokenType.OP_NEG:
			case TokenType.OP_OR:
			case TokenType.OPEN_BRACE:
			case TokenType.OPEN_PARENS:
			case TokenType.PLUS:
			case TokenType.RETURN:
				type = TokenType.OPEN_REGEX_LITERAL;
				state_stack += State.REGEX_LITERAL;
				current++;
				break;
			default:
				type = TokenType.DIV;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_DIV;
					current++;
				}
				break;
			}
			break;

		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
				type = TokenType.VERBATIM_STRING_LITERAL;
				// opening and closing triple quotes
				token_length_in_chars = 6;
				current += 3;
				while (current < end - 4) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						// only the closing triple quote remains on this line
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							// skip the offending byte, otherwise this loop
							// would never make progress
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}
			// opening and closing quote
			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case 'v':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires two hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 2 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 2) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires two hex digits");
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					current++;
					line++;
					column = 1;
					// only the closing quote remains on this line
					token_length_in_chars = 1;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
				if (current < end && begin[0] == '\'' && current[0] != '\'') {
					// multiple characters in single character literal
					Report.error (get_source_reference (token_length_in_chars), "invalid character literal");
				}
			}
			if (current < end) {
				// consume the closing quote
				current++;
			} else {
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u != (unichar) (-1)) {
				current += u.to_utf8 (null);
				Report.error (get_source_reference (0), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (get_source_reference (0), "invalid UTF-8 character");
			}
			// skip the offending character and try again
			column++;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);
	last_token = type;

	return type;
}
/**
 * Consumes the indentation at the current position and reports its depth.
 *
 * When indent_spaces is 0, every leading tab counts as one level. Otherwise
 * the leading spaces are counted and divided by indent_spaces. Whitespace
 * and comments following the indentation are skipped as well.
 *
 * @return the indentation depth, or -1 if the scanner ends up on a newline,
 *         i.e. the line contains no code
 */
int count_tabs () {
	int depth = 0;

	if (_indent_spaces != 0) {
		// space-based indentation: _indent_spaces blanks form one level
		int blanks = 0;
		while (current < end && current[0] == ' ') {
			blanks++;
			column++;
			current++;
		}

		depth = blanks / _indent_spaces;
	} else {
		// tab-based indentation: one tab per level
		while (current < end && current[0] == '\t') {
			depth++;
			column++;
			current++;
		}
	}

	/* ignore comments and whitespace and other lines that contain no code */
	space ();

	bool line_is_empty = (current < end) && (current[0] == '\n');
	return line_is_empty ? -1 : depth;
}
/**
 * Checks whether the bytes at begin start with keyword.
 *
 * Only keyword.length bytes are compared; the caller is responsible for
 * making sure that many bytes are readable at begin.
 *
 * @param begin   start of the text to inspect
 * @param keyword text expected at begin
 * @return true if every byte of keyword matches, false at the first mismatch
 */
static bool matches (char* begin, string keyword) {
	char* expected = (char*) keyword;
	long expected_len = keyword.length;

	int pos = 0;
	while (pos < expected_len && begin[pos] == expected[pos]) {
		pos++;
	}

	// the scan only reaches the end when no byte differed
	return pos >= expected_len;
}
/**
 * Skips whitespace other than newlines and, if the scanner then stands on
 * a '#' in column 1, hands over to the preprocessor directive handler.
 *
 * @return true if whitespace was skipped or a directive was processed
 */
bool whitespace () {
	char* start = current;

	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		current++;
		column++;
	}

	bool at_directive = (column == 1) && (current < end) && (current[0] == '#');
	if (at_directive) {
		pp_directive ();
		return true;
	}

	return current != start;
}
/**
 * Tells whether the scanner currently stands on a newline character.
 *
 * The position is bounds-checked first so that nothing is read at or
 * beyond end once the input is exhausted.
 *
 * @return true if there is input left and the current character is '\n'
 */
inline bool newline () {
	return current < end && current[0] == '\n';
}
/**
 * Consumes consecutive newline characters.
 *
 * For every newline the line counter is advanced, the column is reset
 * to 1 and the current indent level is reset to 0.
 *
 * @return true if at least one newline was consumed
 */
bool skip_newlines () {
	char* start = current;

	while (newline ()) {
		current++;

		line++;
		column = 1;
		current_indent_level = 0;
	}

	return current != start;
}
/**
 * Consumes a single-line or delimited comment at the current position.
 *
 * Single-line comments are recorded only when file_comment is set.
 * Delimited comments are recorded when they start with an extra '*' or
 * when file_comment is set; a delimited comment starting with an extra
 * '*' is left untouched while file comments are being collected.
 *
 * @param file_comment whether comments are being collected as file comments
 * @return true if a comment was consumed (including an unterminated
 *         delimited comment, for which an error is reported), false if
 *         there is no comment here or it was deliberately left in place
 */
bool comment (bool file_comment = false) {
	if (current == null
	    || current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		// single-line comment

		SourceReference source_reference = null;
		if (file_comment) {
			source_reference = get_source_reference (0);
		}

		current += 2;

		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}

		/* do not ignore EOL if comment does not exclusively occupy the line */
		// the bounds check keeps us from reading at end when the comment
		// runs up to the end of the input
		if (current < end && current[0] == '\n' && last_token == TokenType.EOL) {
			current++;
			line++;
			column = 1;
			current_indent_level = 0;
		}

		if (source_reference != null) {
			// NOTE(review): no local `begin` exists in this branch, so this
			// resolves to the scanner's `begin` field and the recorded text
			// starts there rather than at the comment itself - confirm
			// that this is intended
			push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
		}
	} else {
		// delimited comment

		// only look at the third character if it is actually part of the input
		bool starts_with_star = current < end - 2 && current[2] == '*';

		SourceReference source_reference = null;
		if (file_comment && starts_with_star) {
			return false;
		}

		if (starts_with_star || file_comment) {
			source_reference = get_source_reference (0);
		}

		current += 2;
		char* begin = current;

		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}

		// fewer than two characters left means the terminator is missing;
		// ">=" also covers input ending directly after the opening "/*",
		// where current already equals end
		if (current >= end - 1) {
			Report.error (get_source_reference (0), "syntax error, expected */");
			return true;
		}

		if (source_reference != null) {
			string comment = ((string) begin).substring (0, (long) (current - begin));
			push_comment (comment, source_reference, file_comment);
		}

		// step over the closing "*/"
		current += 2;
		column += 2;
	}

	return true;
}
/**
 * Consumes consecutive tab characters, advancing the column for each.
 *
 * @return true if at least one tab was consumed
 */
bool skip_tabs () {
	char* start = current;

	while (current < end && current[0] == '\t') {
		column++;
		current++;
	}

	return current != start;
}
/**
 * Skips whitespace, tabs and comments until none of them makes progress.
 */
void skip_space_tabs () {
	bool progressed;
	do {
		// same short-circuit order as the individual skippers are tried in
		progressed = whitespace () || skip_tabs () || comment ();
	} while (progressed);
}
/**
 * Skips whitespace and comments until neither makes progress.
 */
void space () {
	bool progressed;
	do {
		progressed = whitespace () || comment ();
	} while (progressed);
}
/**
 * Skips whitespace and comments, scanning the comments in file-comment mode,
 * until neither makes progress.
 */
public void parse_file_comments () {
	bool progressed;
	do {
		progressed = whitespace () || comment (true);
	} while (progressed);
}
/**
 * Records a scanned comment.
 *
 * A comment whose text starts with '*' becomes the pending comment; a
 * pending comment that is already present is moved to the source file
 * first. File comments are added to the source file directly and clear
 * the pending comment.
 *
 * @param comment_item     text of the comment
 * @param source_reference location of the comment
 * @param file_comment     whether the comment was scanned as a file comment
 */
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool starts_with_star = comment_item[0] == '*';

	if (starts_with_star) {
		if (_comment != null) {
			// extra doc comment, add it to source file comments
			source_file.add_comment (_comment);
		}
		_comment = new Comment (comment_item, source_reference);
	}

	if (!file_comment) {
		return;
	}

	source_file.add_comment (new Comment (comment_item, source_reference));
	_comment = null;
}
/**
 * Clears and returns the content of the comment stack.
 *
 * @return saved comment, or null if there is none
 */
public Comment? pop_comment () {
	// take the pending comment (possibly null) and leave the slot empty
	var pending = _comment;
	_comment = null;
	return pending;
}
/**
 * Skips whitespace other than newlines inside a preprocessor directive.
 *
 * @return true if at least one character was skipped
 */
bool pp_whitespace () {
	char* start = current;

	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		column++;
		current++;
	}

	return current != start;
}
/**
 * Skips whitespace (without newlines) and comments inside a preprocessor
 * directive until neither makes progress.
 */
void pp_space () {
	bool progressed;
	do {
		progressed = pp_whitespace () || comment ();
	} while (progressed);
}
/**
 * Handles a preprocessor directive; the scanner stands on its '#'.
 *
 * The directive name is read and dispatched to the matching handler
 * (if, elif, else, endif); anything else is reported as an error. If the
 * innermost conditional is then marked to be skipped, input is discarded
 * up to the next line whose first non-whitespace character is '#', and
 * the scanner is placed at the start of that line.
 */
void pp_directive () {
	// step over the hash sign
	column++;
	current++;

	pp_space ();

	// read the directive name
	char* word = current;
	int word_len = 0;
	while (current < end && current[0].isalnum ()) {
		word_len++;
		column++;
		current++;
	}

	if (word_len == 2 && matches (word, "if")) {
		parse_pp_if ();
	} else if (word_len == 4 && matches (word, "elif")) {
		parse_pp_elif ();
	} else if (word_len == 4 && matches (word, "else")) {
		parse_pp_else ();
	} else if (word_len == 5 && matches (word, "endif")) {
		parse_pp_endif ();
	} else {
		Report.error (get_source_reference (-word_len, word_len), "syntax error, invalid preprocessing directive");
	}

	bool skipping = conditional_stack.length > 0
	    && conditional_stack[conditional_stack.length - 1].skip_section;
	if (!skipping) {
		return;
	}

	// skip lines until next preprocessing directive
	bool only_blanks_on_line = false;
	while (current < end) {
		char c = current[0];

		if (only_blanks_on_line && c == '#') {
			// go back to begin of line
			current -= (column - 1);
			column = 1;
			return;
		}

		if (c == '\n') {
			line++;
			// becomes 1 again through the increment below
			column = 0;
			only_blanks_on_line = true;
		} else if (!c.isspace ()) {
			only_blanks_on_line = false;
		}

		current++;
		column++;
	}
}
/**
 * Skips trailing whitespace and comments of a directive and reports an
 * error unless the scanner then stands on a newline.
 */
void pp_eol () {
	pp_space ();

	bool at_newline = current < end && current[0] == '\n';
	if (!at_newline) {
		Report.error (get_source_reference (0), "syntax error, expected newline");
	}
}
/**
 * Handles an if directive: evaluates its condition and pushes a new entry
 * onto the conditional stack.
 *
 * The new section is processed only if the condition holds and the
 * enclosing conditional (if any) is not being skipped; otherwise it is
 * marked to be skipped.
 */
void parse_pp_if () {
	pp_space ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	conditional_stack += Conditional ();

	int top = conditional_stack.length - 1;
	bool enclosing_active = top == 0 || !conditional_stack[top - 1].skip_section;

	if (condition && enclosing_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an elif directive.
 *
 * The condition is always parsed. An error is reported if there is no open
 * conditional or its else_found flag is set. The section is processed only
 * if the condition holds, no earlier branch of this conditional matched
 * and the enclosing conditional (if any) is not being skipped.
 */
void parse_pp_elif () {
	pp_space ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	int top = conditional_stack.length - 1;
	if (top < 0 || conditional_stack[top].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #elif");
		return;
	}

	bool enclosing_active = top == 0 || !conditional_stack[top - 1].skip_section;
	bool take_branch = condition && !conditional_stack[top].matched && enclosing_active;

	if (take_branch) {
		// condition true => process code within this branch
		conditional_stack[top].matched = true;
	}
	// when the branch is not taken, skip lines until next preprocessing directive
	conditional_stack[top].skip_section = !take_branch;
}
/**
 * Handles an else directive.
 *
 * An error is reported if there is no open conditional or the conditional
 * has already seen an else. Otherwise the conditional is flagged as having
 * seen its else, so that a further else or elif for the same conditional
 * is rejected. The section is processed only if no earlier branch matched
 * and the enclosing conditional (if any) is not being skipped.
 */
void parse_pp_else () {
	pp_eol ();

	if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #else");
		return;
	}

	int top = conditional_stack.length - 1;

	// remember the else; without this the else_found checks can never trigger
	conditional_stack[top].else_found = true;

	bool enclosing_active = top == 0 || !conditional_stack[top - 1].skip_section;

	if (!conditional_stack[top].matched && enclosing_active) {
		// no earlier branch matched => process code within else
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an endif directive by dropping the innermost conditional, or
 * reports an error if no conditional is open.
 */
void parse_pp_endif () {
	pp_eol ();

	if (conditional_stack.length > 0) {
		// pop the innermost conditional
		conditional_stack.length--;
		return;
	}

	Report.error (get_source_reference (0), "syntax error, unexpected #endif");
}
/**
 * Reads an identifier in a preprocessor expression and evaluates it.
 *
 * @return true for "true", false for "false", otherwise whether the
 *         identifier is defined in the source file's context; false
 *         (after reporting an error) if no identifier is present
 */
bool parse_pp_symbol () {
	int name_len = 0;
	while (current < end && is_ident_char (current[0])) {
		name_len++;
		column++;
		current++;
	}

	if (name_len == 0) {
		Report.error (get_source_reference (0), "syntax error, expected identifier");
		return false;
	}

	// the identifier is the name_len characters just consumed
	string name = ((string) (current - name_len)).substring (0, name_len);

	if (name == "true") {
		return true;
	}
	if (name == "false") {
		return false;
	}
	return source_file.context.is_defined (name);
}
/**
 * Parses a primary preprocessor expression: an identifier or a
 * parenthesized expression.
 *
 * @return the value of the expression; false (after reporting an error)
 *         if neither an identifier nor '(' is found
 */
bool parse_pp_primary_expression () {
	if (current < end) {
		if (is_ident_char (current[0])) {
			return parse_pp_symbol ();
		}

		if (current[0] == '(') {
			// step over the opening parenthesis
			current++;
			column++;
			pp_space ();

			bool inner = parse_pp_expression ();

			pp_space ();
			bool closed = current < end && current[0] == ')';
			if (closed) {
				current++;
				column++;
			} else {
				Report.error (get_source_reference (0), "syntax error, expected `)'");
			}

			return inner;
		}
	}

	// end of input or an unexpected character
	Report.error (get_source_reference (0), "syntax error, expected identifier");
	return false;
}
/**
 * Parses a unary preprocessor expression: any number of '!' prefixes
 * followed by a primary expression.
 *
 * @return the value of the expression, negated once per '!'
 */
bool parse_pp_unary_expression () {
	bool at_negation = current < end && current[0] == '!';
	if (!at_negation) {
		return parse_pp_primary_expression ();
	}

	// step over the '!'
	current++;
	column++;
	pp_space ();

	return !parse_pp_unary_expression ();
}
/**
 * Parses a chain of unary expressions joined by == or != in a
 * preprocessor expression, evaluating from left to right.
 *
 * @return the value of the expression
 */
bool parse_pp_equality_expression () {
	bool left = parse_pp_unary_expression ();
	pp_space ();

	while (current < end - 1
	       && current[1] == '='
	       && (current[0] == '=' || current[0] == '!')) {
		bool negated = current[0] == '!';

		// step over the two-character operator
		current += 2;
		column += 2;
		pp_space ();

		bool right = parse_pp_unary_expression ();
		left = negated ? (left != right) : (left == right);

		// skip blanks after the operand so that a further == or != separated
		// by whitespace is still recognized as part of this chain
		pp_space ();
	}

	return left;
}
/**
 * Parses a chain of equality expressions joined by && in a preprocessor
 * expression. Every operand is parsed, whatever the value so far.
 *
 * @return true if all operands are true
 */
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_space ();

	for (;;) {
		bool at_operator = current < end - 1 && current[0] == '&' && current[1] == '&';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		bool operand = parse_pp_equality_expression ();
		result = result && operand;
	}

	return result;
}
/**
 * Parses a chain of and-expressions joined by || in a preprocessor
 * expression. Every operand is parsed, whatever the value so far.
 *
 * @return true if at least one operand is true
 */
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_space ();

	for (;;) {
		bool at_operator = current < end - 1 && current[0] == '|' && current[1] == '|';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		bool operand = parse_pp_and_expression ();
		result = result || operand;
	}

	return result;
}
/**
 * Parses a complete preprocessor expression by delegating to the
 * or-expression parser.
 *
 * @return the value of the expression
 */
bool parse_pp_expression () {
	bool value = parse_pp_or_expression ();
	return value;
}