linux: Add psiginfo(3)
[vala-gnome.git] / vala / valageniescanner.vala
blobe232a77bc44e281e285cb4c8ff9d4ce65ff0f5f9
1 /* valageniescanner.vala
3 * Copyright (C) 2008-2012 Jamie McCracken, Jürg Billeter
4 * Based on code by Jürg Billeter
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Author:
21 * Jamie McCracken jamiemcc gnome org
24 using GLib;
26 /**
27 * Lexical scanner for Genie source files.
29 public class Vala.Genie.Scanner {
/** File whose mapped contents are being tokenized. */
public SourceFile source_file { get; private set; }

/**
 * Number of spaces that make up one indentation level.
 *
 * 0 makes count_tabs () count tab characters instead of spaces.
 */
public int indent_spaces { get; set; }

/* first byte of the buffer, read cursor, and one past the last byte */
char* begin;
char* current;
char* end;

/* position of the read cursor; both start at 1 (see the constructor) */
int line;
int column;

/* indentation of the line just measured, indentation already reported
 * through INDENT/DEDENT tokens, and DEDENT tokens still to be emitted */
int current_indent_level;
int indent_level;
int pending_dedents;

/* track open parens and braces for automatic line continuations */
int open_parens_count;
int open_brace_count;

/* type of the most recently returned token, and whether read_token ()
 * has reached its token-scanning stage at least once */
TokenType last_token;
bool parse_started;

Comment _comment;

/* NOTE(review): presumably one entry per open preprocessor conditional,
 * maintained by pp_directive () - that code is not visible here */
Conditional[] conditional_stack;

struct Conditional {
	public bool matched;
	public bool else_found;
	public bool skip_section;
}

/* stack of nested scanner contexts; the innermost one is the last element */
State[] state_stack;

/**
 * Contexts pushed on state_stack while scanning.
 *
 * PARENS, BRACE and BRACKET are pushed on "(", "{" and "[";
 * REGEX_LITERAL after an opening "/"; TEMPLATE after @" and
 * TEMPLATE_PART after each piece of a template string.
 */
enum State {
	PARENS,
	BRACE,
	BRACKET,
	REGEX_LITERAL,
	TEMPLATE,
	TEMPLATE_PART
}
/**
 * Creates a scanner positioned at the first byte of //source_file//.
 *
 * @param source_file file whose mapped contents will be tokenized
 */
public Scanner (SourceFile source_file) {
	this.source_file = source_file;

	// buffer bounds; the cursor starts on the first byte
	char* contents = source_file.get_mapped_contents ();
	begin = contents;
	current = contents;
	end = contents + source_file.get_mapped_length ();

	// first line, first column
	line = 1;
	column = 1;

	// indentation tracking; 0 spaces selects tab based indentation
	_indent_spaces = 0;
	indent_level = 0;
	current_indent_level = 0;
	pending_dedents = 0;

	// nothing is open and nothing has been scanned yet
	open_brace_count = 0;
	open_parens_count = 0;
	last_token = TokenType.NONE;
	parse_started = false;
}
/** Whether the innermost scanner state is State.TEMPLATE. */
bool in_template () {
	int depth = state_stack.length;
	if (depth <= 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE;
}
/** Whether the innermost scanner state is State.TEMPLATE_PART. */
bool in_template_part () {
	int depth = state_stack.length;
	if (depth <= 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE_PART;
}
/** Whether //c// may appear inside an identifier: alphanumeric or an underscore. */
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/** Whether the innermost scanner state is State.REGEX_LITERAL. */
bool in_regex_literal () {
	int depth = state_stack.length;
	if (depth <= 0) {
		return false;
	}
	return state_stack[depth - 1] == State.REGEX_LITERAL;
}
/**
 * Builds a reference into the current line, for diagnostics.
 *
 * @param offset columns to add to the scanner's current column
 * @param length extent of the reference, added to both position and column
 * @return a reference from the cursor to //length// past it
 */
SourceReference get_source_reference (int offset, int length = 0) {
	int first_column = column + offset;
	var first = SourceLocation (current, line, first_column);
	var last = SourceLocation (current + length, line, first_column + length);
	return new SourceReference (source_file, first, last);
}
/**
 * Reads the next token while inside a regular expression literal.
 *
 * Either the body of the literal (REGEX_LITERAL) or the closing "/"
 * together with its trailing i/s/m/x modifiers (CLOSE_REGEX_LITERAL).
 * An unterminated literal is reported, the regex state is popped and
 * scanning continues with read_token ().
 *
 * @param token_begin location of the first byte of the token
 * @param token_end location of the end of the token
 * @return the type of the token that was read
 */
public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// stays negative for tokens whose length in characters equals
	// their length in bytes
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '/':
			type = TokenType.CLOSE_REGEX_LITERAL;
			current++;
			state_stack.length--;
			var fl_i = false;
			var fl_s = false;
			var fl_m = false;
			var fl_x = false;
			while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
				bool seen_before;
				switch (current[0]) {
				case 'i':
					seen_before = fl_i;
					fl_i = true;
					break;
				case 's':
					seen_before = fl_s;
					fl_s = true;
					break;
				case 'm':
					seen_before = fl_m;
					fl_m = true;
					break;
				default:
					// 'x', the only other character the loop admits
					seen_before = fl_x;
					fl_x = true;
					break;
				}
				if (seen_before) {
					// the offset is the modifier's distance from the token start
					Report.error (get_source_reference ((int) (current - begin)), "modifier '%c' used more than once".printf (current[0]));
				}
				current++;
			}
			// token_length_in_chars is deliberately left negative: "/" and the
			// modifiers are single bytes, so the byte length computed below is
			// the column width of the whole token
			break;
		default:
			type = TokenType.REGEX_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '/') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'': case '"': case '\\': case '/': case '^': case '$':
					case '.': case '[': case ']': case '{': case '}': case '(':
					case ')': case '?': case '*': case '+': case '-': case '#':
					case '&': case '~': case ':': case ';': case '<': case '>':
					case '|': case '%': case '=': case '@': case '0': case 'b':
					case 'B': case 'f': case 'n': case 'r': case 't': case 'a':
					case 'A': case 'p': case 'P': case 'e': case 'd': case 'D':
					case 's': case 'S': case 'w': case 'W': case 'G': case 'z':
					case 'Z':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						// the offending character is consumed as ordinary
						// text by the next iteration
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					// a regex literal may not span lines
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				// NOTE(review): the message names a double quote although the
				// missing terminator is "/"; the text is kept unchanged
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Moves the scanner to //location//.
 *
 * The conditional stack and the state stack are both discarded.
 *
 * @param location position, line and column to continue from
 */
public void seek (SourceLocation location) {
	// drop any nested scanning context first
	state_stack = null;
	conditional_stack = null;

	column = location.column;
	line = location.line;
	current = location.pos;
}
/**
 * Classifies an identifier-shaped word as a Genie keyword or a plain identifier.
 *
 * Candidates are grouped by length, so each matches () call compares exactly
 * //len// bytes and stops at the first byte that differs.
 *
 * @param begin first byte of the word
 * @param len length of the word in bytes
 * @return the keyword's token type, or TokenType.IDENTIFIER when nothing matches
 */
public static TokenType get_identifier_or_keyword (char* begin, int len) {
	switch (len) {
	case 2:
		if (matches (begin, "as")) return TokenType.AS;
		if (matches (begin, "do")) return TokenType.DO;
		if (matches (begin, "if")) return TokenType.IF;
		if (matches (begin, "in")) return TokenType.IN;
		if (matches (begin, "is")) return TokenType.IS;
		if (matches (begin, "of")) return TokenType.OF;
		if (matches (begin, "or")) return TokenType.OP_OR;
		if (matches (begin, "to")) return TokenType.TO;
		break;
	case 3:
		if (matches (begin, "and")) return TokenType.OP_AND;
		if (matches (begin, "def")) return TokenType.DEF;
		if (matches (begin, "for")) return TokenType.FOR;
		if (matches (begin, "get")) return TokenType.GET;
		if (matches (begin, "isa")) return TokenType.ISA;
		if (matches (begin, "new")) return TokenType.NEW;
		if (matches (begin, "not")) return TokenType.OP_NEG;
		if (matches (begin, "out")) return TokenType.OUT;
		if (matches (begin, "ref")) return TokenType.REF;
		if (matches (begin, "set")) return TokenType.SET;
		if (matches (begin, "try")) return TokenType.TRY;
		if (matches (begin, "var")) return TokenType.VAR;
		break;
	case 4:
		if (matches (begin, "case")) return TokenType.CASE;
		if (matches (begin, "dict")) return TokenType.DICT;
		if (matches (begin, "else")) return TokenType.ELSE;
		if (matches (begin, "enum")) return TokenType.ENUM;
		if (matches (begin, "init")) return TokenType.INIT;
		if (matches (begin, "list")) return TokenType.LIST;
		if (matches (begin, "lock")) return TokenType.LOCK;
		if (matches (begin, "null")) return TokenType.NULL;
		if (matches (begin, "pass")) return TokenType.PASS;
		if (matches (begin, "prop")) return TokenType.PROP;
		if (matches (begin, "self")) return TokenType.THIS;
		if (matches (begin, "true")) return TokenType.TRUE;
		if (matches (begin, "uses")) return TokenType.USES;
		if (matches (begin, "void")) return TokenType.VOID;
		if (matches (begin, "weak")) return TokenType.WEAK;
		if (matches (begin, "when")) return TokenType.WHEN;
		break;
	case 5:
		if (matches (begin, "array")) return TokenType.ARRAY;
		if (matches (begin, "async")) return TokenType.ASYNC;
		if (matches (begin, "break")) return TokenType.BREAK;
		if (matches (begin, "class")) return TokenType.CLASS;
		if (matches (begin, "const")) return TokenType.CONST;
		if (matches (begin, "event")) return TokenType.EVENT;
		if (matches (begin, "false")) return TokenType.FALSE;
		if (matches (begin, "final")) return TokenType.FINAL;
		if (matches (begin, "owned")) return TokenType.OWNED;
		if (matches (begin, "print")) return TokenType.PRINT;
		if (matches (begin, "super")) return TokenType.SUPER;
		if (matches (begin, "raise")) return TokenType.RAISE;
		if (matches (begin, "while")) return TokenType.WHILE;
		if (matches (begin, "yield")) return TokenType.YIELD;
		break;
	case 6:
		if (matches (begin, "assert")) return TokenType.ASSERT;
		if (matches (begin, "delete")) return TokenType.DELETE;
		if (matches (begin, "downto")) return TokenType.DOWNTO;
		if (matches (begin, "except")) return TokenType.EXCEPT;
		if (matches (begin, "extern")) return TokenType.EXTERN;
		if (matches (begin, "inline")) return TokenType.INLINE;
		if (matches (begin, "params")) return TokenType.PARAMS;
		if (matches (begin, "public")) return TokenType.PUBLIC;
		if (matches (begin, "raises")) return TokenType.RAISES;
		if (matches (begin, "return")) return TokenType.RETURN;
		if (matches (begin, "sealed")) return TokenType.SEALED;
		if (matches (begin, "sizeof")) return TokenType.SIZEOF;
		if (matches (begin, "static")) return TokenType.STATIC;
		if (matches (begin, "struct")) return TokenType.STRUCT;
		if (matches (begin, "typeof")) return TokenType.TYPEOF;
		break;
	case 7:
		if (matches (begin, "default")) return TokenType.DEFAULT;
		if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
		if (matches (begin, "ensures")) return TokenType.ENSURES;
		if (matches (begin, "finally")) return TokenType.FINALLY;
		if (matches (begin, "private")) return TokenType.PRIVATE;
		if (matches (begin, "unowned")) return TokenType.UNOWNED;
		if (matches (begin, "virtual")) return TokenType.VIRTUAL;
		break;
	case 8:
		if (matches (begin, "abstract")) return TokenType.ABSTRACT;
		if (matches (begin, "continue")) return TokenType.CONTINUE;
		if (matches (begin, "delegate")) return TokenType.DELEGATE;
		if (matches (begin, "internal")) return TokenType.INTERNAL;
		if (matches (begin, "override")) return TokenType.OVERRIDE;
		if (matches (begin, "readonly")) return TokenType.READONLY;
		if (matches (begin, "requires")) return TokenType.REQUIRES;
		if (matches (begin, "volatile")) return TokenType.VOLATILE;
		break;
	case 9:
		if (matches (begin, "construct")) return TokenType.CONSTRUCT;
		if (matches (begin, "exception")) return TokenType.ERRORDOMAIN;
		if (matches (begin, "interface")) return TokenType.INTERFACE;
		if (matches (begin, "namespace")) return TokenType.NAMESPACE;
		if (matches (begin, "protected")) return TokenType.PROTECTED;
		if (matches (begin, "writeonly")) return TokenType.WRITEONLY;
		break;
	case 10:
		if (matches (begin, "implements")) return TokenType.IMPLEMENTS;
		break;
	}

	return TokenType.IDENTIFIER;
}
/**
 * Reads the next token while inside a template string (@"...").
 *
 * Produces CLOSE_TEMPLATE for the closing quote, IDENTIFIER for $name,
 * TEMPLATE_STRING_LITERAL for "$$" and for literal text, and hands over
 * to read_token () after "$(" or when the template is unterminated.
 *
 * @param token_begin location of the first byte of the token
 * @param token_end location of the end of the token
 * @return the type of the token that was read
 */
public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// stays negative for tokens whose length in characters equals
	// their length in bytes
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '"':
			type = TokenType.CLOSE_TEMPLATE;
			current++;
			state_stack.length--;
			break;
		case '$':
			token_begin.pos++; // $ is not part of following token
			current++;
			// every look at the character after "$" is bounds checked, so a
			// "$" that ends the input takes the error branch below
			if (current < end && (current[0].isalpha () || current[0] == '_')) {
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
				state_stack += State.TEMPLATE_PART;
			} else if (current < end && current[0] == '(') {
				// embedded expression: skip "$(" and scan it as ordinary code
				current++;
				column += 2;
				state_stack += State.PARENS;
				return read_token (out token_begin, out token_end);
			} else if (current < end && current[0] == '$') {
				// "$$" stands for a literal dollar sign
				type = TokenType.TEMPLATE_STRING_LITERAL;
				current++;
				state_stack += State.TEMPLATE_PART;
			} else {
				Report.error (get_source_reference (1), "unexpected character");
				return read_template_token (out token_begin, out token_end);
			}
			break;
		default:
			type = TokenType.TEMPLATE_STRING_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '"' && current[0] != '$') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'': case '"': case '\\': case '0': case 'b':
					case 'f': case 'n': case 'r': case 't':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					// the literal continues on the next line
					current++;
					line++;
					column = 1;
					token_length_in_chars = 1;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end) {
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			state_stack += State.TEMPLATE_PART;
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Reads the next token from the source.
 *
 * In order: delegates to the template and regex readers when one of those
 * states is innermost, emits pending DEDENT tokens, skips whitespace,
 * comments and line continuations, turns line breaks into EOL and
 * INDENT/DEDENT tokens, and finally scans an identifier, keyword, literal
 * or operator. Characters that cannot start a token are reported and skipped.
 *
 * @param token_begin location of the first byte of the token
 * @param token_end location of the end of the token
 * @return the type of the token that was read
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	if (current == null) {
		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column);
		return TokenType.EOF;
	}

	if (in_template ()) {
		return read_template_token (out token_begin, out token_end);
	} else if (in_template_part ()) {
		// a zero-width COMMA separates the pieces of a template string
		state_stack.length--;

		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column - 1);

		return TokenType.COMMA;
	} else if (in_regex_literal ()) {
		return read_regex_token (out token_begin, out token_end);
	}

	/* emit dedents if outstanding before checking any other chars */

	if (pending_dedents > 0) {
		pending_dedents--;
		indent_level--;

		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column);

		last_token = TokenType.DEDENT;

		return TokenType.DEDENT;
	}

	if ((_indent_spaces == 0) || (last_token != TokenType.EOL)) {
		/* scrub whitespace (excluding newlines) and comments */
		space ();
	}

	/* handle explicit line continuation (lines ending with "\") */
	// "end - 1" keeps the look at current[1] inside the buffer
	while (current < end - 1 && current[0] == '\\' && current[1] == '\n') {
		current += 2;
		line++;
		skip_space_tabs ();
	}

	/* handle automatic line continuations (when inside parens or braces) */
	while (current < end && current[0] == '\n' && (open_parens_count > 0 || open_brace_count > 0)) {
		current++;
		line++;
		skip_space_tabs ();
	}

	/* handle non-consecutive new line once parsing is underway - EOL */
	if (newline () && parse_started && last_token != TokenType.EOL && last_token != TokenType.SEMICOLON) {
		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column);

		last_token = TokenType.EOL;

		return TokenType.EOL;
	}

	while (skip_newlines ()) {
		token_begin = SourceLocation (current, line, column);

		current_indent_level = count_tabs ();

		/* if its an empty new line then ignore */
		if (current_indent_level == -1) {
			continue;
		}

		if (current_indent_level > indent_level) {
			indent_level = current_indent_level;

			token_end = SourceLocation (current, line, column);

			last_token = TokenType.INDENT;

			return TokenType.INDENT;
		} else if (current_indent_level < indent_level) {
			// one DEDENT is returned now, the rest are queued
			indent_level--;

			pending_dedents = (indent_level - current_indent_level);
			token_end = SourceLocation (current, line, column);

			last_token = TokenType.DEDENT;

			return TokenType.DEDENT;
		}
	}

	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// stays negative for tokens whose length in characters equals
	// their length in bytes
	int token_length_in_chars = -1;

	parse_started = true;

	if (current >= end) {
		// close every indentation level that is still open before EOF
		if (indent_level > 0) {
			indent_level--;

			pending_dedents = indent_level;

			type = TokenType.DEDENT;
		} else {
			type = TokenType.EOF;
		}
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@') {
		if (current < end - 1 && current[1] == '"') {
			type = TokenType.OPEN_TEMPLATE;
			current += 2;
			state_stack += State.TEMPLATE;
		} else {
			token_begin.pos++; // @ is not part of the identifier
			current++;
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else if (current[0].isdigit ()) {
		while (current < end && current[0].isdigit ()) {
			current++;
		}
		type = TokenType.INTEGER_LITERAL;
		if (current < end && current[0].tolower () == 'l') {
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
			}
		} else if (current < end && current[0].tolower () == 'u') {
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
		} else if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
			current++;
			while (current < end && current[0].isdigit ()) {
				current++;
			}
			if (current < end && current[0].tolower () == 'e') {
				current++;
				if (current < end && (current[0] == '+' || current[0] == '-')) {
					current++;
				}
				while (current < end && current[0].isdigit ()) {
					current++;
				}
			}
			if (current < end && current[0].tolower () == 'f') {
				current++;
			}
			type = TokenType.REAL_LITERAL;
		} else if (current < end - 1 && current == begin + 1
		           && begin[0] == '0' && begin[1] == 'x' && begin[2].isxdigit ()) {
			// hexadecimal integer literal; "end - 1" keeps begin[2] in bounds
			current++;
			while (current < end && current[0].isxdigit ()) {
				current++;
			}
		} else if (current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			open_brace_count++;
			state_stack += State.BRACE;
			current++;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			open_brace_count--;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			current++;
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			open_parens_count++;
			state_stack += State.PARENS;
			current++;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			open_parens_count--;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			if (in_template ()) {
				// this ")" closed a "$(" expression inside a template
				type = TokenType.COMMA;
			}
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			state_stack += State.BRACKET;
			current++;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			current++;
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			// after any of the tokens listed here "/" opens a regex
			// literal; otherwise it is division
			switch (last_token) {
			case TokenType.ASSIGN:
			case TokenType.COMMA:
			case TokenType.MINUS:
			case TokenType.OP_AND:
			case TokenType.OP_EQ:
			case TokenType.OP_GE:
			case TokenType.OP_GT:
			case TokenType.OP_INC:
			case TokenType.OP_LE:
			case TokenType.OP_LT:
			case TokenType.OP_NE:
			case TokenType.OP_NEG:
			case TokenType.OP_OR:
			case TokenType.OPEN_BRACE:
			case TokenType.OPEN_PARENS:
			case TokenType.PLUS:
			case TokenType.RETURN:
				type = TokenType.OPEN_REGEX_LITERAL;
				state_stack += State.REGEX_LITERAL;
				current++;
				break;
			default:
				type = TokenType.DIV;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_DIV;
					current++;
				}
				break;
			}
			break;

		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
				type = TokenType.VERBATIM_STRING_LITERAL;
				token_length_in_chars = 6;
				current += 3;
				while (current < end - 4) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							// step over the bad byte; without this the loop
							// would report the same byte forever
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}

			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'': case '"': case '\\': case '0': case 'b':
					case 'f': case 'n': case 'r': case 't':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					// the literal continues on the next line
					current++;
					line++;
					column = 1;
					token_length_in_chars = 1;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
				if (current < end && begin[0] == '\'' && current[0] != '\'') {
					// multiple characters in single character literal
					Report.error (get_source_reference (token_length_in_chars), "invalid character literal");
				}
			}
			if (current < end) {
				// consume the closing quote
				current++;
			} else {
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			// not the start of any token: report it, skip it and try again
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u != (unichar) (-1)) {
				current += u.to_utf8 (null);
				Report.error (get_source_reference (0), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (get_source_reference (0), "invalid UTF-8 character");
			}
			column++;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);
	last_token = type;

	return type;
}
/**
 * Consumes the indentation at the cursor and measures it.
 *
 * With indent_spaces set to 0 every tab is one level; otherwise the number
 * of leading spaces is divided by indent_spaces.
 *
 * @return the indentation level, or -1 when nothing but whitespace and
 *         comments precede the line break
 */
int count_tabs () {
	bool tab_mode = (_indent_spaces == 0);
	char indent_char = tab_mode ? '\t' : ' ';

	int run_length = 0;
	while (current < end && current[0] == indent_char) {
		current++;
		column++;
		run_length++;
	}

	int tab_count = tab_mode ? run_length : run_length / _indent_spaces;

	/* ignore comments and whitespace and other lines that contain no code */
	space ();

	bool blank_line = (current < end) && (current[0] == '\n');
	if (blank_line) {
		return -1;
	}

	return tab_count;
}
/**
 * Checks whether the bytes at begin start with keyword.
 *
 * Only keyword.length bytes are compared; whatever follows them in the
 * buffer is not inspected.
 *
 * @param begin   pointer to the first byte to compare
 * @param keyword text that must appear at begin
 * @return true if every byte of keyword matches
 */
static bool matches (char* begin, string keyword) {
	char* expected = (char*) keyword;
	long count = keyword.length;

	int pos = 0;
	// advance while the bytes agree; stop at the first mismatch
	while (pos < count && begin[pos] == expected[pos]) {
		pos++;
	}

	return pos == count;
}
/**
 * Skips whitespace other than newlines and, when the position reached is
 * a '#' in column 1, hands over to pp_directive ().
 *
 * @return true if whitespace was skipped or a directive was processed
 */
bool whitespace () {
	bool skipped = false;

	while (current < end) {
		char c = current[0];
		if (c == '\n' || !c.isspace ()) {
			break;
		}

		current++;
		column++;
		skipped = true;
	}

	bool at_directive = column == 1 && current < end && current[0] == '#';
	if (!at_directive) {
		return skipped;
	}

	pp_directive ();
	return true;
}
/**
 * Tells whether the scanner is positioned on a newline character.
 *
 * The position is checked against end first so that nothing is read
 * beyond the buffer when the scanner has reached the end of the input.
 *
 * @return true if there is input left and the next character is '\n'
 */
inline bool newline () {
	return current < end && current[0] == '\n';
}
/**
 * Consumes consecutive newline characters, updating line, column and
 * current_indent_level for each one.
 *
 * @return true if at least one newline was consumed
 */
bool skip_newlines () {
	bool consumed = false;

	for (; newline (); current++) {
		// each newline starts a fresh line with no indentation seen yet
		line++;
		column = 1;
		current_indent_level = 0;
		consumed = true;
	}

	return consumed;
}
/**
 * Skips one comment starting at the current position.
 *
 * Handles both single-line and delimited comments. The comment text is
 * handed to push_comment () when a source reference was captured: for
 * every comment when file_comment is set, and for delimited comments
 * whose text starts with '*'.
 *
 * @param file_comment true while collecting file-level comments; in that
 *                     mode a delimited comment starting with an extra '*'
 *                     is left untouched and false is returned
 * @return true if a comment was consumed (also when a delimited comment
 *         is unterminated, after reporting the error), false otherwise
 */
bool comment (bool file_comment = false) {
	if (current == null
	    || current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		// single-line comment

		SourceReference source_reference = null;
		if (file_comment) {
			source_reference = get_source_reference (0);
		}

		current += 2;

		// the comment text starts here; the begin field refers to the
		// mapped buffer and must not be used as the start of the text
		char* text_begin = current;

		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}

		// measure the text before the newline is possibly consumed below
		long text_length = (long) (current - text_begin);

		/* do not ignore EOL if comment does not exclusively occupy the line */
		if (current < end && current[0] == '\n' && last_token == TokenType.EOL) {
			current++;
			line++;
			column = 1;
			current_indent_level = 0;
		}

		if (source_reference != null) {
			push_comment (((string) text_begin).substring (0, text_length), source_reference, file_comment);
		}
	} else {
		// delimited comment

		// a third character only exists if the buffer extends past "/*"
		bool extra_star = current < end - 2 && current[2] == '*';

		if (file_comment && extra_star) {
			return false;
		}

		SourceReference source_reference = null;
		if (extra_star || file_comment) {
			source_reference = get_source_reference (0);
		}

		current += 2;
		char* text_begin = current;

		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}

		// fewer than two characters left: the closing "*/" is missing;
		// ">=" also covers a "/*" sitting at the very end of the buffer
		if (current >= end - 1) {
			Report.error (get_source_reference (0), "syntax error, expected */");
			return true;
		}

		if (source_reference != null) {
			string comment = ((string) text_begin).substring (0, (long) (current - text_begin));
			push_comment (comment, source_reference, file_comment);
		}

		// step over the closing "*/"
		current += 2;
		column += 2;
	}

	return true;
}
/**
 * Consumes a run of tab characters at the current position.
 *
 * @return true if at least one tab was consumed
 */
bool skip_tabs () {
	char* start = current;

	while (current < end && current[0] == '\t') {
		current++;
		column++;
	}

	// any movement means a tab was skipped
	return current != start;
}
/**
 * Repeatedly skips whitespace, tabs and comments until none of the three
 * makes progress.
 */
void skip_space_tabs () {
	while (true) {
		if (whitespace ()) {
			continue;
		}
		if (skip_tabs ()) {
			continue;
		}
		if (comment ()) {
			continue;
		}
		break;
	}
}
/**
 * Repeatedly skips whitespace and comments until neither makes progress.
 */
void space () {
	while (true) {
		if (whitespace ()) {
			continue;
		}
		if (comment ()) {
			continue;
		}
		break;
	}
}
/**
 * Skips whitespace and comments at the current position, scanning the
 * comments in file-comment mode.
 */
public void parse_file_comments () {
	while (true) {
		if (whitespace ()) {
			continue;
		}
		if (comment (true)) {
			continue;
		}
		break;
	}
}
/**
 * Records a scanned comment.
 *
 * A comment whose text starts with '*' becomes the pending comment; a
 * pending comment it replaces is added to the source file. File comments
 * are always added to the source file and clear the pending comment.
 *
 * @param comment_item     text of the comment
 * @param source_reference location of the comment
 * @param file_comment     true if the comment was scanned as file comment
 */
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool starts_with_star = comment_item[0] == '*';

	if (starts_with_star) {
		if (_comment != null) {
			// extra doc comment, add it to source file comments
			source_file.add_comment (_comment);
		}
		_comment = new Comment (comment_item, source_reference);
	}

	if (!file_comment) {
		return;
	}

	source_file.add_comment (new Comment (comment_item, source_reference));
	_comment = null;
}
/**
 * Clears and returns the content of the comment stack.
 *
 * @return saved comment, or null if there is none
 */
public Comment? pop_comment () {
	Comment? saved = _comment;
	_comment = null;
	return saved;
}
/**
 * Skips whitespace other than newlines at the current position.
 *
 * @return true if at least one character was skipped
 */
bool pp_whitespace () {
	char* start = current;

	while (current < end) {
		char c = current[0];
		if (c == '\n' || !c.isspace ()) {
			break;
		}

		current++;
		column++;
	}

	return current != start;
}
/**
 * Repeatedly skips same-line whitespace and comments until neither makes
 * progress.
 */
void pp_space () {
	while (true) {
		if (pp_whitespace ()) {
			continue;
		}
		if (comment ()) {
			continue;
		}
		break;
	}
}
/**
 * Processes the preprocessing directive at the current position, which
 * must be the '#' sign.
 *
 * After the directive has been handled, and if the innermost conditional
 * is marked as skipped, the input is consumed up to the next line whose
 * first non-whitespace character is '#'; the position is then rewound to
 * the start of that line.
 */
void pp_directive () {
	// step over the hash sign
	current++;
	column++;

	pp_space ();

	char* name_begin = current;
	while (current < end && current[0].isalnum ()) {
		current++;
		column++;
	}
	int name_length = (int) (current - name_begin);
	string name = ((string) name_begin).substring (0, name_length);

	switch (name) {
	case "if":
		parse_pp_if ();
		break;
	case "elif":
		parse_pp_elif ();
		break;
	case "else":
		parse_pp_else ();
		break;
	case "endif":
		parse_pp_endif ();
		break;
	default:
		Report.error (get_source_reference (-name_length, name_length), "syntax error, invalid preprocessing directive");
		break;
	}

	// the handlers above may have changed the stack, so inspect it now
	int depth = conditional_stack.length;
	if (depth == 0 || !conditional_stack[depth - 1].skip_section) {
		return;
	}

	// skip lines until next preprocessing directive
	bool at_line_start = false;
	while (current < end) {
		char c = current[0];

		if (at_line_start && c == '#') {
			// go back to begin of line
			current -= (column - 1);
			column = 1;
			return;
		}

		if (c == '\n') {
			line++;
			column = 0;
			at_line_start = true;
		} else if (!c.isspace ()) {
			at_line_start = false;
		}

		current++;
		column++;
	}
}
/**
 * Skips trailing whitespace and comments of a directive and reports an
 * error unless the next character is a newline.
 */
void pp_eol () {
	pp_space ();

	bool at_newline = current < end && current[0] == '\n';
	if (!at_newline) {
		Report.error (get_source_reference (0), "syntax error, expected newline");
	}
}
/**
 * Handles an #if directive: evaluates its expression and pushes a new
 * entry onto the conditional stack.
 *
 * The new section is processed only if the expression is true and the
 * enclosing conditional, if any, is not being skipped.
 */
void parse_pp_if () {
	pp_space ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	conditional_stack += Conditional ();
	int top = conditional_stack.length - 1;

	bool parent_skipped = top > 0 && conditional_stack[top - 1].skip_section;

	if (parent_skipped || !condition) {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	} else {
		// condition true => process code within if
		conditional_stack[top].matched = true;
	}
}
/**
 * Handles an #elif directive.
 *
 * Reports an error when there is no open conditional or the innermost
 * one is flagged with else_found. Otherwise the section is processed
 * only if the expression is true, no earlier branch of this conditional
 * matched, and the enclosing conditional is not being skipped.
 */
void parse_pp_elif () {
	pp_space ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	int depth = conditional_stack.length;
	if (depth == 0 || conditional_stack[depth - 1].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #elif");
		return;
	}

	int top = depth - 1;
	bool parent_skipped = top > 0 && conditional_stack[top - 1].skip_section;
	bool take_branch = condition && !conditional_stack[top].matched && !parent_skipped;

	if (take_branch) {
		// condition true => process code within this branch
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an #else directive.
 *
 * Reports an error when there is no open conditional or the innermost
 * one already had an #else. Otherwise the conditional is flagged with
 * else_found, and the section is processed only if no earlier branch of
 * this conditional matched and the enclosing conditional is not being
 * skipped.
 */
void parse_pp_else () {
	pp_eol ();

	if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #else");
		return;
	}

	int top = conditional_stack.length - 1;

	// remember the #else so that the else_found checks performed for a
	// later #elif or a second #else of this conditional can trigger
	conditional_stack[top].else_found = true;

	if (!conditional_stack[top].matched
	    && (top == 0 || !conditional_stack[top - 1].skip_section)) {
		// no earlier branch matched => process code within else
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an #endif directive by dropping the innermost entry of the
 * conditional stack, or reporting an error if the stack is empty.
 */
void parse_pp_endif () {
	pp_eol ();

	if (conditional_stack.length > 0) {
		conditional_stack.length--;
		return;
	}

	Report.error (get_source_reference (0), "syntax error, unexpected #endif");
}
/**
 * Reads an identifier at the current position and evaluates it as a
 * preprocessor symbol.
 *
 * "true" and "false" evaluate to themselves; every other identifier is
 * looked up with source_file.context.is_defined ().
 *
 * @return the value of the symbol, or false (after reporting an error)
 *         if no identifier is present
 */
bool parse_pp_symbol () {
	char* start = current;
	while (current < end && is_ident_char (current[0])) {
		current++;
		column++;
	}

	int len = (int) (current - start);
	if (len == 0) {
		Report.error (get_source_reference (0), "syntax error, expected identifier");
		return false;
	}

	string identifier = ((string) start).substring (0, len);

	switch (identifier) {
	case "true":
		return true;
	case "false":
		return false;
	default:
		return source_file.context.is_defined (identifier);
	}
}
/**
 * Parses a primary preprocessor expression: either a symbol or a
 * parenthesised expression.
 *
 * @return the value of the expression, or false after reporting an error
 *         when neither form is found
 */
bool parse_pp_primary_expression () {
	if (current < end && is_ident_char (current[0])) {
		return parse_pp_symbol ();
	}

	if (current >= end || current[0] != '(') {
		Report.error (get_source_reference (0), "syntax error, expected identifier");
		return false;
	}

	// parenthesised sub-expression: step over '('
	current++;
	column++;
	pp_space ();

	bool result = parse_pp_expression ();

	pp_space ();
	if (current >= end || current[0] != ')') {
		Report.error (get_source_reference (0), "syntax error, expected `)'");
	} else {
		current++;
		column++;
	}

	return result;
}
/**
 * Parses a primary expression preceded by any number of '!' operators.
 *
 * @return the value of the primary expression, negated once per '!'
 */
bool parse_pp_unary_expression () {
	bool negate = false;

	// consume every leading '!' and keep track of the parity
	while (current < end && current[0] == '!') {
		current++;
		column++;
		pp_space ();
		negate = !negate;
	}

	bool operand = parse_pp_primary_expression ();
	return negate ? !operand : operand;
}
/**
 * Parses a chain of unary expressions joined by "==" or "!=".
 *
 * @return the result of applying the comparisons from left to right
 */
bool parse_pp_equality_expression () {
	bool result = parse_pp_unary_expression ();
	pp_space ();

	// both operators are two characters long and end in '='
	while (current < end - 1
	       && current[1] == '='
	       && (current[0] == '=' || current[0] == '!')) {
		bool is_inequality = current[0] == '!';

		current += 2;
		column += 2;
		pp_space ();

		bool operand = parse_pp_unary_expression ();
		result = is_inequality ? (result != operand) : (result == operand);
	}

	return result;
}
/**
 * Parses a chain of equality expressions joined by "&&".
 *
 * Every operand is parsed, regardless of the value accumulated so far.
 *
 * @return true if all operands are true
 */
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_space ();

	while (true) {
		bool at_operator = current < end - 1 && current[0] == '&' && current[1] == '&';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		bool operand = parse_pp_equality_expression ();
		if (!operand) {
			result = false;
		}
	}

	return result;
}
/**
 * Parses a chain of and-expressions joined by "||".
 *
 * Every operand is parsed, regardless of the value accumulated so far.
 *
 * @return true if at least one operand is true
 */
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_space ();

	while (true) {
		bool at_operator = current < end - 1 && current[0] == '|' && current[1] == '|';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		bool operand = parse_pp_and_expression ();
		if (operand) {
			result = true;
		}
	}

	return result;
}
/**
 * Parses a complete preprocessor expression.
 *
 * @return the value computed by parse_pp_or_expression ()
 */
bool parse_pp_expression () {
	bool value = parse_pp_or_expression ();
	return value;
}