1 /* valageniescanner.vala
3 * Copyright (C) 2008-2012 Jamie McCracken, Jürg Billeter
4 * Based on code by Jürg Billeter
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 * Jamie McCracken jamiemcc gnome org
27 * Lexical scanner for Genie source files.
29 public class Vala
.Genie
.Scanner
{
/* File whose mapped contents this scanner tokenizes; assigned in the constructor. */
30 public SourceFile source_file
{ get; private set; }
/* Number of spaces that make up one indentation level. The scanner compares
 * this against 0: with 0 it counts tab characters, otherwise the leading
 * space count is divided by this value. */
32 public int indent_spaces
{ get; set;}
/* Indentation level of the line being scanned; read_token stores the result
 * of count_tabs () here. */
41 int current_indent_level
;
45 /* track open parens and braces for automatic line continuations */
46 int open_parens_count
;
/* Stack of preprocessor conditionals: parse_pp_if appends an entry and
 * parse_pp_endif removes the last one. */
54 Conditional
[] conditional_stack
;
/* NOTE(review): these two flags are read as members of conditional_stack
 * entries (for example in parse_pp_elif); the declaration that encloses them
 * is not visible in this listing - confirm. */
58 public bool else_found
;
59 public bool skip_section
;
/*
 * Creates a scanner for source_file.
 *
 * Stores the file, points begin at its mapped contents and end at
 * begin + mapped length, and resets the indentation level, the open
 * parenthesis counter, parse_started and last_token.
 */
73 public Scanner (SourceFile source_file
) {
74 this
.source_file
= source_file
;
76 begin
= source_file
.get_mapped_contents ();
77 end
= begin
+ source_file
.get_mapped_length ();
84 current_indent_level
= 0;
88 open_parens_count
= 0;
91 parse_started
= false;
92 last_token
= TokenType
.NONE
;
/* NOTE(review): the method header for this statement is not visible in this
 * listing; presumably it is in_template (), which read_token calls - confirm.
 * The expression is true when state_stack is non-empty and its last entry
 * is State.TEMPLATE. */
97 return (state_stack
.length
> 0 && state_stack
[state_stack
.length
- 1] == State
.TEMPLATE
);
/* Returns true when state_stack is non-empty and its last entry is
 * State.TEMPLATE_PART. */
100 bool in_template_part () {
101 return (state_stack
.length
> 0 && state_stack
[state_stack
.length
- 1] == State
.TEMPLATE_PART
);
/* Returns true for characters accepted inside an identifier: anything
 * char.isalnum () accepts, plus the underscore. */
104 bool is_ident_char (char c
) {
105 return (c
.isalnum () || c
== '_');
/* Returns true when state_stack is non-empty and its last entry is
 * State.REGEX_LITERAL. */
108 bool in_regex_literal () {
109 return (state_stack
.length
> 0 && state_stack
[state_stack
.length
- 1] == State
.REGEX_LITERAL
);
/*
 * Builds a SourceReference into source_file relative to the current position.
 *
 * The start location is (current, line, column + offset); the end location
 * is (current + length, line, column + offset + length). Both share the
 * current line.
 *
 * @param offset value added to the current column
 * @param length extent of the reference, 0 by default
 * @return a newly created SourceReference
 */
112 SourceReference
get_source_reference (int offset
, int length
= 0) {
113 return new
SourceReference (source_file
, SourceLocation (current
, line
, column
+ offset
), SourceLocation (current
+ length
, line
, column
+ offset
+ length
));
/*
 * Reads the next token while the scanner is inside a regular expression
 * literal.
 *
 * Visible outcomes: EOF when current has reached end; CLOSE_REGEX_LITERAL,
 * which pops state_stack and then consumes trailing i, s, m and x modifiers
 * (a repeated modifier is reported through Report.error); or REGEX_LITERAL
 * for the pattern text up to the next slash. Inside the pattern, escape
 * sequences, the digit counts of \u and \x escapes and UTF-8 validity are
 * checked and reported through Report.error. If the pattern runs into the
 * end of input or a newline, an error is reported, state_stack is popped
 * and scanning falls back to read_token.
 *
 * NOTE(review): several statements of this method (case labels, pointer
 * increments, closing braces) are not visible in this listing.
 * NOTE(review): only (unichar) (-1) is treated as a decoding failure; GLib
 * documents (unichar) -2 for a sequence cut off by the length limit -
 * confirm whether that case needs handling.
 *
 * @param token_begin receives the location where the token starts
 * @param token_end receives the location of the last character of the token
 * @return the type of the token that was read
 */
116 public TokenType
read_regex_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
118 char* begin
= current
;
119 token_begin
= SourceLocation (begin
, line
, column
);
// -1 means: derive the column advance from the byte distance current - begin
121 int token_length_in_chars
= -1;
123 if (current
>= end
) {
124 type
= TokenType
.EOF
;
126 switch (current
[0]) {
128 type
= TokenType
.CLOSE_REGEX_LITERAL
;
130 state_stack
.length
--;
// trailing modifiers: each of i, s, m, x is reported when it is repeated
135 while (current
[0] == 'i' || current
[0] == 's' || current
[0] == 'm' || current
[0] == 'x') {
136 switch (current
[0]) {
139 Report
.error (get_source_reference (token_length_in_chars
), "modifier 'i' used more than once");
145 Report
.error (get_source_reference (token_length_in_chars
), "modifier 's' used more than once");
151 Report
.error (get_source_reference (token_length_in_chars
), "modifier 'm' used more than once");
157 Report
.error (get_source_reference (token_length_in_chars
), "modifier 'x' used more than once");
163 token_length_in_chars
++;
167 type
= TokenType
.REGEX_LITERAL
;
168 token_length_in_chars
= 0;
169 while (current
< end
&& current
[0] != '/') {
170 if (current
[0] == '\\') {
172 token_length_in_chars
++;
173 if (current
>= end
) {
177 switch (current
[0]) {
231 token_length_in_chars
++;
234 // u escape character has four hex digits
236 token_length_in_chars
++;
238 for (digit_length
= 0; digit_length
< 4 && current
< end
&& current
[0].isxdigit (); digit_length
++) {
240 token_length_in_chars
++;
242 if (digit_length
!= 4) {
243 Report
.error (get_source_reference (token_length_in_chars
), "\\u requires four hex digits");
247 // hexadecimal escape character requires two hex digits
249 token_length_in_chars
++;
251 for (digit_length
= 0; digit_length
< 2 && current
< end
&& current
[0].isxdigit (); digit_length
++) {
253 token_length_in_chars
++;
255 if (digit_length
!= 2) {
256 Report
.error (get_source_reference (token_length_in_chars
), "\\x requires two hex digits");
260 // back references \1 through \99
261 if (current
[0].isdigit ()) {
263 token_length_in_chars
++;
264 if (current
[0].isdigit ()) {
266 token_length_in_chars
++;
269 Report
.error (get_source_reference (token_length_in_chars
), "invalid escape sequence");
273 } else if (current
[0] == '\n') {
276 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
277 if (u
!= (unichar
) (-1)) {
278 current
+= u
.to_utf8 (null);
279 token_length_in_chars
++;
282 Report
.error (get_source_reference (token_length_in_chars
), "invalid UTF-8 character");
// the literal was not closed before the end of input or the end of the line
286 if (current
>= end
|| current
[0] == '\n') {
287 Report
.error (get_source_reference (token_length_in_chars
), "syntax error, expected \"");
288 state_stack
.length
--;
289 return read_token (out token_begin
, out token_end
);
295 if (token_length_in_chars
< 0) {
296 column
+= (int) (current
- begin
);
298 column
+= token_length_in_chars
;
// column already points past the token, so the last character is at column - 1
301 token_end
= SourceLocation (current
, line
, column
- 1);
/*
 * Repositions the scanner at a previously recorded location.
 *
 * Restores current, line and column from location and sets
 * conditional_stack to null.
 *
 * @param location position to continue scanning from
 */
307 public void seek (SourceLocation location
) {
308 current
= location
.pos
;
309 line
= location
.line
;
310 column
= location
.column
;
312 conditional_stack
= null;
/*
 * Classifies the word starting at begin as a Genie keyword or an identifier.
 *
 * Each visible check compares the bytes at begin with a keyword spelling
 * through matches () and returns the corresponding TokenType; when no check
 * succeeds the result is TokenType.IDENTIFIER.
 *
 * NOTE(review): len is not referenced by any visible line, and the checks
 * appear grouped by keyword length; presumably statements that are not
 * shown in this listing dispatch on len - confirm.
 *
 * @param begin pointer to the first character of the word
 * @param len length of the word
 * @return the keyword token type, or TokenType.IDENTIFIER
 */
316 public static TokenType
get_identifier_or_keyword (char* begin
, int len
) {
321 if (matches (begin
, "as")) return TokenType
.AS
;
324 if (matches (begin
, "do")) return TokenType
.DO
;
337 if (matches (begin
, "of")) return TokenType
.OF
;
// the word operators or, and, not share token types with the symbolic operators
339 if (matches (begin
, "or")) return TokenType
.OP_OR
;
342 if (matches (begin
, "to")) return TokenType
.TO
;
349 if (matches (begin
, "and")) return TokenType
.OP_AND
;
352 if (matches (begin
, "def")) return TokenType
.DEF
;
355 if (matches (begin
, "for")) return TokenType
.FOR
;
358 if (matches (begin
, "get")) return TokenType
.GET
;
361 if (matches (begin
, "isa")) return TokenType
.ISA
;
366 if (matches (begin
, "new")) return TokenType
.NEW
;
369 if (matches (begin
, "not")) return TokenType
.OP_NEG
;
374 if (matches (begin
, "out")) return TokenType
.OUT
;
377 if (matches (begin
, "ref")) return TokenType
.REF
;
380 if (matches (begin
, "set")) return TokenType
.SET
;
383 if (matches (begin
, "try")) return TokenType
.TRY
;
386 if (matches (begin
, "var")) return TokenType
.VAR
;
393 if (matches (begin
, "case")) return TokenType
.CASE
;
396 if (matches (begin
, "dict")) return TokenType
.DICT
;
401 if (matches (begin
, "else")) return TokenType
.ELSE
;
404 if (matches (begin
, "enum")) return TokenType
.ENUM
;
409 if (matches (begin
, "init")) return TokenType
.INIT
;
414 if (matches (begin
, "list")) return TokenType
.LIST
;
417 if (matches (begin
, "lock")) return TokenType
.LOCK
;
423 if (matches (begin
, "null")) return TokenType
.NULL
;
428 if (matches (begin
, "pass")) return TokenType
.PASS
;
431 if (matches (begin
, "prop")) return TokenType
.PROP
;
// the Genie spelling self yields the THIS token
436 if (matches (begin
, "self")) return TokenType
.THIS
;
439 if (matches (begin
, "true")) return TokenType
.TRUE
;
442 if (matches (begin
, "uses")) return TokenType
.USES
;
445 if (matches (begin
, "void")) return TokenType
.VOID
;
450 if (matches (begin
, "weak")) return TokenType
.WEAK
;
453 if (matches (begin
, "when")) return TokenType
.WHEN
;
464 if (matches (begin
, "array")) return TokenType
.ARRAY
;
467 if (matches (begin
, "async")) return TokenType
.ASYNC
;
472 if (matches (begin
, "break")) return TokenType
.BREAK
;
477 if (matches (begin
, "class")) return TokenType
.CLASS
;
480 if (matches (begin
, "const")) return TokenType
.CONST
;
485 if (matches (begin
, "event")) return TokenType
.EVENT
;
490 if (matches (begin
, "false")) return TokenType
.FALSE
;
493 if (matches (begin
, "final")) return TokenType
.FINAL
;
498 if (matches (begin
, "owned")) return TokenType
.OWNED
;
501 if (matches (begin
, "print")) return TokenType
.PRINT
;
504 if (matches (begin
, "super")) return TokenType
.SUPER
;
507 if (matches (begin
, "raise")) return TokenType
.RAISE
;
510 if (matches (begin
, "while")) return TokenType
.WHILE
;
513 if (matches (begin
, "yield")) return TokenType
.YIELD
;
520 if (matches (begin
, "assert")) return TokenType
.ASSERT
;
525 if (matches (begin
, "delete")) return TokenType
.DELETE
;
528 if (matches (begin
, "downto")) return TokenType
.DOWNTO
;
537 if (matches (begin
, "except")) return TokenType
.EXCEPT
;
540 if (matches (begin
, "extern")) return TokenType
.EXTERN
;
547 if (matches (begin
, "inline")) return TokenType
.INLINE
;
552 if (matches (begin
, "params")) return TokenType
.PARAMS
;
555 if (matches (begin
, "public")) return TokenType
.PUBLIC
;
562 if (matches (begin
, "raises")) return TokenType
.RAISES
;
565 if (matches (begin
, "return")) return TokenType
.RETURN
;
572 if (matches (begin
, "sealed")) return TokenType
.SEALED
;
575 if (matches (begin
, "sizeof")) return TokenType
.SIZEOF
;
580 if (matches (begin
, "static")) return TokenType
.STATIC
;
583 if (matches (begin
, "struct")) return TokenType
.STRUCT
;
590 if (matches (begin
, "typeof")) return TokenType
.TYPEOF
;
599 if (matches (begin
, "default")) return TokenType
.DEFAULT
;
602 if (matches (begin
, "dynamic")) return TokenType
.DYNAMIC
;
607 if (matches (begin
, "ensures")) return TokenType
.ENSURES
;
612 if (matches (begin
, "finally")) return TokenType
.FINALLY
;
617 if (matches (begin
, "private")) return TokenType
.PRIVATE
;
620 if (matches (begin
, "unowned")) return TokenType
.UNOWNED
;
623 if (matches (begin
, "virtual")) return TokenType
.VIRTUAL
;
630 if (matches (begin
, "abstract")) return TokenType
.ABSTRACT
;
633 if (matches (begin
, "continue")) return TokenType
.CONTINUE
;
636 if (matches (begin
, "delegate")) return TokenType
.DELEGATE
;
639 if (matches (begin
, "internal")) return TokenType
.INTERNAL
;
642 if (matches (begin
, "override")) return TokenType
.OVERRIDE
;
647 if (matches (begin
, "readonly")) return TokenType
.READONLY
;
650 if (matches (begin
, "requires")) return TokenType
.REQUIRES
;
655 if (matches (begin
, "volatile")) return TokenType
.VOLATILE
;
662 if (matches (begin
, "construct")) return TokenType
.CONSTRUCT
;
// the Genie spelling exception yields the ERRORDOMAIN token
665 if (matches (begin
, "exception")) return TokenType
.ERRORDOMAIN
;
668 if (matches (begin
, "interface")) return TokenType
.INTERFACE
;
671 if (matches (begin
, "namespace")) return TokenType
.NAMESPACE
;
674 if (matches (begin
, "protected")) return TokenType
.PROTECTED
;
677 if (matches (begin
, "writeonly")) return TokenType
.WRITEONLY
;
684 if (matches (begin
, "implements")) return TokenType
.IMPLEMENTS
;
// no keyword matched
689 return TokenType
.IDENTIFIER
;
/*
 * Reads the next token while the scanner is inside a string template.
 *
 * Visible outcomes: EOF when current has reached end; CLOSE_TEMPLATE, which
 * pops state_stack; after a dollar sign either an IDENTIFIER (pushing
 * State.TEMPLATE_PART), a parenthesised expression (pushing State.PARENS
 * and continuing in read_token) or a TEMPLATE_STRING_LITERAL for a second
 * dollar sign; otherwise a TEMPLATE_STRING_LITERAL for the text up to the
 * next double quote or dollar sign. Escape sequences, the digit counts of
 * \u and \x escapes and UTF-8 validity are checked and reported through
 * Report.error. If the text runs into the end of input, an error is
 * reported, state_stack is popped and scanning falls back to read_token.
 *
 * NOTE(review): several statements of this method (case labels, pointer
 * increments, closing braces) are not visible in this listing.
 *
 * @param token_begin receives the location where the token starts
 * @param token_end receives the location of the last character of the token
 * @return the type of the token that was read
 */
693 public TokenType
read_template_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
695 char* begin
= current
;
696 token_begin
= SourceLocation (begin
, line
, column
);
// -1 means: derive the column advance from the byte distance current - begin
698 int token_length_in_chars
= -1;
700 if (current
>= end
) {
701 type
= TokenType
.EOF
;
703 switch (current
[0]) {
705 type
= TokenType
.CLOSE_TEMPLATE
;
707 state_stack
.length
--;
710 token_begin
.pos
++; // $ is not part of following token
712 if (current
[0].isalpha () || current
[0] == '_') {
714 while (current
< end
&& is_ident_char (current
[0])) {
718 type
= TokenType
.IDENTIFIER
;
719 state_stack
+= State
.TEMPLATE_PART
;
720 } else if (current
[0] == '(') {
723 state_stack
+= State
.PARENS
;
724 return read_token (out token_begin
, out token_end
);
725 } else if (current
[0] == '$') {
726 type
= TokenType
.TEMPLATE_STRING_LITERAL
;
728 state_stack
+= State
.TEMPLATE_PART
;
730 Report
.error (get_source_reference (1), "unexpected character");
731 return read_template_token (out token_begin
, out token_end
);
735 type
= TokenType
.TEMPLATE_STRING_LITERAL
;
736 token_length_in_chars
= 0;
737 while (current
< end
&& current
[0] != '"' && current
[0] != '$') {
738 if (current
[0] == '\\') {
740 token_length_in_chars
++;
741 if (current
>= end
) {
745 switch (current
[0]) {
757 token_length_in_chars
++;
760 // u escape character has four hex digits
762 token_length_in_chars
++;
764 for (digit_length
= 0; digit_length
< 4 && current
< end
&& current
[0].isxdigit (); digit_length
++) {
766 token_length_in_chars
++;
768 if (digit_length
!= 4) {
769 Report
.error (get_source_reference (token_length_in_chars
), "\\u requires four hex digits");
773 // hexadecimal escape character requires two hex digits
775 token_length_in_chars
++;
777 for (digit_length
= 0; digit_length
< 2 && current
< end
&& current
[0].isxdigit (); digit_length
++) {
779 token_length_in_chars
++;
781 if (digit_length
!= 2) {
782 Report
.error (get_source_reference (token_length_in_chars
), "\\x requires two hex digits");
786 Report
.error (get_source_reference (token_length_in_chars
), "invalid escape sequence");
789 } else if (current
[0] == '\n') {
793 token_length_in_chars
= 1;
795 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
796 if (u
!= (unichar
) (-1)) {
797 current
+= u
.to_utf8 (null);
798 token_length_in_chars
++;
801 Report
.error (get_source_reference (token_length_in_chars
), "invalid UTF-8 character");
// the template text was not closed before the end of input
805 if (current
>= end
) {
806 Report
.error (get_source_reference (token_length_in_chars
), "syntax error, expected \"");
807 state_stack
.length
--;
808 return read_token (out token_begin
, out token_end
);
810 state_stack
+= State
.TEMPLATE_PART
;
815 if (token_length_in_chars
< 0) {
816 column
+= (int) (current
- begin
);
818 column
+= token_length_in_chars
;
// column already points past the token, so the last character is at column - 1
821 token_end
= SourceLocation (current
, line
, column
- 1);
/*
 * Reads the next token of a Genie source file.
 *
 * Visible stages, in order:
 *  - EOF with an empty location when current is null;
 *  - delegation to read_template_token or read_regex_token while inside
 *    those states; leaving a TEMPLATE_PART state yields a COMMA;
 *  - outstanding DEDENT tokens (pending_dedents) before anything else;
 *  - explicit (backslash newline) and automatic (newline inside open
 *    parens or braces) line continuations;
 *  - EOL for a newline once parsing has started, unless the previous token
 *    was EOL or SEMICOLON;
 *  - INDENT or DEDENT when count_tabs () differs from indent_level;
 *  - at end of input, DEDENT while indent_level is positive, else EOF;
 *  - identifiers and keywords, @ identifiers and template openers, integer,
 *    real and hexadecimal literals, punctuation and operators, regex
 *    openers, character, string and verbatim string literals.
 * Unexpected or invalid UTF-8 characters are reported through Report.error
 * and scanning continues with a recursive call.
 *
 * NOTE(review): many statements of this method (case labels, pointer
 * increments, closing braces) are not visible in this listing.
 *
 * @param token_begin receives the location where the token starts
 * @param token_end receives the location where the token ends
 * @return the type of the token that was read
 */
827 public TokenType
read_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
// no buffer to scan: report EOF with an empty location
828 if (current
== null) {
829 token_begin
= SourceLocation (current
, line
, column
);
830 token_end
= SourceLocation (current
, line
, column
);
831 return TokenType
.EOF
;
834 if (in_template ()) {
835 return read_template_token (out token_begin
, out token_end
);
836 } else if (in_template_part ()) {
// leaving a template part: pop the state and emit a COMMA that covers no text
837 state_stack
.length
--;
839 token_begin
= SourceLocation (current
, line
, column
);
840 token_end
= SourceLocation (current
, line
, column
- 1);
842 return TokenType
.COMMA
;
843 } else if (in_regex_literal ()) {
844 return read_regex_token (out token_begin
, out token_end
);
849 /* emit dedents if outstanding before checking any other chars */
851 if (pending_dedents
> 0) {
855 token_begin
= SourceLocation (current
, line
, column
);
856 token_end
= SourceLocation (current
, line
, column
);
858 last_token
= TokenType
.DEDENT
;
860 return TokenType
.DEDENT
;
863 if ((_indent_spaces
== 0 ) || (last_token
!= TokenType
.EOL
)) {
864 /* scrub whitespace (excluding newlines) and comments */
869 /* handle explicit line continuation (lines ending with "\") */
870 while (current
< end
&& current
[0] == '\\' && current
[1] == '\n') {
876 /* handle automatic line continuations (when inside parens or braces) */
877 while (current
< end
&& current
[0] == '\n' && (open_parens_count
> 0 || open_brace_count
> 0)) {
884 /* handle non-consecutive new line once parsing is underway - EOL */
885 if (newline () && parse_started
&& last_token
!= TokenType
.EOL
&& last_token
!= TokenType
.SEMICOLON
) {
886 token_begin
= SourceLocation (current
, line
, column
);
887 token_end
= SourceLocation (current
, line
, column
);
889 last_token
= TokenType
.EOL
;
891 return TokenType
.EOL
;
895 while (skip_newlines ()) {
896 token_begin
= SourceLocation (current
, line
, column
);
898 current_indent_level
= count_tabs ();
900 /* if it is an empty new line then ignore */
901 if (current_indent_level
== -1) {
905 if (current_indent_level
> indent_level
) {
906 indent_level
= current_indent_level
;
908 token_end
= SourceLocation (current
, line
, column
);
910 last_token
= TokenType
.INDENT
;
912 return TokenType
.INDENT
;
913 } else if (current_indent_level
< indent_level
) {
916 pending_dedents
= (indent_level
- current_indent_level
);
917 token_end
= SourceLocation (current
, line
, column
);
919 last_token
= TokenType
.DEDENT
;
921 return TokenType
.DEDENT
;
926 char* begin
= current
;
927 token_begin
= SourceLocation (begin
, line
, column
);
// -1 means: derive the column advance from the byte distance current - begin
929 int token_length_in_chars
= -1;
931 parse_started
= true;
// end of input: close any open indentation levels before reporting EOF
933 if (current
>= end
) {
934 if (indent_level
> 0) {
937 pending_dedents
= indent_level
;
939 type
= TokenType
.DEDENT
;
941 type
= TokenType
.EOF
;
943 } else if (current
[0].isalpha () || current
[0] == '_') {
945 while (current
< end
&& is_ident_char (current
[0])) {
949 type
= get_identifier_or_keyword (begin
, len
);
950 } else if (current
[0] == '@') {
951 if (current
< end
- 1 && current
[1] == '"') {
952 type
= TokenType
.OPEN_TEMPLATE
;
954 state_stack
+= State
.TEMPLATE
;
956 token_begin
.pos
++; // @ is not part of the identifier
959 while (current
< end
&& is_ident_char (current
[0])) {
963 type
= TokenType
.IDENTIFIER
;
965 } else if (current
[0].isdigit ()) {
966 while (current
< end
&& current
[0].isdigit ()) {
969 type
= TokenType
.INTEGER_LITERAL
;
970 if (current
< end
&& current
[0].tolower () == 'l') {
972 if (current
< end
&& current
[0].tolower () == 'l') {
975 } else if (current
< end
&& current
[0].tolower () == 'u') {
977 if (current
< end
&& current
[0].tolower () == 'l') {
979 if (current
< end
&& current
[0].tolower () == 'l') {
983 } else if (current
< end
- 1 && current
[0] == '.' && current
[1].isdigit ()) {
985 while (current
< end
&& current
[0].isdigit ()) {
988 if (current
< end
&& current
[0].tolower () == 'e') {
990 if (current
< end
&& (current
[0] == '+' || current
[0] == '-')) {
993 while (current
< end
&& current
[0].isdigit ()) {
997 if (current
< end
&& current
[0].tolower () == 'f') {
1000 type
= TokenType
.REAL_LITERAL
;
1001 } else if (current
< end
&& current
== begin
+ 1
1002 && begin
[0] == '0' && begin
[1] == 'x' && begin
[2].isxdigit ()) {
1003 // hexadecimal integer literal
1005 while (current
< end
&& current
[0].isxdigit ()) {
1008 } else if (current
< end
&& is_ident_char (current
[0])) {
1009 // allow identifiers to start with a digit
1010 // as long as they contain at least one char
1011 while (current
< end
&& is_ident_char (current
[0])) {
1014 type
= TokenType
.IDENTIFIER
;
1017 switch (current
[0]) {
1019 type
= TokenType
.OPEN_BRACE
;
1021 state_stack
+= State
.BRACE
;
1025 type
= TokenType
.CLOSE_BRACE
;
1027 if (state_stack
.length
> 0) {
1028 state_stack
.length
--;
1033 type
= TokenType
.OPEN_PARENS
;
1034 open_parens_count
++;
1035 state_stack
+= State
.PARENS
;
1039 type
= TokenType
.CLOSE_PARENS
;
1040 open_parens_count
--;
1042 if (state_stack
.length
> 0) {
1043 state_stack
.length
--;
1045 if (in_template ()) {
1046 type
= TokenType
.COMMA
;
1050 type
= TokenType
.OPEN_BRACKET
;
1051 state_stack
+= State
.BRACKET
;
1055 type
= TokenType
.CLOSE_BRACKET
;
1056 if (state_stack
.length
> 0) {
1057 state_stack
.length
--;
1062 type
= TokenType
.DOT
;
1064 if (current
< end
- 1) {
1065 if (current
[0] == '.' && current
[1] == '.') {
1066 type
= TokenType
.ELLIPSIS
;
1072 type
= TokenType
.COLON
;
1076 type
= TokenType
.COMMA
;
1080 type
= TokenType
.SEMICOLON
;
1084 type
= TokenType
.HASH
;
1088 type
= TokenType
.INTERR
;
1092 type
= TokenType
.BITWISE_OR
;
1094 if (current
< end
) {
1095 switch (current
[0]) {
1097 type
= TokenType
.ASSIGN_BITWISE_OR
;
1101 type
= TokenType
.OP_OR
;
1108 type
= TokenType
.BITWISE_AND
;
1110 if (current
< end
) {
1111 switch (current
[0]) {
1113 type
= TokenType
.ASSIGN_BITWISE_AND
;
1117 type
= TokenType
.OP_AND
;
1124 type
= TokenType
.CARRET
;
1126 if (current
< end
&& current
[0] == '=') {
1127 type
= TokenType
.ASSIGN_BITWISE_XOR
;
1132 type
= TokenType
.TILDE
;
1136 type
= TokenType
.ASSIGN
;
1138 if (current
< end
) {
1139 switch (current
[0]) {
1141 type
= TokenType
.OP_EQ
;
1145 type
= TokenType
.LAMBDA
;
1152 type
= TokenType
.OP_LT
;
1154 if (current
< end
) {
1155 switch (current
[0]) {
1157 type
= TokenType
.OP_LE
;
1161 type
= TokenType
.OP_SHIFT_LEFT
;
1163 if (current
< end
&& current
[0] == '=') {
1164 type
= TokenType
.ASSIGN_SHIFT_LEFT
;
1172 type
= TokenType
.OP_GT
;
1174 if (current
< end
&& current
[0] == '=') {
1175 type
= TokenType
.OP_GE
;
1180 type
= TokenType
.OP_NEG
;
1182 if (current
< end
&& current
[0] == '=') {
1183 type
= TokenType
.OP_NE
;
1188 type
= TokenType
.PLUS
;
1190 if (current
< end
) {
1191 switch (current
[0]) {
1193 type
= TokenType
.ASSIGN_ADD
;
1197 type
= TokenType
.OP_INC
;
1204 type
= TokenType
.MINUS
;
1206 if (current
< end
) {
1207 switch (current
[0]) {
1209 type
= TokenType
.ASSIGN_SUB
;
1213 type
= TokenType
.OP_DEC
;
1217 type
= TokenType
.OP_PTR
;
1224 type
= TokenType
.STAR
;
1226 if (current
< end
&& current
[0] == '=') {
1227 type
= TokenType
.ASSIGN_MUL
;
// previous tokens after which a regex literal is opened instead of producing DIV
1232 switch (last_token
) {
1233 case TokenType
.ASSIGN
:
1234 case TokenType
.COMMA
:
1235 case TokenType
.MINUS
:
1236 case TokenType
.OP_AND
:
1237 case TokenType
.OP_EQ
:
1238 case TokenType
.OP_GE
:
1239 case TokenType
.OP_GT
:
1240 case TokenType
.OP_INC
:
1241 case TokenType
.OP_LE
:
1242 case TokenType
.OP_LT
:
1243 case TokenType
.OP_NE
:
1244 case TokenType
.OP_NEG
:
1245 case TokenType
.OP_OR
:
1246 case TokenType
.OPEN_BRACE
:
1247 case TokenType
.OPEN_PARENS
:
1248 case TokenType
.PLUS
:
1249 case TokenType
.RETURN
:
1250 type
= TokenType
.OPEN_REGEX_LITERAL
;
1251 state_stack
+= State
.REGEX_LITERAL
;
1255 type
= TokenType
.DIV
;
1257 if (current
< end
&& current
[0] == '=') {
1258 type
= TokenType
.ASSIGN_DIV
;
1266 type
= TokenType
.PERCENT
;
1268 if (current
< end
&& current
[0] == '=') {
1269 type
= TokenType
.ASSIGN_PERCENT
;
1275 if (begin
[0] == '\'') {
1276 type
= TokenType
.CHARACTER_LITERAL
;
1277 } else if (current
< end
- 6 && begin
[1] == '"' && begin
[2] == '"') {
1278 type
= TokenType
.VERBATIM_STRING_LITERAL
;
1279 token_length_in_chars
= 6;
1281 while (current
< end
- 4) {
1282 if (current
[0] == '"' && current
[1] == '"' && current
[2] == '"' && current
[3] != '"') {
1284 } else if (current
[0] == '\n') {
1288 token_length_in_chars
= 3;
1290 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
1291 if (u
!= (unichar
) (-1)) {
1292 current
+= u
.to_utf8 (null);
1293 token_length_in_chars
++;
1295 Report
.error (get_source_reference (token_length_in_chars
), "invalid UTF-8 character");
1299 if (current
[0] == '"' && current
[1] == '"' && current
[2] == '"') {
1302 Report
.error (get_source_reference (token_length_in_chars
), "syntax error, expected \"\"\"");
1306 type
= TokenType
.STRING_LITERAL
;
1308 token_length_in_chars
= 2;
1310 while (current
< end
&& current
[0] != begin
[0]) {
1311 if (current
[0] == '\\') {
1313 token_length_in_chars
++;
1314 if (current
>= end
) {
1318 switch (current
[0]) {
1330 token_length_in_chars
++;
1333 // u escape character has four hex digits
1335 token_length_in_chars
++;
1337 for (digit_length
= 0; digit_length
< 4 && current
< end
&& current
[0].isxdigit (); digit_length
++) {
1339 token_length_in_chars
++;
1341 if (digit_length
!= 4) {
1342 Report
.error (get_source_reference (token_length_in_chars
), "\\u requires four hex digits");
1346 // hexadecimal escape character requires two hex digits
1348 token_length_in_chars
++;
1350 for (digit_length
= 0; digit_length
< 2 && current
< end
&& current
[0].isxdigit (); digit_length
++) {
1352 token_length_in_chars
++;
1354 if (digit_length
!= 2) {
1355 Report
.error (get_source_reference (token_length_in_chars
), "\\x requires two hex digits");
1359 Report
.error (get_source_reference (token_length_in_chars
), "invalid escape sequence");
1362 } else if (current
[0] == '\n') {
1366 token_length_in_chars
= 1;
1368 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
1369 if (u
!= (unichar
) (-1)) {
1370 current
+= u
.to_utf8 (null);
1371 token_length_in_chars
++;
1374 Report
.error (get_source_reference (token_length_in_chars
), "invalid UTF-8 character");
1377 if (current
< end
&& begin
[0] == '\'' && current
[0] != '\'') {
1378 // multiple characters in single character literal
1379 Report
.error (get_source_reference (token_length_in_chars
), "invalid character literal");
1382 if (current
< end
) {
1385 Report
.error (get_source_reference (token_length_in_chars
), "syntax error, expected %c".printf (begin
[0]));
1389 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
1390 if (u
!= (unichar
) (-1)) {
1391 current
+= u
.to_utf8 (null);
1392 Report
.error (get_source_reference (0), "syntax error, unexpected character");
1395 Report
.error (get_source_reference (0), "invalid UTF-8 character");
1398 return read_token (out token_begin
, out token_end
);
1402 if (token_length_in_chars
< 0) {
1403 column
+= (int) (current
- begin
);
1405 column
+= token_length_in_chars
;
1408 token_end
= SourceLocation (current
, line
, column
- 1);
/* NOTE(review): the method header for these statements is not visible in
 * this listing; presumably it is count_tabs (), which read_token calls -
 * confirm. With _indent_spaces equal to 0 the visible code walks over tab
 * characters; otherwise it walks over spaces and sets tab_count to
 * space_count / _indent_spaces. It returns -1 when the position after that
 * is a newline. */
1420 if (_indent_spaces
== 0) {
1421 while (current
< end
&& current
[0] == '\t') {
1427 int space_count
= 0;
1428 while (current
< end
&& current
[0] == ' ') {
1434 tab_count
= space_count
/ _indent_spaces
;
1438 /* ignore comments and whitespace and other lines that contain no code */
1442 if ((current
< end
) && (current
[0] == '\n')) return -1;
/*
 * Compares the bytes at begin with keyword, one byte at a time, for
 * keyword.length bytes.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably false on the first mismatch and true otherwise - confirm.
 * NOTE(review): no visible check limits the reads of begin to the scanned
 * buffer; callers presumably guarantee enough bytes - confirm.
 *
 * @param begin pointer to the text to test
 * @param keyword spelling to compare against
 */
1447 static bool matches (char* begin
, string keyword
) {
1448 char* keyword_array
= (char *) keyword
;
1449 long len
= keyword
.length
;
1450 for (int i
= 0; i
< len
; i
++) {
1451 if (begin
[i
] != keyword_array
[i
]) {
/*
 * Walks over whitespace characters other than newline, then tests for a
 * hash sign in column 1.
 *
 * NOTE(review): the loop body, the body of the hash branch and the return
 * statement are not visible in this listing; the callers use the result as
 * a loop condition, so it presumably reports whether anything was consumed
 * - confirm.
 */
1458 bool whitespace () {
1460 while (current
< end
&& current
[0].isspace () && current
[0] != '\n' ) {
1467 if ((column
== 1) && (current
< end
) && (current
[0] == '#')) {
/*
 * Tests whether the character at current is a newline.
 *
 * NOTE(review): current[0] is read without a visible comparison of current
 * against end; the statements inside the branch are not shown - confirm.
 */
1475 inline
bool newline () {
1476 if (current
[0] == '\n') {
/*
 * Repeats newline () while it succeeds and resets current_indent_level to 0.
 *
 * NOTE(review): the statements that update new_lines and the return
 * statement are not visible in this listing; presumably the result tells
 * whether a newline was seen - confirm.
 */
1483 bool skip_newlines () {
1484 bool new_lines
= false;
1486 while (newline ()) {
1491 current_indent_level
= 0;
/*
 * Tries to consume a comment at the current position.
 *
 * The visible guard rejects positions with fewer than two characters left
 * or that do not start with a slash followed by a slash or a star.
 * A double slash starts a single-line comment, skipped up to the end of
 * the line or the end of the file. A slash followed by a star starts a
 * delimited comment, skipped up to the closing star and slash; reaching
 * end - 1 without finding them is reported as a syntax error. Whenever a
 * source reference was recorded, the comment text is handed to
 * push_comment.
 *
 * NOTE(review): the return statements and several other statements are not
 * visible in this listing.
 *
 * @param file_comment when true, a delimited comment gets a source
 *        reference even if its third character is not a star; the flag is
 *        also passed on to push_comment
 */
1499 bool comment (bool file_comment
= false) {
1501 || current
> end
- 2
1502 || current
[0] != '/'
1503 || (current
[1] != '/' && current
[1] != '*')) {
1508 if (current
[1] == '/') {
1509 // single-line comment
1511 SourceReference source_reference
= null;
1513 source_reference
= get_source_reference (0);
1518 // skip until end of line or end of file
1519 while (current
< end
&& current
[0] != '\n') {
1523 /* do not ignore EOL if comment does not exclusively occupy the line */
1524 if (current
[0] == '\n' && last_token
== TokenType
.EOL
) {
1528 current_indent_level
= 0;
1531 if (source_reference
!= null) {
1532 push_comment (((string) begin
).substring (0, (long) (current
- begin
)), source_reference
, file_comment
);
1536 // delimited comment
1537 SourceReference source_reference
= null;
1538 if (file_comment
&& current
[2] == '*') {
1542 if (current
[2] == '*' || file_comment
) {
1543 source_reference
= get_source_reference (0);
1547 char* begin
= current
;
1549 while (current
< end
- 1
1550 && (current
[0] != '*' || current
[1] != '/')) {
1551 if (current
[0] == '\n') {
1558 if (current
== end
- 1) {
1559 Report
.error (get_source_reference (0), "syntax error, expected */");
1563 if (source_reference
!= null) {
1564 string comment
= ((string) begin
).substring (0, (long) (current
- begin
));
1565 push_comment (comment
, source_reference
, file_comment
);
/* NOTE(review): the method header for this loop is not visible in this
 * listing; presumably it is skip_tabs (), which skip_space_tabs calls -
 * confirm. The loop runs while current is before end and points at a tab
 * character. */
1577 while (current
< end
&& current
[0] == '\t' ) {
/* Keeps calling whitespace (), skip_tabs () and comment () until all three
 * return false in the same round. */
1586 void skip_space_tabs () {
1587 while (whitespace () || skip_tabs () || comment () ) {
/* NOTE(review): the method header for this loop is not visible in this
 * listing - confirm which method it belongs to. The loop keeps calling
 * whitespace () and comment () until both return false. */
1593 while (whitespace () || comment ()) {
/* Consumes leading whitespace and comments, calling comment () with
 * file_comment set to true, until neither whitespace () nor comment ()
 * returns true. */
1597 public void parse_file_comments () {
1598 while (whitespace () || comment (true)) {
/*
 * Records a comment that the scanner has just consumed.
 *
 * Text whose first character is a star becomes the pending _comment; a
 * comment that was already pending is first added to the comments of
 * source_file. A later visible statement adds a new Comment directly to
 * source_file.
 *
 * NOTE(review): the condition guarding that last statement is not visible
 * in this listing, and file_comment is not referenced by any visible line;
 * presumably the two are related - confirm.
 *
 * @param comment_item text of the comment
 * @param source_reference location of the comment
 * @param file_comment see the note above
 */
1603 void push_comment (string comment_item
, SourceReference source_reference
, bool file_comment
) {
1604 if (comment_item
[0] == '*') {
1605 if (_comment
!= null) {
1606 // extra doc comment, add it to source file comments
1607 source_file
.add_comment (_comment
);
1609 _comment
= new
Comment (comment_item
, source_reference
);
1613 source_file
.add_comment (new
Comment (comment_item
, source_reference
));
1619 * Clears and returns the content of the comment stack.
1621 * @return saved comment
/* Hands out the pending comment held in _comment; the result is nullable.
 * NOTE(review): the statements inside the null check, the statement that
 * clears _comment and the return statement are not visible in this
 * listing - confirm. */
1623 public Comment?
pop_comment () {
1624 if (_comment
== null) {
1628 var comment
= _comment
;
/* Walks over whitespace characters other than newline while scanning a
 * preprocessing directive.
 * NOTE(review): the loop body and the return statement are not visible in
 * this listing; the caller uses the result as a loop condition - confirm
 * what it reports. */
1633 bool pp_whitespace () {
1635 while (current
< end
&& current
[0].isspace () && current
[0] != '\n') {
/* NOTE(review): the method header for this loop is not visible in this
 * listing - confirm which method it belongs to. The loop keeps calling
 * pp_whitespace () and comment () until both return false. */
1644 while (pp_whitespace () || comment ()) {
/*
 * Handles a preprocessing directive.
 *
 * Reads the alphanumeric directive name starting at current and compares
 * it with if, elif, else and endif; any other name is reported as an
 * invalid preprocessing directive. Afterwards, when the innermost entry of
 * conditional_stack has skip_section set, the following lines are skipped
 * until the next hash sign found at the beginning of a line, and current
 * is moved back to the start of that line.
 *
 * NOTE(review): the calls made for each recognised name and several other
 * statements are not visible in this listing; the final visible check
 * reports an error when the position is not at a newline.
 */
1648 void pp_directive () {
1655 char* begin
= current
;
1657 while (current
< end
&& current
[0].isalnum ()) {
1663 if (len
== 2 && matches (begin
, "if")) {
1665 } else if (len
== 4 && matches (begin
, "elif")) {
1667 } else if (len
== 4 && matches (begin
, "else")) {
1669 } else if (len
== 5 && matches (begin
, "endif")) {
1672 Report
.error (get_source_reference (-len
, len
), "syntax error, invalid preprocessing directive");
1675 if (conditional_stack
.length
> 0
1676 && conditional_stack
[conditional_stack
.length
- 1].skip_section
) {
1677 // skip lines until next preprocessing directive
1679 while (current
< end
) {
1680 if (bol
&& current
< end
&& current
[0] == '#') {
1681 // go back to begin of line
1682 current
-= (column
- 1);
1686 if (current
[0] == '\n') {
1690 } else if (!current
[0].isspace ()) {
1701 if (current
>= end
|| current
[0] != '\n') {
1702 Report
.error (get_source_reference (0), "syntax error, expected newline");
/*
 * Handles an if directive.
 *
 * Evaluates the condition with parse_pp_expression and appends a new
 * Conditional to conditional_stack. The new entry is marked as matched
 * when the condition holds and the enclosing conditional, if there is one,
 * is not being skipped; the other visible assignment sets skip_section on
 * the new entry.
 */
1706 void parse_pp_if () {
1709 bool condition
= parse_pp_expression ();
1713 conditional_stack
+= Conditional ();
// length == 1 means there is no enclosing conditional
1715 if (condition
&& (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
1716 // condition true => process code within if
1717 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
1719 // skip lines until next preprocessing directive
1720 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
/*
 * Handles an elif directive.
 *
 * Evaluates the condition with parse_pp_expression. An elif without an
 * open conditional, or after the else of the innermost conditional, is
 * reported as a syntax error. The branch is processed (matched set,
 * skip_section cleared) only when the condition holds, no earlier branch
 * of this conditional matched and the enclosing conditional, if there is
 * one, is not being skipped; the other visible assignment sets
 * skip_section.
 */
1724 void parse_pp_elif () {
1727 bool condition
= parse_pp_expression ();
1731 if (conditional_stack
.length
== 0 || conditional_stack
[conditional_stack
.length
- 1].else_found
) {
1732 Report
.error (get_source_reference (0), "syntax error, unexpected #elif");
1736 if (condition
&& !conditional_stack
[conditional_stack
.length
- 1].matched
1737 && (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
1738 // condition true => process code within if
1739 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
1740 conditional_stack
[conditional_stack
.length
- 1].skip_section
= false;
1742 // skip lines until next preprocessing directive
1743 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
/*
 * Handles an else directive.
 *
 * An else without an open conditional, or a second else for the innermost
 * conditional, is reported as a syntax error. The branch is processed
 * (matched set, skip_section cleared) only when no earlier branch of this
 * conditional matched and the enclosing conditional, if there is one, is
 * not being skipped; the other visible assignment sets skip_section.
 *
 * NOTE(review): no visible line sets else_found; presumably a statement
 * that is not shown in this listing does - confirm.
 */
1747 void parse_pp_else () {
1750 if (conditional_stack
.length
== 0 || conditional_stack
[conditional_stack
.length
- 1].else_found
) {
1751 Report
.error (get_source_reference (0), "syntax error, unexpected #else");
1755 if (!conditional_stack
[conditional_stack
.length
- 1].matched
1756 && (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
1757 // condition true => process code within if
1758 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
1759 conditional_stack
[conditional_stack
.length
- 1].skip_section
= false;
1761 // skip lines until next preprocessing directive
1762 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
/* Handles an endif directive: reports a syntax error when no conditional
 * is open; the visible statement after that removes the innermost entry
 * of conditional_stack. */
1766 void parse_pp_endif () {
1769 if (conditional_stack
.length
== 0) {
1770 Report
.error (get_source_reference (0), "syntax error, unexpected #endif");
1774 conditional_stack
.length
--;
/*
 * Parses a symbol inside a preprocessing expression.
 *
 * Walks over identifier characters, reports a syntax error when an
 * identifier was expected, and takes the len bytes before current as the
 * identifier. The spellings true and false are tested explicitly; any
 * other identifier is looked up with source_file.context.is_defined ().
 *
 * NOTE(review): the statements computing len, the results for true and
 * false and the return statement are not visible in this listing.
 */
1777 bool parse_pp_symbol () {
1779 while (current
< end
&& is_ident_char (current
[0])) {
1786 Report
.error (get_source_reference (0), "syntax error, expected identifier");
1790 string identifier
= ((string) (current
- len
)).substring (0, len
);
1792 if (identifier
== "true") {
1794 } else if (identifier
== "false") {
1797 defined
= source_file
.context
.is_defined (identifier
);
/*
 * Parses a primary preprocessing expression: a symbol or a parenthesised
 * expression.
 *
 * Reports a syntax error at end of input, when a parenthesised expression
 * is not followed by a closing parenthesis, and when the current character
 * starts neither form.
 *
 * NOTE(review): the values returned on the error paths and for the
 * parenthesised form are not visible in this listing.
 */
1803 bool parse_pp_primary_expression () {
1804 if (current
>= end
) {
1805 Report
.error (get_source_reference (0), "syntax error, expected identifier");
1806 } else if (is_ident_char (current
[0])) {
1807 return parse_pp_symbol ();
1808 } else if (current
[0] == '(') {
1812 bool result
= parse_pp_expression ();
1814 if (current
< end
&& current
[0] == ')') {
1818 Report
.error (get_source_reference (0), "syntax error, expected `)'");
1822 Report
.error (get_source_reference (0), "syntax error, expected identifier");
/* Parses a unary preprocessing expression: an exclamation mark negates the
 * unary expression that follows it (recursively); anything else is parsed
 * as a primary expression. */
1827 bool parse_pp_unary_expression () {
1828 if (current
< end
&& current
[0] == '!') {
1832 return !parse_pp_unary_expression ();
1835 return parse_pp_primary_expression ();
/*
 * Parses an equality preprocessing expression.
 *
 * Starts from a unary expression; a following == compares it for equality
 * with the next unary expression and a following != compares it for
 * inequality, the result being kept in left.
 *
 * NOTE(review): the enclosing loop, if any, and the return statement are
 * not visible in this listing.
 */
1838 bool parse_pp_equality_expression () {
1839 bool left
= parse_pp_unary_expression ();
1842 if (current
< end
- 1 && current
[0] == '=' && current
[1] == '=') {
1846 bool right
= parse_pp_unary_expression ();
1847 left
= (left
== right
);
1848 } else if (current
< end
- 1 && current
[0] == '!' && current
[1] == '=') {
1852 bool right
= parse_pp_unary_expression ();
1853 left
= (left
!= right
);
/*
 * Parses a conjunction of equality expressions.
 *
 * While two ampersands follow, the next equality expression is parsed
 * first and then combined into left, so the right operand is always
 * consumed from the input even when left is already false.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
1861 bool parse_pp_and_expression () {
1862 bool left
= parse_pp_equality_expression ();
1864 while (current
< end
- 1 && current
[0] == '&' && current
[1] == '&') {
1868 bool right
= parse_pp_equality_expression ();
1869 left
= left
&& right
;
/*
 * Parses a disjunction of conjunctions.
 *
 * While two vertical bars follow, the next conjunction is parsed first and
 * then combined into left, so the right operand is always consumed from
 * the input even when left is already true.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
1874 bool parse_pp_or_expression () {
1875 bool left
= parse_pp_and_expression ();
1877 while (current
< end
- 1 && current
[0] == '|' && current
[1] == '|') {
1881 bool right
= parse_pp_and_expression ();
1882 left
= left
|| right
;
/* Entry point for preprocessing expressions; delegates to
 * parse_pp_or_expression, the lowest-precedence level visible here. */
1887 bool parse_pp_expression () {
1888 return parse_pp_or_expression ();