2 // cs-tokenizer.cs: The Tokenizer for the C# compiler
3 // This also implements the preprocessor
5 // Author: Miguel de Icaza (miguel@gnu.org)
6 // Marek Safar (marek.safar@seznam.cz)
8 // Dual licensed under the terms of the MIT X11 or GNU GPL
10 // Copyright 2001, 2002 Ximian, Inc (http://www.ximian.com)
11 // Copyright 2004-2008 Novell, Inc
17 using System
.Collections
;
19 using System
.Globalization
;
20 using System
.Reflection
;
25 /// Tokenizer for C# source code.
28 public class Tokenizer
: yyParser
.yyInput
32 public readonly int Token
;
33 public KeywordEntry Next
;
34 public readonly char[] Value
;
36 public KeywordEntry (string value, int token
)
38 this.Value
= value.ToCharArray ();
43 SeekableStreamReader reader
;
45 CompilationUnit file_name
;
46 CompilerContext context
;
53 bool handle_get_set
= false;
54 bool handle_remove_add
= false;
55 bool handle_where
= false;
56 bool handle_typeof
= false;
57 bool lambda_arguments_parsing
;
58 Location current_comment_location
= Location
.Null
;
59 ArrayList escaped_identifiers
;
60 int parsing_generic_less_than
;
63 // Used mainly for parser optimizations. Some expressions for instance
64 // can appear only in block (including initializer, base initializer)
67 public int parsing_block
;
68 internal bool query_parsing
;
71 // When parsing type only, useful for ambiguous nullable types
73 public int parsing_type
;
76 // Set when parsing generic declaration (type or method header)
78 public bool parsing_generic_declaration
;
81 // The value indicates that we have not reached any declaration or
84 public int parsing_declaration
;
87 // The special character to inject on streams to trigger the EXPRESSION_PARSE
88 // token to be returned. It just happens to be a Unicode character that
89 // would never be part of a program (can not be an identifier).
91 // This character is only tested just before the tokenizer is about to report
92 // an error; So on the regular operation mode, this addition will have no
93 // impact on the tokenizer's performance.
96 public const int EvalStatementParserCharacter
= 0x2190; // Unicode Left Arrow
97 public const int EvalCompilationUnitParserCharacter
= 0x2191; // Unicode Arrow
98 public const int EvalUsingDeclarationsParserCharacter
= 0x2192; // Unicode Arrow
101 // XML documentation buffer. The save point is used to divide
102 // comments on types and comments on members.
104 StringBuilder xml_comment_buffer
;
107 // See comment on XmlCommentState enumeration.
109 XmlCommentState xml_doc_state
= XmlCommentState
.Allowed
;
112 // Whether tokens have been seen on this line
114 bool tokens_seen
= false;
117 // Set to true once the GENERATE_COMPLETION token has been
118 // returned. This helps produce one GENERATE_COMPLETION,
119 // as many COMPLETE_COMPLETION as necessary to complete the
120 // AST tree and one final EOF.
125 // Whether a token has been seen on the file
126 // This is needed because `define' is not allowed to be used
127 // after a token has been seen.
129 bool any_token_seen
= false;
131 static readonly char[] simple_whitespaces
= new char[] { ' ', '\t' }
;
133 public bool PropertyParsing
{
134 get { return handle_get_set; }
135 set { handle_get_set = value; }
138 public bool EventParsing
{
139 get { return handle_remove_add; }
140 set { handle_remove_add = value; }
143 public bool ConstraintsParsing
{
144 get { return handle_where; }
145 set { handle_where = value; }
148 public bool TypeOfParsing
{
149 get { return handle_typeof; }
150 set { handle_typeof = value; }
153 public XmlCommentState doc_state
{
154 get { return xml_doc_state; }
156 if (value == XmlCommentState
.Allowed
) {
157 check_incorrect_doc_comment ();
158 reset_doc_comment ();
160 xml_doc_state
= value;
165 // This is used to trigger completion generation on the parser
166 public bool CompleteOnEOF
;
168 void AddEscapedIdentifier (LocatedToken lt
)
170 if (escaped_identifiers
== null)
171 escaped_identifiers
= new ArrayList ();
173 escaped_identifiers
.Add (lt
);
176 public bool IsEscapedIdentifier (Location loc
)
178 if (escaped_identifiers
!= null) {
179 foreach (LocatedToken lt
in escaped_identifiers
)
180 if (lt
.Location
.Equals (loc
))
190 static KeywordEntry
[][] keywords
;
191 static Hashtable keyword_strings
;
192 static NumberStyles styles
;
193 static NumberFormatInfo csharp_format_info
;
196 // Values for the associated token returned
198 internal int putback_char
; // Used by repl only
204 const int TAKING
= 1;
205 const int ELSE_SEEN
= 4;
206 const int PARENT_TAKING
= 8;
207 const int REGION
= 16;
210 // pre-processor if stack state:
214 static System
.Text
.StringBuilder string_builder
;
216 const int max_id_size
= 512;
217 static char [] id_builder
= new char [max_id_size
];
219 static CharArrayHashtable
[] identifiers
= new CharArrayHashtable
[max_id_size
+ 1];
221 const int max_number_size
= 512;
222 static char [] number_builder
= new char [max_number_size
];
223 static int number_pos
;
226 // Details about the error encountered by the tokenizer
228 string error_details
;
230 public string error
{
232 return error_details
;
243 // This is used when the tokenizer needs to save
244 // the current position as it needs to do some parsing
245 // on its own to disambiguate a token on behalf of the
248 Stack position_stack
= new Stack (2);
255 public int putback_char
;
256 public int previous_col
;
257 public Stack ifstack
;
258 public int parsing_generic_less_than
;
259 public int current_token
;
261 public Position (Tokenizer t
)
263 position
= t
.reader
.Position
;
265 ref_line
= t
.ref_line
;
268 putback_char
= t
.putback_char
;
269 previous_col
= t
.previous_col
;
270 if (t
.ifstack
!= null && t
.ifstack
.Count
!= 0)
271 ifstack
= (Stack
)t
.ifstack
.Clone ();
272 parsing_generic_less_than
= t
.parsing_generic_less_than
;
273 current_token
= t
.current_token
;
277 public void PushPosition ()
279 position_stack
.Push (new Position (this));
282 public void PopPosition ()
284 Position p
= (Position
) position_stack
.Pop ();
286 reader
.Position
= p
.position
;
287 ref_line
= p
.ref_line
;
291 putback_char
= p
.putback_char
;
292 previous_col
= p
.previous_col
;
294 parsing_generic_less_than
= p
.parsing_generic_less_than
;
295 current_token
= p
.current_token
;
298 // Do not reset the position, ignore it.
299 public void DiscardPosition ()
301 position_stack
.Pop ();
304 static void AddKeyword (string kw
, int token
)
306 keyword_strings
.Add (kw
, kw
);
308 int length
= kw
.Length
;
309 if (keywords
[length
] == null) {
310 keywords
[length
] = new KeywordEntry
['z' - '_' + 1];
313 int char_index
= kw
[0] - '_';
314 KeywordEntry kwe
= keywords
[length
] [char_index
];
316 keywords
[length
] [char_index
] = new KeywordEntry (kw
, token
);
320 while (kwe
.Next
!= null) {
324 kwe
.Next
= new KeywordEntry (kw
, token
);
327 static void InitTokens ()
329 keyword_strings
= new Hashtable ();
331 // 11 is the length of the longest keyword for now
332 keywords
= new KeywordEntry
[11] [];
334 AddKeyword ("__arglist", Token
.ARGLIST
);
335 AddKeyword ("abstract", Token
.ABSTRACT
);
336 AddKeyword ("as", Token
.AS
);
337 AddKeyword ("add", Token
.ADD
);
338 AddKeyword ("base", Token
.BASE
);
339 AddKeyword ("bool", Token
.BOOL
);
340 AddKeyword ("break", Token
.BREAK
);
341 AddKeyword ("byte", Token
.BYTE
);
342 AddKeyword ("case", Token
.CASE
);
343 AddKeyword ("catch", Token
.CATCH
);
344 AddKeyword ("char", Token
.CHAR
);
345 AddKeyword ("checked", Token
.CHECKED
);
346 AddKeyword ("class", Token
.CLASS
);
347 AddKeyword ("const", Token
.CONST
);
348 AddKeyword ("continue", Token
.CONTINUE
);
349 AddKeyword ("decimal", Token
.DECIMAL
);
350 AddKeyword ("default", Token
.DEFAULT
);
351 AddKeyword ("delegate", Token
.DELEGATE
);
352 AddKeyword ("do", Token
.DO
);
353 AddKeyword ("double", Token
.DOUBLE
);
354 AddKeyword ("else", Token
.ELSE
);
355 AddKeyword ("enum", Token
.ENUM
);
356 AddKeyword ("event", Token
.EVENT
);
357 AddKeyword ("explicit", Token
.EXPLICIT
);
358 AddKeyword ("extern", Token
.EXTERN
);
359 AddKeyword ("false", Token
.FALSE
);
360 AddKeyword ("finally", Token
.FINALLY
);
361 AddKeyword ("fixed", Token
.FIXED
);
362 AddKeyword ("float", Token
.FLOAT
);
363 AddKeyword ("for", Token
.FOR
);
364 AddKeyword ("foreach", Token
.FOREACH
);
365 AddKeyword ("goto", Token
.GOTO
);
366 AddKeyword ("get", Token
.GET
);
367 AddKeyword ("if", Token
.IF
);
368 AddKeyword ("implicit", Token
.IMPLICIT
);
369 AddKeyword ("in", Token
.IN
);
370 AddKeyword ("int", Token
.INT
);
371 AddKeyword ("interface", Token
.INTERFACE
);
372 AddKeyword ("internal", Token
.INTERNAL
);
373 AddKeyword ("is", Token
.IS
);
374 AddKeyword ("lock", Token
.LOCK
);
375 AddKeyword ("long", Token
.LONG
);
376 AddKeyword ("namespace", Token
.NAMESPACE
);
377 AddKeyword ("new", Token
.NEW
);
378 AddKeyword ("null", Token
.NULL
);
379 AddKeyword ("object", Token
.OBJECT
);
380 AddKeyword ("operator", Token
.OPERATOR
);
381 AddKeyword ("out", Token
.OUT
);
382 AddKeyword ("override", Token
.OVERRIDE
);
383 AddKeyword ("params", Token
.PARAMS
);
384 AddKeyword ("private", Token
.PRIVATE
);
385 AddKeyword ("protected", Token
.PROTECTED
);
386 AddKeyword ("public", Token
.PUBLIC
);
387 AddKeyword ("readonly", Token
.READONLY
);
388 AddKeyword ("ref", Token
.REF
);
389 AddKeyword ("remove", Token
.REMOVE
);
390 AddKeyword ("return", Token
.RETURN
);
391 AddKeyword ("sbyte", Token
.SBYTE
);
392 AddKeyword ("sealed", Token
.SEALED
);
393 AddKeyword ("set", Token
.SET
);
394 AddKeyword ("short", Token
.SHORT
);
395 AddKeyword ("sizeof", Token
.SIZEOF
);
396 AddKeyword ("stackalloc", Token
.STACKALLOC
);
397 AddKeyword ("static", Token
.STATIC
);
398 AddKeyword ("string", Token
.STRING
);
399 AddKeyword ("struct", Token
.STRUCT
);
400 AddKeyword ("switch", Token
.SWITCH
);
401 AddKeyword ("this", Token
.THIS
);
402 AddKeyword ("throw", Token
.THROW
);
403 AddKeyword ("true", Token
.TRUE
);
404 AddKeyword ("try", Token
.TRY
);
405 AddKeyword ("typeof", Token
.TYPEOF
);
406 AddKeyword ("uint", Token
.UINT
);
407 AddKeyword ("ulong", Token
.ULONG
);
408 AddKeyword ("unchecked", Token
.UNCHECKED
);
409 AddKeyword ("unsafe", Token
.UNSAFE
);
410 AddKeyword ("ushort", Token
.USHORT
);
411 AddKeyword ("using", Token
.USING
);
412 AddKeyword ("virtual", Token
.VIRTUAL
);
413 AddKeyword ("void", Token
.VOID
);
414 AddKeyword ("volatile", Token
.VOLATILE
);
415 AddKeyword ("while", Token
.WHILE
);
416 AddKeyword ("partial", Token
.PARTIAL
);
417 AddKeyword ("where", Token
.WHERE
);
420 AddKeyword ("from", Token
.FROM
);
421 AddKeyword ("join", Token
.JOIN
);
422 AddKeyword ("on", Token
.ON
);
423 AddKeyword ("equals", Token
.EQUALS
);
424 AddKeyword ("select", Token
.SELECT
);
425 AddKeyword ("group", Token
.GROUP
);
426 AddKeyword ("by", Token
.BY
);
427 AddKeyword ("let", Token
.LET
);
428 AddKeyword ("orderby", Token
.ORDERBY
);
429 AddKeyword ("ascending", Token
.ASCENDING
);
430 AddKeyword ("descending", Token
.DESCENDING
);
431 AddKeyword ("into", Token
.INTO
);
440 csharp_format_info
= NumberFormatInfo
.InvariantInfo
;
441 styles
= NumberStyles
.Float
;
443 string_builder
= new System
.Text
.StringBuilder ();
446 int GetKeyword (char[] id
, int id_len
)
449 // Keywords are stored in an array of arrays grouped by their
450 // length and then by the first character
452 if (id_len
>= keywords
.Length
|| keywords
[id_len
] == null)
455 int first_index
= id
[0] - '_';
456 if (first_index
> 'z')
459 KeywordEntry kwe
= keywords
[id_len
] [first_index
];
466 for (int i
= 1; i
< id_len
; ++i
) {
467 if (id
[i
] != kwe
.Value
[i
]) {
473 } while (kwe
!= null && res
== 0);
487 if (!handle_remove_add
)
491 if (parsing_declaration
== 0)
492 res
= Token
.EXTERN_ALIAS
;
495 if (peek_token () == Token
.COLON
) {
497 res
= Token
.DEFAULT_COLON
;
501 if (!handle_where
&& !query_parsing
)
506 // A query expression is any expression that starts with `from identifier'
507 // followed by any token except ; , =
509 if (!query_parsing
) {
510 if (lambda_arguments_parsing
) {
516 // HACK: to disable generics micro-parser, because PushPosition does not
517 // store identifiers array
518 parsing_generic_less_than
= 1;
520 case Token
.IDENTIFIER
:
532 next_token
= xtoken ();
533 if (next_token
== Token
.SEMICOLON
|| next_token
== Token
.COMMA
|| next_token
== Token
.EQUALS
)
536 res
= Token
.FROM_FIRST
;
537 query_parsing
= true;
538 if (RootContext
.Version
<= LanguageVersion
.ISO_2
)
539 Report
.FeatureIsNotAvailable (Location
, "query expressions");
542 Expression
.Error_VoidInvalidInTheContext (Location
, Report
);
546 // HACK: A token is not a keyword so we need to restore identifiers buffer
547 // which has been overwritten before we grabbed the identifier
548 id_builder
[0] = 'f'; id_builder
[1] = 'r'; id_builder
[2] = 'o'; id_builder
[3] = 'm';
562 case Token
.ASCENDING
:
563 case Token
.DESCENDING
:
570 case Token
.NAMESPACE
:
571 // TODO: some explanation needed
572 check_incorrect_doc_comment ();
576 if (parsing_block
> 0) {
581 // Save current position and parse next token.
584 next_token
= token ();
585 bool ok
= (next_token
== Token
.CLASS
) ||
586 (next_token
== Token
.STRUCT
) ||
587 (next_token
== Token
.INTERFACE
) ||
588 (next_token
== Token
.VOID
);
593 if (next_token
== Token
.VOID
) {
594 if (RootContext
.Version
== LanguageVersion
.ISO_1
||
595 RootContext
.Version
== LanguageVersion
.ISO_2
)
596 Report
.FeatureIsNotAvailable (Location
, "partial methods");
597 } else if (RootContext
.Version
== LanguageVersion
.ISO_1
)
598 Report
.FeatureIsNotAvailable (Location
, "partial types");
603 if (next_token
< Token
.LAST_KEYWORD
) {
604 Report
.Error (267, Location
,
605 "The `partial' modifier can be used only immediately before `class', `struct', `interface', or `void' keyword");
616 public Location Location
{
618 return new Location (ref_line
, hidden
? -1 : col
);
622 public Tokenizer (SeekableStreamReader input
, CompilationUnit file
, CompilerContext ctx
)
624 this.ref_name
= file
;
625 this.file_name
= file
;
631 xml_comment_buffer
= new StringBuilder ();
634 // FIXME: This could be `Location.Push' but we have to
635 // find out why the MS compiler allows this
637 Mono
.CSharp
.Location
.Push (file
, file
);
640 static bool is_identifier_start_character (int c
)
642 return (c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || c
== '_' || Char
.IsLetter ((char)c
);
645 static bool is_identifier_part_character (char c
)
647 if (c
>= 'a' && c
<= 'z')
650 if (c
>= 'A' && c
<= 'Z')
653 if (c
== '_' || (c
>= '0' && c
<= '9'))
659 return Char
.IsLetter (c
) || Char
.GetUnicodeCategory (c
) == UnicodeCategory
.ConnectorPunctuation
;
662 public static bool IsKeyword (string s
)
664 return keyword_strings
[s
] != null;
668 // Open parens micro parser. Detects both lambda and cast ambiguity.
671 int TokenizeOpenParens ()
676 int bracket_level
= 0;
677 bool is_type
= false;
678 bool can_be_type
= false;
681 ptoken
= current_token
;
684 switch (current_token
) {
685 case Token
.CLOSE_PARENS
:
689 // Expression inside parens is lambda, (int i) =>
691 if (current_token
== Token
.ARROW
) {
692 if (RootContext
.Version
<= LanguageVersion
.ISO_2
)
693 Report
.FeatureIsNotAvailable (Location
, "lambda expressions");
695 return Token
.OPEN_PARENS_LAMBDA
;
699 // Expression inside parens is single type, (int[])
702 return Token
.OPEN_PARENS_CAST
;
705 // Expression is possible cast, look at next token, (T)null
708 switch (current_token
) {
709 case Token
.OPEN_PARENS
:
712 case Token
.IDENTIFIER
:
713 case Token
.LITERAL_INTEGER
:
714 case Token
.LITERAL_FLOAT
:
715 case Token
.LITERAL_DOUBLE
:
716 case Token
.LITERAL_DECIMAL
:
717 case Token
.LITERAL_CHARACTER
:
718 case Token
.LITERAL_STRING
:
731 case Token
.UNCHECKED
:
736 // These can be part of a member access
750 return Token
.OPEN_PARENS_CAST
;
753 return Token
.OPEN_PARENS
;
756 case Token
.DOUBLE_COLON
:
757 if (ptoken
!= Token
.IDENTIFIER
&& ptoken
!= Token
.OP_GENERICS_GT
)
762 case Token
.IDENTIFIER
:
765 case Token
.OP_GENERICS_LT
:
767 case Token
.DOUBLE_COLON
:
769 if (bracket_level
== 0)
773 can_be_type
= is_type
= false;
793 if (bracket_level
== 0)
798 if (bracket_level
== 0) {
800 can_be_type
= is_type
= false;
804 case Token
.OP_GENERICS_LT
:
805 case Token
.OPEN_BRACKET
:
806 if (bracket_level
++ == 0)
810 case Token
.OP_GENERICS_GT
:
811 case Token
.CLOSE_BRACKET
:
815 case Token
.INTERR_NULLABLE
:
817 if (bracket_level
== 0)
823 can_be_type
= is_type
= false;
827 return Token
.OPEN_PARENS
;
832 public static bool IsValidIdentifier (string s
)
834 if (s
== null || s
.Length
== 0)
837 if (!is_identifier_start_character (s
[0]))
840 for (int i
= 1; i
< s
.Length
; i
++)
841 if (! is_identifier_part_character (s
[i
]))
847 bool parse_less_than ()
850 int the_token
= token ();
851 if (the_token
== Token
.OPEN_BRACKET
) {
853 the_token
= token ();
854 } while (the_token
!= Token
.CLOSE_BRACKET
);
855 the_token
= token ();
856 } else if (the_token
== Token
.IN
|| the_token
== Token
.OUT
) {
857 the_token
= token ();
860 case Token
.IDENTIFIER
:
878 case Token
.OP_GENERICS_GT
:
885 the_token
= token ();
887 if (the_token
== Token
.OP_GENERICS_GT
)
889 else if (the_token
== Token
.COMMA
|| the_token
== Token
.DOT
|| the_token
== Token
.DOUBLE_COLON
)
891 else if (the_token
== Token
.INTERR_NULLABLE
|| the_token
== Token
.STAR
)
893 else if (the_token
== Token
.OP_GENERICS_LT
) {
894 if (!parse_less_than ())
897 } else if (the_token
== Token
.OPEN_BRACKET
) {
899 the_token
= token ();
900 if (the_token
== Token
.CLOSE_BRACKET
)
902 else if (the_token
== Token
.COMMA
)
903 goto rank_specifiers
;
910 bool parse_generic_dimension (out int dimension
)
915 int the_token
= token ();
916 if (the_token
== Token
.OP_GENERICS_GT
)
918 else if (the_token
== Token
.COMMA
) {
926 public int peek_token ()
931 the_token
= token ();
938 // Tokenizes `?' using custom disambiguation rules to return one
939 // of the following tokens: INTERR_NULLABLE, OP_COALESCING, INTERR
941 // Tricky expression look like:
943 // Foo ? a = x ? b : c;
945 int TokenizePossibleNullableType ()
947 if (parsing_block
== 0 || parsing_type
> 0)
948 return Token
.INTERR_NULLABLE
;
950 int d
= peek_char ();
953 return Token
.OP_COALESCING
;
956 switch (current_token
) {
957 case Token
.CLOSE_PARENS
:
961 case Token
.LITERAL_INTEGER
:
962 case Token
.LITERAL_STRING
:
967 if (d
== ',' || d
== ';' || d
== '>')
968 return Token
.INTERR_NULLABLE
;
969 if (d
== '*' || (d
>= '0' && d
<= '9'))
974 current_token
= Token
.NONE
;
977 case Token
.LITERAL_INTEGER
:
978 case Token
.LITERAL_STRING
:
979 case Token
.LITERAL_CHARACTER
:
980 case Token
.LITERAL_DECIMAL
:
981 case Token
.LITERAL_DOUBLE
:
982 case Token
.LITERAL_FLOAT
:
988 next_token
= Token
.INTERR
;
991 case Token
.SEMICOLON
:
993 case Token
.CLOSE_PARENS
:
994 case Token
.OPEN_BRACKET
:
995 case Token
.OP_GENERICS_GT
:
996 next_token
= Token
.INTERR_NULLABLE
;
1004 if (next_token
== -1) {
1005 switch (xtoken ()) {
1007 case Token
.SEMICOLON
:
1008 case Token
.OPEN_BRACE
:
1009 case Token
.CLOSE_PARENS
:
1011 next_token
= Token
.INTERR_NULLABLE
;
1015 next_token
= Token
.INTERR
;
1023 // All shortcuts failed, do it the hard way
1025 while ((ntoken
= xtoken ()) != Token
.EOF
) {
1026 if (ntoken
== Token
.SEMICOLON
)
1029 if (ntoken
== Token
.COLON
) {
1030 if (++colons
== interrs
)
1035 if (ntoken
== Token
.INTERR
) {
1041 next_token
= colons
!= interrs
? Token
.INTERR_NULLABLE
: Token
.INTERR
;
1050 bool decimal_digits (int c
)
1053 bool seen_digits
= false;
1056 if (number_pos
== max_number_size
)
1057 Error_NumericConstantTooLong ();
1058 number_builder
[number_pos
++] = (char) c
;
1062 // We use peek_char2, because decimal_digits needs to do a
1063 // 2-character look-ahead (5.ToString for example).
1065 while ((d
= peek_char2 ()) != -1){
1066 if (d
>= '0' && d
<= '9'){
1067 if (number_pos
== max_number_size
)
1068 Error_NumericConstantTooLong ();
1069 number_builder
[number_pos
++] = (char) d
;
1079 static bool is_hex (int e
)
1081 return (e
>= '0' && e
<= '9') || (e
>= 'A' && e
<= 'F') || (e
>= 'a' && e
<= 'f');
1084 static int real_type_suffix (int c
)
1090 t
= Token
.LITERAL_FLOAT
;
1093 t
= Token
.LITERAL_DOUBLE
;
1096 t
= Token
.LITERAL_DECIMAL
;
1104 int integer_type_suffix (ulong ul
, int c
)
1106 bool is_unsigned
= false;
1107 bool is_long
= false;
1110 bool scanning
= true;
1123 // if we have not seen anything in between
1124 // report this error
1126 Report
.Warning (78, 4, Location
, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
1129 // This goto statement causes the MS CLR 2.0 beta 1 csc to report an error, so
1130 // work around that.
1154 if (is_long
&& is_unsigned
){
1156 return Token
.LITERAL_INTEGER
;
1157 } else if (is_unsigned
){
1158 // uint if possible, or ulong else.
1160 if ((ul
& 0xffffffff00000000) == 0)
1164 } else if (is_long
){
1165 // long if possible, ulong otherwise
1166 if ((ul
& 0x8000000000000000) != 0)
1171 // int, uint, long or ulong in that order
1172 if ((ul
& 0xffffffff00000000) == 0){
1173 uint ui
= (uint) ul
;
1175 if ((ui
& 0x80000000) != 0)
1180 if ((ul
& 0x8000000000000000) != 0)
1186 return Token
.LITERAL_INTEGER
;
1190 // given `c' as the next char in the input decide whether
1191 // we need to convert to a special type, and then choose
1192 // the best representation for the integer
1194 int adjust_int (int c
)
1197 if (number_pos
> 9){
1198 ulong ul
= (uint) (number_builder
[0] - '0');
1200 for (int i
= 1; i
< number_pos
; i
++){
1201 ul
= checked ((ul
* 10) + ((uint)(number_builder
[i
] - '0')));
1203 return integer_type_suffix (ul
, c
);
1205 uint ui
= (uint) (number_builder
[0] - '0');
1207 for (int i
= 1; i
< number_pos
; i
++){
1208 ui
= checked ((ui
* 10) + ((uint)(number_builder
[i
] - '0')));
1210 return integer_type_suffix (ui
, c
);
1212 } catch (OverflowException
) {
1213 error_details
= "Integral constant is too large";
1214 Report
.Error (1021, Location
, error_details
);
1216 return Token
.LITERAL_INTEGER
;
1218 catch (FormatException
) {
1219 Report
.Error (1013, Location
, "Invalid number");
1221 return Token
.LITERAL_INTEGER
;
1225 int adjust_real (int t
)
1227 string s
= new String (number_builder
, 0, number_pos
);
1228 const string error_details
= "Floating-point constant is outside the range of type `{0}'";
1231 case Token
.LITERAL_DECIMAL
:
1233 val
= System
.Decimal
.Parse (s
, styles
, csharp_format_info
);
1234 } catch (OverflowException
) {
1236 Report
.Error (594, Location
, error_details
, "decimal");
1239 case Token
.LITERAL_FLOAT
:
1241 val
= float.Parse (s
, styles
, csharp_format_info
);
1242 } catch (OverflowException
) {
1244 Report
.Error (594, Location
, error_details
, "float");
1248 case Token
.LITERAL_DOUBLE
:
1250 t
= Token
.LITERAL_DOUBLE
;
1252 val
= System
.Double
.Parse (s
, styles
, csharp_format_info
);
1253 } catch (OverflowException
) {
1255 Report
.Error (594, Location
, error_details
, "double");
1268 while ((d
= peek_char ()) != -1){
1270 number_builder
[number_pos
++] = (char) d
;
1276 string s
= new String (number_builder
, 0, number_pos
);
1278 if (number_pos
<= 8)
1279 ul
= System
.UInt32
.Parse (s
, NumberStyles
.HexNumber
);
1281 ul
= System
.UInt64
.Parse (s
, NumberStyles
.HexNumber
);
1282 } catch (OverflowException
){
1283 error_details
= "Integral constant is too large";
1284 Report
.Error (1021, Location
, error_details
);
1286 return Token
.LITERAL_INTEGER
;
1288 catch (FormatException
) {
1289 Report
.Error (1013, Location
, "Invalid number");
1291 return Token
.LITERAL_INTEGER
;
1294 return integer_type_suffix (ul
, peek_char ());
1298 // Invoked if we know we have .digits or digits
1300 int is_number (int c
)
1302 bool is_real
= false;
1307 if (c
>= '0' && c
<= '9'){
1309 int peek
= peek_char ();
1311 if (peek
== 'x' || peek
== 'X')
1312 return handle_hex ();
1319 // We need to handle the case of
1320 // "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)
1323 if (decimal_digits ('.')){
1329 return adjust_int (-1);
1333 if (c
== 'e' || c
== 'E'){
1335 if (number_pos
== max_number_size
)
1336 Error_NumericConstantTooLong ();
1337 number_builder
[number_pos
++] = 'e';
1341 if (number_pos
== max_number_size
)
1342 Error_NumericConstantTooLong ();
1343 number_builder
[number_pos
++] = '+';
1345 } else if (c
== '-') {
1346 if (number_pos
== max_number_size
)
1347 Error_NumericConstantTooLong ();
1348 number_builder
[number_pos
++] = '-';
1351 if (number_pos
== max_number_size
)
1352 Error_NumericConstantTooLong ();
1353 number_builder
[number_pos
++] = '+';
1360 type
= real_type_suffix (c
);
1361 if (type
== Token
.NONE
&& !is_real
){
1363 return adjust_int (c
);
1367 if (type
== Token
.NONE
){
1372 return adjust_real (type
);
1374 Console
.WriteLine ("This should not be reached");
1375 throw new Exception ("Is Number should never reach this point");
1379 // Accepts exactly count (4 or 8) hex, no more no less
1381 int getHex (int count
, out int surrogate
, out bool error
)
1386 int top
= count
!= -1 ? count
: 4;
1391 for (i
= 0; i
< top
; i
++){
1394 if (c
>= '0' && c
<= '9')
1395 c
= (int) c
- (int) '0';
1396 else if (c
>= 'A' && c
<= 'F')
1397 c
= (int) c
- (int) 'A' + 10;
1398 else if (c
>= 'a' && c
<= 'f')
1399 c
= (int) c
- (int) 'a' + 10;
1405 total
= (total
* 16) + c
;
1407 int p
= peek_char ();
1410 if (!is_hex ((char)p
))
1416 if (total
> 0x0010FFFF) {
1421 if (total
>= 0x00010000) {
1422 surrogate
= ((total
- 0x00010000) % 0x0400 + 0xDC00);
1423 total
= ((total
- 0x00010000) / 0x0400 + 0xD800);
1430 int escape (int c
, out int surrogate
)
1466 v
= getHex (-1, out surrogate
, out error
);
1472 return EscapeUnicode (d
, out surrogate
);
1475 Report
.Error (1009, Location
, "Unrecognized escape sequence `\\{0}'", ((char)d
).ToString ());
1484 int EscapeUnicode (int ch
, out int surrogate
)
1488 ch
= getHex (8, out surrogate
, out error
);
1490 ch
= getHex (4, out surrogate
, out error
);
1494 Report
.Error (1009, Location
, "Unrecognized escape sequence");
1502 if (putback_char
!= -1) {
1515 void advance_line ()
1525 if (putback_char
== -1)
1526 putback_char
= reader
.Read ();
1527 return putback_char
;
1532 if (putback_char
!= -1)
1533 return putback_char
;
1534 return reader
.Peek ();
1537 void putback (int c
)
1539 if (putback_char
!= -1){
1540 Console
.WriteLine ("Col: " + col
);
1541 Console
.WriteLine ("Row: " + line
);
1542 Console
.WriteLine ("Name: " + ref_name
.Name
);
1543 Console
.WriteLine ("Current [{0}] putting back [{1}] ", putback_char
, c
);
1544 throw new Exception ("This should not happen putback on putback");
1546 if (c
== '\n' || col
== 0) {
1547 // It won't happen though.
1557 public bool advance ()
1559 return peek_char () != -1 || CompleteOnEOF
;
1562 public Object Value
{
1568 public Object
value ()
1575 current_token
= xtoken ();
1576 return current_token
;
1579 static StringBuilder static_cmd_arg
= new System
.Text
.StringBuilder ();
1581 void get_cmd_arg (out string cmd
, out string arg
)
1585 tokens_seen
= false;
1588 // skip over white space
1591 } while (c
== '\r' || c
== ' ' || c
== '\t');
1593 static_cmd_arg
.Length
= 0;
1594 while (c
!= -1 && is_identifier_part_character ((char)c
)) {
1595 static_cmd_arg
.Append ((char)c
);
1598 int peek
= peek_char ();
1599 if (peek
== 'U' || peek
== 'u') {
1601 c
= EscapeUnicode (c
, out surrogate
);
1602 if (surrogate
!= 0) {
1603 if (is_identifier_part_character ((char) c
))
1604 static_cmd_arg
.Append ((char) c
);
1611 cmd
= static_cmd_arg
.ToString ();
1613 // skip over white space
1614 while (c
== '\r' || c
== ' ' || c
== '\t')
1617 static_cmd_arg
.Length
= 0;
1618 int has_identifier_argument
= 0;
1620 while (c
!= -1 && c
!= '\n' && c
!= '\r') {
1621 if (c
== '\\' && has_identifier_argument
>= 0) {
1622 if (has_identifier_argument
!= 0 || (cmd
== "define" || cmd
== "if" || cmd
== "elif" || cmd
== "undef")) {
1623 has_identifier_argument
= 1;
1625 int peek
= peek_char ();
1626 if (peek
== 'U' || peek
== 'u') {
1628 c
= EscapeUnicode (c
, out surrogate
);
1629 if (surrogate
!= 0) {
1630 if (is_identifier_part_character ((char) c
))
1631 static_cmd_arg
.Append ((char) c
);
1636 has_identifier_argument
= -1;
1639 static_cmd_arg
.Append ((char) c
);
1643 if (static_cmd_arg
.Length
!= 0)
1644 arg
= static_cmd_arg
.ToString ();
1648 // Handles the #line directive
1650 bool PreProcessLine (string arg
)
1652 if (arg
.Length
== 0)
1655 if (arg
== "default"){
1657 ref_name
= file_name
;
1659 Location
.Push (file_name
, ref_name
);
1661 } else if (arg
== "hidden"){
1669 if ((pos
= arg
.IndexOf (' ')) != -1 && pos
!= 0){
1670 ref_line
= System
.Int32
.Parse (arg
.Substring (0, pos
));
1673 char [] quotes
= { '\"' }
;
1675 string name
= arg
.Substring (pos
). Trim (quotes
);
1676 ref_name
= Location
.LookupFile (file_name
, name
);
1677 file_name
.AddFile (ref_name
);
1679 Location
.Push (file_name
, ref_name
);
1681 ref_line
= System
.Int32
.Parse (arg
);
1692 // Handles #define and #undef
1694 void PreProcessDefinition (bool is_define
, string ident
, bool caller_is_taking
)
1696 if (ident
.Length
== 0 || ident
== "true" || ident
== "false"){
1697 Report
.Error (1001, Location
, "Missing identifier to pre-processor directive");
1701 if (ident
.IndexOfAny (simple_whitespaces
) != -1){
1702 Error_EndLineExpected ();
1706 if (!is_identifier_start_character (ident
[0]))
1707 Report
.Error (1001, Location
, "Identifier expected: {0}", ident
);
1709 foreach (char c
in ident
.Substring (1)){
1710 if (!is_identifier_part_character (c
)){
1711 Report
.Error (1001, Location
, "Identifier expected: {0}", ident
);
1716 if (!caller_is_taking
)
1723 if (RootContext
.IsConditionalDefined (ident
))
1726 file_name
.AddDefine (ident
);
1731 file_name
.AddUndefine (ident
);
1735 static byte read_hex (string arg
, int pos
, out bool error
)
1742 if ((c
>= '0') && (c
<= '9'))
1743 total
= (int) c
- (int) '0';
1744 else if ((c
>= 'A') && (c
<= 'F'))
1745 total
= (int) c
- (int) 'A' + 10;
1746 else if ((c
>= 'a') && (c
<= 'f'))
1747 total
= (int) c
- (int) 'a' + 10;
1756 if ((c
>= '0') && (c
<= '9'))
1757 total
+= (int) c
- (int) '0';
1758 else if ((c
>= 'A') && (c
<= 'F'))
1759 total
+= (int) c
- (int) 'A' + 10;
1760 else if ((c
>= 'a') && (c
<= 'f'))
1761 total
+= (int) c
- (int) 'a' + 10;
1767 return (byte) total
;
1771 /// Handles #pragma checksum
1773 bool PreProcessPragmaChecksum (string arg
)
1775 if ((arg
[0] != ' ') && (arg
[0] != '\t'))
1778 arg
= arg
.Trim (simple_whitespaces
);
1779 if ((arg
.Length
< 2) || (arg
[0] != '"'))
1782 StringBuilder file_sb
= new StringBuilder ();
1786 while ((ch
= arg
[pos
++]) != '"') {
1787 if (pos
>= arg
.Length
)
1791 if (pos
+1 >= arg
.Length
)
1796 file_sb
.Append (ch
);
1799 if ((pos
+2 >= arg
.Length
) || ((arg
[pos
] != ' ') && (arg
[pos
] != '\t')))
1802 arg
= arg
.Substring (pos
).Trim (simple_whitespaces
);
1803 if ((arg
.Length
< 42) || (arg
[0] != '"') || (arg
[1] != '{') ||
1804 (arg
[10] != '-') || (arg
[15] != '-') || (arg
[20] != '-') ||
1805 (arg
[25] != '-') || (arg
[38] != '}') || (arg
[39] != '"'))
1809 byte[] guid_bytes
= new byte [16];
1811 for (int i
= 0; i
< 4; i
++) {
1812 guid_bytes
[i
] = read_hex (arg
, 2+2*i
, out error
);
1816 for (int i
= 0; i
< 2; i
++) {
1817 guid_bytes
[i
+4] = read_hex (arg
, 11+2*i
, out error
);
1820 guid_bytes
[i
+6] = read_hex (arg
, 16+2*i
, out error
);
1823 guid_bytes
[i
+8] = read_hex (arg
, 21+2*i
, out error
);
1828 for (int i
= 0; i
< 6; i
++) {
1829 guid_bytes
[i
+10] = read_hex (arg
, 26+2*i
, out error
);
1834 arg
= arg
.Substring (40).Trim (simple_whitespaces
);
1835 if ((arg
.Length
< 34) || (arg
[0] != '"') || (arg
[33] != '"'))
1838 byte[] checksum_bytes
= new byte [16];
1839 for (int i
= 0; i
< 16; i
++) {
1840 checksum_bytes
[i
] = read_hex (arg
, 1+2*i
, out error
);
1845 arg
= arg
.Substring (34).Trim (simple_whitespaces
);
1849 SourceFile file
= Location
.LookupFile (file_name
, file_sb
.ToString ());
1850 file
.SetChecksum (guid_bytes
, checksum_bytes
);
1851 ref_name
.AutoGenerated
= true;
/// <summary>
///   Handles #pragma directive
/// </summary>
/// <param name="arg">Trimmed text that follows `#pragma'.</param>
void PreProcessPragma (string arg)
{
	const string warning_keyword = "warning";
	const string disable_prefix = "warning disable";
	const string restore_prefix = "warning restore";
	const string checksum_keyword = "checksum";

	if (arg == disable_prefix) {
		// No warning list: use the row based overload
		Report.RegisterWarningRegion (Location).WarningDisable (Location.Row);
	} else if (arg == restore_prefix) {
		Report.RegisterWarningRegion (Location).WarningEnable (Location.Row);
	} else if (arg.StartsWith (disable_prefix)) {
		int[] disabled = ParseNumbers (arg.Substring (disable_prefix.Length));
		for (int i = 0; i < disabled.Length; i++) {
			// 0 marks an entry that could not be parsed
			if (disabled [i] == 0)
				continue;

			Report.RegisterWarningRegion (Location).WarningDisable (Location, disabled [i], Report);
		}
	} else if (arg.StartsWith (restore_prefix)) {
		int[] restored = ParseNumbers (arg.Substring (restore_prefix.Length));
		Hashtable globally_ignored = Report.warning_ignore_table;
		for (int i = 0; i < restored.Length; i++) {
			int code = restored [i];
			if (globally_ignored != null && globally_ignored.Contains (code))
				Report.Warning (1635, 1, Location, "Cannot restore warning `CS{0:0000}' because it was disabled globally", code);

			Report.RegisterWarningRegion (Location).WarningEnable (Location, code, Report);
		}
	} else if (arg.StartsWith (warning_keyword)) {
		// `warning' followed by something other than disable or restore
		Report.Warning (1634, 1, Location, "Expected disable or restore");
	} else if (arg.StartsWith (checksum_keyword)) {
		bool valid = PreProcessPragmaChecksum (arg.Substring (checksum_keyword.Length));
		if (!valid)
			Warning_InvalidPragmaChecksum ();
	} else {
		Report.Warning (1633, 1, Location, "Unrecognized #pragma directive");
	}
}
//
// Parses a comma separated list of numbers (invariant culture).
//
// The result has one slot per list entry. An entry that is not a valid
// 32-bit integer is reported as warning CS1692 and its slot is left at 0.
//
int[] ParseNumbers (string text)
{
	string[] string_array = text.Split (',');
	int[] values = new int [string_array.Length];
	int index = 0;
	foreach (string string_code in string_array) {
		try {
			values [index] = int.Parse (string_code, System.Globalization.CultureInfo.InvariantCulture);
		}
		catch (FormatException) {
			Report.Warning (1692, 1, Location, "Invalid number");
		}
		catch (OverflowException) {
			// Digits only, but outside of the int range: equally invalid,
			// and it must not escape as an unhandled exception.
			Report.Warning (1692, 1, Location, "Invalid number");
		}

		// The slot is consumed whether or not the entry was valid
		index++;
	}
	return values;
}
//
// Evaluates a single operand of a preprocessor expression: the literals
// `true' and `false' stand for themselves, any other text is looked up
// through file_name.IsConditionalDefined.
//
bool eval_val (string s)
{
	switch (s) {
	case "true":
		return true;
	case "false":
		return false;
	default:
		return file_name.IsConditionalDefined (s);
	}
}
//
// Parses a primary preprocessor expression: a parenthesized expression
// or a single symbol. The consumed text is removed from the front of `s'.
// Reports an invalid directive and returns false for anything else.
//
bool pp_primary (ref string s)
{
	s = s.Trim ();
	int length = s.Length;
	if (length == 0) {
		Error_InvalidDirective ();
		return false;
	}

	char first = s [0];

	if (first == '(') {
		s = s.Substring (1);
		bool inner = pp_expr (ref s, false);
		if (s.Length == 0 || s [0] != ')') {
			Error_InvalidDirective ();
			return false;
		}

		s = s.Substring (1);
		return inner;
	}

	if (!is_identifier_start_character (first)) {
		Error_InvalidDirective ();
		return false;
	}

	// Find where the symbol name ends
	int end = 1;
	while (end < length && is_identifier_part_character (s [end]))
		end++;

	bool defined = eval_val (s.Substring (0, end));
	s = s.Substring (end);
	return defined;
}
//
// Parses an optional `!' followed by a primary expression.
// An empty input or a leading `!=' is reported as invalid directive.
//
bool pp_unary (ref string s)
{
	s = s.Trim ();
	int length = s.Length;

	if (length == 0) {
		Error_InvalidDirective ();
		return false;
	}

	if (s [0] != '!')
		return pp_primary (ref s);

	if (length > 1 && s [1] == '=') {
		Error_InvalidDirective ();
		return false;
	}

	s = s.Substring (1);
	bool operand = pp_primary (ref s);
	return !operand;
}
//
// Parses `unary', `unary == unary' or `unary != unary'.
// A lone `=' (or `==' with nothing behind it) is an invalid directive.
//
bool pp_eq (ref string s)
{
	bool left = pp_unary (ref s);

	s = s.Trim ();
	int length = s.Length;
	if (length == 0)
		return left;

	switch (s [0]) {
	case '=':
		if (length <= 2 || s [1] != '=') {
			Error_InvalidDirective ();
			return false;
		}

		s = s.Substring (2);
		return left == pp_unary (ref s);

	case '!':
		if (length > 1 && s [1] == '=') {
			s = s.Substring (2);
			return left != pp_unary (ref s);
		}
		break;
	}

	return left;
}
//
// Parses `eq' optionally followed by `&& and'.
// A single `&' is reported as invalid directive.
//
bool pp_and (ref string s)
{
	bool left = pp_eq (ref s);

	s = s.Trim ();
	int length = s.Length;
	if (length == 0 || s [0] != '&')
		return left;

	if (length <= 2 || s [1] != '&') {
		Error_InvalidDirective ();
		return false;
	}

	s = s.Substring (2);

	// `&' evaluates both sides, so the right operand is always parsed
	bool right = pp_and (ref s);
	return left & right;
}
//
// Evaluates an expression for `#if' or `#elif'
//
// Parses `and' optionally followed by `|| expr'. When `isTerm' is set,
// any other trailing text is reported as CS1025.
//
bool pp_expr (ref string s, bool isTerm)
{
	bool left = pp_and (ref s);

	s = s.Trim ();
	int length = s.Length;
	if (length == 0)
		return left;

	if (s [0] == '|') {
		if (length <= 2 || s [1] != '|') {
			Error_InvalidDirective ();
			return false;
		}

		s = s.Substring (2);

		// `|' evaluates both sides, so the right operand is always parsed
		bool right = pp_expr (ref s, isTerm);
		return left | right;
	}

	if (!isTerm)
		return left;

	Error_EndLineExpected ();
	return false;
}
//
// Evaluates a complete preprocessor expression. The result is false
// when text is left over after the expression.
//
bool eval (string s)
{
	bool result = pp_expr (ref s, true);

	string rest = s.Trim ();
	return rest.Length == 0 && result;
}
// Reports CS1021 at the current location
void Error_NumericConstantTooLong ()
{
	const string message = "Numeric constant too long";
	Report.Error (1021, Location, message);
}
// Reports CS1517 at the current location
void Error_InvalidDirective ()
{
	const string message = "Invalid preprocessor directive";
	Report.Error (1517, Location, message);
}
// Reports CS1028 at the current location; `extra' is put between the
// parentheses of the message.
void Error_UnexpectedDirective (string extra)
{
	const string format = "Unexpected processor directive ({0})";
	Report.Error (1028, Location, format, extra);
}
// Reports CS1032 at the current location
void Error_TokensSeen ()
{
	const string message =
		"Cannot define or undefine preprocessor symbols after first token in file";
	Report.Error (1032, Location, message);
}
// Reports CS1040 at the current location
void Eror_WrongPreprocessorLocation ()
{
	const string message =
		"Preprocessor directives must appear as the first non-whitespace character on a line";
	Report.Error (1040, Location, message);
}
// Reports CS1025 at the current location
void Error_EndLineExpected ()
{
	const string message = "Single-line comment or end-of-line expected";
	Report.Error (1025, Location, message);
}
// Reports warning CS1695 (level 1) together with the expected syntax
void Warning_InvalidPragmaChecksum ()
{
	const string message =
		"Invalid #pragma checksum syntax; should be " +
		"#pragma checksum \"filename\" " +
		"\"{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}\" \"XXXX...\"";

	Report.Warning (1695, 1, Location, message);
}
2127 // if true, then the code continues processing the code
2128 // if false, the code stays in a loop until another directive is
2130 // When caller_is_taking is false we ignore all directives except the ones
2131 // which can help us to identify where the #if block ends
// Processes one preprocessor directive. The directive name and its argument
// are obtained from get_cmd_arg; everything from the first // in the argument
// is dropped and the remainder is trimmed before the directive is handled.
// NOTE(review): the // stripping is purely textual, so it also shortens an
// argument that merely contains two slashes - confirm this is intended for
// every directive.
// ifstack keeps one int of flags per open conditional or region; the flags
// seen here are REGION, TAKING, PARENT_TAKING and ELSE_SEEN.
2132 bool handle_preprocessing_directive (bool caller_is_taking
)
2135 bool region_directive
= false;
2137 get_cmd_arg (out cmd
, out arg
);
2139 // Eat any trailing whitespaces and single-line comments
2140 if (arg
.IndexOf ("//") != -1)
2141 arg
= arg
.Substring (0, arg
.IndexOf ("//"));
2142 arg
= arg
.Trim (simple_whitespaces
);
2145 // The first group of pre-processing instructions is always processed
2149 region_directive
= true;
2154 if (ifstack
== null || ifstack
.Count
== 0){
2155 Error_UnexpectedDirective ("no #region for this #endregion");
2158 int pop
= (int) ifstack
.Pop ();
// The popped entry must have been opened as a region
2160 if ((pop
& REGION
) == 0)
2161 Report
.Error (1027, Location
, "Expected `#endif' directive");
2163 return caller_is_taking
;
// The stack is created on first use
2166 if (ifstack
== null)
2167 ifstack
= new Stack (2);
2169 int flags
= region_directive
? REGION
: 0;
// PARENT_TAKING is set at the outermost level or when the enclosing entry is TAKING
2170 if (ifstack
.Count
== 0){
2171 flags
|= PARENT_TAKING
;
2173 int state
= (int) ifstack
.Peek ();
2174 if ((state
& TAKING
) != 0) {
2175 flags
|= PARENT_TAKING
;
// The expression is only evaluated when the caller is taking
2179 if (caller_is_taking
&& eval (arg
)) {
2180 ifstack
.Push (flags
| TAKING
);
2183 ifstack
.Push (flags
);
2187 if (ifstack
== null || ifstack
.Count
== 0){
2188 Error_UnexpectedDirective ("no #if for this #endif");
2191 pop
= (int) ifstack
.Pop ();
2193 if ((pop
& REGION
) != 0)
2194 Report
.Error (1038, Location
, "#endregion directive expected");
2196 if (arg
.Length
!= 0) {
2197 Error_EndLineExpected ();
2200 if (ifstack
.Count
== 0)
// Otherwise the result follows the TAKING flag of the enclosing entry
2203 int state
= (int) ifstack
.Peek ();
2204 return (state
& TAKING
) != 0;
2208 if (ifstack
== null || ifstack
.Count
== 0){
2209 Error_UnexpectedDirective ("no #if for this #elif");
2212 int state
= (int) ifstack
.Pop ();
2214 if ((state
& REGION
) != 0) {
2215 Report
.Error (1038, Location
, "#endregion directive expected");
2219 if ((state
& ELSE_SEEN
) != 0){
2220 Error_UnexpectedDirective ("#elif not valid after #else");
2224 if ((state
& TAKING
) != 0) {
// A branch is only taken when the parent is taking as well
2229 if (eval (arg
) && ((state
& PARENT_TAKING
) != 0)){
2230 ifstack
.Push (state
| TAKING
);
2234 ifstack
.Push (state
);
2239 if (ifstack
== null || ifstack
.Count
== 0){
2240 Error_UnexpectedDirective ("no #if for this #else");
2243 int state
= (int) ifstack
.Peek ();
2245 if ((state
& REGION
) != 0) {
2246 Report
.Error (1038, Location
, "#endregion directive expected");
2250 if ((state
& ELSE_SEEN
) != 0){
2251 Error_UnexpectedDirective ("#else within #else");
2257 if (arg
.Length
!= 0) {
2258 Error_EndLineExpected ();
2263 if ((state
& PARENT_TAKING
) != 0) {
2264 ret
= (state
& TAKING
) == 0;
2272 ifstack
.Push (state
| ELSE_SEEN
);
// Symbol definitions are rejected once any token has been seen in the file
2277 if (any_token_seen
){
2278 Error_TokensSeen ();
2279 return caller_is_taking
;
2281 PreProcessDefinition (true, arg
, caller_is_taking
);
2282 return caller_is_taking
;
2285 if (any_token_seen
){
2286 Error_TokensSeen ();
2287 return caller_is_taking
;
2289 PreProcessDefinition (false, arg
, caller_is_taking
);
2290 return caller_is_taking
;
2294 // These are only processed if we are in a `taking' block
2296 if (!caller_is_taking
)
2301 Report
.Error (1029, Location
, "#error: '{0}'", arg
);
2305 Report
.Warning (1030, 1, Location
, "#warning: `{0}'", arg
);
2309 if (RootContext
.Version
== LanguageVersion
.ISO_1
) {
2310 Report
.FeatureIsNotAvailable (Location
, "#pragma");
2314 PreProcessPragma (arg
);
2318 if (!PreProcessLine (arg
))
2321 "The line number specified for #line directive is missing or invalid");
2322 return caller_is_taking
;
2325 Report
.Error (1024, Location
, "Wrong preprocessor directive");
// Reads the body of a string literal into string_builder, one get_char at a time.
// On success the collected text is stored in val and Token.LITERAL_STRING is
// returned; running out of input reports error 1039.
// With quoted set, a quote character that is followed by another one (seen
// through peek_char) is appended to the text instead of ending the literal.
// NOTE(review): the conditions that guard the newline error (1010) and the
// escape call are not visible in this view - presumably they apply only when
// quoted is false; confirm against the full source.
2330 private int consume_string (bool quoted
)
2333 string_builder
.Length
= 0;
2335 while ((c
= get_char ()) != -1){
2337 if (quoted
&& peek_char () == '"'){
2338 string_builder
.Append ((char) c
);
2342 val
= string_builder
.ToString ();
2343 return Token
.LITERAL_STRING
;
2349 Report
.Error (1010, Location
, "Newline in constant");
2354 c
= escape (c
, out surrogate
);
// A non-zero surrogate means escape produced two chars; the first one is appended here
2357 if (surrogate
!= 0) {
2358 string_builder
.Append ((char) c
);
2362 string_builder
.Append ((char) c
);
2365 Report
.Error (1039, Location
, "Unterminated string literal");
//
// Consumes a regular (not verbatim) identifier or keyword that starts
// with the character `s' and returns its token. Afterwards a doc
// comment is no longer allowed until the state is changed elsewhere.
//
private int consume_identifier (int s)
{
	int token = consume_identifier (s, false);

	bool doc_comment_allowed = doc_state == XmlCommentState.Allowed;
	if (doc_comment_allowed)
		doc_state = XmlCommentState.NotAllowed;

	return token;
}
//
// Consumes an identifier or keyword whose first character is `c'.
//
// `quoted' is true for identifiers that follow the verbatim specifier;
// for those the keyword lookup is skipped and the resulting token is
// passed to AddEscapedIdentifier.
//
// Returns a keyword token, Token.IDENTIFIER, or Token.ERROR when the
// identifier exceeds max_id_size. The token value is stored in `val'.
//
private int consume_identifier (int c, bool quoted)
{
	int pos = 0;

	if (c == '\\') {
		int surrogate;
		c = escape (c, out surrogate);
		if (surrogate != 0) {
			id_builder [pos++] = (char) c;
			c = surrogate;
		}
	}

	id_builder [pos++] = (char) c;
	Location loc = Location;

	while ((c = get_char ()) != -1) {
	loop:
		if (is_identifier_part_character ((char) c)){
			if (pos == max_id_size){
				Report.Error (645, loc, "Identifier too long (limit is 512 chars)");
				return Token.ERROR;
			}

			id_builder [pos++] = (char) c;
		} else if (c == '\\') {
			int surrogate;
			c = escape (c, out surrogate);
			if (surrogate != 0) {
				if (is_identifier_part_character ((char) c))
					id_builder [pos++] = (char) c;
				c = surrogate;
			}

			// Re-examine the character produced by the escape
			goto loop;
		} else {
			putback (c);
			break;
		}
	}

	//
	// Optimization: avoids doing the keyword lookup
	// on uppercase letters
	//
	if (id_builder [0] >= '_' && !quoted) {
		int keyword = GetKeyword (id_builder, pos);
		if (keyword != -1) {
			// TODO: No need to store location for keyword, required location cleanup
			val = loc;
			return keyword;
		}
	}

	//
	// Keep identifiers in an array of hashtables to avoid needless
	// allocations
	//
	CharArrayHashtable identifiers_group = identifiers [pos];
	if (identifiers_group != null) {
		val = identifiers_group [id_builder];
		if (val != null) {
			val = new LocatedToken (loc, (string) val);
			if (quoted)
				AddEscapedIdentifier ((LocatedToken) val);
			return Token.IDENTIFIER;
		}
	} else {
		identifiers_group = new CharArrayHashtable (pos);
		identifiers [pos] = identifiers_group;
	}

	char [] chars = new char [pos];
	Array.Copy (id_builder, chars, pos);

	val = new String (id_builder, 0, pos);
	identifiers_group.Add (chars, val);

	if (RootContext.Version == LanguageVersion.ISO_1) {
		//
		// Every adjacent pair has to be compared: a larger stride skips
		// some pairs, and looking at chars [i + 1] reads past the end
		// of the array when `i' is the last index.
		//
		for (int i = 1; i < chars.Length; i++) {
			if (chars [i] == '_' && chars [i - 1] == '_') {
				Report.Error (1638, loc,
					"`{0}': Any identifier with double underscores cannot be used when ISO language version mode is specified", val.ToString ());
				break;
			}
		}
	}

	val = new LocatedToken (loc, (string) val);
	if (quoted)
		AddEscapedIdentifier ((LocatedToken) val);
	return Token.IDENTIFIER;
}
// Scans the input with get_char and returns the code of the next token.
// At a line end tokens_seen is folded into any_token_seen and both tokens_seen
// and comments_seen are cleared. A preprocessor directive is reported through
// Eror_WrongPreprocessorLocation when tokens or comments were already seen on
// its line. After handle_preprocessing_directive (true), a skipping loop reads
// characters and calls handle_preprocessing_directive (false) for the
// directives it meets.
// Comment handling feeds xml_comment_buffer and doc_state when
// RootContext.Documentation is set.
2471 public int xtoken ()
2475 // Whether we have seen comments on the current line
2476 bool comments_seen
= false;
2477 while ((c
= get_char ()) != -1) {
2480 col
= ((col
+ 8) / 8) * 8;
2488 case 0xFEFF: // Ignore BOM anywhere in the file
2491 /* This is required for compatibility with .NET
2493 if (peek_char () == 0xBB) {
2496 if (get_char () == 0xBF)
2503 if (peek_char () != '\n')
2508 any_token_seen
|= tokens_seen
;
2509 tokens_seen
= false;
2510 comments_seen
= false;
2515 return consume_identifier (c
);
2519 return Token
.OPEN_BRACE
;
2522 return Token
.CLOSE_BRACE
;
2524 // To block doccomment inside attribute declaration.
2525 if (doc_state
== XmlCommentState
.Allowed
)
2526 doc_state
= XmlCommentState
.NotAllowed
;
2527 return Token
.OPEN_BRACKET
;
2529 return Token
.CLOSE_BRACKET
;
2533 // An expression versions of parens can appear in block context only
2535 if (parsing_block
!= 0 && !lambda_arguments_parsing
) {
2538 // Optmize most common case where we know that parens
2541 switch (current_token
) {
2542 case Token
.IDENTIFIER
:
2550 case Token
.DELEGATE
:
2551 case Token
.OP_GENERICS_GT
:
2552 return Token
.OPEN_PARENS
;
2555 // Optimize using peek
2556 int xx
= peek_char ();
2563 return Token
.OPEN_PARENS
;
2566 lambda_arguments_parsing
= true;
2568 d
= TokenizeOpenParens ();
2570 lambda_arguments_parsing
= false;
2574 return Token
.OPEN_PARENS
;
2576 return Token
.CLOSE_PARENS
;
2580 return Token
.SEMICOLON
;
2584 return TokenizePossibleNullableType ();
2586 if (parsing_generic_less_than
++ > 0)
2587 return Token
.OP_GENERICS_LT
;
2589 return TokenizeLessThan ();
2599 if (parsing_generic_less_than
> 1 || (parsing_generic_less_than
== 1 && d
!= '>')) {
2600 parsing_generic_less_than
--;
2601 return Token
.OP_GENERICS_GT
;
2610 return Token
.OP_SHIFT_RIGHT_ASSIGN
;
2612 return Token
.OP_SHIFT_RIGHT
;
2621 } else if (d
== '=') {
2622 d
= Token
.OP_ADD_ASSIGN
;
2633 } else if (d
== '=')
2634 d
= Token
.OP_SUB_ASSIGN
;
2644 if (peek_char () == '='){
2661 return Token
.ASSIGN
;
2667 return Token
.OP_AND
;
2671 return Token
.OP_AND_ASSIGN
;
2673 return Token
.BITWISE_AND
;
2683 return Token
.OP_OR_ASSIGN
;
2685 return Token
.BITWISE_OR
;
2688 if (peek_char () == '='){
2690 return Token
.OP_MULT_ASSIGN
;
2699 return Token
.OP_DIV_ASSIGN
;
2702 // Handle double-slash comments.
2705 if (RootContext
.Documentation
!= null && peek_char () == '/') {
2707 // Don't allow ////.
2708 if ((d
= peek_char ()) != '/') {
2709 update_comment_location ();
2710 if (doc_state
== XmlCommentState
.Allowed
)
2711 handle_one_line_xml_comment ();
2712 else if (doc_state
== XmlCommentState
.NotAllowed
)
2713 warn_incorrect_doc_comment ();
2716 while ((d
= get_char ()) != -1 && (d
!= '\n') && d
!= '\r');
2718 any_token_seen
|= tokens_seen
;
2719 tokens_seen
= false;
2720 comments_seen
= false;
2722 } else if (d
== '*'){
2724 bool docAppend
= false;
2725 if (RootContext
.Documentation
!= null && peek_char () == '*') {
2727 update_comment_location ();
2728 // But when it is /**/, just do nothing.
2729 if (peek_char () == '/') {
2733 if (doc_state
== XmlCommentState
.Allowed
)
2735 else if (doc_state
== XmlCommentState
.NotAllowed
)
2736 warn_incorrect_doc_comment ();
2739 int current_comment_start
= 0;
2741 current_comment_start
= xml_comment_buffer
.Length
;
2742 xml_comment_buffer
.Append (Environment
.NewLine
);
2745 while ((d
= get_char ()) != -1){
2746 if (d
== '*' && peek_char () == '/'){
2748 comments_seen
= true;
2752 xml_comment_buffer
.Append ((char) d
);
2755 any_token_seen
|= tokens_seen
;
2756 tokens_seen
= false;
2758 // Reset 'comments_seen' just to be consistent.
2759 // It doesn't matter either way, here.
2761 comments_seen
= false;
2765 Report
.Error (1035, Location
, "End-of-file found, '*/' expected");
2768 update_formatted_doc_comment (current_comment_start
);
2774 if (peek_char () == '='){
2776 return Token
.OP_MOD_ASSIGN
;
2778 return Token
.PERCENT
;
2781 if (peek_char () == '='){
2783 return Token
.OP_XOR_ASSIGN
;
2785 return Token
.CARRET
;
2788 if (peek_char () == ':') {
2790 return Token
.DOUBLE_COLON
;
2794 case '0': case '1': case '2': case '3': case '4':
2795 case '5': case '6': case '7': case '8': case '9':
2797 return is_number (c
);
2799 case '\n': // white space
2800 any_token_seen
|= tokens_seen
;
2801 tokens_seen
= false;
2802 comments_seen
= false;
2808 if (d
>= '0' && d
<= '9')
2809 return is_number (c
);
2813 if (tokens_seen
|| comments_seen
) {
2814 Eror_WrongPreprocessorLocation ();
2818 if (handle_preprocessing_directive (true))
2821 bool directive_expected
= false;
2822 while ((c
= get_char ()) != -1) {
2824 directive_expected
= true;
2825 } else if (!directive_expected
) {
2826 // TODO: Implement comment support for disabled code and uncomment this code
2828 // Eror_WrongPreprocessorLocation ();
2829 // return Token.ERROR;
2834 if (c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n' || c
== '\f' || c
== '\v' )
2838 if (handle_preprocessing_directive (false))
2841 directive_expected
= false;
2845 tokens_seen
= false;
2852 return consume_string (false);
2855 return TokenizeBackslash ();
2861 return consume_string (true);
2864 if (is_identifier_start_character (c
)){
2865 return consume_identifier (c
, true);
2868 Report
.Error (1646, Location
, "Keyword, identifier, or string expected after verbatim specifier: @");
2871 case EvalStatementParserCharacter
:
2872 return Token
.EVAL_STATEMENT_PARSER
;
2873 case EvalCompilationUnitParserCharacter
:
2874 return Token
.EVAL_COMPILATION_UNIT_PARSER
;
2875 case EvalUsingDeclarationsParserCharacter
:
2876 return Token
.EVAL_USING_DECLARATIONS_UNIT_PARSER
;
2879 if (is_identifier_start_character (c
)) {
2881 return consume_identifier (c
);
2884 error_details
= ((char)c
).ToString ();
2890 return Token
.COMPLETE_COMPLETION
;
2893 return Token
.GENERATE_COMPLETION
;
// Tokenizes the body of a character literal, going by the diagnostics below
// (the method name notwithstanding). The first character is read with get_char
// and decoded through escape; the visible error paths report an empty literal
// (1011), a line break inside the literal (1010) and extra characters (1012).
// A successful scan returns Token.LITERAL_CHARACTER.
// NOTE(review): the conditions in front of several statements are not visible
// in this view; the remarks below describe only the statements shown.
2900 int TokenizeBackslash ()
2902 int c
= get_char ();
2905 error_details
= "Empty character literal";
2906 Report
.Error (1011, Location
, error_details
);
2909 if (c
== '\r' || c
== '\n') {
2910 Report
.Error (1010, Location
, "Newline in constant");
2915 c
= escape (c
, out d
);
// NOTE(review): presumably reached when escape yields a surrogate in d, which is not supported here - confirm
2919 throw new NotImplementedException ();
2925 Report
.Error (1012, Location
, "Too many characters in character literal");
2927 // Try to recover, read until newline or next "'"
2928 while ((c
= get_char ()) != -1) {
2929 if (c
== '\n' || c
== '\'')
2935 return Token
.LITERAL_CHARACTER
;
// Decides which token a less-than character stands for.
// With handle_typeof set, parse_generic_dimension is tried first and yields
// Token.GENERIC_DIMENSION. Next parse_less_than is tried: it yields
// Token.OP_GENERICS_LT_DECL when parsing_generic_declaration is set and the
// following token is not Token.DOT, and Token.OP_GENERICS_LT otherwise.
// When neither applies parsing_generic_less_than is reset to 0 and the
// remaining visible results are the shift-left tokens.
// NOTE(review): the position save and restore calls and the plain comparison
// results are not visible in this view.
2938 int TokenizeLessThan ()
2941 if (handle_typeof
) {
2943 if (parse_generic_dimension (out d
)) {
2946 return Token
.GENERIC_DIMENSION
;
2951 // Save current position and parse next token.
2953 if (parse_less_than ()) {
2954 if (parsing_generic_declaration
&& token () != Token
.DOT
) {
2955 d
= Token
.OP_GENERICS_LT_DECL
;
2957 d
= Token
.OP_GENERICS_LT
;
2964 parsing_generic_less_than
= 0;
2973 return Token
.OP_SHIFT_LEFT_ASSIGN
;
2975 return Token
.OP_SHIFT_LEFT
;
//
// Handles one line xml comment
//
// Leading spaces are dropped, the rest of the line is appended to
// xml_comment_buffer, and a line break is appended when the line was
// ended by '\r' or '\n' (not when the input ended).
//
private void handle_one_line_xml_comment ()
{
	// skip heading whitespaces.
	while (peek_char () == ' ')
		get_char ();

	int next = peek_char ();
	while (next != -1 && next != '\n' && next != '\r') {
		xml_comment_buffer.Append ((char) get_char ());
		next = peek_char ();
	}

	bool at_line_end = next == '\r' || next == '\n';
	if (at_line_end)
		xml_comment_buffer.Append (Environment.NewLine);
}
3001 // Remove heading "*" in Javadoc-like xml documentation.
// Works on the part of xml_comment_buffer that starts at current_comment_start:
// the text is split into lines (carriage returns removed), each line after the
// first is cut behind its first asterisk, and the result replaces the original
// range, joined with Environment.NewLine.
// NOTE(review): the early exits are not visible in this view; presumably the
// buffer is left untouched when a line does not have the expected shape - confirm.
3003 private void update_formatted_doc_comment (int current_comment_start
)
3005 int length
= xml_comment_buffer
.Length
- current_comment_start
;
3006 string [] lines
= xml_comment_buffer
.ToString (
3007 current_comment_start
,
3008 length
).Replace ("\r", "").Split ('\n');
3010 // The first line starts with /**, thus it is not target
3011 // for the format check.
3012 for (int i
= 1; i
< lines
.Length
; i
++) {
3013 string s
= lines
[i
];
3014 int idx
= s
.IndexOf ('*');
3017 if (i
< lines
.Length
- 1)
// head is the text in front of the asterisk
3021 head
= s
.Substring (0, idx
);
3022 foreach (char c
in head
)
3025 lines
[i
] = s
.Substring (idx
+ 1);
// Swap the original range for the reformatted lines
3027 xml_comment_buffer
.Remove (current_comment_start
, length
);
3028 xml_comment_buffer
.Insert (current_comment_start
, String
.Join (Environment
.NewLine
, lines
));
//
// Updates current comment location.
//
// Only the first comment of a run is recorded: nothing happens while
// current_comment_location is already set.
//
private void update_comment_location ()
{
	if (!current_comment_location.IsNull)
		return;

	// "-2" is for heading "//" or "/*"
	int column = hidden ? -1 : col - 2;
	current_comment_location = new Location (ref_line, column);
}
//
// Raises the incorrect doc comment warning when xml comment text is
// still pending in xml_comment_buffer.
//
public void check_incorrect_doc_comment ()
{
	bool pending_comment = xml_comment_buffer.Length > 0;
	if (!pending_comment)
		return;

	warn_incorrect_doc_comment ();
}
//
// Raises a warning when tokenizer found incorrect doccomment
// markup. The warning is issued once: doc_state is moved to Error and
// nothing is reported while it stays there.
//
private void warn_incorrect_doc_comment ()
{
	if (doc_state == XmlCommentState.Error)
		return;

	doc_state = XmlCommentState.Error;

	// The wording follows csc
	const string message = "XML comment is not placed on a valid language element";
	Report.Warning (1587, 2, Location, message);
}
//
// Consumes the saved xml comment lines (if any)
// as for current target member or type.
//
// Returns the buffered text and resets the buffer, or null when
// nothing was buffered.
//
public string consume_doc_comment ()
{
	if (xml_comment_buffer.Length <= 0)
		return null;

	string text = xml_comment_buffer.ToString ();
	reset_doc_comment ();
	return text;
}
// Getter that hands out the Report object held by the compiler context
3083 get { return context.Report; }
//
// Forgets the pending xml comment: clears the recorded comment location
// and empties xml_comment_buffer.
//
void reset_doc_comment ()
{
	current_comment_location = Location.Null;
	xml_comment_buffer.Length = 0;
}
//
// Reports a conditional or region that is still open: the top entry of
// ifstack is popped and the missing closing directive is reported.
//
public void cleanup ()
{
	if (ifstack == null || ifstack.Count < 1)
		return;

	int flags = (int) ifstack.Pop ();
	bool open_region = (flags & REGION) != 0;

	if (open_region)
		Report.Error (1038, Location, "#endregion directive expected");
	else
		Report.Error (1027, Location, "Expected `#endif' directive");
}
3105 // Indicates whether it accepts XML documentation or not.
3107 public enum XmlCommentState
{
3108 // comment is allowed in this state.
3110 // comment is not allowed in this state.
3112 // once comments appeared when it is NotAllowed, then the
3113 // state is changed to it, until the state is changed to