2 // cs-tokenizer.cs: The Tokenizer for the C# compiler
3 // This also implements the preprocessor
5 // Author: Miguel de Icaza (miguel@gnu.org)
6 // Marek Safar (marek.safar@seznam.cz)
8 // Dual licensed under the terms of the MIT X11 or GNU GPL
10 // Copyright 2001, 2002 Ximian, Inc (http://www.ximian.com)
11 // Copyright 2004-2008 Novell, Inc
17 using System
.Collections
.Generic
;
19 using System
.Globalization
;
20 using System
.Reflection
;
25 /// Tokenizer for C# source code.
28 public class Tokenizer
: yyParser
.yyInput
32 public readonly int Token
;
33 public KeywordEntry Next
;
34 public readonly char[] Value
;
36 public KeywordEntry (string value, int token
)
38 this.Value
= value.ToCharArray ();
43 sealed class IdentifiersComparer
: IEqualityComparer
<char[]>
47 public IdentifiersComparer (int length
)
52 public bool Equals (char[] x
, char[] y
)
54 for (int i
= 0; i
< length
; ++i
)
61 public int GetHashCode (char[] obj
)
64 for (int i
= 0; i
< length
; ++i
)
65 h
= (h
<< 5) - h
+ obj
[i
];
72 // This class has to be used in the parser only, it reuses token
73 // details after each parse
75 public class LocatedToken
80 static LocatedToken
[] buffer
;
83 private LocatedToken ()
87 public static LocatedToken
Create (int row
, int column
)
89 return Create (null, row
, column
);
92 public static LocatedToken
Create (string value, int row
, int column
)
95 // TODO: I am not very happy about the logic but it's the best
96 // that I could come up with for now.
97 // Ideally we should be using just a tiny buffer (256 elements) which
98 // is enough to hold all details for the current stack and recycle elements
99 // popped from the stack but there is a trick needed to recycle
103 if (pos
>= buffer
.Length
) {
104 entry
= new LocatedToken ();
106 entry
= buffer
[pos
];
108 entry
= new LocatedToken ();
109 buffer
[pos
] = entry
;
116 entry
.column
= column
;
121 // Used for token not required by expression evaluator
123 public static LocatedToken
CreateOptional (int row
, int col
)
126 return Create (row
, col
);
131 public static void Initialize ()
134 buffer
= new LocatedToken
[10000];
138 public Location Location
{
139 get { return new Location (row, column); }
142 public string Value
{
143 get { return value; }
147 SeekableStreamReader reader
;
149 CompilationUnit file_name
;
150 CompilerContext context
;
157 bool handle_get_set
= false;
158 bool handle_remove_add
= false;
159 bool handle_where
= false;
160 bool handle_typeof
= false;
161 bool lambda_arguments_parsing
;
162 Location current_comment_location
= Location
.Null
;
163 List
<Location
> escaped_identifiers
;
164 int parsing_generic_less_than
;
167 // Used mainly for parser optimizations. Some expressions for instance
168 // can appear only in block (including initializer, base initializer)
171 public int parsing_block
;
172 internal bool query_parsing
;
175 // When parsing type only, useful for ambiguous nullable types
177 public int parsing_type
;
180 // Set when parsing generic declaration (type or method header)
182 public bool parsing_generic_declaration
;
185 // The value indicates that we have not reached any declaration or
188 public int parsing_declaration
;
191 // The special character to inject on streams to trigger the EXPRESSION_PARSE
192 // token to be returned. It just happens to be a Unicode character that
193 // would never be part of a program (can not be an identifier).
195 // This character is only tested just before the tokenizer is about to report
196 // an error; So on the regular operation mode, this addition will have no
197 // impact on the tokenizer's performance.
200 public const int EvalStatementParserCharacter
= 0x2190; // Unicode Left Arrow
201 public const int EvalCompilationUnitParserCharacter
= 0x2191; // Unicode Upwards Arrow
202 public const int EvalUsingDeclarationsParserCharacter
= 0x2192; // Unicode Rightwards Arrow
205 // XML documentation buffer. The save point is used to divide
206 // comments on types and comments on members.
208 StringBuilder xml_comment_buffer
;
211 // See comment on XmlCommentState enumeration.
213 XmlCommentState xml_doc_state
= XmlCommentState
.Allowed
;
216 // Whether tokens have been seen on this line
218 bool tokens_seen
= false;
221 // Set to true once the GENERATE_COMPLETION token has been
222 // returned. This helps produce one GENERATE_COMPLETION,
223 // as many COMPLETE_COMPLETION as necessary to complete the
224 // AST tree and one final EOF.
229 // Whether a token has been seen on the file
230 // This is needed because `define' is not allowed to be used
231 // after a token has been seen.
233 bool any_token_seen
= false;
235 static readonly char[] simple_whitespaces
= new char[] { ' ', '\t' }
;
237 public bool PropertyParsing
{
238 get { return handle_get_set; }
239 set { handle_get_set = value; }
242 public bool EventParsing
{
243 get { return handle_remove_add; }
244 set { handle_remove_add = value; }
247 public bool ConstraintsParsing
{
248 get { return handle_where; }
249 set { handle_where = value; }
252 public bool TypeOfParsing
{
253 get { return handle_typeof; }
254 set { handle_typeof = value; }
257 public XmlCommentState doc_state
{
258 get { return xml_doc_state; }
260 if (value == XmlCommentState
.Allowed
) {
261 check_incorrect_doc_comment ();
262 reset_doc_comment ();
264 xml_doc_state
= value;
269 // This is used to trigger completion generation on the parser
270 public bool CompleteOnEOF
;
272 void AddEscapedIdentifier (Location loc
)
274 if (escaped_identifiers
== null)
275 escaped_identifiers
= new List
<Location
> ();
277 escaped_identifiers
.Add (loc
);
280 public bool IsEscapedIdentifier (Location loc
)
282 if (escaped_identifiers
!= null) {
283 foreach (Location lt
in escaped_identifiers
)
294 static KeywordEntry
[][] keywords
;
295 static Dictionary
<string, object> keyword_strings
; // TODO: HashSet
296 static NumberStyles styles
;
297 static NumberFormatInfo csharp_format_info
;
300 // Values for the associated token returned
302 internal int putback_char
; // Used by repl only
308 const int TAKING
= 1;
309 const int ELSE_SEEN
= 4;
310 const int PARENT_TAKING
= 8;
311 const int REGION
= 16;
314 // pre-processor if stack state:
318 static System
.Text
.StringBuilder string_builder
;
320 const int max_id_size
= 512;
321 static char [] id_builder
= new char [max_id_size
];
323 public static Dictionary
<char[], string>[] identifiers
= new Dictionary
<char[], string>[max_id_size
+ 1];
325 const int max_number_size
= 512;
326 static char [] number_builder
= new char [max_number_size
];
327 static int number_pos
;
329 static StringBuilder static_cmd_arg
= new System
.Text
.StringBuilder ();
332 // Details about the error encountered by the tokenizer
334 string error_details
;
336 public string error
{
338 return error_details
;
349 // This is used when the tokenizer needs to save
350 // the current position as it needs to do some parsing
351 // on its own to disambiguate a token on behalf of the
354 Stack
<Position
> position_stack
= new Stack
<Position
> (2);
362 public int putback_char
;
363 public int previous_col
;
364 public Stack
<int> ifstack
;
365 public int parsing_generic_less_than
;
366 public int current_token
;
369 public Position (Tokenizer t
)
371 position
= t
.reader
.Position
;
373 ref_line
= t
.ref_line
;
376 putback_char
= t
.putback_char
;
377 previous_col
= t
.previous_col
;
378 if (t
.ifstack
!= null && t
.ifstack
.Count
!= 0) {
379 // There is no simple way to clone Stack<T>; all
380 // methods reverse the order
381 var clone
= t
.ifstack
.ToArray ();
382 Array
.Reverse (clone
);
383 ifstack
= new Stack
<int> (clone
);
385 parsing_generic_less_than
= t
.parsing_generic_less_than
;
386 current_token
= t
.current_token
;
391 public void PushPosition ()
393 position_stack
.Push (new Position (this));
396 public void PopPosition ()
398 Position p
= position_stack
.Pop ();
400 reader
.Position
= p
.position
;
401 ref_line
= p
.ref_line
;
405 putback_char
= p
.putback_char
;
406 previous_col
= p
.previous_col
;
408 parsing_generic_less_than
= p
.parsing_generic_less_than
;
409 current_token
= p
.current_token
;
413 // Do not reset the position, ignore it.
414 public void DiscardPosition ()
416 position_stack
.Pop ();
419 static void AddKeyword (string kw
, int token
)
421 keyword_strings
.Add (kw
, null);
423 int length
= kw
.Length
;
424 if (keywords
[length
] == null) {
425 keywords
[length
] = new KeywordEntry
['z' - '_' + 1];
428 int char_index
= kw
[0] - '_';
429 KeywordEntry kwe
= keywords
[length
] [char_index
];
431 keywords
[length
] [char_index
] = new KeywordEntry (kw
, token
);
435 while (kwe
.Next
!= null) {
439 kwe
.Next
= new KeywordEntry (kw
, token
);
442 static void InitTokens ()
444 keyword_strings
= new Dictionary
<string, object> ();
446 // 11 is the length of the longest keyword for now
447 keywords
= new KeywordEntry
[11] [];
449 AddKeyword ("__arglist", Token
.ARGLIST
);
450 AddKeyword ("abstract", Token
.ABSTRACT
);
451 AddKeyword ("as", Token
.AS
);
452 AddKeyword ("add", Token
.ADD
);
453 AddKeyword ("base", Token
.BASE
);
454 AddKeyword ("bool", Token
.BOOL
);
455 AddKeyword ("break", Token
.BREAK
);
456 AddKeyword ("byte", Token
.BYTE
);
457 AddKeyword ("case", Token
.CASE
);
458 AddKeyword ("catch", Token
.CATCH
);
459 AddKeyword ("char", Token
.CHAR
);
460 AddKeyword ("checked", Token
.CHECKED
);
461 AddKeyword ("class", Token
.CLASS
);
462 AddKeyword ("const", Token
.CONST
);
463 AddKeyword ("continue", Token
.CONTINUE
);
464 AddKeyword ("decimal", Token
.DECIMAL
);
465 AddKeyword ("default", Token
.DEFAULT
);
466 AddKeyword ("delegate", Token
.DELEGATE
);
467 AddKeyword ("do", Token
.DO
);
468 AddKeyword ("double", Token
.DOUBLE
);
469 AddKeyword ("else", Token
.ELSE
);
470 AddKeyword ("enum", Token
.ENUM
);
471 AddKeyword ("event", Token
.EVENT
);
472 AddKeyword ("explicit", Token
.EXPLICIT
);
473 AddKeyword ("extern", Token
.EXTERN
);
474 AddKeyword ("false", Token
.FALSE
);
475 AddKeyword ("finally", Token
.FINALLY
);
476 AddKeyword ("fixed", Token
.FIXED
);
477 AddKeyword ("float", Token
.FLOAT
);
478 AddKeyword ("for", Token
.FOR
);
479 AddKeyword ("foreach", Token
.FOREACH
);
480 AddKeyword ("goto", Token
.GOTO
);
481 AddKeyword ("get", Token
.GET
);
482 AddKeyword ("if", Token
.IF
);
483 AddKeyword ("implicit", Token
.IMPLICIT
);
484 AddKeyword ("in", Token
.IN
);
485 AddKeyword ("int", Token
.INT
);
486 AddKeyword ("interface", Token
.INTERFACE
);
487 AddKeyword ("internal", Token
.INTERNAL
);
488 AddKeyword ("is", Token
.IS
);
489 AddKeyword ("lock", Token
.LOCK
);
490 AddKeyword ("long", Token
.LONG
);
491 AddKeyword ("namespace", Token
.NAMESPACE
);
492 AddKeyword ("new", Token
.NEW
);
493 AddKeyword ("null", Token
.NULL
);
494 AddKeyword ("object", Token
.OBJECT
);
495 AddKeyword ("operator", Token
.OPERATOR
);
496 AddKeyword ("out", Token
.OUT
);
497 AddKeyword ("override", Token
.OVERRIDE
);
498 AddKeyword ("params", Token
.PARAMS
);
499 AddKeyword ("private", Token
.PRIVATE
);
500 AddKeyword ("protected", Token
.PROTECTED
);
501 AddKeyword ("public", Token
.PUBLIC
);
502 AddKeyword ("readonly", Token
.READONLY
);
503 AddKeyword ("ref", Token
.REF
);
504 AddKeyword ("remove", Token
.REMOVE
);
505 AddKeyword ("return", Token
.RETURN
);
506 AddKeyword ("sbyte", Token
.SBYTE
);
507 AddKeyword ("sealed", Token
.SEALED
);
508 AddKeyword ("set", Token
.SET
);
509 AddKeyword ("short", Token
.SHORT
);
510 AddKeyword ("sizeof", Token
.SIZEOF
);
511 AddKeyword ("stackalloc", Token
.STACKALLOC
);
512 AddKeyword ("static", Token
.STATIC
);
513 AddKeyword ("string", Token
.STRING
);
514 AddKeyword ("struct", Token
.STRUCT
);
515 AddKeyword ("switch", Token
.SWITCH
);
516 AddKeyword ("this", Token
.THIS
);
517 AddKeyword ("throw", Token
.THROW
);
518 AddKeyword ("true", Token
.TRUE
);
519 AddKeyword ("try", Token
.TRY
);
520 AddKeyword ("typeof", Token
.TYPEOF
);
521 AddKeyword ("uint", Token
.UINT
);
522 AddKeyword ("ulong", Token
.ULONG
);
523 AddKeyword ("unchecked", Token
.UNCHECKED
);
524 AddKeyword ("unsafe", Token
.UNSAFE
);
525 AddKeyword ("ushort", Token
.USHORT
);
526 AddKeyword ("using", Token
.USING
);
527 AddKeyword ("virtual", Token
.VIRTUAL
);
528 AddKeyword ("void", Token
.VOID
);
529 AddKeyword ("volatile", Token
.VOLATILE
);
530 AddKeyword ("while", Token
.WHILE
);
531 AddKeyword ("partial", Token
.PARTIAL
);
532 AddKeyword ("where", Token
.WHERE
);
535 AddKeyword ("from", Token
.FROM
);
536 AddKeyword ("join", Token
.JOIN
);
537 AddKeyword ("on", Token
.ON
);
538 AddKeyword ("equals", Token
.EQUALS
);
539 AddKeyword ("select", Token
.SELECT
);
540 AddKeyword ("group", Token
.GROUP
);
541 AddKeyword ("by", Token
.BY
);
542 AddKeyword ("let", Token
.LET
);
543 AddKeyword ("orderby", Token
.ORDERBY
);
544 AddKeyword ("ascending", Token
.ASCENDING
);
545 AddKeyword ("descending", Token
.DESCENDING
);
546 AddKeyword ("into", Token
.INTO
);
555 csharp_format_info
= NumberFormatInfo
.InvariantInfo
;
556 styles
= NumberStyles
.Float
;
558 string_builder
= new System
.Text
.StringBuilder ();
561 int GetKeyword (char[] id
, int id_len
)
564 // Keywords are stored in an array of arrays grouped by their
565 // length and then by the first character
567 if (id_len
>= keywords
.Length
|| keywords
[id_len
] == null)
570 int first_index
= id
[0] - '_';
571 if (first_index
> 'z')
574 KeywordEntry kwe
= keywords
[id_len
] [first_index
];
581 for (int i
= 1; i
< id_len
; ++i
) {
582 if (id
[i
] != kwe
.Value
[i
]) {
588 } while (res
== 0 && kwe
!= null);
602 if (!handle_remove_add
)
606 if (parsing_declaration
== 0)
607 res
= Token
.EXTERN_ALIAS
;
610 if (peek_token () == Token
.COLON
) {
612 res
= Token
.DEFAULT_COLON
;
616 if (!handle_where
&& !query_parsing
)
621 // A query expression is any expression that starts with `from identifier'
622 // followed by any token except ; , =
624 if (!query_parsing
) {
625 if (lambda_arguments_parsing
) {
631 // HACK: to disable generics micro-parser, because PushPosition does not
632 // store identifiers array
633 parsing_generic_less_than
= 1;
635 case Token
.IDENTIFIER
:
647 next_token
= xtoken ();
648 if (next_token
== Token
.SEMICOLON
|| next_token
== Token
.COMMA
|| next_token
== Token
.EQUALS
)
651 res
= Token
.FROM_FIRST
;
652 query_parsing
= true;
653 if (RootContext
.Version
<= LanguageVersion
.ISO_2
)
654 Report
.FeatureIsNotAvailable (Location
, "query expressions");
657 Expression
.Error_VoidInvalidInTheContext (Location
, Report
);
661 // HACK: A token is not a keyword so we need to restore identifiers buffer
662 // which has been overwritten before we grabbed the identifier
663 id_builder
[0] = 'f'; id_builder
[1] = 'r'; id_builder
[2] = 'o'; id_builder
[3] = 'm';
677 case Token
.ASCENDING
:
678 case Token
.DESCENDING
:
685 case Token
.NAMESPACE
:
686 // TODO: some explanation needed
687 check_incorrect_doc_comment ();
691 if (parsing_block
> 0) {
696 // Save current position and parse next token.
699 next_token
= token ();
700 bool ok
= (next_token
== Token
.CLASS
) ||
701 (next_token
== Token
.STRUCT
) ||
702 (next_token
== Token
.INTERFACE
) ||
703 (next_token
== Token
.VOID
);
708 if (next_token
== Token
.VOID
) {
709 if (RootContext
.Version
== LanguageVersion
.ISO_1
||
710 RootContext
.Version
== LanguageVersion
.ISO_2
)
711 Report
.FeatureIsNotAvailable (Location
, "partial methods");
712 } else if (RootContext
.Version
== LanguageVersion
.ISO_1
)
713 Report
.FeatureIsNotAvailable (Location
, "partial types");
718 if (next_token
< Token
.LAST_KEYWORD
) {
719 Report
.Error (267, Location
,
720 "The `partial' modifier can be used only immediately before `class', `struct', `interface', or `void' keyword");
731 public Location Location
{
733 return new Location (ref_line
, hidden
? -1 : col
);
737 public Tokenizer (SeekableStreamReader input
, CompilationUnit file
, CompilerContext ctx
)
739 this.ref_name
= file
;
740 this.file_name
= file
;
746 xml_comment_buffer
= new StringBuilder ();
749 // FIXME: This could be `Location.Push' but we have to
750 // find out why the MS compiler allows this
752 Mono
.CSharp
.Location
.Push (file
, file
);
755 static bool is_identifier_start_character (int c
)
757 return (c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || c
== '_' || Char
.IsLetter ((char)c
);
760 static bool is_identifier_part_character (char c
)
762 if (c
>= 'a' && c
<= 'z')
765 if (c
>= 'A' && c
<= 'Z')
768 if (c
== '_' || (c
>= '0' && c
<= '9'))
774 return Char
.IsLetter (c
) || Char
.GetUnicodeCategory (c
) == UnicodeCategory
.ConnectorPunctuation
;
777 public static bool IsKeyword (string s
)
779 return keyword_strings
.ContainsKey (s
);
783 // Open parens micro parser. Detects both lambda and cast ambiguity.
785 int TokenizeOpenParens ()
790 int bracket_level
= 0;
791 bool is_type
= false;
792 bool can_be_type
= false;
795 ptoken
= current_token
;
798 switch (current_token
) {
799 case Token
.CLOSE_PARENS
:
803 // Expression inside parens is lambda, (int i) =>
805 if (current_token
== Token
.ARROW
)
806 return Token
.OPEN_PARENS_LAMBDA
;
809 // Expression inside parens is single type, (int[])
812 return Token
.OPEN_PARENS_CAST
;
815 // Expression is possible cast, look at next token, (T)null
818 switch (current_token
) {
819 case Token
.OPEN_PARENS
:
822 case Token
.IDENTIFIER
:
836 case Token
.UNCHECKED
:
841 // These can be part of a member access
855 return Token
.OPEN_PARENS_CAST
;
858 return Token
.OPEN_PARENS
;
861 case Token
.DOUBLE_COLON
:
862 if (ptoken
!= Token
.IDENTIFIER
&& ptoken
!= Token
.OP_GENERICS_GT
)
867 case Token
.IDENTIFIER
:
870 case Token
.OP_GENERICS_LT
:
872 case Token
.DOUBLE_COLON
:
874 if (bracket_level
== 0)
878 can_be_type
= is_type
= false;
898 if (bracket_level
== 0)
903 if (bracket_level
== 0) {
905 can_be_type
= is_type
= false;
909 case Token
.OP_GENERICS_LT
:
910 case Token
.OPEN_BRACKET
:
911 if (bracket_level
++ == 0)
915 case Token
.OP_GENERICS_GT
:
916 case Token
.CLOSE_BRACKET
:
920 case Token
.INTERR_NULLABLE
:
922 if (bracket_level
== 0)
928 can_be_type
= is_type
= false;
932 return Token
.OPEN_PARENS
;
937 public static bool IsValidIdentifier (string s
)
939 if (s
== null || s
.Length
== 0)
942 if (!is_identifier_start_character (s
[0]))
945 for (int i
= 1; i
< s
.Length
; i
++)
946 if (! is_identifier_part_character (s
[i
]))
952 bool parse_less_than ()
955 int the_token
= token ();
956 if (the_token
== Token
.OPEN_BRACKET
) {
958 the_token
= token ();
959 } while (the_token
!= Token
.CLOSE_BRACKET
);
960 the_token
= token ();
961 } else if (the_token
== Token
.IN
|| the_token
== Token
.OUT
) {
962 the_token
= token ();
965 case Token
.IDENTIFIER
:
983 case Token
.OP_GENERICS_GT
:
990 the_token
= token ();
992 if (the_token
== Token
.OP_GENERICS_GT
)
994 else if (the_token
== Token
.COMMA
|| the_token
== Token
.DOT
|| the_token
== Token
.DOUBLE_COLON
)
996 else if (the_token
== Token
.INTERR_NULLABLE
|| the_token
== Token
.STAR
)
998 else if (the_token
== Token
.OP_GENERICS_LT
) {
999 if (!parse_less_than ())
1002 } else if (the_token
== Token
.OPEN_BRACKET
) {
1004 the_token
= token ();
1005 if (the_token
== Token
.CLOSE_BRACKET
)
1007 else if (the_token
== Token
.COMMA
)
1008 goto rank_specifiers
;
1015 bool parse_generic_dimension (out int dimension
)
1020 int the_token
= token ();
1021 if (the_token
== Token
.OP_GENERICS_GT
)
1023 else if (the_token
== Token
.COMMA
) {
1031 public int peek_token ()
1036 the_token
= token ();
1043 // Tokenizes `?' using custom disambiguation rules to return one
1044 // of the following tokens: INTERR_NULLABLE, OP_COALESCING, INTERR
1046 // Tricky expressions look like:
1048 // Foo ? a = x ? b : c;
1050 int TokenizePossibleNullableType ()
1052 if (parsing_block
== 0 || parsing_type
> 0)
1053 return Token
.INTERR_NULLABLE
;
1055 int d
= peek_char ();
1058 return Token
.OP_COALESCING
;
1061 switch (current_token
) {
1062 case Token
.CLOSE_PARENS
:
1067 return Token
.INTERR
;
1071 if (d
== ',' || d
== ';' || d
== '>')
1072 return Token
.INTERR_NULLABLE
;
1073 if (d
== '*' || (d
>= '0' && d
<= '9'))
1074 return Token
.INTERR
;
1078 current_token
= Token
.NONE
;
1080 switch (xtoken ()) {
1087 next_token
= Token
.INTERR
;
1090 case Token
.SEMICOLON
:
1092 case Token
.CLOSE_PARENS
:
1093 case Token
.OPEN_BRACKET
:
1094 case Token
.OP_GENERICS_GT
:
1095 next_token
= Token
.INTERR_NULLABLE
;
1103 if (next_token
== -1) {
1104 switch (xtoken ()) {
1106 case Token
.SEMICOLON
:
1107 case Token
.OPEN_BRACE
:
1108 case Token
.CLOSE_PARENS
:
1110 next_token
= Token
.INTERR_NULLABLE
;
1114 next_token
= Token
.INTERR
;
1122 // All shortcuts failed, do it the hard way
1124 while ((ntoken
= xtoken ()) != Token
.EOF
) {
1125 if (ntoken
== Token
.SEMICOLON
)
1128 if (ntoken
== Token
.COLON
) {
1129 if (++colons
== interrs
)
1134 if (ntoken
== Token
.INTERR
) {
1140 next_token
= colons
!= interrs
? Token
.INTERR_NULLABLE
: Token
.INTERR
;
1149 bool decimal_digits (int c
)
1152 bool seen_digits
= false;
1155 if (number_pos
== max_number_size
)
1156 Error_NumericConstantTooLong ();
1157 number_builder
[number_pos
++] = (char) c
;
1161 // We use peek_char2, because decimal_digits needs to do a
1162 // 2-character look-ahead (5.ToString for example).
1164 while ((d
= peek_char2 ()) != -1){
1165 if (d
>= '0' && d
<= '9'){
1166 if (number_pos
== max_number_size
)
1167 Error_NumericConstantTooLong ();
1168 number_builder
[number_pos
++] = (char) d
;
1178 static bool is_hex (int e
)
1180 return (e
>= '0' && e
<= '9') || (e
>= 'A' && e
<= 'F') || (e
>= 'a' && e
<= 'f');
1183 static TypeCode
real_type_suffix (int c
)
1187 return TypeCode
.Single
;
1189 return TypeCode
.Double
;
1191 return TypeCode
.Decimal
;
1193 return TypeCode
.Empty
;
1197 int integer_type_suffix (ulong ul
, int c
)
1199 bool is_unsigned
= false;
1200 bool is_long
= false;
1203 bool scanning
= true;
1216 // if we have not seen anything in between
1217 // report this error
1219 Report
.Warning (78, 4, Location
, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
1239 if (is_long
&& is_unsigned
){
1240 val
= new ULongLiteral (ul
, Location
);
1241 return Token
.LITERAL
;
1245 // uint if possible, or ulong else.
1247 if ((ul
& 0xffffffff00000000) == 0)
1248 val
= new UIntLiteral ((uint) ul
, Location
);
1250 val
= new ULongLiteral (ul
, Location
);
1251 } else if (is_long
){
1252 // long if possible, ulong otherwise
1253 if ((ul
& 0x8000000000000000) != 0)
1254 val
= new ULongLiteral (ul
, Location
);
1256 val
= new LongLiteral ((long) ul
, Location
);
1258 // int, uint, long or ulong in that order
1259 if ((ul
& 0xffffffff00000000) == 0){
1260 uint ui
= (uint) ul
;
1262 if ((ui
& 0x80000000) != 0)
1263 val
= new UIntLiteral (ui
, Location
);
1265 val
= new IntLiteral ((int) ui
, Location
);
1267 if ((ul
& 0x8000000000000000) != 0)
1268 val
= new ULongLiteral (ul
, Location
);
1270 val
= new LongLiteral ((long) ul
, Location
);
1273 return Token
.LITERAL
;
1277 // given `c' as the next char in the input decide whether
1278 // we need to convert to a special type, and then choose
1279 // the best representation for the integer
1281 int adjust_int (int c
)
1284 if (number_pos
> 9){
1285 ulong ul
= (uint) (number_builder
[0] - '0');
1287 for (int i
= 1; i
< number_pos
; i
++){
1288 ul
= checked ((ul
* 10) + ((uint)(number_builder
[i
] - '0')));
1290 return integer_type_suffix (ul
, c
);
1292 uint ui
= (uint) (number_builder
[0] - '0');
1294 for (int i
= 1; i
< number_pos
; i
++){
1295 ui
= checked ((ui
* 10) + ((uint)(number_builder
[i
] - '0')));
1297 return integer_type_suffix (ui
, c
);
1299 } catch (OverflowException
) {
1300 error_details
= "Integral constant is too large";
1301 Report
.Error (1021, Location
, error_details
);
1302 val
= new IntLiteral (0, Location
);
1303 return Token
.LITERAL
;
1305 catch (FormatException
) {
1306 Report
.Error (1013, Location
, "Invalid number");
1307 val
= new IntLiteral (0, Location
);
1308 return Token
.LITERAL
;
1312 int adjust_real (TypeCode t
)
1314 string s
= new String (number_builder
, 0, number_pos
);
1315 const string error_details
= "Floating-point constant is outside the range of type `{0}'";
1318 case TypeCode
.Decimal
:
1320 val
= new DecimalLiteral (decimal.Parse (s
, styles
, csharp_format_info
), Location
);
1321 } catch (OverflowException
) {
1322 val
= new DecimalLiteral (0, Location
);
1323 Report
.Error (594, Location
, error_details
, "decimal");
1326 case TypeCode
.Single
:
1328 val
= new FloatLiteral (float.Parse (s
, styles
, csharp_format_info
), Location
);
1329 } catch (OverflowException
) {
1330 val
= new FloatLiteral (0, Location
);
1331 Report
.Error (594, Location
, error_details
, "float");
1336 val
= new DoubleLiteral (double.Parse (s
, styles
, csharp_format_info
), Location
);
1337 } catch (OverflowException
) {
1338 val
= new DoubleLiteral (0, Location
);
1339 Report
.Error (594, Location
, error_details
, "double");
1344 return Token
.LITERAL
;
1353 while ((d
= peek_char ()) != -1){
1355 number_builder
[number_pos
++] = (char) d
;
1361 string s
= new String (number_builder
, 0, number_pos
);
1363 if (number_pos
<= 8)
1364 ul
= System
.UInt32
.Parse (s
, NumberStyles
.HexNumber
);
1366 ul
= System
.UInt64
.Parse (s
, NumberStyles
.HexNumber
);
1367 } catch (OverflowException
){
1368 error_details
= "Integral constant is too large";
1369 Report
.Error (1021, Location
, error_details
);
1370 val
= new IntLiteral (0, Location
);
1371 return Token
.LITERAL
;
1373 catch (FormatException
) {
1374 Report
.Error (1013, Location
, "Invalid number");
1375 val
= new IntLiteral (0, Location
);
1376 return Token
.LITERAL
;
1379 return integer_type_suffix (ul
, peek_char ());
1383 // Invoked if we know we have .digits or digits
1385 int is_number (int c
)
1387 bool is_real
= false;
1391 if (c
>= '0' && c
<= '9'){
1393 int peek
= peek_char ();
1395 if (peek
== 'x' || peek
== 'X')
1396 return handle_hex ();
1403 // We need to handle the case of
1404 // "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)
1407 if (decimal_digits ('.')){
1413 return adjust_int (-1);
1417 if (c
== 'e' || c
== 'E'){
1419 if (number_pos
== max_number_size
)
1420 Error_NumericConstantTooLong ();
1421 number_builder
[number_pos
++] = 'e';
1425 if (number_pos
== max_number_size
)
1426 Error_NumericConstantTooLong ();
1427 number_builder
[number_pos
++] = '+';
1429 } else if (c
== '-') {
1430 if (number_pos
== max_number_size
)
1431 Error_NumericConstantTooLong ();
1432 number_builder
[number_pos
++] = '-';
1435 if (number_pos
== max_number_size
)
1436 Error_NumericConstantTooLong ();
1437 number_builder
[number_pos
++] = '+';
1444 var type
= real_type_suffix (c
);
1445 if (type
== TypeCode
.Empty
&& !is_real
){
1447 return adjust_int (c
);
1452 if (type
== TypeCode
.Empty
){
1457 return adjust_real (type
);
1459 throw new Exception ("Is Number should never reach this point");
1463 // Accepts exactly count (4 or 8) hex, no more no less
1465 int getHex (int count
, out int surrogate
, out bool error
)
1470 int top
= count
!= -1 ? count
: 4;
1475 for (i
= 0; i
< top
; i
++){
1478 if (c
>= '0' && c
<= '9')
1479 c
= (int) c
- (int) '0';
1480 else if (c
>= 'A' && c
<= 'F')
1481 c
= (int) c
- (int) 'A' + 10;
1482 else if (c
>= 'a' && c
<= 'f')
1483 c
= (int) c
- (int) 'a' + 10;
1489 total
= (total
* 16) + c
;
1491 int p
= peek_char ();
1494 if (!is_hex ((char)p
))
1500 if (total
> 0x0010FFFF) {
1505 if (total
>= 0x00010000) {
1506 surrogate
= ((total
- 0x00010000) % 0x0400 + 0xDC00);
1507 total
= ((total
- 0x00010000) / 0x0400 + 0xD800);
1514 int escape (int c
, out int surrogate
)
1550 v
= getHex (-1, out surrogate
, out error
);
1556 return EscapeUnicode (d
, out surrogate
);
1559 Report
.Error (1009, Location
, "Unrecognized escape sequence `\\{0}'", ((char)d
).ToString ());
1568 int EscapeUnicode (int ch
, out int surrogate
)
1572 ch
= getHex (8, out surrogate
, out error
);
1574 ch
= getHex (4, out surrogate
, out error
);
1578 Report
.Error (1009, Location
, "Unrecognized escape sequence");
1586 if (putback_char
!= -1) {
1599 void advance_line ()
1609 if (putback_char
== -1)
1610 putback_char
= reader
.Read ();
1611 return putback_char
;
1616 if (putback_char
!= -1)
1617 return putback_char
;
1618 return reader
.Peek ();
1621 void putback (int c
)
1623 if (putback_char
!= -1){
1624 Console
.WriteLine ("Col: " + col
);
1625 Console
.WriteLine ("Row: " + line
);
1626 Console
.WriteLine ("Name: " + ref_name
.Name
);
1627 Console
.WriteLine ("Current [{0}] putting back [{1}] ", putback_char
, c
);
1628 throw new Exception ("This should not happen putback on putback");
1630 if (c
== '\n' || col
== 0) {
1631 // It won't happen though.
1641 public bool advance ()
1643 return peek_char () != -1 || CompleteOnEOF
;
1646 public Object Value
{
1652 public Object
value ()
1659 current_token
= xtoken ();
1660 return current_token
;
1663 void get_cmd_arg (out string cmd
, out string arg
)
1667 tokens_seen
= false;
1670 // skip over white space
1673 } while (c
== '\r' || c
== ' ' || c
== '\t');
1675 static_cmd_arg
.Length
= 0;
1676 while (c
!= -1 && is_identifier_part_character ((char)c
)) {
1677 static_cmd_arg
.Append ((char)c
);
1680 int peek
= peek_char ();
1681 if (peek
== 'U' || peek
== 'u') {
1683 c
= EscapeUnicode (c
, out surrogate
);
1684 if (surrogate
!= 0) {
1685 if (is_identifier_part_character ((char) c
))
1686 static_cmd_arg
.Append ((char) c
);
1693 cmd
= static_cmd_arg
.ToString ();
1695 // skip over white space
1696 while (c
== '\r' || c
== ' ' || c
== '\t')
1699 static_cmd_arg
.Length
= 0;
1700 int has_identifier_argument
= 0;
1702 while (c
!= -1 && c
!= '\n' && c
!= '\r') {
1703 if (c
== '\\' && has_identifier_argument
>= 0) {
1704 if (has_identifier_argument
!= 0 || (cmd
== "define" || cmd
== "if" || cmd
== "elif" || cmd
== "undef")) {
1705 has_identifier_argument
= 1;
1707 int peek
= peek_char ();
1708 if (peek
== 'U' || peek
== 'u') {
1710 c
= EscapeUnicode (c
, out surrogate
);
1711 if (surrogate
!= 0) {
1712 if (is_identifier_part_character ((char) c
))
1713 static_cmd_arg
.Append ((char) c
);
1718 has_identifier_argument
= -1;
1721 static_cmd_arg
.Append ((char) c
);
1725 if (static_cmd_arg
.Length
!= 0)
1726 arg
= static_cmd_arg
.ToString ();
1730 // Handles the #line directive
1732 bool PreProcessLine (string arg
)
1734 if (arg
.Length
== 0)
1737 if (arg
== "default"){
1739 ref_name
= file_name
;
1741 Location
.Push (file_name
, ref_name
);
1743 } else if (arg
== "hidden"){
1751 if ((pos
= arg
.IndexOf (' ')) != -1 && pos
!= 0){
1752 ref_line
= System
.Int32
.Parse (arg
.Substring (0, pos
));
1755 char [] quotes
= { '\"' }
;
1757 string name
= arg
.Substring (pos
). Trim (quotes
);
1758 ref_name
= Location
.LookupFile (file_name
, name
);
1759 file_name
.AddFile (ref_name
);
1761 Location
.Push (file_name
, ref_name
);
1763 ref_line
= System
.Int32
.Parse (arg
);
1774 // Handles #define and #undef
1776 void PreProcessDefinition (bool is_define
, string ident
, bool caller_is_taking
)
1778 if (ident
.Length
== 0 || ident
== "true" || ident
== "false"){
1779 Report
.Error (1001, Location
, "Missing identifier to pre-processor directive");
1783 if (ident
.IndexOfAny (simple_whitespaces
) != -1){
1784 Error_EndLineExpected ();
1788 if (!is_identifier_start_character (ident
[0]))
1789 Report
.Error (1001, Location
, "Identifier expected: {0}", ident
);
1791 foreach (char c
in ident
.Substring (1)){
1792 if (!is_identifier_part_character (c
)){
1793 Report
.Error (1001, Location
, "Identifier expected: {0}", ident
);
1798 if (!caller_is_taking
)
1805 if (RootContext
.IsConditionalDefined (ident
))
1808 file_name
.AddDefine (ident
);
1813 file_name
.AddUndefine (ident
);
//
// Decodes the two hexadecimal digits found at arg [pos] and arg [pos + 1]
// into one byte, most significant digit first. Upper and lower case
// digits are both accepted.
//
// On success `error' is false and the decoded value is returned. When
// either character is not a hexadecimal digit, or when the two characters
// do not fit inside `arg', `error' is true and 0 is returned.
//
static byte read_hex (string arg, int pos, out bool error)
{
	// Assume failure until both digits have been decoded
	error = true;

	// Both digits have to be inside the string, otherwise the
	// indexer below would throw instead of reporting the error
	if (arg == null || pos < 0 || pos > arg.Length - 2)
		return 0;

	int high = hex_digit_value (arg [pos]);
	if (high < 0)
		return 0;

	int low = hex_digit_value (arg [pos + 1]);
	if (low < 0)
		return 0;

	error = false;
	return (byte) (high * 16 + low);
}

//
// Returns the value (0-15) of a single hexadecimal digit, or -1 when
// `c' is not a hexadecimal digit
//
static int hex_digit_value (char c)
{
	if ((c >= '0') && (c <= '9'))
		return (int) c - (int) '0';
	if ((c >= 'A') && (c <= 'F'))
		return (int) c - (int) 'A' + 10;
	if ((c >= 'a') && (c <= 'f'))
		return (int) c - (int) 'a' + 10;

	return -1;
}
1853 /// Handles #pragma checksum
1855 bool PreProcessPragmaChecksum (string arg
)
1857 if ((arg
[0] != ' ') && (arg
[0] != '\t'))
1860 arg
= arg
.Trim (simple_whitespaces
);
1861 if ((arg
.Length
< 2) || (arg
[0] != '"'))
1864 StringBuilder file_sb
= new StringBuilder ();
1868 while ((ch
= arg
[pos
++]) != '"') {
1869 if (pos
>= arg
.Length
)
1873 if (pos
+1 >= arg
.Length
)
1878 file_sb
.Append (ch
);
1881 if ((pos
+2 >= arg
.Length
) || ((arg
[pos
] != ' ') && (arg
[pos
] != '\t')))
1884 arg
= arg
.Substring (pos
).Trim (simple_whitespaces
);
1885 if ((arg
.Length
< 42) || (arg
[0] != '"') || (arg
[1] != '{') ||
1886 (arg
[10] != '-') || (arg
[15] != '-') || (arg
[20] != '-') ||
1887 (arg
[25] != '-') || (arg
[38] != '}') || (arg
[39] != '"'))
1891 byte[] guid_bytes
= new byte [16];
1893 for (int i
= 0; i
< 4; i
++) {
1894 guid_bytes
[i
] = read_hex (arg
, 2+2*i
, out error
);
1898 for (int i
= 0; i
< 2; i
++) {
1899 guid_bytes
[i
+4] = read_hex (arg
, 11+2*i
, out error
);
1902 guid_bytes
[i
+6] = read_hex (arg
, 16+2*i
, out error
);
1905 guid_bytes
[i
+8] = read_hex (arg
, 21+2*i
, out error
);
1910 for (int i
= 0; i
< 6; i
++) {
1911 guid_bytes
[i
+10] = read_hex (arg
, 26+2*i
, out error
);
1916 arg
= arg
.Substring (40).Trim (simple_whitespaces
);
1917 if ((arg
.Length
< 34) || (arg
[0] != '"') || (arg
[33] != '"'))
1920 byte[] checksum_bytes
= new byte [16];
1921 for (int i
= 0; i
< 16; i
++) {
1922 checksum_bytes
[i
] = read_hex (arg
, 1+2*i
, out error
);
1927 arg
= arg
.Substring (34).Trim (simple_whitespaces
);
1931 SourceFile file
= Location
.LookupFile (file_name
, file_sb
.ToString ());
1932 file
.SetChecksum (guid_bytes
, checksum_bytes
);
1933 ref_name
.AutoGenerated
= true;
1938 /// Handles #pragma directive
1940 void PreProcessPragma (string arg
)
1942 const string warning
= "warning";
1943 const string w_disable
= "warning disable";
1944 const string w_restore
= "warning restore";
1945 const string checksum
= "checksum";
1947 if (arg
== w_disable
) {
1948 Report
.RegisterWarningRegion (Location
).WarningDisable (Location
.Row
);
1952 if (arg
== w_restore
) {
1953 Report
.RegisterWarningRegion (Location
).WarningEnable (Location
.Row
);
1957 if (arg
.StartsWith (w_disable
)) {
1958 int[] codes
= ParseNumbers (arg
.Substring (w_disable
.Length
));
1959 foreach (int code
in codes
) {
1961 Report
.RegisterWarningRegion (Location
).WarningDisable (Location
, code
, Report
);
1966 if (arg
.StartsWith (w_restore
)) {
1967 int[] codes
= ParseNumbers (arg
.Substring (w_restore
.Length
));
1968 var w_table
= Report
.warning_ignore_table
;
1969 foreach (int code
in codes
) {
1970 if (w_table
!= null && w_table
.ContainsKey (code
))
1971 Report
.Warning (1635, 1, Location
, "Cannot restore warning `CS{0:0000}' because it was disabled globally", code
);
1972 Report
.RegisterWarningRegion (Location
).WarningEnable (Location
, code
, Report
);
1977 if (arg
.StartsWith (warning
)) {
1978 Report
.Warning (1634, 1, Location
, "Expected disable or restore");
1982 if (arg
.StartsWith (checksum
)) {
1983 if (!PreProcessPragmaChecksum (arg
.Substring (checksum
.Length
)))
1984 Warning_InvalidPragmaChecksum ();
1988 Report
.Warning (1633, 1, Location
, "Unrecognized #pragma directive");
1991 int[] ParseNumbers (string text
)
1993 string[] string_array
= text
.Split (',');
1994 int[] values
= new int [string_array
.Length
];
1996 foreach (string string_code
in string_array
) {
1998 values
[index
++] = int.Parse (string_code
, System
.Globalization
.CultureInfo
.InvariantCulture
);
2000 catch (FormatException
) {
2001 Report
.Warning (1692, 1, Location
, "Invalid number");
2007 bool eval_val (string s
)
2014 return file_name
.IsConditionalDefined (s
);
2017 bool pp_primary (ref string s
)
2026 s
= s
.Substring (1);
2027 bool val
= pp_expr (ref s
, false);
2028 if (s
.Length
> 0 && s
[0] == ')'){
2029 s
= s
.Substring (1);
2032 Error_InvalidDirective ();
2036 if (is_identifier_start_character (c
)){
2042 if (is_identifier_part_character (c
)){
2046 bool v
= eval_val (s
.Substring (0, j
));
2047 s
= s
.Substring (j
);
2050 bool vv
= eval_val (s
);
2055 Error_InvalidDirective ();
2059 bool pp_unary (ref string s
)
2066 if (len
> 1 && s
[1] == '='){
2067 Error_InvalidDirective ();
2070 s
= s
.Substring (1);
2071 return ! pp_primary (ref s
);
2073 return pp_primary (ref s
);
2075 Error_InvalidDirective ();
2080 bool pp_eq (ref string s
)
2082 bool va
= pp_unary (ref s
);
2088 if (len
> 2 && s
[1] == '='){
2089 s
= s
.Substring (2);
2090 return va
== pp_unary (ref s
);
2092 Error_InvalidDirective ();
2095 } else if (s
[0] == '!' && len
> 1 && s
[1] == '='){
2096 s
= s
.Substring (2);
2098 return va
!= pp_unary (ref s
);
2107 bool pp_and (ref string s
)
2109 bool va
= pp_eq (ref s
);
2115 if (len
> 2 && s
[1] == '&'){
2116 s
= s
.Substring (2);
2117 return (va
& pp_and (ref s
));
2119 Error_InvalidDirective ();
2128 // Evaluates an expression for `#if' or `#elif'
2130 bool pp_expr (ref string s
, bool isTerm
)
2132 bool va
= pp_and (ref s
);
2139 if (len
> 2 && s
[1] == '|'){
2140 s
= s
.Substring (2);
2141 return va
| pp_expr (ref s
, isTerm
);
2143 Error_InvalidDirective ();
2148 Error_EndLineExpected ();
2156 bool eval (string s
)
2158 bool v
= pp_expr (ref s
, true);
//
// Reports error 1021 at the current location
//
void Error_NumericConstantTooLong ()
{
	const string message = "Numeric constant too long";
	Report.Error (1021, Location, message);
}
//
// Reports error 1517 at the current location. Used by the
// preprocessor expression parser when it cannot make sense of the input
//
void Error_InvalidDirective ()
{
	const string message = "Invalid preprocessor directive";
	Report.Error (1517, Location, message);
}
2177 void Error_UnexpectedDirective (string extra
)
2181 "Unexpected processor directive ({0})", extra
);
//
// Reports error 1032 at the current location: a #define or #undef
// was found after a token had already been seen
//
void Error_TokensSeen ()
{
	const string message =
		"Cannot define or undefine preprocessor symbols after first token in file";
	Report.Error (1032, Location, message);
}
//
// Reports error 1040 at the current location: a preprocessor directive
// was found on a line where tokens or comments had already been seen
//
void Error_WrongPreprocessorLocation ()
{
	Report.Error (1040, Location,
		"Preprocessor directives must appear as the first non-whitespace character on a line");
}

//
// Misspelled name, kept so that the existing callers keep compiling.
// Forwards to Error_WrongPreprocessorLocation
//
void Eror_WrongPreprocessorLocation ()
{
	Error_WrongPreprocessorLocation ();
}
//
// Reports error 1025 at the current location: something other than a
// single-line comment or the end of the line follows a directive
//
void Error_EndLineExpected ()
{
	const string message = "Single-line comment or end-of-line expected";
	Report.Error (1025, Location, message);
}
//
// Reports warning 1695 (level 1) at the current location, showing the
// syntax a #pragma checksum directive is expected to follow
//
void Warning_InvalidPragmaChecksum ()
{
	const string expected_syntax =
		"#pragma checksum \"filename\" " +
		"\"{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}\" \"XXXX...\"";

	Report.Warning (1695, 1, Location,
		"Invalid #pragma checksum syntax; should be " + expected_syntax);
}
2209 // if true, then the code continues processing the code
2210 // if false, the code stays in a loop until another directive is
2212 // When caller_is_taking is false we ignore all directives except the ones
2213 // which can help us to identify where the #if block ends
2214 bool handle_preprocessing_directive (bool caller_is_taking
)
2217 bool region_directive
= false;
2219 get_cmd_arg (out cmd
, out arg
);
2221 // Eat any trailing whitespaces and single-line comments
2222 if (arg
.IndexOf ("//") != -1)
2223 arg
= arg
.Substring (0, arg
.IndexOf ("//"));
2224 arg
= arg
.Trim (simple_whitespaces
);
2227 // The first group of pre-processing instructions is always processed
2231 region_directive
= true;
2236 if (ifstack
== null || ifstack
.Count
== 0){
2237 Error_UnexpectedDirective ("no #region for this #endregion");
2240 int pop
= ifstack
.Pop ();
2242 if ((pop
& REGION
) == 0)
2243 Report
.Error (1027, Location
, "Expected `#endif' directive");
2245 return caller_is_taking
;
2248 if (ifstack
== null)
2249 ifstack
= new Stack
<int> (2);
2251 int flags
= region_directive
? REGION
: 0;
2252 if (ifstack
.Count
== 0){
2253 flags
|= PARENT_TAKING
;
2255 int state
= ifstack
.Peek ();
2256 if ((state
& TAKING
) != 0) {
2257 flags
|= PARENT_TAKING
;
2261 if (caller_is_taking
&& eval (arg
)) {
2262 ifstack
.Push (flags
| TAKING
);
2265 ifstack
.Push (flags
);
2269 if (ifstack
== null || ifstack
.Count
== 0){
2270 Error_UnexpectedDirective ("no #if for this #endif");
2273 pop
= ifstack
.Pop ();
2275 if ((pop
& REGION
) != 0)
2276 Report
.Error (1038, Location
, "#endregion directive expected");
2278 if (arg
.Length
!= 0) {
2279 Error_EndLineExpected ();
2282 if (ifstack
.Count
== 0)
2285 int state
= ifstack
.Peek ();
2286 return (state
& TAKING
) != 0;
2290 if (ifstack
== null || ifstack
.Count
== 0){
2291 Error_UnexpectedDirective ("no #if for this #elif");
2294 int state
= ifstack
.Pop ();
2296 if ((state
& REGION
) != 0) {
2297 Report
.Error (1038, Location
, "#endregion directive expected");
2301 if ((state
& ELSE_SEEN
) != 0){
2302 Error_UnexpectedDirective ("#elif not valid after #else");
2306 if ((state
& TAKING
) != 0) {
2311 if (eval (arg
) && ((state
& PARENT_TAKING
) != 0)){
2312 ifstack
.Push (state
| TAKING
);
2316 ifstack
.Push (state
);
2321 if (ifstack
== null || ifstack
.Count
== 0){
2322 Error_UnexpectedDirective ("no #if for this #else");
2325 int state
= ifstack
.Peek ();
2327 if ((state
& REGION
) != 0) {
2328 Report
.Error (1038, Location
, "#endregion directive expected");
2332 if ((state
& ELSE_SEEN
) != 0){
2333 Error_UnexpectedDirective ("#else within #else");
2339 if (arg
.Length
!= 0) {
2340 Error_EndLineExpected ();
2345 if ((state
& PARENT_TAKING
) != 0) {
2346 ret
= (state
& TAKING
) == 0;
2354 ifstack
.Push (state
| ELSE_SEEN
);
2359 if (any_token_seen
){
2360 Error_TokensSeen ();
2361 return caller_is_taking
;
2363 PreProcessDefinition (true, arg
, caller_is_taking
);
2364 return caller_is_taking
;
2367 if (any_token_seen
){
2368 Error_TokensSeen ();
2369 return caller_is_taking
;
2371 PreProcessDefinition (false, arg
, caller_is_taking
);
2372 return caller_is_taking
;
2376 // These are only processed if we are in a `taking' block
2378 if (!caller_is_taking
)
2383 Report
.Error (1029, Location
, "#error: '{0}'", arg
);
2387 Report
.Warning (1030, 1, Location
, "#warning: `{0}'", arg
);
2391 if (RootContext
.Version
== LanguageVersion
.ISO_1
) {
2392 Report
.FeatureIsNotAvailable (Location
, "#pragma");
2396 PreProcessPragma (arg
);
2400 if (!PreProcessLine (arg
))
2403 "The line number specified for #line directive is missing or invalid");
2404 return caller_is_taking
;
2407 Report
.Error (1024, Location
, "Wrong preprocessor directive");
2412 private int consume_string (bool quoted
)
2415 string_builder
.Length
= 0;
2417 while ((c
= get_char ()) != -1){
2419 if (quoted
&& peek_char () == '"'){
2420 string_builder
.Append ((char) c
);
2424 val
= new StringLiteral (string_builder
.ToString (), Location
);
2425 return Token
.LITERAL
;
2431 Report
.Error (1010, Location
, "Newline in constant");
2436 c
= escape (c
, out surrogate
);
2439 if (surrogate
!= 0) {
2440 string_builder
.Append ((char) c
);
2444 string_builder
.Append ((char) c
);
2447 Report
.Error (1039, Location
, "Unterminated string literal");
//
// Consumes a non-quoted identifier (or keyword) starting with the
// character `s' and returns the resulting token.
//
// Once an identifier has been consumed a doc comment is no longer
// accepted, so an `Allowed' doc state is switched to `NotAllowed'
//
private int consume_identifier (int s)
{
	int result = consume_identifier (s, false);

	bool doc_comment_was_allowed = doc_state == XmlCommentState.Allowed;
	if (doc_comment_was_allowed) {
		doc_state = XmlCommentState.NotAllowed;
	}

	return result;
}
2461 int consume_identifier (int c
, bool quoted
)
2464 // This method is very performance sensitive. It accounts
2465 // for approximately 25% of all parser time
2473 c
= escape (c
, out surrogate
);
2474 if (surrogate
!= 0) {
2475 id_builder
[pos
++] = (char) c
;
2480 id_builder
[pos
++] = (char) c
;
2486 if ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || c
== '_' || (c
>= '0' && c
<= '9')) {
2487 id_builder
[pos
++] = (char) c
;
2494 c
= escape (c
, out surrogate
);
2495 if (surrogate
!= 0) {
2496 if (is_identifier_part_character ((char) c
))
2497 id_builder
[pos
++] = (char) c
;
2503 } else if (Char
.IsLetter ((char) c
) || Char
.GetUnicodeCategory ((char) c
) == UnicodeCategory
.ConnectorPunctuation
) {
2504 id_builder
[pos
++] = (char) c
;
2511 } catch (IndexOutOfRangeException
) {
2512 Report
.Error (645, Location
, "Identifier too long (limit is 512 chars)");
2520 // Optimization: avoids doing the keyword lookup
2521 // on uppercase letters
2523 if (id_builder
[0] >= '_' && !quoted
) {
2524 int keyword
= GetKeyword (id_builder
, pos
);
2525 if (keyword
!= -1) {
2526 val
= LocatedToken
.Create (null, ref_line
, column
);
2532 // Keep identifiers in an array of hashtables to avoid needless
2535 var identifiers_group
= identifiers
[pos
];
2537 if (identifiers_group
!= null) {
2538 if (identifiers_group
.TryGetValue (id_builder
, out s
)) {
2539 val
= LocatedToken
.Create (s
, ref_line
, column
);
2541 AddEscapedIdentifier (((LocatedToken
) val
).Location
);
2542 return Token
.IDENTIFIER
;
2545 // TODO: this should depend on the number of files
2546 // corlib compilation peaks at 1000 and System.Core at 150
2547 int capacity
= pos
> 20 ? 10 : 100;
2548 identifiers_group
= new Dictionary
<char[],string> (capacity
, new IdentifiersComparer (pos
));
2549 identifiers
[pos
] = identifiers_group
;
2552 char [] chars
= new char [pos
];
2553 Array
.Copy (id_builder
, chars
, pos
);
2555 s
= new string (id_builder
, 0, pos
);
2556 identifiers_group
.Add (chars
, s
);
2558 val
= LocatedToken
.Create (s
, ref_line
, column
);
2560 AddEscapedIdentifier (((LocatedToken
) val
).Location
);
2562 return Token
.IDENTIFIER
;
2565 public int xtoken ()
2569 // Whether we have seen comments on the current line
2570 bool comments_seen
= false;
2571 while ((c
= get_char ()) != -1) {
2574 col
= ((col
+ 8) / 8) * 8;
2582 case 0xFEFF: // Ignore BOM anywhere in the file
2585 /* This is required for compatibility with .NET
2587 if (peek_char () == 0xBB) {
2590 if (get_char () == 0xBF)
2597 if (peek_char () != '\n')
2602 any_token_seen
|= tokens_seen
;
2603 tokens_seen
= false;
2604 comments_seen
= false;
2609 return consume_identifier (c
);
2612 val
= LocatedToken
.Create (ref_line
, col
);
2613 return Token
.OPEN_BRACE
;
2615 val
= LocatedToken
.Create (ref_line
, col
);
2616 return Token
.CLOSE_BRACE
;
2618 // To block doccomment inside attribute declaration.
2619 if (doc_state
== XmlCommentState
.Allowed
)
2620 doc_state
= XmlCommentState
.NotAllowed
;
2621 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2622 return Token
.OPEN_BRACKET
;
2624 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2625 return Token
.CLOSE_BRACKET
;
2627 val
= LocatedToken
.Create (ref_line
, col
);
2629 // An expression versions of parens can appear in block context only
2631 if (parsing_block
!= 0 && !lambda_arguments_parsing
) {
2634 // Optimize most common case where we know that parens
2637 switch (current_token
) {
2638 case Token
.IDENTIFIER
:
2646 case Token
.DELEGATE
:
2647 case Token
.OP_GENERICS_GT
:
2648 return Token
.OPEN_PARENS
;
2651 // Optimize using peek
2652 int xx
= peek_char ();
2659 return Token
.OPEN_PARENS
;
2662 lambda_arguments_parsing
= true;
2664 d
= TokenizeOpenParens ();
2666 lambda_arguments_parsing
= false;
2670 return Token
.OPEN_PARENS
;
2672 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2673 return Token
.CLOSE_PARENS
;
2675 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2678 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2679 return Token
.SEMICOLON
;
2681 val
= LocatedToken
.Create (ref_line
, col
);
2684 val
= LocatedToken
.Create (ref_line
, col
);
2685 return TokenizePossibleNullableType ();
2687 val
= LocatedToken
.Create (ref_line
, col
);
2688 if (parsing_generic_less_than
++ > 0)
2689 return Token
.OP_GENERICS_LT
;
2691 return TokenizeLessThan ();
2694 val
= LocatedToken
.Create (ref_line
, col
);
2702 if (parsing_generic_less_than
> 1 || (parsing_generic_less_than
== 1 && d
!= '>')) {
2703 parsing_generic_less_than
--;
2704 return Token
.OP_GENERICS_GT
;
2713 return Token
.OP_SHIFT_RIGHT_ASSIGN
;
2715 return Token
.OP_SHIFT_RIGHT
;
2721 val
= LocatedToken
.Create (ref_line
, col
);
2725 } else if (d
== '=') {
2726 d
= Token
.OP_ADD_ASSIGN
;
2734 val
= LocatedToken
.Create (ref_line
, col
);
2738 } else if (d
== '=')
2739 d
= Token
.OP_SUB_ASSIGN
;
2749 val
= LocatedToken
.Create (ref_line
, col
);
2750 if (peek_char () == '='){
2757 val
= LocatedToken
.Create (ref_line
, col
);
2768 return Token
.ASSIGN
;
2771 val
= LocatedToken
.Create (ref_line
, col
);
2775 return Token
.OP_AND
;
2779 return Token
.OP_AND_ASSIGN
;
2781 return Token
.BITWISE_AND
;
2784 val
= LocatedToken
.Create (ref_line
, col
);
2792 return Token
.OP_OR_ASSIGN
;
2794 return Token
.BITWISE_OR
;
2797 val
= LocatedToken
.Create (ref_line
, col
);
2798 if (peek_char () == '='){
2800 return Token
.OP_MULT_ASSIGN
;
2807 val
= LocatedToken
.Create (ref_line
, col
);
2809 return Token
.OP_DIV_ASSIGN
;
2812 // Handle double-slash comments.
2815 if (RootContext
.Documentation
!= null && peek_char () == '/') {
2817 // Don't allow ////.
2818 if ((d
= peek_char ()) != '/') {
2819 update_comment_location ();
2820 if (doc_state
== XmlCommentState
.Allowed
)
2821 handle_one_line_xml_comment ();
2822 else if (doc_state
== XmlCommentState
.NotAllowed
)
2823 warn_incorrect_doc_comment ();
2826 while ((d
= get_char ()) != -1 && (d
!= '\n') && d
!= '\r');
2828 any_token_seen
|= tokens_seen
;
2829 tokens_seen
= false;
2830 comments_seen
= false;
2832 } else if (d
== '*'){
2834 bool docAppend
= false;
2835 if (RootContext
.Documentation
!= null && peek_char () == '*') {
2837 update_comment_location ();
2838 // But when it is /**/, just do nothing.
2839 if (peek_char () == '/') {
2843 if (doc_state
== XmlCommentState
.Allowed
)
2845 else if (doc_state
== XmlCommentState
.NotAllowed
)
2846 warn_incorrect_doc_comment ();
2849 int current_comment_start
= 0;
2851 current_comment_start
= xml_comment_buffer
.Length
;
2852 xml_comment_buffer
.Append (Environment
.NewLine
);
2855 while ((d
= get_char ()) != -1){
2856 if (d
== '*' && peek_char () == '/'){
2858 comments_seen
= true;
2862 xml_comment_buffer
.Append ((char) d
);
2865 any_token_seen
|= tokens_seen
;
2866 tokens_seen
= false;
2868 // Reset 'comments_seen' just to be consistent.
2869 // It doesn't matter either way, here.
2871 comments_seen
= false;
2875 Report
.Error (1035, Location
, "End-of-file found, '*/' expected");
2878 update_formatted_doc_comment (current_comment_start
);
2884 val
= LocatedToken
.Create (ref_line
, col
);
2885 if (peek_char () == '='){
2887 return Token
.OP_MOD_ASSIGN
;
2889 return Token
.PERCENT
;
2892 val
= LocatedToken
.Create (ref_line
, col
);
2893 if (peek_char () == '='){
2895 return Token
.OP_XOR_ASSIGN
;
2897 return Token
.CARRET
;
2900 val
= LocatedToken
.Create (ref_line
, col
);
2901 if (peek_char () == ':') {
2903 return Token
.DOUBLE_COLON
;
2907 case '0': case '1': case '2': case '3': case '4':
2908 case '5': case '6': case '7': case '8': case '9':
2910 return is_number (c
);
2912 case '\n': // white space
2913 any_token_seen
|= tokens_seen
;
2914 tokens_seen
= false;
2915 comments_seen
= false;
2921 if (d
>= '0' && d
<= '9')
2922 return is_number (c
);
2926 if (tokens_seen
|| comments_seen
) {
2927 Eror_WrongPreprocessorLocation ();
2931 if (handle_preprocessing_directive (true))
2934 bool directive_expected
= false;
2935 while ((c
= get_char ()) != -1) {
2937 directive_expected
= true;
2938 } else if (!directive_expected
) {
2939 // TODO: Implement comment support for disabled code and uncomment this code
2941 // Eror_WrongPreprocessorLocation ();
2942 // return Token.ERROR;
2947 if (c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n' || c
== '\f' || c
== '\v' )
2951 if (handle_preprocessing_directive (false))
2954 directive_expected
= false;
2958 tokens_seen
= false;
2965 return consume_string (false);
2968 return TokenizeBackslash ();
2974 return consume_string (true);
2977 if (is_identifier_start_character (c
)){
2978 return consume_identifier (c
, true);
2981 Report
.Error (1646, Location
, "Keyword, identifier, or string expected after verbatim specifier: @");
2984 case EvalStatementParserCharacter
:
2985 return Token
.EVAL_STATEMENT_PARSER
;
2986 case EvalCompilationUnitParserCharacter
:
2987 return Token
.EVAL_COMPILATION_UNIT_PARSER
;
2988 case EvalUsingDeclarationsParserCharacter
:
2989 return Token
.EVAL_USING_DECLARATIONS_UNIT_PARSER
;
2992 if (is_identifier_start_character (c
)) {
2994 return consume_identifier (c
);
2997 error_details
= ((char)c
).ToString ();
3003 return Token
.COMPLETE_COMPLETION
;
3006 return Token
.GENERATE_COMPLETION
;
3013 int TokenizeBackslash ()
3015 int c
= get_char ();
3018 error_details
= "Empty character literal";
3019 Report
.Error (1011, Location
, error_details
);
3022 if (c
== '\r' || c
== '\n') {
3023 Report
.Error (1010, Location
, "Newline in constant");
3028 c
= escape (c
, out d
);
3032 throw new NotImplementedException ();
3034 val
= new CharLiteral ((char) c
, Location
);
3038 Report
.Error (1012, Location
, "Too many characters in character literal");
3040 // Try to recover, read until newline or next "'"
3041 while ((c
= get_char ()) != -1) {
3042 if (c
== '\n' || c
== '\'')
3048 return Token
.LITERAL
;
3051 int TokenizeLessThan ()
3054 if (handle_typeof
) {
3056 if (parse_generic_dimension (out d
)) {
3059 return Token
.GENERIC_DIMENSION
;
3064 // Save current position and parse next token.
3066 if (parse_less_than ()) {
3067 if (parsing_generic_declaration
&& token () != Token
.DOT
) {
3068 d
= Token
.OP_GENERICS_LT_DECL
;
3070 d
= Token
.OP_GENERICS_LT
;
3077 parsing_generic_less_than
= 0;
3086 return Token
.OP_SHIFT_LEFT_ASSIGN
;
3088 return Token
.OP_SHIFT_LEFT
;
//
// Handles a one line xml comment: copies the rest of the current line
// into xml_comment_buffer, without its leading spaces, and terminates
// it with Environment.NewLine when the line ends with a line break
// rather than with the end of the input
//
private void handle_one_line_xml_comment ()
{
	int next;

	// Skip the spaces in front of the comment text
	for (;;) {
		next = peek_char ();
		if (next != ' ')
			break;

		get_char ();
	}

	// Copy everything up to, but not including, the line break
	for (;;) {
		next = peek_char ();
		if (next == -1 || next == '\n' || next == '\r')
			break;

		xml_comment_buffer.Append ((char) get_char ());
	}

	bool stopped_at_line_break = next == '\n' || next == '\r';
	if (stopped_at_line_break) {
		xml_comment_buffer.Append (Environment.NewLine);
	}
}
3114 // Remove heading "*" in Javadoc-like xml documentation.
3116 private void update_formatted_doc_comment (int current_comment_start
)
3118 int length
= xml_comment_buffer
.Length
- current_comment_start
;
3119 string [] lines
= xml_comment_buffer
.ToString (
3120 current_comment_start
,
3121 length
).Replace ("\r", "").Split ('\n');
3123 // The first line starts with /**, thus it is not target
3124 // for the format check.
3125 for (int i
= 1; i
< lines
.Length
; i
++) {
3126 string s
= lines
[i
];
3127 int idx
= s
.IndexOf ('*');
3130 if (i
< lines
.Length
- 1)
3134 head
= s
.Substring (0, idx
);
3135 foreach (char c
in head
)
3138 lines
[i
] = s
.Substring (idx
+ 1);
3140 xml_comment_buffer
.Remove (current_comment_start
, length
);
3141 xml_comment_buffer
.Insert (current_comment_start
, String
.Join (Environment
.NewLine
, lines
));
//
// Records where the current comment starts. Does nothing when a
// location has already been recorded
//
private void update_comment_location ()
{
	if (!current_comment_location.IsNull)
		return;

	int start_column;
	if (hidden) {
		start_column = -1;
	} else {
		// Step back over the heading "//" or "/*"
		start_column = col - 2;
	}

	current_comment_location = new Location (ref_line, start_column);
}
//
// Raises the incorrect doc comment warning when there is still
// xml comment text waiting in the buffer
//
public void check_incorrect_doc_comment ()
{
	if (xml_comment_buffer.Length <= 0)
		return;

	warn_incorrect_doc_comment ();
}
//
// Raises warning 1587 (level 2) for a misplaced doc comment and moves
// doc_state to `Error'. Nothing is reported while doc_state already
// is `Error', so the warning is not repeated
//
private void warn_incorrect_doc_comment ()
{
	if (doc_state == XmlCommentState.Error)
		return;

	// The state has to change before the warning is issued
	doc_state = XmlCommentState.Error;

	// The text is the wording noted for csc
	Report.Warning (1587, 2, Location, "XML comment is not placed on a valid language element");
}
3182 // Consumes the saved xml comment lines (if any)
3183 // as for current target member or type.
3185 public string consume_doc_comment ()
3187 if (xml_comment_buffer
.Length
> 0) {
3188 string ret
= xml_comment_buffer
.ToString ();
3189 reset_doc_comment ();
3196 get { return context.Report; }
//
// Forgets the pending xml comment: clears the recorded comment
// location and empties the comment buffer
//
void reset_doc_comment ()
{
	current_comment_location = Location.Null;
	xml_comment_buffer.Length = 0;
}
3205 public void cleanup ()
3207 if (ifstack
!= null && ifstack
.Count
>= 1) {
3208 int state
= ifstack
.Pop ();
3209 if ((state
& REGION
) != 0)
3210 Report
.Error (1038, Location
, "#endregion directive expected");
3212 Report
.Error (1027, Location
, "Expected `#endif' directive");
3218 // Indicates whether it accepts XML documentation or not.
3220 public enum XmlCommentState
{
3221 // comment is allowed in this state.
3223 // comment is not allowed in this state.
3225 // once comments appeared when it is NotAllowed, then the
3226 // state is changed to it, until the state is changed to