2 // cs-tokenizer.cs: The Tokenizer for the C# compiler
3 // This also implements the preprocessor
5 // Author: Miguel de Icaza (miguel@gnu.org)
6 // Marek Safar (marek.safar@seznam.cz)
8 // Dual licensed under the terms of the MIT X11 or GNU GPL
10 // Copyright 2001, 2002 Ximian, Inc (http://www.ximian.com)
11 // Copyright 2004-2008 Novell, Inc
17 using System
.Collections
.Generic
;
19 using System
.Globalization
;
20 using System
.Reflection
;
25 /// Tokenizer for C# source code.
28 public class Tokenizer
: yyParser
.yyInput
32 public readonly int Token
;
33 public KeywordEntry Next
;
34 public readonly char[] Value
;
36 public KeywordEntry (string value, int token
)
38 this.Value
= value.ToCharArray ();
43 sealed class IdentifiersComparer
: IEqualityComparer
<char[]>
47 public IdentifiersComparer (int length
)
52 public bool Equals (char[] x
, char[] y
)
54 for (int i
= 0; i
< length
; ++i
)
61 public int GetHashCode (char[] obj
)
64 for (int i
= 0; i
< length
; ++i
)
65 h
= (h
<< 5) - h
+ obj
[i
];
72 // This class has to be used in the parser only, it reuses token
73 // details after each parse
75 public class LocatedToken
80 static LocatedToken
[] buffer
;
83 private LocatedToken ()
// Convenience overload: builds a token for the given position with a null value.
public static LocatedToken Create (int row, int column)
{
	const string no_value = null;
	return Create (no_value, row, column);
}
92 public static LocatedToken
Create (string value, int row
, int column
)
95 // TODO: I am not very happy about the logic but it's the best
96 // I could come up with for now.
97 // Ideally we should be using just a tiny buffer (256 elements) which
98 // is enough to hold all details for the current stack and recycle elements
99 // popped from the stack but there is a trick needed to recycle
103 if (pos
>= buffer
.Length
) {
104 entry
= new LocatedToken ();
106 entry
= buffer
[pos
];
108 entry
= new LocatedToken ();
109 buffer
[pos
] = entry
;
116 entry
.column
= column
;
120 public static void Initialize ()
123 buffer
= new LocatedToken
[10000];
// Position of this token, built on demand from the stored row and column.
public Location Location {
	get {
		Location position = new Location (row, column);
		return position;
	}
}
// Read-only access to the value stored in this token.
public string Value {
	get {
		string text = value;
		return text;
	}
}
136 SeekableStreamReader reader
;
138 CompilationUnit file_name
;
139 CompilerContext context
;
146 bool handle_get_set
= false;
147 bool handle_remove_add
= false;
148 bool handle_where
= false;
149 bool handle_typeof
= false;
150 bool lambda_arguments_parsing
;
151 Location current_comment_location
= Location
.Null
;
152 List
<Location
> escaped_identifiers
;
153 int parsing_generic_less_than
;
156 // Used mainly for parser optimizations. Some expressions for instance
157 // can appear only in block (including initializer, base initializer)
160 public int parsing_block
;
161 internal bool query_parsing
;
164 // When parsing type only, useful for ambiguous nullable types
166 public int parsing_type
;
169 // Set when parsing generic declaration (type or method header)
171 public bool parsing_generic_declaration
;
174 // The value indicates that we have not reached any declaration or
177 public int parsing_declaration
;
180 // The special character to inject on streams to trigger the EXPRESSION_PARSE
181 // token to be returned. It just happens to be a Unicode character that
182 // would never be part of a program (can not be an identifier).
184 // This character is only tested just before the tokenizer is about to report
185 // an error; So on the regular operation mode, this addition will have no
186 // impact on the tokenizer's performance.
189 public const int EvalStatementParserCharacter
= 0x2190; // Unicode Left Arrow
190 public const int EvalCompilationUnitParserCharacter
= 0x2191; // Unicode Arrow
191 public const int EvalUsingDeclarationsParserCharacter
= 0x2192; // Unicode Arrow
194 // XML documentation buffer. The save point is used to divide
195 // comments on types and comments on members.
197 StringBuilder xml_comment_buffer
;
200 // See comment on XmlCommentState enumeration.
202 XmlCommentState xml_doc_state
= XmlCommentState
.Allowed
;
205 // Whether tokens have been seen on this line
207 bool tokens_seen
= false;
210 // Set to true once the GENERATE_COMPLETION token has been
211 // returned. This helps produce one GENERATE_COMPLETION,
212 // as many COMPLETE_COMPLETION as necessary to complete the
213 // AST tree and one final EOF.
218 // Whether a token has been seen on the file
219 // This is needed because `define' is not allowed to be used
220 // after a token has been seen.
222 bool any_token_seen
= false;
224 static readonly char[] simple_whitespaces
= new char[] { ' ', '\t' }
;
// Exposes the handle_get_set flag to callers.
public bool PropertyParsing {
	get {
		return this.handle_get_set;
	}
	set {
		this.handle_get_set = value;
	}
}
// Exposes the handle_remove_add flag to callers.
public bool EventParsing {
	get {
		return this.handle_remove_add;
	}
	set {
		this.handle_remove_add = value;
	}
}
// Exposes the handle_where flag to callers.
public bool ConstraintsParsing {
	get {
		return this.handle_where;
	}
	set {
		this.handle_where = value;
	}
}
// Exposes the handle_typeof flag to callers.
public bool TypeOfParsing {
	get {
		return this.handle_typeof;
	}
	set {
		this.handle_typeof = value;
	}
}
//
// Current XML documentation comment state. Switching to
// XmlCommentState.Allowed first runs check_incorrect_doc_comment ()
// and reset_doc_comment (), then stores the new state.
//
public XmlCommentState doc_state {
	get {
		return xml_doc_state;
	}
	set {
		bool becomes_allowed = value == XmlCommentState.Allowed;
		if (becomes_allowed) {
			check_incorrect_doc_comment ();
			reset_doc_comment ();
		}

		xml_doc_state = value;
	}
}
258 // This is used to trigger completion generation on the parser
259 public bool CompleteOnEOF
;
// Records `loc' in the escaped identifiers list, creating the list on first use.
void AddEscapedIdentifier (Location loc)
{
	List<Location> known = escaped_identifiers;
	if (known == null) {
		known = new List<Location> ();
		escaped_identifiers = known;
	}

	known.Add (loc);
}
269 public bool IsEscapedIdentifier (Location loc
)
271 if (escaped_identifiers
!= null) {
272 foreach (Location lt
in escaped_identifiers
)
283 static KeywordEntry
[][] keywords
;
284 static Dictionary
<string, object> keyword_strings
; // TODO: HashSet
285 static NumberStyles styles
;
286 static NumberFormatInfo csharp_format_info
;
289 // Values for the associated token returned
291 internal int putback_char
; // Used by repl only
297 const int TAKING
= 1;
298 const int ELSE_SEEN
= 4;
299 const int PARENT_TAKING
= 8;
300 const int REGION
= 16;
303 // pre-processor if stack state:
307 static System
.Text
.StringBuilder string_builder
;
309 const int max_id_size
= 512;
310 static char [] id_builder
= new char [max_id_size
];
312 public static Dictionary
<char[], string>[] identifiers
= new Dictionary
<char[], string>[max_id_size
+ 1];
314 const int max_number_size
= 512;
315 static char [] number_builder
= new char [max_number_size
];
316 static int number_pos
;
318 static StringBuilder static_cmd_arg
= new System
.Text
.StringBuilder ();
321 // Details about the error encountered by the tokenizer
323 string error_details
;
// Read-only view of the error_details field.
public string error {
	get {
		string details = error_details;
		return details;
	}
}
338 // This is used when the tokenizer needs to save
339 // the current position as it needs to do some parsing
340 // on its own to disambiguate a token on behalf of the
343 Stack
<Position
> position_stack
= new Stack
<Position
> (2);
351 public int putback_char
;
352 public int previous_col
;
353 public Stack
<int> ifstack
;
354 public int parsing_generic_less_than
;
355 public int current_token
;
357 public Position (Tokenizer t
)
359 position
= t
.reader
.Position
;
361 ref_line
= t
.ref_line
;
364 putback_char
= t
.putback_char
;
365 previous_col
= t
.previous_col
;
366 if (t
.ifstack
!= null && t
.ifstack
.Count
!= 0) {
367 // There is no simple way to clone Stack<T> all
368 // methods reverse the order
369 var clone
= t
.ifstack
.ToArray ();
370 Array
.Reverse (clone
);
371 ifstack
= new Stack
<int> (clone
);
373 parsing_generic_less_than
= t
.parsing_generic_less_than
;
374 current_token
= t
.current_token
;
// Captures the tokenizer state in a new Position and pushes it on position_stack.
public void PushPosition ()
{
	Position snapshot = new Position (this);
	position_stack.Push (snapshot);
}
383 public void PopPosition ()
385 Position p
= position_stack
.Pop ();
387 reader
.Position
= p
.position
;
388 ref_line
= p
.ref_line
;
392 putback_char
= p
.putback_char
;
393 previous_col
= p
.previous_col
;
395 parsing_generic_less_than
= p
.parsing_generic_less_than
;
396 current_token
= p
.current_token
;
// Drops the most recently pushed position without restoring any tokenizer state from it.
public void DiscardPosition ()
{
	position_stack.Pop ();
}
405 static void AddKeyword (string kw
, int token
)
407 keyword_strings
.Add (kw
, null);
409 int length
= kw
.Length
;
410 if (keywords
[length
] == null) {
411 keywords
[length
] = new KeywordEntry
['z' - '_' + 1];
414 int char_index
= kw
[0] - '_';
415 KeywordEntry kwe
= keywords
[length
] [char_index
];
417 keywords
[length
] [char_index
] = new KeywordEntry (kw
, token
);
421 while (kwe
.Next
!= null) {
425 kwe
.Next
= new KeywordEntry (kw
, token
);
428 static void InitTokens ()
430 keyword_strings
= new Dictionary
<string, object> ();
432 // 11 is the length of the longest keyword for now
433 keywords
= new KeywordEntry
[11] [];
435 AddKeyword ("__arglist", Token
.ARGLIST
);
436 AddKeyword ("abstract", Token
.ABSTRACT
);
437 AddKeyword ("as", Token
.AS
);
438 AddKeyword ("add", Token
.ADD
);
439 AddKeyword ("base", Token
.BASE
);
440 AddKeyword ("bool", Token
.BOOL
);
441 AddKeyword ("break", Token
.BREAK
);
442 AddKeyword ("byte", Token
.BYTE
);
443 AddKeyword ("case", Token
.CASE
);
444 AddKeyword ("catch", Token
.CATCH
);
445 AddKeyword ("char", Token
.CHAR
);
446 AddKeyword ("checked", Token
.CHECKED
);
447 AddKeyword ("class", Token
.CLASS
);
448 AddKeyword ("const", Token
.CONST
);
449 AddKeyword ("continue", Token
.CONTINUE
);
450 AddKeyword ("decimal", Token
.DECIMAL
);
451 AddKeyword ("default", Token
.DEFAULT
);
452 AddKeyword ("delegate", Token
.DELEGATE
);
453 AddKeyword ("do", Token
.DO
);
454 AddKeyword ("double", Token
.DOUBLE
);
455 AddKeyword ("else", Token
.ELSE
);
456 AddKeyword ("enum", Token
.ENUM
);
457 AddKeyword ("event", Token
.EVENT
);
458 AddKeyword ("explicit", Token
.EXPLICIT
);
459 AddKeyword ("extern", Token
.EXTERN
);
460 AddKeyword ("false", Token
.FALSE
);
461 AddKeyword ("finally", Token
.FINALLY
);
462 AddKeyword ("fixed", Token
.FIXED
);
463 AddKeyword ("float", Token
.FLOAT
);
464 AddKeyword ("for", Token
.FOR
);
465 AddKeyword ("foreach", Token
.FOREACH
);
466 AddKeyword ("goto", Token
.GOTO
);
467 AddKeyword ("get", Token
.GET
);
468 AddKeyword ("if", Token
.IF
);
469 AddKeyword ("implicit", Token
.IMPLICIT
);
470 AddKeyword ("in", Token
.IN
);
471 AddKeyword ("int", Token
.INT
);
472 AddKeyword ("interface", Token
.INTERFACE
);
473 AddKeyword ("internal", Token
.INTERNAL
);
474 AddKeyword ("is", Token
.IS
);
475 AddKeyword ("lock", Token
.LOCK
);
476 AddKeyword ("long", Token
.LONG
);
477 AddKeyword ("namespace", Token
.NAMESPACE
);
478 AddKeyword ("new", Token
.NEW
);
479 AddKeyword ("null", Token
.NULL
);
480 AddKeyword ("object", Token
.OBJECT
);
481 AddKeyword ("operator", Token
.OPERATOR
);
482 AddKeyword ("out", Token
.OUT
);
483 AddKeyword ("override", Token
.OVERRIDE
);
484 AddKeyword ("params", Token
.PARAMS
);
485 AddKeyword ("private", Token
.PRIVATE
);
486 AddKeyword ("protected", Token
.PROTECTED
);
487 AddKeyword ("public", Token
.PUBLIC
);
488 AddKeyword ("readonly", Token
.READONLY
);
489 AddKeyword ("ref", Token
.REF
);
490 AddKeyword ("remove", Token
.REMOVE
);
491 AddKeyword ("return", Token
.RETURN
);
492 AddKeyword ("sbyte", Token
.SBYTE
);
493 AddKeyword ("sealed", Token
.SEALED
);
494 AddKeyword ("set", Token
.SET
);
495 AddKeyword ("short", Token
.SHORT
);
496 AddKeyword ("sizeof", Token
.SIZEOF
);
497 AddKeyword ("stackalloc", Token
.STACKALLOC
);
498 AddKeyword ("static", Token
.STATIC
);
499 AddKeyword ("string", Token
.STRING
);
500 AddKeyword ("struct", Token
.STRUCT
);
501 AddKeyword ("switch", Token
.SWITCH
);
502 AddKeyword ("this", Token
.THIS
);
503 AddKeyword ("throw", Token
.THROW
);
504 AddKeyword ("true", Token
.TRUE
);
505 AddKeyword ("try", Token
.TRY
);
506 AddKeyword ("typeof", Token
.TYPEOF
);
507 AddKeyword ("uint", Token
.UINT
);
508 AddKeyword ("ulong", Token
.ULONG
);
509 AddKeyword ("unchecked", Token
.UNCHECKED
);
510 AddKeyword ("unsafe", Token
.UNSAFE
);
511 AddKeyword ("ushort", Token
.USHORT
);
512 AddKeyword ("using", Token
.USING
);
513 AddKeyword ("virtual", Token
.VIRTUAL
);
514 AddKeyword ("void", Token
.VOID
);
515 AddKeyword ("volatile", Token
.VOLATILE
);
516 AddKeyword ("while", Token
.WHILE
);
517 AddKeyword ("partial", Token
.PARTIAL
);
518 AddKeyword ("where", Token
.WHERE
);
521 AddKeyword ("from", Token
.FROM
);
522 AddKeyword ("join", Token
.JOIN
);
523 AddKeyword ("on", Token
.ON
);
524 AddKeyword ("equals", Token
.EQUALS
);
525 AddKeyword ("select", Token
.SELECT
);
526 AddKeyword ("group", Token
.GROUP
);
527 AddKeyword ("by", Token
.BY
);
528 AddKeyword ("let", Token
.LET
);
529 AddKeyword ("orderby", Token
.ORDERBY
);
530 AddKeyword ("ascending", Token
.ASCENDING
);
531 AddKeyword ("descending", Token
.DESCENDING
);
532 AddKeyword ("into", Token
.INTO
);
541 csharp_format_info
= NumberFormatInfo
.InvariantInfo
;
542 styles
= NumberStyles
.Float
;
544 string_builder
= new System
.Text
.StringBuilder ();
547 int GetKeyword (char[] id
, int id_len
)
550 // Keywords are stored in an array of arrays grouped by their
551 // length and then by the first character
553 if (id_len
>= keywords
.Length
|| keywords
[id_len
] == null)
556 int first_index
= id
[0] - '_';
557 if (first_index
> 'z')
560 KeywordEntry kwe
= keywords
[id_len
] [first_index
];
567 for (int i
= 1; i
< id_len
; ++i
) {
568 if (id
[i
] != kwe
.Value
[i
]) {
574 } while (res
== 0 && kwe
!= null);
588 if (!handle_remove_add
)
592 if (parsing_declaration
== 0)
593 res
= Token
.EXTERN_ALIAS
;
596 if (peek_token () == Token
.COLON
) {
598 res
= Token
.DEFAULT_COLON
;
602 if (!handle_where
&& !query_parsing
)
607 // A query expression is any expression that starts with `from identifier'
608 // followed by any token except ; , =
610 if (!query_parsing
) {
611 if (lambda_arguments_parsing
) {
617 // HACK: to disable generics micro-parser, because PushPosition does not
618 // store identifiers array
619 parsing_generic_less_than
= 1;
621 case Token
.IDENTIFIER
:
633 next_token
= xtoken ();
634 if (next_token
== Token
.SEMICOLON
|| next_token
== Token
.COMMA
|| next_token
== Token
.EQUALS
)
637 res
= Token
.FROM_FIRST
;
638 query_parsing
= true;
639 if (RootContext
.Version
<= LanguageVersion
.ISO_2
)
640 Report
.FeatureIsNotAvailable (Location
, "query expressions");
643 Expression
.Error_VoidInvalidInTheContext (Location
, Report
);
647 // HACK: A token is not a keyword so we need to restore identifiers buffer
648 // which has been overwritten before we grabbed the identifier
649 id_builder
[0] = 'f'; id_builder
[1] = 'r'; id_builder
[2] = 'o'; id_builder
[3] = 'm';
663 case Token
.ASCENDING
:
664 case Token
.DESCENDING
:
671 case Token
.NAMESPACE
:
672 // TODO: some explanation needed
673 check_incorrect_doc_comment ();
677 if (parsing_block
> 0) {
682 // Save current position and parse next token.
685 next_token
= token ();
686 bool ok
= (next_token
== Token
.CLASS
) ||
687 (next_token
== Token
.STRUCT
) ||
688 (next_token
== Token
.INTERFACE
) ||
689 (next_token
== Token
.VOID
);
694 if (next_token
== Token
.VOID
) {
695 if (RootContext
.Version
== LanguageVersion
.ISO_1
||
696 RootContext
.Version
== LanguageVersion
.ISO_2
)
697 Report
.FeatureIsNotAvailable (Location
, "partial methods");
698 } else if (RootContext
.Version
== LanguageVersion
.ISO_1
)
699 Report
.FeatureIsNotAvailable (Location
, "partial types");
704 if (next_token
< Token
.LAST_KEYWORD
) {
705 Report
.Error (267, Location
,
706 "The `partial' modifier can be used only immediately before `class', `struct', `interface', or `void' keyword");
// Current location: ref_line plus the current column, or -1 as the column while `hidden' is set.
public Location Location {
	get {
		if (hidden)
			return new Location (ref_line, -1);

		return new Location (ref_line, col);
	}
}
723 public Tokenizer (SeekableStreamReader input
, CompilationUnit file
, CompilerContext ctx
)
725 this.ref_name
= file
;
726 this.file_name
= file
;
732 xml_comment_buffer
= new StringBuilder ();
735 // FIXME: This could be `Location.Push' but we have to
736 // find out why the MS compiler allows this
738 Mono
.CSharp
.Location
.Push (file
, file
);
//
// Returns true when `c' may begin an identifier: an ASCII letter, an
// underscore, or a character from one of the Unicode classes
// Lu, Ll, Lt, Lm, Lo or Nl.
//
// Unlike Char.IsLetter this also accepts the letter-number class (Nl).
// Values outside the `char' range, such as the -1 used by the tokenizer
// for end of input, are rejected instead of being wrapped by the cast.
//
static bool is_identifier_start_character (int c)
{
	// Fast path for plain ASCII identifiers
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')
		return true;

	// No other ASCII character, and nothing outside the char range, can start an identifier
	if (c < 0x80 || c > Char.MaxValue)
		return false;

	switch (Char.GetUnicodeCategory ((char) c)) {
	case UnicodeCategory.UppercaseLetter:
	case UnicodeCategory.LowercaseLetter:
	case UnicodeCategory.TitlecaseLetter:
	case UnicodeCategory.ModifierLetter:
	case UnicodeCategory.OtherLetter:
	case UnicodeCategory.LetterNumber:
		return true;
	default:
		return false;
	}
}
746 static bool is_identifier_part_character (char c
)
748 if (c
>= 'a' && c
<= 'z')
751 if (c
>= 'A' && c
<= 'Z')
754 if (c
== '_' || (c
>= '0' && c
<= '9'))
760 return Char
.IsLetter (c
) || Char
.GetUnicodeCategory (c
) == UnicodeCategory
.ConnectorPunctuation
;
// Tells whether `s' is one of the keys registered in keyword_strings.
public static bool IsKeyword (string s)
{
	object unused;
	return keyword_strings.TryGetValue (s, out unused);
}
769 // Open parens micro parser. Detects both lambda and cast ambiguity.
772 int TokenizeOpenParens ()
777 int bracket_level
= 0;
778 bool is_type
= false;
779 bool can_be_type
= false;
782 ptoken
= current_token
;
785 switch (current_token
) {
786 case Token
.CLOSE_PARENS
:
790 // Expression inside parens is lambda, (int i) =>
792 if (current_token
== Token
.ARROW
) {
793 if (RootContext
.Version
<= LanguageVersion
.ISO_2
)
794 Report
.FeatureIsNotAvailable (Location
, "lambda expressions");
796 return Token
.OPEN_PARENS_LAMBDA
;
800 // Expression inside parens is single type, (int[])
803 return Token
.OPEN_PARENS_CAST
;
806 // Expression is possible cast, look at next token, (T)null
809 switch (current_token
) {
810 case Token
.OPEN_PARENS
:
813 case Token
.IDENTIFIER
:
827 case Token
.UNCHECKED
:
832 // These can be part of a member access
846 return Token
.OPEN_PARENS_CAST
;
849 return Token
.OPEN_PARENS
;
852 case Token
.DOUBLE_COLON
:
853 if (ptoken
!= Token
.IDENTIFIER
&& ptoken
!= Token
.OP_GENERICS_GT
)
858 case Token
.IDENTIFIER
:
861 case Token
.OP_GENERICS_LT
:
863 case Token
.DOUBLE_COLON
:
865 if (bracket_level
== 0)
869 can_be_type
= is_type
= false;
889 if (bracket_level
== 0)
894 if (bracket_level
== 0) {
896 can_be_type
= is_type
= false;
900 case Token
.OP_GENERICS_LT
:
901 case Token
.OPEN_BRACKET
:
902 if (bracket_level
++ == 0)
906 case Token
.OP_GENERICS_GT
:
907 case Token
.CLOSE_BRACKET
:
911 case Token
.INTERR_NULLABLE
:
913 if (bracket_level
== 0)
919 can_be_type
= is_type
= false;
923 return Token
.OPEN_PARENS
;
928 public static bool IsValidIdentifier (string s
)
930 if (s
== null || s
.Length
== 0)
933 if (!is_identifier_start_character (s
[0]))
936 for (int i
= 1; i
< s
.Length
; i
++)
937 if (! is_identifier_part_character (s
[i
]))
943 bool parse_less_than ()
946 int the_token
= token ();
947 if (the_token
== Token
.OPEN_BRACKET
) {
949 the_token
= token ();
950 } while (the_token
!= Token
.CLOSE_BRACKET
);
951 the_token
= token ();
952 } else if (the_token
== Token
.IN
|| the_token
== Token
.OUT
) {
953 the_token
= token ();
956 case Token
.IDENTIFIER
:
974 case Token
.OP_GENERICS_GT
:
981 the_token
= token ();
983 if (the_token
== Token
.OP_GENERICS_GT
)
985 else if (the_token
== Token
.COMMA
|| the_token
== Token
.DOT
|| the_token
== Token
.DOUBLE_COLON
)
987 else if (the_token
== Token
.INTERR_NULLABLE
|| the_token
== Token
.STAR
)
989 else if (the_token
== Token
.OP_GENERICS_LT
) {
990 if (!parse_less_than ())
993 } else if (the_token
== Token
.OPEN_BRACKET
) {
995 the_token
= token ();
996 if (the_token
== Token
.CLOSE_BRACKET
)
998 else if (the_token
== Token
.COMMA
)
999 goto rank_specifiers
;
1006 bool parse_generic_dimension (out int dimension
)
1011 int the_token
= token ();
1012 if (the_token
== Token
.OP_GENERICS_GT
)
1014 else if (the_token
== Token
.COMMA
) {
1022 public int peek_token ()
1027 the_token
= token ();
1034 // Tokenizes `?' using custom disambiguation rules to return one
1035 // of the following tokens: INTERR_NULLABLE, OP_COALESCING, INTERR
1037 // Tricky expression look like:
1039 // Foo ? a = x ? b : c;
1041 int TokenizePossibleNullableType ()
1043 if (parsing_block
== 0 || parsing_type
> 0)
1044 return Token
.INTERR_NULLABLE
;
1046 int d
= peek_char ();
1049 return Token
.OP_COALESCING
;
1052 switch (current_token
) {
1053 case Token
.CLOSE_PARENS
:
1058 return Token
.INTERR
;
1062 if (d
== ',' || d
== ';' || d
== '>')
1063 return Token
.INTERR_NULLABLE
;
1064 if (d
== '*' || (d
>= '0' && d
<= '9'))
1065 return Token
.INTERR
;
1069 current_token
= Token
.NONE
;
1071 switch (xtoken ()) {
1078 next_token
= Token
.INTERR
;
1081 case Token
.SEMICOLON
:
1083 case Token
.CLOSE_PARENS
:
1084 case Token
.OPEN_BRACKET
:
1085 case Token
.OP_GENERICS_GT
:
1086 next_token
= Token
.INTERR_NULLABLE
;
1094 if (next_token
== -1) {
1095 switch (xtoken ()) {
1097 case Token
.SEMICOLON
:
1098 case Token
.OPEN_BRACE
:
1099 case Token
.CLOSE_PARENS
:
1101 next_token
= Token
.INTERR_NULLABLE
;
1105 next_token
= Token
.INTERR
;
1113 // All shortcuts failed, do it the hard way
1115 while ((ntoken
= xtoken ()) != Token
.EOF
) {
1116 if (ntoken
== Token
.SEMICOLON
)
1119 if (ntoken
== Token
.COLON
) {
1120 if (++colons
== interrs
)
1125 if (ntoken
== Token
.INTERR
) {
1131 next_token
= colons
!= interrs
? Token
.INTERR_NULLABLE
: Token
.INTERR
;
1140 bool decimal_digits (int c
)
1143 bool seen_digits
= false;
1146 if (number_pos
== max_number_size
)
1147 Error_NumericConstantTooLong ();
1148 number_builder
[number_pos
++] = (char) c
;
1152 // We use peek_char2, because decimal_digits needs to do a
1153 // 2-character look-ahead (5.ToString for example).
1155 while ((d
= peek_char2 ()) != -1){
1156 if (d
>= '0' && d
<= '9'){
1157 if (number_pos
== max_number_size
)
1158 Error_NumericConstantTooLong ();
1159 number_builder
[number_pos
++] = (char) d
;
// Returns true when `e' is the character code of a hexadecimal digit (0-9, a-f or A-F).
static bool is_hex (int e)
{
	if (e >= '0' && e <= '9')
		return true;

	if (e >= 'a' && e <= 'f')
		return true;

	return e >= 'A' && e <= 'F';
}
1174 static TypeCode
real_type_suffix (int c
)
1178 return TypeCode
.Single
;
1180 return TypeCode
.Double
;
1182 return TypeCode
.Decimal
;
1184 return TypeCode
.Empty
;
1188 int integer_type_suffix (ulong ul
, int c
)
1190 bool is_unsigned
= false;
1191 bool is_long
= false;
1194 bool scanning
= true;
1207 // if we have not seen anything in between
1208 // report this error
1210 Report
.Warning (78, 4, Location
, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
1230 if (is_long
&& is_unsigned
){
1231 val
= new ULongLiteral (ul
, Location
);
1232 return Token
.LITERAL
;
1236 // uint if possible, or ulong else.
1238 if ((ul
& 0xffffffff00000000) == 0)
1239 val
= new UIntLiteral ((uint) ul
, Location
);
1241 val
= new ULongLiteral (ul
, Location
);
1242 } else if (is_long
){
1243 // long if possible, ulong otherwise
1244 if ((ul
& 0x8000000000000000) != 0)
1245 val
= new ULongLiteral (ul
, Location
);
1247 val
= new LongLiteral ((long) ul
, Location
);
1249 // int, uint, long or ulong in that order
1250 if ((ul
& 0xffffffff00000000) == 0){
1251 uint ui
= (uint) ul
;
1253 if ((ui
& 0x80000000) != 0)
1254 val
= new UIntLiteral (ui
, Location
);
1256 val
= new IntLiteral ((int) ui
, Location
);
1258 if ((ul
& 0x8000000000000000) != 0)
1259 val
= new ULongLiteral (ul
, Location
);
1261 val
= new LongLiteral ((long) ul
, Location
);
1264 return Token
.LITERAL
;
1268 // given `c' as the next char in the input decide whether
1269 // we need to convert to a special type, and then choose
1270 // the best representation for the integer
1272 int adjust_int (int c
)
1275 if (number_pos
> 9){
1276 ulong ul
= (uint) (number_builder
[0] - '0');
1278 for (int i
= 1; i
< number_pos
; i
++){
1279 ul
= checked ((ul
* 10) + ((uint)(number_builder
[i
] - '0')));
1281 return integer_type_suffix (ul
, c
);
1283 uint ui
= (uint) (number_builder
[0] - '0');
1285 for (int i
= 1; i
< number_pos
; i
++){
1286 ui
= checked ((ui
* 10) + ((uint)(number_builder
[i
] - '0')));
1288 return integer_type_suffix (ui
, c
);
1290 } catch (OverflowException
) {
1291 error_details
= "Integral constant is too large";
1292 Report
.Error (1021, Location
, error_details
);
1293 val
= new IntLiteral (0, Location
);
1294 return Token
.LITERAL
;
1296 catch (FormatException
) {
1297 Report
.Error (1013, Location
, "Invalid number");
1298 val
= new IntLiteral (0, Location
);
1299 return Token
.LITERAL
;
1303 int adjust_real (TypeCode t
)
1305 string s
= new String (number_builder
, 0, number_pos
);
1306 const string error_details
= "Floating-point constant is outside the range of type `{0}'";
1309 case TypeCode
.Decimal
:
1311 val
= new DecimalLiteral (decimal.Parse (s
, styles
, csharp_format_info
), Location
);
1312 } catch (OverflowException
) {
1313 val
= new DecimalLiteral (0, Location
);
1314 Report
.Error (594, Location
, error_details
, "decimal");
1317 case TypeCode
.Single
:
1319 val
= new FloatLiteral (float.Parse (s
, styles
, csharp_format_info
), Location
);
1320 } catch (OverflowException
) {
1321 val
= new FloatLiteral (0, Location
);
1322 Report
.Error (594, Location
, error_details
, "float");
1327 val
= new DoubleLiteral (double.Parse (s
, styles
, csharp_format_info
), Location
);
1328 } catch (OverflowException
) {
1329 val
= new DoubleLiteral (0, Location
);
1330 Report
.Error (594, Location
, error_details
, "double");
1335 return Token
.LITERAL
;
1344 while ((d
= peek_char ()) != -1){
1346 number_builder
[number_pos
++] = (char) d
;
1352 string s
= new String (number_builder
, 0, number_pos
);
1354 if (number_pos
<= 8)
1355 ul
= System
.UInt32
.Parse (s
, NumberStyles
.HexNumber
);
1357 ul
= System
.UInt64
.Parse (s
, NumberStyles
.HexNumber
);
1358 } catch (OverflowException
){
1359 error_details
= "Integral constant is too large";
1360 Report
.Error (1021, Location
, error_details
);
1361 val
= new IntLiteral (0, Location
);
1362 return Token
.LITERAL
;
1364 catch (FormatException
) {
1365 Report
.Error (1013, Location
, "Invalid number");
1366 val
= new IntLiteral (0, Location
);
1367 return Token
.LITERAL
;
1370 return integer_type_suffix (ul
, peek_char ());
1374 // Invoked if we know we have .digits or digits
1376 int is_number (int c
)
1378 bool is_real
= false;
1382 if (c
>= '0' && c
<= '9'){
1384 int peek
= peek_char ();
1386 if (peek
== 'x' || peek
== 'X')
1387 return handle_hex ();
1394 // We need to handle the case of
1395 // "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)
1398 if (decimal_digits ('.')){
1404 return adjust_int (-1);
1408 if (c
== 'e' || c
== 'E'){
1410 if (number_pos
== max_number_size
)
1411 Error_NumericConstantTooLong ();
1412 number_builder
[number_pos
++] = 'e';
1416 if (number_pos
== max_number_size
)
1417 Error_NumericConstantTooLong ();
1418 number_builder
[number_pos
++] = '+';
1420 } else if (c
== '-') {
1421 if (number_pos
== max_number_size
)
1422 Error_NumericConstantTooLong ();
1423 number_builder
[number_pos
++] = '-';
1426 if (number_pos
== max_number_size
)
1427 Error_NumericConstantTooLong ();
1428 number_builder
[number_pos
++] = '+';
1435 var type
= real_type_suffix (c
);
1436 if (type
== TypeCode
.Empty
&& !is_real
){
1438 return adjust_int (c
);
1443 if (type
== TypeCode
.Empty
){
1448 return adjust_real (type
);
1450 throw new Exception ("Is Number should never reach this point");
1454 // Accepts exactly count (4 or 8) hex, no more no less
1456 int getHex (int count
, out int surrogate
, out bool error
)
1461 int top
= count
!= -1 ? count
: 4;
1466 for (i
= 0; i
< top
; i
++){
1469 if (c
>= '0' && c
<= '9')
1470 c
= (int) c
- (int) '0';
1471 else if (c
>= 'A' && c
<= 'F')
1472 c
= (int) c
- (int) 'A' + 10;
1473 else if (c
>= 'a' && c
<= 'f')
1474 c
= (int) c
- (int) 'a' + 10;
1480 total
= (total
* 16) + c
;
1482 int p
= peek_char ();
1485 if (!is_hex ((char)p
))
1491 if (total
> 0x0010FFFF) {
1496 if (total
>= 0x00010000) {
1497 surrogate
= ((total
- 0x00010000) % 0x0400 + 0xDC00);
1498 total
= ((total
- 0x00010000) / 0x0400 + 0xD800);
1505 int escape (int c
, out int surrogate
)
1541 v
= getHex (-1, out surrogate
, out error
);
1547 return EscapeUnicode (d
, out surrogate
);
1550 Report
.Error (1009, Location
, "Unrecognized escape sequence `\\{0}'", ((char)d
).ToString ());
1559 int EscapeUnicode (int ch
, out int surrogate
)
1563 ch
= getHex (8, out surrogate
, out error
);
1565 ch
= getHex (4, out surrogate
, out error
);
1569 Report
.Error (1009, Location
, "Unrecognized escape sequence");
1577 if (putback_char
!= -1) {
1590 void advance_line ()
1600 if (putback_char
== -1)
1601 putback_char
= reader
.Read ();
1602 return putback_char
;
1607 if (putback_char
!= -1)
1608 return putback_char
;
1609 return reader
.Peek ();
1612 void putback (int c
)
1614 if (putback_char
!= -1){
1615 Console
.WriteLine ("Col: " + col
);
1616 Console
.WriteLine ("Row: " + line
);
1617 Console
.WriteLine ("Name: " + ref_name
.Name
);
1618 Console
.WriteLine ("Current [{0}] putting back [{1}] ", putback_char
, c
);
1619 throw new Exception ("This should not happen putback on putback");
1621 if (c
== '\n' || col
== 0) {
1622 // It won't happen though.
// Reports whether there is anything left to tokenize: either more input, or CompleteOnEOF is set.
public bool advance ()
{
	// peek_char () is always called first, exactly as before
	bool more_input = peek_char () != -1;
	return more_input || CompleteOnEOF;
}
1637 public Object Value
{
1643 public Object
value ()
1650 current_token
= xtoken ();
1651 return current_token
;
1654 void get_cmd_arg (out string cmd
, out string arg
)
1658 tokens_seen
= false;
1661 // skip over white space
1664 } while (c
== '\r' || c
== ' ' || c
== '\t');
1666 static_cmd_arg
.Length
= 0;
1667 while (c
!= -1 && is_identifier_part_character ((char)c
)) {
1668 static_cmd_arg
.Append ((char)c
);
1671 int peek
= peek_char ();
1672 if (peek
== 'U' || peek
== 'u') {
1674 c
= EscapeUnicode (c
, out surrogate
);
1675 if (surrogate
!= 0) {
1676 if (is_identifier_part_character ((char) c
))
1677 static_cmd_arg
.Append ((char) c
);
1684 cmd
= static_cmd_arg
.ToString ();
1686 // skip over white space
1687 while (c
== '\r' || c
== ' ' || c
== '\t')
1690 static_cmd_arg
.Length
= 0;
1691 int has_identifier_argument
= 0;
1693 while (c
!= -1 && c
!= '\n' && c
!= '\r') {
1694 if (c
== '\\' && has_identifier_argument
>= 0) {
1695 if (has_identifier_argument
!= 0 || (cmd
== "define" || cmd
== "if" || cmd
== "elif" || cmd
== "undef")) {
1696 has_identifier_argument
= 1;
1698 int peek
= peek_char ();
1699 if (peek
== 'U' || peek
== 'u') {
1701 c
= EscapeUnicode (c
, out surrogate
);
1702 if (surrogate
!= 0) {
1703 if (is_identifier_part_character ((char) c
))
1704 static_cmd_arg
.Append ((char) c
);
1709 has_identifier_argument
= -1;
1712 static_cmd_arg
.Append ((char) c
);
1716 if (static_cmd_arg
.Length
!= 0)
1717 arg
= static_cmd_arg
.ToString ();
1721 // Handles the #line directive
1723 bool PreProcessLine (string arg
)
1725 if (arg
.Length
== 0)
1728 if (arg
== "default"){
1730 ref_name
= file_name
;
1732 Location
.Push (file_name
, ref_name
);
1734 } else if (arg
== "hidden"){
1742 if ((pos
= arg
.IndexOf (' ')) != -1 && pos
!= 0){
1743 ref_line
= System
.Int32
.Parse (arg
.Substring (0, pos
));
1746 char [] quotes
= { '\"' }
;
1748 string name
= arg
.Substring (pos
). Trim (quotes
);
1749 ref_name
= Location
.LookupFile (file_name
, name
);
1750 file_name
.AddFile (ref_name
);
1752 Location
.Push (file_name
, ref_name
);
1754 ref_line
= System
.Int32
.Parse (arg
);
1765 // Handles #define and #undef
1767 void PreProcessDefinition (bool is_define
, string ident
, bool caller_is_taking
)
1769 if (ident
.Length
== 0 || ident
== "true" || ident
== "false"){
1770 Report
.Error (1001, Location
, "Missing identifier to pre-processor directive");
1774 if (ident
.IndexOfAny (simple_whitespaces
) != -1){
1775 Error_EndLineExpected ();
1779 if (!is_identifier_start_character (ident
[0]))
1780 Report
.Error (1001, Location
, "Identifier expected: {0}", ident
);
1782 foreach (char c
in ident
.Substring (1)){
1783 if (!is_identifier_part_character (c
)){
1784 Report
.Error (1001, Location
, "Identifier expected: {0}", ident
);
1789 if (!caller_is_taking
)
1796 if (RootContext
.IsConditionalDefined (ident
))
1799 file_name
.AddDefine (ident
);
1804 file_name
.AddUndefine (ident
);
/// <summary>
///   Decodes the two hexadecimal digits at positions <paramref name="pos"/>
///   and <paramref name="pos"/> + 1 of <paramref name="arg"/> into one byte.
/// </summary>
/// <param name="arg">Text that contains the digits.</param>
/// <param name="pos">Index of the first (most significant) digit.</param>
/// <param name="error">
///   Set to true when either character is not one of 0-9, A-F or a-f;
///   false otherwise.
/// </param>
/// <returns>The decoded value, or 0 when <paramref name="error"/> is true.</returns>
static byte read_hex (string arg, int pos, out bool error)
{
	int value = 0;

	// Most significant nibble first, then the least significant one.
	// The second character is only read once the first one is valid.
	for (int offset = 0; offset < 2; offset++) {
		char digit = arg [pos + offset];
		int nibble;

		if (digit >= '0' && digit <= '9') {
			nibble = digit - '0';
		} else if (digit >= 'A' && digit <= 'F') {
			nibble = digit - 'A' + 10;
		} else if (digit >= 'a' && digit <= 'f') {
			nibble = digit - 'a' + 10;
		} else {
			error = true;
			return 0;
		}

		value = value * 16 + nibble;
	}

	error = false;
	return (byte) value;
}
1844 /// Handles #pragma checksum
1846 bool PreProcessPragmaChecksum (string arg
)
1848 if ((arg
[0] != ' ') && (arg
[0] != '\t'))
1851 arg
= arg
.Trim (simple_whitespaces
);
1852 if ((arg
.Length
< 2) || (arg
[0] != '"'))
1855 StringBuilder file_sb
= new StringBuilder ();
1859 while ((ch
= arg
[pos
++]) != '"') {
1860 if (pos
>= arg
.Length
)
1864 if (pos
+1 >= arg
.Length
)
1869 file_sb
.Append (ch
);
1872 if ((pos
+2 >= arg
.Length
) || ((arg
[pos
] != ' ') && (arg
[pos
] != '\t')))
1875 arg
= arg
.Substring (pos
).Trim (simple_whitespaces
);
1876 if ((arg
.Length
< 42) || (arg
[0] != '"') || (arg
[1] != '{') ||
1877 (arg
[10] != '-') || (arg
[15] != '-') || (arg
[20] != '-') ||
1878 (arg
[25] != '-') || (arg
[38] != '}') || (arg
[39] != '"'))
1882 byte[] guid_bytes
= new byte [16];
1884 for (int i
= 0; i
< 4; i
++) {
1885 guid_bytes
[i
] = read_hex (arg
, 2+2*i
, out error
);
1889 for (int i
= 0; i
< 2; i
++) {
1890 guid_bytes
[i
+4] = read_hex (arg
, 11+2*i
, out error
);
1893 guid_bytes
[i
+6] = read_hex (arg
, 16+2*i
, out error
);
1896 guid_bytes
[i
+8] = read_hex (arg
, 21+2*i
, out error
);
1901 for (int i
= 0; i
< 6; i
++) {
1902 guid_bytes
[i
+10] = read_hex (arg
, 26+2*i
, out error
);
1907 arg
= arg
.Substring (40).Trim (simple_whitespaces
);
1908 if ((arg
.Length
< 34) || (arg
[0] != '"') || (arg
[33] != '"'))
1911 byte[] checksum_bytes
= new byte [16];
1912 for (int i
= 0; i
< 16; i
++) {
1913 checksum_bytes
[i
] = read_hex (arg
, 1+2*i
, out error
);
1918 arg
= arg
.Substring (34).Trim (simple_whitespaces
);
1922 SourceFile file
= Location
.LookupFile (file_name
, file_sb
.ToString ());
1923 file
.SetChecksum (guid_bytes
, checksum_bytes
);
1924 ref_name
.AutoGenerated
= true;
1929 /// Handles #pragma directive
1931 void PreProcessPragma (string arg
)
1933 const string warning
= "warning";
1934 const string w_disable
= "warning disable";
1935 const string w_restore
= "warning restore";
1936 const string checksum
= "checksum";
1938 if (arg
== w_disable
) {
1939 Report
.RegisterWarningRegion (Location
).WarningDisable (Location
.Row
);
1943 if (arg
== w_restore
) {
1944 Report
.RegisterWarningRegion (Location
).WarningEnable (Location
.Row
);
1948 if (arg
.StartsWith (w_disable
)) {
1949 int[] codes
= ParseNumbers (arg
.Substring (w_disable
.Length
));
1950 foreach (int code
in codes
) {
1952 Report
.RegisterWarningRegion (Location
).WarningDisable (Location
, code
, Report
);
1957 if (arg
.StartsWith (w_restore
)) {
1958 int[] codes
= ParseNumbers (arg
.Substring (w_restore
.Length
));
1959 var w_table
= Report
.warning_ignore_table
;
1960 foreach (int code
in codes
) {
1961 if (w_table
!= null && w_table
.ContainsKey (code
))
1962 Report
.Warning (1635, 1, Location
, "Cannot restore warning `CS{0:0000}' because it was disabled globally", code
);
1963 Report
.RegisterWarningRegion (Location
).WarningEnable (Location
, code
, Report
);
1968 if (arg
.StartsWith (warning
)) {
1969 Report
.Warning (1634, 1, Location
, "Expected disable or restore");
1973 if (arg
.StartsWith (checksum
)) {
1974 if (!PreProcessPragmaChecksum (arg
.Substring (checksum
.Length
)))
1975 Warning_InvalidPragmaChecksum ();
1979 Report
.Warning (1633, 1, Location
, "Unrecognized #pragma directive");
/// <summary>
///   Parses a comma separated list of numbers, such as the text that follows
///   `warning disable' or `warning restore' in a #pragma directive.
/// </summary>
/// <param name="text">The comma separated list.</param>
/// <returns>
///   One entry per comma separated item, in source order. An item that is
///   not a valid 32-bit integer is reported with warning 1692 and its slot
///   is left at 0.
/// </returns>
int[] ParseNumbers (string text)
{
	string[] string_array = text.Split (',');
	int[] values = new int [string_array.Length];

	for (int index = 0; index < string_array.Length; index++) {
		int code;

		// TryParse rather than Parse: only FormatException used to be handled,
		// so a number outside the Int32 range escaped as an OverflowException.
		// NumberStyles.Integer is what int.Parse (string, IFormatProvider) uses.
		if (int.TryParse (string_array [index],
				System.Globalization.NumberStyles.Integer,
				System.Globalization.CultureInfo.InvariantCulture,
				out code))
			values [index] = code;
		else
			Report.Warning (1692, 1, Location, "Invalid number");
	}

	return values;
}
1998 bool eval_val (string s
)
2005 return file_name
.IsConditionalDefined (s
);
2008 bool pp_primary (ref string s
)
2017 s
= s
.Substring (1);
2018 bool val
= pp_expr (ref s
, false);
2019 if (s
.Length
> 0 && s
[0] == ')'){
2020 s
= s
.Substring (1);
2023 Error_InvalidDirective ();
2027 if (is_identifier_start_character (c
)){
2033 if (is_identifier_part_character (c
)){
2037 bool v
= eval_val (s
.Substring (0, j
));
2038 s
= s
.Substring (j
);
2041 bool vv
= eval_val (s
);
2046 Error_InvalidDirective ();
2050 bool pp_unary (ref string s
)
2057 if (len
> 1 && s
[1] == '='){
2058 Error_InvalidDirective ();
2061 s
= s
.Substring (1);
2062 return ! pp_primary (ref s
);
2064 return pp_primary (ref s
);
2066 Error_InvalidDirective ();
2071 bool pp_eq (ref string s
)
2073 bool va
= pp_unary (ref s
);
2079 if (len
> 2 && s
[1] == '='){
2080 s
= s
.Substring (2);
2081 return va
== pp_unary (ref s
);
2083 Error_InvalidDirective ();
2086 } else if (s
[0] == '!' && len
> 1 && s
[1] == '='){
2087 s
= s
.Substring (2);
2089 return va
!= pp_unary (ref s
);
2098 bool pp_and (ref string s
)
2100 bool va
= pp_eq (ref s
);
2106 if (len
> 2 && s
[1] == '&'){
2107 s
= s
.Substring (2);
2108 return (va
& pp_and (ref s
));
2110 Error_InvalidDirective ();
2119 // Evaluates an expression for `#if' or `#elif'
2121 bool pp_expr (ref string s
, bool isTerm
)
2123 bool va
= pp_and (ref s
);
2130 if (len
> 2 && s
[1] == '|'){
2131 s
= s
.Substring (2);
2132 return va
| pp_expr (ref s
, isTerm
);
2134 Error_InvalidDirective ();
2139 Error_EndLineExpected ();
2147 bool eval (string s
)
2149 bool v
= pp_expr (ref s
, true);
/// <summary>
///   Reports error 1021 at the current location.
/// </summary>
void Error_NumericConstantTooLong ()
{
	const string message = "Numeric constant too long";
	Report.Error (1021, Location, message);
}
/// <summary>
///   Reports error 1517 at the current location.
/// </summary>
void Error_InvalidDirective ()
{
	const string message = "Invalid preprocessor directive";
	Report.Error (1517, Location, message);
}
2168 void Error_UnexpectedDirective (string extra
)
2172 "Unexpected processor directive ({0})", extra
);
/// <summary>
///   Reports error 1032: a symbol was defined or undefined after the first
///   token of the file.
/// </summary>
void Error_TokensSeen ()
{
	const string message =
		"Cannot define or undefine preprocessor symbols after first token in file";
	Report.Error (1032, Location, message);
}
/// <summary>
///   Reports error 1040: a directive was not the first non-whitespace
///   character on its line.
/// </summary>
void Eror_WrongPreprocessorLocation ()
{
	const string message =
		"Preprocessor directives must appear as the first non-whitespace character on a line";
	Report.Error (1040, Location, message);
}
/// <summary>
///   Reports error 1025 at the current location.
/// </summary>
void Error_EndLineExpected ()
{
	const string message = "Single-line comment or end-of-line expected";
	Report.Error (1025, Location, message);
}
/// <summary>
///   Emits warning 1695, whose text spells out the expected
///   `#pragma checksum' syntax.
/// </summary>
void Warning_InvalidPragmaChecksum ()
{
	// Split into constants only for readability; the compiler folds them
	// back into a single literal.
	const string expected_syntax =
		"#pragma checksum \"filename\" " +
		"\"{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}\" \"XXXX...\"";

	Report.Warning (1695, 1, Location,
		"Invalid #pragma checksum syntax; should be " + expected_syntax);
}
// Returns true when the tokenizer should continue processing the code that follows.
// Returns false when the tokenizer must stay in a loop until another directive is reached.
// When caller_is_taking is false we ignore all directives except the ones
// which can help us to identify where the #if block ends
2205 bool handle_preprocessing_directive (bool caller_is_taking
)
2208 bool region_directive
= false;
2210 get_cmd_arg (out cmd
, out arg
);
2212 // Eat any trailing whitespaces and single-line comments
2213 if (arg
.IndexOf ("//") != -1)
2214 arg
= arg
.Substring (0, arg
.IndexOf ("//"));
2215 arg
= arg
.Trim (simple_whitespaces
);
2218 // The first group of pre-processing instructions is always processed
2222 region_directive
= true;
2227 if (ifstack
== null || ifstack
.Count
== 0){
2228 Error_UnexpectedDirective ("no #region for this #endregion");
2231 int pop
= ifstack
.Pop ();
2233 if ((pop
& REGION
) == 0)
2234 Report
.Error (1027, Location
, "Expected `#endif' directive");
2236 return caller_is_taking
;
2239 if (ifstack
== null)
2240 ifstack
= new Stack
<int> (2);
2242 int flags
= region_directive
? REGION
: 0;
2243 if (ifstack
.Count
== 0){
2244 flags
|= PARENT_TAKING
;
2246 int state
= ifstack
.Peek ();
2247 if ((state
& TAKING
) != 0) {
2248 flags
|= PARENT_TAKING
;
2252 if (caller_is_taking
&& eval (arg
)) {
2253 ifstack
.Push (flags
| TAKING
);
2256 ifstack
.Push (flags
);
2260 if (ifstack
== null || ifstack
.Count
== 0){
2261 Error_UnexpectedDirective ("no #if for this #endif");
2264 pop
= ifstack
.Pop ();
2266 if ((pop
& REGION
) != 0)
2267 Report
.Error (1038, Location
, "#endregion directive expected");
2269 if (arg
.Length
!= 0) {
2270 Error_EndLineExpected ();
2273 if (ifstack
.Count
== 0)
2276 int state
= ifstack
.Peek ();
2277 return (state
& TAKING
) != 0;
2281 if (ifstack
== null || ifstack
.Count
== 0){
2282 Error_UnexpectedDirective ("no #if for this #elif");
2285 int state
= ifstack
.Pop ();
2287 if ((state
& REGION
) != 0) {
2288 Report
.Error (1038, Location
, "#endregion directive expected");
2292 if ((state
& ELSE_SEEN
) != 0){
2293 Error_UnexpectedDirective ("#elif not valid after #else");
2297 if ((state
& TAKING
) != 0) {
2302 if (eval (arg
) && ((state
& PARENT_TAKING
) != 0)){
2303 ifstack
.Push (state
| TAKING
);
2307 ifstack
.Push (state
);
2312 if (ifstack
== null || ifstack
.Count
== 0){
2313 Error_UnexpectedDirective ("no #if for this #else");
2316 int state
= ifstack
.Peek ();
2318 if ((state
& REGION
) != 0) {
2319 Report
.Error (1038, Location
, "#endregion directive expected");
2323 if ((state
& ELSE_SEEN
) != 0){
2324 Error_UnexpectedDirective ("#else within #else");
2330 if (arg
.Length
!= 0) {
2331 Error_EndLineExpected ();
2336 if ((state
& PARENT_TAKING
) != 0) {
2337 ret
= (state
& TAKING
) == 0;
2345 ifstack
.Push (state
| ELSE_SEEN
);
2350 if (any_token_seen
){
2351 Error_TokensSeen ();
2352 return caller_is_taking
;
2354 PreProcessDefinition (true, arg
, caller_is_taking
);
2355 return caller_is_taking
;
2358 if (any_token_seen
){
2359 Error_TokensSeen ();
2360 return caller_is_taking
;
2362 PreProcessDefinition (false, arg
, caller_is_taking
);
2363 return caller_is_taking
;
2367 // These are only processed if we are in a `taking' block
2369 if (!caller_is_taking
)
2374 Report
.Error (1029, Location
, "#error: '{0}'", arg
);
2378 Report
.Warning (1030, 1, Location
, "#warning: `{0}'", arg
);
2382 if (RootContext
.Version
== LanguageVersion
.ISO_1
) {
2383 Report
.FeatureIsNotAvailable (Location
, "#pragma");
2387 PreProcessPragma (arg
);
2391 if (!PreProcessLine (arg
))
2394 "The line number specified for #line directive is missing or invalid");
2395 return caller_is_taking
;
2398 Report
.Error (1024, Location
, "Wrong preprocessor directive");
2403 private int consume_string (bool quoted
)
2406 string_builder
.Length
= 0;
2408 while ((c
= get_char ()) != -1){
2410 if (quoted
&& peek_char () == '"'){
2411 string_builder
.Append ((char) c
);
2415 val
= new StringLiteral (string_builder
.ToString (), Location
);
2416 return Token
.LITERAL
;
2422 Report
.Error (1010, Location
, "Newline in constant");
2427 c
= escape (c
, out surrogate
);
2430 if (surrogate
!= 0) {
2431 string_builder
.Append ((char) c
);
2435 string_builder
.Append ((char) c
);
2438 Report
.Error (1039, Location
, "Unterminated string literal");
/// <summary>
///   Consumes an unquoted identifier whose first character is
///   <paramref name="s"/> by delegating to the two-argument overload.
/// </summary>
/// <param name="s">The first character of the identifier.</param>
/// <returns>The token value produced by the two-argument overload.</returns>
private int consume_identifier (int s)
{
	int token = consume_identifier (s, false);

	// Once an identifier has been consumed, a doc comment that was allowed
	// so far is no longer allowed; other states are left untouched.
	if (doc_state == XmlCommentState.Allowed) {
		doc_state = XmlCommentState.NotAllowed;
	}

	return token;
}
2452 int consume_identifier (int c
, bool quoted
)
2455 // This method is very performance sensitive. It accounts
2456 // for approximately 25% of all parser time
2464 c
= escape (c
, out surrogate
);
2465 if (surrogate
!= 0) {
2466 id_builder
[pos
++] = (char) c
;
2471 id_builder
[pos
++] = (char) c
;
2477 if ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || c
== '_' || (c
>= '0' && c
<= '9')) {
2478 id_builder
[pos
++] = (char) c
;
2485 c
= escape (c
, out surrogate
);
2486 if (surrogate
!= 0) {
2487 if (is_identifier_part_character ((char) c
))
2488 id_builder
[pos
++] = (char) c
;
2494 } else if (Char
.IsLetter ((char) c
) || Char
.GetUnicodeCategory ((char) c
) == UnicodeCategory
.ConnectorPunctuation
) {
2495 id_builder
[pos
++] = (char) c
;
2502 } catch (IndexOutOfRangeException
) {
2503 Report
.Error (645, Location
, "Identifier too long (limit is 512 chars)");
2511 // Optimization: avoids doing the keyword lookup
2512 // on uppercase letters
2514 if (id_builder
[0] >= '_' && !quoted
) {
2515 int keyword
= GetKeyword (id_builder
, pos
);
2516 if (keyword
!= -1) {
2517 val
= LocatedToken
.Create (null, ref_line
, column
);
2523 // Keep identifiers in an array of hashtables to avoid needless
2526 var identifiers_group
= identifiers
[pos
];
2528 if (identifiers_group
!= null) {
2529 if (identifiers_group
.TryGetValue (id_builder
, out s
)) {
2530 val
= LocatedToken
.Create (s
, ref_line
, column
);
2532 AddEscapedIdentifier (((LocatedToken
) val
).Location
);
2533 return Token
.IDENTIFIER
;
2536 // TODO: this should be number of files dependant
2537 // corlib compilation peaks at 1000 and System.Core at 150
2538 int capacity
= pos
> 20 ? 10 : 100;
2539 identifiers_group
= new Dictionary
<char[],string> (capacity
, new IdentifiersComparer (pos
));
2540 identifiers
[pos
] = identifiers_group
;
2543 char [] chars
= new char [pos
];
2544 Array
.Copy (id_builder
, chars
, pos
);
2546 s
= new string (id_builder
, 0, pos
);
2547 identifiers_group
.Add (chars
, s
);
2549 val
= LocatedToken
.Create (s
, ref_line
, column
);
2551 AddEscapedIdentifier (((LocatedToken
) val
).Location
);
2553 return Token
.IDENTIFIER
;
2556 public int xtoken ()
2560 // Whether we have seen comments on the current line
2561 bool comments_seen
= false;
2562 while ((c
= get_char ()) != -1) {
2565 col
= ((col
+ 8) / 8) * 8;
2573 case 0xFEFF: // Ignore BOM anywhere in the file
2576 /* This is required for compatibility with .NET
2578 if (peek_char () == 0xBB) {
2581 if (get_char () == 0xBF)
2588 if (peek_char () != '\n')
2593 any_token_seen
|= tokens_seen
;
2594 tokens_seen
= false;
2595 comments_seen
= false;
2600 return consume_identifier (c
);
2603 val
= LocatedToken
.Create (ref_line
, col
);
2604 return Token
.OPEN_BRACE
;
2606 val
= LocatedToken
.Create (ref_line
, col
);
2607 return Token
.CLOSE_BRACE
;
2609 // To block doccomment inside attribute declaration.
2610 if (doc_state
== XmlCommentState
.Allowed
)
2611 doc_state
= XmlCommentState
.NotAllowed
;
2612 return Token
.OPEN_BRACKET
;
2614 return Token
.CLOSE_BRACKET
;
2616 val
= LocatedToken
.Create (ref_line
, col
);
2618 // An expression versions of parens can appear in block context only
2620 if (parsing_block
!= 0 && !lambda_arguments_parsing
) {
2623 // Optmize most common case where we know that parens
2626 switch (current_token
) {
2627 case Token
.IDENTIFIER
:
2635 case Token
.DELEGATE
:
2636 case Token
.OP_GENERICS_GT
:
2637 return Token
.OPEN_PARENS
;
2640 // Optimize using peek
2641 int xx
= peek_char ();
2648 return Token
.OPEN_PARENS
;
2651 lambda_arguments_parsing
= true;
2653 d
= TokenizeOpenParens ();
2655 lambda_arguments_parsing
= false;
2659 return Token
.OPEN_PARENS
;
2661 return Token
.CLOSE_PARENS
;
2665 return Token
.SEMICOLON
;
2669 return TokenizePossibleNullableType ();
2671 if (parsing_generic_less_than
++ > 0)
2672 return Token
.OP_GENERICS_LT
;
2674 return TokenizeLessThan ();
2684 if (parsing_generic_less_than
> 1 || (parsing_generic_less_than
== 1 && d
!= '>')) {
2685 parsing_generic_less_than
--;
2686 return Token
.OP_GENERICS_GT
;
2695 return Token
.OP_SHIFT_RIGHT_ASSIGN
;
2697 return Token
.OP_SHIFT_RIGHT
;
2706 } else if (d
== '=') {
2707 d
= Token
.OP_ADD_ASSIGN
;
2718 } else if (d
== '=')
2719 d
= Token
.OP_SUB_ASSIGN
;
2729 if (peek_char () == '='){
2746 return Token
.ASSIGN
;
2752 return Token
.OP_AND
;
2756 return Token
.OP_AND_ASSIGN
;
2758 return Token
.BITWISE_AND
;
2768 return Token
.OP_OR_ASSIGN
;
2770 return Token
.BITWISE_OR
;
2773 if (peek_char () == '='){
2775 return Token
.OP_MULT_ASSIGN
;
2777 val
= LocatedToken
.Create (ref_line
, col
);
2784 return Token
.OP_DIV_ASSIGN
;
2787 // Handle double-slash comments.
2790 if (RootContext
.Documentation
!= null && peek_char () == '/') {
2792 // Don't allow ////.
2793 if ((d
= peek_char ()) != '/') {
2794 update_comment_location ();
2795 if (doc_state
== XmlCommentState
.Allowed
)
2796 handle_one_line_xml_comment ();
2797 else if (doc_state
== XmlCommentState
.NotAllowed
)
2798 warn_incorrect_doc_comment ();
2801 while ((d
= get_char ()) != -1 && (d
!= '\n') && d
!= '\r');
2803 any_token_seen
|= tokens_seen
;
2804 tokens_seen
= false;
2805 comments_seen
= false;
2807 } else if (d
== '*'){
2809 bool docAppend
= false;
2810 if (RootContext
.Documentation
!= null && peek_char () == '*') {
2812 update_comment_location ();
2813 // But when it is /**/, just do nothing.
2814 if (peek_char () == '/') {
2818 if (doc_state
== XmlCommentState
.Allowed
)
2820 else if (doc_state
== XmlCommentState
.NotAllowed
)
2821 warn_incorrect_doc_comment ();
2824 int current_comment_start
= 0;
2826 current_comment_start
= xml_comment_buffer
.Length
;
2827 xml_comment_buffer
.Append (Environment
.NewLine
);
2830 while ((d
= get_char ()) != -1){
2831 if (d
== '*' && peek_char () == '/'){
2833 comments_seen
= true;
2837 xml_comment_buffer
.Append ((char) d
);
2840 any_token_seen
|= tokens_seen
;
2841 tokens_seen
= false;
2843 // Reset 'comments_seen' just to be consistent.
2844 // It doesn't matter either way, here.
2846 comments_seen
= false;
2850 Report
.Error (1035, Location
, "End-of-file found, '*/' expected");
2853 update_formatted_doc_comment (current_comment_start
);
2859 if (peek_char () == '='){
2861 return Token
.OP_MOD_ASSIGN
;
2863 return Token
.PERCENT
;
2866 if (peek_char () == '='){
2868 return Token
.OP_XOR_ASSIGN
;
2870 return Token
.CARRET
;
2873 if (peek_char () == ':') {
2875 return Token
.DOUBLE_COLON
;
2879 case '0': case '1': case '2': case '3': case '4':
2880 case '5': case '6': case '7': case '8': case '9':
2882 return is_number (c
);
2884 case '\n': // white space
2885 any_token_seen
|= tokens_seen
;
2886 tokens_seen
= false;
2887 comments_seen
= false;
2893 if (d
>= '0' && d
<= '9')
2894 return is_number (c
);
2898 if (tokens_seen
|| comments_seen
) {
2899 Eror_WrongPreprocessorLocation ();
2903 if (handle_preprocessing_directive (true))
2906 bool directive_expected
= false;
2907 while ((c
= get_char ()) != -1) {
2909 directive_expected
= true;
2910 } else if (!directive_expected
) {
2911 // TODO: Implement comment support for disabled code and uncomment this code
2913 // Eror_WrongPreprocessorLocation ();
2914 // return Token.ERROR;
2919 if (c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n' || c
== '\f' || c
== '\v' )
2923 if (handle_preprocessing_directive (false))
2926 directive_expected
= false;
2930 tokens_seen
= false;
2937 return consume_string (false);
2940 return TokenizeBackslash ();
2946 return consume_string (true);
2949 if (is_identifier_start_character (c
)){
2950 return consume_identifier (c
, true);
2953 Report
.Error (1646, Location
, "Keyword, identifier, or string expected after verbatim specifier: @");
2956 case EvalStatementParserCharacter
:
2957 return Token
.EVAL_STATEMENT_PARSER
;
2958 case EvalCompilationUnitParserCharacter
:
2959 return Token
.EVAL_COMPILATION_UNIT_PARSER
;
2960 case EvalUsingDeclarationsParserCharacter
:
2961 return Token
.EVAL_USING_DECLARATIONS_UNIT_PARSER
;
2964 if (is_identifier_start_character (c
)) {
2966 return consume_identifier (c
);
2969 error_details
= ((char)c
).ToString ();
2975 return Token
.COMPLETE_COMPLETION
;
2978 return Token
.GENERATE_COMPLETION
;
2985 int TokenizeBackslash ()
2987 int c
= get_char ();
2990 error_details
= "Empty character literal";
2991 Report
.Error (1011, Location
, error_details
);
2994 if (c
== '\r' || c
== '\n') {
2995 Report
.Error (1010, Location
, "Newline in constant");
3000 c
= escape (c
, out d
);
3004 throw new NotImplementedException ();
3006 val
= new CharLiteral ((char) c
, Location
);
3010 Report
.Error (1012, Location
, "Too many characters in character literal");
3012 // Try to recover, read until newline or next "'"
3013 while ((c
= get_char ()) != -1) {
3014 if (c
== '\n' || c
== '\'')
3020 return Token
.LITERAL
;
3023 int TokenizeLessThan ()
3026 if (handle_typeof
) {
3028 if (parse_generic_dimension (out d
)) {
3031 return Token
.GENERIC_DIMENSION
;
3036 // Save current position and parse next token.
3038 if (parse_less_than ()) {
3039 if (parsing_generic_declaration
&& token () != Token
.DOT
) {
3040 d
= Token
.OP_GENERICS_LT_DECL
;
3042 d
= Token
.OP_GENERICS_LT
;
3049 parsing_generic_less_than
= 0;
3058 return Token
.OP_SHIFT_LEFT_ASSIGN
;
3060 return Token
.OP_SHIFT_LEFT
;
/// <summary>
///   Handles a one line xml comment: appends the rest of the current line
///   to xml_comment_buffer, followed by Environment.NewLine when the line
///   ends with a line break.
/// </summary>
private void handle_one_line_xml_comment ()
{
	// Skip heading whitespaces.
	while (peek_char () == ' ')
		get_char ();

	// Copy everything up to, but not including, the line break or end of input.
	int next = peek_char ();
	while (next != -1 && next != '\n' && next != '\r') {
		xml_comment_buffer.Append ((char) get_char ());
		next = peek_char ();
	}

	// The line break itself is left unconsumed.
	if (next == '\n' || next == '\r')
		xml_comment_buffer.Append (Environment.NewLine);
}
3086 // Remove heading "*" in Javadoc-like xml documentation.
3088 private void update_formatted_doc_comment (int current_comment_start
)
3090 int length
= xml_comment_buffer
.Length
- current_comment_start
;
3091 string [] lines
= xml_comment_buffer
.ToString (
3092 current_comment_start
,
3093 length
).Replace ("\r", "").Split ('\n');
3095 // The first line starts with /**, thus it is not target
3096 // for the format check.
3097 for (int i
= 1; i
< lines
.Length
; i
++) {
3098 string s
= lines
[i
];
3099 int idx
= s
.IndexOf ('*');
3102 if (i
< lines
.Length
- 1)
3106 head
= s
.Substring (0, idx
);
3107 foreach (char c
in head
)
3110 lines
[i
] = s
.Substring (idx
+ 1);
3112 xml_comment_buffer
.Remove (current_comment_start
, length
);
3113 xml_comment_buffer
.Insert (current_comment_start
, String
.Join (Environment
.NewLine
, lines
));
/// <summary>
///   Updates the current comment location. A location that has already
///   been recorded is kept; it is only set while it is still null.
/// </summary>
private void update_comment_location ()
{
	if (!current_comment_location.IsNull)
		return;

	// "-2" is for heading "//" or "/*"; -1 is used instead while `hidden' is set.
	int start_column = hidden ? -1 : col - 2;
	current_comment_location = new Location (ref_line, start_column);
}
/// <summary>
///   Checks whether doc comment text is still pending in
///   xml_comment_buffer and, if so, raises the incorrect doc comment warning.
/// </summary>
public void check_incorrect_doc_comment ()
{
	if (xml_comment_buffer.Length <= 0)
		return;

	warn_incorrect_doc_comment ();
}
/// <summary>
///   Raises warning 1587 when the tokenizer found an incorrectly placed
///   doc comment, and switches doc_state to XmlCommentState.Error.
///   Does nothing when doc_state is already XmlCommentState.Error.
/// </summary>
private void warn_incorrect_doc_comment ()
{
	if (doc_state == XmlCommentState.Error)
		return;

	doc_state = XmlCommentState.Error;

	// csc words this as 'XML comment is not placed on a valid language element'.
	Report.Warning (1587, 2, Location, "XML comment is not placed on a valid language element");
}
/// <summary>
///   Consumes the saved xml comment lines (if any) as for the current
///   target member or type.
/// </summary>
/// <returns>
///   The buffered comment text, or null when the buffer is empty.
/// </returns>
public string consume_doc_comment ()
{
	if (xml_comment_buffer.Length <= 0)
		return null;

	// Take the text before resetting, since the reset empties the buffer.
	string comment = xml_comment_buffer.ToString ();
	reset_doc_comment ();
	return comment;
}
3168 get { return context.Report; }
/// <summary>
///   Forgets any pending doc comment: empties xml_comment_buffer and sets
///   current_comment_location to Location.Null.
/// </summary>
void reset_doc_comment ()
{
	current_comment_location = Location.Null;
	xml_comment_buffer.Length = 0;
}
/// <summary>
///   Reports a conditional or region block that is still open. When
///   ifstack is not empty, its top entry is popped and error 1038 (entry
///   carries the REGION flag) or error 1027 (otherwise) is reported.
/// </summary>
public void cleanup ()
{
	if (ifstack == null || ifstack.Count < 1)
		return;

	int state = ifstack.Pop ();
	bool open_region = (state & REGION) != 0;

	if (open_region) {
		Report.Error (1038, Location, "#endregion directive expected");
	} else {
		Report.Error (1027, Location, "Expected `#endif' directive");
	}
}
3190 // Indicates whether it accepts XML documentation or not.
3192 public enum XmlCommentState
{
3193 // comment is allowed in this state.
3195 // comment is not allowed in this state.
3197 // once comments appeared when it is NotAllowed, then the
3198 // state is changed to it, until the state is changed to