2 // cs-tokenizer.cs: The Tokenizer for the C# compiler
3 // This also implements the preprocessor
5 // Author: Miguel de Icaza (miguel@gnu.org)
6 // Marek Safar (marek.safar@seznam.cz)
8 // Dual licensed under the terms of the MIT X11 or GNU GPL
10 // Copyright 2001, 2002 Ximian, Inc (http://www.ximian.com)
11 // Copyright 2004-2008 Novell, Inc
17 using System
.Collections
.Generic
;
19 using System
.Globalization
;
20 using System
.Reflection
;
25 /// Tokenizer for C# source code.
28 public class Tokenizer
: yyParser
.yyInput
32 public readonly int Token
;
33 public KeywordEntry Next
;
34 public readonly char[] Value
;
36 public KeywordEntry (string value, int token
)
38 this.Value
= value.ToCharArray ();
43 sealed class IdentifiersComparer
: IEqualityComparer
<char[]>
47 public IdentifiersComparer (int length
)
52 public bool Equals (char[] x
, char[] y
)
54 for (int i
= 0; i
< length
; ++i
)
61 public int GetHashCode (char[] obj
)
64 for (int i
= 0; i
< length
; ++i
)
65 h
= (h
<< 5) - h
+ obj
[i
];
72 // This class has to be used in the parser only, it reuses token
73 // details after each parse
75 public class LocatedToken
80 static LocatedToken
[] buffer
;
83 private LocatedToken ()
87 public static LocatedToken
Create (int row
, int column
)
89 return Create (null, row
, column
);
92 public static LocatedToken
Create (string value, int row
, int column
)
95 // TODO: I am not very happy about the logic but it's the best
96 // what I could come up with for now.
97 // Ideally we should be using just tiny buffer (256 elements) which
98 // is enough to hold all details for current stack and recycle elements
99 // popped from the stack but there is a trick needed to recycle
103 if (pos
>= buffer
.Length
) {
104 entry
= new LocatedToken ();
106 entry
= buffer
[pos
];
108 entry
= new LocatedToken ();
109 buffer
[pos
] = entry
;
116 entry
.column
= column
;
121 // Used for tokens not required by the expression evaluator
123 public static LocatedToken
CreateOptional (int row
, int col
)
126 return Create (row
, col
);
131 public static void Initialize ()
134 buffer
= new LocatedToken
[10000];
138 public Location Location
{
139 get { return new Location (row, column); }
142 public string Value
{
143 get { return value; }
147 SeekableStreamReader reader
;
149 CompilationUnit file_name
;
150 CompilerContext context
;
158 bool handle_get_set
= false;
159 bool handle_remove_add
= false;
160 bool handle_where
= false;
161 bool handle_typeof
= false;
162 bool lambda_arguments_parsing
;
163 Location current_comment_location
= Location
.Null
;
164 List
<Location
> escaped_identifiers
;
165 int parsing_generic_less_than
;
168 // Used mainly for parser optimizations. Some expressions for instance
169 // can appear only in block (including initializer, base initializer)
172 public int parsing_block
;
173 internal bool query_parsing
;
176 // When parsing type only, useful for ambiguous nullable types
178 public int parsing_type
;
181 // Set when parsing generic declaration (type or method header)
183 public bool parsing_generic_declaration
;
186 // The value indicates that we have not reached any declaration or
189 public int parsing_declaration
;
192 // The special character to inject on streams to trigger the EXPRESSION_PARSE
193 // token to be returned. It just happens to be a Unicode character that
194 // would never be part of a program (can not be an identifier).
196 // This character is only tested just before the tokenizer is about to report
197 // an error; So on the regular operation mode, this addition will have no
198 // impact on the tokenizer's performance.
201 public const int EvalStatementParserCharacter
= 0x2190; // Unicode Left Arrow
202 public const int EvalCompilationUnitParserCharacter
= 0x2191; // Unicode Arrow
203 public const int EvalUsingDeclarationsParserCharacter
= 0x2192; // Unicode Arrow
206 // XML documentation buffer. The save point is used to divide
207 // comments on types and comments on members.
209 StringBuilder xml_comment_buffer
;
212 // See comment on XmlCommentState enumeration.
214 XmlCommentState xml_doc_state
= XmlCommentState
.Allowed
;
217 // Whether tokens have been seen on this line
219 bool tokens_seen
= false;
222 // Set to true once the GENERATE_COMPLETION token has been
223 // returned. This helps produce one GENERATE_COMPLETION,
224 // as many COMPLETE_COMPLETION as necessary to complete the
225 // AST tree and one final EOF.
230 // Whether a token has been seen on the file
231 // This is needed because `define' is not allowed to be used
232 // after a token has been seen.
234 bool any_token_seen
= false;
236 static readonly char[] simple_whitespaces
= new char[] { ' ', '\t' }
;
238 public bool PropertyParsing
{
239 get { return handle_get_set; }
240 set { handle_get_set = value; }
243 public bool EventParsing
{
244 get { return handle_remove_add; }
245 set { handle_remove_add = value; }
248 public bool ConstraintsParsing
{
249 get { return handle_where; }
250 set { handle_where = value; }
253 public bool TypeOfParsing
{
254 get { return handle_typeof; }
255 set { handle_typeof = value; }
259 get { return tab_size; }
260 set { tab_size = value; }
263 public XmlCommentState doc_state
{
264 get { return xml_doc_state; }
266 if (value == XmlCommentState
.Allowed
) {
267 check_incorrect_doc_comment ();
268 reset_doc_comment ();
270 xml_doc_state
= value;
275 // This is used to trigger completion generation on the parser
276 public bool CompleteOnEOF
;
278 void AddEscapedIdentifier (Location loc
)
280 if (escaped_identifiers
== null)
281 escaped_identifiers
= new List
<Location
> ();
283 escaped_identifiers
.Add (loc
);
286 public bool IsEscapedIdentifier (Location loc
)
288 if (escaped_identifiers
!= null) {
289 foreach (Location lt
in escaped_identifiers
)
300 static KeywordEntry
[][] keywords
;
301 static Dictionary
<string, object> keyword_strings
; // TODO: HashSet
302 static NumberStyles styles
;
303 static NumberFormatInfo csharp_format_info
;
306 // Values for the associated token returned
308 internal int putback_char
; // Used by repl only
314 const int TAKING
= 1;
315 const int ELSE_SEEN
= 4;
316 const int PARENT_TAKING
= 8;
317 const int REGION
= 16;
320 // pre-processor if stack state:
324 static System
.Text
.StringBuilder string_builder
;
326 const int max_id_size
= 512;
327 static char [] id_builder
= new char [max_id_size
];
329 public static Dictionary
<char[], string>[] identifiers
= new Dictionary
<char[], string>[max_id_size
+ 1];
331 const int max_number_size
= 512;
332 static char [] number_builder
= new char [max_number_size
];
333 static int number_pos
;
335 static StringBuilder static_cmd_arg
= new System
.Text
.StringBuilder ();
338 // Details about the error encountered by the tokenizer
340 string error_details
;
342 public string error
{
344 return error_details
;
355 // This is used when the tokenizer needs to save
356 // the current position as it needs to do some parsing
357 // on its own to disambiguate a token on behalf of the
360 Stack
<Position
> position_stack
= new Stack
<Position
> (2);
368 public int putback_char
;
369 public int previous_col
;
370 public Stack
<int> ifstack
;
371 public int parsing_generic_less_than
;
372 public int current_token
;
375 public Position (Tokenizer t
)
377 position
= t
.reader
.Position
;
379 ref_line
= t
.ref_line
;
382 putback_char
= t
.putback_char
;
383 previous_col
= t
.previous_col
;
384 if (t
.ifstack
!= null && t
.ifstack
.Count
!= 0) {
385 // There is no simple way to clone Stack<T> all
386 // methods reverse the order
387 var clone
= t
.ifstack
.ToArray ();
388 Array
.Reverse (clone
);
389 ifstack
= new Stack
<int> (clone
);
391 parsing_generic_less_than
= t
.parsing_generic_less_than
;
392 current_token
= t
.current_token
;
397 public void PushPosition ()
399 position_stack
.Push (new Position (this));
402 public void PopPosition ()
404 Position p
= position_stack
.Pop ();
406 reader
.Position
= p
.position
;
407 ref_line
= p
.ref_line
;
411 putback_char
= p
.putback_char
;
412 previous_col
= p
.previous_col
;
414 parsing_generic_less_than
= p
.parsing_generic_less_than
;
415 current_token
= p
.current_token
;
419 // Do not reset the position, ignore it.
420 public void DiscardPosition ()
422 position_stack
.Pop ();
425 static void AddKeyword (string kw
, int token
)
427 keyword_strings
.Add (kw
, null);
429 int length
= kw
.Length
;
430 if (keywords
[length
] == null) {
431 keywords
[length
] = new KeywordEntry
['z' - '_' + 1];
434 int char_index
= kw
[0] - '_';
435 KeywordEntry kwe
= keywords
[length
] [char_index
];
437 keywords
[length
] [char_index
] = new KeywordEntry (kw
, token
);
441 while (kwe
.Next
!= null) {
445 kwe
.Next
= new KeywordEntry (kw
, token
);
448 static void InitTokens ()
450 keyword_strings
= new Dictionary
<string, object> ();
452 // 11 is the length of the longest keyword for now
453 keywords
= new KeywordEntry
[11] [];
455 AddKeyword ("__arglist", Token
.ARGLIST
);
456 AddKeyword ("abstract", Token
.ABSTRACT
);
457 AddKeyword ("as", Token
.AS
);
458 AddKeyword ("add", Token
.ADD
);
459 AddKeyword ("base", Token
.BASE
);
460 AddKeyword ("bool", Token
.BOOL
);
461 AddKeyword ("break", Token
.BREAK
);
462 AddKeyword ("byte", Token
.BYTE
);
463 AddKeyword ("case", Token
.CASE
);
464 AddKeyword ("catch", Token
.CATCH
);
465 AddKeyword ("char", Token
.CHAR
);
466 AddKeyword ("checked", Token
.CHECKED
);
467 AddKeyword ("class", Token
.CLASS
);
468 AddKeyword ("const", Token
.CONST
);
469 AddKeyword ("continue", Token
.CONTINUE
);
470 AddKeyword ("decimal", Token
.DECIMAL
);
471 AddKeyword ("default", Token
.DEFAULT
);
472 AddKeyword ("delegate", Token
.DELEGATE
);
473 AddKeyword ("do", Token
.DO
);
474 AddKeyword ("double", Token
.DOUBLE
);
475 AddKeyword ("else", Token
.ELSE
);
476 AddKeyword ("enum", Token
.ENUM
);
477 AddKeyword ("event", Token
.EVENT
);
478 AddKeyword ("explicit", Token
.EXPLICIT
);
479 AddKeyword ("extern", Token
.EXTERN
);
480 AddKeyword ("false", Token
.FALSE
);
481 AddKeyword ("finally", Token
.FINALLY
);
482 AddKeyword ("fixed", Token
.FIXED
);
483 AddKeyword ("float", Token
.FLOAT
);
484 AddKeyword ("for", Token
.FOR
);
485 AddKeyword ("foreach", Token
.FOREACH
);
486 AddKeyword ("goto", Token
.GOTO
);
487 AddKeyword ("get", Token
.GET
);
488 AddKeyword ("if", Token
.IF
);
489 AddKeyword ("implicit", Token
.IMPLICIT
);
490 AddKeyword ("in", Token
.IN
);
491 AddKeyword ("int", Token
.INT
);
492 AddKeyword ("interface", Token
.INTERFACE
);
493 AddKeyword ("internal", Token
.INTERNAL
);
494 AddKeyword ("is", Token
.IS
);
495 AddKeyword ("lock", Token
.LOCK
);
496 AddKeyword ("long", Token
.LONG
);
497 AddKeyword ("namespace", Token
.NAMESPACE
);
498 AddKeyword ("new", Token
.NEW
);
499 AddKeyword ("null", Token
.NULL
);
500 AddKeyword ("object", Token
.OBJECT
);
501 AddKeyword ("operator", Token
.OPERATOR
);
502 AddKeyword ("out", Token
.OUT
);
503 AddKeyword ("override", Token
.OVERRIDE
);
504 AddKeyword ("params", Token
.PARAMS
);
505 AddKeyword ("private", Token
.PRIVATE
);
506 AddKeyword ("protected", Token
.PROTECTED
);
507 AddKeyword ("public", Token
.PUBLIC
);
508 AddKeyword ("readonly", Token
.READONLY
);
509 AddKeyword ("ref", Token
.REF
);
510 AddKeyword ("remove", Token
.REMOVE
);
511 AddKeyword ("return", Token
.RETURN
);
512 AddKeyword ("sbyte", Token
.SBYTE
);
513 AddKeyword ("sealed", Token
.SEALED
);
514 AddKeyword ("set", Token
.SET
);
515 AddKeyword ("short", Token
.SHORT
);
516 AddKeyword ("sizeof", Token
.SIZEOF
);
517 AddKeyword ("stackalloc", Token
.STACKALLOC
);
518 AddKeyword ("static", Token
.STATIC
);
519 AddKeyword ("string", Token
.STRING
);
520 AddKeyword ("struct", Token
.STRUCT
);
521 AddKeyword ("switch", Token
.SWITCH
);
522 AddKeyword ("this", Token
.THIS
);
523 AddKeyword ("throw", Token
.THROW
);
524 AddKeyword ("true", Token
.TRUE
);
525 AddKeyword ("try", Token
.TRY
);
526 AddKeyword ("typeof", Token
.TYPEOF
);
527 AddKeyword ("uint", Token
.UINT
);
528 AddKeyword ("ulong", Token
.ULONG
);
529 AddKeyword ("unchecked", Token
.UNCHECKED
);
530 AddKeyword ("unsafe", Token
.UNSAFE
);
531 AddKeyword ("ushort", Token
.USHORT
);
532 AddKeyword ("using", Token
.USING
);
533 AddKeyword ("virtual", Token
.VIRTUAL
);
534 AddKeyword ("void", Token
.VOID
);
535 AddKeyword ("volatile", Token
.VOLATILE
);
536 AddKeyword ("while", Token
.WHILE
);
537 AddKeyword ("partial", Token
.PARTIAL
);
538 AddKeyword ("where", Token
.WHERE
);
541 AddKeyword ("from", Token
.FROM
);
542 AddKeyword ("join", Token
.JOIN
);
543 AddKeyword ("on", Token
.ON
);
544 AddKeyword ("equals", Token
.EQUALS
);
545 AddKeyword ("select", Token
.SELECT
);
546 AddKeyword ("group", Token
.GROUP
);
547 AddKeyword ("by", Token
.BY
);
548 AddKeyword ("let", Token
.LET
);
549 AddKeyword ("orderby", Token
.ORDERBY
);
550 AddKeyword ("ascending", Token
.ASCENDING
);
551 AddKeyword ("descending", Token
.DESCENDING
);
552 AddKeyword ("into", Token
.INTO
);
561 csharp_format_info
= NumberFormatInfo
.InvariantInfo
;
562 styles
= NumberStyles
.Float
;
564 string_builder
= new System
.Text
.StringBuilder ();
567 int GetKeyword (char[] id
, int id_len
)
570 // Keywords are stored in an array of arrays grouped by their
571 // length and then by the first character
573 if (id_len
>= keywords
.Length
|| keywords
[id_len
] == null)
576 int first_index
= id
[0] - '_';
577 if (first_index
> 'z')
580 KeywordEntry kwe
= keywords
[id_len
] [first_index
];
587 for (int i
= 1; i
< id_len
; ++i
) {
588 if (id
[i
] != kwe
.Value
[i
]) {
594 } while (res
== 0 && kwe
!= null);
608 if (!handle_remove_add
)
612 if (parsing_declaration
== 0)
613 res
= Token
.EXTERN_ALIAS
;
616 if (peek_token () == Token
.COLON
) {
618 res
= Token
.DEFAULT_COLON
;
622 if (!handle_where
&& !query_parsing
)
627 // A query expression is any expression that starts with `from identifier'
628 // followed by any token except ; , =
630 if (!query_parsing
) {
631 if (lambda_arguments_parsing
) {
637 // HACK: to disable generics micro-parser, because PushPosition does not
638 // store identifiers array
639 parsing_generic_less_than
= 1;
641 case Token
.IDENTIFIER
:
653 next_token
= xtoken ();
654 if (next_token
== Token
.SEMICOLON
|| next_token
== Token
.COMMA
|| next_token
== Token
.EQUALS
)
657 res
= Token
.FROM_FIRST
;
658 query_parsing
= true;
659 if (RootContext
.Version
<= LanguageVersion
.ISO_2
)
660 Report
.FeatureIsNotAvailable (Location
, "query expressions");
663 Expression
.Error_VoidInvalidInTheContext (Location
, Report
);
667 // HACK: A token is not a keyword so we need to restore identifiers buffer
668 // which has been overwritten before we grabbed the identifier
669 id_builder
[0] = 'f'; id_builder
[1] = 'r'; id_builder
[2] = 'o'; id_builder
[3] = 'm';
683 case Token
.ASCENDING
:
684 case Token
.DESCENDING
:
691 case Token
.NAMESPACE
:
692 // TODO: some explanation needed
693 check_incorrect_doc_comment ();
697 if (parsing_block
> 0) {
702 // Save current position and parse next token.
705 next_token
= token ();
706 bool ok
= (next_token
== Token
.CLASS
) ||
707 (next_token
== Token
.STRUCT
) ||
708 (next_token
== Token
.INTERFACE
) ||
709 (next_token
== Token
.VOID
);
714 if (next_token
== Token
.VOID
) {
715 if (RootContext
.Version
== LanguageVersion
.ISO_1
||
716 RootContext
.Version
== LanguageVersion
.ISO_2
)
717 Report
.FeatureIsNotAvailable (Location
, "partial methods");
718 } else if (RootContext
.Version
== LanguageVersion
.ISO_1
)
719 Report
.FeatureIsNotAvailable (Location
, "partial types");
724 if (next_token
< Token
.LAST_KEYWORD
) {
725 Report
.Error (267, Location
,
726 "The `partial' modifier can be used only immediately before `class', `struct', `interface', or `void' keyword");
737 public Location Location
{
739 return new Location (ref_line
, hidden
? -1 : col
);
743 public Tokenizer (SeekableStreamReader input
, CompilationUnit file
, CompilerContext ctx
)
745 this.ref_name
= file
;
746 this.file_name
= file
;
752 xml_comment_buffer
= new StringBuilder ();
754 if (Environment
.OSVersion
.Platform
== PlatformID
.Win32NT
)
760 // FIXME: This could be `Location.Push' but we have to
761 // find out why the MS compiler allows this
763 Mono
.CSharp
.Location
.Push (file
, file
);
// Returns true when `c' may begin an identifier: an ASCII letter
// ('a'-'z' or 'A'-'Z'), an underscore, or any other character that
// Char.IsLetter accepts once `c' has been cast to char.
766 static bool is_identifier_start_character (int c
)
768 return (c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || c
== '_' || Char
.IsLetter ((char)c
);
771 static bool is_identifier_part_character (char c
)
773 if (c
>= 'a' && c
<= 'z')
776 if (c
>= 'A' && c
<= 'Z')
779 if (c
== '_' || (c
>= '0' && c
<= '9'))
785 return Char
.IsLetter (c
) || Char
.GetUnicodeCategory (c
) == UnicodeCategory
.ConnectorPunctuation
;
// Returns true when `s' is a key of the static keyword_strings table,
// i.e. one of the strings registered through AddKeyword.
788 public static bool IsKeyword (string s
)
790 return keyword_strings
.ContainsKey (s
);
794 // Open parens micro parser. Detects both lambda and cast ambiguity.
796 int TokenizeOpenParens ()
801 int bracket_level
= 0;
802 bool is_type
= false;
803 bool can_be_type
= false;
806 ptoken
= current_token
;
809 switch (current_token
) {
810 case Token
.CLOSE_PARENS
:
814 // Expression inside parens is lambda, (int i) =>
816 if (current_token
== Token
.ARROW
)
817 return Token
.OPEN_PARENS_LAMBDA
;
820 // Expression inside parens is single type, (int[])
823 return Token
.OPEN_PARENS_CAST
;
826 // Expression is possible cast, look at next token, (T)null
829 switch (current_token
) {
830 case Token
.OPEN_PARENS
:
833 case Token
.IDENTIFIER
:
847 case Token
.UNCHECKED
:
852 // These can be part of a member access
866 return Token
.OPEN_PARENS_CAST
;
869 return Token
.OPEN_PARENS
;
872 case Token
.DOUBLE_COLON
:
873 if (ptoken
!= Token
.IDENTIFIER
&& ptoken
!= Token
.OP_GENERICS_GT
)
878 case Token
.IDENTIFIER
:
881 case Token
.OP_GENERICS_LT
:
883 case Token
.DOUBLE_COLON
:
885 if (bracket_level
== 0)
889 can_be_type
= is_type
= false;
909 if (bracket_level
== 0)
914 if (bracket_level
== 0) {
916 can_be_type
= is_type
= false;
920 case Token
.OP_GENERICS_LT
:
921 case Token
.OPEN_BRACKET
:
922 if (bracket_level
++ == 0)
926 case Token
.OP_GENERICS_GT
:
927 case Token
.CLOSE_BRACKET
:
931 case Token
.INTERR_NULLABLE
:
933 if (bracket_level
== 0)
939 can_be_type
= is_type
= false;
943 return Token
.OPEN_PARENS
;
948 public static bool IsValidIdentifier (string s
)
950 if (s
== null || s
.Length
== 0)
953 if (!is_identifier_start_character (s
[0]))
956 for (int i
= 1; i
< s
.Length
; i
++)
957 if (! is_identifier_part_character (s
[i
]))
963 bool parse_less_than ()
966 int the_token
= token ();
967 if (the_token
== Token
.OPEN_BRACKET
) {
969 the_token
= token ();
970 } while (the_token
!= Token
.CLOSE_BRACKET
);
971 the_token
= token ();
972 } else if (the_token
== Token
.IN
|| the_token
== Token
.OUT
) {
973 the_token
= token ();
976 case Token
.IDENTIFIER
:
994 case Token
.OP_GENERICS_GT
:
1001 the_token
= token ();
1003 if (the_token
== Token
.OP_GENERICS_GT
)
1005 else if (the_token
== Token
.COMMA
|| the_token
== Token
.DOT
|| the_token
== Token
.DOUBLE_COLON
)
1007 else if (the_token
== Token
.INTERR_NULLABLE
|| the_token
== Token
.STAR
)
1009 else if (the_token
== Token
.OP_GENERICS_LT
) {
1010 if (!parse_less_than ())
1013 } else if (the_token
== Token
.OPEN_BRACKET
) {
1015 the_token
= token ();
1016 if (the_token
== Token
.CLOSE_BRACKET
)
1018 else if (the_token
== Token
.COMMA
)
1019 goto rank_specifiers
;
1026 bool parse_generic_dimension (out int dimension
)
1031 int the_token
= token ();
1032 if (the_token
== Token
.OP_GENERICS_GT
)
1034 else if (the_token
== Token
.COMMA
) {
1042 public int peek_token ()
1047 the_token
= token ();
1054 // Tokenizes `?' using custom disambiguation rules to return one
1055 // of the following tokens: INTERR_NULLABLE, OP_COALESCING, INTERR
1057 // Tricky expressions look like:
1059 // Foo ? a = x ? b : c;
1061 int TokenizePossibleNullableType ()
1063 if (parsing_block
== 0 || parsing_type
> 0)
1064 return Token
.INTERR_NULLABLE
;
1066 int d
= peek_char ();
1069 return Token
.OP_COALESCING
;
1072 switch (current_token
) {
1073 case Token
.CLOSE_PARENS
:
1078 return Token
.INTERR
;
1082 if (d
== ',' || d
== ';' || d
== '>')
1083 return Token
.INTERR_NULLABLE
;
1084 if (d
== '*' || (d
>= '0' && d
<= '9'))
1085 return Token
.INTERR
;
1089 current_token
= Token
.NONE
;
1091 switch (xtoken ()) {
1098 next_token
= Token
.INTERR
;
1101 case Token
.SEMICOLON
:
1103 case Token
.CLOSE_PARENS
:
1104 case Token
.OPEN_BRACKET
:
1105 case Token
.OP_GENERICS_GT
:
1106 next_token
= Token
.INTERR_NULLABLE
;
1114 if (next_token
== -1) {
1115 switch (xtoken ()) {
1117 case Token
.SEMICOLON
:
1118 case Token
.OPEN_BRACE
:
1119 case Token
.CLOSE_PARENS
:
1121 next_token
= Token
.INTERR_NULLABLE
;
1125 next_token
= Token
.INTERR
;
1133 // All shortcuts failed, do it the hard way
1135 while ((ntoken
= xtoken ()) != Token
.EOF
) {
1136 if (ntoken
== Token
.SEMICOLON
)
1139 if (ntoken
== Token
.COLON
) {
1140 if (++colons
== interrs
)
1145 if (ntoken
== Token
.INTERR
) {
1151 next_token
= colons
!= interrs
? Token
.INTERR_NULLABLE
: Token
.INTERR
;
1160 bool decimal_digits (int c
)
1163 bool seen_digits
= false;
1166 if (number_pos
== max_number_size
)
1167 Error_NumericConstantTooLong ();
1168 number_builder
[number_pos
++] = (char) c
;
1172 // We use peek_char2, because decimal_digits needs to do a
1173 // 2-character look-ahead (5.ToString for example).
1175 while ((d
= peek_char2 ()) != -1){
1176 if (d
>= '0' && d
<= '9'){
1177 if (number_pos
== max_number_size
)
1178 Error_NumericConstantTooLong ();
1179 number_builder
[number_pos
++] = (char) d
;
// Returns true when `e' is the character code of an ASCII hexadecimal
// digit: '0'-'9', 'A'-'F' or 'a'-'f'.
1189 static bool is_hex (int e
)
1191 return (e
>= '0' && e
<= '9') || (e
>= 'A' && e
<= 'F') || (e
>= 'a' && e
<= 'f');
1194 static TypeCode
real_type_suffix (int c
)
1198 return TypeCode
.Single
;
1200 return TypeCode
.Double
;
1202 return TypeCode
.Decimal
;
1204 return TypeCode
.Empty
;
1208 int integer_type_suffix (ulong ul
, int c
)
1210 bool is_unsigned
= false;
1211 bool is_long
= false;
1214 bool scanning
= true;
1227 // if we have not seen anything in between
1228 // report this error
1230 Report
.Warning (78, 4, Location
, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
1250 if (is_long
&& is_unsigned
){
1251 val
= new ULongLiteral (ul
, Location
);
1252 return Token
.LITERAL
;
1256 // uint if possible, or ulong else.
1258 if ((ul
& 0xffffffff00000000) == 0)
1259 val
= new UIntLiteral ((uint) ul
, Location
);
1261 val
= new ULongLiteral (ul
, Location
);
1262 } else if (is_long
){
1263 // long if possible, ulong otherwise
1264 if ((ul
& 0x8000000000000000) != 0)
1265 val
= new ULongLiteral (ul
, Location
);
1267 val
= new LongLiteral ((long) ul
, Location
);
1269 // int, uint, long or ulong in that order
1270 if ((ul
& 0xffffffff00000000) == 0){
1271 uint ui
= (uint) ul
;
1273 if ((ui
& 0x80000000) != 0)
1274 val
= new UIntLiteral (ui
, Location
);
1276 val
= new IntLiteral ((int) ui
, Location
);
1278 if ((ul
& 0x8000000000000000) != 0)
1279 val
= new ULongLiteral (ul
, Location
);
1281 val
= new LongLiteral ((long) ul
, Location
);
1284 return Token
.LITERAL
;
1288 // given `c' as the next char in the input decide whether
1289 // we need to convert to a special type, and then choose
1290 // the best representation for the integer
1292 int adjust_int (int c
)
1295 if (number_pos
> 9){
1296 ulong ul
= (uint) (number_builder
[0] - '0');
1298 for (int i
= 1; i
< number_pos
; i
++){
1299 ul
= checked ((ul
* 10) + ((uint)(number_builder
[i
] - '0')));
1301 return integer_type_suffix (ul
, c
);
1303 uint ui
= (uint) (number_builder
[0] - '0');
1305 for (int i
= 1; i
< number_pos
; i
++){
1306 ui
= checked ((ui
* 10) + ((uint)(number_builder
[i
] - '0')));
1308 return integer_type_suffix (ui
, c
);
1310 } catch (OverflowException
) {
1311 error_details
= "Integral constant is too large";
1312 Report
.Error (1021, Location
, error_details
);
1313 val
= new IntLiteral (0, Location
);
1314 return Token
.LITERAL
;
1316 catch (FormatException
) {
1317 Report
.Error (1013, Location
, "Invalid number");
1318 val
= new IntLiteral (0, Location
);
1319 return Token
.LITERAL
;
1323 int adjust_real (TypeCode t
)
1325 string s
= new String (number_builder
, 0, number_pos
);
1326 const string error_details
= "Floating-point constant is outside the range of type `{0}'";
1329 case TypeCode
.Decimal
:
1331 val
= new DecimalLiteral (decimal.Parse (s
, styles
, csharp_format_info
), Location
);
1332 } catch (OverflowException
) {
1333 val
= new DecimalLiteral (0, Location
);
1334 Report
.Error (594, Location
, error_details
, "decimal");
1337 case TypeCode
.Single
:
1339 val
= new FloatLiteral (float.Parse (s
, styles
, csharp_format_info
), Location
);
1340 } catch (OverflowException
) {
1341 val
= new FloatLiteral (0, Location
);
1342 Report
.Error (594, Location
, error_details
, "float");
1347 val
= new DoubleLiteral (double.Parse (s
, styles
, csharp_format_info
), Location
);
1348 } catch (OverflowException
) {
1349 val
= new DoubleLiteral (0, Location
);
1350 Report
.Error (594, Location
, error_details
, "double");
1355 return Token
.LITERAL
;
1364 while ((d
= peek_char ()) != -1){
1366 number_builder
[number_pos
++] = (char) d
;
1372 string s
= new String (number_builder
, 0, number_pos
);
1374 if (number_pos
<= 8)
1375 ul
= System
.UInt32
.Parse (s
, NumberStyles
.HexNumber
);
1377 ul
= System
.UInt64
.Parse (s
, NumberStyles
.HexNumber
);
1378 } catch (OverflowException
){
1379 error_details
= "Integral constant is too large";
1380 Report
.Error (1021, Location
, error_details
);
1381 val
= new IntLiteral (0, Location
);
1382 return Token
.LITERAL
;
1384 catch (FormatException
) {
1385 Report
.Error (1013, Location
, "Invalid number");
1386 val
= new IntLiteral (0, Location
);
1387 return Token
.LITERAL
;
1390 return integer_type_suffix (ul
, peek_char ());
1394 // Invoked if we know we have .digits or digits
1396 int is_number (int c
)
1398 bool is_real
= false;
1402 if (c
>= '0' && c
<= '9'){
1404 int peek
= peek_char ();
1406 if (peek
== 'x' || peek
== 'X')
1407 return handle_hex ();
1414 // We need to handle the case of
1415 // "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)
1418 if (decimal_digits ('.')){
1424 return adjust_int (-1);
1428 if (c
== 'e' || c
== 'E'){
1430 if (number_pos
== max_number_size
)
1431 Error_NumericConstantTooLong ();
1432 number_builder
[number_pos
++] = 'e';
1436 if (number_pos
== max_number_size
)
1437 Error_NumericConstantTooLong ();
1438 number_builder
[number_pos
++] = '+';
1440 } else if (c
== '-') {
1441 if (number_pos
== max_number_size
)
1442 Error_NumericConstantTooLong ();
1443 number_builder
[number_pos
++] = '-';
1446 if (number_pos
== max_number_size
)
1447 Error_NumericConstantTooLong ();
1448 number_builder
[number_pos
++] = '+';
1455 var type
= real_type_suffix (c
);
1456 if (type
== TypeCode
.Empty
&& !is_real
){
1458 return adjust_int (c
);
1463 if (type
== TypeCode
.Empty
){
1468 return adjust_real (type
);
1470 throw new Exception ("Is Number should never reach this point");
1474 // Accepts exactly count (4 or 8) hex digits, no more no less
1476 int getHex (int count
, out int surrogate
, out bool error
)
1481 int top
= count
!= -1 ? count
: 4;
1486 for (i
= 0; i
< top
; i
++){
1489 if (c
>= '0' && c
<= '9')
1490 c
= (int) c
- (int) '0';
1491 else if (c
>= 'A' && c
<= 'F')
1492 c
= (int) c
- (int) 'A' + 10;
1493 else if (c
>= 'a' && c
<= 'f')
1494 c
= (int) c
- (int) 'a' + 10;
1500 total
= (total
* 16) + c
;
1502 int p
= peek_char ();
1505 if (!is_hex ((char)p
))
1511 if (total
> 0x0010FFFF) {
1516 if (total
>= 0x00010000) {
1517 surrogate
= ((total
- 0x00010000) % 0x0400 + 0xDC00);
1518 total
= ((total
- 0x00010000) / 0x0400 + 0xD800);
1525 int escape (int c
, out int surrogate
)
1561 v
= getHex (-1, out surrogate
, out error
);
1567 return EscapeUnicode (d
, out surrogate
);
1570 Report
.Error (1009, Location
, "Unrecognized escape sequence `\\{0}'", ((char)d
).ToString ());
1579 int EscapeUnicode (int ch
, out int surrogate
)
1583 ch
= getHex (8, out surrogate
, out error
);
1585 ch
= getHex (4, out surrogate
, out error
);
1589 Report
.Error (1009, Location
, "Unrecognized escape sequence");
1597 if (putback_char
!= -1) {
1610 void advance_line ()
1620 if (putback_char
== -1)
1621 putback_char
= reader
.Read ();
1622 return putback_char
;
1627 if (putback_char
!= -1)
1628 return putback_char
;
1629 return reader
.Peek ();
1632 void putback (int c
)
1634 if (putback_char
!= -1){
1635 Console
.WriteLine ("Col: " + col
);
1636 Console
.WriteLine ("Row: " + line
);
1637 Console
.WriteLine ("Name: " + ref_name
.Name
);
1638 Console
.WriteLine ("Current [{0}] putting back [{1}] ", putback_char
, c
);
1639 throw new Exception ("This should not happen putback on putback");
1641 if (c
== '\n' || col
== 0) {
1642 // It won't happen though.
1652 public bool advance ()
1654 return peek_char () != -1 || CompleteOnEOF
;
1657 public Object Value
{
1663 public Object
value ()
1670 current_token
= xtoken ();
1671 return current_token
;
1674 void get_cmd_arg (out string cmd
, out string arg
)
1678 tokens_seen
= false;
1681 // skip over white space
1684 } while (c
== '\r' || c
== ' ' || c
== '\t');
1686 static_cmd_arg
.Length
= 0;
1687 while (c
!= -1 && is_identifier_part_character ((char)c
)) {
1688 static_cmd_arg
.Append ((char)c
);
1691 int peek
= peek_char ();
1692 if (peek
== 'U' || peek
== 'u') {
1694 c
= EscapeUnicode (c
, out surrogate
);
1695 if (surrogate
!= 0) {
1696 if (is_identifier_part_character ((char) c
))
1697 static_cmd_arg
.Append ((char) c
);
1704 cmd
= static_cmd_arg
.ToString ();
1706 // skip over white space
1707 while (c
== '\r' || c
== ' ' || c
== '\t')
1710 static_cmd_arg
.Length
= 0;
1711 int has_identifier_argument
= 0;
1713 while (c
!= -1 && c
!= '\n' && c
!= '\r') {
1714 if (c
== '\\' && has_identifier_argument
>= 0) {
1715 if (has_identifier_argument
!= 0 || (cmd
== "define" || cmd
== "if" || cmd
== "elif" || cmd
== "undef")) {
1716 has_identifier_argument
= 1;
1718 int peek
= peek_char ();
1719 if (peek
== 'U' || peek
== 'u') {
1721 c
= EscapeUnicode (c
, out surrogate
);
1722 if (surrogate
!= 0) {
1723 if (is_identifier_part_character ((char) c
))
1724 static_cmd_arg
.Append ((char) c
);
1729 has_identifier_argument
= -1;
1732 static_cmd_arg
.Append ((char) c
);
1736 if (static_cmd_arg
.Length
!= 0)
1737 arg
= static_cmd_arg
.ToString ();
1741 // Handles the #line directive
1743 bool PreProcessLine (string arg
)
1745 if (arg
.Length
== 0)
1748 if (arg
== "default"){
1750 ref_name
= file_name
;
1752 Location
.Push (file_name
, ref_name
);
1754 } else if (arg
== "hidden"){
1762 if ((pos
= arg
.IndexOf (' ')) != -1 && pos
!= 0){
1763 ref_line
= System
.Int32
.Parse (arg
.Substring (0, pos
));
1766 char [] quotes
= { '\"' }
;
1768 string name
= arg
.Substring (pos
). Trim (quotes
);
1769 ref_name
= Location
.LookupFile (file_name
, name
);
1770 file_name
.AddFile (ref_name
);
1772 Location
.Push (file_name
, ref_name
);
1774 ref_line
= System
.Int32
.Parse (arg
);
1785 // Handles #define and #undef
1787 void PreProcessDefinition (bool is_define
, string ident
, bool caller_is_taking
)
1789 if (ident
.Length
== 0 || ident
== "true" || ident
== "false"){
1790 Report
.Error (1001, Location
, "Missing identifier to pre-processor directive");
1794 if (ident
.IndexOfAny (simple_whitespaces
) != -1){
1795 Error_EndLineExpected ();
1799 if (!is_identifier_start_character (ident
[0]))
1800 Report
.Error (1001, Location
, "Identifier expected: {0}", ident
);
1802 foreach (char c
in ident
.Substring (1)){
1803 if (!is_identifier_part_character (c
)){
1804 Report
.Error (1001, Location
, "Identifier expected: {0}", ident
);
1809 if (!caller_is_taking
)
1816 if (RootContext
.IsConditionalDefined (ident
))
1819 file_name
.AddDefine (ident
);
1824 file_name
.AddUndefine (ident
);
1828 static byte read_hex (string arg
, int pos
, out bool error
)
1835 if ((c
>= '0') && (c
<= '9'))
1836 total
= (int) c
- (int) '0';
1837 else if ((c
>= 'A') && (c
<= 'F'))
1838 total
= (int) c
- (int) 'A' + 10;
1839 else if ((c
>= 'a') && (c
<= 'f'))
1840 total
= (int) c
- (int) 'a' + 10;
1849 if ((c
>= '0') && (c
<= '9'))
1850 total
+= (int) c
- (int) '0';
1851 else if ((c
>= 'A') && (c
<= 'F'))
1852 total
+= (int) c
- (int) 'A' + 10;
1853 else if ((c
>= 'a') && (c
<= 'f'))
1854 total
+= (int) c
- (int) 'a' + 10;
1860 return (byte) total
;
1864 /// Handles #pragma checksum
1866 bool PreProcessPragmaChecksum (string arg
)
1868 if ((arg
[0] != ' ') && (arg
[0] != '\t'))
1871 arg
= arg
.Trim (simple_whitespaces
);
1872 if ((arg
.Length
< 2) || (arg
[0] != '"'))
1875 StringBuilder file_sb
= new StringBuilder ();
1879 while ((ch
= arg
[pos
++]) != '"') {
1880 if (pos
>= arg
.Length
)
1884 if (pos
+1 >= arg
.Length
)
1889 file_sb
.Append (ch
);
1892 if ((pos
+2 >= arg
.Length
) || ((arg
[pos
] != ' ') && (arg
[pos
] != '\t')))
1895 arg
= arg
.Substring (pos
).Trim (simple_whitespaces
);
1896 if ((arg
.Length
< 42) || (arg
[0] != '"') || (arg
[1] != '{') ||
1897 (arg
[10] != '-') || (arg
[15] != '-') || (arg
[20] != '-') ||
1898 (arg
[25] != '-') || (arg
[38] != '}') || (arg
[39] != '"'))
1902 byte[] guid_bytes
= new byte [16];
1904 for (int i
= 0; i
< 4; i
++) {
1905 guid_bytes
[i
] = read_hex (arg
, 2+2*i
, out error
);
1909 for (int i
= 0; i
< 2; i
++) {
1910 guid_bytes
[i
+4] = read_hex (arg
, 11+2*i
, out error
);
1913 guid_bytes
[i
+6] = read_hex (arg
, 16+2*i
, out error
);
1916 guid_bytes
[i
+8] = read_hex (arg
, 21+2*i
, out error
);
1921 for (int i
= 0; i
< 6; i
++) {
1922 guid_bytes
[i
+10] = read_hex (arg
, 26+2*i
, out error
);
1927 arg
= arg
.Substring (40).Trim (simple_whitespaces
);
1928 if ((arg
.Length
< 34) || (arg
[0] != '"') || (arg
[33] != '"'))
1931 byte[] checksum_bytes
= new byte [16];
1932 for (int i
= 0; i
< 16; i
++) {
1933 checksum_bytes
[i
] = read_hex (arg
, 1+2*i
, out error
);
1938 arg
= arg
.Substring (34).Trim (simple_whitespaces
);
1942 SourceFile file
= Location
.LookupFile (file_name
, file_sb
.ToString ());
1943 file
.SetChecksum (guid_bytes
, checksum_bytes
);
1944 ref_name
.AutoGenerated
= true;
1949 /// Handles #pragma directive
1951 void PreProcessPragma (string arg
)
1953 const string warning
= "warning";
1954 const string w_disable
= "warning disable";
1955 const string w_restore
= "warning restore";
1956 const string checksum
= "checksum";
1958 if (arg
== w_disable
) {
1959 Report
.RegisterWarningRegion (Location
).WarningDisable (Location
.Row
);
1963 if (arg
== w_restore
) {
1964 Report
.RegisterWarningRegion (Location
).WarningEnable (Location
.Row
);
1968 if (arg
.StartsWith (w_disable
)) {
1969 int[] codes
= ParseNumbers (arg
.Substring (w_disable
.Length
));
1970 foreach (int code
in codes
) {
1972 Report
.RegisterWarningRegion (Location
).WarningDisable (Location
, code
, Report
);
1977 if (arg
.StartsWith (w_restore
)) {
1978 int[] codes
= ParseNumbers (arg
.Substring (w_restore
.Length
));
1979 var w_table
= Report
.warning_ignore_table
;
1980 foreach (int code
in codes
) {
1981 if (w_table
!= null && w_table
.ContainsKey (code
))
1982 Report
.Warning (1635, 1, Location
, "Cannot restore warning `CS{0:0000}' because it was disabled globally", code
);
1983 Report
.RegisterWarningRegion (Location
).WarningEnable (Location
, code
, Report
);
1988 if (arg
.StartsWith (warning
)) {
1989 Report
.Warning (1634, 1, Location
, "Expected disable or restore");
1993 if (arg
.StartsWith (checksum
)) {
1994 if (!PreProcessPragmaChecksum (arg
.Substring (checksum
.Length
)))
1995 Warning_InvalidPragmaChecksum ();
1999 Report
.Warning (1633, 1, Location
, "Unrecognized #pragma directive");
2002 int[] ParseNumbers (string text
)
2004 string[] string_array
= text
.Split (',');
2005 int[] values
= new int [string_array
.Length
];
2007 foreach (string string_code
in string_array
) {
2009 values
[index
++] = int.Parse (string_code
, System
.Globalization
.CultureInfo
.InvariantCulture
);
2011 catch (FormatException
) {
2012 Report
.Warning (1692, 1, Location
, "Invalid number");
2018 bool eval_val (string s
)
2025 return file_name
.IsConditionalDefined (s
);
2028 bool pp_primary (ref string s
)
2037 s
= s
.Substring (1);
2038 bool val
= pp_expr (ref s
, false);
2039 if (s
.Length
> 0 && s
[0] == ')'){
2040 s
= s
.Substring (1);
2043 Error_InvalidDirective ();
2047 if (is_identifier_start_character (c
)){
2053 if (is_identifier_part_character (c
)){
2057 bool v
= eval_val (s
.Substring (0, j
));
2058 s
= s
.Substring (j
);
2061 bool vv
= eval_val (s
);
2066 Error_InvalidDirective ();
2070 bool pp_unary (ref string s
)
2077 if (len
> 1 && s
[1] == '='){
2078 Error_InvalidDirective ();
2081 s
= s
.Substring (1);
2082 return ! pp_primary (ref s
);
2084 return pp_primary (ref s
);
2086 Error_InvalidDirective ();
2091 bool pp_eq (ref string s
)
2093 bool va
= pp_unary (ref s
);
2099 if (len
> 2 && s
[1] == '='){
2100 s
= s
.Substring (2);
2101 return va
== pp_unary (ref s
);
2103 Error_InvalidDirective ();
2106 } else if (s
[0] == '!' && len
> 1 && s
[1] == '='){
2107 s
= s
.Substring (2);
2109 return va
!= pp_unary (ref s
);
2118 bool pp_and (ref string s
)
2120 bool va
= pp_eq (ref s
);
2126 if (len
> 2 && s
[1] == '&'){
2127 s
= s
.Substring (2);
2128 return (va
& pp_and (ref s
));
2130 Error_InvalidDirective ();
2139 // Evaluates an expression for `#if' or `#elif'
2141 bool pp_expr (ref string s
, bool isTerm
)
2143 bool va
= pp_and (ref s
);
2150 if (len
> 2 && s
[1] == '|'){
2151 s
= s
.Substring (2);
2152 return va
| pp_expr (ref s
, isTerm
);
2154 Error_InvalidDirective ();
2159 Error_EndLineExpected ();
2167 bool eval (string s
)
2169 bool v
= pp_expr (ref s
, true);
2178 void Error_NumericConstantTooLong ()
2180 Report
.Error (1021, Location
, "Numeric constant too long");
2183 void Error_InvalidDirective ()
2185 Report
.Error (1517, Location
, "Invalid preprocessor directive");
2188 void Error_UnexpectedDirective (string extra
)
2192 "Unexpected processor directive ({0})", extra
);
2195 void Error_TokensSeen ()
2197 Report
.Error (1032, Location
,
2198 "Cannot define or undefine preprocessor symbols after first token in file");
2201 void Eror_WrongPreprocessorLocation ()
2203 Report
.Error (1040, Location
,
2204 "Preprocessor directives must appear as the first non-whitespace character on a line");
2207 void Error_EndLineExpected ()
2209 Report
.Error (1025, Location
, "Single-line comment or end-of-line expected");
2212 void Warning_InvalidPragmaChecksum ()
2214 Report
.Warning (1695, 1, Location
,
2215 "Invalid #pragma checksum syntax; should be " +
2216 "#pragma checksum \"filename\" " +
2217 "\"{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}\" \"XXXX...\"");
2220 // if true, then the code continues processing the code
2221 // if false, the code stays in a loop until another directive is
2223 // When caller_is_taking is false we ignore all directives except the ones
2224 // which can help us to identify where the #if block ends
2225 bool handle_preprocessing_directive (bool caller_is_taking
)
2228 bool region_directive
= false;
2230 get_cmd_arg (out cmd
, out arg
);
2232 // Eat any trailing whitespaces and single-line comments
2233 if (arg
.IndexOf ("//") != -1)
2234 arg
= arg
.Substring (0, arg
.IndexOf ("//"));
2235 arg
= arg
.Trim (simple_whitespaces
);
2238 // The first group of pre-processing instructions is always processed
2242 region_directive
= true;
2247 if (ifstack
== null || ifstack
.Count
== 0){
2248 Error_UnexpectedDirective ("no #region for this #endregion");
2251 int pop
= ifstack
.Pop ();
2253 if ((pop
& REGION
) == 0)
2254 Report
.Error (1027, Location
, "Expected `#endif' directive");
2256 return caller_is_taking
;
2259 if (ifstack
== null)
2260 ifstack
= new Stack
<int> (2);
2262 int flags
= region_directive
? REGION
: 0;
2263 if (ifstack
.Count
== 0){
2264 flags
|= PARENT_TAKING
;
2266 int state
= ifstack
.Peek ();
2267 if ((state
& TAKING
) != 0) {
2268 flags
|= PARENT_TAKING
;
2272 if (caller_is_taking
&& eval (arg
)) {
2273 ifstack
.Push (flags
| TAKING
);
2276 ifstack
.Push (flags
);
2280 if (ifstack
== null || ifstack
.Count
== 0){
2281 Error_UnexpectedDirective ("no #if for this #endif");
2284 pop
= ifstack
.Pop ();
2286 if ((pop
& REGION
) != 0)
2287 Report
.Error (1038, Location
, "#endregion directive expected");
2289 if (arg
.Length
!= 0) {
2290 Error_EndLineExpected ();
2293 if (ifstack
.Count
== 0)
2296 int state
= ifstack
.Peek ();
2297 return (state
& TAKING
) != 0;
2301 if (ifstack
== null || ifstack
.Count
== 0){
2302 Error_UnexpectedDirective ("no #if for this #elif");
2305 int state
= ifstack
.Pop ();
2307 if ((state
& REGION
) != 0) {
2308 Report
.Error (1038, Location
, "#endregion directive expected");
2312 if ((state
& ELSE_SEEN
) != 0){
2313 Error_UnexpectedDirective ("#elif not valid after #else");
2317 if ((state
& TAKING
) != 0) {
2322 if (eval (arg
) && ((state
& PARENT_TAKING
) != 0)){
2323 ifstack
.Push (state
| TAKING
);
2327 ifstack
.Push (state
);
2332 if (ifstack
== null || ifstack
.Count
== 0){
2333 Error_UnexpectedDirective ("no #if for this #else");
2336 int state
= ifstack
.Peek ();
2338 if ((state
& REGION
) != 0) {
2339 Report
.Error (1038, Location
, "#endregion directive expected");
2343 if ((state
& ELSE_SEEN
) != 0){
2344 Error_UnexpectedDirective ("#else within #else");
2350 if (arg
.Length
!= 0) {
2351 Error_EndLineExpected ();
2356 if ((state
& PARENT_TAKING
) != 0) {
2357 ret
= (state
& TAKING
) == 0;
2365 ifstack
.Push (state
| ELSE_SEEN
);
2370 if (any_token_seen
){
2371 Error_TokensSeen ();
2372 return caller_is_taking
;
2374 PreProcessDefinition (true, arg
, caller_is_taking
);
2375 return caller_is_taking
;
2378 if (any_token_seen
){
2379 Error_TokensSeen ();
2380 return caller_is_taking
;
2382 PreProcessDefinition (false, arg
, caller_is_taking
);
2383 return caller_is_taking
;
2387 // These are only processed if we are in a `taking' block
2389 if (!caller_is_taking
)
2394 Report
.Error (1029, Location
, "#error: '{0}'", arg
);
2398 Report
.Warning (1030, 1, Location
, "#warning: `{0}'", arg
);
2402 if (RootContext
.Version
== LanguageVersion
.ISO_1
) {
2403 Report
.FeatureIsNotAvailable (Location
, "#pragma");
2407 PreProcessPragma (arg
);
2411 if (!PreProcessLine (arg
))
2414 "The line number specified for #line directive is missing or invalid");
2415 return caller_is_taking
;
2418 Report
.Error (1024, Location
, "Wrong preprocessor directive");
2423 private int consume_string (bool quoted
)
2426 string_builder
.Length
= 0;
2428 while ((c
= get_char ()) != -1){
2430 if (quoted
&& peek_char () == '"'){
2431 string_builder
.Append ((char) c
);
2435 val
= new StringLiteral (string_builder
.ToString (), Location
);
2436 return Token
.LITERAL
;
2442 Report
.Error (1010, Location
, "Newline in constant");
2447 c
= escape (c
, out surrogate
);
2450 if (surrogate
!= 0) {
2451 string_builder
.Append ((char) c
);
2455 string_builder
.Append ((char) c
);
2458 Report
.Error (1039, Location
, "Unterminated string literal");
2462 private int consume_identifier (int s
)
2464 int res
= consume_identifier (s
, false);
2466 if (doc_state
== XmlCommentState
.Allowed
)
2467 doc_state
= XmlCommentState
.NotAllowed
;
2472 int consume_identifier (int c
, bool quoted
)
2475 // This method is very performance sensitive. It accounts
2476 // for approximately 25% of all parser time
2484 c
= escape (c
, out surrogate
);
2485 if (surrogate
!= 0) {
2486 id_builder
[pos
++] = (char) c
;
2491 id_builder
[pos
++] = (char) c
;
2497 if ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || c
== '_' || (c
>= '0' && c
<= '9')) {
2498 id_builder
[pos
++] = (char) c
;
2505 c
= escape (c
, out surrogate
);
2506 if (surrogate
!= 0) {
2507 if (is_identifier_part_character ((char) c
))
2508 id_builder
[pos
++] = (char) c
;
2514 } else if (Char
.IsLetter ((char) c
) || Char
.GetUnicodeCategory ((char) c
) == UnicodeCategory
.ConnectorPunctuation
) {
2515 id_builder
[pos
++] = (char) c
;
2522 } catch (IndexOutOfRangeException
) {
2523 Report
.Error (645, Location
, "Identifier too long (limit is 512 chars)");
2531 // Optimization: avoids doing the keyword lookup
2532 // on uppercase letters
2534 if (id_builder
[0] >= '_' && !quoted
) {
2535 int keyword
= GetKeyword (id_builder
, pos
);
2536 if (keyword
!= -1) {
2537 val
= LocatedToken
.Create (null, ref_line
, column
);
2543 // Keep identifiers in an array of hashtables to avoid needless
2546 var identifiers_group
= identifiers
[pos
];
2548 if (identifiers_group
!= null) {
2549 if (identifiers_group
.TryGetValue (id_builder
, out s
)) {
2550 val
= LocatedToken
.Create (s
, ref_line
, column
);
2552 AddEscapedIdentifier (((LocatedToken
) val
).Location
);
2553 return Token
.IDENTIFIER
;
2556 // TODO: this should be dependent on the number of files
2557 // corlib compilation peaks at 1000 and System.Core at 150
2558 int capacity
= pos
> 20 ? 10 : 100;
2559 identifiers_group
= new Dictionary
<char[],string> (capacity
, new IdentifiersComparer (pos
));
2560 identifiers
[pos
] = identifiers_group
;
2563 char [] chars
= new char [pos
];
2564 Array
.Copy (id_builder
, chars
, pos
);
2566 s
= new string (id_builder
, 0, pos
);
2567 identifiers_group
.Add (chars
, s
);
2569 val
= LocatedToken
.Create (s
, ref_line
, column
);
2571 AddEscapedIdentifier (((LocatedToken
) val
).Location
);
2573 return Token
.IDENTIFIER
;
2576 public int xtoken ()
2580 // Whether we have seen comments on the current line
2581 bool comments_seen
= false;
2582 while ((c
= get_char ()) != -1) {
2585 col
= ((col
+ tab_size
) / tab_size
) * tab_size
;
2593 case 0xFEFF: // Ignore BOM anywhere in the file
2596 /* This is required for compatibility with .NET
2598 if (peek_char () == 0xBB) {
2601 if (get_char () == 0xBF)
2608 if (peek_char () != '\n')
2613 any_token_seen
|= tokens_seen
;
2614 tokens_seen
= false;
2615 comments_seen
= false;
2620 return consume_identifier (c
);
2623 val
= LocatedToken
.Create (ref_line
, col
);
2624 return Token
.OPEN_BRACE
;
2626 val
= LocatedToken
.Create (ref_line
, col
);
2627 return Token
.CLOSE_BRACE
;
2629 // Disallow doc comments inside an attribute declaration.
2630 if (doc_state
== XmlCommentState
.Allowed
)
2631 doc_state
= XmlCommentState
.NotAllowed
;
2632 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2633 return Token
.OPEN_BRACKET
;
2635 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2636 return Token
.CLOSE_BRACKET
;
2638 val
= LocatedToken
.Create (ref_line
, col
);
2640 // Expression versions of parens can appear in block context only
2642 if (parsing_block
!= 0 && !lambda_arguments_parsing
) {
2645 // Optimize most common case where we know that parens
2648 switch (current_token
) {
2649 case Token
.IDENTIFIER
:
2657 case Token
.DELEGATE
:
2658 case Token
.OP_GENERICS_GT
:
2659 return Token
.OPEN_PARENS
;
2662 // Optimize using peek
2663 int xx
= peek_char ();
2670 return Token
.OPEN_PARENS
;
2673 lambda_arguments_parsing
= true;
2675 d
= TokenizeOpenParens ();
2677 lambda_arguments_parsing
= false;
2681 return Token
.OPEN_PARENS
;
2683 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2684 return Token
.CLOSE_PARENS
;
2686 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2689 val
= LocatedToken
.CreateOptional (ref_line
, col
);
2690 return Token
.SEMICOLON
;
2692 val
= LocatedToken
.Create (ref_line
, col
);
2695 val
= LocatedToken
.Create (ref_line
, col
);
2696 return TokenizePossibleNullableType ();
2698 val
= LocatedToken
.Create (ref_line
, col
);
2699 if (parsing_generic_less_than
++ > 0)
2700 return Token
.OP_GENERICS_LT
;
2702 return TokenizeLessThan ();
2705 val
= LocatedToken
.Create (ref_line
, col
);
2713 if (parsing_generic_less_than
> 1 || (parsing_generic_less_than
== 1 && d
!= '>')) {
2714 parsing_generic_less_than
--;
2715 return Token
.OP_GENERICS_GT
;
2724 return Token
.OP_SHIFT_RIGHT_ASSIGN
;
2726 return Token
.OP_SHIFT_RIGHT
;
2732 val
= LocatedToken
.Create (ref_line
, col
);
2736 } else if (d
== '=') {
2737 d
= Token
.OP_ADD_ASSIGN
;
2745 val
= LocatedToken
.Create (ref_line
, col
);
2749 } else if (d
== '=')
2750 d
= Token
.OP_SUB_ASSIGN
;
2760 val
= LocatedToken
.Create (ref_line
, col
);
2761 if (peek_char () == '='){
2768 val
= LocatedToken
.Create (ref_line
, col
);
2779 return Token
.ASSIGN
;
2782 val
= LocatedToken
.Create (ref_line
, col
);
2786 return Token
.OP_AND
;
2790 return Token
.OP_AND_ASSIGN
;
2792 return Token
.BITWISE_AND
;
2795 val
= LocatedToken
.Create (ref_line
, col
);
2803 return Token
.OP_OR_ASSIGN
;
2805 return Token
.BITWISE_OR
;
2808 val
= LocatedToken
.Create (ref_line
, col
);
2809 if (peek_char () == '='){
2811 return Token
.OP_MULT_ASSIGN
;
2818 val
= LocatedToken
.Create (ref_line
, col
);
2820 return Token
.OP_DIV_ASSIGN
;
2823 // Handle double-slash comments.
2826 if (RootContext
.Documentation
!= null && peek_char () == '/') {
2828 // Don't allow ////.
2829 if ((d
= peek_char ()) != '/') {
2830 update_comment_location ();
2831 if (doc_state
== XmlCommentState
.Allowed
)
2832 handle_one_line_xml_comment ();
2833 else if (doc_state
== XmlCommentState
.NotAllowed
)
2834 warn_incorrect_doc_comment ();
2837 while ((d
= get_char ()) != -1 && (d
!= '\n') && d
!= '\r');
2839 any_token_seen
|= tokens_seen
;
2840 tokens_seen
= false;
2841 comments_seen
= false;
2843 } else if (d
== '*'){
2845 bool docAppend
= false;
2846 if (RootContext
.Documentation
!= null && peek_char () == '*') {
2848 update_comment_location ();
2849 // But when it is /**/, just do nothing.
2850 if (peek_char () == '/') {
2854 if (doc_state
== XmlCommentState
.Allowed
)
2856 else if (doc_state
== XmlCommentState
.NotAllowed
)
2857 warn_incorrect_doc_comment ();
2860 int current_comment_start
= 0;
2862 current_comment_start
= xml_comment_buffer
.Length
;
2863 xml_comment_buffer
.Append (Environment
.NewLine
);
2866 while ((d
= get_char ()) != -1){
2867 if (d
== '*' && peek_char () == '/'){
2869 comments_seen
= true;
2873 xml_comment_buffer
.Append ((char) d
);
2876 any_token_seen
|= tokens_seen
;
2877 tokens_seen
= false;
2879 // Reset 'comments_seen' just to be consistent.
2880 // It doesn't matter either way, here.
2882 comments_seen
= false;
2886 Report
.Error (1035, Location
, "End-of-file found, '*/' expected");
2889 update_formatted_doc_comment (current_comment_start
);
2895 val
= LocatedToken
.Create (ref_line
, col
);
2896 if (peek_char () == '='){
2898 return Token
.OP_MOD_ASSIGN
;
2900 return Token
.PERCENT
;
2903 val
= LocatedToken
.Create (ref_line
, col
);
2904 if (peek_char () == '='){
2906 return Token
.OP_XOR_ASSIGN
;
2908 return Token
.CARRET
;
2911 val
= LocatedToken
.Create (ref_line
, col
);
2912 if (peek_char () == ':') {
2914 return Token
.DOUBLE_COLON
;
2918 case '0': case '1': case '2': case '3': case '4':
2919 case '5': case '6': case '7': case '8': case '9':
2921 return is_number (c
);
2923 case '\n': // white space
2924 any_token_seen
|= tokens_seen
;
2925 tokens_seen
= false;
2926 comments_seen
= false;
2932 if (d
>= '0' && d
<= '9')
2933 return is_number (c
);
2937 if (tokens_seen
|| comments_seen
) {
2938 Eror_WrongPreprocessorLocation ();
2942 if (handle_preprocessing_directive (true))
2945 bool directive_expected
= false;
2946 while ((c
= get_char ()) != -1) {
2948 directive_expected
= true;
2949 } else if (!directive_expected
) {
2950 // TODO: Implement comment support for disabled code and uncomment this code
2952 // Eror_WrongPreprocessorLocation ();
2953 // return Token.ERROR;
2958 if (c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n' || c
== '\f' || c
== '\v' )
2962 if (handle_preprocessing_directive (false))
2965 directive_expected
= false;
2969 tokens_seen
= false;
2976 return consume_string (false);
2979 return TokenizeBackslash ();
2985 return consume_string (true);
2988 if (is_identifier_start_character (c
)){
2989 return consume_identifier (c
, true);
2992 Report
.Error (1646, Location
, "Keyword, identifier, or string expected after verbatim specifier: @");
2995 case EvalStatementParserCharacter
:
2996 return Token
.EVAL_STATEMENT_PARSER
;
2997 case EvalCompilationUnitParserCharacter
:
2998 return Token
.EVAL_COMPILATION_UNIT_PARSER
;
2999 case EvalUsingDeclarationsParserCharacter
:
3000 return Token
.EVAL_USING_DECLARATIONS_UNIT_PARSER
;
3003 if (is_identifier_start_character (c
)) {
3005 return consume_identifier (c
);
3008 error_details
= ((char)c
).ToString ();
3014 return Token
.COMPLETE_COMPLETION
;
3017 return Token
.GENERATE_COMPLETION
;
3024 int TokenizeBackslash ()
3026 int c
= get_char ();
3029 error_details
= "Empty character literal";
3030 Report
.Error (1011, Location
, error_details
);
3033 if (c
== '\r' || c
== '\n') {
3034 Report
.Error (1010, Location
, "Newline in constant");
3039 c
= escape (c
, out d
);
3043 throw new NotImplementedException ();
3045 val
= new CharLiteral ((char) c
, Location
);
3049 Report
.Error (1012, Location
, "Too many characters in character literal");
3051 // Try to recover, read until newline or next "'"
3052 while ((c
= get_char ()) != -1) {
3053 if (c
== '\n' || c
== '\'')
3059 return Token
.LITERAL
;
3062 int TokenizeLessThan ()
3065 if (handle_typeof
) {
3067 if (parse_generic_dimension (out d
)) {
3070 return Token
.GENERIC_DIMENSION
;
3075 // Save current position and parse next token.
3077 if (parse_less_than ()) {
3078 if (parsing_generic_declaration
&& token () != Token
.DOT
) {
3079 d
= Token
.OP_GENERICS_LT_DECL
;
3081 d
= Token
.OP_GENERICS_LT
;
3088 parsing_generic_less_than
= 0;
3097 return Token
.OP_SHIFT_LEFT_ASSIGN
;
3099 return Token
.OP_SHIFT_LEFT
;
3110 // Handles one line xml comment
3112 private void handle_one_line_xml_comment ()
3115 while ((c
= peek_char ()) == ' ')
3116 get_char (); // skip heading whitespaces.
3117 while ((c
= peek_char ()) != -1 && c
!= '\n' && c
!= '\r') {
3118 xml_comment_buffer
.Append ((char) get_char ());
3120 if (c
== '\r' || c
== '\n')
3121 xml_comment_buffer
.Append (Environment
.NewLine
);
3125 // Remove leading "*" in Javadoc-like XML documentation.
3127 private void update_formatted_doc_comment (int current_comment_start
)
3129 int length
= xml_comment_buffer
.Length
- current_comment_start
;
3130 string [] lines
= xml_comment_buffer
.ToString (
3131 current_comment_start
,
3132 length
).Replace ("\r", "").Split ('\n');
3134 // The first line starts with /**, thus it is not target
3135 // for the format check.
3136 for (int i
= 1; i
< lines
.Length
; i
++) {
3137 string s
= lines
[i
];
3138 int idx
= s
.IndexOf ('*');
3141 if (i
< lines
.Length
- 1)
3145 head
= s
.Substring (0, idx
);
3146 foreach (char c
in head
)
3149 lines
[i
] = s
.Substring (idx
+ 1);
3151 xml_comment_buffer
.Remove (current_comment_start
, length
);
3152 xml_comment_buffer
.Insert (current_comment_start
, String
.Join (Environment
.NewLine
, lines
));
3156 // Updates current comment location.
3158 private void update_comment_location ()
3160 if (current_comment_location
.IsNull
) {
3161 // "-2" accounts for the leading "//" or "/*"
3162 current_comment_location
=
3163 new Location (ref_line
, hidden
? -1 : col
- 2);
3168 // Checks whether there were incorrect doc comments and raises
3171 public void check_incorrect_doc_comment ()
3173 if (xml_comment_buffer
.Length
> 0)
3174 warn_incorrect_doc_comment ();
3178 // Raises a warning when the tokenizer finds an incorrect doc comment
3181 private void warn_incorrect_doc_comment ()
3183 if (doc_state
!= XmlCommentState
.Error
) {
3184 doc_state
= XmlCommentState
.Error
;
3185 // in csc, it is 'XML comment is not placed on
3186 // a valid language element'. But that does not
3188 Report
.Warning (1587, 2, Location
, "XML comment is not placed on a valid language element");
3193 // Consumes the saved xml comment lines (if any)
3194 // as for current target member or type.
3196 public string consume_doc_comment ()
3198 if (xml_comment_buffer
.Length
> 0) {
3199 string ret
= xml_comment_buffer
.ToString ();
3200 reset_doc_comment ();
3207 get { return context.Report; }
3210 void reset_doc_comment ()
3212 xml_comment_buffer
.Length
= 0;
3213 current_comment_location
= Location
.Null
;
3216 public void cleanup ()
3218 if (ifstack
!= null && ifstack
.Count
>= 1) {
3219 int state
= ifstack
.Pop ();
3220 if ((state
& REGION
) != 0)
3221 Report
.Error (1038, Location
, "#endregion directive expected");
3223 Report
.Error (1027, Location
, "Expected `#endif' directive");
3229 // Indicates whether it accepts XML documentation or not.
3231 public enum XmlCommentState
{
3232 // comment is allowed in this state.
3234 // comment is not allowed in this state.
3236 // once comments appeared when it is NotAllowed, then the
3237 // state is changed to it, until the state is changed to