add SafeSerializationEventArgs
[mcs.git] / mcs / cs-tokenizer.cs
blob5d1c92c54159e13f22995161cdc23ca7d69ac297
1 //
2 // cs-tokenizer.cs: The Tokenizer for the C# compiler
3 // This also implements the preprocessor
4 //
5 // Author: Miguel de Icaza (miguel@gnu.org)
6 // Marek Safar (marek.safar@seznam.cz)
7 //
8 // Dual licensed under the terms of the MIT X11 or GNU GPL
9 //
10 // Copyright 2001, 2002 Ximian, Inc (http://www.ximian.com)
11 // Copyright 2004-2008 Novell, Inc
15 using System;
16 using System.Text;
17 using System.Collections.Generic;
18 using System.IO;
19 using System.Globalization;
20 using System.Reflection;
22 namespace Mono.CSharp
24 /// <summary>
25 /// Tokenizer for C# source code.
26 /// </summary>
28 public class Tokenizer : yyParser.yyInput
//
// One node of the keyword lookup table: the keyword text as a character
// array, the parser token it maps to, and a link to the next keyword that
// shares the same bucket.
//
class KeywordEntry
{
	public readonly char[] Value;
	public readonly int Token;
	public KeywordEntry Next;

	public KeywordEntry (string value, int token)
	{
		Value = value.ToCharArray ();
		Token = token;
	}
}
//
// Equality comparer for identifier buffers. Only the first `length'
// characters of each array take part in comparison and hashing, so the
// arrays handed in must hold at least that many characters.
//
sealed class IdentifiersComparer : IEqualityComparer<char[]>
{
	readonly int length;

	public IdentifiersComparer (int length)
	{
		this.length = length;
	}

	// True when the first `length' characters of both arrays are the same.
	public bool Equals (char[] x, char[] y)
	{
		int index = 0;
		while (index < length) {
			if (x [index] != y [index])
				return false;

			++index;
		}

		return true;
	}

	// Hash over the first `length' characters (hash * 31 + character).
	public int GetHashCode (char[] obj)
	{
		int hash = 0;
		int index = 0;
		while (index < length) {
			hash = (hash << 5) - hash + obj [index];
			++index;
		}

		return hash;
	}
}
72 // This class has to be used in the parser only, it reuses token
73 // details after each parse
//
// Token value plus source position. Instances are recycled from a static
// pool: Initialize () rewinds the pool, after which entries handed out
// earlier are reused and overwritten.
//
public class LocatedToken
{
	const int buffer_size = 10000;

	int row, column;
	string value;

	static LocatedToken[] buffer;
	static int pos;

	private LocatedToken ()
	{
	}

	// Creates (or recycles) a token that carries only a position.
	public static LocatedToken Create (int row, int column)
	{
		return Create (null, row, column);
	}

	//
	// Creates (or recycles) a token with the given value and position.
	// Pool slots are handed out in order; once the pool is exhausted a
	// fresh, unpooled instance is allocated for every call.
	//
	public static LocatedToken Create (string value, int row, int column)
	{
		//
		// TODO: I am not very happy about the logic but it's the best
		// I could come up with for now.
		// Ideally we should be using just a tiny buffer (256 elements) which
		// is enough to hold all details for the current stack and recycle
		// elements popped from the stack, but there is a trick needed to
		// recycle them properly.
		//

		// Allocate the pool on first use so that calling Create () before
		// Initialize () no longer fails with a NullReferenceException.
		if (buffer == null)
			buffer = new LocatedToken [buffer_size];

		LocatedToken entry;
		if (pos >= buffer.Length) {
			entry = new LocatedToken ();
		} else {
			entry = buffer [pos];
			if (entry == null) {
				entry = new LocatedToken ();
				buffer [pos] = entry;
			}

			++pos;
		}

		entry.value = value;
		entry.row = row;
		entry.column = column;
		return entry;
	}

	//
	// Used for token not required by expression evaluator.
	// Currently always returns null; the real creation is compiled out.
	//
	public static LocatedToken CreateOptional (int row, int col)
	{
#if false
		return Create (row, col);
#endif
		return null;
	}

	// Allocates the pool if needed and rewinds it to the first slot.
	public static void Initialize ()
	{
		if (buffer == null)
			buffer = new LocatedToken [buffer_size];
		pos = 0;
	}

	public Location Location {
		get { return new Location (row, column); }
	}

	public string Value {
		get { return value; }
	}
}
147 SeekableStreamReader reader;
148 SourceFile ref_name;
149 CompilationUnit file_name;
150 CompilerContext context;
151 bool hidden = false;
152 int ref_line = 1;
153 int line = 1;
154 int col = 0;
155 int previous_col;
156 int current_token;
157 int tab_size;
158 bool handle_get_set = false;
159 bool handle_remove_add = false;
160 bool handle_where = false;
161 bool handle_typeof = false;
162 bool lambda_arguments_parsing;
163 Location current_comment_location = Location.Null;
164 List<Location> escaped_identifiers;
165 int parsing_generic_less_than;
168 // Used mainly for parser optimizations. Some expressions for instance
169 // can appear only in block (including initializer, base initializer)
170 // scope only
172 public int parsing_block;
173 internal bool query_parsing;
176 // When parsing type only, useful for ambiguous nullable types
178 public int parsing_type;
181 // Set when parsing generic declaration (type or method header)
183 public bool parsing_generic_declaration;
186 // The value indicates that we have not reached any declaration or
187 // namespace yet
189 public int parsing_declaration;
192 // The special character to inject on streams to trigger the EXPRESSION_PARSE
193 // token to be returned. It just happens to be a Unicode character that
194 // would never be part of a program (can not be an identifier).
196 // This character is only tested just before the tokenizer is about to report
197 // an error; So on the regular operation mode, this addition will have no
198 // impact on the tokenizer's performance.
201 public const int EvalStatementParserCharacter = 0x2190; // Unicode Left Arrow
202 public const int EvalCompilationUnitParserCharacter = 0x2191; // Unicode Arrow
203 public const int EvalUsingDeclarationsParserCharacter = 0x2192; // Unicode Arrow
206 // XML documentation buffer. The save point is used to divide
207 // comments on types and comments on members.
209 StringBuilder xml_comment_buffer;
212 // See comment on XmlCommentState enumeration.
214 XmlCommentState xml_doc_state = XmlCommentState.Allowed;
217 // Whether tokens have been seen on this line
219 bool tokens_seen = false;
222 // Set to true once the GENERATE_COMPLETION token has been
223 // returned. This helps produce one GENERATE_COMPLETION,
224 // as many COMPLETE_COMPLETION as necessary to complete the
225 // AST tree and one final EOF.
227 bool generated;
230 // Whether a token has been seen on the file
231 // This is needed because `define' is not allowed to be used
232 // after a token has been seen.
234 bool any_token_seen = false;
236 static readonly char[] simple_whitespaces = new char[] { ' ', '\t' };
//
// Gates the contextual `get' and `set' keywords: GetKeyword only returns
// their tokens while this flag is true.
//
public bool PropertyParsing {
	get {
		return handle_get_set;
	}
	set {
		handle_get_set = value;
	}
}
//
// Gates the contextual `add' and `remove' keywords: GetKeyword only
// returns their tokens while this flag is true.
//
public bool EventParsing {
	get {
		return handle_remove_add;
	}
	set {
		handle_remove_add = value;
	}
}
//
// Gates the contextual `where' keyword: outside of query parsing,
// GetKeyword only returns its token while this flag is true.
//
public bool ConstraintsParsing {
	get {
		return handle_where;
	}
	set {
		handle_where = value;
	}
}
//
// Exposes the handle_typeof flag.
// NOTE(review): presumably set by the parser while inside typeof (...);
// the code that reads the flag is not part of this section.
//
public bool TypeOfParsing {
	get {
		return handle_typeof;
	}
	set {
		handle_typeof = value;
	}
}
//
// Tab width. The constructor defaults it to 4 on Win32NT and to 8 on
// every other platform.
//
public int TabSize {
	get {
		return tab_size;
	}
	set {
		tab_size = value;
	}
}
//
// Current XML documentation comment state. Switching to
// XmlCommentState.Allowed first checks for a misplaced doc comment and
// then resets the collected doc comment, before the new state is stored.
//
public XmlCommentState doc_state {
	get {
		return xml_doc_state;
	}
	set {
		bool allowing = value == XmlCommentState.Allowed;
		if (allowing) {
			check_incorrect_doc_comment ();
			reset_doc_comment ();
		}

		xml_doc_state = value;
	}
}
275 // This is used to trigger completion generation on the parser
276 public bool CompleteOnEOF;
//
// Records the location of an escaped identifier. The list is created
// on first use.
//
void AddEscapedIdentifier (Location loc)
{
	List<Location> known = escaped_identifiers;
	if (known == null) {
		known = new List<Location> ();
		escaped_identifiers = known;
	}

	known.Add (loc);
}
//
// Returns true when `loc' was previously recorded by
// AddEscapedIdentifier; false when nothing has been recorded yet.
//
public bool IsEscapedIdentifier (Location loc)
{
	if (escaped_identifiers == null)
		return false;

	for (int i = 0; i < escaped_identifiers.Count; ++i) {
		Location recorded = escaped_identifiers [i];
		if (recorded.Equals (loc))
			return true;
	}

	return false;
}
298 // Class variables
300 static KeywordEntry[][] keywords;
301 static Dictionary<string, object> keyword_strings; // TODO: HashSet
302 static NumberStyles styles;
303 static NumberFormatInfo csharp_format_info;
306 // Values for the associated token returned
308 internal int putback_char; // Used by repl only
309 object val;
312 // Pre-processor
314 const int TAKING = 1;
315 const int ELSE_SEEN = 4;
316 const int PARENT_TAKING = 8;
317 const int REGION = 16;
320 // pre-processor if stack state:
322 Stack<int> ifstack;
324 static System.Text.StringBuilder string_builder;
326 const int max_id_size = 512;
327 static char [] id_builder = new char [max_id_size];
329 public static Dictionary<char[], string>[] identifiers = new Dictionary<char[], string>[max_id_size + 1];
331 const int max_number_size = 512;
332 static char [] number_builder = new char [max_number_size];
333 static int number_pos;
335 static StringBuilder static_cmd_arg = new System.Text.StringBuilder ();
338 // Details about the error encountered by the tokenizer
340 string error_details;
//
// Text of the last error detail stored by the tokenizer (for example
// "Integral constant is too large"); null when none has been stored.
//
public string error {
	get { return error_details; }
}
//
// Current reference line number (ref_line), the same line value that
// the Location property reports.
//
public int Line {
	get { return ref_line; }
}
355 // This is used when the tokenizer needs to save
356 // the current position as it needs to do some parsing
357 // on its own to disambiguate a token on behalf of the
358 // parser.
360 Stack<Position> position_stack = new Stack<Position> (2);
//
// Snapshot of the tokenizer state taken by PushPosition () and written
// back by PopPosition ().
//
class Position {
	public int position;
	public int line;
	public int ref_line;
	public int col;
	public bool hidden;
	public int putback_char;
	public int previous_col;
	public Stack<int> ifstack;
	public int parsing_generic_less_than;
	public int current_token;
	public object val;

	public Position (Tokenizer t)
	{
		// Reader and line/column tracking
		position = t.reader.Position;
		line = t.line;
		ref_line = t.ref_line;
		col = t.col;
		previous_col = t.previous_col;
		hidden = t.hidden;
		putback_char = t.putback_char;

		// Last token and its value
		current_token = t.current_token;
		val = t.val;
		parsing_generic_less_than = t.parsing_generic_less_than;

		// Preprocessor #if stack. It stays null when the tokenizer has
		// none or it is empty.
		Stack<int> source = t.ifstack;
		if (source != null && source.Count != 0) {
			// Copying a Stack<T> through its constructor reverses the
			// element order, so copy twice to get an exact clone.
			var reversed = new Stack<int> (source);
			ifstack = new Stack<int> (reversed);
		}
	}
}
//
// Saves a snapshot of the current tokenizer state on the position
// stack. Pair it with PopPosition () or DiscardPosition ().
//
public void PushPosition ()
{
	Position snapshot = new Position (this);
	position_stack.Push (snapshot);
}
//
// Removes the most recent snapshot from the position stack and restores
// the tokenizer (reader position, line/column tracking, #if stack and
// last token) to it.
//
public void PopPosition ()
{
	Position saved = position_stack.Pop ();

	// Rewind the input
	reader.Position = saved.position;
	putback_char = saved.putback_char;

	// Line and column tracking
	line = saved.line;
	ref_line = saved.ref_line;
	col = saved.col;
	previous_col = saved.previous_col;
	hidden = saved.hidden;

	// Preprocessor and token state
	ifstack = saved.ifstack;
	parsing_generic_less_than = saved.parsing_generic_less_than;
	current_token = saved.current_token;
	val = saved.val;
}
419 // Do not reset the position, ignore it.
//
// Drops the most recent snapshot without restoring it; the tokenizer
// keeps its current state.
//
public void DiscardPosition ()
{
	position_stack.Pop ();
}
//
// Registers keyword `kw' with parser token `token'. The keyword goes into
// keyword_strings and into the lookup table, which is indexed by keyword
// length and then by first character ('_' .. 'z'); keywords sharing a
// bucket are chained in registration order.
//
static void AddKeyword (string kw, int token)
{
	keyword_strings.Add (kw, null);

	KeywordEntry[] bucket = keywords [kw.Length];
	if (bucket == null) {
		bucket = new KeywordEntry ['z' - '_' + 1];
		keywords [kw.Length] = bucket;
	}

	int slot = kw [0] - '_';
	KeywordEntry tail = bucket [slot];
	if (tail == null) {
		bucket [slot] = new KeywordEntry (kw, token);
		return;
	}

	// Append at the end of the existing chain
	for (; tail.Next != null; tail = tail.Next) {
	}

	tail.Next = new KeywordEntry (kw, token);
}
//
// Builds the static keyword tables: keyword_strings (used by IsKeyword)
// and keywords (used by GetKeyword). Every C# keyword, including the
// contextual ones (get/set, add/remove, partial, where and the LINQ
// query keywords), is registered through AddKeyword; GetKeyword decides
// later whether a contextual keyword is active.
//
448 static void InitTokens ()
450 keyword_strings = new Dictionary<string, object> ();
452 // 11 is the length of the longest keyword for now
// AddKeyword indexes this array directly by keyword length, so it needs
// one slot more than the longest keyword. The longest names registered
// below ("stackalloc", "descending") have 10 characters; a longer keyword
// requires enlarging the array.
453 keywords = new KeywordEntry [11] [];
455 AddKeyword ("__arglist", Token.ARGLIST);
456 AddKeyword ("abstract", Token.ABSTRACT);
457 AddKeyword ("as", Token.AS);
458 AddKeyword ("add", Token.ADD);
459 AddKeyword ("base", Token.BASE);
460 AddKeyword ("bool", Token.BOOL);
461 AddKeyword ("break", Token.BREAK);
462 AddKeyword ("byte", Token.BYTE);
463 AddKeyword ("case", Token.CASE);
464 AddKeyword ("catch", Token.CATCH);
465 AddKeyword ("char", Token.CHAR);
466 AddKeyword ("checked", Token.CHECKED);
467 AddKeyword ("class", Token.CLASS);
468 AddKeyword ("const", Token.CONST);
469 AddKeyword ("continue", Token.CONTINUE);
470 AddKeyword ("decimal", Token.DECIMAL);
471 AddKeyword ("default", Token.DEFAULT);
472 AddKeyword ("delegate", Token.DELEGATE);
473 AddKeyword ("do", Token.DO);
474 AddKeyword ("double", Token.DOUBLE);
475 AddKeyword ("else", Token.ELSE);
476 AddKeyword ("enum", Token.ENUM);
477 AddKeyword ("event", Token.EVENT);
478 AddKeyword ("explicit", Token.EXPLICIT);
479 AddKeyword ("extern", Token.EXTERN);
480 AddKeyword ("false", Token.FALSE);
481 AddKeyword ("finally", Token.FINALLY);
482 AddKeyword ("fixed", Token.FIXED);
483 AddKeyword ("float", Token.FLOAT);
484 AddKeyword ("for", Token.FOR);
485 AddKeyword ("foreach", Token.FOREACH);
486 AddKeyword ("goto", Token.GOTO);
487 AddKeyword ("get", Token.GET);
488 AddKeyword ("if", Token.IF);
489 AddKeyword ("implicit", Token.IMPLICIT);
490 AddKeyword ("in", Token.IN);
491 AddKeyword ("int", Token.INT);
492 AddKeyword ("interface", Token.INTERFACE);
493 AddKeyword ("internal", Token.INTERNAL);
494 AddKeyword ("is", Token.IS);
495 AddKeyword ("lock", Token.LOCK);
496 AddKeyword ("long", Token.LONG);
497 AddKeyword ("namespace", Token.NAMESPACE);
498 AddKeyword ("new", Token.NEW);
499 AddKeyword ("null", Token.NULL);
500 AddKeyword ("object", Token.OBJECT);
501 AddKeyword ("operator", Token.OPERATOR);
502 AddKeyword ("out", Token.OUT);
503 AddKeyword ("override", Token.OVERRIDE);
504 AddKeyword ("params", Token.PARAMS);
505 AddKeyword ("private", Token.PRIVATE);
506 AddKeyword ("protected", Token.PROTECTED);
507 AddKeyword ("public", Token.PUBLIC);
508 AddKeyword ("readonly", Token.READONLY);
509 AddKeyword ("ref", Token.REF);
510 AddKeyword ("remove", Token.REMOVE);
511 AddKeyword ("return", Token.RETURN);
512 AddKeyword ("sbyte", Token.SBYTE);
513 AddKeyword ("sealed", Token.SEALED);
514 AddKeyword ("set", Token.SET);
515 AddKeyword ("short", Token.SHORT);
516 AddKeyword ("sizeof", Token.SIZEOF);
517 AddKeyword ("stackalloc", Token.STACKALLOC);
518 AddKeyword ("static", Token.STATIC);
519 AddKeyword ("string", Token.STRING);
520 AddKeyword ("struct", Token.STRUCT);
521 AddKeyword ("switch", Token.SWITCH);
522 AddKeyword ("this", Token.THIS);
523 AddKeyword ("throw", Token.THROW);
524 AddKeyword ("true", Token.TRUE);
525 AddKeyword ("try", Token.TRY);
526 AddKeyword ("typeof", Token.TYPEOF);
527 AddKeyword ("uint", Token.UINT);
528 AddKeyword ("ulong", Token.ULONG);
529 AddKeyword ("unchecked", Token.UNCHECKED);
530 AddKeyword ("unsafe", Token.UNSAFE);
531 AddKeyword ("ushort", Token.USHORT);
532 AddKeyword ("using", Token.USING);
533 AddKeyword ("virtual", Token.VIRTUAL);
534 AddKeyword ("void", Token.VOID);
535 AddKeyword ("volatile", Token.VOLATILE);
536 AddKeyword ("while", Token.WHILE);
537 AddKeyword ("partial", Token.PARTIAL);
538 AddKeyword ("where", Token.WHERE);
540 // LINQ keywords
541 AddKeyword ("from", Token.FROM);
542 AddKeyword ("join", Token.JOIN);
543 AddKeyword ("on", Token.ON);
544 AddKeyword ("equals", Token.EQUALS);
545 AddKeyword ("select", Token.SELECT);
546 AddKeyword ("group", Token.GROUP);
547 AddKeyword ("by", Token.BY);
548 AddKeyword ("let", Token.LET);
549 AddKeyword ("orderby", Token.ORDERBY);
550 AddKeyword ("ascending", Token.ASCENDING);
551 AddKeyword ("descending", Token.DESCENDING);
552 AddKeyword ("into", Token.INTO);
556 // Class initializer
//
// Type initializer: sets up the number parsing settings (invariant
// culture, NumberStyles.Float), the shared string builder and the
// keyword tables.
//
static Tokenizer ()
{
	string_builder = new StringBuilder ();

	styles = NumberStyles.Float;
	csharp_format_info = NumberFormatInfo.InvariantInfo;

	InitTokens ();
}
//
// Maps the first `id_len' characters of `id' to a keyword token.
//
// Returns -1 when the text is not a keyword, or when it is a contextual
// keyword (get/set, add/remove, where, the query keywords, partial) that
// is not active in the current parser state, so that the caller can treat
// it as an identifier.
//
// Side effects: may look ahead in the input for `default', `from' and
// `partial' (the `:' after `default' is consumed, and after a misplaced
// `partial' the following token is returned instead), may turn on
// query_parsing, and may report diagnostics.
//
int GetKeyword (char[] id, int id_len)
{
	//
	// Keywords are stored in an array of arrays grouped by their
	// length and then by the first character
	//
	if (id_len >= keywords.Length || keywords [id_len] == null)
		return -1;

	//
	// Each bucket array only covers the characters '_' .. 'z' (see
	// AddKeyword). Anything outside that range cannot start a keyword and
	// must not be used as an index: a letter such as U+00AA would
	// otherwise read past the end of the bucket array.
	//
	int first_index = id [0] - '_';
	if (first_index < 0 || first_index > 'z' - '_')
		return -1;

	KeywordEntry kwe = keywords [id_len] [first_index];
	if (kwe == null)
		return -1;

	// Walk the bucket chain; the first character is already known to
	// match, so the comparison starts at index 1.
	int res;
	do {
		res = kwe.Token;
		for (int i = 1; i < id_len; ++i) {
			if (id [i] != kwe.Value [i]) {
				res = 0;
				kwe = kwe.Next;
				break;
			}
		}
	} while (res == 0 && kwe != null);

	if (res == 0)
		return -1;

	int next_token;
	switch (res) {
	case Token.GET:
	case Token.SET:
		if (!handle_get_set)
			res = -1;
		break;
	case Token.REMOVE:
	case Token.ADD:
		if (!handle_remove_add)
			res = -1;
		break;
	case Token.EXTERN:
		if (parsing_declaration == 0)
			res = Token.EXTERN_ALIAS;
		break;
	case Token.DEFAULT:
		if (peek_token () == Token.COLON) {
			token ();
			res = Token.DEFAULT_COLON;
		}
		break;
	case Token.WHERE:
		if (!handle_where && !query_parsing)
			res = -1;
		break;
	case Token.FROM:
		//
		// A query expression is any expression that starts with `from identifier'
		// followed by any token except ; , =
		//
		if (!query_parsing) {
			if (lambda_arguments_parsing) {
				res = -1;
				break;
			}

			PushPosition ();
			// HACK: to disable generics micro-parser, because PushPosition does not
			// store identifiers array
			parsing_generic_less_than = 1;
			switch (xtoken ()) {
			case Token.IDENTIFIER:
			case Token.INT:
			case Token.BOOL:
			case Token.BYTE:
			case Token.CHAR:
			case Token.DECIMAL:
			case Token.FLOAT:
			case Token.LONG:
			case Token.OBJECT:
			case Token.STRING:
			case Token.UINT:
			case Token.ULONG:
				next_token = xtoken ();
				if (next_token == Token.SEMICOLON || next_token == Token.COMMA || next_token == Token.EQUALS)
					goto default;

				res = Token.FROM_FIRST;
				query_parsing = true;
				if (RootContext.Version <= LanguageVersion.ISO_2)
					Report.FeatureIsNotAvailable (Location, "query expressions");
				break;
			case Token.VOID:
				Expression.Error_VoidInvalidInTheContext (Location, Report);
				break;
			default:
				PopPosition ();
				// HACK: A token is not a keyword so we need to restore identifiers buffer
				// which has been overwritten before we grabbed the identifier
				id_builder [0] = 'f'; id_builder [1] = 'r'; id_builder [2] = 'o'; id_builder [3] = 'm';
				return -1;
			}
			PopPosition ();
		}
		break;
	case Token.JOIN:
	case Token.ON:
	case Token.EQUALS:
	case Token.SELECT:
	case Token.GROUP:
	case Token.BY:
	case Token.LET:
	case Token.ORDERBY:
	case Token.ASCENDING:
	case Token.DESCENDING:
	case Token.INTO:
		if (!query_parsing)
			res = -1;
		break;

	case Token.USING:
	case Token.NAMESPACE:
		// TODO: some explanation needed
		check_incorrect_doc_comment ();
		break;

	case Token.PARTIAL:
		if (parsing_block > 0) {
			res = -1;
			break;
		}

		// Save current position and parse next token.
		PushPosition ();

		next_token = token ();
		bool ok = (next_token == Token.CLASS) ||
			(next_token == Token.STRUCT) ||
			(next_token == Token.INTERFACE) ||
			(next_token == Token.VOID);

		PopPosition ();

		if (ok) {
			if (next_token == Token.VOID) {
				if (RootContext.Version == LanguageVersion.ISO_1 ||
				    RootContext.Version == LanguageVersion.ISO_2)
					Report.FeatureIsNotAvailable (Location, "partial methods");
			} else if (RootContext.Version == LanguageVersion.ISO_1)
				Report.FeatureIsNotAvailable (Location, "partial types");

			return res;
		}

		if (next_token < Token.LAST_KEYWORD) {
			Report.Error (267, Location,
				"The `partial' modifier can be used only immediately before `class', `struct', `interface', or `void' keyword");
			return token ();
		}

		res = -1;
		break;
	}

	return res;
}
//
// Current source location: the reference line plus the current column,
// or column -1 while the `hidden' flag is set.
//
public Location Location {
	get {
		int column = hidden ? -1 : col;
		return new Location (ref_line, column);
	}
}
//
// Creates a tokenizer that reads `input' for compilation unit `file'.
// The tab size defaults to 4 on Win32NT and to 8 elsewhere, and the file
// is pushed onto the Location file table.
//
public Tokenizer (SeekableStreamReader input, CompilationUnit file, CompilerContext ctx)
{
	reader = input;
	context = ctx;
	ref_name = file;
	file_name = file;

	putback_char = -1;
	xml_comment_buffer = new StringBuilder ();

	bool on_windows = Environment.OSVersion.Platform == PlatformID.Win32NT;
	tab_size = on_windows ? 4 : 8;

	//
	// FIXME: This could be `Location.Push' but we have to
	// find out why the MS compiler allows this
	//
	Mono.CSharp.Location.Push (file, file);
}
//
// True when `c' can start an identifier: an ASCII letter, an underscore
// or any character that Char.IsLetter accepts.
//
static bool is_identifier_start_character (int c)
{
	if (c == '_')
		return true;

	if (c >= 'a' && c <= 'z')
		return true;

	if (c >= 'A' && c <= 'Z')
		return true;

	return Char.IsLetter ((char) c);
}
//
// True when `c' may appear inside an identifier after its first character.
//
// ASCII letters, digits and the underscore take the fast path. Other
// characters are classified by Unicode category following the C#
// identifier grammar: letters (including letter numbers, Nl), decimal
// digits (Nd), connector punctuation (Pc) and combining marks (Mn, Mc).
// Formatting characters (Cf) are still rejected, because identifiers are
// not normalized to strip them.
//
static bool is_identifier_part_character (char c)
{
	if (c >= 'a' && c <= 'z')
		return true;

	if (c >= 'A' && c <= 'Z')
		return true;

	if (c == '_' || (c >= '0' && c <= '9'))
		return true;

	// No other ASCII character is valid inside an identifier
	if (c < 0x80)
		return false;

	if (Char.IsLetter (c))
		return true;

	switch (Char.GetUnicodeCategory (c)) {
	// letter-character: class Nl (Char.IsLetter does not cover it)
	case UnicodeCategory.LetterNumber:
	// connecting-character: class Pc
	case UnicodeCategory.ConnectorPunctuation:
	// combining-character: classes Mn and Mc
	case UnicodeCategory.NonSpacingMark:
	case UnicodeCategory.SpacingCombiningMark:
	// decimal-digit-character: class Nd
	case UnicodeCategory.DecimalDigitNumber:
		return true;
	}

	return false;
}
//
// True when `s' is one of the names registered through AddKeyword
// (reserved and contextual keywords alike).
//
public static bool IsKeyword (string s)
{
	// The dictionary is used as a set; the stored value is always null.
	object unused;
	return keyword_strings.TryGetValue (s, out unused);
}
794 // Open parens micro parser. Detects both lambda and cast ambiguity.
//
// Decides how an opening parenthesis should be reported by scanning the
// tokens that follow it. Returns
//   Token.OPEN_PARENS_LAMBDA  when the matching `)' is followed by `=>',
//   Token.OPEN_PARENS_CAST    when the parenthesized text is a type, or
//                             could be a type and is followed by a token
//                             that can start a cast operand,
//   Token.OPEN_PARENS         in every other case.
//
// The method consumes tokens and overwrites current_token without saving
// the position itself.
// NOTE(review): callers presumably wrap the call in PushPosition () /
// PopPosition (); the call site is not part of this section - confirm.
// NOTE(review): the code relies on token () updating current_token.
//
796 int TokenizeOpenParens ()
798 int ptoken;
799 current_token = -1;
801 int bracket_level = 0;
// is_type: the contents can only be a type (predefined type, `[]', `<>', `?' or `*' seen).
// can_be_type: the contents look like a possibly qualified name.
802 bool is_type = false;
803 bool can_be_type = false;
805 while (true) {
// ptoken keeps the previously scanned token (-1 on the first iteration).
806 ptoken = current_token;
807 token ();
809 switch (current_token) {
810 case Token.CLOSE_PARENS:
// Read the token that follows the closing parenthesis; it decides the result.
811 token ();
814 // Expression inside parens is lambda, (int i) =>
816 if (current_token == Token.ARROW)
817 return Token.OPEN_PARENS_LAMBDA;
820 // Expression inside parens is single type, (int[])
822 if (is_type)
823 return Token.OPEN_PARENS_CAST;
826 // Expression is possible cast, look at next token, (T)null
828 if (can_be_type) {
829 switch (current_token) {
830 case Token.OPEN_PARENS:
831 case Token.BANG:
832 case Token.TILDE:
833 case Token.IDENTIFIER:
834 case Token.LITERAL:
835 case Token.BASE:
836 case Token.CHECKED:
837 case Token.DELEGATE:
838 case Token.FALSE:
839 case Token.FIXED:
840 case Token.NEW:
841 case Token.NULL:
842 case Token.SIZEOF:
843 case Token.THIS:
844 case Token.THROW:
845 case Token.TRUE:
846 case Token.TYPEOF:
847 case Token.UNCHECKED:
848 case Token.UNSAFE:
849 case Token.DEFAULT:
852 // These can be part of a member access
854 case Token.INT:
855 case Token.UINT:
856 case Token.SHORT:
857 case Token.USHORT:
858 case Token.LONG:
859 case Token.ULONG:
860 case Token.DOUBLE:
861 case Token.FLOAT:
862 case Token.CHAR:
863 case Token.BYTE:
864 case Token.DECIMAL:
865 case Token.BOOL:
866 return Token.OPEN_PARENS_CAST;
869 return Token.OPEN_PARENS;
871 case Token.DOT:
872 case Token.DOUBLE_COLON:
// A qualifier separator is only accepted after an identifier or a closed generic argument list.
873 if (ptoken != Token.IDENTIFIER && ptoken != Token.OP_GENERICS_GT)
874 goto default;
876 continue;
878 case Token.IDENTIFIER:
879 switch (ptoken) {
880 case Token.DOT:
881 case Token.OP_GENERICS_LT:
882 case Token.COMMA:
883 case Token.DOUBLE_COLON:
884 case -1:
885 if (bracket_level == 0)
886 can_be_type = true;
887 continue;
888 default:
// An identifier in any other position (e.g. after a type) rules out a cast.
889 can_be_type = is_type = false;
890 continue;
893 case Token.OBJECT:
894 case Token.STRING:
895 case Token.BOOL:
896 case Token.DECIMAL:
897 case Token.FLOAT:
898 case Token.DOUBLE:
899 case Token.SBYTE:
900 case Token.BYTE:
901 case Token.SHORT:
902 case Token.USHORT:
903 case Token.INT:
904 case Token.UINT:
905 case Token.LONG:
906 case Token.ULONG:
907 case Token.CHAR:
908 case Token.VOID:
909 if (bracket_level == 0)
910 is_type = true;
911 continue;
913 case Token.COMMA:
914 if (bracket_level == 0) {
// A top-level comma rules out a cast. Raising bracket_level to 100 keeps the
// `bracket_level == 0' checks above and below from re-enabling the type flags
// (presumably the intent of the magic value).
915 bracket_level = 100;
916 can_be_type = is_type = false;
918 continue;
920 case Token.OP_GENERICS_LT:
921 case Token.OPEN_BRACKET:
922 if (bracket_level++ == 0)
923 is_type = true;
924 continue;
926 case Token.OP_GENERICS_GT:
927 case Token.CLOSE_BRACKET:
928 --bracket_level;
929 continue;
931 case Token.INTERR_NULLABLE:
932 case Token.STAR:
933 if (bracket_level == 0)
934 is_type = true;
935 continue;
937 case Token.REF:
938 case Token.OUT:
939 can_be_type = is_type = false;
940 continue;
942 default:
// Any other token ends the scan: plain parenthesized expression.
943 return Token.OPEN_PARENS;
//
// True when `s' is a non-empty string whose first character passes
// is_identifier_start_character and whose remaining characters all pass
// is_identifier_part_character. Keywords are not rejected here.
//
public static bool IsValidIdentifier (string s)
{
	if (s == null || s.Length == 0)
		return false;

	bool valid = is_identifier_start_character (s [0]);
	for (int index = 1; valid && index < s.Length; index++)
		valid = is_identifier_part_character (s [index]);

	return valid;
}
//
// Look-ahead used after a `<': returns true when the following tokens
// form a generic argument/parameter list up to the matching `>', and
// false otherwise. Nested lists are handled recursively.
//
// The method consumes tokens and does not restore the position itself.
//
bool parse_less_than ()
{
start:
	int the_token = token ();
	if (the_token == Token.OPEN_BRACKET) {
		// Skip an attribute section in front of a type parameter. Stop at
		// end of input as well: the original loop waited only for `]' and
		// would never terminate on an unclosed `[' at the end of the file.
		do {
			the_token = token ();
			if (the_token == Token.EOF)
				return false;
		} while (the_token != Token.CLOSE_BRACKET);
		the_token = token ();
	} else if (the_token == Token.IN || the_token == Token.OUT) {
		// Variance modifier
		the_token = token ();
	}

	switch (the_token) {
	case Token.IDENTIFIER:
	case Token.OBJECT:
	case Token.STRING:
	case Token.BOOL:
	case Token.DECIMAL:
	case Token.FLOAT:
	case Token.DOUBLE:
	case Token.SBYTE:
	case Token.BYTE:
	case Token.SHORT:
	case Token.USHORT:
	case Token.INT:
	case Token.UINT:
	case Token.LONG:
	case Token.ULONG:
	case Token.CHAR:
	case Token.VOID:
		break;
	case Token.OP_GENERICS_GT:
		return true;

	default:
		return false;
	}

again:
	the_token = token ();

	if (the_token == Token.OP_GENERICS_GT)
		return true;
	else if (the_token == Token.COMMA || the_token == Token.DOT || the_token == Token.DOUBLE_COLON)
		goto start;
	else if (the_token == Token.INTERR_NULLABLE || the_token == Token.STAR)
		goto again;
	else if (the_token == Token.OP_GENERICS_LT) {
		if (!parse_less_than ())
			return false;
		goto again;
	} else if (the_token == Token.OPEN_BRACKET) {
		// Array rank specifier: only commas are allowed before `]'
	rank_specifiers:
		the_token = token ();
		if (the_token == Token.CLOSE_BRACKET)
			goto again;
		else if (the_token == Token.COMMA)
			goto rank_specifiers;
		return false;
	}

	return false;
}
//
// Look-ahead for an unbound generic dimension such as `<,,>'. Reads
// tokens until `>' and returns true, with `dimension' set to the number
// of commas plus one. Returns false at the first token that is neither
// `,' nor `>'.
//
bool parse_generic_dimension (out int dimension)
{
	dimension = 1;

	while (true) {
		int the_token = token ();
		if (the_token == Token.OP_GENERICS_GT)
			return true;

		if (the_token != Token.COMMA)
			return false;

		dimension++;
	}
}
//
// Returns the next token without consuming it: the tokenizer state is
// saved before the read and restored afterwards.
//
public int peek_token ()
{
	PushPosition ();
	int upcoming = token ();
	PopPosition ();

	return upcoming;
}
1054 // Tokenizes `?' using custom disambiguation rules to return one
1055 // of the following tokens: INTERR_NULLABLE, OP_COALESCING, INTERR
1057 // Tricky expression look like:
1059 // Foo ? a = x ? b : c;
//
// Classifies a `?' that has just been read. Returns
//   Token.INTERR_NULLABLE  for a nullable type suffix (T?),
//   Token.OP_COALESCING    for `??' (the second `?' is consumed),
//   Token.INTERR           for the conditional operator.
//
// Cheap checks run first (parser state, next character, previous token).
// When they are inconclusive the method looks ahead in the token stream
// and restores the position before returning.
//
1061 int TokenizePossibleNullableType ()
// Outside of a block, or while the parser is explicitly parsing a type, `?' is always a nullable suffix.
1063 if (parsing_block == 0 || parsing_type > 0)
1064 return Token.INTERR_NULLABLE;
1066 int d = peek_char ();
1067 if (d == '?') {
1068 get_char ();
1069 return Token.OP_COALESCING;
// After `)' or a literal-like token the `?' is treated as the conditional operator.
1072 switch (current_token) {
1073 case Token.CLOSE_PARENS:
1074 case Token.TRUE:
1075 case Token.FALSE:
1076 case Token.NULL:
1077 case Token.LITERAL:
1078 return Token.INTERR;
// Decide on the character that directly follows the `?', when it is not a space.
1081 if (d != ' ') {
1082 if (d == ',' || d == ';' || d == '>')
1083 return Token.INTERR_NULLABLE;
1084 if (d == '*' || (d >= '0' && d <= '9'))
1085 return Token.INTERR;
// Token-level look-ahead; the position saved here is restored by PopPosition () at the end.
1088 PushPosition ();
1089 current_token = Token.NONE;
1090 int next_token;
1091 switch (xtoken ()) {
1092 case Token.LITERAL:
1093 case Token.TRUE:
1094 case Token.FALSE:
1095 case Token.NULL:
1096 case Token.THIS:
1097 case Token.NEW:
1098 next_token = Token.INTERR;
1099 break;
1101 case Token.SEMICOLON:
1102 case Token.COMMA:
1103 case Token.CLOSE_PARENS:
1104 case Token.OPEN_BRACKET:
1105 case Token.OP_GENERICS_GT:
1106 next_token = Token.INTERR_NULLABLE;
1107 break;
1109 default:
1110 next_token = -1;
1111 break;
// The first token after `?' was inconclusive; inspect the second one.
1114 if (next_token == -1) {
1115 switch (xtoken ()) {
1116 case Token.COMMA:
1117 case Token.SEMICOLON:
1118 case Token.OPEN_BRACE:
1119 case Token.CLOSE_PARENS:
1120 case Token.IN:
1121 next_token = Token.INTERR_NULLABLE;
1122 break;
1124 case Token.COLON:
1125 next_token = Token.INTERR;
1126 break;
1128 default:
1129 int ntoken;
// interrs counts `?' tokens (starting with the one being classified), colons counts `:' tokens.
1130 int interrs = 1;
1131 int colons = 0;
1133 // All shortcuts failed, do it the hard way
// Scan up to `;' or end of input; stop early once every `?' has a matching `:'.
1135 while ((ntoken = xtoken ()) != Token.EOF) {
1136 if (ntoken == Token.SEMICOLON)
1137 break;
1139 if (ntoken == Token.COLON) {
1140 if (++colons == interrs)
1141 break;
1142 continue;
1145 if (ntoken == Token.INTERR) {
1146 ++interrs;
1147 continue;
// Balanced `?' / `:' counts mean a conditional expression, otherwise a nullable type.
1151 next_token = colons != interrs ? Token.INTERR_NULLABLE : Token.INTERR;
1152 break;
1156 PopPosition ();
1157 return next_token;
//
// Appends `c' (unless it is -1) and then every decimal digit that
// follows in the input to number_builder. Returns true when at least one
// digit was read from the input; `c' itself does not count.
//
bool decimal_digits (int c)
{
	bool seen_digits = false;

	if (c != -1) {
		if (number_pos == max_number_size)
			Error_NumericConstantTooLong ();
		number_builder [number_pos++] = (char) c;
	}

	//
	// We use peek_char2, because decimal_digits needs to do a
	// 2-character look-ahead (5.ToString for example).
	//
	for (;;) {
		int next = peek_char2 ();

		// Also stops at end of input (-1)
		if (next < '0' || next > '9')
			break;

		if (number_pos == max_number_size)
			Error_NumericConstantTooLong ();
		number_builder [number_pos++] = (char) next;
		get_char ();
		seen_digits = true;
	}

	return seen_digits;
}
//
// True when `e' is an ASCII hexadecimal digit (0-9, A-F or a-f).
//
static bool is_hex (int e)
{
	if (e >= '0' && e <= '9')
		return true;

	if (e >= 'A' && e <= 'F')
		return true;

	return e >= 'a' && e <= 'f';
}
//
// Maps a real literal suffix character to its type code: F/f to Single,
// D/d to Double, M/m to Decimal. Any other character gives
// TypeCode.Empty.
//
static TypeCode real_type_suffix (int c)
{
	if (c == 'F' || c == 'f')
		return TypeCode.Single;

	if (c == 'D' || c == 'd')
		return TypeCode.Double;

	if (c == 'M' || c == 'm')
		return TypeCode.Decimal;

	return TypeCode.Empty;
}
//
// Consumes an optional integer suffix (any mix of U/u and L/l) starting
// at the peeked character `c', then stores in `val' the smallest literal
// that can hold `ul' under that suffix. Always returns Token.LITERAL.
//
int integer_type_suffix (ulong ul, int c)
{
	bool saw_unsigned = false;
	bool saw_long = false;

	if (c != -1) {
		bool more = true;
		while (more) {
			if (c == 'U' || c == 'u') {
				// A repeated suffix ends the scan but is still consumed
				if (saw_unsigned)
					more = false;
				saw_unsigned = true;
				get_char ();
			} else if (c == 'l' || c == 'L') {
				if (c == 'l' && !saw_unsigned) {
					//
					// if we have not seen anything in between
					// report this error
					//
					Report.Warning (78, 4, Location, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
				}

				if (saw_long)
					more = false;
				saw_long = true;
				get_char ();
			} else {
				more = false;
			}

			c = peek_char ();
		}
	}

	bool fits_32_bits = (ul & 0xffffffff00000000) == 0;
	bool top_bit_set = (ul & 0x8000000000000000) != 0;

	if (saw_long && saw_unsigned) {
		val = new ULongLiteral (ul, Location);
	} else if (saw_unsigned) {
		// uint if possible, or ulong else.
		if (fits_32_bits)
			val = new UIntLiteral ((uint) ul, Location);
		else
			val = new ULongLiteral (ul, Location);
	} else if (saw_long) {
		// long if possible, ulong otherwise
		if (top_bit_set)
			val = new ULongLiteral (ul, Location);
		else
			val = new LongLiteral ((long) ul, Location);
	} else if (fits_32_bits) {
		// int if possible, uint otherwise
		uint ui = (uint) ul;

		if ((ui & 0x80000000) != 0)
			val = new UIntLiteral (ui, Location);
		else
			val = new IntLiteral ((int) ui, Location);
	} else {
		// long if possible, ulong otherwise
		if (top_bit_set)
			val = new ULongLiteral (ul, Location);
		else
			val = new LongLiteral ((long) ul, Location);
	}

	return Token.LITERAL;
}
1288 // given `c' as the next char in the input decide whether
1289 // we need to convert to a special type, and then choose
1290 // the best representation for the integer
//
// Converts the decimal digits collected in number_builder to an integer
// literal. `c' is the next input character and is passed on to
// integer_type_suffix, which picks the literal type. An overflowing
// value reports error 1021 and yields the literal 0.
//
int adjust_int (int c)
{
	try {
		// Up to 9 digits always fit in 32 bits
		if (number_pos <= 9) {
			uint small = (uint) (number_builder [0] - '0');

			int index = 1;
			while (index < number_pos) {
				small = checked ((small * 10) + ((uint)(number_builder [index] - '0')));
				index++;
			}

			return integer_type_suffix (small, c);
		}

		ulong large = (uint) (number_builder [0] - '0');

		int position = 1;
		while (position < number_pos) {
			large = checked ((large * 10) + ((uint)(number_builder [position] - '0')));
			position++;
		}

		return integer_type_suffix (large, c);
	} catch (OverflowException) {
		error_details = "Integral constant is too large";
		Report.Error (1021, Location, error_details);
		val = new IntLiteral (0, Location);
		return Token.LITERAL;
	} catch (FormatException) {
		Report.Error (1013, Location, "Invalid number");
		val = new IntLiteral (0, Location);
		return Token.LITERAL;
	}
}
//
// Parses the text collected in number_builder as a real literal of the
// type selected by `t' (Decimal, Single, anything else means double) and
// stores it in `val'. A value outside the range of the type reports
// error 594 and yields the literal 0. Always returns Token.LITERAL.
//
int adjust_real (TypeCode t)
{
	const string range_error = "Floating-point constant is outside the range of type `{0}'";
	string text = new String (number_builder, 0, number_pos);

	if (t == TypeCode.Decimal) {
		try {
			val = new DecimalLiteral (decimal.Parse (text, styles, csharp_format_info), Location);
		} catch (OverflowException) {
			val = new DecimalLiteral (0, Location);
			Report.Error (594, Location, range_error, "decimal");
		}
	} else if (t == TypeCode.Single) {
		try {
			val = new FloatLiteral (float.Parse (text, styles, csharp_format_info), Location);
		} catch (OverflowException) {
			val = new FloatLiteral (0, Location);
			Report.Error (594, Location, range_error, "float");
		}
	} else {
		try {
			val = new DoubleLiteral (double.Parse (text, styles, csharp_format_info), Location);
		} catch (OverflowException) {
			val = new DoubleLiteral (0, Location);
			Report.Error (594, Location, range_error, "double");
		}
	}

	return Token.LITERAL;
}
//
// Reads a hexadecimal literal. is_number calls this after it has read
// the leading `0' and peeked an `x' or `X'. The hex digits are collected
// in number_builder, parsed, and handed to integer_type_suffix together
// with the next input character. Always returns Token.LITERAL.
//
// A value that does not fit in 64 bits reports error 1021, and a literal
// without any digit reports error 1013; both yield the literal 0.
//
int handle_hex ()
{
	int d;
	ulong ul;
	bool too_long = false;

	// Skip the `x' / `X'
	get_char ();
	while ((d = peek_char ()) != -1){
		if (!is_hex (d))
			break;

		// Never write past the end of number_builder. The surplus digits
		// are still consumed so that the whole literal is skipped, and
		// the constant is reported as too large below.
		if (number_pos == max_number_size)
			too_long = true;
		else
			number_builder [number_pos++] = (char) d;

		get_char ();
	}

	if (too_long) {
		error_details = "Integral constant is too large";
		Report.Error (1021, Location, error_details);
		val = new IntLiteral (0, Location);
		return Token.LITERAL;
	}

	string s = new String (number_builder, 0, number_pos);
	try {
		// Up to 8 hex digits always fit in 32 bits
		if (number_pos <= 8)
			ul = System.UInt32.Parse (s, NumberStyles.HexNumber);
		else
			ul = System.UInt64.Parse (s, NumberStyles.HexNumber);
	} catch (OverflowException){
		error_details = "Integral constant is too large";
		Report.Error (1021, Location, error_details);
		val = new IntLiteral (0, Location);
		return Token.LITERAL;
	}
	catch (FormatException) {
		Report.Error (1013, Location, "Invalid number");
		val = new IntLiteral (0, Location);
		return Token.LITERAL;
	}

	return integer_type_suffix (ul, peek_char ());
}
//
// Scans a numeric literal. Invoked once we know the input holds either
// `digits' or `.digits'; c is the first character of the literal.
//
// Hex literals are delegated to handle_hex, integers to adjust_int and
// everything with a fraction, exponent or real suffix to adjust_real.
//
int is_number (int c)
{
	bool seen_real_part = false;

	number_pos = 0;

	if (c >= '0' && c <= '9'){
		if (c == '0'){
			int marker = peek_char ();
			if (marker == 'x' || marker == 'X')
				return handle_hex ();
		}
		decimal_digits (c);
		c = get_char ();
	}

	//
	// "1.1" is a real literal, but "1.string" is NUMBER DOT IDENTIFIER:
	// without digits after the dot, give the dot back and stop.
	//
	if (c == '.'){
		if (!decimal_digits ('.')){
			putback ('.');
			number_pos--;
			return adjust_int (-1);
		}
		seen_real_part = true;
		c = get_char ();
	}

	if (c == 'e' || c == 'E'){
		seen_real_part = true;
		if (number_pos == max_number_size)
			Error_NumericConstantTooLong ();
		number_builder [number_pos++] = 'e';
		c = get_char ();

		// An explicit sign is stored and consumed; otherwise `+' is
		// stored and c is handed on to decimal_digits.
		char sign = '+';
		if (c == '+' || c == '-') {
			sign = (char) c;
			c = -1;
		}
		if (number_pos == max_number_size)
			Error_NumericConstantTooLong ();
		number_builder [number_pos++] = sign;

		decimal_digits (c);
		c = get_char ();
	}

	var suffix = real_type_suffix (c);
	if (suffix == TypeCode.Empty){
		// c is not a real suffix, it belongs to whatever comes next
		putback (c);
		if (!seen_real_part)
			return adjust_int (c);
	}

	return adjust_real (suffix);
}
//
// Reads the hex digits of a `\x', `\u' or `\U' escape. The escape letter
// itself is still pending in the input and is consumed first.
//
// count:     4 or 8 to require exactly that many digits, or -1 to accept
//            between one and four digits.
// surrogate: receives the low surrogate when an 8-digit value lies above
//            0xFFFF (the return value is then the high surrogate),
//            otherwise 0.
// error:     set when a required digit is missing or when an 8-digit
//            value is greater than 0x10FFFF.
//
// Returns the decoded value, or 0 when error is set.
//
int getHex (int count, out int surrogate, out bool error)
{
	// Eight hex digits do not fit in a signed 32-bit value, so accumulate
	// in a long; otherwise values such as 80000000 wrap negative and slip
	// past the range check below.
	long total = 0;
	int top = count != -1 ? count : 4;

	get_char ();
	error = false;
	surrogate = 0;
	for (int i = 0; i < top; i++){
		int c = get_char ();
		int digit;

		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'A' && c <= 'F')
			digit = c - 'A' + 10;
		else if (c >= 'a' && c <= 'f')
			digit = c - 'a' + 10;
		else {
			error = true;
			return 0;
		}

		total = (total * 16) + digit;
		if (count == -1){
			// Variable-length form: stop at the first non-hex character
			int p = peek_char ();
			if (p == -1 || !is_hex ((char)p))
				break;
		}
	}

	if (top == 8) {
		if (total > 0x0010FFFF) {
			error = true;
			return 0;
		}

		if (total >= 0x00010000) {
			// Split into a UTF-16 surrogate pair
			surrogate = (int) ((total - 0x00010000) % 0x0400 + 0xDC00);
			total = (total - 0x00010000) / 0x0400 + 0xD800;
		}
	}

	return (int) total;
}
//
// Decodes an escape sequence. c is the character just read; when it is
// not a backslash it is returned unchanged. Otherwise the character after
// the backslash selects the escape, and the decoded value is returned.
// surrogate receives the second UTF-16 unit when one is produced, else 0.
// An unknown escape reports CS1009 and returns the offending character
// without consuming it.
//
int escape (int c, out int surrogate)
{
	// The look-ahead happens unconditionally, before c is examined
	int next = peek_char ();
	surrogate = 0;
	if (c != '\\')
		return c;

	if (next == 'u' || next == 'U')
		return EscapeUnicode (next, out surrogate);

	if (next == 'x') {
		bool bad_hex;
		int hex_value = getHex (-1, out surrogate, out bad_hex);
		if (!bad_hex)
			return hex_value;
		// A malformed \x falls into the default branch below
	}

	int decoded;
	switch (next){
	case '0':
		decoded = 0; break;
	case 'a':
		decoded = '\a'; break;
	case 'b':
		decoded = '\b'; break;
	case 'f':
		decoded = '\f'; break;
	case 'n':
		decoded = '\n'; break;
	case 'r':
		decoded = '\r'; break;
	case 't':
		decoded = '\t'; break;
	case 'v':
		decoded = '\v'; break;
	case '\\':
		decoded = '\\'; break;
	case '"':
		decoded = '"'; break;
	case '\'':
		decoded = '\''; break;
	default:
		surrogate = 0;
		Report.Error (1009, Location, "Unrecognized escape sequence `\\{0}'", ((char)next).ToString ());
		return next;
	}

	// Consume the escape letter that was only peeked at
	get_char ();
	return decoded;
}
//
// Decodes a unicode escape: eight hex digits when ch is `U', four
// otherwise. Reports CS1009 when the digits are invalid.
//
int EscapeUnicode (int ch, out int surrogate)
{
	bool error;
	int digits = ch == 'U' ? 8 : 4;
	int code = getHex (digits, out surrogate, out error);

	if (error)
		Report.Error (1009, Location, "Unrecognized escape sequence");

	return code;
}
//
// Returns the next input character, taking the pushed-back character
// first when there is one, and updates the line/column bookkeeping.
//
int get_char ()
{
	int x = putback_char;
	if (x == -1)
		x = reader.Read ();
	else
		putback_char = -1;

	if (x == '\n')
		advance_line ();
	else
		col++;

	return x;
}
//
// Moves the position bookkeeping to the start of the next line and
// remembers the column we left so that putback can restore it.
//
void advance_line ()
{
	previous_col = col;
	col = 0;
	line++;
	ref_line++;
}

//
// Returns the next character without consuming it; the character is
// parked in putback_char.
//
int peek_char ()
{
	if (putback_char != -1)
		return putback_char;

	putback_char = reader.Read ();
	return putback_char;
}

//
// Like peek_char, but leaves putback_char untouched by peeking at the
// reader itself.
//
int peek_char2 ()
{
	return putback_char != -1 ? putback_char : reader.Peek ();
}
//
// Pushes c back so that the next get_char / peek_char returns it, and
// rewinds the position bookkeeping. Only one character can be pending;
// a second putback dumps the state to the console and throws.
//
void putback (int c)
{
	bool slot_taken = putback_char != -1;
	if (slot_taken){
		Console.WriteLine ("Col: " + col);
		Console.WriteLine ("Row: " + line);
		Console.WriteLine ("Name: " + ref_name.Name);
		Console.WriteLine ("Current [{0}] putting back [{1}] ", putback_char, c);
		throw new Exception ("This should not happen putback on putback");
	}

	bool crosses_line = c == '\n' || col == 0;
	if (!crosses_line) {
		col--;
	} else {
		// It won't happen though.
		line--;
		ref_line--;
		col = previous_col;
	}

	putback_char = c;
}
//
// True while there is more input, or at end of input when CompleteOnEOF
// is set.
//
public bool advance ()
{
	if (peek_char () != -1)
		return true;

	return CompleteOnEOF;
}

//
// Value attached to the most recently scanned token.
//
public Object Value {
	get { return val; }
}

//
// Method form of Value.
//
public Object value ()
{
	return val;
}

//
// Scans the next token, remembers it in current_token and returns it.
//
public int token ()
{
	int next = xtoken ();
	current_token = next;
	return next;
}
//
// Reads a preprocessor directive line and splits it into the directive
// name (cmd) and the remaining text up to the end of the line (arg, ""
// when there is none). Also resets tokens_seen.
//
// Unicode escapes (\uXXXX and \UXXXXXXXX) are decoded in the directive
// name, and in the argument of #define, #if, #elif and #undef.
//
void get_cmd_arg (out string cmd, out string arg)
{
	int c;

	tokens_seen = false;
	arg = "";

	// skip over white space
	do {
		c = get_char ();
	} while (c == '\r' || c == ' ' || c == '\t');

	static_cmd_arg.Length = 0;
	while (c != -1 && is_identifier_part_character ((char)c)) {
		static_cmd_arg.Append ((char)c);
		c = get_char ();
		if (c == '\\') {
			int peek = peek_char ();
			if (peek == 'U' || peek == 'u') {
				int surrogate;
				// Pass the escape letter, not the backslash, so that
				// `\U' is read with eight digits
				c = EscapeUnicode (peek, out surrogate);
				if (surrogate != 0) {
					if (is_identifier_part_character ((char) c))
						static_cmd_arg.Append ((char) c);
					c = surrogate;
				}
			}
		}
	}

	cmd = static_cmd_arg.ToString ();

	// skip over white space
	while (c == '\r' || c == ' ' || c == '\t')
		c = get_char ();

	static_cmd_arg.Length = 0;

	// 0: not decided yet, 1: escapes are decoded, -1: escapes are kept verbatim
	int has_identifier_argument = 0;

	while (c != -1 && c != '\n' && c != '\r') {
		if (c == '\\' && has_identifier_argument >= 0) {
			if (has_identifier_argument != 0 || (cmd == "define" || cmd == "if" || cmd == "elif" || cmd == "undef")) {
				has_identifier_argument = 1;

				int peek = peek_char ();
				if (peek == 'U' || peek == 'u') {
					int surrogate;
					c = EscapeUnicode (peek, out surrogate);
					if (surrogate != 0) {
						if (is_identifier_part_character ((char) c))
							static_cmd_arg.Append ((char) c);
						c = surrogate;
					}
				}
			} else {
				has_identifier_argument = -1;
			}
		}
		static_cmd_arg.Append ((char) c);
		c = get_char ();
	}

	if (static_cmd_arg.Length != 0)
		arg = static_cmd_arg.ToString ();
}
//
// Handles the #line directive. arg is one of `default', `hidden',
// `number' or `number "file"'. Returns false when the argument is
// missing or cannot be processed.
//
bool PreProcessLine (string arg)
{
	if (arg.Length == 0)
		return false;

	switch (arg) {
	case "default":
		ref_line = line;
		ref_name = file_name;
		hidden = false;
		Location.Push (file_name, ref_name);
		return true;
	case "hidden":
		hidden = true;
		return true;
	}

	try {
		int space = arg.IndexOf (' ');

		if (space == -1 || space == 0) {
			// Line number only
			ref_line = System.Int32.Parse (arg);
			hidden = false;
		} else {
			// Line number followed by a (possibly quoted) file name
			ref_line = System.Int32.Parse (arg.Substring (0, space));

			string name = arg.Substring (space + 1).Trim ('\"');
			ref_name = Location.LookupFile (file_name, name);
			file_name.AddFile (ref_name);
			hidden = false;
			Location.Push (file_name, ref_name);
		}
	} catch {
		// Any failure is treated as an invalid directive
		return false;
	}

	return true;
}
//
// Handles #define and #undef.
//
// is_define:        true for #define, false for #undef.
// ident:            the directive argument; must be a single identifier
//                   other than `true' / `false'.
// caller_is_taking: when false the argument is validated but the symbol
//                   table is left untouched.
//
void PreProcessDefinition (bool is_define, string ident, bool caller_is_taking)
{
	if (ident.Length == 0 || ident == "true" || ident == "false"){
		Report.Error (1001, Location, "Missing identifier to pre-processor directive");
		return;
	}

	if (ident.IndexOfAny (simple_whitespaces) != -1){
		Error_EndLineExpected ();
		return;
	}

	if (!is_identifier_start_character (ident [0])) {
		Report.Error (1001, Location, "Identifier expected: {0}", ident);
		// Stop here, like the check on the remaining characters does,
		// so an invalid symbol is never defined or undefined
		return;
	}

	for (int i = 1; i < ident.Length; i++){
		if (!is_identifier_part_character (ident [i])){
			Report.Error (1001, Location, "Identifier expected: {0}", ident);
			return;
		}
	}

	if (!caller_is_taking)
		return;

	if (is_define) {
		//
		// #define ident
		//
		if (RootContext.IsConditionalDefined (ident))
			return;

		file_name.AddDefine (ident);
	} else {
		//
		// #undef ident
		//
		file_name.AddUndefine (ident);
	}
}
//
// Decodes the two hex digits at arg [pos] and arg [pos + 1] into a byte.
// error is set (and 0 returned) when either character is not a hex digit.
//
static byte read_hex (string arg, int pos, out bool error)
{
	int value = 0;

	for (int offset = 0; offset < 2; offset++) {
		char digit = arg [pos + offset];
		int nibble;

		if (digit >= '0' && digit <= '9') {
			nibble = digit - '0';
		} else if (digit >= 'A' && digit <= 'F') {
			nibble = digit - 'A' + 10;
		} else if (digit >= 'a' && digit <= 'f') {
			nibble = digit - 'a' + 10;
		} else {
			error = true;
			return 0;
		}

		value = value * 16 + nibble;
	}

	error = false;
	return (byte) value;
}
/// <summary>
///   Handles #pragma checksum. arg is the text following the `checksum'
///   keyword and must have the form
///   <c> "filename" "{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}" "32 hex digits"</c>.
///   Returns false when the text does not match that form.
/// </summary>
bool PreProcessPragmaChecksum (string arg)
{
	// A bare `#pragma checksum' passes an empty string here
	if (arg.Length == 0)
		return false;

	if ((arg [0] != ' ') && (arg [0] != '\t'))
		return false;

	arg = arg.Trim (simple_whitespaces);
	if ((arg.Length < 2) || (arg [0] != '"'))
		return false;

	StringBuilder file_sb = new StringBuilder ();

	// Quoted file name; a backslash takes the following character literally
	int pos = 1;
	char ch;
	while ((ch = arg [pos++]) != '"') {
		if (pos >= arg.Length)
			return false;

		if (ch == '\\') {
			if (pos+1 >= arg.Length)
				return false;
			ch = arg [pos++];
		}

		file_sb.Append (ch);
	}

	if ((pos+2 >= arg.Length) || ((arg [pos] != ' ') && (arg [pos] != '\t')))
		return false;

	// Quoted GUID: "{8-4-4-4-12}" is 40 characters including the quotes
	arg = arg.Substring (pos).Trim (simple_whitespaces);
	if ((arg.Length < 42) || (arg [0] != '"') || (arg [1] != '{') ||
	    (arg [10] != '-') || (arg [15] != '-') || (arg [20] != '-') ||
	    (arg [25] != '-') || (arg [38] != '}') || (arg [39] != '"'))
		return false;

	bool error;
	byte[] guid_bytes = new byte [16];

	for (int i = 0; i < 4; i++) {
		guid_bytes [i] = read_hex (arg, 2+2*i, out error);
		if (error)
			return false;
	}
	for (int i = 0; i < 2; i++) {
		guid_bytes [i+4] = read_hex (arg, 11+2*i, out error);
		if (error)
			return false;
		guid_bytes [i+6] = read_hex (arg, 16+2*i, out error);
		if (error)
			return false;
		guid_bytes [i+8] = read_hex (arg, 21+2*i, out error);
		if (error)
			return false;
	}

	for (int i = 0; i < 6; i++) {
		guid_bytes [i+10] = read_hex (arg, 26+2*i, out error);
		if (error)
			return false;
	}

	// Quoted checksum: 32 hex digits, i.e. 16 bytes
	arg = arg.Substring (40).Trim (simple_whitespaces);
	if ((arg.Length < 34) || (arg [0] != '"') || (arg [33] != '"'))
		return false;

	byte[] checksum_bytes = new byte [16];
	for (int i = 0; i < 16; i++) {
		checksum_bytes [i] = read_hex (arg, 1+2*i, out error);
		if (error)
			return false;
	}

	// Nothing may follow the checksum
	arg = arg.Substring (34).Trim (simple_whitespaces);
	if (arg.Length > 0)
		return false;

	SourceFile file = Location.LookupFile (file_name, file_sb.ToString ());
	file.SetChecksum (guid_bytes, checksum_bytes);
	ref_name.AutoGenerated = true;
	return true;
}
/// <summary>
///   Handles the #pragma directive: `warning disable', `warning restore'
///   (each optionally followed by a comma separated list of warning
///   numbers) and `checksum'. Anything else produces warning CS1633.
/// </summary>
void PreProcessPragma (string arg)
{
	const string warning = "warning";
	const string w_disable = "warning disable";
	const string w_restore = "warning restore";
	const string checksum = "checksum";

	if (arg == w_disable) {
		Report.RegisterWarningRegion (Location).WarningDisable (Location.Row);
		return;
	}

	if (arg == w_restore) {
		Report.RegisterWarningRegion (Location).WarningEnable (Location.Row);
		return;
	}

	// Directive keywords are matched ordinally; a culture-sensitive
	// comparison is neither needed nor wanted for fixed ASCII keywords.
	if (arg.StartsWith (w_disable, StringComparison.Ordinal)) {
		int[] codes = ParseNumbers (arg.Substring (w_disable.Length));
		foreach (int code in codes) {
			if (code != 0)
				Report.RegisterWarningRegion (Location).WarningDisable (Location, code, Report);
		}
		return;
	}

	if (arg.StartsWith (w_restore, StringComparison.Ordinal)) {
		int[] codes = ParseNumbers (arg.Substring (w_restore.Length));
		var w_table = Report.warning_ignore_table;
		foreach (int code in codes) {
			if (w_table != null && w_table.ContainsKey (code))
				Report.Warning (1635, 1, Location, "Cannot restore warning `CS{0:0000}' because it was disabled globally", code);
			Report.RegisterWarningRegion (Location).WarningEnable (Location, code, Report);
		}
		return;
	}

	if (arg.StartsWith (warning, StringComparison.Ordinal)) {
		Report.Warning (1634, 1, Location, "Expected disable or restore");
		return;
	}

	if (arg.StartsWith (checksum, StringComparison.Ordinal)) {
		if (!PreProcessPragmaChecksum (arg.Substring (checksum.Length)))
			Warning_InvalidPragmaChecksum ();
		return;
	}

	Report.Warning (1633, 1, Location, "Unrecognized #pragma directive");
}
//
// Parses a comma separated list of integers. The result has one entry
// per list item; an item that is not a valid Int32 (malformed or out of
// range) produces warning CS1692 and leaves 0 in its slot.
//
int[] ParseNumbers (string text)
{
	string[] string_array = text.Split (',');
	int[] values = new int [string_array.Length];

	for (int i = 0; i < string_array.Length; i++) {
		// TryParse also covers values too large for an int, which
		// int.Parse signals with an OverflowException
		if (!int.TryParse (string_array [i], NumberStyles.Integer, CultureInfo.InvariantCulture, out values [i]))
			Report.Warning (1692, 1, Location, "Invalid number");
	}

	return values;
}
//
// Value of a preprocessor operand: the literals `true' and `false', or
// whether the symbol s is defined for this file.
//
bool eval_val (string s)
{
	switch (s) {
	case "true":
		return true;
	case "false":
		return false;
	default:
		return file_name.IsConditionalDefined (s);
	}
}
//
// Parses a primary preprocessor expression at the start of s: either a
// parenthesized expression or a symbol. The consumed text is removed
// from s. Reports CS1517 and returns false on anything else.
//
bool pp_primary (ref string s)
{
	s = s.Trim ();
	if (s.Length == 0) {
		Error_InvalidDirective ();
		return false;
	}

	char first = s [0];

	if (first == '(') {
		s = s.Substring (1);
		bool inner = pp_expr (ref s, false);
		if (s.Length == 0 || s [0] != ')') {
			Error_InvalidDirective ();
			return false;
		}

		s = s.Substring (1);
		return inner;
	}

	if (!is_identifier_start_character (first)) {
		Error_InvalidDirective ();
		return false;
	}

	// Find where the symbol ends
	int end = 1;
	while (end < s.Length && is_identifier_part_character (s [end]))
		end++;

	bool defined = eval_val (s.Substring (0, end));
	s = s.Substring (end);
	return defined;
}
//
// Parses an optional `!' followed by a primary expression. An empty
// input or a leading `!=' reports CS1517 and yields false.
//
bool pp_unary (ref string s)
{
	s = s.Trim ();

	if (s.Length == 0) {
		Error_InvalidDirective ();
		return false;
	}

	if (s [0] != '!')
		return pp_primary (ref s);

	if (s.Length > 1 && s [1] == '=') {
		Error_InvalidDirective ();
		return false;
	}

	s = s.Substring (1);
	return !pp_primary (ref s);
}
//
// Parses `unary', `unary == unary' or `unary != unary'. A lone `=' (or
// `==' with nothing after it) reports CS1517 and yields false.
//
bool pp_eq (ref string s)
{
	bool left = pp_unary (ref s);

	s = s.Trim ();
	if (s.Length == 0)
		return left;

	char op = s [0];

	if (op == '=') {
		if (s.Length > 2 && s [1] == '=') {
			s = s.Substring (2);
			return left == pp_unary (ref s);
		}

		Error_InvalidDirective ();
		return false;
	}

	if (op == '!' && s.Length > 1 && s [1] == '=') {
		s = s.Substring (2);
		return left != pp_unary (ref s);
	}

	return left;
}
//
// Parses a chain of equality expressions joined by `&&'. The right hand
// side is always parsed, even when the left side is already false.
//
bool pp_and (ref string s)
{
	bool left = pp_eq (ref s);

	s = s.Trim ();
	if (s.Length == 0 || s [0] != '&')
		return left;

	if (s.Length > 2 && s [1] == '&') {
		s = s.Substring (2);
		bool right = pp_and (ref s);
		return left & right;
	}

	Error_InvalidDirective ();
	return false;
}
//
// Evaluates an expression for `#if' or `#elif': a chain of `&&'
// expressions joined by `||'. When isTerm is set, any text left over
// after the expression reports CS1025 and yields false.
//
bool pp_expr (ref string s, bool isTerm)
{
	bool left = pp_and (ref s);

	s = s.Trim ();
	if (s.Length == 0)
		return left;

	if (s [0] == '|') {
		if (s.Length > 2 && s [1] == '|') {
			s = s.Substring (2);
			// Always parse the right hand side, no short-circuit
			bool right = pp_expr (ref s, isTerm);
			return left | right;
		}

		Error_InvalidDirective ();
		return false;
	}

	if (isTerm) {
		Error_EndLineExpected ();
		return false;
	}

	return left;
}
//
// Evaluates a complete #if / #elif condition; false when text remains
// after the expression.
//
bool eval (string s)
{
	bool result = pp_expr (ref s, true);
	bool fully_consumed = s.Trim ().Length == 0;

	return fully_consumed && result;
}
// CS1021: a numeric literal does not fit in the scanner's buffer
void Error_NumericConstantTooLong ()
{
	const string message = "Numeric constant too long";
	Report.Error (1021, Location, message);
}

// CS1517: malformed #if / #elif expression
void Error_InvalidDirective ()
{
	const string message = "Invalid preprocessor directive";
	Report.Error (1517, Location, message);
}

// CS1028: a directive that does not match the current #if / #region nesting
void Error_UnexpectedDirective (string extra)
{
	const string format = "Unexpected processor directive ({0})";
	Report.Error (1028, Location, format, extra);
}

// CS1032: #define / #undef after the first token of the file
void Error_TokensSeen ()
{
	const string message = "Cannot define or undefine preprocessor symbols after first token in file";
	Report.Error (1032, Location, message);
}

// CS1040: a directive that is not the first thing on its line
void Eror_WrongPreprocessorLocation ()
{
	const string message = "Preprocessor directives must appear as the first non-whitespace character on a line";
	Report.Error (1040, Location, message);
}

// CS1025: unexpected text after a directive
void Error_EndLineExpected ()
{
	const string message = "Single-line comment or end-of-line expected";
	Report.Error (1025, Location, message);
}

// CS1695 (warning): malformed #pragma checksum
void Warning_InvalidPragmaChecksum ()
{
	const string message =
		"Invalid #pragma checksum syntax; should be " +
		"#pragma checksum \"filename\" " +
		"\"{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}\" \"XXXX...\"";
	Report.Warning (1695, 1, Location, message);
}
//
// Processes one preprocessor directive.
//
// Returns true when the tokenizer should go on processing the source
// that follows, false when it should skip ahead until another directive
// is reached.
//
// When caller_is_taking is false every directive is ignored except the
// ones that help locate the end of the enclosing #if block.
//
// Each entry of ifstack is a bit set made of REGION, TAKING,
// PARENT_TAKING and ELSE_SEEN.
//
bool handle_preprocessing_directive (bool caller_is_taking)
{
	string cmd, arg;
	bool region_directive = false;

	get_cmd_arg (out cmd, out arg);

	// Drop a trailing single-line comment, then surrounding whitespace
	int comment_start = arg.IndexOf ("//");
	if (comment_start != -1)
		arg = arg.Substring (0, comment_start);
	arg = arg.Trim (simple_whitespaces);

	bool nothing_open = ifstack == null || ifstack.Count == 0;

	//
	// The first group of pre-processing instructions is always processed
	//
	switch (cmd){
	case "region":
		// A region is tracked like `#if true' carrying the REGION flag
		region_directive = true;
		arg = "true";
		goto case "if";

	case "endregion": {
		if (nothing_open){
			Error_UnexpectedDirective ("no #region for this #endregion");
			return true;
		}

		if ((ifstack.Pop () & REGION) == 0)
			Report.Error (1027, Location, "Expected `#endif' directive");

		return caller_is_taking;
	}

	case "if": {
		if (ifstack == null)
			ifstack = new Stack<int> (2);

		int new_state = region_directive ? REGION : 0;
		if (ifstack.Count == 0 || (ifstack.Peek () & TAKING) != 0)
			new_state |= PARENT_TAKING;

		bool taken = caller_is_taking && eval (arg);
		if (taken)
			new_state |= TAKING;

		ifstack.Push (new_state);
		return taken;
	}

	case "endif": {
		if (nothing_open){
			Error_UnexpectedDirective ("no #if for this #endif");
			return true;
		}

		if ((ifstack.Pop () & REGION) != 0)
			Report.Error (1038, Location, "#endregion directive expected");

		if (arg.Length != 0)
			Error_EndLineExpected ();

		// Resume according to the block that encloses the one just closed
		return ifstack.Count == 0 || (ifstack.Peek () & TAKING) != 0;
	}

	case "elif": {
		if (nothing_open){
			Error_UnexpectedDirective ("no #if for this #elif");
			return true;
		}

		int elif_state = ifstack.Pop ();

		if ((elif_state & REGION) != 0) {
			Report.Error (1038, Location, "#endregion directive expected");
			return true;
		}

		if ((elif_state & ELSE_SEEN) != 0){
			Error_UnexpectedDirective ("#elif not valid after #else");
			return true;
		}

		// An earlier branch was taken: push an entry with no flags set
		if ((elif_state & TAKING) != 0) {
			ifstack.Push (0);
			return false;
		}

		bool taken = eval (arg) && ((elif_state & PARENT_TAKING) != 0);
		ifstack.Push (taken ? elif_state | TAKING : elif_state);
		return taken;
	}

	case "else": {
		if (nothing_open){
			Error_UnexpectedDirective ("no #if for this #else");
			return true;
		}

		int else_state = ifstack.Peek ();

		if ((else_state & REGION) != 0) {
			Report.Error (1038, Location, "#endregion directive expected");
			return true;
		}

		if ((else_state & ELSE_SEEN) != 0){
			Error_UnexpectedDirective ("#else within #else");
			return true;
		}

		ifstack.Pop ();

		if (arg.Length != 0) {
			Error_EndLineExpected ();
			return true;
		}

		// The #else branch is taken when the parent is taking and the
		// entry does not already have TAKING set
		bool take_else = false;
		if ((else_state & PARENT_TAKING) != 0) {
			take_else = (else_state & TAKING) == 0;
			else_state = take_else ? else_state | TAKING : else_state & ~TAKING;
		}

		ifstack.Push (else_state | ELSE_SEEN);
		return take_else;
	}

	case "define":
		if (any_token_seen)
			Error_TokensSeen ();
		else
			PreProcessDefinition (true, arg, caller_is_taking);
		return caller_is_taking;

	case "undef":
		if (any_token_seen)
			Error_TokensSeen ();
		else
			PreProcessDefinition (false, arg, caller_is_taking);
		return caller_is_taking;
	}

	//
	// These are only processed if we are in a `taking' block
	//
	if (!caller_is_taking)
		return false;

	switch (cmd){
	case "error":
		Report.Error (1029, Location, "#error: '{0}'", arg);
		return true;

	case "warning":
		Report.Warning (1030, 1, Location, "#warning: `{0}'", arg);
		return true;

	case "pragma":
		if (RootContext.Version == LanguageVersion.ISO_1)
			Report.FeatureIsNotAvailable (Location, "#pragma");
		else
			PreProcessPragma (arg);
		return true;

	case "line":
		if (!PreProcessLine (arg))
			Report.Error (
				1576, Location,
				"The line number specified for #line directive is missing or invalid");
		return caller_is_taking;

	default:
		Report.Error (1024, Location, "Wrong preprocessor directive");
		return true;
	}
}
//
// Scans the body of a string literal whose opening quote has already
// been consumed. quoted selects the variant in which `""' stands for a
// single quote character and escape sequences are not decoded.
//
// Returns Token.LITERAL with a StringLiteral in `val', Token.ERROR when
// an escape yields -1, or Token.EOF (after CS1039) when the input ends
// before the closing quote.
//
private int consume_string (bool quoted)
{
	string_builder.Length = 0;

	for (int c = get_char (); c != -1; c = get_char ()) {
		if (c == '"') {
			if (!quoted || peek_char () != '"') {
				val = new StringLiteral (string_builder.ToString (), Location);
				return Token.LITERAL;
			}

			// Doubled quote: keep one and skip the other
			string_builder.Append ((char) c);
			get_char ();
			continue;
		}

		if (!quoted) {
			if (c == '\n')
				Report.Error (1010, Location, "Newline in constant");

			int surrogate;
			c = escape (c, out surrogate);
			if (c == -1)
				return Token.ERROR;
			if (surrogate != 0) {
				string_builder.Append ((char) c);
				c = surrogate;
			}
		}

		string_builder.Append ((char) c);
	}

	Report.Error (1039, Location, "Unterminated string literal");
	return Token.EOF;
}
//
// Scans an unquoted identifier or keyword starting with s. If doc_state
// was Allowed it becomes NotAllowed afterwards.
//
private int consume_identifier (int s)
{
	int token = consume_identifier (s, false);

	if (doc_state == XmlCommentState.Allowed)
		doc_state = XmlCommentState.NotAllowed;

	return token;
}
//
// Scans an identifier or keyword whose first character is c.
//
// quoted: when set, no keyword lookup is done and the identifier is
//         passed to AddEscapedIdentifier.
//
// Returns a keyword token, Token.IDENTIFIER (with a LocatedToken in
// `val'), or Token.ERROR when the identifier does not fit in id_builder.
//
// This method is very performance sensitive. It accounts
// for approximately 25% of all parser time
//
int consume_identifier (int c, bool quoted)
{
	int pos = 0;
	int column = col;

	if (c == '\\') {
		int surrogate;
		c = escape (c, out surrogate);
		if (surrogate != 0) {
			id_builder [pos++] = (char) c;
			c = surrogate;
		}
	}

	id_builder [pos++] = (char) c;

	try {
		while (true) {
			// Read straight from the reader, bypassing get_char
			c = reader.Read ();

			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9')) {
				id_builder [pos++] = (char) c;
				continue;
			}

			if (c < 0x80) {
				if (c == '\\') {
					int surrogate;
					c = escape (c, out surrogate);

					// Keep the decoded character: an escape in the middle
					// of an identifier is part of its name
					if (is_identifier_part_character ((char) c))
						id_builder [pos++] = (char) c;

					// NOTE(review): a second UTF-16 unit returned in
					// `surrogate' is still not stored here - confirm
					// whether such characters should be accepted.
					continue;
				}
			} else if (Char.IsLetter ((char) c) || Char.GetUnicodeCategory ((char) c) == UnicodeCategory.ConnectorPunctuation) {
				id_builder [pos++] = (char) c;
				continue;
			}

			// Not part of the identifier: leave it for the next get_char
			putback_char = c;
			break;
		}
	} catch (IndexOutOfRangeException) {
		// Running off the end of id_builder is how the length limit is detected
		Report.Error (645, Location, "Identifier too long (limit is 512 chars)");
		col += pos - 1;
		return Token.ERROR;
	}

	col += pos - 1;

	//
	// Optimization: avoids doing the keyword lookup
	// on uppercase letters
	//
	if (id_builder [0] >= '_' && !quoted) {
		int keyword = GetKeyword (id_builder, pos);
		if (keyword != -1) {
			val = LocatedToken.Create (null, ref_line, column);
			return keyword;
		}
	}

	//
	// Keep identifiers in an array of hashtables to avoid needless
	// allocations
	//
	var identifiers_group = identifiers [pos];
	string s;
	if (identifiers_group != null) {
		if (identifiers_group.TryGetValue (id_builder, out s)) {
			val = LocatedToken.Create (s, ref_line, column);
			if (quoted)
				AddEscapedIdentifier (((LocatedToken) val).Location);
			return Token.IDENTIFIER;
		}
	} else {
		// TODO: this should be number of files dependant
		// corlib compilation peaks at 1000 and System.Core at 150
		int capacity = pos > 20 ? 10 : 100;
		identifiers_group = new Dictionary<char[],string> (capacity, new IdentifiersComparer (pos));
		identifiers [pos] = identifiers_group;
	}

	// First time this identifier is seen: cache a private copy of the key
	char [] chars = new char [pos];
	Array.Copy (id_builder, chars, pos);

	s = new string (id_builder, 0, pos);
	identifiers_group.Add (chars, s);

	val = LocatedToken.Create (s, ref_line, column);
	if (quoted)
		AddEscapedIdentifier (((LocatedToken) val).Location);

	return Token.IDENTIFIER;
}
2576 public int xtoken ()
2578 int d, c;
2580 // Whether we have seen comments on the current line
2581 bool comments_seen = false;
2582 while ((c = get_char ()) != -1) {
2583 switch (c) {
2584 case '\t':
2585 col = ((col + tab_size) / tab_size) * tab_size;
2586 continue;
2588 case ' ':
2589 case '\f':
2590 case '\v':
2591 case 0xa0:
2592 case 0:
2593 case 0xFEFF: // Ignore BOM anywhere in the file
2594 continue;
2596 /* This is required for compatibility with .NET
2597 case 0xEF:
2598 if (peek_char () == 0xBB) {
2599 PushPosition ();
2600 get_char ();
2601 if (get_char () == 0xBF)
2602 continue;
2603 PopPosition ();
2605 break;
2607 case '\r':
2608 if (peek_char () != '\n')
2609 advance_line ();
2610 else
2611 get_char ();
2613 any_token_seen |= tokens_seen;
2614 tokens_seen = false;
2615 comments_seen = false;
2616 continue;
2618 case '\\':
2619 tokens_seen = true;
2620 return consume_identifier (c);
2622 case '{':
2623 val = LocatedToken.Create (ref_line, col);
2624 return Token.OPEN_BRACE;
2625 case '}':
2626 val = LocatedToken.Create (ref_line, col);
2627 return Token.CLOSE_BRACE;
2628 case '[':
2629 // To block doccomment inside attribute declaration.
2630 if (doc_state == XmlCommentState.Allowed)
2631 doc_state = XmlCommentState.NotAllowed;
2632 val = LocatedToken.CreateOptional (ref_line, col);
2633 return Token.OPEN_BRACKET;
2634 case ']':
2635 val = LocatedToken.CreateOptional (ref_line, col);
2636 return Token.CLOSE_BRACKET;
2637 case '(':
2638 val = LocatedToken.Create (ref_line, col);
2640 // An expression versions of parens can appear in block context only
2642 if (parsing_block != 0 && !lambda_arguments_parsing) {
2645 // Optmize most common case where we know that parens
2646 // is not special
2648 switch (current_token) {
2649 case Token.IDENTIFIER:
2650 case Token.IF:
2651 case Token.FOR:
2652 case Token.FOREACH:
2653 case Token.TYPEOF:
2654 case Token.WHILE:
2655 case Token.USING:
2656 case Token.DEFAULT:
2657 case Token.DELEGATE:
2658 case Token.OP_GENERICS_GT:
2659 return Token.OPEN_PARENS;
2662 // Optimize using peek
2663 int xx = peek_char ();
2664 switch (xx) {
2665 case '(':
2666 case '\'':
2667 case '"':
2668 case '0':
2669 case '1':
2670 return Token.OPEN_PARENS;
2673 lambda_arguments_parsing = true;
2674 PushPosition ();
2675 d = TokenizeOpenParens ();
2676 PopPosition ();
2677 lambda_arguments_parsing = false;
2678 return d;
2681 return Token.OPEN_PARENS;
2682 case ')':
2683 val = LocatedToken.CreateOptional (ref_line, col);
2684 return Token.CLOSE_PARENS;
2685 case ',':
2686 val = LocatedToken.CreateOptional (ref_line, col);
2687 return Token.COMMA;
2688 case ';':
2689 val = LocatedToken.CreateOptional (ref_line, col);
2690 return Token.SEMICOLON;
2691 case '~':
2692 val = LocatedToken.Create (ref_line, col);
2693 return Token.TILDE;
2694 case '?':
2695 val = LocatedToken.Create (ref_line, col);
2696 return TokenizePossibleNullableType ();
2697 case '<':
2698 val = LocatedToken.Create (ref_line, col);
2699 if (parsing_generic_less_than++ > 0)
2700 return Token.OP_GENERICS_LT;
2702 return TokenizeLessThan ();
2704 case '>':
2705 val = LocatedToken.Create (ref_line, col);
2706 d = peek_char ();
2708 if (d == '='){
2709 get_char ();
2710 return Token.OP_GE;
2713 if (parsing_generic_less_than > 1 || (parsing_generic_less_than == 1 && d != '>')) {
2714 parsing_generic_less_than--;
2715 return Token.OP_GENERICS_GT;
2718 if (d == '>') {
2719 get_char ();
2720 d = peek_char ();
2722 if (d == '=') {
2723 get_char ();
2724 return Token.OP_SHIFT_RIGHT_ASSIGN;
2726 return Token.OP_SHIFT_RIGHT;
2729 return Token.OP_GT;
2731 case '+':
2732 val = LocatedToken.Create (ref_line, col);
2733 d = peek_char ();
2734 if (d == '+') {
2735 d = Token.OP_INC;
2736 } else if (d == '=') {
2737 d = Token.OP_ADD_ASSIGN;
2738 } else {
2739 return Token.PLUS;
2741 get_char ();
2742 return d;
2744 case '-':
2745 val = LocatedToken.Create (ref_line, col);
2746 d = peek_char ();
2747 if (d == '-') {
2748 d = Token.OP_DEC;
2749 } else if (d == '=')
2750 d = Token.OP_SUB_ASSIGN;
2751 else if (d == '>')
2752 d = Token.OP_PTR;
2753 else {
2754 return Token.MINUS;
2756 get_char ();
2757 return d;
2759 case '!':
2760 val = LocatedToken.Create (ref_line, col);
2761 if (peek_char () == '='){
2762 get_char ();
2763 return Token.OP_NE;
2765 return Token.BANG;
2767 case '=':
2768 val = LocatedToken.Create (ref_line, col);
2769 d = peek_char ();
2770 if (d == '='){
2771 get_char ();
2772 return Token.OP_EQ;
2774 if (d == '>'){
2775 get_char ();
2776 return Token.ARROW;
2779 return Token.ASSIGN;
2781 case '&':
2782 val = LocatedToken.Create (ref_line, col);
2783 d = peek_char ();
2784 if (d == '&'){
2785 get_char ();
2786 return Token.OP_AND;
2788 if (d == '='){
2789 get_char ();
2790 return Token.OP_AND_ASSIGN;
2792 return Token.BITWISE_AND;
2794 case '|':
2795 val = LocatedToken.Create (ref_line, col);
2796 d = peek_char ();
2797 if (d == '|'){
2798 get_char ();
2799 return Token.OP_OR;
2801 if (d == '='){
2802 get_char ();
2803 return Token.OP_OR_ASSIGN;
2805 return Token.BITWISE_OR;
2807 case '*':
2808 val = LocatedToken.Create (ref_line, col);
2809 if (peek_char () == '='){
2810 get_char ();
2811 return Token.OP_MULT_ASSIGN;
2813 return Token.STAR;
2815 case '/':
2816 d = peek_char ();
2817 if (d == '='){
2818 val = LocatedToken.Create (ref_line, col);
2819 get_char ();
2820 return Token.OP_DIV_ASSIGN;
2823 // Handle double-slash comments.
2824 if (d == '/'){
2825 get_char ();
2826 if (RootContext.Documentation != null && peek_char () == '/') {
2827 get_char ();
2828 // Don't allow ////.
2829 if ((d = peek_char ()) != '/') {
2830 update_comment_location ();
2831 if (doc_state == XmlCommentState.Allowed)
2832 handle_one_line_xml_comment ();
2833 else if (doc_state == XmlCommentState.NotAllowed)
2834 warn_incorrect_doc_comment ();
2837 while ((d = get_char ()) != -1 && (d != '\n') && d != '\r');
2839 any_token_seen |= tokens_seen;
2840 tokens_seen = false;
2841 comments_seen = false;
2842 continue;
2843 } else if (d == '*'){
2844 get_char ();
2845 bool docAppend = false;
2846 if (RootContext.Documentation != null && peek_char () == '*') {
2847 get_char ();
2848 update_comment_location ();
2849 // But when it is /**/, just do nothing.
2850 if (peek_char () == '/') {
2851 get_char ();
2852 continue;
2854 if (doc_state == XmlCommentState.Allowed)
2855 docAppend = true;
2856 else if (doc_state == XmlCommentState.NotAllowed)
2857 warn_incorrect_doc_comment ();
2860 int current_comment_start = 0;
2861 if (docAppend) {
2862 current_comment_start = xml_comment_buffer.Length;
2863 xml_comment_buffer.Append (Environment.NewLine);
2866 while ((d = get_char ()) != -1){
2867 if (d == '*' && peek_char () == '/'){
2868 get_char ();
2869 comments_seen = true;
2870 break;
2872 if (docAppend)
2873 xml_comment_buffer.Append ((char) d);
2875 if (d == '\n'){
2876 any_token_seen |= tokens_seen;
2877 tokens_seen = false;
2879 // Reset 'comments_seen' just to be consistent.
2880 // It doesn't matter either way, here.
2882 comments_seen = false;
2885 if (!comments_seen)
2886 Report.Error (1035, Location, "End-of-file found, '*/' expected");
2888 if (docAppend)
2889 update_formatted_doc_comment (current_comment_start);
2890 continue;
2892 return Token.DIV;
2894 case '%':
2895 val = LocatedToken.Create (ref_line, col);
2896 if (peek_char () == '='){
2897 get_char ();
2898 return Token.OP_MOD_ASSIGN;
2900 return Token.PERCENT;
2902 case '^':
2903 val = LocatedToken.Create (ref_line, col);
2904 if (peek_char () == '='){
2905 get_char ();
2906 return Token.OP_XOR_ASSIGN;
2908 return Token.CARRET;
2910 case ':':
2911 val = LocatedToken.Create (ref_line, col);
2912 if (peek_char () == ':') {
2913 get_char ();
2914 return Token.DOUBLE_COLON;
2916 return Token.COLON;
2918 case '0': case '1': case '2': case '3': case '4':
2919 case '5': case '6': case '7': case '8': case '9':
2920 tokens_seen = true;
2921 return is_number (c);
2923 case '\n': // white space
2924 any_token_seen |= tokens_seen;
2925 tokens_seen = false;
2926 comments_seen = false;
2927 continue;
2929 case '.':
2930 tokens_seen = true;
2931 d = peek_char ();
2932 if (d >= '0' && d <= '9')
2933 return is_number (c);
2934 return Token.DOT;
2936 case '#':
2937 if (tokens_seen || comments_seen) {
2938 Eror_WrongPreprocessorLocation ();
2939 return Token.ERROR;
2942 if (handle_preprocessing_directive (true))
2943 continue;
2945 bool directive_expected = false;
2946 while ((c = get_char ()) != -1) {
2947 if (col == 1) {
2948 directive_expected = true;
2949 } else if (!directive_expected) {
2950 // TODO: Implement comment support for disabled code and uncomment this code
2951 // if (c == '#') {
2952 // Eror_WrongPreprocessorLocation ();
2953 // return Token.ERROR;
2954 // }
2955 continue;
2958 if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == '\v' )
2959 continue;
2961 if (c == '#') {
2962 if (handle_preprocessing_directive (false))
2963 break;
2965 directive_expected = false;
2968 if (c != -1) {
2969 tokens_seen = false;
2970 continue;
2973 return Token.EOF;
2975 case '"':
2976 return consume_string (false);
2978 case '\'':
2979 return TokenizeBackslash ();
2981 case '@':
2982 c = get_char ();
2983 if (c == '"') {
2984 tokens_seen = true;
2985 return consume_string (true);
2988 if (is_identifier_start_character (c)){
2989 return consume_identifier (c, true);
2992 Report.Error (1646, Location, "Keyword, identifier, or string expected after verbatim specifier: @");
2993 return Token.ERROR;
2995 case EvalStatementParserCharacter:
2996 return Token.EVAL_STATEMENT_PARSER;
2997 case EvalCompilationUnitParserCharacter:
2998 return Token.EVAL_COMPILATION_UNIT_PARSER;
2999 case EvalUsingDeclarationsParserCharacter:
3000 return Token.EVAL_USING_DECLARATIONS_UNIT_PARSER;
3003 if (is_identifier_start_character (c)) {
3004 tokens_seen = true;
3005 return consume_identifier (c);
3008 error_details = ((char)c).ToString ();
3009 return Token.ERROR;
3012 if (CompleteOnEOF){
3013 if (generated)
3014 return Token.COMPLETE_COMPLETION;
3016 generated = true;
3017 return Token.GENERATE_COMPLETION;
3021 return Token.EOF;
//
// Scans a character literal. The tokenizer calls this right after it
// has read the opening single quote.
//
// Returns Token.LITERAL (with `val' holding the CharLiteral) on success
// and Token.ERROR otherwise; errors 1010, 1011 and 1012 are reported
// through Report.
//
int TokenizeBackslash ()
{
	int ch = get_char ();
	tokens_seen = true;

	switch (ch) {
	case '\'':
		// '' - nothing between the quotes
		error_details = "Empty character literal";
		Report.Error (1011, Location, error_details);
		return Token.ERROR;

	case '\r':
	case '\n':
		Report.Error (1010, Location, "Newline in constant");
		return Token.ERROR;
	}

	int extra;
	ch = escape (ch, out extra);
	if (ch == -1)
		return Token.ERROR;

	// NOTE(review): `extra' looks like a second code unit produced by
	// the escape decoder, which is not supported here - confirm
	// against escape ().
	if (extra != 0)
		throw new NotImplementedException ();

	// The literal is stored before the closing quote is checked.
	val = new CharLiteral ((char) ch, Location);

	if (get_char () == '\'')
		return Token.LITERAL;

	Report.Error (1012, Location, "Too many characters in character literal");

	// Try to recover: swallow input up to and including the next
	// newline or single quote, or until the end of input.
	int skipped;
	do {
		skipped = get_char ();
	} while (skipped != -1 && skipped != '\n' && skipped != '\'');

	return Token.ERROR;
}
//
// Decides which token a `<' stands for: a generic dimension (only
// tried while handle_typeof is set), the start of a generic argument
// list, or one of the operators <, <=, << and <<=.
//
int TokenizeLessThan ()
{
	int next;

	if (handle_typeof) {
		PushPosition ();
		if (parse_generic_dimension (out next)) {
			val = next;
			DiscardPosition ();
			return Token.GENERIC_DIMENSION;
		}
		PopPosition ();
	}

	// Save current position and parse next token.
	PushPosition ();
	bool generic_open = parse_less_than ();
	if (generic_open) {
		// token () must run before PopPosition (), and only when a
		// generic declaration is being parsed.
		int generic_token = (parsing_generic_declaration && token () != Token.DOT)
			? Token.OP_GENERICS_LT_DECL
			: Token.OP_GENERICS_LT;

		PopPosition ();
		return generic_token;
	}

	PopPosition ();
	parsing_generic_less_than = 0;

	// Plain operator: look at what follows the `<'.
	next = peek_char ();
	if (next == '=') {
		get_char ();
		return Token.OP_LE;
	}

	if (next != '<')
		return Token.OP_LT;

	get_char ();
	if (peek_char () != '=')
		return Token.OP_SHIFT_LEFT;

	get_char ();
	return Token.OP_SHIFT_LEFT_ASSIGN;
}
//
// Handles a one line xml comment: skips the leading spaces, copies the
// rest of the line into xml_comment_buffer and, when the line ended
// with a line break, appends Environment.NewLine. The line break
// itself is left unread.
//
private void handle_one_line_xml_comment ()
{
	// Only plain spaces are skipped in front of the text.
	while (peek_char () == ' ')
		get_char ();

	int ahead = peek_char ();
	while (ahead != -1 && ahead != '\n' && ahead != '\r') {
		xml_comment_buffer.Append ((char) get_char ());
		ahead = peek_char ();
	}

	// Nothing is appended when the comment is cut short by end of input.
	if (ahead == '\n' || ahead == '\r')
		xml_comment_buffer.Append (Environment.NewLine);
}
//
// Remove heading "*" in Javadoc-like xml documentation.
//
// Works on the part of xml_comment_buffer that starts at
// `current_comment_start'. The buffer is rewritten only when every
// line after the first begins with optional spaces followed by `*';
// the last line may also consist of spaces only. Otherwise the buffer
// is left untouched.
//
private void update_formatted_doc_comment (int current_comment_start)
{
	int span = xml_comment_buffer.Length - current_comment_start;
	string raw = xml_comment_buffer.ToString (current_comment_start, span);
	string [] rows = raw.Replace ("\r", "").Split ('\n');

	// The first line starts with /**, thus it is not target
	// for the format check.
	for (int n = 1; n < rows.Length; n++) {
		string row = rows [n];
		int star = row.IndexOf ('*');

		// Only the final row is allowed to have no `*' at all.
		if (star < 0 && n < rows.Length - 1)
			return;

		string prefix = star < 0 ? row : row.Substring (0, star);
		for (int k = 0; k < prefix.Length; k++) {
			if (prefix [k] != ' ')
				return;
		}

		// With no `*' (star == -1) this keeps the whole row.
		rows [n] = row.Substring (star + 1);
	}

	xml_comment_buffer.Remove (current_comment_start, span);
	xml_comment_buffer.Insert (current_comment_start, String.Join (Environment.NewLine, rows));
}
//
// Updates current comment location. An already recorded location is
// kept; a new one is stored only while current_comment_location is
// still null.
//
private void update_comment_location ()
{
	if (!current_comment_location.IsNull)
		return;

	int line = ref_line;
	// "-2" is for heading "//" or "/*"
	int start_column = hidden ? -1 : col - 2;
	current_comment_location = new Location (line, start_column);
}
//
// Checks whether doc comment text is still waiting in
// xml_comment_buffer and, if so, raises the misplaced comment warning.
//
public void check_incorrect_doc_comment ()
{
	if (xml_comment_buffer.Length <= 0)
		return;

	warn_incorrect_doc_comment ();
}
//
// Raises warning 1587 when the tokenizer found incorrect doc comment
// markup. Moving doc_state to Error makes sure the warning is issued
// only once until the state is changed again.
//
private void warn_incorrect_doc_comment ()
{
	if (doc_state == XmlCommentState.Error)
		return;

	doc_state = XmlCommentState.Error;

	// The text below is the wording csc uses, even though the
	// original author noted that it does not make much sense.
	Report.Warning (1587, 2, Location, "XML comment is not placed on a valid language element");
}
//
// Consumes the saved xml comment lines (if any) for the current target
// member or type. Returns the collected text and resets the doc
// comment state, or returns null when nothing was collected.
//
public string consume_doc_comment ()
{
	if (xml_comment_buffer.Length <= 0)
		return null;

	string pending = xml_comment_buffer.ToString ();
	reset_doc_comment ();
	return pending;
}
//
// Shortcut to the Report object of `context'; the tokenizer sends its
// errors and warnings through it.
//
Report Report {
	get {
		return context.Report;
	}
}
//
// Forgets any collected doc comment: clears the recorded comment
// location and empties xml_comment_buffer.
//
void reset_doc_comment ()
{
	current_comment_location = Location.Null;
	xml_comment_buffer.Length = 0;
}
//
// Reports a directive that is still open in `ifstack'. Only the
// topmost entry is popped and reported: error 1038 when its REGION
// bit is set, error 1027 otherwise.
//
public void cleanup ()
{
	if (ifstack == null || ifstack.Count < 1)
		return;

	int top = ifstack.Pop ();
	bool in_region = (top & REGION) != 0;

	if (in_region) {
		Report.Error (1038, Location, "#endregion directive expected");
		return;
	}

	Report.Error (1027, Location, "Expected `#endif' directive");
}
3229 // Indicates whether it accepts XML documentation or not.
3231 public enum XmlCommentState {
3232 // comment is allowed in this state.
3233 Allowed,
3234 // comment is not allowed in this state.
3235 NotAllowed,
3236 // once comments appeared when it is NotAllowed, then the
3237 // state is changed to it, until the state is changed to
3238 // .Allowed.
3239 Error