Implement proper ctor parameter checking
[mono-project.git] / mcs / mcs / cs-tokenizer.cs
blobdaec2f2c6b492a8b6c43e9b786067aaec5e940f4
1 //
2 // cs-tokenizer.cs: The Tokenizer for the C# compiler
3 // This also implements the preprocessor
4 //
5 // Author: Miguel de Icaza (miguel@gnu.org)
6 // Marek Safar (marek.safar@seznam.cz)
7 //
8 // Dual licensed under the terms of the MIT X11 or GNU GPL
9 //
10 // Copyright 2001, 2002 Ximian, Inc (http://www.ximian.com)
11 // Copyright 2004-2008 Novell, Inc
15 using System;
16 using System.Text;
17 using System.Collections.Generic;
18 using System.Globalization;
19 using System.Diagnostics;
21 namespace Mono.CSharp
23 /// <summary>
24 /// Tokenizer for C# source code.
25 /// </summary>
27 public class Tokenizer : yyParser.yyInput
// One node of a singly linked chain of keywords. The keyword text is kept
// as a character array so it can be compared against an identifier buffer
// character by character.
class KeywordEntry<T>
{
	// Keyword text as characters.
	public readonly char[] Value;

	// Value associated with the keyword.
	public readonly T Token;

	// Next entry in the same chain, or null for the last one.
	public KeywordEntry<T> Next;

	public KeywordEntry (string value, T token)
	{
		Token = token;
		Value = value.ToCharArray ();
	}
}
// Compares and hashes character buffers using only their first `length'
// characters; anything stored past that prefix is ignored.
sealed class IdentifiersComparer : IEqualityComparer<char[]>
{
	readonly int length;

	public IdentifiersComparer (int length)
	{
		this.length = length;
	}

	// True when both buffers hold the same characters in [0, length).
	public bool Equals (char[] x, char[] y)
	{
		int index = 0;
		while (index < length) {
			if (x [index] != y [index])
				return false;

			++index;
		}

		return true;
	}

	// Hash over the first `length' characters: hash = hash * 31 + ch,
	// written with a shift exactly as the comparison prefix requires.
	public int GetHashCode (char[] obj)
	{
		int hash = 0;
		int index = 0;
		while (index < length) {
			hash = (hash << 5) - hash + obj [index];
			++index;
		}

		return hash;
	}
}
71 // This class has to be used in the parser only, it reuses token
72 // details after each parse
public class LocatedToken
{
	// Shared pool of reusable instances and the index of the next free slot.
	static LocatedToken[] buffer;
	static int pos;

	int row, column;
	string value;

	private LocatedToken ()
	{
	}

	// Creates (or recycles) a token carrying only a position.
	public static LocatedToken Create (int row, int column)
	{
		return Create (null, row, column);
	}

	// Creates (or recycles) a token carrying a text value and a position.
	public static LocatedToken Create (string value, int row, int column)
	{
		//
		// TODO: I am not very happy about the logic but it's the best
		// what I could come up with for now.
		// Ideally we should be using just a tiny buffer (256 elements) which
		// is enough to hold all details for the current stack and recycle
		// elements popped from the stack, but there is a trick needed to
		// recycle them properly.
		//
		LocatedToken entry;
		if (pos < buffer.Length) {
			// Reuse the pooled instance for this slot, creating it on first use
			entry = buffer [pos];
			if (entry == null) {
				entry = new LocatedToken ();
				buffer [pos] = entry;
			}

			++pos;
		} else {
			// Pool exhausted: hand out an instance that is not pooled
			entry = new LocatedToken ();
		}

		entry.row = row;
		entry.column = column;
		entry.value = value;
		return entry;
	}

	//
	// Used for token not required by expression evaluator
	//
	[Conditional ("FULL_AST")]
	public static void CreateOptional (int row, int col, ref object token)
	{
		token = Create (row, col);
	}

	// Allocates the pool on first call and rewinds it, so instances handed
	// out earlier will be overwritten by later Create calls.
	public static void Initialize ()
	{
		if (buffer == null)
			buffer = new LocatedToken [10000];

		pos = 0;
	}

	public Location Location {
		get {
			return new Location (row, column);
		}
	}

	public string Value {
		get {
			return value;
		}
	}
}
// Preprocessor directives known to the tokenizer. The low bits number the
// directive; two high flag bits are OR-ed into some of the values.
enum PreprocessorDirective
{
	// Flag bits (how they are consumed is not visible in this part of the file)
	CustomArgumentsParsing = 1 << 10,
	RequiresArgument = 1 << 11,

	Invalid = 0,

	Region = 1,
	Endregion = 2,
	If = 3 | RequiresArgument,
	Endif = 4,
	Elif = 5 | RequiresArgument,
	Else = 6,
	Define = 7 | RequiresArgument,
	Undef = 8 | RequiresArgument,
	Error = 9,
	Warning = 10,
	Pragma = 11 | CustomArgumentsParsing,
	Line = 12
}
165 SeekableStreamReader reader;
166 SourceFile ref_name;
167 CompilationSourceFile file_name;
168 CompilerContext context;
169 bool hidden = false;
170 int ref_line = 1;
171 int line = 1;
172 int col = 0;
173 int previous_col;
174 int current_token;
175 int tab_size;
176 bool handle_get_set = false;
177 bool handle_remove_add = false;
178 bool handle_where = false;
179 bool handle_typeof = false;
180 bool lambda_arguments_parsing;
181 List<Location> escaped_identifiers;
182 int parsing_generic_less_than;
183 readonly bool doc_processing;
186 // Used mainly for parser optimizations. Some expressions for instance
187 // can appear only in block (including initializer, base initializer)
188 // scope only
190 public int parsing_block;
191 internal bool query_parsing;
194 // When parsing type only, useful for ambiguous nullable types
196 public int parsing_type;
199 // Set when parsing generic declaration (type or method header)
201 public bool parsing_generic_declaration;
202 public bool parsing_generic_declaration_doc;
205 // The value indicates that we have not reached any declaration or
206 // namespace yet
208 public int parsing_declaration;
210 public bool parsing_attribute_section;
213 // The special characters to inject on streams to run the unit parser
214 // in the special expression mode. Using private characters from
215 // Plane Sixteen (U+100000 to U+10FFFD)
217 // This character is only tested just before the tokenizer is about to report
218 // an error; So on the regular operation mode, this addition will have no
219 // impact on the tokenizer's performance.
222 public const int EvalStatementParserCharacter = 0x100000;
223 public const int EvalCompilationUnitParserCharacter = 0x100001;
224 public const int EvalUsingDeclarationsParserCharacter = 0x100002;
225 public const int DocumentationXref = 0x100003;
228 // XML documentation buffer. The save point is used to divide
229 // comments on types and comments on members.
231 StringBuilder xml_comment_buffer;
234 // See comment on XmlCommentState enumeration.
236 XmlCommentState xml_doc_state = XmlCommentState.Allowed;
239 // Whether tokens have been seen on this line
241 bool tokens_seen = false;
244 // Set to true once the GENERATE_COMPLETION token has been
245 // returned. This helps produce one GENERATE_COMPLETION,
246 // as many COMPLETE_COMPLETION as necessary to complete the
247 // AST tree and one final EOF.
249 bool generated;
252 // Whether a token has been seen on the file
253 // This is needed because `define' is not allowed to be used
254 // after a token has been seen.
256 bool any_token_seen;
259 // Class variables
261 static readonly KeywordEntry<int>[][] keywords;
262 static readonly KeywordEntry<PreprocessorDirective>[][] keywords_preprocessor;
263 static readonly Dictionary<string, object> keyword_strings; // TODO: HashSet
264 static readonly NumberStyles styles;
265 static readonly NumberFormatInfo csharp_format_info;
267 // Pragma arguments
268 static readonly char[] pragma_warning = "warning".ToCharArray ();
269 static readonly char[] pragma_warning_disable = "disable".ToCharArray ();
270 static readonly char[] pragma_warning_restore = "restore".ToCharArray ();
271 static readonly char[] pragma_checksum = "checksum".ToCharArray ();
273 static readonly char[] simple_whitespaces = new char[] { ' ', '\t' };
// When true, GetKeyword reports `get' and `set' as keywords.
public bool PropertyParsing {
	get {
		return handle_get_set;
	}
	set {
		handle_get_set = value;
	}
}

// When true, GetKeyword reports `add' and `remove' as keywords.
public bool EventParsing {
	get {
		return handle_remove_add;
	}
	set {
		handle_remove_add = value;
	}
}

// When true, GetKeyword reports `where' as a keyword (it also does so
// while a query expression is being parsed).
public bool ConstraintsParsing {
	get {
		return handle_where;
	}
	set {
		handle_where = value;
	}
}

// Gets or sets the handle_typeof flag.
public bool TypeOfParsing {
	get {
		return handle_typeof;
	}
	set {
		handle_typeof = value;
	}
}

// Tab width; the constructor initializes it to 4 on Win32NT and 8 elsewhere.
public int TabSize {
	get {
		return tab_size;
	}
	set {
		tab_size = value;
	}
}
// Current XML documentation comment state. Switching to Allowed first
// runs check_incorrect_doc_comment () and reset_doc_comment ().
public XmlCommentState doc_state {
	get {
		return xml_doc_state;
	}
	set {
		if (value != XmlCommentState.Allowed) {
			xml_doc_state = value;
			return;
		}

		check_incorrect_doc_comment ();
		reset_doc_comment ();
		xml_doc_state = value;
	}
}
312 // This is used to trigger completion generation on the parser
313 public bool CompleteOnEOF;
// Records the location of an escaped identifier; the list is created on
// first use.
void AddEscapedIdentifier (Location loc)
{
	List<Location> locations = escaped_identifiers;
	if (locations == null) {
		locations = new List<Location> ();
		escaped_identifiers = locations;
	}

	locations.Add (loc);
}
// True when the location of `name' was recorded by AddEscapedIdentifier.
public bool IsEscapedIdentifier (MemberName name)
{
	if (escaped_identifiers == null)
		return false;

	return escaped_identifiers.Contains (name.Location);
}
329 // Values for the associated token returned
331 internal int putback_char; // Used by repl only
332 object val;
335 // Pre-processor
337 const int TAKING = 1;
338 const int ELSE_SEEN = 4;
339 const int PARENT_TAKING = 8;
340 const int REGION = 16;
343 // pre-processor if stack state:
345 Stack<int> ifstack;
347 static System.Text.StringBuilder string_builder;
349 const int max_id_size = 512;
350 static readonly char [] id_builder = new char [max_id_size];
352 public static Dictionary<char[], string>[] identifiers = new Dictionary<char[], string>[max_id_size + 1];
354 const int max_number_size = 512;
355 static char [] number_builder = new char [max_number_size];
356 static int number_pos;
358 static char[] value_builder = new char[256];
// Line number as tracked in ref_line (the same value the Location
// property reports).
public int Line {
	get { return ref_line; }
}
367 // This is used when the tokenizer needs to save
368 // the current position as it needs to do some parsing
369 // on its own to disambiguate a token on behalf of the
370 // parser.
372 Stack<Position> position_stack = new Stack<Position> (2);
// Snapshot of the tokenizer state taken by PushPosition and restored by
// PopPosition.
class Position {
	public int position;
	public int line;
	public int ref_line;
	public int col;
	public bool hidden;
	public int putback_char;
	public int previous_col;
	public Stack<int> ifstack;
	public int parsing_generic_less_than;
	public int current_token;
	public object val;

	public Position (Tokenizer t)
	{
		position = t.reader.Position;

		line = t.line;
		ref_line = t.ref_line;
		col = t.col;
		previous_col = t.previous_col;
		hidden = t.hidden;
		putback_char = t.putback_char;

		Stack<int> source = t.ifstack;
		if (source != null && source.Count != 0) {
			// ToArray returns the elements top first, so pushing them
			// from the last array element to the first rebuilds a copy
			// with the same ordering. An empty or missing stack is
			// recorded as null.
			int[] items = source.ToArray ();
			ifstack = new Stack<int> ();
			for (int i = items.Length - 1; i >= 0; --i)
				ifstack.Push (items [i]);
		}

		parsing_generic_less_than = t.parsing_generic_less_than;
		current_token = t.current_token;
		val = t.val;
	}
}
// Creates a tokenizer reading from `input'. `file' is used both as the
// actual source file and as the initially reported file; `ctx' supplies
// the compiler settings.
public Tokenizer (SeekableStreamReader input, CompilationSourceFile file, CompilerContext ctx)
{
	reader = input;
	context = ctx;
	file_name = file;
	ref_name = file;

	// -1 means no character has been put back
	putback_char = -1;

	xml_comment_buffer = new StringBuilder ();
	doc_processing = ctx.Settings.DocumentationFile != null;

	bool on_windows = Environment.OSVersion.Platform == PlatformID.Win32NT;
	tab_size = on_windows ? 4 : 8;

	//
	// FIXME: This could be `Location.Push' but we have to
	// find out why the MS compiler allows this
	//
	Mono.CSharp.Location.Push (file, file);
}
// Saves the current tokenizer state on the position stack.
public void PushPosition ()
{
	Position snapshot = new Position (this);
	position_stack.Push (snapshot);
}
// Removes the most recently saved state from the position stack and
// restores the tokenizer (including the reader position) to it.
public void PopPosition ()
{
	Position saved = position_stack.Pop ();

	reader.Position = saved.position;

	line = saved.line;
	ref_line = saved.ref_line;
	col = saved.col;
	previous_col = saved.previous_col;
	hidden = saved.hidden;
	putback_char = saved.putback_char;

	// May be null: Position stores no stack when the original was empty or null
	ifstack = saved.ifstack;

	parsing_generic_less_than = saved.parsing_generic_less_than;
	current_token = saved.current_token;
	val = saved.val;
}
455 // Do not reset the position, ignore it.
// Drops the most recently saved state without restoring it; the tokenizer
// stays where it currently is.
public void DiscardPosition ()
{
	position_stack.Pop ();
}
// Registers a language keyword: its text goes into keyword_strings (used
// by IsKeyword) and its token into the length/first-character table.
static void AddKeyword (string kw, int token)
{
	keyword_strings.Add (kw, null);

	AddKeyword<int> (keywords, kw, token);
}
// Registers a preprocessor directive name in the preprocessor table.
static void AddPreprocessorKeyword (string kw, PreprocessorDirective directive)
{
	AddKeyword<PreprocessorDirective> (keywords_preprocessor, kw, directive);
}
// Adds `kw' to a table indexed first by keyword length and then by the
// first character (offset from '_'). Keywords that share both are chained
// through KeywordEntry.Next in registration order.
static void AddKeyword<T> (KeywordEntry<T>[][] keywords, string kw, T token)
{
	int length = kw.Length;

	KeywordEntry<T>[] bucket = keywords [length];
	if (bucket == null) {
		// One slot for every character from '_' to 'z'
		bucket = new KeywordEntry<T> ['z' - '_' + 1];
		keywords [length] = bucket;
	}

	int slot = kw [0] - '_';
	KeywordEntry<T> tail = bucket [slot];
	if (tail == null) {
		bucket [slot] = new KeywordEntry<T> (kw, token);
		return;
	}

	// Append at the end of the existing chain
	while (tail.Next != null)
		tail = tail.Next;

	tail.Next = new KeywordEntry<T> (kw, token);
}
495 // Class initializer
// Class initializer: fills the static keyword tables (language keywords and
// preprocessor directives) and the number-parsing settings.
497 static Tokenizer ()
499 keyword_strings = new Dictionary<string, object> ();
// The outer array is indexed by keyword length (see the generic AddKeyword),
// so 11 slots cover lengths 0 through 10.
501 // 11 is the length of the longest keyword for now
502 keywords = new KeywordEntry<int>[11][];
504 AddKeyword ("__arglist", Token.ARGLIST);
505 AddKeyword ("__makeref", Token.MAKEREF);
506 AddKeyword ("__reftype", Token.REFTYPE);
507 AddKeyword ("__refvalue", Token.REFVALUE);
508 AddKeyword ("abstract", Token.ABSTRACT);
509 AddKeyword ("as", Token.AS);
510 AddKeyword ("add", Token.ADD);
511 AddKeyword ("base", Token.BASE);
512 AddKeyword ("bool", Token.BOOL);
513 AddKeyword ("break", Token.BREAK);
514 AddKeyword ("byte", Token.BYTE);
515 AddKeyword ("case", Token.CASE);
516 AddKeyword ("catch", Token.CATCH);
517 AddKeyword ("char", Token.CHAR);
518 AddKeyword ("checked", Token.CHECKED);
519 AddKeyword ("class", Token.CLASS);
520 AddKeyword ("const", Token.CONST);
521 AddKeyword ("continue", Token.CONTINUE);
522 AddKeyword ("decimal", Token.DECIMAL);
523 AddKeyword ("default", Token.DEFAULT);
524 AddKeyword ("delegate", Token.DELEGATE);
525 AddKeyword ("do", Token.DO);
526 AddKeyword ("double", Token.DOUBLE);
527 AddKeyword ("else", Token.ELSE);
528 AddKeyword ("enum", Token.ENUM);
529 AddKeyword ("event", Token.EVENT);
530 AddKeyword ("explicit", Token.EXPLICIT);
531 AddKeyword ("extern", Token.EXTERN);
532 AddKeyword ("false", Token.FALSE);
533 AddKeyword ("finally", Token.FINALLY);
534 AddKeyword ("fixed", Token.FIXED);
535 AddKeyword ("float", Token.FLOAT);
536 AddKeyword ("for", Token.FOR);
537 AddKeyword ("foreach", Token.FOREACH);
538 AddKeyword ("goto", Token.GOTO);
539 AddKeyword ("get", Token.GET);
540 AddKeyword ("if", Token.IF);
541 AddKeyword ("implicit", Token.IMPLICIT);
542 AddKeyword ("in", Token.IN);
543 AddKeyword ("int", Token.INT);
544 AddKeyword ("interface", Token.INTERFACE);
545 AddKeyword ("internal", Token.INTERNAL);
546 AddKeyword ("is", Token.IS);
547 AddKeyword ("lock", Token.LOCK);
548 AddKeyword ("long", Token.LONG);
549 AddKeyword ("namespace", Token.NAMESPACE);
550 AddKeyword ("new", Token.NEW);
551 AddKeyword ("null", Token.NULL);
552 AddKeyword ("object", Token.OBJECT);
553 AddKeyword ("operator", Token.OPERATOR);
554 AddKeyword ("out", Token.OUT);
555 AddKeyword ("override", Token.OVERRIDE);
556 AddKeyword ("params", Token.PARAMS);
557 AddKeyword ("private", Token.PRIVATE);
558 AddKeyword ("protected", Token.PROTECTED);
559 AddKeyword ("public", Token.PUBLIC);
560 AddKeyword ("readonly", Token.READONLY);
561 AddKeyword ("ref", Token.REF);
562 AddKeyword ("remove", Token.REMOVE);
563 AddKeyword ("return", Token.RETURN);
564 AddKeyword ("sbyte", Token.SBYTE);
565 AddKeyword ("sealed", Token.SEALED);
566 AddKeyword ("set", Token.SET);
567 AddKeyword ("short", Token.SHORT);
568 AddKeyword ("sizeof", Token.SIZEOF);
569 AddKeyword ("stackalloc", Token.STACKALLOC);
570 AddKeyword ("static", Token.STATIC);
571 AddKeyword ("string", Token.STRING);
572 AddKeyword ("struct", Token.STRUCT);
573 AddKeyword ("switch", Token.SWITCH);
574 AddKeyword ("this", Token.THIS);
575 AddKeyword ("throw", Token.THROW);
576 AddKeyword ("true", Token.TRUE);
577 AddKeyword ("try", Token.TRY);
578 AddKeyword ("typeof", Token.TYPEOF);
579 AddKeyword ("uint", Token.UINT);
580 AddKeyword ("ulong", Token.ULONG);
581 AddKeyword ("unchecked", Token.UNCHECKED);
582 AddKeyword ("unsafe", Token.UNSAFE);
583 AddKeyword ("ushort", Token.USHORT);
584 AddKeyword ("using", Token.USING);
585 AddKeyword ("virtual", Token.VIRTUAL);
586 AddKeyword ("void", Token.VOID);
587 AddKeyword ("volatile", Token.VOLATILE);
588 AddKeyword ("while", Token.WHILE);
589 AddKeyword ("partial", Token.PARTIAL);
590 AddKeyword ("where", Token.WHERE);
592 // LINQ keywords
593 AddKeyword ("from", Token.FROM);
594 AddKeyword ("join", Token.JOIN);
595 AddKeyword ("on", Token.ON);
596 AddKeyword ("equals", Token.EQUALS);
597 AddKeyword ("select", Token.SELECT);
598 AddKeyword ("group", Token.GROUP);
599 AddKeyword ("by", Token.BY);
600 AddKeyword ("let", Token.LET);
601 AddKeyword ("orderby", Token.ORDERBY);
602 AddKeyword ("ascending", Token.ASCENDING);
603 AddKeyword ("descending", Token.DESCENDING);
604 AddKeyword ("into", Token.INTO);
606 // Contextual async keywords
607 AddKeyword ("async", Token.ASYNC);
608 AddKeyword ("await", Token.AWAIT);
// Indexed by directive name length; 10 slots cover lengths 0 through 9
// ("endregion" is the longest name registered below).
610 keywords_preprocessor = new KeywordEntry<PreprocessorDirective>[10][];
612 AddPreprocessorKeyword ("region", PreprocessorDirective.Region);
613 AddPreprocessorKeyword ("endregion", PreprocessorDirective.Endregion);
614 AddPreprocessorKeyword ("if", PreprocessorDirective.If);
615 AddPreprocessorKeyword ("endif", PreprocessorDirective.Endif);
616 AddPreprocessorKeyword ("elif", PreprocessorDirective.Elif);
617 AddPreprocessorKeyword ("else", PreprocessorDirective.Else);
618 AddPreprocessorKeyword ("define", PreprocessorDirective.Define);
619 AddPreprocessorKeyword ("undef", PreprocessorDirective.Undef);
620 AddPreprocessorKeyword ("error", PreprocessorDirective.Error);
621 AddPreprocessorKeyword ("warning", PreprocessorDirective.Warning);
622 AddPreprocessorKeyword ("pragma", PreprocessorDirective.Pragma);
623 AddPreprocessorKeyword ("line", PreprocessorDirective.Line);
// Number parsing settings: invariant culture formatting and the Float style set.
625 csharp_format_info = NumberFormatInfo.InvariantInfo;
626 styles = NumberStyles.Float;
628 string_builder = new System.Text.StringBuilder ();
// Looks up the first id_len characters of id in the keyword table.
// Returns the matching Token constant, or -1 when the text is not a keyword
// or is a contextual keyword that does not apply in the current parsing state.
// Several cases look ahead in the input (peek_token, or token ()/xtoken ()
// between PushPosition and PopPosition) and may report diagnostics.
631 int GetKeyword (char[] id, int id_len)
634 // Keywords are stored in an array of arrays grouped by their
635 // length and then by the first character
637 if (id_len >= keywords.Length || keywords [id_len] == null)
638 return -1;
// NOTE(review): only the upper bound is checked here. An id whose first
// character sorts below '_' would give a negative index on the lookup below;
// presumably callers filter those out - confirm.
640 int first_index = id [0] - '_';
641 if (first_index > 'z' - '_')
642 return -1;
644 var kwe = keywords [id_len] [first_index];
645 if (kwe == null)
646 return -1;
// Walk the chain of keywords sharing this length and first character.
// Index 0 is not compared because the bucket already fixes it. res == 0
// marks "no match so far" (this relies on no keyword token being 0).
648 int res;
649 do {
650 res = kwe.Token;
651 for (int i = 1; i < id_len; ++i) {
652 if (id [i] != kwe.Value [i]) {
653 res = 0;
654 kwe = kwe.Next;
655 break;
658 } while (res == 0 && kwe != null);
660 if (res == 0)
661 return -1;
// The text is a keyword; now decide whether it counts as one in this context.
663 int next_token;
664 switch (res) {
665 case Token.GET:
666 case Token.SET:
667 if (!handle_get_set)
668 res = -1;
669 break;
670 case Token.REMOVE:
671 case Token.ADD:
672 if (!handle_remove_add)
673 res = -1;
674 break;
675 case Token.EXTERN:
676 if (parsing_declaration == 0)
677 res = Token.EXTERN_ALIAS;
678 break;
679 case Token.DEFAULT:
// `default' directly followed by a colon is merged into one token; the
// colon is consumed by the token () call.
680 if (peek_token () == Token.COLON) {
681 token ();
682 res = Token.DEFAULT_COLON;
684 break;
685 case Token.WHERE:
686 if (!handle_where && !query_parsing)
687 res = -1;
688 break;
689 case Token.FROM:
691 // A query expression is any expression that starts with `from identifier'
692 // followed by any token except ; , =
694 if (!query_parsing) {
695 if (lambda_arguments_parsing) {
696 res = -1;
697 break;
// Look ahead from a saved position; every path below restores it with
// PopPosition ().
700 PushPosition ();
701 // HACK: to disable generics micro-parser, because PushPosition does not
702 // store identifiers array
703 parsing_generic_less_than = 1;
704 switch (xtoken ()) {
705 case Token.IDENTIFIER:
706 case Token.INT:
707 case Token.BOOL:
708 case Token.BYTE:
709 case Token.CHAR:
710 case Token.DECIMAL:
711 case Token.FLOAT:
712 case Token.LONG:
713 case Token.OBJECT:
714 case Token.STRING:
715 case Token.UINT:
716 case Token.ULONG:
// NOTE(review): the comment above lists `=', but Token.EQUALS is the token
// registered for the `equals' query keyword in the class initializer.
// Confirm whether the assignment token was meant here (possibly as well).
717 next_token = xtoken ();
718 if (next_token == Token.SEMICOLON || next_token == Token.COMMA || next_token == Token.EQUALS)
719 goto default;
721 res = Token.FROM_FIRST;
722 query_parsing = true;
723 if (context.Settings.Version <= LanguageVersion.ISO_2)
724 Report.FeatureIsNotAvailable (context, Location, "query expressions");
725 break;
726 case Token.VOID:
727 Expression.Error_VoidInvalidInTheContext (Location, Report);
728 break;
729 default:
730 PopPosition ();
731 // HACK: A token is not a keyword so we need to restore identifiers buffer
732 // which has been overwritten before we grabbed the identifier
733 id_builder [0] = 'f'; id_builder [1] = 'r'; id_builder [2] = 'o'; id_builder [3] = 'm';
734 return -1;
736 PopPosition ();
738 break;
// The remaining query keywords are only keywords inside a query expression.
739 case Token.JOIN:
740 case Token.ON:
741 case Token.EQUALS:
742 case Token.SELECT:
743 case Token.GROUP:
744 case Token.BY:
745 case Token.LET:
746 case Token.ORDERBY:
747 case Token.ASCENDING:
748 case Token.DESCENDING:
749 case Token.INTO:
750 if (!query_parsing)
751 res = -1;
752 break;
754 case Token.USING:
755 case Token.NAMESPACE:
756 // TODO: some explanation needed
757 check_incorrect_doc_comment ();
758 break;
760 case Token.PARTIAL:
// Inside a block `partial' is never a keyword.
761 if (parsing_block > 0) {
762 res = -1;
763 break;
766 // Save current position and parse next token.
767 PushPosition ();
769 next_token = token ();
770 bool ok = (next_token == Token.CLASS) ||
771 (next_token == Token.STRUCT) ||
772 (next_token == Token.INTERFACE) ||
773 (next_token == Token.VOID);
775 PopPosition ();
777 if (ok) {
778 if (next_token == Token.VOID) {
779 if (context.Settings.Version <= LanguageVersion.ISO_2)
780 Report.FeatureIsNotAvailable (context, Location, "partial methods");
781 } else if (context.Settings.Version == LanguageVersion.ISO_1)
782 Report.FeatureIsNotAvailable (context, Location, "partial types");
784 return res;
// Followed by some other token below LAST_KEYWORD: report the misuse and
// return the token that follows `partial' instead.
787 if (next_token < Token.LAST_KEYWORD) {
788 Report.Error (267, Location,
789 "The `partial' modifier can be used only immediately before `class', `struct', `interface', or `void' keyword");
790 return token ();
793 res = -1;
794 break;
796 // TODO: async, it's modifiers context only
797 case Token.ASYNC:
// Only a keyword when the language version is Future.
798 if (context.Settings.Version != LanguageVersion.Future) {
799 res = -1;
801 break;
803 // TODO: async, it's async block context only
804 case Token.AWAIT:
// Only a keyword when the language version is Future.
805 if (context.Settings.Version != LanguageVersion.Future) {
806 res = -1;
809 break;
813 return res;
// Maps the first id_len characters of id to a preprocessor directive.
//
// id      buffer holding the directive name (without the leading '#')
// id_len  number of valid characters in id
//
// Returns the matching PreprocessorDirective, or
// PreprocessorDirective.Invalid when the name is not a known directive.
static PreprocessorDirective GetPreprocessorDirective (char[] id, int id_len)
{
	//
	// Keywords are stored in an array of arrays grouped by their
	// length and then by the first character
	//
	if (id_len >= keywords_preprocessor.Length || keywords_preprocessor[id_len] == null)
		return PreprocessorDirective.Invalid;

	//
	// Buckets only cover '_' to 'z'. A first character outside that range
	// (including anything below '_', which gives a negative offset) cannot
	// be a directive and must not be used as an index.
	//
	int first_index = id[0] - '_';
	if (first_index < 0 || first_index > 'z' - '_')
		return PreprocessorDirective.Invalid;

	// Walk the chain of directives sharing this length and first character.
	// Index 0 is skipped because the bucket already fixes it.
	for (var kwe = keywords_preprocessor[id_len][first_index]; kwe != null; kwe = kwe.Next) {
		bool match = true;
		for (int i = 1; i < id_len; ++i) {
			if (id[i] != kwe.Value[i]) {
				match = false;
				break;
			}
		}

		if (match)
			return kwe.Token;
	}

	return PreprocessorDirective.Invalid;
}
// Current location: ref_line plus the current column, or column -1 while
// `hidden' is set.
public Location Location {
	get {
		int column = hidden ? -1 : col;
		return new Location (ref_line, column);
	}
}
// Returns true when `c' can start an identifier: an ASCII letter, an
// underscore, or any other character Char.IsLetter accepts.
//
// `c' is an int, so it may carry values that are not UTF-16 code units
// (for example -1, or the special parser characters at 0x100000 and above
// declared in this class). Those are rejected up front instead of being
// narrowed to char, which would classify them by their low 16 bits only.
static bool is_identifier_start_character (int c)
{
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')
		return true;

	// Remaining ASCII characters are never letters; this also rejects
	// negative values. Anything above Char.MaxValue is not a char at all.
	if (c < 0x80 || c > Char.MaxValue)
		return false;

	return Char.IsLetter ((char) c);
}
// Returns true when `c' may appear after the first character of an
// identifier: ASCII letters, digits and '_', or, outside ASCII, any letter
// or connector punctuation character.
static bool is_identifier_part_character (char c)
{
	if (c < 0x80) {
		bool lower = c >= 'a' && c <= 'z';
		bool upper = c >= 'A' && c <= 'Z';
		bool digit = c >= '0' && c <= '9';
		return lower || upper || digit || c == '_';
	}

	if (Char.IsLetter (c))
		return true;

	return Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
}
// True when `s' is one of the keyword strings registered in the class
// initializer, regardless of parsing context.
public static bool IsKeyword (string s)
{
	object unused;
	return keyword_strings.TryGetValue (s, out unused);
}
882 // Open parens micro parser. Detects both lambda and cast ambiguity.
// Reads tokens following an opening parenthesis and classifies it as
// OPEN_PARENS_LAMBDA (the closing parenthesis is followed by `=>'),
// OPEN_PARENS_CAST (the content looks like a type) or plain OPEN_PARENS.
// This method consumes tokens and does not save or restore the input
// position itself; presumably the caller does - confirm.
884 int TokenizeOpenParens ()
886 int ptoken;
887 current_token = -1;
// bracket_level: nesting depth of `<' / `[' inside the parentheses.
// is_type: the content can only be a type.
// can_be_type: the content may be a type; decided by the token after `)'.
889 int bracket_level = 0;
890 bool is_type = false;
891 bool can_be_type = false;
893 while (true) {
// ptoken is the token seen before the one read by this iteration
// (-1 on the first pass).
894 ptoken = current_token;
895 token ();
897 switch (current_token) {
898 case Token.CLOSE_PARENS:
899 token ();
902 // Expression inside parens is lambda, (int i) =>
904 if (current_token == Token.ARROW)
905 return Token.OPEN_PARENS_LAMBDA;
908 // Expression inside parens is single type, (int[])
910 if (is_type)
911 return Token.OPEN_PARENS_CAST;
914 // Expression is possible cast, look at next token, (T)null
916 if (can_be_type) {
917 switch (current_token) {
918 case Token.OPEN_PARENS:
919 case Token.BANG:
920 case Token.TILDE:
921 case Token.IDENTIFIER:
922 case Token.LITERAL:
923 case Token.BASE:
924 case Token.CHECKED:
925 case Token.DELEGATE:
926 case Token.FALSE:
927 case Token.FIXED:
928 case Token.NEW:
929 case Token.NULL:
930 case Token.SIZEOF:
931 case Token.THIS:
932 case Token.THROW:
933 case Token.TRUE:
934 case Token.TYPEOF:
935 case Token.UNCHECKED:
936 case Token.UNSAFE:
937 case Token.DEFAULT:
940 // These can be part of a member access
942 case Token.INT:
943 case Token.UINT:
944 case Token.SHORT:
945 case Token.USHORT:
946 case Token.LONG:
947 case Token.ULONG:
948 case Token.DOUBLE:
949 case Token.FLOAT:
950 case Token.CHAR:
951 case Token.BYTE:
952 case Token.DECIMAL:
953 case Token.BOOL:
954 return Token.OPEN_PARENS_CAST;
957 return Token.OPEN_PARENS;
// A `.' or `::' is only accepted after an identifier or a closing generic `>'.
959 case Token.DOT:
960 case Token.DOUBLE_COLON:
961 if (ptoken != Token.IDENTIFIER && ptoken != Token.OP_GENERICS_GT)
962 goto default;
964 continue;
966 case Token.IDENTIFIER:
967 switch (ptoken) {
968 case Token.DOT:
969 if (bracket_level == 0) {
970 is_type = false;
971 can_be_type = true;
974 continue;
975 case Token.OP_GENERICS_LT:
976 case Token.COMMA:
977 case Token.DOUBLE_COLON:
978 case -1:
979 if (bracket_level == 0)
980 can_be_type = true;
981 continue;
// An identifier after any other token rules out a type.
982 default:
983 can_be_type = is_type = false;
984 continue;
// A built-in type keyword outside any brackets means the content is a type.
987 case Token.OBJECT:
988 case Token.STRING:
989 case Token.BOOL:
990 case Token.DECIMAL:
991 case Token.FLOAT:
992 case Token.DOUBLE:
993 case Token.SBYTE:
994 case Token.BYTE:
995 case Token.SHORT:
996 case Token.USHORT:
997 case Token.INT:
998 case Token.UINT:
999 case Token.LONG:
1000 case Token.ULONG:
1001 case Token.CHAR:
1002 case Token.VOID:
1003 if (bracket_level == 0)
1004 is_type = true;
1005 continue;
// A comma outside brackets clears both flags. Raising bracket_level to 100
// also keeps the `bracket_level == 0' checks in the other cases from
// setting them again for the rest of the scan.
1007 case Token.COMMA:
1008 if (bracket_level == 0) {
1009 bracket_level = 100;
1010 can_be_type = is_type = false;
1012 continue;
1014 case Token.OP_GENERICS_LT:
1015 case Token.OPEN_BRACKET:
1016 if (bracket_level++ == 0)
1017 is_type = true;
1018 continue;
1020 case Token.OP_GENERICS_GT:
1021 case Token.CLOSE_BRACKET:
1022 --bracket_level;
1023 continue;
1025 case Token.INTERR_NULLABLE:
1026 case Token.STAR:
1027 if (bracket_level == 0)
1028 is_type = true;
1029 continue;
1031 case Token.REF:
1032 case Token.OUT:
1033 can_be_type = is_type = false;
1034 continue;
// Any other token: an ordinary parenthesis.
1036 default:
1037 return Token.OPEN_PARENS;
// True when `s' is non-empty, starts with an identifier start character
// and continues with identifier part characters only. Keywords are not
// rejected here.
public static bool IsValidIdentifier (string s)
{
	if (string.IsNullOrEmpty (s))
		return false;

	if (!is_identifier_start_character (s [0]))
		return false;

	int index = 1;
	while (index < s.Length) {
		if (!is_identifier_part_character (s [index]))
			return false;

		index++;
	}

	return true;
}
// Micro-parser run after a `<' has been read. It consumes tokens and
// returns true when they look like a generic type argument or type
// parameter list that ends with a closing `>', false otherwise.
//
// The method does not save or restore the input position.
bool parse_less_than ()
{
start:
	int the_token = token ();
	if (the_token == Token.OPEN_BRACKET) {
		//
		// Skip a bracketed section in front of the type name. Stop at the
		// end of input as well: the closing bracket may be missing, and
		// waiting for it alone would never terminate (assumes token ()
		// keeps returning Token.EOF once the input is exhausted).
		//
		while (true) {
			the_token = token ();
			if (the_token == Token.EOF)
				return false;

			if (the_token == Token.CLOSE_BRACKET)
				break;
		}

		the_token = token ();
	} else if (the_token == Token.IN || the_token == Token.OUT) {
		// Skip a variance modifier
		the_token = token ();
	}

	switch (the_token) {
	case Token.IDENTIFIER:
	case Token.OBJECT:
	case Token.STRING:
	case Token.BOOL:
	case Token.DECIMAL:
	case Token.FLOAT:
	case Token.DOUBLE:
	case Token.SBYTE:
	case Token.BYTE:
	case Token.SHORT:
	case Token.USHORT:
	case Token.INT:
	case Token.UINT:
	case Token.LONG:
	case Token.ULONG:
	case Token.CHAR:
	case Token.VOID:
		// A type name: continue with whatever follows it
		break;
	case Token.OP_GENERICS_GT:
	case Token.IN:
	case Token.OUT:
		return true;

	default:
		return false;
	}

again:
	the_token = token ();

	if (the_token == Token.OP_GENERICS_GT)
		return true;

	// Next argument, or next part of a qualified name
	if (the_token == Token.COMMA || the_token == Token.DOT || the_token == Token.DOUBLE_COLON)
		goto start;

	// Nullable or pointer suffix
	if (the_token == Token.INTERR_NULLABLE || the_token == Token.STAR)
		goto again;

	if (the_token == Token.OP_GENERICS_LT) {
		// Nested generic argument list
		if (!parse_less_than ())
			return false;

		goto again;
	}

	if (the_token == Token.OPEN_BRACKET) {
		// Array rank specifier: only commas are allowed before `]'
	rank_specifiers:
		the_token = token ();
		if (the_token == Token.CLOSE_BRACKET)
			goto again;

		if (the_token == Token.COMMA)
			goto rank_specifiers;

		return false;
	}

	return false;
}
// Reads the remainder of an unbound generic argument list such as `,,>'.
// `dimension' starts at 1 and grows by one per comma. Returns true when
// the closing `>' is reached, false on any other token (in which case
// `dimension' holds the count reached so far).
bool parse_generic_dimension (out int dimension)
{
	dimension = 1;

	while (true) {
		int next = token ();
		if (next == Token.OP_GENERICS_GT)
			return true;

		if (next != Token.COMMA)
			return false;

		dimension++;
	}
}
// Returns the next token and then restores the state saved by
// PushPosition, so the token is read again by the next call.
public int peek_token ()
{
	PushPosition ();
	int next = token ();
	PopPosition ();

	return next;
}
1150 // Tokenizes `?' using custom disambiguation rules to return one
1151 // of following tokens: INTERR_NULLABLE, OP_COALESCING, INTERR
1153 // Tricky expression look like:
1155 // Foo ? a = x ? b : c;
// Decides what a `?' that has just been read stands for and returns
// INTERR_NULLABLE (nullable type suffix), OP_COALESCING (`??') or INTERR
// (conditional operator). Uses the parsing state, the previous token and,
// when needed, a look-ahead over the following tokens.
1157 int TokenizePossibleNullableType ()
// Outside a block, or while parsing a type, `?' is always a nullable suffix.
1159 if (parsing_block == 0 || parsing_type > 0)
1160 return Token.INTERR_NULLABLE;
1162 int d = peek_char ();
1163 if (d == '?') {
1164 get_char ();
1165 return Token.OP_COALESCING;
// After these tokens `?' is taken as the conditional operator.
1168 switch (current_token) {
1169 case Token.CLOSE_PARENS:
1170 case Token.TRUE:
1171 case Token.FALSE:
1172 case Token.NULL:
1173 case Token.LITERAL:
1174 return Token.INTERR;
// Quick decision based on the character directly after `?'.
1177 if (d != ' ') {
1178 if (d == ',' || d == ';' || d == '>')
1179 return Token.INTERR_NULLABLE;
1180 if (d == '*' || (d >= '0' && d <= '9'))
1181 return Token.INTERR;
// Look ahead at the following tokens; the position is restored by the
// PopPosition call before returning.
1184 PushPosition ();
1185 current_token = Token.NONE;
1186 int next_token;
1187 switch (xtoken ()) {
1188 case Token.LITERAL:
1189 case Token.TRUE:
1190 case Token.FALSE:
1191 case Token.NULL:
1192 case Token.THIS:
1193 case Token.NEW:
1194 next_token = Token.INTERR;
1195 break;
1197 case Token.SEMICOLON:
1198 case Token.COMMA:
1199 case Token.CLOSE_PARENS:
1200 case Token.OPEN_BRACKET:
1201 case Token.OP_GENERICS_GT:
1202 case Token.INTERR:
1203 next_token = Token.INTERR_NULLABLE;
1204 break;
// -1 means the first token after `?' did not decide it.
1206 default:
1207 next_token = -1;
1208 break;
1211 if (next_token == -1) {
// Try the second token after `?'.
1212 switch (xtoken ()) {
1213 case Token.COMMA:
1214 case Token.SEMICOLON:
1215 case Token.OPEN_BRACE:
1216 case Token.CLOSE_PARENS:
1217 case Token.IN:
1218 next_token = Token.INTERR_NULLABLE;
1219 break;
1221 case Token.COLON:
1222 next_token = Token.INTERR;
1223 break;
1225 default:
1226 int ntoken;
// interrs starts at 1 to count the `?' being classified.
1227 int interrs = 1;
1228 int colons = 0;
1230 // All shortcuts failed, do it the hard way
// Scan up to `;' or end of input, counting `?' and `:' tokens. The scan
// stops early once the number of colons reaches the number of `?' seen.
1232 while ((ntoken = xtoken ()) != Token.EOF) {
1233 if (ntoken == Token.SEMICOLON)
1234 break;
1236 if (ntoken == Token.COLON) {
1237 if (++colons == interrs)
1238 break;
1239 continue;
1242 if (ntoken == Token.INTERR) {
1243 ++interrs;
1244 continue;
// Equal counts mean every `?' found its `:', so this one is a conditional
// operator; otherwise it is a nullable suffix.
1248 next_token = colons != interrs ? Token.INTERR_NULLABLE : Token.INTERR;
1249 break;
1253 PopPosition ();
1254 return next_token;
// Appends `c' (unless it is -1) and then every decimal digit that follows
// in the input to number_builder. Returns true when at least one digit was
// read from the input; `c' itself does not count.
bool decimal_digits (int c)
{
	bool seen_digits = false;

	if (c != -1) {
		if (number_pos == max_number_size)
			Error_NumericConstantTooLong ();

		number_builder [number_pos++] = (char) c;
	}

	//
	// We use peek_char2, because decimal_digits needs to do a
	// 2-character look-ahead (5.ToString for example).
	//
	for (int d = peek_char2 (); d != -1 && d >= '0' && d <= '9'; d = peek_char2 ()) {
		if (number_pos == max_number_size)
			Error_NumericConstantTooLong ();

		number_builder [number_pos++] = (char) d;
		get_char ();
		seen_digits = true;
	}

	return seen_digits;
}
// True when `e' is the character code of a hexadecimal digit
// (0-9, a-f or A-F).
static bool is_hex (int e)
{
	if (e >= '0' && e <= '9')
		return true;

	if (e >= 'a' && e <= 'f')
		return true;

	return e >= 'A' && e <= 'F';
}
// Maps a real literal suffix character to its type code: f/F to Single,
// d/D to Double, m/M to Decimal. Any other value gives TypeCode.Empty.
static TypeCode real_type_suffix (int c)
{
	if (c == 'F' || c == 'f')
		return TypeCode.Single;

	if (c == 'D' || c == 'd')
		return TypeCode.Double;

	if (c == 'M' || c == 'm')
		return TypeCode.Decimal;

	return TypeCode.Empty;
}
// Consumes an optional integer suffix (U, L, or both, in either case and
// order) and wraps `ul' in the literal type selected by the suffix and by
// the magnitude of the value.
//   ul   the parsed value
//   c    the next input character, not yet consumed; -1 skips the suffix scan
//        (presumably meaning end of input - confirm)
//   loc  location stored in the returned literal
1305 ILiteralConstant integer_type_suffix (ulong ul, int c, Location loc)
1307 bool is_unsigned = false;
1308 bool is_long = false;
1310 if (c != -1){
1311 bool scanning = true;
1312 do {
1313 switch (c){
// A repeated U (or L below) ends the scan, but that character is still
// consumed by get_char ().
1314 case 'U': case 'u':
1315 if (is_unsigned)
1316 scanning = false;
1317 is_unsigned = true;
1318 get_char ();
1319 break;
1321 case 'l':
1322 if (!is_unsigned){
1324 // if we have not seen anything in between
1325 // report this error
// Note: the warning uses the tokenizer's current Location, not `loc'.
1327 Report.Warning (78, 4, Location, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
1330 goto case 'L';
1332 case 'L':
1333 if (is_long)
1334 scanning = false;
1335 is_long = true;
1336 get_char ();
1337 break;
// Any other character ends the suffix and is left in the input.
1339 default:
1340 scanning = false;
1341 break;
1343 c = peek_char ();
1344 } while (scanning);
// Both suffixes given: always ulong.
1347 if (is_long && is_unsigned){
1348 return new ULongLiteral (context.BuiltinTypes, ul, loc);
1351 if (is_unsigned){
1352 // uint if possible, or ulong else.
1354 if ((ul & 0xffffffff00000000) == 0)
1355 return new UIntLiteral (context.BuiltinTypes, (uint) ul, loc);
1356 else
1357 return new ULongLiteral (context.BuiltinTypes, ul, loc);
1358 } else if (is_long){
1359 // long if possible, ulong otherwise
1360 if ((ul & 0x8000000000000000) != 0)
1361 return new ULongLiteral (context.BuiltinTypes, ul, loc);
1362 else
1363 return new LongLiteral (context.BuiltinTypes, (long) ul, loc);
1364 } else {
1365 // int, uint, long or ulong in that order
// The value fits in 32 bits: bit 31 decides between int and uint.
1366 if ((ul & 0xffffffff00000000) == 0){
1367 uint ui = (uint) ul;
1369 if ((ui & 0x80000000) != 0)
1370 return new UIntLiteral (context.BuiltinTypes, ui, loc);
1371 else
1372 return new IntLiteral (context.BuiltinTypes, (int) ui, loc);
1373 } else {
// More than 32 bits: bit 63 decides between long and ulong.
1374 if ((ul & 0x8000000000000000) != 0)
1375 return new ULongLiteral (context.BuiltinTypes, ul, loc);
1376 else
1377 return new LongLiteral (context.BuiltinTypes, (long) ul, loc);
//
// Converts the decimal digits collected in number_builder into an
// integer literal. `c' is the next char in the input; it is handed to
// integer_type_suffix which decides whether a special type was
// requested and picks the best representation for the value.
//
ILiteralConstant adjust_int (int c, Location loc)
{
	try {
		//
		// Up to 9 decimal digits can never exceed uint.MaxValue, so a
		// single checked ulong accumulation yields the same value for
		// short numbers as a separate uint path would.
		//
		ulong value = (uint) (number_builder [0] - '0');

		for (int i = 1; i < number_pos; i++)
			value = checked ((value * 10) + ((uint)(number_builder [i] - '0')));

		return integer_type_suffix (value, c, loc);
	} catch (OverflowException) {
		Error_NumericConstantTooLong ();
		return new IntLiteral (context.BuiltinTypes, 0, loc);
	} catch (FormatException) {
		Report.Error (1013, Location, "Invalid number");
		return new IntLiteral (context.BuiltinTypes, 0, loc);
	}
}
//
// Converts the text collected in number_builder into a real literal of
// the type selected by `t' (Decimal, Single, anything else is Double).
//
// An out-of-range value reports error 594 and yields a zero literal.
// Malformed text (for example an exponent without digits such as `1e')
// reports error 1013 and yields a zero literal as well, instead of
// letting the FormatException escape from the tokenizer.
//
ILiteralConstant adjust_real (TypeCode t, Location loc)
{
	string s = new string (number_builder, 0, number_pos);
	const string error_details = "Floating-point constant is outside the range of type `{0}'";

	switch (t){
	case TypeCode.Decimal:
		try {
			return new DecimalLiteral (context.BuiltinTypes, decimal.Parse (s, styles, csharp_format_info), loc);
		} catch (OverflowException) {
			Report.Error (594, Location, error_details, "decimal");
			return new DecimalLiteral (context.BuiltinTypes, 0, loc);
		} catch (FormatException) {
			Report.Error (1013, Location, "Invalid number");
			return new DecimalLiteral (context.BuiltinTypes, 0, loc);
		}
	case TypeCode.Single:
		try {
			return new FloatLiteral (context.BuiltinTypes, float.Parse (s, styles, csharp_format_info), loc);
		} catch (OverflowException) {
			Report.Error (594, Location, error_details, "float");
			return new FloatLiteral (context.BuiltinTypes, 0, loc);
		} catch (FormatException) {
			Report.Error (1013, Location, "Invalid number");
			return new FloatLiteral (context.BuiltinTypes, 0, loc);
		}
	default:
		try {
			return new DoubleLiteral (context.BuiltinTypes, double.Parse (s, styles, csharp_format_info), loc);
		} catch (OverflowException) {
			Report.Error (594, loc, error_details, "double");
			return new DoubleLiteral (context.BuiltinTypes, 0, loc);
		} catch (FormatException) {
			Report.Error (1013, Location, "Invalid number");
			return new DoubleLiteral (context.BuiltinTypes, 0, loc);
		}
	}
}
//
// Reads a hexadecimal literal. The caller has consumed the leading `0'
// and the `x'/`X' is the pending character. The digits are collected in
// number_builder and any integer suffix is handled by
// integer_type_suffix.
//
// A literal with more digits than number_builder is expected to hold
// (max_number_size) is consumed completely but reported as too long,
// rather than being written past the buffer limit.
//
ILiteralConstant handle_hex (Location loc)
{
	int d;
	ulong ul;
	bool too_long = false;

	// Skip the `x'
	get_char ();
	while ((d = peek_char ()) != -1 && is_hex (d)) {
		if (number_pos == max_number_size)
			too_long = true;
		else
			number_builder [number_pos++] = (char) d;

		get_char ();
	}

	if (too_long) {
		Error_NumericConstantTooLong ();
		return new IntLiteral (context.BuiltinTypes, 0, loc);
	}

	string s = new String (number_builder, 0, number_pos);

	try {
		if (number_pos <= 8)
			ul = System.UInt32.Parse (s, NumberStyles.HexNumber);
		else
			ul = System.UInt64.Parse (s, NumberStyles.HexNumber);

		return integer_type_suffix (ul, peek_char (), loc);
	} catch (OverflowException){
		Error_NumericConstantTooLong ();
		return new IntLiteral (context.BuiltinTypes, 0, loc);
	} catch (FormatException) {
		// An empty digit sequence (`0x' alone) ends up here
		Report.Error (1013, Location, "Invalid number");
		return new IntLiteral (context.BuiltinTypes, 0, loc);
	}
}
//
// Invoked if we know we have .digits or digits. `c' is the first
// character of the number and has already been consumed.
//
// Stores the resulting literal in `val' and returns Token.LITERAL.
//
int is_number (int c)
{
	ILiteralConstant res;

#if FULL_AST
	int read_start = reader.Position - 1;
#endif
	number_pos = 0;
	var loc = Location;

	if (c >= '0' && c <= '9') {
		if (c == '0') {
			int next = peek_char ();

			if (next == 'x' || next == 'X') {
				val = res = handle_hex (loc);
#if FULL_AST
				res.ParsedValue = reader.ReadChars (read_start, reader.Position - 1);
#endif

				return Token.LITERAL;
			}
		}

		decimal_digits (c);
		c = get_char ();
	}

	//
	// We need to handle the case of
	// "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)
	//
	bool is_real = false;
	if (c == '.') {
		if (!decimal_digits ('.')) {
			// No digit after the dot: hand the dot back and finish as integer
			putback ('.');
			number_pos--;
			val = res = adjust_int (-1, loc);

#if FULL_AST
			res.ParsedValue = reader.ReadChars (read_start, reader.Position - 1);
#endif
			return Token.LITERAL;
		}

		is_real = true;
		c = get_char ();
	}

	if (c == 'e' || c == 'E') {
		is_real = true;
		if (number_pos == max_number_size)
			Error_NumericConstantTooLong ();
		number_builder [number_pos++] = (char) c;
		c = get_char ();

		//
		// The exponent always gets an explicit sign in the buffer. When
		// the sign was present in the input it is dropped from `c' so
		// that decimal_digits does not store it a second time.
		//
		char sign = '+';
		if (c == '+' || c == '-') {
			sign = (char) c;
			c = -1;
		}

		if (number_pos == max_number_size)
			Error_NumericConstantTooLong ();
		number_builder [number_pos++] = sign;

		decimal_digits (c);
		c = get_char ();
	}

	var type = real_type_suffix (c);
	if (type == TypeCode.Empty && !is_real) {
		putback (c);
		res = adjust_int (c, loc);
	} else {
		is_real = true;

		// A real suffix stays consumed, anything else goes back
		if (type == TypeCode.Empty)
			putback (c);

		res = adjust_real (type, loc);
	}

	val = res;

#if FULL_AST
	res.ParsedValue = reader.ReadChars (read_start, reader.Position - (type == TypeCode.Empty ? 1 : 0));
#endif

	return Token.LITERAL;
}
//
// Reads the hexadecimal digits of an escape sequence. The pending
// character (the `x', `u' or `U') is consumed first.
//
// count ==  4 or 8: exactly that many hex digits are required
// count == -1:      one to four hex digits are accepted (`\x' form)
//
// On a non-hex character `error' is set and 0 is returned. For the
// 8 digit form a value above 0x10FFFF is an error too, and a value
// above 0xFFFF is split: the high surrogate is returned and the low
// surrogate is stored in `surrogate' (0 otherwise).
//
int getHex (int count, out int surrogate, out bool error)
{
	int top = count != -1 ? count : 4;

	// 64 bits so that eight digits (up to 0xFFFFFFFF) cannot wrap
	// around and slip past the range check below.
	long total = 0;

	get_char ();
	error = false;
	surrogate = 0;

	for (int i = 0; i < top; i++) {
		int c = get_char ();
		int digit;

		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'A' && c <= 'F')
			digit = c - 'A' + 10;
		else if (c >= 'a' && c <= 'f')
			digit = c - 'a' + 10;
		else {
			error = true;
			return 0;
		}

		total = (total * 16) + digit;

		if (count == -1) {
			// Variable length form stops at the first non-hex character
			int p = peek_char ();
			if (p == -1 || !is_hex (p))
				break;
		}
	}

	if (top == 8) {
		if (total > 0x0010FFFF) {
			error = true;
			return 0;
		}

		if (total >= 0x00010000) {
			surrogate = (int) ((total - 0x00010000) % 0x0400 + 0xDC00);
			total = (total - 0x00010000) / 0x0400 + 0xD800;
		}
	}

	return (int) total;
}
//
// Decodes the escape sequence introduced by `c'. When `c' is not a
// backslash it is returned unchanged. Otherwise the pending character
// selects the escape; `surrogate' receives the low surrogate when a
// unicode escape needs two UTF-16 units and is 0 in every other case.
//
// An unknown escape reports error 1009 and returns the pending
// character without consuming it.
//
int escape (int c, out int surrogate)
{
	int next = peek_char ();
	surrogate = 0;

	if (c != '\\')
		return c;

	if (next == 'u' || next == 'U')
		return EscapeUnicode (next, out surrogate);

	if (next == 'x') {
		bool bad_hex;
		int hex = getHex (-1, out surrogate, out bad_hex);
		if (!bad_hex)
			return hex;

		// A malformed \x is reported by the default label below
	}

	int v;
	switch (next) {
	case 'a':
		v = '\a';
		break;
	case 'b':
		v = '\b';
		break;
	case 'n':
		v = '\n';
		break;
	case 't':
		v = '\t';
		break;
	case 'v':
		v = '\v';
		break;
	case 'r':
		v = '\r';
		break;
	case '\\':
		v = '\\';
		break;
	case 'f':
		v = '\f';
		break;
	case '0':
		v = 0;
		break;
	case '"':
		v = '"';
		break;
	case '\'':
		v = '\'';
		break;
	default:
		surrogate = 0;
		Report.Error (1009, Location, "Unrecognized escape sequence `\\{0}'", ((char)next).ToString ());
		return next;
	}

	// Simple escape: consume the character that selected it
	get_char ();
	surrogate = 0;
	return v;
}
//
// Decodes a unicode escape: eight hex digits when `ch' is `U', four
// otherwise. Reports error 1009 when the digits are malformed.
//
int EscapeUnicode (int ch, out int surrogate)
{
	bool error;
	int digits = ch == 'U' ? 8 : 4;

	ch = getHex (digits, out surrogate, out error);
	if (error)
		Report.Error (1009, Location, "Unrecognized escape sequence");

	return ch;
}
//
// Returns the next character, taking the pushed back one first when
// there is any. Both "\r\n" and a lone "\r" are returned as a single
// '\n'. Line and column counters are updated on the way.
//
int get_char ()
{
	int ch = putback_char;
	if (ch == -1)
		ch = reader.Read ();
	else
		putback_char = -1;

	if (ch != '\r' && ch != '\n') {
		col++;
		return ch;
	}

	// Swallow the '\n' of a "\r\n" pair
	if (ch == '\r' && peek_char () == '\n')
		putback_char = -1;

	advance_line ();
	return '\n';
}
//
// Moves the position to the start of the next line and remembers the
// column where the previous line ended.
//
void advance_line ()
{
	previous_col = col;
	col = 0;

	++line;
	++ref_line;
}
//
// Returns the next character without consuming it. The character is
// read from the reader and parked in putback_char when nothing is
// parked there yet.
//
int peek_char ()
{
	if (putback_char != -1)
		return putback_char;

	putback_char = reader.Read ();
	return putback_char;
}
//
// Like peek_char but leaves putback_char untouched: the parked
// character wins, otherwise the reader is only peeked.
//
int peek_char2 ()
{
	return putback_char != -1 ? putback_char : reader.Peek ();
}
//
// Pushes `c' back so that the next get_char returns it again and
// rewinds the column (or line) counters accordingly. Only one
// character can be pending; a second putback dumps the state to the
// console and throws.
//
void putback (int c)
{
	if (putback_char != -1) {
		Console.WriteLine ("Col: " + col);
		Console.WriteLine ("Row: " + line);
		Console.WriteLine ("Name: " + ref_name.Name);
		Console.WriteLine ("Current [{0}] putting back [{1}] ", putback_char, c);
		throw new Exception ("This should not happen putback on putback");
	}

	bool crossing_line = c == '\n' || col == 0;
	if (!crossing_line) {
		col--;
	} else {
		// It won't happen though.
		line--;
		ref_line--;
		col = previous_col;
	}

	putback_char = c;
}
//
// Returns true while there is input left, or always when
// CompleteOnEOF is set.
//
public bool advance ()
{
	if (peek_char () != -1)
		return true;

	return CompleteOnEOF;
}
//
// Value attached to the most recently scanned token.
//
public Object Value {
	get { return val; }
}
//
// Method form of the Value property; returns the same `val'.
//
public Object value ()
{
	Object current = val;
	return current;
}
//
// Scans the next token, remembers it in current_token and returns it.
//
public int token ()
{
	return current_token = xtoken ();
}
//
// Skips leading blanks and collects a run of lowercase letters (the
// directive name) into id_builder. Returns the number of characters
// stored; `c' receives the first character after the name.
//
// A unicode escape found inside the name is decoded. The peeked `u' or
// `U' is what selects the 4 or 8 digit form, so that is the character
// handed to EscapeUnicode (not the backslash).
//
int TokenizePreprocessorIdentifier (out int c)
{
	// skip over white space
	do {
		c = get_char ();
	} while (c == ' ' || c == '\t');

	int pos = 0;
	while (c != -1 && c >= 'a' && c <= 'z') {
		id_builder[pos++] = (char) c;
		c = get_char ();
		if (c != '\\')
			continue;

		int peek = peek_char ();
		if (peek != 'U' && peek != 'u')
			continue;

		int surrogate;
		c = EscapeUnicode (peek, out surrogate);
		if (surrogate != 0) {
			if (is_identifier_part_character ((char) c)) {
				id_builder[pos++] = (char) c;
			}

			c = surrogate;
		}
	}

	return pos;
}
//
// Reads a preprocessor directive name and, unless the directive parses
// its own arguments, the rest of the line as its argument. A trailing
// single-line comment is dropped and the argument is trimmed.
//
// Unicode escapes in an identifier argument are decoded; the peeked
// `u'/`U' is passed to EscapeUnicode so that the 8 digit `\U' form is
// read with the right length.
//
PreprocessorDirective get_cmd_arg (out string arg)
{
	int c;

	tokens_seen = false;
	arg = "";

	var cmd = GetPreprocessorDirective (id_builder, TokenizePreprocessorIdentifier (out c));

	if ((cmd & PreprocessorDirective.CustomArgumentsParsing) != 0)
		return cmd;

	// skip over white space
	while (c == ' ' || c == '\t')
		c = get_char ();

	// > 0: identifier argument expected, 0: none, -1: escapes are no longer decoded
	int has_identifier_argument = (int)(cmd & PreprocessorDirective.RequiresArgument);
	int pos = 0;

	while (c != -1 && c != '\n') {
		if (c == '\\' && has_identifier_argument >= 0) {
			if (has_identifier_argument != 0) {
				has_identifier_argument = 1;

				int peek = peek_char ();
				if (peek == 'U' || peek == 'u') {
					int surrogate;
					c = EscapeUnicode (peek, out surrogate);
					if (surrogate != 0) {
						if (is_identifier_part_character ((char) c)) {
							if (pos == value_builder.Length)
								Array.Resize (ref value_builder, pos * 2);

							value_builder[pos++] = (char) c;
						}

						c = surrogate;
					}
				}
			} else {
				has_identifier_argument = -1;
			}
		} else if (c == '/' && peek_char () == '/') {
			//
			// Eat single-line comments
			//
			get_char ();
			do {
				c = get_char ();
			} while (c != -1 && c != '\n');

			break;
		}

		if (pos == value_builder.Length)
			Array.Resize (ref value_builder, pos * 2);

		value_builder[pos++] = (char) c;
		c = get_char ();
	}

	if (pos != 0) {
		if (pos > max_id_size)
			arg = new string (value_builder, 0, pos);
		else
			arg = InternIdentifier (value_builder, pos);

		// Eat any trailing whitespaces
		arg = arg.Trim (simple_whitespaces);
	}

	return cmd;
}
//
// Handles the #line directive. `arg' is one of `default', `hidden',
// a line number, or a line number followed by a file name.
//
// Returns false when the argument is empty or cannot be processed.
//
bool PreProcessLine (string arg)
{
	if (arg.Length == 0)
		return false;

	switch (arg) {
	case "default":
		ref_line = line;
		ref_name = file_name;
		hidden = false;
		Location.Push (file_name, ref_name);
		return true;

	case "hidden":
		hidden = true;
		return true;
	}

	try {
		int space = arg.IndexOf (' ');

		if (space > 0) {
			// `number "file"'
			ref_line = System.Int32.Parse (arg.Substring (0, space));

			char [] quotes = { '\"' };
			string name = arg.Substring (space + 1). Trim (quotes);

			ref_name = context.LookupFile (file_name, name);
			file_name.AddIncludeFile (ref_name);
			hidden = false;
			Location.Push (file_name, ref_name);
		} else {
			// Line number only
			ref_line = System.Int32.Parse (arg);
			hidden = false;
		}
	} catch {
		return false;
	}

	return true;
}
//
// Handles #define and #undef
//
// `ident' is validated first: it must not be empty, `true' or `false',
// must not contain white space and must be a well-formed identifier.
// An invalid identifier is reported once and never registered. The
// symbol is only (un)defined when `caller_is_taking' is set.
//
void PreProcessDefinition (bool is_define, string ident, bool caller_is_taking)
{
	if (ident.Length == 0 || ident == "true" || ident == "false"){
		Report.Error (1001, Location, "Missing identifier to pre-processor directive");
		return;
	}

	if (ident.IndexOfAny (simple_whitespaces) != -1){
		Error_EndLineExpected ();
		return;
	}

	bool well_formed = is_identifier_start_character (ident [0]);
	for (int i = 1; well_formed && i < ident.Length; i++)
		well_formed = is_identifier_part_character (ident [i]);

	if (!well_formed) {
		Report.Error (1001, Location, "Identifier expected: {0}", ident);
		return;
	}

	if (!caller_is_taking)
		return;

	if (is_define) {
		//
		// #define ident
		//
		if (context.Settings.IsConditionalSymbolDefined (ident))
			return;

		file_name.AddDefine (ident);
	} else {
		//
		// #undef ident
		//
		file_name.AddUndefine (ident);
	}
}
//
// Reads two hexadecimal digits and returns them as one byte. On the
// first non-hex character `error' is set, 0 is returned and nothing
// further is read.
//
byte read_hex (out bool error)
{
	int total = 0;

	for (int n = 0; n < 2; n++) {
		int c = get_char ();
		int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'A') && (c <= 'F'))
			digit = c - 'A' + 10;
		else if ((c >= 'a') && (c <= 'f'))
			digit = c - 'a' + 10;
		else {
			error = true;
			return 0;
		}

		total = total * 16 + digit;
	}

	error = false;
	return (byte) total;
}
//
// Parses #pragma checksum
//
// Returns false on any syntax error. On success the checksum is stored
// on the referenced file and the current file is flagged as auto
// generated.
//
bool ParsePragmaChecksum ()
{
	//
	// The syntax is ` "foo.txt" "{guid}" "hash"'
	//
	int c = get_char ();

	if (c != '"')
		return false;

	string_builder.Length = 0;
	while (c != -1 && c != '\n') {
		c = get_char ();
		if (c == '"') {
			c = get_char ();
			break;
		}

		string_builder.Append ((char) c);
	}

	if (string_builder.Length == 0) {
		Report.Warning (1709, 1, Location, "Filename specified for preprocessor directive is empty");
	}

	// TODO: Any white-spaces count
	if (c != ' ')
		return false;

	SourceFile file = context.LookupFile (file_name, string_builder.ToString ());

	if (get_char () != '"' || get_char () != '{')
		return false;

	bool error;
	byte[] guid_bytes = new byte [16];
	int i = 0;

	// First group: 8 hex digits
	for (; i < 4; i++) {
		guid_bytes [i] = read_hex (out error);
		if (error)
			return false;
	}

	if (get_char () != '-')
		return false;

	//
	// Three groups of 4 hex digits, each followed by a dash. Every
	// group fills two consecutive bytes; the first store advances
	// the index and the loop increment advances it past the second.
	//
	for (; i < 10; i++) {
		guid_bytes [i++] = read_hex (out error);
		if (error)
			return false;

		guid_bytes [i] = read_hex (out error);
		if (error)
			return false;

		if (get_char () != '-')
			return false;
	}

	// Last group: 12 hex digits
	for (; i < 16; i++) {
		guid_bytes [i] = read_hex (out error);
		if (error)
			return false;
	}

	if (get_char () != '}' || get_char () != '"')
		return false;

	// TODO: Any white-spaces count
	c = get_char ();
	if (c != ' ')
		return false;

	if (get_char () != '"')
		return false;

	// Any length of checksum
	List<byte> checksum_bytes = new List<byte> (16);

	c = peek_char ();
	while (c != '"' && c != -1) {
		checksum_bytes.Add (read_hex (out error));
		if (error)
			return false;

		c = peek_char ();
	}

	if (c == '/') {
		ReadSingleLineComment ();
	} else if (get_char () != '"') {
		return false;
	}

	file.SetChecksum (guid_bytes, checksum_bytes.ToArray ());
	ref_name.AutoGenerated = true;
	return true;
}
//
// Compares the start of id_builder with `identifier'. Only
// identifier.Length characters are looked at, the caller is expected
// to have checked the token length.
//
bool IsTokenIdentifierEqual (char[] identifier)
{
	int i = 0;
	while (i < identifier.Length) {
		if (id_builder[i] != identifier[i])
			return false;

		++i;
	}

	return true;
}
//
// Reads one warning number of a `#pragma warning' list. `c' is the
// current character on entry and the first unprocessed one on exit.
//
// Returns the number, or -1 when there is none (a comment or garbage,
// in which case the rest of the line is skipped) or it overflows.
// An optional comma and surrounding blanks after the number are
// consumed.
//
int TokenizePragmaNumber (ref int c)
{
	number_pos = 0;

	if (c < '0' || c > '9') {
		if (c == '/') {
			ReadSingleLineComment ();
		} else {
			Report.Warning (1692, 1, Location, "Invalid number");

			// Read everything till the end of the line or file
			do {
				c = get_char ();
			} while (c != -1 && c != '\n');
		}

		return -1;
	}

	decimal_digits (c);

	int number;
	uint value = (uint) (number_builder[0] - '0');

	try {
		for (int i = 1; i < number_pos; i++) {
			value = checked ((value * 10) + ((uint) (number_builder[i] - '0')));
		}

		number = (int) value;
	} catch (OverflowException) {
		Error_NumericConstantTooLong ();
		number = -1;
	}

	c = get_char ();

	// skip over white space
	while (c == ' ' || c == '\t')
		c = get_char ();

	// optional list separator
	if (c == ',')
		c = get_char ();

	// skip over white space
	while (c == ' ' || c == '\t')
		c = get_char ();

	return number;
}
//
// Skips the rest of the line. Warns (1696) first when the pending
// character is not the second slash of a `//' comment.
//
void ReadSingleLineComment ()
{
	if (peek_char () != '/')
		Report.Warning (1696, 1, Location, "Single-line comment or end-of-line expected");

	// Read everything till the end of the line or file
	for (;;) {
		int c = get_char ();
		if (c == -1 || c == '\n')
			break;
	}
}
/// <summary>
/// Handles #pragma directive. Recognizes `warning disable',
/// `warning restore' and `checksum'; anything else is reported
/// with a warning.
/// </summary>
void ParsePragmaDirective (string arg)
{
	int c;
	int length = TokenizePreprocessorIdentifier (out c);

	bool is_warning = length == pragma_warning.Length && IsTokenIdentifierEqual (pragma_warning);
	if (!is_warning) {
		//
		// #pragma checksum
		//
		if (length == pragma_checksum.Length && IsTokenIdentifierEqual (pragma_checksum)) {
			if (c != ' ' || !ParsePragmaChecksum ()) {
				Report.Warning (1695, 1, Location,
					"Invalid #pragma checksum syntax. Expected \"filename\" \"{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}\" \"XXXX...\"");
			}

			return;
		}

		Report.Warning (1633, 1, Location, "Unrecognized #pragma directive");
		return;
	}

	//
	// #pragma warning disable
	// #pragma warning restore
	//
	length = TokenizePreprocessorIdentifier (out c);

	bool disable = false;
	bool restore = false;
	if (length == pragma_warning_disable.Length) {
		disable = IsTokenIdentifierEqual (pragma_warning_disable);
		restore = !disable && IsTokenIdentifierEqual (pragma_warning_restore);
	}

	if (!disable && !restore) {
		Report.Warning (1634, 1, Location, "Expected disable or restore");
		return;
	}

	// skip over white space
	while (c == ' ' || c == '\t')
		c = get_char ();

	var loc = Location;

	if (c == '\n' || c == '/') {
		if (c == '/')
			ReadSingleLineComment ();

		//
		// Disable/Restore all warnings
		//
		if (disable)
			Report.RegisterWarningRegion (loc).WarningDisable (loc.Row);
		else
			Report.RegisterWarningRegion (loc).WarningEnable (loc.Row);

		return;
	}

	//
	// Disable/Restore a warning or group of warnings
	//
	int code;
	do {
		code = TokenizePragmaNumber (ref c);
		if (code <= 0)
			continue;

		if (disable)
			Report.RegisterWarningRegion (loc).WarningDisable (loc, code, Report);
		else
			Report.RegisterWarningRegion (loc).WarningEnable (loc, code, Report);
	} while (code >= 0 && c != '\n' && c != -1);
}
//
// Value of a single preprocessor operand: the literals `true' and
// `false', otherwise whether `s' is a defined conditional symbol.
//
bool eval_val (string s)
{
	switch (s) {
	case "true":
		return true;
	case "false":
		return false;
	default:
		return file_name.IsConditionalDefined (context, s);
	}
}
//
// primary := `(' expr `)' | identifier
//
// Evaluates the primary at the start of `s' and leaves the
// unconsumed remainder in `s'. Anything else is an invalid directive.
//
bool pp_primary (ref string s)
{
	s = s.Trim ();
	int len = s.Length;

	if (len == 0) {
		Error_InvalidDirective ();
		return false;
	}

	char first = s [0];

	if (first == '(') {
		s = s.Substring (1);
		bool inner = pp_expr (ref s, false);
		if (s.Length > 0 && s [0] == ')') {
			s = s.Substring (1);
			return inner;
		}

		Error_InvalidDirective ();
		return false;
	}

	if (!is_identifier_start_character (first)) {
		Error_InvalidDirective ();
		return false;
	}

	// Find the end of the identifier
	int end = 1;
	while (end < len && is_identifier_part_character (s [end]))
		end++;

	if (end == len) {
		// The identifier is all that is left
		bool whole = eval_val (s);
		s = "";
		return whole;
	}

	bool result = eval_val (s.Substring (0, end));
	s = s.Substring (end);
	return result;
}
//
// unary := `!' primary | primary
//
// A leading `!=' or an empty input is an invalid directive.
//
bool pp_unary (ref string s)
{
	s = s.Trim ();
	int len = s.Length;

	if (len == 0) {
		Error_InvalidDirective ();
		return false;
	}

	if (s [0] != '!')
		return pp_primary (ref s);

	if (len > 1 && s [1] == '=') {
		Error_InvalidDirective ();
		return false;
	}

	s = s.Substring (1);
	return ! pp_primary (ref s);
}
//
// eq := unary [ (`==' | `!=') unary ]
//
// A single `=' (or `==' with nothing after it) is an invalid directive.
//
bool pp_eq (ref string s)
{
	bool left = pp_unary (ref s);

	s = s.Trim ();
	int len = s.Length;
	if (len == 0)
		return left;

	bool second_is_eq = len > 1 && s [1] == '=';

	if (s [0] == '=') {
		if (len > 2 && second_is_eq) {
			s = s.Substring (2);
			return left == pp_unary (ref s);
		}

		Error_InvalidDirective ();
		return false;
	}

	if (s [0] == '!' && second_is_eq) {
		s = s.Substring (2);
		return left != pp_unary (ref s);
	}

	return left;
}
//
// and := eq [ `&&' and ]
//
// The right operand is always parsed (non short-circuit `&') so that
// the whole expression text gets consumed.
//
bool pp_and (ref string s)
{
	bool left = pp_eq (ref s);

	s = s.Trim ();
	int len = s.Length;
	if (len == 0 || s [0] != '&')
		return left;

	if (len > 2 && s [1] == '&') {
		s = s.Substring (2);
		return (left & pp_and (ref s));
	}

	Error_InvalidDirective ();
	return false;
}
//
// Evaluates an expression for `#if' or `#elif'
//
// expr := and [ `||' expr ]
//
// When `isTerm' is set any text left after the expression is reported
// as "end-of-line expected". The right operand of `||' is always
// parsed (non short-circuit `|').
//
bool pp_expr (ref string s, bool isTerm)
{
	bool left = pp_and (ref s);

	s = s.Trim ();
	int len = s.Length;
	if (len == 0)
		return left;

	if (s [0] == '|') {
		if (len > 2 && s [1] == '|') {
			s = s.Substring (2);
			return left | pp_expr (ref s, isTerm);
		}

		Error_InvalidDirective ();
		return false;
	}

	if (isTerm) {
		Error_EndLineExpected ();
		return false;
	}

	return left;
}
//
// Evaluates a complete preprocessor expression. The result is false
// whenever something other than white space is left unconsumed.
//
bool eval (string s)
{
	bool result = pp_expr (ref s, true);
	return s.Trim ().Length == 0 && result;
}
// Reports error 1021 at the current location.
void Error_NumericConstantTooLong ()
{
	Report.Error (
		1021, Location,
		"Integral constant is too large");
}
// Reports error 1517 at the current location.
void Error_InvalidDirective ()
{
	Report.Error (
		1517, Location,
		"Invalid preprocessor directive");
}
// Reports error 1028 at the current location; `extra' is the detail
// shown in parentheses.
void Error_UnexpectedDirective (string extra)
{
	Report.Error (1028, Location, "Unexpected processor directive ({0})", extra);
}
// Reports error 1032 at the current location.
void Error_TokensSeen ()
{
	Report.Error (
		1032, Location,
		"Cannot define or undefine preprocessor symbols after first token in file");
}
// Reports error 1040 at the current location.
void Eror_WrongPreprocessorLocation ()
{
	Report.Error (
		1040, Location,
		"Preprocessor directives must appear as the first non-whitespace character on a line");
}
// Reports error 1025 at the current location.
void Error_EndLineExpected ()
{
	Report.Error (
		1025, Location,
		"Single-line comment or end-of-line expected");
}
//
// Raises a warning when tokenizer found documentation comment
// on unexpected place. The warning is issued only on the transition
// into the error state, so it is not repeated while doc_state stays
// XmlCommentState.Error.
//
void WarningMisplacedComment (Location loc)
{
	if (doc_state == XmlCommentState.Error)
		return;

	doc_state = XmlCommentState.Error;
	Report.Warning (1587, 2, loc, "XML comment is not placed on a valid language element");
}
//
// Processes one preprocessor directive.
//
// if true, then the code continues processing the code
// if false, the code stays in a loop until another directive is
// reached.
// When caller_is_taking is false we ignore all directives except the ones
// which can help us to identify where the #if block ends
//
// Every open #if/#region has one entry on ifstack, a combination of
// the REGION, TAKING, PARENT_TAKING and ELSE_SEEN flags.
//
bool ParsePreprocessingDirective (bool caller_is_taking)
{
	string arg;
	var directive = get_cmd_arg (out arg);

	// Evaluated once; only the cases that need an open block look at it
	bool nothing_open = ifstack == null || ifstack.Count == 0;
	int top;

	//
	// The first group of pre-processing instructions is always processed
	//
	switch (directive) {
	case PreprocessorDirective.Region:
	case PreprocessorDirective.If:
		// A #region is handled as an #if whose condition is `true'
		bool is_region = directive == PreprocessorDirective.Region;
		if (is_region)
			arg = "true";

		if (ifstack == null)
			ifstack = new Stack<int> (2);

		int flags = is_region ? REGION : 0;
		if (ifstack.Count == 0 || (ifstack.Peek () & TAKING) != 0)
			flags |= PARENT_TAKING;

		if (eval (arg) && caller_is_taking) {
			ifstack.Push (flags | TAKING);
			return true;
		}

		ifstack.Push (flags);
		return false;

	case PreprocessorDirective.Endregion:
		if (nothing_open) {
			Error_UnexpectedDirective ("no #region for this #endregion");
			return true;
		}

		top = ifstack.Pop ();
		if ((top & REGION) == 0)
			Report.Error (1027, Location, "Expected `#endif' directive");

		return caller_is_taking;

	case PreprocessorDirective.Endif:
		if (nothing_open) {
			Error_UnexpectedDirective ("no #if for this #endif");
			return true;
		}

		top = ifstack.Pop ();
		if ((top & REGION) != 0)
			Report.Error (1038, Location, "#endregion directive expected");

		if (arg.Length != 0)
			Error_EndLineExpected ();

		if (ifstack.Count == 0)
			return true;

		// Back in the enclosing block: continue as that block was doing
		return (ifstack.Peek () & TAKING) != 0;

	case PreprocessorDirective.Elif:
		if (nothing_open) {
			Error_UnexpectedDirective ("no #if for this #elif");
			return true;
		}

		top = ifstack.Pop ();

		if ((top & REGION) != 0) {
			Report.Error (1038, Location, "#endregion directive expected");
			return true;
		}

		if ((top & ELSE_SEEN) != 0) {
			Error_UnexpectedDirective ("#elif not valid after #else");
			return true;
		}

		// The previous branch was taken: push an entry with no flags set
		if ((top & TAKING) != 0) {
			ifstack.Push (0);
			return false;
		}

		if (eval (arg) && ((top & PARENT_TAKING) != 0)) {
			ifstack.Push (top | TAKING);
			return true;
		}

		ifstack.Push (top);
		return false;

	case PreprocessorDirective.Else:
		if (nothing_open) {
			Error_UnexpectedDirective ("no #if for this #else");
			return true;
		}

		top = ifstack.Peek ();

		if ((top & REGION) != 0) {
			Report.Error (1038, Location, "#endregion directive expected");
			return true;
		}

		if ((top & ELSE_SEEN) != 0) {
			Error_UnexpectedDirective ("#else within #else");
			return true;
		}

		ifstack.Pop ();

		if (arg.Length != 0) {
			Error_EndLineExpected ();
			return true;
		}

		bool take_else = false;
		if ((top & PARENT_TAKING) != 0) {
			take_else = (top & TAKING) == 0;

			if (take_else)
				top |= TAKING;
			else
				top &= ~TAKING;
		}

		ifstack.Push (top | ELSE_SEEN);
		return take_else;

	case PreprocessorDirective.Define:
	case PreprocessorDirective.Undef:
		if (any_token_seen)
			Error_TokensSeen ();
		else
			PreProcessDefinition (directive == PreprocessorDirective.Define, arg, caller_is_taking);

		return caller_is_taking;

	case PreprocessorDirective.Invalid:
		Report.Error (1024, Location, "Wrong preprocessor directive");
		return true;
	}

	//
	// These are only processed if we are in a `taking' block
	//
	if (!caller_is_taking)
		return false;

	switch (directive){
	case PreprocessorDirective.Error:
		Report.Error (1029, Location, "#error: '{0}'", arg);
		return true;

	case PreprocessorDirective.Warning:
		Report.Warning (1030, 1, Location, "#warning: `{0}'", arg);
		return true;

	case PreprocessorDirective.Pragma:
		if (context.Settings.Version == LanguageVersion.ISO_1)
			Report.FeatureIsNotAvailable (context, Location, "#pragma");

		ParsePragmaDirective (arg);
		return true;

	case PreprocessorDirective.Line:
		if (!PreProcessLine (arg)) {
			Report.Error (
				1576, Location,
				"The line number specified for #line directive is missing or invalid");
		}

		return caller_is_taking;
	}

	throw new NotImplementedException (directive.ToString ());
}
//
// Scans a string literal whose opening quote has been consumed.
// `quoted' selects the verbatim (@"...") form: no escapes, `""' stands
// for one quote and newlines are allowed.
//
// Stores a StringLiteral in `val' and returns Token.LITERAL; returns
// Token.EOF on an unterminated literal and Token.ERROR when an escape
// decodes to -1.
//
private int consume_string (bool quoted)
{
	int len = 0;
	Location start_location = Location;
	if (quoted)
		start_location = start_location - 1;

#if FULL_AST
	int reader_pos = reader.Position;
#endif

	for (;;) {
		int ch = get_char ();

		if (ch == '"') {
			if (!(quoted && peek_char () == '"')) {
				// Closing quote: build the literal
				string text;
				if (len == 0)
					text = string.Empty;
				else if (len <= 4)
					text = InternIdentifier (value_builder, len);
				else
					text = new string (value_builder, 0, len);

				ILiteralConstant res = new StringLiteral (context.BuiltinTypes, text, start_location);
				val = res;
#if FULL_AST
				res.ParsedValue = quoted ?
					reader.ReadChars (reader_pos - 2, reader.Position - 1) :
					reader.ReadChars (reader_pos - 1, reader.Position);
#endif

				return Token.LITERAL;
			}

			// Doubled quote in a verbatim string: drop the second
			// one, the first is stored below
			get_char ();
		} else if (ch == '\n') {
			if (!quoted) {
				Report.Error (1010, Location, "Newline in constant");
				val = new StringLiteral (context.BuiltinTypes, new string (value_builder, 0, len), start_location);
				return Token.LITERAL;
			}
		} else if (ch == '\\' && !quoted) {
			int surrogate;
			ch = escape (ch, out surrogate);
			if (ch == -1)
				return Token.ERROR;

			if (surrogate != 0) {
				// Store the high surrogate now, the low one below
				if (len == value_builder.Length)
					Array.Resize (ref value_builder, len * 2);

				value_builder[len++] = (char) ch;
				ch = surrogate;
			}
		} else if (ch == -1) {
			Report.Error (1039, Location, "Unterminated string literal");
			return Token.EOF;
		}

		if (len == value_builder.Length)
			Array.Resize (ref value_builder, len * 2);

		value_builder[len++] = (char) ch;
	}
}
//
// Scans a regular (not @-quoted) identifier or keyword starting with
// `s'. Afterwards a doc_state of Allowed is switched to NotAllowed.
//
private int consume_identifier (int s)
{
	int kind = consume_identifier (s, false);

	if (doc_state == XmlCommentState.Allowed)
		doc_state = XmlCommentState.NotAllowed;

	return kind;
}
//
// Scans an identifier whose first character `c' has been consumed and
// returns either the keyword token or Token.IDENTIFIER, with a
// LocatedToken stored in `val'. `quoted' is set for @identifiers,
// which are never treated as keywords.
//
int consume_identifier (int c, bool quoted)
{
	//
	// This method is very performance sensitive. It accounts
	// for approximately 25% of all parser time
	//

	int len = 0;
	int start_column = quoted ? col - 1 : col;

	if (c == '\\') {
		int low;
		c = escape (c, out low);
		if (low != 0) {
			id_builder [len++] = (char) c;
			c = low;
		}
	}

	id_builder [len++] = (char) c;

	try {
		for (;;) {
			c = reader.Read ();

			// Fast path for plain ASCII identifier characters
			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9')) {
				id_builder [len++] = (char) c;
				continue;
			}

			if (c >= 0x80) {
				if (Char.IsLetter ((char) c) || Char.GetUnicodeCategory ((char) c) == UnicodeCategory.ConnectorPunctuation) {
					id_builder [len++] = (char) c;
					continue;
				}
			} else if (c == '\\') {
				int low;
				c = escape (c, out low);
				if (is_identifier_part_character ((char) c))
					id_builder[len++] = (char) c;

				if (low != 0)
					c = low;

				continue;
			}

			// Not part of the identifier: leave it for the next read
			putback_char = c;
			break;
		}
	} catch (IndexOutOfRangeException) {
		Report.Error (645, Location, "Identifier too long (limit is 512 chars)");
		--len;
		col += len;
	}

	col += len - 1;

	//
	// Optimization: avoids doing the keyword lookup
	// on uppercase letters
	//
	if (!quoted && id_builder [0] >= '_') {
		int keyword = GetKeyword (id_builder, len);
		if (keyword != -1) {
			val = LocatedToken.Create (null, ref_line, start_column);
			return keyword;
		}
	}

	string name = InternIdentifier (id_builder, len);
	val = LocatedToken.Create (name, ref_line, start_column);
	if (quoted && parsing_attribute_section)
		AddEscapedIdentifier (((LocatedToken) val).Location);

	return Token.IDENTIFIER;
}
//
// Returns the cached string for the first `length' characters of `charBuffer',
// creating the string and caching it when it has not been seen before.
//
// The cache is the `identifiers' array: one dictionary per identifier length,
// each created on first use.
//
static string InternIdentifier (char[] charBuffer, int length)
{
	string interned;
	var bucket = identifiers [length];

	if (bucket == null) {
		// TODO: this should be number of files dependant
		// corlib compilation peaks at 1000 and System.Core at 150
		int capacity = length > 20 ? 10 : 100;
		bucket = new Dictionary<char[], string> (capacity, new IdentifiersComparer (length));
		identifiers [length] = bucket;
	} else if (bucket.TryGetValue (charBuffer, out interned)) {
		return interned;
	}

	// The dictionary key gets its own copy of the characters because
	// consume_identifier passes in id_builder, which it refills on every call
	char[] key = new char [length];
	Array.Copy (charBuffer, key, length);

	interned = new string (charBuffer, 0, length);
	bucket.Add (key, interned);
	return interned;
}
// Reads characters with get_char () until a complete token is recognized and
// returns its Token value.
//
// Whitespace, `//' comments and block comments are skipped; with doc_processing
// set, documentation comments are collected into xml_comment_buffer on the way.
// A `#' is handed to ParsePreprocessingDirective. For most tokens `val' is set
// to a LocatedToken holding ref_line and col.
//
// At end of input the result is Token.EOF, unless CompleteOnEOF is set, in which
// case Token.GENERATE_COMPLETION is returned once and Token.COMPLETE_COMPLETION
// on later calls.
2866 public int xtoken ()
2868 int d, c;
2870 // Whether we have seen comments on the current line
2871 bool comments_seen = false;
2872 while ((c = get_char ()) != -1) {
2873 switch (c) {
2874 case '\t':
// Moves col to the smallest multiple of tab_size that is not below the current col
2875 col = ((col - 1 + tab_size) / tab_size) * tab_size;
2876 continue;
2878 case ' ':
2879 case '\f':
2880 case '\v':
2881 case 0xa0:
2882 case 0:
2883 case 0xFEFF: // Ignore BOM anywhere in the file
2884 continue;
2886 /* This is required for compatibility with .NET
2887 case 0xEF:
2888 if (peek_char () == 0xBB) {
2889 PushPosition ();
2890 get_char ();
2891 if (get_char () == 0xBF)
2892 continue;
2893 PopPosition ();
2895 break;
2897 case '\\':
2898 tokens_seen = true;
2899 return consume_identifier (c);
2901 case '{':
2902 val = LocatedToken.Create (ref_line, col);
2903 return Token.OPEN_BRACE;
2904 case '}':
2905 val = LocatedToken.Create (ref_line, col);
2906 return Token.CLOSE_BRACE;
2907 case '[':
2908 // To block doccomment inside attribute declaration.
2909 if (doc_state == XmlCommentState.Allowed)
2910 doc_state = XmlCommentState.NotAllowed;
2912 val = LocatedToken.Create (ref_line, col);
// Outside a block, or while scanning lambda arguments, the bracket is never the _EXPR form
2914 if (parsing_block == 0 || lambda_arguments_parsing)
2915 return Token.OPEN_BRACKET;
// Otherwise the following character (or the following token) selects between the two bracket tokens
2917 int next = peek_char ();
2918 switch (next) {
2919 case ']':
2920 case ',':
2921 return Token.OPEN_BRACKET;
2923 case ' ':
2924 case '\f':
2925 case '\v':
2926 case '\r':
2927 case '\n':
2928 case '/':
2929 next = peek_token ();
2930 if (next == Token.COMMA || next == Token.CLOSE_BRACKET)
2931 return Token.OPEN_BRACKET;
2933 return Token.OPEN_BRACKET_EXPR;
2934 default:
2935 return Token.OPEN_BRACKET_EXPR;
2937 case ']':
2938 LocatedToken.CreateOptional (ref_line, col, ref val);
2939 return Token.CLOSE_BRACKET;
2940 case '(':
2941 val = LocatedToken.Create (ref_line, col);
2943 // An expression version of parens can appear in block context only
2945 if (parsing_block != 0 && !lambda_arguments_parsing) {
2948 // Optimize most common case where we know that parens
2949 // is not special
2951 switch (current_token) {
2952 case Token.IDENTIFIER:
2953 case Token.IF:
2954 case Token.FOR:
2955 case Token.FOREACH:
2956 case Token.TYPEOF:
2957 case Token.WHILE:
2958 case Token.USING:
2959 case Token.DEFAULT:
2960 case Token.DELEGATE:
2961 case Token.OP_GENERICS_GT:
2962 return Token.OPEN_PARENS;
2965 // Optimize using peek
2966 int xx = peek_char ();
2967 switch (xx) {
2968 case '(':
2969 case '\'':
2970 case '"':
2971 case '0':
2972 case '1':
2973 return Token.OPEN_PARENS;
// Undecided: scan ahead with TokenizeOpenParens, then restore the saved position
2976 lambda_arguments_parsing = true;
2977 PushPosition ();
2978 d = TokenizeOpenParens ();
2979 PopPosition ();
2980 lambda_arguments_parsing = false;
2981 return d;
2984 return Token.OPEN_PARENS;
2985 case ')':
2986 LocatedToken.CreateOptional (ref_line, col, ref val);
2987 return Token.CLOSE_PARENS;
2988 case ',':
2989 LocatedToken.CreateOptional (ref_line, col, ref val);
2990 return Token.COMMA;
2991 case ';':
2992 LocatedToken.CreateOptional (ref_line, col, ref val);
2993 return Token.SEMICOLON;
2994 case '~':
2995 val = LocatedToken.Create (ref_line, col);
2996 return Token.TILDE;
2997 case '?':
2998 val = LocatedToken.Create (ref_line, col);
2999 return TokenizePossibleNullableType ();
3000 case '<':
3001 val = LocatedToken.Create (ref_line, col);
// The counter is incremented on every less-than sign; a value that was already positive short-cuts to OP_GENERICS_LT
3002 if (parsing_generic_less_than++ > 0)
3003 return Token.OP_GENERICS_LT;
3005 return TokenizeLessThan ();
3007 case '>':
3008 val = LocatedToken.Create (ref_line, col);
3009 d = peek_char ();
3011 if (d == '='){
3012 get_char ();
3013 return Token.OP_GE;
// While the generic counter is positive this closes a generic list rather than starting a shift operator
3016 if (parsing_generic_less_than > 1 || (parsing_generic_less_than == 1 && d != '>')) {
3017 parsing_generic_less_than--;
3018 return Token.OP_GENERICS_GT;
3021 if (d == '>') {
3022 get_char ();
3023 d = peek_char ();
3025 if (d == '=') {
3026 get_char ();
3027 return Token.OP_SHIFT_RIGHT_ASSIGN;
3029 return Token.OP_SHIFT_RIGHT;
3032 return Token.OP_GT;
3034 case '+':
3035 val = LocatedToken.Create (ref_line, col);
3036 d = peek_char ();
3037 if (d == '+') {
3038 d = Token.OP_INC;
3039 } else if (d == '=') {
3040 d = Token.OP_ADD_ASSIGN;
3041 } else {
3042 return Token.PLUS;
3044 get_char ();
3045 return d;
3047 case '-':
3048 val = LocatedToken.Create (ref_line, col);
3049 d = peek_char ();
3050 if (d == '-') {
3051 d = Token.OP_DEC;
3052 } else if (d == '=')
3053 d = Token.OP_SUB_ASSIGN;
3054 else if (d == '>')
3055 d = Token.OP_PTR;
3056 else {
3057 return Token.MINUS;
3059 get_char ();
3060 return d;
3062 case '!':
3063 val = LocatedToken.Create (ref_line, col);
3064 if (peek_char () == '='){
3065 get_char ();
3066 return Token.OP_NE;
3068 return Token.BANG;
3070 case '=':
3071 val = LocatedToken.Create (ref_line, col);
3072 d = peek_char ();
3073 if (d == '='){
3074 get_char ();
3075 return Token.OP_EQ;
3077 if (d == '>'){
3078 get_char ();
3079 return Token.ARROW;
3082 return Token.ASSIGN;
3084 case '&':
3085 val = LocatedToken.Create (ref_line, col);
3086 d = peek_char ();
3087 if (d == '&'){
3088 get_char ();
3089 return Token.OP_AND;
3091 if (d == '='){
3092 get_char ();
3093 return Token.OP_AND_ASSIGN;
3095 return Token.BITWISE_AND;
3097 case '|':
3098 val = LocatedToken.Create (ref_line, col);
3099 d = peek_char ();
3100 if (d == '|'){
3101 get_char ();
3102 return Token.OP_OR;
3104 if (d == '='){
3105 get_char ();
3106 return Token.OP_OR_ASSIGN;
3108 return Token.BITWISE_OR;
3110 case '*':
3111 val = LocatedToken.Create (ref_line, col);
3112 if (peek_char () == '='){
3113 get_char ();
3114 return Token.OP_MULT_ASSIGN;
3116 return Token.STAR;
3118 case '/':
3119 d = peek_char ();
3120 if (d == '='){
3121 val = LocatedToken.Create (ref_line, col);
3122 get_char ();
3123 return Token.OP_DIV_ASSIGN;
3126 // Handle double-slash comments.
3127 if (d == '/'){
3128 get_char ();
3129 if (doc_processing) {
3130 if (peek_char () == '/') {
3131 get_char ();
3132 // Don't allow ////.
3133 if ((d = peek_char ()) != '/') {
3134 if (doc_state == XmlCommentState.Allowed)
3135 handle_one_line_xml_comment ();
3136 else if (doc_state == XmlCommentState.NotAllowed)
3137 WarningMisplacedComment (Location - 3);
3139 } else {
3140 if (xml_comment_buffer.Length > 0)
3141 doc_state = XmlCommentState.NotAllowed;
3145 while ((d = get_char ()) != -1 && d != '\n');
3147 any_token_seen |= tokens_seen;
3148 tokens_seen = false;
3149 comments_seen = false;
3150 continue;
3151 } else if (d == '*'){
3152 get_char ();
3153 bool docAppend = false;
3154 if (doc_processing && peek_char () == '*') {
3155 get_char ();
3156 // But when it is /**/, just do nothing.
3157 if (peek_char () == '/') {
3158 get_char ();
3159 continue;
3161 if (doc_state == XmlCommentState.Allowed)
3162 docAppend = true;
3163 else if (doc_state == XmlCommentState.NotAllowed) {
3164 WarningMisplacedComment (Location - 2);
// Remember where this comment starts in the buffer so that only its text is reformatted afterwards
3168 int current_comment_start = 0;
3169 if (docAppend) {
3170 current_comment_start = xml_comment_buffer.Length;
3171 xml_comment_buffer.Append (Environment.NewLine);
3174 while ((d = get_char ()) != -1){
3175 if (d == '*' && peek_char () == '/'){
3176 get_char ();
3177 comments_seen = true;
3178 break;
3180 if (docAppend)
3181 xml_comment_buffer.Append ((char) d);
3183 if (d == '\n'){
3184 any_token_seen |= tokens_seen;
3185 tokens_seen = false;
3187 // Reset 'comments_seen' just to be consistent.
3188 // It doesn't matter either way, here.
3190 comments_seen = false;
// comments_seen is still false here only when the loop ran into end of input
3193 if (!comments_seen)
3194 Report.Error (1035, Location, "End-of-file found, '*/' expected");
3196 if (docAppend)
3197 update_formatted_doc_comment (current_comment_start);
3198 continue;
3200 val = LocatedToken.Create (ref_line, col);
3201 return Token.DIV;
3203 case '%':
3204 val = LocatedToken.Create (ref_line, col);
3205 if (peek_char () == '='){
3206 get_char ();
3207 return Token.OP_MOD_ASSIGN;
3209 return Token.PERCENT;
3211 case '^':
3212 val = LocatedToken.Create (ref_line, col);
3213 if (peek_char () == '='){
3214 get_char ();
3215 return Token.OP_XOR_ASSIGN;
3217 return Token.CARRET;
3219 case ':':
3220 val = LocatedToken.Create (ref_line, col);
3221 if (peek_char () == ':') {
3222 get_char ();
3223 return Token.DOUBLE_COLON;
3225 return Token.COLON;
3227 case '0': case '1': case '2': case '3': case '4':
3228 case '5': case '6': case '7': case '8': case '9':
3229 tokens_seen = true;
3230 return is_number (c);
3232 case '\n': // white space
3233 any_token_seen |= tokens_seen;
3234 tokens_seen = false;
3235 comments_seen = false;
3236 continue;
3238 case '.':
3239 tokens_seen = true;
3240 d = peek_char ();
// A dot directly followed by a digit starts a number
3241 if (d >= '0' && d <= '9')
3242 return is_number (c);
3244 LocatedToken.CreateOptional (ref_line, col, ref val);
3245 return Token.DOT;
3247 case '#':
// The two flags say that a token or a block comment was already seen on this line
3248 if (tokens_seen || comments_seen) {
3249 Eror_WrongPreprocessorLocation ();
3250 return Token.ERROR;
3253 if (ParsePreprocessingDirective (true))
3254 continue;
// The directive call returned false: consume input until a later directive,
// found at the start of a line, returns true (presumably this skips excluded code - confirm)
3256 bool directive_expected = false;
3257 while ((c = get_char ()) != -1) {
3258 if (col == 1) {
3259 directive_expected = true;
3260 } else if (!directive_expected) {
3261 // TODO: Implement comment support for disabled code and uncomment this code
3262 // if (c == '#') {
3263 // Eror_WrongPreprocessorLocation ();
3264 // return Token.ERROR;
3265 // }
3266 continue;
3269 if (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\v' )
3270 continue;
3272 if (c == '#') {
3273 if (ParsePreprocessingDirective (false))
3274 break;
3276 directive_expected = false;
3279 if (c != -1) {
3280 tokens_seen = false;
3281 continue;
3284 return Token.EOF;
3286 case '"':
3287 return consume_string (false);
3289 case '\'':
// Despite its name, TokenizeBackslash scans a character literal
3290 return TokenizeBackslash ();
3292 case '@':
3293 c = get_char ();
3294 if (c == '"') {
3295 tokens_seen = true;
3296 return consume_string (true);
3299 if (is_identifier_start_character (c)){
3300 return consume_identifier (c, true);
3303 Report.Error (1646, Location, "Keyword, identifier, or string expected after verbatim specifier: @");
3304 return Token.ERROR;
3306 case EvalStatementParserCharacter:
3307 return Token.EVAL_STATEMENT_PARSER;
3308 case EvalCompilationUnitParserCharacter:
3309 return Token.EVAL_COMPILATION_UNIT_PARSER;
3310 case EvalUsingDeclarationsParserCharacter:
3311 return Token.EVAL_USING_DECLARATIONS_UNIT_PARSER;
3312 case DocumentationXref:
3313 return Token.DOC_SEE;
// Characters without a case of their own: identifier start, other whitespace, or an error
3316 if (is_identifier_start_character (c)) {
3317 tokens_seen = true;
3318 return consume_identifier (c);
3321 if (char.IsWhiteSpace ((char) c))
3322 continue;
3324 Report.Error (1056, Location, "Unexpected character `{0}'", ((char) c).ToString ());
// End of input
3327 if (CompleteOnEOF){
3328 if (generated)
3329 return Token.COMPLETE_COMPLETION;
3331 generated = true;
3332 return Token.GENERATE_COMPLETION;
3336 return Token.EOF;
//
// Scans a character literal. xtoken calls this right after it has read
// the opening quote.
//
// Returns Token.LITERAL with `val' set to a CharLiteral, or Token.ERROR when
// the literal is broken by a newline or when escape () fails (returns -1).
//
// Reported errors:
//   1011 - the literal is empty
//   1010 - a newline follows the opening quote
//   1012 - the literal does not fit a single char
//
int TokenizeBackslash ()
{
	int c = get_char ();
	tokens_seen = true;
	if (c == '\'') {
		val = new CharLiteral (context.BuiltinTypes, (char) c, Location);
		Report.Error (1011, Location, "Empty character literal");
		return Token.LITERAL;
	}

	if (c == '\n') {
		Report.Error (1010, Location, "Newline in constant");
		return Token.ERROR;
	}

	int d;
	c = escape (c, out d);
	if (c == -1)
		return Token.ERROR;

	val = new CharLiteral (context.BuiltinTypes, (char) c, Location);

	//
	// A non-zero `d' means the escape produced a second char (consume_identifier
	// treats that value as a surrogate). Such a literal cannot be a single char,
	// so it is reported like any other over-long literal instead of crashing
	// the compiler with NotImplementedException.
	//
	bool too_many = d != 0;
	if (!too_many) {
		c = get_char ();
		too_many = c != '\'';
	}

	if (too_many) {
		Report.Error (1012, Location, "Too many characters in character literal");

		// Try to recover, read until newline or next "'"
		while ((c = get_char ()) != -1) {
			if (c == '\n' || c == '\'')
				break;
		}
	}

	return Token.LITERAL;
}
//
// Classifies a `<' that xtoken has just read.
//
// In order of precedence the result is:
//   - Token.GENERIC_DIMENSION when handle_typeof is set and parse_generic_dimension
//     succeeds (its out value is stored in `val')
//   - Token.OP_GENERICS_LT_DECL or Token.OP_GENERICS_LT when parse_less_than succeeds
//   - one of the operators <<=, <<, <= or <
//
int TokenizeLessThan ()
{
	int next;

	if (handle_typeof) {
		PushPosition ();
		bool is_dimension = parse_generic_dimension (out next);
		if (is_dimension) {
			val = next;
			DiscardPosition ();
			return Token.GENERIC_DIMENSION;
		}

		PopPosition ();
	}

	// Look ahead from a saved position; it is restored whatever the outcome
	PushPosition ();
	if (parse_less_than ()) {
		bool declaration = parsing_generic_declaration &&
			(parsing_generic_declaration_doc || token () != Token.DOT);

		PopPosition ();
		return declaration ? Token.OP_GENERICS_LT_DECL : Token.OP_GENERICS_LT;
	}

	PopPosition ();
	parsing_generic_less_than = 0;

	// Not a generic bracket, so it is one of the plain operators
	next = peek_char ();
	switch (next) {
	case '<':
		get_char ();
		if (peek_char () != '=')
			return Token.OP_SHIFT_LEFT;

		get_char ();
		return Token.OP_SHIFT_LEFT_ASSIGN;

	case '=':
		get_char ();
		return Token.OP_LE;

	default:
		return Token.OP_LT;
	}
}
//
// Appends the remaining text of a one line xml comment to xml_comment_buffer.
// The line break itself is left in the input; Environment.NewLine is appended
// in its place when the text was ended by '\r' or '\n'.
//
private void handle_one_line_xml_comment ()
{
	// Drop the spaces that lead the comment text
	while (peek_char () == ' ')
		get_char ();

	int next = peek_char ();
	while (next != -1 && next != '\n' && next != '\r') {
		xml_comment_buffer.Append ((char) get_char ());
		next = peek_char ();
	}

	// The loop stops on -1, '\n' or '\r' only; anything but -1 is a line break
	if (next != -1)
		xml_comment_buffer.Append (Environment.NewLine);
}
//
// Strips the leading "*" decoration from Javadoc-like xml documentation.
//
// Only the text from `current_comment_start' to the end of xml_comment_buffer
// is looked at. The buffer is rewritten only when every line after the first
// consists of optional spaces followed by '*'; the last line may also be made
// of spaces alone. In any other case the buffer is left as it is.
//
private void update_formatted_doc_comment (int current_comment_start)
{
	int length = xml_comment_buffer.Length - current_comment_start;
	string text = xml_comment_buffer.ToString (current_comment_start, length);
	string [] lines = text.Replace ("\r", "").Split ('\n');

	// The first line starts with /**, thus it is not target
	// for the format check.
	for (int i = 1; i < lines.Length; i++) {
		string line = lines [i];
		int star = line.IndexOf ('*');

		// Only the very last line is allowed to have no '*'
		if (star < 0 && i < lines.Length - 1)
			return;

		string head = star < 0 ? line : line.Substring (0, star);
		for (int k = 0; k < head.Length; k++) {
			if (head [k] != ' ')
				return;
		}

		lines [i] = line.Substring (star + 1);
	}

	xml_comment_buffer.Remove (current_comment_start, length);
	xml_comment_buffer.Insert (current_comment_start, String.Join (Environment.NewLine, lines));
}
//
// Raises the misplaced comment warning at the current location
// when xml comment text is still held in xml_comment_buffer.
//
public void check_incorrect_doc_comment ()
{
	if (xml_comment_buffer.Length == 0)
		return;

	WarningMisplacedComment (Location);
}
//
// Hands over the saved xml comment lines for the current target member
// or type and empties the buffer. Returns null when nothing was saved.
//
public string consume_doc_comment ()
{
	if (xml_comment_buffer.Length == 0)
		return null;

	string text = xml_comment_buffer.ToString ();
	reset_doc_comment ();
	return text;
}
//
// The Report instance of the tokenizer's context; used for all diagnostics here.
//
Report Report {
	get {
		return context.Report;
	}
}
//
// Throws away any xml comment text collected so far.
//
void reset_doc_comment ()
{
	xml_comment_buffer.Length = 0;
}
//
// Pops the top entry of ifstack, when there is one, and reports the
// directive it is still waiting for: #endregion when the entry has the
// REGION bit set, #endif otherwise.
//
public void cleanup ()
{
	if (ifstack == null || ifstack.Count < 1)
		return;

	int state = ifstack.Pop ();
	bool open_region = (state & REGION) != 0;
	if (open_region) {
		Report.Error (1038, Location, "#endregion directive expected");
		return;
	}

	Report.Error (1027, Location, "Expected `#endif' directive");
}
3516 // Indicates whether an XML documentation comment is accepted at the current position.
3518 public enum XmlCommentState {
3519 // A documentation comment is allowed in this state.
3520 Allowed,
3521 // A documentation comment is not allowed in this state.
3522 NotAllowed,
3523 // Entered once a comment has appeared while the state was
3524 // NotAllowed; the state stays here until it is changed
3525 // back to Allowed.
3526 Error