2010-04-07 Jb Evain <jbevain@novell.com>
[mcs.git] / class / System.XML / System.Xml / XmlTextReader.cs
blobc43f2d594e6d0bf56bf1434a7506298356335df6
1 //
2 // System.Xml.XmlTextReader
3 //
4 // Author:
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
8 //
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
42 using System;
43 using System.Collections;
44 #if NET_2_0
45 using System.Collections.Generic;
46 #endif
47 using System.Globalization;
48 using System.IO;
49 using System.Security.Permissions;
50 using System.Text;
51 using System.Xml.Schema;
52 using Mono.Xml;
54 #if NET_2_0
55 using System.Xml;
57 namespace Mono.Xml2
58 #else
59 namespace System.Xml
60 #endif
63 #if NET_2_0
64 internal class XmlTextReader : XmlReader,
65 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
66 #else
67 [PermissionSet (SecurityAction.InheritanceDemand, Unrestricted = true)]
68 public class XmlTextReader : XmlReader, IXmlLineInfo, IHasXmlParserContext
69 #endif
71 #region Constructors
73 protected XmlTextReader ()
77 public XmlTextReader (Stream input)
78 : this (new XmlStreamReader (input))
82 public XmlTextReader (string url)
83 : this(url, new NameTable ())
87 public XmlTextReader (TextReader input)
88 : this (input, new NameTable ())
92 protected XmlTextReader (XmlNameTable nt)
93 : this (String.Empty, null, XmlNodeType.None, null)
97 public XmlTextReader (Stream input, XmlNameTable nt)
98 : this(new XmlStreamReader (input), nt)
102 public XmlTextReader (string url, Stream input)
103 : this (url, new XmlStreamReader (input))
107 public XmlTextReader (string url, TextReader input)
108 : this (url, input, new NameTable ())
112 public XmlTextReader (string url, XmlNameTable nt)
114 string uriString;
115 Stream stream = GetStreamFromUrl (url, out uriString);
116 XmlParserContext ctx = new XmlParserContext (nt,
117 new XmlNamespaceManager (nt),
118 String.Empty,
119 XmlSpace.None);
120 this.InitializeContext (uriString, ctx, new XmlStreamReader (stream), XmlNodeType.Document);
123 public XmlTextReader (TextReader input, XmlNameTable nt)
124 : this (String.Empty, input, nt)
128 // This is used in XmlReader.Create() to indicate that string
129 // argument is uri, not an xml fragment.
130 internal XmlTextReader (bool dummy, XmlResolver resolver, string url, XmlNodeType fragType, XmlParserContext context)
132 if (resolver == null) {
133 #if MOONLIGHT
134 resolver = new XmlXapResolver ();
135 #else
136 resolver = new XmlUrlResolver ();
137 #endif
139 this.XmlResolver = resolver;
140 string uriString;
141 Stream stream = GetStreamFromUrl (url, out uriString);
142 this.InitializeContext (uriString, context, new XmlStreamReader (stream), fragType);
145 public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
146 : this (context != null ? context.BaseURI : String.Empty,
147 new XmlStreamReader (xmlFragment),
148 fragType,
149 context)
151 disallowReset = true;
154 internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
155 : this (baseURI, xmlFragment, fragType, null)
159 public XmlTextReader (string url, Stream input, XmlNameTable nt)
160 : this (url, new XmlStreamReader (input), nt)
// Creates a document reader over `input`, using `url` as the base URI.
// The supplied name table is wrapped in a parser context so that it is
// actually used by the reader (previously `nt` was silently dropped and
// InitializeContext () always allocated a brand new NameTable).
// A null `nt` keeps the former behavior: a null context is passed and
// InitializeContext () builds the default one.
public XmlTextReader (string url, TextReader input, XmlNameTable nt)
	: this (url, input, XmlNodeType.Document,
		nt != null
			? new XmlParserContext (nt,
				new XmlNamespaceManager (nt),
				String.Empty,
				XmlSpace.None)
			: null)
{
}
169 public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
170 : this (context != null ? context.BaseURI : String.Empty,
171 new StringReader (xmlFragment),
172 fragType,
173 context)
175 disallowReset = true;
178 internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
180 InitializeContext (url, context, fragment, fragType);
// Resolves `url` through the current resolver and asks it for a Stream.
// absoluteUriString receives the text of the resolved URI, or
// String.Empty when the resolver returned no URI.
private Stream GetStreamFromUrl (string url, out string absoluteUriString)
{
#if NET_2_1
	if (url == null)
		throw new ArgumentNullException ("url");
	if (url.Length == 0)
		throw new ArgumentException ("url");
#endif
	Uri resolved = resolver.ResolveUri (null, url);
	if (resolved != null)
		absoluteUriString = resolved.ToString ();
	else
		absoluteUriString = String.Empty;

	object entity = resolver.GetEntity (resolved, null, typeof (Stream));
	return entity as Stream;
}
196 #endregion
198 #region Properties
200 public override int AttributeCount
202 get { return attributeCount; }
205 public override string BaseURI
207 get { return parserContext.BaseURI; }
210 #if NET_2_0
211 public override bool CanReadBinaryContent {
212 get { return true; }
215 public override bool CanReadValueChunk {
216 get { return true; }
218 #else
219 internal override bool CanReadBinaryContent {
220 get { return true; }
223 internal override bool CanReadValueChunk {
224 get { return true; }
226 #endif
228 internal bool CharacterChecking {
229 get { return checkCharacters; }
230 set { checkCharacters = value; }
233 // for XmlReaderSettings.CloseInput support
234 internal bool CloseInput {
235 get { return closeInput; }
236 set { closeInput = value; }
// Depth of the node the cursor is on. Attributes sit one level below
// their element and attribute value nodes two levels below; when the
// current token is not an element the attribute levels are shifted up
// by one.
public override int Depth {
	get {
		int shift = -1;
		if (currentToken.NodeType == XmlNodeType.Element)
			shift = 0;

		if (currentAttributeValue >= 0) {
			// inside attribute value.
			return shift + elementDepth + 2;
		}
		if (currentAttribute >= 0)
			return shift + elementDepth + 1;
		return elementDepth;
	}
}
251 public Encoding Encoding
253 get { return parserContext.Encoding; }
255 #if NET_2_0
256 public EntityHandling EntityHandling {
257 get { return entityHandling; }
258 set { entityHandling = value; }
260 #endif
262 public override bool EOF {
263 get { return readState == ReadState.EndOfFile; }
266 public override bool HasValue {
267 get { return cursorToken.Value != null; }
270 public override bool IsDefault {
271 // XmlTextReader does not expand default attributes.
272 get { return false; }
275 public override bool IsEmptyElement {
276 get { return cursorToken.IsEmptyElement; }
279 #if NET_2_0
280 #else
281 public override string this [int i] {
282 get { return GetAttribute (i); }
285 public override string this [string name] {
286 get { return GetAttribute (name); }
289 public override string this [string localName, string namespaceName] {
290 get { return GetAttribute (localName, namespaceName); }
292 #endif
// Line number: the live parser line while useProceedingLineInfo is set,
// otherwise the line recorded on the cursor token.
public int LineNumber {
	get { return useProceedingLineInfo ? line : cursorToken.LineNumber; }
}
// Column: the live parser column while useProceedingLineInfo is set,
// otherwise the position recorded on the cursor token.
public int LinePosition {
	get { return useProceedingLineInfo ? column : cursorToken.LinePosition; }
}
312 public override string LocalName {
313 get { return cursorToken.LocalName; }
316 public override string Name {
317 get { return cursorToken.Name; }
// Whether namespace processing is enabled. Can only be changed while
// the reader is still in its initial state.
public bool Namespaces {
	get { return namespaces; }
	set {
		bool started = readState != ReadState.Initial;
		if (started)
			throw new InvalidOperationException ("Namespaces have to be set before reading.");
		namespaces = value;
	}
}
329 public override string NamespaceURI {
330 get { return cursorToken.NamespaceURI; }
333 public override XmlNameTable NameTable {
334 get { return nameTable; }
337 public override XmlNodeType NodeType {
338 get { return cursorToken.NodeType; }
341 public bool Normalization {
342 get { return normalization; }
343 set { normalization = value; }
346 public override string Prefix {
347 get { return cursorToken.Prefix; }
350 public bool ProhibitDtd {
351 get { return prohibitDtd; }
352 set { prohibitDtd = value; }
355 public override char QuoteChar {
356 get { return cursorToken.QuoteChar; }
359 public override ReadState ReadState {
360 get { return readState; }
363 #if NET_2_0
364 public override XmlReaderSettings Settings {
365 get { return base.Settings; }
367 #endif
// Text value of the cursor token; never null (String.Empty is returned
// when the token has no value).
public override string Value {
	get {
		if (cursorToken.Value == null)
			return String.Empty;
		return cursorToken.Value;
	}
}
373 public WhitespaceHandling WhitespaceHandling {
374 get { return whitespaceHandling; }
375 set { whitespaceHandling = value; }
378 public override string XmlLang {
379 get { return parserContext.XmlLang; }
382 public XmlResolver XmlResolver {
383 set { resolver = value; }
386 public override XmlSpace XmlSpace {
387 get { return parserContext.XmlSpace; }
390 #endregion
392 #region Methods
// Marks the reader closed, wipes the token state and, when CloseInput
// is enabled and a reader is attached, closes the underlying TextReader.
public override void Close ()
{
	readState = ReadState.Closed;

	cursorToken.Clear ();
	currentToken.Clear ();
	attributeCount = 0;

	if (!closeInput || reader == null)
		return;
	reader.Close ();
}
// Returns the value of the attribute at index `i`.
// Throws ArgumentOutOfRangeException when `i` is negative or not smaller
// than AttributeCount. (Negative indexes used to escape as an
// IndexOutOfRangeException, and the message text was passed as the
// parameter name.)
public override string GetAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "Attribute index must be non-negative and smaller than AttributeCount.");
	return attributeTokens [i].Value;
}
414 // MS.NET 1.0 msdn says that this method returns String.Empty
415 // for absent attribute, but in fact it returns null.
416 // This description is corrected in MS.NET 1.1 msdn.
// Looks an attribute up by its qualified name and returns its value,
// or null when the current node has no such attribute.
public override string GetAttribute (string name)
{
	int index = 0;
	while (index < attributeCount) {
		XmlAttributeTokenInfo candidate = attributeTokens [index];
		if (candidate.Name == name)
			return candidate.Value;
		index++;
	}
	return null;
}
// Index of the first attribute matching both local name and namespace
// URI, or -1 when none matches.
private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
{
	int found = -1;
	for (int pos = 0; found < 0 && pos < attributeCount; pos++) {
		XmlAttributeTokenInfo attr = attributeTokens [pos];
		if (attr.LocalName == localName && attr.NamespaceURI == namespaceURI)
			found = pos;
	}
	return found;
}
435 XmlParserContext IHasXmlParserContext.ParserContext {
436 get { return parserContext; }
// Value of the attribute with the given local name and namespace URI,
// or null when the current node has no such attribute.
public override string GetAttribute (string localName, string namespaceURI)
{
	int index = GetIndexOfQualifiedAttribute (localName, namespaceURI);
	return index >= 0 ? attributeTokens [index].Value : null;
}
447 #if NET_2_0
448 public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
450 return nsmgr.GetNamespacesInScope (scope);
453 IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
455 return GetNamespacesInScope (scope);
457 #endif
// Returns a reader over the input that has not been consumed yet.
// When nothing has been buffered (peekCharsLength < 0) the original
// reader is handed back; otherwise the unread part of the peek buffer
// is prepended to whatever the underlying reader still holds.
public TextReader GetRemainder ()
{
	if (peekCharsLength < 0)
		return reader;

	int pending = peekCharsLength - peekCharsIndex;
	string buffered = new string (peekChars, peekCharsIndex, pending);
	return new StringReader (buffered + reader.ReadToEnd ());
}
466 #if NET_2_0
467 public bool HasLineInfo ()
468 #else
469 bool IXmlLineInfo.HasLineInfo ()
470 #endif
472 return true;
475 public override string LookupNamespace (string prefix)
477 return LookupNamespace (prefix, false);
// Namespace lookup that reports an empty namespace as null.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	string uri = nsmgr.LookupNamespace (prefix, atomizedNames);
	if (uri == String.Empty)
		return null;
	return uri;
}
487 #if NET_2_0
488 string IXmlNamespaceResolver.LookupPrefix (string ns)
490 return LookupPrefix (ns, false);
493 public string LookupPrefix (string ns, bool atomizedName)
495 return nsmgr.LookupPrefix (ns, atomizedName);
497 #endif
// Moves the cursor to the attribute at index `i`.
// Throws ArgumentOutOfRangeException when `i` is negative or not smaller
// than AttributeCount. (Negative indexes were previously not checked
// and the message text was passed as the parameter name.)
public override void MoveToAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "attribute index out of range.");

	currentAttribute = i;
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
// Moves the cursor to the attribute with the given qualified name.
// Returns false (cursor untouched) when there is no such attribute.
public override bool MoveToAttribute (string name)
{
	int index = 0;
	while (index < attributeCount) {
		if (attributeTokens [index].Name == name) {
			MoveToAttribute (index);
			return true;
		}
		index++;
	}
	return false;
}
// Moves the cursor to the attribute with the given local name and
// namespace URI. Returns false when there is no such attribute.
public override bool MoveToAttribute (string localName, string namespaceName)
{
	int index = GetIndexOfQualifiedAttribute (localName, namespaceName);
	bool found = index >= 0;
	if (found)
		MoveToAttribute (index);
	return found;
}
// Moves the cursor from an attribute back to its owning node.
// Returns true only when the cursor actually moved.
public override bool MoveToElement ()
{
	// currentToken can be null for the attribute .ctor().
	if (currentToken == null || cursorToken == currentToken || currentAttribute < 0)
		return false;

	currentAttribute = -1;
	currentAttributeValue = -1;
	cursorToken = currentToken;
	return true;
}
// Moves the cursor to the first attribute of the current node, if any.
public override bool MoveToFirstAttribute ()
{
	bool hasAttributes = attributeCount != 0;
	if (!hasAttributes)
		return false;

	// Rewind to the owning node, then step onto attribute 0.
	MoveToElement ();
	return MoveToNextAttribute ();
}
// Advances the cursor to the following attribute.
// Returns false when there is no further attribute.
public override bool MoveToNextAttribute ()
{
	int next = currentAttribute + 1;
	if (next >= attributeCount)
		return false;

	currentAttribute = next;
	currentAttributeValue = -1;
	cursorToken = attributeTokens [next];
	return true;
}
// Reads the next node. Returns false when the reader is closed, when an
// attribute fragment has already been read, or when there is no more
// content. Throws a not-well-formed error if a document ends before any
// document element has been seen.
public override bool Read ()
{
	if (readState == ReadState.Closed)
		return false;

	// Remember where this node starts in the peek buffer and reset
	// the per-node state.
	curNodePeekIndex = peekCharsIndex;
	preserveCurrentTag = true;
	nestLevel = 0;
	ClearValueBuffer ();

	if (startNodeType == XmlNodeType.Attribute) {
		// An attribute fragment yields exactly one attribute node.
		if (currentAttribute == 0)
			return false;	// already read.
		SkipTextDeclaration ();
		ClearAttributes ();
		IncrementAttributeToken ();
		ReadAttributeValueTokens ('"');
		cursorToken = attributeTokens [0];
		currentAttributeValue = -1;
		readState = ReadState.Interactive;
		return true;
	}

	if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
		SkipTextDeclaration ();

	if (Binary != null)
		Binary.Reset ();

	readState = ReadState.Interactive;
	currentLinkedNodeLineNumber = line;
	currentLinkedNodeLinePosition = column;
	useProceedingLineInfo = true;

	cursorToken = currentToken;
	attributeCount = 0;
	currentAttribute = -1;
	currentAttributeValue = -1;
	currentToken.Clear ();

	// It was moved from end of ReadStartTag ().
	if (depthUp) {
		++depth;
		depthUp = false;
	}

	// A pending ReadChars () call is finished by skipping to the end tag.
	if (readCharsInProgress) {
		readCharsInProgress = false;
		return ReadUntilEndTag ();
	}

	bool hasNode = ReadContent ();

	bool documentWithoutRoot = !hasNode
		&& startNodeType == XmlNodeType.Document
		&& currentState != XmlNodeType.EndElement;
	if (documentWithoutRoot)
		throw NotWFError ("Document element did not appear.");

	useProceedingLineInfo = false;
	return hasNode;
}
// Steps the cursor through the value tokens of the current attribute.
// Returns false when the cursor is not on an attribute or when all of
// its value tokens have been visited.
public override bool ReadAttributeValue ()
{
	// An attribute fragment reader is started lazily.
	if (readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute)
		Read ();

	if (currentAttribute < 0)
		return false;

	XmlAttributeTokenInfo owner = attributeTokens [currentAttribute];
	// First call for this attribute: position just before its first token.
	if (currentAttributeValue < 0)
		currentAttributeValue = owner.ValueTokenStartIndex - 1;

	if (currentAttributeValue >= owner.ValueTokenEndIndex)
		return false;

	currentAttributeValue++;
	cursorToken = attributeValueTokens [currentAttributeValue];
	return true;
}
// Decodes Base64 content into `buffer`. The character getter is
// installed only for the duration of the call and always removed again.
public int ReadBase64 (byte [] buffer, int offset, int length)
{
	BinaryCharGetter = binaryCharGetter;
	try {
		int decoded = Binary.ReadBase64 (buffer, offset, length);
		return decoded;
	} finally {
		BinaryCharGetter = null;
	}
}
// Decodes BinHex content into `buffer`. The character getter is
// installed only for the duration of the call and always removed again.
public int ReadBinHex (byte [] buffer, int offset, int length)
{
	BinaryCharGetter = binaryCharGetter;
	try {
		int decoded = Binary.ReadBinHex (buffer, offset, length);
		return decoded;
	} finally {
		BinaryCharGetter = null;
	}
}
// Reads element text content into `buffer` starting at `offset`, at
// most `length` characters. Returns the value produced by
// ReadCharsInternal (), or 0 when positioned on an empty element (which
// is then consumed) or on a non-element node with no read in progress.
//
// Throws ArgumentNullException when `buffer` is null, and
// ArgumentOutOfRangeException when `offset` or `length` is negative or
// the requested range does not fit into `buffer`.
public int ReadChars (char [] buffer, int offset, int length)
{
	// A null buffer used to surface as a NullReferenceException.
	if (buffer == null)
		throw new ArgumentNullException ("buffer");

	if (offset < 0) {
		throw new ArgumentOutOfRangeException (
#if !NET_2_1
			"offset", offset,
#endif
			"Offset must be non-negative integer.");

	} else if (length < 0) {
		throw new ArgumentOutOfRangeException (
#if !NET_2_1
			"length", length,
#endif
			"Length must be non-negative integer.");

	} else if (buffer.Length - offset < length) {
		// Written as a subtraction so that offset + length cannot
		// overflow and slip past the check.
		throw new ArgumentOutOfRangeException ("length", "buffer length is smaller than the sum of offset and length.");
	}

	if (IsEmptyElement) {
		Read ();
		return 0;
	}

	if (!readCharsInProgress && NodeType != XmlNodeType.Element)
		return 0;

	preserveCurrentTag = false;
	readCharsInProgress = true;
	useProceedingLineInfo = true;

	return ReadCharsInternal (buffer, offset, length);
}
703 public void ResetState ()
705 if (disallowReset)
706 throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
707 Clear ();
710 public override void ResolveEntity ()
712 // XmlTextReader does not resolve entities.
713 throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
716 #if NET_2_0
717 [MonoTODO] // FIXME: Implement, for performance improvement
718 public override void Skip ()
720 base.Skip ();
722 #endif
723 #endregion
725 #region Internals
726 // Parsed DTD Objects
727 // Note that this property must be kept since dtd2xsd uses it.
728 internal DTDObjectModel DTD {
729 get { return parserContext.Dtd; }
732 internal XmlResolver Resolver {
733 get { return resolver; }
735 #endregion
737 #region Privates
// Holds the properties of one node exposed by the reader. The value
// string is produced lazily and cached until Clear () or an explicit
// assignment.
internal class XmlTokenInfo
{
	// Lazily computed / explicitly assigned value.
	string valueCache;

	// Owning reader; supplies the shared value buffer.
	protected XmlTextReader Reader;

	public string Name;
	public string LocalName;
	public string Prefix;
	public string NamespaceURI;
	public bool IsEmptyElement;
	public char QuoteChar;
	public int LineNumber;
	public int LinePosition;
	// Range inside Reader.valueBuffer; a negative start means "not set".
	public int ValueBufferStart;
	public int ValueBufferEnd;

	public XmlNodeType NodeType;

	public XmlTokenInfo (XmlTextReader xtr)
	{
		Reader = xtr;
		Clear ();
	}

	public virtual string Value {
		get {
			if (valueCache != null)
				return valueCache;

			if (ValueBufferStart >= 0) {
				int length = ValueBufferEnd - ValueBufferStart;
				valueCache = Reader.valueBuffer.ToString (ValueBufferStart, length);
				return valueCache;
			}

			// Only character-data-like nodes take their value from
			// the reader; every other node type has no value.
			bool textual =
				NodeType == XmlNodeType.Text ||
				NodeType == XmlNodeType.SignificantWhitespace ||
				NodeType == XmlNodeType.Whitespace ||
				NodeType == XmlNodeType.Comment ||
				NodeType == XmlNodeType.CDATA ||
				NodeType == XmlNodeType.ProcessingInstruction;
			if (!textual)
				return null;

			valueCache = Reader.CreateValueString ();
			return valueCache;
		}
		set { valueCache = value; }
	}

	// Resets the token to the "no node" state.
	public virtual void Clear ()
	{
		ValueBufferStart = -1;
		valueCache = null;
		NodeType = XmlNodeType.None;
		NamespaceURI = String.Empty;
		Prefix = String.Empty;
		LocalName = String.Empty;
		Name = String.Empty;
		IsEmptyElement = false;
		QuoteChar = '"';
		LinePosition = 0;
		LineNumber = 0;
	}
}
// Token for an attribute. Its value is assembled on demand from the
// range [ValueTokenStartIndex, ValueTokenEndIndex] of the reader's
// attribute value tokens, with non-text tokens written back as
// "&name;" references.
internal class XmlAttributeTokenInfo : XmlTokenInfo
{
	public int ValueTokenStartIndex;
	public int ValueTokenEndIndex;
	string valueCache;
	StringBuilder tmpBuilder = new StringBuilder ();

	public XmlAttributeTokenInfo (XmlTextReader reader)
		: base (reader)
	{
		NodeType = XmlNodeType.Attribute;
	}

	public override string Value {
		get {
			if (valueCache != null)
				return valueCache;

			// An empty value should return String.Empty.
			if (ValueTokenStartIndex == ValueTokenEndIndex) {
				XmlTokenInfo only = Reader.attributeValueTokens [ValueTokenStartIndex];
				valueCache = only.NodeType == XmlNodeType.EntityReference
					? String.Concat ("&", only.Name, ";")
					: only.Value;
				return valueCache;
			}

			tmpBuilder.Length = 0;
			int index = ValueTokenStartIndex;
			while (index <= ValueTokenEndIndex) {
				XmlTokenInfo part = Reader.attributeValueTokens [index];
				if (part.NodeType != XmlNodeType.Text) {
					tmpBuilder.Append ('&');
					tmpBuilder.Append (part.Name);
					tmpBuilder.Append (';');
				} else {
					tmpBuilder.Append (part.Value);
				}
				index++;
			}

			valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
			return valueCache;
		}

		set { valueCache = value; }
	}

	// Resets the token while keeping it typed as an attribute.
	public override void Clear ()
	{
		base.Clear ();
		valueCache = null;
		NodeType = XmlNodeType.Attribute;
		ValueTokenEndIndex = 0;
		ValueTokenStartIndex = 0;
	}

	// Registers this attribute with the namespace manager when it is a
	// namespace declaration (xmlns:prefix="..." or xmlns="...").
	internal void FillXmlns ()
	{
		if (Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns)) {
			Reader.nsmgr.AddNamespace (LocalName, Value);
			return;
		}
		if (Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns))
			Reader.nsmgr.AddNamespace (String.Empty, Value);
	}

	// Computes NamespaceURI from the prefix: the xmlns namespace for
	// declarations, empty for unprefixed names, otherwise a lookup.
	internal void FillNamespace ()
	{
		bool isDeclaration =
			Object.ReferenceEquals (Prefix, XmlNamespaceManager.PrefixXmlns) ||
			Object.ReferenceEquals (Name, XmlNamespaceManager.PrefixXmlns);

		if (isDeclaration) {
			NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
			return;
		}
		if (Prefix.Length == 0) {
			NamespaceURI = string.Empty;
			return;
		}
		NamespaceURI = Reader.LookupNamespace (Prefix, true);
	}
}
874 private XmlTokenInfo cursorToken;
875 private XmlTokenInfo currentToken;
876 private XmlAttributeTokenInfo currentAttributeToken;
877 private XmlTokenInfo currentAttributeValueToken;
878 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
879 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
880 private int currentAttribute;
881 private int currentAttributeValue;
882 private int attributeCount;
884 private XmlParserContext parserContext;
885 private XmlNameTable nameTable;
886 private XmlNamespaceManager nsmgr;
888 private ReadState readState;
889 private bool disallowReset;
891 private int depth;
892 private int elementDepth;
893 private bool depthUp;
895 private bool popScope;
// Immutable triple describing an element name: the qualified name, its
// local part and its prefix.
struct TagName
{
	public readonly string Name;
	public readonly string LocalName;
	public readonly string Prefix;

	public TagName (string n, string l, string p)
	{
		this.Name = n;
		this.LocalName = l;
		this.Prefix = p;
	}
}
911 private TagName [] elementNames;
912 int elementNameStackPos;
914 private bool allowMultipleRoot;
916 private bool isStandalone;
918 private bool returnEntityReference;
919 private string entityReferenceName;
921 #if USE_NAME_BUFFER
922 private char [] nameBuffer;
923 private int nameLength;
924 private int nameCapacity;
925 private const int initialNameCapacity = 32;
926 #endif
928 private StringBuilder valueBuffer;
930 private TextReader reader;
931 private char [] peekChars;
932 private int peekCharsIndex;
933 private int peekCharsLength;
934 private int curNodePeekIndex;
935 private bool preserveCurrentTag;
936 private const int peekCharCapacity = 1024;
938 private int line;
939 private int column;
941 private int currentLinkedNodeLineNumber;
942 private int currentLinkedNodeLinePosition;
943 private bool useProceedingLineInfo;
945 private XmlNodeType startNodeType;
946 // State machine attribute.
947 // XmlDeclaration: after the first node.
948 // DocumentType: after doctypedecl
949 // Element: inside document element
950 // EndElement: after document element
951 private XmlNodeType currentState;
953 // For ReadChars()/ReadBase64()/ReadBinHex()
954 private int nestLevel;
955 private bool readCharsInProgress;
956 XmlReaderBinarySupport.CharGetter binaryCharGetter;
958 // These values are never re-initialized.
959 private bool namespaces = true;
960 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
961 #if MOONLIGHT
962 private XmlResolver resolver = new XmlXapResolver ();
963 #else
964 private XmlResolver resolver = new XmlUrlResolver ();
965 #endif
966 private bool normalization = false;
968 private bool checkCharacters;
969 private bool prohibitDtd = false;
970 private bool closeInput = true;
971 private EntityHandling entityHandling; // 2.0
973 private NameTable whitespacePool;
974 private char [] whitespaceCache;
976 private XmlException NotWFError (string message)
978 return new XmlException (this as IXmlLineInfo, BaseURI, message);
// One-time setup performed from InitializeContext (): allocates the
// working buffers, restores the default option values, rewinds the peek
// buffer and line info, and finally resets the parse state via Clear ().
private void Init ()
{
	allowMultipleRoot = false;
	elementNames = new TagName [10];
	valueBuffer = new StringBuilder ();
	binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
#if USE_NAME_BUFFER
	nameBuffer = new char [initialNameCapacity];
#endif

	// Option defaults.
	checkCharacters = true;
#if NET_2_0
	if (Settings != null)
		checkCharacters = Settings.CheckCharacters;
#endif
	prohibitDtd = false;
	closeInput = true;
	entityHandling = EntityHandling.ExpandCharEntities;

	// Peek buffer: allocated once; a negative length marks it as
	// "nothing read yet".
	peekCharsIndex = 0;
	if (peekChars == null)
		peekChars = new char [peekCharCapacity];
	peekCharsLength = -1;
	curNodePeekIndex = -1; // read from start

	line = 1;
	column = 1;

	currentLinkedNodeLinePosition = 0;
	currentLinkedNodeLineNumber = 0;

	Clear ();
}
// Puts the parse state back to "nothing read yet": fresh current token,
// no attributes, depth 0, ReadState.Initial.
private void Clear ()
{
	// Token / cursor state.
	currentToken = new XmlTokenInfo (this);
	cursorToken = currentToken;
	currentAttribute = -1;
	currentAttributeValue = -1;
	attributeCount = 0;

	readState = ReadState.Initial;

	// Nesting state.
	depth = 0;
	elementDepth = 0;
	depthUp = false;

	allowMultipleRoot = false;
	popScope = false;
	elementNameStackPos = 0;

	// Document-level flags.
	isStandalone = false;
	returnEntityReference = false;
	entityReferenceName = String.Empty;

#if USE_NAME_BUFFER
	nameLength = 0;
	nameCapacity = initialNameCapacity;
#endif
	useProceedingLineInfo = false;

	currentState = XmlNodeType.None;

	readCharsInProgress = false;
}
// Common constructor back end: installs the parser context (creating a
// default one when `context` is null), records the base URI derived
// from `url`, initializes the reader state and attaches `fragment` as
// the input. Only Attribute, Element and Document are accepted as
// `fragType`; any other node type raises an XmlException.
private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
{
	startNodeType = fragType;

	if (context != null) {
		parserContext = context;
	} else {
		XmlNameTable defaultTable = new NameTable ();
		parserContext = new XmlParserContext (defaultTable,
			new XmlNamespaceManager (defaultTable),
			String.Empty,
			XmlSpace.None);
	}

	// Fall back to fresh instances when the context carries none.
	nameTable = parserContext.NameTable;
	if (nameTable == null)
		nameTable = new NameTable ();
	nsmgr = parserContext.NamespaceManager;
	if (nsmgr == null)
		nsmgr = new XmlNamespaceManager (nameTable);

	if (url != null && url.Length > 0) {
#if NET_2_1
		Uri uri = new Uri (url, UriKind.RelativeOrAbsolute);
#else
		Uri uri = null;
		try {
#if NET_2_0
			uri = new Uri (url, UriKind.RelativeOrAbsolute);
#else
			uri = new Uri (url);
#endif
		} catch (Exception) {
			// Not parsable on its own: resolve it against a
			// path in the current directory instead.
			string path = Path.GetFullPath ("./a");
			uri = new Uri (new Uri (path), url);
		}
#endif
		parserContext.BaseURI = uri.ToString ();
	}

	Init ();

	reader = fragment;

	switch (fragType) {
	case XmlNodeType.Document:
		break;
	case XmlNodeType.Element:
		currentState = XmlNodeType.Element;
		allowMultipleRoot = true;
		break;
	case XmlNodeType.Attribute:
		// The attribute value is later read as if it were enclosed
		// in double quotes, so embedded quotes are escaped first.
		reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", "&quot;"));
		break;
	default:
		throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
	}
}
1100 #if NET_2_0
// Fragment when multiple root nodes are allowed, Document otherwise.
// Only switching to Fragment has an effect; other values are ignored.
internal ConformanceLevel Conformance {
	get {
		if (allowMultipleRoot)
			return ConformanceLevel.Fragment;
		return ConformanceLevel.Document;
	}
	set {
		if (value != ConformanceLevel.Fragment)
			return;
		currentState = XmlNodeType.Element;
		allowMultipleRoot = true;
	}
}
1111 internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
1113 line += lineNumberOffset;
1114 column += linePositionOffset;
1117 internal void SetNameTable (XmlNameTable nameTable)
1119 parserContext.NameTable = nameTable;
1121 #endif
1123 // Use this method rather than setting the properties
1124 // directly so that all the necessary properties can
1125 // be changed in harmony with each other. Maybe the
1126 // fields should be in a separate class to help enforce
1127 // this.
1129 // Namespace URI could not be provided here.
// Fills the current token and stamps it with the line information
// captured at the start of the node.
private void SetProperties (
	XmlNodeType nodeType,
	string name,
	string prefix,
	string localName,
	bool isEmptyElement,
	string value,
	bool clearAttributes)
{
	XmlTokenInfo target = currentToken;
	SetTokenProperties (target, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
	currentToken.LineNumber = currentLinkedNodeLineNumber;
	currentToken.LinePosition = currentLinkedNodeLinePosition;
}
// Copies the given node properties onto `token`, records the current
// depth as the element depth and optionally drops the attribute list.
private void SetTokenProperties (
	XmlTokenInfo token,
	XmlNodeType nodeType,
	string name,
	string prefix,
	string localName,
	bool isEmptyElement,
	string value,
	bool clearAttributes)
{
	token.NodeType = nodeType;
	token.Name = name;
	token.Prefix = prefix;
	token.LocalName = localName;
	token.IsEmptyElement = isEmptyElement;
	token.Value = value;

	elementDepth = depth;

	if (!clearAttributes)
		return;
	ClearAttributes ();
}
// Forgets all attributes of the current node. The token objects stay
// in the array; only the count and the cursors are reset.
private void ClearAttributes ()
{
	currentAttributeValue = -1;
	currentAttribute = -1;
	attributeCount = 0;
}
// Combines the UTF-16 pair at the peek position into one code point.
// `c` is the character already peeked; it is returned unchanged when no
// further input could be buffered.
private int PeekSurrogate (int c)
{
	// Make sure the second half of the pair is in the buffer.
	if (peekCharsLength <= peekCharsIndex + 1 && !ReadTextReader (c)) {
		//FIXME: copy MS.NET behaviour when unpaired surrogate found
		return c;
	}

	int lead = peekChars [peekCharsIndex];
	int trail = peekChars [peekCharsIndex + 1];

	bool wellFormedPair = (lead & 0xFC00) == 0xD800 && (trail & 0xFC00) == 0xDC00;
	if (!wellFormedPair) {
		//FIXME: copy MS.NET behaviour when unpaired surrogate found
		return lead;
	}
	return 0x10000 + (lead - 0xD800) * 0x400 + (trail - 0xDC00);
}
// Returns the next character without consuming it, or -1 at end of
// input. A NUL character in the buffer is reported as -1 as well.
// Characters in the surrogate range are handed to PeekSurrogate ().
private int PeekChar ()
{
	// Refill until a character is available or the input is exhausted.
	while (peekCharsIndex >= peekCharsLength) {
		if (!ReadTextReader (-1))
			return -1;
	}

	int c = peekChars [peekCharsIndex];
	if (c == 0)
		return -1;

	bool outsideSurrogateRange = c < 0xD800 || c >= 0xDFFF;
	return outsideSurrogateRange ? c : PeekSurrogate (c);
}
// Consumes and returns the next character (-1 at end of input),
// updating the buffer position and the line/column counters.
private int ReadChar ()
{
	int ch = PeekChar ();
	// Advance () performs the position and line/column bookkeeping.
	Advance (ch);
	return ch;
}
// Consumes the already-peeked character `ch`: moves the buffer index
// and keeps the line/column counters in step.
private void Advance (int ch)
{
	// Increment by 2 when a compound UCS-4 character was found.
	peekCharsIndex += ch >= 0x10000 ? 2 : 1;

	if (ch == '\n') {
		line++;
		column = 1;
		return;
	}
	if (ch != -1)
		column++;
}
// Pulls more characters from the underlying reader into peekChars.
// `remained` is a character to keep at the read position (>= 0), or a
// negative value when there is none. Returns false when nothing is
// available, i.e. neither a kept character nor newly read input.
private bool ReadTextReader (int remained)
{
	if (peekCharsLength < 0) {
		// First fill of the freshly initialized buffer.
		peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
		return peekCharsLength > 0;
	}

	int keep = remained >= 0 ? 1 : 0;
	int tagLength = peekCharsLength - curNodePeekIndex;

	// It must assure that current tag content always exists
	// in peekChars.
	if (!preserveCurrentTag) {
		curNodePeekIndex = 0;
		peekCharsIndex = 0;
	} else if (peekCharsLength < peekChars.Length) {
		// NonBlockingStreamReader returned less bytes
		// than the size of the buffer. In that case,
		// just refill the buffer.
	} else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
		// extend the buffer
		char [] larger = new char [peekChars.Length * 2];
		Array.Copy (peekChars, curNodePeekIndex, larger, 0, tagLength);
		peekChars = larger;
		curNodePeekIndex = 0;
		peekCharsIndex = tagLength;
	} else {
		// Slide the current tag to the front of the same buffer.
		Array.Copy (peekChars, curNodePeekIndex, peekChars, 0, tagLength);
		curNodePeekIndex = 0;
		peekCharsIndex = tagLength;
	}

	if (remained >= 0)
		peekChars [peekCharsIndex] = (char) remained;

	// Never request more than one capacity unit per refill.
	int wanted = peekChars.Length - peekCharsIndex - keep;
	if (wanted > peekCharCapacity)
		wanted = peekCharCapacity;

	int got = reader.Read (peekChars, peekCharsIndex + keep, wanted);
	int available = keep + got;
	peekCharsLength = peekCharsIndex + available;

	return available != 0;
}
// Reads the next node at content level by dispatching on the next
// character. Returns false once ReadState is EndOfFile.
// Throws the NotWFError exception when input ends while depth > 0.
private bool ReadContent ()
{
	// popScope is raised by ReadEndTag and by ReadStartTag for an
	// empty element; the scopes are released on the following read.
	if (popScope) {
		nsmgr.PopScope ();
		parserContext.PopScope ();
		popScope = false;
	}

	if (returnEntityReference) {
		SetEntityReferenceProperties ();
		return this.ReadState != ReadState.EndOfFile;
	}

	int next = PeekChar ();
	if (next == -1) {
		readState = ReadState.EndOfFile;
		ClearValueBuffer ();
		SetProperties (
			XmlNodeType.None, // nodeType
			String.Empty, // name
			String.Empty, // prefix
			String.Empty, // localName
			false, // isEmptyElement
			null, // value
			true // clearAttributes
		);
		if (depth > 0)
			throw NotWFError ("unexpected end of file. Current depth is " + depth);

		return false;
	}

	if (next == '<') {
		Advance (next);
		int marker = PeekChar ();
		if (marker == '/') {
			Advance ('/');
			ReadEndTag ();
		} else if (marker == '?') {
			Advance ('?');
			ReadProcessingInstruction ();
		} else if (marker == '!') {
			Advance ('!');
			ReadDeclaration ();
		} else {
			ReadStartTag ();
		}
	} else if (next == '\r' || next == '\n' || next == '\t' || next == ' ') {
		// A false result means the whitespace is skipped, so the
		// node after it is read instead.
		if (!ReadWhitespace ())
			return ReadContent ();
	} else {
		ReadText (true);
	}

	return this.ReadState != ReadState.EndOfFile;
}
// Makes the pending entity reference (entityReferenceName) the
// current node and clears the pending state.
// Throws the NotWFError exception when a standalone document refers
// to an entity that is not declared in the internal subset, or when
// the entity has a notation name (unparsed entity).
private void SetEntityReferenceProperties ()
{
	DTDEntityDeclaration decl = null;
	if (DTD != null)
		decl = DTD.EntityDecls [entityReferenceName];

	if (this.isStandalone && (DTD == null || decl == null || !decl.IsInternalSubset))
		throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");

	bool isUnparsed = decl != null && decl.NotationName != null;
	if (isUnparsed)
		throw NotWFError ("Reference to any unparsed entities is not allowed here.");

	ClearValueBuffer ();
	SetProperties (
		XmlNodeType.EntityReference, // nodeType
		entityReferenceName, // name
		String.Empty, // prefix
		entityReferenceName, // localName
		false, // isEmptyElement
		null, // value
		true // clearAttributes
	);

	// The reference has been delivered; reset the pending state.
	returnEntityReference = false;
	entityReferenceName = String.Empty;
}
// Reads a start tag (or empty-element tag) including its attributes.
// The leading '<' has already been consumed.
// Throws the NotWFError exception for a second document element,
// duplicate attributes, an empty URI bound to an xmlns prefix,
// undeclared prefixes and an invalid xml:space value.
private void ReadStartTag ()
{
	if (currentState == XmlNodeType.EndElement)
		throw NotWFError ("Multiple document element was detected.");
	currentState = XmlNodeType.Element;

	nsmgr.PushScope ();

	currentLinkedNodeLineNumber = line;
	currentLinkedNodeLinePosition = column;

	string prefix, localName;
	string name = ReadName (out prefix, out localName);
	// NOTE(review): currentState was set to Element a few lines
	// above; unless ReadName () can change it, this check never
	// fires -- confirm before removing.
	if (currentState == XmlNodeType.EndElement)
		throw NotWFError ("document has terminated, cannot open new element");

	ClearAttributes ();

	SkipWhitespace ();
	if (XmlChar.IsFirstNameChar (PeekChar ()))
		ReadAttributes (false);
	cursorToken = this.currentToken;

	// Fill namespaces: every xmlns declaration is processed before
	// any attribute namespace is resolved.
	for (int n = 0; n < attributeCount; n++)
		attributeTokens [n].FillXmlns ();
	for (int n = 0; n < attributeCount; n++)
		attributeTokens [n].FillNamespace ();

	// Quick name check.
	if (namespaces) {
		for (int n = 0; n < attributeCount; n++) {
			XmlAttributeTokenInfo token = attributeTokens [n];
			if (token.Prefix == "xmlns" && token.Value == String.Empty)
				throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
		}
	}

	// Duplicate attribute check. It compares references, not
	// string contents.
	for (int first = 0; first < attributeCount; first++) {
		for (int second = first + 1; second < attributeCount; second++) {
			bool sameQName = Object.ReferenceEquals (
				attributeTokens [first].Name, attributeTokens [second].Name);
			if (sameQName ||
				(Object.ReferenceEquals (attributeTokens [first].LocalName, attributeTokens [second].LocalName) &&
				Object.ReferenceEquals (attributeTokens [first].NamespaceURI, attributeTokens [second].NamespaceURI)))
				throw NotWFError ("Attribute name and qualified name must be identical.");
		}
	}

	bool isEmptyElement = PeekChar () == '/';
	if (isEmptyElement) {
		Advance ('/');
		popScope = true;
	} else {
		depthUp = true;
		PushElementName (name, localName, prefix);
	}
	parserContext.PushScope ();

	Expect ('>');

	SetProperties (
		XmlNodeType.Element, // nodeType
		name, // name
		prefix, // prefix
		localName, // localName
		isEmptyElement, // isEmptyElement
		null, // value
		false // clearAttributes
	);
	if (prefix.Length > 0)
		currentToken.NamespaceURI = LookupNamespace (prefix, true);
	else if (namespaces)
		currentToken.NamespaceURI = nsmgr.DefaultNamespace;

	if (namespaces) {
		if (NamespaceURI == null)
			throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
		try {
			// Visit each attribute through the cursor so that
			// NamespaceURI and Prefix refer to that attribute.
			for (int n = 0; n < attributeCount; n++) {
				MoveToAttribute (n);
				if (NamespaceURI == null)
					throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
			}
		} finally {
			MoveToElement ();
		}
	}

	// Apply xml:base, xml:lang and xml:space to the parser context.
	for (int n = 0; n < attributeCount; n++) {
		XmlAttributeTokenInfo token = attributeTokens [n];
		if (!Object.ReferenceEquals (token.Prefix, XmlNamespaceManager.PrefixXml))
			continue;
		string aname = token.LocalName;
		string value = token.Value;

		if (aname == "base") {
			if (this.resolver == null) {
				parserContext.BaseURI = value;
			} else {
				Uri buri = null;
				if (BaseURI != String.Empty)
					buri = new Uri (BaseURI);
				Uri uri = resolver.ResolveUri (buri, value);
				parserContext.BaseURI = (uri == null) ? String.Empty : uri.ToString ();
			}
		} else if (aname == "lang") {
			parserContext.XmlLang = value;
		} else if (aname == "space") {
			if (value == "preserve")
				parserContext.XmlSpace = XmlSpace.Preserve;
			else if (value == "default")
				parserContext.XmlSpace = XmlSpace.Default;
			else
				throw NotWFError (String.Format ("Invalid xml:space value: {0}", value));
		}
	}

	if (IsEmptyElement)
		CheckCurrentStateUpdate ();
}
// Pushes the name of a newly opened element onto the elementNames
// stack, doubling the backing array when it is full.
private void PushElementName (string name, string local, string prefix)
{
	int used = elementNameStackPos;
	if (used == elementNames.Length) {
		TagName [] grown = new TagName [elementNames.Length * 2];
		Array.Copy (elementNames, 0, grown, 0, used);
		elementNames = grown;
	}

	elementNames [elementNameStackPos] = new TagName (name, local, prefix);
	elementNameStackPos++;
}
// Reads an end tag and checks it against the innermost open element.
// The reader is positioned on the first character of the element's
// name.
// Throws the NotWFError exception when no element is open or an end
// tag is not allowed in the current state.
private void ReadEndTag ()
{
	if (currentState != XmlNodeType.Element)
		throw NotWFError ("End tag cannot appear in this state.");

	currentLinkedNodeLineNumber = line;
	currentLinkedNodeLinePosition = column;

	if (elementNameStackPos == 0)
		throw NotWFError ("closing element without matching opening element");

	// Pop the name pushed by the matching start tag.
	elementNameStackPos--;
	TagName expected = elementNames [elementNameStackPos];
	Expect (expected.Name);

	ExpectAfterWhitespace ('>');

	depth--;

	SetProperties (
		XmlNodeType.EndElement, // nodeType
		expected.Name, // name
		expected.Prefix, // prefix
		expected.LocalName, // localName
		false, // isEmptyElement
		null, // value
		true // clearAttributes
	);
	if (expected.Prefix.Length > 0)
		currentToken.NamespaceURI = LookupNamespace (expected.Prefix, true);
	else if (namespaces)
		currentToken.NamespaceURI = nsmgr.DefaultNamespace;

	// The namespace scope is released by the next ReadContent ().
	popScope = true;

	CheckCurrentStateUpdate ();
}
// Switches currentState to EndElement once an element has closed at
// depth 0, unless multiple roots are allowed.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;

	if (IsEmptyElement || NodeType == XmlNodeType.EndElement)
		currentState = XmlNodeType.EndElement;
}
1563 #if USE_NAME_BUFFER
// Appends ch to nameBuffer as two UTF-16 units (high, then low
// surrogate). Capacity is only checked before the second unit.
// NOTE(review): presumably callers guarantee room for the first
// unit -- confirm.
private void AppendSurrogatePairNameChar (int ch)
{
	int offset = ch - 0x10000;

	nameBuffer [nameLength++] = (char) (offset / 0x400 + 0xD800);
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = (char) (offset % 0x400 + 0xDC00);
}
// Doubles nameCapacity and moves the first nameLength chars of the
// name into a new buffer of that size.
private void ExpandNameCapacity ()
{
	nameCapacity *= 2;
	char [] grown = new char [nameCapacity];
	Array.Copy (nameBuffer, grown, nameLength);
	nameBuffer = grown;
}
1579 #endif
// Appends one character to valueBuffer.
// ch: a UTF-16 code unit (0..0xFFFF) or a supplementary code point;
//   the latter is written as a surrogate pair.
private void AppendValueChar (int ch)
{
	// The comparison must include Char.MaxValue itself: 0xFFFF fits
	// in a single char. Sending it down the surrogate path would
	// yield the invalid pair D800 DBFF, because (0xFFFF - 0x10000)
	// is negative.
	if (ch <= Char.MaxValue)
		valueBuffer.Append ((char) ch);
	else
		AppendSurrogatePairValueChar (ch);
}
// Appends ch to valueBuffer as a high surrogate followed by a low
// surrogate. The arithmetic is only meaningful for ch >= 0x10000.
private void AppendSurrogatePairValueChar (int ch)
{
	int offset = ch - 0x10000;
	char high = (char) (offset / 0x400 + 0xD800);
	char low = (char) (offset % 0x400 + 0xDC00);

	valueBuffer.Append (high);
	valueBuffer.Append (low);
}
// Builds the string for the current content of valueBuffer.
// Whitespace strings are mostly identical depending on the Depth, so
// whitespace node values shorter than the cache are atomized
// through a private NameTable.
private string CreateValueString ()
{
	XmlNodeType type = NodeType;
	if (type == XmlNodeType.Whitespace || type == XmlNodeType.SignificantWhitespace) {
		int len = valueBuffer.Length;
		if (whitespaceCache == null)
			whitespaceCache = new char [32];
		if (len < whitespaceCache.Length) {
			if (whitespacePool == null)
				whitespacePool = new NameTable ();
#if NET_2_0 && !NET_2_1
			valueBuffer.CopyTo (0, whitespaceCache, 0, len);
#else
			for (int i = 0; i < len; i++)
				whitespaceCache [i] = valueBuffer [i];
#endif
			return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
		}
		// Too long for the cache: handled like any other value.
	}

	if (valueBuffer.Capacity < 100)
		return valueBuffer.ToString (0, valueBuffer.Length);
	return valueBuffer.ToString ();
}
// Empties valueBuffer by truncating it to length zero; the
// StringBuilder instance itself is kept.
private void ClearValueBuffer ()
{
	valueBuffer.Length = 0;
}
// Reads character data up to the next '<', the end of input, or a
// general entity reference that has to be returned as its own node.
// The reader is positioned on the first character of the text.
// notWhitespace: true when the text is known to contain
//   non-whitespace; valueBuffer is cleared first in that case. With
//   false the buffer is left as it is and the node type becomes Text
//   only if a character is appended by this method's main path.
// Throws the NotWFError exception for text outside an element, for
// invalid characters (when CharacterChecking is on) and for a
// literal "]]>".
private void ReadText (bool notWhitespace)
{
	if (currentState != XmlNodeType.Element)
		throw NotWFError ("Text node cannot appear in this state.");
	preserveCurrentTag = false;

	if (notWhitespace)
		ClearValueBuffer ();

	int ch = PeekChar ();
	bool previousWasCloseBracket = false;

	while (ch != '<' && ch != -1) {
		// Only literal ']' characters count towards "]]>"; a ']'
		// written as a character reference does not.
		bool fromReference = false;

		if (ch == '&') {
			ReadChar ();
			ch = ReadReference (false);
			if (returnEntityReference) // Returns -1 if char validation should not be done
				break;
			fromReference = true;
		} else if (normalization && ch == '\r') {
			ReadChar ();
			ch = PeekChar ();
			if (ch != '\n')
				// append '\n' instead of '\r'.
				AppendValueChar ('\n');
			// and in case of "\r\n", discard '\r'.
			// Either way a line end separates any earlier ']' from
			// what follows, so "]\r]>" is not "]]>".
			previousWasCloseBracket = false;
			continue;
		} else {
			if (CharacterChecking && XmlChar.IsInvalid (ch))
				throw NotWFError ("Not allowed character was found.");
			ch = ReadChar ();
		}

		// Inlined AppendValueChar (ch). 0xFFFF itself is a single
		// UTF-16 unit, hence "<=".
		if (ch <= Char.MaxValue)
			valueBuffer.Append ((char) ch);
		else
			AppendSurrogatePairValueChar (ch);

		// Block "]]>"
		bool literalCloseBracket = !fromReference && ch == ']';
		if (literalCloseBracket && previousWasCloseBracket && PeekChar () == '>')
			throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
		previousWasCloseBracket = literalCloseBracket;

		ch = PeekChar ();
		notWhitespace = true;
	}

	if (returnEntityReference && valueBuffer.Length == 0) {
		// Nothing precedes the entity reference: return it directly.
		SetEntityReferenceProperties ();
	} else {
		XmlNodeType nodeType = notWhitespace ? XmlNodeType.Text :
			this.XmlSpace == XmlSpace.Preserve ? XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
		SetProperties (
			nodeType, // nodeType
			String.Empty, // name
			String.Empty, // prefix
			String.Empty, // localName
			false, // isEmptyElement
			null, // value: create only when required
			true // clearAttributes
		);
	}
}
// Reads a reference. The leading '&' has already been consumed.
// Returns the referenced character for a character reference or a
// predefined entity, otherwise whatever ReadEntityReference ()
// yields (-1 for a general entity reference). In that case
// returnEntityReference may have been set, which lets ReadText ()
// stop so that the next Read returns the EntityReference node.
private int ReadReference (bool ignoreEntityReferences)
{
	if (PeekChar () != '#')
		return ReadEntityReference (ignoreEntityReferences);

	Advance ('#');
	return ReadCharacterReference ();
}
// Reads the rest of a character reference; "&#" has already been
// consumed. Handles both the decimal and the hexadecimal ("x") form.
// Returns the referenced code point.
// Throws the NotWFError exception for a non-digit, a reference
// without digits, a value above 0x10FFFF, a reference cut off by the
// end of input and, when CharacterChecking and Normalization are
// both on, a character that XML does not allow.
private int ReadCharacterReference ()
{
	const int maxCodePoint = 0x10FFFF;

	bool hex = PeekChar () == 'x';
	if (hex)
		Advance ('x');
	int radix = hex ? 16 : 10;

	int value = 0;
	int digitCount = 0;
	int ch;
	while ((ch = PeekChar ()) != ';' && ch != -1) {
		Advance (ch);

		int digit;
		if (ch >= '0' && ch <= '9')
			digit = ch - '0';
		else if (hex && ch >= 'A' && ch <= 'F')
			digit = ch - 'A' + 10;
		else if (hex && ch >= 'a' && ch <= 'f')
			digit = ch - 'a' + 10;
		else
			throw NotWFError (String.Format (CultureInfo.InvariantCulture,
				hex ? "invalid hexadecimal digit: {0} (#x{1:X})"
					: "invalid decimal digit: {0} (#x{1:X})",
				(char) ch,
				ch));

		value = value * radix + digit;
		// Checking after every digit keeps the accumulator far from
		// Int32 overflow, which used to wrap silently.
		if (value > maxCodePoint)
			throw NotWFError ("Character reference is out of the Unicode range (greater than #x10FFFF).");
		digitCount++;
	}

	if (ch == -1)
		throw NotWFError ("Unexpected end of file in a character reference.");
	if (digitCount == 0)
		throw NotWFError ("Character reference must contain at least one digit.");

	ReadChar (); // ';'

	if (CharacterChecking && Normalization &&
		XmlChar.IsInvalid (value))
		throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
	return value;
}
// Reads "name;" of an entity reference.
// Returns the character of a predefined entity, or -1 when the
// result should not be validated as a character. For a general
// entity the reference is either copied verbatim ("&name;") into
// valueBuffer (ignoreEntityReferences) or recorded in
// returnEntityReference / entityReferenceName.
// Real EOF must not be detected here.
private int ReadEntityReference (bool ignoreEntityReferences)
{
	string name = ReadName ();
	Expect (';');

	int predefined = XmlChar.GetPredefinedEntity (name);
	if (predefined >= 0)
		return predefined;

	if (!ignoreEntityReferences) {
		returnEntityReference = true;
		entityReferenceName = name;
		return -1;
	}

	AppendValueChar ('&');
	foreach (char c in name)
		AppendValueChar (c);
	AppendValueChar (';');
	return -1;
}
// Reads attributes until the tag (or, with isXmlDecl, the
// declaration) is about to end or the input runs out.
// The reader is positioned on the first character of the attribute
// name.
// Throws the NotWFError exception when two attributes are not
// separated by whitespace.
private void ReadAttributes (bool isXmlDecl)
{
	bool requireWhitespace = false;
	currentAttribute = -1;
	currentAttributeValue = -1;

	for (;;) {
		if (!SkipWhitespace () && requireWhitespace)
			throw NotWFError ("Unexpected token. Name is required here.");

		IncrementAttributeToken ();
		currentAttributeToken.LineNumber = line;
		currentAttributeToken.LinePosition = column;

		string prefix, localName;
		string qualifiedName = ReadName (out prefix, out localName);
		currentAttributeToken.Name = qualifiedName;
		currentAttributeToken.Prefix = prefix;
		currentAttributeToken.LocalName = localName;
		ExpectAfterWhitespace ('=');
		SkipWhitespace ();
		ReadAttributeValueTokens (-1);

		// This hack is required for xmldecl which has both
		// effective attributes and Value: the property is read
		// only for its effect, the result is discarded.
		if (isXmlDecl) {
			string dummyValue = currentAttributeToken.Value;
		}

		attributeCount++;

		// No whitespace after this value: another attribute must
		// not follow directly.
		if (!SkipWhitespace ())
			requireWhitespace = true;

		int next = PeekChar ();
		bool endOfAttributes = isXmlDecl
			? next == '?'
			: (next == '/' || next == '>');
		if (endOfAttributes || next == -1)
			break;
	}

	currentAttribute = -1;
	currentAttributeValue = -1;
}
// Appends a synthetic attribute (no prefix, empty namespace) whose
// value is a single text token.
private void AddAttributeWithValue (string name, string value)
{
	// IncrementAttributeToken () leaves the new slot in
	// currentAttributeToken.
	IncrementAttributeToken ();
	XmlAttributeTokenInfo attribute = currentAttributeToken;
	attribute.Name = NameTable.Add (name);
	attribute.Prefix = String.Empty;
	attribute.NamespaceURI = String.Empty;

	// Likewise for the value token.
	IncrementAttributeValueToken ();
	XmlTokenInfo valueToken = currentAttributeValueToken;
	SetTokenProperties (valueToken,
		XmlNodeType.Text,
		String.Empty,
		String.Empty,
		String.Empty,
		false,
		value,
		false);

	attribute.Value = value;
	attributeCount++;
}
// Moves to the next attribute slot, doubling the array when it is
// full and creating the token object on first use; the slot is
// cleared and becomes currentAttributeToken.
private void IncrementAttributeToken ()
{
	int index = ++currentAttribute;
	if (index == attributeTokens.Length) {
		XmlAttributeTokenInfo [] grown =
			new XmlAttributeTokenInfo [attributeTokens.Length * 2];
		Array.Copy (attributeTokens, 0, grown, 0, attributeTokens.Length);
		attributeTokens = grown;
	}

	XmlAttributeTokenInfo token = attributeTokens [index];
	if (token == null) {
		token = new XmlAttributeTokenInfo (this);
		attributeTokens [index] = token;
	}
	currentAttributeToken = token;
	token.Clear ();
}
// Moves to the next attribute value token slot, doubling the array
// when it is full and creating the token object on first use; the
// slot is cleared and becomes currentAttributeValueToken.
private void IncrementAttributeValueToken ()
{
	int index = ++currentAttributeValue;
	if (index == attributeValueTokens.Length) {
		XmlTokenInfo [] grown = new XmlTokenInfo [attributeValueTokens.Length * 2];
		Array.Copy (attributeValueTokens, 0, grown, 0, attributeValueTokens.Length);
		attributeValueTokens = grown;
	}

	XmlTokenInfo token = attributeValueTokens [index];
	if (token == null) {
		token = new XmlTokenInfo (this);
		attributeValueTokens [index] = token;
	}
	currentAttributeValueToken = token;
	token.Clear ();
}
// Reads one quoted attribute value and splits it into value tokens:
// runs of text, plus one EntityReference token for each general
// entity reference that is not expanded.
// LAMESPEC: Orthodox XML reader should normalize attribute values
// dummyQuoteChar: negative to read the opening quote from the
//   input; otherwise the quote character to assume (attribute value
//   constructor), in which case the end of input also ends the
//   value.
// Throws the NotWFError exception for an unquoted value, a '<'
// inside the value, an unexpected end of input and invalid
// characters (when CharacterChecking is on).
private void ReadAttributeValueTokens (int dummyQuoteChar)
{
	int quoteChar = (dummyQuoteChar < 0) ? ReadChar () : dummyQuoteChar;

	if (quoteChar != '\'' && quoteChar != '\"')
		throw NotWFError ("an attribute value was not quoted");
	currentAttributeToken.QuoteChar = (char) quoteChar;

	IncrementAttributeValueToken ();
	currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
	currentAttributeValueToken.LineNumber = line;
	currentAttributeValueToken.LinePosition = column;

	bool incrementToken = false;
	bool isNewToken = true;
	bool loop = true;
	int ch = 0;
	currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
	while (loop) {
		ch = ReadChar ();
		if (ch == quoteChar)
			break;

		// An entity reference token was just closed: whatever
		// follows goes into a fresh token.
		if (incrementToken) {
			IncrementAttributeValueToken ();
			currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
			currentAttributeValueToken.LineNumber = line;
			currentAttributeValueToken.LinePosition = column;
			incrementToken = false;
			isNewToken = true;
		}

		switch (ch)
		{
		case '<':
			throw NotWFError ("attribute values cannot contain '<'");
		case -1:
			if (dummyQuoteChar < 0)
				throw NotWFError ("unexpected end of file in an attribute value");
			else	// Attribute value constructor.
				loop = false;
			break;
		case '\r':
			if (!normalization)
				goto default;
			if (PeekChar () == '\n')
				continue; // skip '\r'.
			// A lone '\r' is normalized like '\n' and '\t'.
			ch = ' ';
			goto default;
		case '\n':
		case '\t':
			// When Normalize = true, then replace
			// all spaces to ' '
			if (!normalization)
				goto default;
			ch = ' ';
			goto default;
		case '&':
			if (PeekChar () == '#') {
				Advance ('#');
				ch = ReadCharacterReference ();
				AppendValueChar (ch);
				break;
			}
			// Check XML 1.0 section 3.1 WFC.
			string entName = ReadName ();
			Expect (';');
			int predefined = XmlChar.GetPredefinedEntity (entName);
			if (predefined < 0) {
				CheckAttributeEntityReferenceWFC (entName);
#if NET_2_0
				if (entityHandling == EntityHandling.ExpandEntities) {
					string value = DTD.GenerateEntityAttributeText (entName);
					foreach (char c in (IEnumerable<char>) value)
						AppendValueChar (c);
				} else
#endif
				{
					// Close the text token and emit the reference
					// as a token of its own.
					currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
					currentAttributeValueToken.NodeType = XmlNodeType.Text;
					if (!isNewToken)
						IncrementAttributeValueToken ();
					currentAttributeValueToken.Name = entName;
					currentAttributeValueToken.Value = String.Empty;
					currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
					incrementToken = true;
				}
			}
			else
				AppendValueChar (predefined);
			break;
		default:
			if (CharacterChecking && XmlChar.IsInvalid (ch))
				throw NotWFError ("Invalid character was found.");
			// Inlined AppendValueChar (ch). 0xFFFF itself is a
			// single UTF-16 unit, hence "<=".
			if (ch <= Char.MaxValue)
				valueBuffer.Append ((char) ch);
			else
				AppendSurrogatePairValueChar (ch);
			break;
		}

		isNewToken = false;
	}
	if (!incrementToken) {
		currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
		currentAttributeValueToken.NodeType = XmlNodeType.Text;
	}
	currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
}
// Checks the well-formedness constraints on a general entity
// reference found inside an attribute value.
// An undeclared entity is an error when entities are expanded or
// when both a DTD and a resolver are present; otherwise it is let
// through unchecked.
private void CheckAttributeEntityReferenceWFC (string entName)
{
	DTDEntityDeclaration entDecl = null;
	if (DTD != null)
		entDecl = DTD.EntityDecls [entName];

	if (entDecl == null) {
		bool mustExist = entityHandling == EntityHandling.ExpandEntities
			|| (DTD != null && resolver != null);
		if (mustExist)
			throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
		return;
	}

	if (entDecl.HasExternalReference)
		throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");
	if (isStandalone && !entDecl.IsInternalSubset)
		throw NotWFError ("Reference to external entities is not allowed in the internal subset.");
	if (entDecl.EntityValue.IndexOf ('<') >= 0)
		throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
}
// Reads a processing instruction or the XML declaration.
// The reader is positioned on the first character of the target.
// Throws the NotWFError exception for a target that spells "xml" in
// another letter case, a target not followed by whitespace or '?',
// invalid characters (when CharacterChecking is on) and input that
// ends before the closing "?>".
private void ReadProcessingInstruction ()
{
	string target = ReadName ();
	if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
		throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");

	if (!SkipWhitespace ())
		if (PeekChar () != '?')
			throw NotWFError ("Invalid processing instruction name was found.");

	ClearValueBuffer ();

	bool terminated = false;
	int ch;
	while ((ch = PeekChar ()) != -1) {
		Advance (ch);

		if (ch == '?' && PeekChar () == '>') {
			Advance ('>');
			terminated = true;
			break;
		}

		if (CharacterChecking && XmlChar.IsInvalid (ch))
			throw NotWFError ("Invalid character was found.");
		AppendValueChar (ch);
	}
	// Running out of input without having seen "?>" used to be
	// accepted as a complete node.
	if (!terminated)
		throw NotWFError ("Unexpected end of file in a processing instruction.");

	// The target "xml" is recognised by reference comparison with
	// XmlNamespaceManager.PrefixXml.
	if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
		VerifyXmlDeclaration ();
	else {
		if (currentState == XmlNodeType.None)
			currentState = XmlNodeType.XmlDeclaration;

		SetProperties (
			XmlNodeType.ProcessingInstruction, // nodeType
			target, // name
			String.Empty, // prefix
			target, // localName
			false, // isEmptyElement
			null, // value: create only when required
			true // clearAttributes
		);
	}
}
// Validates the content of an XML declaration held in valueBuffer
// (version, then optional encoding, then optional standalone) and
// exposes the pseudo attributes as attributes of the
// XmlDeclaration node.
// Throws the NotWFError exception when the declaration is misplaced
// or malformed.
void VerifyXmlDeclaration ()
{
	if (!allowMultipleRoot && currentState != XmlNodeType.None)
		throw NotWFError ("XML declaration cannot appear in this state.");

	currentState = XmlNodeType.XmlDeclaration;

	string text = CreateValueString ();

	ClearAttributes ();

	int pos = 0;
	string encodingValue = null;
	string standaloneValue = null;
	string attrName, attrValue;

	// "version" is mandatory and must come first.
	ParseAttributeFromString (text, ref pos, out attrName, out attrValue);
	if (!(attrName == "version" && attrValue == "1.0"))
		throw NotWFError ("'version' is expected.");

	attrName = String.Empty;
	if (SkipWhitespaceInString (text, ref pos) && pos < text.Length)
		ParseAttributeFromString (text, ref pos, out attrName, out attrValue);

	if (attrName == "encoding") {
		if (!XmlChar.IsValidIANAEncoding (attrValue))
			throw NotWFError ("'encoding' must be a valid IANA encoding name.");

		// The context receives the encoding the reader actually
		// uses, not the declared name.
		XmlStreamReader streamReader = reader as XmlStreamReader;
		if (streamReader != null)
			parserContext.Encoding = streamReader.Encoding;
		else
			parserContext.Encoding = Encoding.Unicode;

		encodingValue = attrValue;
		attrName = String.Empty;
		if (SkipWhitespaceInString (text, ref pos) && pos < text.Length)
			ParseAttributeFromString (text, ref pos, out attrName, out attrValue);
	}

	if (attrName == "standalone") {
		this.isStandalone = attrValue == "yes";
		if (!this.isStandalone && attrValue != "no")
			throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
		standaloneValue = attrValue;
		SkipWhitespaceInString (text, ref pos);
	} else if (attrName.Length != 0) {
		throw NotWFError (String.Format ("Unexpected token: '{0}'", attrName));
	}

	// Nothing may follow the last pseudo attribute.
	if (pos < text.Length)
		throw NotWFError ("'?' is expected.");

	AddAttributeWithValue ("version", "1.0");
	if (encodingValue != null)
		AddAttributeWithValue ("encoding", encodingValue);
	if (standaloneValue != null)
		AddAttributeWithValue ("standalone", standaloneValue);
	currentAttribute = currentAttributeValue = -1;

	SetProperties (
		XmlNodeType.XmlDeclaration, // nodeType
		"xml", // name
		String.Empty, // prefix
		"xml", // localName
		false, // isEmptyElement
		text, // value
		false // clearAttributes
	);
}
// Advances idx past XML whitespace in text.
// Returns true when at least one character was skipped.
bool SkipWhitespaceInString (string text, ref int idx)
{
	int begin = idx;
	for (; idx < text.Length; idx++) {
		if (!XmlChar.IsWhitespace (text [idx]))
			break;
	}
	return idx > begin;
}
// Parses one name="value" (or name='value') pair out of src.
// src: the text to parse.
// idx: start position; on return, the position right after the
//   closing quote.
// name: receives the characters accepted by XmlChar.IsNameChar
//   (may be empty).
// value: receives the text between the quotes.
// Throws the NotWFError exception when '=', the opening quote or
// the closing quote is missing.
private void ParseAttributeFromString (string src,
	ref int idx, out string name, out string value)
{
	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	int start = idx;
	while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
		idx++;
	name = src.Substring (start, idx - start);

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;
	if (idx == src.Length || src [idx] != '=')
		throw NotWFError (String.Format ("'=' is expected after {0}", name));
	idx++;

	while (idx < src.Length && XmlChar.IsWhitespace (src [idx]))
		idx++;

	if (idx == src.Length || src [idx] != '"' && src [idx] != '\'')
		throw NotWFError ("'\"' or '\'' is expected.");

	char quote = src [idx];
	idx++;
	start = idx;

	// The closing quote is mandatory. Previously a value that ran
	// to the end of the string was accepted and idx ended up past
	// src.Length.
	int close = src.IndexOf (quote, start);
	if (close < 0)
		throw NotWFError (String.Format ("Closing quote is expected for the value of '{0}'.", name));

	value = src.Substring (start, close - start);
	idx = close + 1;
}
// Skips a text declaration ("<?xml version=... encoding=...?>") at
// the current position, if there is one. When the input does not
// start with "<?xml " the characters consumed so far are given back
// by resetting peekCharsIndex to 0.
// NOTE(review): the checks read peekChars [2..5] directly, so this
// presumably must be called before anything else has been read --
// confirm against callers.
// Throws the NotWFError exception for a malformed declaration.
internal void SkipTextDeclaration ()
{
	if (PeekChar () != '<')
		return;

	ReadChar ();

	if (PeekChar () != '?') {
		peekCharsIndex = 0;
		return;
	}
	ReadChar ();

	// Pull "xml " (four chars after "<?") into the buffer.
	while (peekCharsIndex < 6) {
		if (PeekChar () < 0)
			break;
		else
			ReadChar ();
	}
	if (new string (peekChars, 2, 4) != "xml ") {
		if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
			throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
		}
		peekCharsIndex = 0;
		return;
	}

	SkipWhitespace ();

	// version decl
	if (PeekChar () == 'v') {
		Expect ("version");
		ExpectAfterWhitespace ('=');
		SkipWhitespace ();
		int quoteChar = ReadChar ();
		char [] expect1_0 = new char [3];
		int versionLength = 0;
		switch (quoteChar) {
		case '\'':
		case '"':
			while (PeekChar () != quoteChar) {
				if (PeekChar () == -1)
					throw NotWFError ("Invalid version declaration inside text declaration.");
				else if (versionLength == 3)
					throw NotWFError ("Invalid version number inside text declaration.");
				else {
					expect1_0 [versionLength] = (char) ReadChar ();
					versionLength++;
					if (versionLength == 3 && new String (expect1_0) != "1.0")
						throw NotWFError ("Invalid version number inside text declaration.");
				}
			}
			// A literal shorter than three characters ("", "1",
			// "1.") was never compared with "1.0" inside the loop
			// and used to be accepted.
			if (versionLength != 3)
				throw NotWFError ("Invalid version number inside text declaration.");
			ReadChar ();
			SkipWhitespace ();
			break;
		default:
			throw NotWFError ("Invalid version declaration inside text declaration.");
		}
	}

	if (PeekChar () == 'e') {
		Expect ("encoding");
		ExpectAfterWhitespace ('=');
		SkipWhitespace ();
		int quoteChar = ReadChar ();
		switch (quoteChar) {
		case '\'':
		case '"':
			while (PeekChar () != quoteChar)
				if (ReadChar () == -1)
					throw NotWFError ("Invalid encoding declaration inside text declaration.");
			ReadChar ();
			SkipWhitespace ();
			break;
		default:
			throw NotWFError ("Invalid encoding declaration inside text declaration.");
		}
		// Encoding value should be checked inside XmlInputStream.
	}
#if NET_2_0
	// this condition is to check if this instance is
	// not created by XmlReader.Create() (which just
	// omits strict text declaration check).
	else if (Conformance == ConformanceLevel.Auto)
		throw NotWFError ("Encoding declaration is mandatory in text declaration.");
#endif

	Expect ("?>");

	curNodePeekIndex = peekCharsIndex; // without this it causes incorrect value start indication.
}
// Dispatches on what follows "<!": a comment, a CDATA section or a
// document type declaration.
// The reader is positioned on the first character after the
// leading '<!'.
// Throws the NotWFError exception for any other markup.
private void ReadDeclaration ()
{
	int ch = PeekChar ();

	if (ch == '-') {
		Expect ("--");
		ReadComment ();
	} else if (ch == '[') {
		ReadChar ();
		Expect ("CDATA[");
		ReadCDATA ();
	} else if (ch == 'D') {
		Expect ("DOCTYPE");
		ReadDoctypeDecl ();
	} else {
		throw NotWFError ("Unexpected declaration markup was found.");
	}
}
// Reads a comment up to and including the closing "-->".
// The reader is positioned on the first character after the
// leading '<!--'.
// Throws the NotWFError exception for "--" inside the comment,
// invalid characters (when CharacterChecking is on) and input that
// ends before "-->".
private void ReadComment ()
{
	if (currentState == XmlNodeType.None)
		currentState = XmlNodeType.XmlDeclaration;

	preserveCurrentTag = false;

	ClearValueBuffer ();

	bool terminated = false;
	int ch;
	while ((ch = PeekChar ()) != -1) {
		Advance (ch);

		if (ch == '-' && PeekChar () == '-') {
			Advance ('-');

			if (PeekChar () != '>')
				throw NotWFError ("comments cannot contain '--'");

			Advance ('>');
			terminated = true;
			break;
		}

		// Gated on CharacterChecking like the same check in
		// ReadText, ReadCDATA and ReadProcessingInstruction.
		if (CharacterChecking && XmlChar.IsInvalid (ch))
			throw NotWFError ("Not allowed character was found.");

		AppendValueChar (ch);
	}
	// Running out of input without having seen "-->" used to be
	// accepted as a complete comment.
	if (!terminated)
		throw NotWFError ("Unexpected end of file in a comment.");

	SetProperties (
		XmlNodeType.Comment, // nodeType
		String.Empty, // name
		String.Empty, // prefix
		String.Empty, // localName
		false, // isEmptyElement
		null, // value: create only when required
		true // clearAttributes
	);
}
// Reads a CDATA section up to and including the closing "]]>".
// The reader is positioned on the first character after the
// leading '<![CDATA['.
// Throws the NotWFError exception when a CDATA section is not
// allowed in the current state and for invalid characters (when
// CharacterChecking is on).
private void ReadCDATA ()
{
	if (currentState != XmlNodeType.Element)
		throw NotWFError ("CDATA section cannot appear in this state.");
	preserveCurrentTag = false;

	ClearValueBuffer ();

	// skip: ch already holds a ']' that was read while looking
	// ahead for "]]>", so the next round must not read again.
	bool skip = false;
	int ch = 0;
	while (PeekChar () != -1) {
		if (!skip)
			ch = ReadChar ();
		skip = false;

		if (ch == ']' && PeekChar () == ']') {
			ch = ReadChar (); // ']'

			if (PeekChar () == '>') {
				ReadChar (); // '>'
				break;
			} else {
				skip = true;
			}
		}
		if (normalization && ch == '\r') {
			ch = PeekChar ();
			if (ch != '\n')
				// append '\n' instead of '\r'.
				AppendValueChar ('\n');
			// otherwise, discard '\r'.
			continue;
		}
		if (CharacterChecking && XmlChar.IsInvalid (ch))
			throw NotWFError ("Invalid character was found.");

		// Inlined AppendValueChar (ch). 0xFFFF itself is a single
		// UTF-16 unit, hence "<=".
		if (ch <= Char.MaxValue)
			valueBuffer.Append ((char) ch);
		else
			AppendSurrogatePairValueChar (ch);
	}

	SetProperties (
		XmlNodeType.CDATA, // nodeType
		String.Empty, // name
		String.Empty, // prefix
		String.Empty, // localName
		false, // isEmptyElement
		null, // value: create only when required
		true // clearAttributes
	);
}
// Reads a document type declaration, builds the DTD object model
// and makes the DocumentType node current, with PUBLIC / SYSTEM
// exposed as attributes when present.
// The reader is positioned on the first character after the
// leading '<!DOCTYPE'.
// Throws the NotWFError exception when DTDs are prohibited or the
// declaration is misplaced or malformed.
private void ReadDoctypeDecl ()
{
	if (prohibitDtd)
		throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");

	bool misplaced = currentState == XmlNodeType.DocumentType
		|| currentState == XmlNodeType.Element
		|| currentState == XmlNodeType.EndElement;
	if (misplaced)
		throw NotWFError ("Document type cannot appear in this state.");
	currentState = XmlNodeType.DocumentType;

	string publicId = null;
	string systemId = null;
	int intSubsetStartLine = 0;
	int intSubsetStartColumn = 0;

	SkipWhitespace ();
	string doctypeName = ReadName ();
	SkipWhitespace ();

	// 'S' starts a SYSTEM id, 'P' a PUBLIC id followed by a
	// system literal.
	int idStart = PeekChar ();
	if (idStart == 'S') {
		systemId = ReadSystemLiteral (true);
	} else if (idStart == 'P') {
		publicId = ReadPubidLiteral ();
		if (!SkipWhitespace ())
			throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
		systemId = ReadSystemLiteral (false);
	}
	SkipWhitespace ();

	if (PeekChar () == '[') {
		// read markupdecl etc. or end of decl
		ReadChar ();
		intSubsetStartLine = this.LineNumber;
		intSubsetStartColumn = this.LinePosition;
		ClearValueBuffer ();
		ReadInternalSubset ();
		parserContext.InternalSubset = CreateValueString ();
	}
	// end of DOCTYPE decl.
	ExpectAfterWhitespace ('>');

	GenerateDTDObjectModel (doctypeName, publicId,
		systemId, parserContext.InternalSubset,
		intSubsetStartLine, intSubsetStartColumn);

	// set properties for <!DOCTYPE> node
	SetProperties (
		XmlNodeType.DocumentType, // nodeType
		doctypeName, // name
		String.Empty, // prefix
		doctypeName, // localName
		false, // isEmptyElement
		parserContext.InternalSubset, // value
		true // clearAttributes
	);

	if (publicId != null)
		AddAttributeWithValue ("PUBLIC", publicId);
	if (systemId != null)
		AddAttributeWithValue ("SYSTEM", systemId);
	currentAttribute = currentAttributeValue = -1;
}
// Convenience overload: builds the DTD object model passing 0 for both
// the line and the column at which the internal subset starts.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	const int noStartLine = 0;
	const int noStartColumn = 0;

	return GenerateDTDObjectModel (
		name, publicId, systemId, internalSubset,
		noStartLine, noStartColumn);
}
// Creates a fresh DTDObjectModel on the parser context, fills it in
// from the given identifiers and this reader's settings, and lets a
// DTDReader compile it.
//
// intSubsetStartLine / intSubsetStartColumn are forwarded unchanged to
// the DTDReader constructor.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
{
	// now compile DTD
	// (merges both internal and external subsets in the meantime)
	parserContext.Dtd = new DTDObjectModel (this.NameTable);

	// Identity of the declaration.
	DTD.BaseURI = BaseURI;
	DTD.Name = name;
	DTD.PublicId = publicId;
	DTD.SystemId = systemId;
	DTD.InternalSubset = internalSubset;

	// Settings and position taken over from this reader.
	DTD.XmlResolver = resolver;
	DTD.IsStandalone = isStandalone;
	DTD.LineNumber = line;
	DTD.LinePosition = column;

	DTDReader subsetReader = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
	subsetReader.Normalization = this.normalization;
	return subsetReader.GenerateDTDObjectModel ();
}
// Lexical contexts tracked while skimming over an internal DTD subset
// (see ReadInternalSubset). Values are spelled out explicitly; they
// are the ones the implicit numbering starting at Free = 1 produces.
private enum DtdInputState
{
	// Between markup declarations.
	Free = 1,
	// Inside <!ELEMENT ... >
	ElementDecl = 2,
	// Inside <!ATTLIST ... >
	AttlistDecl = 3,
	// Inside <!ENTITY ... >
	EntityDecl = 4,
	// Inside <!NOTATION ... >
	NotationDecl = 5,
	// Inside <? ... ?>
	PI = 6,
	// Inside <!-- ... -->
	Comment = 7,
	// Inside a '...' literal.
	InsideSingleQuoted = 8,
	// Inside a "..." literal.
	InsideDoubleQuoted = 9,
}
// Typed wrapper around a non-generic Stack holding DtdInputState
// values. A new instance starts with DtdInputState.Free on it.
private class DtdInputStateStack
{
	readonly Stack states;

	public DtdInputStateStack ()
	{
		states = new Stack ();
		states.Push (DtdInputState.Free);
	}

	// Returns the top state without removing it.
	public DtdInputState Peek ()
	{
		object top = states.Peek ();
		return (DtdInputState) top;
	}

	// Removes and returns the top state.
	public DtdInputState Pop ()
	{
		object top = states.Pop ();
		return (DtdInputState) top;
	}

	// Makes val the new top state.
	public void Push (DtdInputState val)
	{
		states.Push (val);
	}
}
// Context stack used by ReadInternalSubset.
private DtdInputStateStack stateStack = new DtdInputStateStack ();

// The innermost (top-of-stack) DTD input state.
private DtdInputState State {
	get {
		DtdInputState top = stateStack.Peek ();
		return top;
	}
}
// Reads one character, appends it to the value buffer and returns it.
// The result of ReadChar () is appended as-is, whatever it is.
private int ReadValueChar ()
{
	int read = ReadChar ();
	AppendValueChar (read);
	return read;
}
// Requires the input to continue with the literal text s (Expect
// raises the error otherwise) and then records s in the value buffer.
private void ExpectAndAppend (string s)
{
	Expect (s);
	valueBuffer.Append (s);
}
// Skims over the internal subset of a DOCTYPE declaration without
// generating any result: every character read is appended to the value
// buffer, and a small state stack keeps track of the lexical context
// (declaration, comment, processing instruction, quoted literal) so
// that the ']' closing the subset can be told apart from one that is
// merely content. That closing ']' is consumed but chopped off the
// value buffer again.
//
// Raises a not-well-formed error on end of input and on tokens that
// are impossible in the current context.
private void ReadInternalSubset ()
{
	bool continueParse = true;

	while (continueParse) {
		int ch = ReadValueChar ();

		// The content of a processing instruction is opaque: any
		// character may appear in it and only "?>" terminates it
		// (XML 1.0, section 2.6). Handle it before the general
		// dispatch so that '<', '>', ']', '%' and quotes inside a PI
		// neither raise errors nor disturb the state stack. The '>'
		// is peeked rather than read so that "??>" still ends the PI.
		// End of input falls through to the switch below.
		if (ch >= 0 && State == DtdInputState.PI) {
			if (ch == '?' && PeekChar () == '>') {
				ReadValueChar (); // '>'
				stateStack.Pop ();
			}
			continue;
		}

		switch (ch) {
		case ']':
			switch (State) {
			case DtdInputState.Free:
				// chop extra ']'
				valueBuffer.Remove (valueBuffer.Length - 1, 1);
				continueParse = false;
				break;
			case DtdInputState.InsideDoubleQuoted:
			case DtdInputState.InsideSingleQuoted:
			case DtdInputState.Comment:
				continue;
			default:
				throw NotWFError ("unexpected end of file at DTD.");
			}
			break;
		case -1:
			throw NotWFError ("unexpected end of file at DTD.");
		case '<':
			switch (State) {
			case DtdInputState.InsideDoubleQuoted:
			case DtdInputState.InsideSingleQuoted:
			case DtdInputState.Comment:
				continue; // well-formed
			}
			int c = ReadValueChar ();
			switch (c) {
			case '?':
				stateStack.Push (DtdInputState.PI);
				break;
			case '!':
				// Identify the declaration by its keyword.
				switch (ReadValueChar ()) {
				case 'E':
					switch (ReadValueChar ()) {
					case 'L':
						ExpectAndAppend ("EMENT");
						stateStack.Push (DtdInputState.ElementDecl);
						break;
					case 'N':
						ExpectAndAppend ("TITY");
						stateStack.Push (DtdInputState.EntityDecl);
						break;
					default:
						throw NotWFError ("unexpected token '<!E'.");
					}
					break;
				case 'A':
					ExpectAndAppend ("TTLIST");
					stateStack.Push (DtdInputState.AttlistDecl);
					break;
				case 'N':
					ExpectAndAppend ("OTATION");
					stateStack.Push (DtdInputState.NotationDecl);
					break;
				case '-':
					ExpectAndAppend ("-");
					stateStack.Push (DtdInputState.Comment);
					break;
				}
				break;
			default:
				throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
			}
			break;
		case '\'':
			// Closes a '...' literal, or opens one unless the
			// apostrophe is itself quoted or commented out.
			if (State == DtdInputState.InsideSingleQuoted)
				stateStack.Pop ();
			else if (State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.Comment)
				stateStack.Push (DtdInputState.InsideSingleQuoted);
			break;
		case '"':
			if (State == DtdInputState.InsideDoubleQuoted)
				stateStack.Pop ();
			else if (State != DtdInputState.InsideSingleQuoted && State != DtdInputState.Comment)
				stateStack.Push (DtdInputState.InsideDoubleQuoted);
			break;
		case '>':
			switch (State) {
			case DtdInputState.ElementDecl:
			case DtdInputState.AttlistDecl:
			case DtdInputState.EntityDecl:
			case DtdInputState.NotationDecl:
				// end of the markup declaration
				stateStack.Pop ();
				break;
			case DtdInputState.InsideDoubleQuoted:
			case DtdInputState.InsideSingleQuoted:
			case DtdInputState.Comment:
				continue;
			default:
				throw NotWFError ("unexpected token '>'");
			}
			break;
		case '-':
			// Inside a comment "--" must be followed by '>'.
			if (State == DtdInputState.Comment) {
				if (PeekChar () == '-') {
					ReadValueChar ();
					ExpectAndAppend (">");
					stateStack.Pop ();
				}
			}
			break;
		case '%':
			if (State != DtdInputState.Free && State != DtdInputState.EntityDecl && State != DtdInputState.Comment && State != DtdInputState.InsideDoubleQuoted && State != DtdInputState.InsideSingleQuoted)
				throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
			break;
		}
	}
}
// Reads a system literal and returns its content without the quotes.
//
// When expectSYSTEM is true the reader is positioned on the first 'S'
// of "SYSTEM": the keyword and the whitespace that must follow it are
// consumed first. When it is false only optional leading whitespace is
// skipped (the form used after a PUBLIC identifier).
//
// The literal must be delimited by a matching pair of apostrophes or
// quotation marks; anything else, and end of input, raises a
// not-well-formed error.
private string ReadSystemLiteral (bool expectSYSTEM)
{
	if (expectSYSTEM) {
		Expect ("SYSTEM");
		if (!SkipWhitespace ())
			throw NotWFError ("Whitespace is required after 'SYSTEM'.");
	}
	else
		SkipWhitespace ();

	int quoteChar = ReadChar ();	// apos or quot
	if (quoteChar < 0)
		throw NotWFError ("Unexpected end of stream in ExternalID.");
	// Without this check any character would be accepted as the
	// delimiter and the "literal" would run up to its next occurrence.
	if (quoteChar != '\'' && quoteChar != '"')
		throw NotWFError (String.Format (CultureInfo.InvariantCulture,
			"A system literal must be quoted by ' or \", but found '{0}'.", (char) quoteChar));

	ClearValueBuffer ();
	while (true) {
		int c = ReadChar ();
		if (c < 0)
			throw NotWFError ("Unexpected end of stream in ExternalID.");
		if (c == quoteChar)
			break;
		AppendValueChar (c);
	}
	return CreateValueString ();
}
// Reads the keyword "PUBLIC", the whitespace that must follow it and
// the public identifier literal, and returns the literal's content
// without the quotes.
//
// The literal must be delimited by a matching pair of apostrophes or
// quotation marks and may only contain characters accepted by
// XmlChar.IsPubidChar; anything else, and end of input, raises a
// not-well-formed error.
private string ReadPubidLiteral()
{
	Expect ("PUBLIC");
	if (!SkipWhitespace ())
		throw NotWFError ("Whitespace is required after 'PUBLIC'.");

	int quoteChar = ReadChar ();	// apos or quot
	if (quoteChar < 0)
		throw NotWFError ("Unexpected end of stream in ExternalID.");
	// Without this check any character would be accepted as the
	// delimiter of the literal.
	if (quoteChar != '\'' && quoteChar != '"')
		throw NotWFError (String.Format (CultureInfo.InvariantCulture,
			"A public identifier literal must be quoted by ' or \", but found '{0}'.", (char) quoteChar));

	ClearValueBuffer ();
	while (true) {
		int c = ReadChar ();
		if (c < 0)
			throw NotWFError ("Unexpected end of stream in ExternalID.");
		if (c == quoteChar)
			break;
		if (!XmlChar.IsPubidChar (c))
			throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
		AppendValueChar (c);
	}
	return CreateValueString ();
}
// Reads a name and returns it; the prefix / local name split computed
// by the two-argument overload is discarded.
// The reader is positioned on the first character of the name.
private string ReadName ()
{
	string unusedPrefix;
	string unusedLocalName;
	return ReadName (out unusedPrefix, out unusedLocalName);
}
// Reads a name starting at the current position and returns it as a
// string obtained from the NameTable.
//
// When namespace handling is enabled and the name contains a ':' after
// its first character, prefix receives the part before the first colon
// and localName the part after it. Otherwise prefix is the empty
// string and localName is the whole name.
//
// Raises a not-well-formed error when the first character cannot start
// a name.
private string ReadName (out string prefix, out string localName)
{
#if !USE_NAME_BUFFER
	// Names are taken directly from the peek buffer, so the
	// characters of the current tag have to stay in it while reading.
	bool preserveOnEntry = preserveCurrentTag;
	preserveCurrentTag = true;

	// Position of the name relative to the start of the current node.
	int offsetInNode = peekCharsIndex - curNodePeekIndex;

	int ch = PeekChar ();
	if (!XmlChar.IsFirstNameChar (ch))
		throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
	Advance (ch);

	int nameLen = 1;
	int colonIndex = -1;
	for (ch = PeekChar (); XmlChar.IsNameChar (ch); ch = PeekChar ()) {
		Advance (ch);
		// only the first colon separates prefix and local name
		if (ch == ':' && namespaces && colonIndex < 0)
			colonIndex = nameLen;
		nameLen++;
	}

	int nameStart = curNodePeekIndex + offsetInNode;
	string qname = NameTable.Add (peekChars, nameStart, nameLen);

	if (colonIndex > 0) {
		prefix = NameTable.Add (peekChars, nameStart, colonIndex);
		localName = NameTable.Add (
			peekChars,
			nameStart + colonIndex + 1,
			nameLen - colonIndex - 1);
	} else {
		prefix = String.Empty;
		localName = qname;
	}

	preserveCurrentTag = preserveOnEntry;

	return qname;
#else
	int ch = PeekChar ();
	if (!XmlChar.IsFirstNameChar (ch))
		throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));

	nameLength = 0;

	Advance (ch);
	// Inlined AppendNameChar (ch).
	// nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
	if (ch < Char.MaxValue)
		nameBuffer [nameLength++] = (char) ch;
	else
		AppendSurrogatePairNameChar (ch);

	int colonIndex = -1;
	for (ch = PeekChar (); XmlChar.IsNameChar (ch); ch = PeekChar ()) {
		Advance (ch);

		// only the first colon separates prefix and local name
		if (ch == ':' && namespaces && colonIndex < 0)
			colonIndex = nameLength;

		// Inlined AppendNameChar (ch).
		if (nameLength == nameCapacity)
			ExpandNameCapacity ();
		if (ch < Char.MaxValue)
			nameBuffer [nameLength++] = (char) ch;
		else
			AppendSurrogatePairNameChar (ch);
	}

	string qname = NameTable.Add (nameBuffer, 0, nameLength);

	if (colonIndex > 0) {
		prefix = NameTable.Add (nameBuffer, 0, colonIndex);
		localName = NameTable.Add (nameBuffer, colonIndex + 1, nameLength - colonIndex - 1);
	} else {
		prefix = String.Empty;
		localName = qname;
	}

	return qname;
#endif
}
// Reads the next character and raises a not-well-formed error unless
// it equals the expected one. The message shows both characters and
// their hexadecimal codes; a negative read is reported as "EOF".
private void Expect (int expected)
{
	int actual = ReadChar ();
	if (actual == expected)
		return;

	object actualText;
	if (actual < 0)
		actualText = "EOF";
	else
		actualText = (char) actual;

	throw NotWFError (String.Format (CultureInfo.InvariantCulture,
		"expected '{0}' ({1:X}) but found '{2}' ({3:X})",
		(char) expected,
		expected,
		actualText,
		actual));
}
// Reads expected.Length characters and raises a not-well-formed error
// at the first one that differs from the corresponding character of
// the expected text.
private void Expect (string expected)
{
	foreach (char wanted in expected) {
		if (ReadChar () == wanted)
			continue;
		throw NotWFError (String.Format (CultureInfo.InvariantCulture,
			"'{0}' is expected", expected));
	}
}
// Skips any whitespace and then requires the character c, which is
// consumed. Anything else (a negative read is reported as "EOF")
// raises a not-well-formed error.
private void ExpectAfterWhitespace (char c)
{
	int found;
	do {
		found = ReadChar ();
		// the range test is a cheap pre-filter before the lookup
	} while (found < 0x21 && XmlChar.IsWhitespace (found));

	if (c != found)
		throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, found < 0 ? (object) "EOF" : (char) found, found));
}
// Advances past a run of space, tab, line feed and carriage return
// characters and reports whether at least one was skipped.
// Does not consume the first non-whitespace character.
private bool SkipWhitespace ()
{
	// FIXME: the test is written out by hand instead of calling
	// XmlChar.IsWhitespace (); it should be inlined by the JIT.
	bool skippedAny = false;
	for (int next = PeekChar ();
	     next == 0x20 || next == 0x9 || next == 0xA || next == 0xD;
	     next = PeekChar ()) {
		Advance (next);
		skippedAny = true;
	}
	return skippedAny;
}
// Consumes a run of whitespace and decides what to do with it.
//
// Returns false when the whitespace is to be dropped: it is not the
// start of a text node and the whitespace handling mode excludes it.
// Returns true after turning it into the current node: a Text node
// (via ReadText) when non-markup content follows inside an element,
// otherwise a SignificantWhitespace or Whitespace node depending on
// the xml:space value in scope.
//
// The character at the current position is advanced over without
// being tested; presumably the caller has already seen that it is
// whitespace - confirm against the call sites.
private bool ReadWhitespace ()
{
	if (currentState == XmlNodeType.None)
		currentState = XmlNodeType.XmlDeclaration;

	// The whitespace is copied out of the peek buffer below, so it
	// has to be kept there while reading.
	bool savePreserve = preserveCurrentTag;
	preserveCurrentTag = true;
	int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.

	int ch = PeekChar ();
	do {
		Advance (ch);
		ch = PeekChar ();
		// FIXME: the test is written out by hand instead of calling
		// XmlChar.IsWhitespace (); it should be inlined by the JIT.
	} while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);

	// Whitespace followed by more character data inside an element
	// is the beginning of a text node.
	bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';

	// NOTE(review): on this early return preserveCurrentTag stays
	// true instead of being restored to savePreserve - confirm that
	// this is intended.
	if (!isText && (whitespaceHandling == WhitespaceHandling.None ||
			(whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve)))
		return false;

	ClearValueBuffer ();
	// Copy exactly the characters consumed above. They begin
	// startOffset characters into the current node, so the copy has
	// to start there too (as ReadName does), not at the node start.
	int start = curNodePeekIndex + startOffset;
	valueBuffer.Append (peekChars, start, peekCharsIndex - start);
	preserveCurrentTag = savePreserve;

	if (isText) {
		ReadText (false);
	} else {
		XmlNodeType nodeType = (this.XmlSpace == XmlSpace.Preserve) ?
			XmlNodeType.SignificantWhitespace : XmlNodeType.Whitespace;
		SetProperties (nodeType,
			       String.Empty,
			       String.Empty,
			       String.Empty,
			       false,
			       null, // value: create only when required
			       true);
	}

	return true;
}
// Copies raw element content into buffer, starting at index offset and
// storing at most length chars, and returns the number of chars
// stored.
//
// Markup is copied verbatim: a '<' that does not start an end tag
// raises nestLevel, and a "</" while nestLevel is positive lowers it
// again. A "</" at nesting level zero ends the content: the end tag is
// read, the reader moves on to the next node and the count stored so
// far is returned.
//
// Fewer than length chars are also returned when the last free slot
// would have to take a '<' (so that the character after it can still
// be examined on the next call) or only the first half of a surrogate
// pair.
//
// Malformed input is reported by raising a not-well-formed error, not
// through the return value.
private int ReadCharsInternal (char [] buffer, int offset, int length)
{
	int bufIndex = offset;
	int bufEnd = offset + length;

	// The count of stored chars is always bufIndex - offset. It is
	// derived from the write position rather than from the number of
	// characters read, because a character outside the BMP occupies
	// two chars.
	while (bufIndex < bufEnd) {
		int c = PeekChar ();
		switch (c) {
		case -1:
			throw NotWFError ("Unexpected end of xml.");
		case '<':
			if (bufIndex + 1 == bufEnd)
				// if it does not end here,
				// it cannot store another
				// character, so stop here.
				return bufIndex - offset;
			Advance (c);
			if (PeekChar () != '/') {
				nestLevel++;
				buffer [bufIndex++] = '<';
				continue;
			}
			else if (nestLevel-- > 0) {
				buffer [bufIndex++] = '<';
				continue;
			}
			// Seems to skip immediate EndElement
			Expect ('/');
			if (depthUp) {
				depth++;
				depthUp = false;
			}
			ReadEndTag ();
			readCharsInProgress = false;
			Read (); // move to the next node
			return bufIndex - offset;
		default:
			if (c < Char.MaxValue) {
				Advance (c);
				buffer [bufIndex++] = (char) c;
			} else {
				// A surrogate pair needs two slots. When only one
				// is left, leave the character unread for the
				// next call instead of writing past the
				// requested range.
				if (bufIndex + 2 > bufEnd)
					return bufIndex - offset;
				Advance (c);
				buffer [bufIndex++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
				buffer [bufIndex++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
			}
			break;
		}
	}
	return length;
}
// Discards input up to and including the end tag of the current
// element, then moves to the next node and returns what Read ()
// returns.
//
// Every '<' that does not start an end tag raises nestLevel; every
// "</" lowers it, and once it is no longer positive the name that
// follows is compared with the name on top of the element name stack.
// Scanning simply goes on when the names differ. End of input raises a
// not-well-formed error.
private bool ReadUntilEndTag ()
{
	if (Depth == 0)
		currentState = XmlNodeType.EndElement;

	for (;;) {
		int ch = ReadChar ();
		if (ch == -1)
			throw NotWFError ("Unexpected end of xml.");
		if (ch != '<')
			continue;

		// start of some nested markup
		if (PeekChar () != '/') {
			nestLevel++;
			continue;
		}

		// end tag of a nested element
		nestLevel--;
		if (nestLevel > 0)
			continue;

		ReadChar (); // '/'
		string closingName = ReadName ();
		if (closingName != elementNames [elementNameStackPos - 1].Name)
			continue;

		Expect ('>');
		depth--;
		return Read ();
	}
}
2989 #endregion