2010-06-21 Atsushi Enomoto <atsushi@ximian.com>
[mcs.git] / class / System.XML / System.Xml / XmlTextReader.cs
blobf9422aa1b27f6681612a1b01c99de58052764cab
1 //
2 // System.Xml.XmlTextReader
3 //
4 // Author:
5 // Jason Diamond (jason@injektilo.org)
6 // Adam Treat (manyoso@yahoo.com)
7 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
8 //
9 // (C) 2001, 2002 Jason Diamond http://injektilo.org/
10 // Copyright (C) 2005-2006 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //#define USE_NAME_BUFFER
34 // Optimization TODOs:
36 // - support PushbackChar() which reverts one character read.
37 // - ReadTextReader() should always keep one pushback buffer
38 // as pushback safety net.
39 // - Replace (peek,read) * n -> read * n + pushback
42 using System;
43 using System.Collections;
44 #if NET_2_0
45 using System.Collections.Generic;
46 #endif
47 using System.Globalization;
48 using System.IO;
49 using System.Security.Permissions;
50 using System.Text;
51 using System.Xml.Schema;
52 using Mono.Xml;
54 #if NET_2_0
55 using System.Xml;
57 namespace Mono.Xml2
58 #else
59 namespace System.Xml
60 #endif
63 #if NET_2_0
64 internal class XmlTextReader : XmlReader,
65 IXmlLineInfo, IXmlNamespaceResolver, IHasXmlParserContext
66 #else
67 [PermissionSet (SecurityAction.InheritanceDemand, Unrestricted = true)]
68 public class XmlTextReader : XmlReader, IXmlLineInfo, IHasXmlParserContext
69 #endif
71 #region Constructors
// Parameterless constructor for derived classes; it performs no
// initialization of its own.
protected XmlTextReader ()
{
}

// Reads XML from a stream; the stream is wrapped in an XmlStreamReader
// and handed to the TextReader constructor.
public XmlTextReader (Stream input)
	: this (new XmlStreamReader (input))
{
}

// Reads XML from the resource named by url, using a new NameTable.
public XmlTextReader (string url)
	: this (url, new NameTable ())
{
}

// Reads XML from a TextReader, using a new NameTable.
public XmlTextReader (TextReader input)
	: this (input, new NameTable ())
{
}
// Constructor taking a name table, for derived classes.
// NOTE(review): nt is not forwarded anywhere, and XmlNodeType.None is
// passed down to InitializeContext(), whose switch only accepts
// Attribute, Element and Document and throws XmlException otherwise.
// Confirm whether this constructor is meant to be usable.
protected XmlTextReader (XmlNameTable nt)
	: this (String.Empty, null, XmlNodeType.None, null)
{
}
// Reads XML from a stream (wrapped in an XmlStreamReader) with the
// given name table.
public XmlTextReader (Stream input, XmlNameTable nt)
	: this (new XmlStreamReader (input), nt)
{
}

// Reads XML from a stream (wrapped in an XmlStreamReader); url is
// passed on as the base URI argument.
public XmlTextReader (string url, Stream input)
	: this (url, new XmlStreamReader (input))
{
}

// Reads XML from a TextReader with the given url and a new NameTable.
public XmlTextReader (string url, TextReader input)
	: this (url, input, new NameTable ())
{
}
// Opens the resource named by url through the current resolver and
// reads it as a document, using nt for the parser context and its
// namespace manager.
public XmlTextReader (string url, XmlNameTable nt)
{
	string resolvedUri;
	Stream source = GetStreamFromUrl (url, out resolvedUri);

	XmlNamespaceManager namespaceManager = new XmlNamespaceManager (nt);
	XmlParserContext context = new XmlParserContext (
		nt, namespaceManager, String.Empty, XmlSpace.None);

	this.InitializeContext (
		resolvedUri,
		context,
		new XmlStreamReader (source),
		XmlNodeType.Document);
}
// Reads XML from a TextReader with an empty url; nt is forwarded to
// the (url, input, nt) constructor.
public XmlTextReader (TextReader input, XmlNameTable nt)
	: this (String.Empty, input, nt)
{
}
// This is used in XmlReader.Create() to indicate that the string
// argument is a URI, not an XML fragment. The dummy parameter only
// distinguishes this overload and is never read.
internal XmlTextReader (bool dummy, XmlResolver resolver, string url, XmlNodeType fragType, XmlParserContext context)
{
	// Fall back to the platform default resolver when none is given.
	if (resolver == null) {
#if MOONLIGHT
		resolver = new XmlXapResolver ();
#else
		resolver = new XmlUrlResolver ();
#endif
	}
	// Must be stored before GetStreamFromUrl(), which reads the field.
	this.XmlResolver = resolver;

	string resolvedUri;
	Stream source = GetStreamFromUrl (url, out resolvedUri);
	this.InitializeContext (resolvedUri, context, new XmlStreamReader (source), fragType);
}
// Reads an XML fragment from a stream. The base URI comes from the
// context when one is supplied, otherwise it is empty. Readers created
// this way refuse ResetState().
public XmlTextReader (Stream xmlFragment, XmlNodeType fragType, XmlParserContext context)
	: this (
		context != null ? context.BaseURI : String.Empty,
		new XmlStreamReader (xmlFragment),
		fragType,
		context)
{
	disallowReset = true;
}
// Reads a fragment of the given node type with no parser context.
internal XmlTextReader (string baseURI, TextReader xmlFragment, XmlNodeType fragType)
	: this (baseURI, xmlFragment, fragType, null)
{
}

// Reads XML from a stream (wrapped in an XmlStreamReader) with the
// given url and name table.
public XmlTextReader (string url, Stream input, XmlNameTable nt)
	: this (url, new XmlStreamReader (input), nt)
{
}
// Reads a document from a TextReader with the given url and name table.
//
// The name table is carried to InitializeContext() through a parser
// context built around it, the same way the (string url, XmlNameTable nt)
// constructor does. Previously a null context was passed, so nt was
// silently dropped and a brand-new NameTable was used instead.
// When nt is null the old behavior (null context) is kept.
public XmlTextReader (string url, TextReader input, XmlNameTable nt)
	: this (
		url,
		input,
		XmlNodeType.Document,
		nt != null
			? new XmlParserContext (nt, new XmlNamespaceManager (nt), String.Empty, XmlSpace.None)
			: null)
{
}
// Reads an XML fragment held in a string. The base URI comes from the
// context when one is supplied, otherwise it is empty. Readers created
// this way refuse ResetState().
public XmlTextReader (string xmlFragment, XmlNodeType fragType, XmlParserContext context)
	: this (
		context != null ? context.BaseURI : String.Empty,
		new StringReader (xmlFragment),
		fragType,
		context)
{
	disallowReset = true;
}
// Common target of the delegating constructors: all set-up is done by
// InitializeContext().
internal XmlTextReader (string url, TextReader fragment, XmlNodeType fragType, XmlParserContext context)
{
	InitializeContext (url, context, fragment, fragType);
}
// Resolves url with the current resolver and opens it as a Stream.
// absoluteUriString receives the resolved URI text, or String.Empty
// when the resolver returned no URI. The result is null if the
// resolver's entity is not a Stream.
private Stream GetStreamFromUrl (string url, out string absoluteUriString)
{
#if NET_2_1
	if (url == null)
		throw new ArgumentNullException ("url");
	if (url.Length == 0)
		throw new ArgumentException ("url");
#endif
	Uri resolved = resolver.ResolveUri (null, url);
	if (resolved != null)
		absoluteUriString = resolved.ToString ();
	else
		absoluteUriString = String.Empty;

	object entity = resolver.GetEntity (resolved, null, typeof (Stream));
	return entity as Stream;
}
196 #endregion
198 #region Properties
// Number of attributes currently recorded by the reader.
public override int AttributeCount {
	get {
		return attributeCount;
	}
}

// Base URI as held by the parser context.
public override string BaseURI {
	get {
		return parserContext.BaseURI;
	}
}
// Both capabilities are always reported as supported; only the
// accessibility differs between profiles.
#if NET_2_0
public override bool CanReadBinaryContent
{
	get { return true; }
}

public override bool CanReadValueChunk
{
	get { return true; }
}
#else
internal override bool CanReadBinaryContent
{
	get { return true; }
}

internal override bool CanReadValueChunk
{
	get { return true; }
}
#endif
// Gets or sets the checkCharacters flag.
internal bool CharacterChecking
{
	get { return checkCharacters; }
	set { checkCharacters = value; }
}

// for XmlReaderSettings.CloseInput support: when true, Close() also
// closes the underlying TextReader.
internal bool CloseInput
{
	get { return closeInput; }
	set { closeInput = value; }
}
// Depth of the node under the cursor. When positioned on an attribute
// the result is elementDepth + 1 (or + 2 inside an attribute value),
// lowered by one when the current token is not an Element.
public override int Depth {
	get {
		bool onElement = currentToken.NodeType == XmlNodeType.Element;
		int adjustment = onElement ? 0 : -1;

		if (currentAttributeValue >= 0)
			return adjustment + elementDepth + 2; // inside attribute value.
		if (currentAttribute >= 0)
			return adjustment + elementDepth + 1;
		return elementDepth;
	}
}
// Encoding as held by the parser context.
public Encoding Encoding {
	get {
		return parserContext.Encoding;
	}
}
#if NET_2_0
// Gets or sets the entity handling mode.
public EntityHandling EntityHandling
{
	get { return entityHandling; }
	set { entityHandling = value; }
}
#endif

// True once the read state has reached EndOfFile.
public override bool EOF
{
	get { return ReadState.EndOfFile == readState; }
}

// True when the token under the cursor yields a non-null value.
public override bool HasValue
{
	get { return cursorToken.Value != null; }
}

// XmlTextReader does not expand default attributes, so this is
// always false.
public override bool IsDefault
{
	get { return false; }
}

// IsEmptyElement flag of the token under the cursor.
public override bool IsEmptyElement
{
	get { return cursorToken.IsEmptyElement; }
}
// Indexers are only declared outside the NET_2_0 profile; each one
// forwards to the matching GetAttribute() overload.
#if !NET_2_0
public override string this [int i]
{
	get { return GetAttribute (i); }
}

public override string this [string name]
{
	get { return GetAttribute (name); }
}

public override string this [string localName, string namespaceName]
{
	get { return GetAttribute (localName, namespaceName); }
}
#endif
// Line number: the reader's running line while useProceedingLineInfo
// is set, otherwise the line recorded on the token under the cursor.
public int LineNumber
{
	get { return useProceedingLineInfo ? line : cursorToken.LineNumber; }
}

// Line position: the reader's running column while
// useProceedingLineInfo is set, otherwise the position recorded on the
// token under the cursor.
public int LinePosition
{
	get { return useProceedingLineInfo ? column : cursorToken.LinePosition; }
}
// Local name of the token under the cursor.
public override string LocalName
{
	get { return cursorToken.LocalName; }
}

// Qualified name of the token under the cursor.
public override string Name
{
	get { return cursorToken.Name; }
}
// Gets or sets the namespaces flag. It may only be changed while the
// reader is still in the Initial state; otherwise the setter throws
// InvalidOperationException.
public bool Namespaces
{
	get { return namespaces; }
	set {
		bool notStarted = readState == ReadState.Initial;
		if (!notStarted)
			throw new InvalidOperationException ("Namespaces have to be set before reading.");
		namespaces = value;
	}
}
// Namespace URI of the token under the cursor.
public override string NamespaceURI
{
	get { return cursorToken.NamespaceURI; }
}

// Name table in use by this reader.
public override XmlNameTable NameTable
{
	get { return nameTable; }
}

// Node type of the token under the cursor.
public override XmlNodeType NodeType
{
	get { return cursorToken.NodeType; }
}

// Gets or sets the normalization flag.
public bool Normalization
{
	get { return normalization; }
	set { normalization = value; }
}

// Prefix of the token under the cursor.
public override string Prefix
{
	get { return cursorToken.Prefix; }
}

// Gets or sets the prohibitDtd flag.
public bool ProhibitDtd
{
	get { return prohibitDtd; }
	set { prohibitDtd = value; }
}

// Quote character recorded on the token under the cursor.
public override char QuoteChar
{
	get { return cursorToken.QuoteChar; }
}

// Current read state.
public override ReadState ReadState
{
	get { return readState; }
}
#if NET_2_0
// Simply exposes the base class settings.
public override XmlReaderSettings Settings
{
	get { return base.Settings; }
}
#endif

// Value of the token under the cursor; String.Empty when the token
// has no value.
public override string Value
{
	get {
		if (cursorToken.Value != null)
			return cursorToken.Value;
		return String.Empty;
	}
}

// Gets or sets the whitespace handling mode.
public WhitespaceHandling WhitespaceHandling
{
	get { return whitespaceHandling; }
	set { whitespaceHandling = value; }
}

// xml:lang as held by the parser context.
public override string XmlLang
{
	get { return parserContext.XmlLang; }
}

// Write-only: replaces the resolver used by this reader.
public XmlResolver XmlResolver
{
	set { resolver = value; }
}

// xml:space as held by the parser context.
public override XmlSpace XmlSpace
{
	get { return parserContext.XmlSpace; }
}
390 #endregion
392 #region Methods
// Moves the reader to the Closed state, clears the cursor and current
// tokens and the attribute count, and closes the underlying
// TextReader when closeInput is set.
public override void Close ()
{
	readState = ReadState.Closed;

	cursorToken.Clear ();
	currentToken.Clear ();
	attributeCount = 0;

	if (!closeInput)
		return;
	if (reader != null)
		reader.Close ();
}
// Returns the value of the attribute at index i.
//
// Throws ArgumentOutOfRangeException when i is negative or not less
// than AttributeCount. The negative case used to escape as an array
// index error, and the old message stated the opposite of the failing
// condition while being passed as the parameter name.
public override string GetAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "i must be non-negative and smaller than AttributeCount.");
	return attributeTokens [i].Value;
}
// MS.NET 1.0 msdn says that this method returns String.Empty
// for absent attribute, but in fact it returns null.
// This description is corrected in MS.NET 1.1 msdn.
//
// Returns the value of the first attribute whose qualified name
// equals name, or null when there is none.
public override string GetAttribute (string name)
{
	int index = 0;
	while (index < attributeCount) {
		XmlAttributeTokenInfo candidate = attributeTokens [index];
		if (candidate.Name == name)
			return candidate.Value;
		index++;
	}
	return null;
}
// Index of the first attribute matching both local name and namespace
// URI, or -1 when no attribute matches.
private int GetIndexOfQualifiedAttribute (string localName, string namespaceURI)
{
	int index = 0;
	while (index < attributeCount) {
		XmlAttributeTokenInfo candidate = attributeTokens [index];
		bool sameLocalName = candidate.LocalName == localName;
		if (sameLocalName && candidate.NamespaceURI == namespaceURI)
			return index;
		index++;
	}
	return -1;
}
// Explicit IHasXmlParserContext implementation: exposes the parser
// context of this reader.
XmlParserContext IHasXmlParserContext.ParserContext
{
	get { return parserContext; }
}
// Returns the value of the attribute with the given local name and
// namespace URI, or null when there is no such attribute.
public override string GetAttribute (string localName, string namespaceURI)
{
	int index = this.GetIndexOfQualifiedAttribute (localName, namespaceURI);
	return index < 0 ? null : attributeTokens [index].Value;
}
#if NET_2_0
// Forwards to the namespace manager.
public IDictionary<string, string> GetNamespacesInScope (XmlNamespaceScope scope)
{
	IDictionary<string, string> inScope = nsmgr.GetNamespacesInScope (scope);
	return inScope;
}

// Explicit IXmlNamespaceResolver implementation; same result as the
// public method.
IDictionary<string, string> IXmlNamespaceResolver.GetNamespacesInScope (XmlNamespaceScope scope)
{
	return this.GetNamespacesInScope (scope);
}
#endif
// Returns a TextReader over the input that has not been consumed yet.
// When the peek buffer was never filled the underlying reader itself
// is returned; otherwise the unread part of the buffer is prepended to
// the rest of the underlying reader's content.
public TextReader GetRemainder ()
{
	if (peekCharsLength < 0)
		return reader;

	int unreadCount = peekCharsLength - peekCharsIndex;
	string buffered = new string (peekChars, peekCharsIndex, unreadCount);
	return new StringReader (buffered + reader.ReadToEnd ());
}
// Line information is always reported as available. The method is
// public under NET_2_0 and an explicit IXmlLineInfo implementation
// otherwise.
#if NET_2_0
public bool HasLineInfo ()
#else
bool IXmlLineInfo.HasLineInfo ()
#endif
{
	return true;
}
// Looks up the namespace bound to prefix, without treating the prefix
// as already atomized.
public override string LookupNamespace (string prefix)
{
	return LookupNamespace (prefix, false);
}

// Asks the namespace manager for the namespace bound to prefix.
// An empty result is reported as null.
private string LookupNamespace (string prefix, bool atomizedNames)
{
	string found = nsmgr.LookupNamespace (prefix, atomizedNames);
	if (found == String.Empty)
		return null;
	return found;
}
#if NET_2_0
// Explicit IXmlNamespaceResolver implementation; the namespace is not
// treated as atomized.
string IXmlNamespaceResolver.LookupPrefix (string ns)
{
	return this.LookupPrefix (ns, false);
}

// Forwards to the namespace manager.
public string LookupPrefix (string ns, bool atomizedName)
{
	string prefix = nsmgr.LookupPrefix (ns, atomizedName);
	return prefix;
}
#endif
// Moves the cursor to the attribute at index i.
//
// Throws ArgumentOutOfRangeException when i is negative or not less
// than AttributeCount. The negative case used to escape as an array
// index error after the reader state had already been modified, and
// the message was passed as the parameter name.
public override void MoveToAttribute (int i)
{
	if (i < 0 || i >= attributeCount)
		throw new ArgumentOutOfRangeException ("i", "attribute index out of range.");

	currentAttribute = i;
	currentAttributeValue = -1;
	cursorToken = attributeTokens [i];
}
// Moves the cursor to the first attribute whose qualified name equals
// name. Returns false, leaving the cursor untouched, when there is
// none.
public override bool MoveToAttribute (string name)
{
	int index = 0;
	while (index < attributeCount) {
		if (attributeTokens [index].Name == name) {
			MoveToAttribute (index);
			return true;
		}
		index++;
	}
	return false;
}
// Moves the cursor to the attribute with the given local name and
// namespace. Returns false when no such attribute exists.
public override bool MoveToAttribute (string localName, string namespaceName)
{
	int index = GetIndexOfQualifiedAttribute (localName, namespaceName);
	bool found = index >= 0;
	if (found)
		MoveToAttribute (index);
	return found;
}
// Moves the cursor back from an attribute to the current token.
// Returns true only when the cursor actually moved.
public override bool MoveToElement ()
{
	// currentToken may be null: for attribute .ctor()
	bool nothingToDo = currentToken == null
		|| cursorToken == currentToken
		|| currentAttribute < 0;
	if (nothingToDo)
		return false;

	currentAttribute = -1;
	currentAttributeValue = -1;
	cursorToken = currentToken;
	return true;
}
// Moves the cursor to the first attribute, if any: the cursor is first
// reset to the current token, then advanced by one attribute.
public override bool MoveToFirstAttribute ()
{
	if (attributeCount != 0) {
		MoveToElement ();
		return MoveToNextAttribute ();
	}
	return false;
}
// Advances the cursor to the next attribute. Returns false when there
// is no further attribute, leaving the cursor where it was.
public override bool MoveToNextAttribute ()
{
	if (currentAttribute == 0 && attributeCount == 0)
		return false;

	int next = currentAttribute + 1;
	if (next >= attributeCount)
		return false;

	currentAttribute = next;
	currentAttributeValue = -1;
	cursorToken = attributeTokens [next];
	return true;
}
// Advances the reader to the next node.
// Returns false when the reader is closed, when an attribute-fragment
// reader has already produced its single node, or when ReadContent()
// reports that no node was read. Throws the NotWFError() exception when
// a Document reader runs out of content before currentState reached
// EndElement.
570 public override bool Read ()
// A closed reader never produces nodes.
572 if (readState == ReadState.Closed)
573 return false;
// Remember where this node starts in the peek buffer and ask
// ReadTextReader() to keep the buffer content from that point on.
574 curNodePeekIndex = peekCharsIndex;
575 preserveCurrentTag = true;
576 nestLevel = 0;
577 ClearValueBuffer ();
// Attribute-fragment mode: the input is read as one attribute value
// and exposed as attribute 0.
579 if (startNodeType == XmlNodeType.Attribute) {
580 if (currentAttribute == 0)
581 return false; // already read.
582 SkipTextDeclaration ();
583 ClearAttributes ();
584 IncrementAttributeToken ();
585 ReadAttributeValueTokens ('"');
586 cursorToken = attributeTokens [0];
587 currentAttributeValue = -1;
588 readState = ReadState.Interactive;
589 return true;
// First Read() while currentState is Element (set for element
// fragments and Fragment conformance): skip a text declaration.
591 if (readState == ReadState.Initial && currentState == XmlNodeType.Element)
592 SkipTextDeclaration ();
594 if (Binary != null)
595 Binary.Reset ();
597 bool more = false;
598 readState = ReadState.Interactive;
// Snapshot the position at which the new node begins; SetProperties()
// copies these values onto the current token.
599 currentLinkedNodeLineNumber = line;
600 currentLinkedNodeLinePosition = column;
// While set, LineNumber/LinePosition report the running line/column
// instead of the token's recorded position.
601 useProceedingLineInfo = true;
// Put the cursor back on the current token and drop attribute state
// left over from the previous node.
603 cursorToken = currentToken;
604 attributeCount = 0;
605 currentAttribute = currentAttributeValue = -1;
606 currentToken.Clear ();
608 // It was moved from end of ReadStartTag ().
609 if (depthUp) {
610 ++depth;
611 depthUp = false;
// A ReadChars() sequence was in progress: finish it through
// ReadUntilEndTag() instead of reading the next content node.
614 if (readCharsInProgress) {
615 readCharsInProgress = false;
616 return ReadUntilEndTag ();
619 more = ReadContent ();
621 if (!more && startNodeType == XmlNodeType.Document && currentState != XmlNodeType.EndElement)
622 throw NotWFError ("Document element did not appear.");
624 useProceedingLineInfo = false;
625 return more;
// Steps the cursor through the value tokens of the current attribute.
// Returns false when the cursor is not on an attribute or the last
// value token has already been reached.
public override bool ReadAttributeValue ()
{
	// An attribute-fragment reader that has not been read yet is
	// advanced implicitly.
	bool unreadAttributeFragment =
		readState == ReadState.Initial && startNodeType == XmlNodeType.Attribute;
	if (unreadAttributeFragment)
		Read ();

	if (currentAttribute < 0)
		return false;

	XmlAttributeTokenInfo attribute = attributeTokens [currentAttribute];
	// First call for this attribute: start just before its first token.
	if (currentAttributeValue < 0)
		currentAttributeValue = attribute.ValueTokenStartIndex - 1;

	if (currentAttributeValue >= attribute.ValueTokenEndIndex)
		return false;

	currentAttributeValue++;
	cursorToken = attributeValueTokens [currentAttributeValue];
	return true;
}
// Decodes Base64 content into buffer through the Binary helper.
// The char getter is installed only for the duration of the call.
public int ReadBase64 (byte [] buffer, int offset, int length)
{
	BinaryCharGetter = binaryCharGetter;
	try {
		int decoded = Binary.ReadBase64 (buffer, offset, length);
		return decoded;
	} finally {
		BinaryCharGetter = null;
	}
}

// Decodes BinHex content into buffer through the Binary helper.
// The char getter is installed only for the duration of the call.
public int ReadBinHex (byte [] buffer, int offset, int length)
{
	BinaryCharGetter = binaryCharGetter;
	try {
		int decoded = Binary.ReadBinHex (buffer, offset, length);
		return decoded;
	} finally {
		BinaryCharGetter = null;
	}
}
// Reads element text content into buffer, starting at offset and
// writing at most length chars; returns the value produced by
// ReadCharsInternal().
//
// Returns 0 without reading when positioned on an empty element (the
// reader is advanced past it) or when no ReadChars() sequence is in
// progress and the current node is not an element.
//
// Throws ArgumentNullException when buffer is null (previously a
// NullReferenceException), and ArgumentOutOfRangeException when offset
// or length is negative or the range does not fit in buffer. The range
// check is written as a subtraction so that offset + length cannot
// overflow.
public int ReadChars (char [] buffer, int offset, int length)
{
	if (buffer == null)
		throw new ArgumentNullException ("buffer");
	if (offset < 0) {
		throw new ArgumentOutOfRangeException (
#if !NET_2_1
			"offset", offset,
#endif
			"Offset must be non-negative integer.");
	}
	if (length < 0) {
		throw new ArgumentOutOfRangeException (
#if !NET_2_1
			"length", length,
#endif
			"Length must be non-negative integer.");
	}
	if (buffer.Length - offset < length)
		throw new ArgumentOutOfRangeException ("length", "buffer length is smaller than the sum of offset and length.");

	if (IsEmptyElement) {
		Read ();
		return 0;
	}

	if (!readCharsInProgress && NodeType != XmlNodeType.Element)
		return 0;

	// From here on the peek buffer no longer has to retain the start
	// of the current tag, and Read() will finish the sequence.
	preserveCurrentTag = false;
	readCharsInProgress = true;
	useProceedingLineInfo = true;

	return ReadCharsInternal (buffer, offset, length);
}
// Resets the reader state through Clear(). Not permitted for readers
// that set disallowReset (the fragment constructors).
public void ResetState ()
{
	if (!disallowReset) {
		Clear ();
		return;
	}
	throw new InvalidOperationException ("Cannot call ResetState when parsing an XML fragment.");
}

// XmlTextReader does not resolve entities, so this always throws.
public override void ResolveEntity ()
{
	throw new InvalidOperationException ("XmlTextReader cannot resolve external entities.");
}
#if NET_2_0
// Currently just the base class implementation.
// FIXME: Implement, for performance improvement
[MonoTODO]
public override void Skip ()
{
	base.Skip ();
}
#endif
723 #endregion
725 #region Internals
// Parsed DTD Objects
// Note that this property must be kept since dtd2xsd uses it.
internal DTDObjectModel DTD
{
	get { return parserContext.Dtd; }
}

// Resolver currently used by this reader.
internal XmlResolver Resolver
{
	get { return resolver; }
}
735 #endregion
737 #region Privates
// Mutable description of one node: names, position and a lazily
// materialized value.
internal class XmlTokenInfo
{
	public XmlTokenInfo (XmlTextReader xtr)
	{
		Reader = xtr;
		Clear ();
	}

	// Cached value; null until computed or explicitly assigned.
	string valueCache;

	protected XmlTextReader Reader;

	public string Name;
	public string LocalName;
	public string Prefix;
	public string NamespaceURI;
	public bool IsEmptyElement;
	public char QuoteChar;
	public int LineNumber;
	public int LinePosition;
	// Range inside the reader's valueBuffer; a negative start means
	// "no range recorded".
	public int ValueBufferStart;
	public int ValueBufferEnd;

	public XmlNodeType NodeType;

	// Value of the node. Taken from the cache when present, else from
	// the recorded valueBuffer range, else (for text-like node types)
	// from Reader.CreateValueString(). Null for every other node type.
	public virtual string Value {
		get {
			if (valueCache == null) {
				if (ValueBufferStart >= 0) {
					int length = ValueBufferEnd - ValueBufferStart;
					valueCache = Reader.valueBuffer.ToString (ValueBufferStart, length);
				} else {
					switch (NodeType) {
					case XmlNodeType.Text:
					case XmlNodeType.SignificantWhitespace:
					case XmlNodeType.Whitespace:
					case XmlNodeType.Comment:
					case XmlNodeType.CDATA:
					case XmlNodeType.ProcessingInstruction:
						valueCache = Reader.CreateValueString ();
						break;
					}
				}
			}
			return valueCache;
		}
		set { valueCache = value; }
	}

	// Returns the token to its blank state.
	public virtual void Clear ()
	{
		ValueBufferStart = -1;
		valueCache = null;
		NodeType = XmlNodeType.None;

		Name = String.Empty;
		LocalName = String.Empty;
		Prefix = String.Empty;
		NamespaceURI = String.Empty;

		IsEmptyElement = false;
		QuoteChar = '"';
		LineNumber = 0;
		LinePosition = 0;
	}
}
// Token describing an attribute. Its value is assembled from a range
// of the reader's attributeValueTokens.
internal class XmlAttributeTokenInfo : XmlTokenInfo
{
	public XmlAttributeTokenInfo (XmlTextReader reader)
		: base (reader)
	{
		NodeType = XmlNodeType.Attribute;
	}

	// Inclusive range of this attribute's tokens in
	// Reader.attributeValueTokens.
	public int ValueTokenStartIndex;
	public int ValueTokenEndIndex;
	string valueCache;
	StringBuilder tmpBuilder = new StringBuilder ();

	// Attribute value, cached after the first computation. Text tokens
	// contribute their value; any other token is written back as an
	// entity reference ("&name;").
	public override string Value {
		get {
			if (valueCache != null)
				return valueCache;

			// Single token: no builder needed.
			// An empty value should return String.Empty.
			if (ValueTokenStartIndex == ValueTokenEndIndex) {
				XmlTokenInfo only = Reader.attributeValueTokens [ValueTokenStartIndex];
				bool isEntityRef = only.NodeType == XmlNodeType.EntityReference;
				valueCache = isEntityRef
					? String.Concat ("&", only.Name, ";")
					: only.Value;
				return valueCache;
			}

			tmpBuilder.Length = 0;
			int index = ValueTokenStartIndex;
			while (index <= ValueTokenEndIndex) {
				XmlTokenInfo part = Reader.attributeValueTokens [index];
				if (part.NodeType != XmlNodeType.Text) {
					tmpBuilder.Append ('&');
					tmpBuilder.Append (part.Name);
					tmpBuilder.Append (';');
				} else {
					tmpBuilder.Append (part.Value);
				}
				index++;
			}

			valueCache = tmpBuilder.ToString (0, tmpBuilder.Length);
			return valueCache;
		}

		set { valueCache = value; }
	}

	public override void Clear ()
	{
		base.Clear ();
		valueCache = null;
		NodeType = XmlNodeType.Attribute;
		ValueTokenStartIndex = 0;
		ValueTokenEndIndex = 0;
	}

	// Registers this attribute with the reader's namespace manager
	// when it is a namespace declaration: prefix "xmlns" binds
	// LocalName, name "xmlns" binds the empty prefix. Comparison is by
	// reference against XmlNamespaceManager.PrefixXmlns.
	internal void FillXmlns ()
	{
		string xmlns = XmlNamespaceManager.PrefixXmlns;
		if (Object.ReferenceEquals (Prefix, xmlns)) {
			Reader.nsmgr.AddNamespace (LocalName, Value);
			return;
		}
		if (Object.ReferenceEquals (Name, xmlns))
			Reader.nsmgr.AddNamespace (String.Empty, Value);
	}

	// Sets NamespaceURI: the xmlns namespace for namespace
	// declarations, empty for unprefixed attributes, otherwise the
	// namespace the reader has bound to the prefix.
	internal void FillNamespace ()
	{
		string xmlns = XmlNamespaceManager.PrefixXmlns;
		bool isDeclaration = Object.ReferenceEquals (Prefix, xmlns)
			|| Object.ReferenceEquals (Name, xmlns);

		if (isDeclaration) {
			NamespaceURI = XmlNamespaceManager.XmlnsXmlns;
		} else if (Prefix.Length == 0) {
			NamespaceURI = string.Empty;
		} else {
			NamespaceURI = Reader.LookupNamespace (Prefix, true);
		}
	}
}
874 private XmlTokenInfo cursorToken;
875 private XmlTokenInfo currentToken;
876 private XmlAttributeTokenInfo currentAttributeToken;
877 private XmlTokenInfo currentAttributeValueToken;
878 private XmlAttributeTokenInfo [] attributeTokens = new XmlAttributeTokenInfo [10];
879 private XmlTokenInfo [] attributeValueTokens = new XmlTokenInfo [10];
880 private int currentAttribute;
881 private int currentAttributeValue;
882 private int attributeCount;
884 private XmlParserContext parserContext;
885 private XmlNameTable nameTable;
886 private XmlNamespaceManager nsmgr;
888 private ReadState readState;
889 private bool disallowReset;
891 private int depth;
892 private int elementDepth;
893 private bool depthUp;
895 private bool popScope;
// Immutable triple holding the qualified name, local name and prefix
// of an element.
struct TagName
{
	public readonly string Name;
	public readonly string LocalName;
	public readonly string Prefix;

	public TagName (string n, string l, string p)
	{
		this.Name = n;
		this.LocalName = l;
		this.Prefix = p;
	}
}
911 private TagName [] elementNames;
912 int elementNameStackPos;
914 private bool allowMultipleRoot;
916 private bool isStandalone;
918 private bool returnEntityReference;
919 private string entityReferenceName;
921 #if USE_NAME_BUFFER
922 private char [] nameBuffer;
923 private int nameLength;
924 private int nameCapacity;
925 private const int initialNameCapacity = 32;
926 #endif
928 private StringBuilder valueBuffer;
930 private TextReader reader;
931 private char [] peekChars;
932 private int peekCharsIndex;
933 private int peekCharsLength;
934 private int curNodePeekIndex;
935 private bool preserveCurrentTag;
936 private const int peekCharCapacity = 1024;
938 private int line;
939 private int column;
941 private int currentLinkedNodeLineNumber;
942 private int currentLinkedNodeLinePosition;
943 private bool useProceedingLineInfo;
945 private XmlNodeType startNodeType;
946 // State machine attribute.
947 // XmlDeclaration: after the first node.
948 // DocumentType: after doctypedecl
949 // Element: inside document element
950 // EndElement: after document element
951 private XmlNodeType currentState;
953 // For ReadChars()/ReadBase64()/ReadBinHex()
954 private int nestLevel;
955 private bool readCharsInProgress;
956 XmlReaderBinarySupport.CharGetter binaryCharGetter;
958 // These values are never re-initialized.
959 private bool namespaces = true;
960 private WhitespaceHandling whitespaceHandling = WhitespaceHandling.All;
961 #if MOONLIGHT
962 private XmlResolver resolver = new XmlXapResolver ();
963 #else
964 private XmlResolver resolver = new XmlUrlResolver ();
965 #endif
966 private bool normalization = false;
968 private bool checkCharacters;
969 private bool prohibitDtd = false;
970 private bool closeInput = true;
971 private EntityHandling entityHandling; // 2.0
973 private NameTable whitespacePool;
974 private char [] whitespaceCache;
// Builds (does not throw) an XmlException carrying this reader's line
// information and base URI.
private XmlException NotWFError (string message)
{
	IXmlLineInfo lineInfo = this as IXmlLineInfo;
	return new XmlException (lineInfo, BaseURI, message);
}
// One-time set-up of buffers, option defaults and position counters;
// ends by calling Clear() for the per-document state.
private void Init ()
{
	allowMultipleRoot = false;
	elementNames = new TagName [10];
	valueBuffer = new StringBuilder ();
	binaryCharGetter = new XmlReaderBinarySupport.CharGetter (ReadChars);
#if USE_NAME_BUFFER
	nameBuffer = new char [initialNameCapacity];
#endif

	// Character checking defaults to on; under NET_2_0 the reader
	// settings, when present, decide.
#if NET_2_0
	XmlReaderSettings settings = Settings;
	checkCharacters = settings == null || settings.CheckCharacters;
#else
	checkCharacters = true;
#endif
	prohibitDtd = false;
	closeInput = true;
	entityHandling = EntityHandling.ExpandCharEntities;

	// The peek buffer is allocated once and reused; a negative
	// length marks it as not yet filled.
	if (peekChars == null)
		peekChars = new char [peekCharCapacity];
	peekCharsIndex = 0;
	peekCharsLength = -1;
	curNodePeekIndex = -1; // read from start

	line = 1;
	column = 1;

	currentLinkedNodeLineNumber = 0;
	currentLinkedNodeLinePosition = 0;

	Clear ();
}
// Resets the per-document reader state: a fresh current token, no
// attributes, Initial read state, zero depth and cleared flags.
private void Clear ()
{
	XmlTokenInfo fresh = new XmlTokenInfo (this);
	currentToken = fresh;
	cursorToken = fresh;
	currentAttribute = -1;
	currentAttributeValue = -1;
	attributeCount = 0;

	readState = ReadState.Initial;

	depth = 0;
	elementDepth = 0;
	depthUp = false;

	allowMultipleRoot = false;
	popScope = false;
	elementNameStackPos = 0;

	isStandalone = false;
	returnEntityReference = false;
	entityReferenceName = String.Empty;

#if USE_NAME_BUFFER
	nameLength = 0;
	nameCapacity = initialNameCapacity;
#endif
	useProceedingLineInfo = false;

	currentState = XmlNodeType.None;

	readCharsInProgress = false;
}
// Common constructor back end: stores the parser context (creating a
// default one when context is null), derives the name table and
// namespace manager from it, records url as the context's base URI,
// runs Init() and installs fragment as the input.
// Only Attribute, Element and Document are accepted for fragType; any
// other value throws XmlException.
1046 private void InitializeContext (string url, XmlParserContext context, TextReader fragment, XmlNodeType fragType)
1048 startNodeType = fragType;
1049 parserContext = context;
// No context supplied: build one around a new NameTable.
1050 if (context == null) {
1051 XmlNameTable nt = new NameTable ();
1052 parserContext = new XmlParserContext (nt,
1053 new XmlNamespaceManager (nt),
1054 String.Empty,
1055 XmlSpace.None);
// The context may carry a null name table or namespace manager; fall
// back to new instances in that case.
1057 nameTable = parserContext.NameTable;
1058 nameTable = nameTable != null ? nameTable : new NameTable ();
1059 nsmgr = parserContext.NamespaceManager;
1060 nsmgr = nsmgr != null ? nsmgr : new XmlNamespaceManager (nameTable);
// A non-empty url replaces the context's base URI.
1062 if (url != null && url.Length > 0) {
1063 #if NET_2_1
1064 Uri uri = new Uri (url, UriKind.RelativeOrAbsolute);
1065 #else
1066 Uri uri = null;
1067 try {
1068 #if NET_2_0
1069 uri = new Uri (url, UriKind.RelativeOrAbsolute);
1070 #else
1071 uri = new Uri (url);
1072 #endif
1073 } catch (Exception) {
// Uri construction failed: retry with url taken relative to the
// full path of "./a".
1074 string path = Path.GetFullPath ("./a");
1075 uri = new Uri (new Uri (path), url);
1077 #endif
1078 parserContext.BaseURI = uri.ToString ();
// Init() ends with Clear(), which resets currentState and
// allowMultipleRoot, so the fragment-specific values are applied
// afterwards in the switch below.
1081 Init ();
1083 reader = fragment;
1085 switch (fragType) {
1086 case XmlNodeType.Attribute:
// The whole input is read up front and every double quote is
// replaced by &quot;.
// NOTE(review): presumably because Read() parses the text with '"' as
// the quote character -- confirm.
1087 reader = new StringReader (fragment.ReadToEnd ().Replace ("\"", "&quot;"));
1088 break;
1089 case XmlNodeType.Element:
1090 currentState = XmlNodeType.Element;
1091 allowMultipleRoot = true;
1092 break;
1093 case XmlNodeType.Document:
1094 break;
1095 default:
1096 throw new XmlException (String.Format ("NodeType {0} is not allowed to create XmlTextReader.", fragType));
#if NET_2_0
// Reports Fragment when multiple roots are allowed, Document
// otherwise. Setting Fragment switches the reader into
// element-content state; any other value is ignored.
internal ConformanceLevel Conformance
{
	get {
		if (allowMultipleRoot)
			return ConformanceLevel.Fragment;
		return ConformanceLevel.Document;
	}
	set {
		if (value != ConformanceLevel.Fragment)
			return;
		currentState = XmlNodeType.Element;
		allowMultipleRoot = true;
	}
}

// Shifts the running line and column counters by the given offsets.
internal void AdjustLineInfoOffset (int lineNumberOffset, int linePositionOffset)
{
	line = line + lineNumberOffset;
	column = column + linePositionOffset;
}

// Replaces the name table held by the parser context.
// NOTE(review): the reader's own nameTable field is not updated here;
// confirm that this is intended.
internal void SetNameTable (XmlNameTable nameTable)
{
	parserContext.NameTable = nameTable;
}
#endif
// Use this method rather than setting the properties
// directly so that all the necessary properties can
// be changed in harmony with each other. Maybe the
// fields should be in a separate class to help enforce
// this.
//
// Namespace URI could not be provided here.
//
// Fills the current token and stamps it with the position captured
// when the node started.
private void SetProperties (
	XmlNodeType nodeType,
	string name,
	string prefix,
	string localName,
	bool isEmptyElement,
	string value,
	bool clearAttributes)
{
	XmlTokenInfo target = currentToken;
	SetTokenProperties (target, nodeType, name, prefix, localName, isEmptyElement, value, clearAttributes);
	target.LineNumber = this.currentLinkedNodeLineNumber;
	target.LinePosition = this.currentLinkedNodeLinePosition;
}
// Copies the given properties onto token, records the current depth
// as elementDepth, and optionally resets the attribute state.
private void SetTokenProperties (
	XmlTokenInfo token,
	XmlNodeType nodeType,
	string name,
	string prefix,
	string localName,
	bool isEmptyElement,
	string value,
	bool clearAttributes)
{
	token.NodeType = nodeType;

	token.Name = name;
	token.Prefix = prefix;
	token.LocalName = localName;

	token.IsEmptyElement = isEmptyElement;
	token.Value = value;

	this.elementDepth = depth;

	if (!clearAttributes)
		return;
	ClearAttributes ();
}
// Forgets all attributes of the current node. Only the counters are
// reset; the attribute token objects themselves are not cleared here.
private void ClearAttributes ()
{
	attributeCount = 0;
	currentAttributeValue = -1;
	currentAttribute = -1;
}
// Combines the char at the peek position with the following char into
// one code point when they form a high/low surrogate pair. Otherwise
// the first char is returned unchanged. c is the char already seen at
// the peek position; it is handed to ReadTextReader() when more input
// has to be buffered.
private int PeekSurrogate (int c)
{
	bool needMoreInput = peekCharsLength <= peekCharsIndex + 1;
	if (needMoreInput && !ReadTextReader (c)) {
		//FIXME: copy MS.NET behaviour when unpaired surrogate found
		return c;
	}

	int high = peekChars [peekCharsIndex];
	int low = peekChars [peekCharsIndex+1];

	bool isPair = (high & 0xFC00) == 0xD800 && (low & 0xFC00) == 0xDC00;
	if (!isPair) {
		//FIXME: copy MS.NET behaviour when unpaired surrogate found
		return high;
	}
	return 0x10000 + (high-0xD800)*0x400 + (low-0xDC00);
}
// Returns the next character without consuming it, or -1 when no more
// input is available. A NUL char in the buffer is also reported as -1.
// Values in the surrogate range are passed to PeekSurrogate().
private int PeekChar ()
{
	// Refill until there is something to look at.
	while (peekCharsIndex >= peekCharsLength) {
		if (!ReadTextReader (-1))
			return -1;
	}

	int c = peekChars [peekCharsIndex];
	if (c == 0)
		return -1;

	bool plainChar = c < 0xD800 || c >= 0xDFFF;
	return plainChar ? c : PeekSurrogate (c);
}
// Consumes and returns the next character (-1 when input is
// exhausted). Advance() steps past it and keeps line and column up to
// date.
private int ReadChar ()
{
	int ch = PeekChar ();
	Advance (ch);
	return ch;
}
// Steps the peek position past ch, which the caller has already
// peeked, and updates the line and column counters.
private void Advance (int ch)
{
	// A code point at or above 0x10000 occupies two chars in the
	// buffer (a compound UCS-4 character).
	peekCharsIndex += ch >= 0x10000 ? 2 : 1;

	switch (ch) {
	case '\n':
		line++;
		column = 1;
		break;
	case -1:
		break;
	default:
		column++;
		break;
	}
}
1241 if (peekCharsLength < 0) { // initialized buffer
1242 peekCharsLength = reader.Read (peekChars, 0, peekChars.Length);
1243 return peekCharsLength > 0;
1245 int offset = remained >= 0 ? 1 : 0;
1246 int copysize = peekCharsLength - curNodePeekIndex;
1248 // It must assure that current tag content always exists
1249 // in peekChars.
1250 if (!preserveCurrentTag) {
1251 curNodePeekIndex = 0;
1252 peekCharsIndex = 0;
1253 //copysize = 0;
1254 } else if (peekCharsLength < peekChars.Length) {
1255 // NonBlockingStreamReader returned less bytes
1256 // than the size of the buffer. In that case,
1257 // just refill the buffer.
1258 } else if (curNodePeekIndex <= (peekCharsLength >> 1)) {
1259 // extend the buffer
1260 char [] tmp = new char [peekChars.Length * 2];
1261 Array.Copy (peekChars, curNodePeekIndex,
1262 tmp, 0, copysize);
1263 peekChars = tmp;
1264 curNodePeekIndex = 0;
1265 peekCharsIndex = copysize;
1266 } else {
1267 Array.Copy (peekChars, curNodePeekIndex,
1268 peekChars, 0, copysize);
1269 curNodePeekIndex = 0;
1270 peekCharsIndex = copysize;
1272 if (remained >= 0)
1273 peekChars [peekCharsIndex] = (char) remained;
1274 int count = peekChars.Length - peekCharsIndex - offset;
1275 if (count > peekCharCapacity)
1276 count = peekCharCapacity;
1277 int read = reader.Read (
1278 peekChars, peekCharsIndex + offset, count);
1279 int remainingSize = offset + read;
1280 peekCharsLength = peekCharsIndex + remainingSize;
1282 return (remainingSize != 0);
// Reads the next content node and dispatches on its first character.
// Returns false once the read state has become EndOfFile.
private bool ReadContent ()
{
	if (popScope) {
		nsmgr.PopScope ();
		parserContext.PopScope ();
		popScope = false;
	}

	if (returnEntityReference) {
		SetEntityReferenceProperties ();
		return this.ReadState != ReadState.EndOfFile;
	}

	int c = PeekChar ();
	if (c == -1) {
		readState = ReadState.EndOfFile;
		ClearValueBuffer ();
		SetProperties (
			XmlNodeType.None, // nodeType
			String.Empty, // name
			String.Empty, // prefix
			String.Empty, // localName
			false, // isEmptyElement
			null, // value
			true // clearAttributes
		);
		if (depth > 0)
			throw NotWFError ("unexpected end of file. Current depth is " + depth);

		return false;
	}

	if (c == '<') {
		Advance (c);
		int next = PeekChar ();
		if (next == '/') {
			Advance ('/');
			ReadEndTag ();
		} else if (next == '?') {
			Advance ('?');
			ReadProcessingInstruction ();
		} else if (next == '!') {
			Advance ('!');
			ReadDeclaration ();
		} else
			ReadStartTag ();
	} else if (c == '\r' || c == '\n' || c == '\t' || c == ' ') {
		if (!ReadWhitespace ())
			// skip
			return ReadContent ();
	} else
		ReadText (true);

	return this.ReadState != ReadState.EndOfFile;
}
// Exposes the pending entity reference (entityReferenceName) as an
// EntityReference node after checking the standalone and
// unparsed-entity constraints, then clears the pending state.
private void SetEntityReferenceProperties ()
{
	DTDEntityDeclaration decl = null;
	if (DTD != null)
		decl = DTD.EntityDecls [entityReferenceName];

	if (this.isStandalone && (DTD == null || decl == null || !decl.IsInternalSubset))
		throw NotWFError ("Standalone document must not contain any references to an non-internally declared entity.");
	if (decl != null && decl.NotationName != null)
		throw NotWFError ("Reference to any unparsed entities is not allowed here.");

	ClearValueBuffer ();
	SetProperties (
		XmlNodeType.EntityReference, // nodeType
		entityReferenceName, // name
		String.Empty, // prefix
		entityReferenceName, // localName
		false, // isEmptyElement
		null, // value
		true // clearAttributes
	);

	entityReferenceName = String.Empty;
	returnEntityReference = false;
}
// Parses a start tag (or an empty-element tag) together with its
// attributes. The leading '<' has already been consumed.
private void ReadStartTag ()
{
	if (currentState == XmlNodeType.EndElement)
		throw NotWFError ("Multiple document element was detected.");
	currentState = XmlNodeType.Element;

	nsmgr.PushScope ();

	currentLinkedNodeLineNumber = line;
	currentLinkedNodeLinePosition = column;

	string prefix, localName;
	string name = ReadName (out prefix, out localName);
	// NOTE(review): currentState was set to Element just above, so this
	// looks unreachable unless ReadName changes the state - confirm.
	if (currentState == XmlNodeType.EndElement)
		throw NotWFError ("document has terminated, cannot open new element");

	ClearAttributes ();

	SkipWhitespace ();
	if (XmlChar.IsFirstNameChar (PeekChar ()))
		ReadAttributes (false);
	cursorToken = this.currentToken;

	// fill namespaces
	for (int n = 0; n < attributeCount; n++)
		attributeTokens [n].FillXmlns ();
	for (int n = 0; n < attributeCount; n++)
		attributeTokens [n].FillNamespace ();

	// quick name check
	if (namespaces) {
		for (int n = 0; n < attributeCount; n++) {
			if (attributeTokens [n].Prefix == "xmlns" &&
			    attributeTokens [n].Value == String.Empty)
				throw NotWFError ("Empty namespace URI cannot be mapped to non-empty prefix.");
		}
	}

	// Every pair of attributes must differ in qualified name and in
	// the (local name, namespace URI) combination.
	for (int first = 0; first < attributeCount; first++) {
		for (int second = first + 1; second < attributeCount; second++) {
			bool sameName = Object.ReferenceEquals (
				attributeTokens [first].Name, attributeTokens [second].Name);
			if (sameName ||
			    (Object.ReferenceEquals (attributeTokens [first].LocalName, attributeTokens [second].LocalName) &&
			     Object.ReferenceEquals (attributeTokens [first].NamespaceURI, attributeTokens [second].NamespaceURI)))
				throw NotWFError ("Attribute name and qualified name must be identical.");
		}
	}

	bool isEmptyElement = false;
	if (PeekChar () == '/') {
		Advance ('/');
		isEmptyElement = true;
		popScope = true;
	} else {
		depthUp = true;
		PushElementName (name, localName, prefix);
	}
	parserContext.PushScope ();

	Expect ('>');

	SetProperties (
		XmlNodeType.Element, // nodeType
		name, // name
		prefix, // prefix
		localName, // localName
		isEmptyElement, // isEmptyElement
		null, // value
		false // clearAttributes
	);
	if (prefix.Length > 0)
		currentToken.NamespaceURI = LookupNamespace (prefix, true);
	else if (namespaces)
		currentToken.NamespaceURI = nsmgr.DefaultNamespace;

	if (namespaces) {
		if (NamespaceURI == null)
			throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
		try {
			for (int n = 0; n < attributeCount; n++) {
				MoveToAttribute (n);
				if (NamespaceURI == null)
					throw NotWFError (String.Format ("'{0}' is undeclared namespace.", Prefix));
			}
		} finally {
			// always return the cursor to the element itself
			MoveToElement ();
		}
	}

	// Apply xml:base, xml:lang and xml:space to the parser context.
	for (int n = 0; n < attributeCount; n++) {
		if (!Object.ReferenceEquals (attributeTokens [n].Prefix, XmlNamespaceManager.PrefixXml))
			continue;
		string xmlAttrName = attributeTokens [n].LocalName;
		string xmlAttrValue = attributeTokens [n].Value;

		if (xmlAttrName == "base") {
			if (this.resolver == null)
				parserContext.BaseURI = xmlAttrValue;
			else {
				Uri current = BaseURI != String.Empty ?
					new Uri (BaseURI) : null;
				// xml:base="" without any base URI -> pointless. However there are
				// some people who use such xml:base. See also bug #608391.
				if (current != null || !String.IsNullOrEmpty (xmlAttrValue)) {
					Uri resolved = resolver.ResolveUri (current, xmlAttrValue);
					parserContext.BaseURI = resolved != null ?
						resolved.ToString () : String.Empty;
				}
			}
		} else if (xmlAttrName == "lang") {
			parserContext.XmlLang = xmlAttrValue;
		} else if (xmlAttrName == "space") {
			if (xmlAttrValue == "preserve")
				parserContext.XmlSpace = XmlSpace.Preserve;
			else if (xmlAttrValue == "default")
				parserContext.XmlSpace = XmlSpace.Default;
			else
				throw NotWFError (String.Format ("Invalid xml:space value: {0}", xmlAttrValue));
		}
	}

	if (IsEmptyElement)
		CheckCurrentStateUpdate ();
}
// Pushes an element's names onto the elementNames stack, doubling
// the backing array when it is full.
private void PushElementName (string name, string local, string prefix)
{
	if (elementNameStackPos == elementNames.Length) {
		TagName [] grown = new TagName [elementNames.Length * 2];
		Array.Copy (elementNames, 0, grown, 0, elementNameStackPos);
		elementNames = grown;
	}

	int slot = elementNameStackPos++;
	elementNames [slot] = new TagName (name, local, prefix);
}
// Parses an end tag and checks it against the innermost open element.
// The reader is positioned on the first character of the element's
// name.
private void ReadEndTag ()
{
	if (currentState != XmlNodeType.Element)
		throw NotWFError ("End tag cannot appear in this state.");

	currentLinkedNodeLineNumber = line;
	currentLinkedNodeLinePosition = column;

	if (elementNameStackPos == 0)
		throw NotWFError ("closing element without matching opening element");

	// Pop the start tag's names; the end tag must repeat its name.
	elementNameStackPos--;
	TagName open = elementNames [elementNameStackPos];
	Expect (open.Name);

	ExpectAfterWhitespace ('>');

	depth--;

	SetProperties (
		XmlNodeType.EndElement, // nodeType
		open.Name, // name
		open.Prefix, // prefix
		open.LocalName, // localName
		false, // isEmptyElement
		null, // value
		true // clearAttributes
	);

	if (open.Prefix.Length > 0)
		currentToken.NamespaceURI = LookupNamespace (open.Prefix, true);
	else if (namespaces)
		currentToken.NamespaceURI = nsmgr.DefaultNamespace;

	popScope = true;

	CheckCurrentStateUpdate ();
}
// Sets currentState to EndElement once the document element has been
// closed (depth 0, on an empty element or an end tag), unless multiple
// roots are allowed.
private void CheckCurrentStateUpdate ()
{
	if (depth != 0 || allowMultipleRoot)
		return;

	if (IsEmptyElement || NodeType == XmlNodeType.EndElement)
		currentState = XmlNodeType.EndElement;
}
#if USE_NAME_BUFFER
// Appends ch to nameBuffer as two UTF-16 units (lead in the 0xD800
// block, trail in the 0xDC00 block), growing the buffer between the
// two writes when it has become full.
private void AppendSurrogatePairNameChar (int ch)
{
	int offset = ch - 0x10000;
	nameBuffer [nameLength++] = (char) (offset / 0x400 + 0xD800);
	if (nameLength == nameCapacity)
		ExpandNameCapacity ();
	nameBuffer [nameLength++] = (char) (offset % 0x400 + 0xDC00);
}

// Doubles nameCapacity and moves the current name characters into a
// newly allocated buffer.
private void ExpandNameCapacity ()
{
	char [] previous = nameBuffer;
	nameCapacity *= 2;
	nameBuffer = new char [nameCapacity];
	Array.Copy (previous, nameBuffer, nameLength);
}
#endif
// Appends a character to valueBuffer; values above Char.MaxValue are
// written as a surrogate pair.
private void AppendValueChar (int ch)
{
	if (ch > Char.MaxValue) {
		AppendSurrogatePairValueChar (ch);
		return;
	}
	valueBuffer.Append ((char) ch);
}
// Appends ch to valueBuffer as two UTF-16 units: a lead unit in the
// 0xD800 block followed by a trail unit in the 0xDC00 block.
private void AppendSurrogatePairValueChar (int ch)
{
	int offset = ch - 0x10000;
	char lead = (char) (offset / 0x400 + 0xD800);
	char trail = (char) (offset % 0x400 + 0xDC00);
	valueBuffer.Append (lead);
	valueBuffer.Append (trail);
}
// Builds the string for the current contents of valueBuffer.
private string CreateValueString ()
{
	// Since whitespace strings are mostly identical
	// depending on the Depth, we make use of NameTable
	// to atomize whitespace strings.
	XmlNodeType type = NodeType;
	if (type == XmlNodeType.Whitespace || type == XmlNodeType.SignificantWhitespace) {
		int len = valueBuffer.Length;
		if (whitespaceCache == null)
			whitespaceCache = new char [32];
		// Only runs shorter than the cache are atomized.
		if (len < whitespaceCache.Length) {
			if (whitespacePool == null)
				whitespacePool = new NameTable ();
#if NET_2_0 && !NET_2_1
			valueBuffer.CopyTo (0, whitespaceCache, 0, len);
#else
			for (int i = 0; i < len; i++)
				whitespaceCache [i] = valueBuffer [i];
#endif
			return whitespacePool.Add (whitespaceCache, 0, valueBuffer.Length);
		}
	}

	if (valueBuffer.Capacity < 100)
		return valueBuffer.ToString (0, valueBuffer.Length);
	return valueBuffer.ToString ();
}
// Empties valueBuffer by truncating it to length zero.
private void ClearValueBuffer ()
{
	valueBuffer.Length = 0;
}
// Reads character data up to the next '<', end of input, or an entity
// reference that has to be reported as its own node.
// The reader is positioned on the first character of the text.
// When notWhitespace is false, valueBuffer is not cleared first.
private void ReadText (bool notWhitespace)
{
	if (currentState != XmlNodeType.Element)
		throw NotWFError ("Text node cannot appear in this state.");
	preserveCurrentTag = false;

	if (notWhitespace)
		ClearValueBuffer ();

	bool lastWasCloseBracket = false;

	for (int ch = PeekChar (); ch != '<' && ch != -1; ) {
		if (ch == '&') {
			ReadChar ();
			ch = ReadReference (false);
			if (returnEntityReference) // Returns -1 if char validation should not be done
				break;
		} else if (normalization && ch == '\r') {
			ReadChar ();
			ch = PeekChar ();
			// A lone '\r' becomes '\n'; in "\r\n" the '\r' is
			// simply discarded.
			if (ch != '\n')
				AppendValueChar ('\n');
			continue;
		} else {
			if (CharacterChecking && XmlChar.IsInvalid (ch))
				throw NotWFError ("Not allowed character was found.");
			ch = ReadChar ();
		}

		// FIXME: it might be optimized by the JIT later,
		// AppendValueChar (ch);
		if (ch > Char.MaxValue)
			AppendSurrogatePairValueChar (ch);
		else
			valueBuffer.Append ((char) ch);

		// Block "]]>"
		bool isCloseBracket = ch == ']';
		if (isCloseBracket && lastWasCloseBracket && PeekChar () == '>')
			throw NotWFError ("Inside text content, character sequence ']]>' is not allowed.");
		lastWasCloseBracket = isCloseBracket;

		ch = PeekChar ();
		notWhitespace = true;
	}

	if (returnEntityReference && valueBuffer.Length == 0) {
		SetEntityReferenceProperties ();
		return;
	}

	XmlNodeType nodeType;
	if (notWhitespace)
		nodeType = XmlNodeType.Text;
	else if (this.XmlSpace == XmlSpace.Preserve)
		nodeType = XmlNodeType.SignificantWhitespace;
	else
		nodeType = XmlNodeType.Whitespace;

	SetProperties (
		nodeType, // nodeType
		String.Empty, // name
		String.Empty, // prefix
		String.Empty, // localName
		false, // isEmptyElement
		null, // value: create only when required
		true // clearAttributes
	);
}
// Reads a reference. The leading '&' has already been consumed.
// "&#..." is handled as a character reference and its value is
// returned; anything else goes to ReadEntityReference (), which
// returns -1 when the entity is not one of the predefined ones.
private int ReadReference (bool ignoreEntityReferences)
{
	if (PeekChar () != '#')
		return ReadEntityReference (ignoreEntityReferences);

	Advance ('#');
	return ReadCharacterReference ();
}
// Reads a numeric character reference; "&#" has already been
// consumed. Both the decimal and the hexadecimal ("x...") form are
// accepted. Returns the referenced code point.
// Throws a not-well-formed error for an invalid digit, for a value
// above U+10FFFF (which previously could overflow the accumulator
// and wrap into an unrelated character), and when the input ends
// before the terminating ';'.
private int ReadCharacterReference ()
{
	const int maxCodePoint = 0x10FFFF;
	int value = 0;
	int ch;

	if (PeekChar () == 'x') {
		Advance ('x');

		while ((ch = PeekChar ()) != ';' && ch != -1) {
			Advance (ch);

			int digit;
			if (ch >= '0' && ch <= '9')
				digit = ch - '0';
			else if (ch >= 'A' && ch <= 'F')
				digit = ch - 'A' + 10;
			else if (ch >= 'a' && ch <= 'f')
				digit = ch - 'a' + 10;
			else
				throw NotWFError (String.Format (CultureInfo.InvariantCulture,
						"invalid hexadecimal digit: {0} (#x{1:X})",
						(char) ch,
						ch));

			value = (value << 4) + digit;
			// Checked on every digit so value can never overflow.
			if (value > maxCodePoint)
				throw NotWFError ("Character reference is out of the valid Unicode range.");
		}
	} else {
		while ((ch = PeekChar ()) != ';' && ch != -1) {
			Advance (ch);

			if (ch < '0' || ch > '9')
				throw NotWFError (String.Format (CultureInfo.InvariantCulture,
						"invalid decimal digit: {0} (#x{1:X})",
						(char) ch,
						ch));

			value = value * 10 + ch - '0';
			// Checked on every digit so value can never overflow.
			if (value > maxCodePoint)
				throw NotWFError ("Character reference is out of the valid Unicode range.");
		}
	}

	if (ch == -1)
		throw NotWFError ("Unexpected end of file in a character reference.");

	ReadChar (); // ';'

	// There is no way to save surrogate pairs...
	if (CharacterChecking && Normalization &&
		XmlChar.IsInvalid (value))
		throw NotWFError ("Referenced character was not allowed in XML. Normalization is " + normalization + ", checkCharacters = " + checkCharacters);
	return value;
}
// Reads a named entity reference up to and including ';'.
// Returns the character for a predefined entity. Otherwise returns -1
// (the result should not be validated): the reference is either copied
// literally into the value buffer (ignoreEntityReferences) or recorded
// so that it can be reported as an EntityReference node.
// Real EOF must not be detected here.
private int ReadEntityReference (bool ignoreEntityReferences)
{
	string name = ReadName ();
	Expect (';');

	int predefined = XmlChar.GetPredefinedEntity (name);
	if (predefined >= 0)
		return predefined;

	if (!ignoreEntityReferences) {
		returnEntityReference = true;
		entityReferenceName = name;
		return -1;
	}

	AppendValueChar ('&');
	foreach (char c in name)
		AppendValueChar (c);
	AppendValueChar (';');
	return -1;
}
// Reads name="value" pairs into attributeTokens until the end of the
// tag ('/' or '>'), or '?' when isXmlDecl is set, or end of input.
// The reader is positioned on the first character of the attribute
// name.
private void ReadAttributes (bool isXmlDecl)
{
	bool requireWhitespace = false;
	currentAttribute = -1;
	currentAttributeValue = -1;

	while (true) {
		if (!SkipWhitespace () && requireWhitespace)
			throw NotWFError ("Unexpected token. Name is required here.");

		IncrementAttributeToken ();
		currentAttributeToken.LineNumber = line;
		currentAttributeToken.LinePosition = column;

		string prefix, localName;
		currentAttributeToken.Name = ReadName (out prefix, out localName);
		currentAttributeToken.Prefix = prefix;
		currentAttributeToken.LocalName = localName;
		ExpectAfterWhitespace ('=');
		SkipWhitespace ();
		ReadAttributeValueTokens (-1);

		// This hack is required for xmldecl which has
		// both effective attributes and Value.
		if (isXmlDecl) {
			string dummyValue = currentAttributeToken.Value;
		}

		attributeCount++;

		if (!SkipWhitespace ())
			requireWhitespace = true;

		int next = PeekChar ();
		bool closing = isXmlDecl ?
			next == '?' :
			(next == '/' || next == '>');
		if (closing || next == -1)
			break;
	}

	currentAttribute = -1;
	currentAttributeValue = -1;
}
// Adds an attribute with an empty prefix and namespace URI whose
// value is a single Text token holding the given string.
private void AddAttributeWithValue (string name, string value)
{
	IncrementAttributeToken ();
	XmlAttributeTokenInfo attribute = attributeTokens [currentAttribute];
	attribute.Name = NameTable.Add (name);
	attribute.Prefix = String.Empty;
	attribute.NamespaceURI = String.Empty;

	IncrementAttributeValueToken ();
	XmlTokenInfo valueToken = attributeValueTokens [currentAttributeValue];
	SetTokenProperties (
		valueToken,
		XmlNodeType.Text, // nodeType
		String.Empty, // name
		String.Empty, // prefix
		String.Empty, // localName
		false, // isEmptyElement
		value, // value
		false); // clearAttributes

	attribute.Value = value;
	attributeCount++;
}
// Moves to the next attribute token slot, doubling the array when it
// is exhausted and creating the token on first use; the slot is
// cleared and becomes currentAttributeToken.
private void IncrementAttributeToken ()
{
	currentAttribute++;

	if (currentAttribute == attributeTokens.Length) {
		XmlAttributeTokenInfo [] grown =
			new XmlAttributeTokenInfo [attributeTokens.Length * 2];
		attributeTokens.CopyTo (grown, 0);
		attributeTokens = grown;
	}

	XmlAttributeTokenInfo slot = attributeTokens [currentAttribute];
	if (slot == null) {
		slot = new XmlAttributeTokenInfo (this);
		attributeTokens [currentAttribute] = slot;
	}

	currentAttributeToken = slot;
	currentAttributeToken.Clear ();
}
// Moves to the next attribute value token slot, doubling the array
// when it is exhausted and creating the token on first use; the slot
// is cleared and becomes currentAttributeValueToken.
private void IncrementAttributeValueToken ()
{
	currentAttributeValue++;

	if (currentAttributeValue == attributeValueTokens.Length) {
		XmlTokenInfo [] grown = new XmlTokenInfo [attributeValueTokens.Length * 2];
		attributeValueTokens.CopyTo (grown, 0);
		attributeValueTokens = grown;
	}

	XmlTokenInfo slot = attributeValueTokens [currentAttributeValue];
	if (slot == null) {
		slot = new XmlTokenInfo (this);
		attributeValueTokens [currentAttributeValue] = slot;
	}

	currentAttributeValueToken = slot;
	currentAttributeValueToken.Clear ();
}
// Reads one attribute value and splits it into value tokens: runs of
// text, plus a separate EntityReference token for each general entity
// reference that is not expanded.
// dummyQuoteChar < 0 means the opening quote is read from the input;
// otherwise the given quote character is assumed (attribute value
// constructor) and end of input ends the value instead of failing.
// LAMESPEC: Orthodox XML reader should normalize attribute values
private void ReadAttributeValueTokens (int dummyQuoteChar)
{
	bool quoteFromInput = dummyQuoteChar < 0;
	int quoteChar = quoteFromInput ? ReadChar () : dummyQuoteChar;

	if (quoteChar != '\'' && quoteChar != '\"')
		throw NotWFError ("an attribute value was not quoted");
	currentAttributeToken.QuoteChar = (char) quoteChar;

	IncrementAttributeValueToken ();
	currentAttributeToken.ValueTokenStartIndex = currentAttributeValue;
	currentAttributeValueToken.LineNumber = line;
	currentAttributeValueToken.LinePosition = column;

	bool incrementToken = false;
	bool isNewToken = true;
	bool loop = true;
	int ch = 0;
	currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
	while (loop) {
		ch = ReadChar ();
		if (ch == quoteChar)
			break;

		// An entity reference token was just closed: what follows
		// starts a fresh token.
		if (incrementToken) {
			IncrementAttributeValueToken ();
			currentAttributeValueToken.ValueBufferStart = valueBuffer.Length;
			currentAttributeValueToken.LineNumber = line;
			currentAttributeValueToken.LinePosition = column;
			incrementToken = false;
			isNewToken = true;
		}

		if (ch == '<')
			throw NotWFError ("attribute values cannot contain '<'");

		if (ch == -1) {
			if (quoteFromInput)
				throw NotWFError ("unexpected end of file in an attribute value");
			// Attribute value constructor.
			loop = false;
		} else if (ch == '&') {
			if (PeekChar () == '#') {
				Advance ('#');
				ch = ReadCharacterReference ();
				AppendValueChar (ch);
			} else {
				// Check XML 1.0 section 3.1 WFC.
				string entName = ReadName ();
				Expect (';');
				int predefined = XmlChar.GetPredefinedEntity (entName);
				if (predefined >= 0)
					AppendValueChar (predefined);
				else {
					CheckAttributeEntityReferenceWFC (entName);
#if NET_2_0
					if (entityHandling == EntityHandling.ExpandEntities) {
						string value = DTD.GenerateEntityAttributeText (entName);
						foreach (char c in (IEnumerable<char>) value)
							AppendValueChar (c);
					} else
#endif
					{
						// Close the running text token and describe
						// the reference as its own token.
						currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
						currentAttributeValueToken.NodeType = XmlNodeType.Text;
						if (!isNewToken)
							IncrementAttributeValueToken ();
						currentAttributeValueToken.Name = entName;
						currentAttributeValueToken.Value = String.Empty;
						currentAttributeValueToken.NodeType = XmlNodeType.EntityReference;
						incrementToken = true;
					}
				}
			}
		} else {
			// When Normalize = true, then replace
			// all spaces to ' '
			if (normalization) {
				if (ch == '\r') {
					if (PeekChar () == '\n')
						continue; // skip '\r'.
					ch = ' ';
				} else if (ch == '\n' || ch == '\t')
					ch = ' ';
			}

			if (CharacterChecking && XmlChar.IsInvalid (ch))
				throw NotWFError ("Invalid character was found.");

			// FIXME: it might be optimized by the JIT later,
			// AppendValueChar (ch);
			if (ch > Char.MaxValue)
				AppendSurrogatePairValueChar (ch);
			else
				valueBuffer.Append ((char) ch);
		}

		isNewToken = false;
	}

	if (!incrementToken) {
		currentAttributeValueToken.ValueBufferEnd = valueBuffer.Length;
		currentAttributeValueToken.NodeType = XmlNodeType.Text;
	}
	currentAttributeToken.ValueTokenEndIndex = currentAttributeValue;
}
// Checks the well-formedness constraints for a general entity
// reference that appears inside an attribute value.
private void CheckAttributeEntityReferenceWFC (string entName)
{
	DTDEntityDeclaration entDecl = null;
	if (DTD != null)
		entDecl = DTD.EntityDecls [entName];

	if (entDecl == null) {
		// An undeclared entity is an error when entities are being
		// expanded, or when both a DTD and a resolver are present.
		bool mustBeDeclared =
			entityHandling == EntityHandling.ExpandEntities ||
			(DTD != null && resolver != null);
		if (mustBeDeclared)
			throw NotWFError (String.Format ("Referenced entity '{0}' does not exist.", entName));
		return;
	}

	if (entDecl.HasExternalReference)
		throw NotWFError ("Reference to external entities is not allowed in the value of an attribute.");
	if (isStandalone && !entDecl.IsInternalSubset)
		throw NotWFError ("Reference to external entities is not allowed in the internal subset.");
	if (entDecl.EntityValue.IndexOf ('<') >= 0)
		throw NotWFError ("Attribute must not contain character '<' either directly or indirectly by way of entity references.");
}
// Reads a processing instruction or the XML declaration.
// The reader is positioned on the first character of the target.
// The content after the target is collected in valueBuffer; a target
// that is the atomized "xml" string is handed to
// VerifyXmlDeclaration ().
// Throws a not-well-formed error for a reserved target name, an
// invalid character and, since a processing instruction must be
// closed by "?>", when the input ends before that terminator.
private void ReadProcessingInstruction ()
{
	string target = ReadName ();
	if (target != "xml" && target.ToLower (CultureInfo.InvariantCulture) == "xml")
		throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");

	// The target must be followed by whitespace or directly by "?>".
	if (!SkipWhitespace ())
		if (PeekChar () != '?')
			throw NotWFError ("Invalid processing instruction name was found.");

	ClearValueBuffer ();

	bool terminated = false;
	int ch;
	while ((ch = PeekChar ()) != -1) {
		Advance (ch);

		if (ch == '?' && PeekChar () == '>') {
			Advance ('>');
			terminated = true;
			break;
		}

		if (CharacterChecking && XmlChar.IsInvalid (ch))
			throw NotWFError ("Invalid character was found.");
		AppendValueChar (ch);
	}

	// Previously an unterminated instruction was silently reported
	// as a complete node.
	if (!terminated)
		throw NotWFError ("Unexpected end of file in a processing instruction.");

	if (Object.ReferenceEquals (target, XmlNamespaceManager.PrefixXml))
		VerifyXmlDeclaration ();
	else {
		if (currentState == XmlNodeType.None)
			currentState = XmlNodeType.XmlDeclaration;

		SetProperties (
			XmlNodeType.ProcessingInstruction, // nodeType
			target, // name
			String.Empty, // prefix
			target, // localName
			false, // isEmptyElement
			null, // value: create only when required
			true // clearAttributes
		);
	}
}
// Validates the content of an XML declaration (taken from the value
// buffer) and exposes version / encoding / standalone as attributes
// of an XmlDeclaration node.
void VerifyXmlDeclaration ()
{
	if (!allowMultipleRoot && currentState != XmlNodeType.None)
		throw NotWFError ("XML declaration cannot appear in this state.");

	currentState = XmlNodeType.XmlDeclaration;

	string text = CreateValueString ();

	ClearAttributes ();

	int idx = 0;
	string encoding = null;
	string standalone = null;
	string name, value;

	// version is mandatory and must come first.
	ParseAttributeFromString (text, ref idx, out name, out value);
	if (name != "version" || value != "1.0")
		throw NotWFError ("'version' is expected.");

	// A further pseudo-attribute is parsed only when whitespace
	// separates it from the previous one.
	name = String.Empty;
	if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
		ParseAttributeFromString (text, ref idx, out name, out value);

	if (name == "encoding") {
		if (!XmlChar.IsValidIANAEncoding (value))
			throw NotWFError ("'encoding' must be a valid IANA encoding name.");

		XmlStreamReader streamReader = reader as XmlStreamReader;
		parserContext.Encoding = streamReader != null ?
			streamReader.Encoding : Encoding.Unicode;
		encoding = value;

		name = String.Empty;
		if (SkipWhitespaceInString (text, ref idx) && idx < text.Length)
			ParseAttributeFromString (text, ref idx, out name, out value);
	}

	if (name == "standalone") {
		this.isStandalone = value == "yes";
		if (value != "yes" && value != "no")
			throw NotWFError ("Only 'yes' or 'no' is allow for 'standalone'");
		standalone = value;
		SkipWhitespaceInString (text, ref idx);
	} else if (name.Length != 0)
		throw NotWFError (String.Format ("Unexpected token: '{0}'", name));

	if (idx < text.Length)
		throw NotWFError ("'?' is expected.");

	AddAttributeWithValue ("version", "1.0");
	if (encoding != null)
		AddAttributeWithValue ("encoding", encoding);
	if (standalone != null)
		AddAttributeWithValue ("standalone", standalone);
	currentAttribute = currentAttributeValue = -1;

	SetProperties (
		XmlNodeType.XmlDeclaration, // nodeType
		"xml", // name
		String.Empty, // prefix
		"xml", // localName
		false, // isEmptyElement
		text, // value
		false // clearAttributes
	);
}
// Advances idx past any XML whitespace in text.
// Returns true when at least one character was skipped.
bool SkipWhitespaceInString (string text, ref int idx)
{
	int origin = idx;
	for (; idx < text.Length; idx++) {
		if (!XmlChar.IsWhitespace (text [idx]))
			break;
	}
	return idx != origin;
}
// Parses one name="value" (or name='value') pseudo-attribute from
// src, starting at idx and skipping leading whitespace.
// On return idx is just past the closing quote, name holds the run of
// name characters and value the text between the quotes.
// Throws a not-well-formed error when '=' or the opening quote is
// missing and, since an unterminated literal used to be accepted
// silently (leaving idx beyond the end of src), when the closing
// quote is missing.
private void ParseAttributeFromString (string src,
	ref int idx, out string name, out string value)
{
	SkipWhitespaceInString (src, ref idx);

	int start = idx;
	while (idx < src.Length && XmlChar.IsNameChar (src [idx]))
		idx++;
	name = src.Substring (start, idx - start);

	SkipWhitespaceInString (src, ref idx);
	if (idx == src.Length || src [idx] != '=')
		throw NotWFError (String.Format ("'=' is expected after {0}", name));
	idx++;

	SkipWhitespaceInString (src, ref idx);

	if (idx == src.Length || src [idx] != '"' && src [idx] != '\'')
		throw NotWFError ("'\"' or '\'' is expected.");

	char quote = src [idx];
	idx++;
	start = idx;

	while (idx < src.Length && src [idx] != quote)
		idx++;
	if (idx == src.Length)
		throw NotWFError (String.Format ("Closing quotation mark is expected for the value of {0}", name));

	value = src.Substring (start, idx - start);
	idx++; // step over the closing quote
}
// Skips a leading text declaration ("<?xml ... ?>") if the input
// starts with one. When the input starts with something else the
// buffer index is rewound to 0 and nothing is consumed.
// The version literal, when present, must be exactly "1.0";
// previously a literal shorter than three characters (such as
// version="1") slipped through because it was only compared once
// three characters had been read.
internal void SkipTextDeclaration ()
{
	if (PeekChar () != '<')
		return;

	ReadChar ();

	if (PeekChar () != '?') {
		peekCharsIndex = 0;
		return;
	}
	ReadChar ();

	// Read ahead so that the first six buffered characters can be
	// compared with "<?xml ".
	while (peekCharsIndex < 6) {
		if (PeekChar () < 0)
			break;
		else
			ReadChar ();
	}
	if (new string (peekChars, 2, 4) != "xml ") {
		if (new string (peekChars, 2, 4).ToLower (CultureInfo.InvariantCulture) == "xml ") {
			throw NotWFError ("Processing instruction name must not be character sequence 'X' 'M' 'L' with case insensitivity.");
		}
		peekCharsIndex = 0;
		return;
	}

	SkipWhitespace ();

	// version decl
	if (PeekChar () == 'v') {
		Expect ("version");
		ExpectAfterWhitespace ('=');
		SkipWhitespace ();
		int quoteChar = ReadChar ();
		char [] expect1_0 = new char [3];
		int versionLength = 0;
		switch (quoteChar) {
		case '\'':
		case '"':
			while (PeekChar () != quoteChar) {
				if (PeekChar () == -1)
					throw NotWFError ("Invalid version declaration inside text declaration.");
				else if (versionLength == 3)
					throw NotWFError ("Invalid version number inside text declaration.");
				else {
					expect1_0 [versionLength] = (char) ReadChar ();
					versionLength++;
					if (versionLength == 3 && new String (expect1_0) != "1.0")
						throw NotWFError ("Invalid version number inside text declaration.");
				}
			}
			// The loop only validates literals that reach three
			// characters, so reject shorter ones here.
			if (versionLength != 3)
				throw NotWFError ("Invalid version number inside text declaration.");
			ReadChar ();
			SkipWhitespace ();
			break;
		default:
			throw NotWFError ("Invalid version declaration inside text declaration.");
		}
	}

	if (PeekChar () == 'e') {
		Expect ("encoding");
		ExpectAfterWhitespace ('=');
		SkipWhitespace ();
		int quoteChar = ReadChar ();
		switch (quoteChar) {
		case '\'':
		case '"':
			while (PeekChar () != quoteChar)
				if (ReadChar () == -1)
					throw NotWFError ("Invalid encoding declaration inside text declaration.");
			ReadChar ();
			SkipWhitespace ();
			break;
		default:
			throw NotWFError ("Invalid encoding declaration inside text declaration.");
		}
		// Encoding value should be checked inside XmlInputStream.
	}
#if NET_2_0
	// this condition is to check if this instance is
	// not created by XmlReader.Create() (which just
	// omits strict text declaration check).
	else if (Conformance == ConformanceLevel.Auto)
		throw NotWFError ("Encoding declaration is mandatory in text declaration.");
#endif

	Expect ("?>");

	curNodePeekIndex = peekCharsIndex; // without this it causes incorrect value start indication.
}
// Dispatches on the markup that follows "<!": a comment, a CDATA
// section or a DOCTYPE declaration.
// The reader is positioned on the first character after the
// leading '<!'.
private void ReadDeclaration ()
{
	int ch = PeekChar ();

	if (ch == '-') {
		Expect ("--");
		ReadComment ();
	} else if (ch == '[') {
		ReadChar ();
		Expect ("CDATA[");
		ReadCDATA ();
	} else if (ch == 'D') {
		Expect ("DOCTYPE");
		ReadDoctypeDecl ();
	} else
		throw NotWFError ("Unexpected declaration markup was found.");
}
// Reads a comment into valueBuffer and exposes it as a Comment node.
// The reader is positioned on the first character after the
// leading '<!--'.
// Throws a not-well-formed error when "--" occurs inside the comment,
// when an invalid character is found and, since a comment must be
// closed by "-->", when the input ends before that terminator.
private void ReadComment ()
{
	if (currentState == XmlNodeType.None)
		currentState = XmlNodeType.XmlDeclaration;

	preserveCurrentTag = false;

	ClearValueBuffer ();

	bool terminated = false;
	int ch;
	while ((ch = PeekChar ()) != -1) {
		Advance (ch);

		if (ch == '-' && PeekChar () == '-') {
			Advance ('-');

			if (PeekChar () != '>')
				throw NotWFError ("comments cannot contain '--'");

			Advance ('>');
			terminated = true;
			break;
		}

		if (XmlChar.IsInvalid (ch))
			throw NotWFError ("Not allowed character was found.");

		AppendValueChar (ch);
	}

	// Previously an unterminated comment was silently reported as a
	// complete node.
	if (!terminated)
		throw NotWFError ("Unexpected end of file in a comment.");

	SetProperties (
		XmlNodeType.Comment, // nodeType
		String.Empty, // name
		String.Empty, // prefix
		String.Empty, // localName
		false, // isEmptyElement
		null, // value: create only when required
		true // clearAttributes
	);
}
// Reads a CDATA section into valueBuffer and exposes it as a CDATA
// node.
// The reader is positioned on the first character after the
// leading '<![CDATA['.
// Throws a not-well-formed error when CDATA is not allowed in the
// current state, when an invalid character is found and, since a
// section must be closed by "]]>", when the input ends before that
// terminator.
private void ReadCDATA ()
{
	if (currentState != XmlNodeType.Element)
		throw NotWFError ("CDATA section cannot appear in this state.");
	preserveCurrentTag = false;

	ClearValueBuffer ();

	bool terminated = false;
	// skip: the ']' held in ch was already consumed in the previous
	// iteration and has to be re-examined as a possible start of "]]>".
	bool skip = false;
	int ch = 0;
	while (PeekChar () != -1) {
		if (!skip)
			ch = ReadChar ();
		skip = false;

		if (ch == ']' && PeekChar () == ']') {
			ch = ReadChar (); // ']'

			if (PeekChar () == '>') {
				ReadChar (); // '>'
				terminated = true;
				break;
			} else {
				skip = true;
			}
		}
		if (normalization && ch == '\r') {
			ch = PeekChar ();
			if (ch != '\n')
				// append '\n' instead of '\r'.
				AppendValueChar ('\n');
			// otherwise, discard '\r'.
			continue;
		}
		if (CharacterChecking && XmlChar.IsInvalid (ch))
			throw NotWFError ("Invalid character was found.");

		// FIXME: it might be optimized by the JIT later,
		// AppendValueChar (ch);
		if (ch <= Char.MaxValue)
			valueBuffer.Append ((char) ch);
		else
			AppendSurrogatePairValueChar (ch);
	}

	// Previously an unterminated section was silently reported as a
	// complete node.
	if (!terminated)
		throw NotWFError ("Unexpected end of file in a CDATA section.");

	SetProperties (
		XmlNodeType.CDATA, // nodeType
		String.Empty, // name
		String.Empty, // prefix
		String.Empty, // localName
		false, // isEmptyElement
		null, // value: create only when required
		true // clearAttributes
	);
}
// Reads a document type declaration, builds the DTD object model and
// exposes the declaration as a DocumentType node with optional PUBLIC
// and SYSTEM attributes.
// The reader is positioned on the first character after the
// leading '<!DOCTYPE'.
private void ReadDoctypeDecl ()
{
	if (prohibitDtd)
		throw NotWFError ("Document Type Declaration (DTD) is prohibited in this XML.");

	if (currentState == XmlNodeType.DocumentType ||
	    currentState == XmlNodeType.Element ||
	    currentState == XmlNodeType.EndElement)
		throw NotWFError ("Document type cannot appear in this state.");
	currentState = XmlNodeType.DocumentType;

	string publicId = null;
	string systemId = null;
	int intSubsetStartLine = 0;
	int intSubsetStartColumn = 0;

	SkipWhitespace ();
	string doctypeName = ReadName ();
	SkipWhitespace ();

	// External identifier: the next character is 'S' or 'P'.
	int idStart = PeekChar ();
	if (idStart == 'S')
		systemId = ReadSystemLiteral (true);
	else if (idStart == 'P') {
		publicId = ReadPubidLiteral ();
		if (!SkipWhitespace ())
			throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
		systemId = ReadSystemLiteral (false);
	}
	SkipWhitespace ();

	if (PeekChar () == '[') {
		// read markupdecl etc. or end of decl
		ReadChar ();
		intSubsetStartLine = this.LineNumber;
		intSubsetStartColumn = this.LinePosition;
		ClearValueBuffer ();
		ReadInternalSubset ();
		parserContext.InternalSubset = CreateValueString ();
	}
	// end of DOCTYPE decl.
	ExpectAfterWhitespace ('>');

	GenerateDTDObjectModel (doctypeName, publicId,
		systemId, parserContext.InternalSubset,
		intSubsetStartLine, intSubsetStartColumn);

	// set properties for <!DOCTYPE> node
	SetProperties (
		XmlNodeType.DocumentType, // nodeType
		doctypeName, // name
		String.Empty, // prefix
		doctypeName, // localName
		false, // isEmptyElement
		parserContext.InternalSubset, // value
		true // clearAttributes
	);

	if (publicId != null)
		AddAttributeWithValue ("PUBLIC", publicId);
	if (systemId != null)
		AddAttributeWithValue ("SYSTEM", systemId);
	currentAttribute = currentAttributeValue = -1;
}
// Convenience overload: builds the DTD object model passing 0 for both
// the start line and the start column of the internal subset.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset)
{
	const int noPosition = 0;
	return GenerateDTDObjectModel (name, publicId, systemId, internalSubset,
		noPosition, noPosition);
}
// Creates a new DTDObjectModel on the parser context, fills it in from
// the arguments and from this reader's settings, and returns the result
// of running a DTDReader over it.
//
// intSubsetStartLine / intSubsetStartColumn are handed to the DTDReader
// constructor unchanged.
internal DTDObjectModel GenerateDTDObjectModel (string name, string publicId,
	string systemId, string internalSubset, int intSubsetStartLine, int intSubsetStartColumn)
{
	// now compile DTD
	// (the model merges both internal and external subsets in the meantime)
	parserContext.Dtd = new DTDObjectModel (this.NameTable);

	// Identity of the declaration.
	DTD.BaseURI = BaseURI;
	DTD.Name = name;
	DTD.PublicId = publicId;
	DTD.SystemId = systemId;
	DTD.InternalSubset = internalSubset;

	// Settings and position copied from this reader.
	DTD.XmlResolver = resolver;
	DTD.IsStandalone = isStandalone;
	DTD.LineNumber = line;
	DTD.LinePosition = column;

	DTDReader dtdReader = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
	dtdReader.Normalization = this.normalization;
	return dtdReader.GenerateDTDObjectModel ();
}
// States used by ReadInternalSubset () while it scans the internal
// subset of a document type declaration. Numbering starts at 1.
private enum DtdInputState
{
	// Between declarations.
	Free = 1,
	// Inside <!ELEMENT ... >
	ElementDecl,
	// Inside <!ATTLIST ... >
	AttlistDecl,
	// Inside <!ENTITY ... >
	EntityDecl,
	// Inside <!NOTATION ... >
	NotationDecl,
	// Inside <? ... ?>; pushed and tested by ReadInternalSubset (), so
	// the member has to exist for that method to compile.
	PI,
	// Inside <!-- ... -->
	Comment,
	// Inside '...'
	InsideSingleQuoted,
	// Inside "..."
	InsideDoubleQuoted,
}
// Typed facade over a non-generic Stack that stores DtdInputState
// values. A new instance already holds DtdInputState.Free.
private class DtdInputStateStack
{
	Stack states = new Stack ();

	public DtdInputStateStack ()
	{
		states.Push (DtdInputState.Free);
	}

	// Returns the top state without removing it.
	public DtdInputState Peek ()
	{
		object top = states.Peek ();
		return (DtdInputState) top;
	}

	// Removes and returns the top state.
	public DtdInputState Pop ()
	{
		object top = states.Pop ();
		return (DtdInputState) top;
	}

	// Places val on top of the stack.
	public void Push (DtdInputState val)
	{
		states.Push (val);
	}
}
// Scanner states maintained while the internal subset is being read.
DtdInputStateStack stateStack = new DtdInputStateStack ();

// The state currently on top of stateStack.
DtdInputState State {
	get {
		return stateStack.Peek ();
	}
}
// Reads one character, passes it to AppendValueChar () and returns it.
// The value obtained from ReadChar () is appended as is, with no check
// made here.
private int ReadValueChar ()
{
	int consumed = ReadChar ();
	AppendValueChar (consumed);
	return consumed;
}
// Requires the input to continue with the given text (via Expect ())
// and, once that succeeded, appends the same text to the value buffer.
private void ExpectAndAppend (string s)
{
	Expect (s);
	// Expect () did not throw, so the consumed input equals 's'.
	valueBuffer.Append (s);
}
// Simply read but not generate any result.
//
// Scans the internal subset of a DOCTYPE declaration. Every consumed
// character is appended to the value buffer (through ReadValueChar ()
// and ExpectAndAppend ()). Scanning stops at the ']' that is met while
// the state is Free; that ']' is removed from the buffer again.
//
// The content of quoted literals, comments and processing instructions
// is opaque: markup characters found there do not change the state.
// A processing instruction ends only at "?>", which is detected by
// peeking so that sequences such as "??>" still close it.
//
// Throws the exception built by NotWFError on end of input and on
// markup that does not fit the current state.
private void ReadInternalSubset ()
{
	bool continueParse = true;

	while (continueParse) {
		switch (ReadValueChar ()) {
		case ']':
			switch (State) {
			case DtdInputState.Free:
				// chop extra ']'
				valueBuffer.Remove (valueBuffer.Length - 1, 1);
				continueParse = false;
				break;
			case DtdInputState.InsideDoubleQuoted:
			case DtdInputState.InsideSingleQuoted:
			case DtdInputState.Comment:
			case DtdInputState.PI:
				continue;
			default:
				throw NotWFError ("unexpected end of file at DTD.");
			}
			break;
		case -1:
			throw NotWFError ("unexpected end of file at DTD.");
		case '<':
			switch (State) {
			case DtdInputState.InsideDoubleQuoted:
			case DtdInputState.InsideSingleQuoted:
			case DtdInputState.Comment:
			case DtdInputState.PI:
				continue; // well-formed
			}
			int c = ReadValueChar ();
			switch (c) {
			case '?':
				stateStack.Push (DtdInputState.PI);
				break;
			case '!':
				switch (ReadValueChar ()) {
				case 'E':
					switch (ReadValueChar ()) {
					case 'L':
						ExpectAndAppend ("EMENT");
						stateStack.Push (DtdInputState.ElementDecl);
						break;
					case 'N':
						ExpectAndAppend ("TITY");
						stateStack.Push (DtdInputState.EntityDecl);
						break;
					default:
						throw NotWFError ("unexpected token '<!E'.");
					}
					break;
				case 'A':
					ExpectAndAppend ("TTLIST");
					stateStack.Push (DtdInputState.AttlistDecl);
					break;
				case 'N':
					ExpectAndAppend ("OTATION");
					stateStack.Push (DtdInputState.NotationDecl);
					break;
				case '-':
					ExpectAndAppend ("-");
					stateStack.Push (DtdInputState.Comment);
					break;
				}
				break;
			default:
				throw NotWFError (String.Format ("unexpected '<{0}'.", (char) c));
			}
			break;
		case '\'':
			if (State == DtdInputState.InsideSingleQuoted)
				stateStack.Pop ();
			else if (State != DtdInputState.InsideDoubleQuoted
				&& State != DtdInputState.Comment
				&& State != DtdInputState.PI)
				stateStack.Push (DtdInputState.InsideSingleQuoted);
			break;
		case '"':
			if (State == DtdInputState.InsideDoubleQuoted)
				stateStack.Pop ();
			else if (State != DtdInputState.InsideSingleQuoted
				&& State != DtdInputState.Comment
				&& State != DtdInputState.PI)
				stateStack.Push (DtdInputState.InsideDoubleQuoted);
			break;
		case '>':
			switch (State) {
			case DtdInputState.ElementDecl:
			case DtdInputState.AttlistDecl:
			case DtdInputState.EntityDecl:
			case DtdInputState.NotationDecl:
				// end of the markup declaration
				stateStack.Pop ();
				break;
			case DtdInputState.InsideDoubleQuoted:
			case DtdInputState.InsideSingleQuoted:
			case DtdInputState.Comment:
			case DtdInputState.PI:
				continue;
			default:
				throw NotWFError ("unexpected token '>'");
			}
			break;
		case '?':
			// Peek instead of read: the character after '?' must
			// stay available when it is another '?' (as in "??>").
			if (State == DtdInputState.PI && PeekChar () == '>') {
				ReadValueChar ();
				stateStack.Pop ();
			}
			break;
		case '-':
			if (State == DtdInputState.Comment) {
				if (PeekChar () == '-') {
					ReadValueChar ();
					ExpectAndAppend (">");
					stateStack.Pop ();
				}
			}
			break;
		case '%':
			if (State != DtdInputState.Free
				&& State != DtdInputState.EntityDecl
				&& State != DtdInputState.Comment
				&& State != DtdInputState.PI
				&& State != DtdInputState.InsideDoubleQuoted
				&& State != DtdInputState.InsideSingleQuoted)
				throw NotWFError ("Parameter Entity Reference cannot appear as a part of markupdecl (see XML spec 2.8).");
			break;
		}
	}
}
// Reads a quoted system literal and returns its content without the
// surrounding quotes.
//
// When expectSYSTEM is true the reader is positioned on the first 'S'
// of "SYSTEM"; the keyword and at least one whitespace character are
// required before the literal. When it is false the reader is
// positioned before the literal and leading whitespace is optional.
//
// Throws the exception built by NotWFError when the keyword or the
// whitespace is missing, when the literal does not start with '"' or
// '\'', or when the input ends inside the literal.
private string ReadSystemLiteral (bool expectSYSTEM)
{
	if (expectSYSTEM) {
		Expect ("SYSTEM");
		if (!SkipWhitespace ())
			throw NotWFError ("Whitespace is required after 'SYSTEM'.");
	}
	else
		SkipWhitespace ();

	int quoteChar = ReadChar (); // apos or quot
	if (quoteChar < 0)
		throw NotWFError ("Unexpected end of stream in ExternalID.");
	// Only the two quote characters may delimit the literal; without
	// this check any character would be taken as the delimiter.
	if (quoteChar != '\'' && quoteChar != '"')
		throw NotWFError ("A system literal must be enclosed in quotation marks or apostrophes.");

	ClearValueBuffer ();
	while (true) {
		int c = ReadChar ();
		if (c < 0)
			throw NotWFError ("Unexpected end of stream in ExternalID.");
		if (c == quoteChar)
			break;
		AppendValueChar (c);
	}
	return CreateValueString ();
}
// Reads the keyword "PUBLIC", the mandatory whitespace after it and a
// quoted public identifier literal; returns the content of the literal
// without the surrounding quotes.
//
// Throws the exception built by NotWFError when the keyword or the
// whitespace is missing, when the literal does not start with '"' or
// '\'', when the input ends inside the literal, or when a character is
// rejected by XmlChar.IsPubidChar ().
private string ReadPubidLiteral()
{
	Expect ("PUBLIC");
	if (!SkipWhitespace ())
		throw NotWFError ("Whitespace is required after 'PUBLIC'.");

	int quoteChar = ReadChar ();
	if (quoteChar < 0)
		throw NotWFError ("Unexpected end of stream in ExternalID.");
	// Only the two quote characters may delimit the literal; without
	// this check any character would be taken as the delimiter.
	if (quoteChar != '\'' && quoteChar != '"')
		throw NotWFError ("A public identifier literal must be enclosed in quotation marks or apostrophes.");

	ClearValueBuffer ();
	while (true) {
		int c = ReadChar ();
		if (c < 0)
			throw NotWFError ("Unexpected end of stream in ExternalID.");
		if (c == quoteChar)
			break;
		if (!XmlChar.IsPubidChar (c))
			throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char)c ));
		AppendValueChar (c);
	}
	return CreateValueString ();
}
// Reads a name and returns it; the prefix and local name that the
// two-out-parameter overload also produces are discarded.
//
// The reader is positioned on the first character
// of the name.
private string ReadName ()
{
	string ignoredPrefix;
	string ignoredLocalName;
	return ReadName (out ignoredPrefix, out ignoredLocalName);
}
// Reads a name, adds it to the NameTable and returns it.
//
// When 'namespaces' is set and the name holds a ':' after its first
// character, 'prefix' receives the part before the first ':' and
// 'localName' the part after it. Otherwise 'prefix' is String.Empty and
// 'localName' is the whole name.
//
// Throws the exception built by NotWFError when the first character is
// rejected by XmlChar.IsFirstNameChar ().
private string ReadName (out string prefix, out string localName)
{
#if USE_NAME_BUFFER
	// Variant that copies the name into nameBuffer.
	int ch = PeekChar ();
	if (!XmlChar.IsFirstNameChar (ch))
		throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));

	nameLength = 0;

	Advance (ch);
	// Expanded by hand in place of AppendNameChar (ch).
	// nameBuffer.Length is always non-0 so no need to ExpandNameCapacity () here
	if (ch > Char.MaxValue)
		AppendSurrogatePairNameChar (ch);
	else
		nameBuffer [nameLength++] = (char) ch;

	int colonAt = -1;

	for (ch = PeekChar (); XmlChar.IsNameChar (ch); ch = PeekChar ()) {
		Advance (ch);

		// Only the first ':' splits the name.
		if (ch == ':' && namespaces && colonAt < 0)
			colonAt = nameLength;

		// Expanded by hand in place of AppendNameChar (ch).
		if (nameLength == nameCapacity)
			ExpandNameCapacity ();
		if (ch > Char.MaxValue)
			AppendSurrogatePairNameChar (ch);
		else
			nameBuffer [nameLength++] = (char) ch;
	}

	string name = NameTable.Add (nameBuffer, 0, nameLength);

	if (colonAt > 0) {
		prefix = NameTable.Add (nameBuffer, 0, colonAt);
		localName = NameTable.Add (nameBuffer, colonAt + 1, nameLength - colonAt - 1);
	} else {
		prefix = String.Empty;
		localName = name;
	}

	return name;
#else
	// Variant that takes the name directly out of peekChars.
	// preserveCurrentTag is switched on while the name is scanned and
	// put back to its previous value afterwards.
	bool previousPreserve = preserveCurrentTag;
	preserveCurrentTag = true;

	int startOffset = peekCharsIndex - curNodePeekIndex;
	int ch = PeekChar ();
	if (!XmlChar.IsFirstNameChar (ch))
		throw NotWFError (String.Format (CultureInfo.InvariantCulture, "a name did not start with a legal character {0} ({1})", ch, (char) ch));
	Advance (ch);
	int length = 1;
	int colonAt = -1;

	for (ch = PeekChar (); XmlChar.IsNameChar (ch); ch = PeekChar ()) {
		Advance (ch);
		// Only the first ':' splits the name.
		if (ch == ':' && namespaces && colonAt < 0)
			colonAt = length;
		length++;
	}

	// Computed only now, from the value curNodePeekIndex has after
	// the scan.
	int start = curNodePeekIndex + startOffset;

	string name = NameTable.Add (peekChars, start, length);

	if (colonAt > 0) {
		prefix = NameTable.Add (peekChars, start, colonAt);
		localName = NameTable.Add (peekChars, start + colonAt + 1, length - colonAt - 1);
	} else {
		prefix = String.Empty;
		localName = name;
	}

	preserveCurrentTag = previousPreserve;

	return name;
#endif
}
// Consumes one character and requires it to equal 'expected'.
// On a mismatch the exception built by NotWFError is thrown; its message
// shows both characters and their hexadecimal values, with "EOF" shown
// when the value read was negative.
private void Expect (int expected)
{
	int actual = ReadChar ();
	if (actual == expected)
		return;

	object found;
	if (actual < 0)
		found = "EOF";
	else
		found = (char) actual;

	string message = String.Format (CultureInfo.InvariantCulture,
		"expected '{0}' ({1:X}) but found '{2}' ({3:X})",
		(char) expected,
		expected,
		found,
		actual);
	throw NotWFError (message);
}
// Consumes as many characters as 'expected' holds and requires each one
// to equal the character at the same position. The first mismatch
// raises the exception built by NotWFError.
private void Expect (string expected)
{
	foreach (char wanted in expected) {
		if (ReadChar () != wanted)
			throw NotWFError (String.Format (CultureInfo.InvariantCulture,
				"'{0}' is expected", expected));
	}
}
// Consumes characters until one is met that is not whitespace, then
// requires that character to equal 'c'. A mismatch raises the exception
// built by NotWFError; "EOF" is reported when the value read was
// negative.
private void ExpectAfterWhitespace (char c)
{
	int i;
	do {
		i = ReadChar ();
		// The range test comes first so that XmlChar.IsWhitespace ()
		// is only asked about values below 0x21.
	} while (i < 0x21 && XmlChar.IsWhitespace (i));

	if (c != i)
		throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0}, but found {1} [{2}]", c, i < 0 ? (object) "EOF" : (char) i, i));
}
// Advances past a run of space, tab, line feed and carriage return
// characters. Returns true when at least one was skipped.
// Does not consume the first non-whitespace character.
private bool SkipWhitespace ()
{
	// The comparisons are written out instead of calling
	// XmlChar.IsWhitespace (); the original carries a FIXME saying the
	// JIT should inline that call.
	int ch = PeekChar ();
	if (ch != 0x20 && ch != 0x9 && ch != 0xA && ch != 0xD)
		return false;

	do {
		Advance (ch);
		ch = PeekChar ();
	} while (ch == 0x20 || ch == 0x9 || ch == 0xA || ch == 0xD);

	return true;
}
// Consumes the character at the current position plus any space, tab,
// line feed or carriage return characters that follow it.
//
// Returns false, without setting up a node, when the run is not the
// start of text and whitespaceHandling asks for it to be dropped.
// Otherwise the consumed characters are placed in the value buffer and
// the method returns true after either continuing with ReadText (false)
// or setting the node to SignificantWhitespace / Whitespace.
//
// NOTE(review): preserveCurrentTag is set to true on entry and is put
// back only on the paths that return true - confirm the early return
// is meant to leave it set.
private bool ReadWhitespace ()
{
	if (currentState == XmlNodeType.None)
		currentState = XmlNodeType.XmlDeclaration;

	bool previousPreserve = preserveCurrentTag;
	preserveCurrentTag = true;
	int startOffset = peekCharsIndex - curNodePeekIndex; // it should be 0 for now though.

	// The comparisons are written out instead of calling
	// XmlChar.IsWhitespace (); the original carries a FIXME saying the
	// JIT should inline that call.
	int ch = PeekChar ();
	for (;;) {
		Advance (ch);
		ch = PeekChar ();
		if (ch != 0x20 && ch != 0x9 && ch != 0xA && ch != 0xD)
			break;
	}

	// Inside an element, whitespace followed by anything other than
	// '<' or end of input is the beginning of a text node.
	bool isText = currentState == XmlNodeType.Element && ch != -1 && ch != '<';

	if (!isText) {
		if (whitespaceHandling == WhitespaceHandling.None)
			return false;
		if (whitespaceHandling == WhitespaceHandling.Significant && XmlSpace != XmlSpace.Preserve)
			return false;
	}

	ClearValueBuffer ();
	int consumed = peekCharsIndex - curNodePeekIndex - startOffset;
	valueBuffer.Append (peekChars, curNodePeekIndex, consumed);
	preserveCurrentTag = previousPreserve;

	if (isText) {
		ReadText (false);
		return true;
	}

	XmlNodeType nodeType;
	if (this.XmlSpace == XmlSpace.Preserve)
		nodeType = XmlNodeType.SignificantWhitespace;
	else
		nodeType = XmlNodeType.Whitespace;

	SetProperties (nodeType,
		String.Empty,
		String.Empty,
		String.Empty,
		false,
		null, // value: create only when required
		true);

	return true;
}
// Copies up to 'length' characters of element content into 'buffer',
// starting at 'offset', and returns how many loop iterations stored a
// character.
//
// A '<' not followed by '/' raises nestLevel and is copied. A "</" is
// copied while nestLevel is above zero (nestLevel is lowered either
// way). Otherwise the end tag is consumed with ReadEndTag (),
// readCharsInProgress is cleared, Read () moves to the next node and the
// count reached so far is returned.
//
// A '<' met when only one slot is left is not consumed; the count
// reached so far is returned instead. End of input raises the exception
// built by NotWFError.
//
// NOTE(review): a value above Char.MaxValue is stored as two chars but
// counted as one iteration - confirm 'buffer' cannot overflow.
private int ReadCharsInternal (char [] buffer, int offset, int length)
{
	int writeAt = offset;
	for (int count = 0; count < length; count++) {
		int c = PeekChar ();
		if (c == -1)
			throw NotWFError ("Unexpected end of xml.");

		if (c != '<') {
			// Plain content character.
			Advance (c);
			if (c > Char.MaxValue) {
				// Split into a surrogate pair.
				buffer [writeAt++] = (char) ((c - 0x10000) / 0x400 + 0xD800);
				buffer [writeAt++] = (char) ((c - 0x10000) % 0x400 + 0xDC00);
			} else {
				buffer [writeAt++] = (char) c;
			}
			continue;
		}

		if (count + 1 == length)
			// if it does not end here,
			// it cannot store another
			// character, so stop here.
			return count;

		Advance (c);
		if (PeekChar () != '/') {
			nestLevel++;
			buffer [writeAt++] = '<';
			continue;
		}
		if (nestLevel-- > 0) {
			buffer [writeAt++] = '<';
			continue;
		}

		// Seems to skip immediate EndElement
		Expect ('/');
		if (depthUp) {
			depth++;
			depthUp = false;
		}
		ReadEndTag ();
		readCharsInProgress = false;
		Read (); // move to the next node
		return count;
	}
	return length;
}
// Consumes input until an end tag is found whose name equals the name
// on top of the element name stack and which is not nested inside
// skipped markup; then lowers 'depth' and returns the result of Read ().
//
// nestLevel is raised for every '<' not followed by '/' and lowered for
// every "</". End of input raises the exception built by NotWFError.
private bool ReadUntilEndTag ()
{
	if (Depth == 0)
		currentState = XmlNodeType.EndElement;

	for (;;) {
		int ch = ReadChar ();
		if (ch == -1)
			throw NotWFError ("Unexpected end of xml.");
		if (ch != '<')
			continue;

		if (PeekChar () != '/') {
			nestLevel++;
			continue;
		}
		if (--nestLevel > 0)
			continue;

		ReadChar (); // '/'
		string name = ReadName ();
		if (name != elementNames [elementNameStackPos - 1].Name)
			continue;

		Expect ('>');
		depth--;
		return Read ();
	}
}
2993 #endregion