add implementation for stpcpy
[mono-project/dkf.git] / docs / HtmlAgilityPack / HtmlDocument.cs
blob7233da0dfbe9931d63acb9afdb971e95e040e70b
1 // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
2 using System;
3 using System.Collections;
4 using System.Collections.Generic;
5 using System.IO;
6 using System.Text;
7 using System.Text.RegularExpressions;
8 using System.Xml;
9 using System.Xml.XPath;
11 namespace HtmlAgilityPack
13 /// <summary>
14 /// Represents a complete HTML document.
15 /// </summary>
16 public class HtmlDocument : IXPathNavigable
18 #region Fields
20 private int _c;
21 private Crc32 _crc32;
22 private HtmlAttribute _currentattribute;
23 private HtmlNode _currentnode;
24 private Encoding _declaredencoding;
25 private HtmlNode _documentnode;
26 private bool _fullcomment;
27 private int _index;
28 internal Hashtable _lastnodes = new Hashtable();
29 private HtmlNode _lastparentnode;
30 private int _line;
31 private int _lineposition, _maxlineposition;
32 internal Hashtable _nodesid;
33 private ParseState _oldstate;
34 private bool _onlyDetectEncoding;
35 internal Hashtable _openednodes;
36 private List<HtmlParseError> _parseerrors = new List<HtmlParseError>();
37 private string _remainder;
38 private int _remainderOffset;
39 private ParseState _state;
40 private Encoding _streamencoding;
41 internal string _text;
43 // public props
45 /// <summary>
46 /// Adds Debugging attributes to node. Default is false.
47 /// </summary>
48 public bool OptionAddDebuggingAttributes;
50 /// <summary>
51 /// Defines if closing for non closed nodes must be done at the end or directly in the document.
52 /// Setting this to true can actually change how browsers render the page. Default is false.
53 /// </summary>
54 public bool OptionAutoCloseOnEnd; // close errors at the end
56 /// <summary>
57 /// Defines if non closed nodes will be checked at the end of parsing. Default is true.
58 /// </summary>
59 public bool OptionCheckSyntax = true;
61 /// <summary>
62 /// Defines if a checksum must be computed for the document while parsing. Default is false.
63 /// </summary>
64 public bool OptionComputeChecksum;
66 /// <summary>
67 /// Defines the default stream encoding to use. Default is System.Text.Encoding.Default.
68 /// </summary>
69 public Encoding OptionDefaultStreamEncoding = Encoding.Default;
71 /// <summary>
72 /// Defines if source text must be extracted while parsing errors.
73 /// If the document has a lot of errors, or cascading errors, parsing performance can be dramatically affected if set to true.
74 /// Default is false.
75 /// </summary>
76 public bool OptionExtractErrorSourceText;
78 // turning this on can dramatically slow performance if a lot of errors are detected
80 /// <summary>
81 /// Defines the maximum length of source text or parse errors. Default is 100.
82 /// </summary>
83 public int OptionExtractErrorSourceTextMaxLength = 100;
85 /// <summary>
86 /// Defines if LI, TR, TH, TD tags must be partially fixed when nesting errors are detected. Default is false.
87 /// </summary>
88 public bool OptionFixNestedTags; // fix li, tr, th, td tags
90 /// <summary>
91 /// Defines if output must conform to XML, instead of HTML.
92 /// </summary>
93 public bool OptionOutputAsXml;
95 /// <summary>
96 /// Defines if attribute value output must be optimized (not bound with double quotes if it is possible). Default is false.
97 /// </summary>
98 public bool OptionOutputOptimizeAttributeValues;
100 /// <summary>
101 /// Defines if name must be output with its original case. Useful for asp.net tags and attributes
102 /// </summary>
103 public bool OptionOutputOriginalCase;
105 /// <summary>
106 /// Defines if name must be output in uppercase. Default is false.
107 /// </summary>
108 public bool OptionOutputUpperCase;
110 /// <summary>
111 /// Defines if declared encoding must be read from the document.
112 /// Declared encoding is determined using the meta http-equiv="content-type" content="text/html;charset=XXXXX" html node.
113 /// Default is true.
114 /// </summary>
115 public bool OptionReadEncoding = true;
117 /// <summary>
118 /// Defines the name of a node that will throw the StopperNodeException when found as an end node. Default is null.
119 /// </summary>
120 public string OptionStopperNodeName;
122 /// <summary>
123 /// Defines if the 'id' attribute must be specifically used. Default is true.
124 /// </summary>
125 public bool OptionUseIdAttribute = true;
127 /// <summary>
128 /// Defines if empty nodes must be written as closed during output. Default is false.
129 /// </summary>
130 public bool OptionWriteEmptyNodes;
132 #endregion
134 #region Static Members
136 internal static readonly string HtmlExceptionRefNotChild = "Reference node must be a child of this node";
138 internal static readonly string HtmlExceptionUseIdAttributeFalse =
139 "You need to set UseIdAttribute property to true to enable this feature";
141 #endregion
143 #region Constructors
/// <summary>
/// Initializes a new HTML document containing only a freshly created document node.
/// </summary>
public HtmlDocument()
{
    HtmlNode root = CreateNode(HtmlNodeType.Document, 0);
    _documentnode = root;
}
153 #endregion
155 #region Properties
/// <summary>
/// Gets the CRC32 checksum of the document, or 0 when no checksum object exists
/// (OptionComputeChecksum was not set to true before parsing).
/// </summary>
public int CheckSum
{
    get
    {
        return _crc32 != null ? (int) _crc32.CheckSum : 0;
    }
}
/// <summary>
/// Gets the encoding declared inside the document itself, i.e. the one read from the
/// meta http-equiv="content-type" content="text/html;charset=XXXXX" html node.
/// </summary>
public Encoding DeclaredEncoding
{
    get
    {
        return _declaredencoding;
    }
}
/// <summary>
/// Gets the node that is the root of this document.
/// </summary>
public HtmlNode DocumentNode
{
    get
    {
        return _documentnode;
    }
}
/// <summary>
/// Gets the encoding used when the document is written out (see GetOutEncoding).
/// </summary>
public Encoding Encoding
{
    get
    {
        return GetOutEncoding();
    }
}
/// <summary>
/// Gets the parse errors recorded for this document.
/// </summary>
public IEnumerable<HtmlParseError> ParseErrors
{
    get
    {
        return _parseerrors;
    }
}
/// <summary>
/// Gets the text that was left unparsed.
/// Always null when OptionStopperNodeName is null.
/// </summary>
public string Remainder
{
    get
    {
        return _remainder;
    }
}
/// <summary>
/// Gets the position of Remainder inside the original Html text.
/// When OptionStopperNodeName is null this is the length of the original Html text.
/// </summary>
public int RemainderOffset
{
    get
    {
        return _remainderOffset;
    }
}
/// <summary>
/// Gets the encoding of the stream the document was read from
/// (null when the source reader was not a StreamReader).
/// </summary>
public Encoding StreamEncoding
{
    get
    {
        return _streamencoding;
    }
}
234 #endregion
236 #region IXPathNavigable Members
/// <summary>
/// Builds an XPathNavigator over this HTML document.
/// </summary>
/// <returns>A new navigator created on the document's root node.</returns>
public XPathNavigator CreateNavigator()
{
    HtmlNodeNavigator navigator = new HtmlNodeNavigator(this, _documentnode);
    return navigator;
}
247 #endregion
249 #region Public Methods
/// <summary>
/// Converts arbitrary text into a valid XML name.
/// </summary>
/// <remarks>
/// Characters in [a-z0-9_.-] are kept as they are. Any other character is replaced by the
/// lowercase hexadecimal form of its UTF-8 bytes followed by '_'. When at least one character
/// had to be replaced, the whole result is prefixed with '_'.
/// </remarks>
/// <param name="name">Any text.</param>
/// <returns>A string that is a valid XML name.</returns>
public static string GetXmlName(string name)
{
    // A StringBuilder avoids allocating a new string for every appended character.
    StringBuilder xmlname = new StringBuilder(name.Length);
    bool nameisok = true;
    for (int i = 0; i < name.Length; i++)
    {
        char c = name[i];

        // names are lcase
        // note: the accepted set is deliberately small; ':' is intentionally not part of it
        if (((c >= 'a') && (c <= 'z')) ||
            ((c >= '0') && (c <= '9')) ||
            (c == '_') || (c == '-') || (c == '.'))
        {
            xmlname.Append(c);
            continue;
        }

        nameisok = false;
        byte[] bytes = Encoding.UTF8.GetBytes(new char[] {c});
        for (int j = 0; j < bytes.Length; j++)
        {
            xmlname.Append(bytes[j].ToString("x2"));
        }
        xmlname.Append('_');
    }

    if (nameisok)
    {
        return xmlname.ToString();
    }
    return "_" + xmlname;
}
// Matches an '&' that does not already start one of the entities produced by HtmlEncode,
// so that already-encoded text is not encoded twice. Built once and shared by all calls.
private static readonly Regex HtmlEncodeBareAmpersandRegex =
    new Regex("&(?!(amp;)|(lt;)|(gt;)|(quot;))", RegexOptions.IgnoreCase);

/// <summary>
/// Applies HTML encoding to a specified string: '&amp;', '&lt;', '&gt;' and '"' are replaced
/// by their entities. An ampersand that already begins amp;, lt;, gt; or quot; is left alone.
/// </summary>
/// <param name="html">The input string to encode. May not be null.</param>
/// <returns>The encoded string.</returns>
/// <exception cref="ArgumentNullException">html is null.</exception>
public static string HtmlEncode(string html)
{
    if (html == null)
    {
        throw new ArgumentNullException("html");
    }

    // replace & by &amp; but only once! This must run before the other replacements,
    // otherwise the '&' of the freshly inserted entities would be seen again.
    string encoded = HtmlEncodeBareAmpersandRegex.Replace(html, "&amp;");
    return encoded.Replace("<", "&lt;").Replace(">", "&gt;").Replace("\"", "&quot;");
}
/// <summary>
/// Tells whether a character code is treated as whitespace:
/// line feed (10), carriage return (13), space (32) or tab (9).
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true if the specified character is considered as a whitespace character.</returns>
public static bool IsWhiteSpace(int c)
{
    switch (c)
    {
        case 9:
        case 10:
        case 13:
        case 32:
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Creates a new HTML attribute carrying the given name.
/// </summary>
/// <param name="name">The name of the attribute. May not be null.</param>
/// <returns>The new HTML attribute.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public HtmlAttribute CreateAttribute(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    HtmlAttribute attribute = CreateAttribute();
    attribute.Name = name;
    return attribute;
}
/// <summary>
/// Creates a new HTML attribute carrying the given name and value.
/// </summary>
/// <param name="name">The name of the attribute. May not be null.</param>
/// <param name="value">The value of the attribute.</param>
/// <returns>The new HTML attribute.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public HtmlAttribute CreateAttribute(string name, string value)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    HtmlAttribute attribute = CreateAttribute(name);
    attribute.Value = value;
    return attribute;
}
/// <summary>
/// Creates a new HTML comment node.
/// </summary>
/// <returns>The new HTML comment node.</returns>
public HtmlCommentNode CreateComment()
{
    HtmlNode created = CreateNode(HtmlNodeType.Comment);
    return (HtmlCommentNode) created;
}
/// <summary>
/// Creates a new HTML comment node holding the given comment text.
/// </summary>
/// <param name="comment">The comment text. May not be null.</param>
/// <returns>The new HTML comment node.</returns>
/// <exception cref="ArgumentNullException">comment is null.</exception>
public HtmlCommentNode CreateComment(string comment)
{
    if (comment == null)
    {
        throw new ArgumentNullException("comment");
    }

    HtmlCommentNode commentNode = CreateComment();
    commentNode.Comment = comment;
    return commentNode;
}
/// <summary>
/// Creates a new HTML element node with the given name.
/// </summary>
/// <param name="name">The qualified name of the element. May not be null.</param>
/// <returns>The new HTML node.</returns>
/// <exception cref="ArgumentNullException">name is null.</exception>
public HtmlNode CreateElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    HtmlNode element = CreateNode(HtmlNodeType.Element);
    element.Name = name;
    return element;
}
/// <summary>
/// Creates a new HTML text node.
/// </summary>
/// <returns>The new HTML text node.</returns>
public HtmlTextNode CreateTextNode()
{
    HtmlNode created = CreateNode(HtmlNodeType.Text);
    return (HtmlTextNode) created;
}
/// <summary>
/// Creates a new HTML text node holding the given text.
/// </summary>
/// <param name="text">The text of the node. May not be null.</param>
/// <returns>The new HTML text node.</returns>
/// <exception cref="ArgumentNullException">text is null.</exception>
public HtmlTextNode CreateTextNode(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    HtmlTextNode textNode = CreateTextNode();
    textNode.Text = text;
    return textNode;
}
/// <summary>
/// Detects the encoding of an HTML stream.
/// </summary>
/// <param name="stream">The input stream. May not be null.</param>
/// <returns>The detected encoding.</returns>
/// <exception cref="ArgumentNullException">stream is null.</exception>
public Encoding DetectEncoding(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    // The reader is intentionally not closed here: the stream belongs to the caller.
    StreamReader reader = new StreamReader(stream);
    return DetectEncoding(reader);
}
/// <summary>
/// Detects the encoding of an HTML file. The file is opened with
/// OptionDefaultStreamEncoding and is always closed again, even if detection fails.
/// </summary>
/// <param name="path">Path for the file containing the HTML document to detect. May not be null.</param>
/// <returns>The detected encoding.</returns>
/// <exception cref="ArgumentNullException">path is null.</exception>
public Encoding DetectEncoding(string path)
{
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }

    // 'using' releases the file handle even when parsing throws.
    using (StreamReader sr = new StreamReader(path, OptionDefaultStreamEncoding))
    {
        return DetectEncoding(sr);
    }
}
/// <summary>
/// Detects the encoding of an HTML text provided on a TextReader.
/// The whole reader is consumed and parsed in "detect only" mode.
/// </summary>
/// <param name="reader">The TextReader used to feed the HTML. May not be null.</param>
/// <returns>The detected encoding, or null when parsing finished without finding one.</returns>
/// <exception cref="ArgumentNullException">reader is null.</exception>
public Encoding DetectEncoding(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    _onlyDetectEncoding = true;
    _openednodes = OptionCheckSyntax ? new Hashtable() : null;
    _nodesid = OptionUseIdAttribute ? new Hashtable() : null;

    StreamReader streamReader = reader as StreamReader;
    _streamencoding = streamReader != null ? streamReader.CurrentEncoding : null;
    _declaredencoding = null;

    _text = reader.ReadToEnd();
    _documentnode = CreateNode(HtmlNodeType.Document, 0);

    // this is almost a hack, but it allows us not to muck with the original parsing code:
    // the found encoding is delivered through an EncodingFoundException.
    try
    {
        Parse();
    }
    catch (EncodingFoundException found)
    {
        return found.Encoding;
    }

    return null;
}
/// <summary>
/// Detects the encoding of an HTML file and then loads that file.
/// Equivalent to calling the two-argument overload with detectEncoding set to true.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
public void DetectEncodingAndLoad(string path)
{
    const bool detectEncoding = true;
    DetectEncodingAndLoad(path, detectEncoding);
}
/// <summary>
/// Optionally detects the encoding of an HTML file, then loads the file. The detected
/// encoding is used for loading when one was found; otherwise the file is loaded with
/// the default settings of Load(path).
/// </summary>
/// <param name="path">The complete file path to be read. May not be null.</param>
/// <param name="detectEncoding">true to detect encoding, false otherwise.</param>
/// <exception cref="ArgumentNullException">path is null.</exception>
public void DetectEncodingAndLoad(string path, bool detectEncoding)
{
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }

    Encoding detected = detectEncoding ? DetectEncoding(path) : null;
    if (detected != null)
    {
        Load(path, detected);
    }
    else
    {
        Load(path);
    }
}
/// <summary>
/// Detects the encoding of an HTML text.
/// </summary>
/// <param name="html">The input html text. May not be null.</param>
/// <returns>The detected encoding.</returns>
/// <exception cref="ArgumentNullException">html is null.</exception>
public Encoding DetectEncodingHtml(string html)
{
    if (html == null)
    {
        throw new ArgumentNullException("html");
    }

    // 'using' disposes the reader on every exit path, including exceptions.
    using (StringReader sr = new StringReader(html))
    {
        return DetectEncoding(sr);
    }
}
/// <summary>
/// Gets the HTML node with the specified 'id' attribute value.
/// </summary>
/// <param name="id">The attribute id to match. May not be null.</param>
/// <returns>The HTML node with the matching id or null if not found.</returns>
/// <exception cref="ArgumentNullException">id is null.</exception>
/// <exception cref="InvalidOperationException">
/// No id index exists (the index is only created when OptionUseIdAttribute is true at load time).
/// </exception>
public HtmlNode GetElementbyId(string id)
{
    if (id == null)
    {
        throw new ArgumentNullException("id");
    }
    if (_nodesid == null)
    {
        // A specific exception type instead of the bare System.Exception; callers that
        // catch Exception keep working because InvalidOperationException derives from it.
        throw new InvalidOperationException(HtmlExceptionUseIdAttributeFalse);
    }

    // Keys are stored lower-cased with ToLower() by SetIdForNode; the same call is used
    // here so lookups keep matching the stored keys.
    return _nodesid[id.ToLower()] as HtmlNode;
}
/// <summary>
/// Loads an HTML document from a stream, decoding it with OptionDefaultStreamEncoding.
/// </summary>
/// <param name="stream">The input stream.</param>
public void Load(Stream stream)
{
    StreamReader reader = new StreamReader(stream, OptionDefaultStreamEncoding);
    Load(reader);
}
/// <summary>
/// Loads an HTML document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
{
    StreamReader reader = new StreamReader(stream, detectEncodingFromByteOrderMarks);
    Load(reader);
}
/// <summary>
/// Loads an HTML document from a stream using the given character encoding.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Load(Stream stream, Encoding encoding)
{
    StreamReader reader = new StreamReader(stream, encoding);
    Load(reader);
}
/// <summary>
/// Loads an HTML document from a stream using the given character encoding.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    StreamReader reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks);
    Load(reader);
}
/// <summary>
/// Loads an HTML document from a stream using the given character encoding and buffer size.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
/// <param name="buffersize">The minimum buffer size.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    StreamReader reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize);
    Load(reader);
}
/// <summary>
/// Loads an HTML document from a file, decoding it with OptionDefaultStreamEncoding.
/// The file is closed again even when loading fails.
/// </summary>
/// <param name="path">The complete file path to be read. May not be null.</param>
/// <exception cref="ArgumentNullException">path is null.</exception>
public void Load(string path)
{
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }

    // 'using' releases the file handle even when parsing throws.
    using (StreamReader sr = new StreamReader(path, OptionDefaultStreamEncoding))
    {
        Load(sr);
    }
}
/// <summary>
/// Loads an HTML document from a file. The file is closed again even when loading fails.
/// </summary>
/// <param name="path">The complete file path to be read. May not be null.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <exception cref="ArgumentNullException">path is null.</exception>
public void Load(string path, bool detectEncodingFromByteOrderMarks)
{
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }

    // 'using' releases the file handle even when parsing throws.
    using (StreamReader sr = new StreamReader(path, detectEncodingFromByteOrderMarks))
    {
        Load(sr);
    }
}
/// <summary>
/// Loads an HTML document from a file using the given character encoding.
/// The file is closed again even when loading fails.
/// </summary>
/// <param name="path">The complete file path to be read. May not be null.</param>
/// <param name="encoding">The character encoding to use. May not be null.</param>
/// <exception cref="ArgumentNullException">path or encoding is null.</exception>
public void Load(string path, Encoding encoding)
{
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    // 'using' releases the file handle even when parsing throws.
    using (StreamReader sr = new StreamReader(path, encoding))
    {
        Load(sr);
    }
}
/// <summary>
/// Loads an HTML document from a file using the given character encoding.
/// The file is closed again even when loading fails.
/// </summary>
/// <param name="path">The complete file path to be read. May not be null.</param>
/// <param name="encoding">The character encoding to use. May not be null.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <exception cref="ArgumentNullException">path or encoding is null.</exception>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    // 'using' releases the file handle even when parsing throws.
    using (StreamReader sr = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks))
    {
        Load(sr);
    }
}
/// <summary>
/// Loads an HTML document from a file using the given character encoding and buffer size.
/// The file is closed again even when loading fails.
/// </summary>
/// <param name="path">The complete file path to be read. May not be null.</param>
/// <param name="encoding">The character encoding to use. May not be null.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <param name="buffersize">The minimum buffer size.</param>
/// <exception cref="ArgumentNullException">path or encoding is null.</exception>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    // 'using' releases the file handle even when parsing throws.
    using (StreamReader sr = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize))
    {
        Load(sr);
    }
}
/// <summary>
/// Loads the HTML document from the specified TextReader: the reader is read to its end,
/// the text is parsed, and (when OptionCheckSyntax is true) a TagNotClosed error is
/// recorded for every start tag still open after parsing.
/// </summary>
/// <param name="reader">The TextReader used to feed the HTML data into the document. May not be null.</param>
/// <exception cref="ArgumentNullException">reader is null.</exception>
public void Load(TextReader reader)
{
    // all Load methods pass down to this one
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    _onlyDetectEncoding = false;
    _openednodes = OptionCheckSyntax ? new Hashtable() : null;
    _nodesid = OptionUseIdAttribute ? new Hashtable() : null;

    StreamReader streamReader = reader as StreamReader;
    if (streamReader == null)
    {
        _streamencoding = null;
    }
    else
    {
        try
        {
            // trigger bom read if needed
            streamReader.Peek();
        }
        // ReSharper disable EmptyGeneralCatchClause
        catch (Exception)
        // ReSharper restore EmptyGeneralCatchClause
        {
            // void on purpose
        }
        _streamencoding = streamReader.CurrentEncoding;
    }
    _declaredencoding = null;

    _text = reader.ReadToEnd();
    _documentnode = CreateNode(HtmlNodeType.Document, 0);
    Parse();

    if (!OptionCheckSyntax)
    {
        return;
    }

    foreach (HtmlNode opened in _openednodes.Values)
    {
        // end tags were already reported
        if (opened._starttag)
        {
            string source = string.Empty;
            if (OptionExtractErrorSourceText)
            {
                source = opened.OuterHtml;
                if (source.Length > OptionExtractErrorSourceTextMaxLength)
                {
                    source = source.Substring(0, OptionExtractErrorSourceTextMaxLength);
                }
            }

            AddError(
                HtmlParseErrorCode.TagNotClosed,
                opened._line, opened._lineposition,
                opened._streamposition, source,
                "End tag </" + opened.Name + "> was not found");
        }
    }

    // we don't need this anymore
    _openednodes.Clear();
}
/// <summary>
/// Loads the HTML document from the specified string.
/// </summary>
/// <param name="html">String containing the HTML document to load. May not be null.</param>
/// <exception cref="ArgumentNullException">html is null.</exception>
public void LoadHtml(string html)
{
    if (html == null)
    {
        throw new ArgumentNullException("html");
    }

    // 'using' disposes the reader on every exit path, including exceptions.
    using (StringReader sr = new StringReader(html))
    {
        Load(sr);
    }
}
/// <summary>
/// Saves the HTML document to the specified stream, using the document's output encoding.
/// The stream is left open; it remains owned by the caller.
/// </summary>
/// <param name="outStream">The stream to which you want to save. May not be null.</param>
/// <exception cref="ArgumentNullException">outStream is null.</exception>
public void Save(Stream outStream)
{
    if (outStream == null)
    {
        throw new ArgumentNullException("outStream");
    }

    StreamWriter sw = new StreamWriter(outStream, GetOutEncoding());
    Save(sw);

    // StreamWriter buffers its output and this writer is never closed (closing it would
    // close the caller's stream), so it must be flushed explicitly or data can be lost.
    sw.Flush();
}
/// <summary>
/// Saves the HTML document to the specified stream using the given encoding.
/// The stream is left open; it remains owned by the caller.
/// </summary>
/// <param name="outStream">The stream to which you want to save. May not be null.</param>
/// <param name="encoding">The character encoding to use. May not be null.</param>
/// <exception cref="ArgumentNullException">outStream or encoding is null.</exception>
public void Save(Stream outStream, Encoding encoding)
{
    if (outStream == null)
    {
        throw new ArgumentNullException("outStream");
    }
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    StreamWriter sw = new StreamWriter(outStream, encoding);
    Save(sw);

    // StreamWriter buffers its output and this writer is never closed (closing it would
    // close the caller's stream), so it must be flushed explicitly or data can be lost.
    sw.Flush();
}
/// <summary>
/// Saves the document to the specified file (overwriting it), using the document's
/// output encoding. The file is closed again even when saving fails.
/// </summary>
/// <param name="filename">The location of the file where you want to save the document.</param>
public void Save(string filename)
{
    // 'using' flushes and closes the file even when writing throws.
    using (StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding()))
    {
        Save(sw);
    }
}
/// <summary>
/// Saves the document to the specified file (overwriting it) using the given encoding.
/// The file is closed again even when saving fails.
/// </summary>
/// <param name="filename">The location of the file where you want to save the document. May not be null.</param>
/// <param name="encoding">The character encoding to use. May not be null.</param>
/// <exception cref="ArgumentNullException">filename or encoding is null.</exception>
public void Save(string filename, Encoding encoding)
{
    if (filename == null)
    {
        throw new ArgumentNullException("filename");
    }
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    // 'using' flushes and closes the file even when writing throws.
    using (StreamWriter sw = new StreamWriter(filename, false, encoding))
    {
        Save(sw);
    }
}
/// <summary>
/// Saves the HTML document to the specified StreamWriter by forwarding to the
/// TextWriter overload.
/// </summary>
/// <param name="writer">The StreamWriter to which you want to save.</param>
public void Save(StreamWriter writer)
{
    TextWriter textWriter = writer;
    Save(textWriter);
}
/// <summary>
/// Saves the HTML document to the specified TextWriter and flushes the writer.
/// The writer is not closed.
/// </summary>
/// <param name="writer">The TextWriter to which you want to save. May not be null.</param>
/// <exception cref="ArgumentNullException">writer is null.</exception>
public void Save(TextWriter writer)
{
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    DocumentNode.WriteTo(writer);

    // Flush like Save(XmlWriter) does, so buffered writers created by the stream-based
    // overloads actually push their content to the underlying stream.
    writer.Flush();
}
/// <summary>
/// Saves the HTML document to the specified XmlWriter and flushes the writer.
/// </summary>
/// <param name="writer">The XmlWriter to which you want to save.</param>
public void Save(XmlWriter writer)
{
    HtmlNode root = DocumentNode;
    root.WriteTo(writer);
    writer.Flush();
}
926 #endregion
928 #region Internal Methods
// Creates an unnamed attribute owned by this document.
internal HtmlAttribute CreateAttribute()
{
    HtmlAttribute attribute = new HtmlAttribute(this);
    return attribute;
}
// Creates a node of the given type, passing -1 as its index.
internal HtmlNode CreateNode(HtmlNodeType type)
{
    const int noIndex = -1;
    return CreateNode(type, noIndex);
}
// Creates a node owned by this document. Comment and Text types get their dedicated
// node classes; every other type is a plain HtmlNode.
internal HtmlNode CreateNode(HtmlNodeType type, int index)
{
    if (type == HtmlNodeType.Comment)
    {
        return new HtmlCommentNode(this, index);
    }

    if (type == HtmlNodeType.Text)
    {
        return new HtmlTextNode(this, index);
    }

    return new HtmlNode(type, this, index);
}
// Picks the encoding used for output. Precedence: the encoding declared in the document,
// then the encoding of the source stream, then OptionDefaultStreamEncoding.
internal Encoding GetOutEncoding()
{
    if (_declaredencoding != null)
    {
        return _declaredencoding;
    }

    if (_streamencoding != null)
    {
        return _streamencoding;
    }

    return OptionDefaultStreamEncoding;
}
// Returns the first direct child of the document node named "?xml", or null when the
// document has no children or no such child.
internal HtmlNode GetXmlDeclaration()
{
    if (_documentnode.HasChildNodes)
    {
        foreach (HtmlNode child in _documentnode._childnodes)
        {
            // it's ok, names are case sensitive
            if (child.Name == "?xml")
            {
                return child;
            }
        }
    }

    return null;
}
// Registers 'node' under the lower-cased 'id' in the id index, or removes the entry when
// 'node' is null. Does nothing when id indexing is disabled, the index does not exist,
// or 'id' is null.
internal void SetIdForNode(HtmlNode node, string id)
{
    if (!OptionUseIdAttribute || (_nodesid == null) || (id == null))
    {
        return;
    }

    string key = id.ToLower();
    if (node != null)
    {
        _nodesid[key] = node;
    }
    else
    {
        _nodesid.Remove(key);
    }
}
// Moves _lastparentnode up the ancestor chain until it reaches a node that is not closed.
// Falls back to the document node when the chain is exhausted (or when there was no
// last parent node to begin with).
internal void UpdateLastParentNode()
{
    // Null-safe form of the former do/while, which dereferenced _lastparentnode before
    // checking it for null. Behavior is identical for a non-null starting node.
    while ((_lastparentnode != null) && (_lastparentnode.Closed))
    {
        _lastparentnode = _lastparentnode.ParentNode;
    }

    if (_lastparentnode == null)
    {
        _lastparentnode = _documentnode;
    }
}
1026 #endregion
1028 #region Private Methods
// Builds an HtmlParseError from the given details, appends it to the document's error
// list and returns it.
private HtmlParseError AddError(
    HtmlParseErrorCode code,
    int line,
    int linePosition,
    int streamPosition,
    string sourceText,
    string reason)
{
    HtmlParseError parseError =
        new HtmlParseError(code, line, linePosition, streamPosition, sourceText, reason);
    _parseerrors.Add(parseError);
    return parseError;
}
// Handles the end tag held in _currentnode: closes the matching open node when there is
// one, otherwise applies the special cases (closed elements such as </br>, overlapping
// elements, stray end tags) and records parse errors. Unless an error blocked the close,
// the last-parent pointer is then moved up.
private void CloseCurrentNode()
{
    // text or document are by def closed
    if (_currentnode.Closed)
    {
        return;
    }

    bool failed = false;

    // find last node of this kind
    HtmlNode opener = (HtmlNode) _lastnodes[_currentnode.Name];
    if (opener != null)
    {
        if (OptionFixNestedTags && FindResetterNodes(opener, GetResetters(_currentnode.Name)))
        {
            AddError(
                HtmlParseErrorCode.EndTagInvalidHere,
                _currentnode._line, _currentnode._lineposition,
                _currentnode._streamposition, _currentnode.OuterHtml,
                "End tag </" + _currentnode.Name + "> invalid here");
            failed = true;
        }

        if (!failed)
        {
            _lastnodes[_currentnode.Name] = opener._prevwithsamename;
            opener.CloseNode(_currentnode);
        }
    }
    else if (HtmlNode.IsClosedElement(_currentnode.Name))
    {
        // </br> will be seen as <br>
        _currentnode.CloseNode(_currentnode);

        // add to parent node
        if (_lastparentnode != null)
        {
            // Walk the parent's children backwards looking for a childless node with the
            // same name, remembering every node passed on the way.
            HtmlNode target = null;
            Stack pending = new Stack();
            HtmlNode sibling = _lastparentnode.LastChild;
            while (sibling != null)
            {
                if ((sibling.Name == _currentnode.Name) && (!sibling.HasChildNodes))
                {
                    target = sibling;
                    break;
                }
                pending.Push(sibling);
                sibling = sibling.PreviousSibling;
            }

            if (target == null)
            {
                _lastparentnode.AppendChild(_currentnode);
            }
            else
            {
                // Re-parent the remembered nodes under the node that was found.
                while (pending.Count != 0)
                {
                    HtmlNode moved = (HtmlNode) pending.Pop();
                    _lastparentnode.RemoveChild(moved);
                    target.AppendChild(moved);
                }
            }
        }
    }
    else if (HtmlNode.CanOverlapElement(_currentnode.Name))
    {
        // node has no parent
        // node is not a closed node

        // this is a hack: add it as a text node
        HtmlNode closenode = CreateNode(HtmlNodeType.Text, _currentnode._outerstartindex);
        closenode._outerlength = _currentnode._outerlength;
        HtmlTextNode closetext = (HtmlTextNode) closenode;
        closetext.Text = closetext.Text.ToLower();
        if (_lastparentnode != null)
        {
            _lastparentnode.AppendChild(closenode);
        }
    }
    else if (HtmlNode.IsEmptyElement(_currentnode.Name))
    {
        AddError(
            HtmlParseErrorCode.EndTagNotRequired,
            _currentnode._line, _currentnode._lineposition,
            _currentnode._streamposition, _currentnode.OuterHtml,
            "End tag </" + _currentnode.Name + "> is not required");
    }
    else
    {
        // node cannot overlap, node is not empty
        AddError(
            HtmlParseErrorCode.TagNotOpened,
            _currentnode._line, _currentnode._lineposition,
            _currentnode._streamposition, _currentnode.OuterHtml,
            "Start tag <" + _currentnode.Name + "> was not found");
        failed = true;
    }

    // we close this node, get grandparent
    if (failed)
    {
        return;
    }

    if ((_lastparentnode != null) &&
        ((!HtmlNode.IsClosedElement(_currentnode.Name)) ||
         (_currentnode._starttag)))
    {
        UpdateLastParentNode();
    }
}
// Returns the slice of the source text that holds the current attribute's name.
private string CurrentAttributeName()
{
    HtmlAttribute attribute = _currentattribute;
    return _text.Substring(attribute._namestartindex, attribute._namelength);
}
// Returns the slice of the source text that holds the current attribute's value.
private string CurrentAttributeValue()
{
    HtmlAttribute attribute = _currentattribute;
    return _text.Substring(attribute._valuestartindex, attribute._valuelength);
}
// Returns the slice of the source text delimited by the current node's inner start/length.
private string CurrentNodeInner()
{
    HtmlNode current = _currentnode;
    return _text.Substring(current._innerstartindex, current._innerlength);
}
// Returns the slice of the source text that holds the current node's name.
private string CurrentNodeName()
{
    HtmlNode current = _currentnode;
    return _text.Substring(current._namestartindex, current._namelength);
}
// Returns the slice of the source text delimited by the current node's outer start/length.
private string CurrentNodeOuter()
{
    HtmlNode current = _currentnode;
    return _text.Substring(current._outerstartindex, current._outerlength);
}
// Steps the parse position back by one character. When already at line position 1, moves
// to the previous line and restores the line position saved by IncrementPosition.
private void DecrementPosition()
{
    _index--;

    if (_lineposition != 1)
    {
        _lineposition--;
        return;
    }

    _lineposition = _maxlineposition;
    _line--;
}
// Returns the last recorded node called 'name' when it is still open and its stream
// position is not before that of 'node'; otherwise returns null.
private HtmlNode FindResetterNode(HtmlNode node, string name)
{
    HtmlNode candidate = (HtmlNode) _lastnodes[name];
    if ((candidate == null) ||
        candidate.Closed ||
        (candidate._streamposition < node._streamposition))
    {
        return null;
    }

    return candidate;
}
// Returns true when FindResetterNode succeeds for at least one of the given names.
// A null name list yields false.
private bool FindResetterNodes(HtmlNode node, string[] names)
{
    if (names != null)
    {
        foreach (string resetterName in names)
        {
            if (FindResetterNode(node, resetterName) != null)
            {
                return true;
            }
        }
    }

    return false;
}
// If a previous node with the same name is still open and no resetter node is found for
// it, closes that previous node with a synthetic end node. Does nothing when 'resetters'
// is null.
private void FixNestedTag(string name, string[] resetters)
{
    if (resetters == null)
    {
        return;
    }

    // if we find a previous unclosed same name node, without a resetter node between, we must close it
    HtmlNode previous = (HtmlNode) _lastnodes[name];
    if ((previous == null) || previous.Closed)
    {
        return;
    }

    // try to find a resetter node, if found, we do nothing
    if (FindResetterNodes(previous, resetters))
    {
        return;
    }

    // ok we need to close the prev now
    // create a fake closer node
    HtmlNode closer = new HtmlNode(previous.NodeType, this, -1);
    closer._endnode = closer;
    previous.CloseNode(closer);
}
/// <summary>
/// Applies the nested-tag fix to the current node when it is a start tag.
/// Closing tags are ignored.
/// </summary>
private void FixNestedTags()
{
    if (_currentnode._starttag)
    {
        string tagName = CurrentNodeName();
        string[] resetters = GetResetters(tagName);
        FixNestedTag(tagName, resetters);
    }
}
/// <summary>
/// Gets the names of the elements that "reset" implicit closing for the given tag name.
/// </summary>
/// <remarks>
/// When a tag listed here is opened while an earlier tag of the same name is still open,
/// the earlier one is closed automatically unless one of the returned resetter elements
/// was opened in between (see <c>FixNestedTag</c>).
/// Both <c>ul</c> and <c>ol</c> reset <c>li</c>: without <c>ol</c>, an item of an ordered
/// list nested inside another list item would wrongly close the outer item.
/// The comparison is case-sensitive; only lowercase names are recognized.
/// </remarks>
/// <param name="name">The tag name to look up.</param>
/// <returns>
/// A new array of resetter element names, or <c>null</c> when the tag has no
/// nested-tag fixing rule.
/// </returns>
private string[] GetResetters(string name)
{
    switch (name)
    {
        case "li":
            return new string[] {"ul", "ol"};

        case "tr":
            return new string[] {"table"};

        case "th":
        case "td":
            return new string[] {"tr", "table"};

        default:
            return null;
    }
}
/// <summary>
/// Advances the parser by one character: feeds the current character <c>_c</c> to the
/// checksum (when one is being computed), moves <c>_index</c> forward and updates the
/// line/column counters.
/// </summary>
private void IncrementPosition()
{
    if (_crc32 != null)
    {
        // REVIEW: should we add some checksum code in DecrementPosition too?
        _crc32.AddToCRC32(_c);
    }

    _index++;

    // remember the column so DecrementPosition can restore it after a line change
    _maxlineposition = _lineposition;

    if (_c != 10)
    {
        _lineposition++;
        return;
    }

    // line feed: start a new line at column 1
    _lineposition = 1;
    _line++;
}
/// <summary>
/// Checks whether the current character starts a new construct (a tag, a comment or a
/// server-side code block) and, if so, switches the parser to the matching state.
/// </summary>
/// <remarks>
/// Must be called after <c>IncrementPosition</c>: <c>_c</c> is the character just read and
/// <c>_index</c> already points at the character following it.
/// </remarks>
/// <returns>
/// <c>false</c> when the current character is not '&lt;'; <c>true</c> otherwise, in which
/// case the state has been updated (or parsing has been stopped by moving <c>_index</c>
/// to the end of the text) and the caller should move on to the next character.
/// </returns>
private bool NewCheck()
{
    if (_c != '<')
    {
        return false;
    }

    // "<%": server-side code block. Remember where we were so that the state can be
    // restored when the matching "%>" is found.
    if ((_index < _text.Length) && (_text[_index] == '%'))
    {
        switch (_state)
        {
            case ParseState.AttributeAfterEquals:
                PushAttributeValueStart(_index - 1);
                break;

            case ParseState.BetweenAttributes:
                PushAttributeNameStart(_index - 1);
                break;

            case ParseState.WhichTag:
                PushNodeNameStart(true, _index - 1);
                _state = ParseState.Tag;
                break;
        }
        _oldstate = _state;
        _state = ParseState.ServerSideCode;
        return true;
    }

    // a new tag starts here: finish the node in progress just before the '<'
    if (!PushNodeEnd(_index - 1, true))
    {
        // stop parsing
        _index = _text.Length;
        return true;
    }

    _state = ParseState.WhichTag;

    // "<!": comment (or any other "<!...>" declaration)
    if ((_index < _text.Length) && (_text[_index] == '!'))
    {
        PushNodeStart(HtmlNodeType.Comment, _index - 1);
        PushNodeNameStart(true, _index);
        PushNodeNameEnd(_index + 1);
        _state = ParseState.Comment;

        // A "full" comment starts with "<!--". The flag is always assigned here, so
        // that it never keeps the value left by a previous comment when fewer than
        // two characters follow the '!'.
        _fullcomment = (_index < (_text.Length - 2)) &&
                       (_text[_index + 1] == '-') &&
                       (_text[_index + 2] == '-');
        return true;
    }

    PushNodeStart(HtmlNodeType.Element, _index - 1);
    return true;
}
/// <summary>
/// Parses <c>_text</c> one character at a time with a state machine (see <c>ParseState</c>),
/// creating nodes and attributes through the <c>Push*</c> helpers.
/// </summary>
/// <remarks>
/// All per-parse bookkeeping (last nodes, parse errors, line counters, states) is reset first.
/// A checksum object is created when <c>OptionComputeChecksum</c> is set.
/// Inside the loop, <c>_c</c> is the character just read and <c>_index</c> already points
/// at the next one, because <c>IncrementPosition</c> is called before the state is examined.
/// Parsing stops early (by moving <c>_index</c> to the end of the text) whenever
/// <c>PushNodeEnd</c> returns false.
/// </remarks>
1375 private void Parse()
// quote character (' or ") that opened the quoted attribute value being read
1377 int lastquote = 0;
1378 if (OptionComputeChecksum)
1380 _crc32 = new Crc32();
// reset the per-parse bookkeeping
1383 _lastnodes = new Hashtable();
1384 _c = 0;
1385 _fullcomment = false;
1386 _parseerrors = new List<HtmlParseError>();
1387 _line = 1;
1388 _lineposition = 1;
1389 _maxlineposition = 1;
1391 _state = ParseState.Text;
1392 _oldstate = _state;
// the document node spans the whole text
1393 _documentnode._innerlength = _text.Length;
1394 _documentnode._outerlength = _text.Length;
1395 _remainderOffset = _text.Length;
1397 _lastparentnode = _documentnode;
1398 _currentnode = CreateNode(HtmlNodeType.Text, 0);
1399 _currentattribute = null;
// start with a text node at offset 0
1401 _index = 0;
1402 PushNodeStart(HtmlNodeType.Text, 0);
1403 while (_index < _text.Length)
// read the current character, then advance: from here on _index is one past _c
1405 _c = _text[_index];
1406 IncrementPosition();
1408 switch (_state)
// plain text: only a '<' (handled by NewCheck) changes the state
1410 case ParseState.Text:
1411 if (NewCheck())
1412 continue;
1413 break;
// just after '<': '/' means a closing tag, anything else is the first
// character of an opening tag name and is read again in the Tag state
1415 case ParseState.WhichTag:
1416 if (NewCheck())
1417 continue;
1418 if (_c == '/')
1420 PushNodeNameStart(false, _index);
1422 else
1424 PushNodeNameStart(true, _index - 1);
1425 DecrementPosition();
1427 _state = ParseState.Tag;
1428 break;
// reading the tag name; it ends on whitespace, '/' or '>'.
// PushNodeNameEnd may change _state, hence the re-checks after each call.
1430 case ParseState.Tag:
1431 if (NewCheck())
1432 continue;
1433 if (IsWhiteSpace(_c))
1435 PushNodeNameEnd(_index - 1);
1436 if (_state != ParseState.Tag)
1437 continue;
1438 _state = ParseState.BetweenAttributes;
1439 continue;
1441 if (_c == '/')
1443 PushNodeNameEnd(_index - 1);
1444 if (_state != ParseState.Tag)
1445 continue;
1446 _state = ParseState.EmptyTag;
1447 continue;
1449 if (_c == '>')
1451 PushNodeNameEnd(_index - 1);
1452 if (_state != ParseState.Tag)
1453 continue;
1454 if (!PushNodeEnd(_index, false))
1456 // stop parsing
1457 _index = _text.Length;
1458 break;
// PushNodeEnd may have switched the state (e.g. to PcData); keep it in that case
1460 if (_state != ParseState.Tag)
1461 continue;
1462 _state = ParseState.Text;
1463 PushNodeStart(HtmlNodeType.Text, _index);
1465 break;
// inside a tag, between two attributes
1467 case ParseState.BetweenAttributes:
1468 if (NewCheck())
1469 continue;
1471 if (IsWhiteSpace(_c))
1472 continue;
1474 if ((_c == '/') || (_c == '?'))
1476 _state = ParseState.EmptyTag;
1477 continue;
1480 if (_c == '>')
1482 if (!PushNodeEnd(_index, false))
1484 // stop parsing
1485 _index = _text.Length;
1486 break;
1489 if (_state != ParseState.BetweenAttributes)
1490 continue;
1491 _state = ParseState.Text;
1492 PushNodeStart(HtmlNodeType.Text, _index);
1493 continue;
// any other character starts an attribute name
1496 PushAttributeNameStart(_index - 1);
1497 _state = ParseState.AttributeName;
1498 break;
// after '/' or '?' inside a tag: '>' closes the node, anything else
// goes back to attribute parsing
1500 case ParseState.EmptyTag:
1501 if (NewCheck())
1502 continue;
1504 if (_c == '>')
1506 if (!PushNodeEnd(_index, true))
1508 // stop parsing
1509 _index = _text.Length;
1510 break;
1513 if (_state != ParseState.EmptyTag)
1514 continue;
1515 _state = ParseState.Text;
1516 PushNodeStart(HtmlNodeType.Text, _index);
1517 continue;
1519 _state = ParseState.BetweenAttributes;
1520 break;
// reading an attribute name; it ends on whitespace, '=' or '>'
1522 case ParseState.AttributeName:
1523 if (NewCheck())
1524 continue;
1526 if (IsWhiteSpace(_c))
1528 PushAttributeNameEnd(_index - 1);
1529 _state = ParseState.AttributeBeforeEquals;
1530 continue;
1532 if (_c == '=')
1534 PushAttributeNameEnd(_index - 1);
1535 _state = ParseState.AttributeAfterEquals;
1536 continue;
1538 if (_c == '>')
1540 PushAttributeNameEnd(_index - 1);
1541 if (!PushNodeEnd(_index, false))
1543 // stop parsing
1544 _index = _text.Length;
1545 break;
1547 if (_state != ParseState.AttributeName)
1548 continue;
1549 _state = ParseState.Text;
1550 PushNodeStart(HtmlNodeType.Text, _index);
1551 continue;
1553 break;
// whitespace was seen after an attribute name; waiting for '=' or '>'
1555 case ParseState.AttributeBeforeEquals:
1556 if (NewCheck())
1557 continue;
1559 if (IsWhiteSpace(_c))
1560 continue;
1561 if (_c == '>')
1563 if (!PushNodeEnd(_index, false))
1565 // stop parsing
1566 _index = _text.Length;
1567 break;
1569 if (_state != ParseState.AttributeBeforeEquals)
1570 continue;
1571 _state = ParseState.Text;
1572 PushNodeStart(HtmlNodeType.Text, _index);
1573 continue;
1575 if (_c == '=')
1577 _state = ParseState.AttributeAfterEquals;
1578 continue;
1580 // no equals, no whitespace, it's a new attribute starting:
// step back so the character is read again in the BetweenAttributes state
1581 _state = ParseState.BetweenAttributes;
1582 DecrementPosition();
1583 break;
// after '=': a quote starts a quoted value, '>' ends the tag, anything else
// starts an unquoted value
1585 case ParseState.AttributeAfterEquals:
1586 if (NewCheck())
1587 continue;
1589 if (IsWhiteSpace(_c))
1590 continue;
1592 if ((_c == '\'') || (_c == '"'))
1594 _state = ParseState.QuotedAttributeValue;
1595 PushAttributeValueStart(_index, _c);
1596 lastquote = _c;
1597 continue;
1599 if (_c == '>')
1601 if (!PushNodeEnd(_index, false))
1603 // stop parsing
1604 _index = _text.Length;
1605 break;
1607 if (_state != ParseState.AttributeAfterEquals)
1608 continue;
1609 _state = ParseState.Text;
1610 PushNodeStart(HtmlNodeType.Text, _index);
1611 continue;
1613 PushAttributeValueStart(_index - 1);
1614 _state = ParseState.AttributeValue;
1615 break;
// reading an unquoted attribute value; it ends on whitespace or '>'
1617 case ParseState.AttributeValue:
1618 if (NewCheck())
1619 continue;
1621 if (IsWhiteSpace(_c))
1623 PushAttributeValueEnd(_index - 1);
1624 _state = ParseState.BetweenAttributes;
1625 continue;
1628 if (_c == '>')
1630 PushAttributeValueEnd(_index - 1);
1631 if (!PushNodeEnd(_index, false))
1633 // stop parsing
1634 _index = _text.Length;
1635 break;
1637 if (_state != ParseState.AttributeValue)
1638 continue;
1639 _state = ParseState.Text;
1640 PushNodeStart(HtmlNodeType.Text, _index);
1641 continue;
1643 break;
// reading a quoted attribute value; it ends on the quote character that opened it.
// NewCheck is not used here: a '<' is part of the value unless it starts "<%".
1645 case ParseState.QuotedAttributeValue:
1646 if (_c == lastquote)
1648 PushAttributeValueEnd(_index - 1);
1649 _state = ParseState.BetweenAttributes;
1650 continue;
1652 if (_c == '<')
1654 if (_index < _text.Length)
1656 if (_text[_index] == '%')
1658 _oldstate = _state;
1659 _state = ParseState.ServerSideCode;
1660 continue;
1664 break;
// inside "<!...": ends on '>'; a full comment ("<!--") additionally requires
// the two characters before the '>' to be "--"
1666 case ParseState.Comment:
1667 if (_c == '>')
1669 if (_fullcomment)
1671 if ((_text[_index - 2] != '-') ||
1672 (_text[_index - 3] != '-'))
1674 continue;
1677 if (!PushNodeEnd(_index, false))
1679 // stop parsing
1680 _index = _text.Length;
1681 break;
1683 _state = ParseState.Text;
1684 PushNodeStart(HtmlNodeType.Text, _index);
1685 continue;
1687 break;
// inside "<% ... %>": on "%>" restore the state saved in _oldstate
1689 case ParseState.ServerSideCode:
1690 if (_c == '%')
1692 if (_index < _text.Length)
1694 if (_text[_index] == '>')
1696 switch (_oldstate)
1698 case ParseState.AttributeAfterEquals:
1699 _state = ParseState.AttributeValue;
1700 break;
1702 case ParseState.BetweenAttributes:
1703 PushAttributeNameEnd(_index + 1);
1704 _state = ParseState.BetweenAttributes;
1705 break;
1707 default:
1708 _state = _oldstate;
1709 break;
// skip the '>' of "%>".
// NOTE(review): _c still holds '%' at this point, so IncrementPosition feeds '%'
// (not '>') to the checksum - confirm this is intended.
1711 IncrementPosition();
1715 break;
// raw content of the current element (state entered from PushNodeEnd when
// HtmlNode.IsCDataElement is true for the node name)
1717 case ParseState.PcData:
1718 // look for </tag + 1 char
1720 // check buffer end
1721 if ((_currentnode._namelength + 3) <= (_text.Length - (_index - 1)))
// case-insensitive comparison of the text at the current character with "</" + name
1723 if (string.Compare(_text.Substring(_index - 1, _currentnode._namelength + 2),
1724 "</" + _currentnode.Name, true) == 0)
// the character right after the name must end the tag name
1726 int c = _text[_index - 1 + 2 + _currentnode.Name.Length];
1727 if ((c == '>') || (IsWhiteSpace(c)))
1729 // add the script as a text node
1730 HtmlNode script = CreateNode(HtmlNodeType.Text,
1731 _currentnode._outerstartindex +
1732 _currentnode._outerlength);
1733 script._outerlength = _index - 1 - script._outerstartindex;
1734 _currentnode.AppendChild(script);
// start the closing tag node; its name begins after "</"
1737 PushNodeStart(HtmlNodeType.Element, _index - 1);
1738 PushNodeNameStart(false, _index - 1 + 2);
1739 _state = ParseState.Tag;
// skip the '/' so that the next character read is the first one of the name
1740 IncrementPosition();
1744 break;
1748 // finish the current work
1749 if (_currentnode._namestartindex > 0)
1751 PushNodeNameEnd(_index);
1753 PushNodeEnd(_index, false);
1755 // we don't need this anymore
1756 _lastnodes.Clear();
/// <summary>
/// Marks the end of the current attribute's name and attaches the attribute to the current node.
/// </summary>
/// <param name="index">Offset in the document text just past the last character of the name.</param>
private void PushAttributeNameEnd(int index)
{
    HtmlAttribute attribute = _currentattribute;
    attribute._namelength = index - attribute._namestartindex;
    _currentnode.Attributes.Append(attribute);
}
/// <summary>
/// Creates a new current attribute whose name starts at the given offset and records
/// the parser's current line and column on it.
/// </summary>
/// <param name="index">
/// Offset in the document text of the first character of the name; also stored as the
/// attribute's stream position.
/// </param>
private void PushAttributeNameStart(int index)
{
    HtmlAttribute created = CreateAttribute();
    _currentattribute = created;

    created._namestartindex = index;
    created.Line = _line;
    created._lineposition = _lineposition;
    created._streamposition = index;
}
/// <summary>
/// Marks the end of the current attribute's value.
/// </summary>
/// <param name="index">Offset in the document text just past the last character of the value.</param>
private void PushAttributeValueEnd(int index)
{
    int valueStart = _currentattribute._valuestartindex;
    _currentattribute._valuelength = index - valueStart;
}
/// <summary>
/// Marks the start of an attribute value that is not introduced by a quote character.
/// </summary>
/// <param name="index">Offset in the document text of the first character of the value.</param>
private void PushAttributeValueStart(int index)
{
    // 0 stands for "no quote character"
    const int noQuote = 0;
    PushAttributeValueStart(index, noQuote);
}
/// <summary>
/// Marks the start of the current attribute's value and records single-quote usage.
/// </summary>
/// <param name="index">Offset in the document text of the first character of the value.</param>
/// <param name="quote">
/// The character that opened the value. Only a single quote changes the attribute's
/// <c>QuoteType</c>; any other value leaves it untouched.
/// </param>
private void PushAttributeValueStart(int index, int quote)
{
    _currentattribute._valuestartindex = index;

    bool singleQuoted = (quote == '\'');
    if (singleQuoted)
    {
        _currentattribute.QuoteType = AttributeValueQuote.SingleQuote;
    }
}
/// <summary>
/// Finishes the current node at the given offset: computes its outer length, attaches it
/// to the last parent node when appropriate and closes it when required.
/// </summary>
/// <param name="index">Offset in the document text just past the last character of the node.</param>
/// <param name="close">
/// <c>true</c> to close the node; forced to <c>true</c> for start tags of closed or empty elements.
/// </param>
/// <returns>
/// <c>false</c> when the node that was closed is the one named by <c>OptionStopperNodeName</c>
/// and parsing must stop (the rest of the text is stored in <c>_remainder</c>);
/// <c>true</c> otherwise.
/// </returns>
private bool PushNodeEnd(int index, bool close)
{
    _currentnode._outerlength = index - _currentnode._outerstartindex;

    bool isTextOrComment = (_currentnode._nodetype == HtmlNodeType.Text) ||
                           (_currentnode._nodetype == HtmlNodeType.Comment);

    if (isTextOrComment)
    {
        // forget about void nodes
        if (_currentnode._outerlength > 0)
        {
            // text and comments have no markup of their own: inner span == outer span
            _currentnode._innerlength = _currentnode._outerlength;
            _currentnode._innerstartindex = _currentnode._outerstartindex;
            if (_lastparentnode != null)
            {
                _lastparentnode.AppendChild(_currentnode);
            }
        }
    }
    else if ((_currentnode._starttag) && (_lastparentnode != _currentnode))
    {
        // add to parent node
        if (_lastparentnode != null)
        {
            _lastparentnode.AppendChild(_currentnode);
        }

        ReadDocumentEncoding(_currentnode);

        // remember last node of this kind
        string key = _currentnode.Name;
        _currentnode._prevwithsamename = (HtmlNode) _lastnodes[key];
        _lastnodes[key] = _currentnode;

        // change parent?
        bool canHaveChildren = (_currentnode.NodeType == HtmlNodeType.Document) ||
                               (_currentnode.NodeType == HtmlNodeType.Element);
        if (canHaveChildren)
        {
            _lastparentnode = _currentnode;
        }

        // content of such elements is read verbatim by the PcData state
        if (HtmlNode.IsCDataElement(CurrentNodeName()))
        {
            _state = ParseState.PcData;
            return true;
        }

        if ((HtmlNode.IsClosedElement(_currentnode.Name)) ||
            (HtmlNode.IsEmptyElement(_currentnode.Name)))
        {
            close = true;
        }
    }

    // an opening tag that must stay open: nothing more to do
    if ((!close) && (_currentnode._starttag))
    {
        return true;
    }

    bool hitStopper = (OptionStopperNodeName != null) && (_remainder == null) &&
                      (string.Compare(_currentnode.Name, OptionStopperNodeName, true) == 0);
    if (hitStopper)
    {
        // keep what follows the stopper node, it will not be parsed
        _remainderOffset = index;
        _remainder = _text.Substring(_remainderOffset);
    }

    CloseCurrentNode();

    // false means: stop parsing
    return !hitStopper;
}
/// <summary>
/// Marks the end of the current node's name and, when <c>OptionFixNestedTags</c> is set,
/// runs the nested-tag fix for it.
/// </summary>
/// <param name="index">Offset in the document text just past the last character of the name.</param>
private void PushNodeNameEnd(int index)
{
    int nameStart = _currentnode._namestartindex;
    _currentnode._namelength = index - nameStart;

    if (!OptionFixNestedTags)
    {
        return;
    }

    FixNestedTags();
}
/// <summary>
/// Records where the current node's name starts and whether the node is a start tag.
/// </summary>
/// <param name="starttag"><c>true</c> for an opening tag, <c>false</c> for a closing tag.</param>
/// <param name="index">Offset in the document text of the first character of the name.</param>
private void PushNodeNameStart(bool starttag, int index)
{
    HtmlNode node = _currentnode;
    node._starttag = starttag;
    node._namestartindex = index;
}
/// <summary>
/// Creates a new current node of the given type starting at the given offset and stamps
/// it with the parser's current line and column.
/// </summary>
/// <param name="type">Type of the node to create.</param>
/// <param name="index">
/// Offset in the document text where the node starts; also stored as its stream position.
/// </param>
private void PushNodeStart(HtmlNodeType type, int index)
{
    HtmlNode created = CreateNode(type, index);
    _currentnode = created;

    created._line = _line;

    // for elements the recorded column is one less than the parser's current column
    created._lineposition = (type == HtmlNodeType.Element) ? _lineposition - 1 : _lineposition;

    created._streamposition = index;
}
/// <summary>
/// Reads the encoding declared by a
/// <c>&lt;meta http-equiv="content-type" content="text/html;charset=..." /&gt;</c>
/// node and stores it in <c>_declaredencoding</c>.
/// </summary>
/// <remarks>
/// Does nothing unless <c>OptionReadEncoding</c> is set and <paramref name="node"/> is
/// such a meta node with a non-empty charset.
/// A charset name that the runtime does not recognize is ignored: documents found in
/// the wild contain misspelled or unsupported charset names, and these must not abort
/// the whole parse with an <see cref="ArgumentException"/>. In that case
/// <c>_declaredencoding</c> is left unchanged.
/// When a stream encoding is known and its Windows code page differs from the declared
/// one, a <c>CharsetMismatch</c> parse error is recorded.
/// </remarks>
/// <param name="node">
/// The node that has just been ended; its attributes are already populated at this point.
/// </param>
/// <exception cref="EncodingFoundException">
/// Thrown, carrying the declared encoding, when only encoding detection was requested
/// (<c>_onlyDetectEncoding</c>).
/// </exception>
private void ReadDocumentEncoding(HtmlNode node)
{
    if (!OptionReadEncoding)
    {
        return;
    }

    // format is
    // <meta http-equiv="content-type" content="text/html;charset=iso-8859-1" />

    // when we append a child, we are in node end, so attributes are already populated
    if (node._namelength != 4) // quick check, avoids string alloc
    {
        return;
    }
    if (node.Name != "meta") // all nodes names are lowercase
    {
        return;
    }

    HtmlAttribute att = node.Attributes["http-equiv"];
    if ((att == null) || (string.Compare(att.Value, "content-type", true) != 0))
    {
        return;
    }

    HtmlAttribute content = node.Attributes["content"];
    if (content == null)
    {
        return;
    }

    string charset = NameValuePairList.GetNameValuePairsValue(content.Value, "charset");
    if (charset == null)
    {
        return;
    }
    charset = charset.Trim();
    if (charset.Length == 0)
    {
        return;
    }

    Encoding declared;
    try
    {
        declared = Encoding.GetEncoding(charset);
    }
    catch (ArgumentException)
    {
        // unknown or malformed charset name: keep parsing without a declared encoding
        return;
    }

    _declaredencoding = declared;
    if (_onlyDetectEncoding)
    {
        throw new EncodingFoundException(_declaredencoding);
    }

    if ((_streamencoding != null) &&
        (_declaredencoding.WindowsCodePage != _streamencoding.WindowsCodePage))
    {
        AddError(
            HtmlParseErrorCode.CharsetMismatch,
            _line, _lineposition,
            _index, node.OuterHtml,
            "Encoding mismatch between StreamEncoding: " +
            _streamencoding.WebName + " and DeclaredEncoding: " +
            _declaredencoding.WebName);
    }
}
1939 #endregion
1941 #region Nested type: ParseState
/// <summary>
/// States of the character-driven state machine implemented by <c>Parse</c>.
/// </summary>
1943 private enum ParseState
// Outside of any tag; the initial state.
1945 Text,
// Right after '<': deciding between an opening tag and a closing ('/') tag.
1946 WhichTag,
// Reading a tag name.
1947 Tag,
// Inside a tag, between two attributes.
1948 BetweenAttributes,
// After '/' or '?' inside a tag; a following '>' closes the node.
1949 EmptyTag,
// Reading an attribute name.
1950 AttributeName,
// Whitespace was seen after an attribute name; waiting for '=' or '>'.
1951 AttributeBeforeEquals,
// After '='; waiting for the attribute value to start.
1952 AttributeAfterEquals,
// Reading an unquoted attribute value.
1953 AttributeValue,
// Inside "<!...", up to the closing '>'.
1954 Comment,
// Reading an attribute value delimited by ' or ".
1955 QuotedAttributeValue,
// Inside a "<% ... %>" server-side code block.
1956 ServerSideCode,
// Inside an element for which HtmlNode.IsCDataElement is true; scanning for its closing tag.
1957 PcData
1960 #endregion