[netcore] Remove local copy of static alc resolve methods
[mono-project.git] / docs / HtmlAgilityPack / HtmlNode.cs
blobecc915e13df22ab8e9696c2f8a022a4f6fdf4c41
1 // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
2 using System;
3 using System.Collections;
4 using System.Collections.Generic;
5 using System.Diagnostics;
6 using System.IO;
7 using System.Xml;
8 using System.Xml.XPath;
10 namespace HtmlAgilityPack
12 /// <summary>
13 /// Represents an HTML node.
14 /// </summary>
15 [DebuggerDisplay("Name: {OriginalName}}")]
16 public class HtmlNode : IXPathNavigable
18 #region Fields
// Tree links.
internal HtmlDocument _ownerdocument;
internal HtmlNode _parentnode;
internal HtmlNode _prevnode;
internal HtmlNode _nextnode;
internal HtmlNode _prevwithsamename;
// Created on demand by the ChildNodes property.
internal HtmlNodeCollection _childnodes;
// Non-null once the node is closed; comment, document and text nodes are their own end node.
internal HtmlNode _endnode;

// Identity.
internal HtmlNodeType _nodetype;
// Name as originally written; when null it is read from the document text
// using _namestartindex / _namelength.
private string _name;
internal int _namestartindex;
internal int _namelength;
// Created on demand by the Attributes property.
internal HtmlAttributeCollection _attributes;
internal bool _starttag;

// Position information exposed through Line, LinePosition and StreamPosition.
internal int _line;
internal int _lineposition;
internal int _streamposition;

// Inner markup: cached string plus its location inside the owner document's text.
// _innerchanged is raised whenever children are modified and cleared when InnerHtml regenerates the cache.
internal bool _innerchanged;
internal string _innerhtml;
internal int _innerstartindex;
internal int _innerlength;

// Outer markup: same scheme as the inner markup, used by OuterHtml.
internal bool _outerchanged;
internal string _outerhtml;
internal int _outerstartindex;
internal int _outerlength;
46 #endregion
48 #region Static Members
/// <summary>
/// Name given to every comment node: '#comment'.
/// </summary>
public static readonly string HtmlNodeTypeNameComment = "#comment";

/// <summary>
/// Name given to the document node: '#document'.
/// </summary>
public static readonly string HtmlNodeTypeNameDocument = "#document";

/// <summary>
/// Name given to every text node: '#text'.
/// </summary>
public static readonly string HtmlNodeTypeNameText = "#text";

/// <summary>
/// Table of per-element parsing flags. Each entry maps a lowercase tag name (the key)
/// to a combination of HtmlElementFlags values (the value). It is filled with defaults
/// by the static constructor.
/// </summary>
public static Hashtable ElementsFlags;
71 #endregion
73 #region Constructors
/// <summary>
/// Fills <see cref="ElementsFlags"/> with the default flags of the tags that need special treatment.
/// </summary>
static HtmlNode()
{
    ElementsFlags = new Hashtable();

    // Tags whose content may be anything.
    string[] cdataTags = new string[] { "script", "style", "noxhtml" };
    foreach (string tag in cdataTags)
    {
        ElementsFlags.Add(tag, HtmlElementFlag.CData);
    }

    // Tags that cannot contain other tags.
    string[] emptyTags = new string[]
    {
        "base", "link", "meta", "isindex", "hr", "col", "img", "param", "embed",
        "frame", "wbr", "bgsound", "spacer", "keygen", "area", "input", "basefont"
    };
    foreach (string tag in emptyTags)
    {
        ElementsFlags.Add(tag, HtmlElementFlag.Empty);
    }

    ElementsFlags.Add("form", HtmlElementFlag.CanOverlap | HtmlElementFlag.Empty);

    // <option> sometimes has content and sometimes does not.
    ElementsFlags.Add("option", HtmlElementFlag.Empty);

    // Tags whose closing tag is equivalent to the opening tag:
    //   <p>bla</p>bla stays <p>bla</p>bla
    //   <p>bla<p>bla  stays <p>bla<p>bla (no closing tags are invented, no nesting is introduced)
    // <br> is handled the same way.
    string[] closedTags = new string[] { "br", "p" };
    foreach (string tag in closedTags)
    {
        ElementsFlags.Add(tag, HtmlElementFlag.Empty | HtmlElementFlag.Closed);
    }
}
/// <summary>
/// Creates a node of the given type that belongs to the given document.
/// </summary>
/// <param name="type">The kind of node to create.</param>
/// <param name="ownerdocument">The document that owns the node.</param>
/// <param name="index">Start offset of the node's outer HTML; -1 when the node is not created from parsed text.</param>
public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
{
    _nodetype = type;
    _ownerdocument = ownerdocument;
    _outerstartindex = index;

    // Comment, document and text nodes get a fixed name and are their own end node.
    string fixedName = null;
    if (type == HtmlNodeType.Comment)
    {
        fixedName = HtmlNodeTypeNameComment;
    }
    else if (type == HtmlNodeType.Document)
    {
        fixedName = HtmlNodeTypeNameDocument;
    }
    else if (type == HtmlNodeType.Text)
    {
        fixedName = HtmlNodeTypeNameText;
    }

    if (fixedName != null)
    {
        Name = fixedName;
        _endnode = this;
    }

    // -1 means the node was created through the public API rather than from parsed text.
    bool hasSourceIndex = (-1 != index);

    // A still-open node is recorded in the document's opened-node table, keyed by its index.
    if ((_ownerdocument._openednodes != null) && !Closed && hasSourceIndex)
    {
        _ownerdocument._openednodes.Add(index, this);
    }

    // Nodes without a source index (other than comments and text) have no text to read
    // their markup from, so inner and outer HTML must be calculated.
    bool mustCalculateHtml = !hasSourceIndex
                             && (type != HtmlNodeType.Comment)
                             && (type != HtmlNodeType.Text);
    if (mustCalculateHtml)
    {
        _outerchanged = true;
        _innerchanged = true;
    }
}
168 #endregion
170 #region Properties
/// <summary>
/// Gets the HTML attributes of this node. Never returns <c>null</c>: when the node
/// currently has no attributes a fresh, empty collection is created and stored.
/// </summary>
public HtmlAttributeCollection Attributes
{
    get
    {
        if (HasAttributes)
        {
            return _attributes;
        }

        _attributes = new HtmlAttributeCollection(this);
        return _attributes;
    }
    internal set
    {
        _attributes = value;
    }
}
/// <summary>
/// Gets the children of this node. The collection is created on first access.
/// </summary>
public HtmlNodeCollection ChildNodes
{
    get
    {
        if (_childnodes != null)
        {
            return _childnodes;
        }

        _childnodes = new HtmlNodeCollection(this);
        return _childnodes;
    }
    internal set
    {
        _childnodes = value;
    }
}
/// <summary>
/// Gets a value indicating whether this node has been closed, i.e. whether it has an end node.
/// </summary>
public bool Closed
{
    get
    {
        return _endnode != null;
    }
}
/// <summary>
/// Gets the attributes found on the closing tag. Never returns <c>null</c>: when the
/// closing tag has no attributes a new empty collection is returned (it is not stored).
/// </summary>
public HtmlAttributeCollection ClosingAttributes
{
    get
    {
        return HasClosingAttributes
            ? _endnode.Attributes
            : new HtmlAttributeCollection(this);
    }
}
/// <summary>
/// Gets the node that closes this node, or <c>null</c> when the node is not closed.
/// </summary>
internal HtmlNode EndNode
{
    get
    {
        return _endnode;
    }
}
/// <summary>
/// Gets the first child of this node, or <c>null</c> when it has no children.
/// </summary>
public HtmlNode FirstChild
{
    get
    {
        return HasChildNodes ? _childnodes[0] : null;
    }
}
/// <summary>
/// Gets a value indicating whether this node has at least one attribute.
/// </summary>
public bool HasAttributes
{
    get
    {
        return (_attributes != null) && (_attributes.Count > 0);
    }
}
/// <summary>
/// Gets a value indicating whether this node has at least one child node.
/// </summary>
public bool HasChildNodes
{
    get
    {
        return (_childnodes != null) && (_childnodes.Count > 0);
    }
}
/// <summary>
/// Gets a value indicating whether the closing tag of this node carries any attributes.
/// Always <c>false</c> when the node is not closed or is its own end node.
/// </summary>
public bool HasClosingAttributes
{
    get
    {
        HtmlNode end = _endnode;
        if ((end == null) || (end == this))
        {
            return false;
        }

        HtmlAttributeCollection closing = end._attributes;
        return (closing != null) && (closing.Count > 0);
    }
}
/// <summary>
/// Gets or sets the value of the 'id' HTML attribute. The owner document must have been
/// parsed with OptionUseIdAttribute set to true.
/// </summary>
/// <exception cref="Exception">The owner document keeps no id table.</exception>
/// <exception cref="ArgumentNullException">The value being set is <c>null</c>.</exception>
public string Id
{
    get
    {
        bool idTableMissing = (_ownerdocument._nodesid == null);
        if (idTableMissing)
        {
            throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
        }

        return GetId();
    }
    set
    {
        // The id-table check comes first so that it wins over the null-argument check.
        bool idTableMissing = (_ownerdocument._nodesid == null);
        if (idTableMissing)
        {
            throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
        }

        if (value == null)
        {
            throw new ArgumentNullException("value");
        }

        SetId(value);
    }
}
/// <summary>
/// Gets or sets the HTML between the start and end tags of this node.
/// Setting it parses the value into a temporary document and replaces this node's
/// children with that document's top-level nodes.
/// </summary>
public virtual string InnerHtml
{
    get
    {
        if (!_innerchanged)
        {
            // Nothing changed: use the cache, or fall back to the original document text.
            if (_innerhtml != null)
            {
                return _innerhtml;
            }

            return (_innerstartindex < 0)
                ? string.Empty
                : _ownerdocument._text.Substring(_innerstartindex, _innerlength);
        }

        // Children changed since the last read: regenerate and cache.
        _innerhtml = WriteContentTo();
        _innerchanged = false;
        return _innerhtml;
    }
    set
    {
        HtmlDocument fragment = new HtmlDocument();
        fragment.LoadHtml(value);

        RemoveAllChildren();
        AppendChildren(fragment.DocumentNode.ChildNodes);
    }
}
/// <summary>
/// Gets the text between the start and end tags of this node: the text of a text node,
/// the comment of a comment node, otherwise the concatenated InnerText of all children.
/// </summary>
public virtual string InnerText
{
    get
    {
        switch (_nodetype)
        {
            case HtmlNodeType.Text:
                return ((HtmlTextNode) this).Text;

            case HtmlNodeType.Comment:
                return ((HtmlCommentNode) this).Comment;
        }

        // note: this is slow because everything is recomputed on every call;
        // it could be cached the way InnerHtml is.
        if (!HasChildNodes)
        {
            return string.Empty;
        }

        string text = null;
        foreach (HtmlNode child in ChildNodes)
        {
            text = text + child.InnerText;
        }

        return text;
    }
}
/// <summary>
/// Gets the last child of this node, or <c>null</c> when it has no children.
/// </summary>
public HtmlNode LastChild
{
    get
    {
        if (!HasChildNodes)
        {
            return null;
        }

        int lastIndex = _childnodes.Count - 1;
        return _childnodes[lastIndex];
    }
}
/// <summary>
/// Gets the line number of this node in the document.
/// </summary>
public int Line
{
    get
    {
        return _line;
    }
    internal set
    {
        _line = value;
    }
}

/// <summary>
/// Gets the column number of this node in the document.
/// </summary>
public int LinePosition
{
    get
    {
        return _lineposition;
    }
    internal set
    {
        _lineposition = value;
    }
}
/// <summary>
/// Gets or sets this node's name. The getter returns the name in lower case (or an empty
/// string when there is none); the setter stores the value as given, which stays available
/// through OriginalName.
/// </summary>
public string Name
{
    get
    {
        if (_name == null)
        {
            // The name has not been materialized yet: read it from the document text.
            _name = _ownerdocument._text.Substring(_namestartindex, _namelength);
        }

        // Tag names are identifiers, not linguistic text: lower-case them with the invariant
        // culture so that e.g. "INPUT" becomes "input" under every current culture
        // (a culture-sensitive ToLower() yields a dotless i under Turkish cultures).
        return _name != null ? _name.ToLowerInvariant() : string.Empty;
    }
    set
    {
        _name = value;
    }
}
/// <summary>
/// Gets the node that immediately follows this one.
/// </summary>
public HtmlNode NextSibling
{
    get
    {
        return _nextnode;
    }
    internal set
    {
        _nextnode = value;
    }
}

/// <summary>
/// Gets the type of this node.
/// </summary>
public HtmlNodeType NodeType
{
    get
    {
        return _nodetype;
    }
    internal set
    {
        _nodetype = value;
    }
}

/// <summary>
/// Gets the name exactly as stored, without the lower-casing applied by Name.
/// </summary>
public string OriginalName
{
    get
    {
        return _name;
    }
}
/// <summary>
/// Gets this node and its content as HTML.
/// </summary>
public virtual string OuterHtml
{
    get
    {
        if (!_outerchanged)
        {
            // Nothing changed: use the cache, or fall back to the original document text.
            if (_outerhtml != null)
            {
                return _outerhtml;
            }

            return (_outerstartindex < 0)
                ? string.Empty
                : _ownerdocument._text.Substring(_outerstartindex, _outerlength);
        }

        // The node changed since the last read: regenerate and cache.
        _outerhtml = WriteTo();
        _outerchanged = false;
        return _outerhtml;
    }
}
/// <summary>
/// Gets the <see cref="HtmlDocument"/> to which this node belongs.
/// </summary>
public HtmlDocument OwnerDocument
{
    get
    {
        return _ownerdocument;
    }
    internal set
    {
        _ownerdocument = value;
    }
}

/// <summary>
/// Gets the parent of this node, or <c>null</c> when it has none.
/// </summary>
public HtmlNode ParentNode
{
    get
    {
        return _parentnode;
    }
    internal set
    {
        _parentnode = value;
    }
}

/// <summary>
/// Gets the node that immediately precedes this one.
/// </summary>
public HtmlNode PreviousSibling
{
    get
    {
        return _prevnode;
    }
    internal set
    {
        _prevnode = value;
    }
}

/// <summary>
/// Gets the stream position of this node in the document, relative to the start of the document.
/// </summary>
public int StreamPosition
{
    get
    {
        return _streamposition;
    }
}
/// <summary>
/// Gets an XPath string that points to this node. It is built by appending this node's
/// relative step to the parent's XPath; a node without a parent, or directly under the
/// document node, is rooted at "/".
/// </summary>
public string XPath
{
    get
    {
        HtmlNode parent = ParentNode;
        string prefix;
        if ((parent == null) || (parent.NodeType == HtmlNodeType.Document))
        {
            prefix = "/";
        }
        else
        {
            prefix = parent.XPath + "/";
        }

        return prefix + GetRelativeXpath();
    }
}
556 #endregion
558 #region IXPathNavigable Members
/// <summary>
/// Creates a new XPathNavigator for navigating this HTML node.
/// </summary>
/// <returns>A navigator positioned on this node (not on the root of the document).</returns>
public XPathNavigator CreateNavigator()
{
    HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
    return navigator;
}
569 #endregion
571 #region Public Methods
/// <summary>
/// Determines whether an element may be kept overlapped, according to <see cref="ElementsFlags"/>.
/// </summary>
/// <param name="name">The name of the element to check. May not be <c>null</c>.</param>
/// <returns><c>true</c> if the element is flagged CanOverlap; <c>false</c> if it is not flagged or not listed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public static bool CanOverlapElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // Keys are lowercase tag names; use invariant casing so the lookup does not depend
    // on the current culture (e.g. Turkish dotless i).
    object flag = ElementsFlags[name.ToLowerInvariant()];
    if (flag == null)
    {
        return false;
    }

    return (((HtmlElementFlag) flag) & HtmlElementFlag.CanOverlap) != 0;
}
/// <summary>
/// Creates an HTML node from a string of literal HTML.
/// </summary>
/// <param name="html">The HTML text.</param>
/// <returns>The first top-level node parsed from the text, or <c>null</c> when the parsed document has no child nodes.</returns>
public static HtmlNode CreateNode(string html)
{
    // REVIEW: not optimal - a whole temporary document is built to obtain one node.
    HtmlDocument scratch = new HtmlDocument();
    scratch.LoadHtml(html);

    HtmlNode root = scratch.DocumentNode;
    return root.FirstChild;
}
/// <summary>
/// Determines whether an element is a CDATA element, according to <see cref="ElementsFlags"/>.
/// </summary>
/// <param name="name">The name of the element to check. May not be <c>null</c>.</param>
/// <returns><c>true</c> if the element is flagged CData; <c>false</c> if it is not flagged or not listed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public static bool IsCDataElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // Keys are lowercase tag names; use invariant casing so the lookup does not depend
    // on the current culture (e.g. Turkish dotless i).
    object flag = ElementsFlags[name.ToLowerInvariant()];
    if (flag == null)
    {
        return false;
    }

    return (((HtmlElementFlag) flag) & HtmlElementFlag.CData) != 0;
}
/// <summary>
/// Determines whether an element is a closed element, according to <see cref="ElementsFlags"/>.
/// </summary>
/// <param name="name">The name of the element to check. May not be <c>null</c>.</param>
/// <returns><c>true</c> if the element is flagged Closed; <c>false</c> if it is not flagged or not listed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public static bool IsClosedElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // Keys are lowercase tag names; use invariant casing so the lookup does not depend
    // on the current culture (e.g. Turkish dotless i).
    object flag = ElementsFlags[name.ToLowerInvariant()];
    if (flag == null)
    {
        return false;
    }

    return (((HtmlElementFlag) flag) & HtmlElementFlag.Closed) != 0;
}
/// <summary>
/// Determines whether an element is defined as empty. An empty name and names starting
/// with '!' or '?' are always empty; other names are looked up in <see cref="ElementsFlags"/>.
/// </summary>
/// <param name="name">The name of the element to check. May not be <c>null</c>.</param>
/// <returns><c>true</c> if the element is considered empty, <c>false</c> otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public static bool IsEmptyElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    if (name.Length == 0)
    {
        return true;
    }

    // '!' covers <!DOCTYPE ...>, '?' covers <?xml ...>.
    char first = name[0];
    if (('!' == first) || ('?' == first))
    {
        return true;
    }

    // Keys are lowercase tag names; use invariant casing so the lookup does not depend
    // on the current culture (e.g. "INPUT" or "IMG" under Turkish cultures).
    object flag = ElementsFlags[name.ToLowerInvariant()];
    if (flag == null)
    {
        return false;
    }

    return (((HtmlElementFlag) flag) & HtmlElementFlag.Empty) != 0;
}
/// <summary>
/// Determines whether a text is the closing tag ("&lt;/name&gt;") of an element that can be kept overlapped.
/// </summary>
/// <param name="text">The text to check. May not be <c>null</c>.</param>
/// <returns><c>true</c> if the text has the form of a closing tag and its element name is flagged CanOverlap; <c>false</c> otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
public static bool IsOverlappedClosingElement(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    // The shortest closing tag is "</x>", 4 characters; anything shorter cannot match.
    // (The comparison must be strict so that a 4-character tag is still examined.)
    if (text.Length < 4)
    {
        return false;
    }

    if ((text[0] != '<') ||
        (text[text.Length - 1] != '>') ||
        (text[1] != '/'))
    {
        return false;
    }

    // Strip the leading "</" and the trailing ">".
    string name = text.Substring(2, text.Length - 3);
    return CanOverlapElement(name);
}
/// <summary>
/// Returns all ancestors of this node, starting with its parent and walking up to the top of the tree.
/// </summary>
/// <returns>A lazily evaluated sequence of ancestors, nearest first; empty when this node has no parent.</returns>
public IEnumerable<HtmlNode> Ancestors()
{
    // Start at the immediate parent (which is itself an ancestor) and stop when the
    // chain ends; this also makes a parentless node yield an empty sequence instead
    // of dereferencing null.
    for (HtmlNode node = ParentNode; node != null; node = node.ParentNode)
    {
        yield return node;
    }
}
/// <summary>
/// Returns the ancestors of this node whose Name equals the given name, nearest first.
/// </summary>
/// <param name="name">The name to match against each ancestor's Name.</param>
/// <returns>A lazily evaluated sequence of matching ancestors.</returns>
public IEnumerable<HtmlNode> Ancestors(string name)
{
    HtmlNode current = ParentNode;
    while (current != null)
    {
        if (current.Name == name)
        {
            yield return current;
        }

        current = current.ParentNode;
    }
}
/// <summary>
/// Returns this node followed by all of its ancestors, nearest first.
/// </summary>
/// <returns>A lazily evaluated sequence starting with this node.</returns>
public IEnumerable<HtmlNode> AncestorsAndSelf()
{
    HtmlNode current = this;
    while (current != null)
    {
        yield return current;
        current = current.ParentNode;
    }
}
/// <summary>
/// Returns this node and its ancestors, keeping only those whose Name equals the given name.
/// </summary>
/// <param name="name">The name to match against each node's Name.</param>
/// <returns>A lazily evaluated sequence of matching nodes, starting the search at this node.</returns>
public IEnumerable<HtmlNode> AncestorsAndSelf(string name)
{
    HtmlNode current = this;
    while (current != null)
    {
        if (current.Name == name)
        {
            yield return current;
        }

        current = current.ParentNode;
    }
}
/// <summary>
/// Adds the specified node to the end of this node's children.
/// </summary>
/// <param name="newChild">The node to add. May not be <c>null</c>.</param>
/// <returns>The node added.</returns>
/// <exception cref="ArgumentNullException"><paramref name="newChild"/> is <c>null</c>.</exception>
public HtmlNode AppendChild(HtmlNode newChild)
{
    if (null == newChild)
    {
        throw new ArgumentNullException("newChild");
    }

    ChildNodes.Append(newChild);

    // Pass the child and its id to the owner document.
    string childId = newChild.GetId();
    _ownerdocument.SetIdForNode(newChild, childId);

    // Cached inner/outer HTML no longer matches the tree.
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Adds every node of the specified list, in order, to the end of this node's children.
/// </summary>
/// <param name="newChildren">The node list to add. May not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is <c>null</c>.</exception>
public void AppendChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        // The reported parameter name must match the actual parameter.
        throw new ArgumentNullException("newChildren");
    }

    foreach (HtmlNode newChild in newChildren)
    {
        AppendChild(newChild);
    }
}
/// <summary>
/// Gets the attributes of this node that have the given name.
/// </summary>
/// <param name="name">The attribute name to look for.</param>
/// <returns>The result of <c>Attributes.AttributesWithName(name)</c>.</returns>
public IEnumerable<HtmlAttribute> ChildAttributes(string name)
{
    HtmlAttributeCollection attributes = Attributes;
    return attributes.AttributesWithName(name);
}
/// <summary>
/// Creates a deep duplicate of this node (the node and the subtree under it).
/// </summary>
/// <returns>The cloned node.</returns>
public HtmlNode Clone()
{
    const bool deep = true;
    return CloneNode(deep);
}

/// <summary>
/// Creates a deep duplicate of this node and gives the duplicate a new name.
/// </summary>
/// <param name="newName">The name of the cloned node. May not be <c>null</c>.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(string newName)
{
    const bool deep = true;
    return CloneNode(newName, deep);
}
/// <summary>
/// Creates a duplicate of this node and gives the duplicate a new name.
/// </summary>
/// <param name="newName">The name of the cloned node. May not be <c>null</c>.</param>
/// <param name="deep"><c>true</c> to clone the subtree under this node as well; <c>false</c> to clone only the node itself.</param>
/// <returns>The cloned node.</returns>
/// <exception cref="ArgumentNullException"><paramref name="newName"/> is <c>null</c>.</exception>
public HtmlNode CloneNode(string newName, bool deep)
{
    if (null == newName)
    {
        throw new ArgumentNullException("newName");
    }

    HtmlNode renamed = CloneNode(deep);
    renamed.Name = newName;
    return renamed;
}
/// <summary>
/// Creates a duplicate of this node in the same owner document.
/// </summary>
/// <param name="deep"><c>true</c> to clone the subtree under this node as well; <c>false</c> to clone only the node itself.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(bool deep)
{
    HtmlNode copy = _ownerdocument.CreateNode(_nodetype);
    copy.Name = Name;

    // Comment and text nodes only carry their payload; nothing else is copied for them.
    if (_nodetype == HtmlNodeType.Comment)
    {
        ((HtmlCommentNode) copy).Comment = ((HtmlCommentNode) this).Comment;
        return copy;
    }

    if (_nodetype == HtmlNodeType.Text)
    {
        ((HtmlTextNode) copy).Text = ((HtmlTextNode) this).Text;
        return copy;
    }

    // Attributes of the opening tag.
    if (HasAttributes)
    {
        foreach (HtmlAttribute source in _attributes)
        {
            copy.Attributes.Append(source.Clone());
        }
    }

    // Attributes of the closing tag.
    // NOTE(review): the shallow clone of the end node appears to copy its attributes
    // already, so this loop may append them a second time - confirm against
    // HtmlAttributeCollection.Append before changing.
    if (HasClosingAttributes)
    {
        copy._endnode = _endnode.CloneNode(false);
        foreach (HtmlAttribute source in _endnode._attributes)
        {
            copy._endnode._attributes.Append(source.Clone());
        }
    }

    // Children are only copied for a deep clone.
    if (deep && HasChildNodes)
    {
        foreach (HtmlNode child in _childnodes)
        {
            copy.AppendChild(child.Clone());
        }
    }

    return copy;
}
/// <summary>
/// Copies the given node into this node; equivalent to <c>CopyFrom(node, true)</c>.
/// </summary>
/// <param name="node">The node to duplicate. May not be <c>null</c>.</param>
public void CopyFrom(HtmlNode node)
{
    const bool deep = true;
    CopyFrom(node, deep);
}
/// <summary>
/// Copies the given node into this node: this node's attributes are replaced by the source
/// node's attributes and, for a deep copy, its children are replaced by clones of the
/// source node's children.
/// </summary>
/// <param name="node">The node to duplicate. May not be <c>null</c>.</param>
/// <param name="deep"><c>true</c> to also copy the subtree under the specified node; <c>false</c> to copy only the node's attributes and leave this node's children untouched.</param>
/// <exception cref="ArgumentNullException"><paramref name="node"/> is <c>null</c>.</exception>
public void CopyFrom(HtmlNode node, bool deep)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    Attributes.RemoveAll();
    if (node.HasAttributes)
    {
        foreach (HtmlAttribute att in node.Attributes)
        {
            SetAttributeValue(att.Name, att.Value);
        }
    }

    // Children are copied for a deep copy only (the test used to be inverted, so a
    // "deep" copy skipped the subtree and a shallow one replaced it).
    if (deep)
    {
        RemoveAllChildren();
        if (node.HasChildNodes)
        {
            foreach (HtmlNode child in node.ChildNodes)
            {
                AppendChild(child.CloneNode(true));
            }
        }
    }
}
/// <summary>
/// Creates an XPathNavigator positioned on the document node of the owner document.
/// </summary>
/// <returns>The new navigator.</returns>
public XPathNavigator CreateRootNavigator()
{
    HtmlNode root = _ownerdocument.DocumentNode;
    return new HtmlNodeNavigator(_ownerdocument, root);
}
/// <summary>
/// Returns all descendants of this node: each child followed by that child's own descendants.
/// </summary>
/// <returns>A lazily evaluated, depth-first sequence of descendant nodes.</returns>
public IEnumerable<HtmlNode> DescendantNodes()
{
    foreach (HtmlNode child in ChildNodes)
    {
        yield return child;

        foreach (HtmlNode nested in child.DescendantNodes())
        {
            yield return nested;
        }
    }
}
/// <summary>
/// Returns this node followed by all of its descendants; forwards to DescendantsAndSelf().
/// </summary>
/// <returns>The sequence produced by DescendantsAndSelf().</returns>
public IEnumerable<HtmlNode> DescendantNodesAndSelf()
{
    IEnumerable<HtmlNode> selfAndDescendants = DescendantsAndSelf();
    return selfAndDescendants;
}
/// <summary>
/// Returns all descendants of this node; yields exactly what DescendantNodes() yields.
/// </summary>
/// <returns>A lazily evaluated sequence of descendant nodes.</returns>
public IEnumerable<HtmlNode> Descendants()
{
    foreach (HtmlNode descendant in DescendantNodes())
    {
        yield return descendant;
    }
}
/// <summary>
/// Returns the descendants of this node whose Name equals the given name.
/// </summary>
/// <param name="name">The name to match against each descendant's Name.</param>
/// <returns>A lazily evaluated sequence of matching descendants.</returns>
public IEnumerable<HtmlNode> Descendants(string name)
{
    foreach (HtmlNode descendant in Descendants())
    {
        if (descendant.Name != name)
        {
            continue;
        }

        yield return descendant;
    }
}
/// <summary>
/// Returns this node followed by all of its descendants.
/// </summary>
/// <returns>A lazily evaluated sequence starting with this node; <c>null</c> entries are skipped.</returns>
public IEnumerable<HtmlNode> DescendantsAndSelf()
{
    yield return this;

    foreach (HtmlNode descendant in DescendantNodes())
    {
        if (descendant == null)
        {
            continue;
        }

        yield return descendant;
    }
}
/// <summary>
/// Returns this node followed by the descendants whose Name equals the given name.
/// Note that this node is always returned first, whether or not its own name matches.
/// </summary>
/// <param name="name">The name to match against each descendant's Name.</param>
/// <returns>A lazily evaluated sequence starting with this node.</returns>
public IEnumerable<HtmlNode> DescendantsAndSelf(string name)
{
    yield return this;

    foreach (HtmlNode descendant in Descendants())
    {
        if (descendant.Name != name)
        {
            continue;
        }

        yield return descendant;
    }
}
/// <summary>
/// Gets the first direct child whose Name equals the given name.
/// </summary>
/// <param name="name">The name to match against each child's Name.</param>
/// <returns>The first matching child, or <c>null</c> when there is none.</returns>
public HtmlNode Element(string name)
{
    HtmlNode match = null;
    foreach (HtmlNode child in ChildNodes)
    {
        if (child.Name == name)
        {
            match = child;
            break;
        }
    }

    return match;
}
/// <summary>
/// Gets the direct children whose Name equals the given name.
/// </summary>
/// <param name="name">The name to match against each child's Name.</param>
/// <returns>A lazily evaluated sequence of matching children.</returns>
public IEnumerable<HtmlNode> Elements(string name)
{
    foreach (HtmlNode child in ChildNodes)
    {
        if (child.Name != name)
        {
            continue;
        }

        yield return child;
    }
}
/// <summary>
/// Gets the value of an attribute of this node, or a default when the attribute is absent.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
/// <param name="def">The value to return when the attribute is not found.</param>
/// <returns>The attribute's value if found, otherwise <paramref name="def"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public string GetAttributeValue(string name, string def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // Only touch the collection when there is something in it.
    HtmlAttribute found = HasAttributes ? Attributes[name] : null;
    return (found == null) ? def : found.Value;
}
/// <summary>
/// Gets the value of an attribute of this node converted to an integer, or a default when
/// the attribute is absent or its value cannot be converted.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
/// <param name="def">The value to return when the attribute is not found or not convertible.</param>
/// <returns>The converted attribute value, otherwise <paramref name="def"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public int GetAttributeValue(string name, int def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // Only touch the collection when there is something in it.
    HtmlAttribute found = HasAttributes ? Attributes[name] : null;
    if (found == null)
    {
        return def;
    }

    try
    {
        return Convert.ToInt32(found.Value);
    }
    catch
    {
        // Best effort: any conversion failure falls back to the default.
        return def;
    }
}
/// <summary>
/// Gets the value of an attribute of this node converted to a boolean, or a default when
/// the attribute is absent or its value cannot be converted.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
/// <param name="def">The value to return when the attribute is not found or not convertible.</param>
/// <returns>The converted attribute value, otherwise <paramref name="def"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public bool GetAttributeValue(string name, bool def)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // Only touch the collection when there is something in it.
    HtmlAttribute found = HasAttributes ? Attributes[name] : null;
    if (found == null)
    {
        return def;
    }

    try
    {
        return Convert.ToBoolean(found.Value);
    }
    catch
    {
        // Best effort: any conversion failure falls back to the default.
        return def;
    }
}
/// <summary>
/// Inserts the specified node immediately after the specified reference node.
/// </summary>
/// <param name="newChild">The node to insert. May not be <c>null</c>.</param>
/// <param name="refChild">The reference node; when <c>null</c>, <paramref name="newChild"/> is prepended instead.</param>
/// <returns>The node being inserted.</returns>
/// <exception cref="ArgumentNullException"><paramref name="newChild"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="refChild"/> is not a child of this node.</exception>
public HtmlNode InsertAfter(HtmlNode newChild, HtmlNode refChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }

    if (refChild == null)
    {
        return PrependChild(newChild);
    }

    // Inserting a node after itself is a no-op.
    if (newChild == refChild)
    {
        return newChild;
    }

    // Without a child collection the reference node cannot be one of our children.
    int refIndex = (_childnodes == null) ? -1 : _childnodes[refChild];
    if (refIndex == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Insert(refIndex + 1, newChild);

    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Inserts the specified node immediately before the specified reference node.
/// </summary>
/// <param name="newChild">The node to insert. May not be <c>null</c>.</param>
/// <param name="refChild">The reference node; when <c>null</c>, <paramref name="newChild"/> is appended instead.</param>
/// <returns>The node being inserted.</returns>
/// <exception cref="ArgumentNullException"><paramref name="newChild"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="refChild"/> is not a child of this node.</exception>
public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
{
    if (newChild == null)
    {
        throw new ArgumentNullException("newChild");
    }

    if (refChild == null)
    {
        return AppendChild(newChild);
    }

    // Inserting a node before itself is a no-op.
    if (newChild == refChild)
    {
        return newChild;
    }

    // Without a child collection the reference node cannot be one of our children.
    int refIndex = (_childnodes == null) ? -1 : _childnodes[refChild];
    if (refIndex == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Insert(refIndex, newChild);

    _ownerdocument.SetIdForNode(newChild, newChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Adds the specified node to the beginning of this node's children.
/// </summary>
/// <param name="newChild">The node to add. May not be <c>null</c>.</param>
/// <returns>The node added.</returns>
/// <exception cref="ArgumentNullException"><paramref name="newChild"/> is <c>null</c>.</exception>
public HtmlNode PrependChild(HtmlNode newChild)
{
    if (null == newChild)
    {
        throw new ArgumentNullException("newChild");
    }

    ChildNodes.Prepend(newChild);

    // Pass the child and its id to the owner document.
    string childId = newChild.GetId();
    _ownerdocument.SetIdForNode(newChild, childId);

    // Cached inner/outer HTML no longer matches the tree.
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Adds the specified node list to the beginning of this node's children, keeping the
/// nodes in the order they have in the list.
/// </summary>
/// <param name="newChildren">The node list to add. May not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is <c>null</c>.</exception>
public void PrependChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        throw new ArgumentNullException("newChildren");
    }

    // Each PrependChild call puts its node in front of everything already there, so the
    // list is walked from last to first; walking it forwards would reverse the nodes.
    for (int i = newChildren.Count - 1; i >= 0; i--)
    {
        PrependChild(newChildren[i]);
    }
}
/// <summary>
/// Removes this node from its parent's child collection. Does nothing when the node has no parent.
/// </summary>
public void Remove()
{
    if (ParentNode == null)
    {
        return;
    }

    ParentNode.ChildNodes.Remove(this);
}
/// <summary>
/// Removes all children of this node, all of its attributes, and all attributes of its closing tag.
/// </summary>
public void RemoveAll()
{
    RemoveAllChildren();

    if (HasAttributes)
    {
        _attributes.Clear();
    }

    // A separate end node may carry closing-tag attributes of its own.
    bool hasSeparateEndNode = (_endnode != null) && (_endnode != this);
    if (hasSeparateEndNode && (_endnode._attributes != null))
    {
        _endnode._attributes.Clear();
    }

    _outerchanged = true;
    _innerchanged = true;
}
/// <summary>
/// Removes all children of this node. Does nothing when there are none.
/// </summary>
public void RemoveAllChildren()
{
    if (!HasChildNodes)
    {
        return;
    }

    // When the document tracks ids, take each child out of the id list first.
    if (_ownerdocument.OptionUseIdAttribute)
    {
        foreach (HtmlNode child in _childnodes)
        {
            string childId = child.GetId();
            _ownerdocument.SetIdForNode(null, childId);
        }
    }

    _childnodes.Clear();
    _outerchanged = true;
    _innerchanged = true;
}
/// <summary>
/// Removes the specified child node.
/// </summary>
/// <param name="oldChild">The node being removed. May not be <c>null</c>.</param>
/// <returns>The node removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="oldChild"/> is not a child of this node.</exception>
public HtmlNode RemoveChild(HtmlNode oldChild)
{
    if (oldChild == null)
    {
        throw new ArgumentNullException("oldChild");
    }

    // Without a child collection the node cannot be one of our children.
    int childIndex = (_childnodes == null) ? -1 : _childnodes[oldChild];
    if (childIndex == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Remove(childIndex);

    _ownerdocument.SetIdForNode(null, oldChild.GetId());
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
/// <summary>
/// Removes the specified child node, optionally keeping its children by re-attaching
/// them to this node in their original order.
/// </summary>
/// <param name="oldChild">The node being removed. May not be <c>null</c>.</param>
/// <param name="keepGrandChildren"><c>true</c> to keep the grand children of the node, <c>false</c> otherwise.</param>
/// <returns>The node removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="oldChild"/> is not a child of this node.</exception>
public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
{
    if (oldChild == null)
    {
        throw new ArgumentNullException("oldChild");
    }

    if ((oldChild._childnodes != null) && keepGrandChildren)
    {
        // Insertion point: the sibling just before the node being removed
        // (null makes InsertAfter prepend).
        HtmlNode prev = oldChild.PreviousSibling;

        // Reroute the grand children to ourselves. The insertion point advances to the
        // node just inserted; inserting them all after the same sibling would reverse them.
        foreach (HtmlNode grandchild in oldChild._childnodes)
        {
            prev = InsertAfter(grandchild, prev);
        }
    }

    RemoveChild(oldChild);
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
/// <summary>
/// Substitutes <paramref name="newChild"/> for <paramref name="oldChild"/> in this node's child list.
/// </summary>
/// <param name="newChild">The node to put in the child list. When <c>null</c>, the call is forwarded to RemoveChild(oldChild).</param>
/// <param name="oldChild">The node to be replaced. When <c>null</c>, the call is forwarded to AppendChild(newChild).</param>
/// <returns>
/// <paramref name="newChild"/> after a replacement; otherwise whatever the forwarded
/// RemoveChild or AppendChild call returns.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="oldChild"/> is not a child of this node.</exception>
public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
{
    if (newChild == null)
        return RemoveChild(oldChild);

    if (oldChild == null)
        return AppendChild(newChild);

    // -1 means "not found"; a node without a child collection cannot contain oldChild.
    int position = (_childnodes == null) ? -1 : _childnodes[oldChild];
    if (position == -1)
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);

    // A valid position can only have come from a non-null collection.
    _childnodes.Replace(position, newChild);

    // Swap the id registrations in the owning document: old one out, new one in.
    HtmlDocument owner = _ownerdocument;
    owner.SetIdForNode(null, oldChild.GetId());
    owner.SetIdForNode(newChild, newChild.GetId());

    _innerchanged = true;
    _outerchanged = true;
    return newChild;
}
/// <summary>
/// Evaluates an XPath expression against this node and collects every matching node.
/// </summary>
/// <param name="xpath">The XPath expression.</param>
/// <returns>An <see cref="HtmlNodeCollection"/> holding the matching nodes, or <c>null</c> if nothing matched.</returns>
public HtmlNodeCollection SelectNodes(string xpath)
{
    HtmlNodeCollection matches = new HtmlNodeCollection(null);

    HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
    XPathNodeIterator cursor = navigator.Select(xpath);
    while (cursor.MoveNext())
    {
        matches.Add(((HtmlNodeNavigator) cursor.Current).CurrentNode);
    }

    // An empty result is reported as null rather than as an empty collection.
    return matches.Count == 0 ? null : matches;
}
/// <summary>
/// Evaluates an XPath expression against this node and returns the first match only.
/// </summary>
/// <param name="xpath">The XPath expression. Must not be <c>null</c>.</param>
/// <returns>The first matching <see cref="HtmlNode"/>, or <c>null</c> when nothing matched.</returns>
/// <exception cref="ArgumentNullException"><paramref name="xpath"/> is <c>null</c>.</exception>
public HtmlNode SelectSingleNode(string xpath)
{
    if (xpath == null)
        throw new ArgumentNullException("xpath");

    HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
    XPathNodeIterator cursor = navigator.Select(xpath);

    if (cursor.MoveNext())
    {
        HtmlNodeNavigator hit = (HtmlNodeNavigator) cursor.Current;
        return hit.CurrentNode;
    }

    return null;
}
/// <summary>
/// Assigns a value to the named attribute of this node, creating and appending the attribute
/// when the node does not have it yet.
/// </summary>
/// <param name="name">The attribute name. Must not be <c>null</c>.</param>
/// <param name="value">The value to assign.</param>
/// <returns>The attribute that now carries the value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
public HtmlAttribute SetAttributeValue(string name, string value)
{
    if (name == null)
        throw new ArgumentNullException("name");

    HtmlAttribute existing = Attributes[name];
    if (existing != null)
    {
        existing.Value = value;
        return existing;
    }

    // Not present yet: build it through the owning document and attach it.
    HtmlAttribute created = _ownerdocument.CreateAttribute(name, value);
    return Attributes.Append(created);
}
/// <summary>
/// Writes each child of this node, in order, to the given TextWriter.
/// Nothing is written when the node has no child collection.
/// </summary>
/// <param name="outText">The TextWriter that receives the output.</param>
public void WriteContentTo(TextWriter outText)
{
    if (_childnodes != null)
    {
        foreach (HtmlNode child in _childnodes)
        {
            child.WriteTo(outText);
        }
    }
}
/// <summary>
/// Saves all the children of the node to a string.
/// </summary>
/// <returns>The saved string.</returns>
public string WriteContentTo()
{
    // Dispose the writer deterministically, matching the parameterless WriteTo() overload.
    using (StringWriter sw = new StringWriter())
    {
        WriteContentTo(sw);
        sw.Flush();
        return sw.ToString();
    }
}
/// <summary>
/// Saves the current node to the specified TextWriter.
/// </summary>
/// <remarks>
/// The output depends on the node type and on the owner document's output options
/// (OptionOutputAsXml, OptionOutputUpperCase, OptionOutputOriginalCase, OptionWriteEmptyNodes).
/// </remarks>
/// <param name="outText">The TextWriter to which you want to save.</param>
public void WriteTo(TextWriter outText)
{
    string html;
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
            html = ((HtmlCommentNode) this).Comment;
            if (_ownerdocument.OptionOutputAsXml)
            {
                outText.Write("<!--" + GetXmlComment((HtmlCommentNode) this) + " -->");
            }
            else
            {
                outText.Write(html);
            }
            break;

        case HtmlNodeType.Document:
            if (_ownerdocument.OptionOutputAsXml)
            {
                outText.Write("<?xml version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName +
                              "\"?>");

                // check there is a root element
                if (_ownerdocument.DocumentNode.HasChildNodes)
                {
                    int rootnodes = _ownerdocument.DocumentNode._childnodes.Count;
                    if (rootnodes > 0)
                    {
                        // the xml declaration, when present, does not count as a root node
                        HtmlNode xml = _ownerdocument.GetXmlDeclaration();
                        if (xml != null)
                        {
                            rootnodes--;
                        }

                        if (rootnodes > 1)
                        {
                            // several top-level nodes: wrap them all in one span element
                            string wrapper = _ownerdocument.OptionOutputUpperCase ? "SPAN" : "span";
                            outText.Write("<" + wrapper + ">");
                            WriteContentTo(outText);
                            outText.Write("</" + wrapper + ">");
                            break;
                        }
                    }
                }
            }
            WriteContentTo(outText);
            break;

        case HtmlNodeType.Text:
            html = ((HtmlTextNode) this).Text;
            if (_ownerdocument.OptionOutputAsXml)
            {
                outText.Write(HtmlDocument.HtmlEncode(html));
            }
            else
            {
                outText.Write(html);
            }
            break;

        case HtmlNodeType.Element:
            string name;
            if (_ownerdocument.OptionOutputUpperCase)
            {
                // Tag names are identifiers, not linguistic text: upper-case them without
                // consulting the current culture (e.g. tr-TR would turn 'i' into U+0130).
                name = Name.ToUpperInvariant();
            }
            else
            {
                name = Name;
            }

            // original case takes precedence over upper-casing
            if (_ownerdocument.OptionOutputOriginalCase)
            {
                name = OriginalName;
            }

            if (_ownerdocument.OptionOutputAsXml)
            {
                if (name.Length > 0)
                {
                    if (name[0] == '?')
                    {
                        // forget this one, it's been done at the document level
                        break;
                    }

                    if (name.Trim().Length == 0)
                    {
                        break;
                    }
                    name = HtmlDocument.GetXmlName(name);
                }
                else
                {
                    // an element without a name is not written in XML mode
                    break;
                }
            }

            outText.Write("<" + name);
            WriteAttributes(outText, false);

            if (!HasChildNodes)
            {
                if (IsEmptyElement(Name))
                {
                    if ((_ownerdocument.OptionWriteEmptyNodes) || (_ownerdocument.OptionOutputAsXml))
                    {
                        outText.Write(" />");
                    }
                    else
                    {
                        // names starting with '?' get a closing '?' before the '>'
                        if (Name.Length > 0)
                        {
                            if (Name[0] == '?')
                            {
                                outText.Write("?");
                            }
                        }

                        outText.Write(">");
                    }
                }
                else
                {
                    outText.Write("></" + name + ">");
                }
            }
            else
            {
                outText.Write(">");
                bool cdata = false;
                if (_ownerdocument.OptionOutputAsXml)
                {
                    if (IsCDataElement(Name))
                    {
                        // this code and the following tries to output things as nicely as possible for old browsers.
                        cdata = true;
                        outText.Write("\r\n//<![CDATA[\r\n");
                    }
                }

                if (cdata)
                {
                    if (HasChildNodes)
                    {
                        // child must be a text
                        ChildNodes[0].WriteTo(outText);
                    }
                    outText.Write("\r\n//]]>//\r\n");
                }
                else
                {
                    WriteContentTo(outText);
                }

                outText.Write("</" + name);
                if (!_ownerdocument.OptionOutputAsXml)
                {
                    // outside XML mode, attributes found on the closing tag are written back too
                    WriteAttributes(outText, true);
                }
                outText.Write(">");
            }
            break;
    }
}
/// <summary>
/// Saves the current node to the specified XmlWriter.
/// </summary>
/// <remarks>
/// Comments are written as XML comments, the document node as an xml processing instruction
/// followed by its children, text nodes as strings, and elements as a start element, their
/// attributes, their children and an end element.
/// </remarks>
/// <param name="writer">The XmlWriter to which you want to save.</param>
public void WriteTo(XmlWriter writer)
{
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
            writer.WriteComment(GetXmlComment((HtmlCommentNode) this));
            break;

        case HtmlNodeType.Document:
            writer.WriteProcessingInstruction("xml",
                                              "version=\"1.0\" encoding=\"" +
                                              _ownerdocument.GetOutEncoding().BodyName + "\"");
            if (HasChildNodes)
            {
                foreach (HtmlNode subnode in ChildNodes)
                {
                    subnode.WriteTo(writer);
                }
            }
            break;

        case HtmlNodeType.Text:
            string html = ((HtmlTextNode) this).Text;
            writer.WriteString(html);
            break;

        case HtmlNodeType.Element:
            // Tag names are identifiers, not linguistic text: upper-case them without
            // consulting the current culture (e.g. tr-TR would turn 'i' into U+0130).
            string name = _ownerdocument.OptionOutputUpperCase ? Name.ToUpperInvariant() : Name;

            // original case takes precedence over upper-casing
            if (_ownerdocument.OptionOutputOriginalCase)
            {
                name = OriginalName;
            }

            writer.WriteStartElement(name);
            WriteAttributes(writer, this);

            if (HasChildNodes)
            {
                foreach (HtmlNode subnode in ChildNodes)
                {
                    subnode.WriteTo(writer);
                }
            }
            writer.WriteEndElement();
            break;
    }
}
/// <summary>
/// Renders the current node into a string.
/// </summary>
/// <returns>The text produced by writing this node to an in-memory writer.</returns>
public string WriteTo()
{
    string rendered;
    using (StringWriter buffer = new StringWriter())
    {
        WriteTo(buffer);
        buffer.Flush();
        rendered = buffer.ToString();
    }
    return rendered;
}
1752 #endregion
1754 #region Internal Methods
/// <summary>
/// Extracts the text of a comment node for XML output: drops the first four and the last
/// three characters of the raw comment and rewrites every "--" as " - -".
/// </summary>
/// <param name="comment">The comment node whose raw text is converted.</param>
/// <returns>The converted comment text, or an empty string when the raw text is too short to be stripped.</returns>
internal static string GetXmlComment(HtmlCommentNode comment)
{
    string s = comment.Comment;

    // Stripping needs at least 4 + 3 characters; a shorter raw comment would make
    // Substring throw ArgumentOutOfRangeException.
    if (s.Length < 7)
    {
        return string.Empty;
    }

    return s.Substring(4, s.Length - 7).Replace("--", " - -");
}
/// <summary>
/// Writes the attributes of <paramref name="node"/> to an XmlWriter, using each attribute's
/// XML name and its value. Nothing is written when the node has no attributes.
/// </summary>
/// <param name="writer">The XmlWriter that receives the attributes.</param>
/// <param name="node">The node whose attributes are written.</param>
internal static void WriteAttributes(XmlWriter writer, HtmlNode node)
{
    if (node.HasAttributes)
    {
        // Iterating Hashitems (instead of the attribute list) makes sure attributes are written only once.
        foreach (HtmlAttribute attribute in node.Attributes.Hashitems.Values)
        {
            writer.WriteAttributeString(attribute.XmlName, attribute.Value);
        }
    }
}
/// <summary>
/// Marks this node as closed by <paramref name="endnode"/> and updates the owner document's
/// bookkeeping and this node's inner/outer position information.
/// </summary>
/// <param name="endnode">The node that closes this one; may be this node itself.</param>
internal void CloseNode(HtmlNode endnode)
{
    // Unless the document auto-closes on end, close every child that is still open first.
    if (!_ownerdocument.OptionAutoCloseOnEnd && _childnodes != null)
    {
        foreach (HtmlNode child in _childnodes)
        {
            if (!child.Closed)
            {
                // Fake closer node: it is its own end node.
                HtmlNode fakeCloser = new HtmlNode(NodeType, _ownerdocument, -1);
                fakeCloser._endnode = fakeCloser;
                child.CloseNode(fakeCloser);
            }
        }
    }

    if (Closed)
        return;

    _endnode = endnode;

    // This node is no longer open: drop it from the document's opened-node table.
    if (_ownerdocument._openednodes != null)
        _ownerdocument._openednodes.Remove(_outerstartindex);

    // If this node is the one recorded under its name in _lastnodes, remove that record.
    HtmlNode lastWithSameName = _ownerdocument._lastnodes[Name] as HtmlNode;
    if (lastWithSameName == this)
    {
        _ownerdocument._lastnodes.Remove(Name);
        _ownerdocument.UpdateLastParentNode();
    }

    // A self-closed node has no inner section to compute.
    if (endnode == this)
        return;

    // Inner section: from the end of the start tag up to the start of the end node.
    _innerstartindex = _outerstartindex + _outerlength;
    _innerlength = endnode._outerstartindex - _innerstartindex;

    // Outer length now spans from the start tag through the end of the end node.
    _outerlength = (endnode._outerstartindex + endnode._outerlength) - _outerstartindex;
}
/// <summary>
/// Reads the value of this node's "id" attribute.
/// </summary>
/// <returns>The attribute value, or <c>null</c> when the node has no "id" attribute.</returns>
internal string GetId()
{
    HtmlAttribute idAttribute = Attributes["id"];
    return idAttribute == null ? null : idAttribute.Value;
}
/// <summary>
/// Sets the value of this node's "id" attribute, creating the attribute when it is missing,
/// and registers the node under that id in the owner document.
/// </summary>
/// <param name="id">The id value to assign.</param>
internal void SetId(string id)
{
    HtmlAttribute att = Attributes["id"];
    if (att == null)
    {
        // A freshly created attribute must also be attached to this node; otherwise the id
        // lives on a detached attribute and never shows up in the node's attributes.
        att = _ownerdocument.CreateAttribute("id");
        att.Value = id;
        Attributes.Append(att);
    }
    else
    {
        att.Value = id;
    }

    _ownerdocument.SetIdForNode(this, att.Value);
    _outerchanged = true;
}
/// <summary>
/// Writes a single attribute, preceded by a space, to the given TextWriter.
/// </summary>
/// <remarks>
/// In XML mode the XML name and the HTML-encoded XML value are written, always quoted.
/// Otherwise the plain name and value are written; attributes whose name is a server-side
/// block (&lt;% ... %&gt;) are written as the name only, and quotes are omitted for values
/// without whitespace when OptionOutputOptimizeAttributeValues is set.
/// </remarks>
/// <param name="outText">The TextWriter that receives the output.</param>
/// <param name="att">The attribute to write.</param>
internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
{
    string name;
    string quote = att.QuoteType == AttributeValueQuote.DoubleQuote ? "\"" : "'";
    if (_ownerdocument.OptionOutputAsXml)
    {
        if (_ownerdocument.OptionOutputUpperCase)
        {
            // Attribute names are identifiers: upper-case them culture-independently.
            name = att.XmlName.ToUpperInvariant();
        }
        else
        {
            name = att.XmlName;
        }

        // original case takes precedence over upper-casing
        if (_ownerdocument.OptionOutputOriginalCase)
        {
            name = att.OriginalName;
        }

        outText.Write(" " + name + "=" + quote + HtmlDocument.HtmlEncode(att.XmlValue) + quote);
    }
    else
    {
        // NOTE(review): OptionOutputOriginalCase is only honoured in the XML branch above;
        // confirm whether it should apply here as well.
        if (_ownerdocument.OptionOutputUpperCase)
        {
            // Attribute names are identifiers: upper-case them culture-independently.
            name = att.Name.ToUpperInvariant();
        }
        else
        {
            name = att.Name;
        }

        if (att.Name.Length >= 4)
        {
            // a name of the form <% ... %> is written as-is, without a value
            if ((att.Name[0] == '<') && (att.Name[1] == '%') &&
                (att.Name[att.Name.Length - 1] == '>') && (att.Name[att.Name.Length - 2] == '%'))
            {
                outText.Write(" " + name);
                return;
            }
        }

        if (_ownerdocument.OptionOutputOptimizeAttributeValues)
        {
            // quotes can only be dropped when the value contains no LF, CR, TAB or space
            if (att.Value.IndexOfAny(new Char[] {(char) 10, (char) 13, (char) 9, ' '}) < 0)
            {
                outText.Write(" " + name + "=" + att.Value);
            }
            else
            {
                outText.Write(" " + name + "=" + quote + att.Value + quote);
            }
        }
        else
        {
            outText.Write(" " + name + "=" + quote + att.Value + quote);
        }
    }
}
/// <summary>
/// Writes attributes for this node's opening or closing tag to the given TextWriter.
/// </summary>
/// <param name="outText">The TextWriter that receives the output.</param>
/// <param name="closing">
/// false to write this node's own attributes; true to write the attributes carried by its
/// end node. Ignored in XML mode, where this node's attributes are written once each.
/// </param>
internal void WriteAttributes(TextWriter outText, bool closing)
{
    HtmlDocument owner = _ownerdocument;

    if (owner.OptionOutputAsXml)
    {
        if (_attributes != null)
        {
            // Iterating Hashitems makes sure attributes are written only once.
            foreach (HtmlAttribute xmlAttribute in _attributes.Hashitems.Values)
            {
                WriteAttribute(outText, xmlAttribute);
            }
        }
        return;
    }

    if (closing)
    {
        // Nothing to write without a distinct end node that carries attributes.
        if (_endnode == null || _endnode == this || _endnode._attributes == null)
        {
            return;
        }

        foreach (HtmlAttribute endAttribute in _endnode._attributes)
        {
            WriteAttribute(outText, endAttribute);
        }

        if (owner.OptionAddDebuggingAttributes)
        {
            WriteAttribute(outText, owner.CreateAttribute("_closed", Closed.ToString()));
            WriteAttribute(outText, owner.CreateAttribute("_children", ChildNodes.Count.ToString()));
        }
        return;
    }

    if (_attributes != null)
    {
        foreach (HtmlAttribute ownAttribute in _attributes)
        {
            WriteAttribute(outText, ownAttribute);
        }
    }

    if (owner.OptionAddDebuggingAttributes)
    {
        WriteAttribute(outText, owner.CreateAttribute("_closed", Closed.ToString()));
        WriteAttribute(outText, owner.CreateAttribute("_children", ChildNodes.Count.ToString()));

        // One synthetic attribute per child, numbered from zero, holding the child's name.
        int childIndex = 0;
        foreach (HtmlNode child in ChildNodes)
        {
            WriteAttribute(outText, owner.CreateAttribute("_child_" + childIndex, child.Name));
            childIndex++;
        }
    }
}
1970 #endregion
1972 #region Private Methods
/// <summary>
/// Builds the XPath step that identifies this node relative to its parent.
/// </summary>
/// <returns>
/// The bare name for a node without a parent, an empty string for a document node, and
/// otherwise the name followed by the node's 1-based position among same-named siblings.
/// </returns>
private string GetRelativeXpath()
{
    if (ParentNode == null)
    {
        return Name;
    }

    if (NodeType == HtmlNodeType.Document)
    {
        return string.Empty;
    }

    // Count the same-named siblings that come before this node.
    int ordinal = 1;
    foreach (HtmlNode sibling in ParentNode.ChildNodes)
    {
        if (sibling == this)
        {
            break;
        }

        if (sibling.Name == Name)
        {
            ordinal++;
        }
    }

    return Name + "[" + ordinal + "]";
}
1994 #endregion