1 // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
3 using System
.Collections
;
4 using System
.Collections
.Generic
;
5 using System
.Diagnostics
;
8 using System
.Xml
.XPath
;
10 namespace HtmlAgilityPack
13 /// Represents an HTML node.
15 [DebuggerDisplay("Name: {OriginalName}}")]
16 public class HtmlNode
: IXPathNavigable
20 internal HtmlAttributeCollection _attributes
;
21 internal HtmlNodeCollection _childnodes
;
22 internal HtmlNode _endnode
;
24 internal bool _innerchanged
;
25 internal string _innerhtml
;
26 internal int _innerlength
;
27 internal int _innerstartindex
;
29 internal int _lineposition
;
31 internal int _namelength
;
32 internal int _namestartindex
;
33 internal HtmlNode _nextnode
;
34 internal HtmlNodeType _nodetype
;
35 internal bool _outerchanged
;
36 internal string _outerhtml
;
37 internal int _outerlength
;
38 internal int _outerstartindex
;
39 internal HtmlDocument _ownerdocument
;
40 internal HtmlNode _parentnode
;
41 internal HtmlNode _prevnode
;
42 internal HtmlNode _prevwithsamename
;
43 internal bool _starttag
;
44 internal int _streamposition
;
48 #region Static Members
51 /// Gets the name of a comment node. It is actually defined as '#comment'.
53 public static readonly string HtmlNodeTypeNameComment
= "#comment";
56 /// Gets the name of the document node. It is actually defined as '#document'.
58 public static readonly string HtmlNodeTypeNameDocument
= "#document";
61 /// Gets the name of a text node. It is actually defined as '#text'.
63 public static readonly string HtmlNodeTypeNameText
= "#text";
66 /// Gets a collection of flags that define specific behaviors for specific element nodes.
67 /// The table contains a DictionaryEntry list with the lowercase tag name as the Key, and a combination of HtmlElementFlags as the Value.
69 public static Hashtable ElementsFlags
;
76 /// Initialize HtmlNode. Builds a list of all tags that have special allowances
80 // tags whose content may be anything
81 ElementsFlags
= new Hashtable();
82 ElementsFlags
.Add("script", HtmlElementFlag
.CData
);
83 ElementsFlags
.Add("style", HtmlElementFlag
.CData
);
84 ElementsFlags
.Add("noxhtml", HtmlElementFlag
.CData
);
86 // tags that can not contain other tags
87 ElementsFlags
.Add("base", HtmlElementFlag
.Empty
);
88 ElementsFlags
.Add("link", HtmlElementFlag
.Empty
);
89 ElementsFlags
.Add("meta", HtmlElementFlag
.Empty
);
90 ElementsFlags
.Add("isindex", HtmlElementFlag
.Empty
);
91 ElementsFlags
.Add("hr", HtmlElementFlag
.Empty
);
92 ElementsFlags
.Add("col", HtmlElementFlag
.Empty
);
93 ElementsFlags
.Add("img", HtmlElementFlag
.Empty
);
94 ElementsFlags
.Add("param", HtmlElementFlag
.Empty
);
95 ElementsFlags
.Add("embed", HtmlElementFlag
.Empty
);
96 ElementsFlags
.Add("frame", HtmlElementFlag
.Empty
);
97 ElementsFlags
.Add("wbr", HtmlElementFlag
.Empty
);
98 ElementsFlags
.Add("bgsound", HtmlElementFlag
.Empty
);
99 ElementsFlags
.Add("spacer", HtmlElementFlag
.Empty
);
100 ElementsFlags
.Add("keygen", HtmlElementFlag
.Empty
);
101 ElementsFlags
.Add("area", HtmlElementFlag
.Empty
);
102 ElementsFlags
.Add("input", HtmlElementFlag
.Empty
);
103 ElementsFlags
.Add("basefont", HtmlElementFlag
.Empty
);
105 ElementsFlags
.Add("form", HtmlElementFlag
.CanOverlap
| HtmlElementFlag
.Empty
);
107 // they sometimes contain, and sometimes they don't...
108 ElementsFlags
.Add("option", HtmlElementFlag
.Empty
);
110 // tag whose closing tag is equivalent to open tag:
111 // <p>bla</p>bla will be transformed into <p>bla</p>bla
112 // <p>bla<p>bla will be transformed into <p>bla<p>bla and not <p>bla</p><p>bla</p> or <p>bla<p>bla</p></p>
114 ElementsFlags
.Add("br", HtmlElementFlag
.Empty
| HtmlElementFlag
.Closed
);
115 ElementsFlags
.Add("p", HtmlElementFlag
.Empty
| HtmlElementFlag
.Closed
);
119 /// Initializes HtmlNode, providing type, owner and where it exists in a collection
121 /// <param name="type"></param>
122 /// <param name="ownerdocument"></param>
123 /// <param name="index"></param>
124 public HtmlNode(HtmlNodeType type
, HtmlDocument ownerdocument
, int index
)
127 _ownerdocument
= ownerdocument
;
128 _outerstartindex
= index
;
132 case HtmlNodeType
.Comment
:
133 Name
= HtmlNodeTypeNameComment
;
137 case HtmlNodeType
.Document
:
138 Name
= HtmlNodeTypeNameDocument
;
142 case HtmlNodeType
.Text
:
143 Name
= HtmlNodeTypeNameText
;
148 if (_ownerdocument
._openednodes
!= null)
152 // we use the index as the key
154 // -1 means the node comes from public
157 _ownerdocument
._openednodes
.Add(index
, this);
162 if ((-1 != index
) || (type
== HtmlNodeType
.Comment
) || (type
== HtmlNodeType
.Text
)) return;
163 // innerhtml and outerhtml must be calculated
164 _outerchanged
= true;
165 _innerchanged
= true;
173 /// Gets the collection of HTML attributes for this node. May not be null.
175 public HtmlAttributeCollection Attributes
181 _attributes
= new HtmlAttributeCollection(this);
185 internal set { _attributes = value; }
189 /// Gets all the children of the node.
191 public HtmlNodeCollection ChildNodes
195 if (_childnodes
== null)
197 _childnodes
= new HtmlNodeCollection(this);
201 internal set { _childnodes = value; }
205 /// Gets a value indicating if this node has been closed or not.
209 get { return (_endnode != null); }
213 /// Gets the collection of HTML attributes for the closing tag. May not be null.
215 public HtmlAttributeCollection ClosingAttributes
219 if (!HasClosingAttributes
)
221 return new HtmlAttributeCollection(this);
223 return _endnode
.Attributes
;
227 internal HtmlNode EndNode
229 get { return _endnode; }
233 /// Gets the first child of the node.
235 public HtmlNode FirstChild
243 return _childnodes
[0];
248 /// Gets a value indicating whether the current node has any attributes.
250 public bool HasAttributes
254 if (_attributes
== null)
259 if (_attributes
.Count
<= 0)
268 /// Gets a value indicating whether this node has any child nodes.
270 public bool HasChildNodes
274 if (_childnodes
== null)
279 if (_childnodes
.Count
<= 0)
288 /// Gets a value indicating whether the current node has any attributes on the closing tag.
290 public bool HasClosingAttributes
294 if ((_endnode
== null) || (_endnode
== this))
299 if (_endnode
._attributes
== null)
304 if (_endnode
._attributes
.Count
<= 0)
313 /// Gets or sets the value of the 'id' HTML attribute. The document must have been parsed using the OptionUseIdAttribute set to true.
319 if (_ownerdocument
._nodesid
== null)
321 throw new Exception(HtmlDocument
.HtmlExceptionUseIdAttributeFalse
);
327 if (_ownerdocument
._nodesid
== null)
329 throw new Exception(HtmlDocument
.HtmlExceptionUseIdAttributeFalse
);
334 throw new ArgumentNullException("value");
341 /// Gets or Sets the HTML between the start and end tags of the object.
343 public virtual string InnerHtml
349 _innerhtml
= WriteContentTo();
350 _innerchanged
= false;
353 if (_innerhtml
!= null)
358 if (_innerstartindex
< 0)
363 return _ownerdocument
._text
.Substring(_innerstartindex
, _innerlength
);
367 HtmlDocument doc
= new HtmlDocument();
371 AppendChildren(doc
.DocumentNode
.ChildNodes
);
376 /// Gets or Sets the text between the start and end tags of the object.
378 public virtual string InnerText
382 if (_nodetype
== HtmlNodeType
.Text
)
384 return ((HtmlTextNode
) this).Text
;
387 if (_nodetype
== HtmlNodeType
.Comment
)
389 return ((HtmlCommentNode
) this).Comment
;
392 // note: right now, this method is *slow*, because we recompute everything.
393 // it could be optimised like innerhtml
400 foreach (HtmlNode node
in ChildNodes
)
409 /// Gets the last child of the node.
411 public HtmlNode LastChild
415 return !HasChildNodes
? null : _childnodes
[_childnodes
.Count
- 1];
420 /// Gets the line number of this node in the document.
424 get { return _line; }
425 internal set { _line = value; }
429 /// Gets the column number of this node in the document.
431 public int LinePosition
433 get { return _lineposition; }
434 internal set { _lineposition = value; }
438 /// Gets or sets this node's name.
446 Name
= _ownerdocument
._text
.Substring(_namestartindex
, _namelength
);
448 return _name
!= null ? _name
.ToLower() : string.Empty
;
450 set { _name = value; }
454 /// Gets the HTML node immediately following this element.
456 public HtmlNode NextSibling
458 get { return _nextnode; }
459 internal set { _nextnode = value; }
463 /// Gets the type of this node.
465 public HtmlNodeType NodeType
467 get { return _nodetype; }
468 internal set { _nodetype = value; }
472 /// The original unaltered name of the tag
474 public string OriginalName
476 get { return _name; }
480 /// Gets or Sets the object and its content in HTML.
482 public virtual string OuterHtml
488 _outerhtml
= WriteTo();
489 _outerchanged
= false;
493 if (_outerhtml
!= null)
498 if (_outerstartindex
< 0)
503 return _ownerdocument
._text
.Substring(_outerstartindex
, _outerlength
);
508 /// Gets the <see cref="HtmlDocument"/> to which this node belongs.
510 public HtmlDocument OwnerDocument
512 get { return _ownerdocument; }
513 internal set { _ownerdocument = value; }
517 /// Gets the parent of this node (for nodes that can have parents).
519 public HtmlNode ParentNode
521 get { return _parentnode; }
522 internal set { _parentnode = value; }
526 /// Gets the node immediately preceding this node.
528 public HtmlNode PreviousSibling
530 get { return _prevnode; }
531 internal set { _prevnode = value; }
535 /// Gets the stream position of this node in the document, relative to the start of the document.
537 public int StreamPosition
539 get { return _streamposition; }
543 /// Gets a valid XPath string that points to this node
549 string basePath
= (ParentNode
== null || ParentNode
.NodeType
== HtmlNodeType
.Document
)
551 : ParentNode
.XPath
+ "/";
552 return basePath
+ GetRelativeXpath();
558 #region IXPathNavigable Members
561 /// Creates a new XPathNavigator object for navigating this HTML node.
563 /// <returns>An XPathNavigator object. The XPathNavigator is positioned on the node from which the method was called. It is not positioned on the root of the document.</returns>
564 public XPathNavigator
CreateNavigator()
566 return new HtmlNodeNavigator(_ownerdocument
, this);
571 #region Public Methods
574 /// Determines if an element node can be kept overlapped.
576 /// <param name="name">The name of the element node to check. May not be <c>null</c>.</param>
577 /// <returns>true if the name is the name of an element node that can be kept overlapped, <c>false</c> otherwise.</returns>
578 public static bool CanOverlapElement(string name
)
582 throw new ArgumentNullException("name");
585 object flag
= ElementsFlags
[name
.ToLower()];
590 return (((HtmlElementFlag
) flag
) & HtmlElementFlag
.CanOverlap
) != 0;
594 /// Creates an HTML node from a string representing literal HTML.
596 /// <param name="html">The HTML text.</param>
597 /// <returns>The newly created node instance.</returns>
598 public static HtmlNode
CreateNode(string html
)
600 // REVIEW: this is *not* optimum...
601 HtmlDocument doc
= new HtmlDocument();
603 return doc
.DocumentNode
.FirstChild
;
607 /// Determines if an element node is a CDATA element node.
609 /// <param name="name">The name of the element node to check. May not be null.</param>
610 /// <returns>true if the name is the name of a CDATA element node, false otherwise.</returns>
611 public static bool IsCDataElement(string name
)
615 throw new ArgumentNullException("name");
618 object flag
= ElementsFlags
[name
.ToLower()];
623 return (((HtmlElementFlag
) flag
) & HtmlElementFlag
.CData
) != 0;
627 /// Determines if an element node is closed.
629 /// <param name="name">The name of the element node to check. May not be null.</param>
630 /// <returns>true if the name is the name of a closed element node, false otherwise.</returns>
631 public static bool IsClosedElement(string name
)
635 throw new ArgumentNullException("name");
638 object flag
= ElementsFlags
[name
.ToLower()];
643 return (((HtmlElementFlag
) flag
) & HtmlElementFlag
.Closed
) != 0;
647 /// Determines if an element node is defined as empty.
649 /// <param name="name">The name of the element node to check. May not be null.</param>
650 /// <returns>true if the name is the name of an empty element node, false otherwise.</returns>
651 public static bool IsEmptyElement(string name
)
655 throw new ArgumentNullException("name");
658 if (name
.Length
== 0)
675 object flag
= ElementsFlags
[name
.ToLower()];
680 return (((HtmlElementFlag
) flag
) & HtmlElementFlag
.Empty
) != 0;
684 /// Determines if a text corresponds to the closing tag of a node that can be kept overlapped.
686 /// <param name="text">The text to check. May not be null.</param>
687 /// <returns>true or false.</returns>
688 public static bool IsOverlappedClosingElement(string text
)
692 throw new ArgumentNullException("text");
695 if (text
.Length
<= 4)
698 if ((text
[0] != '<') ||
699 (text
[text
.Length
- 1] != '>') ||
703 string name
= text
.Substring(2, text
.Length
- 3);
704 return CanOverlapElement(name
);
/// <summary>
/// Returns a collection of all ancestor nodes of this element, nearest ancestor first.
/// </summary>
/// <returns>
/// The parent of this node, then that node's parent, and so on up to the top-most ancestor.
/// The sequence is empty when this node has no parent.
/// </returns>
public IEnumerable<HtmlNode> Ancestors()
{
    // Start at the immediate parent so that it is part of the result, and test the
    // current node (not its parent) so a parentless node yields an empty sequence
    // instead of dereferencing null. Same loop shape as Ancestors(string name).
    for (HtmlNode n = ParentNode; n != null; n = n.ParentNode)
    {
        yield return n;
    }
}
722 /// Get Ancestors with matching name
724 /// <param name="name"></param>
725 /// <returns></returns>
726 public IEnumerable
<HtmlNode
> Ancestors(string name
)
728 for (HtmlNode n
= ParentNode
; n
!= null; n
= n
.ParentNode
)
734 /// Returns a collection containing this element followed by all of its ancestor nodes.
736 /// <returns></returns>
737 public IEnumerable
<HtmlNode
> AncestorsAndSelf()
739 for (HtmlNode n
= this; n
!= null; n
= n
.ParentNode
)
744 /// Gets all ancestor nodes and the current node
746 /// <param name="name"></param>
747 /// <returns></returns>
748 public IEnumerable
<HtmlNode
> AncestorsAndSelf(string name
)
750 for (HtmlNode n
= this; n
!= null; n
= n
.ParentNode
)
756 /// Adds the specified node to the end of the list of children of this node.
758 /// <param name="newChild">The node to add. May not be null.</param>
759 /// <returns>The node added.</returns>
760 public HtmlNode
AppendChild(HtmlNode newChild
)
762 if (newChild
== null)
764 throw new ArgumentNullException("newChild");
767 ChildNodes
.Append(newChild
);
768 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
769 _outerchanged
= true;
770 _innerchanged
= true;
/// <summary>
/// Adds the specified node list to the end of the list of children of this node.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
public void AppendChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        // The reported parameter name must match the actual parameter ("newChildren").
        throw new ArgumentNullException("newChildren");
    }

    // Appending in enumeration order keeps the nodes in their original relative order.
    foreach (HtmlNode newChild in newChildren)
    {
        AppendChild(newChild);
    }
}
790 /// Gets all Attributes with name
792 /// <param name="name"></param>
793 /// <returns></returns>
794 public IEnumerable
<HtmlAttribute
> ChildAttributes(string name
)
796 return Attributes
.AttributesWithName(name
);
800 /// Creates a duplicate of the node
802 /// <returns></returns>
803 public HtmlNode
Clone()
805 return CloneNode(true);
809 /// Creates a duplicate of the node and changes its name at the same time.
811 /// <param name="newName">The new name of the cloned node. May not be <c>null</c>.</param>
812 /// <returns>The cloned node.</returns>
813 public HtmlNode
CloneNode(string newName
)
815 return CloneNode(newName
, true);
819 /// Creates a duplicate of the node and changes its name at the same time.
821 /// <param name="newName">The new name of the cloned node. May not be null.</param>
822 /// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
823 /// <returns>The cloned node.</returns>
824 public HtmlNode
CloneNode(string newName
, bool deep
)
828 throw new ArgumentNullException("newName");
831 HtmlNode node
= CloneNode(deep
);
837 /// Creates a duplicate of the node.
839 /// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
840 /// <returns>The cloned node.</returns>
841 public HtmlNode
CloneNode(bool deep
)
843 HtmlNode node
= _ownerdocument
.CreateNode(_nodetype
);
848 case HtmlNodeType
.Comment
:
849 ((HtmlCommentNode
) node
).Comment
= ((HtmlCommentNode
) this).Comment
;
852 case HtmlNodeType
.Text
:
853 ((HtmlTextNode
) node
).Text
= ((HtmlTextNode
) this).Text
;
860 foreach (HtmlAttribute att
in _attributes
)
862 HtmlAttribute newatt
= att
.Clone();
863 node
.Attributes
.Append(newatt
);
867 // closing attributes
868 if (HasClosingAttributes
)
870 node
._endnode
= _endnode
.CloneNode(false);
871 foreach (HtmlAttribute att
in _endnode
._attributes
)
873 HtmlAttribute newatt
= att
.Clone();
874 node
._endnode
._attributes
.Append(newatt
);
888 foreach (HtmlNode child
in _childnodes
)
890 HtmlNode newchild
= child
.Clone();
891 node
.AppendChild(newchild
);
897 /// Creates a duplicate of the node and the subtree under it.
899 /// <param name="node">The node to duplicate. May not be <c>null</c>.</param>
900 public void CopyFrom(HtmlNode node
)
902 CopyFrom(node
, true);
906 /// Creates a duplicate of the node.
908 /// <param name="node">The node to duplicate. May not be <c>null</c>.</param>
909 /// <param name="deep">true to recursively clone the subtree under the specified node, false to clone only the node itself.</param>
910 public void CopyFrom(HtmlNode node
, bool deep
)
914 throw new ArgumentNullException("node");
917 Attributes
.RemoveAll();
918 if (node
.HasAttributes
)
920 foreach (HtmlAttribute att
in node
.Attributes
)
922 SetAttributeValue(att
.Name
, att
.Value
);
929 if (node
.HasChildNodes
)
931 foreach (HtmlNode child
in node
.ChildNodes
)
933 AppendChild(child
.CloneNode(true));
940 /// Creates an XPathNavigator using the root of this document.
942 /// <returns></returns>
943 public XPathNavigator
CreateRootNavigator()
945 return new HtmlNodeNavigator(_ownerdocument
, _ownerdocument
.DocumentNode
);
949 /// Gets all Descendant nodes for this node and each of child nodes
951 /// <returns></returns>
952 public IEnumerable
<HtmlNode
> DescendantNodes()
954 foreach (HtmlNode node
in ChildNodes
)
957 foreach (HtmlNode descendant
in node
.DescendantNodes())
958 yield return descendant
;
963 /// Returns a collection of all descendant nodes of this element, in document order
965 /// <returns></returns>
966 public IEnumerable
<HtmlNode
> DescendantNodesAndSelf()
968 return DescendantsAndSelf();
972 /// Gets all Descendant nodes in enumerated list
974 /// <returns></returns>
975 public IEnumerable
<HtmlNode
> Descendants()
977 foreach (HtmlNode node
in DescendantNodes())
984 /// Get all descendant nodes with matching name
986 /// <param name="name"></param>
987 /// <returns></returns>
988 public IEnumerable
<HtmlNode
> Descendants(string name
)
990 foreach (HtmlNode node
in Descendants())
991 if (node
.Name
== name
)
996 /// Returns a collection of all descendant nodes of this element, in document order
998 /// <returns></returns>
999 public IEnumerable
<HtmlNode
> DescendantsAndSelf()
1002 foreach (HtmlNode n
in DescendantNodes())
1011 /// Gets all descendant nodes including this node
1013 /// <param name="name"></param>
1014 /// <returns></returns>
1015 public IEnumerable
<HtmlNode
> DescendantsAndSelf(string name
)
1018 foreach (HtmlNode node
in Descendants())
1019 if (node
.Name
== name
)
1024 /// Gets first generation child node matching name
1026 /// <param name="name"></param>
1027 /// <returns></returns>
1028 public HtmlNode
Element(string name
)
1030 foreach (HtmlNode node
in ChildNodes
)
1031 if (node
.Name
== name
)
1037 /// Gets matching first generation child nodes matching name
1039 /// <param name="name"></param>
1040 /// <returns></returns>
1041 public IEnumerable
<HtmlNode
> Elements(string name
)
1043 foreach (HtmlNode node
in ChildNodes
)
1044 if (node
.Name
== name
)
1049 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
1051 /// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
1052 /// <param name="def">The default value to return if not found.</param>
1053 /// <returns>The value of the attribute if found, the default value if not found.</returns>
1054 public string GetAttributeValue(string name
, string def
)
1058 throw new ArgumentNullException("name");
1065 HtmlAttribute att
= Attributes
[name
];
1074 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
1076 /// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
1077 /// <param name="def">The default value to return if not found.</param>
1078 /// <returns>The value of the attribute if found, the default value if not found.</returns>
1079 public int GetAttributeValue(string name
, int def
)
1083 throw new ArgumentNullException("name");
1090 HtmlAttribute att
= Attributes
[name
];
1097 return Convert
.ToInt32(att
.Value
);
1106 /// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
1108 /// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
1109 /// <param name="def">The default value to return if not found.</param>
1110 /// <returns>The value of the attribute if found, the default value if not found.</returns>
1111 public bool GetAttributeValue(string name
, bool def
)
1115 throw new ArgumentNullException("name");
1122 HtmlAttribute att
= Attributes
[name
];
1129 return Convert
.ToBoolean(att
.Value
);
1138 /// Inserts the specified node immediately after the specified reference node.
1140 /// <param name="newChild">The node to insert. May not be <c>null</c>.</param>
1141 /// <param name="refChild">The node that is the reference node. The newNode is placed after the refNode.</param>
1142 /// <returns>The node being inserted.</returns>
1143 public HtmlNode
InsertAfter(HtmlNode newChild
, HtmlNode refChild
)
1145 if (newChild
== null)
1147 throw new ArgumentNullException("newChild");
1150 if (refChild
== null)
1152 return PrependChild(newChild
);
1155 if (newChild
== refChild
)
1162 if (_childnodes
!= null)
1164 index
= _childnodes
[refChild
];
1168 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
1171 if (_childnodes
!= null) _childnodes
.Insert(index
+ 1, newChild
);
1173 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1174 _outerchanged
= true;
1175 _innerchanged
= true;
1180 /// Inserts the specified node immediately before the specified reference node.
1182 /// <param name="newChild">The node to insert. May not be <c>null</c>.</param>
1183 /// <param name="refChild">The node that is the reference node. The newChild is placed before this node.</param>
1184 /// <returns>The node being inserted.</returns>
1185 public HtmlNode
InsertBefore(HtmlNode newChild
, HtmlNode refChild
)
1187 if (newChild
== null)
1189 throw new ArgumentNullException("newChild");
1192 if (refChild
== null)
1194 return AppendChild(newChild
);
1197 if (newChild
== refChild
)
1204 if (_childnodes
!= null)
1206 index
= _childnodes
[refChild
];
1211 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
1214 if (_childnodes
!= null) _childnodes
.Insert(index
, newChild
);
1216 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1217 _outerchanged
= true;
1218 _innerchanged
= true;
1223 /// Adds the specified node to the beginning of the list of children of this node.
1225 /// <param name="newChild">The node to add. May not be <c>null</c>.</param>
1226 /// <returns>The node added.</returns>
1227 public HtmlNode
PrependChild(HtmlNode newChild
)
1229 if (newChild
== null)
1231 throw new ArgumentNullException("newChild");
1233 ChildNodes
.Prepend(newChild
);
1234 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1235 _outerchanged
= true;
1236 _innerchanged
= true;
/// <summary>
/// Adds the specified node list to the beginning of the list of children of this node,
/// keeping the nodes in the same relative order they have in <paramref name="newChildren"/>.
/// </summary>
/// <param name="newChildren">The node list to add. May not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
public void PrependChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        throw new ArgumentNullException("newChildren");
    }

    // Each PrependChild call puts its node in front of everything added so far, so
    // walking the list forwards would leave it reversed. Walk it backwards so the
    // first node of newChildren ends up first.
    for (int i = newChildren.Count - 1; i >= 0; i--)
    {
        PrependChild(newChildren[i]);
    }
}
1258 /// Removes node from parent collection
1260 public void Remove()
1262 if (ParentNode
!= null)
1263 ParentNode
.ChildNodes
.Remove(this);
1267 /// Removes all the children and/or attributes of the current node.
1269 public void RemoveAll()
1271 RemoveAllChildren();
1275 _attributes
.Clear();
1278 if ((_endnode
!= null) && (_endnode
!= this))
1280 if (_endnode
._attributes
!= null)
1282 _endnode
._attributes
.Clear();
1285 _outerchanged
= true;
1286 _innerchanged
= true;
1290 /// Removes all the children of the current node.
1292 public void RemoveAllChildren()
1299 if (_ownerdocument
.OptionUseIdAttribute
)
1301 // remove nodes from id list
1302 foreach (HtmlNode node
in _childnodes
)
1304 _ownerdocument
.SetIdForNode(null, node
.GetId());
1307 _childnodes
.Clear();
1308 _outerchanged
= true;
1309 _innerchanged
= true;
1313 /// Removes the specified child node.
1315 /// <param name="oldChild">The node being removed. May not be <c>null</c>.</param>
1316 /// <returns>The node removed.</returns>
1317 public HtmlNode
RemoveChild(HtmlNode oldChild
)
1319 if (oldChild
== null)
1321 throw new ArgumentNullException("oldChild");
1326 if (_childnodes
!= null)
1328 index
= _childnodes
[oldChild
];
1333 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
1336 if (_childnodes
!= null)
1337 _childnodes
.Remove(index
);
1339 _ownerdocument
.SetIdForNode(null, oldChild
.GetId());
1340 _outerchanged
= true;
1341 _innerchanged
= true;
/// <summary>
/// Removes the specified child node, optionally re-parenting its children to this node.
/// </summary>
/// <param name="oldChild">The node being removed. May not be <c>null</c>.</param>
/// <param name="keepGrandChildren">true to keep grand children of the node, false otherwise.</param>
/// <returns>The node removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is null.</exception>
public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
{
    if (oldChild == null)
    {
        throw new ArgumentNullException("oldChild");
    }

    if ((oldChild._childnodes != null) && keepGrandChildren)
    {
        // Insertion point: the grand children go where oldChild currently sits.
        HtmlNode prev = oldChild.PreviousSibling;

        // Reroute grand children to ourselves. InsertAfter is documented to return the
        // node it inserted; advancing prev to that node places each grand child after
        // the previous one, preserving their order (re-using the same prev for every
        // insertion would leave them reversed).
        foreach (HtmlNode grandchild in oldChild._childnodes)
        {
            prev = InsertAfter(grandchild, prev);
        }
    }

    RemoveChild(oldChild);
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
1376 /// Replaces the child node oldChild with newChild node.
1378 /// <param name="newChild">The new node to put in the child list.</param>
1379 /// <param name="oldChild">The node being replaced in the list.</param>
1380 /// <returns>The node replaced.</returns>
1381 public HtmlNode
ReplaceChild(HtmlNode newChild
, HtmlNode oldChild
)
1383 if (newChild
== null)
1385 return RemoveChild(oldChild
);
1388 if (oldChild
== null)
1390 return AppendChild(newChild
);
1395 if (_childnodes
!= null)
1397 index
= _childnodes
[oldChild
];
1402 throw new ArgumentException(HtmlDocument
.HtmlExceptionRefNotChild
);
1405 if (_childnodes
!= null) _childnodes
.Replace(index
, newChild
);
1407 _ownerdocument
.SetIdForNode(null, oldChild
.GetId());
1408 _ownerdocument
.SetIdForNode(newChild
, newChild
.GetId());
1409 _outerchanged
= true;
1410 _innerchanged
= true;
1415 /// Selects a list of nodes matching the <see cref="XPath"/> expression.
1417 /// <param name="xpath">The XPath expression.</param>
1418 /// <returns>An <see cref="HtmlNodeCollection"/> containing a collection of nodes matching the <see cref="XPath"/> query, or <c>null</c> if no node matched the XPath expression.</returns>
1419 public HtmlNodeCollection
SelectNodes(string xpath
)
1421 HtmlNodeCollection list
= new HtmlNodeCollection(null);
1423 HtmlNodeNavigator nav
= new HtmlNodeNavigator(_ownerdocument
, this);
1424 XPathNodeIterator it
= nav
.Select(xpath
);
1425 while (it
.MoveNext())
1427 HtmlNodeNavigator n
= (HtmlNodeNavigator
) it
.Current
;
1428 list
.Add(n
.CurrentNode
);
1430 if (list
.Count
== 0)
1438 /// Selects the first HtmlNode that matches the XPath expression.
1440 /// <param name="xpath">The XPath expression. May not be null.</param>
1441 /// <returns>The first <see cref="HtmlNode"/> that matches the XPath query or a null reference if no matching node was found.</returns>
1442 public HtmlNode
SelectSingleNode(string xpath
)
1446 throw new ArgumentNullException("xpath");
1449 HtmlNodeNavigator nav
= new HtmlNodeNavigator(_ownerdocument
, this);
1450 XPathNodeIterator it
= nav
.Select(xpath
);
1456 HtmlNodeNavigator node
= (HtmlNodeNavigator
) it
.Current
;
1457 return node
.CurrentNode
;
1461 /// Helper method to set the value of an attribute of this node. If the attribute is not found, it will be created automatically.
1463 /// <param name="name">The name of the attribute to set. May not be null.</param>
1464 /// <param name="value">The value for the attribute.</param>
1465 /// <returns>The corresponding attribute instance.</returns>
1466 public HtmlAttribute
SetAttributeValue(string name
, string value)
1470 throw new ArgumentNullException("name");
1472 HtmlAttribute att
= Attributes
[name
];
1475 return Attributes
.Append(_ownerdocument
.CreateAttribute(name
, value));
1482 /// Saves all the children of the node to the specified TextWriter.
1484 /// <param name="outText">The TextWriter to which you want to save.</param>
1485 public void WriteContentTo(TextWriter outText
)
1487 if (_childnodes
== null)
1492 foreach (HtmlNode node
in _childnodes
)
1494 node
.WriteTo(outText
);
1499 /// Saves all the children of the node to a string.
1501 /// <returns>The saved string.</returns>
1502 public string WriteContentTo()
1504 StringWriter sw
= new StringWriter();
1507 return sw
.ToString();
1511 /// Saves the current node to the specified TextWriter.
1513 /// <param name="outText">The TextWriter to which you want to save.</param>
// Serializes this node as markup text. The visible cases cover Comment, Document, Text and
// Element nodes, and each one consults output options on the owner document.
// NOTE(review): this listing omits some source lines (braces, the dispatch header, else and
// break statements), so the comments below describe only the statements that are visible.
1514 public void WriteTo(TextWriter outText
)
// Comment node: read the raw comment text from the HtmlCommentNode.
1519 case HtmlNodeType
.Comment
:
1520 html
= ((HtmlCommentNode
) this).Comment
;
1521 if (_ownerdocument
.OptionOutputAsXml
)
// XML output: wrap the text returned by GetXmlComment in new "<!--" / " -->" delimiters.
1523 outText
.Write("<!--" + GetXmlComment((HtmlCommentNode
) this) + " -->");
// Presumably the non-XML branch (the else is not visible): write the raw comment text as is.
1527 outText
.Write(html
);
// Document node.
1531 case HtmlNodeType
.Document
:
1532 if (_ownerdocument
.OptionOutputAsXml
)
// XML output: start an XML declaration whose encoding is the BodyName of the document's
// output encoding. The rest of this expression is not visible in this listing.
1534 outText
.Write("<?xml version=\"1.0\" encoding=\"" + _ownerdocument
.GetOutEncoding().BodyName
+
1537 // check there is a root element
1538 if (_ownerdocument
.DocumentNode
.HasChildNodes
)
// Count the document node's direct children.
1540 int rootnodes
= _ownerdocument
.DocumentNode
._childnodes
.Count
;
// Fetch the document's XML declaration node; how it affects rootnodes is not visible here.
1543 HtmlNode xml
= _ownerdocument
.GetXmlDeclaration();
// Wrap the whole content in a span element, upper- or lower-case according to
// OptionOutputUpperCase. NOTE(review): presumably done only when more than one root node
// remains, so the XML output has a single root — the guarding condition is not visible.
1551 if (_ownerdocument
.OptionOutputUpperCase
)
1553 outText
.Write("<SPAN>");
1554 WriteContentTo(outText
);
1555 outText
.Write("</SPAN>");
1559 outText
.Write("<span>");
1560 WriteContentTo(outText
);
1561 outText
.Write("</span>");
// Otherwise the content is written without a wrapper.
1568 WriteContentTo(outText
);
// Text node: HTML-encode the text for XML output, otherwise write it unchanged.
1571 case HtmlNodeType
.Text
:
1572 html
= ((HtmlTextNode
) this).Text
;
1573 if (_ownerdocument
.OptionOutputAsXml
)
1575 outText
.Write(HtmlDocument
.HtmlEncode(html
));
1579 outText
.Write(html
);
// Element node: first decide which spelling of the tag name to emit.
1583 case HtmlNodeType
.Element
:
1585 if (_ownerdocument
.OptionOutputUpperCase
)
1587 name
= Name
.ToUpper();
// OptionOutputOriginalCase is applied afterwards and overwrites the name with OriginalName.
1594 if (_ownerdocument
.OptionOutputOriginalCase
)
1595 name
= OriginalName
;
// XML output: the name is checked and then converted with HtmlDocument.GetXmlName.
// NOTE(review): the statements run for the checks below (likely early exits) are not visible.
1597 if (_ownerdocument
.OptionOutputAsXml
)
1599 if (name
.Length
> 0)
1603 // forget this one, it's been done at the document level
1607 if (name
.Trim().Length
== 0)
1611 name
= HtmlDocument
.GetXmlName(name
);
// Opening tag: "<name" followed by the attributes (closing = false).
1619 outText
.Write("<" + name
);
1620 WriteAttributes(outText
, false);
// Elements that IsEmptyElement reports as empty are self-closed with " />" when either
// OptionWriteEmptyNodes or OptionOutputAsXml is set.
1624 if (IsEmptyElement(Name
))
1626 if ((_ownerdocument
.OptionWriteEmptyNodes
) || (_ownerdocument
.OptionOutputAsXml
))
1628 outText
.Write(" />");
1632 if (Name
.Length
> 0)
// Close the opening tag and immediately write the matching end tag.
// NOTE(review): presumably the childless, non-empty-element case — the condition is not visible.
1645 outText
.Write("></" + name
+ ">");
// XML output of an element that IsCDataElement reports as CDATA: the content is wrapped in
// a CDATA section whose markers are prefixed with "//".
1652 if (_ownerdocument
.OptionOutputAsXml
)
1654 if (IsCDataElement(Name
))
1656 // this code and the following tries to output things as nicely as possible for old browsers.
1658 outText
.Write("\r\n//<![CDATA[\r\n");
1666 // child must be a text
1667 ChildNodes
[0].WriteTo(outText
);
1669 outText
.Write("\r\n//]]>//\r\n");
// Regular content.
1673 WriteContentTo(outText
);
// End tag: "</name"; outside XML output the attributes are written again with closing = true.
1676 outText
.Write("</" + name
);
1677 if (!_ownerdocument
.OptionOutputAsXml
)
1679 WriteAttributes(outText
, true);
/// <summary>
/// Saves the current node to the specified XmlWriter.
/// </summary>
/// <param name="writer">The XmlWriter to which you want to save.</param>
public void WriteTo(XmlWriter writer)
{
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
            // The comment text is sanitized by GetXmlComment before it reaches the writer.
            HtmlCommentNode commentNode = (HtmlCommentNode) this;
            writer.WriteComment(GetXmlComment(commentNode));
            break;

        case HtmlNodeType.Document:
            // Emit the XML declaration as a processing instruction, then every child node.
            string xmlDeclaration = "version=\"1.0\" encoding=\"" +
                                    _ownerdocument.GetOutEncoding().BodyName + "\"";
            writer.WriteProcessingInstruction("xml", xmlDeclaration);

            if (HasChildNodes)
            {
                foreach (HtmlNode child in ChildNodes)
                {
                    child.WriteTo(writer);
                }
            }
            break;

        case HtmlNodeType.Text:
            // The writer takes care of escaping the text.
            string text = ((HtmlTextNode) this).Text;
            writer.WriteString(text);
            break;

        case HtmlNodeType.Element:
            // Choose the tag spelling: upper-case on request, original case wins when set.
            string tagName;
            if (_ownerdocument.OptionOutputUpperCase)
            {
                tagName = Name.ToUpper();
            }
            else
            {
                tagName = Name;
            }

            if (_ownerdocument.OptionOutputOriginalCase)
            {
                tagName = OriginalName;
            }

            writer.WriteStartElement(tagName);
            WriteAttributes(writer, this);

            if (HasChildNodes)
            {
                foreach (HtmlNode child in ChildNodes)
                {
                    child.WriteTo(writer);
                }
            }

            writer.WriteEndElement();
            break;
    }
}
/// <summary>
/// Saves the current node to a string.
/// </summary>
/// <returns>The saved string.</returns>
public string WriteTo()
{
    using (StringWriter buffer = new StringWriter())
    {
        // Delegate to the TextWriter overload and hand back whatever it produced.
        WriteTo(buffer);
        buffer.Flush();
        string markup = buffer.ToString();
        return markup;
    }
}
1754 #region Internal Methods
/// <summary>
/// Returns the text of an HTML comment in a form that can be placed inside an XML comment.
/// </summary>
/// <param name="comment">The comment node; its <c>Comment</c> text is read.</param>
/// <returns>
/// The comment text with its first four and last three characters removed and every "--"
/// sequence broken up with spaces, or an empty string when the text is null or shorter than
/// seven characters.
/// </returns>
internal static string GetXmlComment(HtmlCommentNode comment)
{
    // Four leading and three trailing characters are dropped; callers in this class re-add
    // "<!--" and " -->" around the result.
    const int LeadingLength = 4;
    const int TrailingLength = 3;

    string s = comment.Comment;
    if (s == null || s.Length < LeadingLength + TrailingLength)
    {
        // Substring would throw ArgumentOutOfRangeException on truncated text such as "<!-->".
        return string.Empty;
    }

    string inner = s.Substring(LeadingLength, s.Length - (LeadingLength + TrailingLength))
                    .Replace("--", " - -");

    // A single pass leaves "--" behind for odd runs of dashes ("---" becomes " - --"), and
    // "--" is not allowed inside an XML comment, so repeat until none remains.
    while (inner.Contains("--"))
    {
        inner = inner.Replace("--", " - -");
    }

    return inner;
}
/// <summary>
/// Writes the attributes of <paramref name="node"/> to <paramref name="writer"/>.
/// </summary>
/// <param name="writer">The XmlWriter that receives the attributes.</param>
/// <param name="node">The node whose attributes are written.</param>
internal static void WriteAttributes(XmlWriter writer, HtmlNode node)
{
    if (node.HasAttributes)
    {
        // we use Hashitems to make sure attributes are written only once
        foreach (HtmlAttribute attribute in node.Attributes.Hashitems.Values)
        {
            string attributeName = attribute.XmlName;
            string attributeValue = attribute.Value;
            writer.WriteAttributeString(attributeName, attributeValue);
        }
    }
}
// Closes this node against the given end node: optionally closes its children, removes the
// node from the owner document's bookkeeping tables, and recomputes the inner/outer spans.
// NOTE(review): this listing omits several lines (braces, else branches, some conditions and
// assignments); the comments below describe only the statements that are visible.
1775 internal void CloseNode(HtmlNode endnode
)
// Only when the document is NOT configured with OptionAutoCloseOnEnd are children handled here.
1777 if (!_ownerdocument
.OptionAutoCloseOnEnd
)
1779 // close all children
1780 if (_childnodes
!= null)
1782 foreach (HtmlNode child
in _childnodes
)
1787 // create a fake closer node
// The synthetic closer is built with this node's NodeType and index -1, and is its own end node.
1788 HtmlNode close
= new HtmlNode(NodeType
, _ownerdocument
, -1);
1789 close
._endnode
= close
;
1790 child
.CloseNode(close
);
// Remove this node from the document's opened-node table, keyed by its outer start index.
1799 if (_ownerdocument
._openednodes
!= null)
1801 _ownerdocument
._openednodes
.Remove(_outerstartindex
);
// Look up the node currently registered under this name in the document's last-nodes table.
1804 HtmlNode self
= _ownerdocument
._lastnodes
[Name
] as HtmlNode
;
// Drop that registration and let the document refresh its last parent node.
// NOTE(review): presumably only when the registered node is this one — condition not visible.
1807 _ownerdocument
._lastnodes
.Remove(Name
);
1808 _ownerdocument
.UpdateLastParentNode();
// Self-closing case; what happens in this branch is not visible in this listing.
1811 if (endnode
== this)
1814 // create an inner section
// The inner span starts right after this node's current outer span and runs up to the start
// of the end node.
1815 _innerstartindex
= _outerstartindex
+ _outerlength
;
1816 _innerlength
= endnode
._outerstartindex
- _innerstartindex
;
1818 // update full length
// The outer span now extends from this node's start through the end of the end node.
1819 _outerlength
= (endnode
._outerstartindex
+ endnode
._outerlength
) - _outerstartindex
;
// Looks up this node's "id" attribute.
// NOTE(review): the statements that turn the attribute into the returned string (and handle
// a missing attribute) are not visible in this listing — confirm against the full source.
1823 internal string GetId()
1825 HtmlAttribute att
= Attributes
["id"];
// Works with this node's "id" attribute and registers the node with the owner document
// under the attribute's value.
// NOTE(review): the condition guarding CreateAttribute and the assignment of the id parameter
// to the attribute are not visible in this listing — confirm against the full source.
1833 internal void SetId(string id
)
1835 HtmlAttribute att
= Attributes
["id"];
// Create a fresh "id" attribute through the owner document (presumably only when none exists).
1838 att
= _ownerdocument
.CreateAttribute("id");
// Register this node under the attribute's value, then flag the node via _outerchanged.
1841 _ownerdocument
.SetIdForNode(this, att
.Value
);
1842 _outerchanged
= true;
// Writes a single attribute to outText, preceded by a space, honoring the owner document's
// output options (XML output, upper/original case, optimized attribute values).
// NOTE(review): this listing omits some lines (the declaration of name, braces, else
// branches); the comments below describe only the statements that are visible.
1845 internal void WriteAttribute(TextWriter outText
, HtmlAttribute att
)
// Quote character: a double quote when the attribute's QuoteType is DoubleQuote, else a single quote.
1848 string quote
= att
.QuoteType
== AttributeValueQuote
.DoubleQuote
? "\"" : "'";
// XML output path.
1849 if (_ownerdocument
.OptionOutputAsXml
)
1851 if (_ownerdocument
.OptionOutputUpperCase
)
1853 name
= att
.XmlName
.ToUpper();
// OptionOutputOriginalCase is applied afterwards and overwrites the name.
// NOTE(review): this takes OriginalName rather than an XmlName-based spelling — confirm intended.
1859 if (_ownerdocument
.OptionOutputOriginalCase
)
1860 name
= att
.OriginalName
;
// name=<quote>value<quote>, with the attribute's XmlValue passed through HtmlEncode.
1862 outText
.Write(" " + name
+ "=" + quote
+ HtmlDocument
.HtmlEncode(att
.XmlValue
) + quote
);
// Presumably the non-XML path (the else is not visible): the name comes from att.Name.
1866 if (_ownerdocument
.OptionOutputUpperCase
)
1868 name
= att
.Name
.ToUpper();
// A name of at least four characters that starts with "<%" and ends with "%>" is written
// bare, with no "=" and no value.
1875 if (att
.Name
.Length
>= 4)
1877 if ((att
.Name
[0] == '<') && (att
.Name
[1] == '%') &&
1878 (att
.Name
[att
.Name
.Length
- 1] == '>') && (att
.Name
[att
.Name
.Length
- 2] == '%'))
1880 outText
.Write(" " + name
);
// With OptionOutputOptimizeAttributeValues, a value containing no LF, CR, tab or space is
// written without quotes; the next write keeps the quotes.
1884 if (_ownerdocument
.OptionOutputOptimizeAttributeValues
)
1886 if (att
.Value
.IndexOfAny(new Char
[] {(char) 10, (char) 13, (char) 9, ' '}
) < 0)
1888 outText
.Write(" " + name
+ "=" + att
.Value
);
1892 outText
.Write(" " + name
+ "=" + quote
+ att
.Value
+ quote
);
// Quoted form using the raw Value (presumably when the optimize option is off).
1897 outText
.Write(" " + name
+ "=" + quote
+ att
.Value
+ quote
);
// Writes attributes to outText through WriteAttribute: this node's own attributes, and also
// the attributes stored on _endnode.
// NOTE(review): the use of the closing parameter, several braces/else branches and the tail of
// one statement are not visible in this listing; presumably closing selects the _endnode part.
1902 internal void WriteAttributes(TextWriter outText
, bool closing
)
// XML output path.
1904 if (_ownerdocument
.OptionOutputAsXml
)
// The statement run when there are no attributes is not visible (likely an early exit).
1906 if (_attributes
== null)
1910 // we use Hashitems to make sure attributes are written only once
1911 foreach (HtmlAttribute att
in _attributes
.Hashitems
.Values
)
1913 WriteAttribute(outText
, att
);
// Non-XML output: write the node's attributes by enumerating the collection itself.
1920 if (_attributes
!= null)
1922 foreach (HtmlAttribute att
in _attributes
)
1924 WriteAttribute(outText
, att
);
// With OptionAddDebuggingAttributes, add synthetic "_closed" and "_children" attributes
// and one "_child_<i>" attribute per child node.
1927 if (_ownerdocument
.OptionAddDebuggingAttributes
)
1929 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_closed", Closed
.ToString()));
1930 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_children", ChildNodes
.Count
.ToString()));
// The value argument of the "_child_<i>" attribute is cut off in this listing.
1933 foreach (HtmlNode n
in ChildNodes
)
1935 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_child_" + i
,
// Checks on _endnode: no end node, an end node without attributes, or an end node that is this
// node itself. The statements these checks guard are not visible (likely early exits).
1943 if (_endnode
== null)
1948 if (_endnode
._attributes
== null)
1953 if (_endnode
== this)
// Write the attributes stored on the end node.
1958 foreach (HtmlAttribute att
in _endnode
._attributes
)
1960 WriteAttribute(outText
, att
);
// Same synthetic debugging attributes as above.
1962 if (_ownerdocument
.OptionAddDebuggingAttributes
)
1964 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_closed", Closed
.ToString()));
1965 WriteAttribute(outText
, _ownerdocument
.CreateAttribute("_children", ChildNodes
.Count
.ToString()));
1972 #region Private Methods
// Builds this node's XPath step relative to its parent, in the form Name[i].
// NOTE(review): the declaration and updates of i, and the loop's exit, are not visible in this
// listing; presumably i is this node's position among same-named siblings — confirm.
1974 private string GetRelativeXpath()
// What is returned for a parentless node is not visible here.
1976 if (ParentNode
== null)
// The document node contributes an empty step.
1978 if (NodeType
== HtmlNodeType
.Document
)
1979 return string.Empty
;
// Walk the parent's children, skipping those whose Name differs from this node's.
1982 foreach (HtmlNode node
in ParentNode
.ChildNodes
)
1984 if (node
.Name
!= Name
) continue;
1991 return Name
+ "[" + i
+ "]";