1 <?xml version=
"1.0" encoding=
"UTF-8"?>
4 <meta http-equiv=
"Content-Type" content=
"text/html; charset=UTF-8"/>
5 <title>HTMLparser: interface for an HTML
4.0 non-verifying parser
</title>
6 <meta name=
"generator" content=
"Libxml2 devhelp stylesheet"/>
7 <link rel=
"start" href=
"index.html" title=
"libxml2 Reference Manual"/>
8 <link rel=
"up" href=
"general.html" title=
"API"/>
9 <link rel=
"stylesheet" href=
"style.css" type=
"text/css"/>
10 <link rel=
"chapter" href=
"general.html" title=
"API"/>
12 <body bgcolor=
"white" text=
"black" link=
"#0000FF" vlink=
"#840084" alink=
"#0000FF">
13 <table class=
"navigation" width=
"100%" summary=
"Navigation header" cellpadding=
"2" cellspacing=
"2">
16 <a accesskey=
"p" href=
"libxml2-DOCBparser.html">
17 <img src=
"left.png" width=
"24" height=
"24" border=
"0" alt=
"Prev"/>
21 <a accesskey=
"u" href=
"general.html">
22 <img src=
"up.png" width=
"24" height=
"24" border=
"0" alt=
"Up"/>
26 <a accesskey=
"h" href=
"index.html">
27 <img src=
"home.png" width=
"24" height=
"24" border=
"0" alt=
"Home"/>
31 <a accesskey=
"n" href=
"libxml2-HTMLtree.html">
32 <img src=
"right.png" width=
"24" height=
"24" border=
"0" alt=
"Next"/>
35 <th width=
"100%" align=
"center">libxml2 Reference Manual
</th>
39 <span class=
"refentrytitle">HTMLparser
</span>
41 <p>HTMLparser - interface for an HTML
4.0 non-verifying parser
</p>
42 <p>This module implements an HTML
4.0 non-verifying parser with an API compatible with that of the XML parser. It should be able to parse
"real world" HTML, even if severely broken from a specification point of view.
</p>
43 <p>Author(s): Daniel Veillard
</p>
44 <div class=
"refsynopsisdiv">
46 <pre class=
"synopsis">#define
<a href=
"#htmlDefaultSubelement">htmlDefaultSubelement
</a>(elt);
47 #define
<a href=
"#htmlElementAllowedHereDesc">htmlElementAllowedHereDesc
</a>(parent, elt);
48 #define
<a href=
"#htmlRequiredAttrs">htmlRequiredAttrs
</a>(elt);
49 typedef
<a href=
"libxml2-parser.html#xmlParserNodeInfo">xmlParserNodeInfo
</a> <a href=
"#htmlParserNodeInfo">htmlParserNodeInfo
</a>;
50 typedef
<a href=
"libxml2-tree.html#xmlParserInput">xmlParserInput
</a> <a href=
"#htmlParserInput">htmlParserInput
</a>;
51 typedef
<a href=
"libxml2-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr
</a> <a href=
"#htmlParserCtxtPtr">htmlParserCtxtPtr
</a>;
52 typedef struct _htmlEntityDesc
<a href=
"#htmlEntityDesc">htmlEntityDesc
</a>;
53 typedef
<a href=
"libxml2-tree.html#xmlDocPtr">xmlDocPtr
</a> <a href=
"#htmlDocPtr">htmlDocPtr
</a>;
54 typedef
<a href=
"libxml2-tree.html#xmlSAXHandlerPtr">xmlSAXHandlerPtr
</a> <a href=
"#htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a>;
55 typedef enum
<a href=
"#htmlStatus">htmlStatus
</a>;
56 typedef
<a href=
"libxml2-tree.html#xmlNodePtr">xmlNodePtr
</a> <a href=
"#htmlNodePtr">htmlNodePtr
</a>;
57 typedef
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> *
<a href=
"#htmlElemDescPtr">htmlElemDescPtr
</a>;
58 typedef struct _htmlElemDesc
<a href=
"#htmlElemDesc">htmlElemDesc
</a>;
59 typedef
<a href=
"libxml2-tree.html#xmlSAXHandler">xmlSAXHandler
</a> <a href=
"#htmlSAXHandler">htmlSAXHandler
</a>;
60 typedef
<a href=
"libxml2-tree.html#xmlParserInputPtr">xmlParserInputPtr
</a> <a href=
"#htmlParserInputPtr">htmlParserInputPtr
</a>;
61 typedef enum
<a href=
"#htmlParserOption">htmlParserOption
</a>;
62 typedef
<a href=
"libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc
</a> *
<a href=
"#htmlEntityDescPtr">htmlEntityDescPtr
</a>;
63 typedef
<a href=
"libxml2-tree.html#xmlParserCtxt">xmlParserCtxt
</a> <a href=
"#htmlParserCtxt">htmlParserCtxt
</a>;
64 int
<a href=
"#htmlIsScriptAttribute">htmlIsScriptAttribute
</a> (const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * name);
65 int
<a href=
"#htmlHandleOmittedElem">htmlHandleOmittedElem
</a> (int val);
66 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadFd">htmlReadFd
</a> (int fd,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options);
67 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadIO">htmlReadIO
</a> (
<a href=
"libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback
</a> ioread,
<br/> <a href=
"libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback
</a> ioclose,
<br/> void * ioctx,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options);
68 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlParseFile">htmlParseFile
</a> (const char * filename,
<br/> const char * encoding);
69 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadDoc">htmlCtxtReadDoc
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options);
70 int
<a href=
"#htmlAutoCloseTag">htmlAutoCloseTag
</a> (
<a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> doc,
<br/> const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * name,
<br/> <a href=
"libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr
</a> elem);
71 int
<a href=
"#htmlParseChunk">htmlParseChunk
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> const char * chunk,
<br/> int size,
<br/> int terminate);
72 const
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> *
<a href=
"#htmlTagLookup">htmlTagLookup
</a> (const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * tag);
73 <a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> <a href=
"#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt
</a> (const char * buffer,
<br/> int size);
74 void
<a href=
"#htmlCtxtReset">htmlCtxtReset
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt);
75 int
<a href=
"#htmlElementAllowedHere">htmlElementAllowedHere
</a> (const
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * parent,
<br/> const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * elt);
76 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadIO">htmlCtxtReadIO
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> <a href=
"libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback
</a> ioread,
<br/> <a href=
"libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback
</a> ioclose,
<br/> void * ioctx,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options);
77 <a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> <a href=
"#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt
</a> (
<a href=
"libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a> sax,
<br/> void * user_data,
<br/> const char * chunk,
<br/> int size,
<br/> const char * filename,
<br/> <a href=
"libxml2-encoding.html#xmlCharEncoding">xmlCharEncoding
</a> enc);
78 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadMemory">htmlReadMemory
</a> (const char * buffer,
<br/> int size,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options);
79 int
<a href=
"#htmlIsAutoClosed">htmlIsAutoClosed
</a> (
<a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> doc,
<br/> <a href=
"libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr
</a> elem);
80 int
<a href=
"#htmlParseCharRef">htmlParseCharRef
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt);
81 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadDoc">htmlReadDoc
</a> (const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options);
82 int
<a href=
"#htmlEncodeEntities">htmlEncodeEntities
</a> (unsigned char * out,
<br/> int * outlen,
<br/> const unsigned char * in,
<br/> int * inlen,
<br/> int quoteChar);
83 <a href=
"libxml2-HTMLparser.html#htmlStatus">htmlStatus
</a> <a href=
"#htmlNodeStatus">htmlNodeStatus
</a> (const
<a href=
"libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr
</a> node,
<br/> int legacy);
84 <a href=
"libxml2-HTMLparser.html#htmlStatus">htmlStatus
</a> <a href=
"#htmlAttrAllowed">htmlAttrAllowed
</a> (const
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * elt,
<br/> const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * attr,
<br/> int legacy);
85 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlSAXParseFile">htmlSAXParseFile
</a> (const char * filename,
<br/> const char * encoding,
<br/> <a href=
"libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a> sax,
<br/> void * userData);
86 const
<a href=
"libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc
</a> *
<a href=
"#htmlParseEntityRef">htmlParseEntityRef
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> ** str);
87 <a href=
"libxml2-HTMLparser.html#htmlStatus">htmlStatus
</a> <a href=
"#htmlElementStatusHere">htmlElementStatusHere
</a> (const
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * parent,
<br/> const
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * elt);
88 const
<a href=
"libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc
</a> *
<a href=
"#htmlEntityValueLookup">htmlEntityValueLookup
</a> (unsigned int value);
89 void
<a href=
"#htmlParseElement">htmlParseElement
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt);
90 int
<a href=
"#UTF8ToHtml">UTF8ToHtml
</a> (unsigned char * out,
<br/> int * outlen,
<br/> const unsigned char * in,
<br/> int * inlen);
91 const
<a href=
"libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc
</a> *
<a href=
"#htmlEntityLookup">htmlEntityLookup
</a> (const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * name);
92 void
<a href=
"#htmlFreeParserCtxt">htmlFreeParserCtxt
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt);
93 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadMemory">htmlCtxtReadMemory
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> const char * buffer,
<br/> int size,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options);
94 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadFd">htmlCtxtReadFd
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> int fd,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options);
95 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadFile">htmlReadFile
</a> (const char * filename,
<br/> const char * encoding,
<br/> int options);
96 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadFile">htmlCtxtReadFile
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> const char * filename,
<br/> const char * encoding,
<br/> int options);
97 int
<a href=
"#htmlParseDocument">htmlParseDocument
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt);
98 <a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> <a href=
"#htmlNewParserCtxt">htmlNewParserCtxt
</a> (void);
99 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlSAXParseDoc">htmlSAXParseDoc
</a> (
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br/> const char * encoding,
<br/> <a href=
"libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a> sax,
<br/> void * userData);
100 int
<a href=
"#htmlCtxtUseOptions">htmlCtxtUseOptions
</a> (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> int options);
101 <a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlParseDoc">htmlParseDoc
</a> (
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br/> const char * encoding);
104 <div class=
"refsect1" lang=
"en">
107 <div class=
"refsect1" lang=
"en">
109 <div class=
"refsect2" lang=
"en">
110 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlDefaultSubelement">Macro
</a>htmlDefaultSubelement
</h3><pre class=
"programlisting">#define
<a href=
"#htmlDefaultSubelement">htmlDefaultSubelement
</a>(elt);
111 </pre><p>Returns the default subelement for this element
</p><div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>elt
</tt></i>:
</span></td><td>HTML element
</td></tr></tbody></table></div>
114 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlElementAllowedHereDesc">Macro
</a>htmlElementAllowedHereDesc
</h3><pre class=
"programlisting">#define
<a href=
"#htmlElementAllowedHereDesc">htmlElementAllowedHereDesc
</a>(parent, elt);
115 </pre><p>Checks whether an HTML element description may be a direct child of the specified element. Returns
1 if allowed;
0 otherwise.
</p><div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>parent
</tt></i>:
</span></td><td>HTML parent element
</td></tr><tr><td><span class=
"term"><i><tt>elt
</tt></i>:
</span></td><td>HTML element
</td></tr></tbody></table></div>
118 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlRequiredAttrs">Macro
</a>htmlRequiredAttrs
</h3><pre class=
"programlisting">#define
<a href=
"#htmlRequiredAttrs">htmlRequiredAttrs
</a>(elt);
119 </pre><p>Returns the attributes required for the specified element.
</p><div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>elt
</tt></i>:
</span></td><td>HTML element
</td></tr></tbody></table></div>
122 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlDocPtr">Typedef
</a>htmlDocPtr
</h3><pre class=
"programlisting"><a href=
"libxml2-tree.html#xmlDocPtr">xmlDocPtr
</a> htmlDocPtr;
126 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlElemDesc">Structure
</a>htmlElemDesc
</h3><pre class=
"programlisting">struct _htmlElemDesc {
127 const char * name : The tag name
128 char startTag : Whether the start tag can be implied
129 char endTag : Whether the end tag can be implied
130 char saveEndTag : Whether the end tag should be saved
131 char empty : Is this an empty element ?
132 char depr : Is this a deprecated element ?
133 char dtd :
1: only in Loose DTD,
2: only Frameset one
134 char isinline : is this a block
0 or inline
1 element
135 const char * desc : the description NRK Jan
.2003 * New fields encapsulating HTML structure
136 const char ** subelts : allowed sub-elements of this element
137 const char * defaultsubelt : subelement for suggested auto-repair if necessary or NULL
138 const char ** attrs_opt : Optional Attributes
139 const char ** attrs_depr : Additional deprecated attributes
140 const char ** attrs_req : Required attributes
145 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlElemDescPtr">Typedef
</a>htmlElemDescPtr
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * htmlElemDescPtr;
149 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlEntityDesc">Structure
</a>htmlEntityDesc
</h3><pre class=
"programlisting">struct _htmlEntityDesc {
150 unsigned int value : the UNICODE value for the character
151 const char * name : The entity name
152 const char * desc : the description
157 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlEntityDescPtr">Typedef
</a>htmlEntityDescPtr
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc
</a> * htmlEntityDescPtr;
161 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlNodePtr">Typedef
</a>htmlNodePtr
</h3><pre class=
"programlisting"><a href=
"libxml2-tree.html#xmlNodePtr">xmlNodePtr
</a> htmlNodePtr;
165 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlParserCtxt">Typedef
</a>htmlParserCtxt
</h3><pre class=
"programlisting"><a href=
"libxml2-tree.html#xmlParserCtxt">xmlParserCtxt
</a> htmlParserCtxt;
169 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlParserCtxtPtr">Typedef
</a>htmlParserCtxtPtr
</h3><pre class=
"programlisting"><a href=
"libxml2-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr
</a> htmlParserCtxtPtr;
173 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlParserInput">Typedef
</a>htmlParserInput
</h3><pre class=
"programlisting"><a href=
"libxml2-tree.html#xmlParserInput">xmlParserInput
</a> htmlParserInput;
177 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlParserInputPtr">Typedef
</a>htmlParserInputPtr
</h3><pre class=
"programlisting"><a href=
"libxml2-tree.html#xmlParserInputPtr">xmlParserInputPtr
</a> htmlParserInputPtr;
181 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlParserNodeInfo">Typedef
</a>htmlParserNodeInfo
</h3><pre class=
"programlisting"><a href=
"libxml2-parser.html#xmlParserNodeInfo">xmlParserNodeInfo
</a> htmlParserNodeInfo;
185 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlParserOption">Enum
</a>htmlParserOption
</h3><pre class=
"programlisting">enum
<a href=
"#htmlParserOption">htmlParserOption
</a> {
186 <a name=
"HTML_PARSE_RECOVER">HTML_PARSE_RECOVER
</a> =
1 /* Relaxed parsing */
187 <a name=
"HTML_PARSE_NODEFDTD">HTML_PARSE_NODEFDTD
</a> =
4 /* do not default a doctype if not found */
188 <a name=
"HTML_PARSE_NOERROR">HTML_PARSE_NOERROR
</a> =
32 /* suppress error reports */
189 <a name=
"HTML_PARSE_NOWARNING">HTML_PARSE_NOWARNING
</a> =
64 /* suppress warning reports */
190 <a name=
"HTML_PARSE_PEDANTIC">HTML_PARSE_PEDANTIC
</a> =
128 /* pedantic error reporting */
191 <a name=
"HTML_PARSE_NOBLANKS">HTML_PARSE_NOBLANKS
</a> =
256 /* remove blank nodes */
192 <a name=
"HTML_PARSE_NONET">HTML_PARSE_NONET
</a> =
2048 /* Forbid network access */
193 <a name=
"HTML_PARSE_NOIMPLIED">HTML_PARSE_NOIMPLIED
</a> =
8192 /* Do not add implied html/body... elements */
194 <a name=
"HTML_PARSE_COMPACT">HTML_PARSE_COMPACT
</a> =
65536 /* compact small text nodes */
195 <a name=
"HTML_PARSE_IGNORE_ENC">HTML_PARSE_IGNORE_ENC
</a> =
2097152 /* ignore internal document encoding hint */
200 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlSAXHandler">Typedef
</a>htmlSAXHandler
</h3><pre class=
"programlisting"><a href=
"libxml2-tree.html#xmlSAXHandler">xmlSAXHandler
</a> htmlSAXHandler;
204 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlSAXHandlerPtr">Typedef
</a>htmlSAXHandlerPtr
</h3><pre class=
"programlisting"><a href=
"libxml2-tree.html#xmlSAXHandlerPtr">xmlSAXHandlerPtr
</a> htmlSAXHandlerPtr;
208 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlStatus">Enum
</a>htmlStatus
</h3><pre class=
"programlisting">enum
<a href=
"#htmlStatus">htmlStatus
</a> {
209 <a name=
"HTML_NA">HTML_NA
</a> =
0 /* something we don't check at all */
210 <a name=
"HTML_INVALID">HTML_INVALID
</a> =
1
211 <a name=
"HTML_DEPRECATED">HTML_DEPRECATED
</a> =
2
212 <a name=
"HTML_VALID">HTML_VALID
</a> =
4
213 <a name=
"HTML_REQUIRED">HTML_REQUIRED
</a> =
12 /* VALID bit set so (
&amp; HTML_VALID ) is TRUE */
218 <div class=
"refsect2" lang=
"en"><h3><a name=
"UTF8ToHtml"/>UTF8ToHtml ()
</h3><pre class=
"programlisting">int UTF8ToHtml (unsigned char * out,
<br/> int * outlen,
<br/> const unsigned char * in,
<br/> int * inlen)
<br/>
219 </pre><p>Take a block of UTF-
8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
</p>
220 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>out
</tt></i>:
</span></td><td>a pointer to an array of bytes to store the result
</td></tr><tr><td><span class=
"term"><i><tt>outlen
</tt></i>:
</span></td><td>the length of @out
</td></tr><tr><td><span class=
"term"><i><tt>in
</tt></i>:
</span></td><td>a pointer to an array of UTF-
8 chars
</td></tr><tr><td><span class=
"term"><i><tt>inlen
</tt></i>:
</span></td><td>the length of @in
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>0 if success, -
2 if the transcoding fails, or -
1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.
</td></tr></tbody></table></div></div>
222 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlAttrAllowed"/>htmlAttrAllowed ()
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlStatus">htmlStatus
</a> htmlAttrAllowed (const
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * elt,
<br/> const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * attr,
<br/> int legacy)
<br/>
223 </pre><p>Checks whether an
<a href=
"libxml2-SAX.html#attribute">attribute
</a> is valid for an element. Has full knowledge of Required and Deprecated attributes.
</p>
224 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>elt
</tt></i>:
</span></td><td>HTML element
</td></tr><tr><td><span class=
"term"><i><tt>attr
</tt></i>:
</span></td><td>HTML
<a href=
"libxml2-SAX.html#attribute">attribute
</a></td></tr><tr><td><span class=
"term"><i><tt>legacy
</tt></i>:
</span></td><td>whether to allow deprecated attributes
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED,
<a href=
"libxml2-HTMLparser.html#HTML_INVALID">HTML_INVALID
</a></td></tr></tbody></table></div></div>
226 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlAutoCloseTag"/>htmlAutoCloseTag ()
</h3><pre class=
"programlisting">int htmlAutoCloseTag (
<a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> doc,
<br/> const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * name,
<br/> <a href=
"libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr
</a> elem)
<br/>
227 </pre><p>The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.
</p>
228 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>doc
</tt></i>:
</span></td><td>the HTML document
</td></tr><tr><td><span class=
"term"><i><tt>name
</tt></i>:
</span></td><td>The tag name
</td></tr><tr><td><span class=
"term"><i><tt>elem
</tt></i>:
</span></td><td>the HTML element
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>1 if autoclose,
0 otherwise
</td></tr></tbody></table></div></div>
230 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlCreateMemoryParserCtxt"/>htmlCreateMemoryParserCtxt ()
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> htmlCreateMemoryParserCtxt (const char * buffer,
<br/> int size)
<br/>
231 </pre><p>Create a parser context for an HTML in-memory document.
</p>
232 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>buffer
</tt></i>:
</span></td><td>a pointer to a char array
</td></tr><tr><td><span class=
"term"><i><tt>size
</tt></i>:
</span></td><td>the size of the array
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the new parser context or NULL
</td></tr></tbody></table></div></div>
234 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlCreatePushParserCtxt"/>htmlCreatePushParserCtxt ()
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> htmlCreatePushParserCtxt (
<a href=
"libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a> sax,
<br/> void * user_data,
<br/> const char * chunk,
<br/> int size,
<br/> const char * filename,
<br/> <a href=
"libxml2-encoding.html#xmlCharEncoding">xmlCharEncoding
</a> enc)
<br/>
235 </pre><p>Create a parser context for using the HTML parser in push mode. The value of @filename is used for fetching external entities and error/warning reports.
</p>
236 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>sax
</tt></i>:
</span></td><td>a SAX handler
</td></tr><tr><td><span class=
"term"><i><tt>user_data
</tt></i>:
</span></td><td>The user data returned on SAX callbacks
</td></tr><tr><td><span class=
"term"><i><tt>chunk
</tt></i>:
</span></td><td>a pointer to an array of chars
</td></tr><tr><td><span class=
"term"><i><tt>size
</tt></i>:
</span></td><td>number of chars in the array
</td></tr><tr><td><span class=
"term"><i><tt>filename
</tt></i>:
</span></td><td>an optional file name or URI
</td></tr><tr><td><span class=
"term"><i><tt>enc
</tt></i>:
</span></td><td>an optional encoding
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the new parser context or NULL
</td></tr></tbody></table></div></div>
238 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlCtxtReadDoc"/>htmlCtxtReadDoc ()
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadDoc (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options)
<br/>
239 </pre><p>Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.
</p>
240 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>cur
</tt></i>:
</span></td><td>a pointer to a zero terminated string
</td></tr><tr><td><span class=
"term"><i><tt>URL
</tt></i>:
</span></td><td>the base URL to use for the document
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div></div>
242 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlCtxtReadFd"/>htmlCtxtReadFd ()
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadFd (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> int fd,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options)
<br/>
243 </pre><p>Parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context.
</p>
244 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>fd
</tt></i>:
</span></td><td>an open file descriptor
</td></tr><tr><td><span class=
"term"><i><tt>URL
</tt></i>:
</span></td><td>the base URL to use for the document
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div></div>
246 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlCtxtReadFile"/>htmlCtxtReadFile ()
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadFile (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> const char * filename,
<br/> const char * encoding,
<br/> int options)
<br/>
247 </pre><p>Parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context.
</p>
248 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>filename
</tt></i>:
</span></td><td>a file or URL
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div></div>
250 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlCtxtReadIO"/>htmlCtxtReadIO ()
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadIO (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> <a href=
"libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback
</a> ioread,
<br/> <a href=
"libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback
</a> ioclose,
<br/> void * ioctx,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options)
<br/>
251 </pre><p>parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context
</p>
252 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>ioread
</tt></i>:
</span></td><td>an I/O read function
</td></tr><tr><td><span class=
"term"><i><tt>ioclose
</tt></i>:
</span></td><td>an I/O close function
</td></tr><tr><td><span class=
"term"><i><tt>ioctx
</tt></i>:
</span></td><td>an I/O handler
</td></tr><tr><td><span class=
"term"><i><tt>URL
</tt></i>:
</span></td><td>the base URL to use for the document
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div></div>
254 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlCtxtReadMemory"/>htmlCtxtReadMemory ()
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadMemory (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> const char * buffer,
<br/> int size,
<br/> const char * URL,
<br/> const char * encoding,
<br/> int options)
<br/>
255 </pre><p>Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.
</p>
256 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>buffer
</tt></i>:
</span></td><td>a pointer to a char array
</td></tr><tr><td><span class=
"term"><i><tt>size
</tt></i>:
</span></td><td>the size of the array
</td></tr><tr><td><span class=
"term"><i><tt>URL
</tt></i>:
</span></td><td>the base URL to use for the document
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div></div>
258 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlCtxtReset"/>htmlCtxtReset ()
</h3><pre class=
"programlisting">void htmlCtxtReset (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt)
<br/>
259 </pre><p>Reset a parser context
</p>
260 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr></tbody></table></div></div>
262 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlCtxtUseOptions"/>htmlCtxtUseOptions ()
</h3><pre class=
"programlisting">int htmlCtxtUseOptions (
<a href=
"libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br/> int options)
<br/>
263 </pre><p>Applies the options to the parser context
</p>
264 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>0 in case of success, the set of unknown or unimplemented options in case of error.
</td></tr></tbody></table></div></div>
266 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlElementAllowedHere"/>htmlElementAllowedHere ()
</h3><pre class=
"programlisting">int htmlElementAllowedHere (const
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * parent,
<br/> const
<a href=
"libxml2-xmlstring.html#xmlChar">xmlChar
</a> * elt)
<br/>
267 </pre><p>Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements
</p>
268 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>parent
</tt></i>:
</span></td><td>HTML parent element
</td></tr><tr><td><span class=
"term"><i><tt>elt
</tt></i>:
</span></td><td>HTML element
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>1 if allowed;
0 otherwise.
</td></tr></tbody></table></div></div>
270 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlElementStatusHere"/>htmlElementStatusHere ()
</h3><pre class=
"programlisting"><a href=
"libxml2-HTMLparser.html#htmlStatus">htmlStatus
</a> htmlElementStatusHere (const
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * parent,
<br/> const
<a href=
"libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * elt)
<br/>
271 </pre><p>Checks whether an HTML element may be a direct child of a parent element, and if so whether it is valid or deprecated.
</p>
272 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>parent
</tt></i>:
</span></td><td>HTML parent element
</td></tr><tr><td><span class=
"term"><i><tt>elt
</tt></i>:
</span></td><td>HTML element
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>one of HTML_VALID, HTML_DEPRECATED,
<a href=
"libxml2-HTMLparser.html#HTML_INVALID">HTML_INVALID
</a></td></tr></tbody></table></div></div>
274 <div class=
"refsect2" lang=
"en"><h3><a name=
"htmlEncodeEntities"/>htmlEncodeEntities ()
</h3><pre class=
"programlisting">int htmlEncodeEntities (unsigned char * out,
<br/> int * outlen,
<br/> const unsigned char * in,
<br/> int * inlen,
<br/> int quoteChar)
<br/>
275 </pre><p>Take a block of UTF-
8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
</p>
276 <div class=
"variablelist"><table border=
"0"><col align=
"left"/><tbody><tr><td><span class=
"term"><i><tt>out
</tt></i>:
</span></td><td>a pointer to an array of bytes to store the result
</td></tr><tr><td><span class=
"term"><i><tt>outlen
</tt></i>:
</span></td><td>the length of @out
</td></tr><tr><td><span class=
"term"><i><tt>in
</tt></i>:
</span></td><td>a pointer to an array of UTF-
8 chars
</td></tr><tr><td><span class=
"term"><i><tt>inlen
</tt></i>:
</span></td><td>the length of @in
</td></tr><tr><td><span class=
"term"><i><tt>quoteChar
</tt></i>:
</span></td><td>the quote character to escape (' or
") or zero.</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.</td></tr></tbody></table></div></div>
278 <div class="refsect2
" lang="en
"><h3><a name="htmlEntityLookup
"/>htmlEntityLookup ()</h3><pre class="programlisting
">const <a href="libxml2-HTMLparser.html#htmlEntityDesc
">htmlEntityDesc</a> * htmlEntityLookup (const <a href="libxml2-xmlstring.html#xmlChar
">xmlChar</a> * name)<br/>
279 </pre><p>Lookup the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.</p>
280 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>name</tt></i>:</span></td><td>the entity name</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the associated <a href="libxml2-HTMLparser.html#htmlEntityDescPtr
">htmlEntityDescPtr</a> if found, NULL otherwise.</td></tr></tbody></table></div></div>
282 <div class="refsect2
" lang="en
"><h3><a name="htmlEntityValueLookup
"/>htmlEntityValueLookup ()</h3><pre class="programlisting
">const <a href="libxml2-HTMLparser.html#htmlEntityDesc
">htmlEntityDesc</a> * htmlEntityValueLookup (unsigned int value)<br/>
283 </pre><p>Lookup the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.</p>
284 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>value</tt></i>:</span></td><td>the entity's unicode value</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the associated <a href="libxml2-HTMLparser.html#htmlEntityDescPtr
">htmlEntityDescPtr</a> if found, NULL otherwise.</td></tr></tbody></table></div></div>
286 <div class="refsect2
" lang="en
"><h3><a name="htmlFreeParserCtxt
"/>htmlFreeParserCtxt ()</h3><pre class="programlisting
">void htmlFreeParserCtxt (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt)<br/>
287 </pre><p>Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.</p>
288 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr></tbody></table></div></div>
290 <div class="refsect2
" lang="en
"><h3><a name="htmlHandleOmittedElem
"/>htmlHandleOmittedElem ()</h3><pre class="programlisting
">int htmlHandleOmittedElem (int val)<br/>
291 </pre><p>Set and return the previous value for handling HTML omitted tags.</p>
292 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>val</tt></i>:</span></td><td>int 0 or 1</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the last value: 0 for no handling, 1 for auto insertion.</td></tr></tbody></table></div></div>
294 <div class="refsect2
" lang="en
"><h3><a name="htmlIsAutoClosed
"/>htmlIsAutoClosed ()</h3><pre class="programlisting
">int htmlIsAutoClosed (<a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> doc, <br/> <a href="libxml2-HTMLparser.html#htmlNodePtr
">htmlNodePtr</a> elem)<br/>
295 </pre><p>The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if a tag is autoclosed by one of its children</p>
296 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>doc</tt></i>:</span></td><td>the HTML document</td></tr><tr><td><span class="term
"><i><tt>elem</tt></i>:</span></td><td>the HTML element</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>1 if autoclosed, 0 otherwise</td></tr></tbody></table></div></div>
298 <div class="refsect2
" lang="en
"><h3><a name="htmlIsScriptAttribute
"/>htmlIsScriptAttribute ()</h3><pre class="programlisting
">int htmlIsScriptAttribute (const <a href="libxml2-xmlstring.html#xmlChar
">xmlChar</a> * name)<br/>
299 </pre><p>Check if an <a href="libxml2-SAX.html#attribute
">attribute</a> is of content type Script</p>
300 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>name</tt></i>:</span></td><td>an <a href="libxml2-SAX.html#attribute
">attribute</a> name</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>1 if the <a href="libxml2-SAX.html#attribute
">attribute</a> is a script, 0 otherwise</td></tr></tbody></table></div></div>
302 <div class="refsect2
" lang="en
"><h3><a name="htmlNewParserCtxt
"/>htmlNewParserCtxt ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> htmlNewParserCtxt (void)<br/>
303 </pre><p>Allocate and initialize a new parser context.</p>
304 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the <a href="libxml2-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> or NULL in case of allocation error</td></tr></tbody></table></div></div>
306 <div class="refsect2
" lang="en
"><h3><a name="htmlNodeStatus
"/>htmlNodeStatus ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlStatus
">htmlStatus</a> htmlNodeStatus (const <a href="libxml2-HTMLparser.html#htmlNodePtr
">htmlNodePtr</a> node, <br/> int legacy)<br/>
307 </pre><p>Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)</p>
308 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>node</tt></i>:</span></td><td>an <a href="libxml2-HTMLparser.html#htmlNodePtr
">htmlNodePtr</a> in a tree</td></tr><tr><td><span class="term
"><i><tt>legacy</tt></i>:</span></td><td>whether to allow deprecated elements (YES is faster here for Element nodes)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>for Element nodes, a return from <a href="libxml2-HTMLparser.html#htmlElementAllowedHere
">htmlElementAllowedHere</a> (if legacy allowed) or <a href="libxml2-HTMLparser.html#htmlElementStatusHere
">htmlElementStatusHere</a> (otherwise). For Attribute nodes, a return from <a href="libxml2-HTMLparser.html#htmlAttrAllowed
">htmlAttrAllowed</a>; for other nodes, <a href="libxml2-HTMLparser.html#HTML_NA
">HTML_NA</a> (no checks performed)</td></tr></tbody></table></div></div>
310 <div class="refsect2
" lang="en
"><h3><a name="htmlParseCharRef
"/>htmlParseCharRef ()</h3><pre class="programlisting
">int htmlParseCharRef (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt)<br/>
311 </pre><p>parse Reference declarations [66] CharRef ::= '&amp;#' [0-9]+ ';' | '&amp;#x' [0-9a-fA-F]+ ';'</p>
312 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the value parsed (as an int)</td></tr></tbody></table></div></div>
314 <div class="refsect2
" lang="en
"><h3><a name="htmlParseChunk
"/>htmlParseChunk ()</h3><pre class="programlisting
">int htmlParseChunk (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt, <br/> const char * chunk, <br/> int size, <br/> int terminate)<br/>
315 </pre><p>Parse a Chunk of memory</p>
316 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term
"><i><tt>chunk</tt></i>:</span></td><td>a char array</td></tr><tr><td><span class="term
"><i><tt>size</tt></i>:</span></td><td>the size in bytes of the chunk</td></tr><tr><td><span class="term
"><i><tt>terminate</tt></i>:</span></td><td>last chunk indicator</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>zero if no error, the <a href="libxml2-xmlerror.html#xmlParserErrors
">xmlParserErrors</a> otherwise.</td></tr></tbody></table></div></div>
318 <div class="refsect2
" lang="en
"><h3><a name="htmlParseDoc
"/>htmlParseDoc ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlParseDoc (<a href="libxml2-xmlstring.html#xmlChar
">xmlChar</a> * cur, <br/> const char * encoding)<br/>
319 </pre><p>parse an HTML in-memory document and build a tree.</p>
320 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>cur</tt></i>:</span></td><td>a pointer to an array of <a href="libxml2-xmlstring.html#xmlChar
">xmlChar</a></td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
322 <div class="refsect2
" lang="en
"><h3><a name="htmlParseDocument
"/>htmlParseDocument ()</h3><pre class="programlisting
">int htmlParseDocument (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt)<br/>
323 </pre><p>parse an HTML document (and build a tree if using the standard SAX interface).</p>
324 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>0, or -1 in case of error. The parser context is augmented as a result of the parsing.</td></tr></tbody></table></div></div>
326 <div class="refsect2
" lang="en
"><h3><a name="htmlParseElement
"/>htmlParseElement ()</h3><pre class="programlisting
">void htmlParseElement (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt)<br/>
327 </pre><p>parse an HTML element; this is highly recursive. This is kept for compatibility with previous code versions. [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue</p>
328 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr></tbody></table></div></div>
330 <div class="refsect2
" lang="en
"><h3><a name="htmlParseEntityRef
"/>htmlParseEntityRef ()</h3><pre class="programlisting
">const <a href="libxml2-HTMLparser.html#htmlEntityDesc
">htmlEntityDesc</a> * htmlParseEntityRef (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt, <br/> const <a href="libxml2-xmlstring.html#xmlChar
">xmlChar</a> ** str)<br/>
331 </pre><p>parse an HTML ENTITY reference [68] EntityRef ::= '&amp;' Name ';'</p>
332 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term
"><i><tt>str</tt></i>:</span></td><td>location to store the entity name</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the associated <a href="libxml2-HTMLparser.html#htmlEntityDescPtr
">htmlEntityDescPtr</a> if found, or NULL otherwise, if non-NULL *str will have to be freed by the caller.</td></tr></tbody></table></div></div>
334 <div class="refsect2
" lang="en
"><h3><a name="htmlParseFile
"/>htmlParseFile ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlParseFile (const char * filename, <br/> const char * encoding)<br/>
335 </pre><p>parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.</p>
336 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>filename</tt></i>:</span></td><td>the filename</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
338 <div class="refsect2
" lang="en
"><h3><a name="htmlReadDoc
"/>htmlReadDoc ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadDoc (const <a href="libxml2-xmlstring.html#xmlChar
">xmlChar</a> * cur, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
339 </pre><p>parse an HTML in-memory document and build a tree.</p>
340 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>cur</tt></i>:</span></td><td>a pointer to a zero terminated string</td></tr><tr><td><span class="term
"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
342 <div class="refsect2
" lang="en
"><h3><a name="htmlReadFd
"/>htmlReadFd ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadFd (int fd, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
343 </pre><p>parse an HTML document from a file descriptor and build a tree.</p>
344 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>fd</tt></i>:</span></td><td>an open file descriptor</td></tr><tr><td><span class="term
"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
346 <div class="refsect2
" lang="en
"><h3><a name="htmlReadFile
"/>htmlReadFile ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadFile (const char * filename, <br/> const char * encoding, <br/> int options)<br/>
347 </pre><p>parse an HTML file from the filesystem or the network.</p>
348 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>filename</tt></i>:</span></td><td>a file or URL</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
350 <div class="refsect2
" lang="en
"><h3><a name="htmlReadIO
"/>htmlReadIO ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadIO (<a href="libxml2-xmlIO.html#xmlInputReadCallback
">xmlInputReadCallback</a> ioread, <br/> <a href="libxml2-xmlIO.html#xmlInputCloseCallback
">xmlInputCloseCallback</a> ioclose, <br/> void * ioctx, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
351 </pre><p>parse an HTML document from I/O functions and source and build a tree.</p>
352 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>ioread</tt></i>:</span></td><td>an I/O read function</td></tr><tr><td><span class="term
"><i><tt>ioclose</tt></i>:</span></td><td>an I/O close function</td></tr><tr><td><span class="term
"><i><tt>ioctx</tt></i>:</span></td><td>an I/O handler</td></tr><tr><td><span class="term
"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
354 <div class="refsect2
" lang="en
"><h3><a name="htmlReadMemory
"/>htmlReadMemory ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadMemory (const char * buffer, <br/> int size, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
355 </pre><p>parse an HTML in-memory document and build a tree.</p>
356 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>buffer</tt></i>:</span></td><td>a pointer to a char array</td></tr><tr><td><span class="term
"><i><tt>size</tt></i>:</span></td><td>the size of the array</td></tr><tr><td><span class="term
"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
358 <div class="refsect2
" lang="en
"><h3><a name="htmlSAXParseDoc
"/>htmlSAXParseDoc ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlSAXParseDoc (<a href="libxml2-xmlstring.html#xmlChar
">xmlChar</a> * cur, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr
">htmlSAXHandlerPtr</a> sax, <br/> void * userData)<br/>
359 </pre><p>Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fallback to the default DOM behavior and return a tree.</p>
360 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>cur</tt></i>:</span></td><td>a pointer to an array of <a href="libxml2-xmlstring.html#xmlChar
">xmlChar</a></td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>sax</tt></i>:</span></td><td>the SAX handler block</td></tr><tr><td><span class="term
"><i><tt>userData</tt></i>:</span></td><td>if using SAX, this pointer will be provided on callbacks.</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree unless SAX is NULL or the document is not well formed.</td></tr></tbody></table></div></div>
362 <div class="refsect2
" lang="en
"><h3><a name="htmlSAXParseFile
"/>htmlSAXParseFile ()</h3><pre class="programlisting
"><a href="libxml2-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlSAXParseFile (const char * filename, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr
">htmlSAXHandlerPtr</a> sax, <br/> void * userData)<br/>
363 </pre><p>parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.</p>
364 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>filename</tt></i>:</span></td><td>the filename</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>sax</tt></i>:</span></td><td>the SAX handler block</td></tr><tr><td><span class="term
"><i><tt>userData</tt></i>:</span></td><td>if using SAX, this pointer will be provided on callbacks.</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree unless SAX is NULL or the document is not well formed.</td></tr></tbody></table></div></div>
366 <div class="refsect2
" lang="en
"><h3><a name="htmlTagLookup
"/>htmlTagLookup ()</h3><pre class="programlisting
">const <a href="libxml2-HTMLparser.html#htmlElemDesc
">htmlElemDesc</a> * htmlTagLookup (const <a href="libxml2-xmlstring.html#xmlChar
">xmlChar</a> * tag)<br/>
367 </pre><p>Lookup the HTML tag in the ElementTable</p>
368 <div class="variablelist
"><table border="0"><col align="left
"/><tbody><tr><td><span class="term
"><i><tt>tag</tt></i>:</span></td><td>The tag name in lowercase</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the related <a href="libxml2-HTMLparser.html#htmlElemDescPtr
">htmlElemDescPtr</a> or NULL if not found.</td></tr></tbody></table></div></div>