2 * HTMLtree.c : implementation of access functions for an HTML tree.
4 * See Copyright for the status of this software.
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
18 #include <libxml/xmlmemory.h>
19 #include <libxml/HTMLparser.h>
20 #include <libxml/HTMLtree.h>
21 #include <libxml/entities.h>
22 #include <libxml/valid.h>
23 #include <libxml/xmlerror.h>
24 #include <libxml/parserInternals.h>
25 #include <libxml/globals.h>
26 #include <libxml/uri.h>
30 /************************************************************************
32 * Getting/Setting encoding meta tags *
34 ************************************************************************/
37 * htmlGetMetaEncoding:
40 * Encoding definition lookup in the Meta tags
42 * Returns the current encoding as flagged in the HTML source
45 htmlGetMetaEncoding(htmlDocPtr doc
) {
47 const xmlChar
*content
;
48 const xmlChar
*encoding
;
58 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
59 if (xmlStrEqual(cur
->name
, BAD_CAST
"html"))
61 if (xmlStrEqual(cur
->name
, BAD_CAST
"head"))
63 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta"))
76 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
77 if (xmlStrEqual(cur
->name
, BAD_CAST
"head"))
79 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta"))
90 * Search the meta elements
94 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
95 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta")) {
96 xmlAttrPtr attr
= cur
->properties
;
102 while (attr
!= NULL
) {
103 if ((attr
->children
!= NULL
) &&
104 (attr
->children
->type
== XML_TEXT_NODE
) &&
105 (attr
->children
->next
== NULL
)) {
106 value
= attr
->children
->content
;
107 if ((!xmlStrcasecmp(attr
->name
, BAD_CAST
"http-equiv"))
108 && (!xmlStrcasecmp(value
, BAD_CAST
"Content-Type")))
110 else if ((value
!= NULL
)
111 && (!xmlStrcasecmp(attr
->name
, BAD_CAST
"content")))
113 if ((http
!= 0) && (content
!= NULL
))
125 encoding
= xmlStrstr(content
, BAD_CAST
"charset=");
126 if (encoding
== NULL
)
127 encoding
= xmlStrstr(content
, BAD_CAST
"Charset=");
128 if (encoding
== NULL
)
129 encoding
= xmlStrstr(content
, BAD_CAST
"CHARSET=");
130 if (encoding
!= NULL
) {
133 encoding
= xmlStrstr(content
, BAD_CAST
"charset =");
134 if (encoding
== NULL
)
135 encoding
= xmlStrstr(content
, BAD_CAST
"Charset =");
136 if (encoding
== NULL
)
137 encoding
= xmlStrstr(content
, BAD_CAST
"CHARSET =");
138 if (encoding
!= NULL
)
141 if (encoding
!= NULL
) {
142 while ((*encoding
== ' ') || (*encoding
== '\t')) encoding
++;
148 * htmlSetMetaEncoding:
150 * @encoding: the encoding string
152 * Sets the current encoding in the Meta tags
153 * NOTE: this will not change the document content encoding, just
154 * the META flag associated.
156 * Returns 0 in case of success and -1 in case of error
159 htmlSetMetaEncoding(htmlDocPtr doc
, const xmlChar
*encoding
) {
160 htmlNodePtr cur
, meta
= NULL
, head
= NULL
;
161 const xmlChar
*content
= NULL
;
162 char newcontent
[100];
169 /* html isn't a real encoding, it's just libxml2's way to get entities */
170 if (!xmlStrcasecmp(encoding
, BAD_CAST
"html"))
173 if (encoding
!= NULL
) {
174 snprintf(newcontent
, sizeof(newcontent
), "text/html; charset=%s",
176 newcontent
[sizeof(newcontent
) - 1] = 0;
184 while (cur
!= NULL
) {
185 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
186 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"html") == 0)
188 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"head") == 0)
190 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0)
202 while (cur
!= NULL
) {
203 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
204 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"head") == 0)
206 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0) {
217 if (cur
->children
== NULL
)
223 * Search and update all the remaining meta elements carrying
224 * encoding information
226 while (cur
!= NULL
) {
227 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
228 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0) {
229 xmlAttrPtr attr
= cur
->properties
;
231 const xmlChar
*value
;
235 while (attr
!= NULL
) {
236 if ((attr
->children
!= NULL
) &&
237 (attr
->children
->type
== XML_TEXT_NODE
) &&
238 (attr
->children
->next
== NULL
)) {
239 value
= attr
->children
->content
;
240 if ((!xmlStrcasecmp(attr
->name
, BAD_CAST
"http-equiv"))
241 && (!xmlStrcasecmp(value
, BAD_CAST
"Content-Type")))
245 if ((value
!= NULL
) &&
246 (!xmlStrcasecmp(attr
->name
, BAD_CAST
"content")))
249 if ((http
!= 0) && (content
!= NULL
))
254 if ((http
!= 0) && (content
!= NULL
)) {
265 if ((encoding
!= NULL
) && (head
!= NULL
)) {
267 * Create a new Meta element with the right attributes
270 meta
= xmlNewDocNode(doc
, NULL
, BAD_CAST
"meta", NULL
);
271 if (head
->children
== NULL
)
272 xmlAddChild(head
, meta
);
274 xmlAddPrevSibling(head
->children
, meta
);
275 xmlNewProp(meta
, BAD_CAST
"http-equiv", BAD_CAST
"Content-Type");
276 xmlNewProp(meta
, BAD_CAST
"content", BAD_CAST newcontent
);
279 /* remove the meta tag if NULL is passed */
280 if (encoding
== NULL
) {
284 /* change the document only if there is a real encoding change */
285 else if (xmlStrcasestr(content
, encoding
) == NULL
) {
286 xmlSetProp(meta
, BAD_CAST
"content", BAD_CAST newcontent
);
297 * These are the HTML attributes which will be output
298 * in minimized form, i.e. <option selected="selected"> will be
299 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
302 static const char* const htmlBooleanAttrs
[] = {
303 "checked", "compact", "declare", "defer", "disabled", "ismap",
304 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
311 * @name: the name of the attribute to check
313 * Determine if a given attribute is a boolean attribute.
315 * returns: false if the attribute is not boolean, true otherwise.
318 htmlIsBooleanAttr(const xmlChar
*name
)
322 while (htmlBooleanAttrs
[i
] != NULL
) {
323 if (xmlStrcasecmp((const xmlChar
*)htmlBooleanAttrs
[i
], name
) == 0)
330 #ifdef LIBXML_OUTPUT_ENABLED
332 * private routine exported from xmlIO.c
335 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder
);
336 /************************************************************************
338 * Output error handlers *
340 ************************************************************************/
343 * @extra: extra information
345 * Handle an out of memory condition
348 htmlSaveErrMemory(const char *extra
)
350 __xmlSimpleError(XML_FROM_OUTPUT
, XML_ERR_NO_MEMORY
, NULL
, NULL
, extra
);
355 * @code: the error number
356 * @node: the location of the error.
357 * @extra: extra information
359 * Handle an output (save) error identified by @code
362 htmlSaveErr(int code
, xmlNodePtr node
, const char *extra
)
364 const char *msg
= NULL
;
367 case XML_SAVE_NOT_UTF8
:
368 msg
= "string is not in UTF-8\n";
370 case XML_SAVE_CHAR_INVALID
:
371 msg
= "invalid character value\n";
373 case XML_SAVE_UNKNOWN_ENCODING
:
374 msg
= "unknown encoding %s\n";
376 case XML_SAVE_NO_DOCTYPE
:
377 msg
= "HTML has no DOCTYPE\n";
380 msg
= "unexpected error number\n";
382 __xmlSimpleError(XML_FROM_OUTPUT
, code
, node
, msg
, extra
);
385 /************************************************************************
387 * Dumping HTML tree content to a simple buffer *
389 ************************************************************************/
392 * htmlBufNodeDumpFormat:
393 * @buf: the xmlBufPtr output
395 * @cur: the current node
396 * @format: should formatting spaces be added
398 * Dump an HTML node, recursive behaviour, children are printed too.
400 * Returns the number of bytes written or -1 in case of error
403 htmlBufNodeDumpFormat(xmlBufPtr buf
, xmlDocPtr doc
, xmlNodePtr cur
,
407 xmlOutputBufferPtr outbuf
;
415 outbuf
= (xmlOutputBufferPtr
) xmlMalloc(sizeof(xmlOutputBuffer
));
416 if (outbuf
== NULL
) {
417 htmlSaveErrMemory("allocating HTML output buffer");
420 memset(outbuf
, 0, (size_t) sizeof(xmlOutputBuffer
));
421 outbuf
->buffer
= buf
;
422 outbuf
->encoder
= NULL
;
423 outbuf
->writecallback
= NULL
;
424 outbuf
->closecallback
= NULL
;
425 outbuf
->context
= NULL
;
428 use
= xmlBufUse(buf
);
429 htmlNodeDumpFormatOutput(outbuf
, doc
, cur
, NULL
, format
);
431 ret
= xmlBufUse(buf
) - use
;
437 * @buf: the HTML buffer output
439 * @cur: the current node
441 * Dump an HTML node, recursive behaviour, children are printed too,
442 * and formatting returns are added.
444 * Returns the number of bytes written or -1 in case of error
447 htmlNodeDump(xmlBufferPtr buf
, xmlDocPtr doc
, xmlNodePtr cur
) {
451 if ((buf
== NULL
) || (cur
== NULL
))
455 buffer
= xmlBufFromBuffer(buf
);
459 ret
= htmlBufNodeDumpFormat(buffer
, doc
, cur
, 1);
461 xmlBufBackToBuffer(buffer
);
469 * htmlNodeDumpFileFormat:
470 * @out: the FILE pointer
472 * @cur: the current node
473 * @encoding: the document encoding
474 * @format: should formatting spaces be added
476 * Dump an HTML node, recursive behaviour, children are printed too.
478 * TODO: if encoding == NULL try to save in the doc encoding
480 * returns: the number of bytes written or -1 in case of failure.
483 htmlNodeDumpFileFormat(FILE *out
, xmlDocPtr doc
,
484 xmlNodePtr cur
, const char *encoding
, int format
) {
485 xmlOutputBufferPtr buf
;
486 xmlCharEncodingHandlerPtr handler
= NULL
;
491 if (encoding
!= NULL
) {
494 enc
= xmlParseCharEncoding(encoding
);
495 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
496 handler
= xmlFindCharEncodingHandler(encoding
);
498 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
502 * Fallback to HTML or ASCII when the encoding is unspecified
505 handler
= xmlFindCharEncodingHandler("HTML");
507 handler
= xmlFindCharEncodingHandler("ascii");
511 * save the content to a temp buffer.
513 buf
= xmlOutputBufferCreateFile(out
, handler
);
514 if (buf
== NULL
) return(0);
516 htmlNodeDumpFormatOutput(buf
, doc
, cur
, NULL
, format
);
518 ret
= xmlOutputBufferClose(buf
);
524 * @out: the FILE pointer
526 * @cur: the current node
528 * Dump an HTML node, recursive behaviour, children are printed too,
529 * and formatting returns are added.
532 htmlNodeDumpFile(FILE *out
, xmlDocPtr doc
, xmlNodePtr cur
) {
533 htmlNodeDumpFileFormat(out
, doc
, cur
, NULL
, 1);
537 * htmlDocDumpMemoryFormat:
539 * @mem: OUT: the memory pointer
540 * @size: OUT: the memory length
541 * @format: should formatting spaces be added
543 * Dump an HTML document in memory and return the xmlChar * and its size.
544 * It's up to the caller to free the memory.
547 htmlDocDumpMemoryFormat(xmlDocPtr cur
, xmlChar
**mem
, int *size
, int format
) {
548 xmlOutputBufferPtr buf
;
549 xmlCharEncodingHandlerPtr handler
= NULL
;
550 const char *encoding
;
554 if ((mem
== NULL
) || (size
== NULL
))
562 encoding
= (const char *) htmlGetMetaEncoding(cur
);
564 if (encoding
!= NULL
) {
567 enc
= xmlParseCharEncoding(encoding
);
568 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
569 handler
= xmlFindCharEncodingHandler(encoding
);
571 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
576 * Fallback to HTML or ASCII when the encoding is unspecified
579 handler
= xmlFindCharEncodingHandler("HTML");
581 handler
= xmlFindCharEncodingHandler("ascii");
584 buf
= xmlAllocOutputBufferInternal(handler
);
591 htmlDocContentDumpFormatOutput(buf
, cur
, NULL
, format
);
593 xmlOutputBufferFlush(buf
);
594 if (buf
->conv
!= NULL
) {
595 *size
= xmlBufUse(buf
->conv
);
596 *mem
= xmlStrndup(xmlBufContent(buf
->conv
), *size
);
598 *size
= xmlBufUse(buf
->buffer
);
599 *mem
= xmlStrndup(xmlBufContent(buf
->buffer
), *size
);
601 (void)xmlOutputBufferClose(buf
);
607 * @mem: OUT: the memory pointer
608 * @size: OUT: the memory length
610 * Dump an HTML document in memory and return the xmlChar * and its size.
611 * It's up to the caller to free the memory.
614 htmlDocDumpMemory(xmlDocPtr cur
, xmlChar
**mem
, int *size
) {
615 htmlDocDumpMemoryFormat(cur
, mem
, size
, 1);
619 /************************************************************************
621 * Dumping HTML tree content to an I/O output buffer *
623 ************************************************************************/
625 void xmlNsListDumpOutput(xmlOutputBufferPtr buf
, xmlNsPtr cur
);
629 * @buf: the HTML buffer output
631 * @encoding: the encoding string
633 * TODO: check whether encoding is needed
635 * Dump the HTML document DTD, if any.
638 htmlDtdDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
639 const char *encoding ATTRIBUTE_UNUSED
) {
640 xmlDtdPtr cur
= doc
->intSubset
;
643 htmlSaveErr(XML_SAVE_NO_DOCTYPE
, (xmlNodePtr
) doc
, NULL
);
646 xmlOutputBufferWriteString(buf
, "<!DOCTYPE ");
647 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
648 if (cur
->ExternalID
!= NULL
) {
649 xmlOutputBufferWriteString(buf
, " PUBLIC ");
650 xmlBufWriteQuotedString(buf
->buffer
, cur
->ExternalID
);
651 if (cur
->SystemID
!= NULL
) {
652 xmlOutputBufferWriteString(buf
, " ");
653 xmlBufWriteQuotedString(buf
->buffer
, cur
->SystemID
);
655 } else if (cur
->SystemID
!= NULL
&&
656 xmlStrcmp(cur
->SystemID
, BAD_CAST
"about:legacy-compat")) {
657 xmlOutputBufferWriteString(buf
, " SYSTEM ");
658 xmlBufWriteQuotedString(buf
->buffer
, cur
->SystemID
);
660 xmlOutputBufferWriteString(buf
, ">\n");
664 * htmlAttrDumpOutput:
665 * @buf: the HTML buffer output
667 * @cur: the attribute pointer
669 * Dump an HTML attribute
672 htmlAttrDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
, xmlAttrPtr cur
) {
676 * The html output method should not escape a & character
677 * occurring in an attribute value immediately followed by
678 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
679 * This is implemented in xmlEncodeEntitiesReentrant
685 xmlOutputBufferWriteString(buf
, " ");
686 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
687 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
688 xmlOutputBufferWriteString(buf
, ":");
690 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
691 if ((cur
->children
!= NULL
) && (!htmlIsBooleanAttr(cur
->name
))) {
692 value
= xmlNodeListGetString(doc
, cur
->children
, 0);
694 xmlOutputBufferWriteString(buf
, "=");
695 if ((cur
->ns
== NULL
) && (cur
->parent
!= NULL
) &&
696 (cur
->parent
->ns
== NULL
) &&
697 ((!xmlStrcasecmp(cur
->name
, BAD_CAST
"href")) ||
698 (!xmlStrcasecmp(cur
->name
, BAD_CAST
"action")) ||
699 (!xmlStrcasecmp(cur
->name
, BAD_CAST
"src")) ||
700 ((!xmlStrcasecmp(cur
->name
, BAD_CAST
"name")) &&
701 (!xmlStrcasecmp(cur
->parent
->name
, BAD_CAST
"a"))))) {
703 xmlChar
*tmp
= value
;
705 while (IS_BLANK_CH(*tmp
)) tmp
++;
708 * the < and > have already been escaped at the entity level
709 * And doing so here breaks server side includes
711 escaped
= xmlURIEscapeStr(tmp
, BAD_CAST
"@/:=?;#%&,+<>");
712 if (escaped
!= NULL
) {
713 xmlBufWriteQuotedString(buf
->buffer
, escaped
);
716 xmlBufWriteQuotedString(buf
->buffer
, value
);
719 xmlBufWriteQuotedString(buf
->buffer
, value
);
723 xmlOutputBufferWriteString(buf
, "=\"\"");
729 * htmlNodeDumpFormatOutput:
730 * @buf: the HTML buffer output
732 * @cur: the current node
733 * @encoding: the encoding string (unused)
734 * @format: should formatting spaces be added
736 * Dump an HTML node, recursive behaviour, children are printed too.
739 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
740 xmlNodePtr cur
, const char *encoding ATTRIBUTE_UNUSED
,
742 xmlNodePtr root
, parent
;
744 const htmlElemDesc
* info
;
748 if ((cur
== NULL
) || (buf
== NULL
)) {
753 parent
= cur
->parent
;
756 case XML_HTML_DOCUMENT_NODE
:
757 case XML_DOCUMENT_NODE
:
758 if (((xmlDocPtr
) cur
)->intSubset
!= NULL
) {
759 htmlDtdDumpOutput(buf
, (xmlDocPtr
) cur
, NULL
);
761 if (cur
->children
!= NULL
) {
762 /* Always validate cur->parent when descending. */
763 if (cur
->parent
== parent
) {
769 xmlOutputBufferWriteString(buf
, "\n");
773 case XML_ELEMENT_NODE
:
775 * Some users like lxml are known to pass nodes with a corrupted
776 * tree structure. Fall back to a recursive call to handle this
779 if ((cur
->parent
!= parent
) && (cur
->children
!= NULL
)) {
780 htmlNodeDumpFormatOutput(buf
, doc
, cur
, encoding
, format
);
785 * Get specific HTML info for that node.
788 info
= htmlTagLookup(cur
->name
);
792 xmlOutputBufferWriteString(buf
, "<");
793 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
794 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
795 xmlOutputBufferWriteString(buf
, ":");
797 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
799 xmlNsListDumpOutput(buf
, cur
->nsDef
);
800 attr
= cur
->properties
;
801 while (attr
!= NULL
) {
802 htmlAttrDumpOutput(buf
, doc
, attr
);
806 if ((info
!= NULL
) && (info
->empty
)) {
807 xmlOutputBufferWriteString(buf
, ">");
808 } else if (cur
->children
== NULL
) {
809 if ((info
!= NULL
) && (info
->saveEndTag
!= 0) &&
810 (xmlStrcmp(BAD_CAST info
->name
, BAD_CAST
"html")) &&
811 (xmlStrcmp(BAD_CAST info
->name
, BAD_CAST
"body"))) {
812 xmlOutputBufferWriteString(buf
, ">");
814 xmlOutputBufferWriteString(buf
, "></");
815 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
816 xmlOutputBufferWriteString(buf
,
817 (const char *)cur
->ns
->prefix
);
818 xmlOutputBufferWriteString(buf
, ":");
820 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
821 xmlOutputBufferWriteString(buf
, ">");
824 xmlOutputBufferWriteString(buf
, ">");
825 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
826 (cur
->children
->type
!= HTML_TEXT_NODE
) &&
827 (cur
->children
->type
!= HTML_ENTITY_REF_NODE
) &&
828 (cur
->children
!= cur
->last
) &&
829 (cur
->name
!= NULL
) &&
830 (cur
->name
[0] != 'p')) /* p, pre, param */
831 xmlOutputBufferWriteString(buf
, "\n");
837 if ((format
) && (cur
->next
!= NULL
) &&
838 (info
!= NULL
) && (!info
->isinline
)) {
839 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
840 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
842 (parent
->name
!= NULL
) &&
843 (parent
->name
[0] != 'p')) /* p, pre, param */
844 xmlOutputBufferWriteString(buf
, "\n");
849 case XML_ATTRIBUTE_NODE
:
850 htmlAttrDumpOutput(buf
, doc
, (xmlAttrPtr
) cur
);
854 if (cur
->content
== NULL
)
856 if (((cur
->name
== (const xmlChar
*)xmlStringText
) ||
857 (cur
->name
!= (const xmlChar
*)xmlStringTextNoenc
)) &&
859 ((xmlStrcasecmp(parent
->name
, BAD_CAST
"script")) &&
860 (xmlStrcasecmp(parent
->name
, BAD_CAST
"style"))))) {
863 buffer
= xmlEncodeEntitiesReentrant(doc
, cur
->content
);
864 if (buffer
!= NULL
) {
865 xmlOutputBufferWriteString(buf
, (const char *)buffer
);
869 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
873 case HTML_COMMENT_NODE
:
874 if (cur
->content
!= NULL
) {
875 xmlOutputBufferWriteString(buf
, "<!--");
876 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
877 xmlOutputBufferWriteString(buf
, "-->");
882 if (cur
->name
!= NULL
) {
883 xmlOutputBufferWriteString(buf
, "<?");
884 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
885 if (cur
->content
!= NULL
) {
886 xmlOutputBufferWriteString(buf
, " ");
887 xmlOutputBufferWriteString(buf
,
888 (const char *)cur
->content
);
890 xmlOutputBufferWriteString(buf
, ">");
894 case HTML_ENTITY_REF_NODE
:
895 xmlOutputBufferWriteString(buf
, "&");
896 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
897 xmlOutputBufferWriteString(buf
, ";");
900 case HTML_PRESERVE_NODE
:
901 if (cur
->content
!= NULL
) {
902 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
913 if (cur
->next
!= NULL
) {
919 /* cur->parent was validated when descending. */
920 parent
= cur
->parent
;
922 if ((cur
->type
== XML_HTML_DOCUMENT_NODE
) ||
923 (cur
->type
== XML_DOCUMENT_NODE
)) {
924 xmlOutputBufferWriteString(buf
, "\n");
926 if ((format
) && (cur
->ns
== NULL
))
927 info
= htmlTagLookup(cur
->name
);
931 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
932 (cur
->last
->type
!= HTML_TEXT_NODE
) &&
933 (cur
->last
->type
!= HTML_ENTITY_REF_NODE
) &&
934 (cur
->children
!= cur
->last
) &&
935 (cur
->name
!= NULL
) &&
936 (cur
->name
[0] != 'p')) /* p, pre, param */
937 xmlOutputBufferWriteString(buf
, "\n");
939 xmlOutputBufferWriteString(buf
, "</");
940 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
941 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
942 xmlOutputBufferWriteString(buf
, ":");
944 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
945 xmlOutputBufferWriteString(buf
, ">");
947 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
948 (cur
->next
!= NULL
)) {
949 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
950 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
952 (parent
->name
!= NULL
) &&
953 (parent
->name
[0] != 'p')) /* p, pre, param */
954 xmlOutputBufferWriteString(buf
, "\n");
962 * htmlNodeDumpOutput:
963 * @buf: the HTML buffer output
965 * @cur: the current node
966 * @encoding: the encoding string (unused)
968 * Dump an HTML node, recursive behaviour, children are printed too,
969 * and formatting returns/spaces are added.
972 htmlNodeDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
973 xmlNodePtr cur
, const char *encoding ATTRIBUTE_UNUSED
) {
974 htmlNodeDumpFormatOutput(buf
, doc
, cur
, NULL
, 1);
978 * htmlDocContentDumpFormatOutput:
979 * @buf: the HTML buffer output
981 * @encoding: the encoding string (unused)
982 * @format: should formatting spaces be added
984 * Dump an HTML document.
987 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf
, xmlDocPtr cur
,
988 const char *encoding ATTRIBUTE_UNUSED
,
993 cur
->type
= XML_HTML_DOCUMENT_NODE
;
995 htmlNodeDumpFormatOutput(buf
, cur
, (xmlNodePtr
) cur
, NULL
, format
);
997 cur
->type
= (xmlElementType
) type
;
1001 * htmlDocContentDumpOutput:
1002 * @buf: the HTML buffer output
1003 * @cur: the document
1004 * @encoding: the encoding string (unused)
1006 * Dump an HTML document. Formatting return/spaces are added.
1009 htmlDocContentDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr cur
,
1010 const char *encoding ATTRIBUTE_UNUSED
) {
1011 htmlNodeDumpFormatOutput(buf
, cur
, (xmlNodePtr
) cur
, NULL
, 1);
1014 /************************************************************************
1016 * Saving functions front-ends *
1018 ************************************************************************/
1023 * @cur: the document
1025 * Dump an HTML document to an open FILE.
1027 * returns: the number of bytes written or -1 in case of failure.
1030 htmlDocDump(FILE *f
, xmlDocPtr cur
) {
1031 xmlOutputBufferPtr buf
;
1032 xmlCharEncodingHandlerPtr handler
= NULL
;
1033 const char *encoding
;
1038 if ((cur
== NULL
) || (f
== NULL
)) {
1042 encoding
= (const char *) htmlGetMetaEncoding(cur
);
1044 if (encoding
!= NULL
) {
1045 xmlCharEncoding enc
;
1047 enc
= xmlParseCharEncoding(encoding
);
1048 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
1049 handler
= xmlFindCharEncodingHandler(encoding
);
1050 if (handler
== NULL
)
1051 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
1055 * Fallback to HTML or ASCII when the encoding is unspecified
1057 if (handler
== NULL
)
1058 handler
= xmlFindCharEncodingHandler("HTML");
1059 if (handler
== NULL
)
1060 handler
= xmlFindCharEncodingHandler("ascii");
1063 buf
= xmlOutputBufferCreateFile(f
, handler
);
1064 if (buf
== NULL
) return(-1);
1065 htmlDocContentDumpOutput(buf
, cur
, NULL
);
1067 ret
= xmlOutputBufferClose(buf
);
1073 * @filename: the filename (or URL)
1074 * @cur: the document
1076 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1078 * returns: the number of bytes written or -1 in case of failure.
1081 htmlSaveFile(const char *filename
, xmlDocPtr cur
) {
1082 xmlOutputBufferPtr buf
;
1083 xmlCharEncodingHandlerPtr handler
= NULL
;
1084 const char *encoding
;
1087 if ((cur
== NULL
) || (filename
== NULL
))
1092 encoding
= (const char *) htmlGetMetaEncoding(cur
);
1094 if (encoding
!= NULL
) {
1095 xmlCharEncoding enc
;
1097 enc
= xmlParseCharEncoding(encoding
);
1098 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
1099 handler
= xmlFindCharEncodingHandler(encoding
);
1100 if (handler
== NULL
)
1101 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
1105 * Fallback to HTML or ASCII when the encoding is unspecified
1107 if (handler
== NULL
)
1108 handler
= xmlFindCharEncodingHandler("HTML");
1109 if (handler
== NULL
)
1110 handler
= xmlFindCharEncodingHandler("ascii");
1114 * save the content to a temp buffer.
1116 buf
= xmlOutputBufferCreateFilename(filename
, handler
, cur
->compression
);
1117 if (buf
== NULL
) return(0);
1119 htmlDocContentDumpOutput(buf
, cur
, NULL
);
1121 ret
= xmlOutputBufferClose(buf
);
1126 * htmlSaveFileFormat:
1127 * @filename: the filename
1128 * @cur: the document
1129 * @format: should formatting spaces be added
1130 * @encoding: the document encoding
1132 * Dump an HTML document to a file using a given encoding.
1134 * returns: the number of bytes written or -1 in case of failure.
1137 htmlSaveFileFormat(const char *filename
, xmlDocPtr cur
,
1138 const char *encoding
, int format
) {
1139 xmlOutputBufferPtr buf
;
1140 xmlCharEncodingHandlerPtr handler
= NULL
;
1143 if ((cur
== NULL
) || (filename
== NULL
))
1148 if (encoding
!= NULL
) {
1149 xmlCharEncoding enc
;
1151 enc
= xmlParseCharEncoding(encoding
);
1152 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
1153 handler
= xmlFindCharEncodingHandler(encoding
);
1154 if (handler
== NULL
)
1155 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
1157 htmlSetMetaEncoding(cur
, (const xmlChar
*) encoding
);
1159 htmlSetMetaEncoding(cur
, (const xmlChar
*) "UTF-8");
1162 * Fallback to HTML or ASCII when the encoding is unspecified
1164 if (handler
== NULL
)
1165 handler
= xmlFindCharEncodingHandler("HTML");
1166 if (handler
== NULL
)
1167 handler
= xmlFindCharEncodingHandler("ascii");
1171 * save the content to a temp buffer.
1173 buf
= xmlOutputBufferCreateFilename(filename
, handler
, 0);
1174 if (buf
== NULL
) return(0);
1176 htmlDocContentDumpFormatOutput(buf
, cur
, encoding
, format
);
1178 ret
= xmlOutputBufferClose(buf
);
1184 * @filename: the filename
1185 * @cur: the document
1186 * @encoding: the document encoding
1188 * Dump an HTML document to a file using a given encoding
1189 * and formatting returns/spaces are added.
1191 * returns: the number of bytes written or -1 in case of failure.
1194 htmlSaveFileEnc(const char *filename
, xmlDocPtr cur
, const char *encoding
) {
1195 return(htmlSaveFileFormat(filename
, cur
, encoding
, 1));
1198 #endif /* LIBXML_OUTPUT_ENABLED */
1200 #endif /* LIBXML_HTML_ENABLED */