mscms: Fix double free on error path in EnumColorProfilesA (scan-build).
[wine.git] / libs / xml2 / HTMLtree.c
blobfa3a0ed38a86d839e9a2f8aadc5ffe42c9563b3e
1 /*
2 * HTMLtree.c : implementation of access function for an HTML tree.
4 * See Copyright for the status of this software.
6 * daniel@veillard.com
7 */
10 #define IN_LIBXML
11 #include "libxml.h"
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
15 #include <ctype.h>
16 #include <stdlib.h>
18 #include <libxml/xmlmemory.h>
19 #include <libxml/HTMLparser.h>
20 #include <libxml/HTMLtree.h>
21 #include <libxml/entities.h>
22 #include <libxml/valid.h>
23 #include <libxml/xmlerror.h>
24 #include <libxml/parserInternals.h>
25 #include <libxml/globals.h>
26 #include <libxml/uri.h>
28 #include "private/buf.h"
29 #include "private/error.h"
30 #include "private/io.h"
31 #include "private/save.h"
33 /************************************************************************
34 * *
35 * Getting/Setting encoding meta tags *
36 * *
37 ************************************************************************/
39 /**
40 * htmlGetMetaEncoding:
41 * @doc: the document
43 * Encoding definition lookup in the Meta tags
45 * Returns the current encoding as flagged in the HTML source
47 const xmlChar *
48 htmlGetMetaEncoding(htmlDocPtr doc) {
49 htmlNodePtr cur;
50 const xmlChar *content;
51 const xmlChar *encoding;
53 if (doc == NULL)
54 return(NULL);
55 cur = doc->children;
58 * Search the html
60 while (cur != NULL) {
61 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62 if (xmlStrEqual(cur->name, BAD_CAST"html"))
63 break;
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
65 goto found_head;
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67 goto found_meta;
69 cur = cur->next;
71 if (cur == NULL)
72 return(NULL);
73 cur = cur->children;
76 * Search the head
78 while (cur != NULL) {
79 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80 if (xmlStrEqual(cur->name, BAD_CAST"head"))
81 break;
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83 goto found_meta;
85 cur = cur->next;
87 if (cur == NULL)
88 return(NULL);
89 found_head:
90 cur = cur->children;
93 * Search the meta elements
95 found_meta:
96 while (cur != NULL) {
97 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
100 int http;
101 const xmlChar *value;
103 content = NULL;
104 http = 0;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
109 value = attr->children->content;
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
119 attr = attr->next;
123 cur = cur->next;
125 return(NULL);
127 found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
147 return(encoding);
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
159 * Returns 0 in case of success and -1 in case of error
162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta = NULL, head = NULL;
164 const xmlChar *content = NULL;
165 char newcontent[100];
167 newcontent[0] = 0;
169 if (doc == NULL)
170 return(-1);
172 /* html isn't a real encoding it's just libxml2 way to get entities */
173 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
174 return(-1);
176 if (encoding != NULL) {
177 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
178 (char *)encoding);
179 newcontent[sizeof(newcontent) - 1] = 0;
182 cur = doc->children;
185 * Search the html
187 while (cur != NULL) {
188 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
189 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
190 break;
191 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
192 goto found_head;
193 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
194 goto found_meta;
196 cur = cur->next;
198 if (cur == NULL)
199 return(-1);
200 cur = cur->children;
203 * Search the head
205 while (cur != NULL) {
206 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
207 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
208 break;
209 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
210 head = cur->parent;
211 goto found_meta;
214 cur = cur->next;
216 if (cur == NULL)
217 return(-1);
218 found_head:
219 head = cur;
220 if (cur->children == NULL)
221 goto create;
222 cur = cur->children;
224 found_meta:
226 * Search and update all the remaining the meta elements carrying
227 * encoding information
229 while (cur != NULL) {
230 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
231 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
232 xmlAttrPtr attr = cur->properties;
233 int http;
234 const xmlChar *value;
236 content = NULL;
237 http = 0;
238 while (attr != NULL) {
239 if ((attr->children != NULL) &&
240 (attr->children->type == XML_TEXT_NODE) &&
241 (attr->children->next == NULL)) {
242 value = attr->children->content;
243 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
244 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
245 http = 1;
246 else
248 if ((value != NULL) &&
249 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
250 content = value;
252 if ((http != 0) && (content != NULL))
253 break;
255 attr = attr->next;
257 if ((http != 0) && (content != NULL)) {
258 meta = cur;
259 break;
264 cur = cur->next;
266 create:
267 if (meta == NULL) {
268 if ((encoding != NULL) && (head != NULL)) {
270 * Create a new Meta element with the right attributes
273 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
274 if (head->children == NULL)
275 xmlAddChild(head, meta);
276 else
277 xmlAddPrevSibling(head->children, meta);
278 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
279 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
281 } else {
282 /* remove the meta tag if NULL is passed */
283 if (encoding == NULL) {
284 xmlUnlinkNode(meta);
285 xmlFreeNode(meta);
287 /* change the document only if there is a real encoding change */
288 else if (xmlStrcasestr(content, encoding) == NULL) {
289 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
294 return(0);
/**
 * booleanHTMLAttrs:
 *
 * NULL-terminated list of the HTML attributes which are written in
 * minimized form, i.e. <option selected="selected"> is output as
 * <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char* const htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
313 * htmlIsBooleanAttr:
314 * @name: the name of the attribute to check
316 * Determine if a given attribute is a boolean attribute.
318 * returns: false if the attribute is not boolean, true otherwise.
321 htmlIsBooleanAttr(const xmlChar *name)
323 int i = 0;
325 while (htmlBooleanAttrs[i] != NULL) {
326 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
327 return 1;
328 i++;
330 return 0;
333 #ifdef LIBXML_OUTPUT_ENABLED
334 /************************************************************************
336 * Output error handlers *
338 ************************************************************************/
340 * htmlSaveErrMemory:
341 * @extra: extra information
343 * Handle an out of memory condition
345 static void
346 htmlSaveErrMemory(const char *extra)
348 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
352 * htmlSaveErr:
353 * @code: the error number
354 * @node: the location of the error.
355 * @extra: extra information
357 * Handle an out of memory condition
359 static void
360 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
362 const char *msg = NULL;
364 switch(code) {
365 case XML_SAVE_NOT_UTF8:
366 msg = "string is not in UTF-8\n";
367 break;
368 case XML_SAVE_CHAR_INVALID:
369 msg = "invalid character value\n";
370 break;
371 case XML_SAVE_UNKNOWN_ENCODING:
372 msg = "unknown encoding %s\n";
373 break;
374 case XML_SAVE_NO_DOCTYPE:
375 msg = "HTML has no DOCTYPE\n";
376 break;
377 default:
378 msg = "unexpected error number\n";
380 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
383 /************************************************************************
385 * Dumping HTML tree content to a simple buffer *
387 ************************************************************************/
390 * htmlBufNodeDumpFormat:
391 * @buf: the xmlBufPtr output
392 * @doc: the document
393 * @cur: the current node
394 * @format: should formatting spaces been added
396 * Dump an HTML node, recursive behaviour,children are printed too.
398 * Returns the number of byte written or -1 in case of error
400 static size_t
401 htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
402 int format) {
403 size_t use;
404 int ret;
405 xmlOutputBufferPtr outbuf;
407 if (cur == NULL) {
408 return (-1);
410 if (buf == NULL) {
411 return (-1);
413 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
414 if (outbuf == NULL) {
415 htmlSaveErrMemory("allocating HTML output buffer");
416 return (-1);
418 memset(outbuf, 0, sizeof(xmlOutputBuffer));
419 outbuf->buffer = buf;
420 outbuf->encoder = NULL;
421 outbuf->writecallback = NULL;
422 outbuf->closecallback = NULL;
423 outbuf->context = NULL;
424 outbuf->written = 0;
426 use = xmlBufUse(buf);
427 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
428 xmlFree(outbuf);
429 ret = xmlBufUse(buf) - use;
430 return (ret);
434 * htmlNodeDump:
435 * @buf: the HTML buffer output
436 * @doc: the document
437 * @cur: the current node
439 * Dump an HTML node, recursive behaviour,children are printed too,
440 * and formatting returns are added.
442 * Returns the number of byte written or -1 in case of error
445 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
446 xmlBufPtr buffer;
447 size_t ret;
449 if ((buf == NULL) || (cur == NULL))
450 return(-1);
452 xmlInitParser();
453 buffer = xmlBufFromBuffer(buf);
454 if (buffer == NULL)
455 return(-1);
457 ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
459 xmlBufBackToBuffer(buffer);
461 if (ret > INT_MAX)
462 return(-1);
463 return((int) ret);
467 * htmlNodeDumpFileFormat:
468 * @out: the FILE pointer
469 * @doc: the document
470 * @cur: the current node
471 * @encoding: the document encoding
472 * @format: should formatting spaces been added
474 * Dump an HTML node, recursive behaviour,children are printed too.
476 * TODO: if encoding == NULL try to save in the doc encoding
478 * returns: the number of byte written or -1 in case of failure.
481 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
482 xmlNodePtr cur, const char *encoding, int format) {
483 xmlOutputBufferPtr buf;
484 xmlCharEncodingHandlerPtr handler = NULL;
485 int ret;
487 xmlInitParser();
489 if (encoding != NULL) {
490 xmlCharEncoding enc;
492 enc = xmlParseCharEncoding(encoding);
493 if (enc != XML_CHAR_ENCODING_UTF8) {
494 handler = xmlFindCharEncodingHandler(encoding);
495 if (handler == NULL)
496 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
498 } else {
500 * Fallback to HTML or ASCII when the encoding is unspecified
502 if (handler == NULL)
503 handler = xmlFindCharEncodingHandler("HTML");
504 if (handler == NULL)
505 handler = xmlFindCharEncodingHandler("ascii");
509 * save the content to a temp buffer.
511 buf = xmlOutputBufferCreateFile(out, handler);
512 if (buf == NULL) return(0);
514 htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
516 ret = xmlOutputBufferClose(buf);
517 return(ret);
521 * htmlNodeDumpFile:
522 * @out: the FILE pointer
523 * @doc: the document
524 * @cur: the current node
526 * Dump an HTML node, recursive behaviour,children are printed too,
527 * and formatting returns are added.
529 void
530 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
531 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
535 * htmlDocDumpMemoryFormat:
536 * @cur: the document
537 * @mem: OUT: the memory pointer
538 * @size: OUT: the memory length
539 * @format: should formatting spaces been added
541 * Dump an HTML document in memory and return the xmlChar * and it's size.
542 * It's up to the caller to free the memory.
544 void
545 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
546 xmlOutputBufferPtr buf;
547 xmlCharEncodingHandlerPtr handler = NULL;
548 const char *encoding;
550 xmlInitParser();
552 if ((mem == NULL) || (size == NULL))
553 return;
554 if (cur == NULL) {
555 *mem = NULL;
556 *size = 0;
557 return;
560 encoding = (const char *) htmlGetMetaEncoding(cur);
562 if (encoding != NULL) {
563 xmlCharEncoding enc;
565 enc = xmlParseCharEncoding(encoding);
566 if (enc != XML_CHAR_ENCODING_UTF8) {
567 handler = xmlFindCharEncodingHandler(encoding);
568 if (handler == NULL)
569 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
572 } else {
574 * Fallback to HTML or ASCII when the encoding is unspecified
576 if (handler == NULL)
577 handler = xmlFindCharEncodingHandler("HTML");
578 if (handler == NULL)
579 handler = xmlFindCharEncodingHandler("ascii");
582 buf = xmlAllocOutputBufferInternal(handler);
583 if (buf == NULL) {
584 *mem = NULL;
585 *size = 0;
586 return;
589 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
591 xmlOutputBufferFlush(buf);
592 if (buf->conv != NULL) {
593 *size = xmlBufUse(buf->conv);
594 *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
595 } else {
596 *size = xmlBufUse(buf->buffer);
597 *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
599 (void)xmlOutputBufferClose(buf);
603 * htmlDocDumpMemory:
604 * @cur: the document
605 * @mem: OUT: the memory pointer
606 * @size: OUT: the memory length
608 * Dump an HTML document in memory and return the xmlChar * and it's size.
609 * It's up to the caller to free the memory.
611 void
612 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
613 htmlDocDumpMemoryFormat(cur, mem, size, 1);
617 /************************************************************************
619 * Dumping HTML tree content to an I/O output buffer *
621 ************************************************************************/
624 * htmlDtdDumpOutput:
625 * @buf: the HTML buffer output
626 * @doc: the document
627 * @encoding: the encoding string
629 * TODO: check whether encoding is needed
631 * Dump the HTML document DTD, if any.
633 static void
634 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
635 const char *encoding ATTRIBUTE_UNUSED) {
636 xmlDtdPtr cur = doc->intSubset;
638 if (cur == NULL) {
639 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
640 return;
642 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
643 xmlOutputBufferWriteString(buf, (const char *)cur->name);
644 if (cur->ExternalID != NULL) {
645 xmlOutputBufferWriteString(buf, " PUBLIC ");
646 xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
647 if (cur->SystemID != NULL) {
648 xmlOutputBufferWriteString(buf, " ");
649 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
651 } else if (cur->SystemID != NULL &&
652 xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
653 xmlOutputBufferWriteString(buf, " SYSTEM ");
654 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
656 xmlOutputBufferWriteString(buf, ">\n");
660 * htmlAttrDumpOutput:
661 * @buf: the HTML buffer output
662 * @doc: the document
663 * @cur: the attribute pointer
665 * Dump an HTML attribute
667 static void
668 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
669 xmlChar *value;
672 * The html output method should not escape a & character
673 * occurring in an attribute value immediately followed by
674 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
675 * This is implemented in xmlEncodeEntitiesReentrant
678 if (cur == NULL) {
679 return;
681 xmlOutputBufferWriteString(buf, " ");
682 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
683 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
684 xmlOutputBufferWriteString(buf, ":");
686 xmlOutputBufferWriteString(buf, (const char *)cur->name);
687 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
688 value = xmlNodeListGetString(doc, cur->children, 0);
689 if (value) {
690 xmlOutputBufferWriteString(buf, "=");
691 if ((cur->ns == NULL) && (cur->parent != NULL) &&
692 (cur->parent->ns == NULL) &&
693 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
694 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
695 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
696 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
697 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
698 xmlChar *escaped;
699 xmlChar *tmp = value;
701 while (IS_BLANK_CH(*tmp)) tmp++;
704 * Angle brackets are technically illegal in URIs, but they're
705 * used in server side includes, for example. Curly brackets
706 * are illegal as well and often used in templates.
707 * Don't escape non-whitespace, printable ASCII chars for
708 * improved interoperability. Only escape space, control
709 * and non-ASCII chars.
711 escaped = xmlURIEscapeStr(tmp,
712 BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{|}");
713 if (escaped != NULL) {
714 xmlBufWriteQuotedString(buf->buffer, escaped);
715 xmlFree(escaped);
716 } else {
717 xmlBufWriteQuotedString(buf->buffer, value);
719 } else {
720 xmlBufWriteQuotedString(buf->buffer, value);
722 xmlFree(value);
723 } else {
724 xmlOutputBufferWriteString(buf, "=\"\"");
730 * htmlNodeDumpFormatOutput:
731 * @buf: the HTML buffer output
732 * @doc: the document
733 * @cur: the current node
734 * @encoding: the encoding string (unused)
735 * @format: should formatting spaces been added
737 * Dump an HTML node, recursive behaviour,children are printed too.
/*
 * Serialize the node cur and everything below it as HTML into buf.
 * The walk is iterative: the outer loop emits the current node and
 * descends into its children, the inner loop moves on to the next
 * sibling or climbs back to the tracked parent, writing end tags on
 * the way up.  encoding is unused.  A non-zero format asks for newlines
 * around elements that the tag table does not flag as inline.
 * Nothing is written when cur or buf is NULL.
 */
739 void
740 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
741 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
742 int format) {
743 xmlNodePtr root, parent;
744 xmlAttrPtr attr;
745 const htmlElemDesc * info;
747 xmlInitParser();
749 if ((cur == NULL) || (buf == NULL)) {
750 return;
/*
 * root is where the walk stops.  parent is the node the walk came down
 * from; it is tracked separately and compared with cur->parent before
 * descending.
 */
753 root = cur;
754 parent = cur->parent;
755 while (1) {
756 switch (cur->type) {
/* Document: optional DTD, then the children, or a bare newline. */
757 case XML_HTML_DOCUMENT_NODE:
758 case XML_DOCUMENT_NODE:
759 if (((xmlDocPtr) cur)->intSubset != NULL) {
760 htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
762 if (cur->children != NULL) {
763 /* Always validate cur->parent when descending. */
764 if (cur->parent == parent) {
765 parent = cur;
766 cur = cur->children;
767 continue;
769 } else {
770 xmlOutputBufferWriteString(buf, "\n");
772 break;
774 case XML_ELEMENT_NODE:
776 * Some users like lxml are known to pass nodes with a corrupted
777 * tree structure. Fall back to a recursive call to handle this
778 * case.
780 if ((cur->parent != parent) && (cur->children != NULL)) {
781 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
782 break;
786 * Get specific HTML info for that node.
/* Namespaced elements are never looked up in the HTML tag table. */
788 if (cur->ns == NULL)
789 info = htmlTagLookup(cur->name);
790 else
791 info = NULL;
/* Start tag: '<', optional prefix, name, ns declarations, attributes. */
793 xmlOutputBufferWriteString(buf, "<");
794 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
795 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
796 xmlOutputBufferWriteString(buf, ":");
798 xmlOutputBufferWriteString(buf, (const char *)cur->name);
799 if (cur->nsDef)
800 xmlNsListDumpOutput(buf, cur->nsDef);
801 attr = cur->properties;
802 while (attr != NULL) {
803 htmlAttrDumpOutput(buf, doc, attr);
804 attr = attr->next;
/*
 * Three ways to finish the start tag: elements flagged empty in the tag
 * table get only '>'; childless elements get '>' alone when saveEndTag
 * is set (html and body excepted) or an explicit end tag otherwise;
 * elements with children get '>' and the walk descends.
 */
807 if ((info != NULL) && (info->empty)) {
808 xmlOutputBufferWriteString(buf, ">");
809 } else if (cur->children == NULL) {
810 if ((info != NULL) && (info->saveEndTag != 0) &&
811 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
812 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
813 xmlOutputBufferWriteString(buf, ">");
814 } else {
815 xmlOutputBufferWriteString(buf, "></");
816 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
817 xmlOutputBufferWriteString(buf,
818 (const char *)cur->ns->prefix);
819 xmlOutputBufferWriteString(buf, ":");
821 xmlOutputBufferWriteString(buf, (const char *)cur->name);
822 xmlOutputBufferWriteString(buf, ">");
824 } else {
825 xmlOutputBufferWriteString(buf, ">");
/*
 * Formatting newline after the start tag: skipped for inline elements,
 * when the first child is text or an entity reference, when there is a
 * single child, and for names starting with 'p'.
 */
826 if ((format) && (info != NULL) && (!info->isinline) &&
827 (cur->children->type != HTML_TEXT_NODE) &&
828 (cur->children->type != HTML_ENTITY_REF_NODE) &&
829 (cur->children != cur->last) &&
830 (cur->name != NULL) &&
831 (cur->name[0] != 'p')) /* p, pre, param */
832 xmlOutputBufferWriteString(buf, "\n");
833 parent = cur;
834 cur = cur->children;
835 continue;
/* Reached only for elements that were fully written above (no descent). */
838 if ((format) && (cur->next != NULL) &&
839 (info != NULL) && (!info->isinline)) {
840 if ((cur->next->type != HTML_TEXT_NODE) &&
841 (cur->next->type != HTML_ENTITY_REF_NODE) &&
842 (parent != NULL) &&
843 (parent->name != NULL) &&
844 (parent->name[0] != 'p')) /* p, pre, param */
845 xmlOutputBufferWriteString(buf, "\n");
848 break;
850 case XML_ATTRIBUTE_NODE:
851 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
852 break;
/*
 * Text is entity-escaped unless the node is named xmlStringTextNoenc or
 * its tracked parent is a script or style element; in those cases the
 * content is written verbatim.
 */
854 case HTML_TEXT_NODE:
855 if (cur->content == NULL)
856 break;
857 if (((cur->name == (const xmlChar *)xmlStringText) ||
858 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
859 ((parent == NULL) ||
860 ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
861 (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
862 xmlChar *buffer;
864 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
865 if (buffer != NULL) {
866 xmlOutputBufferWriteString(buf, (const char *)buffer);
867 xmlFree(buffer);
869 } else {
870 xmlOutputBufferWriteString(buf, (const char *)cur->content);
872 break;
874 case HTML_COMMENT_NODE:
875 if (cur->content != NULL) {
876 xmlOutputBufferWriteString(buf, "<!--");
877 xmlOutputBufferWriteString(buf, (const char *)cur->content);
878 xmlOutputBufferWriteString(buf, "-->");
880 break;
/* Processing instructions are closed with '>' only, not '?>'. */
882 case HTML_PI_NODE:
883 if (cur->name != NULL) {
884 xmlOutputBufferWriteString(buf, "<?");
885 xmlOutputBufferWriteString(buf, (const char *)cur->name);
886 if (cur->content != NULL) {
887 xmlOutputBufferWriteString(buf, " ");
888 xmlOutputBufferWriteString(buf,
889 (const char *)cur->content);
891 xmlOutputBufferWriteString(buf, ">");
893 break;
895 case HTML_ENTITY_REF_NODE:
896 xmlOutputBufferWriteString(buf, "&");
897 xmlOutputBufferWriteString(buf, (const char *)cur->name);
898 xmlOutputBufferWriteString(buf, ";");
899 break;
901 case HTML_PRESERVE_NODE:
902 if (cur->content != NULL) {
903 xmlOutputBufferWriteString(buf, (const char *)cur->content);
905 break;
907 default:
908 break;
/*
 * Advance: stop at root, otherwise move to the next sibling if there is
 * one, else climb to the tracked parent and close it (newline for a
 * document, end tag for an element), then try again from there.
 */
911 while (1) {
912 if (cur == root)
913 return;
914 if (cur->next != NULL) {
915 cur = cur->next;
916 break;
919 cur = parent;
920 /* cur->parent was validated when descending. */
921 parent = cur->parent;
923 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
924 (cur->type == XML_DOCUMENT_NODE)) {
925 xmlOutputBufferWriteString(buf, "\n");
926 } else {
/* The tag table is consulted here only to decide on formatting. */
927 if ((format) && (cur->ns == NULL))
928 info = htmlTagLookup(cur->name);
929 else
930 info = NULL;
932 if ((format) && (info != NULL) && (!info->isinline) &&
933 (cur->last->type != HTML_TEXT_NODE) &&
934 (cur->last->type != HTML_ENTITY_REF_NODE) &&
935 (cur->children != cur->last) &&
936 (cur->name != NULL) &&
937 (cur->name[0] != 'p')) /* p, pre, param */
938 xmlOutputBufferWriteString(buf, "\n");
940 xmlOutputBufferWriteString(buf, "</");
941 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
942 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
943 xmlOutputBufferWriteString(buf, ":");
945 xmlOutputBufferWriteString(buf, (const char *)cur->name);
946 xmlOutputBufferWriteString(buf, ">");
948 if ((format) && (info != NULL) && (!info->isinline) &&
949 (cur->next != NULL)) {
950 if ((cur->next->type != HTML_TEXT_NODE) &&
951 (cur->next->type != HTML_ENTITY_REF_NODE) &&
952 (parent != NULL) &&
953 (parent->name != NULL) &&
954 (parent->name[0] != 'p')) /* p, pre, param */
955 xmlOutputBufferWriteString(buf, "\n");
963 * htmlNodeDumpOutput:
964 * @buf: the HTML buffer output
965 * @doc: the document
966 * @cur: the current node
967 * @encoding: the encoding string (unused)
969 * Dump an HTML node, recursive behaviour,children are printed too,
970 * and formatting returns/spaces are added.
972 void
973 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
974 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
975 htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
979 * htmlDocContentDumpFormatOutput:
980 * @buf: the HTML buffer output
981 * @cur: the document
982 * @encoding: the encoding string (unused)
983 * @format: should formatting spaces been added
985 * Dump an HTML document.
987 void
988 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
989 const char *encoding ATTRIBUTE_UNUSED,
990 int format) {
991 int type = 0;
992 if (cur) {
993 type = cur->type;
994 cur->type = XML_HTML_DOCUMENT_NODE;
996 htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
997 if (cur)
998 cur->type = (xmlElementType) type;
1002 * htmlDocContentDumpOutput:
1003 * @buf: the HTML buffer output
1004 * @cur: the document
1005 * @encoding: the encoding string (unused)
1007 * Dump an HTML document. Formatting return/spaces are added.
1009 void
1010 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1011 const char *encoding ATTRIBUTE_UNUSED) {
1012 htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
1015 /************************************************************************
1017 * Saving functions front-ends *
1019 ************************************************************************/
1022 * htmlDocDump:
1023 * @f: the FILE*
1024 * @cur: the document
1026 * Dump an HTML document to an open FILE.
1028 * returns: the number of byte written or -1 in case of failure.
1031 htmlDocDump(FILE *f, xmlDocPtr cur) {
1032 xmlOutputBufferPtr buf;
1033 xmlCharEncodingHandlerPtr handler = NULL;
1034 const char *encoding;
1035 int ret;
1037 xmlInitParser();
1039 if ((cur == NULL) || (f == NULL)) {
1040 return(-1);
1043 encoding = (const char *) htmlGetMetaEncoding(cur);
1045 if (encoding != NULL) {
1046 xmlCharEncoding enc;
1048 enc = xmlParseCharEncoding(encoding);
1049 if (enc != XML_CHAR_ENCODING_UTF8) {
1050 handler = xmlFindCharEncodingHandler(encoding);
1051 if (handler == NULL)
1052 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1054 } else {
1056 * Fallback to HTML or ASCII when the encoding is unspecified
1058 if (handler == NULL)
1059 handler = xmlFindCharEncodingHandler("HTML");
1060 if (handler == NULL)
1061 handler = xmlFindCharEncodingHandler("ascii");
1064 buf = xmlOutputBufferCreateFile(f, handler);
1065 if (buf == NULL) return(-1);
1066 htmlDocContentDumpOutput(buf, cur, NULL);
1068 ret = xmlOutputBufferClose(buf);
1069 return(ret);
1073 * htmlSaveFile:
1074 * @filename: the filename (or URL)
1075 * @cur: the document
1077 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1078 * used.
1079 * returns: the number of byte written or -1 in case of failure.
1082 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1083 xmlOutputBufferPtr buf;
1084 xmlCharEncodingHandlerPtr handler = NULL;
1085 const char *encoding;
1086 int ret;
1088 if ((cur == NULL) || (filename == NULL))
1089 return(-1);
1091 xmlInitParser();
1093 encoding = (const char *) htmlGetMetaEncoding(cur);
1095 if (encoding != NULL) {
1096 xmlCharEncoding enc;
1098 enc = xmlParseCharEncoding(encoding);
1099 if (enc != XML_CHAR_ENCODING_UTF8) {
1100 handler = xmlFindCharEncodingHandler(encoding);
1101 if (handler == NULL)
1102 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1104 } else {
1106 * Fallback to HTML or ASCII when the encoding is unspecified
1108 if (handler == NULL)
1109 handler = xmlFindCharEncodingHandler("HTML");
1110 if (handler == NULL)
1111 handler = xmlFindCharEncodingHandler("ascii");
1115 * save the content to a temp buffer.
1117 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1118 if (buf == NULL) return(0);
1120 htmlDocContentDumpOutput(buf, cur, NULL);
1122 ret = xmlOutputBufferClose(buf);
1123 return(ret);
1127 * htmlSaveFileFormat:
1128 * @filename: the filename
1129 * @cur: the document
1130 * @format: should formatting spaces been added
1131 * @encoding: the document encoding
1133 * Dump an HTML document to a file using a given encoding.
1135 * returns: the number of byte written or -1 in case of failure.
1138 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1139 const char *encoding, int format) {
1140 xmlOutputBufferPtr buf;
1141 xmlCharEncodingHandlerPtr handler = NULL;
1142 int ret;
1144 if ((cur == NULL) || (filename == NULL))
1145 return(-1);
1147 xmlInitParser();
1149 if (encoding != NULL) {
1150 xmlCharEncoding enc;
1152 enc = xmlParseCharEncoding(encoding);
1153 if (enc != XML_CHAR_ENCODING_UTF8) {
1154 handler = xmlFindCharEncodingHandler(encoding);
1155 if (handler == NULL)
1156 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1158 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1159 } else {
1160 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1163 * Fallback to HTML or ASCII when the encoding is unspecified
1165 if (handler == NULL)
1166 handler = xmlFindCharEncodingHandler("HTML");
1167 if (handler == NULL)
1168 handler = xmlFindCharEncodingHandler("ascii");
1172 * save the content to a temp buffer.
1174 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1175 if (buf == NULL) return(0);
1177 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1179 ret = xmlOutputBufferClose(buf);
1180 return(ret);
1184 * htmlSaveFileEnc:
1185 * @filename: the filename
1186 * @cur: the document
1187 * @encoding: the document encoding
1189 * Dump an HTML document to a file using a given encoding
1190 * and formatting returns/spaces are added.
1192 * returns: the number of byte written or -1 in case of failure.
1195 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1196 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1199 #endif /* LIBXML_OUTPUT_ENABLED */
1201 #endif /* LIBXML_HTML_ENABLED */