wineoss: Use dedicated macros to call interface functions.
[wine.git] / libs / xml2 / HTMLtree.c
blob3dd78a91ebaab141c698fd211f10d1a3362f8c53
/*
 * HTMLtree.c : implementation of access function for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10 #define IN_LIBXML
11 #include "libxml.h"
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
15 #include <ctype.h>
16 #include <stdlib.h>
18 #include <libxml/xmlmemory.h>
19 #include <libxml/HTMLparser.h>
20 #include <libxml/HTMLtree.h>
21 #include <libxml/entities.h>
22 #include <libxml/valid.h>
23 #include <libxml/xmlerror.h>
24 #include <libxml/parserInternals.h>
25 #include <libxml/globals.h>
26 #include <libxml/uri.h>
28 #include "buf.h"
/************************************************************************
 *									*
 *		Getting/Setting encoding meta tags			*
 *									*
 ************************************************************************/
36 /**
37 * htmlGetMetaEncoding:
38 * @doc: the document
40 * Encoding definition lookup in the Meta tags
42 * Returns the current encoding as flagged in the HTML source
44 const xmlChar *
45 htmlGetMetaEncoding(htmlDocPtr doc) {
46 htmlNodePtr cur;
47 const xmlChar *content;
48 const xmlChar *encoding;
50 if (doc == NULL)
51 return(NULL);
52 cur = doc->children;
55 * Search the html
57 while (cur != NULL) {
58 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
59 if (xmlStrEqual(cur->name, BAD_CAST"html"))
60 break;
61 if (xmlStrEqual(cur->name, BAD_CAST"head"))
62 goto found_head;
63 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
64 goto found_meta;
66 cur = cur->next;
68 if (cur == NULL)
69 return(NULL);
70 cur = cur->children;
73 * Search the head
75 while (cur != NULL) {
76 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
77 if (xmlStrEqual(cur->name, BAD_CAST"head"))
78 break;
79 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
80 goto found_meta;
82 cur = cur->next;
84 if (cur == NULL)
85 return(NULL);
86 found_head:
87 cur = cur->children;
90 * Search the meta elements
92 found_meta:
93 while (cur != NULL) {
94 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
95 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
96 xmlAttrPtr attr = cur->properties;
97 int http;
98 const xmlChar *value;
100 content = NULL;
101 http = 0;
102 while (attr != NULL) {
103 if ((attr->children != NULL) &&
104 (attr->children->type == XML_TEXT_NODE) &&
105 (attr->children->next == NULL)) {
106 value = attr->children->content;
107 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
108 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
109 http = 1;
110 else if ((value != NULL)
111 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
112 content = value;
113 if ((http != 0) && (content != NULL))
114 goto found_content;
116 attr = attr->next;
120 cur = cur->next;
122 return(NULL);
124 found_content:
125 encoding = xmlStrstr(content, BAD_CAST"charset=");
126 if (encoding == NULL)
127 encoding = xmlStrstr(content, BAD_CAST"Charset=");
128 if (encoding == NULL)
129 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
130 if (encoding != NULL) {
131 encoding += 8;
132 } else {
133 encoding = xmlStrstr(content, BAD_CAST"charset =");
134 if (encoding == NULL)
135 encoding = xmlStrstr(content, BAD_CAST"Charset =");
136 if (encoding == NULL)
137 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
138 if (encoding != NULL)
139 encoding += 9;
141 if (encoding != NULL) {
142 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
144 return(encoding);
148 * htmlSetMetaEncoding:
149 * @doc: the document
150 * @encoding: the encoding string
152 * Sets the current encoding in the Meta tags
153 * NOTE: this will not change the document content encoding, just
154 * the META flag associated.
156 * Returns 0 in case of success and -1 in case of error
159 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
160 htmlNodePtr cur, meta = NULL, head = NULL;
161 const xmlChar *content = NULL;
162 char newcontent[100];
164 newcontent[0] = 0;
166 if (doc == NULL)
167 return(-1);
169 /* html isn't a real encoding it's just libxml2 way to get entities */
170 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
171 return(-1);
173 if (encoding != NULL) {
174 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
175 (char *)encoding);
176 newcontent[sizeof(newcontent) - 1] = 0;
179 cur = doc->children;
182 * Search the html
184 while (cur != NULL) {
185 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
186 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
187 break;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
189 goto found_head;
190 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
191 goto found_meta;
193 cur = cur->next;
195 if (cur == NULL)
196 return(-1);
197 cur = cur->children;
200 * Search the head
202 while (cur != NULL) {
203 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
204 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
205 break;
206 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
207 head = cur->parent;
208 goto found_meta;
211 cur = cur->next;
213 if (cur == NULL)
214 return(-1);
215 found_head:
216 head = cur;
217 if (cur->children == NULL)
218 goto create;
219 cur = cur->children;
221 found_meta:
223 * Search and update all the remaining the meta elements carrying
224 * encoding information
226 while (cur != NULL) {
227 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
228 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
229 xmlAttrPtr attr = cur->properties;
230 int http;
231 const xmlChar *value;
233 content = NULL;
234 http = 0;
235 while (attr != NULL) {
236 if ((attr->children != NULL) &&
237 (attr->children->type == XML_TEXT_NODE) &&
238 (attr->children->next == NULL)) {
239 value = attr->children->content;
240 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
241 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
242 http = 1;
243 else
245 if ((value != NULL) &&
246 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
247 content = value;
249 if ((http != 0) && (content != NULL))
250 break;
252 attr = attr->next;
254 if ((http != 0) && (content != NULL)) {
255 meta = cur;
256 break;
261 cur = cur->next;
263 create:
264 if (meta == NULL) {
265 if ((encoding != NULL) && (head != NULL)) {
267 * Create a new Meta element with the right attributes
270 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
271 if (head->children == NULL)
272 xmlAddChild(head, meta);
273 else
274 xmlAddPrevSibling(head->children, meta);
275 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
276 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
278 } else {
279 /* remove the meta tag if NULL is passed */
280 if (encoding == NULL) {
281 xmlUnlinkNode(meta);
282 xmlFreeNode(meta);
284 /* change the document only if there is a real encoding change */
285 else if (xmlStrcasestr(content, encoding) == NULL) {
286 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
291 return(0);
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char* const htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
310 * htmlIsBooleanAttr:
311 * @name: the name of the attribute to check
313 * Determine if a given attribute is a boolean attribute.
315 * returns: false if the attribute is not boolean, true otherwise.
318 htmlIsBooleanAttr(const xmlChar *name)
320 int i = 0;
322 while (htmlBooleanAttrs[i] != NULL) {
323 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
324 return 1;
325 i++;
327 return 0;
330 #ifdef LIBXML_OUTPUT_ENABLED
332 * private routine exported from xmlIO.c
334 xmlOutputBufferPtr
335 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
/************************************************************************
 *									*
 *			Output error handlers				*
 *									*
 ************************************************************************/
342 * htmlSaveErrMemory:
343 * @extra: extra information
345 * Handle an out of memory condition
347 static void
348 htmlSaveErrMemory(const char *extra)
350 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
354 * htmlSaveErr:
355 * @code: the error number
356 * @node: the location of the error.
357 * @extra: extra information
359 * Handle an out of memory condition
361 static void
362 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
364 const char *msg = NULL;
366 switch(code) {
367 case XML_SAVE_NOT_UTF8:
368 msg = "string is not in UTF-8\n";
369 break;
370 case XML_SAVE_CHAR_INVALID:
371 msg = "invalid character value\n";
372 break;
373 case XML_SAVE_UNKNOWN_ENCODING:
374 msg = "unknown encoding %s\n";
375 break;
376 case XML_SAVE_NO_DOCTYPE:
377 msg = "HTML has no DOCTYPE\n";
378 break;
379 default:
380 msg = "unexpected error number\n";
382 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
/************************************************************************
 *									*
 *		Dumping HTML tree content to a simple buffer		*
 *									*
 ************************************************************************/
392 * htmlBufNodeDumpFormat:
393 * @buf: the xmlBufPtr output
394 * @doc: the document
395 * @cur: the current node
396 * @format: should formatting spaces been added
398 * Dump an HTML node, recursive behaviour,children are printed too.
400 * Returns the number of byte written or -1 in case of error
402 static size_t
403 htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
404 int format) {
405 size_t use;
406 int ret;
407 xmlOutputBufferPtr outbuf;
409 if (cur == NULL) {
410 return (-1);
412 if (buf == NULL) {
413 return (-1);
415 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
416 if (outbuf == NULL) {
417 htmlSaveErrMemory("allocating HTML output buffer");
418 return (-1);
420 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
421 outbuf->buffer = buf;
422 outbuf->encoder = NULL;
423 outbuf->writecallback = NULL;
424 outbuf->closecallback = NULL;
425 outbuf->context = NULL;
426 outbuf->written = 0;
428 use = xmlBufUse(buf);
429 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
430 xmlFree(outbuf);
431 ret = xmlBufUse(buf) - use;
432 return (ret);
436 * htmlNodeDump:
437 * @buf: the HTML buffer output
438 * @doc: the document
439 * @cur: the current node
441 * Dump an HTML node, recursive behaviour,children are printed too,
442 * and formatting returns are added.
444 * Returns the number of byte written or -1 in case of error
447 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
448 xmlBufPtr buffer;
449 size_t ret;
451 if ((buf == NULL) || (cur == NULL))
452 return(-1);
454 xmlInitParser();
455 buffer = xmlBufFromBuffer(buf);
456 if (buffer == NULL)
457 return(-1);
459 ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
461 xmlBufBackToBuffer(buffer);
463 if (ret > INT_MAX)
464 return(-1);
465 return((int) ret);
469 * htmlNodeDumpFileFormat:
470 * @out: the FILE pointer
471 * @doc: the document
472 * @cur: the current node
473 * @encoding: the document encoding
474 * @format: should formatting spaces been added
476 * Dump an HTML node, recursive behaviour,children are printed too.
478 * TODO: if encoding == NULL try to save in the doc encoding
480 * returns: the number of byte written or -1 in case of failure.
483 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
484 xmlNodePtr cur, const char *encoding, int format) {
485 xmlOutputBufferPtr buf;
486 xmlCharEncodingHandlerPtr handler = NULL;
487 int ret;
489 xmlInitParser();
491 if (encoding != NULL) {
492 xmlCharEncoding enc;
494 enc = xmlParseCharEncoding(encoding);
495 if (enc != XML_CHAR_ENCODING_UTF8) {
496 handler = xmlFindCharEncodingHandler(encoding);
497 if (handler == NULL)
498 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
500 } else {
502 * Fallback to HTML or ASCII when the encoding is unspecified
504 if (handler == NULL)
505 handler = xmlFindCharEncodingHandler("HTML");
506 if (handler == NULL)
507 handler = xmlFindCharEncodingHandler("ascii");
511 * save the content to a temp buffer.
513 buf = xmlOutputBufferCreateFile(out, handler);
514 if (buf == NULL) return(0);
516 htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
518 ret = xmlOutputBufferClose(buf);
519 return(ret);
523 * htmlNodeDumpFile:
524 * @out: the FILE pointer
525 * @doc: the document
526 * @cur: the current node
528 * Dump an HTML node, recursive behaviour,children are printed too,
529 * and formatting returns are added.
531 void
532 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
533 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
537 * htmlDocDumpMemoryFormat:
538 * @cur: the document
539 * @mem: OUT: the memory pointer
540 * @size: OUT: the memory length
541 * @format: should formatting spaces been added
543 * Dump an HTML document in memory and return the xmlChar * and it's size.
544 * It's up to the caller to free the memory.
546 void
547 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
548 xmlOutputBufferPtr buf;
549 xmlCharEncodingHandlerPtr handler = NULL;
550 const char *encoding;
552 xmlInitParser();
554 if ((mem == NULL) || (size == NULL))
555 return;
556 if (cur == NULL) {
557 *mem = NULL;
558 *size = 0;
559 return;
562 encoding = (const char *) htmlGetMetaEncoding(cur);
564 if (encoding != NULL) {
565 xmlCharEncoding enc;
567 enc = xmlParseCharEncoding(encoding);
568 if (enc != XML_CHAR_ENCODING_UTF8) {
569 handler = xmlFindCharEncodingHandler(encoding);
570 if (handler == NULL)
571 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
574 } else {
576 * Fallback to HTML or ASCII when the encoding is unspecified
578 if (handler == NULL)
579 handler = xmlFindCharEncodingHandler("HTML");
580 if (handler == NULL)
581 handler = xmlFindCharEncodingHandler("ascii");
584 buf = xmlAllocOutputBufferInternal(handler);
585 if (buf == NULL) {
586 *mem = NULL;
587 *size = 0;
588 return;
591 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
593 xmlOutputBufferFlush(buf);
594 if (buf->conv != NULL) {
595 *size = xmlBufUse(buf->conv);
596 *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
597 } else {
598 *size = xmlBufUse(buf->buffer);
599 *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
601 (void)xmlOutputBufferClose(buf);
605 * htmlDocDumpMemory:
606 * @cur: the document
607 * @mem: OUT: the memory pointer
608 * @size: OUT: the memory length
610 * Dump an HTML document in memory and return the xmlChar * and it's size.
611 * It's up to the caller to free the memory.
613 void
614 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
615 htmlDocDumpMemoryFormat(cur, mem, size, 1);
/************************************************************************
 *									*
 *		Dumping HTML tree content to an I/O output buffer	*
 *									*
 ************************************************************************/
625 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
628 * htmlDtdDumpOutput:
629 * @buf: the HTML buffer output
630 * @doc: the document
631 * @encoding: the encoding string
633 * TODO: check whether encoding is needed
635 * Dump the HTML document DTD, if any.
637 static void
638 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
639 const char *encoding ATTRIBUTE_UNUSED) {
640 xmlDtdPtr cur = doc->intSubset;
642 if (cur == NULL) {
643 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
644 return;
646 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
647 xmlOutputBufferWriteString(buf, (const char *)cur->name);
648 if (cur->ExternalID != NULL) {
649 xmlOutputBufferWriteString(buf, " PUBLIC ");
650 xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
651 if (cur->SystemID != NULL) {
652 xmlOutputBufferWriteString(buf, " ");
653 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
655 } else if (cur->SystemID != NULL &&
656 xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
657 xmlOutputBufferWriteString(buf, " SYSTEM ");
658 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
660 xmlOutputBufferWriteString(buf, ">\n");
664 * htmlAttrDumpOutput:
665 * @buf: the HTML buffer output
666 * @doc: the document
667 * @cur: the attribute pointer
669 * Dump an HTML attribute
671 static void
672 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
673 xmlChar *value;
676 * The html output method should not escape a & character
677 * occurring in an attribute value immediately followed by
678 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
679 * This is implemented in xmlEncodeEntitiesReentrant
682 if (cur == NULL) {
683 return;
685 xmlOutputBufferWriteString(buf, " ");
686 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
687 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
688 xmlOutputBufferWriteString(buf, ":");
690 xmlOutputBufferWriteString(buf, (const char *)cur->name);
691 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
692 value = xmlNodeListGetString(doc, cur->children, 0);
693 if (value) {
694 xmlOutputBufferWriteString(buf, "=");
695 if ((cur->ns == NULL) && (cur->parent != NULL) &&
696 (cur->parent->ns == NULL) &&
697 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
698 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
699 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
700 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
701 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
702 xmlChar *escaped;
703 xmlChar *tmp = value;
705 while (IS_BLANK_CH(*tmp)) tmp++;
708 * the < and > have already been escaped at the entity level
709 * And doing so here breaks server side includes
711 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
712 if (escaped != NULL) {
713 xmlBufWriteQuotedString(buf->buffer, escaped);
714 xmlFree(escaped);
715 } else {
716 xmlBufWriteQuotedString(buf->buffer, value);
718 } else {
719 xmlBufWriteQuotedString(buf->buffer, value);
721 xmlFree(value);
722 } else {
723 xmlOutputBufferWriteString(buf, "=\"\"");
729 * htmlNodeDumpFormatOutput:
730 * @buf: the HTML buffer output
731 * @doc: the document
732 * @cur: the current node
733 * @encoding: the encoding string (unused)
734 * @format: should formatting spaces been added
736 * Dump an HTML node, recursive behaviour,children are printed too.
738 void
739 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
740 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
741 int format) {
742 xmlNodePtr root, parent;
743 xmlAttrPtr attr;
744 const htmlElemDesc * info;
746 xmlInitParser();
748 if ((cur == NULL) || (buf == NULL)) {
749 return;
752 root = cur;
753 parent = cur->parent;
754 while (1) {
755 switch (cur->type) {
756 case XML_HTML_DOCUMENT_NODE:
757 case XML_DOCUMENT_NODE:
758 if (((xmlDocPtr) cur)->intSubset != NULL) {
759 htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
761 if (cur->children != NULL) {
762 /* Always validate cur->parent when descending. */
763 if (cur->parent == parent) {
764 parent = cur;
765 cur = cur->children;
766 continue;
768 } else {
769 xmlOutputBufferWriteString(buf, "\n");
771 break;
773 case XML_ELEMENT_NODE:
775 * Some users like lxml are known to pass nodes with a corrupted
776 * tree structure. Fall back to a recursive call to handle this
777 * case.
779 if ((cur->parent != parent) && (cur->children != NULL)) {
780 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
781 break;
785 * Get specific HTML info for that node.
787 if (cur->ns == NULL)
788 info = htmlTagLookup(cur->name);
789 else
790 info = NULL;
792 xmlOutputBufferWriteString(buf, "<");
793 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
794 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
795 xmlOutputBufferWriteString(buf, ":");
797 xmlOutputBufferWriteString(buf, (const char *)cur->name);
798 if (cur->nsDef)
799 xmlNsListDumpOutput(buf, cur->nsDef);
800 attr = cur->properties;
801 while (attr != NULL) {
802 htmlAttrDumpOutput(buf, doc, attr);
803 attr = attr->next;
806 if ((info != NULL) && (info->empty)) {
807 xmlOutputBufferWriteString(buf, ">");
808 } else if (cur->children == NULL) {
809 if ((info != NULL) && (info->saveEndTag != 0) &&
810 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
811 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
812 xmlOutputBufferWriteString(buf, ">");
813 } else {
814 xmlOutputBufferWriteString(buf, "></");
815 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
816 xmlOutputBufferWriteString(buf,
817 (const char *)cur->ns->prefix);
818 xmlOutputBufferWriteString(buf, ":");
820 xmlOutputBufferWriteString(buf, (const char *)cur->name);
821 xmlOutputBufferWriteString(buf, ">");
823 } else {
824 xmlOutputBufferWriteString(buf, ">");
825 if ((format) && (info != NULL) && (!info->isinline) &&
826 (cur->children->type != HTML_TEXT_NODE) &&
827 (cur->children->type != HTML_ENTITY_REF_NODE) &&
828 (cur->children != cur->last) &&
829 (cur->name != NULL) &&
830 (cur->name[0] != 'p')) /* p, pre, param */
831 xmlOutputBufferWriteString(buf, "\n");
832 parent = cur;
833 cur = cur->children;
834 continue;
837 if ((format) && (cur->next != NULL) &&
838 (info != NULL) && (!info->isinline)) {
839 if ((cur->next->type != HTML_TEXT_NODE) &&
840 (cur->next->type != HTML_ENTITY_REF_NODE) &&
841 (parent != NULL) &&
842 (parent->name != NULL) &&
843 (parent->name[0] != 'p')) /* p, pre, param */
844 xmlOutputBufferWriteString(buf, "\n");
847 break;
849 case XML_ATTRIBUTE_NODE:
850 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
851 break;
853 case HTML_TEXT_NODE:
854 if (cur->content == NULL)
855 break;
856 if (((cur->name == (const xmlChar *)xmlStringText) ||
857 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
858 ((parent == NULL) ||
859 ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
860 (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
861 xmlChar *buffer;
863 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
864 if (buffer != NULL) {
865 xmlOutputBufferWriteString(buf, (const char *)buffer);
866 xmlFree(buffer);
868 } else {
869 xmlOutputBufferWriteString(buf, (const char *)cur->content);
871 break;
873 case HTML_COMMENT_NODE:
874 if (cur->content != NULL) {
875 xmlOutputBufferWriteString(buf, "<!--");
876 xmlOutputBufferWriteString(buf, (const char *)cur->content);
877 xmlOutputBufferWriteString(buf, "-->");
879 break;
881 case HTML_PI_NODE:
882 if (cur->name != NULL) {
883 xmlOutputBufferWriteString(buf, "<?");
884 xmlOutputBufferWriteString(buf, (const char *)cur->name);
885 if (cur->content != NULL) {
886 xmlOutputBufferWriteString(buf, " ");
887 xmlOutputBufferWriteString(buf,
888 (const char *)cur->content);
890 xmlOutputBufferWriteString(buf, ">");
892 break;
894 case HTML_ENTITY_REF_NODE:
895 xmlOutputBufferWriteString(buf, "&");
896 xmlOutputBufferWriteString(buf, (const char *)cur->name);
897 xmlOutputBufferWriteString(buf, ";");
898 break;
900 case HTML_PRESERVE_NODE:
901 if (cur->content != NULL) {
902 xmlOutputBufferWriteString(buf, (const char *)cur->content);
904 break;
906 default:
907 break;
910 while (1) {
911 if (cur == root)
912 return;
913 if (cur->next != NULL) {
914 cur = cur->next;
915 break;
918 cur = parent;
919 /* cur->parent was validated when descending. */
920 parent = cur->parent;
922 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
923 (cur->type == XML_DOCUMENT_NODE)) {
924 xmlOutputBufferWriteString(buf, "\n");
925 } else {
926 if ((format) && (cur->ns == NULL))
927 info = htmlTagLookup(cur->name);
928 else
929 info = NULL;
931 if ((format) && (info != NULL) && (!info->isinline) &&
932 (cur->last->type != HTML_TEXT_NODE) &&
933 (cur->last->type != HTML_ENTITY_REF_NODE) &&
934 (cur->children != cur->last) &&
935 (cur->name != NULL) &&
936 (cur->name[0] != 'p')) /* p, pre, param */
937 xmlOutputBufferWriteString(buf, "\n");
939 xmlOutputBufferWriteString(buf, "</");
940 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
941 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
942 xmlOutputBufferWriteString(buf, ":");
944 xmlOutputBufferWriteString(buf, (const char *)cur->name);
945 xmlOutputBufferWriteString(buf, ">");
947 if ((format) && (info != NULL) && (!info->isinline) &&
948 (cur->next != NULL)) {
949 if ((cur->next->type != HTML_TEXT_NODE) &&
950 (cur->next->type != HTML_ENTITY_REF_NODE) &&
951 (parent != NULL) &&
952 (parent->name != NULL) &&
953 (parent->name[0] != 'p')) /* p, pre, param */
954 xmlOutputBufferWriteString(buf, "\n");
962 * htmlNodeDumpOutput:
963 * @buf: the HTML buffer output
964 * @doc: the document
965 * @cur: the current node
966 * @encoding: the encoding string (unused)
968 * Dump an HTML node, recursive behaviour,children are printed too,
969 * and formatting returns/spaces are added.
971 void
972 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
973 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
974 htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
978 * htmlDocContentDumpFormatOutput:
979 * @buf: the HTML buffer output
980 * @cur: the document
981 * @encoding: the encoding string (unused)
982 * @format: should formatting spaces been added
984 * Dump an HTML document.
986 void
987 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
988 const char *encoding ATTRIBUTE_UNUSED,
989 int format) {
990 int type = 0;
991 if (cur) {
992 type = cur->type;
993 cur->type = XML_HTML_DOCUMENT_NODE;
995 htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
996 if (cur)
997 cur->type = (xmlElementType) type;
1001 * htmlDocContentDumpOutput:
1002 * @buf: the HTML buffer output
1003 * @cur: the document
1004 * @encoding: the encoding string (unused)
1006 * Dump an HTML document. Formatting return/spaces are added.
1008 void
1009 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1010 const char *encoding ATTRIBUTE_UNUSED) {
1011 htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
/************************************************************************
 *									*
 *		Saving functions front-ends				*
 *									*
 ************************************************************************/
1021 * htmlDocDump:
1022 * @f: the FILE*
1023 * @cur: the document
1025 * Dump an HTML document to an open FILE.
1027 * returns: the number of byte written or -1 in case of failure.
1030 htmlDocDump(FILE *f, xmlDocPtr cur) {
1031 xmlOutputBufferPtr buf;
1032 xmlCharEncodingHandlerPtr handler = NULL;
1033 const char *encoding;
1034 int ret;
1036 xmlInitParser();
1038 if ((cur == NULL) || (f == NULL)) {
1039 return(-1);
1042 encoding = (const char *) htmlGetMetaEncoding(cur);
1044 if (encoding != NULL) {
1045 xmlCharEncoding enc;
1047 enc = xmlParseCharEncoding(encoding);
1048 if (enc != XML_CHAR_ENCODING_UTF8) {
1049 handler = xmlFindCharEncodingHandler(encoding);
1050 if (handler == NULL)
1051 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1053 } else {
1055 * Fallback to HTML or ASCII when the encoding is unspecified
1057 if (handler == NULL)
1058 handler = xmlFindCharEncodingHandler("HTML");
1059 if (handler == NULL)
1060 handler = xmlFindCharEncodingHandler("ascii");
1063 buf = xmlOutputBufferCreateFile(f, handler);
1064 if (buf == NULL) return(-1);
1065 htmlDocContentDumpOutput(buf, cur, NULL);
1067 ret = xmlOutputBufferClose(buf);
1068 return(ret);
1072 * htmlSaveFile:
1073 * @filename: the filename (or URL)
1074 * @cur: the document
1076 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1077 * used.
1078 * returns: the number of byte written or -1 in case of failure.
1081 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1082 xmlOutputBufferPtr buf;
1083 xmlCharEncodingHandlerPtr handler = NULL;
1084 const char *encoding;
1085 int ret;
1087 if ((cur == NULL) || (filename == NULL))
1088 return(-1);
1090 xmlInitParser();
1092 encoding = (const char *) htmlGetMetaEncoding(cur);
1094 if (encoding != NULL) {
1095 xmlCharEncoding enc;
1097 enc = xmlParseCharEncoding(encoding);
1098 if (enc != XML_CHAR_ENCODING_UTF8) {
1099 handler = xmlFindCharEncodingHandler(encoding);
1100 if (handler == NULL)
1101 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1103 } else {
1105 * Fallback to HTML or ASCII when the encoding is unspecified
1107 if (handler == NULL)
1108 handler = xmlFindCharEncodingHandler("HTML");
1109 if (handler == NULL)
1110 handler = xmlFindCharEncodingHandler("ascii");
1114 * save the content to a temp buffer.
1116 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1117 if (buf == NULL) return(0);
1119 htmlDocContentDumpOutput(buf, cur, NULL);
1121 ret = xmlOutputBufferClose(buf);
1122 return(ret);
1126 * htmlSaveFileFormat:
1127 * @filename: the filename
1128 * @cur: the document
1129 * @format: should formatting spaces been added
1130 * @encoding: the document encoding
1132 * Dump an HTML document to a file using a given encoding.
1134 * returns: the number of byte written or -1 in case of failure.
1137 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1138 const char *encoding, int format) {
1139 xmlOutputBufferPtr buf;
1140 xmlCharEncodingHandlerPtr handler = NULL;
1141 int ret;
1143 if ((cur == NULL) || (filename == NULL))
1144 return(-1);
1146 xmlInitParser();
1148 if (encoding != NULL) {
1149 xmlCharEncoding enc;
1151 enc = xmlParseCharEncoding(encoding);
1152 if (enc != XML_CHAR_ENCODING_UTF8) {
1153 handler = xmlFindCharEncodingHandler(encoding);
1154 if (handler == NULL)
1155 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1157 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1158 } else {
1159 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1162 * Fallback to HTML or ASCII when the encoding is unspecified
1164 if (handler == NULL)
1165 handler = xmlFindCharEncodingHandler("HTML");
1166 if (handler == NULL)
1167 handler = xmlFindCharEncodingHandler("ascii");
1171 * save the content to a temp buffer.
1173 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1174 if (buf == NULL) return(0);
1176 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1178 ret = xmlOutputBufferClose(buf);
1179 return(ret);
1183 * htmlSaveFileEnc:
1184 * @filename: the filename
1185 * @cur: the document
1186 * @encoding: the document encoding
1188 * Dump an HTML document to a file using a given encoding
1189 * and formatting returns/spaces are added.
1191 * returns: the number of byte written or -1 in case of failure.
1194 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1195 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1198 #endif /* LIBXML_OUTPUT_ENABLED */
1200 #endif /* LIBXML_HTML_ENABLED */