wined3d: Respect the BO memory offset in wined3d_context_gl_map_bo_address().
[wine.git] / libs / xml2 / HTMLtree.c
blob24434d453e132e76bc7ee2f350ae709f564c2220
1 /*
2 * HTMLtree.c : implementation of access function for an HTML tree.
4 * See Copyright for the status of this software.
6 * daniel@veillard.com
7 */
10 #define IN_LIBXML
11 #include "libxml.h"
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
16 #ifdef HAVE_CTYPE_H
17 #include <ctype.h>
18 #endif
19 #ifdef HAVE_STDLIB_H
20 #include <stdlib.h>
21 #endif
23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h>
27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h>
31 #include <libxml/uri.h>
33 #include "buf.h"
35 /************************************************************************
36 * *
37 * Getting/Setting encoding meta tags *
38 * *
39 ************************************************************************/
41 /**
42 * htmlGetMetaEncoding:
43 * @doc: the document
45 * Encoding definition lookup in the Meta tags
47 * Returns the current encoding as flagged in the HTML source
49 const xmlChar *
50 htmlGetMetaEncoding(htmlDocPtr doc) {
51 htmlNodePtr cur;
52 const xmlChar *content;
53 const xmlChar *encoding;
55 if (doc == NULL)
56 return(NULL);
57 cur = doc->children;
60 * Search the html
62 while (cur != NULL) {
63 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
64 if (xmlStrEqual(cur->name, BAD_CAST"html"))
65 break;
66 if (xmlStrEqual(cur->name, BAD_CAST"head"))
67 goto found_head;
68 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
69 goto found_meta;
71 cur = cur->next;
73 if (cur == NULL)
74 return(NULL);
75 cur = cur->children;
78 * Search the head
80 while (cur != NULL) {
81 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
82 if (xmlStrEqual(cur->name, BAD_CAST"head"))
83 break;
84 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
85 goto found_meta;
87 cur = cur->next;
89 if (cur == NULL)
90 return(NULL);
91 found_head:
92 cur = cur->children;
95 * Search the meta elements
97 found_meta:
98 while (cur != NULL) {
99 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
100 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
101 xmlAttrPtr attr = cur->properties;
102 int http;
103 const xmlChar *value;
105 content = NULL;
106 http = 0;
107 while (attr != NULL) {
108 if ((attr->children != NULL) &&
109 (attr->children->type == XML_TEXT_NODE) &&
110 (attr->children->next == NULL)) {
111 value = attr->children->content;
112 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
113 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
114 http = 1;
115 else if ((value != NULL)
116 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
117 content = value;
118 if ((http != 0) && (content != NULL))
119 goto found_content;
121 attr = attr->next;
125 cur = cur->next;
127 return(NULL);
129 found_content:
130 encoding = xmlStrstr(content, BAD_CAST"charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"Charset=");
133 if (encoding == NULL)
134 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
135 if (encoding != NULL) {
136 encoding += 8;
137 } else {
138 encoding = xmlStrstr(content, BAD_CAST"charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"Charset =");
141 if (encoding == NULL)
142 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
143 if (encoding != NULL)
144 encoding += 9;
146 if (encoding != NULL) {
147 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
149 return(encoding);
153 * htmlSetMetaEncoding:
154 * @doc: the document
155 * @encoding: the encoding string
157 * Sets the current encoding in the Meta tags
158 * NOTE: this will not change the document content encoding, just
159 * the META flag associated.
161 * Returns 0 in case of success and -1 in case of error
164 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
165 htmlNodePtr cur, meta = NULL, head = NULL;
166 const xmlChar *content = NULL;
167 char newcontent[100];
169 newcontent[0] = 0;
171 if (doc == NULL)
172 return(-1);
174 /* html isn't a real encoding it's just libxml2 way to get entities */
175 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
176 return(-1);
178 if (encoding != NULL) {
179 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
180 (char *)encoding);
181 newcontent[sizeof(newcontent) - 1] = 0;
184 cur = doc->children;
187 * Search the html
189 while (cur != NULL) {
190 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
191 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
192 break;
193 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
194 goto found_head;
195 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
196 goto found_meta;
198 cur = cur->next;
200 if (cur == NULL)
201 return(-1);
202 cur = cur->children;
205 * Search the head
207 while (cur != NULL) {
208 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
209 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
210 break;
211 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
212 head = cur->parent;
213 goto found_meta;
216 cur = cur->next;
218 if (cur == NULL)
219 return(-1);
220 found_head:
221 head = cur;
222 if (cur->children == NULL)
223 goto create;
224 cur = cur->children;
226 found_meta:
228 * Search and update all the remaining the meta elements carrying
229 * encoding information
231 while (cur != NULL) {
232 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
233 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
234 xmlAttrPtr attr = cur->properties;
235 int http;
236 const xmlChar *value;
238 content = NULL;
239 http = 0;
240 while (attr != NULL) {
241 if ((attr->children != NULL) &&
242 (attr->children->type == XML_TEXT_NODE) &&
243 (attr->children->next == NULL)) {
244 value = attr->children->content;
245 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
246 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
247 http = 1;
248 else
250 if ((value != NULL) &&
251 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
252 content = value;
254 if ((http != 0) && (content != NULL))
255 break;
257 attr = attr->next;
259 if ((http != 0) && (content != NULL)) {
260 meta = cur;
261 break;
266 cur = cur->next;
268 create:
269 if (meta == NULL) {
270 if ((encoding != NULL) && (head != NULL)) {
272 * Create a new Meta element with the right attributes
275 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
276 if (head->children == NULL)
277 xmlAddChild(head, meta);
278 else
279 xmlAddPrevSibling(head->children, meta);
280 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
281 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
283 } else {
284 /* remove the meta tag if NULL is passed */
285 if (encoding == NULL) {
286 xmlUnlinkNode(meta);
287 xmlFreeNode(meta);
289 /* change the document only if there is a real encoding change */
290 else if (xmlStrcasestr(content, encoding) == NULL) {
291 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
296 return(0);
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which are output in
 * minimized form, i.e. <option selected="selected"> is written as
 * <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
315 * htmlIsBooleanAttr:
316 * @name: the name of the attribute to check
318 * Determine if a given attribute is a boolean attribute.
320 * returns: false if the attribute is not boolean, true otherwise.
323 htmlIsBooleanAttr(const xmlChar *name)
325 int i = 0;
327 while (htmlBooleanAttrs[i] != NULL) {
328 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
329 return 1;
330 i++;
332 return 0;
335 #ifdef LIBXML_OUTPUT_ENABLED
337 * private routine exported from xmlIO.c
339 xmlOutputBufferPtr
340 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
341 /************************************************************************
343 * Output error handlers *
345 ************************************************************************/
347 * htmlSaveErrMemory:
348 * @extra: extra information
350 * Handle an out of memory condition
352 static void
353 htmlSaveErrMemory(const char *extra)
355 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
359 * htmlSaveErr:
360 * @code: the error number
361 * @node: the location of the error.
362 * @extra: extra information
364 * Handle an out of memory condition
366 static void
367 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
369 const char *msg = NULL;
371 switch(code) {
372 case XML_SAVE_NOT_UTF8:
373 msg = "string is not in UTF-8\n";
374 break;
375 case XML_SAVE_CHAR_INVALID:
376 msg = "invalid character value\n";
377 break;
378 case XML_SAVE_UNKNOWN_ENCODING:
379 msg = "unknown encoding %s\n";
380 break;
381 case XML_SAVE_NO_DOCTYPE:
382 msg = "HTML has no DOCTYPE\n";
383 break;
384 default:
385 msg = "unexpected error number\n";
387 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
390 /************************************************************************
392 * Dumping HTML tree content to a simple buffer *
394 ************************************************************************/
397 * htmlBufNodeDumpFormat:
398 * @buf: the xmlBufPtr output
399 * @doc: the document
400 * @cur: the current node
401 * @format: should formatting spaces been added
403 * Dump an HTML node, recursive behaviour,children are printed too.
405 * Returns the number of byte written or -1 in case of error
407 static size_t
408 htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
409 int format) {
410 size_t use;
411 int ret;
412 xmlOutputBufferPtr outbuf;
414 if (cur == NULL) {
415 return (-1);
417 if (buf == NULL) {
418 return (-1);
420 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
421 if (outbuf == NULL) {
422 htmlSaveErrMemory("allocating HTML output buffer");
423 return (-1);
425 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
426 outbuf->buffer = buf;
427 outbuf->encoder = NULL;
428 outbuf->writecallback = NULL;
429 outbuf->closecallback = NULL;
430 outbuf->context = NULL;
431 outbuf->written = 0;
433 use = xmlBufUse(buf);
434 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
435 xmlFree(outbuf);
436 ret = xmlBufUse(buf) - use;
437 return (ret);
441 * htmlNodeDump:
442 * @buf: the HTML buffer output
443 * @doc: the document
444 * @cur: the current node
446 * Dump an HTML node, recursive behaviour,children are printed too,
447 * and formatting returns are added.
449 * Returns the number of byte written or -1 in case of error
452 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
453 xmlBufPtr buffer;
454 size_t ret;
456 if ((buf == NULL) || (cur == NULL))
457 return(-1);
459 xmlInitParser();
460 buffer = xmlBufFromBuffer(buf);
461 if (buffer == NULL)
462 return(-1);
464 ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
466 xmlBufBackToBuffer(buffer);
468 if (ret > INT_MAX)
469 return(-1);
470 return((int) ret);
474 * htmlNodeDumpFileFormat:
475 * @out: the FILE pointer
476 * @doc: the document
477 * @cur: the current node
478 * @encoding: the document encoding
479 * @format: should formatting spaces been added
481 * Dump an HTML node, recursive behaviour,children are printed too.
483 * TODO: if encoding == NULL try to save in the doc encoding
485 * returns: the number of byte written or -1 in case of failure.
488 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
489 xmlNodePtr cur, const char *encoding, int format) {
490 xmlOutputBufferPtr buf;
491 xmlCharEncodingHandlerPtr handler = NULL;
492 int ret;
494 xmlInitParser();
496 if (encoding != NULL) {
497 xmlCharEncoding enc;
499 enc = xmlParseCharEncoding(encoding);
500 if (enc != XML_CHAR_ENCODING_UTF8) {
501 handler = xmlFindCharEncodingHandler(encoding);
502 if (handler == NULL)
503 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
505 } else {
507 * Fallback to HTML or ASCII when the encoding is unspecified
509 if (handler == NULL)
510 handler = xmlFindCharEncodingHandler("HTML");
511 if (handler == NULL)
512 handler = xmlFindCharEncodingHandler("ascii");
516 * save the content to a temp buffer.
518 buf = xmlOutputBufferCreateFile(out, handler);
519 if (buf == NULL) return(0);
521 htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
523 ret = xmlOutputBufferClose(buf);
524 return(ret);
528 * htmlNodeDumpFile:
529 * @out: the FILE pointer
530 * @doc: the document
531 * @cur: the current node
533 * Dump an HTML node, recursive behaviour,children are printed too,
534 * and formatting returns are added.
536 void
537 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
538 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
542 * htmlDocDumpMemoryFormat:
543 * @cur: the document
544 * @mem: OUT: the memory pointer
545 * @size: OUT: the memory length
546 * @format: should formatting spaces been added
548 * Dump an HTML document in memory and return the xmlChar * and it's size.
549 * It's up to the caller to free the memory.
551 void
552 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
553 xmlOutputBufferPtr buf;
554 xmlCharEncodingHandlerPtr handler = NULL;
555 const char *encoding;
557 xmlInitParser();
559 if ((mem == NULL) || (size == NULL))
560 return;
561 if (cur == NULL) {
562 *mem = NULL;
563 *size = 0;
564 return;
567 encoding = (const char *) htmlGetMetaEncoding(cur);
569 if (encoding != NULL) {
570 xmlCharEncoding enc;
572 enc = xmlParseCharEncoding(encoding);
573 if (enc != XML_CHAR_ENCODING_UTF8) {
574 handler = xmlFindCharEncodingHandler(encoding);
575 if (handler == NULL)
576 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
579 } else {
581 * Fallback to HTML or ASCII when the encoding is unspecified
583 if (handler == NULL)
584 handler = xmlFindCharEncodingHandler("HTML");
585 if (handler == NULL)
586 handler = xmlFindCharEncodingHandler("ascii");
589 buf = xmlAllocOutputBufferInternal(handler);
590 if (buf == NULL) {
591 *mem = NULL;
592 *size = 0;
593 return;
596 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
598 xmlOutputBufferFlush(buf);
599 if (buf->conv != NULL) {
600 *size = xmlBufUse(buf->conv);
601 *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
602 } else {
603 *size = xmlBufUse(buf->buffer);
604 *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
606 (void)xmlOutputBufferClose(buf);
610 * htmlDocDumpMemory:
611 * @cur: the document
612 * @mem: OUT: the memory pointer
613 * @size: OUT: the memory length
615 * Dump an HTML document in memory and return the xmlChar * and it's size.
616 * It's up to the caller to free the memory.
618 void
619 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
620 htmlDocDumpMemoryFormat(cur, mem, size, 1);
624 /************************************************************************
626 * Dumping HTML tree content to an I/O output buffer *
628 ************************************************************************/
630 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
633 * htmlDtdDumpOutput:
634 * @buf: the HTML buffer output
635 * @doc: the document
636 * @encoding: the encoding string
638 * TODO: check whether encoding is needed
640 * Dump the HTML document DTD, if any.
642 static void
643 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
644 const char *encoding ATTRIBUTE_UNUSED) {
645 xmlDtdPtr cur = doc->intSubset;
647 if (cur == NULL) {
648 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
649 return;
651 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
652 xmlOutputBufferWriteString(buf, (const char *)cur->name);
653 if (cur->ExternalID != NULL) {
654 xmlOutputBufferWriteString(buf, " PUBLIC ");
655 xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
656 if (cur->SystemID != NULL) {
657 xmlOutputBufferWriteString(buf, " ");
658 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
660 } else if (cur->SystemID != NULL &&
661 xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
662 xmlOutputBufferWriteString(buf, " SYSTEM ");
663 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
665 xmlOutputBufferWriteString(buf, ">\n");
669 * htmlAttrDumpOutput:
670 * @buf: the HTML buffer output
671 * @doc: the document
672 * @cur: the attribute pointer
674 * Dump an HTML attribute
676 static void
677 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
678 xmlChar *value;
681 * The html output method should not escape a & character
682 * occurring in an attribute value immediately followed by
683 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
684 * This is implemented in xmlEncodeEntitiesReentrant
687 if (cur == NULL) {
688 return;
690 xmlOutputBufferWriteString(buf, " ");
691 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
692 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
693 xmlOutputBufferWriteString(buf, ":");
695 xmlOutputBufferWriteString(buf, (const char *)cur->name);
696 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
697 value = xmlNodeListGetString(doc, cur->children, 0);
698 if (value) {
699 xmlOutputBufferWriteString(buf, "=");
700 if ((cur->ns == NULL) && (cur->parent != NULL) &&
701 (cur->parent->ns == NULL) &&
702 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
703 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
704 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
705 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
706 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
707 xmlChar *escaped;
708 xmlChar *tmp = value;
710 while (IS_BLANK_CH(*tmp)) tmp++;
713 * the < and > have already been escaped at the entity level
714 * And doing so here breaks server side includes
716 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
717 if (escaped != NULL) {
718 xmlBufWriteQuotedString(buf->buffer, escaped);
719 xmlFree(escaped);
720 } else {
721 xmlBufWriteQuotedString(buf->buffer, value);
723 } else {
724 xmlBufWriteQuotedString(buf->buffer, value);
726 xmlFree(value);
727 } else {
728 xmlOutputBufferWriteString(buf, "=\"\"");
734 * htmlNodeDumpFormatOutput:
735 * @buf: the HTML buffer output
736 * @doc: the document
737 * @cur: the current node
738 * @encoding: the encoding string (unused)
739 * @format: should formatting spaces been added
741 * Dump an HTML node, recursive behaviour,children are printed too.
743 void
744 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
745 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
746 int format) {
747 xmlNodePtr root;
748 xmlAttrPtr attr;
749 const htmlElemDesc * info;
751 xmlInitParser();
753 if ((cur == NULL) || (buf == NULL)) {
754 return;
757 root = cur;
758 while (1) {
759 switch (cur->type) {
760 case XML_HTML_DOCUMENT_NODE:
761 case XML_DOCUMENT_NODE:
762 if (((xmlDocPtr) cur)->intSubset != NULL) {
763 htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
765 if (cur->children != NULL) {
766 cur = cur->children;
767 continue;
769 break;
771 case XML_ELEMENT_NODE:
773 * Get specific HTML info for that node.
775 if (cur->ns == NULL)
776 info = htmlTagLookup(cur->name);
777 else
778 info = NULL;
780 xmlOutputBufferWriteString(buf, "<");
781 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
782 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
783 xmlOutputBufferWriteString(buf, ":");
785 xmlOutputBufferWriteString(buf, (const char *)cur->name);
786 if (cur->nsDef)
787 xmlNsListDumpOutput(buf, cur->nsDef);
788 attr = cur->properties;
789 while (attr != NULL) {
790 htmlAttrDumpOutput(buf, doc, attr);
791 attr = attr->next;
794 if ((info != NULL) && (info->empty)) {
795 xmlOutputBufferWriteString(buf, ">");
796 } else if (cur->children == NULL) {
797 if ((info != NULL) && (info->saveEndTag != 0) &&
798 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
799 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
800 xmlOutputBufferWriteString(buf, ">");
801 } else {
802 xmlOutputBufferWriteString(buf, "></");
803 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
804 xmlOutputBufferWriteString(buf,
805 (const char *)cur->ns->prefix);
806 xmlOutputBufferWriteString(buf, ":");
808 xmlOutputBufferWriteString(buf, (const char *)cur->name);
809 xmlOutputBufferWriteString(buf, ">");
811 } else {
812 xmlOutputBufferWriteString(buf, ">");
813 if ((format) && (info != NULL) && (!info->isinline) &&
814 (cur->children->type != HTML_TEXT_NODE) &&
815 (cur->children->type != HTML_ENTITY_REF_NODE) &&
816 (cur->children != cur->last) &&
817 (cur->name != NULL) &&
818 (cur->name[0] != 'p')) /* p, pre, param */
819 xmlOutputBufferWriteString(buf, "\n");
820 cur = cur->children;
821 continue;
824 if ((format) && (cur->next != NULL) &&
825 (info != NULL) && (!info->isinline)) {
826 if ((cur->next->type != HTML_TEXT_NODE) &&
827 (cur->next->type != HTML_ENTITY_REF_NODE) &&
828 (cur->parent != NULL) &&
829 (cur->parent->name != NULL) &&
830 (cur->parent->name[0] != 'p')) /* p, pre, param */
831 xmlOutputBufferWriteString(buf, "\n");
834 break;
836 case XML_ATTRIBUTE_NODE:
837 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
838 break;
840 case HTML_TEXT_NODE:
841 if (cur->content == NULL)
842 break;
843 if (((cur->name == (const xmlChar *)xmlStringText) ||
844 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
845 ((cur->parent == NULL) ||
846 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
847 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
848 xmlChar *buffer;
850 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
851 if (buffer != NULL) {
852 xmlOutputBufferWriteString(buf, (const char *)buffer);
853 xmlFree(buffer);
855 } else {
856 xmlOutputBufferWriteString(buf, (const char *)cur->content);
858 break;
860 case HTML_COMMENT_NODE:
861 if (cur->content != NULL) {
862 xmlOutputBufferWriteString(buf, "<!--");
863 xmlOutputBufferWriteString(buf, (const char *)cur->content);
864 xmlOutputBufferWriteString(buf, "-->");
866 break;
868 case HTML_PI_NODE:
869 if (cur->name != NULL) {
870 xmlOutputBufferWriteString(buf, "<?");
871 xmlOutputBufferWriteString(buf, (const char *)cur->name);
872 if (cur->content != NULL) {
873 xmlOutputBufferWriteString(buf, " ");
874 xmlOutputBufferWriteString(buf,
875 (const char *)cur->content);
877 xmlOutputBufferWriteString(buf, ">");
879 break;
881 case HTML_ENTITY_REF_NODE:
882 xmlOutputBufferWriteString(buf, "&");
883 xmlOutputBufferWriteString(buf, (const char *)cur->name);
884 xmlOutputBufferWriteString(buf, ";");
885 break;
887 case HTML_PRESERVE_NODE:
888 if (cur->content != NULL) {
889 xmlOutputBufferWriteString(buf, (const char *)cur->content);
891 break;
893 default:
894 break;
897 while (1) {
898 if (cur == root)
899 return;
900 if (cur->next != NULL) {
901 cur = cur->next;
902 break;
906 * The parent should never be NULL here but we want to handle
907 * corrupted documents gracefully.
909 if (cur->parent == NULL)
910 return;
911 cur = cur->parent;
913 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
914 (cur->type == XML_DOCUMENT_NODE)) {
915 xmlOutputBufferWriteString(buf, "\n");
916 } else {
917 if ((format) && (cur->ns == NULL))
918 info = htmlTagLookup(cur->name);
919 else
920 info = NULL;
922 if ((format) && (info != NULL) && (!info->isinline) &&
923 (cur->last->type != HTML_TEXT_NODE) &&
924 (cur->last->type != HTML_ENTITY_REF_NODE) &&
925 (cur->children != cur->last) &&
926 (cur->name != NULL) &&
927 (cur->name[0] != 'p')) /* p, pre, param */
928 xmlOutputBufferWriteString(buf, "\n");
930 xmlOutputBufferWriteString(buf, "</");
931 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
932 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
933 xmlOutputBufferWriteString(buf, ":");
935 xmlOutputBufferWriteString(buf, (const char *)cur->name);
936 xmlOutputBufferWriteString(buf, ">");
938 if ((format) && (info != NULL) && (!info->isinline) &&
939 (cur->next != NULL)) {
940 if ((cur->next->type != HTML_TEXT_NODE) &&
941 (cur->next->type != HTML_ENTITY_REF_NODE) &&
942 (cur->parent != NULL) &&
943 (cur->parent->name != NULL) &&
944 (cur->parent->name[0] != 'p')) /* p, pre, param */
945 xmlOutputBufferWriteString(buf, "\n");
953 * htmlNodeDumpOutput:
954 * @buf: the HTML buffer output
955 * @doc: the document
956 * @cur: the current node
957 * @encoding: the encoding string (unused)
959 * Dump an HTML node, recursive behaviour,children are printed too,
960 * and formatting returns/spaces are added.
962 void
963 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
964 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
965 htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
969 * htmlDocContentDumpFormatOutput:
970 * @buf: the HTML buffer output
971 * @cur: the document
972 * @encoding: the encoding string (unused)
973 * @format: should formatting spaces been added
975 * Dump an HTML document.
977 void
978 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
979 const char *encoding ATTRIBUTE_UNUSED,
980 int format) {
981 htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
985 * htmlDocContentDumpOutput:
986 * @buf: the HTML buffer output
987 * @cur: the document
988 * @encoding: the encoding string (unused)
990 * Dump an HTML document. Formatting return/spaces are added.
992 void
993 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
994 const char *encoding ATTRIBUTE_UNUSED) {
995 htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
998 /************************************************************************
1000 * Saving functions front-ends *
1002 ************************************************************************/
1005 * htmlDocDump:
1006 * @f: the FILE*
1007 * @cur: the document
1009 * Dump an HTML document to an open FILE.
1011 * returns: the number of byte written or -1 in case of failure.
1014 htmlDocDump(FILE *f, xmlDocPtr cur) {
1015 xmlOutputBufferPtr buf;
1016 xmlCharEncodingHandlerPtr handler = NULL;
1017 const char *encoding;
1018 int ret;
1020 xmlInitParser();
1022 if ((cur == NULL) || (f == NULL)) {
1023 return(-1);
1026 encoding = (const char *) htmlGetMetaEncoding(cur);
1028 if (encoding != NULL) {
1029 xmlCharEncoding enc;
1031 enc = xmlParseCharEncoding(encoding);
1032 if (enc != XML_CHAR_ENCODING_UTF8) {
1033 handler = xmlFindCharEncodingHandler(encoding);
1034 if (handler == NULL)
1035 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1037 } else {
1039 * Fallback to HTML or ASCII when the encoding is unspecified
1041 if (handler == NULL)
1042 handler = xmlFindCharEncodingHandler("HTML");
1043 if (handler == NULL)
1044 handler = xmlFindCharEncodingHandler("ascii");
1047 buf = xmlOutputBufferCreateFile(f, handler);
1048 if (buf == NULL) return(-1);
1049 htmlDocContentDumpOutput(buf, cur, NULL);
1051 ret = xmlOutputBufferClose(buf);
1052 return(ret);
1056 * htmlSaveFile:
1057 * @filename: the filename (or URL)
1058 * @cur: the document
1060 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1061 * used.
1062 * returns: the number of byte written or -1 in case of failure.
1065 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1066 xmlOutputBufferPtr buf;
1067 xmlCharEncodingHandlerPtr handler = NULL;
1068 const char *encoding;
1069 int ret;
1071 if ((cur == NULL) || (filename == NULL))
1072 return(-1);
1074 xmlInitParser();
1076 encoding = (const char *) htmlGetMetaEncoding(cur);
1078 if (encoding != NULL) {
1079 xmlCharEncoding enc;
1081 enc = xmlParseCharEncoding(encoding);
1082 if (enc != XML_CHAR_ENCODING_UTF8) {
1083 handler = xmlFindCharEncodingHandler(encoding);
1084 if (handler == NULL)
1085 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1087 } else {
1089 * Fallback to HTML or ASCII when the encoding is unspecified
1091 if (handler == NULL)
1092 handler = xmlFindCharEncodingHandler("HTML");
1093 if (handler == NULL)
1094 handler = xmlFindCharEncodingHandler("ascii");
1098 * save the content to a temp buffer.
1100 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1101 if (buf == NULL) return(0);
1103 htmlDocContentDumpOutput(buf, cur, NULL);
1105 ret = xmlOutputBufferClose(buf);
1106 return(ret);
1110 * htmlSaveFileFormat:
1111 * @filename: the filename
1112 * @cur: the document
1113 * @format: should formatting spaces been added
1114 * @encoding: the document encoding
1116 * Dump an HTML document to a file using a given encoding.
1118 * returns: the number of byte written or -1 in case of failure.
1121 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1122 const char *encoding, int format) {
1123 xmlOutputBufferPtr buf;
1124 xmlCharEncodingHandlerPtr handler = NULL;
1125 int ret;
1127 if ((cur == NULL) || (filename == NULL))
1128 return(-1);
1130 xmlInitParser();
1132 if (encoding != NULL) {
1133 xmlCharEncoding enc;
1135 enc = xmlParseCharEncoding(encoding);
1136 if (enc != XML_CHAR_ENCODING_UTF8) {
1137 handler = xmlFindCharEncodingHandler(encoding);
1138 if (handler == NULL)
1139 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1141 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1142 } else {
1143 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1146 * Fallback to HTML or ASCII when the encoding is unspecified
1148 if (handler == NULL)
1149 handler = xmlFindCharEncodingHandler("HTML");
1150 if (handler == NULL)
1151 handler = xmlFindCharEncodingHandler("ascii");
1155 * save the content to a temp buffer.
1157 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1158 if (buf == NULL) return(0);
1160 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1162 ret = xmlOutputBufferClose(buf);
1163 return(ret);
1167 * htmlSaveFileEnc:
1168 * @filename: the filename
1169 * @cur: the document
1170 * @encoding: the document encoding
1172 * Dump an HTML document to a file using a given encoding
1173 * and formatting returns/spaces are added.
1175 * returns: the number of byte written or -1 in case of failure.
1178 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1179 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1182 #endif /* LIBXML_OUTPUT_ENABLED */
1184 #define bottom_HTMLtree
1185 #include "elfgcchack.h"
1186 #endif /* LIBXML_HTML_ENABLED */