bug 1067: Comments about freeing the DOM document node.
[elinks/kon.git] / src / document / dom / renderer.c
blob38cd083044e28c821c12fdd503a3e69e26d013b7
1 /* DOM document renderer */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <sys/types.h> /* FreeBSD needs this before regex.h */
8 #ifdef HAVE_REGEX_H
9 #include <regex.h>
10 #endif
11 #include <string.h>
13 #include "elinks.h"
15 #include "bookmarks/bookmarks.h" /* get_bookmark() */
16 #include "cache/cache.h"
17 #include "document/css/css.h"
18 #include "document/css/parser.h"
19 #include "document/css/property.h"
20 #include "document/css/stylesheet.h"
21 #include "document/docdata.h"
22 #include "document/document.h"
23 #include "document/dom/renderer.h"
24 #include "document/renderer.h"
25 #include "dom/configuration.h"
26 #include "dom/scanner.h"
27 #include "dom/sgml/parser.h"
28 #include "dom/sgml/html/html.h"
29 #include "dom/sgml/rss/rss.h"
30 #include "dom/node.h"
31 #include "dom/stack.h"
32 #include "intl/charsets.h"
33 #include "globhist/globhist.h" /* get_global_history_item() */
34 #include "protocol/uri.h"
35 #include "terminal/draw.h"
36 #include "util/box.h"
37 #include "util/error.h"
38 #include "util/memory.h"
39 #include "util/snprintf.h"
40 #include "util/string.h"
43 struct dom_renderer {
44 enum sgml_document_type doctype;
45 struct document *document;
47 struct conv_table *convert_table;
48 enum convert_string_mode convert_mode;
50 struct uri *base_uri;
52 unsigned char *source;
53 unsigned char *end;
55 unsigned char *position;
56 int canvas_x, canvas_y;
58 #ifdef HAVE_REGEX_H
59 regex_t url_regex;
60 unsigned int find_url:1;
61 #endif
62 struct screen_char styles[DOM_NODES];
64 /* RSS renderer variables */
65 struct dom_node *channel;
66 struct dom_node_list *items;
67 struct dom_node *item;
68 struct dom_node *node;
69 struct dom_string text;
/* POSIX extended regular expression used to spot URLs inside text nodes when
 * the plain_display_links option is enabled, and the flags it is compiled
 * with (case-insensitive, extended syntax). */
#define URL_REGEX "(file://|((f|ht|nt)tp(s)?|smb)://[[:alnum:]]+([-@:.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(:[[:digit:]]+)?)(/(%[[:xdigit:]]{2}|[-_~&=;?.a-z0-9])*)*"
#define URL_REGFLAGS (REG_ICASE | REG_EXTENDED)
75 static void
76 init_template(struct screen_char *template, struct document_options *options,
77 color_T background, color_T foreground, enum screen_char_attr attr)
79 struct color_pair colors = INIT_COLOR_PAIR(background, foreground);
81 template->attr = attr;
82 template->data = ' ';
83 set_term_color(template, &colors,
84 options->color_flags, options->color_mode);
88 /* Checks the user CSS for properties for each DOM node type name */
89 static inline void
90 init_dom_renderer(struct dom_renderer *renderer, struct document *document,
91 struct string *buffer, struct conv_table *convert_table)
93 enum dom_node_type type;
94 struct css_stylesheet *css = &default_stylesheet;
96 memset(renderer, 0, sizeof(*renderer));
98 renderer->document = document;
99 renderer->convert_table = convert_table;
100 renderer->convert_mode = document->options.plain ? CSM_NONE : CSM_DEFAULT;
101 renderer->source = buffer->source;
102 renderer->end = buffer->source + buffer->length;
103 renderer->position = renderer->source;
104 renderer->base_uri = get_uri_reference(document->uri);
106 #ifdef HAVE_REGEX_H
107 if (renderer->document->options.plain_display_links) {
108 if (regcomp(&renderer->url_regex, URL_REGEX, URL_REGFLAGS)) {
109 regfree(&renderer->url_regex);
110 } else {
111 renderer->find_url = 1;
114 #endif
116 for (type = 0; type < DOM_NODES; type++) {
117 struct screen_char *template = &renderer->styles[type];
118 color_T background = document->options.default_style.bg;
119 color_T foreground = document->options.default_style.fg;
120 enum screen_char_attr attr = 0;
121 static int i_want_struct_module_for_dom;
123 struct dom_string *name = get_dom_node_type_name(type);
124 struct css_selector *selector = NULL;
126 if (!i_want_struct_module_for_dom) {
127 static const unsigned char default_colors[] =
128 "document { color: yellow } "
129 "element { color: lightgreen } "
130 "entity-reference { color: red } "
131 "proc-instruction { color: red } "
132 "attribute { color: magenta } "
133 "comment { color: aqua } "
134 "cdata-section { color: orange2 } ";
135 unsigned char *styles = (unsigned char *) default_colors;
137 i_want_struct_module_for_dom = 1;
138 /* When someone will get here earlier than at 4am,
139 * this will be done in some init function, perhaps
140 * not overriding the user's default stylesheet. */
141 css_parse_stylesheet(css, NULL, styles, styles + sizeof(default_colors));
144 if (name)
145 if (is_dom_string_set(name))
146 selector = find_css_selector(&css->selectors,
147 CST_ELEMENT, CSR_ROOT,
148 name->string, name->length);
150 if (selector) {
151 struct css_property *property;
153 foreach (property, selector->properties) {
154 switch (property->type) {
155 case CSS_PT_BACKGROUND_COLOR:
156 case CSS_PT_BACKGROUND:
157 if (property->value_type == CSS_VT_COLOR)
158 background = property->value.color;
159 break;
160 case CSS_PT_COLOR:
161 foreground = property->value.color;
162 break;
163 case CSS_PT_FONT_WEIGHT:
164 if (property->value.font_attribute.add & AT_BOLD)
165 attr |= SCREEN_ATTR_BOLD;
166 break;
167 case CSS_PT_FONT_STYLE:
168 if (property->value.font_attribute.add & AT_UNDERLINE)
169 attr |= SCREEN_ATTR_UNDERLINE;
171 if (property->value.font_attribute.add & AT_ITALIC)
172 attr |= SCREEN_ATTR_ITALIC;
173 break;
174 case CSS_PT_TEXT_DECORATION:
175 if (property->value.font_attribute.add & AT_UNDERLINE)
176 attr |= SCREEN_ATTR_UNDERLINE;
177 break;
178 case CSS_PT_DISPLAY:
179 case CSS_PT_NONE:
180 case CSS_PT_TEXT_ALIGN:
181 case CSS_PT_WHITE_SPACE:
182 case CSS_PT_LAST:
183 break;
188 init_template(template, &document->options, background, foreground, attr);
192 static inline void
193 done_dom_renderer(struct dom_renderer *renderer)
195 #ifdef HAVE_REGEX_H
196 if (renderer->find_url)
197 regfree(&renderer->url_regex);
198 #endif
199 done_uri(renderer->base_uri);
203 /* Document maintenance */
205 static struct screen_char *
206 realloc_line(struct document *document, int x, int y)
208 struct line *line = realloc_lines(document, y);
210 if (!line) return NULL;
212 if (x > line->length) {
213 if (!ALIGN_LINE(&line->chars, line->length, x))
214 return NULL;
216 for (; line->length < x; line->length++) {
217 line->chars[line->length].data = ' ';
220 if (x > document->width) document->width = x;
223 return line->chars;
226 static struct node *
227 add_search_node(struct dom_renderer *renderer, int width)
229 struct node *node = mem_alloc(sizeof(*node));
231 if (node) {
232 set_box(&node->box, renderer->canvas_x, renderer->canvas_y,
233 width, 1);
234 add_to_list(renderer->document->nodes, node);
237 return node;
/* Shorthands for the canvas position kept in a struct dom_renderer:
 * X() and Y() are the current column and row (usable as lvalues), POS() is
 * the address of the document character at that position, and WIDTH() is the
 * column reached after adding @add characters to the current one. */
#define X(r)		((r)->canvas_x)
#define Y(r)		((r)->canvas_y)
#define POS(r)		(&(r)->document->data[Y(r)].chars[X(r)])
#define WIDTH(r, add)	((r)->canvas_x + (add))
245 static void
246 render_dom_line(struct dom_renderer *renderer, struct screen_char *template,
247 unsigned char *string, int length)
249 struct document *document = renderer->document;
250 struct conv_table *convert = renderer->convert_table;
251 enum convert_string_mode mode = renderer->convert_mode;
252 int x, charlen;
253 #ifdef CONFIG_UTF8
254 int utf8 = document->options.utf8;
255 unsigned char *end;
256 #endif /* CONFIG_UTF8 */
259 assert(renderer && template && string && length);
261 string = convert_string(convert, string, length, document->options.cp,
262 mode, &length, NULL, NULL);
263 if (!string) return;
265 if (!realloc_line(document, WIDTH(renderer, length), Y(renderer))) {
266 mem_free(string);
267 return;
270 add_search_node(renderer, length);
272 #ifdef CONFIG_UTF8
273 end = string + length;
274 #endif /* CONFIG_UTF8 */
275 for (x = 0, charlen = 1; x < length;x += charlen, renderer->canvas_x++) {
276 unsigned char *text = &string[x];
278 /* This is mostly to be able to break out so the indentation
279 * level won't get to high. */
280 switch (*text) {
281 case ASCII_TAB:
283 int tab_width = 7 - (X(renderer) & 7);
284 int width = WIDTH(renderer, length - x + tab_width);
286 template->data = ' ';
288 if (!realloc_line(document, width, Y(renderer)))
289 break;
291 /* Only loop over the expanded tab chars and let the
292 * ``main loop'' add the actual tab char. */
293 for (; tab_width-- > 0; renderer->canvas_x++)
294 copy_screen_chars(POS(renderer), template, 1);
295 charlen = 1;
296 break;
298 default:
299 #ifdef CONFIG_UTF8
300 if (utf8) {
301 unicode_val_T data;
302 charlen = utf8charlen(text);
303 data = utf8_to_unicode(&text, end);
305 template->data = (unicode_val_T)data;
307 if (unicode_to_cell(data) == 2) {
308 copy_screen_chars(POS(renderer),
309 template, 1);
311 X(renderer)++;
312 template->data = UCS_NO_CHAR;
315 } else
316 #endif /* CONFIG_UTF8 */
317 template->data = isscreensafe(*text) ? *text:'.';
320 copy_screen_chars(POS(renderer), template, 1);
322 mem_free(string);
/* Find the first line break in the @length bytes starting at @line.
 *
 * The line endings CR, CR LF and LF are recognized ('\r' and '\n' are the
 * characters otherwise known as ASCII_CR and ASCII_LF).
 *
 * @linelen receives the number of bytes in front of the line break, or
 * @length if there is none.
 *
 * Returns a pointer to the first byte after the line break, or NULL if the
 * text has no line break. Bytes at or beyond @line + @length are never
 * examined, so the result never points past the end of the text. */
static inline unsigned char *
split_dom_line(unsigned char *line, int length, int *linelen)
{
	unsigned char *end = line + length;
	unsigned char *pos;

	for (pos = line; pos < end; pos++) {
		int step = 0;

		if (pos[step] == '\r')
			step++;

		/* When the CR was the last byte of the text, the byte after
		 * it is outside of the text and must not be looked at. */
		if (pos + step < end && pos[step] == '\n')
			step++;

		if (step) {
			*linelen = pos - line;
			return pos + step;
		}
	}

	*linelen = length;
	return NULL;
}
352 static void
353 render_dom_text(struct dom_renderer *renderer, struct screen_char *template,
354 unsigned char *string, int length)
356 int linelen;
358 for (; length > 0; string += linelen, length -= linelen) {
359 unsigned char *newline = split_dom_line(string, length, &linelen);
361 if (linelen)
362 render_dom_line(renderer, template, string, linelen);
364 if (newline) {
365 renderer->canvas_y++;
366 renderer->canvas_x = 0;
367 linelen = newline - string;
/* Grow the links array of @doc, which currently has (doc)->nlinks entries,
 * so that it can hold @size links. The result is that of ALIGN_LINK();
 * add_dom_link() treats a false value as failure. */
#define realloc_document_links(doc, size) \
	ALIGN_LINK(&(doc)->links, (doc)->nlinks, size)
375 static inline struct link *
376 add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length,
377 unsigned char *uristring, int urilength)
379 struct document *document = renderer->document;
380 int x = renderer->canvas_x;
381 int y = renderer->canvas_y;
382 unsigned char *where;
383 struct link *link;
384 struct point *point;
385 struct screen_char template;
386 color_T fgcolor;
388 if (!realloc_document_links(document, document->nlinks + 1))
389 return NULL;
391 link = &document->links[document->nlinks];
393 if (!realloc_points(link, length))
394 return NULL;
396 uristring = convert_string(renderer->convert_table,
397 uristring, urilength, document->options.cp,
398 CSM_DEFAULT, NULL, NULL, NULL);
399 if (!uristring) return NULL;
401 where = join_urls(renderer->base_uri, uristring);
403 mem_free(uristring);
405 if (!where)
406 return NULL;
407 #ifdef CONFIG_GLOBHIST
408 else if (get_global_history_item(where))
409 fgcolor = document->options.default_vlink;
410 #endif
411 #ifdef CONFIG_BOOKMARKS
412 else if (get_bookmark(where))
413 fgcolor = document->options.default_bookmark_link;
414 #endif
415 else
416 fgcolor = document->options.default_link;
418 link->npoints = length;
419 link->type = LINK_HYPERTEXT;
420 link->where = where;
421 link->color.background = document->options.default_style.bg;
422 link->color.foreground = fgcolor;
423 link->number = document->nlinks;
425 init_template(&template, &document->options,
426 link->color.background, link->color.foreground, 0);
428 render_dom_text(renderer, &template, string, length);
430 for (point = link->points; length > 0; length--, point++, x++) {
431 point->x = x;
432 point->y = y;
435 document->nlinks++;
436 document->links_sorted = 0;
438 return link;
442 /* DOM Source Renderer */
/* True if the @len bytes starting at @str are completely inside of the
 * source buffer of renderer @r. Both @str and @len are evaluated more than
 * once. */
#define check_dom_node_source(r, str, len) \
	((r)->source <= (str) && (str) + (len) <= (r)->end)

/* Assert that the @len bytes starting at @str are inside of the source
 * buffer of renderer @r, reporting both ranges on failure. */
#define assert_source(r, str, len) \
	assertm(check_dom_node_source(r, str, len), "renderer[%p : %p] str[%p : %p]", \
		(r)->source, (r)->end, (str), (str) + (len))
451 static inline void
452 render_dom_flush(struct dom_renderer *renderer, unsigned char *string)
454 struct screen_char *template = &renderer->styles[DOM_NODE_TEXT];
455 int length = string - renderer->position;
457 assert_source(renderer, renderer->position, 0);
458 assert_source(renderer, string, 0);
460 if (length <= 0) return;
461 render_dom_text(renderer, template, renderer->position, length);
462 renderer->position = string;
464 assert_source(renderer, renderer->position, 0);
467 static inline void
468 render_dom_node_text(struct dom_renderer *renderer, struct screen_char *template,
469 struct dom_node *node)
471 unsigned char *string = node->string.string;
472 int length = node->string.length;
474 if (node->type == DOM_NODE_ENTITY_REFERENCE) {
475 string -= 1;
476 length += 2;
479 if (check_dom_node_source(renderer, string, length)) {
480 render_dom_flush(renderer, string);
481 renderer->position = string + length;
482 assert_source(renderer, renderer->position, 0);
485 render_dom_text(renderer, template, string, length);
#ifdef HAVE_REGEX_H
/* Render the string of @node like render_dom_node_text() does, but turn
 * every part of it that matches the URL regex into a link. */
static inline void
render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *node)
{
	struct screen_char *template = &renderer->styles[node->type];
	unsigned char *text = node->string.string;
	int remaining = node->string.length;
	unsigned char *copy;
	regmatch_t match;

	if (check_dom_node_source(renderer, text, remaining)) {
		render_dom_flush(renderer, text);
		renderer->position = text + remaining;
		assert_source(renderer, renderer->position, 0);
	}

	/* Match against a private copy made by memacpy(). If the copy cannot
	 * be made the original string is used; matches reaching beyond
	 * @remaining are rejected below. */
	copy = memacpy(text, remaining);
	if (copy)
		text = copy;

	for (;;) {
		int start, matchlen;

		if (remaining <= 0
		    || regexec(&renderer->url_regex, text, 1, &match, 0) != 0)
			break;

		start = match.rm_so;
		matchlen = match.rm_eo - match.rm_so;

		if (matchlen == 0 || start < 0 || match.rm_eo > remaining)
			break;

		/* Ordinary text in front of the URL. */
		if (start > 0)
			render_dom_text(renderer, template, text, start);

		text += start;
		remaining -= start;

		/* The URL is both the text and the target of the link. */
		add_dom_link(renderer, text, matchlen, text, matchlen);

		text += matchlen;
		remaining -= matchlen;
	}

	/* Whatever follows the last URL. */
	if (remaining > 0)
		render_dom_text(renderer, template, text, remaining);

	mem_free_if(copy);
}
#endif
535 static enum dom_code
536 render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
538 struct dom_renderer *renderer = stack->current->data;
540 assert(node && renderer && renderer->document);
542 #ifdef HAVE_REGEX_H
543 if (renderer->find_url
544 && (node->type == DOM_NODE_TEXT
545 || node->type == DOM_NODE_CDATA_SECTION
546 || node->type == DOM_NODE_COMMENT)) {
547 render_dom_node_enhanced_text(renderer, node);
548 } else
549 #endif
550 render_dom_node_text(renderer, &renderer->styles[node->type], node);
552 return DOM_CODE_OK;
555 /* This callback is also used for rendering processing instruction nodes. */
556 static enum dom_code
557 render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *data)
559 struct dom_renderer *renderer = stack->current->data;
561 assert(node && renderer && renderer->document);
563 render_dom_node_text(renderer, &renderer->styles[node->type], node);
565 return DOM_CODE_OK;
568 enum dom_code
569 render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, void *data)
571 struct dom_renderer *renderer = stack->current->data;
572 struct dom_stack_state *state = get_dom_stack_top(stack);
573 struct sgml_parser_state *pstate = get_dom_stack_state_data(stack->contexts[0], state);
574 struct dom_scanner_token *token = &pstate->end_token;
575 unsigned char *string = token->string.string;
576 int length = token->string.length;
578 assert(node && renderer && renderer->document);
580 if (!string || !length)
581 return DOM_CODE_OK;
583 if (check_dom_node_source(renderer, string, length)) {
584 render_dom_flush(renderer, string);
585 renderer->position = string + length;
586 assert_source(renderer, renderer->position, 0);
589 render_dom_text(renderer, &renderer->styles[node->type], string, length);
591 return DOM_CODE_OK;
594 static void
595 set_base_uri(struct dom_renderer *renderer, unsigned char *value, size_t valuelen)
597 unsigned char *href = memacpy(value, valuelen);
598 unsigned char *uristring;
599 struct uri *uri;
601 if (!href) return;
602 uristring = join_urls(renderer->base_uri, href);
603 mem_free(href);
605 if (!uristring) return;
606 uri = get_uri(uristring, 0);
607 mem_free(uristring);
609 if (!uri) return;
611 done_uri(renderer->base_uri);
612 renderer->base_uri = uri;
615 enum dom_code
616 render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data)
618 struct dom_renderer *renderer = stack->current->data;
619 struct screen_char *template = &renderer->styles[node->type];
621 assert(node && renderer->document);
623 render_dom_node_text(renderer, template, node);
625 if (is_dom_string_set(&node->data.attribute.value)) {
626 int quoted = node->data.attribute.quoted == 1;
627 unsigned char *value = node->data.attribute.value.string - quoted;
628 int valuelen = node->data.attribute.value.length + quoted * 2;
630 if (check_dom_node_source(renderer, value, 0)) {
631 render_dom_flush(renderer, value);
632 renderer->position = value + valuelen;
633 assert_source(renderer, renderer->position, 0);
636 if (node->data.attribute.reference
637 && valuelen - quoted * 2 > 0) {
638 int skips;
640 /* Need to flush the first quoting delimiter and any
641 * leading whitespace so that the renderers x position
642 * is at the start of the value string. */
643 for (skips = 0; skips < valuelen; skips++) {
644 if ((quoted && skips == 0)
645 || isspace(value[skips])
646 || value[skips] < ' ')
647 continue;
649 break;
652 if (skips > 0) {
653 render_dom_text(renderer, template, value, skips);
654 value += skips;
655 valuelen -= skips;
658 /* Figure out what should be skipped after the actual
659 * link text. */
660 for (skips = 0; skips < valuelen; skips++) {
661 if ((quoted && skips == 0)
662 || isspace(value[valuelen - skips - 1])
663 || value[valuelen - skips - 1] < ' ')
664 continue;
666 break;
669 if (renderer->doctype == SGML_DOCTYPE_HTML
670 && node->data.attribute.type == HTML_ATTRIBUTE_HREF
671 && node->parent->data.element.type == HTML_ELEMENT_BASE) {
672 set_base_uri(renderer, value, valuelen - skips);
675 add_dom_link(renderer, value, valuelen - skips,
676 value, valuelen - skips);
678 if (skips > 0) {
679 value += valuelen - skips;
680 render_dom_text(renderer, template, value, skips);
682 } else {
683 render_dom_text(renderer, template, value, valuelen);
687 return DOM_CODE_OK;
690 enum dom_code
691 render_dom_cdata_source(struct dom_stack *stack, struct dom_node *node, void *data)
693 struct dom_renderer *renderer = stack->current->data;
694 unsigned char *string = node->string.string;
696 assert(node && renderer && renderer->document);
698 /* Highlight the 'CDATA' part of <![CDATA[ if it is there. */
699 if (check_dom_node_source(renderer, string - 6, 6)) {
700 render_dom_flush(renderer, string - 6);
701 render_dom_text(renderer, &renderer->styles[DOM_NODE_ATTRIBUTE], string - 6, 5);
702 renderer->position = string - 1;
703 assert_source(renderer, renderer->position, 0);
706 render_dom_node_text(renderer, &renderer->styles[node->type], node);
708 return DOM_CODE_OK;
711 enum dom_code
712 render_dom_document_end(struct dom_stack *stack, struct dom_node *node, void *data)
714 struct dom_renderer *renderer = stack->current->data;
716 /* If there are no non-element nodes after the last element node make
717 * sure that we flush to the end of the cache entry source including
718 * the '>' of the last element tag if it has one. (bug 519) */
719 if (check_dom_node_source(renderer, renderer->position, 0)) {
720 render_dom_flush(renderer, renderer->end);
723 /* It is not necessary to return DOM_CODE_FREE_NODE here.
724 * Because the parser was created with the SGML_PARSER_STREAM
725 * type, the stack has the DOM_STACK_FLAG_FREE_NODES flag and
726 * implicitly frees all nodes popped from it. */
727 return DOM_CODE_OK;
730 static struct dom_stack_context_info dom_source_renderer_context_info = {
731 /* Object size: */ 0,
732 /* Push: */
734 /* */ NULL,
735 /* DOM_NODE_ELEMENT */ render_dom_element_source,
736 /* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source,
737 /* DOM_NODE_TEXT */ render_dom_node_source,
738 /* DOM_NODE_CDATA_SECTION */ render_dom_cdata_source,
739 /* DOM_NODE_ENTITY_REFERENCE */ render_dom_node_source,
740 /* DOM_NODE_ENTITY */ render_dom_node_source,
741 /* DOM_NODE_PROC_INSTRUCTION */ render_dom_element_source,
742 /* DOM_NODE_COMMENT */ render_dom_node_source,
743 /* DOM_NODE_DOCUMENT */ NULL,
744 /* DOM_NODE_DOCUMENT_TYPE */ render_dom_node_source,
745 /* DOM_NODE_DOCUMENT_FRAGMENT */ render_dom_node_source,
746 /* DOM_NODE_NOTATION */ render_dom_node_source,
748 /* Pop: */
750 /* */ NULL,
751 /* DOM_NODE_ELEMENT */ render_dom_element_end_source,
752 /* DOM_NODE_ATTRIBUTE */ NULL,
753 /* DOM_NODE_TEXT */ NULL,
754 /* DOM_NODE_CDATA_SECTION */ NULL,
755 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
756 /* DOM_NODE_ENTITY */ NULL,
757 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
758 /* DOM_NODE_COMMENT */ NULL,
759 /* DOM_NODE_DOCUMENT */ render_dom_document_end,
760 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
761 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
762 /* DOM_NODE_NOTATION */ NULL,
767 /* DOM RSS Renderer */
/* DOM configuration flags handed to add_dom_config_normalizer() when an RSS
 * document is rendered: normalize both whitespace and characters. */
#define RSS_CONFIG_FLAGS \
	(DOM_CONFIG_NORMALIZE_WHITESPACE | DOM_CONFIG_NORMALIZE_CHARACTERS)
772 enum dom_code
773 dom_rss_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
775 struct dom_renderer *renderer = stack->current->data;
777 assert(node && renderer && renderer->document);
779 switch (node->data.element.type) {
780 case RSS_ELEMENT_CHANNEL:
781 /* The stack should have: #document * channel */
782 if (stack->depth != 3)
783 break;
785 if (!renderer->channel) {
786 renderer->channel = node;
788 break;
790 case RSS_ELEMENT_ITEM:
791 /* The stack should have: #document * channel item */
792 #if 0
793 /* Don't be so strict ... */
794 if (stack->depth != 4)
795 break;
796 #endif
797 /* ... but be exclusive. */
798 if (renderer->item)
799 break;
800 add_to_dom_node_list(&renderer->items, node, -1);
801 renderer->item = node;
802 break;
804 case RSS_ELEMENT_LINK:
805 case RSS_ELEMENT_DESCRIPTION:
806 case RSS_ELEMENT_TITLE:
807 case RSS_ELEMENT_AUTHOR:
808 case RSS_ELEMENT_PUBDATE:
809 if (!node->parent || renderer->node != node->parent)
810 break;
812 renderer->node = node;
815 return DOM_CODE_OK;
818 enum dom_code
819 dom_rss_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
821 struct dom_renderer *renderer = stack->current->data;
822 struct dom_node_list **list;
824 assert(node && renderer && renderer->document);
826 switch (node->data.element.type) {
827 case RSS_ELEMENT_ITEM:
828 if (is_dom_string_set(&renderer->text))
829 done_dom_string(&renderer->text);
830 renderer->item = NULL;
831 break;
833 case RSS_ELEMENT_LINK:
834 case RSS_ELEMENT_DESCRIPTION:
835 case RSS_ELEMENT_TITLE:
836 case RSS_ELEMENT_AUTHOR:
837 case RSS_ELEMENT_PUBDATE:
838 if (!is_dom_string_set(&renderer->text)
839 || !node->parent
840 || renderer->item != node->parent
841 || renderer->node != node)
842 break;
844 /* Replace any child nodes with the normalized text node. */
845 list = get_dom_node_list(node->parent, node);
846 done_dom_node_list(*list);
847 if (is_dom_string_set(&renderer->text)) {
848 if (!add_dom_node(node, DOM_NODE_TEXT, &renderer->text))
849 done_dom_string(&renderer->text);
851 renderer->node = NULL;
852 break;
854 default:
855 break;
858 return DOM_CODE_OK;
862 static struct dom_string *
863 get_rss_text(struct dom_node *node, enum rss_element_type type)
865 node = get_dom_node_child(node, DOM_NODE_ELEMENT, type);
867 if (!node) return NULL;
869 node = get_dom_node_child(node, DOM_NODE_TEXT, 0);
871 return node ? &node->string: NULL;
874 static void
875 render_rss_item(struct dom_renderer *renderer, struct dom_node *item)
877 struct dom_string *title = get_rss_text(item, RSS_ELEMENT_TITLE);
878 struct dom_string *link = get_rss_text(item, RSS_ELEMENT_LINK);
879 struct dom_string *author = get_rss_text(item, RSS_ELEMENT_AUTHOR);
880 struct dom_string *date = get_rss_text(item, RSS_ELEMENT_PUBDATE);
882 if (title && is_dom_string_set(title)) {
883 if (item == renderer->channel) {
884 unsigned char *str;
886 str = convert_string(renderer->convert_table,
887 title->string, title->length,
888 renderer->document->options.cp,
889 CSM_DEFAULT, NULL, NULL, NULL);
890 if (str)
891 renderer->document->title = str;
893 render_dom_text(renderer, &renderer->styles[DOM_NODE_ELEMENT],
894 title->string, title->length);
897 if (link && is_dom_string_set(link)) {
898 X(renderer)++;
899 add_dom_link(renderer, "[link]", 6, link->string, link->length);
902 /* New line, and indent */
903 Y(renderer)++;
904 X(renderer) = 0;
906 if (author && is_dom_string_set(author)) {
907 render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
908 author->string, author->length);
911 if (date && is_dom_string_set(date)) {
912 if (author && is_dom_string_set(author)) {
913 render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
914 " - ", 3);
917 render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
918 date->string, date->length);
921 if ((author && is_dom_string_set(author))
922 || (date && is_dom_string_set(date))) {
923 /* New line, and indent */
924 Y(renderer)++;
925 X(renderer) = 0;
929 enum dom_code
930 dom_rss_pop_document(struct dom_stack *stack, struct dom_node *root, void *data)
932 struct dom_renderer *renderer = stack->current->data;
934 if (!renderer->channel)
935 return DOM_CODE_OK;
937 render_rss_item(renderer, renderer->channel);
939 if (renderer->items) {
940 struct dom_node *node;
941 int index;
943 foreach_dom_node (renderer->items, node, index) {
944 Y(renderer)++;
945 X(renderer) = 0;
946 render_rss_item(renderer, node);
950 if (is_dom_string_set(&renderer->text))
951 done_dom_string(&renderer->text);
952 mem_free_if(renderer->items);
954 /* ELinks does not provide any sort of DOM access to the RSS
955 * document after it has been rendered. Tell the caller to
956 * free the document node and all of its children. Otherwise,
957 * they would leak. */
958 return DOM_CODE_FREE_NODE;
962 static struct dom_stack_context_info dom_rss_renderer_context_info = {
963 /* Object size: */ 0,
964 /* Push: */
966 /* */ NULL,
967 /* DOM_NODE_ELEMENT */ dom_rss_push_element,
968 /* DOM_NODE_ATTRIBUTE */ NULL,
969 /* DOM_NODE_TEXT */ NULL,
970 /* DOM_NODE_CDATA_SECTION */ NULL,
971 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
972 /* DOM_NODE_ENTITY */ NULL,
973 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
974 /* DOM_NODE_COMMENT */ NULL,
975 /* DOM_NODE_DOCUMENT */ NULL,
976 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
977 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
978 /* DOM_NODE_NOTATION */ NULL,
980 /* Pop: */
982 /* */ NULL,
983 /* DOM_NODE_ELEMENT */ dom_rss_pop_element,
984 /* DOM_NODE_ATTRIBUTE */ NULL,
985 /* DOM_NODE_TEXT */ NULL,
986 /* DOM_NODE_CDATA_SECTION */ NULL,
987 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
988 /* DOM_NODE_ENTITY */ NULL,
989 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
990 /* DOM_NODE_COMMENT */ NULL,
991 /* DOM_NODE_DOCUMENT */ dom_rss_pop_document,
992 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
993 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
994 /* DOM_NODE_NOTATION */ NULL,
999 static void
1000 get_doctype(struct dom_renderer *renderer, struct cache_entry *cached)
1002 if (!c_strcasecmp("application/rss+xml", cached->content_type)) {
1003 renderer->doctype = SGML_DOCTYPE_RSS;
1005 } else if (!c_strcasecmp("application/docbook+xml",
1006 cached->content_type)) {
1007 renderer->doctype = SGML_DOCTYPE_DOCBOOK;
1009 } else if (!c_strcasecmp("application/xbel+xml", cached->content_type)
1010 || !c_strcasecmp("application/x-xbel", cached->content_type)
1011 || !c_strcasecmp("application/xbel", cached->content_type)) {
1012 renderer->doctype = SGML_DOCTYPE_XBEL;
1014 } else {
1015 assertm(!c_strcasecmp("text/html", cached->content_type)
1016 || !c_strcasecmp("application/xhtml+xml",
1017 cached->content_type),
1018 "Couldn't resolve doctype '%s'", cached->content_type);
1020 renderer->doctype = SGML_DOCTYPE_HTML;
1024 /* Shared multiplexor between renderers */
1025 void
1026 render_dom_document(struct cache_entry *cached, struct document *document,
1027 struct string *buffer)
1029 unsigned char *head = empty_string_or_(cached->head);
1030 struct dom_renderer renderer;
1031 struct dom_config config;
1032 struct conv_table *convert_table;
1033 struct sgml_parser *parser;
1034 enum sgml_parser_type parser_type;
1035 unsigned char *string = struri(cached->uri);
1036 size_t length = strlen(string);
1037 struct dom_string uri = INIT_DOM_STRING(string, length);
1038 enum dom_code code;
1040 convert_table = get_convert_table(head, document->options.cp,
1041 document->options.assume_cp,
1042 &document->cp,
1043 &document->cp_status,
1044 document->options.hard_assume);
1046 init_dom_renderer(&renderer, document, buffer, convert_table);
1048 document->bgcolor = document->options.default_style.bg;
1049 #ifdef CONFIG_UTF8
1050 document->options.utf8 = is_cp_utf8(document->options.cp);
1051 #endif /* CONFIG_UTF8 */
1053 if (document->options.plain)
1054 parser_type = SGML_PARSER_STREAM;
1055 else
1056 parser_type = SGML_PARSER_TREE;
1058 get_doctype(&renderer, cached);
1060 parser = init_sgml_parser(parser_type, renderer.doctype, &uri, 0);
1061 if (!parser) return;
1063 if (document->options.plain) {
1064 add_dom_stack_context(&parser->stack, &renderer,
1065 &dom_source_renderer_context_info);
1067 } else if (renderer.doctype == SGML_DOCTYPE_RSS) {
1068 add_dom_stack_context(&parser->stack, &renderer,
1069 &dom_rss_renderer_context_info);
1070 add_dom_config_normalizer(&parser->stack, &config, RSS_CONFIG_FLAGS);
1073 /* FIXME: When rendering this way we don't really care about the code.
1074 * However, it will be useful when we will be able to also
1075 * incrementally parse new data. This will require the parser to live
1076 * during the fetching of data. */
1077 code = parse_sgml(parser, buffer->source, buffer->length, 1);
1078 if (parser->root) {
1079 assert(parser->stack.depth == 1);
1081 get_dom_stack_top(&parser->stack)->immutable = 0;
1082 /* For SGML_PARSER_STREAM this will free the DOM
1083 * root node. */
1084 pop_dom_node(&parser->stack);
1087 done_dom_renderer(&renderer);
1088 done_sgml_parser(parser);