Fix compilation using --enable-html-highlight.
[elinks.git] / src / document / dom / renderer.c
blob30171b4f62d6f83ab31565bfbaabb83b71a7553a
1 /* DOM document renderer */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <sys/types.h> /* FreeBSD needs this before regex.h */
8 #ifdef HAVE_REGEX_H
9 #include <regex.h>
10 #endif
11 #include <string.h>
13 #include "elinks.h"
15 #include "bookmarks/bookmarks.h" /* get_bookmark() */
16 #include "cache/cache.h"
17 #include "document/css/css.h"
18 #include "document/css/parser.h"
19 #include "document/css/property.h"
20 #include "document/css/stylesheet.h"
21 #include "document/docdata.h"
22 #include "document/document.h"
23 #include "document/dom/renderer.h"
24 #include "document/renderer.h"
25 #include "dom/configuration.h"
26 #include "dom/scanner.h"
27 #include "dom/sgml/parser.h"
28 #include "dom/sgml/html/html.h"
29 #include "dom/sgml/rss/rss.h"
30 #include "dom/node.h"
31 #include "dom/stack.h"
32 #include "intl/charsets.h"
33 #include "globhist/globhist.h" /* get_global_history_item() */
34 #include "protocol/uri.h"
35 #include "terminal/draw.h"
36 #include "util/box.h"
37 #include "util/error.h"
38 #include "util/memory.h"
39 #include "util/snprintf.h"
40 #include "util/string.h"
/* State shared by the DOM source renderer and the RSS renderer while one
 * document is being rendered. It is zeroed and filled in by
 * init_dom_renderer() and released by done_dom_renderer(). */
43 struct dom_renderer {
/* SGML flavour of the document; assigned by get_doctype(). */
44 enum sgml_document_type doctype;
/* Document receiving the rendered lines, links and search nodes. */
45 struct document *document;
/* Conversion table and mode passed to convert_string() before drawing.
 * The mode is CSM_NONE for plain rendering and CSM_DEFAULT otherwise. */
47 struct conv_table *convert_table;
48 enum convert_string_mode convert_mode;
/* URI that link targets are joined against; set_base_uri() may replace it. */
50 struct uri *base_uri;
/* Start and one-past-the-end of the buffer given to init_dom_renderer(). */
52 unsigned char *source;
53 unsigned char *end;
/* First byte of the source that has not been rendered yet; advanced by
 * render_dom_flush() and the node callbacks. */
55 unsigned char *position;
/* Cell where the next character will be drawn. */
56 int canvas_x, canvas_y;
58 #ifdef HAVE_REGEX_H
/* Compiled URL_REGEX. find_url is set only when compilation succeeded and
 * the plain_display_links option is enabled. */
59 regex_t url_regex;
60 unsigned int find_url:1;
61 #endif
/* Drawing template for each DOM node type, indexed by enum dom_node_type. */
62 struct screen_char styles[DOM_NODES];
64 /* RSS renderer variables */
/* First channel element seen at the expected stack depth. */
65 struct dom_node *channel;
/* Item elements collected by dom_rss_push_element(). */
66 struct dom_node_list *items;
/* Item element currently open, or NULL between items. */
67 struct dom_node *item;
/* Set by dom_rss_push_element() and cleared by dom_rss_pop_element(). */
68 struct dom_node *node;
/* Text that dom_rss_pop_element() attaches as a text node.
 * NOTE(review): nothing in this file fills it in; presumably the
 * normalizer context does -- confirm. */
69 struct dom_string text;
72 #define URL_REGEX "(file://|((f|ht|nt)tp(s)?|smb)://[[:alnum:]]+([-@:.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(:[[:digit:]]+)?)(/(%[[:xdigit:]]{2}|[-_~&=;?.a-z0-9])*)*"
73 #define URL_REGFLAGS (REG_ICASE | REG_EXTENDED)
75 static void
76 init_template(struct screen_char *template, struct document_options *options,
77 color_T background, color_T foreground, enum screen_char_attr attr)
79 struct color_pair colors = INIT_COLOR_PAIR(background, foreground);
81 template->attr = attr;
82 template->data = ' ';
83 set_term_color(template, &colors,
84 options->color_flags, options->color_mode);
88 /* Checks the user CSS for properties for each DOM node type name */
/* Zeroes @renderer, records the document, conversion table and source
 * buffer bounds, takes a reference on the document URI as base URI,
 * optionally compiles the URL regex and finally builds one drawing template
 * per DOM node type from the default stylesheet.
 *
 * @renderer		renderer state to initialize
 * @document		document that will receive the rendered output
 * @buffer		source text; source/end/position are set from it
 * @convert_table	character set conversion table used when drawing */
89 static inline void
90 init_dom_renderer(struct dom_renderer *renderer, struct document *document,
91 struct string *buffer, struct conv_table *convert_table)
93 enum dom_node_type type;
94 struct css_stylesheet *css = &default_stylesheet;
96 memset(renderer, 0, sizeof(*renderer));
98 renderer->document = document;
99 renderer->convert_table = convert_table;
100 renderer->convert_mode = document->options.plain ? CSM_NONE : CSM_DEFAULT;
101 renderer->source = buffer->source;
102 renderer->end = buffer->source + buffer->length;
103 renderer->position = renderer->source;
104 renderer->base_uri = get_uri_reference(document->uri);
106 #ifdef HAVE_REGEX_H
/* URL detection is only enabled when the regex compiles; find_url stays 0
 * (from the memset above) otherwise.
 * NOTE(review): regfree() is called on a regex_t whose compilation failed;
 * POSIX leaves the contents of the pattern buffer undefined in that case --
 * confirm this is safe on all supported libcs. */
107 if (renderer->document->options.plain_display_links) {
108 if (regcomp(&renderer->url_regex, URL_REGEX, URL_REGFLAGS)) {
109 regfree(&renderer->url_regex);
110 } else {
111 renderer->find_url = 1;
114 #endif
/* Build the template for every node type, starting from the document's
 * default colors and no attributes. */
116 for (type = 0; type < DOM_NODES; type++) {
117 struct screen_char *template = &renderer->styles[type];
118 color_T background = document->options.default_style.bg;
119 color_T foreground = document->options.default_style.fg;
120 enum screen_char_attr attr = 0;
/* One-shot flag: the built-in colors are parsed into the default
 * stylesheet only the first time this function runs. */
121 static int i_want_struct_module_for_dom;
123 struct dom_string *name = get_dom_node_type_name(type);
124 struct css_selector *selector = NULL;
126 if (!i_want_struct_module_for_dom) {
127 static const unsigned char default_colors[] =
128 "document { color: yellow } "
129 "element { color: lightgreen } "
130 "entity-reference { color: red } "
131 "proc-instruction { color: red } "
132 "attribute { color: magenta } "
133 "comment { color: aqua } "
134 "cdata-section { color: orange2 } ";
135 unsigned char *styles = (unsigned char *) default_colors;
137 i_want_struct_module_for_dom = 1;
138 /* When someone will get here earlier than at 4am,
139 * this will be done in some init function, perhaps
140 * not overriding the user's default stylesheet. */
/* NOTE(review): sizeof(default_colors) counts the terminating NUL, so the
 * end pointer passed here is one past the NUL byte -- confirm the parser
 * expects that. */
141 css_parse_stylesheet(css, NULL, styles, styles + sizeof(default_colors));
/* Look up a root element selector named after the node type. */
144 if (name)
145 if (is_dom_string_set(name))
146 selector = find_css_selector(&css->selectors,
147 CST_ELEMENT, CSR_ROOT,
148 name->string, name->length);
/* Let the selector's properties override the default colors and add
 * attributes. */
150 if (selector) {
151 struct css_property *property;
153 foreach (property, selector->properties) {
154 switch (property->type) {
155 case CSS_PT_BACKGROUND_COLOR:
156 case CSS_PT_BACKGROUND:
157 if (property->value_type == CSS_VT_COLOR)
158 background = property->value.color;
159 break;
/* NOTE(review): unlike the background case, the value type is not checked
 * before reading value.color here. */
160 case CSS_PT_COLOR:
161 foreground = property->value.color;
162 break;
163 case CSS_PT_FONT_WEIGHT:
164 if (property->value.font_attribute.add & AT_BOLD)
165 attr |= SCREEN_ATTR_BOLD;
166 break;
167 case CSS_PT_FONT_STYLE:
168 if (property->value.font_attribute.add & AT_UNDERLINE)
169 attr |= SCREEN_ATTR_UNDERLINE;
171 if (property->value.font_attribute.add & AT_ITALIC)
172 attr |= SCREEN_ATTR_ITALIC;
173 break;
174 case CSS_PT_TEXT_DECORATION:
175 if (property->value.font_attribute.add & AT_UNDERLINE)
176 attr |= SCREEN_ATTR_UNDERLINE;
177 break;
/* Properties that have no effect on the DOM templates. */
178 case CSS_PT_DISPLAY:
179 case CSS_PT_NONE:
180 case CSS_PT_TEXT_ALIGN:
181 case CSS_PT_WHITE_SPACE:
182 case CSS_PT_LAST:
183 break;
188 init_template(template, &document->options, background, foreground, attr);
192 static inline void
193 done_dom_renderer(struct dom_renderer *renderer)
195 #ifdef HAVE_REGEX_H
196 if (renderer->find_url)
197 regfree(&renderer->url_regex);
198 #endif
199 done_uri(renderer->base_uri);
203 /* Document maintainance */
205 static struct screen_char *
206 realloc_line(struct document *document, int x, int y)
208 struct line *line = realloc_lines(document, y);
210 if (!line) return NULL;
212 if (x > line->length) {
213 if (!ALIGN_LINE(&line->chars, line->length, x))
214 return NULL;
216 for (; line->length < x; line->length++) {
217 line->chars[line->length].data = ' ';
220 if (x > document->width) document->width = x;
223 return line->chars;
226 static struct node *
227 add_search_node(struct dom_renderer *renderer, int width)
229 struct node *node = mem_alloc(sizeof(*node));
231 if (node) {
232 set_box(&node->box, renderer->canvas_x, renderer->canvas_y,
233 width, 1);
234 add_to_list(renderer->document->nodes, node);
237 return node;
/* Shorthands for the current canvas column and row, the document cell at
 * that position, and the column reached after adding @add cells. */
240 #define X(renderer) ((renderer)->canvas_x)
241 #define Y(renderer) ((renderer)->canvas_y)
242 #define POS(renderer) (&(renderer)->document->data[Y(renderer)].chars[X(renderer)])
243 #define WIDTH(renderer, add) ((renderer)->canvas_x + (add))
/* Draw @length bytes of @string on the current canvas row using @template.
 *
 * The text is first passed through convert_string(), which may change its
 * length; the converted copy is freed before returning. The row is grown to
 * hold the converted text, a search node covering it is added and each
 * character is copied into the document, advancing canvas_x. The caller is
 * expected to pass text without line breaks (see render_dom_text()).
 *
 * @renderer	renderer state; canvas_x is advanced past the drawn text
 * @template	cell template; its data member is overwritten per character
 * @string	text to draw, not necessarily NUL-terminated
 * @length	number of bytes in @string, must be non-zero */
245 static void
246 render_dom_line(struct dom_renderer *renderer, struct screen_char *template,
247 unsigned char *string, int length)
249 struct document *document = renderer->document;
250 struct conv_table *convert = renderer->convert_table;
251 enum convert_string_mode mode = renderer->convert_mode;
252 int x, charlen;
253 #ifdef CONFIG_UTF8
254 int utf8 = document->options.utf8;
255 unsigned char *end;
256 #endif /* CONFIG_UTF8 */
/* NOTE(review): renderer has already been dereferenced by the initializers
 * above by the time this assertion runs. */
259 assert(renderer && template && string && length);
/* From here on string and length refer to the converted, allocated copy. */
261 string = convert_string(convert, string, length, document->options.cp,
262 mode, &length, NULL, NULL);
263 if (!string) return;
265 if (!realloc_line(document, WIDTH(renderer, length), Y(renderer))) {
266 mem_free(string);
267 return;
270 add_search_node(renderer, length);
272 #ifdef CONFIG_UTF8
273 end = string + length;
274 #endif /* CONFIG_UTF8 */
/* x walks the bytes of the converted text, charlen is the number of bytes
 * the current character occupies, and canvas_x advances by one cell per
 * iteration in addition to any cells added inside the loop body. */
275 for (x = 0, charlen = 1; x < length;x += charlen, renderer->canvas_x++) {
276 unsigned char *text = &string[x];
278 /* This is mostly to be able to break out so the indentation
279 * level won't get to high. */
280 switch (*text) {
281 case ASCII_TAB:
/* Number of filler cells drawn here; together with the cell drawn after the
 * switch this moves canvas_x to the next multiple of eight. The line is
 * grown to fit the remaining text plus the filler. */
283 int tab_width = 7 - (X(renderer) & 7);
284 int width = WIDTH(renderer, length - x + tab_width);
286 template->data = ' ';
/* When growing the line fails only the single cell after the switch is
 * drawn for this tab. */
288 if (!realloc_line(document, width, Y(renderer)))
289 break;
291 /* Only loop over the expanded tab chars and let the
292 * ``main loop'' add the actual tab char. */
293 for (; tab_width-- > 0; renderer->canvas_x++)
294 copy_screen_chars(POS(renderer), template, 1);
295 charlen = 1;
296 break;
298 default:
299 #ifdef CONFIG_UTF8
300 if (utf8) {
301 unicode_val_T data;
302 charlen = utf8charlen(text);
303 data = utf8_to_unicode(&text, end);
305 template->data = (unicode_val_T)data;
/* A character occupying two cells is drawn in the first cell here; the
 * second cell, drawn after the switch, gets UCS_NO_CHAR. */
307 if (unicode_to_cell(data) == 2) {
308 copy_screen_chars(POS(renderer),
309 template, 1);
311 X(renderer)++;
312 template->data = UCS_NO_CHAR;
315 } else
316 #endif /* CONFIG_UTF8 */
/* Bytes that are not screen safe are shown as a dot. */
317 template->data = isscreensafe(*text) ? *text:'.';
320 copy_screen_chars(POS(renderer), template, 1);
322 mem_free(string);
325 static inline unsigned char *
326 split_dom_line(unsigned char *line, int length, int *linelen)
328 unsigned char *end = line + length;
329 unsigned char *pos;
331 /* End of line detection.
332 * We handle \r, \r\n and \n types here. */
333 for (pos = line; pos < end; pos++) {
334 int step = 0;
336 if (pos[step] == ASCII_CR)
337 step++;
339 if (pos[step] == ASCII_LF)
340 step++;
342 if (step) {
343 *linelen = pos - line;
344 return pos + step;
348 *linelen = length;
349 return NULL;
352 static void
353 render_dom_text(struct dom_renderer *renderer, struct screen_char *template,
354 unsigned char *string, int length)
356 int linelen;
358 for (; length > 0; string += linelen, length -= linelen) {
359 unsigned char *newline = split_dom_line(string, length, &linelen);
361 if (linelen)
362 render_dom_line(renderer, template, string, linelen);
364 if (newline) {
365 renderer->canvas_y++;
366 renderer->canvas_x = 0;
367 linelen = newline - string;
372 #define realloc_document_links(doc, size) \
373 ALIGN_LINK(&(doc)->links, (doc)->nlinks, size)
375 static inline struct link *
376 add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length,
377 unsigned char *uristring, int urilength)
379 struct document *document = renderer->document;
380 int x = renderer->canvas_x;
381 int y = renderer->canvas_y;
382 unsigned char *where;
383 struct link *link;
384 struct point *point;
385 struct screen_char template;
386 color_T fgcolor;
388 if (!realloc_document_links(document, document->nlinks + 1))
389 return NULL;
391 link = &document->links[document->nlinks];
393 if (!realloc_points(link, length))
394 return NULL;
396 uristring = convert_string(renderer->convert_table,
397 uristring, urilength, document->options.cp,
398 CSM_DEFAULT, NULL, NULL, NULL);
399 if (!uristring) return NULL;
401 where = join_urls(renderer->base_uri, uristring);
403 mem_free(uristring);
405 if (!where)
406 return NULL;
407 #ifdef CONFIG_GLOBHIST
408 else if (get_global_history_item(where))
409 fgcolor = document->options.default_vlink;
410 #endif
411 #ifdef CONFIG_BOOKMARKS
412 else if (get_bookmark(where))
413 fgcolor = document->options.default_bookmark_link;
414 #endif
415 else
416 fgcolor = document->options.default_link;
418 link->npoints = length;
419 link->type = LINK_HYPERTEXT;
420 link->where = where;
421 link->color.background = document->options.default_style.bg;
422 link->color.foreground = fgcolor;
423 link->number = document->nlinks;
425 init_template(&template, &document->options,
426 link->color.background, link->color.foreground, 0);
428 render_dom_text(renderer, &template, string, length);
430 for (point = link->points; length > 0; length--, point++, x++) {
431 point->x = x;
432 point->y = y;
435 document->nlinks++;
436 document->links_sorted = 0;
438 return link;
442 /* DOM Source Renderer */
444 #define check_dom_node_source(renderer, str, len) \
445 ((renderer)->source <= (str) && (str) + (len) <= (renderer)->end)
447 #define assert_source(renderer, str, len) \
448 assertm(check_dom_node_source(renderer, str, len), "renderer[%p : %p] str[%p : %p]", \
449 (renderer)->source, (renderer)->end, (str), (str) + (len))
451 static inline void
452 render_dom_flush(struct dom_renderer *renderer, unsigned char *string)
454 struct screen_char *template = &renderer->styles[DOM_NODE_TEXT];
455 int length = string - renderer->position;
457 assert_source(renderer, renderer->position, 0);
458 assert_source(renderer, string, 0);
460 if (length <= 0) return;
461 render_dom_text(renderer, template, renderer->position, length);
462 renderer->position = string;
464 assert_source(renderer, renderer->position, 0);
467 static inline void
468 render_dom_node_text(struct dom_renderer *renderer, struct screen_char *template,
469 struct dom_node *node)
471 unsigned char *string = node->string.string;
472 int length = node->string.length;
474 if (node->type == DOM_NODE_ENTITY_REFERENCE) {
475 string -= 1;
476 length += 2;
479 if (check_dom_node_source(renderer, string, length)) {
480 render_dom_flush(renderer, string);
481 renderer->position = string + length;
482 assert_source(renderer, renderer->position, 0);
485 render_dom_text(renderer, template, string, length);
#ifdef HAVE_REGEX_H
/* Draw the string of @node, turning every URL matched by the renderer's
 * compiled regex into a link and drawing the text around the matches with
 * the template of the node's type.
 *
 * When the node string lies inside the source buffer, preceding source is
 * flushed first and the renderer's position is moved past the node. The
 * regex is run on a copy made with memacpy(); if that copy cannot be
 * allocated the original string is used instead. */
static inline void
render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *node)
{
	struct screen_char *template = &renderer->styles[node->type];
	regex_t *regex = &renderer->url_regex;
	unsigned char *string = node->string.string;
	int length = node->string.length;
	unsigned char *alloc_string;
	regmatch_t regmatch;

	if (check_dom_node_source(renderer, string, length)) {
		render_dom_flush(renderer, string);
		renderer->position = string + length;
		assert_source(renderer, renderer->position, 0);
	}

	alloc_string = memacpy(string, length);
	if (alloc_string)
		string = alloc_string;

	while (length > 0) {
		int start, matchlen;

		if (regexec(regex, string, 1, &regmatch, 0) != 0)
			break;

		start = regmatch.rm_so;
		matchlen = regmatch.rm_eo - regmatch.rm_so;

		/* Stop on empty matches and on matches that fall outside
		 * the text that is left. */
		if (matchlen == 0 || start < 0 || regmatch.rm_eo > length)
			break;

		/* Plain text in front of the URL. */
		if (start > 0)
			render_dom_text(renderer, template, string, start);

		string += start;
		length -= start;

		/* The matched text is both the link text and its target. */
		add_dom_link(renderer, string, matchlen, string, matchlen);

		string += matchlen;
		length -= matchlen;
	}

	/* Whatever follows the last match. */
	if (length > 0)
		render_dom_text(renderer, template, string, length);

	mem_free_if(alloc_string);
}
#endif
535 static enum dom_code
536 render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
538 struct dom_renderer *renderer = stack->current->data;
540 assert(node && renderer && renderer->document);
542 #ifdef HAVE_REGEX_H
543 if (renderer->find_url
544 && (node->type == DOM_NODE_TEXT
545 || node->type == DOM_NODE_CDATA_SECTION
546 || node->type == DOM_NODE_COMMENT)) {
547 render_dom_node_enhanced_text(renderer, node);
548 } else
549 #endif
550 render_dom_node_text(renderer, &renderer->styles[node->type], node);
552 return DOM_CODE_OK;
555 /* This callback is also used for rendering processing instruction nodes. */
556 static enum dom_code
557 render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *data)
559 struct dom_renderer *renderer = stack->current->data;
561 assert(node && renderer && renderer->document);
563 render_dom_node_text(renderer, &renderer->styles[node->type], node);
565 return DOM_CODE_OK;
568 enum dom_code
569 render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, void *data)
571 struct dom_renderer *renderer = stack->current->data;
572 struct dom_stack_state *state = get_dom_stack_top(stack);
573 struct sgml_parser_state *pstate = get_dom_stack_state_data(stack->contexts[0], state);
574 struct dom_scanner_token *token = &pstate->end_token;
575 unsigned char *string = token->string.string;
576 int length = token->string.length;
578 assert(node && renderer && renderer->document);
580 if (!string || !length)
581 return DOM_CODE_OK;
583 if (check_dom_node_source(renderer, string, length)) {
584 render_dom_flush(renderer, string);
585 renderer->position = string + length;
586 assert_source(renderer, renderer->position, 0);
589 render_dom_text(renderer, &renderer->styles[node->type], string, length);
591 return DOM_CODE_OK;
594 static void
595 set_base_uri(struct dom_renderer *renderer, unsigned char *value, size_t valuelen)
597 unsigned char *href = memacpy(value, valuelen);
598 unsigned char *uristring;
599 struct uri *uri;
601 if (!href) return;
602 uristring = join_urls(renderer->base_uri, href);
603 mem_free(href);
605 if (!uristring) return;
606 uri = get_uri(uristring, 0);
607 mem_free(uristring);
609 if (!uri) return;
611 done_uri(renderer->base_uri);
612 renderer->base_uri = uri;
/* Push callback of the source renderer for attribute nodes.
 *
 * The attribute name is drawn first. If the attribute has a value, the
 * value is drawn including its quotes. For attributes flagged as
 * references the value becomes a link: the leading quote and whitespace
 * are drawn as plain attribute text, the remainder up to the trailing
 * quote and whitespace is added with add_dom_link(), and the trailing part
 * is drawn as plain attribute text again. The href of an HTML base element
 * additionally replaces the renderer's base URI. Always returns
 * DOM_CODE_OK. */
615 enum dom_code
616 render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data)
618 struct dom_renderer *renderer = stack->current->data;
/* NOTE(review): node is dereferenced here, before the assertion below
 * checks it. */
619 struct screen_char *template = &renderer->styles[node->type];
621 assert(node && renderer->document);
623 render_dom_node_text(renderer, template, node);
625 if (is_dom_string_set(&node->data.attribute.value)) {
/* For quoted values widen the range by one byte on each side so that it
 * covers the quote characters. */
626 int quoted = node->data.attribute.quoted == 1;
627 unsigned char *value = node->data.attribute.value.string - quoted;
628 int valuelen = node->data.attribute.value.length + quoted * 2;
/* Only the start of the value is checked against the source bounds here;
 * the length passed is 0. */
630 if (check_dom_node_source(renderer, value, 0)) {
631 render_dom_flush(renderer, value);
632 renderer->position = value + valuelen;
633 assert_source(renderer, renderer->position, 0);
/* Only non-empty reference values are turned into links. */
636 if (node->data.attribute.reference
637 && valuelen - quoted * 2 > 0) {
638 int skips;
640 /* Need to flush the first quoting delimiter and any
641 * leading whitespace so that the renderers x position
642 * is at the start of the value string. */
643 for (skips = 0; skips < valuelen; skips++) {
644 if ((quoted && skips == 0)
645 || isspace(value[skips])
646 || value[skips] < ' ')
647 continue;
649 break;
652 if (skips > 0) {
653 render_dom_text(renderer, template, value, skips);
654 value += skips;
655 valuelen -= skips;
658 /* Figure out what should be skipped after the actual
659 * link text. */
/* This loop counts from the end of the value: the closing quote, trailing
 * whitespace and control characters. */
660 for (skips = 0; skips < valuelen; skips++) {
661 if ((quoted && skips == 0)
662 || isspace(value[valuelen - skips - 1])
663 || value[valuelen - skips - 1] < ' ')
664 continue;
666 break;
/* <base href="..."> changes the base URI before the link is added. */
669 if (renderer->doctype == SGML_DOCTYPE_HTML
670 && node->data.attribute.type == HTML_ATTRIBUTE_HREF
671 && node->parent->data.element.type == HTML_ELEMENT_BASE) {
672 set_base_uri(renderer, value, valuelen - skips);
/* The trimmed value is both the link text and the link target. */
675 add_dom_link(renderer, value, valuelen - skips,
676 value, valuelen - skips);
678 if (skips > 0) {
679 value += valuelen - skips;
680 render_dom_text(renderer, template, value, skips);
682 } else {
683 render_dom_text(renderer, template, value, valuelen);
687 return DOM_CODE_OK;
690 enum dom_code
691 render_dom_cdata_source(struct dom_stack *stack, struct dom_node *node, void *data)
693 struct dom_renderer *renderer = stack->current->data;
694 unsigned char *string = node->string.string;
696 assert(node && renderer && renderer->document);
698 /* Highlight the 'CDATA' part of <![CDATA[ if it is there. */
699 if (check_dom_node_source(renderer, string - 6, 6)) {
700 render_dom_flush(renderer, string - 6);
701 render_dom_text(renderer, &renderer->styles[DOM_NODE_ATTRIBUTE], string - 6, 5);
702 renderer->position = string - 1;
703 assert_source(renderer, renderer->position, 0);
706 render_dom_node_text(renderer, &renderer->styles[node->type], node);
708 return DOM_CODE_OK;
711 enum dom_code
712 render_dom_document_end(struct dom_stack *stack, struct dom_node *node, void *data)
714 struct dom_renderer *renderer = stack->current->data;
716 /* If there are no non-element nodes after the last element node make
717 * sure that we flush to the end of the cache entry source including
718 * the '>' of the last element tag if it has one. (bug 519) */
719 if (check_dom_node_source(renderer, renderer->position, 0)) {
720 render_dom_flush(renderer, renderer->end);
723 return DOM_CODE_OK;
/* Callback table of the DOM source renderer, registered by
 * render_dom_document() when the document is rendered as plain source.
 * The entries are positional: the leading comment on each line names the
 * node type the slot belongs to, and a NULL slot means no callback for
 * that node type. Elements are the only node type with both a push and a
 * pop callback; the document node only has a pop callback, which flushes
 * the remaining source. */
726 static struct dom_stack_context_info dom_source_renderer_context_info = {
727 /* Object size: */ 0,
728 /* Push: */
730 /* */ NULL,
731 /* DOM_NODE_ELEMENT */ render_dom_element_source,
732 /* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source,
733 /* DOM_NODE_TEXT */ render_dom_node_source,
734 /* DOM_NODE_CDATA_SECTION */ render_dom_cdata_source,
735 /* DOM_NODE_ENTITY_REFERENCE */ render_dom_node_source,
736 /* DOM_NODE_ENTITY */ render_dom_node_source,
737 /* DOM_NODE_PROC_INSTRUCTION */ render_dom_element_source,
738 /* DOM_NODE_COMMENT */ render_dom_node_source,
739 /* DOM_NODE_DOCUMENT */ NULL,
740 /* DOM_NODE_DOCUMENT_TYPE */ render_dom_node_source,
741 /* DOM_NODE_DOCUMENT_FRAGMENT */ render_dom_node_source,
742 /* DOM_NODE_NOTATION */ render_dom_node_source,
744 /* Pop: */
746 /* */ NULL,
747 /* DOM_NODE_ELEMENT */ render_dom_element_end_source,
748 /* DOM_NODE_ATTRIBUTE */ NULL,
749 /* DOM_NODE_TEXT */ NULL,
750 /* DOM_NODE_CDATA_SECTION */ NULL,
751 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
752 /* DOM_NODE_ENTITY */ NULL,
753 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
754 /* DOM_NODE_COMMENT */ NULL,
755 /* DOM_NODE_DOCUMENT */ render_dom_document_end,
756 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
757 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
758 /* DOM_NODE_NOTATION */ NULL,
763 /* DOM RSS Renderer */
765 #define RSS_CONFIG_FLAGS \
766 (DOM_CONFIG_NORMALIZE_WHITESPACE | DOM_CONFIG_NORMALIZE_CHARACTERS)
768 enum dom_code
769 dom_rss_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
771 struct dom_renderer *renderer = stack->current->data;
773 assert(node && renderer && renderer->document);
775 switch (node->data.element.type) {
776 case RSS_ELEMENT_CHANNEL:
777 /* The stack should have: #document * channel */
778 if (stack->depth != 3)
779 break;
781 if (!renderer->channel) {
782 renderer->channel = node;
784 break;
786 case RSS_ELEMENT_ITEM:
787 /* The stack should have: #document * channel item */
788 #if 0
789 /* Don't be so strict ... */
790 if (stack->depth != 4)
791 break;
792 #endif
793 /* ... but be exclusive. */
794 if (renderer->item)
795 break;
796 add_to_dom_node_list(&renderer->items, node, -1);
797 renderer->item = node;
798 break;
800 case RSS_ELEMENT_LINK:
801 case RSS_ELEMENT_DESCRIPTION:
802 case RSS_ELEMENT_TITLE:
803 case RSS_ELEMENT_AUTHOR:
804 case RSS_ELEMENT_PUBDATE:
805 if (!node->parent || renderer->node != node->parent)
806 break;
808 renderer->node = node;
811 return DOM_CODE_OK;
/* Pop callback of the RSS renderer for element nodes.
 *
 * Closing an item discards any pending text and marks that no item is
 * open. Closing a link, description, title, author or pubDate element that
 * is the tracked node and a direct child of the open item replaces the
 * element's children with a single text node holding the pending text.
 * Always returns DOM_CODE_OK. */
814 enum dom_code
815 dom_rss_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
817 struct dom_renderer *renderer = stack->current->data;
818 struct dom_node_list **list;
820 assert(node && renderer && renderer->document);
822 switch (node->data.element.type) {
823 case RSS_ELEMENT_ITEM:
824 if (is_dom_string_set(&renderer->text))
825 done_dom_string(&renderer->text);
826 renderer->item = NULL;
827 break;
829 case RSS_ELEMENT_LINK:
830 case RSS_ELEMENT_DESCRIPTION:
831 case RSS_ELEMENT_TITLE:
832 case RSS_ELEMENT_AUTHOR:
833 case RSS_ELEMENT_PUBDATE:
/* Ignore the element unless there is pending text, it is a direct child of
 * the open item and it is the node recorded by dom_rss_push_element(). */
834 if (!is_dom_string_set(&renderer->text)
835 || !node->parent
836 || renderer->item != node->parent
837 || renderer->node != node)
838 break;
840 /* Replace any child nodes with the normalized text node. */
/* NOTE(review): the result of get_dom_node_list() is dereferenced without
 * a NULL check, and *list is not reset after done_dom_node_list() --
 * confirm both are safe with the node list API. */
841 list = get_dom_node_list(node->parent, node);
842 done_dom_node_list(*list);
/* The text is known to be set at this point (checked by the guard above).
 * It is released here when the text node could not be added;
 * NOTE(review): on success add_dom_node() presumably keeps the string --
 * confirm. */
843 if (is_dom_string_set(&renderer->text)) {
844 if (!add_dom_node(node, DOM_NODE_TEXT, &renderer->text))
845 done_dom_string(&renderer->text);
847 renderer->node = NULL;
848 break;
850 default:
851 break;
854 return DOM_CODE_OK;
858 static struct dom_string *
859 get_rss_text(struct dom_node *node, enum rss_element_type type)
861 node = get_dom_node_child(node, DOM_NODE_ELEMENT, type);
863 if (!node) return NULL;
865 node = get_dom_node_child(node, DOM_NODE_TEXT, 0);
867 return node ? &node->string: NULL;
870 static void
871 render_rss_item(struct dom_renderer *renderer, struct dom_node *item)
873 struct dom_string *title = get_rss_text(item, RSS_ELEMENT_TITLE);
874 struct dom_string *link = get_rss_text(item, RSS_ELEMENT_LINK);
875 struct dom_string *author = get_rss_text(item, RSS_ELEMENT_AUTHOR);
876 struct dom_string *date = get_rss_text(item, RSS_ELEMENT_PUBDATE);
878 if (title && is_dom_string_set(title)) {
879 if (item == renderer->channel) {
880 unsigned char *str;
882 str = convert_string(renderer->convert_table,
883 title->string, title->length,
884 renderer->document->options.cp,
885 CSM_DEFAULT, NULL, NULL, NULL);
886 if (str)
887 renderer->document->title = str;
889 render_dom_text(renderer, &renderer->styles[DOM_NODE_ELEMENT],
890 title->string, title->length);
893 if (link && is_dom_string_set(link)) {
894 X(renderer)++;
895 add_dom_link(renderer, "[link]", 6, link->string, link->length);
898 /* New line, and indent */
899 Y(renderer)++;
900 X(renderer) = 0;
902 if (author && is_dom_string_set(author)) {
903 render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
904 author->string, author->length);
907 if (date && is_dom_string_set(date)) {
908 if (author && is_dom_string_set(author)) {
909 render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
910 " - ", 3);
913 render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
914 date->string, date->length);
917 if ((author && is_dom_string_set(author))
918 || (date && is_dom_string_set(date))) {
919 /* New line, and indent */
920 Y(renderer)++;
921 X(renderer) = 0;
925 enum dom_code
926 dom_rss_pop_document(struct dom_stack *stack, struct dom_node *root, void *data)
928 struct dom_renderer *renderer = stack->current->data;
930 if (!renderer->channel)
931 return DOM_CODE_OK;
933 render_rss_item(renderer, renderer->channel);
935 if (renderer->items) {
936 struct dom_node *node;
937 int index;
939 foreach_dom_node (renderer->items, node, index) {
940 Y(renderer)++;
941 X(renderer) = 0;
942 render_rss_item(renderer, node);
946 if (is_dom_string_set(&renderer->text))
947 done_dom_string(&renderer->text);
948 mem_free_if(renderer->items);
950 done_dom_node(root);
952 return DOM_CODE_OK;
/* Callback table of the RSS renderer, registered by render_dom_document()
 * for RSS documents that are not rendered as plain source. The entries are
 * positional: the leading comment on each line names the node type the
 * slot belongs to, and a NULL slot means no callback for that node type.
 * Only elements are tracked while parsing; the actual rendering happens
 * when the document node is popped. */
956 static struct dom_stack_context_info dom_rss_renderer_context_info = {
957 /* Object size: */ 0,
958 /* Push: */
960 /* */ NULL,
961 /* DOM_NODE_ELEMENT */ dom_rss_push_element,
962 /* DOM_NODE_ATTRIBUTE */ NULL,
963 /* DOM_NODE_TEXT */ NULL,
964 /* DOM_NODE_CDATA_SECTION */ NULL,
965 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
966 /* DOM_NODE_ENTITY */ NULL,
967 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
968 /* DOM_NODE_COMMENT */ NULL,
969 /* DOM_NODE_DOCUMENT */ NULL,
970 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
971 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
972 /* DOM_NODE_NOTATION */ NULL,
974 /* Pop: */
976 /* */ NULL,
977 /* DOM_NODE_ELEMENT */ dom_rss_pop_element,
978 /* DOM_NODE_ATTRIBUTE */ NULL,
979 /* DOM_NODE_TEXT */ NULL,
980 /* DOM_NODE_CDATA_SECTION */ NULL,
981 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
982 /* DOM_NODE_ENTITY */ NULL,
983 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
984 /* DOM_NODE_COMMENT */ NULL,
985 /* DOM_NODE_DOCUMENT */ dom_rss_pop_document,
986 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
987 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
988 /* DOM_NODE_NOTATION */ NULL,
993 static void
994 get_doctype(struct dom_renderer *renderer, struct cache_entry *cached)
996 if (!strcasecmp("application/rss+xml", cached->content_type)) {
997 renderer->doctype = SGML_DOCTYPE_RSS;
999 } else if (!strcasecmp("application/docbook+xml",
1000 cached->content_type)) {
1001 renderer->doctype = SGML_DOCTYPE_DOCBOOK;
1003 } else if (!strcasecmp("application/xbel+xml", cached->content_type)
1004 || !strcasecmp("application/x-xbel", cached->content_type)
1005 || !strcasecmp("application/xbel", cached->content_type)) {
1006 renderer->doctype = SGML_DOCTYPE_XBEL;
1008 } else {
1009 assertm(!strcasecmp("text/html", cached->content_type)
1010 || !strcasecmp("application/xhtml+xml",
1011 cached->content_type),
1012 "Couldn't resolve doctype '%s'", cached->content_type);
1014 renderer->doctype = SGML_DOCTYPE_HTML;
1018 /* Shared multiplexor between renderers */
1019 void
1020 render_dom_document(struct cache_entry *cached, struct document *document,
1021 struct string *buffer)
1023 unsigned char *head = empty_string_or_(cached->head);
1024 struct dom_renderer renderer;
1025 struct conv_table *convert_table;
1026 struct sgml_parser *parser;
1027 enum sgml_parser_type parser_type;
1028 unsigned char *string = struri(cached->uri);
1029 size_t length = strlen(string);
1030 struct dom_string uri = INIT_DOM_STRING(string, length);
1031 enum dom_code code;
1033 convert_table = get_convert_table(head, document->options.cp,
1034 document->options.assume_cp,
1035 &document->cp,
1036 &document->cp_status,
1037 document->options.hard_assume);
1039 init_dom_renderer(&renderer, document, buffer, convert_table);
1041 document->bgcolor = document->options.default_style.bg;
1042 #ifdef CONFIG_UTF8
1043 document->options.utf8 = is_cp_utf8(document->options.cp);
1044 #endif /* CONFIG_UTF8 */
1046 if (document->options.plain)
1047 parser_type = SGML_PARSER_STREAM;
1048 else
1049 parser_type = SGML_PARSER_TREE;
1051 get_doctype(&renderer, cached);
1053 parser = init_sgml_parser(parser_type, renderer.doctype, &uri, 0);
1054 if (!parser) return;
1056 if (document->options.plain) {
1057 add_dom_stack_context(&parser->stack, &renderer,
1058 &dom_source_renderer_context_info);
1060 } else if (renderer.doctype == SGML_DOCTYPE_RSS) {
1061 add_dom_stack_context(&parser->stack, &renderer,
1062 &dom_rss_renderer_context_info);
1063 add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS);
1066 /* FIXME: When rendering this way we don't really care about the code.
1067 * However, it will be useful when we will be able to also
1068 * incrementally parse new data. This will require the parser to live
1069 * during the fetching of data. */
1070 code = parse_sgml(parser, buffer->source, buffer->length, 1);
1071 if (parser->root) {
1072 assert(parser->stack.depth == 1);
1074 get_dom_stack_top(&parser->stack)->immutable = 0;
1075 /* For SGML_PARSER_STREAM this will free the DOM
1076 * root node. */
1077 pop_dom_node(&parser->stack);
1080 done_dom_renderer(&renderer);
1081 done_sgml_parser(parser);