1 /* DOM document renderer */
7 #include <sys/types.h> /* FreeBSD needs this before regex.h */
15 #include "bookmarks/bookmarks.h" /* get_bookmark() */
16 #include "cache/cache.h"
17 #include "document/css/css.h"
18 #include "document/css/parser.h"
19 #include "document/css/property.h"
20 #include "document/css/stylesheet.h"
21 #include "document/docdata.h"
22 #include "document/document.h"
23 #include "document/dom/renderer.h"
24 #include "document/renderer.h"
25 #include "dom/configuration.h"
26 #include "dom/scanner.h"
27 #include "dom/sgml/parser.h"
28 #include "dom/sgml/html/html.h"
29 #include "dom/sgml/rss/rss.h"
31 #include "dom/stack.h"
32 #include "intl/charsets.h"
33 #include "globhist/globhist.h" /* get_global_history_item() */
34 #include "protocol/uri.h"
35 #include "terminal/draw.h"
37 #include "util/error.h"
38 #include "util/memory.h"
39 #include "util/snprintf.h"
40 #include "util/string.h"
44 enum sgml_document_type doctype
;
45 struct document
*document
;
47 struct conv_table
*convert_table
;
48 enum convert_string_mode convert_mode
;
52 unsigned char *source
;
55 unsigned char *position
;
56 int canvas_x
, canvas_y
;
60 unsigned int find_url
:1;
62 struct screen_char styles
[DOM_NODES
];
64 /* RSS renderer variables */
65 struct dom_node
*channel
;
66 struct dom_node_list
*items
;
67 struct dom_node
*item
;
68 struct dom_node
*node
;
69 struct dom_string text
;
/* POSIX extended regular expression used to spot URLs inside text nodes:
 * either "file://", or an ftp/http/nttp (with optional trailing "s") or smb
 * scheme followed by a host name ending in a 2-4 letter suffix and an
 * optional ":port"; then any number of "/"-introduced path segments. */
#define URL_REGEX "(file://|((f|ht|nt)tp(s)?|smb)://[[:alnum:]]+([-@:.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(:[[:digit:]]+)?)(/(%[[:xdigit:]]{2}|[-_~&=;?.a-z0-9])*)*"
/* Flags passed to regcomp() together with URL_REGEX. */
#define URL_REGFLAGS (REG_ICASE | REG_EXTENDED)
76 init_template(struct screen_char
*template, struct document_options
*options
,
77 color_T background
, color_T foreground
, enum screen_char_attr attr
)
79 struct color_pair colors
= INIT_COLOR_PAIR(background
, foreground
);
81 template->attr
= attr
;
83 set_term_color(template, &colors
,
84 options
->color_flags
, options
->color_mode
);
88 /* Checks the user CSS for properties for each DOM node type name */
90 init_dom_renderer(struct dom_renderer
*renderer
, struct document
*document
,
91 struct string
*buffer
, struct conv_table
*convert_table
)
93 enum dom_node_type type
;
94 struct css_stylesheet
*css
= &default_stylesheet
;
96 memset(renderer
, 0, sizeof(*renderer
));
98 renderer
->document
= document
;
99 renderer
->convert_table
= convert_table
;
100 renderer
->convert_mode
= document
->options
.plain
? CSM_NONE
: CSM_DEFAULT
;
101 renderer
->source
= buffer
->source
;
102 renderer
->end
= buffer
->source
+ buffer
->length
;
103 renderer
->position
= renderer
->source
;
104 renderer
->base_uri
= get_uri_reference(document
->uri
);
107 if (renderer
->document
->options
.plain_display_links
) {
108 if (regcomp(&renderer
->url_regex
, URL_REGEX
, URL_REGFLAGS
)) {
109 regfree(&renderer
->url_regex
);
111 renderer
->find_url
= 1;
116 for (type
= 0; type
< DOM_NODES
; type
++) {
117 struct screen_char
*template = &renderer
->styles
[type
];
118 color_T background
= document
->options
.default_style
.bg
;
119 color_T foreground
= document
->options
.default_style
.fg
;
120 enum screen_char_attr attr
= 0;
121 static int i_want_struct_module_for_dom
;
123 struct dom_string
*name
= get_dom_node_type_name(type
);
124 struct css_selector
*selector
= NULL
;
126 if (!i_want_struct_module_for_dom
) {
127 static const unsigned char default_colors
[] =
128 "document { color: yellow } "
129 "element { color: lightgreen } "
130 "entity-reference { color: red } "
131 "proc-instruction { color: red } "
132 "attribute { color: magenta } "
133 "comment { color: aqua } "
134 "cdata-section { color: orange2 } ";
135 unsigned char *styles
= (unsigned char *) default_colors
;
137 i_want_struct_module_for_dom
= 1;
138 /* When someone will get here earlier than at 4am,
139 * this will be done in some init function, perhaps
140 * not overriding the user's default stylesheet. */
141 css_parse_stylesheet(css
, NULL
, styles
, styles
+ sizeof(default_colors
));
145 if (is_dom_string_set(name
))
146 selector
= find_css_selector(&css
->selectors
,
147 CST_ELEMENT
, CSR_ROOT
,
148 name
->string
, name
->length
);
151 struct css_property
*property
;
153 foreach (property
, selector
->properties
) {
154 switch (property
->type
) {
155 case CSS_PT_BACKGROUND_COLOR
:
156 case CSS_PT_BACKGROUND
:
157 if (property
->value_type
== CSS_VT_COLOR
)
158 background
= property
->value
.color
;
161 foreground
= property
->value
.color
;
163 case CSS_PT_FONT_WEIGHT
:
164 if (property
->value
.font_attribute
.add
& AT_BOLD
)
165 attr
|= SCREEN_ATTR_BOLD
;
167 case CSS_PT_FONT_STYLE
:
168 if (property
->value
.font_attribute
.add
& AT_UNDERLINE
)
169 attr
|= SCREEN_ATTR_UNDERLINE
;
171 if (property
->value
.font_attribute
.add
& AT_ITALIC
)
172 attr
|= SCREEN_ATTR_ITALIC
;
174 case CSS_PT_TEXT_DECORATION
:
175 if (property
->value
.font_attribute
.add
& AT_UNDERLINE
)
176 attr
|= SCREEN_ATTR_UNDERLINE
;
180 case CSS_PT_TEXT_ALIGN
:
181 case CSS_PT_WHITE_SPACE
:
188 init_template(template, &document
->options
, background
, foreground
, attr
);
193 done_dom_renderer(struct dom_renderer
*renderer
)
196 if (renderer
->find_url
)
197 regfree(&renderer
->url_regex
);
199 done_uri(renderer
->base_uri
);
/* Document maintenance */
205 static struct screen_char
*
206 realloc_line(struct document
*document
, int x
, int y
)
208 struct line
*line
= realloc_lines(document
, y
);
210 if (!line
) return NULL
;
212 if (x
> line
->length
) {
213 if (!ALIGN_LINE(&line
->chars
, line
->length
, x
))
216 for (; line
->length
< x
; line
->length
++) {
217 line
->chars
[line
->length
].data
= ' ';
220 if (x
> document
->width
) document
->width
= x
;
227 add_search_node(struct dom_renderer
*renderer
, int width
)
229 struct node
*node
= mem_alloc(sizeof(*node
));
232 set_box(&node
->box
, renderer
->canvas_x
, renderer
->canvas_y
,
234 add_to_list(renderer
->document
->nodes
, node
);
/* Canvas accessors: the current column and row, the screen_char at the
 * current position, and the column reached after adding @add cells. */
#define X(renderer)		((renderer)->canvas_x)
#define Y(renderer)		((renderer)->canvas_y)
#define POS(renderer)		(&(renderer)->document->data[Y(renderer)].chars[X(renderer)])
#define WIDTH(renderer, add)	((renderer)->canvas_x + (add))
246 render_dom_line(struct dom_renderer
*renderer
, struct screen_char
*template,
247 unsigned char *string
, int length
)
249 struct document
*document
= renderer
->document
;
250 struct conv_table
*convert
= renderer
->convert_table
;
251 enum convert_string_mode mode
= renderer
->convert_mode
;
254 int utf8
= document
->options
.utf8
;
256 #endif /* CONFIG_UTF8 */
259 assert(renderer
&& template && string
&& length
);
261 string
= convert_string(convert
, string
, length
, document
->options
.cp
,
262 mode
, &length
, NULL
, NULL
);
265 if (!realloc_line(document
, WIDTH(renderer
, length
), Y(renderer
))) {
270 add_search_node(renderer
, length
);
273 end
= string
+ length
;
274 #endif /* CONFIG_UTF8 */
275 for (x
= 0, charlen
= 1; x
< length
;x
+= charlen
, renderer
->canvas_x
++) {
276 unsigned char *text
= &string
[x
];
278 /* This is mostly to be able to break out so the indentation
279 * level won't get to high. */
283 int tab_width
= 7 - (X(renderer
) & 7);
284 int width
= WIDTH(renderer
, length
- x
+ tab_width
);
286 template->data
= ' ';
288 if (!realloc_line(document
, width
, Y(renderer
)))
291 /* Only loop over the expanded tab chars and let the
292 * ``main loop'' add the actual tab char. */
293 for (; tab_width
-- > 0; renderer
->canvas_x
++)
294 copy_screen_chars(POS(renderer
), template, 1);
302 charlen
= utf8charlen(text
);
303 data
= utf8_to_unicode(&text
, end
);
305 template->data
= (unicode_val_T
)data
;
307 if (unicode_to_cell(data
) == 2) {
308 copy_screen_chars(POS(renderer
),
312 template->data
= UCS_NO_CHAR
;
316 #endif /* CONFIG_UTF8 */
317 template->data
= isscreensafe(*text
) ? *text
:'.';
320 copy_screen_chars(POS(renderer
), template, 1);
325 static inline unsigned char *
326 split_dom_line(unsigned char *line
, int length
, int *linelen
)
328 unsigned char *end
= line
+ length
;
331 /* End of line detection.
332 * We handle \r, \r\n and \n types here. */
333 for (pos
= line
; pos
< end
; pos
++) {
336 if (pos
[step
] == ASCII_CR
)
339 if (pos
[step
] == ASCII_LF
)
343 *linelen
= pos
- line
;
353 render_dom_text(struct dom_renderer
*renderer
, struct screen_char
*template,
354 unsigned char *string
, int length
)
358 for (; length
> 0; string
+= linelen
, length
-= linelen
) {
359 unsigned char *newline
= split_dom_line(string
, length
, &linelen
);
362 render_dom_line(renderer
, template, string
, linelen
);
365 renderer
->canvas_y
++;
366 renderer
->canvas_x
= 0;
367 linelen
= newline
- string
;
/* Grow the links array of @doc so that it can hold @size entries. */
#define realloc_document_links(doc, size) \
	ALIGN_LINK(&(doc)->links, (doc)->nlinks, size)
375 static inline struct link
*
376 add_dom_link(struct dom_renderer
*renderer
, unsigned char *string
, int length
,
377 unsigned char *uristring
, int urilength
)
379 struct document
*document
= renderer
->document
;
380 int x
= renderer
->canvas_x
;
381 int y
= renderer
->canvas_y
;
382 unsigned char *where
;
385 struct screen_char
template;
388 if (!realloc_document_links(document
, document
->nlinks
+ 1))
391 link
= &document
->links
[document
->nlinks
];
393 if (!realloc_points(link
, length
))
396 uristring
= convert_string(renderer
->convert_table
,
397 uristring
, urilength
, document
->options
.cp
,
398 CSM_DEFAULT
, NULL
, NULL
, NULL
);
399 if (!uristring
) return NULL
;
401 where
= join_urls(renderer
->base_uri
, uristring
);
407 #ifdef CONFIG_GLOBHIST
408 else if (get_global_history_item(where
))
409 fgcolor
= document
->options
.default_vlink
;
411 #ifdef CONFIG_BOOKMARKS
412 else if (get_bookmark(where
))
413 fgcolor
= document
->options
.default_bookmark_link
;
416 fgcolor
= document
->options
.default_link
;
418 link
->npoints
= length
;
419 link
->type
= LINK_HYPERTEXT
;
421 link
->color
.background
= document
->options
.default_style
.bg
;
422 link
->color
.foreground
= fgcolor
;
423 link
->number
= document
->nlinks
;
425 init_template(&template, &document
->options
,
426 link
->color
.background
, link
->color
.foreground
, 0);
428 render_dom_text(renderer
, &template, string
, length
);
430 for (point
= link
->points
; length
> 0; length
--, point
++, x
++) {
436 document
->links_sorted
= 0;
/* DOM Source Renderer */

/* True when the @len bytes at @str lie inside the renderer's source buffer. */
#define check_dom_node_source(renderer, str, len) \
	((renderer)->source <= (str) && (str) + (len) <= (renderer)->end)

/* Assert the above, reporting both ranges on failure. */
#define assert_source(renderer, str, len) \
	assertm(check_dom_node_source(renderer, str, len), "renderer[%p : %p] str[%p : %p]", \
		(renderer)->source, (renderer)->end, (str), (str) + (len))
452 render_dom_flush(struct dom_renderer
*renderer
, unsigned char *string
)
454 struct screen_char
*template = &renderer
->styles
[DOM_NODE_TEXT
];
455 int length
= string
- renderer
->position
;
457 assert_source(renderer
, renderer
->position
, 0);
458 assert_source(renderer
, string
, 0);
460 if (length
<= 0) return;
461 render_dom_text(renderer
, template, renderer
->position
, length
);
462 renderer
->position
= string
;
464 assert_source(renderer
, renderer
->position
, 0);
468 render_dom_node_text(struct dom_renderer
*renderer
, struct screen_char
*template,
469 struct dom_node
*node
)
471 unsigned char *string
= node
->string
.string
;
472 int length
= node
->string
.length
;
474 if (node
->type
== DOM_NODE_ENTITY_REFERENCE
) {
479 if (check_dom_node_source(renderer
, string
, length
)) {
480 render_dom_flush(renderer
, string
);
481 renderer
->position
= string
+ length
;
482 assert_source(renderer
, renderer
->position
, 0);
485 render_dom_text(renderer
, template, string
, length
);
#ifdef HAVE_REGEX_H
/* Draw the string of @node, turning every match of the renderer's URL regex
 * into a link and drawing the text in between with the node type's template.
 *
 * NOTE(review): the HAVE_REGEX_H guard, the return-type line, the declaration
 * of regmatch, the `break` and the bookkeeping that advances string/length
 * around each match were missing from the listing and are restored by
 * inference -- confirm. */
static inline void
render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *node)
{
	regex_t *regex = &renderer->url_regex;
	regmatch_t regmatch;
	unsigned char *string = node->string.string;
	int length = node->string.length;
	struct screen_char *template = &renderer->styles[node->type];
	unsigned char *alloc_string;

	if (check_dom_node_source(renderer, string, length)) {
		render_dom_flush(renderer, string);
		renderer->position = string + length;
		assert_source(renderer, renderer->position, 0);
	}

	/* regexec() wants a NUL-terminated string, which the node string is
	 * not guaranteed to be, so work on a terminated copy.  Without the
	 * copy fall back to plain rendering rather than letting regexec()
	 * run off the end of the node string. */
	alloc_string = memacpy(string, length);
	if (!alloc_string) {
		if (length > 0)
			render_dom_text(renderer, template, string, length);
		return;
	}
	string = alloc_string;

	while (length > 0 && !regexec(regex, string, 1, &regmatch, 0)) {
		int matchlen = regmatch.rm_eo - regmatch.rm_so;
		int offset = regmatch.rm_so;

		if (!matchlen || offset < 0 || regmatch.rm_eo > length)
			break;

		/* Text in front of the URL. */
		if (offset > 0)
			render_dom_text(renderer, template, string, offset);

		string += offset;
		length -= offset;

		/* The URL is both the link text and the link target. */
		add_dom_link(renderer, string, matchlen, string, matchlen);

		length -= matchlen;
		string += matchlen;
	}

	/* Whatever follows the last URL. */
	if (length > 0)
		render_dom_text(renderer, template, string, length);

	mem_free_if(alloc_string);
}
#endif
536 render_dom_node_source(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
538 struct dom_renderer
*renderer
= stack
->current
->data
;
540 assert(node
&& renderer
&& renderer
->document
);
543 if (renderer
->find_url
544 && (node
->type
== DOM_NODE_TEXT
545 || node
->type
== DOM_NODE_CDATA_SECTION
546 || node
->type
== DOM_NODE_COMMENT
)) {
547 render_dom_node_enhanced_text(renderer
, node
);
550 render_dom_node_text(renderer
, &renderer
->styles
[node
->type
], node
);
555 /* This callback is also used for rendering processing instruction nodes. */
557 render_dom_element_source(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
559 struct dom_renderer
*renderer
= stack
->current
->data
;
561 assert(node
&& renderer
&& renderer
->document
);
563 render_dom_node_text(renderer
, &renderer
->styles
[node
->type
], node
);
569 render_dom_element_end_source(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
571 struct dom_renderer
*renderer
= stack
->current
->data
;
572 struct dom_stack_state
*state
= get_dom_stack_top(stack
);
573 struct sgml_parser_state
*pstate
= get_dom_stack_state_data(stack
->contexts
[0], state
);
574 struct dom_scanner_token
*token
= &pstate
->end_token
;
575 unsigned char *string
= token
->string
.string
;
576 int length
= token
->string
.length
;
578 assert(node
&& renderer
&& renderer
->document
);
580 if (!string
|| !length
)
583 if (check_dom_node_source(renderer
, string
, length
)) {
584 render_dom_flush(renderer
, string
);
585 renderer
->position
= string
+ length
;
586 assert_source(renderer
, renderer
->position
, 0);
589 render_dom_text(renderer
, &renderer
->styles
[node
->type
], string
, length
);
595 set_base_uri(struct dom_renderer
*renderer
, unsigned char *value
, size_t valuelen
)
597 unsigned char *href
= memacpy(value
, valuelen
);
598 unsigned char *uristring
;
602 uristring
= join_urls(renderer
->base_uri
, href
);
605 if (!uristring
) return;
606 uri
= get_uri(uristring
, 0);
611 done_uri(renderer
->base_uri
);
612 renderer
->base_uri
= uri
;
616 render_dom_attribute_source(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
618 struct dom_renderer
*renderer
= stack
->current
->data
;
619 struct screen_char
*template = &renderer
->styles
[node
->type
];
621 assert(node
&& renderer
->document
);
623 render_dom_node_text(renderer
, template, node
);
625 if (is_dom_string_set(&node
->data
.attribute
.value
)) {
626 int quoted
= node
->data
.attribute
.quoted
== 1;
627 unsigned char *value
= node
->data
.attribute
.value
.string
- quoted
;
628 int valuelen
= node
->data
.attribute
.value
.length
+ quoted
* 2;
630 if (check_dom_node_source(renderer
, value
, 0)) {
631 render_dom_flush(renderer
, value
);
632 renderer
->position
= value
+ valuelen
;
633 assert_source(renderer
, renderer
->position
, 0);
636 if (node
->data
.attribute
.reference
637 && valuelen
- quoted
* 2 > 0) {
640 /* Need to flush the first quoting delimiter and any
641 * leading whitespace so that the renderers x position
642 * is at the start of the value string. */
643 for (skips
= 0; skips
< valuelen
; skips
++) {
644 if ((quoted
&& skips
== 0)
645 || isspace(value
[skips
])
646 || value
[skips
] < ' ')
653 render_dom_text(renderer
, template, value
, skips
);
658 /* Figure out what should be skipped after the actual
660 for (skips
= 0; skips
< valuelen
; skips
++) {
661 if ((quoted
&& skips
== 0)
662 || isspace(value
[valuelen
- skips
- 1])
663 || value
[valuelen
- skips
- 1] < ' ')
669 if (renderer
->doctype
== SGML_DOCTYPE_HTML
670 && node
->data
.attribute
.type
== HTML_ATTRIBUTE_HREF
671 && node
->parent
->data
.element
.type
== HTML_ELEMENT_BASE
) {
672 set_base_uri(renderer
, value
, valuelen
- skips
);
675 add_dom_link(renderer
, value
, valuelen
- skips
,
676 value
, valuelen
- skips
);
679 value
+= valuelen
- skips
;
680 render_dom_text(renderer
, template, value
, skips
);
683 render_dom_text(renderer
, template, value
, valuelen
);
691 render_dom_cdata_source(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
693 struct dom_renderer
*renderer
= stack
->current
->data
;
694 unsigned char *string
= node
->string
.string
;
696 assert(node
&& renderer
&& renderer
->document
);
698 /* Highlight the 'CDATA' part of <![CDATA[ if it is there. */
699 if (check_dom_node_source(renderer
, string
- 6, 6)) {
700 render_dom_flush(renderer
, string
- 6);
701 render_dom_text(renderer
, &renderer
->styles
[DOM_NODE_ATTRIBUTE
], string
- 6, 5);
702 renderer
->position
= string
- 1;
703 assert_source(renderer
, renderer
->position
, 0);
706 render_dom_node_text(renderer
, &renderer
->styles
[node
->type
], node
);
712 render_dom_document_end(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
714 struct dom_renderer
*renderer
= stack
->current
->data
;
716 /* If there are no non-element nodes after the last element node make
717 * sure that we flush to the end of the cache entry source including
718 * the '>' of the last element tag if it has one. (bug 519) */
719 if (check_dom_node_source(renderer
, renderer
->position
, 0)) {
720 render_dom_flush(renderer
, renderer
->end
);
726 static struct dom_stack_context_info dom_source_renderer_context_info
= {
727 /* Object size: */ 0,
731 /* DOM_NODE_ELEMENT */ render_dom_element_source
,
732 /* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source
,
733 /* DOM_NODE_TEXT */ render_dom_node_source
,
734 /* DOM_NODE_CDATA_SECTION */ render_dom_cdata_source
,
735 /* DOM_NODE_ENTITY_REFERENCE */ render_dom_node_source
,
736 /* DOM_NODE_ENTITY */ render_dom_node_source
,
737 /* DOM_NODE_PROC_INSTRUCTION */ render_dom_element_source
,
738 /* DOM_NODE_COMMENT */ render_dom_node_source
,
739 /* DOM_NODE_DOCUMENT */ NULL
,
740 /* DOM_NODE_DOCUMENT_TYPE */ render_dom_node_source
,
741 /* DOM_NODE_DOCUMENT_FRAGMENT */ render_dom_node_source
,
742 /* DOM_NODE_NOTATION */ render_dom_node_source
,
747 /* DOM_NODE_ELEMENT */ render_dom_element_end_source
,
748 /* DOM_NODE_ATTRIBUTE */ NULL
,
749 /* DOM_NODE_TEXT */ NULL
,
750 /* DOM_NODE_CDATA_SECTION */ NULL
,
751 /* DOM_NODE_ENTITY_REFERENCE */ NULL
,
752 /* DOM_NODE_ENTITY */ NULL
,
753 /* DOM_NODE_PROC_INSTRUCTION */ NULL
,
754 /* DOM_NODE_COMMENT */ NULL
,
755 /* DOM_NODE_DOCUMENT */ render_dom_document_end
,
756 /* DOM_NODE_DOCUMENT_TYPE */ NULL
,
757 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL
,
758 /* DOM_NODE_NOTATION */ NULL
,
/* DOM RSS Renderer */

/* Normalizations requested from the DOM configuration layer for RSS. */
#define RSS_CONFIG_FLAGS \
	(DOM_CONFIG_NORMALIZE_WHITESPACE | DOM_CONFIG_NORMALIZE_CHARACTERS)
769 dom_rss_push_element(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
771 struct dom_renderer
*renderer
= stack
->current
->data
;
773 assert(node
&& renderer
&& renderer
->document
);
775 switch (node
->data
.element
.type
) {
776 case RSS_ELEMENT_CHANNEL
:
777 /* The stack should have: #document * channel */
778 if (stack
->depth
!= 3)
781 if (!renderer
->channel
) {
782 renderer
->channel
= node
;
786 case RSS_ELEMENT_ITEM
:
787 /* The stack should have: #document * channel item */
789 /* Don't be so strict ... */
790 if (stack
->depth
!= 4)
793 /* ... but be exclusive. */
796 add_to_dom_node_list(&renderer
->items
, node
, -1);
797 renderer
->item
= node
;
800 case RSS_ELEMENT_LINK
:
801 case RSS_ELEMENT_DESCRIPTION
:
802 case RSS_ELEMENT_TITLE
:
803 case RSS_ELEMENT_AUTHOR
:
804 case RSS_ELEMENT_PUBDATE
:
805 if (!node
->parent
|| renderer
->node
!= node
->parent
)
808 renderer
->node
= node
;
815 dom_rss_pop_element(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
817 struct dom_renderer
*renderer
= stack
->current
->data
;
818 struct dom_node_list
**list
;
820 assert(node
&& renderer
&& renderer
->document
);
822 switch (node
->data
.element
.type
) {
823 case RSS_ELEMENT_ITEM
:
824 if (is_dom_string_set(&renderer
->text
))
825 done_dom_string(&renderer
->text
);
826 renderer
->item
= NULL
;
829 case RSS_ELEMENT_LINK
:
830 case RSS_ELEMENT_DESCRIPTION
:
831 case RSS_ELEMENT_TITLE
:
832 case RSS_ELEMENT_AUTHOR
:
833 case RSS_ELEMENT_PUBDATE
:
834 if (!is_dom_string_set(&renderer
->text
)
836 || renderer
->item
!= node
->parent
837 || renderer
->node
!= node
)
840 /* Replace any child nodes with the normalized text node. */
841 list
= get_dom_node_list(node
->parent
, node
);
842 done_dom_node_list(*list
);
843 if (is_dom_string_set(&renderer
->text
)) {
844 if (!add_dom_node(node
, DOM_NODE_TEXT
, &renderer
->text
))
845 done_dom_string(&renderer
->text
);
847 renderer
->node
= NULL
;
858 static struct dom_string
*
859 get_rss_text(struct dom_node
*node
, enum rss_element_type type
)
861 node
= get_dom_node_child(node
, DOM_NODE_ELEMENT
, type
);
863 if (!node
) return NULL
;
865 node
= get_dom_node_child(node
, DOM_NODE_TEXT
, 0);
867 return node
? &node
->string
: NULL
;
871 render_rss_item(struct dom_renderer
*renderer
, struct dom_node
*item
)
873 struct dom_string
*title
= get_rss_text(item
, RSS_ELEMENT_TITLE
);
874 struct dom_string
*link
= get_rss_text(item
, RSS_ELEMENT_LINK
);
875 struct dom_string
*author
= get_rss_text(item
, RSS_ELEMENT_AUTHOR
);
876 struct dom_string
*date
= get_rss_text(item
, RSS_ELEMENT_PUBDATE
);
878 if (title
&& is_dom_string_set(title
)) {
879 if (item
== renderer
->channel
) {
882 str
= convert_string(renderer
->convert_table
,
883 title
->string
, title
->length
,
884 renderer
->document
->options
.cp
,
885 CSM_DEFAULT
, NULL
, NULL
, NULL
);
887 renderer
->document
->title
= str
;
889 render_dom_text(renderer
, &renderer
->styles
[DOM_NODE_ELEMENT
],
890 title
->string
, title
->length
);
893 if (link
&& is_dom_string_set(link
)) {
895 add_dom_link(renderer
, "[link]", 6, link
->string
, link
->length
);
898 /* New line, and indent */
902 if (author
&& is_dom_string_set(author
)) {
903 render_dom_text(renderer
, &renderer
->styles
[DOM_NODE_COMMENT
],
904 author
->string
, author
->length
);
907 if (date
&& is_dom_string_set(date
)) {
908 if (author
&& is_dom_string_set(author
)) {
909 render_dom_text(renderer
, &renderer
->styles
[DOM_NODE_COMMENT
],
913 render_dom_text(renderer
, &renderer
->styles
[DOM_NODE_COMMENT
],
914 date
->string
, date
->length
);
917 if ((author
&& is_dom_string_set(author
))
918 || (date
&& is_dom_string_set(date
))) {
919 /* New line, and indent */
926 dom_rss_pop_document(struct dom_stack
*stack
, struct dom_node
*root
, void *data
)
928 struct dom_renderer
*renderer
= stack
->current
->data
;
930 if (!renderer
->channel
)
933 render_rss_item(renderer
, renderer
->channel
);
935 if (renderer
->items
) {
936 struct dom_node
*node
;
939 foreach_dom_node (renderer
->items
, node
, index
) {
942 render_rss_item(renderer
, node
);
946 if (is_dom_string_set(&renderer
->text
))
947 done_dom_string(&renderer
->text
);
948 mem_free_if(renderer
->items
);
956 static struct dom_stack_context_info dom_rss_renderer_context_info
= {
957 /* Object size: */ 0,
961 /* DOM_NODE_ELEMENT */ dom_rss_push_element
,
962 /* DOM_NODE_ATTRIBUTE */ NULL
,
963 /* DOM_NODE_TEXT */ NULL
,
964 /* DOM_NODE_CDATA_SECTION */ NULL
,
965 /* DOM_NODE_ENTITY_REFERENCE */ NULL
,
966 /* DOM_NODE_ENTITY */ NULL
,
967 /* DOM_NODE_PROC_INSTRUCTION */ NULL
,
968 /* DOM_NODE_COMMENT */ NULL
,
969 /* DOM_NODE_DOCUMENT */ NULL
,
970 /* DOM_NODE_DOCUMENT_TYPE */ NULL
,
971 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL
,
972 /* DOM_NODE_NOTATION */ NULL
,
977 /* DOM_NODE_ELEMENT */ dom_rss_pop_element
,
978 /* DOM_NODE_ATTRIBUTE */ NULL
,
979 /* DOM_NODE_TEXT */ NULL
,
980 /* DOM_NODE_CDATA_SECTION */ NULL
,
981 /* DOM_NODE_ENTITY_REFERENCE */ NULL
,
982 /* DOM_NODE_ENTITY */ NULL
,
983 /* DOM_NODE_PROC_INSTRUCTION */ NULL
,
984 /* DOM_NODE_COMMENT */ NULL
,
985 /* DOM_NODE_DOCUMENT */ dom_rss_pop_document
,
986 /* DOM_NODE_DOCUMENT_TYPE */ NULL
,
987 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL
,
988 /* DOM_NODE_NOTATION */ NULL
,
994 get_doctype(struct dom_renderer
*renderer
, struct cache_entry
*cached
)
996 if (!strcasecmp("application/rss+xml", cached
->content_type
)) {
997 renderer
->doctype
= SGML_DOCTYPE_RSS
;
999 } else if (!strcasecmp("application/docbook+xml",
1000 cached
->content_type
)) {
1001 renderer
->doctype
= SGML_DOCTYPE_DOCBOOK
;
1003 } else if (!strcasecmp("application/xbel+xml", cached
->content_type
)
1004 || !strcasecmp("application/x-xbel", cached
->content_type
)
1005 || !strcasecmp("application/xbel", cached
->content_type
)) {
1006 renderer
->doctype
= SGML_DOCTYPE_XBEL
;
1009 assertm(!strcasecmp("text/html", cached
->content_type
)
1010 || !strcasecmp("application/xhtml+xml",
1011 cached
->content_type
),
1012 "Couldn't resolve doctype '%s'", cached
->content_type
);
1014 renderer
->doctype
= SGML_DOCTYPE_HTML
;
1018 /* Shared multiplexor between renderers */
1020 render_dom_document(struct cache_entry
*cached
, struct document
*document
,
1021 struct string
*buffer
)
1023 unsigned char *head
= empty_string_or_(cached
->head
);
1024 struct dom_renderer renderer
;
1025 struct conv_table
*convert_table
;
1026 struct sgml_parser
*parser
;
1027 enum sgml_parser_type parser_type
;
1028 unsigned char *string
= struri(cached
->uri
);
1029 size_t length
= strlen(string
);
1030 struct dom_string uri
= INIT_DOM_STRING(string
, length
);
1033 convert_table
= get_convert_table(head
, document
->options
.cp
,
1034 document
->options
.assume_cp
,
1036 &document
->cp_status
,
1037 document
->options
.hard_assume
);
1039 init_dom_renderer(&renderer
, document
, buffer
, convert_table
);
1041 document
->bgcolor
= document
->options
.default_style
.bg
;
1043 document
->options
.utf8
= is_cp_utf8(document
->options
.cp
);
1044 #endif /* CONFIG_UTF8 */
1046 if (document
->options
.plain
)
1047 parser_type
= SGML_PARSER_STREAM
;
1049 parser_type
= SGML_PARSER_TREE
;
1051 get_doctype(&renderer
, cached
);
1053 parser
= init_sgml_parser(parser_type
, renderer
.doctype
, &uri
, 0);
1054 if (!parser
) return;
1056 if (document
->options
.plain
) {
1057 add_dom_stack_context(&parser
->stack
, &renderer
,
1058 &dom_source_renderer_context_info
);
1060 } else if (renderer
.doctype
== SGML_DOCTYPE_RSS
) {
1061 add_dom_stack_context(&parser
->stack
, &renderer
,
1062 &dom_rss_renderer_context_info
);
1063 add_dom_config_normalizer(&parser
->stack
, RSS_CONFIG_FLAGS
);
1066 /* FIXME: When rendering this way we don't really care about the code.
1067 * However, it will be useful when we will be able to also
1068 * incrementally parse new data. This will require the parser to live
1069 * during the fetching of data. */
1070 code
= parse_sgml(parser
, buffer
->source
, buffer
->length
, 1);
1072 assert(parser
->stack
.depth
== 1);
1074 get_dom_stack_top(&parser
->stack
)->immutable
= 0;
1075 /* For SGML_PARSER_STREAM this will free the DOM
1077 pop_dom_node(&parser
->stack
);
1080 done_dom_renderer(&renderer
);
1081 done_sgml_parser(parser
);