1 /* DOM document renderer */
7 #include <sys/types.h> /* FreeBSD needs this before regex.h */
15 #include "cache/cache.h"
16 #include "document/css/css.h"
17 #include "document/css/parser.h"
18 #include "document/css/property.h"
19 #include "document/css/stylesheet.h"
20 #include "document/document.h"
21 #include "document/dom/renderer.h"
22 #include "document/dom/rss.h"
23 #include "document/dom/source.h"
24 #include "document/dom/util.h"
25 #include "document/renderer.h"
26 #include "dom/configuration.h"
27 #include "dom/scanner.h"
28 #include "dom/sgml/parser.h"
29 #include "dom/sgml/html/html.h"
30 #include "dom/sgml/rss/rss.h"
32 #include "dom/stack.h"
33 #include "intl/charsets.h"
34 #include "protocol/uri.h"
35 #include "terminal/draw.h"
36 #include "util/error.h"
37 #include "util/memory.h"
38 #include "util/string.h"
41 #define URL_REGEX "(file://|((f|ht|nt)tp(s)?|smb)://[[:alnum:]]+([-@:.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(:[[:digit:]]+)?)(/(%[[:xdigit:]]{2}|[-_~&=;?.a-z0-9])*)*"
42 #define URL_REGFLAGS (REG_ICASE | REG_EXTENDED)
45 /* Checks the user CSS for properties for each DOM node type name */
47 init_dom_renderer(struct dom_renderer
*renderer
, struct document
*document
,
48 struct string
*buffer
, struct conv_table
*convert_table
)
50 enum dom_node_type type
;
51 struct css_stylesheet
*css
= &default_stylesheet
;
53 memset(renderer
, 0, sizeof(*renderer
));
55 renderer
->document
= document
;
56 renderer
->convert_table
= convert_table
;
57 renderer
->convert_mode
= document
->options
.plain
? CSM_NONE
: CSM_DEFAULT
;
58 renderer
->source
= buffer
->source
;
59 renderer
->end
= buffer
->source
+ buffer
->length
;
60 renderer
->position
= renderer
->source
;
61 renderer
->base_uri
= get_uri_reference(document
->uri
);
64 if (renderer
->document
->options
.plain_display_links
) {
65 if (regcomp(&renderer
->url_regex
, URL_REGEX
, URL_REGFLAGS
)) {
66 regfree(&renderer
->url_regex
);
68 renderer
->find_url
= 1;
73 for (type
= 0; type
< DOM_NODES
; type
++) {
74 struct screen_char
*template = &renderer
->styles
[type
];
75 color_T background
= document
->options
.default_bg
;
76 color_T foreground
= document
->options
.default_fg
;
77 enum screen_char_attr attr
= 0;
78 static int i_want_struct_module_for_dom
;
80 struct dom_string
*name
= get_dom_node_type_name(type
);
81 struct css_selector
*selector
= NULL
;
83 if (!i_want_struct_module_for_dom
) {
84 static const unsigned char default_colors
[] =
85 "document { color: yellow } "
86 "element { color: lightgreen } "
87 "entity-reference { color: red } "
88 "proc-instruction { color: red } "
89 "attribute { color: magenta } "
90 "comment { color: aqua } "
91 "cdata-section { color: orange2 } ";
92 unsigned char *styles
= (unsigned char *) default_colors
;
94 i_want_struct_module_for_dom
= 1;
95 /* When someone will get here earlier than at 4am,
96 * this will be done in some init function, perhaps
97 * not overriding the user's default stylesheet. */
98 css_parse_stylesheet(css
, NULL
, styles
, styles
+ sizeof(default_colors
));
102 if (is_dom_string_set(name
))
103 selector
= find_css_selector(&css
->selectors
,
104 CST_ELEMENT
, CSR_ROOT
,
105 name
->string
, name
->length
);
108 struct css_property
*property
;
110 foreach (property
, selector
->properties
) {
111 switch (property
->type
) {
112 case CSS_PT_BACKGROUND_COLOR
:
113 case CSS_PT_BACKGROUND
:
114 if (property
->value_type
== CSS_VT_COLOR
)
115 background
= property
->value
.color
;
118 foreground
= property
->value
.color
;
120 case CSS_PT_FONT_WEIGHT
:
121 if (property
->value
.font_attribute
.add
& AT_BOLD
)
122 attr
|= SCREEN_ATTR_BOLD
;
124 case CSS_PT_FONT_STYLE
:
125 if (property
->value
.font_attribute
.add
& AT_UNDERLINE
)
126 attr
|= SCREEN_ATTR_UNDERLINE
;
128 if (property
->value
.font_attribute
.add
& AT_ITALIC
)
129 attr
|= SCREEN_ATTR_ITALIC
;
131 case CSS_PT_TEXT_DECORATION
:
132 if (property
->value
.font_attribute
.add
& AT_UNDERLINE
)
133 attr
|= SCREEN_ATTR_UNDERLINE
;
137 case CSS_PT_TEXT_ALIGN
:
138 case CSS_PT_WHITE_SPACE
:
145 init_template(template, &document
->options
, background
, foreground
, attr
);
150 done_dom_renderer(struct dom_renderer
*renderer
)
153 if (renderer
->find_url
)
154 regfree(&renderer
->url_regex
);
156 done_uri(renderer
->base_uri
);
161 get_doctype(struct dom_renderer
*renderer
, struct cache_entry
*cached
)
163 if (!strcasecmp("application/rss+xml", cached
->content_type
)) {
164 renderer
->doctype
= SGML_DOCTYPE_RSS
;
166 } else if (!strcasecmp("application/docbook+xml",
167 cached
->content_type
)) {
168 renderer
->doctype
= SGML_DOCTYPE_DOCBOOK
;
170 } else if (!strcasecmp("application/xbel+xml", cached
->content_type
)
171 || !strcasecmp("application/x-xbel", cached
->content_type
)
172 || !strcasecmp("application/xbel", cached
->content_type
)) {
173 renderer
->doctype
= SGML_DOCTYPE_XBEL
;
176 assertm(!strcasecmp("text/html", cached
->content_type
)
177 || !strcasecmp("application/xhtml+xml",
178 cached
->content_type
),
179 "Couldn't resolve doctype '%s'", cached
->content_type
);
181 renderer
->doctype
= SGML_DOCTYPE_HTML
;
185 /* Shared multiplexor between renderers */
187 render_dom_document(struct cache_entry
*cached
, struct document
*document
,
188 struct string
*buffer
)
190 unsigned char *head
= empty_string_or_(cached
->head
);
191 struct dom_renderer renderer
;
192 struct conv_table
*convert_table
;
193 struct sgml_parser
*parser
;
194 enum sgml_parser_type parser_type
;
195 unsigned char *string
= struri(cached
->uri
);
196 size_t length
= strlen(string
);
197 struct dom_string uri
= INIT_DOM_STRING(string
, length
);
200 convert_table
= get_convert_table(head
, document
->options
.cp
,
201 document
->options
.assume_cp
,
203 &document
->cp_status
,
204 document
->options
.hard_assume
);
206 init_dom_renderer(&renderer
, document
, buffer
, convert_table
);
208 document
->bgcolor
= document
->options
.default_bg
;
210 document
->options
.utf8
= is_cp_utf8(document
->options
.cp
);
211 #endif /* CONFIG_UTF8 */
213 if (document
->options
.plain
)
214 parser_type
= SGML_PARSER_STREAM
;
216 parser_type
= SGML_PARSER_TREE
;
218 get_doctype(&renderer
, cached
);
220 parser
= init_sgml_parser(parser_type
, renderer
.doctype
, &uri
, 0);
223 if (document
->options
.plain
) {
224 add_dom_stack_context(&parser
->stack
, &renderer
,
225 &dom_source_renderer_context_info
);
227 } else if (renderer
.doctype
== SGML_DOCTYPE_RSS
) {
228 add_dom_stack_context(&parser
->stack
, &renderer
,
229 &dom_rss_renderer_context_info
);
230 add_dom_config_normalizer(&parser
->stack
, RSS_CONFIG_FLAGS
);
233 /* FIXME: When rendering this way we don't really care about the code.
234 * However, it will be useful when we will be able to also
235 * incrementally parse new data. This will require the parser to live
236 * during the fetching of data. */
237 code
= parse_sgml(parser
, buffer
->source
, buffer
->length
, 1);
239 assert(parser
->stack
.depth
== 1);
241 get_dom_stack_top(&parser
->stack
)->immutable
= 0;
242 /* For SGML_PARSER_STREAM this will free the DOM
244 pop_dom_node(&parser
->stack
);
247 done_dom_renderer(&renderer
);
248 done_sgml_parser(parser
);