document/dom: Split SGML (HTML) source renderer to source.*
[elinks/images.git] / src / document / dom / renderer.c
blob7f58e2738faf09eabb22e9a7115573c6713b744f
1 /* DOM document renderer */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <sys/types.h> /* FreeBSD needs this before regex.h */
8 #ifdef HAVE_REGEX_H
9 #include <regex.h>
10 #endif
11 #include <string.h>
13 #include "elinks.h"
15 #include "cache/cache.h"
16 #include "document/css/css.h"
17 #include "document/css/parser.h"
18 #include "document/css/property.h"
19 #include "document/css/stylesheet.h"
20 #include "document/document.h"
21 #include "document/dom/renderer.h"
22 #include "document/dom/rss.h"
23 #include "document/dom/source.h"
24 #include "document/dom/util.h"
25 #include "document/renderer.h"
26 #include "dom/configuration.h"
27 #include "dom/scanner.h"
28 #include "dom/sgml/parser.h"
29 #include "dom/sgml/html/html.h"
30 #include "dom/sgml/rss/rss.h"
31 #include "dom/node.h"
32 #include "dom/stack.h"
33 #include "intl/charsets.h"
34 #include "protocol/uri.h"
35 #include "terminal/draw.h"
36 #include "util/error.h"
37 #include "util/memory.h"
38 #include "util/string.h"
/* POSIX extended regular expression used to spot URLs in rendered source.
 * It accepts either a bare "file://" prefix or a scheme built from
 * (f|ht|nt)tp with an optional trailing "s", or "smb", followed by "://",
 * a host name ending in a 2-4 letter alphabetic label and an optional
 * ":port". Any number of "/"-introduced path segments made of %XX escapes
 * or the characters -_~&=;?.a-z0-9 may follow.
 * It is compiled with regcomp() in init_dom_renderer() when the
 * plain_display_links document option is set.
 * NOTE(review): "(f|ht|nt)tp" yields "nttp", not "nntp" -- possibly a typo;
 * confirm the intended scheme before changing the pattern. */
41 #define URL_REGEX "(file://|((f|ht|nt)tp(s)?|smb)://[[:alnum:]]+([-@:.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(:[[:digit:]]+)?)(/(%[[:xdigit:]]{2}|[-_~&=;?.a-z0-9])*)*"
/* regcomp() flags for URL_REGEX: case-insensitive matching, extended
 * regular expression syntax. */
42 #define URL_REGFLAGS (REG_ICASE | REG_EXTENDED)
45 /* Checks the user CSS for properties for each DOM node type name */
46 static inline void
47 init_dom_renderer(struct dom_renderer *renderer, struct document *document,
48 struct string *buffer, struct conv_table *convert_table)
50 enum dom_node_type type;
51 struct css_stylesheet *css = &default_stylesheet;
53 memset(renderer, 0, sizeof(*renderer));
55 renderer->document = document;
56 renderer->convert_table = convert_table;
57 renderer->convert_mode = document->options.plain ? CSM_NONE : CSM_DEFAULT;
58 renderer->source = buffer->source;
59 renderer->end = buffer->source + buffer->length;
60 renderer->position = renderer->source;
61 renderer->base_uri = get_uri_reference(document->uri);
63 #ifdef HAVE_REGEX_H
64 if (renderer->document->options.plain_display_links) {
65 if (regcomp(&renderer->url_regex, URL_REGEX, URL_REGFLAGS)) {
66 regfree(&renderer->url_regex);
67 } else {
68 renderer->find_url = 1;
71 #endif
73 for (type = 0; type < DOM_NODES; type++) {
74 struct screen_char *template = &renderer->styles[type];
75 color_T background = document->options.default_bg;
76 color_T foreground = document->options.default_fg;
77 enum screen_char_attr attr = 0;
78 static int i_want_struct_module_for_dom;
80 struct dom_string *name = get_dom_node_type_name(type);
81 struct css_selector *selector = NULL;
83 if (!i_want_struct_module_for_dom) {
84 static const unsigned char default_colors[] =
85 "document { color: yellow } "
86 "element { color: lightgreen } "
87 "entity-reference { color: red } "
88 "proc-instruction { color: red } "
89 "attribute { color: magenta } "
90 "comment { color: aqua } "
91 "cdata-section { color: orange2 } ";
92 unsigned char *styles = (unsigned char *) default_colors;
94 i_want_struct_module_for_dom = 1;
95 /* When someone will get here earlier than at 4am,
96 * this will be done in some init function, perhaps
97 * not overriding the user's default stylesheet. */
98 css_parse_stylesheet(css, NULL, styles, styles + sizeof(default_colors));
101 if (name)
102 if (is_dom_string_set(name))
103 selector = find_css_selector(&css->selectors,
104 CST_ELEMENT, CSR_ROOT,
105 name->string, name->length);
107 if (selector) {
108 struct css_property *property;
110 foreach (property, selector->properties) {
111 switch (property->type) {
112 case CSS_PT_BACKGROUND_COLOR:
113 case CSS_PT_BACKGROUND:
114 if (property->value_type == CSS_VT_COLOR)
115 background = property->value.color;
116 break;
117 case CSS_PT_COLOR:
118 foreground = property->value.color;
119 break;
120 case CSS_PT_FONT_WEIGHT:
121 if (property->value.font_attribute.add & AT_BOLD)
122 attr |= SCREEN_ATTR_BOLD;
123 break;
124 case CSS_PT_FONT_STYLE:
125 if (property->value.font_attribute.add & AT_UNDERLINE)
126 attr |= SCREEN_ATTR_UNDERLINE;
128 if (property->value.font_attribute.add & AT_ITALIC)
129 attr |= SCREEN_ATTR_ITALIC;
130 break;
131 case CSS_PT_TEXT_DECORATION:
132 if (property->value.font_attribute.add & AT_UNDERLINE)
133 attr |= SCREEN_ATTR_UNDERLINE;
134 break;
135 case CSS_PT_DISPLAY:
136 case CSS_PT_NONE:
137 case CSS_PT_TEXT_ALIGN:
138 case CSS_PT_WHITE_SPACE:
139 case CSS_PT_LAST:
140 break;
145 init_template(template, &document->options, background, foreground, attr);
149 static inline void
150 done_dom_renderer(struct dom_renderer *renderer)
152 #ifdef HAVE_REGEX_H
153 if (renderer->find_url)
154 regfree(&renderer->url_regex);
155 #endif
156 done_uri(renderer->base_uri);
160 static void
161 get_doctype(struct dom_renderer *renderer, struct cache_entry *cached)
163 if (!strcasecmp("application/rss+xml", cached->content_type)) {
164 renderer->doctype = SGML_DOCTYPE_RSS;
166 } else if (!strcasecmp("application/docbook+xml",
167 cached->content_type)) {
168 renderer->doctype = SGML_DOCTYPE_DOCBOOK;
170 } else if (!strcasecmp("application/xbel+xml", cached->content_type)
171 || !strcasecmp("application/x-xbel", cached->content_type)
172 || !strcasecmp("application/xbel", cached->content_type)) {
173 renderer->doctype = SGML_DOCTYPE_XBEL;
175 } else {
176 assertm(!strcasecmp("text/html", cached->content_type)
177 || !strcasecmp("application/xhtml+xml",
178 cached->content_type),
179 "Couldn't resolve doctype '%s'", cached->content_type);
181 renderer->doctype = SGML_DOCTYPE_HTML;
185 /* Shared multiplexor between renderers */
186 void
187 render_dom_document(struct cache_entry *cached, struct document *document,
188 struct string *buffer)
190 unsigned char *head = empty_string_or_(cached->head);
191 struct dom_renderer renderer;
192 struct conv_table *convert_table;
193 struct sgml_parser *parser;
194 enum sgml_parser_type parser_type;
195 unsigned char *string = struri(cached->uri);
196 size_t length = strlen(string);
197 struct dom_string uri = INIT_DOM_STRING(string, length);
198 enum dom_code code;
200 convert_table = get_convert_table(head, document->options.cp,
201 document->options.assume_cp,
202 &document->cp,
203 &document->cp_status,
204 document->options.hard_assume);
206 init_dom_renderer(&renderer, document, buffer, convert_table);
208 document->bgcolor = document->options.default_bg;
209 #ifdef CONFIG_UTF8
210 document->options.utf8 = is_cp_utf8(document->options.cp);
211 #endif /* CONFIG_UTF8 */
213 if (document->options.plain)
214 parser_type = SGML_PARSER_STREAM;
215 else
216 parser_type = SGML_PARSER_TREE;
218 get_doctype(&renderer, cached);
220 parser = init_sgml_parser(parser_type, renderer.doctype, &uri, 0);
221 if (!parser) return;
223 if (document->options.plain) {
224 add_dom_stack_context(&parser->stack, &renderer,
225 &dom_source_renderer_context_info);
227 } else if (renderer.doctype == SGML_DOCTYPE_RSS) {
228 add_dom_stack_context(&parser->stack, &renderer,
229 &dom_rss_renderer_context_info);
230 add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS);
233 /* FIXME: When rendering this way we don't really care about the code.
234 * However, it will be useful when we will be able to also
235 * incrementally parse new data. This will require the parser to live
236 * during the fetching of data. */
237 code = parse_sgml(parser, buffer->source, buffer->length, 1);
238 if (parser->root) {
239 assert(parser->stack.depth == 1);
241 get_dom_stack_top(&parser->stack)->immutable = 0;
242 /* For SGML_PARSER_STREAM this will free the DOM
243 * root node. */
244 pop_dom_node(&parser->stack);
247 done_dom_renderer(&renderer);
248 done_sgml_parser(parser);