template -> template_ for C++ compatibility
[elinks.git] / src / document / dom / source.c
blob30df114e3dcaeb25c493fbe832ce3feb97245736
1 /* DOM-based SGML (HTML) source view renderer (just syntax highlighting :-) */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <sys/types.h> /* FreeBSD needs this before regex.h */
8 #ifdef HAVE_REGEX_H
9 #include <regex.h>
10 #endif
12 #include "elinks.h"
14 #include "cache/cache.h"
15 #include "document/css/css.h"
16 #include "document/css/parser.h"
17 #include "document/css/property.h"
18 #include "document/css/stylesheet.h"
19 #include "document/document.h"
20 #include "document/dom/renderer.h"
21 #include "document/dom/util.h"
22 #include "document/dom/rss.h"
23 #include "document/renderer.h"
24 #include "dom/configuration.h"
25 #include "dom/scanner.h"
26 #include "dom/sgml/parser.h"
27 #include "dom/sgml/html/html.h"
28 #include "dom/sgml/rss/rss.h"
29 #include "dom/node.h"
30 #include "dom/stack.h"
31 #include "intl/charsets.h"
32 #include "protocol/uri.h"
33 #include "terminal/draw.h"
34 #include "util/error.h"
35 #include "util/memory.h"
36 #include "util/string.h"
/* Evaluates to non-zero when the byte range [str, str + len) lies entirely
 * between (renderer)->source and (renderer)->end.
 * Note that the str argument is expanded more than once. */
#define check_dom_node_source(renderer, str, len) \
	((str) >= (renderer)->source && (renderer)->end >= (str) + (len))

/* Asserts that [str, str + len) passes check_dom_node_source() and, when it
 * does not, reports the bounds of both the renderer source and the range. */
#define assert_source(renderer, str, len) \
	assertm(check_dom_node_source(renderer, str, len), \
		"renderer[%p : %p] str[%p : %p]", \
		(renderer)->source, (renderer)->end, \
		(str), (str) + (len))
/* Extended regular expression used to spot URLs inside text-like nodes. It
 * accepts "file://" or an ftp/http/nttp (optionally with a trailing 's') or
 * smb scheme followed by a host name and an optional port, and then any
 * number of '/'-separated path segments. */
#define URL_REGEX "(file://|((f|ht|nt)tp(s)?|smb)://[[:alnum:]]+([-@:.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(:[[:digit:]]+)?)(/(%[[:xdigit:]]{2}|[-_~&=;?.a-z0-9])*)*"

/* Flags handed to regcomp() together with URL_REGEX. */
#define URL_REGFLAGS (REG_EXTENDED | REG_ICASE)
51 struct source_renderer {
52 #ifdef HAVE_REGEX_H
53 regex_t url_regex;
54 unsigned int find_url:1;
55 #endif
57 /* One style per node type. */
58 struct screen_char styles[DOM_NODES];
62 static inline void
63 render_dom_flush(struct dom_renderer *renderer, unsigned char *string)
65 struct source_renderer *data = renderer->data;
66 struct screen_char *template_ = &data->styles[DOM_NODE_TEXT];
67 int length = string - renderer->position;
69 assert_source(renderer, renderer->position, 0);
70 assert_source(renderer, string, 0);
72 if (length <= 0) return;
73 render_dom_text(renderer, template_, renderer->position, length);
74 renderer->position = string;
76 assert_source(renderer, renderer->position, 0);
79 static inline void
80 render_dom_node_text(struct dom_renderer *renderer, struct screen_char *template_,
81 struct dom_node *node)
83 unsigned char *string = node->string.string;
84 int length = node->string.length;
86 if (node->type == DOM_NODE_ENTITY_REFERENCE) {
87 string -= 1;
88 length += 2;
91 if (check_dom_node_source(renderer, string, length)) {
92 render_dom_flush(renderer, string);
93 renderer->position = string + length;
94 assert_source(renderer, renderer->position, 0);
97 render_dom_text(renderer, template_, string, length);
#ifdef HAVE_REGEX_H
/* Renders the string of @node with the style of its node type, turning every
 * substring matched by the renderer's URL regex into a link.
 *
 * When the string lies inside the renderer's source, everything between the
 * current position and the start of the string is flushed first and the
 * position is moved past the string.
 *
 * If no private copy of the string can be allocated the text is rendered
 * without any link detection. */
static inline void
render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *node)
{
	struct source_renderer *data = renderer->data;
	regex_t *regex = &data->url_regex;
	regmatch_t regmatch;
	unsigned char *string = node->string.string;
	int length = node->string.length;
	struct screen_char *template_ = &data->styles[node->type];
	unsigned char *alloc_string;

	if (check_dom_node_source(renderer, string, length)) {
		render_dom_flush(renderer, string);
		renderer->position = string + length;
		assert_source(renderer, renderer->position, 0);
	}

	/* regexec() scans up to a terminating NUL, but the node string is
	 * only known by pointer and length, so matching must be done on a
	 * copy (memacpy() is expected to NUL-terminate it -- TODO confirm).
	 * Never run the regex on the original string: it could read past
	 * the end of the buffer. */
	alloc_string = memacpy(string, length);
	if (!alloc_string) {
		if (length > 0)
			render_dom_text(renderer, template_, string, length);
		return;
	}
	string = alloc_string;

	while (length > 0 && !regexec(regex, string, 1, &regmatch, 0)) {
		int matchlen = regmatch.rm_eo - regmatch.rm_so;
		int offset = regmatch.rm_so;

		/* Stop on empty or out of range matches so the loop always
		 * makes progress and stays inside the string. */
		if (!matchlen || offset < 0 || regmatch.rm_eo > length)
			break;

		/* Plain text in front of the match. */
		if (offset > 0)
			render_dom_text(renderer, template_, string, offset);

		string += offset;
		length -= offset;

		/* The matched text is both the link text and its target. */
		add_dom_link(renderer, string, matchlen, string, matchlen);

		length -= matchlen;
		string += matchlen;
	}

	/* Whatever remains after the last match. */
	if (length > 0)
		render_dom_text(renderer, template_, string, length);

	mem_free(alloc_string);
}
#endif
148 static enum dom_code
149 render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *xxx)
151 struct dom_renderer *renderer = stack->current->data;
152 struct source_renderer *data = renderer->data;
154 assert(node && renderer && renderer->document);
156 #ifdef HAVE_REGEX_H
157 if (data->find_url
158 && (node->type == DOM_NODE_TEXT
159 || node->type == DOM_NODE_CDATA_SECTION
160 || node->type == DOM_NODE_COMMENT)) {
161 render_dom_node_enhanced_text(renderer, node);
162 } else
163 #endif
164 render_dom_node_text(renderer, &data->styles[node->type], node);
166 return DOM_CODE_OK;
169 /* This callback is also used for rendering processing instruction nodes. */
170 static enum dom_code
171 render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *xxx)
173 struct dom_renderer *renderer = stack->current->data;
174 struct source_renderer *data = renderer->data;
176 assert(node && renderer && renderer->document);
178 render_dom_node_text(renderer, &data->styles[node->type], node);
180 return DOM_CODE_OK;
183 static enum dom_code
184 render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, void *xxx)
186 struct dom_renderer *renderer = stack->current->data;
187 struct source_renderer *data = renderer->data;
188 struct dom_stack_state *state = get_dom_stack_top(stack);
189 struct sgml_parser_state *pstate = get_dom_stack_state_data(stack->contexts[0], state);
190 struct dom_scanner_token *token = &pstate->end_token;
191 unsigned char *string = token->string.string;
192 int length = token->string.length;
194 assert(node && renderer && renderer->document);
196 if (!string || !length)
197 return DOM_CODE_OK;
199 if (check_dom_node_source(renderer, string, length)) {
200 render_dom_flush(renderer, string);
201 renderer->position = string + length;
202 assert_source(renderer, renderer->position, 0);
205 render_dom_text(renderer, &data->styles[node->type], string, length);
207 return DOM_CODE_OK;
210 static void
211 set_base_uri(struct dom_renderer *renderer, unsigned char *value, size_t valuelen)
213 unsigned char *href = memacpy(value, valuelen);
214 unsigned char *uristring;
215 struct uri *uri;
217 if (!href) return;
218 uristring = join_urls(renderer->base_uri, href);
219 mem_free(href);
221 if (!uristring) return;
222 uri = get_uri(uristring, 0);
223 mem_free(uristring);
225 if (!uri) return;
227 done_uri(renderer->base_uri);
228 renderer->base_uri = uri;
231 static enum dom_code
232 render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *xxx)
234 struct dom_renderer *renderer = stack->current->data;
235 struct source_renderer *data = renderer->data;
236 struct screen_char *template_ = &data->styles[node->type];
238 assert(node && renderer->document);
240 render_dom_node_text(renderer, template_, node);
242 if (is_dom_string_set(&node->data.attribute.value)) {
243 int quoted = node->data.attribute.quoted == 1;
244 unsigned char *value = node->data.attribute.value.string - quoted;
245 int valuelen = node->data.attribute.value.length + quoted * 2;
247 if (check_dom_node_source(renderer, value, 0)) {
248 render_dom_flush(renderer, value);
249 renderer->position = value + valuelen;
250 assert_source(renderer, renderer->position, 0);
253 if (node->data.attribute.reference
254 && valuelen - quoted * 2 > 0) {
255 int skips;
257 /* Need to flush the first quoting delimiter and any
258 * leading whitespace so that the renderers x position
259 * is at the start of the value string. */
260 for (skips = 0; skips < valuelen; skips++) {
261 if ((quoted && skips == 0)
262 || isspace(value[skips])
263 || value[skips] < ' ')
264 continue;
266 break;
269 if (skips > 0) {
270 render_dom_text(renderer, template_, value, skips);
271 value += skips;
272 valuelen -= skips;
275 /* Figure out what should be skipped after the actual
276 * link text. */
277 for (skips = 0; skips < valuelen; skips++) {
278 if ((quoted && skips == 0)
279 || isspace(value[valuelen - skips - 1])
280 || value[valuelen - skips - 1] < ' ')
281 continue;
283 break;
286 if (renderer->doctype == SGML_DOCTYPE_HTML
287 && node->data.attribute.type == HTML_ATTRIBUTE_HREF
288 && node->parent->data.element.type == HTML_ELEMENT_BASE) {
289 set_base_uri(renderer, value, valuelen - skips);
292 add_dom_link(renderer, value, valuelen - skips,
293 value, valuelen - skips);
295 if (skips > 0) {
296 value += valuelen - skips;
297 render_dom_text(renderer, template_, value, skips);
299 } else {
300 render_dom_text(renderer, template_, value, valuelen);
304 return DOM_CODE_OK;
307 static enum dom_code
308 render_dom_cdata_source(struct dom_stack *stack, struct dom_node *node, void *xxx)
310 struct dom_renderer *renderer = stack->current->data;
311 struct source_renderer *data = renderer->data;
312 unsigned char *string = node->string.string;
314 assert(node && renderer && renderer->document);
316 /* Highlight the 'CDATA' part of <![CDATA[ if it is there. */
317 if (check_dom_node_source(renderer, string - 6, 6)) {
318 render_dom_flush(renderer, string - 6);
319 render_dom_text(renderer, &data->styles[DOM_NODE_ATTRIBUTE], string - 6, 5);
320 renderer->position = string - 1;
321 assert_source(renderer, renderer->position, 0);
324 render_dom_node_text(renderer, &data->styles[node->type], node);
326 return DOM_CODE_OK;
330 static enum dom_code
331 render_dom_document_start(struct dom_stack *stack, struct dom_node *node, void *xxx)
333 struct dom_renderer *renderer = stack->current->data;
334 struct document *document = renderer->document;
335 struct source_renderer *data;
336 enum dom_node_type type;
338 struct css_stylesheet *css = &default_stylesheet;
340 static int i_want_struct_module_for_dom;
342 if (!i_want_struct_module_for_dom) {
343 static const unsigned char default_colors[] =
344 "document { color: yellow } "
345 "element { color: lightgreen } "
346 "entity-reference { color: red } "
347 "proc-instruction { color: red } "
348 "attribute { color: magenta } "
349 "comment { color: aqua } "
350 "cdata-section { color: orange2 } ";
352 i_want_struct_module_for_dom = 1;
353 /* When someone will get here earlier than at 4am,
354 * this will be done in some init function, perhaps
355 * not overriding the user's default stylesheet. */
356 css_parse_stylesheet(css, NULL, default_colors,
357 default_colors + sizeof(default_colors));
361 data = renderer->data = mem_calloc(1, sizeof(*data));
363 /* Initialize styles for all the DOM node types. */
365 for (type = 0; type < DOM_NODES; type++) {
366 struct screen_char *template_ = &data->styles[type];
367 struct dom_string *name = get_dom_node_type_name(type);
368 struct css_selector *selector = NULL;
370 if (name && is_dom_string_set(name))
371 selector = find_css_selector(&css->selectors,
372 CST_ELEMENT, CSR_ROOT,
373 name->string, name->length);
374 init_template_by_style(template_, &document->options,
375 selector ? &selector->properties : NULL);
378 #ifdef HAVE_REGEX_H
379 if (document->options.plain_display_links) {
380 if (regcomp(&data->url_regex, URL_REGEX, URL_REGFLAGS)) {
381 regfree(&data->url_regex);
382 } else {
383 data->find_url = 1;
386 #endif
388 return DOM_CODE_OK;
391 static enum dom_code
392 render_dom_document_end(struct dom_stack *stack, struct dom_node *node, void *xxx)
394 struct dom_renderer *renderer = stack->current->data;
395 struct source_renderer *data = renderer->data;
397 /* If there are no non-element nodes after the last element node make
398 * sure that we flush to the end of the cache entry source including
399 * the '>' of the last element tag if it has one. (bug 519) */
400 if (check_dom_node_source(renderer, renderer->position, 0)) {
401 render_dom_flush(renderer, renderer->end);
404 #ifdef HAVE_REGEX_H
405 if (data->find_url)
406 regfree(&data->url_regex);
407 #endif
409 mem_free(data);
411 /* It is not necessary to return DOM_CODE_FREE_NODE here.
412 * Because the parser was created with the SGML_PARSER_STREAM
413 * type, the stack has the DOM_STACK_FLAG_FREE_NODES flag and
414 * implicitly frees all nodes popped from it. */
415 return DOM_CODE_OK;
419 struct dom_stack_context_info dom_source_renderer_context_info = {
420 /* Object size: */ 0,
421 /* Push: */
423 /* */ NULL,
424 /* DOM_NODE_ELEMENT */ render_dom_element_source,
425 /* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source,
426 /* DOM_NODE_TEXT */ render_dom_node_source,
427 /* DOM_NODE_CDATA_SECTION */ render_dom_cdata_source,
428 /* DOM_NODE_ENTITY_REFERENCE */ render_dom_node_source,
429 /* DOM_NODE_ENTITY */ render_dom_node_source,
430 /* DOM_NODE_PROC_INSTRUCTION */ render_dom_element_source,
431 /* DOM_NODE_COMMENT */ render_dom_node_source,
432 /* DOM_NODE_DOCUMENT */ render_dom_document_start,
433 /* DOM_NODE_DOCUMENT_TYPE */ render_dom_node_source,
434 /* DOM_NODE_DOCUMENT_FRAGMENT */ render_dom_node_source,
435 /* DOM_NODE_NOTATION */ render_dom_node_source,
437 /* Pop: */
439 /* */ NULL,
440 /* DOM_NODE_ELEMENT */ render_dom_element_end_source,
441 /* DOM_NODE_ATTRIBUTE */ NULL,
442 /* DOM_NODE_TEXT */ NULL,
443 /* DOM_NODE_CDATA_SECTION */ NULL,
444 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
445 /* DOM_NODE_ENTITY */ NULL,
446 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
447 /* DOM_NODE_COMMENT */ NULL,
448 /* DOM_NODE_DOCUMENT */ render_dom_document_end,
449 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
450 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
451 /* DOM_NODE_NOTATION */ NULL,