Fix compilation using --enable-html-highlight.
[elinks.git] / src / document / renderer.c
blob40f2f58ff40634c270a8cd337566356a25950037
1 /** HTML renderer
2 * @file */
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
8 #include <ctype.h>
9 #include <stdarg.h>
10 #include <stdlib.h>
11 #include <string.h>
13 #include "elinks.h"
15 #include "cache/cache.h"
16 #include "config/options.h"
17 #include "document/document.h"
18 #include "document/dom/renderer.h"
19 #include "document/html/frames.h"
20 #include "document/html/renderer.h"
21 #include "document/plain/renderer.h"
22 #include "document/renderer.h"
23 #include "document/view.h"
24 #include "ecmascript/ecmascript.h"
25 #include "encoding/encoding.h"
26 #include "intl/charsets.h"
27 #include "main/main.h"
28 #include "main/object.h"
29 #include "protocol/header.h"
30 #include "protocol/protocol.h"
31 #include "protocol/uri.h"
32 #include "session/location.h"
33 #include "session/session.h"
34 #include "terminal/terminal.h"
35 #include "terminal/window.h"
36 #include "util/error.h"
37 #include "util/memory.h"
38 #include "util/string.h"
39 #include "viewer/text/view.h"
40 #include "viewer/text/vs.h"
43 #ifdef CONFIG_ECMASCRIPT
44 /** @todo XXX: This function is de facto obsolete, since we do not need to copy
45 * snippets around anymore (we process them in one go after the document is
46 * loaded; gradual processing was practically impossible because the snippets
47 * could reorder randomly during the loading - consider i.e.
48 * @<body onLoad>@<script>@</body>: first just @<body> is loaded, but then the
49 * rest of the document is loaded and @<script> gets before @<body>; do not even
50 * imagine the trouble with rewritten (through scripting hooks) documents;
51 * besides, implementing document.write() will be much simpler).
52 * But I want to take no risk by reworking that now. --pasky */
53 static void
54 add_snippets(struct ecmascript_interpreter *interpreter,
55 LIST_OF(struct string_list_item) *doc_snippets,
56 LIST_OF(struct string_list_item) *queued_snippets)
58 struct string_list_item *doc_current = doc_snippets->next;
60 #ifdef CONFIG_LEDS
61 if (list_empty(*queued_snippets) && interpreter->vs->doc_view->session)
62 unset_led_value(interpreter->vs->doc_view->session->status.ecmascript_led);
63 #endif
65 if (list_empty(*doc_snippets) || !get_opt_bool("ecmascript.enable"))
66 return;
68 /* We do this all only once per view_state now. */
69 if (!list_empty(*queued_snippets)) {
70 /* So if we already did it, we shouldn't need to do it again.
71 * This is the case of moving around in history - we have all
72 * what happenned recorded in the view_state and needn't bother
73 * again. */
74 #ifdef CONFIG_DEBUG
75 /* Hopefully. */
76 struct string_list_item *iterator = queued_snippets->next;
78 while (iterator != (struct string_list_item *) queued_snippets) {
79 if (doc_current == (struct string_list_item *) doc_snippets) {
80 INTERNAL("add_snippets(): doc_snippets shorter than queued_snippets!");
81 return;
83 #if 0
84 DBG("Comparing snippets\n%.*s\n###### vs #####\n%.*s\n #####",
85 iterator->string.length, iterator->string.source,
86 doc_current->string.length, doc_current->string.source);
87 #endif
88 assert(!strlcmp(iterator->string.source,
89 iterator->string.length,
90 doc_current->string.source,
91 doc_current->string.length));
93 doc_current = doc_current->next;
94 iterator = iterator->next;
96 #endif
97 return;
100 assert(doc_current);
101 for (; doc_current != (struct string_list_item *) doc_snippets;
102 doc_current = doc_current->next) {
103 add_to_string_list(queued_snippets, doc_current->string.source,
104 doc_current->string.length);
105 #if 0
106 DBG("Adding snippet\n%.*s\n #####",
107 doc_current->string.length,
108 doc_current->string.source);
109 #endif
113 static void
114 process_snippets(struct ecmascript_interpreter *interpreter,
115 LIST_OF(struct string_list_item) *snippets,
116 struct string_list_item **current)
118 if (!*current)
119 *current = snippets->next;
120 for (; *current != (struct string_list_item *) snippets;
121 (*current) = (*current)->next) {
122 struct string *string = &(*current)->string;
123 unsigned char *uristring;
124 struct uri *uri;
125 struct cache_entry *cached;
126 struct fragment *fragment;
128 if (string->length == 0)
129 continue;
131 if (*string->source != '^') {
132 /* Evaluate <script>code</script> snippet */
133 ecmascript_eval(interpreter, string, NULL);
134 continue;
137 /* Eval external <script src="reference"></script> snippet */
138 uristring = string->source + 1;
139 if (!*uristring) continue;
141 uri = get_uri(uristring, URI_BASE);
142 if (!uri) continue;
144 cached = get_redirected_cache_entry(uri);
145 done_uri(uri);
147 if (!cached) {
148 /* At this time (!gradual_rerendering), we should've
149 * already retrieved this though. So it must've been
150 * that it went away because unused and the cache was
151 * already too full. */
152 #if 0
153 /* Disabled because gradual rerendering can be triggered
154 * by numerous events other than a ecmascript reference
155 * completing like the original document and CSS. Problem
156 * is that we should never continue this loop but rather
157 * break out if that is the case. Somehow we need to
158 * be able to derive URI loading problems at this point
159 * or maybe remove reference snippets if they fail to load.
161 * This FIFO queue handling should be used for also CSS
162 * imports so it would be cool if it could be general
163 * enough for that. Using it for frames with the FIFOing
164 * disabled probably wouldn't hurt either.
166 * To top this thing off it would be nice if it also
167 * handled dependency tracking between references so that
168 * CSS documents will not disappear from the cache
169 * before all referencing HTML documents has been deleted
170 * from it.
172 * Reported as bug 533. */
173 /* Pasky's explanation: If we get the doc in a single
174 * shot, before calling draw_formatted() we didn't have
175 * anything additional queued for loading and the cache
176 * entry was already loaded, so we didn't get
177 * gradual_loading set. But then while parsing the
178 * document we got some external references and trying
179 * to process them right now. Boom.
181 * The obvious solution would be to always call
182 * draw_formatted() with gradual_loading in
183 * doc_loading_callback() and if we are sure the
184 * loading is really over, call it one more time
185 * without gradual_loading set. I'm not sure about
186 * the implications though so I won't do it before
187 * 0.10.0. --pasky */
188 ERROR("The script of %s was lost in too full a cache!",
189 uristring);
190 #endif
191 continue;
194 fragment = get_cache_fragment(cached);
195 if (fragment) {
196 struct string code = INIT_STRING(fragment->data, fragment->length);
198 ecmascript_eval(interpreter, &code, NULL);
202 #endif
204 static void
205 render_encoded_document(struct cache_entry *cached, struct document *document)
207 struct uri *uri = cached->uri;
208 enum stream_encoding encoding = ENCODING_NONE;
209 struct fragment *fragment = get_cache_fragment(cached);
210 struct string buffer = INIT_STRING("", 0);
212 /* Even empty documents have to be rendered so that info in the protocol
213 * header, such as refresh info, get processed. (bug 625) */
214 if (fragment) {
215 buffer.source = fragment->data;
216 buffer.length = fragment->length;
219 if (uri->protocol != PROTOCOL_FILE) {
220 unsigned char *extension = get_extension_from_uri(uri);
222 if (extension) {
223 encoding = guess_encoding(extension);
224 mem_free(extension);
227 if (encoding != ENCODING_NONE) {
228 int length = 0;
229 unsigned char *source;
231 source = decode_encoded_buffer(encoding, buffer.source,
232 buffer.length, &length);
233 if (source) {
234 buffer.source = source;
235 buffer.length = length;
236 } else {
237 encoding = ENCODING_NONE;
242 if (document->options.plain) {
243 #ifdef CONFIG_DOM
244 if (cached->content_type
245 && (!strcasecmp("text/html", cached->content_type)
246 || !strcasecmp("application/xhtml+xml", cached->content_type)
247 || !strcasecmp("application/docbook+xml", cached->content_type)
248 || !strcasecmp("application/rss+xml", cached->content_type)
249 || !strcasecmp("application/xbel+xml", cached->content_type)
250 || !strcasecmp("application/x-xbel", cached->content_type)
251 || !strcasecmp("application/xbel", cached->content_type)))
252 render_dom_document(cached, document, &buffer);
253 else
254 #endif
255 render_plain_document(cached, document, &buffer);
257 } else {
258 #ifdef CONFIG_DOM
259 if (cached->content_type
260 && (!strlcasecmp("application/rss+xml", 19, cached->content_type, -1)))
261 render_dom_document(cached, document, &buffer);
262 else
263 #endif
264 render_html_document(cached, document, &buffer);
267 if (encoding != ENCODING_NONE) {
268 done_string(&buffer);
272 void
273 render_document(struct view_state *vs, struct document_view *doc_view,
274 struct document_options *options)
276 unsigned char *name;
277 struct document *document;
278 struct cache_entry *cached;
280 assert(vs && doc_view && options);
281 if_assert_failed return;
283 #if 0
284 DBG("(Re%u)Rendering %s on doc_view %p [%s] while attaching it to %p",
285 options->gradual_rerendering, struri(vs->uri),
286 doc_view, doc_view->name, vs);
287 #endif
289 name = doc_view->name;
290 doc_view->name = NULL;
292 detach_formatted(doc_view);
294 doc_view->name = name;
295 doc_view->vs = vs;
296 doc_view->last_x = doc_view->last_y = -1;
298 #if 0
299 /* This is a nice idea, but doesn't always work: in particular when
300 * there's a frame name conflict. You loaded something to the vs'
301 * frame, but later something tried to get loaded to a frame with
302 * the same name and we got back this frame again, so we are now
303 * overriding the original document with a cuckoo. This assert()ion
304 * should be re-enabled when we start to get this right (which is
305 * very complex, but someone should rewrite the frames support
306 * anyway). --pasky */
307 assert(!vs->doc_view);
308 #else
309 if (vs->doc_view) {
310 /* It will be still detached, no worries - hopefully it still
311 * resides in ses->scrn_frames. */
312 assert(vs->doc_view->vs == vs);
313 vs->doc_view->used = 0; /* A bit risky, but... */
314 vs->doc_view->vs = NULL;
315 vs->doc_view = NULL;
316 #ifdef CONFIG_ECMASCRIPT
317 vs->ecmascript_fragile = 1; /* And is this good? ;-) */
318 #endif
320 #endif
321 vs->doc_view = doc_view;
323 cached = find_in_cache(vs->uri);
324 if (!cached) {
325 INTERNAL("document %s to format not found", struri(vs->uri));
326 return;
329 document = get_cached_document(cached, options);
330 if (document) {
331 doc_view->document = document;
332 } else {
333 document = init_document(cached, options);
334 if (!document) return;
335 doc_view->document = document;
337 shrink_memory(0);
339 render_encoded_document(cached, document);
340 sort_links(document);
341 if (!document->title) {
342 enum uri_component components;
344 if (document->uri->protocol == PROTOCOL_FILE) {
345 components = URI_PATH;
346 } else {
347 components = URI_PUBLIC;
350 document->title = get_uri_string(document->uri, components);
351 if (document->title) {
352 #ifdef CONFIG_UTF8
353 if (doc_view->document->options.utf8)
354 decode_uri(document->title);
355 else
356 #endif /* CONFIG_UTF8 */
357 decode_uri_for_display(document->title);
361 #ifdef CONFIG_CSS
362 document->css_magic = get_document_css_magic(document);
363 #endif
365 #ifdef CONFIG_ECMASCRIPT
366 if (!vs->ecmascript_fragile)
367 assert(vs->ecmascript);
368 if (!options->gradual_rerendering) {
369 /* We also reset the state if the underlying document changed
370 * from the last time we did the snippets. This may be
371 * triggered i.e. when redrawing a document which has been
372 * reloaded in a different tab meanwhile (OTOH we don't want
373 * to reset the state if we are redrawing a document we have
374 * already drawn before).
376 * (vs->ecmascript->onload_snippets_owner) check may be
377 * superfluous since we should always have
378 * vs->ecmascript_fragile set in those cases; that's why we
379 * don't ever bother to re-zero it if we are suddenly doing
380 * gradual rendering again.
382 * XXX: What happens if a document is still loading in the
383 * other tab when we press ^L here? */
384 if (vs->ecmascript_fragile
385 || (vs->ecmascript && vs->ecmascript->onload_snippets_owner
386 && document->id != vs->ecmascript->onload_snippets_owner))
387 ecmascript_reset_state(vs);
388 assert(vs->ecmascript);
389 vs->ecmascript->onload_snippets_owner = document->id;
391 /* Passing of the onload_snippets pointers gives *_snippets()
392 * some feeling of universality, shall we ever get any other
393 * snippets (?). */
394 add_snippets(vs->ecmascript,
395 &document->onload_snippets,
396 &vs->ecmascript->onload_snippets);
397 process_snippets(vs->ecmascript, &vs->ecmascript->onload_snippets,
398 &vs->ecmascript->current_onload_snippet);
400 #endif
402 /* If we do not care about the height and width of the document
403 * just use the setup values. */
405 copy_box(&doc_view->box, &document->options.box);
407 if (!document->options.needs_width)
408 doc_view->box.width = options->box.width;
410 if (!document->options.needs_height)
411 doc_view->box.height = options->box.height;
415 void
416 render_document_frames(struct session *ses, int no_cache)
418 struct document_options doc_opts;
419 struct document_view *doc_view;
420 struct document_view *current_doc_view = NULL;
421 struct view_state *vs = NULL;
423 if (!ses->doc_view) {
424 ses->doc_view = mem_calloc(1, sizeof(*ses->doc_view));
425 if (!ses->doc_view) return;
426 ses->doc_view->session = ses;
427 ses->doc_view->search_word = &ses->search_word;
430 if (have_location(ses)) vs = &cur_loc(ses)->vs;
432 init_document_options(&doc_opts);
434 set_box(&doc_opts.box, 0, 0,
435 ses->tab->term->width, ses->tab->term->height);
437 if (ses->status.show_title_bar) {
438 doc_opts.box.y++;
439 doc_opts.box.height--;
441 if (ses->status.show_status_bar) doc_opts.box.height--;
442 if (ses->status.show_tabs_bar) {
443 doc_opts.box.height--;
444 if (ses->status.show_tabs_bar_at_top) doc_opts.box.y++;
447 doc_opts.color_mode = get_opt_int_tree(ses->tab->term->spec, "colors");
448 if (!get_opt_bool_tree(ses->tab->term->spec, "underline"))
449 doc_opts.color_flags |= COLOR_ENHANCE_UNDERLINE;
451 doc_opts.cp = get_opt_codepage_tree(ses->tab->term->spec, "charset");
452 doc_opts.no_cache = no_cache & 1;
453 doc_opts.gradual_rerendering = !!(no_cache & 2);
455 if (vs) {
456 if (vs->plain < 0) vs->plain = 0;
457 doc_opts.plain = vs->plain;
458 doc_opts.wrap = vs->wrap;
459 } else {
460 doc_opts.plain = 1;
463 foreach (doc_view, ses->scrn_frames) doc_view->used = 0;
465 if (vs) render_document(vs, ses->doc_view, &doc_opts);
467 if (document_has_frames(ses->doc_view->document)) {
468 current_doc_view = current_frame(ses);
469 format_frames(ses, ses->doc_view->document->frame_desc, &doc_opts, 0);
472 foreach (doc_view, ses->scrn_frames) {
473 struct document_view *prev_doc_view = doc_view->prev;
475 if (doc_view->used) continue;
477 detach_formatted(doc_view);
478 del_from_list(doc_view);
479 mem_free(doc_view);
480 doc_view = prev_doc_view;
483 if (current_doc_view) {
484 int n = 0;
486 foreach (doc_view, ses->scrn_frames) {
487 if (document_has_frames(doc_view->document)) continue;
488 if (doc_view == current_doc_view) {
489 cur_loc(ses)->vs.current_link = n;
490 break;
492 n++;
497 static int
498 comp_links(struct link *l1, struct link *l2)
500 assert(l1 && l2);
501 if_assert_failed return 0;
502 return (l1->number - l2->number);
505 void
506 sort_links(struct document *document)
508 int i;
510 assert(document);
511 if_assert_failed return;
512 if (!document->nlinks) return;
514 if (document->links_sorted) return;
515 assert(document->links);
516 if_assert_failed return;
518 qsort(document->links, document->nlinks, sizeof(*document->links),
519 (void *) comp_links);
521 if (!document->height) return;
523 mem_free_if(document->lines1);
524 document->lines1 = mem_calloc(document->height, sizeof(*document->lines1));
525 mem_free_if(document->lines2);
526 if (!document->lines1) return;
527 document->lines2 = mem_calloc(document->height, sizeof(*document->lines2));
528 if (!document->lines2) {
529 mem_free(document->lines1);
530 return;
533 for (i = 0; i < document->nlinks; i++) {
534 struct link *link = &document->links[i];
535 int p, q, j;
537 if (!link->npoints) {
538 done_link_members(link);
539 memmove(link, link + 1,
540 (document->nlinks - i - 1) * sizeof(*link));
541 document->nlinks--;
542 i--;
543 continue;
545 p = link->points[0].y;
546 q = link->points[link->npoints - 1].y;
547 if (p > q) j = p, p = q, q = j;
548 for (j = p; j <= q; j++) {
549 assertm(j < document->height, "link out of screen");
550 if_assert_failed continue;
551 document->lines2[j] = &document->links[i];
552 if (!document->lines1[j])
553 document->lines1[j] = &document->links[i];
556 document->links_sorted = 1;
559 struct conv_table *
560 get_convert_table(unsigned char *head, int to_cp,
561 int default_cp, int *from_cp,
562 enum cp_status *cp_status, int ignore_server_cp)
564 unsigned char *part = head;
565 int cp_index = -1;
567 assert(head);
568 if_assert_failed return NULL;
570 if (ignore_server_cp) {
571 if (cp_status) *cp_status = CP_STATUS_IGNORED;
572 if (from_cp) *from_cp = default_cp;
573 return get_translation_table(default_cp, to_cp);
576 while (cp_index == -1) {
577 unsigned char *ct_charset;
578 unsigned char *meta;
579 unsigned char *a = parse_header(part, "Content-Type", &part);
581 if (!a) break;
582 /* Content type info from document meta header.
583 * scan_http_equiv() appends the meta stuff to the protocol header before
584 * this function is called. Last Content-Type header field is used. */
586 while ((meta = parse_header(part, "Content-Type", &part))) {
587 mem_free_set(&a, meta);
590 parse_header_param(a, "charset", &ct_charset);
591 if (ct_charset) {
592 cp_index = get_cp_index(ct_charset);
593 mem_free(ct_charset);
595 mem_free(a);
598 if (cp_index == -1) {
599 unsigned char *a = parse_header(head, "Content-Charset", NULL);
601 if (a) {
602 cp_index = get_cp_index(a);
603 mem_free(a);
607 if (cp_index == -1) {
608 unsigned char *a = parse_header(head, "Charset", NULL);
610 if (a) {
611 cp_index = get_cp_index(a);
612 mem_free(a);
616 if (cp_index == -1) {
617 cp_index = default_cp;
618 if (cp_status) *cp_status = CP_STATUS_ASSUMED;
619 } else {
620 if (cp_status) *cp_status = CP_STATUS_SERVER;
623 if (from_cp) *from_cp = cp_index;
625 return get_translation_table(cp_index, to_cp);