Add get_terminal_codepage().
[elinks.git] / src / document / renderer.c
blobe860a9249334452346e6bb127f5a0b3921cc9643
1 /** HTML renderer
2 * @file */
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
8 #include <ctype.h>
9 #include <stdarg.h>
10 #include <stdlib.h>
11 #include <string.h>
13 #include "elinks.h"
15 #include "cache/cache.h"
16 #include "config/options.h"
17 #include "document/document.h"
18 #include "document/dom/renderer.h"
19 #include "document/html/frames.h"
20 #include "document/html/renderer.h"
21 #include "document/plain/renderer.h"
22 #include "document/renderer.h"
23 #include "document/view.h"
24 #include "ecmascript/ecmascript.h"
25 #include "encoding/encoding.h"
26 #include "intl/charsets.h"
27 #include "main/main.h"
28 #include "main/object.h"
29 #include "protocol/header.h"
30 #include "protocol/protocol.h"
31 #include "protocol/uri.h"
32 #include "session/location.h"
33 #include "session/session.h"
34 #include "terminal/terminal.h"
35 #include "terminal/window.h"
36 #include "util/error.h"
37 #include "util/memory.h"
38 #include "util/string.h"
39 #include "viewer/text/form.h"
40 #include "viewer/text/view.h"
41 #include "viewer/text/vs.h"
44 #ifdef CONFIG_ECMASCRIPT
45 /** @todo XXX: This function is de facto obsolete, since we do not need to copy
46 * snippets around anymore (we process them in one go after the document is
47 * loaded; gradual processing was practically impossible because the snippets
48 * could reorder randomly during the loading - consider i.e.
49 * @<body onLoad>@<script>@</body>: first just @<body> is loaded, but then the
50 * rest of the document is loaded and @<script> gets before @<body>; do not even
51 * imagine the trouble with rewritten (through scripting hooks) documents;
52 * besides, implementing document.write() will be much simpler).
53 * But I want to take no risk by reworking that now. --pasky */
54 static void
55 add_snippets(struct ecmascript_interpreter *interpreter,
56 LIST_OF(struct string_list_item) *doc_snippets,
57 LIST_OF(struct string_list_item) *queued_snippets)
59 struct string_list_item *doc_current = doc_snippets->next;
61 #ifdef CONFIG_LEDS
62 if (list_empty(*queued_snippets) && interpreter->vs->doc_view->session)
63 unset_led_value(interpreter->vs->doc_view->session->status.ecmascript_led);
64 #endif
66 if (list_empty(*doc_snippets) || !get_opt_bool("ecmascript.enable"))
67 return;
69 /* We do this all only once per view_state now. */
70 if (!list_empty(*queued_snippets)) {
71 /* So if we already did it, we shouldn't need to do it again.
72 * This is the case of moving around in history - we have all
73 * what happenned recorded in the view_state and needn't bother
74 * again. */
75 #ifdef CONFIG_DEBUG
76 /* Hopefully. */
77 struct string_list_item *iterator = queued_snippets->next;
79 while (iterator != (struct string_list_item *) queued_snippets) {
80 if (doc_current == (struct string_list_item *) doc_snippets) {
81 INTERNAL("add_snippets(): doc_snippets shorter than queued_snippets!");
82 return;
84 #if 0
85 DBG("Comparing snippets\n%.*s\n###### vs #####\n%.*s\n #####",
86 iterator->string.length, iterator->string.source,
87 doc_current->string.length, doc_current->string.source);
88 #endif
89 assert(!strlcmp(iterator->string.source,
90 iterator->string.length,
91 doc_current->string.source,
92 doc_current->string.length));
94 doc_current = doc_current->next;
95 iterator = iterator->next;
97 #endif
98 return;
101 assert(doc_current);
102 for (; doc_current != (struct string_list_item *) doc_snippets;
103 doc_current = doc_current->next) {
104 add_to_string_list(queued_snippets, doc_current->string.source,
105 doc_current->string.length);
106 #if 0
107 DBG("Adding snippet\n%.*s\n #####",
108 doc_current->string.length,
109 doc_current->string.source);
110 #endif
114 static void
115 process_snippets(struct ecmascript_interpreter *interpreter,
116 LIST_OF(struct string_list_item) *snippets,
117 struct string_list_item **current)
119 if (!*current)
120 *current = snippets->next;
121 for (; *current != (struct string_list_item *) snippets;
122 (*current) = (*current)->next) {
123 struct string *string = &(*current)->string;
124 unsigned char *uristring;
125 struct uri *uri;
126 struct cache_entry *cached;
127 struct fragment *fragment;
129 if (string->length == 0)
130 continue;
132 if (*string->source != '^') {
133 /* Evaluate <script>code</script> snippet */
134 ecmascript_eval(interpreter, string, NULL);
135 continue;
138 /* Eval external <script src="reference"></script> snippet */
139 uristring = string->source + 1;
140 if (!*uristring) continue;
142 uri = get_uri(uristring, URI_BASE);
143 if (!uri) continue;
145 cached = get_redirected_cache_entry(uri);
146 done_uri(uri);
148 if (!cached) {
149 /* At this time (!gradual_rerendering), we should've
150 * already retrieved this though. So it must've been
151 * that it went away because unused and the cache was
152 * already too full. */
153 #if 0
154 /* Disabled because gradual rerendering can be triggered
155 * by numerous events other than a ecmascript reference
156 * completing like the original document and CSS. Problem
157 * is that we should never continue this loop but rather
158 * break out if that is the case. Somehow we need to
159 * be able to derive URI loading problems at this point
160 * or maybe remove reference snippets if they fail to load.
162 * This FIFO queue handling should be used for also CSS
163 * imports so it would be cool if it could be general
164 * enough for that. Using it for frames with the FIFOing
165 * disabled probably wouldn't hurt either.
167 * To top this thing off it would be nice if it also
168 * handled dependency tracking between references so that
169 * CSS documents will not disappear from the cache
170 * before all referencing HTML documents has been deleted
171 * from it.
173 * Reported as bug 533. */
174 /* Pasky's explanation: If we get the doc in a single
175 * shot, before calling draw_formatted() we didn't have
176 * anything additional queued for loading and the cache
177 * entry was already loaded, so we didn't get
178 * gradual_loading set. But then while parsing the
179 * document we got some external references and trying
180 * to process them right now. Boom.
182 * The obvious solution would be to always call
183 * draw_formatted() with gradual_loading in
184 * doc_loading_callback() and if we are sure the
185 * loading is really over, call it one more time
186 * without gradual_loading set. I'm not sure about
187 * the implications though so I won't do it before
188 * 0.10.0. --pasky */
189 ERROR("The script of %s was lost in too full a cache!",
190 uristring);
191 #endif
192 continue;
195 fragment = get_cache_fragment(cached);
196 if (fragment) {
197 struct string code = INIT_STRING(fragment->data, fragment->length);
199 ecmascript_eval(interpreter, &code, NULL);
203 #endif
205 static void
206 render_encoded_document(struct cache_entry *cached, struct document *document)
208 struct uri *uri = cached->uri;
209 enum stream_encoding encoding = ENCODING_NONE;
210 struct fragment *fragment = get_cache_fragment(cached);
211 struct string buffer = INIT_STRING("", 0);
213 /* Even empty documents have to be rendered so that info in the protocol
214 * header, such as refresh info, get processed. (bug 625) */
215 if (fragment) {
216 buffer.source = fragment->data;
217 buffer.length = fragment->length;
220 if (uri->protocol != PROTOCOL_FILE) {
221 unsigned char *extension = get_extension_from_uri(uri);
223 if (extension) {
224 encoding = guess_encoding(extension);
225 mem_free(extension);
228 if (encoding != ENCODING_NONE) {
229 int length = 0;
230 unsigned char *source;
232 source = decode_encoded_buffer(encoding, buffer.source,
233 buffer.length, &length);
234 if (source) {
235 buffer.source = source;
236 buffer.length = length;
237 } else {
238 encoding = ENCODING_NONE;
243 if (document->options.plain) {
244 #ifdef CONFIG_DOM
245 if (cached->content_type
246 && (!c_strcasecmp("text/html", cached->content_type)
247 || !c_strcasecmp("application/xhtml+xml", cached->content_type)
248 || !c_strcasecmp("application/docbook+xml", cached->content_type)
249 || !c_strcasecmp("application/rss+xml", cached->content_type)
250 || !c_strcasecmp("application/xbel+xml", cached->content_type)
251 || !c_strcasecmp("application/x-xbel", cached->content_type)
252 || !c_strcasecmp("application/xbel", cached->content_type)))
253 render_dom_document(cached, document, &buffer);
254 else
255 #endif
256 render_plain_document(cached, document, &buffer);
258 } else {
259 #ifdef CONFIG_DOM
260 if (cached->content_type
261 && (!c_strlcasecmp("application/rss+xml", 19, cached->content_type, -1)))
262 render_dom_document(cached, document, &buffer);
263 else
264 #endif
265 render_html_document(cached, document, &buffer);
268 if (encoding != ENCODING_NONE) {
269 done_string(&buffer);
273 void
274 render_document(struct view_state *vs, struct document_view *doc_view,
275 struct document_options *options)
277 unsigned char *name;
278 struct document *document;
279 struct cache_entry *cached;
281 assert(vs && doc_view && options);
282 if_assert_failed return;
284 #if 0
285 DBG("(Re%u)Rendering %s on doc_view %p [%s] while attaching it to %p",
286 options->gradual_rerendering, struri(vs->uri),
287 doc_view, doc_view->name, vs);
288 #endif
290 name = doc_view->name;
291 doc_view->name = NULL;
293 detach_formatted(doc_view);
295 doc_view->name = name;
296 doc_view->vs = vs;
297 doc_view->last_x = doc_view->last_y = -1;
299 #if 0
300 /* This is a nice idea, but doesn't always work: in particular when
301 * there's a frame name conflict. You loaded something to the vs'
302 * frame, but later something tried to get loaded to a frame with
303 * the same name and we got back this frame again, so we are now
304 * overriding the original document with a cuckoo. This assert()ion
305 * should be re-enabled when we start to get this right (which is
306 * very complex, but someone should rewrite the frames support
307 * anyway). --pasky */
308 assert(!vs->doc_view);
309 #else
310 if (vs->doc_view) {
311 /* It will be still detached, no worries - hopefully it still
312 * resides in ses->scrn_frames. */
313 assert(vs->doc_view->vs == vs);
314 vs->doc_view->used = 0; /* A bit risky, but... */
315 vs->doc_view->vs = NULL;
316 vs->doc_view = NULL;
317 #ifdef CONFIG_ECMASCRIPT
318 vs->ecmascript_fragile = 1; /* And is this good? ;-) */
319 #endif
321 #endif
322 vs->doc_view = doc_view;
324 cached = find_in_cache(vs->uri);
325 if (!cached) {
326 INTERNAL("document %s to format not found", struri(vs->uri));
327 return;
330 document = get_cached_document(cached, options);
331 if (document) {
332 doc_view->document = document;
333 } else {
334 document = init_document(cached, options);
335 if (!document) return;
336 doc_view->document = document;
338 if (doc_view->session
339 && doc_view->session->reloadlevel > CACHE_MODE_NORMAL)
340 for (; vs->form_info_len > 0; vs->form_info_len--)
341 done_form_state(&vs->form_info[vs->form_info_len - 1]);
343 shrink_memory(0);
345 render_encoded_document(cached, document);
346 sort_links(document);
347 if (!document->title) {
348 enum uri_component components;
350 if (document->uri->protocol == PROTOCOL_FILE) {
351 components = URI_PATH;
352 } else {
353 components = URI_PUBLIC;
356 document->title = get_uri_string(document->uri, components);
357 if (document->title) {
358 #ifdef CONFIG_UTF8
359 if (doc_view->document->options.utf8)
360 decode_uri(document->title);
361 else
362 #endif /* CONFIG_UTF8 */
363 decode_uri_for_display(document->title);
367 #ifdef CONFIG_CSS
368 document->css_magic = get_document_css_magic(document);
369 #endif
371 #ifdef CONFIG_ECMASCRIPT
372 if (!vs->ecmascript_fragile)
373 assert(vs->ecmascript);
374 if (!options->gradual_rerendering) {
375 /* We also reset the state if the underlying document changed
376 * from the last time we did the snippets. This may be
377 * triggered i.e. when redrawing a document which has been
378 * reloaded in a different tab meanwhile (OTOH we don't want
379 * to reset the state if we are redrawing a document we have
380 * already drawn before).
382 * (vs->ecmascript->onload_snippets_owner) check may be
383 * superfluous since we should always have
384 * vs->ecmascript_fragile set in those cases; that's why we
385 * don't ever bother to re-zero it if we are suddenly doing
386 * gradual rendering again.
388 * XXX: What happens if a document is still loading in the
389 * other tab when we press ^L here? */
390 if (vs->ecmascript_fragile
391 || (vs->ecmascript
392 && vs->ecmascript->onload_snippets_cache_id
393 && document->cache_id != vs->ecmascript->onload_snippets_cache_id))
394 ecmascript_reset_state(vs);
395 assert(vs->ecmascript);
396 vs->ecmascript->onload_snippets_cache_id = document->cache_id;
398 /* Passing of the onload_snippets pointers gives *_snippets()
399 * some feeling of universality, shall we ever get any other
400 * snippets (?). */
401 add_snippets(vs->ecmascript,
402 &document->onload_snippets,
403 &vs->ecmascript->onload_snippets);
404 process_snippets(vs->ecmascript, &vs->ecmascript->onload_snippets,
405 &vs->ecmascript->current_onload_snippet);
407 #endif
409 /* If we do not care about the height and width of the document
410 * just use the setup values. */
412 copy_box(&doc_view->box, &document->options.box);
414 if (!document->options.needs_width)
415 doc_view->box.width = options->box.width;
417 if (!document->options.needs_height)
418 doc_view->box.height = options->box.height;
422 void
423 render_document_frames(struct session *ses, int no_cache)
425 struct document_options doc_opts;
426 struct document_view *doc_view;
427 struct document_view *current_doc_view = NULL;
428 struct view_state *vs = NULL;
430 if (!ses->doc_view) {
431 ses->doc_view = mem_calloc(1, sizeof(*ses->doc_view));
432 if (!ses->doc_view) return;
433 ses->doc_view->session = ses;
434 ses->doc_view->search_word = &ses->search_word;
437 if (have_location(ses)) vs = &cur_loc(ses)->vs;
439 init_document_options(&doc_opts);
441 set_box(&doc_opts.box, 0, 0,
442 ses->tab->term->width, ses->tab->term->height);
444 if (ses->status.show_title_bar) {
445 doc_opts.box.y++;
446 doc_opts.box.height--;
448 if (ses->status.show_status_bar) doc_opts.box.height--;
449 if (ses->status.show_tabs_bar) {
450 doc_opts.box.height--;
451 if (ses->status.show_tabs_bar_at_top) doc_opts.box.y++;
454 doc_opts.color_mode = get_opt_int_tree(ses->tab->term->spec, "colors");
455 if (!get_opt_bool_tree(ses->tab->term->spec, "underline"))
456 doc_opts.color_flags |= COLOR_ENHANCE_UNDERLINE;
458 doc_opts.cp = get_terminal_codepage(ses->tab->term);
459 doc_opts.no_cache = no_cache & 1;
460 doc_opts.gradual_rerendering = !!(no_cache & 2);
462 if (vs) {
463 if (vs->plain < 0) vs->plain = 0;
464 doc_opts.plain = vs->plain;
465 doc_opts.wrap = vs->wrap;
466 } else {
467 doc_opts.plain = 1;
470 foreach (doc_view, ses->scrn_frames) doc_view->used = 0;
472 if (vs) render_document(vs, ses->doc_view, &doc_opts);
474 if (document_has_frames(ses->doc_view->document)) {
475 current_doc_view = current_frame(ses);
476 format_frames(ses, ses->doc_view->document->frame_desc, &doc_opts, 0);
479 foreach (doc_view, ses->scrn_frames) {
480 struct document_view *prev_doc_view = doc_view->prev;
482 if (doc_view->used) continue;
484 detach_formatted(doc_view);
485 del_from_list(doc_view);
486 mem_free(doc_view);
487 doc_view = prev_doc_view;
490 if (current_doc_view) {
491 int n = 0;
493 foreach (doc_view, ses->scrn_frames) {
494 if (document_has_frames(doc_view->document)) continue;
495 if (doc_view == current_doc_view) {
496 cur_loc(ses)->vs.current_link = n;
497 break;
499 n++;
504 /* comparison function for qsort() */
505 static int
506 comp_links(const void *v1, const void *v2)
508 const struct link *l1 = v1, *l2 = v2;
510 assert(l1 && l2);
511 if_assert_failed return 0;
512 return (l1->number - l2->number);
515 void
516 sort_links(struct document *document)
518 int i;
520 assert(document);
521 if_assert_failed return;
522 if (!document->nlinks) return;
524 if (document->links_sorted) return;
525 assert(document->links);
526 if_assert_failed return;
528 qsort(document->links, document->nlinks, sizeof(*document->links),
529 comp_links);
531 if (!document->height) return;
533 mem_free_if(document->lines1);
534 document->lines1 = mem_calloc(document->height, sizeof(*document->lines1));
535 mem_free_if(document->lines2);
536 if (!document->lines1) return;
537 document->lines2 = mem_calloc(document->height, sizeof(*document->lines2));
538 if (!document->lines2) {
539 mem_free(document->lines1);
540 return;
543 for (i = 0; i < document->nlinks; i++) {
544 struct link *link = &document->links[i];
545 int p, q, j;
547 if (!link->npoints) {
548 done_link_members(link);
549 memmove(link, link + 1,
550 (document->nlinks - i - 1) * sizeof(*link));
551 document->nlinks--;
552 i--;
553 continue;
555 p = link->points[0].y;
556 q = link->points[link->npoints - 1].y;
557 if (p > q) j = p, p = q, q = j;
558 for (j = p; j <= q; j++) {
559 assertm(j < document->height, "link out of screen");
560 if_assert_failed continue;
561 document->lines2[j] = &document->links[i];
562 if (!document->lines1[j])
563 document->lines1[j] = &document->links[i];
566 document->links_sorted = 1;
569 struct conv_table *
570 get_convert_table(unsigned char *head, int to_cp,
571 int default_cp, int *from_cp,
572 enum cp_status *cp_status, int ignore_server_cp)
574 unsigned char *part = head;
575 int cp_index = -1;
577 assert(head);
578 if_assert_failed return NULL;
580 if (ignore_server_cp) {
581 if (cp_status) *cp_status = CP_STATUS_IGNORED;
582 if (from_cp) *from_cp = default_cp;
583 return get_translation_table(default_cp, to_cp);
586 while (cp_index == -1) {
587 unsigned char *ct_charset;
588 unsigned char *meta;
589 unsigned char *a = parse_header(part, "Content-Type", &part);
591 if (!a) break;
592 /* Content type info from document meta header.
593 * scan_http_equiv() appends the meta stuff to the protocol header before
594 * this function is called. Last Content-Type header field is used. */
596 while ((meta = parse_header(part, "Content-Type", &part))) {
597 mem_free_set(&a, meta);
600 parse_header_param(a, "charset", &ct_charset);
601 if (ct_charset) {
602 cp_index = get_cp_index(ct_charset);
603 mem_free(ct_charset);
605 mem_free(a);
608 if (cp_index == -1) {
609 unsigned char *a = parse_header(head, "Content-Charset", NULL);
611 if (a) {
612 cp_index = get_cp_index(a);
613 mem_free(a);
617 if (cp_index == -1) {
618 unsigned char *a = parse_header(head, "Charset", NULL);
620 if (a) {
621 cp_index = get_cp_index(a);
622 mem_free(a);
626 if (cp_index == -1) {
627 cp_index = default_cp;
628 if (cp_status) *cp_status = CP_STATUS_ASSUMED;
629 } else {
630 if (cp_status) *cp_status = CP_STATUS_SERVER;
633 if (from_cp) *from_cp = cp_index;
635 return get_translation_table(cp_index, to_cp);