Added support for label_key in references of dumps.
[elinks.git] / src / viewer / dump / dump.c
blob0958f070b6f3a172e73ef8496504d0077c3b3693
1 /* Support for dumping to the file on startup (w/o bfu) */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <string.h>
10 #include <sys/types.h> /* NetBSD flavour */
11 #ifdef HAVE_SYS_SIGNAL_H
12 #include <sys/signal.h>
13 #endif
14 #ifdef HAVE_FCNTL_H
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
16 #endif
17 #ifdef HAVE_UNISTD_H
18 #include <unistd.h>
19 #endif
21 #include "elinks.h"
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "document/document.h"
26 #include "document/html/renderer.h"
27 #include "document/options.h"
28 #include "document/renderer.h"
29 #include "document/view.h"
30 #include "intl/charsets.h"
31 #include "intl/gettext/libintl.h"
32 #include "main/select.h"
33 #include "main/main.h"
34 #include "network/connection.h"
35 #include "network/state.h"
36 #include "osdep/ascii.h"
37 #include "osdep/osdep.h"
38 #include "protocol/protocol.h"
39 #include "protocol/uri.h"
40 #include "session/download.h"
41 #include "terminal/color.h"
42 #include "terminal/hardio.h"
43 #include "terminal/terminal.h"
44 #include "util/memory.h"
45 #include "util/string.h"
46 #include "viewer/dump/dump.h"
47 #include "viewer/text/view.h"
48 #include "viewer/text/vs.h"
51 static int dump_pos;
52 static struct download dump_download;
53 static int dump_redir_count = 0;
55 #define D_BUF 65536
57 #define FRAME_CHARS_BEGIN 0xB0
58 #define FRAME_CHARS_END 0xE0
60 /** A place where dumping functions write their output. The data
61 * first goes to the buffer in this structure. When the buffer is
62 * full enough, it is flushed to a file descriptor or to a string. */
63 struct dump_output {
64 /** How many bytes are in #buf already. */
65 size_t bufpos;
67 /** A string to which the buffer should eventually be flushed,
68 * or NULL. */
69 struct string *string;
71 /** A file descriptor to which the buffer should eventually be
72 * flushed, or -1. */
73 int fd;
75 /** Mapping of SCREEN_ATTR_FRAME characters. If the target
76 * codepage is UTF-8 (which is possible only if CONFIG_UTF8 is
77 * defined), then the values are UTF-32. Otherwise, they are
78 * in the target codepage, even though the type may still be
79 * unicode_val_T. */
80 #ifdef CONFIG_UTF8
81 unicode_val_T frame[FRAME_CHARS_END - FRAME_CHARS_BEGIN];
82 #else
83 unsigned char frame[FRAME_CHARS_END - FRAME_CHARS_BEGIN];
84 #endif
86 /** Bytes waiting to be flushed. */
87 unsigned char buf[D_BUF];
90 /** Mapping from CP437 box-drawing characters to simpler CP437 characters.
91 * - Map mixed light/double lines to light lines or double lines,
92 * depending on the majority; or to light lines if even.
93 * - Map double lines to light lines.
94 * - Map light and dark shades to medium, then to full blocks.
95 * - Map half blocks to full blocks.
96 * - Otherwise map to ASCII characters. */
97 static const unsigned char frame_simplify[FRAME_CHARS_END - FRAME_CHARS_BEGIN]
98 = {
99 /*-0 -1 -2 -3 -4 -5 -6 -7 */
100 /*-8 -9 -A -B -C -D -E -F */
101 0xB1, 0xDB, 0xB1, '|' , '+' , 0xB4, 0xB9, 0xBF, /* 0xB0...0xB7 */
102 0xC5, 0xB4, 0xB3, 0xBF, 0xD9, 0xD9, 0xD9, '+' , /* 0xB8...0xBF */
103 '+' , '+' , '+' , '+' , '-' , '+' , 0xC3, 0xCC, /* 0xC0...0xC7 */
104 0xC0, 0xDA, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xCA, /* 0xC8...0xCF */
105 0xC1, 0xCB, 0xC2, 0xC0, 0xC0, 0xDA, 0xDA, 0xC5, /* 0xD0...0xD7 */
106 0xC5, '+' , '+' , '#' , 0xDB, 0xDB, 0xDB, 0xDB /* 0xD8...0xDF */
109 /** Initialize dump_output::frame for the specified codepage.
111 * If the codepage does not support all the box-drawing characters
112 * of CP437, then map them to simpler characters, according to
113 * frame_simplify.
115 * @relates dump_output */
116 static void
117 dump_output_prepare_frame(struct dump_output *out, int to_cp)
119 const int cp437 = get_cp_index("cp437");
120 int orig;
121 unsigned char subst;
123 #ifdef CONFIG_UTF8
124 if (is_cp_utf8(to_cp)) {
125 for (orig = FRAME_CHARS_BEGIN; orig < FRAME_CHARS_END; orig++)
126 out->frame[orig - FRAME_CHARS_BEGIN]
127 = cp2u(cp437, orig);
128 return;
130 #endif /* CONFIG_UTF8 */
132 for (orig = FRAME_CHARS_BEGIN; orig < FRAME_CHARS_END; orig++) {
133 for (subst = orig;
134 subst >= FRAME_CHARS_BEGIN && subst < FRAME_CHARS_END;
135 subst = frame_simplify[subst - FRAME_CHARS_BEGIN]) {
136 unicode_val_T ucs = cp2u(cp437, subst);
137 const unsigned char *result = u2cp_no_nbsp(ucs, to_cp);
139 if (result && cp2u(to_cp, result[0]) == ucs
140 && !result[1]) {
141 subst = result[0];
142 break;
144 /* Otherwise, the mapping from ucs to to_cp
145 * was not accurate, and this loop will try
146 * a simpler character. */
148 out->frame[orig - FRAME_CHARS_BEGIN] = subst;
152 /** Allocate and initialize a struct dump_output.
153 * The caller should eventually free the structure with mem_free().
155 * @param fd
156 * The file descriptor to which the output will be written.
157 * Use -1 if the output should go to a string instead.
159 * @param string
160 * The string to which the output will be appended.
161 * Use NULL if the output should go to a file descriptor instead.
163 * @param cp
164 * The codepage of the dump. It need not match the codepage
165 * of the document.
167 * @return The new structure, or NULL on error.
169 * @relates dump_output */
170 static struct dump_output *
171 dump_output_alloc(int fd, struct string *string, int cp)
173 struct dump_output *out;
175 assert((fd == -1) ^ (string == NULL));
176 if_assert_failed return NULL;
178 out = mem_alloc(sizeof(*out));
179 if (out) {
180 out->fd = fd;
181 out->string = string;
182 out->bufpos = 0;
184 dump_output_prepare_frame(out, cp);
186 return out;
189 /** Flush buffered output to the file or string.
191 * @return 0 on success, or -1 on error.
193 * @post If this succeeds, then out->bufpos == 0, so that the buffer
194 * has room for more data.
196 * @relates dump_output */
197 static int
198 dump_output_flush(struct dump_output *out)
200 if (out->string) {
201 if (!add_bytes_to_string(out->string, out->buf, out->bufpos))
202 return -1;
204 else {
205 if (hard_write(out->fd, out->buf, out->bufpos) != out->bufpos)
206 return -1;
209 out->bufpos = 0;
210 return 0;
213 static int
214 write_char(unsigned char c, struct dump_output *out)
216 if (out->bufpos >= D_BUF) {
217 if (dump_output_flush(out))
218 return -1;
221 out->buf[out->bufpos++] = c;
222 return 0;
/** Write an escape sequence selecting a foreground and background
 * colour.  The low three bits of @a color pick the foreground, and
 * bits 4..6 pick the background.  When the background is 0 the
 * sequence is cut short so that only the foreground part is sent.
 *
 * @return 0 on success, or -1 if writing failed. */
static int
write_color_16(unsigned char color, struct dump_output *out)
{
	unsigned char seq[] = "\033[0;30;40m";
	const int fg = color & 7;
	const int bg = (color >> 4) & 7;
	const unsigned char *pos;

	seq[5] += fg;
	if (!bg) {
		/* Terminate the sequence right after the foreground. */
		seq[6] = 'm';
		seq[7] = '\0';
	} else {
		seq[8] += bg;
	}

	for (pos = seq; *pos; pos++) {
		if (write_char(*pos, out)) return -1;
	}

	return 0;
}
245 #define DUMP_COLOR_MODE_16
246 #define DUMP_FUNCTION_COLOR dump_16color
247 #define DUMP_FUNCTION_UTF8 dump_16color_utf8
248 #define DUMP_FUNCTION_UNIBYTE dump_16color_unibyte
249 #include "dump-color-mode.h"
250 #undef DUMP_COLOR_MODE_16
251 #undef DUMP_FUNCTION_COLOR
252 #undef DUMP_FUNCTION_UTF8
253 #undef DUMP_FUNCTION_UNIBYTE
255 /* configure --enable-debug uses gcc -Wall -Werror, and -Wall includes
256 * -Wunused-function, so declaring or defining any unused function
257 * would break the build. */
258 #if defined(CONFIG_88_COLORS) || defined(CONFIG_256_COLORS)
/** Write the escape sequence "ESC [ <str> ;5; <color> m".
 *
 * @param str    Text placed directly after "ESC [".
 * @param color  Colour number printed in decimal.
 * @param out    Where the sequence is written.
 *
 * @return 0 on success, or -1 if writing failed. */
static int
write_color_256(const unsigned char *str, unsigned char color,
		struct dump_output *out)
{
	unsigned char seq[16];
	const unsigned char *pos;

	snprintf(seq, 16, "\033[%s;5;%dm", str, color);

	for (pos = seq; *pos; pos++) {
		if (write_char(*pos, out)) return -1;
	}

	return 0;
}
274 #define DUMP_COLOR_MODE_256
275 #define DUMP_FUNCTION_COLOR dump_256color
276 #define DUMP_FUNCTION_UTF8 dump_256color_utf8
277 #define DUMP_FUNCTION_UNIBYTE dump_256color_unibyte
278 #include "dump-color-mode.h"
279 #undef DUMP_COLOR_MODE_256
280 #undef DUMP_FUNCTION_COLOR
281 #undef DUMP_FUNCTION_UTF8
282 #undef DUMP_FUNCTION_UNIBYTE
284 #endif /* defined(CONFIG_88_COLORS) || defined(CONFIG_256_COLORS) */
286 #ifdef CONFIG_TRUE_COLOR
/** Write the escape sequence "ESC [ <str> ;2; <c0> ; <c1> ; <c2> m".
 *
 * @param str    Text placed directly after "ESC [".
 * @param color  Three bytes printed in decimal as the colour
 *               components, in the order color[0], color[1], color[2].
 * @param out    Where the sequence is written.
 *
 * @return 0 on success, or -1 if writing failed. */
static int
write_true_color(const unsigned char *str, const unsigned char *color,
		 struct dump_output *out)
{
	unsigned char seq[24];
	const unsigned char *pos;

	snprintf(seq, 24, "\033[%s;2;%d;%d;%dm", str,
		 color[0], color[1], color[2]);

	for (pos = seq; *pos; pos++) {
		if (write_char(*pos, out)) return -1;
	}

	return 0;
}
302 #define DUMP_COLOR_MODE_TRUE
303 #define DUMP_FUNCTION_COLOR dump_truecolor
304 #define DUMP_FUNCTION_UTF8 dump_truecolor_utf8
305 #define DUMP_FUNCTION_UNIBYTE dump_truecolor_unibyte
306 #include "dump-color-mode.h"
307 #undef DUMP_COLOR_MODE_TRUE
308 #undef DUMP_FUNCTION_COLOR
309 #undef DUMP_FUNCTION_UTF8
310 #undef DUMP_FUNCTION_UNIBYTE
312 #endif /* CONFIG_TRUE_COLOR */
314 #define DUMP_COLOR_MODE_NONE
315 #define DUMP_FUNCTION_COLOR dump_nocolor
316 #define DUMP_FUNCTION_UTF8 dump_nocolor_utf8
317 #define DUMP_FUNCTION_UNIBYTE dump_nocolor_unibyte
318 #include "dump-color-mode.h"
319 #undef DUMP_COLOR_MODE_NONE
320 #undef DUMP_FUNCTION_COLOR
321 #undef DUMP_FUNCTION_UTF8
322 #undef DUMP_FUNCTION_UNIBYTE
324 /*! @return 0 on success, -1 on error */
325 static int
326 dump_references(struct document *document, int fd, unsigned char buf[D_BUF])
328 if (document->nlinks
329 && get_opt_bool("document.dump.references", NULL)) {
330 unsigned char key_sym[64] = {0};
331 int x;
332 unsigned char *header = "\nReferences\n\n Visible links\n";
333 const unsigned char *label_key = get_opt_str("document.browse.links.label_key", NULL);
334 int headlen = strlen(header);
335 int base = strlen(label_key);
337 if (hard_write(fd, header, headlen) != headlen)
338 return -1;
340 for (x = 0; x < document->nlinks; x++) {
341 struct link *link = &document->links[x];
342 unsigned char *where = link->where;
343 size_t reflen;
345 if (!where) continue;
347 if (document->options.links_numbering) {
349 dec2qwerty(x + 1, key_sym, label_key, base);
351 if (link->title && *link->title)
352 snprintf(buf, D_BUF, "%4s. %s\n\t%s\n",
353 key_sym, link->title, where);
354 else
355 snprintf(buf, D_BUF, "%4s. %s\n",
356 key_sym, where);
357 } else {
358 if (link->title && *link->title)
359 snprintf(buf, D_BUF, " . %s\n\t%s\n",
360 link->title, where);
361 else
362 snprintf(buf, D_BUF, " . %s\n", where);
365 reflen = strlen(buf);
366 if (hard_write(fd, buf, reflen) != reflen)
367 return -1;
371 return 0;
375 dump_to_file(struct document *document, int fd)
377 struct dump_output *out = dump_output_alloc(fd, NULL,
378 document->options.cp);
379 int error;
381 if (!out) return -1;
383 error = dump_nocolor(document, out);
384 if (!error)
385 error = dump_references(document, fd, out->buf);
387 mem_free(out);
388 return error;
391 /* This dumps the given @cached's formatted output onto @fd. */
392 static void
393 dump_formatted(int fd, struct download *download, struct cache_entry *cached)
395 struct document_options o;
396 struct document_view formatted;
397 struct view_state vs;
398 int width;
399 struct dump_output *out;
401 if (!cached) return;
403 memset(&formatted, 0, sizeof(formatted));
405 init_document_options(NULL, &o);
406 width = get_opt_int("document.dump.width", NULL);
407 set_box(&o.box, 0, 1, width, DEFAULT_TERMINAL_HEIGHT);
409 o.cp = get_opt_codepage("document.dump.codepage", NULL);
410 o.color_mode = get_opt_int("document.dump.color_mode", NULL);
411 o.plain = 0;
412 o.frames = 0;
413 o.links_numbering = get_opt_bool("document.dump.numbering", NULL);
415 init_vs(&vs, cached->uri, -1);
417 render_document(&vs, &formatted, &o);
419 out = dump_output_alloc(fd, NULL, o.cp);
420 if (out) {
421 int error;
423 switch (o.color_mode) {
424 case COLOR_MODE_DUMP:
425 case COLOR_MODE_MONO: /* FIXME: inversion */
426 error = dump_nocolor(formatted.document, out);
427 break;
429 default:
430 /* If the desired color mode was not compiled in,
431 * use 16 colors. */
432 case COLOR_MODE_16:
433 error = dump_16color(formatted.document, out);
434 break;
436 #ifdef CONFIG_88_COLORS
437 case COLOR_MODE_88:
438 error = dump_256color(formatted.document, out);
439 break;
440 #endif
442 #ifdef CONFIG_256_COLORS
443 case COLOR_MODE_256:
444 error = dump_256color(formatted.document, out);
445 break;
446 #endif
448 #ifdef CONFIG_TRUE_COLOR
449 case COLOR_MODE_TRUE_COLOR:
450 error = dump_truecolor(formatted.document, out);
451 break;
452 #endif
455 if (!error)
456 dump_references(formatted.document, fd, out->buf);
458 mem_free(out);
459 } /* if out */
461 detach_formatted(&formatted);
462 destroy_vs(&vs, 1);
465 #undef D_BUF
467 /* This dumps the given @cached's source onto @fd nothing more. It returns 0 if it
468 * all went fine and 1 if something isn't quite right and we should terminate
469 * ourselves ASAP. */
470 static int
471 dump_source(int fd, struct download *download, struct cache_entry *cached)
473 struct fragment *frag;
475 if (!cached) return 0;
477 nextfrag:
478 foreach (frag, cached->frag) {
479 int d = dump_pos - frag->offset;
480 int l, w;
482 if (d < 0 || frag->length <= d)
483 continue;
485 l = frag->length - d;
486 w = hard_write(fd, frag->data + d, l);
488 if (w != l) {
489 detach_connection(download, dump_pos);
491 if (w < 0)
492 ERROR(gettext("Can't write to stdout: %s"),
493 (unsigned char *) strerror(errno));
494 else
495 ERROR(gettext("Can't write to stdout."));
497 program.retval = RET_ERROR;
498 return 1;
501 dump_pos += w;
502 detach_connection(download, dump_pos);
503 goto nextfrag;
506 return 0;
509 static unsigned char *
510 subst_url(unsigned char *str, struct string *url)
512 struct string string;
514 if (!init_string(&string)) return NULL;
516 while (*str) {
517 int p;
519 for (p = 0; str[p] && str[p] != '%' && str[p] != '\\'; p++);
521 add_bytes_to_string(&string, str, p);
522 str += p;
524 if (*str == '\\') {
525 unsigned char ch;
527 str++;
528 switch (*str) {
529 case 'f':
530 ch = '\f';
531 break;
532 case 'n':
533 ch = '\n';
534 break;
535 case 't':
536 ch = '\t';
537 break;
538 default:
539 ch = *str;
541 if (*str) {
542 add_char_to_string(&string, ch);
543 str++;
545 continue;
547 } else if (*str != '%') {
548 break;
551 str++;
552 switch (*str) {
553 case 'u':
554 if (url) add_string_to_string(&string, url);
555 break;
558 if (*str) str++;
561 return string.source;
/* Print the value of the string option named @option to stdout after
 * expanding it with subst_url() and @url, then flush stdout.  Nothing
 * is printed when the option has no value or the expansion fails. */
static void
dump_print(unsigned char *option, struct string *url)
{
	unsigned char *format = get_opt_str(option, NULL);
	unsigned char *expanded;

	if (!format) return;

	expanded = subst_url(format, url);
	if (!expanded) return;

	printf("%s", expanded);
	fflush(stdout);
	mem_free(expanded);
}
580 static void
581 dump_loading_callback(struct download *download, void *p)
583 struct cache_entry *cached = download->cached;
584 int fd = get_output_handle();
586 if (fd == -1) return;
587 if (cached && cached->redirect && dump_redir_count++ < MAX_REDIRECTS) {
588 struct uri *uri = cached->redirect;
590 cancel_download(download, 0);
592 load_uri(uri, cached->uri, download, PRI_MAIN, 0, -1);
593 return;
596 if (is_in_queued_state(download->state)) return;
598 if (get_cmd_opt_bool("dump")) {
599 if (is_in_transfering_state(download->state))
600 return;
602 dump_formatted(fd, download, cached);
604 } else {
605 if (dump_source(fd, download, cached) > 0)
606 goto terminate;
608 if (is_in_progress_state(download->state))
609 return;
613 if (!is_in_state(download->state, S_OK)) {
614 usrerror(get_state_message(download->state, NULL));
615 program.retval = RET_ERROR;
616 goto terminate;
619 terminate:
620 program.terminate = 1;
621 dump_next(NULL);
624 static void
625 dump_start(unsigned char *url)
627 unsigned char *wd = get_cwd();
628 struct uri *uri = get_translated_uri(url, wd);
630 mem_free_if(wd);
632 if (!uri || get_protocol_external_handler(NULL, uri)) {
633 usrerror(gettext("URL protocol not supported (%s)."), url);
634 goto terminate;
637 dump_download.callback = (download_callback_T *) dump_loading_callback;
638 dump_pos = 0;
640 if (load_uri(uri, NULL, &dump_download, PRI_MAIN, 0, -1)) {
641 terminate:
642 dump_next(NULL);
643 program.terminate = 1;
644 program.retval = RET_SYNTAX;
647 if (uri) done_uri(uri);
650 void
651 dump_next(LIST_OF(struct string_list_item) *url_list)
653 static INIT_LIST_OF(struct string_list_item, todo_list);
654 static INIT_LIST_OF(struct string_list_item, done_list);
655 struct string_list_item *item;
657 if (url_list) {
658 /* Steal all them nice list items but keep the same order */
659 while (!list_empty(*url_list)) {
660 item = url_list->next;
661 del_from_list(item);
662 add_to_list_end(todo_list, item);
666 /* Dump each url list item one at a time */
667 if (!list_empty(todo_list)) {
668 static int first = 1;
670 program.terminate = 0;
672 item = todo_list.next;
673 del_from_list(item);
674 add_to_list(done_list, item);
676 if (!first) {
677 dump_print("document.dump.separator", NULL);
678 } else {
679 first = 0;
682 dump_print("document.dump.header", &item->string);
683 dump_start(item->string.source);
684 /* XXX: I think it ought to print footer at the end of
685 * the whole dump (not only this file). Testing required.
686 * --pasky */
687 dump_print("document.dump.footer", &item->string);
689 } else {
690 free_string_list(&done_list);
691 program.terminate = 1;
695 struct string *
696 add_document_to_string(struct string *string, struct document *document)
698 struct dump_output *out;
699 int error;
701 assert(string && document);
702 if_assert_failed return NULL;
704 out = dump_output_alloc(-1, string, document->options.cp);
705 if (!out) return NULL;
707 error = dump_nocolor(document, out);
709 mem_free(out);
710 return error ? NULL : string;