Backport Pasky's changes concerning text_style-related stuff.
[elinks.git] / src / document / plain / renderer.c
blob 99cfbf05b75a50a3541b4b03d4be82ba69b2fc0c
1 /* Plain text document renderer */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <ctype.h>
8 #include <string.h>
10 #include "elinks.h"
12 #include "bookmarks/bookmarks.h"
13 #include "cache/cache.h"
14 #include "config/options.h"
15 #include "document/docdata.h"
16 #include "document/document.h"
17 #include "document/format.h"
18 #include "document/options.h"
19 #include "document/plain/renderer.h"
20 #include "document/renderer.h"
21 #include "globhist/globhist.h"
22 #include "intl/charsets.h"
23 #include "protocol/protocol.h"
24 #include "protocol/uri.h"
25 #include "terminal/color.h"
26 #include "terminal/draw.h"
27 #include "util/color.h"
28 #include "util/error.h"
29 #include "util/memory.h"
30 #include "util/string.h"
33 struct plain_renderer {
34 /* The document being renderered */
35 struct document *document;
37 /* The data and data length of the defragmented cache entry */
38 unsigned char *source;
39 int length;
41 /* The convert table that should be used for converting line strings to
42 * the rendered strings. */
43 struct conv_table *convert_table;
45 /* The default template char data for text */
46 struct screen_char template;
48 /* The maximum width any line can have (used for wrapping text) */
49 int max_width;
51 /* The current line number */
52 int lineno;
54 /* Are we doing line compression */
55 unsigned int compress:1;
/* Resize the links array of @doc through ALIGN_LINK so that it can hold
 * @size entries. The caller treats a false result as an allocation
 * failure. */
#define realloc_document_links(doc, size) \
	ALIGN_LINK(&(doc)->links, (doc)->nlinks, (size))
61 static struct screen_char *
62 realloc_line(struct document *document, int x, int y)
64 struct line *line = realloc_lines(document, y);
66 if (!line) return NULL;
68 if (x != line->length) {
69 if (!ALIGN_LINE(&line->chars, line->length, x))
70 return NULL;
72 line->length = x;
75 return line->chars;
78 static inline struct link *
79 add_document_link(struct document *document, unsigned char *uri, int length,
80 int x, int y)
82 struct link *link;
83 struct point *point;
85 if (!realloc_document_links(document, document->nlinks + 1))
86 return NULL;
88 link = &document->links[document->nlinks];
90 if (!realloc_points(link, length))
91 return NULL;
93 link->npoints = length;
94 link->type = LINK_HYPERTEXT;
95 link->where = uri;
96 link->color.background = document->options.default_style.bg;
97 link->color.foreground = document->options.default_link;
98 link->number = document->nlinks;
100 for (point = link->points; length > 0; length--, point++, x++) {
101 point->x = x;
102 point->y = y;
105 document->nlinks++;
106 document->links_sorted = 0;
107 return link;
110 /* Searches a word to find an email adress or an URI to add as a link. */
111 static inline struct link *
112 check_link_word(struct document *document, unsigned char *uri, int length,
113 int x, int y)
115 struct uri test;
116 unsigned char *where = NULL;
117 unsigned char *mailto = memchr(uri, '@', length);
118 int keep = uri[length];
119 struct link *new_link;
121 assert(document);
122 if_assert_failed return NULL;
124 uri[length] = 0;
126 if (mailto && mailto > uri && mailto - uri < length - 1) {
127 where = straconcat("mailto:", uri, (unsigned char *) NULL);
129 } else if (parse_uri(&test, uri) == URI_ERRNO_OK
130 && test.protocol != PROTOCOL_UNKNOWN
131 && (test.datalen || test.hostlen)) {
132 where = memacpy(uri, length);
135 uri[length] = keep;
137 if (!where) return NULL;
139 /* We need to reparse the URI and normalize it so that the protocol and
140 * host part are converted to lowercase. */
141 normalize_uri(NULL, where);
143 new_link = add_document_link(document, where, length, x, y);
145 if (!new_link) mem_free(where);
147 return new_link;
/* True when @c may be part of a URI candidate: anything above the space
 * character except angle brackets, parentheses and quote characters.
 * Note that @c may be evaluated more than once. */
#define url_char(c) (!(		\
	   (c) <= ' '		\
	|| (c) == '<'		\
	|| (c) == '>'		\
	|| (c) == '('		\
	|| (c) == ')'		\
	|| isquote(c)))
/* Returns how many leading bytes of @line (at most @length) form a URI
 * candidate: the longest run of url_char() bytes, with any trailing dots
 * and commas stripped. */
static inline int
get_uri_length(unsigned char *line, int length)
{
	int end = 0;

	while (end < length && url_char(line[end]))
		end++;

	/* Trailing punctuation most likely belongs to the sentence, not to
	 * the URI. */
	while (end > 0
	       && (line[end - 1] == '.' || line[end - 1] == ','))
		end--;

	return end;
}
176 static int
177 print_document_link(struct plain_renderer *renderer, int lineno,
178 unsigned char *line, int line_pos, int width,
179 int expanded, struct screen_char *pos, int cells)
181 struct document *document = renderer->document;
182 unsigned char *start = &line[line_pos];
183 int len = get_uri_length(start, width - line_pos);
184 int screen_column = cells + expanded;
185 struct link *new_link;
186 int link_end = line_pos + len;
187 unsigned char saved_char;
188 struct document_options *doc_opts = &document->options;
189 struct screen_char template = renderer->template;
190 int i;
192 if (!len) return 0;
194 new_link = check_link_word(document, start, len, screen_column,
195 lineno);
197 if (!new_link) return 0;
199 saved_char = line[link_end];
200 line[link_end] = '\0';
202 if (0)
203 ; /* Shut up compiler */
204 #ifdef CONFIG_GLOBHIST
205 else if (get_global_history_item(start))
206 new_link->color.foreground = doc_opts->default_vlink;
207 #endif
208 #ifdef CONFIG_BOOKMARKS
209 else if (get_bookmark(start))
210 new_link->color.foreground = doc_opts->default_bookmark_link;
211 #endif
212 else
213 new_link->color.foreground = doc_opts->default_link;
215 line[link_end] = saved_char;
217 new_link->color.background = doc_opts->default_style.bg;
219 set_term_color(&template, &new_link->color,
220 doc_opts->color_flags, doc_opts->color_mode);
222 for (i = len; i; i--) {
223 template.data = line[line_pos++];
224 copy_screen_chars(pos++, &template, 1);
227 return len;
230 static inline int
231 add_document_line(struct plain_renderer *renderer,
232 unsigned char *line, int line_width)
234 struct document *document = renderer->document;
235 struct screen_char *template = &renderer->template;
236 struct screen_char saved_renderer_template = *template;
237 struct screen_char *pos, *startpos;
238 #ifdef CONFIG_UTF8
239 int utf8 = document->options.utf8;
240 #endif /* CONFIG_UTF8 */
241 int cells = 0;
242 int lineno = renderer->lineno;
243 int expanded = 0;
244 int width = line_width;
245 int line_pos;
247 line = convert_string(renderer->convert_table, line, width,
248 document->options.cp, CSM_NONE, &width,
249 NULL, NULL);
250 if (!line) return 0;
252 /* Now expand tabs */
253 for (line_pos = 0; line_pos < width;) {
254 unsigned char line_char = line[line_pos];
255 int charlen = 1;
256 int cell = 1;
257 #ifdef CONFIG_UTF8
258 unicode_val_T data;
260 if (utf8) {
261 unsigned char *line_char2 = &line[line_pos];
262 charlen = utf8charlen(&line_char);
263 data = utf8_to_unicode(&line_char2, &line[width]);
265 if (data == UCS_NO_CHAR) {
266 line_pos += charlen;
267 continue;
270 cell = unicode_to_cell(data);
272 #endif /* CONFIG_UTF8 */
274 if (line_char == ASCII_TAB
275 && (line_pos + charlen == width
276 || line[line_pos + charlen] != ASCII_BS)) {
277 int tab_width = 7 - ((cells + expanded) & 7);
279 expanded += tab_width;
280 } else if (line_char == ASCII_BS) {
281 #if 0
282 This does not work: Suppose we have seventeen spaces
283 followed by a back-space; that will call for sixteen
284 bytes of memory, but we will print seventeen spaces
285 before we hit the back-space -- overflow!
287 /* Don't count the character
288 * that the back-space character will delete */
289 if (expanded + line_pos)
290 expanded--;
291 #endif
292 #if 0
293 /* Don't count the back-space character */
294 if (expanded > 0)
295 expanded--;
296 #endif
298 line_pos += charlen;
299 cells += cell;
302 assert(expanded >= 0);
304 startpos = pos = realloc_line(document, width + expanded, lineno);
305 if (!pos) {
306 mem_free(line);
307 return 0;
310 cells = 0;
311 expanded = 0;
312 for (line_pos = 0; line_pos < width;) {
313 unsigned char line_char = line[line_pos];
314 unsigned char next_char, prev_char;
315 int charlen = 1;
316 int cell = 1;
317 #ifdef CONFIG_UTF8
318 unicode_val_T data = UCS_NO_CHAR;
320 if (utf8) {
321 unsigned char *line_char2 = &line[line_pos];
322 charlen = utf8charlen(&line_char);
323 data = utf8_to_unicode(&line_char2, &line[width]);
325 if (data == UCS_NO_CHAR) {
326 line_pos += charlen;
327 continue;
330 cell = unicode_to_cell(data);
332 #endif /* CONFIG_UTF8 */
334 prev_char = line_pos > 0 ? line[line_pos - 1] : '\0';
335 next_char = (line_pos + charlen < width) ?
336 line[line_pos + charlen] : '\0';
338 /* Do not expand tabs that precede back-spaces; this saves the
339 * back-space code some trouble. */
340 if (line_char == ASCII_TAB && next_char != ASCII_BS) {
341 int tab_width = 7 - ((cells + expanded) & 7);
343 expanded += tab_width;
345 template->data = ' ';
347 copy_screen_chars(pos++, template, 1);
348 while (tab_width--);
350 *template = saved_renderer_template;
352 } else if (line_char == ASCII_BS) {
353 if (!(expanded + cells)) {
354 /* We've backspaced to the start of the line */
355 goto next;
357 if (pos > startpos)
358 pos--; /* Backspace */
360 /* Handle x^H_ as _^Hx, but prevent an infinite loop
361 * swapping two underscores. */
362 if (next_char == '_' && prev_char != '_') {
363 /* x^H_ becomes _^Hx */
364 if (line_pos - 1 >= 0)
365 line[line_pos - 1] = next_char;
366 if (line_pos + charlen < width)
367 line[line_pos + charlen] = prev_char;
369 /* Go back and reparse the swapped characters */
370 if (line_pos - 2 >= 0) {
371 cells--;
372 line_pos--;
374 continue;
377 if ((expanded + line_pos) - 2 >= 0) {
378 /* Don't count the backspace character or the
379 * deleted character when returning the line's
380 * width or when expanding tabs. */
381 expanded -= 2;
384 if (pos->data == '_' && next_char == '_') {
385 /* Is _^H_ an underlined underscore
386 * or an emboldened underscore? */
388 if (expanded + line_pos >= 0
389 && pos - 1 >= startpos
390 && (pos - 1)->attr) {
391 /* There is some preceding text,
392 * and it has an attribute; copy it */
393 template->attr |= (pos - 1)->attr;
394 } else {
395 /* Default to bold; seems more useful
396 * than underlining the underscore */
397 template->attr |= SCREEN_ATTR_BOLD;
400 } else if (pos->data == '_') {
401 /* Underline _^Hx */
403 template->attr |= SCREEN_ATTR_UNDERLINE;
405 } else if (pos->data == next_char) {
406 /* Embolden x^Hx */
408 template->attr |= SCREEN_ATTR_BOLD;
411 /* Handle _^Hx^Hx as both bold and underlined */
412 if (template->attr)
413 template->attr |= pos->attr;
414 } else {
415 int added_chars = 0;
417 if (document->options.plain_display_links
418 && isalpha(line_char) && isalpha(next_char)) {
419 /* We only want to check for a URI if there are
420 * at least two consecutive alphabetic
421 * characters, or if we are at the very start of
422 * the line. It improves performance a bit.
423 * --Zas */
424 added_chars = print_document_link(renderer,
425 lineno, line,
426 line_pos,
427 width,
428 expanded,
429 pos, cells);
432 if (added_chars) {
433 line_pos += added_chars - 1;
434 cells += added_chars - 1;
435 pos += added_chars;
436 } else {
437 #ifdef CONFIG_UTF8
438 if (utf8) {
439 if (data == UCS_NO_CHAR) {
440 line_pos += charlen;
441 continue;
444 template->data = (unicode_val_T)data;
445 copy_screen_chars(pos++, template, 1);
447 if (cell == 2) {
448 template->data = UCS_NO_CHAR;
449 copy_screen_chars(pos++,
450 template, 1);
452 } else
453 #endif /* CONFIG_UTF8 */
455 if (!isscreensafe(line_char))
456 line_char = '.';
457 template->data = line_char;
458 copy_screen_chars(pos++, template, 1);
460 /* Detect copy of nul chars to screen,
461 * this should not occur. --Zas */
462 assert(line_char);
466 *template = saved_renderer_template;
468 next:
469 line_pos += charlen;
470 cells += cell;
472 mem_free(line);
474 realloc_line(document, pos - startpos, lineno);
476 return width + expanded;
479 static void
480 init_template(struct screen_char *template, struct document_options *options)
482 get_screen_char_template(template, options, options->default_style);
485 static struct node *
486 add_node(struct plain_renderer *renderer, int x, int width, int height)
488 struct node *node = mem_alloc(sizeof(*node));
490 if (node) {
491 struct document *document = renderer->document;
493 set_box(&node->box, x, renderer->lineno, width, height);
495 int_lower_bound(&document->width, width);
496 int_lower_bound(&document->height, height);
498 add_to_list(document->nodes, node);
501 return node;
504 static void
505 add_document_lines(struct plain_renderer *renderer)
507 unsigned char *source = renderer->source;
508 int length = renderer->length;
509 int was_empty_line = 0;
510 int was_wrapped = 0;
511 #ifdef CONFIG_UTF8
512 int utf8 = is_cp_utf8(renderer->document->cp);
513 #endif
514 for (; length > 0; renderer->lineno++) {
515 unsigned char *xsource;
516 int width, added, only_spaces = 1, spaces = 0, was_spaces = 0;
517 int last_space = 0;
518 int tab_spaces = 0;
519 int step = 0;
520 int cells = 0;
522 /* End of line detection: We handle \r, \r\n and \n types. */
523 for (width = 0; (width < length) &&
524 (cells < renderer->max_width);) {
525 if (source[width] == ASCII_CR)
526 step++;
527 if (source[width + step] == ASCII_LF)
528 step++;
529 if (step) break;
531 if (isspace(source[width])) {
532 last_space = width;
533 if (only_spaces)
534 spaces++;
535 else
536 was_spaces++;
537 if (source[width] == '\t')
538 tab_spaces += 7 - ((width + tab_spaces) % 8);
539 } else {
540 only_spaces = 0;
541 was_spaces = 0;
543 #ifdef CONFIG_UTF8
544 if (utf8) {
545 unsigned char *text = &source[width];
546 unicode_val_T data = utf8_to_unicode(&text,
547 &source[length]);
549 if (data == UCS_NO_CHAR) return;
551 cells += unicode_to_cell(data);
552 width += utf8charlen(&source[width]);
553 } else
554 #endif /* CONFIG_UTF8 */
556 cells++;
557 width++;
561 if (only_spaces && step) {
562 if (was_wrapped || (renderer->compress && was_empty_line)) {
563 /* Successive empty lines will appear as one. */
564 length -= step + spaces;
565 source += step + spaces;
566 renderer->lineno--;
567 assert(renderer->lineno >= 0);
568 continue;
570 was_empty_line = 1;
572 /* No need to keep whitespaces on an empty line. */
573 source += spaces;
574 length -= spaces;
575 width -= spaces;
577 } else {
578 was_empty_line = 0;
579 was_wrapped = !step;
581 if (was_spaces && step) {
582 /* Drop trailing whitespaces. */
583 width -= was_spaces;
584 step += was_spaces;
587 if (!step && (width < length) && last_space) {
588 width = last_space;
589 step = 1;
593 assert(width >= 0);
595 /* We will touch the supplied source, so better replicate it. */
596 xsource = memacpy(source, width);
597 if (!xsource) continue;
599 added = add_document_line(renderer, xsource, width);
600 mem_free(xsource);
602 if (added) {
603 /* Add (search) nodes on a line by line basis */
604 add_node(renderer, 0, added, 1);
607 /* Skip end of line chars too. */
608 width += step;
609 length -= width;
610 source += width;
613 assert(!length);
616 void
617 render_plain_document(struct cache_entry *cached, struct document *document,
618 struct string *buffer)
620 struct conv_table *convert_table;
621 unsigned char *head = empty_string_or_(cached->head);
622 struct plain_renderer renderer;
624 convert_table = get_convert_table(head, document->options.cp,
625 document->options.assume_cp,
626 &document->cp,
627 &document->cp_status,
628 document->options.hard_assume);
630 renderer.source = buffer->source;
631 renderer.length = buffer->length;
633 renderer.document = document;
634 renderer.lineno = 0;
635 renderer.convert_table = convert_table;
636 renderer.compress = document->options.plain_compress_empty_lines;
637 renderer.max_width = document->options.wrap ? document->options.box.width
638 : INT_MAX;
640 document->bgcolor = document->options.default_style.bg;
641 document->width = 0;
642 #ifdef CONFIG_UTF8
643 document->options.utf8 = is_cp_utf8(document->options.cp);
644 #endif /* CONFIG_UTF8 */
646 /* Setup the style */
647 init_template(&renderer.template, &document->options);
649 add_document_lines(&renderer);