1 /* Plain text document renderer */
12 #include "bookmarks/bookmarks.h"
13 #include "cache/cache.h"
14 #include "config/options.h"
15 #include "document/docdata.h"
16 #include "document/document.h"
17 #include "document/format.h"
18 #include "document/options.h"
19 #include "document/plain/renderer.h"
20 #include "document/renderer.h"
21 #include "globhist/globhist.h"
22 #include "intl/charsets.h"
23 #include "protocol/protocol.h"
24 #include "protocol/uri.h"
25 #include "terminal/color.h"
26 #include "terminal/draw.h"
27 #include "util/color.h"
28 #include "util/error.h"
29 #include "util/memory.h"
30 #include "util/string.h"
/* State shared by the helpers below while one plain-text document is being
 * rendered.
 * NOTE(review): this listing appears to have lost lines in this struct. The
 * members named length, max_width and lineno that later code reads through
 * a renderer pointer, and the closing brace, are not visible here. Confirm
 * against the complete file before editing. */
33 struct plain_renderer
{
34 /* The document being rendered */
35 struct document
*document
;
37 /* The data and data length of the defragmented cache entry */
38 unsigned char *source
;
41 /* The convert table that should be used for converting line strings to
42 * the rendered strings. */
43 struct conv_table
*convert_table
;
45 /* The default template char data for text */
46 struct screen_char
template;
48 /* The maximum width any line can have (used for wrapping text) */
51 /* The current line number */
54 /* Are we doing line compression */
55 unsigned int compress
:1;
/* Shorthand that hands a document's links array, its current link count
 * (nlinks) and the requested size to ALIGN_LINK.
 * NOTE(review): ALIGN_LINK is defined elsewhere; presumably it resizes the
 * array and yields a false value on failure (callers below test it with !)
 * -- confirm. */
58 #define realloc_document_links(doc, size) \
59 ALIGN_LINK(&(doc)->links, (doc)->nlinks, size)
/* Fetch line number y of the document through realloc_lines() and, when the
 * line's current length differs from x, resize its chars array with
 * ALIGN_LINE. Returns NULL when realloc_lines() gives back no line.
 * NOTE(review): the rest of the function (handling of an ALIGN_LINE failure,
 * the length update and the final return value) is not visible in this
 * listing. */
61 static struct screen_char
*
62 realloc_line(struct document
*document
, int x
, int y
)
64 struct line
*line
= realloc_lines(document
, y
);
66 if (!line
) return NULL
;
68 if (x
!= line
->length
) {
69 if (!ALIGN_LINE(&line
->chars
, line
->length
, x
))
/* Append a hypertext link to the document's links array.
 *
 * The visible code makes room for nlinks + 1 entries, takes the entry at
 * index nlinks, allocates length points for it with realloc_points(), and
 * fills in: npoints = length, type = LINK_HYPERTEXT, background colour from
 * options.default_style.bg, foreground colour from options.default_link and
 * number = the current nlinks. It then walks the points while advancing x,
 * and resets document->links_sorted to 0.
 *
 * NOTE(review): the remaining parameters (x and y are used by the caller),
 * the local declarations, the failure branches, the loop body and the return
 * statement are not visible in this listing. */
78 static inline struct link
*
79 add_document_link(struct document
*document
, unsigned char *uri
, int length
,
85 if (!realloc_document_links(document
, document
->nlinks
+ 1))
88 link
= &document
->links
[document
->nlinks
];
90 if (!realloc_points(link
, length
))
93 link
->npoints
= length
;
94 link
->type
= LINK_HYPERTEXT
;
96 link
->color
.background
= document
->options
.default_style
.bg
;
97 link
->color
.foreground
= document
->options
.default_link
;
98 link
->number
= document
->nlinks
;
100 for (point
= link
->points
; length
> 0; length
--, point
++, x
++) {
106 document
->links_sorted
= 0;
110 /* Searches a word to find an email address or a URI to add as a link.
 *
 * uri points at the word and length is its size in bytes. The byte just past
 * the word (uri[length]) is read into keep.
 * NOTE(review): presumably the word is NUL-terminated temporarily and keep is
 * used to restore it; those lines are not visible here.
 *
 * Returns the link produced by add_document_link(), or NULL when the word is
 * neither kind of address or when building the target string failed. When
 * add_document_link() fails, the target string is freed here. */
111 static inline struct link
*
112 check_link_word(struct document
*document
, unsigned char *uri
, int length
,
116 unsigned char *where
= NULL
;
117 unsigned char *mailto
= memchr(uri
, '@', length
);
118 int keep
= uri
[length
];
119 struct link
*new_link
;
122 if_assert_failed
return NULL
;
/* An at-sign that is neither the first nor the last byte of the word makes
 * the word an e-mail address; otherwise the word has to parse as a URI with
 * a known protocol and a non-empty data or host part. */
126 if (mailto
&& mailto
> uri
&& mailto
- uri
< length
- 1) {
127 where
= straconcat("mailto:", uri
, (unsigned char *) NULL
);
129 } else if (parse_uri(&test
, uri
) == URI_ERRNO_OK
130 && test
.protocol
!= PROTOCOL_UNKNOWN
131 && (test
.datalen
|| test
.hostlen
)) {
132 where
= memacpy(uri
, length
);
137 if (!where
) return NULL
;
139 /* We need to reparse the URI and normalize it so that the protocol and
140 * host part are converted to lowercase. */
141 normalize_uri(NULL
, where
);
143 new_link
= add_document_link(document
, where
, length
, x
, y
);
145 if (!new_link
) mem_free(where
);
/* Character predicate that get_uri_length() applies to each byte while
 * looking for the end of a URI.
 * NOTE(review): the macro body is not visible in this listing. */
150 #define url_char(c) ( \
/* Measure the URI candidate at the start of line, looking at no more than
 * length bytes.
 *
 * The first loop advances uri_end while url_char() accepts the byte. The
 * second loop walks uri_end back towards 0 and tests whether the byte before
 * it is something other than a full stop or a comma.
 * NOTE(review): presumably that test breaks out of the loop so trailing
 * punctuation is not counted, and uri_end is what gets returned; the
 * declaration, the loop bodies and the return are not visible here. */
159 get_uri_length(unsigned char *line
, int length
)
163 while (uri_end
< length
164 && url_char(line
[uri_end
]))
167 for (; uri_end
> 0; uri_end
--) {
168 if (line
[uri_end
- 1] != '.'
169 && line
[uri_end
- 1] != ',')
/* Try to turn the word that starts at line[line_pos] into a document link
 * and draw it with link colours.
 *
 * renderer  - rendering state; its document and template are read.
 * lineno    - document line being rendered.
 * line      - the line buffer; one byte of it is overwritten and restored.
 * line_pos  - byte offset of the word inside line.
 * width     - length of line in bytes.
 * expanded  - added to cells to obtain the screen column of the word.
 * pos       - where the screen chars for the word are written.
 * cells     - see expanded.
 *
 * The word length comes from get_uri_length(). Returns 0 when
 * check_link_word() produces no link.
 * NOTE(review): the value returned after drawing is not visible in this
 * listing; the caller uses it as the number of characters added. */
177 print_document_link(struct plain_renderer
*renderer
, int lineno
,
178 unsigned char *line
, int line_pos
, int width
,
179 int expanded
, struct screen_char
*pos
, int cells
)
181 struct document
*document
= renderer
->document
;
182 unsigned char *start
= &line
[line_pos
];
183 int len
= get_uri_length(start
, width
- line_pos
);
184 int screen_column
= cells
+ expanded
;
185 struct link
*new_link
;
186 int link_end
= line_pos
+ len
;
187 unsigned char saved_char
;
188 struct document_options
*doc_opts
= &document
->options
;
189 struct screen_char
template = renderer
->template;
194 new_link
= check_link_word(document
, start
, len
, screen_column
,
197 if (!new_link
) return 0;
/* Terminate the word in place so that start can be passed as a C string to
 * the history and bookmark lookups below; the overwritten byte is put back
 * once the foreground colour has been chosen. */
199 saved_char
= line
[link_end
];
200 line
[link_end
] = '\0';
203 ; /* Shut up compiler */
204 #ifdef CONFIG_GLOBHIST
205 else if (get_global_history_item(start
))
206 new_link
->color
.foreground
= doc_opts
->default_vlink
;
208 #ifdef CONFIG_BOOKMARKS
209 else if (get_bookmark(start
))
210 new_link
->color
.foreground
= doc_opts
->default_bookmark_link
;
213 new_link
->color
.foreground
= doc_opts
->default_link
;
215 line
[link_end
] = saved_char
;
217 new_link
->color
.background
= doc_opts
->default_style
.bg
;
219 set_term_color(&template, &new_link
->color
,
220 doc_opts
->color_flags
, doc_opts
->color_mode
);
/* Emit one screen char per byte of the word, using the local copy of the
 * template that now carries the link colours. */
222 for (i
= len
; i
; i
--) {
223 template.data
= line
[line_pos
++];
224 copy_screen_chars(pos
++, &template, 1);
/* Render one source line into document line number renderer->lineno and
 * return width + expanded.
 *
 * renderer   - rendering state; document, template, convert_table and lineno
 *              are read. The template is modified while attributes are being
 *              applied and is reset from a saved copy.
 * line       - the bytes of the line; replaced by the result of
 *              convert_string() and modified in place afterwards.
 * line_width - number of bytes in line.
 *
 * The work is done in two passes over the converted line:
 *  1. a sizing pass that adds the extra columns produced by tab expansion to
 *     expanded and adjusts for back-space characters, so that realloc_line()
 *     can be asked for width + expanded cells;
 *  2. a drawing pass that writes screen chars through pos. Tabs not followed
 *     by a back-space become runs of spaces; back-space sequences are turned
 *     into attributes (x^H_ is handled as _^Hx, an underscore overstruck by a
 *     character sets SCREEN_ATTR_UNDERLINE, a character overstruck by itself
 *     sets SCREEN_ATTR_BOLD); when options.plain_display_links is set, runs
 *     of alphabetic characters are offered to print_document_link().
 * Finally the line is resized to the number of cells actually written
 * (pos - startpos).
 *
 * NOTE(review): many lines of this function, including several local
 * declarations, preprocessor conditionals and at least one comment
 * terminator, are not visible in this listing. Treat the summary above as a
 * reading of the visible fragments only and confirm against the complete
 * file. */
231 add_document_line(struct plain_renderer
*renderer
,
232 unsigned char *line
, int line_width
)
234 struct document
*document
= renderer
->document
;
235 struct screen_char
*template = &renderer
->template;
236 struct screen_char saved_renderer_template
= *template;
237 struct screen_char
*pos
, *startpos
;
239 int utf8
= document
->options
.utf8
;
240 #endif /* CONFIG_UTF8 */
242 int lineno
= renderer
->lineno
;
244 int width
= line_width
;
247 line
= convert_string(renderer
->convert_table
, line
, width
,
248 document
->options
.cp
, CSM_NONE
, &width
,
252 /* Now expand tabs */
253 for (line_pos
= 0; line_pos
< width
;) {
254 unsigned char line_char
= line
[line_pos
];
261 unsigned char *line_char2
= &line
[line_pos
];
262 charlen
= utf8charlen(&line_char
);
263 data
= utf8_to_unicode(&line_char2
, &line
[width
]);
265 if (data
== UCS_NO_CHAR
) {
270 cell
= unicode_to_cell(data
);
272 #endif /* CONFIG_UTF8 */
274 if (line_char
== ASCII_TAB
275 && (line_pos
+ charlen
== width
276 || line
[line_pos
+ charlen
] != ASCII_BS
)) {
277 int tab_width
= 7 - ((cells
+ expanded
) & 7);
279 expanded
+= tab_width
;
280 } else if (line_char
== ASCII_BS
) {
282 This does
not work
: Suppose we have seventeen spaces
283 followed by a back
-space
; that will call
for sixteen
284 bytes of memory
, but we will print seventeen spaces
285 before we hit the back
-space
-- overflow
!
287 /* Don't count the character
288 * that the back-space character will delete */
289 if (expanded
+ line_pos
)
293 /* Don't count the back-space character */
302 assert(expanded
>= 0);
/* Sizing is done: ask for room for every input byte plus the columns added
 * by tab expansion, and remember the start so the final length can be
 * computed from pos - startpos. */
304 startpos
= pos
= realloc_line(document
, width
+ expanded
, lineno
);
312 for (line_pos
= 0; line_pos
< width
;) {
313 unsigned char line_char
= line
[line_pos
];
314 unsigned char next_char
, prev_char
;
318 unicode_val_T data
= UCS_NO_CHAR
;
321 unsigned char *line_char2
= &line
[line_pos
];
322 charlen
= utf8charlen(&line_char
);
323 data
= utf8_to_unicode(&line_char2
, &line
[width
]);
325 if (data
== UCS_NO_CHAR
) {
330 cell
= unicode_to_cell(data
);
332 #endif /* CONFIG_UTF8 */
334 prev_char
= line_pos
> 0 ? line
[line_pos
- 1] : '\0';
335 next_char
= (line_pos
+ charlen
< width
) ?
336 line
[line_pos
+ charlen
] : '\0';
338 /* Do not expand tabs that precede back-spaces; this saves the
339 * back-space code some trouble. */
340 if (line_char
== ASCII_TAB
&& next_char
!= ASCII_BS
) {
341 int tab_width
= 7 - ((cells
+ expanded
) & 7);
343 expanded
+= tab_width
;
345 template->data
= ' ';
347 copy_screen_chars(pos
++, template, 1);
350 *template = saved_renderer_template
;
352 } else if (line_char
== ASCII_BS
) {
353 if (!(expanded
+ cells
)) {
354 /* We've backspaced to the start of the line */
358 pos
--; /* Backspace */
360 /* Handle x^H_ as _^Hx, but prevent an infinite loop
361 * swapping two underscores. */
362 if (next_char
== '_' && prev_char
!= '_') {
363 /* x^H_ becomes _^Hx */
364 if (line_pos
- 1 >= 0)
365 line
[line_pos
- 1] = next_char
;
366 if (line_pos
+ charlen
< width
)
367 line
[line_pos
+ charlen
] = prev_char
;
369 /* Go back and reparse the swapped characters */
370 if (line_pos
- 2 >= 0) {
377 if ((expanded
+ line_pos
) - 2 >= 0) {
378 /* Don't count the backspace character or the
379 * deleted character when returning the line's
380 * width or when expanding tabs. */
384 if (pos
->data
== '_' && next_char
== '_') {
385 /* Is _^H_ an underlined underscore
386 * or an emboldened underscore? */
388 if (expanded
+ line_pos
>= 0
389 && pos
- 1 >= startpos
390 && (pos
- 1)->attr
) {
391 /* There is some preceding text,
392 * and it has an attribute; copy it */
393 template->attr
|= (pos
- 1)->attr
;
395 /* Default to bold; seems more useful
396 * than underlining the underscore */
397 template->attr
|= SCREEN_ATTR_BOLD
;
400 } else if (pos
->data
== '_') {
403 template->attr
|= SCREEN_ATTR_UNDERLINE
;
405 } else if (pos
->data
== next_char
) {
408 template->attr
|= SCREEN_ATTR_BOLD
;
411 /* Handle _^Hx^Hx as both bold and underlined */
413 template->attr
|= pos
->attr
;
417 if (document
->options
.plain_display_links
418 && isalpha(line_char
) && isalpha(next_char
)) {
419 /* We only want to check for a URI if there are
420 * at least two consecutive alphabetic
421 * characters, or if we are at the very start of
422 * the line. It improves performance a bit.
424 added_chars
= print_document_link(renderer
,
433 line_pos
+= added_chars
- 1;
434 cells
+= added_chars
- 1;
439 if (data
== UCS_NO_CHAR
) {
444 template->data
= (unicode_val_T
)data
;
445 copy_screen_chars(pos
++, template, 1);
448 template->data
= UCS_NO_CHAR
;
449 copy_screen_chars(pos
++,
453 #endif /* CONFIG_UTF8 */
455 if (!isscreensafe(line_char
))
457 template->data
= line_char
;
458 copy_screen_chars(pos
++, template, 1);
460 /* Detect copy of nul chars to screen,
461 * this should not occur. --Zas */
466 *template = saved_renderer_template
;
474 realloc_line(document
, pos
- startpos
, lineno
);
476 return width
+ expanded
;
/* Fill in the screen-char template used for ordinary text by calling
 * get_screen_char_template() with the document options and their
 * default_style member. */
480 init_template(struct screen_char
*template, struct document_options
*options
)
482 get_screen_char_template(template, options
, options
->default_style
);
/* Register a node covering part of the current line with the document.
 *
 * A node is allocated with mem_alloc(); its box is set to start at column x
 * on row renderer->lineno with the given width and height; the document's
 * width and height are passed through int_lower_bound() together with the
 * node's width and height; and the node is added to document->nodes.
 * NOTE(review): presumably int_lower_bound() raises the stored value to at
 * least the second argument, and the allocation result is checked before
 * use; neither is visible in this listing -- confirm. */
486 add_node(struct plain_renderer
*renderer
, int x
, int width
, int height
)
488 struct node
*node
= mem_alloc(sizeof(*node
));
491 struct document
*document
= renderer
->document
;
493 set_box(&node
->box
, x
, renderer
->lineno
, width
, height
);
495 int_lower_bound(&document
->width
, width
);
496 int_lower_bound(&document
->height
, height
);
498 add_to_list(document
->nodes
, node
);
/* Split renderer->source (renderer->length bytes) into lines and render
 * them one by one, advancing renderer->lineno after each iteration.
 *
 * For every line the inner loop scans forward until a line end is found
 * (CR, CR LF and LF are recognised), the input runs out, or cells reaches
 * renderer->max_width. While scanning it keeps track of white space and
 * tabs, and, in the UTF-8 branch, decodes characters to count their cells;
 * a character that decodes to UCS_NO_CHAR makes the function return
 * immediately.
 *
 * Lines that hold only white space are skipped when they follow a wrapped
 * line, or when renderer->compress is set and the previous line was empty.
 * Otherwise the line is copied with memacpy() (add_document_line() modifies
 * its input), rendered with add_document_line(), and a node of height 1 and
 * the returned width is added for it.
 *
 * NOTE(review): many statements of this function are not visible in this
 * listing (several declarations, the bodies of most branches, and the code
 * that advances source and length); confirm details against the complete
 * file. */
505 add_document_lines(struct plain_renderer
*renderer
)
507 unsigned char *source
= renderer
->source
;
508 int length
= renderer
->length
;
509 int was_empty_line
= 0;
512 int utf8
= is_cp_utf8(renderer
->document
->cp
);
514 for (; length
> 0; renderer
->lineno
++) {
515 unsigned char *xsource
;
516 int width
, added
, only_spaces
= 1, spaces
= 0, was_spaces
= 0;
522 /* End of line detection: We handle \r, \r\n and \n types. */
523 for (width
= 0; (width
< length
) &&
524 (cells
< renderer
->max_width
);) {
525 if (source
[width
] == ASCII_CR
)
527 if (source
[width
+ step
] == ASCII_LF
)
531 if (isspace(source
[width
])) {
537 if (source
[width
] == '\t')
538 tab_spaces
+= 7 - ((width
+ tab_spaces
) % 8);
545 unsigned char *text
= &source
[width
];
546 unicode_val_T data
= utf8_to_unicode(&text
,
549 if (data
== UCS_NO_CHAR
) return;
551 cells
+= unicode_to_cell(data
);
552 width
+= utf8charlen(&source
[width
]);
554 #endif /* CONFIG_UTF8 */
561 if (only_spaces
&& step
) {
562 if (was_wrapped
|| (renderer
->compress
&& was_empty_line
)) {
563 /* Successive empty lines will appear as one. */
564 length
-= step
+ spaces
;
565 source
+= step
+ spaces
;
567 assert(renderer
->lineno
>= 0);
572 /* No need to keep whitespaces on an empty line. */
581 if (was_spaces
&& step
) {
582 /* Drop trailing whitespaces. */
587 if (!step
&& (width
< length
) && last_space
) {
595 /* We will touch the supplied source, so better replicate it. */
596 xsource
= memacpy(source
, width
);
597 if (!xsource
) continue;
599 added
= add_document_line(renderer
, xsource
, width
);
603 /* Add (search) nodes on a line by line basis */
604 add_node(renderer
, 0, added
, 1);
607 /* Skip end of line chars too. */
/* Entry point of the plain-text renderer: render the text held in buffer
 * into document.
 *
 * cached   - cache entry of the document; only its head member is read here,
 *            wrapped in empty_string_or_().
 * document - the document to fill; its options select the codepage, colours,
 *            wrapping and empty-line compression. bgcolor is set from
 *            options.default_style.bg, and options.utf8 is set from
 *            is_cp_utf8(options.cp) in what the trailing #endif marks as a
 *            CONFIG_UTF8 section.
 * buffer   - the text to render (source and length are taken from it).
 *
 * The function builds a conversion table with get_convert_table(), fills a
 * local struct plain_renderer, prepares the text template with
 * init_template(), and hands over to add_document_lines().
 *
 * NOTE(review): parts of this function are not visible in this listing,
 * among them one get_convert_table() argument, the alternative used for
 * max_width when options.wrap is unset, and any initialisation of
 * renderer.lineno; confirm against the complete file. */
617 render_plain_document(struct cache_entry
*cached
, struct document
*document
,
618 struct string
*buffer
)
620 struct conv_table
*convert_table
;
621 unsigned char *head
= empty_string_or_(cached
->head
);
622 struct plain_renderer renderer
;
624 convert_table
= get_convert_table(head
, document
->options
.cp
,
625 document
->options
.assume_cp
,
627 &document
->cp_status
,
628 document
->options
.hard_assume
);
630 renderer
.source
= buffer
->source
;
631 renderer
.length
= buffer
->length
;
633 renderer
.document
= document
;
635 renderer
.convert_table
= convert_table
;
636 renderer
.compress
= document
->options
.plain_compress_empty_lines
;
637 renderer
.max_width
= document
->options
.wrap
? document
->options
.box
.width
640 document
->bgcolor
= document
->options
.default_style
.bg
;
643 document
->options
.utf8
= is_cp_utf8(document
->options
.cp
);
644 #endif /* CONFIG_UTF8 */
646 /* Setup the style */
647 init_template(&renderer
.template, &document
->options
);
649 add_document_lines(&renderer
);