Merge commit 'pasky.or.cz/elinks-0.12' into elinks-0.13
[elinks.git] / src / document / html / renderer.c
blob358a948ab26eafa3c145d591a146fa3831023a6b
1 /* HTML renderer */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <ctype.h>
8 #include <stdarg.h>
9 #include <string.h>
11 #include "elinks.h"
13 #include "cache/cache.h"
14 #include "config/options.h"
15 #include "document/docdata.h"
16 #include "document/document.h"
17 #include "document/html/frames.h"
18 #include "document/html/parser.h"
19 #include "document/html/parser/parse.h"
20 #include "document/html/renderer.h"
21 #include "document/html/tables.h"
22 #include "document/options.h"
23 #include "document/refresh.h"
24 #include "document/renderer.h"
25 #include "intl/charsets.h"
26 #include "osdep/types.h"
27 #include "protocol/uri.h"
28 #include "session/session.h"
29 #include "terminal/color.h"
30 #include "terminal/draw.h"
31 #include "util/color.h"
32 #include "util/conv.h"
33 #include "util/error.h"
34 #include "util/hash.h"
35 #include "util/lists.h"
36 #include "util/memory.h"
37 #include "util/string.h"
38 #include "util/time.h"
39 #include "viewer/text/form.h"
40 #include "viewer/text/view.h"
41 #include "viewer/text/vs.h"
43 /* Unsafe macros */
44 #include "document/html/internal.h"
46 /* Types and structs */
48 /* Tags are used for ``id''s or anchors in the document referenced by the
49 * fragment part of the URI. */
50 /* FIXME: This and find_tag() should be part of the general infrastructure
51 * in document/document.*. --pasky */
52 struct tag {
53 LIST_HEAD(struct tag);
55 int x, y;
56 unsigned char name[1]; /* must be last of struct. --Zas */
/* Link state passed to the character-writing helpers; anything other than
 * LINK_STATE_NONE makes get_format_screen_char() consider underlining. */
enum link_state {
	LINK_STATE_NONE = 0,
	LINK_STATE_NEW  = 1,
	LINK_STATE_SAME = 2,
};
/* Link-related values remembered by the renderer context.
 * NOTE(review): presumably these describe the most recently processed link
 * so it can be compared with the next one -- confirm against the users of
 * renderer_context.link_state_info. */
struct link_state_info {
	unsigned char *link;
	unsigned char *target;
	unsigned char *image;
	struct form_control *form;
};
/* Key identifying one entry of the table cache. */
struct table_cache_entry_key {
	/* Delimiters of a byte range.
	 * NOTE(review): presumably the HTML source of the cached part. */
	unsigned char *start;
	unsigned char *end;

	/* Formatting parameters that are part of the key. */
	int align;
	int margin;
	int width;
	int x;
	int link_num;
};
82 struct table_cache_entry {
83 LIST_HEAD(struct table_cache_entry);
85 struct table_cache_entry_key key;
86 struct part part;
/* Upper limit on the number of entries kept in the table cache, which is
 * used for nested tables. */
#define MAX_TABLE_CACHE_ENTRIES	16384

/* Global variables */

/* Counter that goes with @table_cache.
 * NOTE(review): presumably the number of entries currently stored. */
static int table_cache_entries;

/* The table cache itself. */
static struct hash *table_cache;
97 struct renderer_context {
98 int last_link_to_move;
99 struct tag *last_tag_to_move;
100 /* All tags between document->tags and this tag (inclusive) should
101 * be aligned to the next line break, unless some real content follows
102 * the tag. Therefore, this virtual tags list accumulates new tags as
103 * they arrive and empties when some real content is written; if a line
104 * break is inserted in the meanwhile, the tags follow it (ie. imagine
105 * <a name="x"> <p>, then the "x" tag follows the line breaks inserted
106 * by the <p> tag). */
107 struct tag *last_tag_for_newline;
109 struct link_state_info link_state_info;
111 struct conv_table *convert_table;
113 /* Used for setting cache info from HTTP-EQUIV meta tags. */
114 struct cache_entry *cached;
116 int g_ctrl_num;
117 int subscript; /* Count stacked subscripts */
118 int supscript; /* Count stacked supscripts */
120 unsigned int empty_format:1;
121 unsigned int nobreak:1;
122 unsigned int nosearchable:1;
123 unsigned int nowrap:1; /* Activated/deactivated by SP_NOWRAP. */
126 static struct renderer_context renderer_context;
/* Prototypes */
static void line_break(struct html_context *html_context);
static void put_chars(struct html_context *html_context,
		      unsigned char *chars, int charslen);

/* Translate coordinates relative to the part's box into document
 * coordinates. Both macros expect a variable named @part in scope. */
#define X(x_)	(part->box.x + (x_))
#define Y(y_)	(part->box.y + (y_))

/* Allocation granularity handed to mem_align_alloc() for the per-part
 * arrays grown through ALIGN_SPACES(). */
#define SPACES_GRANULARITY	0x7F

#define ALIGN_SPACES(x, o, n) \
	mem_align_alloc(x, o, n, SPACES_GRANULARITY)
140 static inline void
141 set_screen_char_color(struct screen_char *schar,
142 color_T bgcolor, color_T fgcolor,
143 enum color_flags color_flags,
144 enum color_mode color_mode)
146 struct color_pair colors = INIT_COLOR_PAIR(bgcolor, fgcolor);
148 set_term_color(schar, &colors, color_flags, color_mode);
151 static int
152 realloc_line(struct html_context *html_context, struct document *document,
153 int y, int length)
155 struct screen_char *pos, *end;
156 struct line *line;
157 int orig_length;
159 if (!realloc_lines(document, y))
160 return -1;
162 line = &document->data[y];
163 orig_length = line->length;
165 if (length < orig_length)
166 return orig_length;
168 if (!ALIGN_LINE(&line->chars, line->length, length + 1))
169 return -1;
171 /* We cannot rely on the aligned allocation to clear the members for us
172 * since for line splitting we simply trim the length. Question is if
173 * it is better to to clear the line after the splitting or here. */
174 end = &line->chars[length];
175 end->data = ' ';
176 end->attr = 0;
177 set_screen_char_color(end, par_format.bgcolor, 0x0,
178 COLOR_ENSURE_CONTRAST, /* for bug 461 */
179 document->options.color_mode);
181 for (pos = &line->chars[line->length]; pos < end; pos++) {
182 copy_screen_chars(pos, end, 1);
185 line->length = length + 1;
187 return orig_length;
190 void
191 expand_lines(struct html_context *html_context, struct part *part,
192 int x, int y, int lines, color_T bgcolor)
194 int line;
196 assert(part && part->document);
197 if_assert_failed return;
199 if (!use_document_bg_colors(&part->document->options))
200 return;
202 par_format.bgcolor = bgcolor;
204 for (line = 0; line < lines; line++)
205 realloc_line(html_context, part->document, Y(y + line), X(x));
208 static inline int
209 realloc_spaces(struct part *part, int length)
211 if (length < part->spaces_len)
212 return 0;
214 if (!ALIGN_SPACES(&part->spaces, part->spaces_len, length))
215 return -1;
216 #ifdef CONFIG_UTF8
217 if (!ALIGN_SPACES(&part->char_width, part->spaces_len, length))
218 return -1;
219 #endif
221 part->spaces_len = length;
223 return 0;
/* Accessors for the document lines behind @part; coordinates are relative
 * to the part's box. All of them expect a variable named @part in scope.
 *  LINE(y)   - the line at row y
 *  POS(x, y) - the screen char at column x of row y
 *  LEN(y)    - how many chars of row y lie at or right of the box's left
 *              edge (never negative) */
#define LINE(y_)	(part->document->data[Y(y_)])
#define POS(x_, y_)	(LINE(y_).chars[X(x_)])
#define LEN(y_)		int_max(LINE(y_).length - part->box.x, 0)
232 /* When we clear chars we want to preserve and use the background colors
233 * already in place else we could end up ``staining'' the background especial
234 * when drawing table cells. So make the cleared chars share the colors in
235 * place. */
236 static inline void
237 clear_hchars(struct html_context *html_context, int x, int y, int width)
239 struct part *part;
240 struct screen_char *pos, *end;
242 assert(html_context);
243 if_assert_failed return;
245 part = html_context->part;
247 assert(part && part->document && width > 0);
248 if_assert_failed return;
250 if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1) < 0)
251 return;
253 assert(part->document->data);
254 if_assert_failed return;
256 pos = &POS(x, y);
257 end = pos + width - 1;
258 end->data = ' ';
259 end->attr = 0;
260 set_screen_char_color(end, par_format.bgcolor, 0x0,
261 COLOR_ENSURE_CONTRAST, /* for bug 461 */
262 part->document->options.color_mode);
264 while (pos < end)
265 copy_screen_chars(pos++, end, 1);
268 /* TODO: Merge parts with get_format_screen_char(). --jonas */
269 /* Allocates the required chars on the given line and returns the char at
270 * position (x, y) ready to be used as a template char. */
271 static inline struct screen_char *
272 get_frame_char(struct html_context *html_context, struct part *part,
273 int x, int y, unsigned char data,
274 color_T bgcolor, color_T fgcolor)
276 struct screen_char *template;
278 assert(html_context);
279 if_assert_failed return NULL;
281 assert(part && part->document && x >= 0 && y >= 0);
282 if_assert_failed return NULL;
284 if (realloc_line(html_context, part->document, Y(y), X(x)) < 0)
285 return NULL;
287 assert(part->document->data);
288 if_assert_failed return NULL;
290 template = &POS(x, y);
291 template->data = data;
292 template->attr = SCREEN_ATTR_FRAME;
293 set_screen_char_color(template, bgcolor, fgcolor,
294 part->document->options.color_flags,
295 part->document->options.color_mode);
297 return template;
300 void
301 draw_frame_hchars(struct part *part, int x, int y, int width,
302 unsigned char data, color_T bgcolor, color_T fgcolor,
303 struct html_context *html_context)
305 struct screen_char *template;
307 assert(width > 0);
308 if_assert_failed return;
310 template = get_frame_char(html_context, part, x + width - 1, y, data, bgcolor, fgcolor);
311 if (!template) return;
313 /* The template char is the last we need to draw so only decrease @width. */
314 for (width -= 1; width; width--, x++) {
315 copy_screen_chars(&POS(x, y), template, 1);
319 void
320 draw_frame_vchars(struct part *part, int x, int y, int height,
321 unsigned char data, color_T bgcolor, color_T fgcolor,
322 struct html_context *html_context)
324 struct screen_char *template = get_frame_char(html_context, part, x, y,
325 data, bgcolor, fgcolor);
327 if (!template) return;
329 /* The template char is the first vertical char to be drawn. So
330 * copy it to the rest. */
331 for (height -= 1, y += 1; height; height--, y++) {
332 if (realloc_line(html_context, part->document, Y(y), X(x)) < 0)
333 return;
335 copy_screen_chars(&POS(x, y), template, 1);
339 static inline struct screen_char *
340 get_format_screen_char(struct html_context *html_context,
341 enum link_state link_state)
343 static struct text_attrib_style ta_cache = { -1, 0x0, 0x0 };
344 static struct screen_char schar_cache;
346 if (memcmp(&ta_cache, &format.style, sizeof(ta_cache))) {
347 copy_struct(&ta_cache, &format.style);
349 schar_cache.attr = 0;
350 if (format.style.attr) {
351 if (format.style.attr & AT_UNDERLINE) {
352 schar_cache.attr |= SCREEN_ATTR_UNDERLINE;
355 if (format.style.attr & AT_BOLD) {
356 schar_cache.attr |= SCREEN_ATTR_BOLD;
359 if (format.style.attr & AT_ITALIC) {
360 schar_cache.attr |= SCREEN_ATTR_ITALIC;
363 if (format.style.attr & AT_GRAPHICS) {
364 schar_cache.attr |= SCREEN_ATTR_FRAME;
368 if (link_state != LINK_STATE_NONE
369 && html_context->options->underline_links) {
370 schar_cache.attr |= SCREEN_ATTR_UNDERLINE;
373 set_screen_char_color(&schar_cache, format.style.bg, format.style.fg,
374 html_context->options->color_flags,
375 html_context->options->color_mode);
378 if (!!(schar_cache.attr & SCREEN_ATTR_UNSEARCHABLE)
379 ^ !!renderer_context.nosearchable) {
380 schar_cache.attr ^= SCREEN_ATTR_UNSEARCHABLE;
383 return &schar_cache;
386 #ifdef CONFIG_UTF8
387 /* First possibly do the format change and then find out what coordinates
388 * to use since sub- or superscript might change them */
389 static inline int
390 set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
391 enum link_state link_state)
393 struct part *const part = html_context->part;
394 struct screen_char *const schar = get_format_screen_char(html_context,
395 link_state);
396 int x = part->cx;
397 const int y = part->cy;
398 const int x2 = x;
399 int len = charslen;
400 const int utf8 = html_context->options->utf8;
401 int orig_length;
403 assert(part);
404 if_assert_failed return len;
406 assert(charslen >= 0);
408 if (realloc_spaces(part, x + charslen))
409 return 0;
411 /* U+00AD SOFT HYPHEN characters in HTML documents are
412 * supposed to be displayed only if the word is broken at that
413 * point. ELinks currently does not use them, so it should
414 * not display them. If the input @chars is in UTF-8, then
415 * set_hline() discards the characters. If the input is in
416 * some other charset, then set_hline() does not know which
417 * byte that charset uses for U+00AD, so it cannot discard
418 * the characters; instead, the translation table used by
419 * convert_string() has already discarded the characters.
421 * Likewise, if the input @chars is in UTF-8, then it may
422 * contain U+00A0 NO-BREAK SPACE characters; but if the input
423 * is in some other charset, then the translation table
424 * has mapped those characters to NBSP_CHAR. */
426 if (part->document) {
427 /* Reallocate LINE(y).chars[] to large enough. The
428 * last parameter of realloc_line is the index of the
429 * last element to which we may want to write,
430 * i.e. one less than the required size of the array.
431 * Compute the required size by assuming that each
432 * byte of input will need at most one character cell.
433 * (All double-cell characters take up at least two
434 * bytes in UTF-8, and there are no triple-cell or
435 * wider characters.) However, if there already is an
436 * incomplete character in part->document->buf, then
437 * the first byte of input can result in a double-cell
438 * character, so we must reserve one extra element. */
439 orig_length = realloc_line(html_context, part->document,
440 Y(y), X(x) + charslen);
441 if (orig_length < 0) /* error */
442 return 0;
443 if (utf8) {
444 unsigned char *const end = chars + charslen;
445 unicode_val_T data;
447 if (part->document->buf_length) {
448 /* previous char was broken in the middle */
449 int length = utf8charlen(part->document->buf);
450 unsigned char i;
451 unsigned char *buf_ptr = part->document->buf;
453 for (i = part->document->buf_length; i < length && chars < end;) {
454 part->document->buf[i++] = *chars++;
456 part->document->buf_length = i;
457 part->document->buf[i] = '\0';
458 data = utf8_to_unicode(&buf_ptr, buf_ptr + i);
459 if (data != UCS_NO_CHAR) {
460 /* FIXME: If there was invalid
461 * UTF-8 in the buffer,
462 * @utf8_to_unicode may have left
463 * some bytes unused. Those
464 * bytes should be pulled back
465 * into @chars, rather than
466 * discarded. This is not
467 * trivial to implement because
468 * each byte may have arrived in
469 * a separate call. */
470 part->document->buf_length = 0;
471 goto good_char;
472 } else {
473 /* Still not full char */
474 LINE(y).length = orig_length;
475 return 0;
479 while (chars < end) {
480 /* ELinks does not use NBSP_CHAR in UTF-8. */
482 data = utf8_to_unicode(&chars, end);
483 if (data == UCS_NO_CHAR) {
484 part->spaces[x] = 0;
485 if (charslen == 1) {
486 /* HR */
487 unsigned char attr = schar->attr;
489 schar->data = *chars++;
490 schar->attr = SCREEN_ATTR_FRAME;
491 copy_screen_chars(&POS(x, y), schar, 1);
492 schar->attr = attr;
493 part->char_width[x++] = 0;
494 continue;
495 } else {
496 unsigned char i;
498 for (i = 0; chars < end;i++) {
499 part->document->buf[i] = *chars++;
501 part->document->buf_length = i;
502 break;
504 /* not reached */
507 good_char:
508 if (data == UCS_SOFT_HYPHEN)
509 continue;
511 if (data == UCS_NO_BREAK_SPACE
512 && html_context->options->wrap_nbsp)
513 data = UCS_SPACE;
514 part->spaces[x] = (data == UCS_SPACE);
516 if (unicode_to_cell(data) == 2) {
517 schar->data = (unicode_val_T)data;
518 part->char_width[x] = 2;
519 copy_screen_chars(&POS(x++, y), schar, 1);
520 schar->data = UCS_NO_CHAR;
521 part->spaces[x] = 0;
522 part->char_width[x] = 0;
523 } else {
524 part->char_width[x] = unicode_to_cell(data);
525 schar->data = (unicode_val_T)data;
527 copy_screen_chars(&POS(x++, y), schar, 1);
528 } /* while chars < end */
529 } else { /* not UTF-8 */
530 for (; charslen > 0; charslen--, x++, chars++) {
531 part->char_width[x] = 1;
532 if (*chars == NBSP_CHAR) {
533 schar->data = ' ';
534 part->spaces[x] = html_context->options->wrap_nbsp;
535 } else {
536 part->spaces[x] = (*chars == ' ');
537 schar->data = *chars;
539 copy_screen_chars(&POS(x, y), schar, 1);
541 } /* end of UTF-8 check */
543 /* Assert that we haven't written past the end of the
544 * LINE(y).chars array. @x here is one greater than
545 * the last one used in POS(x, y). Instead of this,
546 * we could assert(X(x) < LINE(y).length) immediately
547 * before each @copy_screen_chars call above, but
548 * those are in an inner loop that should be fast. */
549 assert(X(x) <= LINE(y).length);
550 /* Some part of the code is apparently using LINE(y).length
551 * for line-wrapping decisions. It may currently be too
552 * large because it was allocated above based on @charslen
553 * which is the number of bytes, not the number of cells.
554 * Change the length to the correct size, but don't let it
555 * get smaller than it was on entry to this function. */
556 LINE(y).length = int_max(orig_length, X(x));
557 len = x - x2;
558 } else { /* part->document == NULL */
559 if (utf8) {
560 unsigned char *const end = chars + charslen;
562 while (chars < end) {
563 unicode_val_T data;
565 data = utf8_to_unicode(&chars, end);
566 if (data == UCS_SOFT_HYPHEN)
567 continue;
569 if (data == UCS_NO_BREAK_SPACE
570 && html_context->options->wrap_nbsp)
571 data = UCS_SPACE;
572 part->spaces[x] = (data == UCS_SPACE);
574 part->char_width[x] = unicode_to_cell(data);
575 if (part->char_width[x] == 2) {
576 x++;
577 part->spaces[x] = 0;
578 part->char_width[x] = 0;
580 if (data == UCS_NO_CHAR) {
581 /* this is at the end only */
582 return x - x2;
584 x++;
585 } /* while chars < end */
586 len = x - x2;
587 } else { /* not UTF-8 */
588 for (; charslen > 0; charslen--, x++, chars++) {
589 part->char_width[x] = 1;
590 if (*chars == NBSP_CHAR) {
591 part->spaces[x] = html_context->options->wrap_nbsp;
592 } else {
593 part->spaces[x] = (*chars == ' ');
597 } /* end of part->document check */
598 return len;
600 #else
602 /* First possibly do the format change and then find out what coordinates
603 * to use since sub- or superscript might change them */
604 static inline void
605 set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
606 enum link_state link_state)
608 struct part *part = html_context->part;
609 struct screen_char *schar = get_format_screen_char(html_context,
610 link_state);
611 int x = part->cx;
612 int y = part->cy;
614 assert(part);
615 if_assert_failed return;
617 if (realloc_spaces(part, x + charslen))
618 return;
620 if (part->document) {
621 if (realloc_line(html_context, part->document,
622 Y(y), X(x) + charslen - 1) < 0)
623 return;
625 for (; charslen > 0; charslen--, x++, chars++) {
626 if (*chars == NBSP_CHAR) {
627 schar->data = ' ';
628 part->spaces[x] = html_context->options->wrap_nbsp;
629 } else {
630 part->spaces[x] = (*chars == ' ');
631 schar->data = *chars;
633 copy_screen_chars(&POS(x, y), schar, 1);
635 } else {
636 for (; charslen > 0; charslen--, x++, chars++) {
637 if (*chars == NBSP_CHAR) {
638 part->spaces[x] = html_context->options->wrap_nbsp;
639 } else {
640 part->spaces[x] = (*chars == ' ');
645 #endif /* CONFIG_UTF8 */
647 static void
648 move_links(struct html_context *html_context, int xf, int yf, int xt, int yt)
650 struct part *part;
651 struct tag *tag;
652 int nlink = renderer_context.last_link_to_move;
653 int matched = 0;
655 assert(html_context);
656 if_assert_failed return;
658 part = html_context->part;
660 assert(part && part->document);
661 if_assert_failed return;
663 if (!realloc_lines(part->document, Y(yt)))
664 return;
666 for (; nlink < part->document->nlinks; nlink++) {
667 struct link *link = &part->document->links[nlink];
668 int i;
670 for (i = 0; i < link->npoints; i++) {
671 /* Fix for bug 479 (part one) */
672 /* The scenario that triggered it:
674 * Imagine a centered element containing a really long
675 * word (over half of the screen width long) followed
676 * by a few links with no spaces between them where all
677 * the link text combined with the really long word
678 * will force the line to be wrapped. When rendering
679 * the line first words (including link text words) are
680 * put on one line. Then wrapping is performed moving
681 * all links from current line to the one below. Then
682 * the current line (now only containing the really
683 * long word) is centered. This will trigger a call to
684 * move_links() which will increment.
686 * Without the fix below the centering of the current
687 * line will increment last_link_to_move to that of the
688 * last link which means centering of the next line
689 * with all the links will only move the last link
690 * leaving all the other links' points dangling and
691 * causing buggy link highlighting.
693 * Even links like textareas will be correctly handled
694 * because @last_link_to_move is a way to optimize how
695 * many links move_links() will have to iterate and
696 * this little fix will only decrease the effect of the
697 * optimization by always ensuring it is never
698 * incremented too far. */
699 if (!matched && link->points[i].y > Y(yf)) {
700 matched = 1;
701 continue;
704 if (link->points[i].y != Y(yf))
705 continue;
707 matched = 1;
709 if (link->points[i].x < X(xf))
710 continue;
712 if (yt >= 0) {
713 link->points[i].y = Y(yt);
714 link->points[i].x += -xf + xt;
715 } else {
716 int to_move = link->npoints - (i + 1);
718 assert(to_move >= 0);
720 if (to_move > 0) {
721 memmove(&link->points[i],
722 &link->points[i + 1],
723 to_move *
724 sizeof(*link->points));
725 i--;
728 link->npoints--;
732 if (!matched) {
733 renderer_context.last_link_to_move = nlink;
737 /* Don't move tags when removing links. */
738 if (yt < 0) return;
740 matched = 0;
741 tag = renderer_context.last_tag_to_move;
743 while (list_has_next(part->document->tags, tag)) {
744 tag = tag->next;
746 if (tag->y == Y(yf)) {
747 matched = 1;
748 if (tag->x >= X(xf)) {
749 tag->y = Y(yt);
750 tag->x += -xf + xt;
753 } else if (!matched && tag->y > Y(yf)) {
754 /* Fix for bug 479 (part two) */
755 matched = 1;
758 if (!matched) renderer_context.last_tag_to_move = tag;
762 static inline void
763 copy_chars(struct html_context *html_context, int x, int y, int width, struct screen_char *d)
765 struct part *part;
767 assert(html_context);
768 if_assert_failed return;
770 part = html_context->part;
772 assert(width > 0 && part && part->document && part->document->data);
773 if_assert_failed return;
775 if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1) < 0)
776 return;
778 copy_screen_chars(&POS(x, y), d, width);
781 static inline void
782 move_chars(struct html_context *html_context, int x, int y, int nx, int ny)
784 struct part *part;
786 assert(html_context);
787 if_assert_failed return;
789 part = html_context->part;
791 assert(part && part->document && part->document->data);
792 if_assert_failed return;
794 if (LEN(y) - x <= 0) return;
795 copy_chars(html_context, nx, ny, LEN(y) - x, &POS(x, y));
797 LINE(y).length = X(x);
798 move_links(html_context, x, y, nx, ny);
801 static inline void
802 shift_chars(struct html_context *html_context, int y, int shift)
804 struct part *part;
805 struct screen_char *a;
806 int len;
808 assert(html_context);
809 if_assert_failed return;
811 part = html_context->part;
813 assert(part && part->document && part->document->data);
814 if_assert_failed return;
816 len = LEN(y);
818 a = fmem_alloc(len * sizeof(*a));
819 if (!a) return;
821 copy_screen_chars(a, &POS(0, y), len);
823 clear_hchars(html_context, 0, y, shift);
824 copy_chars(html_context, shift, y, len, a);
825 fmem_free(a);
827 move_links(html_context, 0, y, shift, y);
830 static inline void
831 del_chars(struct html_context *html_context, int x, int y)
833 struct part *part;
835 assert(html_context);
836 if_assert_failed return;
838 part = html_context->part;
840 assert(part && part->document && part->document->data);
841 if_assert_failed return;
843 LINE(y).length = X(x);
844 move_links(html_context, x, y, -1, -1);
/* overlap(x) is the usable width of paragraph format @x: its width, capped
 * by the box width of the options minus TABLE_LINE_PADDING unless that
 * padding is negative, less the right margin, and never below zero.
 * The capped variant expects a variable named @html_context in scope. */
#if TABLE_LINE_PADDING < 0
# define overlap_width(x) ((x).width)
#else
# define overlap_width(x) \
	int_min((x).width, \
		html_context->options->box.width - TABLE_LINE_PADDING)
#endif
#define overlap(x) int_max(overlap_width(x) - (x).rightmargin, 0)
855 static int inline
856 split_line_at(struct html_context *html_context, int width)
858 struct part *part;
859 int tmp;
860 int new_width = width + par_format.rightmargin;
862 assert(html_context);
863 if_assert_failed return 0;
865 part = html_context->part;
867 assert(part);
868 if_assert_failed return 0;
870 /* Make sure that we count the right margin to the total
871 * actual box width. */
872 int_lower_bound(&part->box.width, new_width);
874 if (part->document) {
875 assert(part->document->data);
876 if_assert_failed return 0;
877 #ifdef CONFIG_UTF8
878 if (html_context->options->utf8
879 && width < part->spaces_len && part->char_width[width] == 2) {
880 move_chars(html_context, width, part->cy, par_format.leftmargin, part->cy + 1);
881 del_chars(html_context, width, part->cy);
882 } else
883 #endif
885 assertm(POS(width, part->cy).data == ' ',
886 "bad split: %c", POS(width, part->cy).data);
887 move_chars(html_context, width + 1, part->cy, par_format.leftmargin, part->cy + 1);
888 del_chars(html_context, width, part->cy);
893 #ifdef CONFIG_UTF8
894 if (!(html_context->options->utf8
895 && width < part->spaces_len
896 && part->char_width[width] == 2))
897 #endif
898 width++; /* Since we were using (x + 1) only later... */
900 tmp = part->spaces_len - width;
901 if (tmp > 0) {
902 /* 0 is possible and I'm paranoid ... --Zas */
903 memmove(part->spaces, part->spaces + width, tmp);
904 #ifdef CONFIG_UTF8
905 memmove(part->char_width, part->char_width + width, tmp);
906 #endif
909 assert(tmp >= 0);
910 if_assert_failed tmp = 0;
911 memset(part->spaces + tmp, 0, width);
912 #ifdef CONFIG_UTF8
913 memset(part->char_width + tmp, 0, width);
914 #endif
916 if (par_format.leftmargin > 0) {
917 tmp = part->spaces_len - par_format.leftmargin;
918 assertm(tmp > 0, "part->spaces_len - par_format.leftmargin == %d", tmp);
919 /* So tmp is zero, memmove() should survive that. Don't recover. */
920 memmove(part->spaces + par_format.leftmargin, part->spaces, tmp);
921 #ifdef CONFIG_UTF8
922 memmove(part->char_width + par_format.leftmargin, part->char_width, tmp);
923 #endif
926 part->cy++;
928 if (part->cx == width) {
929 part->cx = -1;
930 int_lower_bound(&part->box.height, part->cy);
931 return 2;
932 } else {
933 part->cx -= width - par_format.leftmargin;
934 int_lower_bound(&part->box.height, part->cy + 1);
935 return 1;
939 /* Here, we scan the line for a possible place where we could split it into two
940 * (breaking it, because it is too long), if it is overlapping from the maximal
941 * box width. */
942 /* Returns 0 if there was found no spot suitable for breaking the line.
943 * 1 if the line was split into two.
944 * 2 if the (second) splitted line is blank (that is useful to determine
945 * ie. if the next line_break() should really break the line; we don't
946 * want to see any blank lines to pop up, do we?). */
947 static int
948 split_line(struct html_context *html_context)
950 struct part *part;
951 int x;
953 assert(html_context);
954 if_assert_failed return 0;
956 part = html_context->part;
958 assert(part);
959 if_assert_failed return 0;
961 #ifdef CONFIG_UTF8
962 if (html_context->options->utf8) {
963 for (x = overlap(par_format); x >= par_format.leftmargin; x--) {
965 if (x < part->spaces_len && (part->spaces[x]
966 || (part->char_width[x] == 2
967 /* Ugly hack. If we haven't place for
968 * double-width characters we print two
969 * double-width characters. */
970 && x != par_format.leftmargin)))
971 return split_line_at(html_context, x);
974 for (x = par_format.leftmargin; x < part->cx ; x++) {
975 if (x < part->spaces_len && (part->spaces[x]
976 || (part->char_width[x] == 2
977 /* We want to break line after _second_
978 * double-width character. */
979 && x > par_format.leftmargin)))
980 return split_line_at(html_context, x);
982 } else
983 #endif
985 for (x = overlap(par_format); x >= par_format.leftmargin; x--)
986 if (x < part->spaces_len && part->spaces[x])
987 return split_line_at(html_context, x);
989 for (x = par_format.leftmargin; x < part->cx ; x++)
990 if (x < part->spaces_len && part->spaces[x])
991 return split_line_at(html_context, x);
994 /* Make sure that we count the right margin to the total
995 * actual box width. */
996 int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
998 return 0;
1001 /* Insert @new_spaces spaces before the coordinates @x and @y,
1002 * adding those spaces to whatever link is at those coordinates. */
1003 /* TODO: Integrate with move_links. */
1004 static void
1005 insert_spaces_in_link(struct part *part, int x, int y, int new_spaces)
1007 int i = part->document->nlinks;
1009 x = X(x);
1010 y = Y(y);
1012 while (i--) {
1013 struct link *link = &part->document->links[i];
1014 int j = link->npoints;
1016 while (j-- > 1) {
1017 struct point *point = &link->points[j];
1019 if (point->x != x || point->y != y)
1020 continue;
1022 if (!realloc_points(link, link->npoints + new_spaces))
1023 return;
1025 link->npoints += new_spaces;
1026 point = &link->points[link->npoints - 1];
1028 while (new_spaces--) {
1029 point->x = --x;
1030 point->y = y;
1031 point--;
1034 return;
1039 /* This function is very rare exemplary of clean and beautyful code here.
1040 * Please handle with care. --pasky */
1041 static void
1042 justify_line(struct html_context *html_context, int y)
1044 struct part *part;
1045 struct screen_char *line; /* we save original line here */
1046 int len;
1047 int pos;
1048 int *space_list;
1049 int spaces;
1050 int diff;
1052 assert(html_context);
1053 if_assert_failed return;
1055 part = html_context->part;
1057 assert(part && part->document && part->document->data);
1058 if_assert_failed return;
1060 len = LEN(y);
1061 assert(len > 0);
1062 if_assert_failed return;
1064 line = fmem_alloc(len * sizeof(*line));
1065 if (!line) return;
1067 /* It may sometimes happen that the line is only one char long and that
1068 * char is space - then we're going to write to both [0] and [1], but
1069 * we allocated only one field. Thus, we've to do (len + 1). --pasky */
1070 space_list = fmem_alloc((len + 1) * sizeof(*space_list));
1071 if (!space_list) {
1072 fmem_free(line);
1073 return;
1076 copy_screen_chars(line, &POS(0, y), len);
1078 /* Skip leading spaces */
1080 spaces = 0;
1081 pos = 0;
1083 while (line[pos].data == ' ')
1084 pos++;
1086 /* Yes, this can be negative, we know. But we add one to it always
1087 * anyway, so it's ok. */
1088 space_list[spaces++] = pos - 1;
1090 /* Count spaces */
1092 for (; pos < len; pos++)
1093 if (line[pos].data == ' ')
1094 space_list[spaces++] = pos;
1096 space_list[spaces] = len;
1098 /* Realign line */
1100 /* Diff is the difference between the width of the paragraph
1101 * and the current length of the line. */
1102 diff = overlap(par_format) - len;
1104 /* We check diff > 0 because diff can be negative (i.e., we have
1105 * an unbroken line of length > overlap(par_format))
1106 * even when spaces > 1 if the line has only non-breaking spaces. */
1107 if (spaces > 1 && diff > 0) {
1108 int prev_end = 0;
1109 int word;
1111 /* Allocate enough memory for the justified line.
1112 * If the memory is not available, then leave the
1113 * line unchanged, rather than halfway there. The
1114 * following loop assumes the allocation succeeded. */
1115 if (!realloc_line(html_context, html_context->part->document,
1116 Y(y), X(overlap(par_format))))
1117 goto out_of_memory;
1119 for (word = 0; word < spaces; word++) {
1120 /* We have to increase line length by 'diff' num. of
1121 * characters, so we move 'word'th word 'word_shift'
1122 * characters right. */
1123 int word_start = space_list[word] + 1;
1124 int word_len = space_list[word + 1] - word_start;
1125 int word_shift;
1126 int new_start;
1127 int new_spaces;
1129 assert(word_len >= 0);
1130 if_assert_failed continue;
1132 word_shift = (word * diff) / (spaces - 1);
1133 new_start = word_start + word_shift;
1135 /* Copy the original word, without any spaces. */
1136 copy_chars(html_context, new_start, y, word_len,
1137 &line[word_start]);
1139 /* Copy the space that preceded the word,
1140 * duplicating it as many times as necessary.
1141 * This preserves its attributes, such as
1142 * background color and underlining. If this
1143 * is the first word, then skip the copy
1144 * because there might not be a space there
1145 * and anyway it need not be duplicated. */
1146 if (word) {
1147 int spacex;
1149 /* realloc_line() was called above. */
1150 assert(LEN(y) >= new_start);
1151 if_assert_failed continue;
1153 for (spacex = prev_end; spacex < new_start;
1154 ++spacex) {
1155 copy_screen_chars(&POS(spacex, y),
1156 &line[word_start - 1],
1161 /* Remember that any links at the right side
1162 * of the added spaces have moved, and the
1163 * spaces themselves may also belong to a
1164 * link. */
1165 new_spaces = new_start - prev_end - 1;
1166 if (word && new_spaces) {
1167 move_links(html_context, prev_end + 1, y, new_start, y);
1168 insert_spaces_in_link(part,
1169 new_start, y, new_spaces);
1172 prev_end = new_start + word_len;
1176 out_of_memory:
1177 fmem_free(space_list);
1178 fmem_free(line);
1181 static void
1182 align_line(struct html_context *html_context, int y, int last)
1184 struct part *part;
1185 int shift;
1186 int len;
1188 assert(html_context);
1189 if_assert_failed return;
1191 part = html_context->part;
1193 assert(part && part->document && part->document->data);
1194 if_assert_failed return;
1196 len = LEN(y);
1198 if (!len || par_format.align == ALIGN_LEFT)
1199 return;
1201 if (par_format.align == ALIGN_JUSTIFY) {
1202 if (!last)
1203 justify_line(html_context, y);
1204 return;
1207 shift = overlap(par_format) - len;
1208 if (par_format.align == ALIGN_CENTER)
1209 shift /= 2;
1210 if (shift > 0)
1211 shift_chars(html_context, y, shift);
1214 static inline void
1215 init_link_event_hooks(struct html_context *html_context, struct link *link)
1217 link->event_hooks = mem_calloc(1, sizeof(*link->event_hooks));
1218 if (!link->event_hooks) return;
1220 #define add_evhook(list_, type_, src_) \
1221 do { \
1222 struct script_event_hook *evhook; \
1224 if (!src_) break; \
1226 evhook = mem_calloc(1, sizeof(*evhook)); \
1227 if (!evhook) break; \
1229 evhook->type = type_; \
1230 evhook->src = stracpy(src_); \
1231 add_to_list(*(list_), evhook); \
1232 } while (0)
1234 init_list(*link->event_hooks);
1235 add_evhook(link->event_hooks, SEVHOOK_ONCLICK, format.onclick);
1236 add_evhook(link->event_hooks, SEVHOOK_ONDBLCLICK, format.ondblclick);
1237 add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOVER, format.onmouseover);
1238 add_evhook(link->event_hooks, SEVHOOK_ONHOVER, format.onhover);
1239 add_evhook(link->event_hooks, SEVHOOK_ONFOCUS, format.onfocus);
1240 add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOUT, format.onmouseout);
1241 add_evhook(link->event_hooks, SEVHOOK_ONBLUR, format.onblur);
1243 #undef add_evhook
1246 static struct link *
1247 new_link(struct html_context *html_context, unsigned char *name, int namelen)
1249 struct document *document;
1250 struct part *part;
1251 int link_number;
1252 struct link *link;
1254 assert(html_context);
1255 if_assert_failed return NULL;
1257 part = html_context->part;
1259 assert(part);
1260 if_assert_failed return NULL;
1262 document = part->document;
1264 assert(document);
1265 if_assert_failed return NULL;
1267 link_number = part->link_num;
1269 if (!ALIGN_LINK(&document->links, document->nlinks, document->nlinks + 1))
1270 return NULL;
1272 link = &document->links[document->nlinks++];
1273 link->number = link_number - 1;
1274 if (document->options.use_tabindex) link->number += format.tabindex;
1275 link->accesskey = format.accesskey;
1276 link->title = null_or_stracpy(format.title);
1277 link->where_img = null_or_stracpy(format.image);
1279 if (!format.form) {
1280 link->target = null_or_stracpy(format.target);
1281 link->data.name = memacpy(name, namelen);
1282 /* if (strlen(url) > 4 && !strncasecmp(url, "MAP@", 4)) { */
1283 if (format.link
1284 && ((format.link[0]|32) == 'm')
1285 && ((format.link[1]|32) == 'a')
1286 && ((format.link[2]|32) == 'p')
1287 && (format.link[3] == '@')
1288 && format.link[4]) {
1289 link->type = LINK_MAP;
1290 link->where = stracpy(format.link + 4);
1291 } else {
1292 link->type = LINK_HYPERTEXT;
1293 link->where = null_or_stracpy(format.link);
1296 } else {
1297 struct form_control *fc = format.form;
1298 struct form *form;
1300 switch (fc->type) {
1301 case FC_TEXT:
1302 case FC_PASSWORD:
1303 case FC_FILE:
1304 link->type = LINK_FIELD;
1305 break;
1306 case FC_TEXTAREA:
1307 link->type = LINK_AREA;
1308 break;
1309 case FC_CHECKBOX:
1310 case FC_RADIO:
1311 link->type = LINK_CHECKBOX;
1312 break;
1313 case FC_SELECT:
1314 link->type = LINK_SELECT;
1315 break;
1316 case FC_SUBMIT:
1317 case FC_IMAGE:
1318 case FC_RESET:
1319 case FC_BUTTON:
1320 case FC_HIDDEN:
1321 link->type = LINK_BUTTON;
1323 link->data.form_control = fc;
1324 /* At this point, format.form might already be set but
1325 * the form_control not registered through SP_CONTROL
1326 * yet, therefore without fc->form set. It is always
1327 * after the "good" last form was already processed,
1328 * though, so we can safely just take that. */
1329 form = fc->form;
1330 if (!form && !list_empty(document->forms))
1331 form = document->forms.next;
1332 link->target = null_or_stracpy(form ? form->target : NULL);
1335 link->color.background = format.style.bg;
1336 link->color.foreground = link_is_textinput(link)
1337 ? format.style.fg : format.clink;
1339 init_link_event_hooks(html_context, link);
1341 document->links_sorted = 0;
1342 return link;
1345 static void
1346 html_special_tag(struct document *document, unsigned char *t, int x, int y)
1348 struct tag *tag;
1349 int tag_len;
1351 assert(document);
1352 if_assert_failed return;
1354 tag_len = strlen(t);
1355 /* One byte is reserved for name in struct tag. */
1356 tag = mem_alloc(sizeof(*tag) + tag_len);
1357 if (!tag) return;
1359 tag->x = x;
1360 tag->y = y;
1361 memcpy(tag->name, t, tag_len + 1);
1362 add_to_list(document->tags, tag);
1363 if (renderer_context.last_tag_for_newline == (struct tag *) &document->tags)
1364 renderer_context.last_tag_for_newline = tag;
1368 static void
1369 put_chars_conv(struct html_context *html_context,
1370 unsigned char *chars, int charslen)
1372 struct part *part;
1374 assert(html_context);
1375 if_assert_failed return;
1377 part = html_context->part;
1379 assert(part && chars && charslen);
1380 if_assert_failed return;
1382 if (format.style.attr & AT_GRAPHICS) {
1383 put_chars(html_context, chars, charslen);
1384 return;
1387 convert_string(renderer_context.convert_table, chars, charslen,
1388 html_context->options->cp,
1389 CSM_DEFAULT, NULL, (void (*)(void *, unsigned char *, int)) put_chars, html_context);
1392 static inline void
1393 put_link_number(struct html_context *html_context)
1395 struct part *part = html_context->part;
1396 unsigned char s[64];
1397 unsigned char *fl = format.link;
1398 unsigned char *ft = format.target;
1399 unsigned char *fi = format.image;
1400 struct form_control *ff = format.form;
1401 int slen = 0;
1403 format.link = format.target = format.image = NULL;
1404 format.form = NULL;
1406 s[slen++] = '[';
1407 ulongcat(s, &slen, part->link_num, sizeof(s) - 3, 0);
1408 s[slen++] = ']';
1409 s[slen] = '\0';
1411 renderer_context.nosearchable = 1;
1412 put_chars(html_context, s, slen);
1413 renderer_context.nosearchable = 0;
1415 if (ff && ff->type == FC_TEXTAREA) line_break(html_context);
1417 /* We might have ended up on a new line after the line breaking
1418 * or putting the link number chars. */
1419 if (part->cx == -1) part->cx = par_format.leftmargin;
1421 format.link = fl;
1422 format.target = ft;
1423 format.image = fi;
1424 format.form = ff;
/* Assert that the remembered link value @old_ is unset, reporting both
 * it and the value @new_ that is about to replace it otherwise. */
#define assert_link_variable(old_, new_) \
	assertm(!(old_), "Old link value [%s]. New value [%s]", old_, new_);
1430 static inline void
1431 init_link_state_info(unsigned char *link, unsigned char *target,
1432 unsigned char *image, struct form_control *form)
1434 assert_link_variable(renderer_context.link_state_info.image, image);
1435 assert_link_variable(renderer_context.link_state_info.target, target);
1436 assert_link_variable(renderer_context.link_state_info.link, link);
1438 renderer_context.link_state_info.link = null_or_stracpy(link);
1439 renderer_context.link_state_info.target = null_or_stracpy(target);
1440 renderer_context.link_state_info.image = null_or_stracpy(image);
1441 renderer_context.link_state_info.form = form;
1444 static inline void
1445 done_link_state_info(void)
1447 mem_free_if(renderer_context.link_state_info.link);
1448 mem_free_if(renderer_context.link_state_info.target);
1449 mem_free_if(renderer_context.link_state_info.image);
1450 memset(&renderer_context.link_state_info, 0,
1451 sizeof(renderer_context.link_state_info));
1454 #ifdef CONFIG_UTF8
1455 static inline void
1456 process_link(struct html_context *html_context, enum link_state link_state,
1457 unsigned char *chars, int charslen, int cells)
1458 #else
1459 static inline void
1460 process_link(struct html_context *html_context, enum link_state link_state,
1461 unsigned char *chars, int charslen)
1462 #endif /* CONFIG_UTF8 */
1464 struct part *part = html_context->part;
1465 struct link *link;
1466 int x_offset = 0;
1468 switch (link_state) {
1469 case LINK_STATE_SAME: {
1470 unsigned char *name;
1472 if (!part->document) return;
1474 assertm(part->document->nlinks > 0, "no link");
1475 if_assert_failed return;
1477 link = &part->document->links[part->document->nlinks - 1];
1479 name = get_link_name(link);
1480 if (name) {
1481 unsigned char *new_name;
1483 new_name = straconcat(name, chars,
1484 (unsigned char *) NULL);
1485 if (new_name) {
1486 mem_free(name);
1487 link->data.name = new_name;
1491 /* FIXME: Concatenating two adjectent <a> elements to a single
1492 * link is broken since we lose the event handlers for the
1493 * second one. OTOH simply appending them here won't fly since
1494 * we may get here multiple times for even a single link. We
1495 * will probably need some SP_ for creating a new link or so.
1496 * --pasky */
1498 break;
1501 case LINK_STATE_NEW:
1502 part->link_num++;
1504 init_link_state_info(format.link, format.target,
1505 format.image, format.form);
1506 if (!part->document) return;
1508 /* Trim leading space from the link text */
1509 while (x_offset < charslen && chars[x_offset] <= ' ')
1510 x_offset++;
1512 if (x_offset) {
1513 charslen -= x_offset;
1514 chars += x_offset;
1515 #ifdef CONFIG_UTF8
1516 cells -= x_offset;
1517 #endif /* CONFIG_UTF8 */
1520 link = new_link(html_context, chars, charslen);
1521 if (!link) return;
1523 break;
1525 case LINK_STATE_NONE:
1526 default:
1527 INTERNAL("bad link_state %i", (int) link_state);
1528 return;
1531 /* Add new canvas positions to the link. */
1532 #ifdef CONFIG_UTF8
1533 if (realloc_points(link, link->npoints + cells))
1534 #else
1535 if (realloc_points(link, link->npoints + charslen))
1536 #endif /* CONFIG_UTF8 */
1538 struct point *point = &link->points[link->npoints];
1539 int x = X(part->cx) + x_offset;
1540 int y = Y(part->cy);
1542 #ifdef CONFIG_UTF8
1543 link->npoints += cells;
1545 for (; cells > 0; cells--, point++, x++)
1546 #else
1547 link->npoints += charslen;
1549 for (; charslen > 0; charslen--, point++, x++)
1550 #endif /* CONFIG_UTF8 */
1552 point->x = x;
1553 point->y = y;
1558 static inline enum link_state
1559 get_link_state(struct html_context *html_context)
1561 enum link_state state;
1563 if (!(format.link || format.image || format.form)) {
1564 state = LINK_STATE_NONE;
1566 } else if ((renderer_context.link_state_info.link
1567 || renderer_context.link_state_info.image
1568 || renderer_context.link_state_info.form)
1569 && !xstrcmp(format.link, renderer_context.link_state_info.link)
1570 && !xstrcmp(format.target, renderer_context.link_state_info.target)
1571 && !xstrcmp(format.image, renderer_context.link_state_info.image)
1572 && format.form == renderer_context.link_state_info.form) {
1574 return LINK_STATE_SAME;
1576 } else {
1577 state = LINK_STATE_NEW;
1580 done_link_state_info();
1582 return state;
/* Return 1 if any of the first @charslen bytes of @chars is not a
 * whitespace character according to isspace(), 0 otherwise. */
static inline int
html_has_non_space_chars(unsigned char *chars, int charslen)
{
	int i;

	for (i = 0; i < charslen; i++) {
		if (!isspace(chars[i]))
			return 1;
	}

	return 0;
}
1597 static void
1598 put_chars(struct html_context *html_context, unsigned char *chars, int charslen)
1600 enum link_state link_state;
1601 struct part *part;
1602 #ifdef CONFIG_UTF8
1603 int cells;
1604 #endif /* CONFIG_UTF8 */
1606 assert(html_context);
1607 if_assert_failed return;
1609 part = html_context->part;
1611 assert(part);
1612 if_assert_failed return;
1614 assert(chars && charslen);
1615 if_assert_failed return;
1617 /* If we are not handling verbatim aligning and we are at the begining
1618 * of a line trim whitespace. */
1619 if (part->cx == -1) {
1620 /* If we are not handling verbatim aligning trim leading
1621 * whitespaces. */
1622 if (!html_is_preformatted()) {
1623 while (charslen && *chars == ' ') {
1624 chars++;
1625 charslen--;
1628 if (charslen < 1) return;
1631 part->cx = par_format.leftmargin;
1634 /* For preformatted html always update 'the last tag' so we never end
1635 * up moving tags to the wrong line (Fixes bug 324). For all other html
1636 * it is moved only when the line being rendered carry some real
1637 * non-whitespace content. */
1638 if (html_is_preformatted()
1639 || html_has_non_space_chars(chars, charslen)) {
1640 renderer_context.last_tag_for_newline = (struct tag *) &part->document->tags;
1643 int_lower_bound(&part->box.height, part->cy + 1);
1645 link_state = get_link_state(html_context);
1647 if (link_state == LINK_STATE_NEW) {
1648 int x_offset = 0;
1650 /* Don't add inaccessible links. It seems to be caused
1651 * by the parser putting a space char after stuff like
1652 * <img>-tags or comments wrapped in <a>-tags. See bug
1653 * 30 for test case. */
1654 while (x_offset < charslen && chars[x_offset] <= ' ')
1655 x_offset++;
1657 /* For pure spaces reset the link state */
1658 if (x_offset == charslen)
1659 link_state = LINK_STATE_NONE;
1660 else if (html_context->options->links_numbering)
1661 put_link_number(html_context);
1663 #ifdef CONFIG_UTF8
1664 cells =
1665 #endif /* CONFIG_UTF8 */
1666 set_hline(html_context, chars, charslen, link_state);
1668 if (link_state != LINK_STATE_NONE) {
1669 #ifdef CONFIG_UTF8
1670 process_link(html_context, link_state, chars, charslen,
1671 cells);
1672 #else
1673 process_link(html_context, link_state, chars, charslen);
1674 #endif /* CONFIG_UTF8 */
1677 #ifdef CONFIG_UTF8
1678 if (renderer_context.nowrap
1679 && part->cx + cells > overlap(par_format))
1680 return;
1682 part->cx += cells;
1683 #else
1684 if (renderer_context.nowrap
1685 && part->cx + charslen > overlap(par_format))
1686 return;
1688 part->cx += charslen;
1689 #endif /* CONFIG_UTF8 */
1691 renderer_context.nobreak = 0;
1693 if (!(html_context->options->wrap || html_is_preformatted())) {
1694 while (part->cx > overlap(par_format)
1695 && part->cx > par_format.leftmargin) {
1696 int x = split_line(html_context);
1698 if (!x) break;
1699 if (part->document)
1700 align_line(html_context, part->cy - 1, 0);
1701 renderer_context.nobreak = !!(x - 1);
1705 assert(charslen > 0);
1706 #ifdef CONFIG_UTF8
1707 part->xa += cells;
1708 #else
1709 part->xa += charslen;
1710 #endif /* CONFIG_UTF8 */
1711 int_lower_bound(&part->max_width, part->xa
1712 + par_format.leftmargin + par_format.rightmargin
1713 - (chars[charslen - 1] == ' '
1714 && !html_is_preformatted()));
1715 return;
1719 #undef overlap
1721 static void
1722 line_break(struct html_context *html_context)
1724 struct part *part;
1725 struct tag *tag;
1727 assert(html_context);
1728 if_assert_failed return;
1730 part = html_context->part;
1732 assert(part);
1733 if_assert_failed return;
1735 int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
1737 if (renderer_context.nobreak) {
1738 renderer_context.nobreak = 0;
1739 part->cx = -1;
1740 part->xa = 0;
1741 return;
1744 if (!part->document || !part->document->data) goto end;
1746 if (!realloc_lines(part->document, part->box.height + part->cy + 1))
1747 return;
1749 if (part->cx > par_format.leftmargin && LEN(part->cy) > part->cx - 1
1750 && POS(part->cx - 1, part->cy).data == ' ') {
1751 del_chars(html_context, part->cx - 1, part->cy);
1752 part->cx--;
1755 if (part->cx > 0) align_line(html_context, part->cy, 1);
1757 for (tag = renderer_context.last_tag_for_newline;
1758 tag && tag != (struct tag *) &part->document->tags;
1759 tag = tag->prev) {
1760 tag->x = X(0);
1761 tag->y = Y(part->cy + 1);
1764 end:
1765 part->cy++;
1766 part->cx = -1;
1767 part->xa = 0;
1768 memset(part->spaces, 0, part->spaces_len);
1769 #ifdef CONFIG_UTF8
1770 memset(part->char_width, 0, part->spaces_len);
1771 #endif
1774 static void
1775 html_special_form(struct part *part, struct form *form)
1777 assert(part && form);
1778 if_assert_failed return;
1780 if (!part->document) {
1781 done_form(form);
1782 return;
1785 if (!list_empty(part->document->forms)) {
1786 struct form *nform;
1788 /* Make sure the new form ``claims'' its slice of the form range
1789 * maintained in the form_num and form_end variables. */
1790 foreach (nform, part->document->forms) {
1791 if (form->form_num < nform->form_num
1792 || nform->form_end < form->form_num)
1793 continue;
1795 /* First check if the form has identical form numbers.
1796 * That should only be the case when the form being
1797 * added is in fact the same form in which case it
1798 * should be dropped. The fact that this can happen
1799 * suggests that the table renderering can be confused.
1800 * See bug 647 for a test case. */
1801 if (nform->form_num == form->form_num
1802 && nform->form_end == form->form_end) {
1803 done_form(form);
1804 return;
1807 /* The form start is inside an already added form, so
1808 * partition the space of the existing form and get
1809 * |old|new|. */
1810 nform->form_end = form->form_num - 1;
1811 assertm(nform->form_num <= nform->form_end,
1812 "[%d:%d] [%d:%d]", nform->form_num, nform->form_end,
1813 form->form_num, form->form_end);
1814 break;
1816 } else {
1817 /* If it is the first form make sure it eats the whole form
1818 * range. */
1819 #if 0
1820 /* Disabled because in tables the parse order may lead to a
1821 * later form being parsed before a preceeding one causing the
1822 * wrong order if we set it to zero. Let's hope it doesn't break
1823 * anything else. */
1824 form->form_num = 0;
1825 #endif
1828 add_to_list(part->document->forms, form);
1831 static void
1832 html_special_form_control(struct part *part, struct form_control *fc)
1834 struct form *form;
1836 assert(part && fc);
1837 if_assert_failed return;
1839 if (!part->document) {
1840 done_form_control(fc);
1841 mem_free(fc);
1842 return;
1845 fc->g_ctrl_num = renderer_context.g_ctrl_num++;
1847 if (list_empty(part->document->forms)) {
1848 /* No forms encountered yet, that means a homeless form
1849 * control. Generate a dummy form for those Flying
1850 * Dutchmans. */
1851 form = init_form();
1852 form->form_num = 0;
1853 add_to_list(part->document->forms, form);
1855 /* Attach this form control to the last form encountered. */
1856 form = part->document->forms.next;
1857 fc->form = form;
1858 add_to_list(form->items, fc);
1861 /* Reparents form items based on position in the source. */
1862 void
1863 check_html_form_hierarchy(struct part *part)
1865 struct document *document = part->document;
1866 INIT_LIST_OF(struct form_control, form_controls);
1867 struct form *form;
1868 struct form_control *fc, *next;
1870 if (list_empty(document->forms))
1871 return;
1873 /* Take out all badly placed form items. */
1875 foreach (form, document->forms) {
1877 assertm(form->form_num <= form->form_end,
1878 "%p [%d : %d]", form, form->form_num, form->form_end);
1880 foreachsafe (fc, next, form->items) {
1881 if (form->form_num <= fc->position
1882 && fc->position <= form->form_end)
1883 continue;
1885 move_to_top_of_list(form_controls, fc);
1889 /* Re-insert the form items the correct places. */
1891 foreachsafe (fc, next, form_controls) {
1893 foreach (form, document->forms) {
1894 if (form->form_num <= fc->position
1895 && fc->position <= form->form_end)
1896 continue;
1898 fc->form = form;
1899 move_to_top_of_list(form->items, fc);
1900 break;
1904 assert(list_empty(form_controls));
1907 static inline void
1908 color_link_lines(struct html_context *html_context)
1910 struct document *document = html_context->part->document;
1911 struct color_pair colors = INIT_COLOR_PAIR(par_format.bgcolor, 0x0);
1912 enum color_mode color_mode = document->options.color_mode;
1913 enum color_flags color_flags = document->options.color_flags;
1914 int y;
1916 for (y = 0; y < document->height; y++) {
1917 int x;
1919 for (x = 0; x < document->data[y].length; x++) {
1920 struct screen_char *schar = &document->data[y].chars[x];
1922 set_term_color(schar, &colors, color_flags, color_mode);
1924 /* XXX: Entering hack zone! Change to clink color after
1925 * link text has been recolored. */
1926 if (schar->data == ':' && colors.foreground == 0x0)
1927 colors.foreground = format.clink;
1930 colors.foreground = 0x0;
1934 static void *
1935 html_special(struct html_context *html_context, enum html_special_type c, ...)
1937 va_list l;
1938 struct part *part;
1939 struct document *document;
1940 void *ret_val = NULL;
1942 assert(html_context);
1943 if_assert_failed return NULL;
1945 part = html_context->part;
1947 assert(part);
1948 if_assert_failed return NULL;
1950 document = part->document;
1952 va_start(l, c);
1953 switch (c) {
1954 case SP_TAG:
1955 if (document) {
1956 unsigned char *t = va_arg(l, unsigned char *);
1958 html_special_tag(document, t, X(part->cx), Y(part->cy));
1960 break;
1961 case SP_FORM:
1963 struct form *form = va_arg(l, struct form *);
1965 html_special_form(part, form);
1966 break;
1968 case SP_CONTROL:
1970 struct form_control *fc = va_arg(l, struct form_control *);
1972 html_special_form_control(part, fc);
1973 break;
1975 case SP_TABLE:
1976 ret_val = renderer_context.convert_table;
1977 break;
1978 case SP_USED:
1979 ret_val = (void *) (long) !!document;
1980 break;
1981 case SP_CACHE_CONTROL:
1983 struct cache_entry *cached = renderer_context.cached;
1985 cached->cache_mode = CACHE_MODE_NEVER;
1986 cached->expire = 0;
1987 break;
1989 case SP_CACHE_EXPIRES:
1991 time_t expires = va_arg(l, time_t);
1992 struct cache_entry *cached = renderer_context.cached;
1994 if (!expires || cached->cache_mode == CACHE_MODE_NEVER)
1995 break;
1997 timeval_from_seconds(&cached->max_age, expires);
1998 cached->expire = 1;
1999 break;
2001 case SP_FRAMESET:
2003 struct frameset_param *fsp = va_arg(l, struct frameset_param *);
2004 struct frameset_desc *frameset_desc;
2006 if (!fsp->parent && document->frame_desc)
2007 break;
2009 frameset_desc = create_frameset(fsp);
2010 if (!fsp->parent && !document->frame_desc)
2011 document->frame_desc = frameset_desc;
2013 ret_val = frameset_desc;
2014 break;
2016 case SP_FRAME:
2018 struct frameset_desc *parent = va_arg(l, struct frameset_desc *);
2019 unsigned char *name = va_arg(l, unsigned char *);
2020 unsigned char *url = va_arg(l, unsigned char *);
2022 add_frameset_entry(parent, NULL, name, url);
2023 break;
2025 case SP_NOWRAP:
2026 renderer_context.nowrap = !!va_arg(l, int);
2027 break;
2028 case SP_REFRESH:
2030 unsigned long seconds = va_arg(l, unsigned long);
2031 unsigned char *t = va_arg(l, unsigned char *);
2033 if (document) {
2034 if (document->refresh)
2035 done_document_refresh(document->refresh);
2036 document->refresh = init_document_refresh(t, seconds);
2038 break;
2040 case SP_COLOR_LINK_LINES:
2041 if (document && use_document_bg_colors(&document->options))
2042 color_link_lines(html_context);
2043 break;
2044 case SP_STYLESHEET:
2045 #ifdef CONFIG_CSS
2046 if (document) {
2047 struct uri *uri = va_arg(l, struct uri *);
2049 add_to_uri_list(&document->css_imports, uri);
2051 #endif
2052 break;
2053 case SP_SCRIPT:
2054 #ifdef CONFIG_ECMASCRIPT
2055 if (document) {
2056 struct uri *uri = va_arg(l, struct uri *);
2058 add_to_uri_list(&document->ecmascript_imports, uri);
2060 #endif
2061 break;
2064 va_end(l);
2066 return ret_val;
2069 void
2070 free_table_cache(void)
2072 if (table_cache) {
2073 struct hash_item *item;
2074 int i;
2076 /* We do not free key here. */
2077 foreach_hash_item (item, *table_cache, i) {
2078 mem_free_if(item->value);
2081 free_hash(&table_cache);
2082 table_cache_entries = 0;
2086 struct part *
2087 format_html_part(struct html_context *html_context,
2088 unsigned char *start, unsigned char *end,
2089 int align, int margin, int width, struct document *document,
2090 int x, int y, unsigned char *head,
2091 int link_num)
2093 struct part *part;
2094 struct html_element *html_state;
2095 int llm = renderer_context.last_link_to_move;
2096 struct tag *ltm = renderer_context.last_tag_to_move;
2097 int ef = renderer_context.empty_format;
2098 int lm = html_context->margin;
2100 /* Hash creation if needed. */
2101 if (!table_cache) {
2102 table_cache = init_hash8();
2103 } else if (!document) {
2104 /* Search for cached entry. */
2105 struct table_cache_entry_key key;
2106 struct hash_item *item;
2108 /* Clear key to prevent potential alignment problem
2109 * when keys are compared. */
2110 memset(&key, 0, sizeof(key));
2112 key.start = start;
2113 key.end = end;
2114 key.align = align;
2115 key.margin = margin;
2116 key.width = width;
2117 key.x = x;
2118 key.link_num = link_num;
2120 item = get_hash_item(table_cache,
2121 (unsigned char *) &key,
2122 sizeof(key));
2123 if (item) { /* We found it in cache, so just copy and return. */
2124 part = mem_alloc(sizeof(*part));
2125 if (part) {
2126 copy_struct(part, &((struct table_cache_entry *)
2127 item->value)->part);
2128 return part;
2133 assertm(y >= 0, "format_html_part: y == %d", y);
2134 if_assert_failed return NULL;
2136 if (document) {
2137 struct node *node = mem_alloc(sizeof(*node));
2139 if (node) {
2140 int node_width = !html_context->table_level ? INT_MAX : width;
2142 set_box(&node->box, x, y, node_width, 1);
2143 add_to_list(document->nodes, node);
2146 renderer_context.last_link_to_move = document->nlinks;
2147 renderer_context.last_tag_to_move = (struct tag *) &document->tags;
2148 renderer_context.last_tag_for_newline = (struct tag *) &document->tags;
2149 } else {
2150 renderer_context.last_link_to_move = 0;
2151 renderer_context.last_tag_to_move = (struct tag *) NULL;
2152 renderer_context.last_tag_for_newline = (struct tag *) NULL;
2155 html_context->margin = margin;
2156 renderer_context.empty_format = !document;
2158 done_link_state_info();
2159 renderer_context.nobreak = 1;
2161 part = mem_calloc(1, sizeof(*part));
2162 if (!part) goto ret;
2164 part->document = document;
2165 part->box.x = x;
2166 part->box.y = y;
2167 part->cx = -1;
2168 part->cy = 0;
2169 part->link_num = link_num;
2171 html_state = init_html_parser_state(html_context, ELEMENT_IMMORTAL, align, margin, width);
2173 parse_html(start, end, part, head, html_context);
2175 done_html_parser_state(html_context, html_state);
2177 int_lower_bound(&part->max_width, part->box.width);
2179 renderer_context.nobreak = 0;
2181 done_link_state_info();
2182 mem_free_if(part->spaces);
2183 #ifdef CONFIG_UTF8
2184 mem_free_if(part->char_width);
2185 #endif
2187 if (document) {
2188 struct node *node = document->nodes.next;
2190 node->box.height = y - node->box.y + part->box.height;
2193 ret:
2194 renderer_context.last_link_to_move = llm;
2195 renderer_context.last_tag_to_move = ltm;
2196 renderer_context.empty_format = ef;
2198 html_context->margin = lm;
2200 if (html_context->table_level > 1 && !document
2201 && table_cache
2202 && table_cache_entries < MAX_TABLE_CACHE_ENTRIES) {
2203 /* Create a new entry. */
2204 /* Clear memory to prevent bad key comparaison due to alignment
2205 * of key fields. */
2206 struct table_cache_entry *tce = mem_calloc(1, sizeof(*tce));
2207 /* A goto is used here to prevent a test or code
2208 * redundancy. */
2209 if (!tce) goto end;
2211 tce->key.start = start;
2212 tce->key.end = end;
2213 tce->key.align = align;
2214 tce->key.margin = margin;
2215 tce->key.width = width;
2216 tce->key.x = x;
2217 tce->key.link_num = link_num;
2218 copy_struct(&tce->part, part);
2220 if (!add_hash_item(table_cache,
2221 (unsigned char *) &tce->key,
2222 sizeof(tce->key), tce)) {
2223 mem_free(tce);
2224 } else {
2225 table_cache_entries++;
2229 end:
2231 return part;
2234 void
2235 render_html_document(struct cache_entry *cached, struct document *document,
2236 struct string *buffer)
2238 struct html_context *html_context;
2239 struct part *part;
2240 unsigned char *start;
2241 unsigned char *end;
2242 struct string title;
2243 struct string head;
2245 assert(cached && document);
2246 if_assert_failed return;
2248 if (!init_string(&head)) return;
2250 if (cached->head) add_to_string(&head, cached->head);
2252 start = buffer->source;
2253 end = buffer->source + buffer->length;
2255 html_context = init_html_parser(cached->uri, &document->options,
2256 start, end, &head, &title,
2257 put_chars_conv, line_break,
2258 html_special);
2259 if (!html_context) return;
2261 renderer_context.g_ctrl_num = 0;
2262 renderer_context.cached = cached;
2263 renderer_context.convert_table = get_convert_table(head.source,
2264 document->options.cp,
2265 document->options.assume_cp,
2266 &document->cp,
2267 &document->cp_status,
2268 document->options.hard_assume);
2269 #ifdef CONFIG_UTF8
2270 html_context->options->utf8 = is_cp_utf8(document->options.cp);
2271 #endif /* CONFIG_UTF8 */
2272 html_context->doc_cp = document->cp;
2274 if (title.length) {
2275 document->title = convert_string(renderer_context.convert_table,
2276 title.source, title.length,
2277 document->options.cp,
2278 CSM_DEFAULT, NULL, NULL, NULL);
2280 done_string(&title);
2282 part = format_html_part(html_context, start, end, par_format.align,
2283 par_format.leftmargin,
2284 document->options.box.width, document,
2285 0, 0, head.source, 1);
2287 /* Drop empty allocated lines at end of document if any
2288 * and adjust document height. */
2289 while (document->height && !document->data[document->height - 1].length)
2290 mem_free_if(document->data[--document->height].chars);
2292 /* Calculate document width. */
2294 int i;
2296 document->width = 0;
2297 for (i = 0; i < document->height; i++)
2298 int_lower_bound(&document->width, document->data[i].length);
2301 #if 1
2302 document->options.needs_width = 1;
2303 #else
2304 /* FIXME: This needs more tuning since if we are centering stuff it
2305 * does not work. */
2306 document->options.needs_width =
2307 (document->width + (document->options.margin
2308 >= document->options.width));
2309 #endif
2311 document->bgcolor = par_format.bgcolor;
2313 done_html_parser(html_context);
2315 /* Drop forms which has been serving as a placeholder for form items
2316 * added in the wrong order due to the ordering of table rendering. */
2318 struct form *form;
2320 foreach (form, document->forms) {
2321 if (form->form_num)
2322 continue;
2324 if (list_empty(form->items))
2325 done_form(form);
2327 break;
2331 /* @part was residing in html_context so it has to stay alive until
2332 * done_html_parser(). */
2333 done_string(&head);
2334 mem_free_if(part);
2336 #if 0 /* debug purpose */
2338 FILE *f = fopen("forms", "ab");
2339 struct form_control *form;
2340 unsigned char *qq;
2341 fprintf(f,"FORM:\n");
2342 foreach (form, document->forms) {
2343 fprintf(f, "g=%d f=%d c=%d t:%d\n",
2344 form->g_ctrl_num, form->form_num,
2345 form->ctrl_num, form->type);
2347 fprintf(f,"fragment: \n");
2348 for (qq = start; qq < end; qq++) fprintf(f, "%c", *qq);
2349 fprintf(f,"----------\n\n");
2350 fclose(f);
2352 #endif
2356 find_tag(struct document *document, unsigned char *name, int namelen)
2358 struct tag *tag;
2360 foreach (tag, document->tags)
2361 if (!strlcasecmp(tag->name, -1, name, namelen))
2362 return tag->y;
2364 return -1;