Bug 879, set_hline: Discard U+00AD SOFT HYPHEN characters if UTF-8.
[elinks.git] / src / document / html / renderer.c
blob440203191a6c6d3ef66817797a2d41cb489adbf1
1 /* HTML renderer */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <ctype.h>
8 #include <stdarg.h>
9 #include <string.h>
11 #include "elinks.h"
13 #include "cache/cache.h"
14 #include "config/options.h"
15 #include "document/docdata.h"
16 #include "document/document.h"
17 #include "document/html/frames.h"
18 #include "document/html/parser.h"
19 #include "document/html/parser/parse.h"
20 #include "document/html/renderer.h"
21 #include "document/html/tables.h"
22 #include "document/options.h"
23 #include "document/refresh.h"
24 #include "document/renderer.h"
25 #include "intl/charsets.h"
26 #include "osdep/types.h"
27 #include "protocol/uri.h"
28 #include "session/session.h"
29 #include "terminal/color.h"
30 #include "terminal/draw.h"
31 #include "util/color.h"
32 #include "util/conv.h"
33 #include "util/error.h"
34 #include "util/hash.h"
35 #include "util/lists.h"
36 #include "util/memory.h"
37 #include "util/string.h"
38 #include "util/time.h"
39 #include "viewer/text/form.h"
40 #include "viewer/text/view.h"
41 #include "viewer/text/vs.h"
43 /* Unsafe macros */
44 #include "document/html/internal.h"
46 /* Types and structs */
48 /* Tags are used for ``id''s or anchors in the document referenced by the
49 * fragment part of the URI. */
50 /* FIXME: This and find_tag() should be part of the general infrastructure
51 * in document/document.*. --pasky */
52 struct tag {
53 LIST_HEAD(struct tag);
55 int x, y;
56 unsigned char name[1]; /* must be last of struct. --Zas */
/* Link state handed to set_hline() and get_format_screen_char().
 * Only LINK_STATE_NONE is tested for explicitly in this part of the file;
 * NOTE(review): NEW/SAME presumably tell a freshly started link from a
 * continued one -- confirm against the caller. */
enum link_state {
	LINK_STATE_NONE,
	LINK_STATE_NEW,
	LINK_STATE_SAME,
};
/* Snapshot of link related data kept inside struct renderer_context.
 * NOTE(review): presumably the values of the link being rendered, kept to
 * detect when a new link starts -- confirm against the code using it. */
struct link_state_info {
	unsigned char *link;
	unsigned char *target;
	unsigned char *image;
	struct form_control *form;
};
/* Lookup key of one table cache entry. The key consists of the boundaries
 * of a piece of source text plus a handful of integer parameters.
 * NOTE(review): presumably the layout parameters the cached part was
 * formatted with -- confirm against the code that fills the key. */
struct table_cache_entry_key {
	unsigned char *start;
	unsigned char *end;
	int align;
	int margin;
	int width;
	int x;
	int link_num;
};
82 struct table_cache_entry {
83 LIST_HEAD(struct table_cache_entry);
85 struct table_cache_entry_key key;
86 struct part part;
/* Upper bound on the number of entries kept in the table cache, which is
 * used for nested tables. */
#define MAX_TABLE_CACHE_ENTRIES 16384

/* Global variables */

/* Counter belonging to the table cache below. */
static int table_cache_entries;
/* The table cache itself. */
static struct hash *table_cache;
97 struct renderer_context {
98 int last_link_to_move;
99 struct tag *last_tag_to_move;
100 /* All tags between document->tags and this tag (inclusive) should
101 * be aligned to the next line break, unless some real content follows
102 * the tag. Therefore, this virtual tags list accumulates new tags as
103 * they arrive and empties when some real content is written; if a line
104 * break is inserted in the meanwhile, the tags follow it (ie. imagine
105 * <a name="x"> <p>, then the "x" tag follows the line breaks inserted
106 * by the <p> tag). */
107 struct tag *last_tag_for_newline;
109 struct link_state_info link_state_info;
111 struct conv_table *convert_table;
113 /* Used for setting cache info from HTTP-EQUIV meta tags. */
114 struct cache_entry *cached;
116 int g_ctrl_num;
117 int subscript; /* Count stacked subscripts */
118 int supscript; /* Count stacked supscripts */
120 unsigned int empty_format:1;
121 unsigned int nobreak:1;
122 unsigned int nosearchable:1;
123 unsigned int nowrap:1; /* Activated/deactivated by SP_NOWRAP. */
126 static struct renderer_context renderer_context;
129 /* Prototypes */
130 static void line_break(struct html_context *);
131 static void put_chars(struct html_context *, unsigned char *, int);
133 #define X(x_) (part->box.x + (x_))
134 #define Y(y_) (part->box.y + (y_))
136 #define SPACES_GRANULARITY 0x7F
138 #define ALIGN_SPACES(x, o, n) mem_align_alloc(x, o, n, SPACES_GRANULARITY)
140 static inline void
141 set_screen_char_color(struct screen_char *schar,
142 color_T bgcolor, color_T fgcolor,
143 enum color_flags color_flags,
144 enum color_mode color_mode)
146 struct color_pair colors = INIT_COLOR_PAIR(bgcolor, fgcolor);
148 set_term_color(schar, &colors, color_flags, color_mode);
151 static int
152 realloc_line(struct html_context *html_context, struct document *document,
153 int y, int length)
155 struct screen_char *pos, *end;
156 struct line *line;
157 int orig_length;
159 if (!realloc_lines(document, y))
160 return -1;
162 line = &document->data[y];
163 orig_length = line->length;
165 if (length < orig_length)
166 return orig_length;
168 if (!ALIGN_LINE(&line->chars, line->length, length + 1))
169 return -1;
171 /* We cannot rely on the aligned allocation to clear the members for us
172 * since for line splitting we simply trim the length. Question is if
173 * it is better to to clear the line after the splitting or here. */
174 end = &line->chars[length];
175 end->data = ' ';
176 end->attr = 0;
177 set_screen_char_color(end, par_format.bgcolor, 0x0,
178 0, document->options.color_mode);
180 for (pos = &line->chars[line->length]; pos < end; pos++) {
181 copy_screen_chars(pos, end, 1);
184 line->length = length + 1;
186 return orig_length;
189 void
190 expand_lines(struct html_context *html_context, struct part *part,
191 int x, int y, int lines, color_T bgcolor)
193 int line;
195 assert(part && part->document);
196 if_assert_failed return;
198 if (!use_document_bg_colors(&part->document->options))
199 return;
201 par_format.bgcolor = bgcolor;
203 for (line = 0; line < lines; line++)
204 realloc_line(html_context, part->document, Y(y + line), X(x));
207 static inline int
208 realloc_spaces(struct part *part, int length)
210 if (length < part->spaces_len)
211 return 0;
213 if (!ALIGN_SPACES(&part->spaces, part->spaces_len, length))
214 return -1;
215 #ifdef CONFIG_UTF8
216 if (!ALIGN_SPACES(&part->char_width, part->spaces_len, length))
217 return -1;
218 #endif
220 part->spaces_len = length;
222 return 0;
/* Accessors using part-relative coordinates; they expect a variable named
 * `part' in scope.
 * LINE(y)   - the document line holding part row y.
 * POS(x, y) - the screen char at part position (x, y).
 * LEN(y)    - number of cells of that line lying at or right of the part's
 *             left edge, never negative. */
#define LINE(y_) part->document->data[Y(y_)]
#define POS(x_, y_) LINE(y_).chars[X(x_)]
#define LEN(y_) int_max(LINE(y_).length - part->box.x, 0)
231 /* When we clear chars we want to preserve and use the background colors
232 * already in place else we could end up ``staining'' the background especial
233 * when drawing table cells. So make the cleared chars share the colors in
234 * place. */
235 static inline void
236 clear_hchars(struct html_context *html_context, int x, int y, int width)
238 struct part *part;
239 struct screen_char *pos, *end;
241 assert(html_context);
242 if_assert_failed return;
244 part = html_context->part;
246 assert(part && part->document && width > 0);
247 if_assert_failed return;
249 if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1) < 0)
250 return;
252 assert(part->document->data);
253 if_assert_failed return;
255 pos = &POS(x, y);
256 end = pos + width - 1;
257 end->data = ' ';
258 end->attr = 0;
259 set_screen_char_color(end, par_format.bgcolor, 0x0,
260 0, part->document->options.color_mode);
262 while (pos < end)
263 copy_screen_chars(pos++, end, 1);
266 /* TODO: Merge parts with get_format_screen_char(). --jonas */
267 /* Allocates the required chars on the given line and returns the char at
268 * position (x, y) ready to be used as a template char. */
269 static inline struct screen_char *
270 get_frame_char(struct html_context *html_context, struct part *part,
271 int x, int y, unsigned char data,
272 color_T bgcolor, color_T fgcolor)
274 struct screen_char *template;
276 assert(html_context);
277 if_assert_failed return NULL;
279 assert(part && part->document && x >= 0 && y >= 0);
280 if_assert_failed return NULL;
282 if (realloc_line(html_context, part->document, Y(y), X(x)) < 0)
283 return NULL;
285 assert(part->document->data);
286 if_assert_failed return NULL;
288 template = &POS(x, y);
289 template->data = data;
290 template->attr = SCREEN_ATTR_FRAME;
291 set_screen_char_color(template, bgcolor, fgcolor,
292 part->document->options.color_flags,
293 part->document->options.color_mode);
295 return template;
298 void
299 draw_frame_hchars(struct part *part, int x, int y, int width,
300 unsigned char data, color_T bgcolor, color_T fgcolor,
301 struct html_context *html_context)
303 struct screen_char *template;
305 assert(width > 0);
306 if_assert_failed return;
308 template = get_frame_char(html_context, part, x + width - 1, y, data, bgcolor, fgcolor);
309 if (!template) return;
311 /* The template char is the last we need to draw so only decrease @width. */
312 for (width -= 1; width; width--, x++) {
313 copy_screen_chars(&POS(x, y), template, 1);
317 void
318 draw_frame_vchars(struct part *part, int x, int y, int height,
319 unsigned char data, color_T bgcolor, color_T fgcolor,
320 struct html_context *html_context)
322 struct screen_char *template = get_frame_char(html_context, part, x, y,
323 data, bgcolor, fgcolor);
325 if (!template) return;
327 /* The template char is the first vertical char to be drawn. So
328 * copy it to the rest. */
329 for (height -= 1, y += 1; height; height--, y++) {
330 if (realloc_line(html_context, part->document, Y(y), X(x)) < 0)
331 return;
333 copy_screen_chars(&POS(x, y), template, 1);
337 static inline struct screen_char *
338 get_format_screen_char(struct html_context *html_context,
339 enum link_state link_state)
341 static struct text_attrib_style ta_cache = { -1, 0x0, 0x0 };
342 static struct screen_char schar_cache;
344 if (memcmp(&ta_cache, &format.style, sizeof(ta_cache))) {
345 copy_struct(&ta_cache, &format.style);
347 schar_cache.attr = 0;
348 if (format.style.attr) {
349 if (format.style.attr & AT_UNDERLINE) {
350 schar_cache.attr |= SCREEN_ATTR_UNDERLINE;
353 if (format.style.attr & AT_BOLD) {
354 schar_cache.attr |= SCREEN_ATTR_BOLD;
357 if (format.style.attr & AT_ITALIC) {
358 schar_cache.attr |= SCREEN_ATTR_ITALIC;
361 if (format.style.attr & AT_GRAPHICS) {
362 schar_cache.attr |= SCREEN_ATTR_FRAME;
366 if (link_state != LINK_STATE_NONE
367 && html_context->options->underline_links) {
368 schar_cache.attr |= SCREEN_ATTR_UNDERLINE;
371 set_screen_char_color(&schar_cache, format.style.bg, format.style.fg,
372 html_context->options->color_flags,
373 html_context->options->color_mode);
376 if (!!(schar_cache.attr & SCREEN_ATTR_UNSEARCHABLE)
377 ^ !!renderer_context.nosearchable) {
378 schar_cache.attr ^= SCREEN_ATTR_UNSEARCHABLE;
381 return &schar_cache;
384 #ifdef CONFIG_UTF8
385 /* First possibly do the format change and then find out what coordinates
386 * to use since sub- or superscript might change them */
387 static inline int
388 set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
389 enum link_state link_state)
391 struct part *const part = html_context->part;
392 struct screen_char *const schar = get_format_screen_char(html_context,
393 link_state);
394 int x = part->cx;
395 const int y = part->cy;
396 const int x2 = x;
397 int len = charslen;
398 const int utf8 = html_context->options->utf8;
399 int orig_length;
401 assert(part);
402 if_assert_failed return len;
404 assert(charslen >= 0);
406 if (realloc_spaces(part, x + charslen))
407 return 0;
409 /* U+00AD SOFT HYPHEN characters in HTML documents are
410 * supposed to be displayed only if the word is broken at that
411 * point. ELinks currently does not use them, so it should
412 * not display them. If the input @chars is in UTF-8, then
413 * set_hline() discards the characters. If the input is in
414 * some other charset, then set_hline() does not know which
415 * byte that charset uses for U+00AD, so it cannot discard
416 * the characters; instead, the translation table used by
417 * convert_string() has already discarded the characters.
419 * Likewise, if the input @chars is in UTF-8, then it may
420 * contain U+00A0 NO-BREAK SPACE characters; but if the input
421 * is in some other charset, then the translation table
422 * has mapped those characters to NBSP_CHAR. */
424 if (part->document) {
425 /* Reallocate LINE(y).chars[] to large enough. The
426 * last parameter of realloc_line is the index of the
427 * last element to which we may want to write,
428 * i.e. one less than the required size of the array.
429 * Compute the required size by assuming that each
430 * byte of input will need at most one character cell.
431 * (All double-cell characters take up at least two
432 * bytes in UTF-8, and there are no triple-cell or
433 * wider characters.) However, if there already is an
434 * incomplete character in part->document->buf, then
435 * the first byte of input can result in a double-cell
436 * character, so we must reserve one extra element. */
437 orig_length = realloc_line(html_context, part->document,
438 Y(y), X(x) + charslen);
439 if (orig_length < 0) /* error */
440 return 0;
441 if (utf8) {
442 unsigned char *const end = chars + charslen;
443 unicode_val_T data;
445 if (part->document->buf_length) {
446 /* previous char was broken in the middle */
447 int length = utf8charlen(part->document->buf);
448 unsigned char i;
449 unsigned char *buf_ptr = part->document->buf;
451 for (i = part->document->buf_length; i < length && chars < end;) {
452 part->document->buf[i++] = *chars++;
454 part->document->buf_length = i;
455 part->document->buf[i] = '\0';
456 data = utf8_to_unicode(&buf_ptr, buf_ptr + i);
457 if (data != UCS_NO_CHAR) {
458 /* FIXME: If there was invalid
459 * UTF-8 in the buffer,
460 * @utf8_to_unicode may have left
461 * some bytes unused. Those
462 * bytes should be pulled back
463 * into @chars, rather than
464 * discarded. This is not
465 * trivial to implement because
466 * each byte may have arrived in
467 * a separate call. */
468 part->document->buf_length = 0;
469 goto good_char;
470 } else {
471 /* Still not full char */
472 LINE(y).length = orig_length;
473 return 0;
477 while (chars < end) {
478 /* ELinks does not use NBSP_CHAR in UTF-8. */
480 data = utf8_to_unicode(&chars, end);
481 if (data == UCS_NO_CHAR) {
482 part->spaces[x] = 0;
483 if (charslen == 1) {
484 /* HR */
485 unsigned char attr = schar->attr;
487 schar->data = *chars++;
488 schar->attr = SCREEN_ATTR_FRAME;
489 copy_screen_chars(&POS(x, y), schar, 1);
490 schar->attr = attr;
491 part->char_width[x++] = 0;
492 continue;
493 } else {
494 unsigned char i;
496 for (i = 0; chars < end;i++) {
497 part->document->buf[i] = *chars++;
499 part->document->buf_length = i;
500 break;
502 } else {
503 good_char:
504 if (data == UCS_SOFT_HYPHEN)
505 continue;
506 if (data == UCS_NO_BREAK_SPACE
507 && html_context->options->wrap_nbsp)
508 data = UCS_SPACE;
509 part->spaces[x] = (data == UCS_SPACE);
510 if (unicode_to_cell(data) == 2) {
511 schar->data = (unicode_val_T)data;
512 part->char_width[x] = 2;
513 copy_screen_chars(&POS(x++, y), schar, 1);
514 schar->data = UCS_NO_CHAR;
515 part->spaces[x] = 0;
516 part->char_width[x] = 0;
517 } else {
518 part->char_width[x] = unicode_to_cell(data);
519 schar->data = (unicode_val_T)data;
522 copy_screen_chars(&POS(x++, y), schar, 1);
523 } /* while chars < end */
524 } else { /* not UTF-8 */
525 for (; charslen > 0; charslen--, x++, chars++) {
526 part->char_width[x] = 1;
527 if (*chars == NBSP_CHAR) {
528 schar->data = ' ';
529 part->spaces[x] = html_context->options->wrap_nbsp;
530 } else {
531 part->spaces[x] = (*chars == ' ');
532 schar->data = *chars;
534 copy_screen_chars(&POS(x, y), schar, 1);
536 } /* end of UTF-8 check */
538 /* Assert that we haven't written past the end of the
539 * LINE(y).chars array. @x here is one greater than
540 * the last one used in POS(x, y). Instead of this,
541 * we could assert(X(x) < LINE(y).length) immediately
542 * before each @copy_screen_chars call above, but
543 * those are in an inner loop that should be fast. */
544 assert(X(x) <= LINE(y).length);
545 /* Some part of the code is apparently using LINE(y).length
546 * for line-wrapping decisions. It may currently be too
547 * large because it was allocated above based on @charslen
548 * which is the number of bytes, not the number of cells.
549 * Change the length to the correct size, but don't let it
550 * get smaller than it was on entry to this function. */
551 LINE(y).length = int_max(orig_length, X(x));
552 len = x - x2;
553 } else { /* part->document == NULL */
554 if (utf8) {
555 unsigned char *const end = chars + charslen;
557 while (chars < end) {
558 unicode_val_T data;
560 data = utf8_to_unicode(&chars, end);
561 if (data == UCS_SOFT_HYPHEN)
562 continue;
563 part->spaces[x] = (data == UCS_SPACE);
564 part->char_width[x] = unicode_to_cell(data);
565 if (part->char_width[x] == 2) {
566 x++;
567 part->spaces[x] = 0;
568 part->char_width[x] = 0;
570 if (data == UCS_NO_CHAR) {
571 /* this is at the end only */
572 return x - x2;
574 x++;
575 } /* while chars < end */
576 len = x - x2;
577 } else { /* not UTF-8 */
578 for (; charslen > 0; charslen--, x++, chars++) {
579 part->spaces[x] = (*chars == ' ');
580 part->char_width[x] = 1;
583 } /* end of part->document check */
584 return len;
586 #else
588 /* First possibly do the format change and then find out what coordinates
589 * to use since sub- or superscript might change them */
590 static inline void
591 set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
592 enum link_state link_state)
594 struct part *part = html_context->part;
595 struct screen_char *schar = get_format_screen_char(html_context,
596 link_state);
597 int x = part->cx;
598 int y = part->cy;
600 assert(part);
601 if_assert_failed return;
603 if (realloc_spaces(part, x + charslen))
604 return;
606 if (part->document) {
607 if (realloc_line(html_context, part->document,
608 Y(y), X(x) + charslen - 1) < 0)
609 return;
611 for (; charslen > 0; charslen--, x++, chars++) {
612 if (*chars == NBSP_CHAR) {
613 schar->data = ' ';
614 part->spaces[x] = html_context->options->wrap_nbsp;
615 } else {
616 part->spaces[x] = (*chars == ' ');
617 schar->data = *chars;
619 copy_screen_chars(&POS(x, y), schar, 1);
621 } else {
622 for (; charslen > 0; charslen--, x++, chars++) {
623 part->spaces[x] = (*chars == ' ');
627 #endif /* CONFIG_UTF8 */
629 static void
630 move_links(struct html_context *html_context, int xf, int yf, int xt, int yt)
632 struct part *part;
633 struct tag *tag;
634 int nlink = renderer_context.last_link_to_move;
635 int matched = 0;
637 assert(html_context);
638 if_assert_failed return;
640 part = html_context->part;
642 assert(part && part->document);
643 if_assert_failed return;
645 if (!realloc_lines(part->document, Y(yt)))
646 return;
648 for (; nlink < part->document->nlinks; nlink++) {
649 struct link *link = &part->document->links[nlink];
650 int i;
652 for (i = 0; i < link->npoints; i++) {
653 /* Fix for bug 479 (part one) */
654 /* The scenario that triggered it:
656 * Imagine a centered element containing a really long
657 * word (over half of the screen width long) followed
658 * by a few links with no spaces between them where all
659 * the link text combined with the really long word
660 * will force the line to be wrapped. When rendering
661 * the line first words (including link text words) are
662 * put on one line. Then wrapping is performed moving
663 * all links from current line to the one below. Then
664 * the current line (now only containing the really
665 * long word) is centered. This will trigger a call to
666 * move_links() which will increment.
668 * Without the fix below the centering of the current
669 * line will increment last_link_to_move to that of the
670 * last link which means centering of the next line
671 * with all the links will only move the last link
672 * leaving all the other links' points dangling and
673 * causing buggy link highlighting.
675 * Even links like textareas will be correctly handled
676 * because @last_link_to_move is a way to optimize how
677 * many links move_links() will have to iterate and
678 * this little fix will only decrease the effect of the
679 * optimization by always ensuring it is never
680 * incremented too far. */
681 if (!matched && link->points[i].y > Y(yf)) {
682 matched = 1;
683 continue;
686 if (link->points[i].y != Y(yf))
687 continue;
689 matched = 1;
691 if (link->points[i].x < X(xf))
692 continue;
694 if (yt >= 0) {
695 link->points[i].y = Y(yt);
696 link->points[i].x += -xf + xt;
697 } else {
698 int to_move = link->npoints - (i + 1);
700 assert(to_move >= 0);
702 if (to_move > 0) {
703 memmove(&link->points[i],
704 &link->points[i + 1],
705 to_move *
706 sizeof(*link->points));
707 i--;
710 link->npoints--;
714 if (!matched) {
715 renderer_context.last_link_to_move = nlink;
719 /* Don't move tags when removing links. */
720 if (yt < 0) return;
722 matched = 0;
723 tag = renderer_context.last_tag_to_move;
725 while (list_has_next(part->document->tags, tag)) {
726 tag = tag->next;
728 if (tag->y == Y(yf)) {
729 matched = 1;
730 if (tag->x >= X(xf)) {
731 tag->y = Y(yt);
732 tag->x += -xf + xt;
735 } else if (!matched && tag->y > Y(yf)) {
736 /* Fix for bug 479 (part two) */
737 matched = 1;
740 if (!matched) renderer_context.last_tag_to_move = tag;
744 static inline void
745 copy_chars(struct html_context *html_context, int x, int y, int width, struct screen_char *d)
747 struct part *part;
749 assert(html_context);
750 if_assert_failed return;
752 part = html_context->part;
754 assert(width > 0 && part && part->document && part->document->data);
755 if_assert_failed return;
757 if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1) < 0)
758 return;
760 copy_screen_chars(&POS(x, y), d, width);
763 static inline void
764 move_chars(struct html_context *html_context, int x, int y, int nx, int ny)
766 struct part *part;
768 assert(html_context);
769 if_assert_failed return;
771 part = html_context->part;
773 assert(part && part->document && part->document->data);
774 if_assert_failed return;
776 if (LEN(y) - x <= 0) return;
777 copy_chars(html_context, nx, ny, LEN(y) - x, &POS(x, y));
779 LINE(y).length = X(x);
780 move_links(html_context, x, y, nx, ny);
783 static inline void
784 shift_chars(struct html_context *html_context, int y, int shift)
786 struct part *part;
787 struct screen_char *a;
788 int len;
790 assert(html_context);
791 if_assert_failed return;
793 part = html_context->part;
795 assert(part && part->document && part->document->data);
796 if_assert_failed return;
798 len = LEN(y);
800 a = fmem_alloc(len * sizeof(*a));
801 if (!a) return;
803 copy_screen_chars(a, &POS(0, y), len);
805 clear_hchars(html_context, 0, y, shift);
806 copy_chars(html_context, shift, y, len, a);
807 fmem_free(a);
809 move_links(html_context, 0, y, shift, y);
812 static inline void
813 del_chars(struct html_context *html_context, int x, int y)
815 struct part *part;
817 assert(html_context);
818 if_assert_failed return;
820 part = html_context->part;
822 assert(part && part->document && part->document->data);
823 if_assert_failed return;
825 LINE(y).length = X(x);
826 move_links(html_context, x, y, -1, -1);
/* overlap_width(x): the usable width of format @x. With a non-negative
 * TABLE_LINE_PADDING it is capped at the options box width minus that
 * padding, in which case a variable named `html_context' must be in scope.
 * overlap(x): that width minus the right margin of @x, never negative. */
#if TABLE_LINE_PADDING < 0
# define overlap_width(x) (x).width
#else
# define overlap_width(x) int_min((x).width, \
	html_context->options->box.width - TABLE_LINE_PADDING)
#endif
#define overlap(x) int_max(overlap_width(x) - (x).rightmargin, 0)
837 static int inline
838 split_line_at(struct html_context *html_context, int width)
840 struct part *part;
841 int tmp;
842 int new_width = width + par_format.rightmargin;
844 assert(html_context);
845 if_assert_failed return 0;
847 part = html_context->part;
849 assert(part);
850 if_assert_failed return 0;
852 /* Make sure that we count the right margin to the total
853 * actual box width. */
854 int_lower_bound(&part->box.width, new_width);
856 if (part->document) {
857 assert(part->document->data);
858 if_assert_failed return 0;
859 #ifdef CONFIG_UTF8
860 if (html_context->options->utf8
861 && width < part->spaces_len && part->char_width[width] == 2) {
862 move_chars(html_context, width, part->cy, par_format.leftmargin, part->cy + 1);
863 del_chars(html_context, width, part->cy);
864 } else
865 #endif
867 assertm(POS(width, part->cy).data == ' ',
868 "bad split: %c", POS(width, part->cy).data);
869 move_chars(html_context, width + 1, part->cy, par_format.leftmargin, part->cy + 1);
870 del_chars(html_context, width, part->cy);
875 #ifdef CONFIG_UTF8
876 if (!(html_context->options->utf8
877 && width < part->spaces_len
878 && part->char_width[width] == 2))
879 #endif
880 width++; /* Since we were using (x + 1) only later... */
882 tmp = part->spaces_len - width;
883 if (tmp > 0) {
884 /* 0 is possible and I'm paranoid ... --Zas */
885 memmove(part->spaces, part->spaces + width, tmp);
886 #ifdef CONFIG_UTF8
887 memmove(part->char_width, part->char_width + width, tmp);
888 #endif
891 assert(tmp >= 0);
892 if_assert_failed tmp = 0;
893 memset(part->spaces + tmp, 0, width);
894 #ifdef CONFIG_UTF8
895 memset(part->char_width + tmp, 0, width);
896 #endif
898 if (par_format.leftmargin > 0) {
899 tmp = part->spaces_len - par_format.leftmargin;
900 assertm(tmp > 0, "part->spaces_len - par_format.leftmargin == %d", tmp);
901 /* So tmp is zero, memmove() should survive that. Don't recover. */
902 memmove(part->spaces + par_format.leftmargin, part->spaces, tmp);
903 #ifdef CONFIG_UTF8
904 memmove(part->char_width + par_format.leftmargin, part->char_width, tmp);
905 #endif
908 part->cy++;
910 if (part->cx == width) {
911 part->cx = -1;
912 int_lower_bound(&part->box.height, part->cy);
913 return 2;
914 } else {
915 part->cx -= width - par_format.leftmargin;
916 int_lower_bound(&part->box.height, part->cy + 1);
917 return 1;
921 /* Here, we scan the line for a possible place where we could split it into two
922 * (breaking it, because it is too long), if it is overlapping from the maximal
923 * box width. */
924 /* Returns 0 if there was found no spot suitable for breaking the line.
925 * 1 if the line was split into two.
926 * 2 if the (second) splitted line is blank (that is useful to determine
927 * ie. if the next line_break() should really break the line; we don't
928 * want to see any blank lines to pop up, do we?). */
929 static int
930 split_line(struct html_context *html_context)
932 struct part *part;
933 int x;
935 assert(html_context);
936 if_assert_failed return 0;
938 part = html_context->part;
940 assert(part);
941 if_assert_failed return 0;
943 #ifdef CONFIG_UTF8
944 if (html_context->options->utf8) {
945 for (x = overlap(par_format); x >= par_format.leftmargin; x--) {
947 if (x < part->spaces_len && (part->spaces[x]
948 || (part->char_width[x] == 2
949 /* Ugly hack. If we haven't place for
950 * double-width characters we print two
951 * double-width characters. */
952 && x != par_format.leftmargin)))
953 return split_line_at(html_context, x);
956 for (x = par_format.leftmargin; x < part->cx ; x++) {
957 if (x < part->spaces_len && (part->spaces[x]
958 || (part->char_width[x] == 2
959 /* We want to break line after _second_
960 * double-width character. */
961 && x > par_format.leftmargin)))
962 return split_line_at(html_context, x);
964 } else
965 #endif
967 for (x = overlap(par_format); x >= par_format.leftmargin; x--)
968 if (x < part->spaces_len && part->spaces[x])
969 return split_line_at(html_context, x);
971 for (x = par_format.leftmargin; x < part->cx ; x++)
972 if (x < part->spaces_len && part->spaces[x])
973 return split_line_at(html_context, x);
976 /* Make sure that we count the right margin to the total
977 * actual box width. */
978 int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
980 return 0;
983 /* Insert @new_spaces spaces before the coordinates @x and @y,
984 * adding those spaces to whatever link is at those coordinates. */
985 /* TODO: Integrate with move_links. */
986 static void
987 insert_spaces_in_link(struct part *part, int x, int y, int new_spaces)
989 int i = part->document->nlinks;
991 x = X(x);
992 y = Y(y);
994 while (i--) {
995 struct link *link = &part->document->links[i];
996 int j = link->npoints;
998 while (j-- > 1) {
999 struct point *point = &link->points[j];
1001 if (point->x != x || point->y != y)
1002 continue;
1004 if (!realloc_points(link, link->npoints + new_spaces))
1005 return;
1007 link->npoints += new_spaces;
1008 point = &link->points[link->npoints - 1];
1010 while (new_spaces--) {
1011 point->x = --x;
1012 point->y = y;
1013 point--;
1016 return;
1021 /* This function is very rare exemplary of clean and beautyful code here.
1022 * Please handle with care. --pasky */
1023 static void
1024 justify_line(struct html_context *html_context, int y)
1026 struct part *part;
1027 struct screen_char *line; /* we save original line here */
1028 int len;
1029 int pos;
1030 int *space_list;
1031 int spaces;
1032 int diff;
1034 assert(html_context);
1035 if_assert_failed return;
1037 part = html_context->part;
1039 assert(part && part->document && part->document->data);
1040 if_assert_failed return;
1042 len = LEN(y);
1043 assert(len > 0);
1044 if_assert_failed return;
1046 line = fmem_alloc(len * sizeof(*line));
1047 if (!line) return;
1049 /* It may sometimes happen that the line is only one char long and that
1050 * char is space - then we're going to write to both [0] and [1], but
1051 * we allocated only one field. Thus, we've to do (len + 1). --pasky */
1052 space_list = fmem_alloc((len + 1) * sizeof(*space_list));
1053 if (!space_list) {
1054 fmem_free(line);
1055 return;
1058 copy_screen_chars(line, &POS(0, y), len);
1060 /* Skip leading spaces */
1062 spaces = 0;
1063 pos = 0;
1065 while (line[pos].data == ' ')
1066 pos++;
1068 /* Yes, this can be negative, we know. But we add one to it always
1069 * anyway, so it's ok. */
1070 space_list[spaces++] = pos - 1;
1072 /* Count spaces */
1074 for (; pos < len; pos++)
1075 if (line[pos].data == ' ')
1076 space_list[spaces++] = pos;
1078 space_list[spaces] = len;
1080 /* Realign line */
1082 /* Diff is the difference between the width of the paragraph
1083 * and the current length of the line. */
1084 diff = overlap(par_format) - len;
1086 /* We check diff > 0 because diff can be negative (i.e., we have
1087 * an unbroken line of length > overlap(par_format))
1088 * even when spaces > 1 if the line has only non-breaking spaces. */
1089 if (spaces > 1 && diff > 0) {
1090 int prev_end = 0;
1091 int word;
1093 clear_hchars(html_context, 0, y, overlap(par_format));
1095 for (word = 0; word < spaces; word++) {
1096 /* We have to increase line length by 'diff' num. of
1097 * characters, so we move 'word'th word 'word_shift'
1098 * characters right. */
1099 int word_start = space_list[word] + 1;
1100 int word_len = space_list[word + 1] - word_start;
1101 int word_shift;
1102 int new_start;
1103 int new_spaces;
1105 assert(word_len >= 0);
1106 if_assert_failed continue;
1107 if (!word_len) continue;
1109 word_shift = (word * diff) / (spaces - 1);
1110 new_start = word_start + word_shift;
1112 copy_chars(html_context, new_start, y, word_len,
1113 &line[word_start]);
1115 new_spaces = new_start - prev_end - 1;
1116 if (word && new_spaces) {
1117 move_links(html_context, prev_end + 1, y, new_start, y);
1118 insert_spaces_in_link(part,
1119 new_start, y, new_spaces);
1122 prev_end = new_start + word_len;
1126 fmem_free(space_list);
1127 fmem_free(line);
1130 static void
1131 align_line(struct html_context *html_context, int y, int last)
1133 struct part *part;
1134 int shift;
1135 int len;
1137 assert(html_context);
1138 if_assert_failed return;
1140 part = html_context->part;
1142 assert(part && part->document && part->document->data);
1143 if_assert_failed return;
1145 len = LEN(y);
1147 if (!len || par_format.align == ALIGN_LEFT)
1148 return;
1150 if (par_format.align == ALIGN_JUSTIFY) {
1151 if (!last)
1152 justify_line(html_context, y);
1153 return;
1156 shift = overlap(par_format) - len;
1157 if (par_format.align == ALIGN_CENTER)
1158 shift /= 2;
1159 if (shift > 0)
1160 shift_chars(html_context, y, shift);
1163 static inline void
1164 init_link_event_hooks(struct html_context *html_context, struct link *link)
1166 link->event_hooks = mem_calloc(1, sizeof(*link->event_hooks));
1167 if (!link->event_hooks) return;
1169 #define add_evhook(list_, type_, src_) \
1170 do { \
1171 struct script_event_hook *evhook; \
1173 if (!src_) break; \
1175 evhook = mem_calloc(1, sizeof(*evhook)); \
1176 if (!evhook) break; \
1178 evhook->type = type_; \
1179 evhook->src = stracpy(src_); \
1180 add_to_list(*(list_), evhook); \
1181 } while (0)
1183 init_list(*link->event_hooks);
1184 add_evhook(link->event_hooks, SEVHOOK_ONCLICK, format.onclick);
1185 add_evhook(link->event_hooks, SEVHOOK_ONDBLCLICK, format.ondblclick);
1186 add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOVER, format.onmouseover);
1187 add_evhook(link->event_hooks, SEVHOOK_ONHOVER, format.onhover);
1188 add_evhook(link->event_hooks, SEVHOOK_ONFOCUS, format.onfocus);
1189 add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOUT, format.onmouseout);
1190 add_evhook(link->event_hooks, SEVHOOK_ONBLUR, format.onblur);
1192 #undef add_evhook
1195 static struct link *
1196 new_link(struct html_context *html_context, unsigned char *name, int namelen)
1198 struct document *document;
1199 struct part *part;
1200 int link_number;
1201 struct link *link;
1203 assert(html_context);
1204 if_assert_failed return NULL;
1206 part = html_context->part;
1208 assert(part);
1209 if_assert_failed return NULL;
1211 document = part->document;
1213 assert(document);
1214 if_assert_failed return NULL;
1216 link_number = part->link_num;
1218 if (!ALIGN_LINK(&document->links, document->nlinks, document->nlinks + 1))
1219 return NULL;
1221 link = &document->links[document->nlinks++];
1222 link->number = link_number - 1;
1223 if (document->options.use_tabindex) link->number += format.tabindex;
1224 link->accesskey = format.accesskey;
1225 link->title = null_or_stracpy(format.title);
1226 link->where_img = null_or_stracpy(format.image);
1228 if (!format.form) {
1229 link->target = null_or_stracpy(format.target);
1230 link->data.name = memacpy(name, namelen);
1231 /* if (strlen(url) > 4 && !strncasecmp(url, "MAP@", 4)) { */
1232 if (format.link
1233 && ((format.link[0]|32) == 'm')
1234 && ((format.link[1]|32) == 'a')
1235 && ((format.link[2]|32) == 'p')
1236 && (format.link[3] == '@')
1237 && format.link[4]) {
1238 link->type = LINK_MAP;
1239 link->where = stracpy(format.link + 4);
1240 } else {
1241 link->type = LINK_HYPERTEXT;
1242 link->where = null_or_stracpy(format.link);
1245 } else {
1246 struct form_control *fc = format.form;
1247 struct form *form;
1249 switch (fc->type) {
1250 case FC_TEXT:
1251 case FC_PASSWORD:
1252 case FC_FILE:
1253 link->type = LINK_FIELD;
1254 break;
1255 case FC_TEXTAREA:
1256 link->type = LINK_AREA;
1257 break;
1258 case FC_CHECKBOX:
1259 case FC_RADIO:
1260 link->type = LINK_CHECKBOX;
1261 break;
1262 case FC_SELECT:
1263 link->type = LINK_SELECT;
1264 break;
1265 case FC_SUBMIT:
1266 case FC_IMAGE:
1267 case FC_RESET:
1268 case FC_BUTTON:
1269 case FC_HIDDEN:
1270 link->type = LINK_BUTTON;
1272 link->data.form_control = fc;
1273 /* At this point, format.form might already be set but
1274 * the form_control not registered through SP_CONTROL
1275 * yet, therefore without fc->form set. It is always
1276 * after the "good" last form was already processed,
1277 * though, so we can safely just take that. */
1278 form = fc->form;
1279 if (!form && !list_empty(document->forms))
1280 form = document->forms.next;
1281 link->target = null_or_stracpy(form ? form->target : NULL);
1284 link->color.background = format.style.bg;
1285 link->color.foreground = link_is_textinput(link)
1286 ? format.style.fg : format.clink;
1288 init_link_event_hooks(html_context, link);
1290 document->links_sorted = 0;
1291 return link;
1294 static void
1295 html_special_tag(struct document *document, unsigned char *t, int x, int y)
1297 struct tag *tag;
1298 int tag_len;
1300 assert(document);
1301 if_assert_failed return;
1303 tag_len = strlen(t);
1304 /* One byte is reserved for name in struct tag. */
1305 tag = mem_alloc(sizeof(*tag) + tag_len);
1306 if (!tag) return;
1308 tag->x = x;
1309 tag->y = y;
1310 memcpy(tag->name, t, tag_len + 1);
1311 add_to_list(document->tags, tag);
1312 if (renderer_context.last_tag_for_newline == (struct tag *) &document->tags)
1313 renderer_context.last_tag_for_newline = tag;
1317 static void
1318 put_chars_conv(struct html_context *html_context,
1319 unsigned char *chars, int charslen)
1321 struct part *part;
1323 assert(html_context);
1324 if_assert_failed return;
1326 part = html_context->part;
1328 assert(part && chars && charslen);
1329 if_assert_failed return;
1331 if (format.style.attr & AT_GRAPHICS) {
1332 put_chars(html_context, chars, charslen);
1333 return;
1336 convert_string(renderer_context.convert_table, chars, charslen,
1337 html_context->options->cp,
1338 CSM_DEFAULT, NULL, (void (*)(void *, unsigned char *, int)) put_chars, html_context);
1341 static inline void
1342 put_link_number(struct html_context *html_context)
1344 struct part *part = html_context->part;
1345 unsigned char s[64];
1346 unsigned char *fl = format.link;
1347 unsigned char *ft = format.target;
1348 unsigned char *fi = format.image;
1349 struct form_control *ff = format.form;
1350 int slen = 0;
1352 format.link = format.target = format.image = NULL;
1353 format.form = NULL;
1355 s[slen++] = '[';
1356 ulongcat(s, &slen, part->link_num, sizeof(s) - 3, 0);
1357 s[slen++] = ']';
1358 s[slen] = '\0';
1360 renderer_context.nosearchable = 1;
1361 put_chars(html_context, s, slen);
1362 renderer_context.nosearchable = 0;
1364 if (ff && ff->type == FC_TEXTAREA) line_break(html_context);
1366 /* We might have ended up on a new line after the line breaking
1367 * or putting the link number chars. */
1368 if (part->cx == -1) part->cx = par_format.leftmargin;
1370 format.link = fl;
1371 format.target = ft;
1372 format.image = fi;
1373 format.form = ff;
1376 #define assert_link_variable(old, new) \
1377 assertm(!(old), "Old link value [%s]. New value [%s]", old, new);
1379 static inline void
1380 init_link_state_info(unsigned char *link, unsigned char *target,
1381 unsigned char *image, struct form_control *form)
1383 assert_link_variable(renderer_context.link_state_info.image, image);
1384 assert_link_variable(renderer_context.link_state_info.target, target);
1385 assert_link_variable(renderer_context.link_state_info.link, link);
1387 renderer_context.link_state_info.link = null_or_stracpy(link);
1388 renderer_context.link_state_info.target = null_or_stracpy(target);
1389 renderer_context.link_state_info.image = null_or_stracpy(image);
1390 renderer_context.link_state_info.form = form;
1393 static inline void
1394 done_link_state_info(void)
1396 mem_free_if(renderer_context.link_state_info.link);
1397 mem_free_if(renderer_context.link_state_info.target);
1398 mem_free_if(renderer_context.link_state_info.image);
1399 memset(&renderer_context.link_state_info, 0,
1400 sizeof(renderer_context.link_state_info));
1403 #ifdef CONFIG_UTF8
1404 static inline void
1405 process_link(struct html_context *html_context, enum link_state link_state,
1406 unsigned char *chars, int charslen, int cells)
1407 #else
1408 static inline void
1409 process_link(struct html_context *html_context, enum link_state link_state,
1410 unsigned char *chars, int charslen)
1411 #endif /* CONFIG_UTF8 */
1413 struct part *part = html_context->part;
1414 struct link *link;
1415 int x_offset = 0;
1417 switch (link_state) {
1418 case LINK_STATE_SAME: {
1419 unsigned char *name;
1421 if (!part->document) return;
1423 assertm(part->document->nlinks > 0, "no link");
1424 if_assert_failed return;
1426 link = &part->document->links[part->document->nlinks - 1];
1428 name = get_link_name(link);
1429 if (name) {
1430 unsigned char *new_name;
1432 new_name = straconcat(name, chars,
1433 (unsigned char *) NULL);
1434 if (new_name) {
1435 mem_free(name);
1436 link->data.name = new_name;
1440 /* FIXME: Concatenating two adjectent <a> elements to a single
1441 * link is broken since we lose the event handlers for the
1442 * second one. OTOH simply appending them here won't fly since
1443 * we may get here multiple times for even a single link. We
1444 * will probably need some SP_ for creating a new link or so.
1445 * --pasky */
1447 break;
1450 case LINK_STATE_NEW:
1451 part->link_num++;
1453 init_link_state_info(format.link, format.target,
1454 format.image, format.form);
1455 if (!part->document) return;
1457 /* Trim leading space from the link text */
1458 while (x_offset < charslen && chars[x_offset] <= ' ')
1459 x_offset++;
1461 if (x_offset) {
1462 charslen -= x_offset;
1463 chars += x_offset;
1464 #ifdef CONFIG_UTF8
1465 cells -= x_offset;
1466 #endif /* CONFIG_UTF8 */
1469 link = new_link(html_context, chars, charslen);
1470 if (!link) return;
1472 break;
1474 case LINK_STATE_NONE:
1475 default:
1476 INTERNAL("bad link_state %i", (int) link_state);
1477 return;
1480 /* Add new canvas positions to the link. */
1481 #ifdef CONFIG_UTF8
1482 if (realloc_points(link, link->npoints + cells))
1483 #else
1484 if (realloc_points(link, link->npoints + charslen))
1485 #endif /* CONFIG_UTF8 */
1487 struct point *point = &link->points[link->npoints];
1488 int x = X(part->cx) + x_offset;
1489 int y = Y(part->cy);
1491 #ifdef CONFIG_UTF8
1492 link->npoints += cells;
1494 for (; cells > 0; cells--, point++, x++)
1495 #else
1496 link->npoints += charslen;
1498 for (; charslen > 0; charslen--, point++, x++)
1499 #endif /* CONFIG_UTF8 */
1501 point->x = x;
1502 point->y = y;
1507 static inline enum link_state
1508 get_link_state(struct html_context *html_context)
1510 enum link_state state;
1512 if (!(format.link || format.image || format.form)) {
1513 state = LINK_STATE_NONE;
1515 } else if ((renderer_context.link_state_info.link
1516 || renderer_context.link_state_info.image
1517 || renderer_context.link_state_info.form)
1518 && !xstrcmp(format.link, renderer_context.link_state_info.link)
1519 && !xstrcmp(format.target, renderer_context.link_state_info.target)
1520 && !xstrcmp(format.image, renderer_context.link_state_info.image)
1521 && format.form == renderer_context.link_state_info.form) {
1523 return LINK_STATE_SAME;
1525 } else {
1526 state = LINK_STATE_NEW;
1529 done_link_state_info();
1531 return state;
/* Return 1 if any of the first @charslen bytes of @chars is not
 * whitespace according to isspace(), 0 otherwise (including when
 * @charslen is not positive). */
static inline int
html_has_non_space_chars(unsigned char *chars, int charslen)
{
	int i;

	for (i = 0; i < charslen; i++) {
		if (!isspace(chars[i]))
			return 1;
	}

	return 0;
}
1546 static void
1547 put_chars(struct html_context *html_context, unsigned char *chars, int charslen)
1549 enum link_state link_state;
1550 struct part *part;
1551 #ifdef CONFIG_UTF8
1552 int cells;
1553 #endif /* CONFIG_UTF8 */
1555 assert(html_context);
1556 if_assert_failed return;
1558 part = html_context->part;
1560 assert(part);
1561 if_assert_failed return;
1563 assert(chars && charslen);
1564 if_assert_failed return;
1566 /* If we are not handling verbatim aligning and we are at the begining
1567 * of a line trim whitespace. */
1568 if (part->cx == -1) {
1569 /* If we are not handling verbatim aligning trim leading
1570 * whitespaces. */
1571 if (!html_is_preformatted()) {
1572 while (charslen && *chars == ' ') {
1573 chars++;
1574 charslen--;
1577 if (charslen < 1) return;
1580 part->cx = par_format.leftmargin;
1583 /* For preformatted html always update 'the last tag' so we never end
1584 * up moving tags to the wrong line (Fixes bug 324). For all other html
1585 * it is moved only when the line being rendered carry some real
1586 * non-whitespace content. */
1587 if (html_is_preformatted()
1588 || html_has_non_space_chars(chars, charslen)) {
1589 renderer_context.last_tag_for_newline = (struct tag *) &part->document->tags;
1592 int_lower_bound(&part->box.height, part->cy + 1);
1594 link_state = get_link_state(html_context);
1596 if (link_state == LINK_STATE_NEW) {
1597 int x_offset = 0;
1599 /* Don't add inaccessible links. It seems to be caused
1600 * by the parser putting a space char after stuff like
1601 * <img>-tags or comments wrapped in <a>-tags. See bug
1602 * 30 for test case. */
1603 while (x_offset < charslen && chars[x_offset] <= ' ')
1604 x_offset++;
1606 /* For pure spaces reset the link state */
1607 if (x_offset == charslen)
1608 link_state = LINK_STATE_NONE;
1609 else if (html_context->options->links_numbering)
1610 put_link_number(html_context);
1612 #ifdef CONFIG_UTF8
1613 cells =
1614 #endif /* CONFIG_UTF8 */
1615 set_hline(html_context, chars, charslen, link_state);
1617 if (link_state != LINK_STATE_NONE) {
1618 #ifdef CONFIG_UTF8
1619 process_link(html_context, link_state, chars, charslen,
1620 cells);
1621 #else
1622 process_link(html_context, link_state, chars, charslen);
1623 #endif /* CONFIG_UTF8 */
1626 #ifdef CONFIG_UTF8
1627 if (renderer_context.nowrap
1628 && part->cx + cells > overlap(par_format))
1629 return;
1631 part->cx += cells;
1632 #else
1633 if (renderer_context.nowrap
1634 && part->cx + charslen > overlap(par_format))
1635 return;
1637 part->cx += charslen;
1638 #endif /* CONFIG_UTF8 */
1640 renderer_context.nobreak = 0;
1642 if (!(html_context->options->wrap || html_is_preformatted())) {
1643 while (part->cx > overlap(par_format)
1644 && part->cx > par_format.leftmargin) {
1645 int x = split_line(html_context);
1647 if (!x) break;
1648 if (part->document)
1649 align_line(html_context, part->cy - 1, 0);
1650 renderer_context.nobreak = !!(x - 1);
1654 assert(charslen > 0);
1655 #ifdef CONFIG_UTF8
1656 part->xa += cells;
1657 #else
1658 part->xa += charslen;
1659 #endif /* CONFIG_UTF8 */
1660 int_lower_bound(&part->max_width, part->xa
1661 + par_format.leftmargin + par_format.rightmargin
1662 - (chars[charslen - 1] == ' '
1663 && !html_is_preformatted()));
1664 return;
1668 #undef overlap
1670 static void
1671 line_break(struct html_context *html_context)
1673 struct part *part;
1674 struct tag *tag;
1676 assert(html_context);
1677 if_assert_failed return;
1679 part = html_context->part;
1681 assert(part);
1682 if_assert_failed return;
1684 int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
1686 if (renderer_context.nobreak) {
1687 renderer_context.nobreak = 0;
1688 part->cx = -1;
1689 part->xa = 0;
1690 return;
1693 if (!part->document || !part->document->data) goto end;
1695 if (!realloc_lines(part->document, part->box.height + part->cy + 1))
1696 return;
1698 if (part->cx > par_format.leftmargin && LEN(part->cy) > part->cx - 1
1699 && POS(part->cx - 1, part->cy).data == ' ') {
1700 del_chars(html_context, part->cx - 1, part->cy);
1701 part->cx--;
1704 if (part->cx > 0) align_line(html_context, part->cy, 1);
1706 for (tag = renderer_context.last_tag_for_newline;
1707 tag && tag != (struct tag *) &part->document->tags;
1708 tag = tag->prev) {
1709 tag->x = X(0);
1710 tag->y = Y(part->cy + 1);
1713 end:
1714 part->cy++;
1715 part->cx = -1;
1716 part->xa = 0;
1717 memset(part->spaces, 0, part->spaces_len);
1718 #ifdef CONFIG_UTF8
1719 memset(part->char_width, 0, part->spaces_len);
1720 #endif
1723 static void
1724 html_special_form(struct part *part, struct form *form)
1726 assert(part && form);
1727 if_assert_failed return;
1729 if (!part->document) {
1730 done_form(form);
1731 return;
1734 if (!list_empty(part->document->forms)) {
1735 struct form *nform;
1737 /* Make sure the new form ``claims'' its slice of the form range
1738 * maintained in the form_num and form_end variables. */
1739 foreach (nform, part->document->forms) {
1740 if (form->form_num < nform->form_num
1741 || nform->form_end < form->form_num)
1742 continue;
1744 /* First check if the form has identical form numbers.
1745 * That should only be the case when the form being
1746 * added is in fact the same form in which case it
1747 * should be dropped. The fact that this can happen
1748 * suggests that the table renderering can be confused.
1749 * See bug 647 for a test case. */
1750 if (nform->form_num == form->form_num
1751 && nform->form_end == form->form_end) {
1752 done_form(form);
1753 return;
1756 /* The form start is inside an already added form, so
1757 * partition the space of the existing form and get
1758 * |old|new|. */
1759 nform->form_end = form->form_num - 1;
1760 assertm(nform->form_num <= nform->form_end,
1761 "[%d:%d] [%d:%d]", nform->form_num, nform->form_end,
1762 form->form_num, form->form_end);
1763 break;
1765 } else {
1766 /* If it is the first form make sure it eats the whole form
1767 * range. */
1768 #if 0
1769 /* Disabled because in tables the parse order may lead to a
1770 * later form being parsed before a preceeding one causing the
1771 * wrong order if we set it to zero. Let's hope it doesn't break
1772 * anything else. */
1773 form->form_num = 0;
1774 #endif
1777 add_to_list(part->document->forms, form);
1780 static void
1781 html_special_form_control(struct part *part, struct form_control *fc)
1783 struct form *form;
1785 assert(part && fc);
1786 if_assert_failed return;
1788 if (!part->document) {
1789 done_form_control(fc);
1790 mem_free(fc);
1791 return;
1794 fc->g_ctrl_num = renderer_context.g_ctrl_num++;
1796 /* We don't want to recode hidden fields. */
1797 if (fc->type == FC_TEXT || fc->type == FC_PASSWORD ||
1798 fc->type == FC_TEXTAREA) {
1799 unsigned char *dv = convert_string(renderer_context.convert_table,
1800 fc->default_value,
1801 strlen(fc->default_value),
1802 part->document->options.cp,
1803 CSM_QUERY, NULL, NULL, NULL);
1805 if (dv) mem_free_set(&fc->default_value, dv);
1808 if (list_empty(part->document->forms)) {
1809 /* No forms encountered yet, that means a homeless form
1810 * control. Generate a dummy form for those Flying
1811 * Dutchmans. */
1812 form = init_form();
1813 form->form_num = 0;
1814 add_to_list(part->document->forms, form);
1816 /* Attach this form control to the last form encountered. */
1817 form = part->document->forms.next;
1818 fc->form = form;
1819 add_to_list(form->items, fc);
1822 /* Reparents form items based on position in the source. */
1823 void
1824 check_html_form_hierarchy(struct part *part)
1826 struct document *document = part->document;
1827 INIT_LIST_HEAD(form_controls);
1828 struct form *form;
1829 struct form_control *fc, *next;
1831 if (list_empty(document->forms))
1832 return;
1834 /* Take out all badly placed form items. */
1836 foreach (form, document->forms) {
1838 assertm(form->form_num <= form->form_end,
1839 "%p [%d : %d]", form, form->form_num, form->form_end);
1841 foreachsafe (fc, next, form->items) {
1842 if (form->form_num <= fc->position
1843 && fc->position <= form->form_end)
1844 continue;
1846 move_to_top_of_list(form_controls, fc);
1850 /* Re-insert the form items the correct places. */
1852 foreachsafe (fc, next, form_controls) {
1854 foreach (form, document->forms) {
1855 if (fc->position < form->form_num
1856 || form->form_end < fc->position)
1857 continue;
1859 fc->form = form;
1860 move_to_top_of_list(form->items, fc);
1861 break;
1865 assert(list_empty(form_controls));
1868 static inline void
1869 color_link_lines(struct html_context *html_context)
1871 struct document *document = html_context->part->document;
1872 struct color_pair colors = INIT_COLOR_PAIR(par_format.bgcolor, 0x0);
1873 enum color_mode color_mode = document->options.color_mode;
1874 enum color_flags color_flags = document->options.color_flags;
1875 int y;
1877 for (y = 0; y < document->height; y++) {
1878 int x;
1880 for (x = 0; x < document->data[y].length; x++) {
1881 struct screen_char *schar = &document->data[y].chars[x];
1883 set_term_color(schar, &colors, color_flags, color_mode);
1885 /* XXX: Entering hack zone! Change to clink color after
1886 * link text has been recolored. */
1887 if (schar->data == ':' && colors.foreground == 0x0)
1888 colors.foreground = format.clink;
1891 colors.foreground = 0x0;
1895 static void *
1896 html_special(struct html_context *html_context, enum html_special_type c, ...)
1898 va_list l;
1899 struct part *part;
1900 struct document *document;
1901 void *ret_val = NULL;
1903 assert(html_context);
1904 if_assert_failed return NULL;
1906 part = html_context->part;
1908 assert(part);
1909 if_assert_failed return NULL;
1911 document = part->document;
1913 va_start(l, c);
1914 switch (c) {
1915 case SP_TAG:
1916 if (document) {
1917 unsigned char *t = va_arg(l, unsigned char *);
1919 html_special_tag(document, t, X(part->cx), Y(part->cy));
1921 break;
1922 case SP_FORM:
1924 struct form *form = va_arg(l, struct form *);
1926 html_special_form(part, form);
1927 break;
1929 case SP_CONTROL:
1931 struct form_control *fc = va_arg(l, struct form_control *);
1933 html_special_form_control(part, fc);
1934 break;
1936 case SP_TABLE:
1937 ret_val = renderer_context.convert_table;
1938 break;
1939 case SP_USED:
1940 ret_val = (void *) (long) !!document;
1941 break;
1942 case SP_CACHE_CONTROL:
1944 struct cache_entry *cached = renderer_context.cached;
1946 cached->cache_mode = CACHE_MODE_NEVER;
1947 cached->expire = 0;
1948 break;
1950 case SP_CACHE_EXPIRES:
1952 time_t expires = va_arg(l, time_t);
1953 struct cache_entry *cached = renderer_context.cached;
1955 if (!expires || cached->cache_mode == CACHE_MODE_NEVER)
1956 break;
1958 timeval_from_seconds(&cached->max_age, expires);
1959 cached->expire = 1;
1960 break;
1962 case SP_FRAMESET:
1964 struct frameset_param *fsp = va_arg(l, struct frameset_param *);
1965 struct frameset_desc *frameset_desc;
1967 if (!fsp->parent && document->frame_desc)
1968 break;
1970 frameset_desc = create_frameset(fsp);
1971 if (!fsp->parent && !document->frame_desc)
1972 document->frame_desc = frameset_desc;
1974 ret_val = frameset_desc;
1975 break;
1977 case SP_FRAME:
1979 struct frameset_desc *parent = va_arg(l, struct frameset_desc *);
1980 unsigned char *name = va_arg(l, unsigned char *);
1981 unsigned char *url = va_arg(l, unsigned char *);
1983 add_frameset_entry(parent, NULL, name, url);
1984 break;
1986 case SP_NOWRAP:
1987 renderer_context.nowrap = !!va_arg(l, int);
1988 break;
1989 case SP_REFRESH:
1991 unsigned long seconds = va_arg(l, unsigned long);
1992 unsigned char *t = va_arg(l, unsigned char *);
1994 if (document) {
1995 if (document->refresh)
1996 done_document_refresh(document->refresh);
1997 document->refresh = init_document_refresh(t, seconds);
1999 break;
2001 case SP_COLOR_LINK_LINES:
2002 if (document && use_document_bg_colors(&document->options))
2003 color_link_lines(html_context);
2004 break;
2005 case SP_STYLESHEET:
2006 #ifdef CONFIG_CSS
2007 if (document) {
2008 struct uri *uri = va_arg(l, struct uri *);
2010 add_to_uri_list(&document->css_imports, uri);
2012 #endif
2013 break;
2014 case SP_SCRIPT:
2015 #ifdef CONFIG_ECMASCRIPT
2016 if (document) {
2017 struct uri *uri = va_arg(l, struct uri *);
2019 add_to_uri_list(&document->ecmascript_imports, uri);
2021 #endif
2022 break;
2025 va_end(l);
2027 return ret_val;
2030 void
2031 free_table_cache(void)
2033 if (table_cache) {
2034 struct hash_item *item;
2035 int i;
2037 /* We do not free key here. */
2038 foreach_hash_item (item, *table_cache, i) {
2039 mem_free_if(item->value);
2042 free_hash(&table_cache);
2043 table_cache_entries = 0;
2047 struct part *
2048 format_html_part(struct html_context *html_context,
2049 unsigned char *start, unsigned char *end,
2050 int align, int margin, int width, struct document *document,
2051 int x, int y, unsigned char *head,
2052 int link_num)
2054 struct part *part;
2055 struct html_element *html_state;
2056 int llm = renderer_context.last_link_to_move;
2057 struct tag *ltm = renderer_context.last_tag_to_move;
2058 int ef = renderer_context.empty_format;
2059 int lm = html_context->margin;
2061 /* Hash creation if needed. */
2062 if (!table_cache) {
2063 table_cache = init_hash8();
2064 } else if (!document) {
2065 /* Search for cached entry. */
2066 struct table_cache_entry_key key;
2067 struct hash_item *item;
2069 /* Clear key to prevent potential alignment problem
2070 * when keys are compared. */
2071 memset(&key, 0, sizeof(key));
2073 key.start = start;
2074 key.end = end;
2075 key.align = align;
2076 key.margin = margin;
2077 key.width = width;
2078 key.x = x;
2079 key.link_num = link_num;
2081 item = get_hash_item(table_cache,
2082 (unsigned char *) &key,
2083 sizeof(key));
2084 if (item) { /* We found it in cache, so just copy and return. */
2085 part = mem_alloc(sizeof(*part));
2086 if (part) {
2087 copy_struct(part, &((struct table_cache_entry *)
2088 item->value)->part);
2089 return part;
2094 assertm(y >= 0, "format_html_part: y == %d", y);
2095 if_assert_failed return NULL;
2097 if (document) {
2098 struct node *node = mem_alloc(sizeof(*node));
2100 if (node) {
2101 int node_width = !html_context->table_level ? INT_MAX : width;
2103 set_box(&node->box, x, y, node_width, 1);
2104 add_to_list(document->nodes, node);
2107 renderer_context.last_link_to_move = document->nlinks;
2108 renderer_context.last_tag_to_move = (struct tag *) &document->tags;
2109 renderer_context.last_tag_for_newline = (struct tag *) &document->tags;
2110 } else {
2111 renderer_context.last_link_to_move = 0;
2112 renderer_context.last_tag_to_move = (struct tag *) NULL;
2113 renderer_context.last_tag_for_newline = (struct tag *) NULL;
2116 html_context->margin = margin;
2117 renderer_context.empty_format = !document;
2119 done_link_state_info();
2120 renderer_context.nobreak = 1;
2122 part = mem_calloc(1, sizeof(*part));
2123 if (!part) goto ret;
2125 part->document = document;
2126 part->box.x = x;
2127 part->box.y = y;
2128 part->cx = -1;
2129 part->cy = 0;
2130 part->link_num = link_num;
2132 html_state = init_html_parser_state(html_context, ELEMENT_IMMORTAL, align, margin, width);
2134 parse_html(start, end, part, head, html_context);
2136 done_html_parser_state(html_context, html_state);
2138 int_lower_bound(&part->max_width, part->box.width);
2140 renderer_context.nobreak = 0;
2142 done_link_state_info();
2143 mem_free_if(part->spaces);
2144 #ifdef CONFIG_UTF8
2145 mem_free_if(part->char_width);
2146 #endif
2148 if (document) {
2149 struct node *node = document->nodes.next;
2151 node->box.height = y - node->box.y + part->box.height;
2154 ret:
2155 renderer_context.last_link_to_move = llm;
2156 renderer_context.last_tag_to_move = ltm;
2157 renderer_context.empty_format = ef;
2159 html_context->margin = lm;
2161 if (html_context->table_level > 1 && !document
2162 && table_cache
2163 && table_cache_entries < MAX_TABLE_CACHE_ENTRIES) {
2164 /* Create a new entry. */
2165 /* Clear memory to prevent bad key comparaison due to alignment
2166 * of key fields. */
2167 struct table_cache_entry *tce = mem_calloc(1, sizeof(*tce));
2168 /* A goto is used here to prevent a test or code
2169 * redundancy. */
2170 if (!tce) goto end;
2172 tce->key.start = start;
2173 tce->key.end = end;
2174 tce->key.align = align;
2175 tce->key.margin = margin;
2176 tce->key.width = width;
2177 tce->key.x = x;
2178 tce->key.link_num = link_num;
2179 copy_struct(&tce->part, part);
2181 if (!add_hash_item(table_cache,
2182 (unsigned char *) &tce->key,
2183 sizeof(tce->key), tce)) {
2184 mem_free(tce);
2185 } else {
2186 table_cache_entries++;
2190 end:
2192 return part;
2195 void
2196 render_html_document(struct cache_entry *cached, struct document *document,
2197 struct string *buffer)
2199 struct html_context *html_context;
2200 struct part *part;
2201 unsigned char *start;
2202 unsigned char *end;
2203 struct string title;
2204 struct string head;
2206 assert(cached && document);
2207 if_assert_failed return;
2209 if (!init_string(&head)) return;
2211 if (cached->head) add_to_string(&head, cached->head);
2213 start = buffer->source;
2214 end = buffer->source + buffer->length;
2216 html_context = init_html_parser(cached->uri, &document->options,
2217 start, end, &head, &title,
2218 put_chars_conv, line_break,
2219 html_special);
2220 if (!html_context) return;
2222 renderer_context.g_ctrl_num = 0;
2223 renderer_context.cached = cached;
2224 renderer_context.convert_table = get_convert_table(head.source,
2225 document->options.cp,
2226 document->options.assume_cp,
2227 &document->cp,
2228 &document->cp_status,
2229 document->options.hard_assume);
2230 #ifdef CONFIG_UTF8
2231 html_context->options->utf8 = is_cp_utf8(document->options.cp);
2232 #endif /* CONFIG_UTF8 */
2234 if (title.length) {
2235 document->title = convert_string(renderer_context.convert_table,
2236 title.source, title.length,
2237 document->options.cp,
2238 CSM_DEFAULT, NULL, NULL, NULL);
2240 done_string(&title);
2242 part = format_html_part(html_context, start, end, par_format.align,
2243 par_format.leftmargin,
2244 document->options.box.width, document,
2245 0, 0, head.source, 1);
2247 /* Drop empty allocated lines at end of document if any
2248 * and adjust document height. */
2249 while (document->height && !document->data[document->height - 1].length)
2250 mem_free_if(document->data[--document->height].chars);
2252 /* Calculate document width. */
2254 int i;
2256 document->width = 0;
2257 for (i = 0; i < document->height; i++)
2258 int_lower_bound(&document->width, document->data[i].length);
2261 #if 1
2262 document->options.needs_width = 1;
2263 #else
2264 /* FIXME: This needs more tuning since if we are centering stuff it
2265 * does not work. */
2266 document->options.needs_width =
2267 (document->width + (document->options.margin
2268 >= document->options.width));
2269 #endif
2271 document->bgcolor = par_format.bgcolor;
2273 done_html_parser(html_context);
2275 /* Drop forms which has been serving as a placeholder for form items
2276 * added in the wrong order due to the ordering of table rendering. */
2278 struct form *form;
2280 foreach (form, document->forms) {
2281 if (form->form_num)
2282 continue;
2284 if (list_empty(form->items))
2285 done_form(form);
2287 break;
2291 /* @part was residing in html_context so it has to stay alive until
2292 * done_html_parser(). */
2293 done_string(&head);
2294 mem_free_if(part);
2296 #if 0 /* debug purpose */
2298 FILE *f = fopen("forms", "ab");
2299 struct form_control *form;
2300 unsigned char *qq;
2301 fprintf(f,"FORM:\n");
2302 foreach (form, document->forms) {
2303 fprintf(f, "g=%d f=%d c=%d t:%d\n",
2304 form->g_ctrl_num, form->form_num,
2305 form->ctrl_num, form->type);
2307 fprintf(f,"fragment: \n");
2308 for (qq = start; qq < end; qq++) fprintf(f, "%c", *qq);
2309 fprintf(f,"----------\n\n");
2310 fclose(f);
2312 #endif
2316 find_tag(struct document *document, unsigned char *name, int namelen)
2318 struct tag *tag;
2320 foreach (tag, document->tags)
2321 if (!strlcasecmp(tag->name, -1, name, namelen))
2322 return tag->y;
2324 return -1;