Patch 1: Finalize modifications to the HTML parser
[elinks/kon.git] / src / document / html / renderer.c
blobe9ababeaa11d516936858f6d3f6d034b5c7fe29b
1 /* HTML renderer */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <ctype.h>
8 #include <stdarg.h>
9 #include <string.h>
11 #include "elinks.h"
13 #include "cache/cache.h"
14 #include "config/options.h"
15 #include "document/docdata.h"
16 #include "document/document.h"
17 #include "document/html/frames.h"
18 #include "document/html/parser.h"
19 #include "document/html/parser/parse.h"
20 #include "document/html/renderer.h"
21 #include "document/html/tables.h"
22 #include "document/options.h"
23 #include "document/refresh.h"
24 #include "document/renderer.h"
25 #include "intl/charsets.h"
26 #include "osdep/types.h"
27 #include "protocol/uri.h"
28 #include "session/session.h"
29 #include "terminal/color.h"
30 #include "terminal/draw.h"
31 #include "util/color.h"
32 #include "util/conv.h"
33 #include "util/error.h"
34 #include "util/hash.h"
35 #include "util/lists.h"
36 #include "util/memory.h"
37 #include "util/string.h"
38 #include "util/time.h"
39 #include "viewer/text/form.h"
40 #include "viewer/text/view.h"
41 #include "viewer/text/vs.h"
43 /* Unsafe macros */
44 #include "document/html/internal.h"
46 /* Types and structs */
/* Link relationship of the text being rendered.  The code in this file
 * section only tests for LINK_STATE_NONE versus "anything else".
 * NOTE(review): presumably NEW marks the start of a link and SAME its
 * continuation -- confirm against the callers. */
enum link_state {
	LINK_STATE_NONE,
	LINK_STATE_NEW,
	LINK_STATE_SAME,
};
/* Snapshot of the link-related attributes the renderer keeps between
 * calls (stored in struct renderer_context). */
struct link_state_info {
	unsigned char *link;
	unsigned char *target;
	unsigned char *image;
	struct form_control *form;
};
/* Key identifying one cached table rendering: the source range plus the
 * layout parameters it was rendered with. */
struct table_cache_entry_key {
	unsigned char *start;
	unsigned char *end;
	int align;
	int margin;
	int width;
	int x;
	int link_num;
};
71 struct table_cache_entry {
72 LIST_HEAD(struct table_cache_entry);
74 struct table_cache_entry_key key;
75 struct part part;
/* Max. entries in table cache used for nested tables. */
#define MAX_TABLE_CACHE_ENTRIES 16384

/* Global variables */
static int table_cache_entries;
static struct hash *table_cache;
86 struct renderer_context {
87 int last_link_to_move;
88 struct tag *last_tag_to_move;
89 /* All tags between document->tags and this tag (inclusive) should
90 * be aligned to the next line break, unless some real content follows
91 * the tag. Therefore, this virtual tags list accumulates new tags as
92 * they arrive and empties when some real content is written; if a line
93 * break is inserted in the meanwhile, the tags follow it (ie. imagine
94 * <a name="x"> <p>, then the "x" tag follows the line breaks inserted
95 * by the <p> tag). */
96 struct tag *last_tag_for_newline;
98 struct link_state_info link_state_info;
100 struct conv_table *convert_table;
102 /* Used for setting cache info from HTTP-EQUIV meta tags. */
103 struct cache_entry *cached;
105 int g_ctrl_num;
106 int subscript; /* Count stacked subscripts */
107 int supscript; /* Count stacked supscripts */
109 unsigned int empty_format:1;
110 unsigned int nobreak:1;
111 unsigned int nosearchable:1;
112 unsigned int nowrap:1; /* Activated/deactivated by SP_NOWRAP. */
115 static struct renderer_context renderer_context;
118 /* Prototypes */
119 static void line_break(struct html_context *);
120 static void put_chars(struct html_context *, unsigned char *, int);
/* Translate part-relative coordinates into document coordinates.  Both
 * macros expect a variable named `part' to be in scope where they are used. */
#define X(x_)	(part->box.x + (x_))
#define Y(y_)	(part->box.y + (y_))

/* Granularity handed to mem_align_alloc() by ALIGN_SPACES(). */
#define SPACES_GRANULARITY	0x7F

#define ALIGN_SPACES(x, o, n) mem_align_alloc(x, o, n, SPACES_GRANULARITY)
129 static inline void
130 set_screen_char_color(struct screen_char *schar,
131 color_T bgcolor, color_T fgcolor,
132 enum color_flags color_flags,
133 enum color_mode color_mode)
135 struct color_pair colors = INIT_COLOR_PAIR(bgcolor, fgcolor);
137 set_term_color(schar, &colors, color_flags, color_mode);
140 static int
141 realloc_line(struct html_context *html_context, struct document *document,
142 int y, int length)
144 struct screen_char *pos, *end;
145 struct line *line;
146 int orig_length;
148 if (!realloc_lines(document, y))
149 return -1;
151 line = &document->data[y];
152 orig_length = line->length;
154 if (length < orig_length)
155 return orig_length;
157 if (!ALIGN_LINE(&line->chars, line->length, length + 1))
158 return -1;
160 /* We cannot rely on the aligned allocation to clear the members for us
161 * since for line splitting we simply trim the length. Question is if
162 * it is better to to clear the line after the splitting or here. */
163 end = &line->chars[length];
164 end->data = ' ';
165 end->attr = 0;
166 set_screen_char_color(end, par_format.bgcolor, 0x0,
167 COLOR_ENSURE_CONTRAST, /* for bug 461 */
168 document->options.color_mode);
170 for (pos = &line->chars[line->length]; pos < end; pos++) {
171 copy_screen_chars(pos, end, 1);
174 line->length = length + 1;
176 return orig_length;
179 void
180 expand_lines(struct html_context *html_context, struct part *part,
181 int x, int y, int lines, color_T bgcolor)
183 int line;
185 assert(part && part->document);
186 if_assert_failed return;
188 if (!use_document_bg_colors(&part->document->options))
189 return;
191 par_format.bgcolor = bgcolor;
193 for (line = 0; line < lines; line++)
194 realloc_line(html_context, part->document, Y(y + line), X(x));
197 static inline int
198 realloc_spaces(struct part *part, int length)
200 if (length < part->spaces_len)
201 return 0;
203 if (!ALIGN_SPACES(&part->spaces, part->spaces_len, length))
204 return -1;
205 #ifdef CONFIG_UTF8
206 if (!ALIGN_SPACES(&part->char_width, part->spaces_len, length))
207 return -1;
208 #endif
210 part->spaces_len = length;
212 return 0;
/* Accessors for the document line / cell at part-relative coordinates.
 * They expect a variable named `part' in scope (via X() and Y()).  LEN() is
 * the line length measured from the part's left edge, never negative. */
#define LINE(y_)	part->document->data[Y(y_)]
#define POS(x_, y_)	LINE(y_).chars[X(x_)]
#define LEN(y_)		int_max(LINE(y_).length - part->box.x, 0)
221 /* When we clear chars we want to preserve and use the background colors
222 * already in place else we could end up ``staining'' the background especial
223 * when drawing table cells. So make the cleared chars share the colors in
224 * place. */
225 static inline void
226 clear_hchars(struct html_context *html_context, int x, int y, int width)
228 struct part *part;
229 struct screen_char *pos, *end;
231 assert(html_context);
232 if_assert_failed return;
234 part = html_context->part;
236 assert(part && part->document && width > 0);
237 if_assert_failed return;
239 if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1) < 0)
240 return;
242 assert(part->document->data);
243 if_assert_failed return;
245 pos = &POS(x, y);
246 end = pos + width - 1;
247 end->data = ' ';
248 end->attr = 0;
249 set_screen_char_color(end, par_format.bgcolor, 0x0,
250 COLOR_ENSURE_CONTRAST, /* for bug 461 */
251 part->document->options.color_mode);
253 while (pos < end)
254 copy_screen_chars(pos++, end, 1);
257 /* TODO: Merge parts with get_format_screen_char(). --jonas */
258 /* Allocates the required chars on the given line and returns the char at
259 * position (x, y) ready to be used as a template char. */
260 static inline struct screen_char *
261 get_frame_char(struct html_context *html_context, struct part *part,
262 int x, int y, unsigned char data,
263 color_T bgcolor, color_T fgcolor)
265 struct screen_char *template;
267 assert(html_context);
268 if_assert_failed return NULL;
270 assert(part && part->document && x >= 0 && y >= 0);
271 if_assert_failed return NULL;
273 if (realloc_line(html_context, part->document, Y(y), X(x)) < 0)
274 return NULL;
276 assert(part->document->data);
277 if_assert_failed return NULL;
279 template = &POS(x, y);
280 template->data = data;
281 template->attr = SCREEN_ATTR_FRAME;
282 set_screen_char_color(template, bgcolor, fgcolor,
283 part->document->options.color_flags,
284 part->document->options.color_mode);
286 return template;
289 void
290 draw_frame_hchars(struct part *part, int x, int y, int width,
291 unsigned char data, color_T bgcolor, color_T fgcolor,
292 struct html_context *html_context)
294 struct screen_char *template;
296 assert(width > 0);
297 if_assert_failed return;
299 template = get_frame_char(html_context, part, x + width - 1, y, data, bgcolor, fgcolor);
300 if (!template) return;
302 /* The template char is the last we need to draw so only decrease @width. */
303 for (width -= 1; width; width--, x++) {
304 copy_screen_chars(&POS(x, y), template, 1);
308 void
309 draw_frame_vchars(struct part *part, int x, int y, int height,
310 unsigned char data, color_T bgcolor, color_T fgcolor,
311 struct html_context *html_context)
313 struct screen_char *template = get_frame_char(html_context, part, x, y,
314 data, bgcolor, fgcolor);
316 if (!template) return;
318 /* The template char is the first vertical char to be drawn. So
319 * copy it to the rest. */
320 for (height -= 1, y += 1; height; height--, y++) {
321 if (realloc_line(html_context, part->document, Y(y), X(x)) < 0)
322 return;
324 copy_screen_chars(&POS(x, y), template, 1);
328 static inline struct screen_char *
329 get_format_screen_char(struct html_context *html_context,
330 enum link_state link_state)
332 static struct text_style ta_cache = { -1, 0x0, 0x0 };
333 static struct screen_char schar_cache;
335 if (memcmp(&ta_cache, &format.style, sizeof(ta_cache))) {
336 copy_struct(&ta_cache, &format.style);
337 struct text_style final_style = format.style;
339 if (link_state != LINK_STATE_NONE
340 && html_context->options->underline_links) {
341 final_style.attr |= AT_UNDERLINE;
344 get_screen_char_template(&schar_cache, html_context->options, final_style);
347 if (!!(schar_cache.attr & SCREEN_ATTR_UNSEARCHABLE)
348 ^ !!renderer_context.nosearchable) {
349 schar_cache.attr ^= SCREEN_ATTR_UNSEARCHABLE;
352 return &schar_cache;
355 #ifdef CONFIG_UTF8
356 /* First possibly do the format change and then find out what coordinates
357 * to use since sub- or superscript might change them */
358 static inline int
359 set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
360 enum link_state link_state)
362 struct part *const part = html_context->part;
363 struct screen_char *const schar = get_format_screen_char(html_context,
364 link_state);
365 int x = part->cx;
366 const int y = part->cy;
367 const int x2 = x;
368 int len = charslen;
369 const int utf8 = html_context->options->utf8;
370 int orig_length;
372 assert(part);
373 if_assert_failed return len;
375 assert(charslen >= 0);
377 if (realloc_spaces(part, x + charslen))
378 return 0;
380 /* U+00AD SOFT HYPHEN characters in HTML documents are
381 * supposed to be displayed only if the word is broken at that
382 * point. ELinks currently does not use them, so it should
383 * not display them. If the input @chars is in UTF-8, then
384 * set_hline() discards the characters. If the input is in
385 * some other charset, then set_hline() does not know which
386 * byte that charset uses for U+00AD, so it cannot discard
387 * the characters; instead, the translation table used by
388 * convert_string() has already discarded the characters.
390 * Likewise, if the input @chars is in UTF-8, then it may
391 * contain U+00A0 NO-BREAK SPACE characters; but if the input
392 * is in some other charset, then the translation table
393 * has mapped those characters to NBSP_CHAR. */
395 if (part->document) {
396 /* Reallocate LINE(y).chars[] to large enough. The
397 * last parameter of realloc_line is the index of the
398 * last element to which we may want to write,
399 * i.e. one less than the required size of the array.
400 * Compute the required size by assuming that each
401 * byte of input will need at most one character cell.
402 * (All double-cell characters take up at least two
403 * bytes in UTF-8, and there are no triple-cell or
404 * wider characters.) However, if there already is an
405 * incomplete character in part->document->buf, then
406 * the first byte of input can result in a double-cell
407 * character, so we must reserve one extra element. */
408 orig_length = realloc_line(html_context, part->document,
409 Y(y), X(x) + charslen);
410 if (orig_length < 0) /* error */
411 return 0;
412 if (utf8) {
413 unsigned char *const end = chars + charslen;
414 unicode_val_T data;
416 if (part->document->buf_length) {
417 /* previous char was broken in the middle */
418 int length = utf8charlen(part->document->buf);
419 unsigned char i;
420 unsigned char *buf_ptr = part->document->buf;
422 for (i = part->document->buf_length; i < length && chars < end;) {
423 part->document->buf[i++] = *chars++;
425 part->document->buf_length = i;
426 part->document->buf[i] = '\0';
427 data = utf8_to_unicode(&buf_ptr, buf_ptr + i);
428 if (data != UCS_NO_CHAR) {
429 /* FIXME: If there was invalid
430 * UTF-8 in the buffer,
431 * @utf8_to_unicode may have left
432 * some bytes unused. Those
433 * bytes should be pulled back
434 * into @chars, rather than
435 * discarded. This is not
436 * trivial to implement because
437 * each byte may have arrived in
438 * a separate call. */
439 part->document->buf_length = 0;
440 goto good_char;
441 } else {
442 /* Still not full char */
443 LINE(y).length = orig_length;
444 return 0;
448 while (chars < end) {
449 /* ELinks does not use NBSP_CHAR in UTF-8. */
451 data = utf8_to_unicode(&chars, end);
452 if (data == UCS_NO_CHAR) {
453 part->spaces[x] = 0;
454 if (charslen == 1) {
455 /* HR */
456 unsigned char attr = schar->attr;
458 schar->data = *chars++;
459 schar->attr = SCREEN_ATTR_FRAME;
460 copy_screen_chars(&POS(x, y), schar, 1);
461 schar->attr = attr;
462 part->char_width[x++] = 0;
463 continue;
464 } else {
465 unsigned char i;
467 for (i = 0; chars < end;i++) {
468 part->document->buf[i] = *chars++;
470 part->document->buf_length = i;
471 break;
473 /* not reached */
476 good_char:
477 if (data == UCS_SOFT_HYPHEN)
478 continue;
480 if (data == UCS_NO_BREAK_SPACE
481 && html_context->options->wrap_nbsp)
482 data = UCS_SPACE;
483 part->spaces[x] = (data == UCS_SPACE);
485 if (unicode_to_cell(data) == 2) {
486 schar->data = (unicode_val_T)data;
487 part->char_width[x] = 2;
488 copy_screen_chars(&POS(x++, y), schar, 1);
489 schar->data = UCS_NO_CHAR;
490 part->spaces[x] = 0;
491 part->char_width[x] = 0;
492 } else {
493 part->char_width[x] = unicode_to_cell(data);
494 schar->data = (unicode_val_T)data;
496 copy_screen_chars(&POS(x++, y), schar, 1);
497 } /* while chars < end */
498 } else { /* not UTF-8 */
499 for (; charslen > 0; charslen--, x++, chars++) {
500 part->char_width[x] = 1;
501 if (*chars == NBSP_CHAR) {
502 schar->data = ' ';
503 part->spaces[x] = html_context->options->wrap_nbsp;
504 } else {
505 part->spaces[x] = (*chars == ' ');
506 schar->data = *chars;
508 copy_screen_chars(&POS(x, y), schar, 1);
510 } /* end of UTF-8 check */
512 /* Assert that we haven't written past the end of the
513 * LINE(y).chars array. @x here is one greater than
514 * the last one used in POS(x, y). Instead of this,
515 * we could assert(X(x) < LINE(y).length) immediately
516 * before each @copy_screen_chars call above, but
517 * those are in an inner loop that should be fast. */
518 assert(X(x) <= LINE(y).length);
519 /* Some part of the code is apparently using LINE(y).length
520 * for line-wrapping decisions. It may currently be too
521 * large because it was allocated above based on @charslen
522 * which is the number of bytes, not the number of cells.
523 * Change the length to the correct size, but don't let it
524 * get smaller than it was on entry to this function. */
525 LINE(y).length = int_max(orig_length, X(x));
526 len = x - x2;
527 } else { /* part->document == NULL */
528 if (utf8) {
529 unsigned char *const end = chars + charslen;
531 while (chars < end) {
532 unicode_val_T data;
534 data = utf8_to_unicode(&chars, end);
535 if (data == UCS_SOFT_HYPHEN)
536 continue;
538 if (data == UCS_NO_BREAK_SPACE
539 && html_context->options->wrap_nbsp)
540 data = UCS_SPACE;
541 part->spaces[x] = (data == UCS_SPACE);
543 part->char_width[x] = unicode_to_cell(data);
544 if (part->char_width[x] == 2) {
545 x++;
546 part->spaces[x] = 0;
547 part->char_width[x] = 0;
549 if (data == UCS_NO_CHAR) {
550 /* this is at the end only */
551 return x - x2;
553 x++;
554 } /* while chars < end */
555 len = x - x2;
556 } else { /* not UTF-8 */
557 for (; charslen > 0; charslen--, x++, chars++) {
558 part->char_width[x] = 1;
559 if (*chars == NBSP_CHAR) {
560 part->spaces[x] = html_context->options->wrap_nbsp;
561 } else {
562 part->spaces[x] = (*chars == ' ');
566 } /* end of part->document check */
567 return len;
569 #else
571 /* First possibly do the format change and then find out what coordinates
572 * to use since sub- or superscript might change them */
573 static inline void
574 set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
575 enum link_state link_state)
577 struct part *part = html_context->part;
578 struct screen_char *schar = get_format_screen_char(html_context,
579 link_state);
580 int x = part->cx;
581 int y = part->cy;
583 assert(part);
584 if_assert_failed return;
586 if (realloc_spaces(part, x + charslen))
587 return;
589 if (part->document) {
590 if (realloc_line(html_context, part->document,
591 Y(y), X(x) + charslen - 1) < 0)
592 return;
594 for (; charslen > 0; charslen--, x++, chars++) {
595 if (*chars == NBSP_CHAR) {
596 schar->data = ' ';
597 part->spaces[x] = html_context->options->wrap_nbsp;
598 } else {
599 part->spaces[x] = (*chars == ' ');
600 schar->data = *chars;
602 copy_screen_chars(&POS(x, y), schar, 1);
604 } else {
605 for (; charslen > 0; charslen--, x++, chars++) {
606 if (*chars == NBSP_CHAR) {
607 part->spaces[x] = html_context->options->wrap_nbsp;
608 } else {
609 part->spaces[x] = (*chars == ' ');
614 #endif /* CONFIG_UTF8 */
616 static void
617 move_links(struct html_context *html_context, int xf, int yf, int xt, int yt)
619 struct part *part;
620 struct tag *tag;
621 int nlink = renderer_context.last_link_to_move;
622 int matched = 0;
624 assert(html_context);
625 if_assert_failed return;
627 part = html_context->part;
629 assert(part && part->document);
630 if_assert_failed return;
632 if (!realloc_lines(part->document, Y(yt)))
633 return;
635 for (; nlink < part->document->nlinks; nlink++) {
636 struct link *link = &part->document->links[nlink];
637 int i;
639 for (i = 0; i < link->npoints; i++) {
640 /* Fix for bug 479 (part one) */
641 /* The scenario that triggered it:
643 * Imagine a centered element containing a really long
644 * word (over half of the screen width long) followed
645 * by a few links with no spaces between them where all
646 * the link text combined with the really long word
647 * will force the line to be wrapped. When rendering
648 * the line first words (including link text words) are
649 * put on one line. Then wrapping is performed moving
650 * all links from current line to the one below. Then
651 * the current line (now only containing the really
652 * long word) is centered. This will trigger a call to
653 * move_links() which will increment.
655 * Without the fix below the centering of the current
656 * line will increment last_link_to_move to that of the
657 * last link which means centering of the next line
658 * with all the links will only move the last link
659 * leaving all the other links' points dangling and
660 * causing buggy link highlighting.
662 * Even links like textareas will be correctly handled
663 * because @last_link_to_move is a way to optimize how
664 * many links move_links() will have to iterate and
665 * this little fix will only decrease the effect of the
666 * optimization by always ensuring it is never
667 * incremented too far. */
668 if (!matched && link->points[i].y > Y(yf)) {
669 matched = 1;
670 continue;
673 if (link->points[i].y != Y(yf))
674 continue;
676 matched = 1;
678 if (link->points[i].x < X(xf))
679 continue;
681 if (yt >= 0) {
682 link->points[i].y = Y(yt);
683 link->points[i].x += -xf + xt;
684 } else {
685 int to_move = link->npoints - (i + 1);
687 assert(to_move >= 0);
689 if (to_move > 0) {
690 memmove(&link->points[i],
691 &link->points[i + 1],
692 to_move *
693 sizeof(*link->points));
694 i--;
697 link->npoints--;
701 if (!matched) {
702 renderer_context.last_link_to_move = nlink;
706 /* Don't move tags when removing links. */
707 if (yt < 0) return;
709 matched = 0;
710 tag = renderer_context.last_tag_to_move;
712 while (list_has_next(part->document->tags, tag)) {
713 tag = tag->next;
715 if (tag->y == Y(yf)) {
716 matched = 1;
717 if (tag->x >= X(xf)) {
718 tag->y = Y(yt);
719 tag->x += -xf + xt;
722 } else if (!matched && tag->y > Y(yf)) {
723 /* Fix for bug 479 (part two) */
724 matched = 1;
727 if (!matched) renderer_context.last_tag_to_move = tag;
731 static inline void
732 copy_chars(struct html_context *html_context, int x, int y, int width, struct screen_char *d)
734 struct part *part;
736 assert(html_context);
737 if_assert_failed return;
739 part = html_context->part;
741 assert(width > 0 && part && part->document && part->document->data);
742 if_assert_failed return;
744 if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1) < 0)
745 return;
747 copy_screen_chars(&POS(x, y), d, width);
750 static inline void
751 move_chars(struct html_context *html_context, int x, int y, int nx, int ny)
753 struct part *part;
755 assert(html_context);
756 if_assert_failed return;
758 part = html_context->part;
760 assert(part && part->document && part->document->data);
761 if_assert_failed return;
763 if (LEN(y) - x <= 0) return;
764 copy_chars(html_context, nx, ny, LEN(y) - x, &POS(x, y));
766 LINE(y).length = X(x);
767 move_links(html_context, x, y, nx, ny);
770 static inline void
771 shift_chars(struct html_context *html_context, int y, int shift)
773 struct part *part;
774 struct screen_char *a;
775 int len;
777 assert(html_context);
778 if_assert_failed return;
780 part = html_context->part;
782 assert(part && part->document && part->document->data);
783 if_assert_failed return;
785 len = LEN(y);
787 a = fmem_alloc(len * sizeof(*a));
788 if (!a) return;
790 copy_screen_chars(a, &POS(0, y), len);
792 clear_hchars(html_context, 0, y, shift);
793 copy_chars(html_context, shift, y, len, a);
794 fmem_free(a);
796 move_links(html_context, 0, y, shift, y);
799 static inline void
800 del_chars(struct html_context *html_context, int x, int y)
802 struct part *part;
804 assert(html_context);
805 if_assert_failed return;
807 part = html_context->part;
809 assert(part && part->document && part->document->data);
810 if_assert_failed return;
812 LINE(y).length = X(x);
813 move_links(html_context, x, y, -1, -1);
/* overlap(x) is the usable width of format @x: its width (capped to the
 * option box width minus TABLE_LINE_PADDING when that padding is not
 * negative) less its right margin, never below zero.  The capped variant
 * expects a variable named `html_context' in scope. */
#if TABLE_LINE_PADDING < 0
# define overlap_width(x) (x).width
#else
# define overlap_width(x) int_min((x).width, \
	html_context->options->box.width - TABLE_LINE_PADDING)
#endif
#define overlap(x) int_max(overlap_width(x) - (x).rightmargin, 0)
824 static int inline
825 split_line_at(struct html_context *html_context, int width)
827 struct part *part;
828 int tmp;
829 int new_width = width + par_format.rightmargin;
831 assert(html_context);
832 if_assert_failed return 0;
834 part = html_context->part;
836 assert(part);
837 if_assert_failed return 0;
839 /* Make sure that we count the right margin to the total
840 * actual box width. */
841 int_lower_bound(&part->box.width, new_width);
843 if (part->document) {
844 assert(part->document->data);
845 if_assert_failed return 0;
846 #ifdef CONFIG_UTF8
847 if (html_context->options->utf8
848 && width < part->spaces_len && part->char_width[width] == 2) {
849 move_chars(html_context, width, part->cy, par_format.leftmargin, part->cy + 1);
850 del_chars(html_context, width, part->cy);
851 } else
852 #endif
854 assertm(POS(width, part->cy).data == ' ',
855 "bad split: %c", POS(width, part->cy).data);
856 move_chars(html_context, width + 1, part->cy, par_format.leftmargin, part->cy + 1);
857 del_chars(html_context, width, part->cy);
862 #ifdef CONFIG_UTF8
863 if (!(html_context->options->utf8
864 && width < part->spaces_len
865 && part->char_width[width] == 2))
866 #endif
867 width++; /* Since we were using (x + 1) only later... */
869 tmp = part->spaces_len - width;
870 if (tmp > 0) {
871 /* 0 is possible and I'm paranoid ... --Zas */
872 memmove(part->spaces, part->spaces + width, tmp);
873 #ifdef CONFIG_UTF8
874 memmove(part->char_width, part->char_width + width, tmp);
875 #endif
878 assert(tmp >= 0);
879 if_assert_failed tmp = 0;
880 memset(part->spaces + tmp, 0, width);
881 #ifdef CONFIG_UTF8
882 memset(part->char_width + tmp, 0, width);
883 #endif
885 if (par_format.leftmargin > 0) {
886 tmp = part->spaces_len - par_format.leftmargin;
887 assertm(tmp > 0, "part->spaces_len - par_format.leftmargin == %d", tmp);
888 /* So tmp is zero, memmove() should survive that. Don't recover. */
889 memmove(part->spaces + par_format.leftmargin, part->spaces, tmp);
890 #ifdef CONFIG_UTF8
891 memmove(part->char_width + par_format.leftmargin, part->char_width, tmp);
892 #endif
895 part->cy++;
897 if (part->cx == width) {
898 part->cx = -1;
899 int_lower_bound(&part->box.height, part->cy);
900 return 2;
901 } else {
902 part->cx -= width - par_format.leftmargin;
903 int_lower_bound(&part->box.height, part->cy + 1);
904 return 1;
908 /* Here, we scan the line for a possible place where we could split it into two
909 * (breaking it, because it is too long), if it is overlapping from the maximal
910 * box width. */
911 /* Returns 0 if there was found no spot suitable for breaking the line.
912 * 1 if the line was split into two.
913 * 2 if the (second) splitted line is blank (that is useful to determine
914 * ie. if the next line_break() should really break the line; we don't
915 * want to see any blank lines to pop up, do we?). */
916 static int
917 split_line(struct html_context *html_context)
919 struct part *part;
920 int x;
922 assert(html_context);
923 if_assert_failed return 0;
925 part = html_context->part;
927 assert(part);
928 if_assert_failed return 0;
930 #ifdef CONFIG_UTF8
931 if (html_context->options->utf8) {
932 for (x = overlap(par_format); x >= par_format.leftmargin; x--) {
934 if (x < part->spaces_len && (part->spaces[x]
935 || (part->char_width[x] == 2
936 /* Ugly hack. If we haven't place for
937 * double-width characters we print two
938 * double-width characters. */
939 && x != par_format.leftmargin)))
940 return split_line_at(html_context, x);
943 for (x = par_format.leftmargin; x < part->cx ; x++) {
944 if (x < part->spaces_len && (part->spaces[x]
945 || (part->char_width[x] == 2
946 /* We want to break line after _second_
947 * double-width character. */
948 && x > par_format.leftmargin)))
949 return split_line_at(html_context, x);
951 } else
952 #endif
954 for (x = overlap(par_format); x >= par_format.leftmargin; x--)
955 if (x < part->spaces_len && part->spaces[x])
956 return split_line_at(html_context, x);
958 for (x = par_format.leftmargin; x < part->cx ; x++)
959 if (x < part->spaces_len && part->spaces[x])
960 return split_line_at(html_context, x);
963 /* Make sure that we count the right margin to the total
964 * actual box width. */
965 int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
967 return 0;
970 /* Insert @new_spaces spaces before the coordinates @x and @y,
971 * adding those spaces to whatever link is at those coordinates. */
972 /* TODO: Integrate with move_links. */
973 static void
974 insert_spaces_in_link(struct part *part, int x, int y, int new_spaces)
976 int i = part->document->nlinks;
978 x = X(x);
979 y = Y(y);
981 while (i--) {
982 struct link *link = &part->document->links[i];
983 int j = link->npoints;
985 while (j-- > 1) {
986 struct point *point = &link->points[j];
988 if (point->x != x || point->y != y)
989 continue;
991 if (!realloc_points(link, link->npoints + new_spaces))
992 return;
994 link->npoints += new_spaces;
995 point = &link->points[link->npoints - 1];
997 while (new_spaces--) {
998 point->x = --x;
999 point->y = y;
1000 point--;
1003 return;
1008 /* This function is very rare exemplary of clean and beautyful code here.
1009 * Please handle with care. --pasky */
1010 static void
1011 justify_line(struct html_context *html_context, int y)
1013 struct part *part;
1014 struct screen_char *line; /* we save original line here */
1015 int len;
1016 int pos;
1017 int *space_list;
1018 int spaces;
1019 int diff;
1021 assert(html_context);
1022 if_assert_failed return;
1024 part = html_context->part;
1026 assert(part && part->document && part->document->data);
1027 if_assert_failed return;
1029 len = LEN(y);
1030 assert(len > 0);
1031 if_assert_failed return;
1033 line = fmem_alloc(len * sizeof(*line));
1034 if (!line) return;
1036 /* It may sometimes happen that the line is only one char long and that
1037 * char is space - then we're going to write to both [0] and [1], but
1038 * we allocated only one field. Thus, we've to do (len + 1). --pasky */
1039 space_list = fmem_alloc((len + 1) * sizeof(*space_list));
1040 if (!space_list) {
1041 fmem_free(line);
1042 return;
1045 copy_screen_chars(line, &POS(0, y), len);
1047 /* Skip leading spaces */
1049 spaces = 0;
1050 pos = 0;
1052 while (line[pos].data == ' ')
1053 pos++;
1055 /* Yes, this can be negative, we know. But we add one to it always
1056 * anyway, so it's ok. */
1057 space_list[spaces++] = pos - 1;
1059 /* Count spaces */
1061 for (; pos < len; pos++)
1062 if (line[pos].data == ' ')
1063 space_list[spaces++] = pos;
1065 space_list[spaces] = len;
1067 /* Realign line */
1069 /* Diff is the difference between the width of the paragraph
1070 * and the current length of the line. */
1071 diff = overlap(par_format) - len;
1073 /* We check diff > 0 because diff can be negative (i.e., we have
1074 * an unbroken line of length > overlap(par_format))
1075 * even when spaces > 1 if the line has only non-breaking spaces. */
1076 if (spaces > 1 && diff > 0) {
1077 int prev_end = 0;
1078 int word;
1080 /* Allocate enough memory for the justified line.
1081 * If the memory is not available, then leave the
1082 * line unchanged, rather than halfway there. The
1083 * following loop assumes the allocation succeeded. */
1084 if (!realloc_line(html_context, html_context->part->document,
1085 Y(y), X(overlap(par_format))))
1086 goto out_of_memory;
1088 for (word = 0; word < spaces; word++) {
1089 /* We have to increase line length by 'diff' num. of
1090 * characters, so we move 'word'th word 'word_shift'
1091 * characters right. */
1092 int word_start = space_list[word] + 1;
1093 int word_len = space_list[word + 1] - word_start;
1094 int word_shift;
1095 int new_start;
1096 int new_spaces;
1098 assert(word_len >= 0);
1099 if_assert_failed continue;
1101 word_shift = (word * diff) / (spaces - 1);
1102 new_start = word_start + word_shift;
1104 /* Assert that the realloc_line() above
1105 * allocated enough memory for the word
1106 * and the preceding spaces. */
1107 assert(LEN(y) >= new_start + word_len);
1108 if_assert_failed continue;
1110 /* Copy the original word, without any spaces.
1111 * word_len may be 0 here. */
1112 copy_screen_chars(&POS(new_start, y),
1113 &line[word_start], word_len);
1115 /* Copy the space that preceded the word,
1116 * duplicating it as many times as necessary.
1117 * This preserves its attributes, such as
1118 * background color and underlining. If this
1119 * is the first word, then skip the copy
1120 * because there might not be a space there
1121 * and anyway it need not be duplicated. */
1122 if (word) {
1123 int spacex;
1125 for (spacex = prev_end; spacex < new_start;
1126 ++spacex) {
1127 copy_screen_chars(&POS(spacex, y),
1128 &line[word_start - 1],
1133 /* Remember that any links at the right side
1134 * of the added spaces have moved, and the
1135 * spaces themselves may also belong to a
1136 * link. */
1137 new_spaces = new_start - prev_end - 1;
1138 if (word && new_spaces) {
1139 move_links(html_context, prev_end + 1, y, new_start, y);
1140 insert_spaces_in_link(part,
1141 new_start, y, new_spaces);
1144 prev_end = new_start + word_len;
1148 out_of_memory:
1149 fmem_free(space_list);
1150 fmem_free(line);
1153 static void
1154 align_line(struct html_context *html_context, int y, int last)
1156 struct part *part;
1157 int shift;
1158 int len;
1160 assert(html_context);
1161 if_assert_failed return;
1163 part = html_context->part;
1165 assert(part && part->document && part->document->data);
1166 if_assert_failed return;
1168 len = LEN(y);
1170 if (!len || par_format.align == ALIGN_LEFT)
1171 return;
1173 if (par_format.align == ALIGN_JUSTIFY) {
1174 if (!last)
1175 justify_line(html_context, y);
1176 return;
1179 shift = overlap(par_format) - len;
1180 if (par_format.align == ALIGN_CENTER)
1181 shift /= 2;
1182 if (shift > 0)
1183 shift_chars(html_context, y, shift);
1186 static inline void
1187 init_link_event_hooks(struct html_context *html_context, struct link *link)
1189 link->event_hooks = mem_calloc(1, sizeof(*link->event_hooks));
1190 if (!link->event_hooks) return;
1192 #define add_evhook(list_, type_, src_) \
1193 do { \
1194 struct script_event_hook *evhook; \
1196 if (!src_) break; \
1198 evhook = mem_calloc(1, sizeof(*evhook)); \
1199 if (!evhook) break; \
1201 evhook->type = type_; \
1202 evhook->src = stracpy(src_); \
1203 add_to_list(*(list_), evhook); \
1204 } while (0)
1206 init_list(*link->event_hooks);
1207 add_evhook(link->event_hooks, SEVHOOK_ONCLICK, format.onclick);
1208 add_evhook(link->event_hooks, SEVHOOK_ONDBLCLICK, format.ondblclick);
1209 add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOVER, format.onmouseover);
1210 add_evhook(link->event_hooks, SEVHOOK_ONHOVER, format.onhover);
1211 add_evhook(link->event_hooks, SEVHOOK_ONFOCUS, format.onfocus);
1212 add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOUT, format.onmouseout);
1213 add_evhook(link->event_hooks, SEVHOOK_ONBLUR, format.onblur);
1215 #undef add_evhook
1218 static struct link *
1219 new_link(struct html_context *html_context, unsigned char *name, int namelen)
1221 struct document *document;
1222 struct part *part;
1223 int link_number;
1224 struct link *link;
1226 assert(html_context);
1227 if_assert_failed return NULL;
1229 part = html_context->part;
1231 assert(part);
1232 if_assert_failed return NULL;
1234 document = part->document;
1236 assert(document);
1237 if_assert_failed return NULL;
1239 link_number = part->link_num;
1241 if (!ALIGN_LINK(&document->links, document->nlinks, document->nlinks + 1))
1242 return NULL;
1244 link = &document->links[document->nlinks++];
1245 link->number = link_number - 1;
1246 if (document->options.use_tabindex) link->number += format.tabindex;
1247 link->accesskey = format.accesskey;
1248 link->title = null_or_stracpy(format.title);
1249 link->where_img = null_or_stracpy(format.image);
1251 if (!format.form) {
1252 link->target = null_or_stracpy(format.target);
1253 link->data.name = memacpy(name, namelen);
1254 /* if (strlen(url) > 4 && !c_strncasecmp(url, "MAP@", 4)) { */
1255 if (format.link
1256 && ((format.link[0]|32) == 'm')
1257 && ((format.link[1]|32) == 'a')
1258 && ((format.link[2]|32) == 'p')
1259 && (format.link[3] == '@')
1260 && format.link[4]) {
1261 link->type = LINK_MAP;
1262 link->where = stracpy(format.link + 4);
1263 } else {
1264 link->type = LINK_HYPERTEXT;
1265 link->where = null_or_stracpy(format.link);
1268 } else {
1269 struct form_control *fc = format.form;
1270 struct form *form;
1272 switch (fc->type) {
1273 case FC_TEXT:
1274 case FC_PASSWORD:
1275 case FC_FILE:
1276 link->type = LINK_FIELD;
1277 break;
1278 case FC_TEXTAREA:
1279 link->type = LINK_AREA;
1280 break;
1281 case FC_CHECKBOX:
1282 case FC_RADIO:
1283 link->type = LINK_CHECKBOX;
1284 break;
1285 case FC_SELECT:
1286 link->type = LINK_SELECT;
1287 break;
1288 case FC_SUBMIT:
1289 case FC_IMAGE:
1290 case FC_RESET:
1291 case FC_BUTTON:
1292 case FC_HIDDEN:
1293 link->type = LINK_BUTTON;
1295 link->data.form_control = fc;
1296 /* At this point, format.form might already be set but
1297 * the form_control not registered through SP_CONTROL
1298 * yet, therefore without fc->form set. It is always
1299 * after the "good" last form was already processed,
1300 * though, so we can safely just take that. */
1301 form = fc->form;
1302 if (!form && !list_empty(document->forms))
1303 form = document->forms.next;
1304 link->target = null_or_stracpy(form ? form->target : NULL);
1307 link->color.background = format.style.bg;
1308 link->color.foreground = link_is_textinput(link)
1309 ? format.style.fg : format.clink;
1311 init_link_event_hooks(html_context, link);
1313 document->links_sorted = 0;
1314 return link;
1317 static void
1318 html_special_tag(struct document *document, unsigned char *t, int x, int y)
1320 struct tag *tag;
1321 int tag_len;
1323 assert(document);
1324 if_assert_failed return;
1326 tag_len = strlen(t);
1327 /* One byte is reserved for name in struct tag. */
1328 tag = mem_alloc(sizeof(*tag) + tag_len);
1329 if (!tag) return;
1331 tag->x = x;
1332 tag->y = y;
1333 memcpy(tag->name, t, tag_len + 1);
1334 add_to_list(document->tags, tag);
1335 if (renderer_context.last_tag_for_newline == (struct tag *) &document->tags)
1336 renderer_context.last_tag_for_newline = tag;
1340 static void
1341 put_chars_conv(struct html_context *html_context,
1342 unsigned char *chars, int charslen)
1344 struct part *part;
1346 assert(html_context);
1347 if_assert_failed return;
1349 part = html_context->part;
1351 assert(part && chars && charslen);
1352 if_assert_failed return;
1354 if (format.style.attr & AT_GRAPHICS) {
1355 put_chars(html_context, chars, charslen);
1356 return;
1359 convert_string(renderer_context.convert_table, chars, charslen,
1360 html_context->options->cp,
1361 CSM_DEFAULT, NULL, (void (*)(void *, unsigned char *, int)) put_chars, html_context);
1364 static inline void
1365 put_link_number(struct html_context *html_context)
1367 struct part *part = html_context->part;
1368 unsigned char s[64];
1369 unsigned char *fl = format.link;
1370 unsigned char *ft = format.target;
1371 unsigned char *fi = format.image;
1372 struct form_control *ff = format.form;
1373 int slen = 0;
1375 format.link = format.target = format.image = NULL;
1376 format.form = NULL;
1378 s[slen++] = '[';
1379 ulongcat(s, &slen, part->link_num, sizeof(s) - 3, 0);
1380 s[slen++] = ']';
1381 s[slen] = '\0';
1383 renderer_context.nosearchable = 1;
1384 put_chars(html_context, s, slen);
1385 renderer_context.nosearchable = 0;
1387 if (ff && ff->type == FC_TEXTAREA) line_break(html_context);
1389 /* We might have ended up on a new line after the line breaking
1390 * or putting the link number chars. */
1391 if (part->cx == -1) part->cx = par_format.leftmargin;
1393 format.link = fl;
1394 format.target = ft;
1395 format.image = fi;
1396 format.form = ff;
1399 #define assert_link_variable(old, new) \
1400 assertm(!(old), "Old link value [%s]. New value [%s]", old, new);
1402 static inline void
1403 init_link_state_info(unsigned char *link, unsigned char *target,
1404 unsigned char *image, struct form_control *form)
1406 assert_link_variable(renderer_context.link_state_info.image, image);
1407 assert_link_variable(renderer_context.link_state_info.target, target);
1408 assert_link_variable(renderer_context.link_state_info.link, link);
1410 renderer_context.link_state_info.link = null_or_stracpy(link);
1411 renderer_context.link_state_info.target = null_or_stracpy(target);
1412 renderer_context.link_state_info.image = null_or_stracpy(image);
1413 renderer_context.link_state_info.form = form;
1416 static inline void
1417 done_link_state_info(void)
1419 mem_free_if(renderer_context.link_state_info.link);
1420 mem_free_if(renderer_context.link_state_info.target);
1421 mem_free_if(renderer_context.link_state_info.image);
1422 memset(&renderer_context.link_state_info, 0,
1423 sizeof(renderer_context.link_state_info));
1426 #ifdef CONFIG_UTF8
1427 static inline void
1428 process_link(struct html_context *html_context, enum link_state link_state,
1429 unsigned char *chars, int charslen, int cells)
1430 #else
1431 static inline void
1432 process_link(struct html_context *html_context, enum link_state link_state,
1433 unsigned char *chars, int charslen)
1434 #endif /* CONFIG_UTF8 */
1436 struct part *part = html_context->part;
1437 struct link *link;
1438 int x_offset = 0;
1440 switch (link_state) {
1441 case LINK_STATE_SAME: {
1442 unsigned char *name;
1444 if (!part->document) return;
1446 assertm(part->document->nlinks > 0, "no link");
1447 if_assert_failed return;
1449 link = &part->document->links[part->document->nlinks - 1];
1451 name = get_link_name(link);
1452 if (name) {
1453 unsigned char *new_name;
1455 new_name = straconcat(name, chars,
1456 (unsigned char *) NULL);
1457 if (new_name) {
1458 mem_free(name);
1459 link->data.name = new_name;
1463 /* FIXME: Concatenating two adjectent <a> elements to a single
1464 * link is broken since we lose the event handlers for the
1465 * second one. OTOH simply appending them here won't fly since
1466 * we may get here multiple times for even a single link. We
1467 * will probably need some SP_ for creating a new link or so.
1468 * --pasky */
1470 break;
1473 case LINK_STATE_NEW:
1474 part->link_num++;
1476 init_link_state_info(format.link, format.target,
1477 format.image, format.form);
1478 if (!part->document) return;
1480 /* Trim leading space from the link text */
1481 while (x_offset < charslen && chars[x_offset] <= ' ')
1482 x_offset++;
1484 if (x_offset) {
1485 charslen -= x_offset;
1486 chars += x_offset;
1487 #ifdef CONFIG_UTF8
1488 cells -= x_offset;
1489 #endif /* CONFIG_UTF8 */
1492 link = new_link(html_context, chars, charslen);
1493 if (!link) return;
1495 break;
1497 case LINK_STATE_NONE:
1498 default:
1499 INTERNAL("bad link_state %i", (int) link_state);
1500 return;
1503 /* Add new canvas positions to the link. */
1504 #ifdef CONFIG_UTF8
1505 if (realloc_points(link, link->npoints + cells))
1506 #else
1507 if (realloc_points(link, link->npoints + charslen))
1508 #endif /* CONFIG_UTF8 */
1510 struct point *point = &link->points[link->npoints];
1511 int x = X(part->cx) + x_offset;
1512 int y = Y(part->cy);
1514 #ifdef CONFIG_UTF8
1515 link->npoints += cells;
1517 for (; cells > 0; cells--, point++, x++)
1518 #else
1519 link->npoints += charslen;
1521 for (; charslen > 0; charslen--, point++, x++)
1522 #endif /* CONFIG_UTF8 */
1524 point->x = x;
1525 point->y = y;
1530 static inline enum link_state
1531 get_link_state(struct html_context *html_context)
1533 enum link_state state;
1535 if (!(format.link || format.image || format.form)) {
1536 state = LINK_STATE_NONE;
1538 } else if ((renderer_context.link_state_info.link
1539 || renderer_context.link_state_info.image
1540 || renderer_context.link_state_info.form)
1541 && !xstrcmp(format.link, renderer_context.link_state_info.link)
1542 && !xstrcmp(format.target, renderer_context.link_state_info.target)
1543 && !xstrcmp(format.image, renderer_context.link_state_info.image)
1544 && format.form == renderer_context.link_state_info.form) {
1546 return LINK_STATE_SAME;
1548 } else {
1549 state = LINK_STATE_NEW;
1552 done_link_state_info();
1554 return state;
/* Return 1 if any of the first @charslen bytes at @chars is not
 * whitespace according to isspace(), 0 otherwise (including when
 * @charslen is not positive). */
static inline int
html_has_non_space_chars(unsigned char *chars, int charslen)
{
	int i;

	for (i = 0; i < charslen; i++) {
		if (!isspace(chars[i]))
			return 1;
	}

	return 0;
}
1569 static void
1570 put_chars(struct html_context *html_context, unsigned char *chars, int charslen)
1572 enum link_state link_state;
1573 struct part *part;
1574 #ifdef CONFIG_UTF8
1575 int cells;
1576 #endif /* CONFIG_UTF8 */
1578 assert(html_context);
1579 if_assert_failed return;
1581 part = html_context->part;
1583 assert(part);
1584 if_assert_failed return;
1586 assert(chars && charslen);
1587 if_assert_failed return;
1589 /* If we are not handling verbatim aligning and we are at the begining
1590 * of a line trim whitespace. */
1591 if (part->cx == -1) {
1592 /* If we are not handling verbatim aligning trim leading
1593 * whitespaces. */
1594 if (!html_is_preformatted()) {
1595 while (charslen && *chars == ' ') {
1596 chars++;
1597 charslen--;
1600 if (charslen < 1) return;
1603 part->cx = par_format.leftmargin;
1606 /* For preformatted html always update 'the last tag' so we never end
1607 * up moving tags to the wrong line (Fixes bug 324). For all other html
1608 * it is moved only when the line being rendered carry some real
1609 * non-whitespace content. */
1610 if (html_is_preformatted()
1611 || html_has_non_space_chars(chars, charslen)) {
1612 renderer_context.last_tag_for_newline = (struct tag *) &part->document->tags;
1615 int_lower_bound(&part->box.height, part->cy + 1);
1617 link_state = get_link_state(html_context);
1619 if (link_state == LINK_STATE_NEW) {
1620 int x_offset = 0;
1622 /* Don't add inaccessible links. It seems to be caused
1623 * by the parser putting a space char after stuff like
1624 * <img>-tags or comments wrapped in <a>-tags. See bug
1625 * 30 for test case. */
1626 while (x_offset < charslen && chars[x_offset] <= ' ')
1627 x_offset++;
1629 /* For pure spaces reset the link state */
1630 if (x_offset == charslen)
1631 link_state = LINK_STATE_NONE;
1632 else if (html_context->options->links_numbering)
1633 put_link_number(html_context);
1635 #ifdef CONFIG_UTF8
1636 cells =
1637 #endif /* CONFIG_UTF8 */
1638 set_hline(html_context, chars, charslen, link_state);
1640 if (link_state != LINK_STATE_NONE) {
1641 #ifdef CONFIG_UTF8
1642 process_link(html_context, link_state, chars, charslen,
1643 cells);
1644 #else
1645 process_link(html_context, link_state, chars, charslen);
1646 #endif /* CONFIG_UTF8 */
1649 #ifdef CONFIG_UTF8
1650 if (renderer_context.nowrap
1651 && part->cx + cells > overlap(par_format))
1652 return;
1654 part->cx += cells;
1655 #else
1656 if (renderer_context.nowrap
1657 && part->cx + charslen > overlap(par_format))
1658 return;
1660 part->cx += charslen;
1661 #endif /* CONFIG_UTF8 */
1663 renderer_context.nobreak = 0;
1665 if (!(html_context->options->wrap || html_is_preformatted())) {
1666 while (part->cx > overlap(par_format)
1667 && part->cx > par_format.leftmargin) {
1668 int x = split_line(html_context);
1670 if (!x) break;
1671 if (part->document)
1672 align_line(html_context, part->cy - 1, 0);
1673 renderer_context.nobreak = !!(x - 1);
1677 assert(charslen > 0);
1678 #ifdef CONFIG_UTF8
1679 part->xa += cells;
1680 #else
1681 part->xa += charslen;
1682 #endif /* CONFIG_UTF8 */
1683 int_lower_bound(&part->max_width, part->xa
1684 + par_format.leftmargin + par_format.rightmargin
1685 - (chars[charslen - 1] == ' '
1686 && !html_is_preformatted()));
1687 return;
1691 #undef overlap
1693 static void
1694 line_break(struct html_context *html_context)
1696 struct part *part;
1697 struct tag *tag;
1699 assert(html_context);
1700 if_assert_failed return;
1702 part = html_context->part;
1704 assert(part);
1705 if_assert_failed return;
1707 int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
1709 if (renderer_context.nobreak) {
1710 renderer_context.nobreak = 0;
1711 part->cx = -1;
1712 part->xa = 0;
1713 return;
1716 if (!part->document || !part->document->data) goto end;
1718 if (!realloc_lines(part->document, part->box.height + part->cy + 1))
1719 return;
1721 if (part->cx > par_format.leftmargin && LEN(part->cy) > part->cx - 1
1722 && POS(part->cx - 1, part->cy).data == ' ') {
1723 del_chars(html_context, part->cx - 1, part->cy);
1724 part->cx--;
1727 if (part->cx > 0) align_line(html_context, part->cy, 1);
1729 for (tag = renderer_context.last_tag_for_newline;
1730 tag && tag != (struct tag *) &part->document->tags;
1731 tag = tag->prev) {
1732 tag->x = X(0);
1733 tag->y = Y(part->cy + 1);
1736 end:
1737 part->cy++;
1738 part->cx = -1;
1739 part->xa = 0;
1740 memset(part->spaces, 0, part->spaces_len);
1741 #ifdef CONFIG_UTF8
1742 memset(part->char_width, 0, part->spaces_len);
1743 #endif
1746 static void
1747 html_special_form(struct part *part, struct form *form)
1749 struct form *nform;
1751 assert(part && form);
1752 assert(form->form_num > 0);
1753 assert(form->form_end == INT_MAX);
1754 if_assert_failed return;
1756 if (!part->document) {
1757 done_form(form);
1758 return;
1761 /* Make a fake form with form_num == 0 so that there is
1762 * something to use if form controls appear above the first
1763 * actual FORM element. There can never be a real form with
1764 * form_num == 0 because the form_num is the position after the
1765 * "<form" characters and that's already five characters. The
1766 * fake form does not have a name, and it gets a form_view and
1767 * becomes visible to ECMAScript only if it actually has
1768 * controls in it. */
1769 if (list_empty(part->document->forms)) {
1770 nform = init_form();
1771 if (!nform) {
1772 done_form(form);
1773 return;
1775 nform->form_num = 0;
1776 add_to_list(part->document->forms, nform);
1779 /* Make sure the new form ``claims'' its slice of the form range
1780 * maintained in the form_num and form_end variables. */
1781 foreach (nform, part->document->forms) {
1782 if (form->form_num < nform->form_num
1783 || nform->form_end < form->form_num)
1784 continue;
1786 /* First check if the form has identical form numbers.
1787 * That should only be the case when the form being
1788 * added is in fact the same form in which case it
1789 * should be dropped. The fact that this can happen
1790 * suggests that the table renderering can be confused.
1791 * See bug 647 for a test case.
1792 * Do not compare form->form_end here because it is
1793 * normally set by this function and that has obviously
1794 * not yet been done. */
1795 if (nform->form_num == form->form_num) {
1796 done_form(form);
1797 return;
1800 /* The form start is inside an already added form, so
1801 * partition the space of the existing form and get
1802 * |old|new|. */
1803 form->form_end = nform->form_end;
1804 nform->form_end = form->form_num - 1;
1805 assertm(nform->form_num <= nform->form_end,
1806 "[%d:%d] [%d:%d]", nform->form_num, nform->form_end,
1807 form->form_num, form->form_end);
1808 add_to_list(part->document->forms, form);
1809 return;
1812 ERROR("hole between forms");
1813 done_form(form);
1814 return;
1817 static void
1818 html_special_form_control(struct part *part, struct form_control *fc)
1820 struct form *form;
1822 assert(part && fc);
1823 if_assert_failed return;
1825 if (!part->document) {
1826 done_form_control(fc);
1827 mem_free(fc);
1828 return;
1831 fc->g_ctrl_num = renderer_context.g_ctrl_num++;
1833 if (list_empty(part->document->forms)) {
1834 /* No forms encountered yet, that means a homeless form
1835 * control. Generate a dummy form for those Flying
1836 * Dutchmans. */
1837 form = init_form();
1838 form->form_num = 0;
1839 add_to_list(part->document->forms, form);
1841 /* Attach this form control to the last form encountered. */
1842 form = part->document->forms.next;
1843 fc->form = form;
1844 add_to_list(form->items, fc);
#ifdef CONFIG_DEBUG
/** Assert that each form in the list has a different form.form_num
 * ... form.form_end range and that the ranges are contiguous and
 * together cover all numbers from 0 to INT_MAX.  Alternatively, the
 * whole list may be empty.  This function can be called from a
 * debugger, or automatically from some places.
 *
 * This function may leave assert_failed = 1; the caller must use
 * if_assert_failed. */
static void
assert_forms_list_ok(LIST_OF(struct form) *forms)
{
	struct form *outer;
	int zero_starts = 0;

	if (list_empty(*forms)) return;

	/* Every form is compared with every other one: O(n^2), but this
	 * is debugging code only. */
	foreach (outer, *forms) {
		struct form *inner;
		int followers = 0;

		if (outer->form_num == 0)
			zero_starts++;

		foreach (inner, *forms) {
			/* Ranges of distinct forms must not overlap. */
			assert(inner == outer
			       || inner->form_num > outer->form_end
			       || outer->form_num > inner->form_end);
			if (outer->form_end == inner->form_num - 1)
				followers++;
		}

		/* Only the form reaching INT_MAX has no successor; all
		 * others have exactly one. */
		if (outer->form_end == INT_MAX) {
			assert(followers == 0);
		} else {
			assert(followers == 1);
		}
	}

	assert(zero_starts == 1);
}
#else /* !CONFIG_DEBUG */
# define assert_forms_list_ok(forms) ((void) 0)
#endif /* !CONFIG_DEBUG */
1892 /* Reparents form items based on position in the source. */
1893 void
1894 check_html_form_hierarchy(struct part *part)
1896 struct document *document = part->document;
1897 INIT_LIST_OF(struct form_control, form_controls);
1898 struct form *form;
1899 struct form_control *fc, *next;
1901 if (list_empty(document->forms))
1902 return;
1904 assert_forms_list_ok(&document->forms);
1905 if_assert_failed {}
1907 /* Take out all badly placed form items. */
1909 foreach (form, document->forms) {
1911 assertm(form->form_num <= form->form_end,
1912 "%p [%d : %d]", form, form->form_num, form->form_end);
1914 foreachsafe (fc, next, form->items) {
1915 if (form->form_num <= fc->position
1916 && fc->position <= form->form_end)
1917 continue;
1919 move_to_top_of_list(form_controls, fc);
1923 /* Re-insert the form items the correct places. */
1925 foreachsafe (fc, next, form_controls) {
1927 foreach (form, document->forms) {
1928 if (fc->position < form->form_num
1929 || form->form_end < fc->position)
1930 continue;
1932 fc->form = form;
1933 move_to_top_of_list(form->items, fc);
1934 break;
1938 assert(list_empty(form_controls));
1941 static inline void
1942 color_link_lines(struct html_context *html_context)
1944 struct document *document = html_context->part->document;
1945 struct color_pair colors = INIT_COLOR_PAIR(par_format.bgcolor, 0x0);
1946 enum color_mode color_mode = document->options.color_mode;
1947 enum color_flags color_flags = document->options.color_flags;
1948 int y;
1950 for (y = 0; y < document->height; y++) {
1951 int x;
1953 for (x = 0; x < document->data[y].length; x++) {
1954 struct screen_char *schar = &document->data[y].chars[x];
1956 set_term_color(schar, &colors, color_flags, color_mode);
1958 /* XXX: Entering hack zone! Change to clink color after
1959 * link text has been recolored. */
1960 if (schar->data == ':' && colors.foreground == 0x0)
1961 colors.foreground = format.clink;
1964 colors.foreground = 0x0;
1968 static void *
1969 html_special(struct html_context *html_context, enum html_special_type c, ...)
1971 va_list l;
1972 struct part *part;
1973 struct document *document;
1974 void *ret_val = NULL;
1976 assert(html_context);
1977 if_assert_failed return NULL;
1979 part = html_context->part;
1981 assert(part);
1982 if_assert_failed return NULL;
1984 document = part->document;
1986 va_start(l, c);
1987 switch (c) {
1988 case SP_TAG:
1989 if (document) {
1990 unsigned char *t = va_arg(l, unsigned char *);
1992 html_special_tag(document, t, X(part->cx), Y(part->cy));
1994 break;
1995 case SP_FORM:
1997 struct form *form = va_arg(l, struct form *);
1999 html_special_form(part, form);
2000 break;
2002 case SP_CONTROL:
2004 struct form_control *fc = va_arg(l, struct form_control *);
2006 html_special_form_control(part, fc);
2007 break;
2009 case SP_TABLE:
2010 ret_val = renderer_context.convert_table;
2011 break;
2012 case SP_USED:
2013 ret_val = (void *) (long) !!document;
2014 break;
2015 case SP_CACHE_CONTROL:
2017 struct cache_entry *cached = renderer_context.cached;
2019 cached->cache_mode = CACHE_MODE_NEVER;
2020 cached->expire = 0;
2021 break;
2023 case SP_CACHE_EXPIRES:
2025 time_t expires = va_arg(l, time_t);
2026 struct cache_entry *cached = renderer_context.cached;
2028 if (!expires || cached->cache_mode == CACHE_MODE_NEVER)
2029 break;
2031 timeval_from_seconds(&cached->max_age, expires);
2032 cached->expire = 1;
2033 break;
2035 case SP_FRAMESET:
2037 struct frameset_param *fsp = va_arg(l, struct frameset_param *);
2038 struct frameset_desc *frameset_desc;
2040 if (!fsp->parent && document->frame_desc)
2041 break;
2043 frameset_desc = create_frameset(fsp);
2044 if (!fsp->parent && !document->frame_desc)
2045 document->frame_desc = frameset_desc;
2047 ret_val = frameset_desc;
2048 break;
2050 case SP_FRAME:
2052 struct frameset_desc *parent = va_arg(l, struct frameset_desc *);
2053 unsigned char *name = va_arg(l, unsigned char *);
2054 unsigned char *url = va_arg(l, unsigned char *);
2056 add_frameset_entry(parent, NULL, name, url);
2057 break;
2059 case SP_NOWRAP:
2060 renderer_context.nowrap = !!va_arg(l, int);
2061 break;
2062 case SP_REFRESH:
2064 unsigned long seconds = va_arg(l, unsigned long);
2065 unsigned char *t = va_arg(l, unsigned char *);
2067 if (document) {
2068 if (document->refresh)
2069 done_document_refresh(document->refresh);
2070 document->refresh = init_document_refresh(t, seconds);
2072 break;
2074 case SP_COLOR_LINK_LINES:
2075 if (document && use_document_bg_colors(&document->options))
2076 color_link_lines(html_context);
2077 break;
2078 case SP_STYLESHEET:
2079 #ifdef CONFIG_CSS
2080 if (document) {
2081 struct uri *uri = va_arg(l, struct uri *);
2083 add_to_uri_list(&document->css_imports, uri);
2085 #endif
2086 break;
2087 case SP_SCRIPT:
2088 #ifdef CONFIG_ECMASCRIPT
2089 if (document) {
2090 struct uri *uri = va_arg(l, struct uri *);
2092 add_to_uri_list(&document->ecmascript_imports, uri);
2094 #endif
2095 break;
2098 va_end(l);
2100 return ret_val;
2103 void
2104 free_table_cache(void)
2106 if (table_cache) {
2107 struct hash_item *item;
2108 int i;
2110 /* We do not free key here. */
2111 foreach_hash_item (item, *table_cache, i) {
2112 mem_free_if(item->value);
2115 free_hash(&table_cache);
2116 table_cache_entries = 0;
2120 struct part *
2121 format_html_part(struct html_context *html_context,
2122 unsigned char *start, unsigned char *end,
2123 int align, int margin, int width, struct document *document,
2124 int x, int y, unsigned char *head,
2125 int link_num)
2127 struct part *part;
2128 void *html_state;
2129 struct tag *saved_last_tag_to_move = renderer_context.last_tag_to_move;
2130 int saved_empty_format = renderer_context.empty_format;
2131 int saved_margin = html_context->margin;
2132 int saved_last_link_to_move = renderer_context.last_link_to_move;
2134 /* Hash creation if needed. */
2135 if (!table_cache) {
2136 table_cache = init_hash8();
2137 } else if (!document) {
2138 /* Search for cached entry. */
2139 struct table_cache_entry_key key;
2140 struct hash_item *item;
2142 /* Clear key to prevent potential alignment problem
2143 * when keys are compared. */
2144 memset(&key, 0, sizeof(key));
2146 key.start = start;
2147 key.end = end;
2148 key.align = align;
2149 key.margin = margin;
2150 key.width = width;
2151 key.x = x;
2152 key.link_num = link_num;
2154 item = get_hash_item(table_cache,
2155 (unsigned char *) &key,
2156 sizeof(key));
2157 if (item) { /* We found it in cache, so just copy and return. */
2158 part = mem_alloc(sizeof(*part));
2159 if (part) {
2160 copy_struct(part, &((struct table_cache_entry *)
2161 item->value)->part);
2162 return part;
2167 assertm(y >= 0, "format_html_part: y == %d", y);
2168 if_assert_failed return NULL;
2170 if (document) {
2171 struct node *node = mem_alloc(sizeof(*node));
2173 if (node) {
2174 int node_width = !html_context->table_level ? INT_MAX : width;
2176 set_box(&node->box, x, y, node_width, 1);
2177 add_to_list(document->nodes, node);
2180 renderer_context.last_link_to_move = document->nlinks;
2181 renderer_context.last_tag_to_move = (struct tag *) &document->tags;
2182 renderer_context.last_tag_for_newline = (struct tag *) &document->tags;
2183 } else {
2184 renderer_context.last_link_to_move = 0;
2185 renderer_context.last_tag_to_move = (struct tag *) NULL;
2186 renderer_context.last_tag_for_newline = (struct tag *) NULL;
2189 html_context->margin = margin;
2190 renderer_context.empty_format = !document;
2192 done_link_state_info();
2193 renderer_context.nobreak = 1;
2195 part = mem_calloc(1, sizeof(*part));
2196 if (!part) goto ret;
2198 part->document = document;
2199 part->box.x = x;
2200 part->box.y = y;
2201 part->cx = -1;
2202 part->cy = 0;
2203 part->link_num = link_num;
2205 html_state = init_html_parser_state(html_context, ELEMENT_IMMORTAL, align, margin, width);
2207 parse_html(start, end, part, head, html_context);
2209 done_html_parser_state(html_context, html_state);
2211 int_lower_bound(&part->max_width, part->box.width);
2213 renderer_context.nobreak = 0;
2215 done_link_state_info();
2216 mem_free_if(part->spaces);
2217 #ifdef CONFIG_UTF8
2218 mem_free_if(part->char_width);
2219 #endif
2221 if (document) {
2222 struct node *node = document->nodes.next;
2224 node->box.height = y - node->box.y + part->box.height;
2227 ret:
2228 renderer_context.last_link_to_move = saved_last_link_to_move;
2229 renderer_context.last_tag_to_move = saved_last_tag_to_move;
2230 renderer_context.empty_format = saved_empty_format;
2232 html_context->margin = saved_margin;
2234 if (html_context->table_level > 1 && !document
2235 && table_cache
2236 && table_cache_entries < MAX_TABLE_CACHE_ENTRIES) {
2237 /* Create a new entry. */
2238 /* Clear memory to prevent bad key comparaison due to alignment
2239 * of key fields. */
2240 struct table_cache_entry *tce = mem_calloc(1, sizeof(*tce));
2242 if (tce) {
2243 tce->key.start = start;
2244 tce->key.end = end;
2245 tce->key.align = align;
2246 tce->key.margin = margin;
2247 tce->key.width = width;
2248 tce->key.x = x;
2249 tce->key.link_num = link_num;
2250 copy_struct(&tce->part, part);
2252 if (!add_hash_item(table_cache,
2253 (unsigned char *) &tce->key,
2254 sizeof(tce->key), tce)) {
2255 mem_free(tce);
2256 } else {
2257 table_cache_entries++;
2262 return part;
2265 void
2266 render_html_document(struct cache_entry *cached, struct document *document,
2267 struct string *buffer)
2269 struct html_context *html_context;
2270 struct part *part;
2271 unsigned char *start;
2272 unsigned char *end;
2273 struct string title;
2274 struct string head;
2276 assert(cached && document);
2277 if_assert_failed return;
2279 if (!init_string(&head)) return;
2281 if (cached->head) add_to_string(&head, cached->head);
2283 start = buffer->source;
2284 end = buffer->source + buffer->length;
2286 html_context = init_html_parser(cached->uri, &document->options,
2287 start, end, &head, &title,
2288 put_chars_conv, line_break,
2289 html_special);
2290 if (!html_context) return;
2292 renderer_context.g_ctrl_num = 0;
2293 renderer_context.cached = cached;
2294 renderer_context.convert_table = get_convert_table(head.source,
2295 document->options.cp,
2296 document->options.assume_cp,
2297 &document->cp,
2298 &document->cp_status,
2299 document->options.hard_assume);
2300 #ifdef CONFIG_UTF8
2301 html_context->options->utf8 = is_cp_utf8(document->options.cp);
2302 #endif /* CONFIG_UTF8 */
2303 html_context->doc_cp = document->cp;
2305 if (title.length) {
2306 document->title = convert_string(renderer_context.convert_table,
2307 title.source, title.length,
2308 document->options.cp,
2309 CSM_DEFAULT, NULL, NULL, NULL);
2311 done_string(&title);
2313 part = format_html_part(html_context, start, end, par_format.align,
2314 par_format.leftmargin,
2315 document->options.box.width, document,
2316 0, 0, head.source, 1);
2318 /* Drop empty allocated lines at end of document if any
2319 * and adjust document height. */
2320 while (document->height && !document->data[document->height - 1].length)
2321 mem_free_if(document->data[--document->height].chars);
2323 /* Calculate document width. */
2325 int i;
2327 document->width = 0;
2328 for (i = 0; i < document->height; i++)
2329 int_lower_bound(&document->width, document->data[i].length);
2332 #if 1
2333 document->options.needs_width = 1;
2334 #else
2335 /* FIXME: This needs more tuning since if we are centering stuff it
2336 * does not work. */
2337 document->options.needs_width =
2338 (document->width + (document->options.margin
2339 >= document->options.width));
2340 #endif
2342 document->bgcolor = par_format.bgcolor;
2344 done_html_parser(html_context);
2346 /* Drop forms which has been serving as a placeholder for form items
2347 * added in the wrong order due to the ordering of table rendering. */
2349 struct form *form;
2351 foreach (form, document->forms) {
2352 if (form->form_num)
2353 continue;
2355 if (list_empty(form->items))
2356 done_form(form);
2358 break;
2362 /* @part was residing in html_context so it has to stay alive until
2363 * done_html_parser(). */
2364 done_string(&head);
2365 mem_free_if(part);
2367 #if 0 /* debug purpose */
2369 FILE *f = fopen("forms", "ab");
2370 struct form_control *form;
2371 unsigned char *qq;
2372 fprintf(f,"FORM:\n");
2373 foreach (form, document->forms) {
2374 fprintf(f, "g=%d f=%d c=%d t:%d\n",
2375 form->g_ctrl_num, form->form_num,
2376 form->ctrl_num, form->type);
2378 fprintf(f,"fragment: \n");
2379 for (qq = start; qq < end; qq++) fprintf(f, "%c", *qq);
2380 fprintf(f,"----------\n\n");
2381 fclose(f);
2383 #endif