src/document/html/renderer.c

   1 /* HTML renderer */
   2
   3 #ifdef HAVE_CONFIG_H
   4 #include "config.h"
   5 #endif
   6
   7 /* Our current implementation of combining characters requires
   8  * wcwidth().  Therefore the configure script should have disabled
   9  * CONFIG_COMBINE if wcwidth() doesn't exist.  */
  10 #ifdef CONFIG_COMBINE
  11 #define _XOPEN_SOURCE 500       /* for wcwidth */
  12 #endif
  13
  14 #include <ctype.h>
  15 #include <stdarg.h>
  16 #include <string.h>
  17
  18 #ifdef HAVE_WCHAR_H
  19 #include <wchar.h>
  20 #endif
  21
  22 #include "elinks.h"
  23
  24 #include "cache/cache.h"
  25 #include "config/options.h"
  26 #include "document/docdata.h"
  27 #include "document/document.h"
  28 #include "document/html/frames.h"
  29 #include "document/html/parser.h"
  30 #include "document/html/parser/parse.h"
  31 #include "document/html/renderer.h"
  32 #include "document/html/tables.h"
  33 #include "document/options.h"
  34 #include "document/refresh.h"
  35 #include "document/renderer.h"
  36 #include "intl/charsets.h"
  37 #include "osdep/types.h"
  38 #include "protocol/uri.h"
  39 #include "session/session.h"
  40 #include "terminal/color.h"
  41 #include "terminal/draw.h"
  42 #include "util/color.h"
  43 #include "util/conv.h"
  44 #include "util/error.h"
  45 #include "util/hash.h"
  46 #include "util/lists.h"
  47 #include "util/memory.h"
  48 #include "util/string.h"
  49 #include "util/time.h"
  50 #include "viewer/text/form.h"
  51 #include "viewer/text/view.h"
  52 #include "viewer/text/vs.h"
  53
  54 /* Unsafe macros */
  55 #include "document/html/internal.h"
  56
  57 /* Types and structs */
  58
/* Link state handed to the character-output helpers together with the text.
 * NOTE(review): the precise difference between NEW and SAME is decided by
 * code outside this chunk -- confirm there. */
enum link_state {
        LINK_STATE_NONE,        /* The only state for which get_format_screen_char() never adds underlining. */
        LINK_STATE_NEW,
        LINK_STATE_SAME,
};
  64
/* Bundle of link-related pointers; embedded in struct renderer_context as
 * its link_state_info member.
 * NOTE(review): presumably a snapshot of the current format's link fields
 * used to detect when the active link changes -- confirm against the users.
 * Ownership of the pointed-to data is not visible here. */
struct link_state_info {
        unsigned char *link;
        unsigned char *target;
        unsigned char *image;
        struct form_control *form;
};
  71
/* Key stored in each struct table_cache_entry.
 * NOTE(review): start/end presumably delimit the HTML source range of the
 * cached table and the integers are the layout parameters it was formatted
 * with -- confirm against the code that fills the key. */
struct table_cache_entry_key {
        unsigned char *start;
        unsigned char *end;
        int align;
        int margin;
        int width;
        int x;
        int link_num;
};
  81
/* One table cache record: list linkage, the key it is stored under and the
 * struct part kept for that key. */
struct table_cache_entry {
        LIST_HEAD(struct table_cache_entry);

        struct table_cache_entry_key key;
        struct part part;
};
  88
/* Max. entries in table cache used for nested tables. */
#define MAX_TABLE_CACHE_ENTRIES 16384

/* Global variables */
/* NOTE(review): presumably the number of entries currently held in
 * table_cache, bounded by MAX_TABLE_CACHE_ENTRIES -- the maintenance code is
 * outside this chunk. */
static int table_cache_entries;
/* Hash used as the table cache; its setup and lookup code is outside this
 * chunk. */
static struct hash *table_cache;
  95
  96
/* Renderer state shared by the functions of this file through the single
 * file-scope instance `renderer_context' defined right after the struct. */
struct renderer_context {
        int last_link_to_move;
        struct tag *last_tag_to_move;
        /* All tags between document->tags and this tag (inclusive) should
         * be aligned to the next line break, unless some real content follows
         * the tag. Therefore, this virtual tags list accumulates new tags as
         * they arrive and empties when some real content is written; if a line
         * break is inserted in the meanwhile, the tags follow it (ie. imagine
         * <a name="x"> <p>, then the "x" tag follows the line breaks inserted
         * by the <p> tag). */
        struct tag *last_tag_for_newline;

        struct link_state_info link_state_info;

        struct conv_table *convert_table;

        /* Used for setting cache info from HTTP-EQUIV meta tags. */
        struct cache_entry *cached;

        int g_ctrl_num;
        int subscript;  /* Count stacked subscripts */
        int supscript;  /* Count stacked supscripts */

        unsigned int empty_format:1;
        unsigned int nobreak:1;
        /* When set, get_format_screen_char() turns SCREEN_ATTR_UNSEARCHABLE
         * on in the template it returns; when clear, it turns it off. */
        unsigned int nosearchable:1;
        unsigned int nowrap:1; /* Activated/deactivated by SP_NOWRAP. */
};

static struct renderer_context renderer_context;
 127
 128
/* Prototypes */
static void line_break(struct html_context *);
static void put_chars(struct html_context *, unsigned char *, int);

/* Translate part-relative coordinates into document coordinates by adding
 * the origin of the part's box.  Both macros expand to an expression that
 * uses a variable named `part', which must exist in the calling scope. */
#define X(x_)   (part->box.x + (x_))
#define Y(y_)   (part->box.y + (y_))

/* Granularity argument that ALIGN_SPACES() hands to mem_align_alloc(). */
#define SPACES_GRANULARITY      0x7F

/* Grow the array at *x from o to n elements via mem_align_alloc(); used for
 * part->spaces and part->char_width. */
#define ALIGN_SPACES(x, o, n) mem_align_alloc(x, o, n, SPACES_GRANULARITY)
 139
 140 static inline void
 141 set_screen_char_color(struct screen_char *schar,
 142                       color_T bgcolor, color_T fgcolor,
 143                       enum color_flags color_flags,
 144                       enum color_mode color_mode)
 145 {
 146         struct color_pair colors = INIT_COLOR_PAIR(bgcolor, fgcolor);
 147
 148         set_term_color(schar, &colors, color_flags, color_mode);
 149 }
 150
 151 static int
 152 realloc_line(struct html_context *html_context, struct document *document,
 153              int y, int length)
 154 {
 155         struct screen_char *pos, *end;
 156         struct line *line;
 157         int orig_length;
 158
 159         if (!realloc_lines(document, y))
 160                 return -1;
 161
 162         line = &document->data[y];
 163         orig_length = line->length;
 164
 165         if (length < orig_length)
 166                 return orig_length;
 167
 168         if (!ALIGN_LINE(&line->chars, line->length, length + 1))
 169                 return -1;
 170
 171         /* We cannot rely on the aligned allocation to clear the members for us
 172          * since for line splitting we simply trim the length. Question is if
 173          * it is better to to clear the line after the splitting or here. */
 174         end = &line->chars[length];
 175         end->data = ' ';
 176         end->attr = 0;
 177         set_screen_char_color(end, par_format.color.background, 0x0,
 178                               COLOR_ENSURE_CONTRAST, /* for bug 461 */
 179                               document->options.color_mode);
 180
 181         for (pos = &line->chars[line->length]; pos < end; pos++) {
 182                 copy_screen_chars(pos, end, 1);
 183         }
 184
 185         line->length = length + 1;
 186
 187         return orig_length;
 188 }
 189
 190 void
 191 expand_lines(struct html_context *html_context, struct part *part,
 192              int x, int y, int lines, color_T bgcolor)
 193 {
 194         int line;
 195
 196         assert(part && part->document);
 197         if_assert_failed return;
 198
 199         if (!use_document_bg_colors(&part->document->options))
 200                 return;
 201
 202         par_format.color.background = bgcolor;
 203
 204         for (line = 0; line < lines; line++)
 205                 realloc_line(html_context, part->document, Y(y + line), X(x));
 206 }
 207
 208 static inline int
 209 realloc_spaces(struct part *part, int length)
 210 {
 211         if (length < part->spaces_len)
 212                 return 0;
 213
 214         if (!ALIGN_SPACES(&part->spaces, part->spaces_len, length))
 215                 return -1;
 216 #ifdef CONFIG_UTF8
 217         if (!ALIGN_SPACES(&part->char_width, part->spaces_len, length))
 218                 return -1;
 219 #endif
 220
 221         part->spaces_len = length;
 222
 223         return 0;
 224 }
 225
 226
/* Accessors taking part-relative coordinates.  Like X() and Y(), they rely
 * on a variable named `part' in the calling scope.
 * LINE(y_)    - the document line at part-relative row y_.
 * POS(x_, y_) - the screen char at part-relative (x_, y_); the caller must
 *               have made sure the line is long enough.
 * LEN(y_)     - length of that line minus the part's left edge, never
 *               negative. */
#define LINE(y_)        part->document->data[Y(y_)]
#define POS(x_, y_)     LINE(y_).chars[X(x_)]
#define LEN(y_)         int_max(LINE(y_).length - part->box.x, 0)
 230
 231
 232 /* When we clear chars we want to preserve and use the background colors
 233  * already in place else we could end up ``staining'' the background especial
 234  * when drawing table cells. So make the cleared chars share the colors in
 235  * place.
 236  *
 237  * This function does not update document.comb_x and document.comb_y.
 238  * That is the caller's responsibility.  */
 239 static inline void
 240 clear_hchars(struct html_context *html_context, int x, int y, int width,
 241              struct screen_char *a)
 242 {
 243         struct part *part;
 244         struct screen_char *pos, *end;
 245
 246         assert(html_context);
 247         if_assert_failed return;
 248
 249         part = html_context->part;
 250
 251         assert(part && part->document && width > 0);
 252         if_assert_failed return;
 253
 254         if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1) < 0)
 255                 return;
 256
 257         assert(part->document->data);
 258         if_assert_failed return;
 259
 260         pos = &POS(x, y);
 261         end = pos + width - 1;
 262         end->data = ' ';
 263         end->attr = 0;
 264         end->c = a->c;
 265
 266         while (pos < end)
 267                 copy_screen_chars(pos++, end, 1);
 268 }
 269
 270 /* TODO: Merge parts with get_format_screen_char(). --jonas */
 271 /* Allocates the required chars on the given line and returns the char at
 272  * position (x, y) ready to be used as a template char.  */
 273 static inline struct screen_char *
 274 get_frame_char(struct html_context *html_context, struct part *part,
 275                int x, int y, unsigned char data,
 276                color_T bgcolor, color_T fgcolor)
 277 {
 278         struct screen_char *template;
 279
 280         assert(html_context);
 281         if_assert_failed return NULL;
 282
 283         assert(part && part->document && x >= 0 && y >= 0);
 284         if_assert_failed return NULL;
 285
 286         if (realloc_line(html_context, part->document, Y(y), X(x)) < 0)
 287                 return NULL;
 288
 289         assert(part->document->data);
 290         if_assert_failed return NULL;
 291
 292         template = &POS(x, y);
 293         template->data = data;
 294         template->attr = SCREEN_ATTR_FRAME;
 295         set_screen_char_color(template, bgcolor, fgcolor,
 296                               part->document->options.color_flags,
 297                               part->document->options.color_mode);
 298
 299         return template;
 300 }
 301
 302 void
 303 draw_frame_hchars(struct part *part, int x, int y, int width,
 304                   unsigned char data, color_T bgcolor, color_T fgcolor,
 305                   struct html_context *html_context)
 306 {
 307         struct screen_char *template;
 308
 309         assert(width > 0);
 310         if_assert_failed return;
 311
 312         template = get_frame_char(html_context, part, x + width - 1, y, data, bgcolor, fgcolor);
 313         if (!template) return;
 314
 315         /* The template char is the last we need to draw so only decrease @width. */
 316         for (width -= 1; width; width--, x++) {
 317                 copy_screen_chars(&POS(x, y), template, 1);
 318         }
 319 }
 320
 321 void
 322 draw_frame_vchars(struct part *part, int x, int y, int height,
 323                   unsigned char data, color_T bgcolor, color_T fgcolor,
 324                   struct html_context *html_context)
 325 {
 326         struct screen_char *template = get_frame_char(html_context, part, x, y,
 327                                                       data, bgcolor, fgcolor);
 328
 329         if (!template) return;
 330
 331         /* The template char is the first vertical char to be drawn. So
 332          * copy it to the rest. */
 333         for (height -= 1, y += 1; height; height--, y++) {
 334                 if (realloc_line(html_context, part->document, Y(y), X(x)) < 0)
 335                         return;
 336
 337                 copy_screen_chars(&POS(x, y), template, 1);
 338         }
 339 }
 340
 341 static inline struct screen_char *
 342 get_format_screen_char(struct html_context *html_context,
 343                        enum link_state link_state)
 344 {
 345         static struct text_style ta_cache = INIT_TEXT_STYLE(-1, 0x0, 0x0);
 346         static struct screen_char schar_cache;
 347
 348         if (memcmp(&ta_cache, &format.style, sizeof(ta_cache))) {
 349                 copy_struct(&ta_cache, &format.style);
 350                 struct text_style final_style = format.style;
 351
 352                 if (link_state != LINK_STATE_NONE
 353                     && html_context->options->underline_links) {
 354                         final_style.attr |= AT_UNDERLINE;
 355                 }
 356
 357                 get_screen_char_template(&schar_cache, html_context->options, final_style);
 358         }
 359
 360         if (!!(schar_cache.attr & SCREEN_ATTR_UNSEARCHABLE)
 361             ^ !!renderer_context.nosearchable) {
 362                 schar_cache.attr ^= SCREEN_ATTR_UNSEARCHABLE;
 363         }
 364
 365         return &schar_cache;
 366 }
 367
 368 /* document.comb_x and document.comb_y exist only when CONFIG_COMBINE
 369  * is defined.  assert() does nothing if CONFIG_FASTMEM is defined.  */
 370 #if defined(CONFIG_COMBINE) && !defined(CONFIG_FASTMEM)
 371 /** Assert that path->document->comb_x and part->document->comb_y
 372  * refer to an allocated struct screen_char, or comb_x is -1.
 373  *
 374  * The CONFIG_COMBINE variant of set_hline() can update the
 375  * screen_char.data at these coordinates.  Sometimes, the coordinates
 376  * have not been valid, and the update has corrupted memory.  These
 377  * assertions should catch that bug if it happens again.
 378  *
 379  * @post This function can leave ::assert_failed set, so the caller
 380  * should use ::if_assert_failed, perhaps with discard_comb_x_y().  */
 381 static void
 382 assert_comb_x_y_ok(const struct document *document)
 383 {
 384         assert(document);
 385         if (document->comb_x != -1) {
 386                 assert(document->comb_y >= 0);
 387                 assert(document->comb_y < document->height);
 388                 assert(document->comb_x >= 0);
 389                 assert(document->comb_x < document->data[document->comb_y].length);
 390         }
 391 }
 392 #else
 393 # define assert_comb_x_y_ok(document) ((void) 0)
 394 #endif
 395
 396 #ifdef CONFIG_COMBINE
 397 /** Discard any combining characters that have not yet been combined
 398  * with to the previous base character.  */
 399 static void
 400 discard_comb_x_y(struct document *document)
 401 {
 402         document->comb_x = -1;
 403         document->comb_y = -1;
 404         document->combi_length = 0;
 405 }
 406 #else
 407 # define discard_comb_x_y(document) ((void) 0)
 408 #endif
 409
 410 #ifdef CONFIG_COMBINE
 411 static void
 412 move_comb_x_y(struct part *part, int xf, int yf, int xt, int yt)
 413 {
 414         if (part->document->comb_x != -1
 415             && part->document->comb_y == Y(yf)
 416             && part->document->comb_x >= X(xf)) {
 417                 if (yt >= 0) {
 418                         part->document->comb_x += xt - xf;
 419                         part->document->comb_y += yt - yf;
 420                 } else
 421                         discard_comb_x_y(part->document);
 422         }
 423 }
 424 #else
 425 # define move_comb_x_y(part, xf, yf, xt, yt) ((void) 0)
 426 #endif
 427
 428 #ifdef CONFIG_COMBINE
 429 static void
 430 set_comb_x_y(struct part *part, int x, int y)
 431 {
 432         struct document *document = part->document;
 433
 434         document->comb_x = X(x);
 435         document->comb_y = Y(y);
 436         assert_comb_x_y_ok(document);
 437         if_assert_failed discard_comb_x_y(document);
 438 }
 439 #else
 440 # define set_comb_x_y(part, x, y) ((void) 0)
 441 #endif
 442
 443 #ifdef CONFIG_COMBINE
 444 static void
 445 put_combined(struct part *part, int x)
 446 {
 447         struct document *document = part->document;
 448
 449         if (document->combi_length) {
 450                 if (document->comb_x != -1) {
 451                         unicode_val_T prev = get_combined(document->combi, document->combi_length + 1);
 452
 453                         assert_comb_x_y_ok(document);
 454                         if_assert_failed prev = UCS_NO_CHAR;
 455
 456                         /* Make sure the combined character is not considered as
 457                          * a space. */
 458                         if (x)
 459                                 part->spaces[x - 1] = 0;
 460
 461                         if (prev != UCS_NO_CHAR)
 462                                 document->data[document->comb_y]
 463                                         .chars[document->comb_x].data = prev;
 464                 }
 465                 document->combi_length = 0;
 466         }
 467 }
 468 #else
 469 # define put_combined(part, x) ((void) 0)
 470 #endif
 471
 472 #ifdef CONFIG_UTF8
 473 /* First possibly do the format change and then find out what coordinates
 474  * to use since sub- or superscript might change them */
 475 static inline int
 476 set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
 477           enum link_state link_state)
 478 {
 479         struct part *const part = html_context->part;
 480         struct screen_char *const schar = get_format_screen_char(html_context,
 481                                                                  link_state);
 482         int x = part->cx;
 483         const int y = part->cy;
 484         const int x2 = x;
 485         int len = charslen;
 486         const int utf8 = html_context->options->utf8;
 487         int orig_length;
 488
 489         assert(part);
 490         if_assert_failed return len;
 491
 492         assert(charslen >= 0);
 493
 494         if (realloc_spaces(part, x + charslen))
 495                 return 0;
 496
 497         /* U+00AD SOFT HYPHEN characters in HTML documents are
 498          * supposed to be displayed only if the word is broken at that
 499          * point.  ELinks currently does not use them, so it should
 500          * not display them.  If the input @chars is in UTF-8, then
 501          * set_hline() discards the characters.  If the input is in
 502          * some other charset, then set_hline() does not know which
 503          * byte that charset uses for U+00AD, so it cannot discard
 504          * the characters; instead, the translation table used by
 505          * convert_string() has already discarded the characters.
 506          *
 507          * Likewise, if the input @chars is in UTF-8, then it may
 508          * contain U+00A0 NO-BREAK SPACE characters; but if the input
 509          * is in some other charset, then the translation table
 510          * has mapped those characters to NBSP_CHAR.  */
 511
 512         if (part->document) {
 513                 struct document *const document = part->document;
 514
 515                 assert_comb_x_y_ok(document);
 516                 if_assert_failed discard_comb_x_y(document);
 517
 518                 /* Reallocate LINE(y).chars[] to large enough.  The
 519                  * last parameter of realloc_line is the index of the
 520                  * last element to which we may want to write,
 521                  * i.e. one less than the required size of the array.
 522                  * Compute the required size by assuming that each
 523                  * byte of input will need at most one character cell.
 524                  * (All double-cell characters take up at least two
 525                  * bytes in UTF-8, and there are no triple-cell or
 526                  * wider characters.)  However, if there already is an
 527                  * incomplete character in document->buf, then
 528                  * the first byte of input can result in a double-cell
 529                  * character, so we must reserve one extra element.  */
 530                 orig_length = realloc_line(html_context, document,
 531                                            Y(y), X(x) + charslen);
 532                 if (orig_length < 0) /* error */
 533                         return 0;
 534                 if (utf8) {
 535                         unsigned char *const end = chars + charslen;
 536                         unicode_val_T data;
 537
 538                         if (document->buf_length) {
 539                                 /* previous char was broken in the middle */
 540                                 int length = utf8charlen(document->buf);
 541                                 unsigned char i;
 542                                 unsigned char *buf_ptr = document->buf;
 543
 544                                 for (i = document->buf_length; i < length && chars < end;) {
 545                                         document->buf[i++] = *chars++;
 546                                 }
 547                                 document->buf_length = i;
 548                                 document->buf[i] = '\0';
 549                                 data = utf8_to_unicode(&buf_ptr, buf_ptr + i);
 550                                 if (data != UCS_NO_CHAR) {
 551                                         /* FIXME: If there was invalid
 552                                          * UTF-8 in the buffer,
 553                                          * @utf8_to_unicode may have left
 554                                          * some bytes unused.  Those
 555                                          * bytes should be pulled back
 556                                          * into @chars, rather than
 557                                          * discarded.  This is not
 558                                          * trivial to implement because
 559                                          * each byte may have arrived in
 560                                          * a separate call.  */
 561                                         document->buf_length = 0;
 562                                         goto good_char;
 563                                 } else {
 564                                         /* Still not full char */
 565                                         assert_comb_x_y_ok(document);
 566                                         LINE(y).length = orig_length;
 567                                         assert_comb_x_y_ok(document);
 568                                         if_assert_failed discard_comb_x_y(document);
 569                                         return 0;
 570                                 }
 571                         }
 572
 573                         while (chars < end) {
 574                                 /* ELinks does not use NBSP_CHAR in UTF-8.  */
 575
 576                                 data = utf8_to_unicode(&chars, end);
 577                                 if (data == UCS_NO_CHAR) {
 578                                         part->spaces[x] = 0;
 579                                         if (charslen == 1) {
 580                                                 /* HR */
 581                                                 unsigned char attr = schar->attr;
 582
 583                                                 schar->data = *chars++;
 584                                                 schar->attr = SCREEN_ATTR_FRAME;
 585                                                 copy_screen_chars(&POS(x, y), schar, 1);
 586                                                 schar->attr = attr;
 587                                                 part->char_width[x++] = 0;
 588                                                 continue;
 589                                         } else {
 590                                                 unsigned char i;
 591
 592                                                 for (i = 0; chars < end;i++) {
 593                                                         document->buf[i] = *chars++;
 594                                                 }
 595                                                 document->buf_length = i;
 596                                                 break;
 597                                         }
 598                                         /* not reached */
 599                                 }
 600
 601 good_char:
 602                                 if (data == UCS_SOFT_HYPHEN)
 603                                         continue;
 604
 605                                 if (data == UCS_NO_BREAK_SPACE
 606                                     && html_context->options->wrap_nbsp)
 607                                         data = UCS_SPACE;
 608
 609 #ifdef CONFIG_COMBINE
 610                                 if (wcwidth((wchar_t)data)) {
 611                                         put_combined(part, x);
 612                                         document->combi[0] = data;
 613                                 } else {
 614                                         if (part->cx == x) {
 615                                                 if (X(x)) {
 616                                                         /* Isolated combining
 617                                                          * character not on the
 618                                                          * first column: combine
 619                                                          * it with whatever is
 620                                                          * printed at its left. */
 621                                                         document->combi[0] = POS(x - 1, y).data;
 622                                                         set_comb_x_y(part, x - 1, y);
 623                                                 } else {
 624                                                         /* Isolated combining
 625                                                          * character on the
 626                                                          * first column: use
 627                                                          * UCS_NO_BREAK_SPACE as
 628                                                          * the base character.
 629                                                          * */
 630                                                         document->combi[0] = UCS_NO_BREAK_SPACE;
 631                                                         set_comb_x_y(part, x, y);
 632                                                         schar->data = UCS_SPACE;
 633                                                         copy_screen_chars(&POS(x++, y), schar, 1);
 634                                                 }
 635                                         }
 636                                         if (document->combi_length < (UCS_MAX_LENGTH_COMBINED - 1))
 637                                                 document->combi[++document->combi_length] = data;
 638                                         continue;
 639                                 }
 640 #endif /* CONFIG_COMBINE */
 641                                 part->spaces[x] = (data == UCS_SPACE);
 642
 643                                 if (unicode_to_cell(data) == 2) {
 644                                         schar->data = (unicode_val_T)data;
 645                                         part->char_width[x] = 2;
 646                                         copy_screen_chars(&POS(x++, y), schar, 1);
 647                                         schar->data = UCS_NO_CHAR;
 648                                         part->spaces[x] = 0;
 649                                         part->char_width[x] = 0;
 650                                 } else {
 651                                         part->char_width[x] = unicode_to_cell(data);
 652                                         schar->data = (unicode_val_T)data;
 653                                 }
 654
 655                                 set_comb_x_y(part, x, y);
 656
 657                                 copy_screen_chars(&POS(x++, y), schar, 1);
 658                         } /* while chars < end */
 659
 660                         /* Display any trailing combining characters. */
 661                         put_combined(part, x);
 662                 } else { /* not UTF-8 */
 663                         for (; charslen > 0; charslen--, x++, chars++) {
 664                                 part->char_width[x] = 1;
 665                                 if (*chars == NBSP_CHAR) {
 666                                         schar->data = ' ';
 667                                         part->spaces[x] = html_context->options->wrap_nbsp;
 668                                 } else {
 669                                         part->spaces[x] = (*chars == ' ');
 670                                         schar->data = *chars;
 671                                 }
 672                                 copy_screen_chars(&POS(x, y), schar, 1);
 673                         }
 674                 } /* end of UTF-8 check */
 675
 676                 /* Assert that we haven't written past the end of the
 677                  * LINE(y).chars array.  @x here is one greater than
 678                  * the last one used in POS(x, y).  Instead of this,
 679                  * we could assert(X(x) < LINE(y).length) immediately
 680                  * before each @copy_screen_chars call above, but
 681                  * those are in an inner loop that should be fast.  */
 682                 assert(X(x) <= LINE(y).length);
 683                 /* Some part of the code is apparently using LINE(y).length
 684                  * for line-wrapping decisions.  It may currently be too
 685                  * large because it was allocated above based on @charslen
 686                  * which is the number of bytes, not the number of cells.
 687                  * Change the length to the correct size, but don't let it
 688                  * get smaller than it was on entry to this function.  */
 689                 assert_comb_x_y_ok(document);
 690                 LINE(y).length = int_max(orig_length, X(x));
 691                 assert_comb_x_y_ok(document);
 692                 if_assert_failed discard_comb_x_y(document);
 693                 len = x - x2;
 694         } else { /* part->document == NULL */
 695                 if (utf8) {
 696                         unsigned char *const end = chars + charslen;
 697
 698                         while (chars < end) {
 699                                 unicode_val_T data;
 700
 701                                 data = utf8_to_unicode(&chars, end);
 702 #ifdef CONFIG_COMBINE
 703                                 if (data == UCS_SOFT_HYPHEN
 704                                     || (data != UCS_NO_CHAR && wcwidth((wchar_t)data) == 0))
 705 #else
 706                                 if (data == UCS_SOFT_HYPHEN)
 707 #endif
 708                                         continue;
 709
 710                                 if (data == UCS_NO_BREAK_SPACE
 711                                     && html_context->options->wrap_nbsp)
 712                                         data = UCS_SPACE;
 713                                 part->spaces[x] = (data == UCS_SPACE);
 714
 715                                 part->char_width[x] = unicode_to_cell(data);
 716                                 if (part->char_width[x] == 2) {
 717                                         x++;
 718                                         part->spaces[x] = 0;
 719                                         part->char_width[x] = 0;
 720                                 }
 721                                 if (data == UCS_NO_CHAR) {
 722                                         /* this is at the end only */
 723                                         return x - x2;
 724                                 }
 725                                 x++;
 726                         } /* while chars < end */
 727                         len = x - x2;
 728                 } else { /* not UTF-8 */
 729                         for (; charslen > 0; charslen--, x++, chars++) {
 730                                 part->char_width[x] = 1;
 731                                 if (*chars == NBSP_CHAR) {
 732                                         part->spaces[x] = html_context->options->wrap_nbsp;
 733                                 } else {
 734                                         part->spaces[x] = (*chars == ' ');
 735                                 }
 736                         }
 737                 }
 738         } /* end of part->document check */
 739         return len;
 740 }
 741 #else
 742
 743 /* First possibly do the format change and then find out what coordinates
 744  * to use since sub- or superscript might change them */
 745 static inline void
 746 set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
 747           enum link_state link_state)
 748 {
 749         struct part *part = html_context->part;
 750         struct screen_char *schar = get_format_screen_char(html_context,
 751                                                            link_state);
 752         int x = part->cx;
 753         int y = part->cy;
 754
 755         assert(part);
 756         if_assert_failed return;
 757
 758         if (realloc_spaces(part, x + charslen))
 759                 return;
 760
 761         if (part->document) {
 762                 if (realloc_line(html_context, part->document,
 763                                  Y(y), X(x) + charslen - 1) < 0)
 764                         return;
 765
 766                 for (; charslen > 0; charslen--, x++, chars++) {
 767                         if (*chars == NBSP_CHAR) {
 768                                 schar->data = ' ';
 769                                 part->spaces[x] = html_context->options->wrap_nbsp;
 770                         } else {
 771                                 part->spaces[x] = (*chars == ' ');
 772                                 schar->data = *chars;
 773                         }
 774                         copy_screen_chars(&POS(x, y), schar, 1);
 775                 }
 776         } else {
 777                 for (; charslen > 0; charslen--, x++, chars++) {
 778                         if (*chars == NBSP_CHAR) {
 779                                 part->spaces[x] = html_context->options->wrap_nbsp;
 780                         } else {
 781                                 part->spaces[x] = (*chars == ' ');
 782                         }
 783                 }
 784         }
 785 }
 786 #endif /* CONFIG_UTF8 */
 787
 788 static void
 789 move_links(struct html_context *html_context, int xf, int yf, int xt, int yt)
 790 {
 791         struct part *part;
 792         struct tag *tag;
 793         int nlink = renderer_context.last_link_to_move;
 794         int matched = 0;
 795
 796         assert(html_context);
 797         if_assert_failed return;
 798
 799         part = html_context->part;
 800
 801         assert(part && part->document);
 802         if_assert_failed return;
 803
 804         if (!realloc_lines(part->document, Y(yt)))
 805                 return;
 806
 807         for (; nlink < part->document->nlinks; nlink++) {
 808                 struct link *link = &part->document->links[nlink];
 809                 int i;
 810
 811                 for (i = 0; i < link->npoints; i++) {
 812                         /* Fix for bug 479 (part one) */
 813                         /* The scenario that triggered it:
 814                          *
 815                          * Imagine a centered element containing a really long
 816                          * word (over half of the screen width long) followed
 817                          * by a few links with no spaces between them where all
 818                          * the link text combined with the really long word
 819                          * will force the line to be wrapped. When rendering
 820                          * the line first words (including link text words) are
 821                          * put on one line. Then wrapping is performed moving
 822                          * all links from current line to the one below. Then
 823                          * the current line (now only containing the really
 824                          * long word) is centered. This will trigger a call to
 825                          * move_links() which will increment.
 826                          *
 827                          * Without the fix below the centering of the current
 828                          * line will increment last_link_to_move to that of the
 829                          * last link which means centering of the next line
 830                          * with all the links will only move the last link
 831                          * leaving all the other links' points dangling and
 832                          * causing buggy link highlighting.
 833                          *
 834                          * Even links like textareas will be correctly handled
 835                          * because @last_link_to_move is a way to optimize how
 836                          * many links move_links() will have to iterate and
 837                          * this little fix will only decrease the effect of the
 838                          * optimization by always ensuring it is never
 839                          * incremented too far. */
 840                         if (!matched && link->points[i].y > Y(yf)) {
 841                                 matched = 1;
 842                                 continue;
 843                         }
 844
 845                         if (link->points[i].y != Y(yf))
 846                                 continue;
 847
 848                         matched = 1;
 849
 850                         if (link->points[i].x < X(xf))
 851                                 continue;
 852
 853                         if (yt >= 0) {
 854                                 link->points[i].y = Y(yt);
 855                                 link->points[i].x += -xf + xt;
 856                         } else {
 857                                 int to_move = link->npoints - (i + 1);
 858
 859                                 assert(to_move >= 0);
 860
 861                                 if (to_move > 0) {
 862                                         memmove(&link->points[i],
 863                                                 &link->points[i + 1],
 864                                                 to_move *
 865                                                 sizeof(*link->points));
 866                                         i--;
 867                                 }
 868
 869                                 link->npoints--;
 870                         }
 871                 }
 872
 873                 if (!matched) {
 874                         renderer_context.last_link_to_move = nlink;
 875                 }
 876         }
 877
 878         /* Don't move tags when removing links. */
 879         if (yt < 0) return;
 880
 881         matched = 0;
 882         tag = renderer_context.last_tag_to_move;
 883
 884         while (list_has_next(part->document->tags, tag)) {
 885                 tag = tag->next;
 886
 887                 if (tag->y == Y(yf)) {
 888                         matched = 1;
 889                         if (tag->x >= X(xf)) {
 890                                 tag->y = Y(yt);
 891                                 tag->x += -xf + xt;
 892                         }
 893
 894                 } else if (!matched && tag->y > Y(yf)) {
 895                         /* Fix for bug 479 (part two) */
 896                         matched = 1;
 897                 }
 898
 899                 if (!matched) renderer_context.last_tag_to_move = tag;
 900         }
 901 }
 902
 903 /* This function does not update document.comb_x and document.comb_y.
 904  * That is the caller's responsibility.  */
 905 static inline void
 906 copy_chars(struct html_context *html_context, int x, int y, int width, struct screen_char *d)
 907 {
 908         struct part *part;
 909
 910         assert(html_context);
 911         if_assert_failed return;
 912
 913         part = html_context->part;
 914
 915         assert(width > 0 && part && part->document && part->document->data);
 916         if_assert_failed return;
 917
 918         if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1) < 0)
 919                 return;
 920
 921         copy_screen_chars(&POS(x, y), d, width);
 922 }
 923
 924 static inline void
 925 move_chars(struct html_context *html_context, int x, int y, int nx, int ny)
 926 {
 927         struct part *part;
 928
 929         assert(html_context);
 930         if_assert_failed return;
 931
 932         part = html_context->part;
 933
 934         assert(part && part->document && part->document->data);
 935         if_assert_failed return;
 936
 937         if (LEN(y) - x <= 0) return;
 938         copy_chars(html_context, nx, ny, LEN(y) - x, &POS(x, y));
 939
 940         assert_comb_x_y_ok(part->document);
 941         move_comb_x_y(part, x, y, nx, ny);
 942         LINE(y).length = X(x);
 943         assert_comb_x_y_ok(part->document);
 944         if_assert_failed discard_comb_x_y(part->document);
 945         move_links(html_context, x, y, nx, ny);
 946 }
 947
 948 /** Shift the line @a y to the right by @a shift character cells,
 949  * and update document.comb_x and document.comb_y.  */
 950 static inline void
 951 shift_chars(struct html_context *html_context, int y, int shift)
 952 {
 953         struct part *part;
 954         struct screen_char *a;
 955         int len;
 956
 957         assert(html_context);
 958         if_assert_failed return;
 959
 960         part = html_context->part;
 961
 962         assert(part && part->document && part->document->data);
 963         if_assert_failed return;
 964
 965         len = LEN(y);
 966
 967         a = fmem_alloc(len * sizeof(*a));
 968         if (!a) return;
 969
 970         copy_screen_chars(a, &POS(0, y), len);
 971
 972         assert_comb_x_y_ok(part->document);
 973         if_assert_failed discard_comb_x_y(part->document);
 974
 975         clear_hchars(html_context, 0, y, shift, a);
 976         copy_chars(html_context, shift, y, len, a);
 977         fmem_free(a);
 978
 979         move_links(html_context, 0, y, shift, y);
 980         move_comb_x_y(part, 0, y, shift, y);
 981
 982         assert_comb_x_y_ok(part->document);
 983         if_assert_failed discard_comb_x_y(part->document);
 984 }
 985
 986 static inline void
 987 del_chars(struct html_context *html_context, int x, int y)
 988 {
 989         struct part *part;
 990
 991         assert(html_context);
 992         if_assert_failed return;
 993
 994         part = html_context->part;
 995
 996         assert(part && part->document && part->document->data);
 997         if_assert_failed return;
 998
 999         assert_comb_x_y_ok(part->document);
1000         if_assert_failed discard_comb_x_y(part->document);
1001
1002         LINE(y).length = X(x);
1003         move_comb_x_y(part, x, y, -1, -1);
1004         move_links(html_context, x, y, -1, -1);
1005
1006         assert_comb_x_y_ok(part->document);
1007         if_assert_failed discard_comb_x_y(part->document);
1008 }
1009
/* overlap_width(x): the width member of @x.  When TABLE_LINE_PADDING is not
 * negative the value is additionally capped at the options' box width minus
 * TABLE_LINE_PADDING; that variant needs a variable named `html_context` in
 * scope at the point of use.
 *
 * overlap(x): overlap_width(x) minus the rightmargin member of @x, never
 * less than zero.
 *
 * Both macros evaluate their argument more than once. */
#if TABLE_LINE_PADDING < 0
# define overlap_width(x) (x).width
#else
# define overlap_width(x) int_min((x).width, \
        html_context->options->box.width - TABLE_LINE_PADDING)
#endif
#define overlap(x) int_max(overlap_width(x) - (x).rightmargin, 0)
1017
1018 static int inline
1019 split_line_at(struct html_context *html_context, int width)
1020 {
1021         struct part *part;
1022         int tmp;
1023         int new_width = width + par_format.rightmargin;
1024
1025         assert(html_context);
1026         if_assert_failed return 0;
1027
1028         part = html_context->part;
1029
1030         assert(part);
1031         if_assert_failed return 0;
1032
1033         /* Make sure that we count the right margin to the total
1034          * actual box width. */
1035         int_lower_bound(&part->box.width, new_width);
1036
1037         if (part->document) {
1038                 assert(part->document->data);
1039                 if_assert_failed return 0;
1040 #ifdef CONFIG_UTF8
1041                 if (html_context->options->utf8
1042                     && width < part->spaces_len && part->char_width[width] == 2) {
1043                         move_chars(html_context, width, part->cy, par_format.leftmargin, part->cy + 1);
1044                         del_chars(html_context, width, part->cy);
1045                 } else
1046 #endif
1047                 {
1048                         assertm(POS(width, part->cy).data == ' ',
1049                                         "bad split: %c", POS(width, part->cy).data);
1050                         move_chars(html_context, width + 1, part->cy, par_format.leftmargin, part->cy + 1);
1051                         del_chars(html_context, width, part->cy);
1052
1053                 }
1054         }
1055
1056 #ifdef CONFIG_UTF8
1057         if (!(html_context->options->utf8
1058               && width < part->spaces_len
1059               && part->char_width[width] == 2))
1060 #endif
1061                 width++; /* Since we were using (x + 1) only later... */
1062
1063         tmp = part->spaces_len - width;
1064         if (tmp > 0) {
1065                 /* 0 is possible and I'm paranoid ... --Zas */
1066                 memmove(part->spaces, part->spaces + width, tmp);
1067 #ifdef CONFIG_UTF8
1068                 memmove(part->char_width, part->char_width + width, tmp);
1069 #endif
1070         }
1071
1072         assert(tmp >= 0);
1073         if_assert_failed tmp = 0;
1074         memset(part->spaces + tmp, 0, width);
1075 #ifdef CONFIG_UTF8
1076         memset(part->char_width + tmp, 0, width);
1077 #endif
1078
1079         if (par_format.leftmargin > 0) {
1080                 tmp = part->spaces_len - par_format.leftmargin;
1081                 assertm(tmp > 0, "part->spaces_len - par_format.leftmargin == %d", tmp);
1082                 /* So tmp is zero, memmove() should survive that. Don't recover. */
1083                 memmove(part->spaces + par_format.leftmargin, part->spaces, tmp);
1084 #ifdef CONFIG_UTF8
1085                 memmove(part->char_width + par_format.leftmargin, part->char_width, tmp);
1086 #endif
1087         }
1088
1089         part->cy++;
1090
1091         if (part->cx == width) {
1092                 part->cx = -1;
1093                 int_lower_bound(&part->box.height, part->cy);
1094                 return 2;
1095         } else {
1096                 part->cx -= width - par_format.leftmargin;
1097                 int_lower_bound(&part->box.height, part->cy + 1);
1098                 return 1;
1099         }
1100 }
1101
1102 /* Here, we scan the line for a possible place where we could split it into two
1103  * (breaking it, because it is too long), if it is overlapping from the maximal
1104  * box width. */
1105 /* Returns 0 if there was found no spot suitable for breaking the line.
1106  *         1 if the line was split into two.
1107  *         2 if the (second) splitted line is blank (that is useful to determine
1108  *           ie. if the next line_break() should really break the line; we don't
1109  *           want to see any blank lines to pop up, do we?). */
1110 static int
1111 split_line(struct html_context *html_context)
1112 {
1113         struct part *part;
1114         int x;
1115
1116         assert(html_context);
1117         if_assert_failed return 0;
1118
1119         part = html_context->part;
1120
1121         assert(part);
1122         if_assert_failed return 0;
1123
1124 #ifdef CONFIG_UTF8
1125         if (html_context->options->utf8) {
1126                 for (x = overlap(par_format); x >= par_format.leftmargin; x--) {
1127
1128                         if (x < part->spaces_len && (part->spaces[x]
1129                             || (part->char_width[x] == 2
1130                                 /* Ugly hack. If we haven't place for
1131                                  * double-width characters we print two
1132                                  * double-width characters. */
1133                                 && x != par_format.leftmargin)))
1134                                 return split_line_at(html_context, x);
1135                 }
1136
1137                 for (x = par_format.leftmargin; x < part->cx ; x++) {
1138                         if (x < part->spaces_len && (part->spaces[x]
1139                             || (part->char_width[x] == 2
1140                                 /* We want to break line after _second_
1141                                  * double-width character. */
1142                                 && x > par_format.leftmargin)))
1143                                 return split_line_at(html_context, x);
1144                 }
1145         } else
1146 #endif
1147         {
1148                 for (x = overlap(par_format); x >= par_format.leftmargin; x--)
1149                         if (x < part->spaces_len && part->spaces[x])
1150                                 return split_line_at(html_context, x);
1151
1152                 for (x = par_format.leftmargin; x < part->cx ; x++)
1153                         if (x < part->spaces_len && part->spaces[x])
1154                                 return split_line_at(html_context, x);
1155         }
1156
1157         /* Make sure that we count the right margin to the total
1158          * actual box width. */
1159         int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
1160
1161         return 0;
1162 }
1163
1164 /* Insert @new_spaces spaces before the coordinates @x and @y,
1165  * adding those spaces to whatever link is at those coordinates. */
1166 /* TODO: Integrate with move_links. */
1167 static void
1168 insert_spaces_in_link(struct part *part, int x, int y, int new_spaces)
1169 {
1170         int i = part->document->nlinks;
1171
1172         x = X(x);
1173         y = Y(y);
1174
1175         while (i--) {
1176                 struct link *link = &part->document->links[i];
1177                 int j = link->npoints;
1178
1179                 while (j-- > 1) {
1180                         struct point *point = &link->points[j];
1181
1182                         if (point->x != x || point->y != y)
1183                                 continue;
1184
1185                         if (!realloc_points(link, link->npoints + new_spaces))
1186                                 return;
1187
1188                         link->npoints += new_spaces;
1189                         point = &link->points[link->npoints - 1];
1190
1191                         while (new_spaces--) {
1192                                 point->x = --x;
1193                                 point->y = y;
1194                                 point--;
1195                         }
1196
1197                         return;
1198                 }
1199         }
1200 }
1201
1202 /* This function is very rare exemplary of clean and beautyful code here.
1203  * Please handle with care. --pasky */
1204 static void
1205 justify_line(struct html_context *html_context, int y)
1206 {
1207         struct part *part;
1208         struct screen_char *line; /* we save original line here */
1209         int len;
1210         int pos;
1211         int *space_list;
1212         int spaces;
1213         int diff;
1214
1215         assert(html_context);
1216         if_assert_failed return;
1217
1218         part = html_context->part;
1219
1220         assert(part && part->document && part->document->data);
1221         if_assert_failed return;
1222
1223         len = LEN(y);
1224         assert(len > 0);
1225         if_assert_failed return;
1226
1227         line = fmem_alloc(len * sizeof(*line));
1228         if (!line) return;
1229
1230         /* It may sometimes happen that the line is only one char long and that
1231          * char is space - then we're going to write to both [0] and [1], but
1232          * we allocated only one field. Thus, we've to do (len + 1). --pasky */
1233         space_list = fmem_alloc((len + 1) * sizeof(*space_list));
1234         if (!space_list) {
1235                 fmem_free(line);
1236                 return;
1237         }
1238
1239         copy_screen_chars(line, &POS(0, y), len);
1240
1241         /* Skip leading spaces */
1242
1243         spaces = 0;
1244         pos = 0;
1245
1246         while (line[pos].data == ' ')
1247                 pos++;
1248
1249         /* Yes, this can be negative, we know. But we add one to it always
1250          * anyway, so it's ok. */
1251         space_list[spaces++] = pos - 1;
1252
1253         /* Count spaces */
1254
1255         for (; pos < len; pos++)
1256                 if (line[pos].data == ' ')
1257                         space_list[spaces++] = pos;
1258
1259         space_list[spaces] = len;
1260
1261         /* Realign line */
1262
1263         /* Diff is the difference between the width of the paragraph
1264          * and the current length of the line. */
1265         diff = overlap(par_format) - len;
1266
1267         /* We check diff > 0 because diff can be negative (i.e., we have
1268          * an unbroken line of length > overlap(par_format))
1269          * even when spaces > 1 if the line has only non-breaking spaces. */
1270         if (spaces > 1 && diff > 0) {
1271                 int prev_end = 0;
1272                 int word;
1273
1274                 /* Allocate enough memory for the justified line.
1275                  * If the memory is not available, then leave the
1276                  * line unchanged, rather than halfway there.  The
1277                  * following loop assumes the allocation succeeded.  */
1278                 if (!realloc_line(html_context, html_context->part->document,
1279                                   Y(y), X(overlap(par_format))))
1280                         goto out_of_memory;
1281
1282                 for (word = 0; word < spaces; word++) {
1283                         /* We have to increase line length by 'diff' num. of
1284                          * characters, so we move 'word'th word 'word_shift'
1285                          * characters right. */
1286                         int word_start = space_list[word] + 1;
1287                         int word_len = space_list[word + 1] - word_start;
1288                         int word_shift;
1289                         int new_start;
1290                         int new_spaces;
1291
1292                         assert(word_len >= 0);
1293                         if_assert_failed continue;
1294
1295                         word_shift = (word * diff) / (spaces - 1);
1296                         new_start = word_start + word_shift;
1297
1298                         /* Assert that the realloc_line() above
1299                          * allocated enough memory for the word
1300                          * and the preceding spaces.  */
1301                         assert(LEN(y) >= new_start + word_len);
1302                         if_assert_failed continue;
1303
1304                         /* Copy the original word, without any spaces.
1305                          * word_len may be 0 here.  */
1306                         copy_screen_chars(&POS(new_start, y),
1307                                           &line[word_start], word_len);
1308
1309                         /* Copy the space that preceded the word,
1310                          * duplicating it as many times as necessary.
1311                          * This preserves its attributes, such as
1312                          * background color and underlining.  If this
1313                          * is the first word, then skip the copy
1314                          * because there might not be a space there
1315                          * and anyway it need not be duplicated.  */
1316                         if (word) {
1317                                 int spacex;
1318
1319                                 for (spacex = prev_end; spacex < new_start;
1320                                      ++spacex) {
1321                                         copy_screen_chars(&POS(spacex, y),
1322                                                           &line[word_start - 1],
1323                                                           1);
1324                                 }
1325                         }
1326
1327                         /* Remember that any links at the right side
1328                          * of the added spaces have moved, and the
1329                          * spaces themselves may also belong to a
1330                          * link.  */
1331                         new_spaces = new_start - prev_end - 1;
1332                         if (word && new_spaces) {
1333                                 move_comb_x_y(part, prev_end + 1, y, new_start, y);
1334                                 move_links(html_context, prev_end + 1, y, new_start, y);
1335                                 insert_spaces_in_link(part,
1336                                                       new_start, y, new_spaces);
1337                         }
1338
1339                         prev_end = new_start + word_len;
1340                 }
1341         }
1342
1343 out_of_memory:
1344         fmem_free(space_list);
1345         fmem_free(line);
1346 }
1347
1348 static void
1349 align_line(struct html_context *html_context, int y, int last)
1350 {
1351         struct part *part;
1352         int shift;
1353         int len;
1354
1355         assert(html_context);
1356         if_assert_failed return;
1357
1358         part = html_context->part;
1359
1360         assert(part && part->document && part->document->data);
1361         if_assert_failed return;
1362
1363         len = LEN(y);
1364
1365         if (!len || par_format.align == ALIGN_LEFT)
1366                 return;
1367
1368         if (par_format.align == ALIGN_JUSTIFY) {
1369                 if (!last)
1370                         justify_line(html_context, y);
1371                 return;
1372         }
1373
1374         shift = overlap(par_format) - len;
1375         if (par_format.align == ALIGN_CENTER)
1376                 shift /= 2;
1377         if (shift > 0)
1378                 shift_chars(html_context, y, shift);
1379 }
1380
1381 static inline void
1382 init_link_event_hooks(struct html_context *html_context, struct link *link)
1383 {
1384         link->event_hooks = mem_calloc(1, sizeof(*link->event_hooks));
1385         if (!link->event_hooks) return;
1386
1387 #define add_evhook(list_, type_, src_)                                          \
1388         do {                                                                    \
1389                 struct script_event_hook *evhook;                               \
1390                                                                                 \
1391                 if (!src_) break;                                               \
1392                                                                                 \
1393                 evhook = mem_calloc(1, sizeof(*evhook));                        \
1394                 if (!evhook) break;                                             \
1395                                                                                 \
1396                 evhook->type = type_;                                           \
1397                 evhook->src  = stracpy(src_);                                   \
1398                 add_to_list(*(list_), evhook);                                  \
1399         } while (0)
1400
1401         init_list(*link->event_hooks);
1402         add_evhook(link->event_hooks, SEVHOOK_ONCLICK, format.onclick);
1403         add_evhook(link->event_hooks, SEVHOOK_ONDBLCLICK, format.ondblclick);
1404         add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOVER, format.onmouseover);
1405         add_evhook(link->event_hooks, SEVHOOK_ONHOVER, format.onhover);
1406         add_evhook(link->event_hooks, SEVHOOK_ONFOCUS, format.onfocus);
1407         add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOUT, format.onmouseout);
1408         add_evhook(link->event_hooks, SEVHOOK_ONBLUR, format.onblur);
1409
1410 #undef add_evhook
1411 }
1412
1413 static struct link *
1414 new_link(struct html_context *html_context, unsigned char *name, int namelen)
1415 {
1416         struct document *document;
1417         struct part *part;
1418         int link_number;
1419         struct link *link;
1420
1421         assert(html_context);
1422         if_assert_failed return NULL;
1423
1424         part = html_context->part;
1425
1426         assert(part);
1427         if_assert_failed return NULL;
1428
1429         document = part->document;
1430
1431         assert(document);
1432         if_assert_failed return NULL;
1433
1434         link_number = part->link_num;
1435
1436         if (!ALIGN_LINK(&document->links, document->nlinks, document->nlinks + 1))
1437                 return NULL;
1438
1439         link = &document->links[document->nlinks++];
1440         link->number = link_number - 1;
1441         if (document->options.use_tabindex) link->number += format.tabindex;
1442         link->accesskey = format.accesskey;
1443         link->title = null_or_stracpy(format.title);
1444         link->where_img = null_or_stracpy(format.image);
1445
1446         if (!format.form) {
1447                 link->target = null_or_stracpy(format.target);
1448                 link->data.name = memacpy(name, namelen);
1449                 /* if (strlen(url) > 4 && !c_strncasecmp(url, "MAP@", 4)) { */
1450                 if (format.link
1451                     && ((format.link[0]|32) == 'm')
1452                     && ((format.link[1]|32) == 'a')
1453                     && ((format.link[2]|32) == 'p')
1454                     &&  (format.link[3]     == '@')
1455                     &&   format.link[4]) {
1456                         link->type = LINK_MAP;
1457                         link->where = stracpy(format.link + 4);
1458                 } else {
1459                         link->type = LINK_HYPERTEXT;
1460                         link->where = null_or_stracpy(format.link);
1461                 }
1462
1463         } else {
1464                 struct form_control *fc = format.form;
1465                 struct form *form;
1466
1467                 switch (fc->type) {
1468                 case FC_TEXT:
1469                 case FC_PASSWORD:
1470                 case FC_FILE:
1471                         link->type = LINK_FIELD;
1472                         break;
1473                 case FC_TEXTAREA:
1474                         link->type = LINK_AREA;
1475                         break;
1476                 case FC_CHECKBOX:
1477                 case FC_RADIO:
1478                         link->type = LINK_CHECKBOX;
1479                         break;
1480                 case FC_SELECT:
1481                         link->type = LINK_SELECT;
1482                         break;
1483                 case FC_SUBMIT:
1484                 case FC_IMAGE:
1485                 case FC_RESET:
1486                 case FC_BUTTON:
1487                 case FC_HIDDEN:
1488                         link->type = LINK_BUTTON;
1489                 }
1490                 link->data.form_control = fc;
1491                 /* At this point, format.form might already be set but
1492                  * the form_control not registered through SP_CONTROL
1493                  * yet, therefore without fc->form set. It is always
1494                  * after the "good" last form was already processed,
1495                  * though, so we can safely just take that. */
1496                 form = fc->form;
1497                 if (!form && !list_empty(document->forms))
1498                         form = document->forms.next;
1499                 link->target = null_or_stracpy(form ? form->target : NULL);
1500         }
1501
1502         link->color.background = format.style.color.background;
1503         link->color.foreground = link_is_textinput(link)
1504                                 ? format.style.color.foreground
1505                                 : format.color.clink;
1506
1507         init_link_event_hooks(html_context, link);
1508
1509         document->links_sorted = 0;
1510         return link;
1511 }
1512
1513 static void
1514 html_special_tag(struct document *document, unsigned char *t, int x, int y)
1515 {
1516         struct tag *tag;
1517         int tag_len;
1518
1519         assert(document);
1520         if_assert_failed return;
1521
1522         tag_len = strlen(t);
1523         /* One byte is reserved for name in struct tag. */
1524         tag = mem_alloc(sizeof(*tag) + tag_len);
1525         if (!tag) return;
1526
1527         tag->x = x;
1528         tag->y = y;
1529         memcpy(tag->name, t, tag_len + 1);
1530         add_to_list(document->tags, tag);
1531         if (renderer_context.last_tag_for_newline == (struct tag *) &document->tags)
1532                 renderer_context.last_tag_for_newline = tag;
1533 }
1534
1535
1536 static void
1537 put_chars_conv(struct html_context *html_context,
1538                unsigned char *chars, int charslen)
1539 {
1540         struct part *part;
1541
1542         assert(html_context);
1543         if_assert_failed return;
1544
1545         part = html_context->part;
1546
1547         assert(part && chars && charslen);
1548         if_assert_failed return;
1549
1550         if (format.style.attr & AT_GRAPHICS) {
1551                 put_chars(html_context, chars, charslen);
1552                 return;
1553         }
1554
1555         convert_string(renderer_context.convert_table, chars, charslen,
1556                        html_context->options->cp,
1557                        (format.style.attr & AT_NO_ENTITIES) ? CSM_NONE : CSM_DEFAULT,
1558                        NULL, (void (*)(void *, unsigned char *, int)) put_chars, html_context);
1559 }
1560
/*
 * Converts the non-negative number @num to a string in base @base whose
 * digits are taken from @key (key[0] is the zero digit).  In the trivial
 * case, key="0123456789".  A more homerow friendly
 * key="gfdsahjkl;trewqyuiopvcxznm".
 *
 * The NUL-terminated result is stored in @link_sym, which must be large
 * enough for all digits plus the terminator; @key must provide at least
 * @base symbols.
 *
 * Returns the length of link_sym.  If @base is less than 2 or @num is
 * negative, nothing is written and 0 is returned.
 */
int
dec2qwerty(int num, unsigned char *link_sym, const unsigned char *key, int base)
{
        int newlen = 1;
        int rest;
        int i;

        /* A negative number would index key[] with a negative remainder. */
        if (base < 2 || num < 0) return 0;

        /* Count the digits by dividing down instead of multiplying a
         * power up; the latter overflows for numbers close to INT_MAX. */
        for (rest = num / base; rest > 0; rest /= base)
                newlen++;

        link_sym[newlen] = '\0';

        /* Emit the digits from the least significant one backwards. */
        for (i = newlen - 1; i >= 0; i--) {
                link_sym[i] = key[num % base];
                num /= base;
        }

        return newlen;
}
1583
/*
 * Returns the value of link_sym in decimal according to key.
 *
 * @link_sym is a NUL-terminated string of symbols from @key, most
 * significant first; the position of a symbol inside @key is its digit
 * value and @base is the weight of each position.  An empty string
 * yields 0.
 *
 * Returns -1 if @link_sym contains a symbol that does not occur in @key,
 * or if the value does not fit in an int.
 */
int
qwerty2dec(const unsigned char *link_sym, const unsigned char *key, int base)
{
        int z = 0;

        for (; *link_sym; link_sym++) {
                int j = 0;

                /* Stop at the end of key so that an unknown symbol
                 * cannot make us read past the terminator. */
                while (key[j] && key[j] != *link_sym)
                        j++;
                if (!key[j]) return -1;

                /* Accumulate most significant digit first; this needs no
                 * separate power variable that could overflow on its own. */
                if (base > 0 && z > (INT_MAX - j) / base)
                        return -1;
                z = z * base + j;
        }

        return z;
}
1602
1603 static inline void
1604 put_link_number(struct html_context *html_context)
1605 {
1606         char *symkey = get_opt_str("document.browse.links.label_key", NULL);
1607         struct part *part = html_context->part;
1608         unsigned char s[64];
1609         unsigned char *fl = format.link;
1610         unsigned char *ft = format.target;
1611         unsigned char *fi = format.image;
1612         struct form_control *ff = format.form;
1613         int slen = 0;
1614         int base = strlen(symkey);
1615
1616         format.link = format.target = format.image = NULL;
1617         format.form = NULL;
1618
1619         s[slen++] = '[';
1620         slen += dec2qwerty(part->link_num, s + 1, symkey, base);
1621         s[slen++] = ']';
1622         s[slen] = '\0';
1623
1624         renderer_context.nosearchable = 1;
1625         put_chars(html_context, s, slen);
1626         renderer_context.nosearchable = 0;
1627
1628         if (ff && ff->type == FC_TEXTAREA) line_break(html_context);
1629
1630         /* We might have ended up on a new line after the line breaking
1631          * or putting the link number chars. */
1632         if (part->cx == -1) part->cx = par_format.leftmargin;
1633
1634         format.link = fl;
1635         format.target = ft;
1636         format.image = fi;
1637         format.form = ff;
1638 }
1639
1640 #define assert_link_variable(old, new) \
1641         assertm(!(old), "Old link value [%s]. New value [%s]", old, new);
1642
1643 static inline void
1644 init_link_state_info(unsigned char *link, unsigned char *target,
1645                      unsigned char *image, struct form_control *form)
1646 {
1647         assert_link_variable(renderer_context.link_state_info.image, image);
1648         assert_link_variable(renderer_context.link_state_info.target, target);
1649         assert_link_variable(renderer_context.link_state_info.link, link);
1650
1651         renderer_context.link_state_info.link = null_or_stracpy(link);
1652         renderer_context.link_state_info.target = null_or_stracpy(target);
1653         renderer_context.link_state_info.image = null_or_stracpy(image);
1654         renderer_context.link_state_info.form = form;
1655 }
1656
1657 static inline void
1658 done_link_state_info(void)
1659 {
1660         mem_free_if(renderer_context.link_state_info.link);
1661         mem_free_if(renderer_context.link_state_info.target);
1662         mem_free_if(renderer_context.link_state_info.image);
1663         memset(&renderer_context.link_state_info, 0,
1664                sizeof(renderer_context.link_state_info));
1665 }
1666
1667 #ifdef CONFIG_UTF8
1668 static inline void
1669 process_link(struct html_context *html_context, enum link_state link_state,
1670              unsigned char *chars, int charslen, int cells)
1671 #else
1672 static inline void
1673 process_link(struct html_context *html_context, enum link_state link_state,
1674                    unsigned char *chars, int charslen)
1675 #endif /* CONFIG_UTF8 */
1676 {
1677         struct part *part = html_context->part;
1678         struct link *link;
1679         int x_offset = 0;
1680
1681         switch (link_state) {
1682         case LINK_STATE_SAME: {
1683                 unsigned char *name;
1684
1685                 if (!part->document) return;
1686
1687                 assertm(part->document->nlinks > 0, "no link");
1688                 if_assert_failed return;
1689
1690                 link = &part->document->links[part->document->nlinks - 1];
1691
1692                 name = get_link_name(link);
1693                 if (name) {
1694                         unsigned char *new_name;
1695
1696                         new_name = straconcat(name, chars,
1697                                               (unsigned char *) NULL);
1698                         if (new_name) {
1699                                 mem_free(name);
1700                                 link->data.name = new_name;
1701                         }
1702                 }
1703
1704                 /* FIXME: Concatenating two adjectent <a> elements to a single
1705                  * link is broken since we lose the event handlers for the
1706                  * second one.  OTOH simply appending them here won't fly since
1707                  * we may get here multiple times for even a single link. We
1708                  * will probably need some SP_ for creating a new link or so.
1709                  * --pasky */
1710
1711                 break;
1712         }
1713
1714         case LINK_STATE_NEW:
1715                 part->link_num++;
1716
1717                 init_link_state_info(format.link, format.target,
1718                                      format.image, format.form);
1719                 if (!part->document) return;
1720
1721                 /* Trim leading space from the link text */
1722                 while (x_offset < charslen && chars[x_offset] <= ' ')
1723                         x_offset++;
1724
1725                 if (x_offset) {
1726                         charslen -= x_offset;
1727                         chars += x_offset;
1728 #ifdef CONFIG_UTF8
1729                         cells -= x_offset;
1730 #endif /* CONFIG_UTF8 */
1731                 }
1732
1733                 link = new_link(html_context, chars, charslen);
1734                 if (!link) return;
1735
1736                 break;
1737
1738         case LINK_STATE_NONE:
1739         default:
1740                 INTERNAL("bad link_state %i", (int) link_state);
1741                 return;
1742         }
1743
1744         /* Add new canvas positions to the link. */
1745 #ifdef CONFIG_UTF8
1746         if (realloc_points(link, link->npoints + cells))
1747 #else
1748         if (realloc_points(link, link->npoints + charslen))
1749 #endif /* CONFIG_UTF8 */
1750         {
1751                 struct point *point = &link->points[link->npoints];
1752                 int x = X(part->cx) + x_offset;
1753                 int y = Y(part->cy);
1754
1755 #ifdef CONFIG_UTF8
1756                 link->npoints += cells;
1757
1758                 for (; cells > 0; cells--, point++, x++)
1759 #else
1760                 link->npoints += charslen;
1761
1762                 for (; charslen > 0; charslen--, point++, x++)
1763 #endif /* CONFIG_UTF8 */
1764                 {
1765                         point->x = x;
1766                         point->y = y;
1767                 }
1768         }
1769 }
1770
1771 static inline enum link_state
1772 get_link_state(struct html_context *html_context)
1773 {
1774         enum link_state state;
1775
1776         if (!(format.link || format.image || format.form)) {
1777                 state = LINK_STATE_NONE;
1778
1779         } else if ((renderer_context.link_state_info.link
1780                     || renderer_context.link_state_info.image
1781                     || renderer_context.link_state_info.form)
1782                    && !xstrcmp(format.link, renderer_context.link_state_info.link)
1783                    && !xstrcmp(format.target, renderer_context.link_state_info.target)
1784                    && !xstrcmp(format.image, renderer_context.link_state_info.image)
1785                    && format.form == renderer_context.link_state_info.form) {
1786
1787                 return LINK_STATE_SAME;
1788
1789         } else {
1790                 state = LINK_STATE_NEW;
1791         }
1792
1793         done_link_state_info();
1794
1795         return state;
1796 }
1797
/* Returns 1 if any of the first @charslen bytes of @chars is not
 * whitespace according to isspace(), 0 otherwise (including when
 * @charslen is not positive). */
static inline int
html_has_non_space_chars(unsigned char *chars, int charslen)
{
        int i;

        for (i = 0; i < charslen; i++) {
                if (isspace(chars[i]))
                        continue;

                return 1;
        }

        return 0;
}
1809
1810 static void
1811 put_chars(struct html_context *html_context, unsigned char *chars, int charslen)
1812 {
1813         enum link_state link_state;
1814         struct part *part;
1815 #ifdef CONFIG_UTF8
1816         int cells;
1817 #endif /* CONFIG_UTF8 */
1818
1819         assert(html_context);
1820         if_assert_failed return;
1821
1822         part = html_context->part;
1823
1824         assert(part);
1825         if_assert_failed return;
1826
1827         assert(chars && charslen);
1828         if_assert_failed return;
1829
1830         /* If we are not handling verbatim aligning and we are at the begining
1831          * of a line trim whitespace. */
1832         if (part->cx == -1) {
1833                 /* If we are not handling verbatim aligning trim leading
1834                  * whitespaces. */
1835                 if (!html_is_preformatted()) {
1836                         while (charslen && *chars == ' ') {
1837                                 chars++;
1838                                 charslen--;
1839                         }
1840
1841                         if (charslen < 1) return;
1842                 }
1843
1844                 part->cx = par_format.leftmargin;
1845         }
1846
1847         /* For preformatted html always update 'the last tag' so we never end
1848          * up moving tags to the wrong line (Fixes bug 324). For all other html
1849          * it is moved only when the line being rendered carry some real
1850          * non-whitespace content. */
1851         if (html_is_preformatted()
1852             || html_has_non_space_chars(chars, charslen)) {
1853                 renderer_context.last_tag_for_newline = (struct tag *) &part->document->tags;
1854         }
1855
1856         int_lower_bound(&part->box.height, part->cy + 1);
1857
1858         link_state = get_link_state(html_context);
1859
1860         if (link_state == LINK_STATE_NEW) {
1861                 int x_offset = 0;
1862
1863                 /* Don't add inaccessible links. It seems to be caused
1864                  * by the parser putting a space char after stuff like
1865                  * <img>-tags or comments wrapped in <a>-tags. See bug
1866                  * 30 for test case. */
1867                 while (x_offset < charslen && chars[x_offset] <= ' ')
1868                         x_offset++;
1869
1870                 /* For pure spaces reset the link state */
1871                 if (x_offset == charslen)
1872                         link_state = LINK_STATE_NONE;
1873                 else if (html_context->options->links_numbering)
1874                         put_link_number(html_context);
1875         }
1876 #ifdef CONFIG_UTF8
1877         cells =
1878 #endif /* CONFIG_UTF8 */
1879                 set_hline(html_context, chars, charslen, link_state);
1880
1881         if (link_state != LINK_STATE_NONE) {
1882 #ifdef CONFIG_UTF8
1883                 process_link(html_context, link_state, chars, charslen,
1884                              cells);
1885 #else
1886                 process_link(html_context, link_state, chars, charslen);
1887 #endif /* CONFIG_UTF8 */
1888         }
1889
1890 #ifdef CONFIG_UTF8
1891         if (renderer_context.nowrap
1892             && part->cx + cells > overlap(par_format))
1893                 return;
1894
1895         part->cx += cells;
1896 #else
1897         if (renderer_context.nowrap
1898                         && part->cx + charslen > overlap(par_format))
1899                 return;
1900
1901         part->cx += charslen;
1902 #endif /* CONFIG_UTF8 */
1903
1904         renderer_context.nobreak = 0;
1905
1906         if (!(html_context->options->wrap || html_is_preformatted())) {
1907                 while (part->cx > overlap(par_format)
1908                        && part->cx > par_format.leftmargin) {
1909                         int x = split_line(html_context);
1910
1911                         if (!x) break;
1912                         if (part->document)
1913                                 align_line(html_context, part->cy - 1, 0);
1914                         renderer_context.nobreak = !!(x - 1);
1915                 }
1916         }
1917
1918         assert(charslen > 0);
1919 #ifdef CONFIG_UTF8
1920         part->xa += cells;
1921 #else
1922         part->xa += charslen;
1923 #endif /* CONFIG_UTF8 */
1924         int_lower_bound(&part->max_width, part->xa
1925                         + par_format.leftmargin + par_format.rightmargin
1926                         - (chars[charslen - 1] == ' '
1927                            && !html_is_preformatted()));
1928         return;
1929
1930 }
1931
1932 #undef overlap
1933
1934 static void
1935 line_break(struct html_context *html_context)
1936 {
1937         struct part *part;
1938         struct tag *tag;
1939
1940         assert(html_context);
1941         if_assert_failed return;
1942
1943         part = html_context->part;
1944
1945         assert(part);
1946         if_assert_failed return;
1947
1948         int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
1949
1950         if (renderer_context.nobreak) {
1951                 renderer_context.nobreak = 0;
1952                 part->cx = -1;
1953                 part->xa = 0;
1954                 return;
1955         }
1956
1957         if (!part->document || !part->document->data) goto end;
1958
1959         if (!realloc_lines(part->document, part->box.height + part->cy + 1))
1960                 return;
1961
1962         if (part->cx > par_format.leftmargin && LEN(part->cy) > part->cx - 1
1963             && POS(part->cx - 1, part->cy).data == ' ') {
1964                 del_chars(html_context, part->cx - 1, part->cy);
1965                 part->cx--;
1966         }
1967
1968         if (part->cx > 0) align_line(html_context, part->cy, 1);
1969
1970         for (tag = renderer_context.last_tag_for_newline;
1971              tag && tag != (struct tag *) &part->document->tags;
1972              tag = tag->prev) {
1973                 tag->x = X(0);
1974                 tag->y = Y(part->cy + 1);
1975         }
1976
1977 end:
1978         part->cy++;
1979         part->cx = -1;
1980         part->xa = 0;
1981         memset(part->spaces, 0, part->spaces_len);
1982 #ifdef CONFIG_UTF8
1983         memset(part->char_width, 0, part->spaces_len);
1984 #endif
1985 }
1986
1987 static void
1988 html_special_form(struct part *part, struct form *form)
1989 {
1990         struct form *nform;
1991
1992         assert(part && form);
1993         assert(form->form_num > 0);
1994         assert(form->form_end == INT_MAX);
1995         if_assert_failed return;
1996
1997         if (!part->document) {
1998                 done_form(form);
1999                 return;
2000         }
2001
2002         /* Make a fake form with form_num == 0 so that there is
2003          * something to use if form controls appear above the first
2004          * actual FORM element.  There can never be a real form with
2005          * form_num == 0 because the form_num is the position after the
2006          * "<form" characters and that's already five characters.  The
2007          * fake form does not have a name, and it gets a form_view and
2008          * becomes visible to ECMAScript only if it actually has
2009          * controls in it.  */
2010         if (list_empty(part->document->forms)) {
2011                 nform = init_form();
2012                 if (!nform) {
2013                         done_form(form);
2014                         return;
2015                 }
2016                 nform->form_num = 0;
2017                 add_to_list(part->document->forms, nform);
2018         }
2019
2020         /* Make sure the new form ``claims'' its slice of the form range
2021          * maintained in the form_num and form_end variables. */
2022         foreach (nform, part->document->forms) {
2023                 if (form->form_num < nform->form_num
2024                     || nform->form_end < form->form_num)
2025                         continue;
2026
2027                 /* First check if the form has identical form numbers.
2028                  * That should only be the case when the form being
2029                  * added is in fact the same form in which case it
2030                  * should be dropped. The fact that this can happen
2031                  * suggests that the table renderering can be confused.
2032                  * See bug 647 for a test case.
2033                  * Do not compare form->form_end here because it is
2034                  * normally set by this function and that has obviously
2035                  * not yet been done.  */
2036                 if (nform->form_num == form->form_num) {
2037                         done_form(form);
2038                         return;
2039                 }
2040
2041                 /* The form start is inside an already added form, so
2042                  * partition the space of the existing form and get
2043                  * |old|new|. */
2044                 form->form_end = nform->form_end;
2045                 nform->form_end = form->form_num - 1;
2046                 assertm(nform->form_num <= nform->form_end,
2047                         "[%d:%d] [%d:%d]", nform->form_num, nform->form_end,
2048                         form->form_num, form->form_end);
2049                 add_to_list(part->document->forms, form);
2050                 return;
2051         }
2052
2053         ERROR("hole between forms");
2054         done_form(form);
2055         return;
2056 }
2057
2058 static void
2059 html_special_form_control(struct part *part, struct form_control *fc)
2060 {
2061         struct form *form;
2062
2063         assert(part && fc);
2064         if_assert_failed return;
2065
2066         if (!part->document) {
2067                 done_form_control(fc);
2068                 mem_free(fc);
2069                 return;
2070         }
2071
2072         fc->g_ctrl_num = renderer_context.g_ctrl_num++;
2073
2074         if (list_empty(part->document->forms)) {
2075                 /* No forms encountered yet, that means a homeless form
2076                  * control. Generate a dummy form for those Flying
2077                  * Dutchmans. */
2078                 form = init_form();
2079                 form->form_num = 0;
2080                 add_to_list(part->document->forms, form);
2081         }
2082         /* Attach this form control to the last form encountered. */
2083         form = part->document->forms.next;
2084         fc->form = form;
2085         add_to_list(form->items, fc);
2086 }
2087
#ifdef CONFIG_DEBUG
/** Check the invariants of a list of forms: no two forms may have
 * overlapping form.form_num ... form.form_end ranges, every range that
 * does not end at INT_MAX must be followed directly by exactly one
 * other range, and exactly one form must start at 0.  An empty list is
 * accepted as well.  This function can be called from a debugger, or
 * automatically from some places.
 *
 * This function may leave assert_failed = 1; the caller must use
 * if_assert_failed.  */
static void
assert_forms_list_ok(LIST_OF(struct form) *forms)
{
        struct form *outer;
        int saw_form_num_0 = 0;

        if (list_empty(*forms)) return;

        /* Every form is compared with every form; quadratic, but this
         * is debugging code only.  */
        foreach (outer, *forms) {
                struct form *inner;
                int followers = 0;

                saw_form_num_0 += (outer->form_num == 0);

                foreach (inner, *forms) {
                        assert(inner == outer
                               || inner->form_num > outer->form_end
                               || outer->form_num > inner->form_end);

                        /* Does inner start right after outer ends?  */
                        followers += (outer->form_end == inner->form_num - 1);
                }

                if (outer->form_end != INT_MAX) {
                        assert(followers == 1);
                } else {
                        assert(followers == 0);
                }
        }

        assert(saw_form_num_0 == 1);
}
#else  /* !CONFIG_DEBUG */
# define assert_forms_list_ok(forms) ((void) 0)
#endif /* !CONFIG_DEBUG */
2132
2133 /* Reparents form items based on position in the source. */
2134 void
2135 check_html_form_hierarchy(struct part *part)
2136 {
2137         struct document *document = part->document;
2138         INIT_LIST_OF(struct form_control, form_controls);
2139         struct form *form;
2140         struct form_control *fc, *next;
2141
2142         if (list_empty(document->forms))
2143                 return;
2144
2145         assert_forms_list_ok(&document->forms);
2146         if_assert_failed {}
2147
2148         /* Take out all badly placed form items. */
2149
2150         foreach (form, document->forms) {
2151
2152                 assertm(form->form_num <= form->form_end,
2153                         "%p [%d : %d]", form, form->form_num, form->form_end);
2154
2155                 foreachsafe (fc, next, form->items) {
2156                         if (form->form_num <= fc->position
2157                             && fc->position <= form->form_end)
2158                                 continue;
2159
2160                         move_to_top_of_list(form_controls, fc);
2161                 }
2162         }
2163
2164         /* Re-insert the form items the correct places. */
2165
2166         foreachsafe (fc, next, form_controls) {
2167
2168                 foreach (form, document->forms) {
2169                         if (fc->position < form->form_num
2170                             || form->form_end < fc->position)
2171                                 continue;
2172
2173                         fc->form = form;
2174                         move_to_top_of_list(form->items, fc);
2175                         break;
2176                 }
2177         }
2178
2179         assert(list_empty(form_controls));
2180 }
2181
2182 static inline void
2183 color_link_lines(struct html_context *html_context)
2184 {
2185         struct document *document = html_context->part->document;
2186         struct color_pair colors = INIT_COLOR_PAIR(par_format.color.background, 0x0);
2187         enum color_mode color_mode = document->options.color_mode;
2188         enum color_flags color_flags = document->options.color_flags;
2189         int y;
2190
2191         for (y = 0; y < document->height; y++) {
2192                 int x;
2193
2194                 for (x = 0; x < document->data[y].length; x++) {
2195                         struct screen_char *schar = &document->data[y].chars[x];
2196
2197                         set_term_color(schar, &colors, color_flags, color_mode);
2198
2199                         /* XXX: Entering hack zone! Change to clink color after
2200                          * link text has been recolored. */
2201                         if (schar->data == ':' && colors.foreground == 0x0)
2202                                 colors.foreground = format.color.clink;
2203                 }
2204
2205                 colors.foreground = 0x0;
2206         }
2207 }
2208
2209 static void *
2210 html_special(struct html_context *html_context, enum html_special_type c, ...)
2211 {
2212         va_list l;
2213         struct part *part;
2214         struct document *document;
2215         void *ret_val = NULL;
2216
2217         assert(html_context);
2218         if_assert_failed return NULL;
2219
2220         part = html_context->part;
2221
2222         assert(part);
2223         if_assert_failed return NULL;
2224
2225         document = part->document;
2226
2227         va_start(l, c);
2228         switch (c) {
2229                 case SP_TAG:
2230                         if (document) {
2231                                 unsigned char *t = va_arg(l, unsigned char *);
2232
2233                                 html_special_tag(document, t, X(part->cx), Y(part->cy));
2234                         }
2235                         break;
2236                 case SP_FORM:
2237                 {
2238                         struct form *form = va_arg(l, struct form *);
2239
2240                         html_special_form(part, form);
2241                         break;
2242                 }
2243                 case SP_CONTROL:
2244                 {
2245                         struct form_control *fc = va_arg(l, struct form_control *);
2246
2247                         html_special_form_control(part, fc);
2248                         break;
2249                 }
2250                 case SP_TABLE:
2251                         ret_val = renderer_context.convert_table;
2252                         break;
2253                 case SP_USED:
2254                         ret_val = (void *) (long) !!document;
2255                         break;
2256                 case SP_CACHE_CONTROL:
2257                 {
2258                         struct cache_entry *cached = renderer_context.cached;
2259
2260                         cached->cache_mode = CACHE_MODE_NEVER;
2261                         cached->expire = 0;
2262                         break;
2263                 }
2264                 case SP_CACHE_EXPIRES:
2265                 {
2266                         time_t expires = va_arg(l, time_t);
2267                         struct cache_entry *cached = renderer_context.cached;
2268
2269                         if (!expires || cached->cache_mode == CACHE_MODE_NEVER)
2270                                 break;
2271
2272                         timeval_from_seconds(&cached->max_age, expires);
2273                         cached->expire = 1;
2274                         break;
2275                 }
2276                 case SP_FRAMESET:
2277                 {
2278                         struct frameset_param *fsp = va_arg(l, struct frameset_param *);
2279                         struct frameset_desc *frameset_desc;
2280
2281                         if (!fsp->parent && document->frame_desc)
2282                                 break;
2283
2284                         frameset_desc = create_frameset(fsp);
2285                         if (!fsp->parent && !document->frame_desc)
2286                                 document->frame_desc = frameset_desc;
2287
2288                         ret_val = frameset_desc;
2289                         break;
2290                 }
2291                 case SP_FRAME:
2292                 {
2293                         struct frameset_desc *parent = va_arg(l, struct frameset_desc *);
2294                         unsigned char *name = va_arg(l, unsigned char *);
2295                         unsigned char *url = va_arg(l, unsigned char *);
2296
2297                         add_frameset_entry(parent, NULL, name, url);
2298                         break;
2299                 }
2300                 case SP_NOWRAP:
2301                         renderer_context.nowrap = !!va_arg(l, int);
2302                         break;
2303                 case SP_REFRESH:
2304                 {
2305                         unsigned long seconds = va_arg(l, unsigned long);
2306                         unsigned char *t = va_arg(l, unsigned char *);
2307
2308                         if (document) {
2309                                 if (document->refresh)
2310                                         done_document_refresh(document->refresh);
2311                                 document->refresh = init_document_refresh(t, seconds);
2312                         }
2313                         break;
2314                 }
2315                 case SP_COLOR_LINK_LINES:
2316                         if (document && use_document_bg_colors(&document->options))
2317                                 color_link_lines(html_context);
2318                         break;
2319                 case SP_STYLESHEET:
2320 #ifdef CONFIG_CSS
2321                         if (document) {
2322                                 struct uri *uri = va_arg(l, struct uri *);
2323
2324                                 add_to_uri_list(&document->css_imports, uri);
2325                         }
2326 #endif
2327                         break;
2328                 case SP_SCRIPT:
2329 #ifdef CONFIG_ECMASCRIPT
2330                         if (document) {
2331                                 struct uri *uri = va_arg(l, struct uri *);
2332
2333                                 add_to_uri_list(&document->ecmascript_imports, uri);
2334                         }
2335 #endif
2336                         break;
2337         }
2338
2339         va_end(l);
2340
2341         return ret_val;
2342 }
2343
2344 void
2345 free_table_cache(void)
2346 {
2347         if (table_cache) {
2348                 struct hash_item *item;
2349                 int i;
2350
2351                 /* We do not free key here. */
2352                 foreach_hash_item (item, *table_cache, i) {
2353                         mem_free_if(item->value);
2354                 }
2355
2356                 free_hash(&table_cache);
2357                 table_cache_entries = 0;
2358         }
2359 }
2360
2361 struct part *
2362 format_html_part(struct html_context *html_context,
2363                  unsigned char *start, unsigned char *end,
2364                  int align, int margin, int width, struct document *document,
2365                  int x, int y, unsigned char *head,
2366                  int link_num)
2367 {
2368         struct part *part;
2369         void *html_state;
2370         struct tag *saved_last_tag_to_move = renderer_context.last_tag_to_move;
2371         int saved_empty_format = renderer_context.empty_format;
2372         int saved_margin = html_context->margin;
2373         int saved_last_link_to_move = renderer_context.last_link_to_move;
2374
2375         /* Hash creation if needed. */
2376         if (!table_cache) {
2377                 table_cache = init_hash8();
2378         } else if (!document) {
2379                 /* Search for cached entry. */
2380                 struct table_cache_entry_key key;
2381                 struct hash_item *item;
2382
2383                 /* Clear key to prevent potential alignment problem
2384                  * when keys are compared. */
2385                 memset(&key, 0, sizeof(key));
2386
2387                 key.start = start;
2388                 key.end = end;
2389                 key.align = align;
2390                 key.margin = margin;
2391                 key.width = width;
2392                 key.x = x;
2393                 key.link_num = link_num;
2394
2395                 item = get_hash_item(table_cache,
2396                                      (unsigned char *) &key,
2397                                      sizeof(key));
2398                 if (item) { /* We found it in cache, so just copy and return. */
2399                         part = mem_alloc(sizeof(*part));
2400                         if (part)  {
2401                                 copy_struct(part, &((struct table_cache_entry *)
2402                                                     item->value)->part);
2403                                 return part;
2404                         }
2405                 }
2406         }
2407
2408         assertm(y >= 0, "format_html_part: y == %d", y);
2409         if_assert_failed return NULL;
2410
2411         if (document) {
2412                 struct node *node = mem_alloc(sizeof(*node));
2413
2414                 if (node) {
2415                         int node_width = !html_context->table_level ? INT_MAX : width;
2416
2417                         set_box(&node->box, x, y, node_width, 1);
2418                         add_to_list(document->nodes, node);
2419                 }
2420
2421                 renderer_context.last_link_to_move = document->nlinks;
2422                 renderer_context.last_tag_to_move = (struct tag *) &document->tags;
2423                 renderer_context.last_tag_for_newline = (struct tag *) &document->tags;
2424         } else {
2425                 renderer_context.last_link_to_move = 0;
2426                 renderer_context.last_tag_to_move = (struct tag *) NULL;
2427                 renderer_context.last_tag_for_newline = (struct tag *) NULL;
2428         }
2429
2430         html_context->margin = margin;
2431         renderer_context.empty_format = !document;
2432
2433         done_link_state_info();
2434         renderer_context.nobreak = 1;
2435
2436         part = mem_calloc(1, sizeof(*part));
2437         if (!part) goto ret;
2438
2439         part->document = document;
2440         part->box.x = x;
2441         part->box.y = y;
2442         part->cx = -1;
2443         part->cy = 0;
2444         part->link_num = link_num;
2445
2446         html_state = init_html_parser_state(html_context, ELEMENT_IMMORTAL, align, margin, width);
2447
2448         parse_html(start, end, part, head, html_context);
2449
2450         done_html_parser_state(html_context, html_state);
2451
2452         int_lower_bound(&part->max_width, part->box.width);
2453
2454         renderer_context.nobreak = 0;
2455
2456         done_link_state_info();
2457         mem_free_if(part->spaces);
2458 #ifdef CONFIG_UTF8
2459         mem_free_if(part->char_width);
2460 #endif
2461
2462         if (document) {
2463                 struct node *node = document->nodes.next;
2464
2465                 node->box.height = y - node->box.y + part->box.height;
2466         }
2467
2468 ret:
2469         renderer_context.last_link_to_move = saved_last_link_to_move;
2470         renderer_context.last_tag_to_move = saved_last_tag_to_move;
2471         renderer_context.empty_format = saved_empty_format;
2472
2473         html_context->margin = saved_margin;
2474
2475         if (html_context->table_level > 1 && !document
2476             && table_cache
2477             && table_cache_entries < MAX_TABLE_CACHE_ENTRIES) {
2478                 /* Create a new entry. */
2479                 /* Clear memory to prevent bad key comparaison due to alignment
2480                  * of key fields. */
2481                 struct table_cache_entry *tce = mem_calloc(1, sizeof(*tce));
2482
2483                 if (tce) {
2484                         tce->key.start = start;
2485                         tce->key.end = end;
2486                         tce->key.align = align;
2487                         tce->key.margin = margin;
2488                         tce->key.width = width;
2489                         tce->key.x = x;
2490                         tce->key.link_num = link_num;
2491                         copy_struct(&tce->part, part);
2492
2493                         if (!add_hash_item(table_cache,
2494                                            (unsigned char *) &tce->key,
2495                                            sizeof(tce->key), tce)) {
2496                                 mem_free(tce);
2497                         } else {
2498                                 table_cache_entries++;
2499                         }
2500                 }
2501         }
2502
2503         return part;
2504 }
2505
2506 void
2507 render_html_document(struct cache_entry *cached, struct document *document,
2508                      struct string *buffer)
2509 {
2510         struct html_context *html_context;
2511         struct part *part;
2512         unsigned char *start;
2513         unsigned char *end;
2514         struct string title;
2515         struct string head;
2516
2517         assert(cached && document);
2518         if_assert_failed return;
2519
2520         if (!init_string(&head)) return;
2521
2522         if (cached->head) add_to_string(&head, cached->head);
2523
2524         start = buffer->source;
2525         end = buffer->source + buffer->length;
2526
2527         html_context = init_html_parser(cached->uri, &document->options,
2528                                         start, end, &head, &title,
2529                                         put_chars_conv, line_break,
2530                                         html_special);
2531         if (!html_context) return;
2532
2533         renderer_context.g_ctrl_num = 0;
2534         renderer_context.cached = cached;
2535         renderer_context.convert_table = get_convert_table(head.source,
2536                                                            document->options.cp,
2537                                                            document->options.assume_cp,
2538                                                            &document->cp,
2539                                                            &document->cp_status,
2540                                                            document->options.hard_assume);
2541 #ifdef CONFIG_UTF8
2542         html_context->options->utf8 = is_cp_utf8(document->options.cp);
2543 #endif /* CONFIG_UTF8 */
2544         html_context->doc_cp = document->cp;
2545
2546         if (title.length) {
2547                 /* CSM_DEFAULT because init_html_parser() did not
2548                  * decode entities in the title.  */
2549                 document->title = convert_string(renderer_context.convert_table,
2550                                                  title.source, title.length,
2551                                                  document->options.cp,
2552                                                  CSM_DEFAULT, NULL, NULL, NULL);
2553         }
2554         done_string(&title);
2555
2556         part = format_html_part(html_context, start, end, par_format.align,
2557                                 par_format.leftmargin,
2558                                 document->options.box.width, document,
2559                                 0, 0, head.source, 1);
2560
2561         /* Drop empty allocated lines at end of document if any
2562          * and adjust document height. */
2563         while (document->height && !document->data[document->height - 1].length)
2564                 mem_free_if(document->data[--document->height].chars);
2565
2566         /* Calculate document width. */
2567         {
2568                 int i;
2569
2570                 document->width = 0;
2571                 for (i = 0; i < document->height; i++)
2572                         int_lower_bound(&document->width, document->data[i].length);
2573         }
2574
2575 #if 1
2576         document->options.needs_width = 1;
2577 #else
2578         /* FIXME: This needs more tuning since if we are centering stuff it
2579          * does not work. */
2580         document->options.needs_width =
2581                                 (document->width + (document->options.margin
2582                                  >= document->options.width));
2583 #endif
2584
2585         document->color.background = par_format.color.background;
2586
2587         done_html_parser(html_context);
2588
2589         /* Drop forms which has been serving as a placeholder for form items
2590          * added in the wrong order due to the ordering of table rendering. */
2591         {
2592                 struct form *form;
2593
2594                 foreach (form, document->forms) {
2595                         if (form->form_num)
2596                                 continue;
2597
2598                         if (list_empty(form->items))
2599                                 done_form(form);
2600
2601                         break;
2602                 }
2603         }
2604
2605         /* @part was residing in html_context so it has to stay alive until
2606          * done_html_parser(). */
2607         done_string(&head);
2608         mem_free_if(part);
2609
2610 #if 0 /* debug purpose */
2611         {
2612                 FILE *f = fopen("forms", "ab");
2613                 struct form_control *form;
2614                 unsigned char *qq;
2615                 fprintf(f,"FORM:\n");
2616                 foreach (form, document->forms) {
2617                         fprintf(f, "g=%d f=%d c=%d t:%d\n",
2618                                 form->g_ctrl_num, form->form_num,
2619                                 form->ctrl_num, form->type);
2620                 }
2621                 fprintf(f,"fragment: \n");
2622                 for (qq = start; qq < end; qq++) fprintf(f, "%c", *qq);
2623                 fprintf(f,"----------\n\n");
2624                 fclose(f);
2625         }
2626 #endif
2627 }