fmt: consider line length in short line cost
[neatroff.git] / fmt.c
blobcde26ef7649b868a5b51ca421957f9eb725b3fac
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "roff.h"
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 static int fmt_fillwords(struct fmt *f, int br);
25 struct word {
26 char *s;
27 int wid; /* word's width */
28 int elsn, elsp; /* els_neg and els_pos */
29 int gap; /* the space before this word */
30 int hy; /* hyphen width if inserted after this word */
31 int str; /* does the space before it stretch */
34 struct line {
35 struct sbuf sbuf;
36 int wid, li, ll;
37 int elsn, elsp;
40 struct fmt {
41 /* queued words */
42 struct word words[NWORDS];
43 int nwords;
44 /* queued lines */
45 struct line lines[NLINES];
46 int l_head, l_tail;
47 /* for paragraph adjustment */
48 long best[NWORDS];
49 int best_pos[NWORDS];
50 int best_dep[NWORDS];
51 /* current line */
52 int gap; /* space before the next word */
53 int nls; /* newlines before the next word */
54 int nls_sup; /* suppressed newlines */
55 int li, ll; /* current line indentation and length */
56 int filled; /* filled all words in the last fmt_fill() */
57 int eos; /* last word ends a sentence */
58 int fillreq; /* fill after the last word (\p) */
61 /* .ll, .in and .ti are delayed until the partial line is output */
62 static void fmt_confupdate(struct fmt *f)
64 f->ll = n_l;
65 f->li = n_ti >= 0 ? n_ti : n_i;
66 n_ti = -1;
69 static int fmt_confchanged(struct fmt *f)
71 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i);
74 /* move words inside an fmt struct */
75 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
77 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
80 /* move words from the buffer to s */
81 static int fmt_wordscopy(struct fmt *f, int beg, int end,
82 struct sbuf *s, int *els_neg, int *els_pos)
84 struct word *wcur;
85 int w = 0;
86 int i;
87 *els_neg = 0;
88 *els_pos = 0;
89 for (i = beg; i < end; i++) {
90 wcur = &f->words[i];
91 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
92 sbuf_append(s, wcur->s);
93 w += wcur->wid + wcur->gap;
94 if (wcur->elsn < *els_neg)
95 *els_neg = wcur->elsn;
96 if (wcur->elsp > *els_pos)
97 *els_pos = wcur->elsp;
98 free(wcur->s);
100 if (beg < end) {
101 wcur = &f->words[end - 1];
102 if (wcur->hy)
103 sbuf_append(s, "\\(hy");
104 w += wcur->hy;
106 return w;
109 static int fmt_nlines(struct fmt *f)
111 if (f->l_tail <= f->l_head)
112 return f->l_head - f->l_tail;
113 return NLINES - f->l_tail + f->l_head;
116 /* the total width of the specified words in f->words[] */
117 static int fmt_wordslen(struct fmt *f, int beg, int end)
119 int i, w = 0;
120 for (i = beg; i < end; i++)
121 w += f->words[i].wid + f->words[i].gap;
122 return beg < end ? w + f->words[end - 1].hy : 0;
125 /* the number of stretchable spaces in f */
126 static int fmt_spaces(struct fmt *f, int beg, int end)
128 int i, n = 0;
129 for (i = beg + 1; i < end; i++)
130 if (f->words[i].str)
131 n++;
132 return n;
135 /* the amount of stretchable spaces in f */
136 static int fmt_spacessum(struct fmt *f, int beg, int end)
138 int i, n = 0;
139 for (i = beg + 1; i < end; i++)
140 if (f->words[i].str)
141 n += f->words[i].gap;
142 return n;
145 /* return the next line in the buffer */
146 int fmt_nextline(struct fmt *f, struct sbuf *sbuf, int *w,
147 int *li, int *ll, int *els_neg, int *els_pos)
149 struct line *l;
150 l = &f->lines[f->l_tail];
151 if (f->l_head == f->l_tail)
152 return 1;
153 *li = l->li;
154 *ll = l->ll;
155 *w = l->wid;
156 *els_neg = l->elsn;
157 *els_pos = l->elsp;
158 sbuf_append(sbuf, sbuf_buf(&l->sbuf));
159 sbuf_done(&l->sbuf);
160 f->l_tail = (f->l_tail + 1) % NLINES;
161 return 0;
164 static struct line *fmt_mkline(struct fmt *f)
166 struct line *l = &f->lines[f->l_head];
167 if ((f->l_head + 1) % NLINES == f->l_tail)
168 return NULL;
169 f->l_head = (f->l_head + 1) % NLINES;
170 l->li = f->li;
171 l->ll = f->ll;
172 sbuf_init(&l->sbuf);
173 return l;
176 static int fmt_extractline(struct fmt *f, int beg, int end, int llen)
178 int fmt_div, fmt_rem;
179 int w, i, nspc;
180 struct line *l;
181 if (!(l = fmt_mkline(f)))
182 return 1;
183 w = fmt_wordslen(f, beg, end);
184 nspc = fmt_spaces(f, beg, end);
185 /* stretch if (spread & 1) and shrink if (spread & 2) */
186 if (nspc && llen) {
187 fmt_div = (llen - w) / nspc;
188 fmt_rem = (llen - w) % nspc;
189 if (fmt_rem < 0) {
190 fmt_div--;
191 fmt_rem += nspc;
193 for (i = beg + 1; i < end; i++)
194 if (f->words[i].str)
195 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
197 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
198 return 0;
201 static int fmt_sp(struct fmt *f)
203 if (fmt_fillwords(f, 1))
204 return 1;
205 if (fmt_extractline(f, 0, f->nwords, 0))
206 return 1;
207 f->filled = 0;
208 f->nls--;
209 f->nls_sup = 0;
210 f->nwords = 0;
211 f->fillreq = 0;
212 return 0;
215 /* fill as many lines as possible; if br, put the remaining words in a line */
216 int fmt_fill(struct fmt *f, int br)
218 if (fmt_fillwords(f, br))
219 return 1;
220 if (br) {
221 f->filled = 0;
222 if (f->nwords)
223 if (fmt_sp(f))
224 return 1;
226 return 0;
229 void fmt_space(struct fmt *fmt)
231 fmt->gap += font_swid(dev_font(n_f), n_s, n_ss);
234 int fmt_newline(struct fmt *f)
236 f->gap = 0;
237 if (!FMT_FILL(f)) {
238 f->nls++;
239 fmt_sp(f);
240 return 0;
242 if (f->nls >= 1)
243 if (fmt_sp(f))
244 return 1;
245 if (f->nls == 0 && !f->filled && !f->nwords)
246 fmt_sp(f);
247 f->nls++;
248 return 0;
251 /* format the paragraph after the next word (\p) */
252 int fmt_fillreq(struct fmt *f)
254 if (f->fillreq > 0)
255 if (fmt_fillwords(f, 0))
256 return 1;
257 f->fillreq = f->nwords + 1;
258 return 0;
261 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
262 int hy, int str, int gap)
264 int len = strlen(wb_buf(wb));
265 word->s = xmalloc(len + 1);
266 memcpy(word->s, wb_buf(wb), len + 1);
267 word->wid = wb_wid(wb);
268 word->elsn = wb->els_neg;
269 word->elsp = wb->els_pos;
270 word->hy = hy ? wb_dashwid(wb) : 0;
271 word->str = str;
272 word->gap = gap;
275 /* find explicit hyphenation positions: dashes, \: and \% */
276 static int fmt_hyphmarks(char *word, int *hyidx, int *hyins)
278 char d[ILNLEN];
279 char *s = word;
280 int c, n = 0;
281 while ((c = escread(&s, d)) > 0)
283 if (c < 0 || !strcmp(c_hc, d))
284 return -1;
285 while ((c = escread(&s, d)) >= 0 && n < NHYPHSWORD) {
286 if (!c && !strcmp(c_hc, d)) {
287 hyins[n] = 1;
288 hyidx[n++] = s - word;
290 if (!c && (!strcmp(c_bp, d) || c_dash(d))) {
291 hyins[n] = 0;
292 hyidx[n++] = s - word;
295 return n;
298 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
300 int hyidx[NHYPHSWORD];
301 int hyins[NHYPHSWORD] = {0};
302 char *src = wb_buf(wb);
303 struct wb wbc;
304 char *beg;
305 char *end;
306 int n, i;
307 int cf, cs, cm;
308 n = fmt_hyphmarks(src, hyidx, hyins);
309 if (n <= 0) {
310 fmt_wb2word(f, &f->words[f->nwords++], wb, 0, 1, gap);
311 return;
313 /* update f->fillreq considering the new sub-words */
314 if (f->fillreq == f->nwords + 1)
315 f->fillreq += n;
316 wb_init(&wbc);
317 for (i = 0; i <= n; i++) {
318 beg = src + (i > 0 ? hyidx[i - 1] : 0);
319 end = src + (i < n ? hyidx[i] : strlen(src));
320 wb_catstr(&wbc, beg, end);
321 fmt_wb2word(f, &f->words[f->nwords++], &wbc,
322 i < n && hyins[i], i == 0, i == 0 ? gap : 0);
323 /* restoring wbc */
324 wb_fnszget(&wbc, &cs, &cf, &cm);
325 wb_reset(&wbc);
326 wb_fnszset(&wbc, cs, cf, cm);
328 wb_done(&wbc);
331 /* the amount of space necessary before the next word */
332 static int fmt_wordgap(struct fmt *f)
334 int nls = f->nls || f->nls_sup;
335 int swid = font_swid(dev_font(n_f), n_s, n_ss);
336 if (f->eos && f->nwords)
337 if ((nls && !f->gap) || (!nls && f->gap == 2 * swid))
338 return swid + font_swid(dev_font(n_f), n_s, n_sss);
339 return (nls && !f->gap && f->nwords) ? swid : f->gap;
342 /* insert wb into fmt */
343 int fmt_word(struct fmt *f, struct wb *wb)
345 if (wb_empty(wb))
346 return 0;
347 if (f->nwords + NHYPHSWORD >= NWORDS || fmt_confchanged(f))
348 if (fmt_fillwords(f, 0))
349 return 1;
350 if (FMT_FILL(f) && f->nls && f->gap)
351 if (fmt_sp(f))
352 return 1;
353 if (!f->nwords) /* apply the new .l and .i */
354 fmt_confupdate(f);
355 f->gap = fmt_wordgap(f);
356 f->eos = wb_eos(wb);
357 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
358 f->filled = 0;
359 f->nls = 0;
360 f->nls_sup = 0;
361 f->gap = 0;
362 return 0;
365 /* approximate 8 * sqrt(cost) */
366 static long scaledown(long cost)
368 long ret = 0;
369 int i;
370 for (i = 0; i < 14; i++)
371 ret += ((cost >> (i * 2)) & 3) << (i + 3);
372 return ret < (1 << 13) ? ret : (1 << 13);
375 /* the cost of putting lwid words in a line of length llen */
376 static long FMT_COST(int llen, int lwid, int swid, int nspc)
378 /* the ratio that the stretchable spaces of the line should be spread */
379 long ratio = abs((llen - lwid) * 100l / (swid ? swid : 1));
380 /* ratio too large; scaling it down */
381 if (ratio > 4000)
382 ratio = 4000 + scaledown(ratio - 4000);
383 /* assigning a cost of 100 to each space stretching 100 percent */
384 return ratio * ratio / 100l * (nspc ? nspc : 1);
387 /* the cost of putting a line break before word pos */
388 static long fmt_findcost(struct fmt *f, int pos)
390 int i, pen = 0;
391 long cur;
392 int llen = MAX(1, FMT_LLEN(f));
393 int lwid = 0; /* current line length */
394 int swid = 0; /* amount of stretchable spaces */
395 int nspc = 0; /* number of stretchable spaces */
396 if (pos <= 0)
397 return 0;
398 if (f->best_pos[pos] >= 0)
399 return f->best[pos];
400 i = pos - 1;
401 lwid = 0;
402 if (f->words[i].hy) /* the last word is hyphenated */
403 lwid += f->words[i].hy;
404 if (f->words[i].hy)
405 pen = n_hycost;
406 while (i >= 0) {
407 lwid += f->words[i].wid;
408 if (i + 1 < pos)
409 lwid += f->words[i + 1].gap;
410 if (i + 1 < pos && f->words[i + 1].str) {
411 swid += f->words[i + 1].gap;
412 nspc++;
414 if (lwid - (swid * n_ssh / 100) > llen)
415 if (pos - i > 1)
416 break;
417 cur = fmt_findcost(f, i) + FMT_COST(llen, lwid, swid, nspc) +
418 pen * (nspc ? nspc : 1);
419 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
420 f->best_pos[pos] = i;
421 f->best_dep[pos] = f->best_dep[i] + 1;
422 f->best[pos] = cur;
424 i--;
426 return f->best[pos];
429 static int fmt_bestpos(struct fmt *f, int pos)
431 fmt_findcost(f, pos);
432 return MAX(0, f->best_pos[pos]);
435 static int fmt_bestdep(struct fmt *f, int pos)
437 fmt_findcost(f, pos);
438 return MAX(0, f->best_dep[pos]);
441 /* return the last filled word */
442 static int fmt_breakparagraph(struct fmt *f, int pos, int br)
444 int i;
445 int best = -1;
446 long cost, best_cost = 0;
447 int llen = FMT_LLEN(f);
448 int lwid = 0; /* current line length */
449 int swid = 0; /* amount of stretchable spaces */
450 int nspc = 0; /* number of stretchable spaces */
451 if (f->fillreq > 0 && f->fillreq <= f->nwords) {
452 fmt_findcost(f, f->fillreq);
453 return f->fillreq;
455 if (pos > 0 && f->words[pos - 1].wid >= llen) {
456 fmt_findcost(f, pos);
457 return pos;
459 i = pos - 1;
460 lwid = 0;
461 if (f->words[i].hy) /* the last word is hyphenated */
462 lwid += f->words[i].hy;
463 while (i >= 0) {
464 lwid += f->words[i].wid;
465 if (i + 1 < pos)
466 lwid += f->words[i + 1].gap;
467 if (i + 1 < pos && f->words[i + 1].str) {
468 swid += f->words[i + 1].gap;
469 nspc++;
471 if (lwid > llen && i + 1 < pos)
472 break;
473 cost = fmt_findcost(f, i);
474 /* the cost of formatting short lines; should prevent widows */
475 if (br && n_pmll && lwid < llen * n_pmll / 100) {
476 int pmll = llen * n_pmll / 100;
477 cost += (long) n_pmllcost * (pmll - lwid) / pmll;
479 if (best < 0 || cost < best_cost) {
480 best = i;
481 best_cost = cost;
483 i--;
485 return best;
488 /* extract the first nreq formatted lines before the word at pos */
489 static int fmt_head(struct fmt *f, int nreq, int pos)
491 int best = pos; /* best line break for nreq-th line */
492 int prev, next; /* best line breaks without hyphenation */
493 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
494 return pos;
495 /* finding the optimal line break for nreq-th line */
496 while (best > 0 && fmt_bestdep(f, best) > nreq)
497 best = fmt_bestpos(f, best);
498 prev = best;
499 next = best;
500 /* finding closest line breaks without hyphenation */
501 while (prev > 1 && f->words[prev - 1].hy &&
502 fmt_bestdep(f, prev - 1) == nreq)
503 prev--;
504 while (next < pos && f->words[next - 1].hy &&
505 fmt_bestdep(f, next) == nreq)
506 next++;
507 /* choosing the best of them */
508 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
509 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
510 if (!f->words[prev - 1].hy)
511 return prev;
512 if (!f->words[next - 1].hy)
513 return next;
514 return best;
517 /* break f->words[0..end] into lines according to fmt_bestpos() */
518 static int fmt_break(struct fmt *f, int end)
520 int beg, ret = 0;
521 beg = fmt_bestpos(f, end);
522 if (beg > 0)
523 ret += fmt_break(f, beg);
524 f->words[beg].gap = 0;
525 if (fmt_extractline(f, beg, end, FMT_ADJ(f) ? FMT_LLEN(f) : 0))
526 return ret;
527 if (beg > 0)
528 fmt_confupdate(f);
529 return ret + (end - beg);
532 /* estimated number of lines until traps or the end of a page */
533 static int fmt_safelines(void)
535 int lnht = MAX(1, n_L) * n_v;
536 return (f_nexttrap() + lnht - 1) / lnht;
539 /* fill the words collected in the buffer */
540 static int fmt_fillwords(struct fmt *f, int br)
542 int nreq; /* the number of lines until a trap */
543 int end; /* the final line ends before this word */
544 int end_head; /* like end, but only the first nreq lines included */
545 int head = 0; /* only nreq first lines have been formatted */
546 int llen; /* line length, taking shrinkable spaces into account */
547 int n, i;
548 if (!FMT_FILL(f))
549 return 0;
550 llen = fmt_wordslen(f, 0, f->nwords) -
551 fmt_spacessum(f, 0, f->nwords) * n_ssh / 100;
552 /* not enough words to fill */
553 if ((f->fillreq <= 0 || f->nwords < f->fillreq) && llen <= FMT_LLEN(f))
554 return 0;
555 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
556 if (nreq > 0 && nreq <= fmt_nlines(f))
557 return 1;
558 /* resetting positions */
559 for (i = 0; i < f->nwords + 1; i++)
560 f->best_pos[i] = -1;
561 end = fmt_breakparagraph(f, f->nwords, br);
562 if (nreq > 0) {
563 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
564 head = end_head < end;
565 end = end_head;
567 /* recursively add lines */
568 n = fmt_break(f, end);
569 f->nwords -= n;
570 f->fillreq -= n;
571 fmt_movewords(f, 0, n, f->nwords);
572 f->filled = n && !f->nwords;
573 if (f->nwords)
574 f->words[0].gap = 0;
575 if (f->nwords) /* apply the new .l and .i */
576 fmt_confupdate(f);
577 return head || n != end;
580 struct fmt *fmt_alloc(void)
582 struct fmt *fmt = xmalloc(sizeof(*fmt));
583 memset(fmt, 0, sizeof(*fmt));
584 return fmt;
587 void fmt_free(struct fmt *fmt)
589 free(fmt);
592 int fmt_wid(struct fmt *fmt)
594 return fmt_wordslen(fmt, 0, fmt->nwords) + fmt_wordgap(fmt);
597 int fmt_morewords(struct fmt *fmt)
599 return fmt_morelines(fmt) || fmt->nwords;
602 int fmt_morelines(struct fmt *fmt)
604 return fmt->l_head != fmt->l_tail;
607 /* suppress the last newline */
608 void fmt_suppressnl(struct fmt *fmt)
610 if (fmt->nls) {
611 fmt->nls--;
612 fmt->nls_sup = 1;