roff: switch to ISC
[neatroff.git] / fmt.c
blob83571ccbed9d8b857ad6ad22a53856df9c30caa0
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "roff.h"
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 static int fmt_fillwords(struct fmt *f, int br);
25 struct word {
26 char *s;
27 int wid; /* word's width */
28 int elsn, elsp; /* els_neg and els_pos */
29 int gap; /* the space before this word */
30 int hy; /* hyphen width if inserted after this word */
31 int str; /* does the space before it stretch */
32 int cost; /* the extra cost of line break after this word */
35 struct line {
36 struct sbuf sbuf;
37 int wid, li, ll;
38 int elsn, elsp;
41 struct fmt {
42 /* queued words */
43 struct word *words;
44 int words_n, words_sz;
45 /* queued lines */
46 struct line *lines;
47 int lines_head, lines_tail, lines_sz;
48 /* for paragraph adjustment */
49 long *best;
50 int *best_pos;
51 int *best_dep;
52 /* current line */
53 int gap; /* space before the next word */
54 int nls; /* newlines before the next word */
55 int nls_sup; /* suppressed newlines */
56 int li, ll; /* current line indentation and length */
57 int filled; /* filled all words in the last fmt_fill() */
58 int eos; /* last word ends a sentence */
59 int fillreq; /* fill after the last word (\p) */
62 /* .ll, .in and .ti are delayed until the partial line is output */
63 static void fmt_confupdate(struct fmt *f)
65 f->ll = n_l;
66 f->li = n_ti >= 0 ? n_ti : n_i;
67 n_ti = -1;
70 static int fmt_confchanged(struct fmt *f)
72 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i);
75 /* move words inside an fmt struct */
76 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
78 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
81 /* move words from the buffer to s */
82 static int fmt_wordscopy(struct fmt *f, int beg, int end,
83 struct sbuf *s, int *els_neg, int *els_pos)
85 struct word *wcur;
86 int w = 0;
87 int i;
88 *els_neg = 0;
89 *els_pos = 0;
90 for (i = beg; i < end; i++) {
91 wcur = &f->words[i];
92 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
93 sbuf_append(s, wcur->s);
94 w += wcur->wid + wcur->gap;
95 if (wcur->elsn < *els_neg)
96 *els_neg = wcur->elsn;
97 if (wcur->elsp > *els_pos)
98 *els_pos = wcur->elsp;
99 free(wcur->s);
101 if (beg < end) {
102 wcur = &f->words[end - 1];
103 if (wcur->hy)
104 sbuf_append(s, "\\(hy");
105 w += wcur->hy;
107 return w;
110 static int fmt_nlines(struct fmt *f)
112 return f->lines_head - f->lines_tail;
115 /* the total width of the specified words in f->words[] */
116 static int fmt_wordslen(struct fmt *f, int beg, int end)
118 int i, w = 0;
119 for (i = beg; i < end; i++)
120 w += f->words[i].wid + f->words[i].gap;
121 return beg < end ? w + f->words[end - 1].hy : 0;
124 /* the number of stretchable spaces in f */
125 static int fmt_spaces(struct fmt *f, int beg, int end)
127 int i, n = 0;
128 for (i = beg + 1; i < end; i++)
129 if (f->words[i].str)
130 n++;
131 return n;
134 /* the amount of stretchable spaces in f */
135 static int fmt_spacessum(struct fmt *f, int beg, int end)
137 int i, n = 0;
138 for (i = beg + 1; i < end; i++)
139 if (f->words[i].str)
140 n += f->words[i].gap;
141 return n;
144 /* return the next line in the buffer */
145 char *fmt_nextline(struct fmt *f, int *w,
146 int *li, int *ll, int *els_neg, int *els_pos)
148 struct line *l;
149 if (f->lines_head == f->lines_tail)
150 return NULL;
151 l = &f->lines[f->lines_tail++];
152 *li = l->li;
153 *ll = l->ll;
154 *w = l->wid;
155 *els_neg = l->elsn;
156 *els_pos = l->elsp;
157 return sbuf_out(&l->sbuf);
160 static struct line *fmt_mkline(struct fmt *f)
162 struct line *l;
163 if (f->lines_head == f->lines_tail) {
164 f->lines_head = 0;
165 f->lines_tail = 0;
167 if (f->lines_head == f->lines_sz) {
168 f->lines_sz += 256;
169 f->lines = mextend(f->lines, f->lines_head,
170 f->lines_sz, sizeof(f->lines[0]));
172 l = &f->lines[f->lines_head++];
173 l->li = f->li;
174 l->ll = f->ll;
175 sbuf_init(&l->sbuf);
176 return l;
179 /* extract words from beg to end; shrink or stretch spaces if needed */
180 static int fmt_extractline(struct fmt *f, int beg, int end, int str)
182 int fmt_div, fmt_rem;
183 int w, i, nspc, llen;
184 struct line *l;
185 if (!(l = fmt_mkline(f)))
186 return 1;
187 llen = FMT_LLEN(f);
188 w = fmt_wordslen(f, beg, end);
189 nspc = fmt_spaces(f, beg, end);
190 if (nspc && FMT_ADJ(f) && (llen < w || str)) {
191 fmt_div = (llen - w) / nspc;
192 fmt_rem = (llen - w) % nspc;
193 if (fmt_rem < 0) {
194 fmt_div--;
195 fmt_rem += nspc;
197 for (i = beg + 1; i < end; i++)
198 if (f->words[i].str)
199 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
201 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
202 return 0;
205 static int fmt_sp(struct fmt *f)
207 if (fmt_fillwords(f, 1))
208 return 1;
209 if (fmt_extractline(f, 0, f->words_n, 0))
210 return 1;
211 f->filled = 0;
212 f->nls--;
213 f->nls_sup = 0;
214 f->words_n = 0;
215 f->fillreq = 0;
216 return 0;
219 /* fill as many lines as possible; if br, put the remaining words in a line */
220 int fmt_fill(struct fmt *f, int br)
222 if (fmt_fillwords(f, br))
223 return 1;
224 if (br) {
225 f->filled = 0;
226 if (f->words_n)
227 if (fmt_sp(f))
228 return 1;
230 return 0;
233 void fmt_space(struct fmt *fmt)
235 fmt->gap += font_swid(dev_font(n_f), n_s, n_ss);
238 int fmt_newline(struct fmt *f)
240 f->gap = 0;
241 if (!FMT_FILL(f)) {
242 f->nls++;
243 fmt_sp(f);
244 return 0;
246 if (f->nls >= 1)
247 if (fmt_sp(f))
248 return 1;
249 if (f->nls == 0 && !f->filled && !f->words_n)
250 fmt_sp(f);
251 f->nls++;
252 return 0;
255 /* format the paragraph after the next word (\p) */
256 int fmt_fillreq(struct fmt *f)
258 if (f->fillreq > 0)
259 if (fmt_fillwords(f, 0))
260 return 1;
261 f->fillreq = f->words_n + 1;
262 return 0;
265 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
266 int hy, int str, int gap, int cost)
268 int len = strlen(wb_buf(wb));
269 word->s = xmalloc(len + 1);
270 memcpy(word->s, wb_buf(wb), len + 1);
271 word->wid = wb_wid(wb);
272 word->elsn = wb->els_neg;
273 word->elsp = wb->els_pos;
274 word->cost = cost ? wb_cost(wb) : 0;
275 word->hy = hy ? wb_hywid(wb) : 0;
276 word->str = str;
277 word->gap = gap;
280 /* find explicit hyphenation positions: dashes, \: and \% */
281 static int fmt_hyphmarks(char *word, int *hyidx, int *hyins)
283 char *s = word;
284 char *d = NULL;
285 int c, n = 0;
286 while ((c = escread(&s, &d)) > 0)
288 if (c < 0 || !strcmp(c_hc, d))
289 return -1;
290 while ((c = escread(&s, &d)) >= 0 && n < NHYPHSWORD) {
291 if (!c && !strcmp(c_hc, d)) {
292 hyins[n] = 1;
293 hyidx[n++] = s - word;
295 if (!c && c_hydash(d)) {
296 hyins[n] = 0;
297 hyidx[n++] = s - word;
300 return n;
303 static struct word *fmt_mkword(struct fmt *f)
305 if (f->words_n == f->words_sz) {
306 f->words_sz += 256;
307 f->words = mextend(f->words, f->words_n,
308 f->words_sz, sizeof(f->words[0]));
310 return &f->words[f->words_n++];
313 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
315 int hyidx[NHYPHSWORD];
316 int hyins[NHYPHSWORD] = {0};
317 char *src = wb_buf(wb);
318 struct wb wbc;
319 char *beg;
320 char *end;
321 int n, i;
322 int cf, cs, cm;
323 n = fmt_hyphmarks(src, hyidx, hyins);
324 if (n <= 0) {
325 fmt_wb2word(f, fmt_mkword(f), wb, 0, 1, gap, 1);
326 return;
328 /* update f->fillreq considering the new sub-words */
329 if (f->fillreq == f->words_n + 1)
330 f->fillreq += n;
331 wb_init(&wbc);
332 for (i = 0; i <= n; i++) {
333 beg = src + (i > 0 ? hyidx[i - 1] : 0);
334 end = src + (i < n ? hyidx[i] : strlen(src));
335 wb_catstr(&wbc, beg, end);
336 fmt_wb2word(f, fmt_mkword(f), &wbc,
337 i < n && hyins[i], i == 0, i == 0 ? gap : 0, i == n);
338 /* restoring wbc */
339 wb_fnszget(&wbc, &cs, &cf, &cm);
340 wb_reset(&wbc);
341 wb_fnszset(&wbc, cs, cf, cm);
343 wb_done(&wbc);
346 /* the amount of space necessary before the next word */
347 static int fmt_wordgap(struct fmt *f)
349 int nls = f->nls || f->nls_sup;
350 int swid = font_swid(dev_font(n_f), n_s, n_ss);
351 if (f->eos && f->words_n)
352 if ((nls && !f->gap) || (!nls && f->gap == 2 * swid))
353 return swid + font_swid(dev_font(n_f), n_s, n_sss);
354 return (nls && !f->gap && f->words_n) ? swid : f->gap;
357 /* insert wb into fmt */
358 int fmt_word(struct fmt *f, struct wb *wb)
360 if (wb_empty(wb))
361 return 0;
362 if (fmt_confchanged(f))
363 if (fmt_fillwords(f, 0))
364 return 1;
365 if (FMT_FILL(f) && f->nls && f->gap)
366 if (fmt_sp(f))
367 return 1;
368 if (!f->words_n) /* apply the new .l and .i */
369 fmt_confupdate(f);
370 f->gap = fmt_wordgap(f);
371 f->eos = wb_eos(wb);
372 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
373 f->filled = 0;
374 f->nls = 0;
375 f->nls_sup = 0;
376 f->gap = 0;
377 return 0;
380 /* approximate 8 * sqrt(cost) */
381 static long scaledown(long cost)
383 long ret = 0;
384 int i;
385 for (i = 0; i < 14; i++)
386 ret += ((cost >> (i * 2)) & 3) << (i + 3);
387 return ret < (1 << 13) ? ret : (1 << 13);
390 /* the cost of putting lwid words in a line of length llen */
391 static long FMT_COST(int llen, int lwid, int swid, int nspc)
393 /* the ratio that the stretchable spaces of the line should be spread */
394 long ratio = abs((llen - lwid) * 100l / (swid ? swid : 1));
395 /* ratio too large; scaling it down */
396 if (ratio > 4000)
397 ratio = 4000 + scaledown(ratio - 4000);
398 /* assigning a cost of 100 to each space stretching 100 percent */
399 return ratio * ratio / 100l * (nspc ? nspc : 1);
402 /* the number of hyphenations in consecutive lines ending at pos */
403 static int fmt_hydepth(struct fmt *f, int pos)
405 int n = 0;
406 while (pos > 0 && f->words[pos - 1].hy && ++n < 5)
407 pos = f->best_pos[pos];
408 return n;
411 static long hycost(int depth)
413 if (n_hlm > 0 && depth > n_hlm)
414 return 10000000;
415 if (depth >= 3)
416 return n_hycost + n_hycost2 + n_hycost3;
417 if (depth == 2)
418 return n_hycost + n_hycost2;
419 return depth ? n_hycost : 0;
422 /* the cost of putting a line break before word pos */
423 static long fmt_findcost(struct fmt *f, int pos)
425 int i, hyphenated;
426 long cur;
427 int llen = MAX(1, FMT_LLEN(f));
428 int lwid = 0; /* current line length */
429 int swid = 0; /* amount of stretchable spaces */
430 int nspc = 0; /* number of stretchable spaces */
431 if (pos <= 0)
432 return 0;
433 if (f->best_pos[pos] >= 0)
434 return f->best[pos] + f->words[pos - 1].cost;
435 lwid = f->words[pos - 1].hy; /* non-zero if the last word is hyphenated */
436 hyphenated = f->words[pos - 1].hy != 0;
437 i = pos - 1;
438 while (i >= 0) {
439 lwid += f->words[i].wid;
440 if (i + 1 < pos)
441 lwid += f->words[i + 1].gap;
442 if (i + 1 < pos && f->words[i + 1].str) {
443 swid += f->words[i + 1].gap;
444 nspc++;
446 if (lwid > llen + swid * n_ssh / 100 && i + 1 < pos)
447 break;
448 cur = fmt_findcost(f, i) + FMT_COST(llen, lwid, swid, nspc);
449 if (hyphenated)
450 cur += hycost(1 + fmt_hydepth(f, i));
451 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
452 f->best_pos[pos] = i;
453 f->best_dep[pos] = f->best_dep[i] + 1;
454 f->best[pos] = cur;
456 i--;
458 return f->best[pos] + f->words[pos - 1].cost;
461 static int fmt_bestpos(struct fmt *f, int pos)
463 fmt_findcost(f, pos);
464 return MAX(0, f->best_pos[pos]);
467 static int fmt_bestdep(struct fmt *f, int pos)
469 fmt_findcost(f, pos);
470 return MAX(0, f->best_dep[pos]);
473 /* return the last filled word */
474 static int fmt_breakparagraph(struct fmt *f, int pos, int br)
476 int i;
477 int best = -1;
478 long cost, best_cost = 0;
479 int llen = FMT_LLEN(f);
480 int lwid = 0; /* current line length */
481 int swid = 0; /* amount of stretchable spaces */
482 int nspc = 0; /* number of stretchable spaces */
483 if (f->fillreq > 0 && f->fillreq <= f->words_n) {
484 fmt_findcost(f, f->fillreq);
485 return f->fillreq;
487 if (pos > 0 && f->words[pos - 1].wid >= llen) {
488 fmt_findcost(f, pos);
489 return pos;
491 i = pos - 1;
492 lwid = 0;
493 if (f->words[i].hy) /* the last word is hyphenated */
494 lwid += f->words[i].hy;
495 while (i >= 0) {
496 lwid += f->words[i].wid;
497 if (i + 1 < pos)
498 lwid += f->words[i + 1].gap;
499 if (i + 1 < pos && f->words[i + 1].str) {
500 swid += f->words[i + 1].gap;
501 nspc++;
503 if (lwid > llen && i + 1 < pos)
504 break;
505 cost = fmt_findcost(f, i);
506 /* the cost of formatting short lines; should prevent widows */
507 if (br && n_pmll && lwid < llen * n_pmll / 100) {
508 int pmll = llen * n_pmll / 100;
509 cost += (long) n_pmllcost * (pmll - lwid) / pmll;
511 if (best < 0 || cost < best_cost) {
512 best = i;
513 best_cost = cost;
515 i--;
517 return best;
520 /* extract the first nreq formatted lines before the word at pos */
521 static int fmt_head(struct fmt *f, int nreq, int pos)
523 int best = pos; /* best line break for nreq-th line */
524 int prev, next; /* best line breaks without hyphenation */
525 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
526 return pos;
527 /* finding the optimal line break for nreq-th line */
528 while (best > 0 && fmt_bestdep(f, best) > nreq)
529 best = fmt_bestpos(f, best);
530 prev = best;
531 next = best;
532 /* finding closest line breaks without hyphenation */
533 while (prev > 1 && f->words[prev - 1].hy &&
534 fmt_bestdep(f, prev - 1) == nreq)
535 prev--;
536 while (next < pos && f->words[next - 1].hy &&
537 fmt_bestdep(f, next) == nreq)
538 next++;
539 /* choosing the best of them */
540 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
541 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
542 if (!f->words[prev - 1].hy)
543 return prev;
544 if (!f->words[next - 1].hy)
545 return next;
546 return best;
549 /* break f->words[0..end] into lines according to fmt_bestpos() */
550 static int fmt_break(struct fmt *f, int end)
552 int beg, ret = 0;
553 beg = fmt_bestpos(f, end);
554 if (beg > 0)
555 ret += fmt_break(f, beg);
556 f->words[beg].gap = 0;
557 if (fmt_extractline(f, beg, end, 1))
558 return ret;
559 if (beg > 0)
560 fmt_confupdate(f);
561 return ret + (end - beg);
564 /* estimated number of lines until traps or the end of a page */
565 static int fmt_safelines(void)
567 int lnht = MAX(1, n_L) * n_v;
568 return (f_nexttrap() + lnht - 1) / lnht;
571 /* fill the words collected in the buffer */
572 static int fmt_fillwords(struct fmt *f, int br)
574 int nreq; /* the number of lines until a trap */
575 int end; /* the final line ends before this word */
576 int end_head; /* like end, but only the first nreq lines included */
577 int head = 0; /* only nreq first lines have been formatted */
578 int llen; /* line length, taking shrinkable spaces into account */
579 int n, i;
580 if (!FMT_FILL(f))
581 return 0;
582 llen = fmt_wordslen(f, 0, f->words_n) -
583 fmt_spacessum(f, 0, f->words_n) * n_ssh / 100;
584 /* not enough words to fill */
585 if ((f->fillreq <= 0 || f->words_n < f->fillreq) && llen <= FMT_LLEN(f))
586 return 0;
587 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
588 if (nreq > 0 && nreq <= fmt_nlines(f))
589 return 1;
590 /* resetting positions */
591 f->best = malloc((f->words_n + 1) * sizeof(f->best[0]));
592 f->best_pos = malloc((f->words_n + 1) * sizeof(f->best_pos[0]));
593 f->best_dep = malloc((f->words_n + 1) * sizeof(f->best_dep[0]));
594 memset(f->best, 0, (f->words_n + 1) * sizeof(f->best[0]));
595 memset(f->best_dep, 0, (f->words_n + 1) * sizeof(f->best_dep[0]));
596 for (i = 0; i < f->words_n + 1; i++)
597 f->best_pos[i] = -1;
598 end = fmt_breakparagraph(f, f->words_n, br);
599 if (nreq > 0) {
600 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
601 head = end_head < end;
602 end = end_head;
604 /* recursively add lines */
605 n = end > 0 ? fmt_break(f, end) : 0;
606 f->words_n -= n;
607 f->fillreq -= n;
608 fmt_movewords(f, 0, n, f->words_n);
609 f->filled = n && !f->words_n;
610 if (f->words_n)
611 f->words[0].gap = 0;
612 if (f->words_n) /* apply the new .l and .i */
613 fmt_confupdate(f);
614 free(f->best);
615 free(f->best_pos);
616 free(f->best_dep);
617 f->best = NULL;
618 f->best_pos = NULL;
619 f->best_dep = NULL;
620 return head || n != end;
623 struct fmt *fmt_alloc(void)
625 struct fmt *fmt = xmalloc(sizeof(*fmt));
626 memset(fmt, 0, sizeof(*fmt));
627 return fmt;
630 void fmt_free(struct fmt *fmt)
632 free(fmt->lines);
633 free(fmt->words);
634 free(fmt);
637 int fmt_wid(struct fmt *fmt)
639 return fmt_wordslen(fmt, 0, fmt->words_n) + fmt_wordgap(fmt);
642 int fmt_morewords(struct fmt *fmt)
644 return fmt_morelines(fmt) || fmt->words_n;
647 int fmt_morelines(struct fmt *fmt)
649 return fmt->lines_head != fmt->lines_tail;
652 /* suppress the last newline */
653 void fmt_suppressnl(struct fmt *fmt)
655 if (fmt->nls) {
656 fmt->nls--;
657 fmt->nls_sup = 1;