font: unmap glyphs with .fmap
[neatroff.git] / fmt.c
blob5fa52e9699735f5318cb05a01eca1c4d1bbbce96
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "roff.h"
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 static int fmt_fillwords(struct fmt *f, int br);
25 struct word {
26 char *s;
27 int wid; /* word's width */
28 int elsn, elsp; /* els_neg and els_pos */
29 int gap; /* the space before this word */
30 int hy; /* hyphen width if inserted after this word */
31 int str; /* does the space before it stretch */
32 int cost; /* the extra cost of line break after this word */
33 int swid; /* space width after this word (i.e., \w' ') */
36 struct line {
37 struct sbuf sbuf;
38 int wid, li, ll;
39 int elsn, elsp;
42 struct fmt {
43 /* queued words */
44 struct word *words;
45 int words_n, words_sz;
46 /* queued lines */
47 struct line *lines;
48 int lines_head, lines_tail, lines_sz;
49 /* for paragraph adjustment */
50 long *best;
51 int *best_pos;
52 int *best_dep;
53 /* current line */
54 int gap; /* space before the next word */
55 int nls; /* newlines before the next word */
56 int nls_sup; /* suppressed newlines */
57 int li, ll; /* current line indentation and length */
58 int filled; /* filled all words in the last fmt_fill() */
59 int eos; /* last word ends a sentence */
60 int fillreq; /* fill after the last word (\p) */
63 /* .ll, .in and .ti are delayed until the partial line is output */
64 static void fmt_confupdate(struct fmt *f)
66 f->ll = n_l;
67 f->li = n_ti >= 0 ? n_ti : n_i;
68 n_ti = -1;
71 static int fmt_confchanged(struct fmt *f)
73 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i);
76 /* move words inside an fmt struct */
77 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
79 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
82 /* move words from the buffer to s */
83 static int fmt_wordscopy(struct fmt *f, int beg, int end,
84 struct sbuf *s, int *els_neg, int *els_pos)
86 struct word *wcur;
87 int w = 0;
88 int i;
89 *els_neg = 0;
90 *els_pos = 0;
91 for (i = beg; i < end; i++) {
92 wcur = &f->words[i];
93 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
94 sbuf_append(s, wcur->s);
95 w += wcur->wid + wcur->gap;
96 if (wcur->elsn < *els_neg)
97 *els_neg = wcur->elsn;
98 if (wcur->elsp > *els_pos)
99 *els_pos = wcur->elsp;
100 free(wcur->s);
102 if (beg < end) {
103 wcur = &f->words[end - 1];
104 if (wcur->hy)
105 sbuf_append(s, "\\(hy");
106 w += wcur->hy;
108 return w;
111 static int fmt_nlines(struct fmt *f)
113 return f->lines_head - f->lines_tail;
116 /* the total width of the specified words in f->words[] */
117 static int fmt_wordslen(struct fmt *f, int beg, int end)
119 int i, w = 0;
120 for (i = beg; i < end; i++)
121 w += f->words[i].wid + f->words[i].gap;
122 return beg < end ? w + f->words[end - 1].hy : 0;
125 /* the number of stretchable spaces in f */
126 static int fmt_spaces(struct fmt *f, int beg, int end)
128 int i, n = 0;
129 for (i = beg + 1; i < end; i++)
130 if (f->words[i].str)
131 n++;
132 return n;
135 /* the amount of stretchable spaces in f */
136 static int fmt_spacessum(struct fmt *f, int beg, int end)
138 int i, n = 0;
139 for (i = beg + 1; i < end; i++)
140 if (f->words[i].str)
141 n += f->words[i].gap;
142 return n;
145 /* return the next line in the buffer */
146 char *fmt_nextline(struct fmt *f, int *w,
147 int *li, int *ll, int *els_neg, int *els_pos)
149 struct line *l;
150 if (f->lines_head == f->lines_tail)
151 return NULL;
152 l = &f->lines[f->lines_tail++];
153 *li = l->li;
154 *ll = l->ll;
155 *w = l->wid;
156 *els_neg = l->elsn;
157 *els_pos = l->elsp;
158 return sbuf_out(&l->sbuf);
161 static struct line *fmt_mkline(struct fmt *f)
163 struct line *l;
164 if (f->lines_head == f->lines_tail) {
165 f->lines_head = 0;
166 f->lines_tail = 0;
168 if (f->lines_head == f->lines_sz) {
169 f->lines_sz += 256;
170 f->lines = mextend(f->lines, f->lines_head,
171 f->lines_sz, sizeof(f->lines[0]));
173 l = &f->lines[f->lines_head++];
174 l->li = f->li;
175 l->ll = f->ll;
176 sbuf_init(&l->sbuf);
177 return l;
180 /* extract words from beg to end; shrink or stretch spaces if needed */
181 static int fmt_extractline(struct fmt *f, int beg, int end, int str)
183 int fmt_div, fmt_rem;
184 int w, i, nspc, llen;
185 struct line *l;
186 if (!(l = fmt_mkline(f)))
187 return 1;
188 llen = FMT_LLEN(f);
189 w = fmt_wordslen(f, beg, end);
190 nspc = fmt_spaces(f, beg, end);
191 if (nspc && FMT_ADJ(f) && (llen < w || str)) {
192 fmt_div = (llen - w) / nspc;
193 fmt_rem = (llen - w) % nspc;
194 if (fmt_rem < 0) {
195 fmt_div--;
196 fmt_rem += nspc;
198 for (i = beg + 1; i < end; i++)
199 if (f->words[i].str)
200 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
202 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
203 return 0;
206 static int fmt_sp(struct fmt *f)
208 if (fmt_fillwords(f, 1))
209 return 1;
210 if (fmt_extractline(f, 0, f->words_n, 0))
211 return 1;
212 f->filled = 0;
213 f->nls--;
214 f->nls_sup = 0;
215 f->words_n = 0;
216 f->fillreq = 0;
217 return 0;
220 /* fill as many lines as possible; if br, put the remaining words in a line */
221 int fmt_fill(struct fmt *f, int br)
223 if (fmt_fillwords(f, br))
224 return 1;
225 if (br) {
226 f->filled = 0;
227 if (f->words_n)
228 if (fmt_sp(f))
229 return 1;
231 return 0;
234 void fmt_space(struct fmt *fmt)
236 fmt->gap += font_swid(dev_font(n_f), n_s, n_ss);
239 int fmt_newline(struct fmt *f)
241 f->gap = 0;
242 if (!FMT_FILL(f)) {
243 f->nls++;
244 fmt_sp(f);
245 return 0;
247 if (f->nls >= 1)
248 if (fmt_sp(f))
249 return 1;
250 if (f->nls == 0 && !f->filled && !f->words_n)
251 fmt_sp(f);
252 f->nls++;
253 return 0;
256 /* format the paragraph after the next word (\p) */
257 int fmt_fillreq(struct fmt *f)
259 if (f->fillreq > 0)
260 if (fmt_fillwords(f, 0))
261 return 1;
262 f->fillreq = f->words_n + 1;
263 return 0;
266 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
267 int hy, int str, int gap, int cost)
269 int len = strlen(wb_buf(wb));
270 word->s = xmalloc(len + 1);
271 memcpy(word->s, wb_buf(wb), len + 1);
272 word->wid = wb_wid(wb);
273 word->elsn = wb->els_neg;
274 word->elsp = wb->els_pos;
275 word->hy = hy ? wb_hywid(wb) : 0;
276 word->str = str;
277 word->gap = gap;
278 word->cost = cost;
279 word->swid = wb_swid(wb);
282 /* find explicit break positions: dashes, \:, \%, and \~ */
283 static int fmt_hyphmarks(char *word, int *hyidx, int *hyins, int *hygap)
285 char *s = word;
286 char *d = NULL;
287 int c, n = 0;
288 while ((c = escread(&s, &d)) > 0)
290 if (c < 0 || !strcmp(c_hc, d))
291 return -1;
292 while ((c = escread(&s, &d)) >= 0 && n < NHYPHSWORD) {
293 if (!c && !strcmp(c_hc, d)) {
294 hyins[n] = 1;
295 hyidx[n++] = s - word;
297 if (!c && c_hydash(d)) {
298 hyins[n] = 0;
299 hyidx[n++] = s - word;
301 if (!c && !strcmp(c_nb, d)) {
302 hygap[n] = 1;
303 hyidx[n++] = s - word;
306 return n;
309 static struct word *fmt_mkword(struct fmt *f)
311 if (f->words_n == f->words_sz) {
312 f->words_sz += 256;
313 f->words = mextend(f->words, f->words_n,
314 f->words_sz, sizeof(f->words[0]));
316 return &f->words[f->words_n++];
319 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
321 int hyidx[NHYPHSWORD]; /* sub-word boundaries */
322 int hyins[NHYPHSWORD] = {0}; /* insert dash */
323 int hygap[NHYPHSWORD] = {0}; /* stretchable no-break space */
324 char *src = wb_buf(wb);
325 struct wb wbc;
326 char *beg;
327 char *end;
328 int n, i;
329 int cf, cs, cm;
330 n = fmt_hyphmarks(src, hyidx, hyins, hygap);
331 if (n <= 0) {
332 fmt_wb2word(f, fmt_mkword(f), wb, 0, 1, gap, wb_cost(wb));
333 return;
335 /* update f->fillreq considering the new sub-words */
336 if (f->fillreq == f->words_n + 1)
337 f->fillreq += n;
338 wb_init(&wbc);
339 /* add sub-words */
340 for (i = 0; i <= n; i++) {
341 int ihy = i < n && hyins[i]; /* dash width */
342 int istr = i == 0 || hygap[i - 1]; /* stretchable */
343 int igap; /* gap width */
344 int icost; /* hyphenation cost */
345 beg = src + (i > 0 ? hyidx[i - 1] : 0);
346 end = src + (i < n ? hyidx[i] : strlen(src));
347 if (i < n && hygap[i]) /* remove \~ */
348 end -= strlen(c_nb);
349 wb_catstr(&wbc, beg, end);
350 wb_fnszget(&wbc, &cf, &cs, &cm);
351 icost = i == n ? wb_cost(&wbc) : hygap[i] * 10000000;
352 igap = i == 0 ? gap : hygap[i - 1] * wb_swid(&wbc);
353 fmt_wb2word(f, fmt_mkword(f), &wbc, ihy, istr, igap, icost);
354 wb_reset(&wbc);
355 wb_fnszset(&wbc, cf, cs, cm); /* restoring wbc */
357 wb_done(&wbc);
360 /* the amount of space necessary before the next word */
361 static int fmt_wordgap(struct fmt *f)
363 int nls = f->nls || f->nls_sup;
364 int swid = font_swid(dev_font(n_f), n_s, n_ss);
365 if (f->eos && f->words_n)
366 if ((nls && !f->gap) || (!nls && f->gap == 2 * swid))
367 return swid + font_swid(dev_font(n_f), n_s, n_sss);
368 return (nls && !f->gap && f->words_n) ? swid : f->gap;
371 /* insert wb into fmt */
372 int fmt_word(struct fmt *f, struct wb *wb)
374 if (wb_empty(wb))
375 return 0;
376 if (fmt_confchanged(f))
377 if (fmt_fillwords(f, 0))
378 return 1;
379 if (FMT_FILL(f) && f->nls && f->gap)
380 if (fmt_sp(f))
381 return 1;
382 if (!f->words_n) /* apply the new .l and .i */
383 fmt_confupdate(f);
384 f->gap = fmt_wordgap(f);
385 f->eos = wb_eos(wb);
386 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
387 f->filled = 0;
388 f->nls = 0;
389 f->nls_sup = 0;
390 f->gap = 0;
391 return 0;
394 /* approximate 8 * sqrt(cost) */
395 static long scaledown(long cost)
397 long ret = 0;
398 int i;
399 for (i = 0; i < 14; i++)
400 ret += ((cost >> (i * 2)) & 3) << (i + 3);
401 return ret < (1 << 13) ? ret : (1 << 13);
404 /* the cost of putting lwid words in a line of length llen */
405 static long FMT_COST(int llen, int lwid, int swid, int nspc)
407 /* the ratio that the stretchable spaces of the line should be spread */
408 long ratio = abs((llen - lwid) * 100l / (swid ? swid : 1));
409 /* ratio too large; scaling it down */
410 if (ratio > 4000)
411 ratio = 4000 + scaledown(ratio - 4000);
412 /* assigning a cost of 100 to each space stretching 100 percent */
413 return ratio * ratio / 100l * (nspc ? nspc : 1);
416 /* the number of hyphenations in consecutive lines ending at pos */
417 static int fmt_hydepth(struct fmt *f, int pos)
419 int n = 0;
420 while (pos > 0 && f->words[pos - 1].hy && ++n < 5)
421 pos = f->best_pos[pos];
422 return n;
425 static long hycost(int depth)
427 if (n_hlm > 0 && depth > n_hlm)
428 return 10000000;
429 if (depth >= 3)
430 return n_hycost + n_hycost2 + n_hycost3;
431 if (depth == 2)
432 return n_hycost + n_hycost2;
433 return depth ? n_hycost : 0;
436 /* the cost of putting a line break before word pos */
437 static long fmt_findcost(struct fmt *f, int pos)
439 int i, hyphenated;
440 long cur;
441 int llen = MAX(1, FMT_LLEN(f));
442 int lwid = 0; /* current line length */
443 int swid = 0; /* amount of stretchable spaces */
444 int nspc = 0; /* number of stretchable spaces */
445 int dwid = 0; /* equal to swid, unless swid is zero */
446 if (pos <= 0)
447 return 0;
448 if (f->best_pos[pos] >= 0)
449 return f->best[pos] + f->words[pos - 1].cost;
450 lwid = f->words[pos - 1].hy; /* non-zero if the last word is hyphenated */
451 hyphenated = f->words[pos - 1].hy != 0;
452 i = pos - 1;
453 while (i >= 0) {
454 lwid += f->words[i].wid;
455 if (i + 1 < pos)
456 lwid += f->words[i + 1].gap;
457 if (i + 1 < pos && f->words[i + 1].str) {
458 swid += f->words[i + 1].gap;
459 nspc++;
461 if (lwid > llen + swid * n_ssh / 100 && i + 1 < pos)
462 break;
463 dwid = swid;
464 if (!dwid && i > 0) /* no stretchable spaces */
465 dwid = f->words[i - 1].swid;
466 cur = fmt_findcost(f, i) + FMT_COST(llen, lwid, dwid, nspc);
467 if (hyphenated)
468 cur += hycost(1 + fmt_hydepth(f, i));
469 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
470 f->best_pos[pos] = i;
471 f->best_dep[pos] = f->best_dep[i] + 1;
472 f->best[pos] = cur;
474 i--;
476 return f->best[pos] + f->words[pos - 1].cost;
479 static int fmt_bestpos(struct fmt *f, int pos)
481 fmt_findcost(f, pos);
482 return MAX(0, f->best_pos[pos]);
485 static int fmt_bestdep(struct fmt *f, int pos)
487 fmt_findcost(f, pos);
488 return MAX(0, f->best_dep[pos]);
491 /* return the last filled word */
492 static int fmt_breakparagraph(struct fmt *f, int pos, int br)
494 int i;
495 int best = -1;
496 long cost, best_cost = 0;
497 int llen = FMT_LLEN(f);
498 int lwid = 0; /* current line length */
499 int swid = 0; /* amount of stretchable spaces */
500 int nspc = 0; /* number of stretchable spaces */
501 if (f->fillreq > 0 && f->fillreq <= f->words_n) {
502 fmt_findcost(f, f->fillreq);
503 return f->fillreq;
505 if (pos > 0 && f->words[pos - 1].wid >= llen) {
506 fmt_findcost(f, pos);
507 return pos;
509 i = pos - 1;
510 lwid = 0;
511 if (f->words[i].hy) /* the last word is hyphenated */
512 lwid += f->words[i].hy;
513 while (i >= 0) {
514 lwid += f->words[i].wid;
515 if (i + 1 < pos)
516 lwid += f->words[i + 1].gap;
517 if (i + 1 < pos && f->words[i + 1].str) {
518 swid += f->words[i + 1].gap;
519 nspc++;
521 if (lwid > llen && i + 1 < pos)
522 break;
523 cost = fmt_findcost(f, i);
524 /* the cost of formatting short lines; should prevent widows */
525 if (br && n_pmll && lwid < llen * n_pmll / 100) {
526 int pmll = llen * n_pmll / 100;
527 cost += (long) n_pmllcost * (pmll - lwid) / pmll;
529 if (best < 0 || cost < best_cost) {
530 best = i;
531 best_cost = cost;
533 i--;
535 return best;
538 /* extract the first nreq formatted lines before the word at pos */
539 static int fmt_head(struct fmt *f, int nreq, int pos)
541 int best = pos; /* best line break for nreq-th line */
542 int prev, next; /* best line breaks without hyphenation */
543 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
544 return pos;
545 /* finding the optimal line break for nreq-th line */
546 while (best > 0 && fmt_bestdep(f, best) > nreq)
547 best = fmt_bestpos(f, best);
548 prev = best;
549 next = best;
550 /* finding closest line breaks without hyphenation */
551 while (prev > 1 && f->words[prev - 1].hy &&
552 fmt_bestdep(f, prev - 1) == nreq)
553 prev--;
554 while (next < pos && f->words[next - 1].hy &&
555 fmt_bestdep(f, next) == nreq)
556 next++;
557 /* choosing the best of them */
558 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
559 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
560 if (!f->words[prev - 1].hy)
561 return prev;
562 if (!f->words[next - 1].hy)
563 return next;
564 return best;
567 /* break f->words[0..end] into lines according to fmt_bestpos() */
568 static int fmt_break(struct fmt *f, int end)
570 int beg, ret = 0;
571 beg = fmt_bestpos(f, end);
572 if (beg > 0)
573 ret += fmt_break(f, beg);
574 f->words[beg].gap = 0;
575 if (fmt_extractline(f, beg, end, 1))
576 return ret;
577 if (beg > 0)
578 fmt_confupdate(f);
579 return ret + (end - beg);
582 /* estimated number of lines until traps or the end of a page */
583 static int fmt_safelines(void)
585 int lnht = MAX(1, n_L) * n_v;
586 return (f_nexttrap() + lnht - 1) / lnht;
589 /* fill the words collected in the buffer */
590 static int fmt_fillwords(struct fmt *f, int br)
592 int nreq; /* the number of lines until a trap */
593 int end; /* the final line ends before this word */
594 int end_head; /* like end, but only the first nreq lines included */
595 int head = 0; /* only nreq first lines have been formatted */
596 int llen; /* line length, taking shrinkable spaces into account */
597 int n, i;
598 if (!FMT_FILL(f))
599 return 0;
600 llen = fmt_wordslen(f, 0, f->words_n) -
601 fmt_spacessum(f, 0, f->words_n) * n_ssh / 100;
602 /* not enough words to fill */
603 if ((f->fillreq <= 0 || f->words_n < f->fillreq) && llen <= FMT_LLEN(f))
604 return 0;
605 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
606 if (nreq > 0 && nreq <= fmt_nlines(f))
607 return 1;
608 /* resetting positions */
609 f->best = malloc((f->words_n + 1) * sizeof(f->best[0]));
610 f->best_pos = malloc((f->words_n + 1) * sizeof(f->best_pos[0]));
611 f->best_dep = malloc((f->words_n + 1) * sizeof(f->best_dep[0]));
612 memset(f->best, 0, (f->words_n + 1) * sizeof(f->best[0]));
613 memset(f->best_dep, 0, (f->words_n + 1) * sizeof(f->best_dep[0]));
614 for (i = 0; i < f->words_n + 1; i++)
615 f->best_pos[i] = -1;
616 end = fmt_breakparagraph(f, f->words_n, br);
617 if (nreq > 0) {
618 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
619 head = end_head < end;
620 end = end_head;
622 /* recursively add lines */
623 n = end > 0 ? fmt_break(f, end) : 0;
624 f->words_n -= n;
625 f->fillreq -= n;
626 fmt_movewords(f, 0, n, f->words_n);
627 f->filled = n && !f->words_n;
628 if (f->words_n)
629 f->words[0].gap = 0;
630 if (f->words_n) /* apply the new .l and .i */
631 fmt_confupdate(f);
632 free(f->best);
633 free(f->best_pos);
634 free(f->best_dep);
635 f->best = NULL;
636 f->best_pos = NULL;
637 f->best_dep = NULL;
638 return head || n != end;
641 struct fmt *fmt_alloc(void)
643 struct fmt *fmt = xmalloc(sizeof(*fmt));
644 memset(fmt, 0, sizeof(*fmt));
645 return fmt;
648 void fmt_free(struct fmt *fmt)
650 free(fmt->lines);
651 free(fmt->words);
652 free(fmt);
655 int fmt_wid(struct fmt *fmt)
657 return fmt_wordslen(fmt, 0, fmt->words_n) + fmt_wordgap(fmt);
660 int fmt_morewords(struct fmt *fmt)
662 return fmt_morelines(fmt) || fmt->words_n;
665 int fmt_morelines(struct fmt *fmt)
667 return fmt->lines_head != fmt->lines_tail;
670 /* suppress the last newline */
671 void fmt_suppressnl(struct fmt *fmt)
673 if (fmt->nls) {
674 fmt->nls--;
675 fmt->nls_sup = 1;