font: specifying OpenType font language with .ffsc
[neatroff.git] / fmt.c
blobed460fa5f63f34fb48ff4c6defe26cdc990732eb
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "roff.h"
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li - (f)->lI)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 static int fmt_fillwords(struct fmt *f, int br);
25 struct word {
26 char *s;
27 int wid; /* word's width */
28 int elsn, elsp; /* els_neg and els_pos */
29 int gap; /* the space before this word */
30 int hy; /* hyphen width if inserted after this word */
31 int str; /* does the space before it stretch */
32 int cost; /* the extra cost of line break after this word */
33 int swid; /* space width after this word (i.e., \w' ') */
36 struct line {
37 struct sbuf sbuf;
38 int wid, li, ll, lI;
39 int elsn, elsp;
42 struct fmt {
43 /* queued words */
44 struct word *words;
45 int words_n, words_sz;
46 /* queued lines */
47 struct line *lines;
48 int lines_head, lines_tail, lines_sz;
49 /* for paragraph adjustment */
50 long *best;
51 int *best_pos;
52 int *best_dep;
53 /* current line */
54 int gap; /* space before the next word */
55 int nls; /* newlines before the next word */
56 int nls_sup; /* suppressed newlines */
57 int li, ll, lI; /* current line indentation and length */
58 int filled; /* filled all words in the last fmt_fill() */
59 int eos; /* last word ends a sentence */
60 int fillreq; /* fill after the last word (\p) */
63 /* .ll, .in and .ti are delayed until the partial line is output */
64 static void fmt_confupdate(struct fmt *f)
66 f->ll = n_l;
67 f->li = n_ti >= 0 ? n_ti : n_i;
68 f->lI = n_tI >= 0 ? n_tI : n_I;
69 n_ti = -1;
70 n_tI = -1;
73 static int fmt_confchanged(struct fmt *f)
75 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i) ||
76 f->lI != (n_tI >= 0 ? n_tI : n_I);
79 /* move words inside an fmt struct */
80 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
82 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
85 /* move words from the buffer to s */
86 static int fmt_wordscopy(struct fmt *f, int beg, int end,
87 struct sbuf *s, int *els_neg, int *els_pos)
89 struct word *wcur;
90 int w = 0;
91 int i;
92 *els_neg = 0;
93 *els_pos = 0;
94 for (i = beg; i < end; i++) {
95 wcur = &f->words[i];
96 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
97 sbuf_append(s, wcur->s);
98 w += wcur->wid + wcur->gap;
99 if (wcur->elsn < *els_neg)
100 *els_neg = wcur->elsn;
101 if (wcur->elsp > *els_pos)
102 *els_pos = wcur->elsp;
103 free(wcur->s);
105 if (beg < end) {
106 wcur = &f->words[end - 1];
107 if (wcur->hy)
108 sbuf_append(s, "\\(hy");
109 w += wcur->hy;
111 return w;
114 static int fmt_nlines(struct fmt *f)
116 return f->lines_head - f->lines_tail;
119 /* the total width of the specified words in f->words[] */
120 static int fmt_wordslen(struct fmt *f, int beg, int end)
122 int i, w = 0;
123 for (i = beg; i < end; i++)
124 w += f->words[i].wid + f->words[i].gap;
125 return beg < end ? w + f->words[end - 1].hy : 0;
128 /* the number of stretchable spaces in f */
129 static int fmt_spaces(struct fmt *f, int beg, int end)
131 int i, n = 0;
132 for (i = beg + 1; i < end; i++)
133 if (f->words[i].str)
134 n++;
135 return n;
138 /* the amount of stretchable spaces in f */
139 static int fmt_spacessum(struct fmt *f, int beg, int end)
141 int i, n = 0;
142 for (i = beg + 1; i < end; i++)
143 if (f->words[i].str)
144 n += f->words[i].gap;
145 return n;
148 /* return the next line in the buffer */
149 char *fmt_nextline(struct fmt *f, int *w,
150 int *li, int *lI, int *ll, int *els_neg, int *els_pos)
152 struct line *l;
153 if (f->lines_head == f->lines_tail)
154 return NULL;
155 l = &f->lines[f->lines_tail++];
156 *li = l->li;
157 *lI = l->lI;
158 *ll = l->ll;
159 *w = l->wid;
160 *els_neg = l->elsn;
161 *els_pos = l->elsp;
162 return sbuf_out(&l->sbuf);
165 static struct line *fmt_mkline(struct fmt *f)
167 struct line *l;
168 if (f->lines_head == f->lines_tail) {
169 f->lines_head = 0;
170 f->lines_tail = 0;
172 if (f->lines_head == f->lines_sz) {
173 f->lines_sz += 256;
174 f->lines = mextend(f->lines, f->lines_head,
175 f->lines_sz, sizeof(f->lines[0]));
177 l = &f->lines[f->lines_head++];
178 l->li = f->li;
179 l->lI = f->lI;
180 l->ll = f->ll;
181 sbuf_init(&l->sbuf);
182 return l;
185 static void fmt_keshideh(struct fmt *f, int beg, int end, int wid);
187 /* extract words from beg to end; shrink or stretch spaces if needed */
188 static int fmt_extractline(struct fmt *f, int beg, int end, int str)
190 int fmt_div, fmt_rem;
191 int w, i, nspc, llen;
192 struct line *l;
193 if (!(l = fmt_mkline(f)))
194 return 1;
195 llen = FMT_LLEN(f);
196 w = fmt_wordslen(f, beg, end);
197 if (str && FMT_ADJ(f) && n_j & AD_K) {
198 fmt_keshideh(f, beg, end, llen - w);
199 w = fmt_wordslen(f, beg, end);
201 nspc = fmt_spaces(f, beg, end);
202 if (nspc && FMT_ADJ(f) && (llen < w || str)) {
203 fmt_div = (llen - w) / nspc;
204 fmt_rem = (llen - w) % nspc;
205 if (fmt_rem < 0) {
206 fmt_div--;
207 fmt_rem += nspc;
209 for (i = beg + 1; i < end; i++)
210 if (f->words[i].str)
211 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
213 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
214 return 0;
217 static int fmt_sp(struct fmt *f)
219 if (fmt_fillwords(f, 1))
220 return 1;
221 if (fmt_extractline(f, 0, f->words_n, 0))
222 return 1;
223 f->filled = 0;
224 f->nls--;
225 f->nls_sup = 0;
226 f->words_n = 0;
227 f->fillreq = 0;
228 return 0;
231 /* fill as many lines as possible; if br, put the remaining words in a line */
232 int fmt_fill(struct fmt *f, int br)
234 if (fmt_fillwords(f, br))
235 return 1;
236 if (br) {
237 f->filled = 0;
238 if (f->words_n)
239 if (fmt_sp(f))
240 return 1;
242 return 0;
245 void fmt_space(struct fmt *fmt)
247 fmt->gap += font_swid(dev_font(n_f), n_s, n_ss);
250 int fmt_newline(struct fmt *f)
252 f->gap = 0;
253 if (!FMT_FILL(f)) {
254 f->nls++;
255 fmt_sp(f);
256 return 0;
258 if (f->nls >= 1)
259 if (fmt_sp(f))
260 return 1;
261 if (f->nls == 0 && !f->filled && !f->words_n)
262 fmt_sp(f);
263 f->nls++;
264 return 0;
267 /* format the paragraph after the next word (\p) */
268 int fmt_fillreq(struct fmt *f)
270 if (f->fillreq > 0)
271 if (fmt_fillwords(f, 0))
272 return 1;
273 f->fillreq = f->words_n + 1;
274 return 0;
277 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
278 int hy, int str, int gap, int cost)
280 int len = strlen(wb_buf(wb));
281 word->s = xmalloc(len + 1);
282 memcpy(word->s, wb_buf(wb), len + 1);
283 word->wid = wb_wid(wb);
284 word->elsn = wb->els_neg;
285 word->elsp = wb->els_pos;
286 word->hy = hy ? wb_hywid(wb) : 0;
287 word->str = str;
288 word->gap = gap;
289 word->cost = cost;
290 word->swid = wb_swid(wb);
293 /* find explicit break positions: dashes, \:, \%, and \~ */
294 static int fmt_hyphmarks(char *word, int *hyidx, int *hyins, int *hygap)
296 char *s = word;
297 char *d = NULL;
298 int c, n = 0;
299 while ((c = escread(&s, &d)) > 0)
301 if (c < 0 || !strcmp(c_hc, d))
302 return -1;
303 while ((c = escread(&s, &d)) >= 0 && n < NHYPHSWORD) {
304 if (!c && !strcmp(c_hc, d)) {
305 hyins[n] = 1;
306 hyidx[n++] = s - word;
308 if (!c && c_hydash(d)) {
309 hyins[n] = 0;
310 hyidx[n++] = s - word;
312 if (!c && !strcmp(c_nb, d)) {
313 hygap[n] = 1;
314 hyidx[n++] = s - word;
317 return n;
320 static struct word *fmt_mkword(struct fmt *f)
322 if (f->words_n == f->words_sz) {
323 f->words_sz += 256;
324 f->words = mextend(f->words, f->words_n,
325 f->words_sz, sizeof(f->words[0]));
327 return &f->words[f->words_n++];
330 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
332 int hyidx[NHYPHSWORD]; /* sub-word boundaries */
333 int hyins[NHYPHSWORD] = {0}; /* insert dash */
334 int hygap[NHYPHSWORD] = {0}; /* stretchable no-break space */
335 char *src = wb_buf(wb);
336 struct wb wbc;
337 char *beg;
338 char *end;
339 int n, i;
340 int cf, cs, cm, ccd;
341 n = fmt_hyphmarks(src, hyidx, hyins, hygap);
342 if (n <= 0) {
343 fmt_wb2word(f, fmt_mkword(f), wb, 0, 1, gap, wb_cost(wb));
344 return;
346 /* update f->fillreq considering the new sub-words */
347 if (f->fillreq == f->words_n + 1)
348 f->fillreq += n;
349 wb_init(&wbc);
350 /* add sub-words */
351 for (i = 0; i <= n; i++) {
352 int ihy = i < n && hyins[i]; /* dash width */
353 int istr = i == 0 || hygap[i - 1]; /* stretchable */
354 int igap; /* gap width */
355 int icost; /* hyphenation cost */
356 beg = src + (i > 0 ? hyidx[i - 1] : 0);
357 end = src + (i < n ? hyidx[i] : strlen(src));
358 if (i < n && hygap[i]) /* remove \~ */
359 end -= strlen(c_nb);
360 wb_catstr(&wbc, beg, end);
361 wb_fnszget(&wbc, &cf, &cs, &cm, &ccd);
362 icost = i == n ? wb_cost(&wbc) : hygap[i] * 10000000;
363 igap = i == 0 ? gap : hygap[i - 1] * wb_swid(&wbc);
364 fmt_wb2word(f, fmt_mkword(f), &wbc, ihy, istr, igap, icost);
365 wb_reset(&wbc);
366 wb_fnszset(&wbc, cf, cs, cm, ccd); /* restoring wbc */
368 wb_done(&wbc);
371 /* the amount of space necessary before the next word */
372 static int fmt_wordgap(struct fmt *f)
374 int nls = f->nls || f->nls_sup;
375 int swid = font_swid(dev_font(n_f), n_s, n_ss);
376 if (f->eos && f->words_n)
377 if ((nls && !f->gap) || (!nls && f->gap == 2 * swid))
378 return swid + font_swid(dev_font(n_f), n_s, n_sss);
379 return (nls && !f->gap && f->words_n) ? swid : f->gap;
382 /* insert wb into fmt */
383 int fmt_word(struct fmt *f, struct wb *wb)
385 if (wb_empty(wb))
386 return 0;
387 if (fmt_confchanged(f))
388 if (fmt_fillwords(f, 0))
389 return 1;
390 if (FMT_FILL(f) && f->nls && f->gap)
391 if (fmt_sp(f))
392 return 1;
393 if (!f->words_n) /* apply the new .l and .i */
394 fmt_confupdate(f);
395 f->gap = fmt_wordgap(f);
396 f->eos = wb_eos(wb);
397 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
398 f->filled = 0;
399 f->nls = 0;
400 f->nls_sup = 0;
401 f->gap = 0;
402 return 0;
405 /* insert keshideh characters */
406 static void fmt_keshideh(struct fmt *f, int beg, int end, int wid)
408 struct wb wb;
409 int kw, i = 0, c = 0;
410 struct word *w;
411 int cnt = 0;
412 do {
413 cnt = 0;
414 for (c = 0; c < 2; c++) {
415 for (i = end - 1 - c; i >= beg; i -= 2) {
416 w = &f->words[i];
417 wb_init(&wb);
418 kw = wb_keshideh(w->s, &wb, wid);
419 if (kw > 0) {
420 free(w->s);
421 w->s = xmalloc(strlen(wb_buf(&wb)) + 1);
422 strcpy(w->s, wb_buf(&wb));
423 w->wid = wb_wid(&wb);
424 wid -= kw;
425 cnt++;
427 wb_done(&wb);
430 } while (cnt);
433 /* approximate 8 * sqrt(cost) */
434 static long scaledown(long cost)
436 long ret = 0;
437 int i;
438 for (i = 0; i < 14; i++)
439 ret += ((cost >> (i * 2)) & 3) << (i + 3);
440 return ret < (1 << 13) ? ret : (1 << 13);
443 /* the cost of putting lwid words in a line of length llen */
444 static long FMT_COST(int llen, int lwid, int swid, int nspc)
446 /* the ratio that the stretchable spaces of the line should be spread */
447 long ratio = abs((llen - lwid) * 100l / (swid ? swid : 1));
448 /* ratio too large; scaling it down */
449 if (ratio > 4000)
450 ratio = 4000 + scaledown(ratio - 4000);
451 /* assigning a cost of 100 to each space stretching 100 percent */
452 return ratio * ratio / 100l * (nspc ? nspc : 1);
455 /* the number of hyphenations in consecutive lines ending at pos */
456 static int fmt_hydepth(struct fmt *f, int pos)
458 int n = 0;
459 while (pos > 0 && f->words[pos - 1].hy && ++n < 5)
460 pos = f->best_pos[pos];
461 return n;
464 static long hycost(int depth)
466 if (n_hlm > 0 && depth > n_hlm)
467 return 10000000;
468 if (depth >= 3)
469 return n_hycost + n_hycost2 + n_hycost3;
470 if (depth == 2)
471 return n_hycost + n_hycost2;
472 return depth ? n_hycost : 0;
475 /* the cost of putting a line break before word pos */
476 static long fmt_findcost(struct fmt *f, int pos)
478 int i, hyphenated;
479 long cur;
480 int llen = MAX(1, FMT_LLEN(f));
481 int lwid = 0; /* current line length */
482 int swid = 0; /* amount of stretchable spaces */
483 int nspc = 0; /* number of stretchable spaces */
484 int dwid = 0; /* equal to swid, unless swid is zero */
485 if (pos <= 0)
486 return 0;
487 if (f->best_pos[pos] >= 0)
488 return f->best[pos] + f->words[pos - 1].cost;
489 lwid = f->words[pos - 1].hy; /* non-zero if the last word is hyphenated */
490 hyphenated = f->words[pos - 1].hy != 0;
491 i = pos - 1;
492 while (i >= 0) {
493 lwid += f->words[i].wid;
494 if (i + 1 < pos)
495 lwid += f->words[i + 1].gap;
496 if (i + 1 < pos && f->words[i + 1].str) {
497 swid += f->words[i + 1].gap;
498 nspc++;
500 if (lwid > llen + swid * n_ssh / 100 && i + 1 < pos)
501 break;
502 dwid = swid;
503 if (!dwid && i > 0) /* no stretchable spaces */
504 dwid = f->words[i - 1].swid;
505 cur = fmt_findcost(f, i) + FMT_COST(llen, lwid, dwid, nspc);
506 if (hyphenated)
507 cur += hycost(1 + fmt_hydepth(f, i));
508 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
509 f->best_pos[pos] = i;
510 f->best_dep[pos] = f->best_dep[i] + 1;
511 f->best[pos] = cur;
513 i--;
515 return f->best[pos] + f->words[pos - 1].cost;
518 static int fmt_bestpos(struct fmt *f, int pos)
520 fmt_findcost(f, pos);
521 return MAX(0, f->best_pos[pos]);
524 static int fmt_bestdep(struct fmt *f, int pos)
526 fmt_findcost(f, pos);
527 return MAX(0, f->best_dep[pos]);
530 /* return the last filled word */
531 static int fmt_breakparagraph(struct fmt *f, int pos, int br)
533 int i;
534 int best = -1;
535 long cost, best_cost = 0;
536 int llen = FMT_LLEN(f);
537 int lwid = 0; /* current line length */
538 int swid = 0; /* amount of stretchable spaces */
539 int nspc = 0; /* number of stretchable spaces */
540 if (f->fillreq > 0 && f->fillreq <= f->words_n) {
541 fmt_findcost(f, f->fillreq);
542 return f->fillreq;
544 if (pos > 0 && f->words[pos - 1].wid >= llen) {
545 fmt_findcost(f, pos);
546 return pos;
548 i = pos - 1;
549 lwid = 0;
550 if (f->words[i].hy) /* the last word is hyphenated */
551 lwid += f->words[i].hy;
552 while (i >= 0) {
553 lwid += f->words[i].wid;
554 if (i + 1 < pos)
555 lwid += f->words[i + 1].gap;
556 if (i + 1 < pos && f->words[i + 1].str) {
557 swid += f->words[i + 1].gap;
558 nspc++;
560 if (lwid > llen && i + 1 < pos)
561 break;
562 cost = fmt_findcost(f, i);
563 /* the cost of formatting short lines; should prevent widows */
564 if (br && n_pmll && lwid < llen * n_pmll / 100) {
565 int pmll = llen * n_pmll / 100;
566 cost += (long) n_pmllcost * (pmll - lwid) / pmll;
568 if (best < 0 || cost < best_cost) {
569 best = i;
570 best_cost = cost;
572 i--;
574 return best;
577 /* extract the first nreq formatted lines before the word at pos */
578 static int fmt_head(struct fmt *f, int nreq, int pos)
580 int best = pos; /* best line break for nreq-th line */
581 int prev, next; /* best line breaks without hyphenation */
582 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
583 return pos;
584 /* finding the optimal line break for nreq-th line */
585 while (best > 0 && fmt_bestdep(f, best) > nreq)
586 best = fmt_bestpos(f, best);
587 prev = best;
588 next = best;
589 /* finding closest line breaks without hyphenation */
590 while (prev > 1 && f->words[prev - 1].hy &&
591 fmt_bestdep(f, prev - 1) == nreq)
592 prev--;
593 while (next < pos && f->words[next - 1].hy &&
594 fmt_bestdep(f, next) == nreq)
595 next++;
596 /* choosing the best of them */
597 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
598 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
599 if (!f->words[prev - 1].hy)
600 return prev;
601 if (!f->words[next - 1].hy)
602 return next;
603 return best;
606 /* break f->words[0..end] into lines according to fmt_bestpos() */
607 static int fmt_break(struct fmt *f, int end)
609 int beg, ret = 0;
610 beg = fmt_bestpos(f, end);
611 if (beg > 0)
612 ret += fmt_break(f, beg);
613 f->words[beg].gap = 0;
614 if (fmt_extractline(f, beg, end, 1))
615 return ret;
616 if (beg > 0)
617 fmt_confupdate(f);
618 return ret + (end - beg);
621 /* estimated number of lines until traps or the end of a page */
622 static int fmt_safelines(void)
624 int lnht = MAX(1, n_L) * n_v;
625 return (f_nexttrap() + lnht - 1) / lnht;
628 /* fill the words collected in the buffer */
629 static int fmt_fillwords(struct fmt *f, int br)
631 int nreq; /* the number of lines until a trap */
632 int end; /* the final line ends before this word */
633 int end_head; /* like end, but only the first nreq lines included */
634 int head = 0; /* only nreq first lines have been formatted */
635 int llen; /* line length, taking shrinkable spaces into account */
636 int n, i;
637 if (!FMT_FILL(f))
638 return 0;
639 llen = fmt_wordslen(f, 0, f->words_n) -
640 fmt_spacessum(f, 0, f->words_n) * n_ssh / 100;
641 /* not enough words to fill */
642 if ((f->fillreq <= 0 || f->words_n < f->fillreq) && llen <= FMT_LLEN(f))
643 return 0;
644 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
645 if (nreq > 0 && nreq <= fmt_nlines(f))
646 return 1;
647 /* resetting positions */
648 f->best = malloc((f->words_n + 1) * sizeof(f->best[0]));
649 f->best_pos = malloc((f->words_n + 1) * sizeof(f->best_pos[0]));
650 f->best_dep = malloc((f->words_n + 1) * sizeof(f->best_dep[0]));
651 memset(f->best, 0, (f->words_n + 1) * sizeof(f->best[0]));
652 memset(f->best_dep, 0, (f->words_n + 1) * sizeof(f->best_dep[0]));
653 for (i = 0; i < f->words_n + 1; i++)
654 f->best_pos[i] = -1;
655 end = fmt_breakparagraph(f, f->words_n, br);
656 if (nreq > 0) {
657 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
658 head = end_head < end;
659 end = end_head;
661 /* recursively add lines */
662 n = end > 0 ? fmt_break(f, end) : 0;
663 f->words_n -= n;
664 f->fillreq -= n;
665 fmt_movewords(f, 0, n, f->words_n);
666 f->filled = n && !f->words_n;
667 if (f->words_n)
668 f->words[0].gap = 0;
669 if (f->words_n) /* apply the new .l and .i */
670 fmt_confupdate(f);
671 free(f->best);
672 free(f->best_pos);
673 free(f->best_dep);
674 f->best = NULL;
675 f->best_pos = NULL;
676 f->best_dep = NULL;
677 return head || n != end;
680 struct fmt *fmt_alloc(void)
682 struct fmt *fmt = xmalloc(sizeof(*fmt));
683 memset(fmt, 0, sizeof(*fmt));
684 return fmt;
687 void fmt_free(struct fmt *fmt)
689 free(fmt->lines);
690 free(fmt->words);
691 free(fmt);
694 int fmt_wid(struct fmt *fmt)
696 return fmt_wordslen(fmt, 0, fmt->words_n) + fmt_wordgap(fmt);
699 int fmt_morewords(struct fmt *fmt)
701 return fmt_morelines(fmt) || fmt->words_n;
704 int fmt_morelines(struct fmt *fmt)
706 return fmt->lines_head != fmt->lines_tail;
709 /* suppress the last newline */
710 void fmt_suppressnl(struct fmt *fmt)
712 if (fmt->nls) {
713 fmt->nls--;
714 fmt->nls_sup = 1;