font: fix reading gsub scripts
[neatroff.git] / fmt.c
bloba92b95de502cdd6c53df77b986dee453195f9d57
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "roff.h"
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 static int fmt_fillwords(struct fmt *f, int br);
25 struct word {
26 char *s;
27 int wid; /* word's width */
28 int elsn, elsp; /* els_neg and els_pos */
29 int gap; /* the space before this word */
30 int hy; /* hyphen width if inserted after this word */
31 int str; /* does the space before it stretch */
32 int cost; /* the extra cost of line break after this word */
35 struct line {
36 struct sbuf sbuf;
37 int wid, li, ll;
38 int elsn, elsp;
41 struct fmt {
42 /* queued words */
43 struct word *words;
44 int words_n, words_sz;
45 /* queued lines */
46 struct line *lines;
47 int lines_head, lines_tail, lines_sz;
48 /* for paragraph adjustment */
49 long *best;
50 int *best_pos;
51 int *best_dep;
52 /* current line */
53 int gap; /* space before the next word */
54 int nls; /* newlines before the next word */
55 int nls_sup; /* suppressed newlines */
56 int li, ll; /* current line indentation and length */
57 int filled; /* filled all words in the last fmt_fill() */
58 int eos; /* last word ends a sentence */
59 int fillreq; /* fill after the last word (\p) */
62 /* .ll, .in and .ti are delayed until the partial line is output */
63 static void fmt_confupdate(struct fmt *f)
65 f->ll = n_l;
66 f->li = n_ti >= 0 ? n_ti : n_i;
67 n_ti = -1;
70 static int fmt_confchanged(struct fmt *f)
72 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i);
75 /* move words inside an fmt struct */
76 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
78 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
81 /* move words from the buffer to s */
82 static int fmt_wordscopy(struct fmt *f, int beg, int end,
83 struct sbuf *s, int *els_neg, int *els_pos)
85 struct word *wcur;
86 int w = 0;
87 int i;
88 *els_neg = 0;
89 *els_pos = 0;
90 for (i = beg; i < end; i++) {
91 wcur = &f->words[i];
92 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
93 sbuf_append(s, wcur->s);
94 w += wcur->wid + wcur->gap;
95 if (wcur->elsn < *els_neg)
96 *els_neg = wcur->elsn;
97 if (wcur->elsp > *els_pos)
98 *els_pos = wcur->elsp;
99 free(wcur->s);
101 if (beg < end) {
102 wcur = &f->words[end - 1];
103 if (wcur->hy)
104 sbuf_append(s, "\\(hy");
105 w += wcur->hy;
107 return w;
110 static int fmt_nlines(struct fmt *f)
112 return f->lines_head - f->lines_tail;
115 /* the total width of the specified words in f->words[] */
116 static int fmt_wordslen(struct fmt *f, int beg, int end)
118 int i, w = 0;
119 for (i = beg; i < end; i++)
120 w += f->words[i].wid + f->words[i].gap;
121 return beg < end ? w + f->words[end - 1].hy : 0;
124 /* the number of stretchable spaces in f */
125 static int fmt_spaces(struct fmt *f, int beg, int end)
127 int i, n = 0;
128 for (i = beg + 1; i < end; i++)
129 if (f->words[i].str)
130 n++;
131 return n;
134 /* the amount of stretchable spaces in f */
135 static int fmt_spacessum(struct fmt *f, int beg, int end)
137 int i, n = 0;
138 for (i = beg + 1; i < end; i++)
139 if (f->words[i].str)
140 n += f->words[i].gap;
141 return n;
144 /* return the next line in the buffer */
145 char *fmt_nextline(struct fmt *f, int *w,
146 int *li, int *ll, int *els_neg, int *els_pos)
148 struct line *l;
149 if (f->lines_head == f->lines_tail)
150 return NULL;
151 l = &f->lines[f->lines_tail++];
152 *li = l->li;
153 *ll = l->ll;
154 *w = l->wid;
155 *els_neg = l->elsn;
156 *els_pos = l->elsp;
157 return sbuf_out(&l->sbuf);
160 static struct line *fmt_mkline(struct fmt *f)
162 struct line *l;
163 if (f->lines_head == f->lines_tail) {
164 f->lines_head = 0;
165 f->lines_tail = 0;
167 if (f->lines_head == f->lines_sz) {
168 f->lines_sz += 256;
169 f->lines = mextend(f->lines, f->lines_head,
170 f->lines_sz, sizeof(f->lines[0]));
172 l = &f->lines[f->lines_head++];
173 l->li = f->li;
174 l->ll = f->ll;
175 sbuf_init(&l->sbuf);
176 return l;
179 /* extract words from beg to end; shrink or stretch spaces if needed */
180 static int fmt_extractline(struct fmt *f, int beg, int end, int str)
182 int fmt_div, fmt_rem;
183 int w, i, nspc, llen;
184 struct line *l;
185 if (!(l = fmt_mkline(f)))
186 return 1;
187 llen = FMT_LLEN(f);
188 w = fmt_wordslen(f, beg, end);
189 nspc = fmt_spaces(f, beg, end);
190 if (nspc && FMT_ADJ(f) && (llen < w || str)) {
191 fmt_div = (llen - w) / nspc;
192 fmt_rem = (llen - w) % nspc;
193 if (fmt_rem < 0) {
194 fmt_div--;
195 fmt_rem += nspc;
197 for (i = beg + 1; i < end; i++)
198 if (f->words[i].str)
199 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
201 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
202 return 0;
205 static int fmt_sp(struct fmt *f)
207 if (fmt_fillwords(f, 1))
208 return 1;
209 if (fmt_extractline(f, 0, f->words_n, 0))
210 return 1;
211 f->filled = 0;
212 f->nls--;
213 f->nls_sup = 0;
214 f->words_n = 0;
215 f->fillreq = 0;
216 return 0;
219 /* fill as many lines as possible; if br, put the remaining words in a line */
220 int fmt_fill(struct fmt *f, int br)
222 if (fmt_fillwords(f, br))
223 return 1;
224 if (br) {
225 f->filled = 0;
226 if (f->words_n)
227 if (fmt_sp(f))
228 return 1;
230 return 0;
233 void fmt_space(struct fmt *fmt)
235 fmt->gap += font_swid(dev_font(n_f), n_s, n_ss);
238 int fmt_newline(struct fmt *f)
240 f->gap = 0;
241 if (!FMT_FILL(f)) {
242 f->nls++;
243 fmt_sp(f);
244 return 0;
246 if (f->nls >= 1)
247 if (fmt_sp(f))
248 return 1;
249 if (f->nls == 0 && !f->filled && !f->words_n)
250 fmt_sp(f);
251 f->nls++;
252 return 0;
255 /* format the paragraph after the next word (\p) */
256 int fmt_fillreq(struct fmt *f)
258 if (f->fillreq > 0)
259 if (fmt_fillwords(f, 0))
260 return 1;
261 f->fillreq = f->words_n + 1;
262 return 0;
265 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
266 int hy, int str, int gap, int cost)
268 int len = strlen(wb_buf(wb));
269 word->s = xmalloc(len + 1);
270 memcpy(word->s, wb_buf(wb), len + 1);
271 word->wid = wb_wid(wb);
272 word->elsn = wb->els_neg;
273 word->elsp = wb->els_pos;
274 word->hy = hy ? wb_hywid(wb) : 0;
275 word->str = str;
276 word->gap = gap;
277 word->cost = cost;
280 /* find explicit break positions: dashes, \:, \%, and \~ */
281 static int fmt_hyphmarks(char *word, int *hyidx, int *hyins, int *hygap)
283 char *s = word;
284 char *d = NULL;
285 int c, n = 0;
286 while ((c = escread(&s, &d)) > 0)
288 if (c < 0 || !strcmp(c_hc, d))
289 return -1;
290 while ((c = escread(&s, &d)) >= 0 && n < NHYPHSWORD) {
291 if (!c && !strcmp(c_hc, d)) {
292 hyins[n] = 1;
293 hyidx[n++] = s - word;
295 if (!c && c_hydash(d)) {
296 hyins[n] = 0;
297 hyidx[n++] = s - word;
299 if (!c && !strcmp(c_nb, d)) {
300 hygap[n] = 1;
301 hyidx[n++] = s - word;
304 return n;
307 static struct word *fmt_mkword(struct fmt *f)
309 if (f->words_n == f->words_sz) {
310 f->words_sz += 256;
311 f->words = mextend(f->words, f->words_n,
312 f->words_sz, sizeof(f->words[0]));
314 return &f->words[f->words_n++];
317 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
319 int hyidx[NHYPHSWORD]; /* sub-word boundaries */
320 int hyins[NHYPHSWORD] = {0}; /* insert dash */
321 int hygap[NHYPHSWORD] = {0}; /* stretchable no-break space */
322 char *src = wb_buf(wb);
323 struct wb wbc;
324 char *beg;
325 char *end;
326 int n, i;
327 int cf, cs, cm;
328 n = fmt_hyphmarks(src, hyidx, hyins, hygap);
329 if (n <= 0) {
330 fmt_wb2word(f, fmt_mkword(f), wb, 0, 1, gap, wb_cost(wb));
331 return;
333 /* update f->fillreq considering the new sub-words */
334 if (f->fillreq == f->words_n + 1)
335 f->fillreq += n;
336 wb_init(&wbc);
337 /* add sub-words */
338 for (i = 0; i <= n; i++) {
339 int ihy = i < n && hyins[i]; /* dash width */
340 int istr = i == 0 || hygap[i - 1]; /* stretchable */
341 int igap; /* gap width */
342 int icost; /* hyphenation cost */
343 beg = src + (i > 0 ? hyidx[i - 1] : 0);
344 end = src + (i < n ? hyidx[i] : strlen(src));
345 if (i < n && hygap[i]) /* remove \~ */
346 end -= strlen(c_nb);
347 wb_catstr(&wbc, beg, end);
348 wb_fnszget(&wbc, &cf, &cs, &cm);
349 icost = i == n ? wb_cost(&wbc) : hygap[i] * 10000000;
350 igap = i == 0 ? gap : hygap[i - 1] * font_swid(dev_font(cf), cs, n_ss);
351 fmt_wb2word(f, fmt_mkword(f), &wbc, ihy, istr, igap, icost);
352 wb_reset(&wbc);
353 wb_fnszset(&wbc, cf, cs, cm); /* restoring wbc */
355 wb_done(&wbc);
358 /* the amount of space necessary before the next word */
359 static int fmt_wordgap(struct fmt *f)
361 int nls = f->nls || f->nls_sup;
362 int swid = font_swid(dev_font(n_f), n_s, n_ss);
363 if (f->eos && f->words_n)
364 if ((nls && !f->gap) || (!nls && f->gap == 2 * swid))
365 return swid + font_swid(dev_font(n_f), n_s, n_sss);
366 return (nls && !f->gap && f->words_n) ? swid : f->gap;
369 /* insert wb into fmt */
370 int fmt_word(struct fmt *f, struct wb *wb)
372 if (wb_empty(wb))
373 return 0;
374 if (fmt_confchanged(f))
375 if (fmt_fillwords(f, 0))
376 return 1;
377 if (FMT_FILL(f) && f->nls && f->gap)
378 if (fmt_sp(f))
379 return 1;
380 if (!f->words_n) /* apply the new .l and .i */
381 fmt_confupdate(f);
382 f->gap = fmt_wordgap(f);
383 f->eos = wb_eos(wb);
384 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
385 f->filled = 0;
386 f->nls = 0;
387 f->nls_sup = 0;
388 f->gap = 0;
389 return 0;
392 /* approximate 8 * sqrt(cost) */
393 static long scaledown(long cost)
395 long ret = 0;
396 int i;
397 for (i = 0; i < 14; i++)
398 ret += ((cost >> (i * 2)) & 3) << (i + 3);
399 return ret < (1 << 13) ? ret : (1 << 13);
402 /* the cost of putting lwid words in a line of length llen */
403 static long FMT_COST(int llen, int lwid, int swid, int nspc)
405 /* the ratio that the stretchable spaces of the line should be spread */
406 long ratio = abs((llen - lwid) * 100l / (swid ? swid : 1));
407 /* ratio too large; scaling it down */
408 if (ratio > 4000)
409 ratio = 4000 + scaledown(ratio - 4000);
410 /* assigning a cost of 100 to each space stretching 100 percent */
411 return ratio * ratio / 100l * (nspc ? nspc : 1);
414 /* the number of hyphenations in consecutive lines ending at pos */
415 static int fmt_hydepth(struct fmt *f, int pos)
417 int n = 0;
418 while (pos > 0 && f->words[pos - 1].hy && ++n < 5)
419 pos = f->best_pos[pos];
420 return n;
423 static long hycost(int depth)
425 if (n_hlm > 0 && depth > n_hlm)
426 return 10000000;
427 if (depth >= 3)
428 return n_hycost + n_hycost2 + n_hycost3;
429 if (depth == 2)
430 return n_hycost + n_hycost2;
431 return depth ? n_hycost : 0;
434 /* the cost of putting a line break before word pos */
435 static long fmt_findcost(struct fmt *f, int pos)
437 int i, hyphenated;
438 long cur;
439 int llen = MAX(1, FMT_LLEN(f));
440 int lwid = 0; /* current line length */
441 int swid = 0; /* amount of stretchable spaces */
442 int nspc = 0; /* number of stretchable spaces */
443 if (pos <= 0)
444 return 0;
445 if (f->best_pos[pos] >= 0)
446 return f->best[pos] + f->words[pos - 1].cost;
447 lwid = f->words[pos - 1].hy; /* non-zero if the last word is hyphenated */
448 hyphenated = f->words[pos - 1].hy != 0;
449 i = pos - 1;
450 while (i >= 0) {
451 lwid += f->words[i].wid;
452 if (i + 1 < pos)
453 lwid += f->words[i + 1].gap;
454 if (i + 1 < pos && f->words[i + 1].str) {
455 swid += f->words[i + 1].gap;
456 nspc++;
458 if (lwid > llen + swid * n_ssh / 100 && i + 1 < pos)
459 break;
460 cur = fmt_findcost(f, i) + FMT_COST(llen, lwid, swid, nspc);
461 if (hyphenated)
462 cur += hycost(1 + fmt_hydepth(f, i));
463 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
464 f->best_pos[pos] = i;
465 f->best_dep[pos] = f->best_dep[i] + 1;
466 f->best[pos] = cur;
468 i--;
470 return f->best[pos] + f->words[pos - 1].cost;
473 static int fmt_bestpos(struct fmt *f, int pos)
475 fmt_findcost(f, pos);
476 return MAX(0, f->best_pos[pos]);
479 static int fmt_bestdep(struct fmt *f, int pos)
481 fmt_findcost(f, pos);
482 return MAX(0, f->best_dep[pos]);
485 /* return the last filled word */
486 static int fmt_breakparagraph(struct fmt *f, int pos, int br)
488 int i;
489 int best = -1;
490 long cost, best_cost = 0;
491 int llen = FMT_LLEN(f);
492 int lwid = 0; /* current line length */
493 int swid = 0; /* amount of stretchable spaces */
494 int nspc = 0; /* number of stretchable spaces */
495 if (f->fillreq > 0 && f->fillreq <= f->words_n) {
496 fmt_findcost(f, f->fillreq);
497 return f->fillreq;
499 if (pos > 0 && f->words[pos - 1].wid >= llen) {
500 fmt_findcost(f, pos);
501 return pos;
503 i = pos - 1;
504 lwid = 0;
505 if (f->words[i].hy) /* the last word is hyphenated */
506 lwid += f->words[i].hy;
507 while (i >= 0) {
508 lwid += f->words[i].wid;
509 if (i + 1 < pos)
510 lwid += f->words[i + 1].gap;
511 if (i + 1 < pos && f->words[i + 1].str) {
512 swid += f->words[i + 1].gap;
513 nspc++;
515 if (lwid > llen && i + 1 < pos)
516 break;
517 cost = fmt_findcost(f, i);
518 /* the cost of formatting short lines; should prevent widows */
519 if (br && n_pmll && lwid < llen * n_pmll / 100) {
520 int pmll = llen * n_pmll / 100;
521 cost += (long) n_pmllcost * (pmll - lwid) / pmll;
523 if (best < 0 || cost < best_cost) {
524 best = i;
525 best_cost = cost;
527 i--;
529 return best;
532 /* extract the first nreq formatted lines before the word at pos */
533 static int fmt_head(struct fmt *f, int nreq, int pos)
535 int best = pos; /* best line break for nreq-th line */
536 int prev, next; /* best line breaks without hyphenation */
537 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
538 return pos;
539 /* finding the optimal line break for nreq-th line */
540 while (best > 0 && fmt_bestdep(f, best) > nreq)
541 best = fmt_bestpos(f, best);
542 prev = best;
543 next = best;
544 /* finding closest line breaks without hyphenation */
545 while (prev > 1 && f->words[prev - 1].hy &&
546 fmt_bestdep(f, prev - 1) == nreq)
547 prev--;
548 while (next < pos && f->words[next - 1].hy &&
549 fmt_bestdep(f, next) == nreq)
550 next++;
551 /* choosing the best of them */
552 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
553 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
554 if (!f->words[prev - 1].hy)
555 return prev;
556 if (!f->words[next - 1].hy)
557 return next;
558 return best;
561 /* break f->words[0..end] into lines according to fmt_bestpos() */
562 static int fmt_break(struct fmt *f, int end)
564 int beg, ret = 0;
565 beg = fmt_bestpos(f, end);
566 if (beg > 0)
567 ret += fmt_break(f, beg);
568 f->words[beg].gap = 0;
569 if (fmt_extractline(f, beg, end, 1))
570 return ret;
571 if (beg > 0)
572 fmt_confupdate(f);
573 return ret + (end - beg);
576 /* estimated number of lines until traps or the end of a page */
577 static int fmt_safelines(void)
579 int lnht = MAX(1, n_L) * n_v;
580 return (f_nexttrap() + lnht - 1) / lnht;
583 /* fill the words collected in the buffer */
584 static int fmt_fillwords(struct fmt *f, int br)
586 int nreq; /* the number of lines until a trap */
587 int end; /* the final line ends before this word */
588 int end_head; /* like end, but only the first nreq lines included */
589 int head = 0; /* only nreq first lines have been formatted */
590 int llen; /* line length, taking shrinkable spaces into account */
591 int n, i;
592 if (!FMT_FILL(f))
593 return 0;
594 llen = fmt_wordslen(f, 0, f->words_n) -
595 fmt_spacessum(f, 0, f->words_n) * n_ssh / 100;
596 /* not enough words to fill */
597 if ((f->fillreq <= 0 || f->words_n < f->fillreq) && llen <= FMT_LLEN(f))
598 return 0;
599 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
600 if (nreq > 0 && nreq <= fmt_nlines(f))
601 return 1;
602 /* resetting positions */
603 f->best = malloc((f->words_n + 1) * sizeof(f->best[0]));
604 f->best_pos = malloc((f->words_n + 1) * sizeof(f->best_pos[0]));
605 f->best_dep = malloc((f->words_n + 1) * sizeof(f->best_dep[0]));
606 memset(f->best, 0, (f->words_n + 1) * sizeof(f->best[0]));
607 memset(f->best_dep, 0, (f->words_n + 1) * sizeof(f->best_dep[0]));
608 for (i = 0; i < f->words_n + 1; i++)
609 f->best_pos[i] = -1;
610 end = fmt_breakparagraph(f, f->words_n, br);
611 if (nreq > 0) {
612 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
613 head = end_head < end;
614 end = end_head;
616 /* recursively add lines */
617 n = end > 0 ? fmt_break(f, end) : 0;
618 f->words_n -= n;
619 f->fillreq -= n;
620 fmt_movewords(f, 0, n, f->words_n);
621 f->filled = n && !f->words_n;
622 if (f->words_n)
623 f->words[0].gap = 0;
624 if (f->words_n) /* apply the new .l and .i */
625 fmt_confupdate(f);
626 free(f->best);
627 free(f->best_pos);
628 free(f->best_dep);
629 f->best = NULL;
630 f->best_pos = NULL;
631 f->best_dep = NULL;
632 return head || n != end;
635 struct fmt *fmt_alloc(void)
637 struct fmt *fmt = xmalloc(sizeof(*fmt));
638 memset(fmt, 0, sizeof(*fmt));
639 return fmt;
642 void fmt_free(struct fmt *fmt)
644 free(fmt->lines);
645 free(fmt->words);
646 free(fmt);
649 int fmt_wid(struct fmt *fmt)
651 return fmt_wordslen(fmt, 0, fmt->words_n) + fmt_wordgap(fmt);
654 int fmt_morewords(struct fmt *fmt)
656 return fmt_morelines(fmt) || fmt->words_n;
659 int fmt_morelines(struct fmt *fmt)
661 return fmt->lines_head != fmt->lines_tail;
664 /* suppress the last newline */
665 void fmt_suppressnl(struct fmt *fmt)
667 if (fmt->nls) {
668 fmt->nls--;
669 fmt->nls_sup = 1;