dir: support text direction with .>>, .<<, \> and \<
[neatroff.git] / fmt.c
blobfa7537be2b2f090c9cfe4ccab596c05f7a337e8e
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "roff.h"
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li - (f)->lI)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 static int fmt_fillwords(struct fmt *f, int br);
25 struct word {
26 char *s;
27 int wid; /* word's width */
28 int elsn, elsp; /* els_neg and els_pos */
29 int gap; /* the space before this word */
30 int hy; /* hyphen width if inserted after this word */
31 int str; /* does the space before it stretch */
32 int cost; /* the extra cost of line break after this word */
33 int swid; /* space width after this word (i.e., \w' ') */
36 struct line {
37 struct sbuf sbuf;
38 int wid, li, ll, lI;
39 int elsn, elsp;
42 struct fmt {
43 /* queued words */
44 struct word *words;
45 int words_n, words_sz;
46 /* queued lines */
47 struct line *lines;
48 int lines_head, lines_tail, lines_sz;
49 /* for paragraph adjustment */
50 long *best;
51 int *best_pos;
52 int *best_dep;
53 /* current line */
54 int gap; /* space before the next word */
55 int nls; /* newlines before the next word */
56 int nls_sup; /* suppressed newlines */
57 int li, ll, lI; /* current line indentation and length */
58 int filled; /* filled all words in the last fmt_fill() */
59 int eos; /* last word ends a sentence */
60 int fillreq; /* fill after the last word (\p) */
63 /* .ll, .in and .ti are delayed until the partial line is output */
64 static void fmt_confupdate(struct fmt *f)
66 f->ll = n_l;
67 f->li = n_ti >= 0 ? n_ti : n_i;
68 f->lI = n_tI >= 0 ? n_tI : n_I;
69 n_ti = -1;
70 n_tI = -1;
73 static int fmt_confchanged(struct fmt *f)
75 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i) ||
76 f->lI != (n_tI >= 0 ? n_tI : n_I);
79 /* move words inside an fmt struct */
80 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
82 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
85 /* move words from the buffer to s */
86 static int fmt_wordscopy(struct fmt *f, int beg, int end,
87 struct sbuf *s, int *els_neg, int *els_pos)
89 struct word *wcur;
90 int w = 0;
91 int i;
92 *els_neg = 0;
93 *els_pos = 0;
94 for (i = beg; i < end; i++) {
95 wcur = &f->words[i];
96 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
97 sbuf_append(s, wcur->s);
98 w += wcur->wid + wcur->gap;
99 if (wcur->elsn < *els_neg)
100 *els_neg = wcur->elsn;
101 if (wcur->elsp > *els_pos)
102 *els_pos = wcur->elsp;
103 free(wcur->s);
105 if (beg < end) {
106 wcur = &f->words[end - 1];
107 if (wcur->hy)
108 sbuf_append(s, "\\(hy");
109 w += wcur->hy;
111 return w;
114 static int fmt_nlines(struct fmt *f)
116 return f->lines_head - f->lines_tail;
119 /* the total width of the specified words in f->words[] */
120 static int fmt_wordslen(struct fmt *f, int beg, int end)
122 int i, w = 0;
123 for (i = beg; i < end; i++)
124 w += f->words[i].wid + f->words[i].gap;
125 return beg < end ? w + f->words[end - 1].hy : 0;
128 /* the number of stretchable spaces in f */
129 static int fmt_spaces(struct fmt *f, int beg, int end)
131 int i, n = 0;
132 for (i = beg + 1; i < end; i++)
133 if (f->words[i].str)
134 n++;
135 return n;
138 /* the amount of stretchable spaces in f */
139 static int fmt_spacessum(struct fmt *f, int beg, int end)
141 int i, n = 0;
142 for (i = beg + 1; i < end; i++)
143 if (f->words[i].str)
144 n += f->words[i].gap;
145 return n;
148 /* return the next line in the buffer */
149 char *fmt_nextline(struct fmt *f, int *w,
150 int *li, int *lI, int *ll, int *els_neg, int *els_pos)
152 struct line *l;
153 if (f->lines_head == f->lines_tail)
154 return NULL;
155 l = &f->lines[f->lines_tail++];
156 *li = l->li;
157 *lI = l->lI;
158 *ll = l->ll;
159 *w = l->wid;
160 *els_neg = l->elsn;
161 *els_pos = l->elsp;
162 return sbuf_out(&l->sbuf);
165 static struct line *fmt_mkline(struct fmt *f)
167 struct line *l;
168 if (f->lines_head == f->lines_tail) {
169 f->lines_head = 0;
170 f->lines_tail = 0;
172 if (f->lines_head == f->lines_sz) {
173 f->lines_sz += 256;
174 f->lines = mextend(f->lines, f->lines_head,
175 f->lines_sz, sizeof(f->lines[0]));
177 l = &f->lines[f->lines_head++];
178 l->li = f->li;
179 l->lI = f->lI;
180 l->ll = f->ll;
181 sbuf_init(&l->sbuf);
182 return l;
185 /* extract words from beg to end; shrink or stretch spaces if needed */
186 static int fmt_extractline(struct fmt *f, int beg, int end, int str)
188 int fmt_div, fmt_rem;
189 int w, i, nspc, llen;
190 struct line *l;
191 if (!(l = fmt_mkline(f)))
192 return 1;
193 llen = FMT_LLEN(f);
194 w = fmt_wordslen(f, beg, end);
195 nspc = fmt_spaces(f, beg, end);
196 if (nspc && FMT_ADJ(f) && (llen < w || str)) {
197 fmt_div = (llen - w) / nspc;
198 fmt_rem = (llen - w) % nspc;
199 if (fmt_rem < 0) {
200 fmt_div--;
201 fmt_rem += nspc;
203 for (i = beg + 1; i < end; i++)
204 if (f->words[i].str)
205 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
207 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
208 return 0;
211 static int fmt_sp(struct fmt *f)
213 if (fmt_fillwords(f, 1))
214 return 1;
215 if (fmt_extractline(f, 0, f->words_n, 0))
216 return 1;
217 f->filled = 0;
218 f->nls--;
219 f->nls_sup = 0;
220 f->words_n = 0;
221 f->fillreq = 0;
222 return 0;
225 /* fill as many lines as possible; if br, put the remaining words in a line */
226 int fmt_fill(struct fmt *f, int br)
228 if (fmt_fillwords(f, br))
229 return 1;
230 if (br) {
231 f->filled = 0;
232 if (f->words_n)
233 if (fmt_sp(f))
234 return 1;
236 return 0;
239 void fmt_space(struct fmt *fmt)
241 fmt->gap += font_swid(dev_font(n_f), n_s, n_ss);
244 int fmt_newline(struct fmt *f)
246 f->gap = 0;
247 if (!FMT_FILL(f)) {
248 f->nls++;
249 fmt_sp(f);
250 return 0;
252 if (f->nls >= 1)
253 if (fmt_sp(f))
254 return 1;
255 if (f->nls == 0 && !f->filled && !f->words_n)
256 fmt_sp(f);
257 f->nls++;
258 return 0;
261 /* format the paragraph after the next word (\p) */
262 int fmt_fillreq(struct fmt *f)
264 if (f->fillreq > 0)
265 if (fmt_fillwords(f, 0))
266 return 1;
267 f->fillreq = f->words_n + 1;
268 return 0;
271 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
272 int hy, int str, int gap, int cost)
274 int len = strlen(wb_buf(wb));
275 word->s = xmalloc(len + 1);
276 memcpy(word->s, wb_buf(wb), len + 1);
277 word->wid = wb_wid(wb);
278 word->elsn = wb->els_neg;
279 word->elsp = wb->els_pos;
280 word->hy = hy ? wb_hywid(wb) : 0;
281 word->str = str;
282 word->gap = gap;
283 word->cost = cost;
284 word->swid = wb_swid(wb);
287 /* find explicit break positions: dashes, \:, \%, and \~ */
288 static int fmt_hyphmarks(char *word, int *hyidx, int *hyins, int *hygap)
290 char *s = word;
291 char *d = NULL;
292 int c, n = 0;
293 while ((c = escread(&s, &d)) > 0)
295 if (c < 0 || !strcmp(c_hc, d))
296 return -1;
297 while ((c = escread(&s, &d)) >= 0 && n < NHYPHSWORD) {
298 if (!c && !strcmp(c_hc, d)) {
299 hyins[n] = 1;
300 hyidx[n++] = s - word;
302 if (!c && c_hydash(d)) {
303 hyins[n] = 0;
304 hyidx[n++] = s - word;
306 if (!c && !strcmp(c_nb, d)) {
307 hygap[n] = 1;
308 hyidx[n++] = s - word;
311 return n;
314 static struct word *fmt_mkword(struct fmt *f)
316 if (f->words_n == f->words_sz) {
317 f->words_sz += 256;
318 f->words = mextend(f->words, f->words_n,
319 f->words_sz, sizeof(f->words[0]));
321 return &f->words[f->words_n++];
324 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
326 int hyidx[NHYPHSWORD]; /* sub-word boundaries */
327 int hyins[NHYPHSWORD] = {0}; /* insert dash */
328 int hygap[NHYPHSWORD] = {0}; /* stretchable no-break space */
329 char *src = wb_buf(wb);
330 struct wb wbc;
331 char *beg;
332 char *end;
333 int n, i;
334 int cf, cs, cm, ccd;
335 n = fmt_hyphmarks(src, hyidx, hyins, hygap);
336 if (n <= 0) {
337 fmt_wb2word(f, fmt_mkword(f), wb, 0, 1, gap, wb_cost(wb));
338 return;
340 /* update f->fillreq considering the new sub-words */
341 if (f->fillreq == f->words_n + 1)
342 f->fillreq += n;
343 wb_init(&wbc);
344 /* add sub-words */
345 for (i = 0; i <= n; i++) {
346 int ihy = i < n && hyins[i]; /* dash width */
347 int istr = i == 0 || hygap[i - 1]; /* stretchable */
348 int igap; /* gap width */
349 int icost; /* hyphenation cost */
350 beg = src + (i > 0 ? hyidx[i - 1] : 0);
351 end = src + (i < n ? hyidx[i] : strlen(src));
352 if (i < n && hygap[i]) /* remove \~ */
353 end -= strlen(c_nb);
354 wb_catstr(&wbc, beg, end);
355 wb_fnszget(&wbc, &cf, &cs, &cm, &ccd);
356 icost = i == n ? wb_cost(&wbc) : hygap[i] * 10000000;
357 igap = i == 0 ? gap : hygap[i - 1] * wb_swid(&wbc);
358 fmt_wb2word(f, fmt_mkword(f), &wbc, ihy, istr, igap, icost);
359 wb_reset(&wbc);
360 wb_fnszset(&wbc, cf, cs, cm, ccd); /* restoring wbc */
362 wb_done(&wbc);
365 /* the amount of space necessary before the next word */
366 static int fmt_wordgap(struct fmt *f)
368 int nls = f->nls || f->nls_sup;
369 int swid = font_swid(dev_font(n_f), n_s, n_ss);
370 if (f->eos && f->words_n)
371 if ((nls && !f->gap) || (!nls && f->gap == 2 * swid))
372 return swid + font_swid(dev_font(n_f), n_s, n_sss);
373 return (nls && !f->gap && f->words_n) ? swid : f->gap;
376 /* insert wb into fmt */
377 int fmt_word(struct fmt *f, struct wb *wb)
379 if (wb_empty(wb))
380 return 0;
381 if (fmt_confchanged(f))
382 if (fmt_fillwords(f, 0))
383 return 1;
384 if (FMT_FILL(f) && f->nls && f->gap)
385 if (fmt_sp(f))
386 return 1;
387 if (!f->words_n) /* apply the new .l and .i */
388 fmt_confupdate(f);
389 f->gap = fmt_wordgap(f);
390 f->eos = wb_eos(wb);
391 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
392 f->filled = 0;
393 f->nls = 0;
394 f->nls_sup = 0;
395 f->gap = 0;
396 return 0;
399 /* approximate 8 * sqrt(cost) */
400 static long scaledown(long cost)
402 long ret = 0;
403 int i;
404 for (i = 0; i < 14; i++)
405 ret += ((cost >> (i * 2)) & 3) << (i + 3);
406 return ret < (1 << 13) ? ret : (1 << 13);
409 /* the cost of putting lwid words in a line of length llen */
410 static long FMT_COST(int llen, int lwid, int swid, int nspc)
412 /* the ratio that the stretchable spaces of the line should be spread */
413 long ratio = abs((llen - lwid) * 100l / (swid ? swid : 1));
414 /* ratio too large; scaling it down */
415 if (ratio > 4000)
416 ratio = 4000 + scaledown(ratio - 4000);
417 /* assigning a cost of 100 to each space stretching 100 percent */
418 return ratio * ratio / 100l * (nspc ? nspc : 1);
421 /* the number of hyphenations in consecutive lines ending at pos */
422 static int fmt_hydepth(struct fmt *f, int pos)
424 int n = 0;
425 while (pos > 0 && f->words[pos - 1].hy && ++n < 5)
426 pos = f->best_pos[pos];
427 return n;
430 static long hycost(int depth)
432 if (n_hlm > 0 && depth > n_hlm)
433 return 10000000;
434 if (depth >= 3)
435 return n_hycost + n_hycost2 + n_hycost3;
436 if (depth == 2)
437 return n_hycost + n_hycost2;
438 return depth ? n_hycost : 0;
441 /* the cost of putting a line break before word pos */
442 static long fmt_findcost(struct fmt *f, int pos)
444 int i, hyphenated;
445 long cur;
446 int llen = MAX(1, FMT_LLEN(f));
447 int lwid = 0; /* current line length */
448 int swid = 0; /* amount of stretchable spaces */
449 int nspc = 0; /* number of stretchable spaces */
450 int dwid = 0; /* equal to swid, unless swid is zero */
451 if (pos <= 0)
452 return 0;
453 if (f->best_pos[pos] >= 0)
454 return f->best[pos] + f->words[pos - 1].cost;
455 lwid = f->words[pos - 1].hy; /* non-zero if the last word is hyphenated */
456 hyphenated = f->words[pos - 1].hy != 0;
457 i = pos - 1;
458 while (i >= 0) {
459 lwid += f->words[i].wid;
460 if (i + 1 < pos)
461 lwid += f->words[i + 1].gap;
462 if (i + 1 < pos && f->words[i + 1].str) {
463 swid += f->words[i + 1].gap;
464 nspc++;
466 if (lwid > llen + swid * n_ssh / 100 && i + 1 < pos)
467 break;
468 dwid = swid;
469 if (!dwid && i > 0) /* no stretchable spaces */
470 dwid = f->words[i - 1].swid;
471 cur = fmt_findcost(f, i) + FMT_COST(llen, lwid, dwid, nspc);
472 if (hyphenated)
473 cur += hycost(1 + fmt_hydepth(f, i));
474 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
475 f->best_pos[pos] = i;
476 f->best_dep[pos] = f->best_dep[i] + 1;
477 f->best[pos] = cur;
479 i--;
481 return f->best[pos] + f->words[pos - 1].cost;
484 static int fmt_bestpos(struct fmt *f, int pos)
486 fmt_findcost(f, pos);
487 return MAX(0, f->best_pos[pos]);
490 static int fmt_bestdep(struct fmt *f, int pos)
492 fmt_findcost(f, pos);
493 return MAX(0, f->best_dep[pos]);
496 /* return the last filled word */
497 static int fmt_breakparagraph(struct fmt *f, int pos, int br)
499 int i;
500 int best = -1;
501 long cost, best_cost = 0;
502 int llen = FMT_LLEN(f);
503 int lwid = 0; /* current line length */
504 int swid = 0; /* amount of stretchable spaces */
505 int nspc = 0; /* number of stretchable spaces */
506 if (f->fillreq > 0 && f->fillreq <= f->words_n) {
507 fmt_findcost(f, f->fillreq);
508 return f->fillreq;
510 if (pos > 0 && f->words[pos - 1].wid >= llen) {
511 fmt_findcost(f, pos);
512 return pos;
514 i = pos - 1;
515 lwid = 0;
516 if (f->words[i].hy) /* the last word is hyphenated */
517 lwid += f->words[i].hy;
518 while (i >= 0) {
519 lwid += f->words[i].wid;
520 if (i + 1 < pos)
521 lwid += f->words[i + 1].gap;
522 if (i + 1 < pos && f->words[i + 1].str) {
523 swid += f->words[i + 1].gap;
524 nspc++;
526 if (lwid > llen && i + 1 < pos)
527 break;
528 cost = fmt_findcost(f, i);
529 /* the cost of formatting short lines; should prevent widows */
530 if (br && n_pmll && lwid < llen * n_pmll / 100) {
531 int pmll = llen * n_pmll / 100;
532 cost += (long) n_pmllcost * (pmll - lwid) / pmll;
534 if (best < 0 || cost < best_cost) {
535 best = i;
536 best_cost = cost;
538 i--;
540 return best;
543 /* extract the first nreq formatted lines before the word at pos */
544 static int fmt_head(struct fmt *f, int nreq, int pos)
546 int best = pos; /* best line break for nreq-th line */
547 int prev, next; /* best line breaks without hyphenation */
548 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
549 return pos;
550 /* finding the optimal line break for nreq-th line */
551 while (best > 0 && fmt_bestdep(f, best) > nreq)
552 best = fmt_bestpos(f, best);
553 prev = best;
554 next = best;
555 /* finding closest line breaks without hyphenation */
556 while (prev > 1 && f->words[prev - 1].hy &&
557 fmt_bestdep(f, prev - 1) == nreq)
558 prev--;
559 while (next < pos && f->words[next - 1].hy &&
560 fmt_bestdep(f, next) == nreq)
561 next++;
562 /* choosing the best of them */
563 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
564 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
565 if (!f->words[prev - 1].hy)
566 return prev;
567 if (!f->words[next - 1].hy)
568 return next;
569 return best;
572 /* break f->words[0..end] into lines according to fmt_bestpos() */
573 static int fmt_break(struct fmt *f, int end)
575 int beg, ret = 0;
576 beg = fmt_bestpos(f, end);
577 if (beg > 0)
578 ret += fmt_break(f, beg);
579 f->words[beg].gap = 0;
580 if (fmt_extractline(f, beg, end, 1))
581 return ret;
582 if (beg > 0)
583 fmt_confupdate(f);
584 return ret + (end - beg);
587 /* estimated number of lines until traps or the end of a page */
588 static int fmt_safelines(void)
590 int lnht = MAX(1, n_L) * n_v;
591 return (f_nexttrap() + lnht - 1) / lnht;
594 /* fill the words collected in the buffer */
595 static int fmt_fillwords(struct fmt *f, int br)
597 int nreq; /* the number of lines until a trap */
598 int end; /* the final line ends before this word */
599 int end_head; /* like end, but only the first nreq lines included */
600 int head = 0; /* only nreq first lines have been formatted */
601 int llen; /* line length, taking shrinkable spaces into account */
602 int n, i;
603 if (!FMT_FILL(f))
604 return 0;
605 llen = fmt_wordslen(f, 0, f->words_n) -
606 fmt_spacessum(f, 0, f->words_n) * n_ssh / 100;
607 /* not enough words to fill */
608 if ((f->fillreq <= 0 || f->words_n < f->fillreq) && llen <= FMT_LLEN(f))
609 return 0;
610 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
611 if (nreq > 0 && nreq <= fmt_nlines(f))
612 return 1;
613 /* resetting positions */
614 f->best = malloc((f->words_n + 1) * sizeof(f->best[0]));
615 f->best_pos = malloc((f->words_n + 1) * sizeof(f->best_pos[0]));
616 f->best_dep = malloc((f->words_n + 1) * sizeof(f->best_dep[0]));
617 memset(f->best, 0, (f->words_n + 1) * sizeof(f->best[0]));
618 memset(f->best_dep, 0, (f->words_n + 1) * sizeof(f->best_dep[0]));
619 for (i = 0; i < f->words_n + 1; i++)
620 f->best_pos[i] = -1;
621 end = fmt_breakparagraph(f, f->words_n, br);
622 if (nreq > 0) {
623 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
624 head = end_head < end;
625 end = end_head;
627 /* recursively add lines */
628 n = end > 0 ? fmt_break(f, end) : 0;
629 f->words_n -= n;
630 f->fillreq -= n;
631 fmt_movewords(f, 0, n, f->words_n);
632 f->filled = n && !f->words_n;
633 if (f->words_n)
634 f->words[0].gap = 0;
635 if (f->words_n) /* apply the new .l and .i */
636 fmt_confupdate(f);
637 free(f->best);
638 free(f->best_pos);
639 free(f->best_dep);
640 f->best = NULL;
641 f->best_pos = NULL;
642 f->best_dep = NULL;
643 return head || n != end;
646 struct fmt *fmt_alloc(void)
648 struct fmt *fmt = xmalloc(sizeof(*fmt));
649 memset(fmt, 0, sizeof(*fmt));
650 return fmt;
653 void fmt_free(struct fmt *fmt)
655 free(fmt->lines);
656 free(fmt->words);
657 free(fmt);
660 int fmt_wid(struct fmt *fmt)
662 return fmt_wordslen(fmt, 0, fmt->words_n) + fmt_wordgap(fmt);
665 int fmt_morewords(struct fmt *fmt)
667 return fmt_morelines(fmt) || fmt->words_n;
670 int fmt_morelines(struct fmt *fmt)
672 return fmt->lines_head != fmt->lines_tail;
675 /* suppress the last newline */
676 void fmt_suppressnl(struct fmt *fmt)
678 if (fmt->nls) {
679 fmt->nls--;
680 fmt->nls_sup = 1;