fmt: \p escape sequences for hyphenated words
[neatroff.git] / fmt.c
blobbf71ceffd2959787998bfff3d8bd9e7a0bc85443
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "roff.h"
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 static int fmt_fillwords(struct fmt *f, int br);
25 struct word {
26 char *s;
27 int wid; /* word's width */
28 int elsn, elsp; /* els_neg and els_pos */
29 int gap; /* the space before this word */
30 int hy; /* hyphen width if inserted after this word */
31 int str; /* does the space before it stretch */
34 struct line {
35 struct sbuf sbuf;
36 int wid, li, ll;
37 int elsn, elsp;
40 struct fmt {
41 /* queued words */
42 struct word words[NWORDS];
43 int nwords;
44 /* queued lines */
45 struct line lines[NLINES];
46 int l_head, l_tail;
47 /* for paragraph adjustment */
48 long best[NWORDS];
49 int best_pos[NWORDS];
50 int best_dep[NWORDS];
51 /* current line */
52 int gap; /* space before the next word */
53 int nls; /* newlines before the next word */
54 int nls_sup; /* suppressed newlines */
55 int li, ll; /* current line indentation and length */
56 int filled; /* filled all words in the last fmt_fill() */
57 int eos; /* last word ends a sentence */
58 int fillreq; /* fill after the last word (\p) */
61 /* .ll, .in and .ti are delayed until the partial line is output */
62 static void fmt_confupdate(struct fmt *f)
64 f->ll = n_l;
65 f->li = n_ti >= 0 ? n_ti : n_i;
66 n_ti = -1;
69 static int fmt_confchanged(struct fmt *f)
71 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i);
74 /* move words inside an fmt struct */
75 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
77 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
80 /* move words from the buffer to s */
81 static int fmt_wordscopy(struct fmt *f, int beg, int end,
82 struct sbuf *s, int *els_neg, int *els_pos)
84 struct word *wcur;
85 int w = 0;
86 int i;
87 *els_neg = 0;
88 *els_pos = 0;
89 for (i = beg; i < end; i++) {
90 wcur = &f->words[i];
91 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
92 sbuf_append(s, wcur->s);
93 w += wcur->wid + wcur->gap;
94 if (wcur->elsn < *els_neg)
95 *els_neg = wcur->elsn;
96 if (wcur->elsp > *els_pos)
97 *els_pos = wcur->elsp;
98 free(wcur->s);
100 if (beg < end) {
101 wcur = &f->words[end - 1];
102 if (wcur->hy)
103 sbuf_append(s, "\\(hy");
104 w += wcur->hy;
106 return w;
109 static int fmt_nlines(struct fmt *f)
111 if (f->l_tail <= f->l_head)
112 return f->l_head - f->l_tail;
113 return NLINES - f->l_tail + f->l_head;
116 /* the total width of the specified words in f->words[] */
117 static int fmt_wordslen(struct fmt *f, int beg, int end)
119 int i, w = 0;
120 for (i = beg; i < end; i++)
121 w += f->words[i].wid + f->words[i].gap;
122 return beg < end ? w + f->words[end - 1].hy : 0;
125 /* the number of stretchable spaces in f */
126 static int fmt_spaces(struct fmt *f, int beg, int end)
128 int i, n = 0;
129 for (i = beg + 1; i < end; i++)
130 if (f->words[i].str)
131 n++;
132 return n;
135 /* the amount of stretchable spaces in f */
136 static int fmt_spacessum(struct fmt *f, int beg, int end)
138 int i, n = 0;
139 for (i = beg + 1; i < end; i++)
140 if (f->words[i].str)
141 n += f->words[i].gap;
142 return n;
145 /* return the next line in the buffer */
146 int fmt_nextline(struct fmt *f, struct sbuf *sbuf, int *w,
147 int *li, int *ll, int *els_neg, int *els_pos)
149 struct line *l;
150 l = &f->lines[f->l_tail];
151 if (f->l_head == f->l_tail)
152 return 1;
153 *li = l->li;
154 *ll = l->ll;
155 *w = l->wid;
156 *els_neg = l->elsn;
157 *els_pos = l->elsp;
158 sbuf_append(sbuf, sbuf_buf(&l->sbuf));
159 sbuf_done(&l->sbuf);
160 f->l_tail = (f->l_tail + 1) % NLINES;
161 return 0;
164 static struct line *fmt_mkline(struct fmt *f)
166 struct line *l = &f->lines[f->l_head];
167 if ((f->l_head + 1) % NLINES == f->l_tail)
168 return NULL;
169 f->l_head = (f->l_head + 1) % NLINES;
170 l->li = f->li;
171 l->ll = f->ll;
172 sbuf_init(&l->sbuf);
173 return l;
176 static int fmt_extractline(struct fmt *f, int beg, int end, int llen, int spread)
178 int fmt_div, fmt_rem;
179 int w, i, nspc;
180 struct line *l;
181 if (!(l = fmt_mkline(f)))
182 return 1;
183 w = fmt_wordslen(f, beg, end);
184 nspc = fmt_spaces(f, beg, end);
185 /* stretch if (spread & 1) and shrink if (spread & 2) */
186 if (nspc && ((spread & 1 && w < llen) || (spread & 2 && w > llen))) {
187 fmt_div = (llen - w) / nspc;
188 fmt_rem = (llen - w) % nspc;
189 if (fmt_rem < 0) {
190 fmt_div--;
191 fmt_rem += nspc;
193 for (i = beg + 1; i < end; i++)
194 if (f->words[i].str)
195 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
197 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
198 return 0;
201 static int fmt_sp(struct fmt *f)
203 if (fmt_fillwords(f, 1))
204 return 1;
205 if (fmt_extractline(f, 0, f->nwords, FMT_LLEN(f) * n_pmll / 100,
206 FMT_ADJ(f) && n_j & AD_P))
207 return 1;
208 f->filled = 0;
209 f->nls--;
210 f->nls_sup = 0;
211 f->nwords = 0;
212 f->fillreq = 0;
213 return 0;
216 /* fill as many lines as possible; if br, put the remaining words in a line */
217 int fmt_fill(struct fmt *f, int br)
219 if (fmt_fillwords(f, br))
220 return 1;
221 if (br) {
222 f->filled = 0;
223 if (f->nwords)
224 if (fmt_sp(f))
225 return 1;
227 return 0;
230 void fmt_space(struct fmt *fmt)
232 fmt->gap += font_swid(dev_font(n_f), n_s, n_ss);
235 int fmt_newline(struct fmt *f)
237 f->gap = 0;
238 if (!FMT_FILL(f)) {
239 f->nls++;
240 fmt_sp(f);
241 return 0;
243 if (f->nls >= 1)
244 if (fmt_sp(f))
245 return 1;
246 if (f->nls == 0 && !f->filled && !f->nwords)
247 fmt_sp(f);
248 f->nls++;
249 return 0;
252 /* format the paragraph after the next word (\p) */
253 int fmt_fillreq(struct fmt *f)
255 if (f->fillreq > 0)
256 if (fmt_fillwords(f, 0))
257 return 1;
258 f->fillreq = f->nwords + 1;
259 return 0;
262 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
263 int hy, int str, int gap)
265 int len = strlen(wb_buf(wb));
266 word->s = xmalloc(len + 1);
267 memcpy(word->s, wb_buf(wb), len + 1);
268 word->wid = wb_wid(wb);
269 word->elsn = wb->els_neg;
270 word->elsp = wb->els_pos;
271 word->hy = hy ? wb_dashwid(wb) : 0;
272 word->str = str;
273 word->gap = gap;
276 /* find explicit hyphenation positions: dashes, \: and \% */
277 static int fmt_hyphmarks(char *word, int *hyidx, int *hyins)
279 char d[ILNLEN];
280 char *s = word;
281 int c, n = 0;
282 while ((c = escread(&s, d)) > 0)
284 if (c < 0 || !strcmp(c_hc, d))
285 return -1;
286 while ((c = escread(&s, d)) >= 0 && n < NHYPHSWORD) {
287 if (!c && !strcmp(c_hc, d)) {
288 hyins[n] = 1;
289 hyidx[n++] = s - word;
291 if (!c && (!strcmp(c_bp, d) || c_isdash(d))) {
292 hyins[n] = 0;
293 hyidx[n++] = s - word;
296 return n;
299 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
301 int hyidx[NHYPHSWORD];
302 int hyins[NHYPHSWORD] = {0};
303 char *src = wb_buf(wb);
304 struct wb wbc;
305 char *beg;
306 char *end;
307 int n, i;
308 int cf, cs, cm;
309 n = fmt_hyphmarks(src, hyidx, hyins);
310 if (n <= 0) {
311 fmt_wb2word(f, &f->words[f->nwords++], wb, 0, 1, gap);
312 return;
314 /* update f->fillreq considering the new sub-words */
315 if (f->fillreq == f->nwords + 1)
316 f->fillreq += n;
317 wb_init(&wbc);
318 for (i = 0; i <= n; i++) {
319 beg = src + (i > 0 ? hyidx[i - 1] : 0);
320 end = src + (i < n ? hyidx[i] : strlen(src));
321 wb_catstr(&wbc, beg, end);
322 fmt_wb2word(f, &f->words[f->nwords++], &wbc,
323 i < n && hyins[i], i == 0, i == 0 ? gap : 0);
324 /* restoring wbc */
325 wb_fnszget(&wbc, &cs, &cf, &cm);
326 wb_reset(&wbc);
327 wb_fnszset(&wbc, cs, cf, cm);
329 wb_done(&wbc);
332 /* the amount of space necessary before the next word */
333 static int fmt_wordgap(struct fmt *f)
335 int nls = f->nls || f->nls_sup;
336 int swid = font_swid(dev_font(n_f), n_s, n_ss);
337 if (f->eos && f->nwords)
338 if ((nls && !f->gap) || (!nls && f->gap == 2 * swid))
339 return swid + font_swid(dev_font(n_f), n_s, n_sss);
340 return (nls && !f->gap && f->nwords) ? swid : f->gap;
343 /* insert wb into fmt */
344 int fmt_word(struct fmt *f, struct wb *wb)
346 if (wb_empty(wb))
347 return 0;
348 if (f->nwords + NHYPHSWORD >= NWORDS || fmt_confchanged(f))
349 if (fmt_fillwords(f, 0))
350 return 1;
351 if (FMT_FILL(f) && f->nls && f->gap)
352 if (fmt_sp(f))
353 return 1;
354 if (!f->nwords) /* apply the new .l and .i */
355 fmt_confupdate(f);
356 f->gap = fmt_wordgap(f);
357 f->eos = wb_eos(wb);
358 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
359 f->filled = 0;
360 f->nls = 0;
361 f->nls_sup = 0;
362 f->gap = 0;
363 return 0;
366 /* approximate log2(cost) */
367 static long scaledown(long cost)
369 long ret = 0;
370 int i;
371 for (i = 0; i < 14; i++)
372 ret += ((cost >> (i * 2)) & 3) << (i + 3);
373 return ret < (1 << 13) ? ret : (1 << 13);
376 /* the cost of putting lwid words in a line of length llen */
377 static long FMT_COST(int llen, int lwid, int swid, int nspc)
379 /* the ratio that the stretchable spaces of the line should be spread */
380 long ratio = abs((llen - lwid) * 100l / (swid ? swid : 1));
381 /* ratio too large; scaling it down */
382 if (ratio > 4000)
383 ratio = 4000 + scaledown(ratio - 4000);
384 /* assigning a cost of 100 to each space stretching 100 percent */
385 return ratio * ratio / 100l * (nspc ? nspc : 1);
388 /* the cost of formatting last lines; should prevent widows */
389 static long FMT_LCOST(int llen, int lwid, int swid, int nspc)
391 if (!n_pmll || lwid >= llen * n_pmll / 100)
392 return 0;
393 return FMT_COST(llen * n_pmll / 100, lwid, swid, nspc);
396 /* the cost of putting a line break before word pos */
397 static long fmt_findcost(struct fmt *f, int pos)
399 int i, pen = 0;
400 long cur;
401 int llen = MAX(1, FMT_LLEN(f));
402 int lwid = 0; /* current line length */
403 int swid = 0; /* amount of stretchable spaces */
404 int nspc = 0; /* number of stretchable spaces */
405 if (pos <= 0)
406 return 0;
407 if (f->best_pos[pos] >= 0)
408 return f->best[pos];
409 i = pos - 1;
410 lwid = 0;
411 if (f->words[i].hy) /* the last word is hyphenated */
412 lwid += f->words[i].hy;
413 if (f->words[i].hy)
414 pen = n_hyp;
415 while (i >= 0) {
416 lwid += f->words[i].wid;
417 if (i + 1 < pos)
418 lwid += f->words[i + 1].gap;
419 if (i + 1 < pos && f->words[i + 1].str) {
420 swid += f->words[i + 1].gap;
421 nspc++;
423 if (lwid - (swid * n_ssh / 100) > llen)
424 if (pos - i > 1)
425 break;
426 cur = fmt_findcost(f, i) + FMT_COST(llen, lwid, swid, nspc) +
427 pen * (nspc ? nspc : 1);
428 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
429 f->best_pos[pos] = i;
430 f->best_dep[pos] = f->best_dep[i] + 1;
431 f->best[pos] = cur;
433 i--;
435 return f->best[pos];
438 static int fmt_bestpos(struct fmt *f, int pos)
440 fmt_findcost(f, pos);
441 return MAX(0, f->best_pos[pos]);
444 static int fmt_bestdep(struct fmt *f, int pos)
446 fmt_findcost(f, pos);
447 return MAX(0, f->best_dep[pos]);
450 /* return the last filled word */
451 static int fmt_breakparagraph(struct fmt *f, int pos, int br)
453 int i;
454 int best = -1;
455 long cost, best_cost = 0;
456 int llen = FMT_LLEN(f);
457 int lwid = 0; /* current line length */
458 int swid = 0; /* amount of stretchable spaces */
459 int nspc = 0; /* number of stretchable spaces */
460 if (f->fillreq > 0 && f->fillreq <= f->nwords) {
461 fmt_findcost(f, f->fillreq);
462 return f->fillreq;
464 if (pos > 0 && f->words[pos - 1].wid >= llen) {
465 fmt_findcost(f, pos);
466 return pos;
468 i = pos - 1;
469 lwid = 0;
470 if (f->words[i].hy) /* the last word is hyphenated */
471 lwid += f->words[i].hy;
472 while (i >= 0) {
473 lwid += f->words[i].wid;
474 if (i + 1 < pos)
475 lwid += f->words[i + 1].gap;
476 if (i + 1 < pos && f->words[i + 1].str) {
477 swid += f->words[i + 1].gap;
478 nspc++;
480 if (lwid > llen && i + 1 < pos)
481 break;
482 cost = fmt_findcost(f, i) +
483 (br ? FMT_LCOST(llen, lwid, swid, nspc) : 0);
484 if (best < 0 || cost < best_cost) {
485 best = i;
486 best_cost = cost;
488 i--;
490 return best;
493 /* extract the first nreq formatted lines before the word at pos */
494 static int fmt_head(struct fmt *f, int nreq, int pos)
496 int best = pos; /* best line break for nreq-th line */
497 int prev, next; /* best line breaks without hyphenation */
498 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
499 return pos;
500 /* finding the optimal line break for nreq-th line */
501 while (best > 0 && fmt_bestdep(f, best) > nreq)
502 best = fmt_bestpos(f, best);
503 prev = best;
504 next = best;
505 /* finding closest line breaks without hyphenation */
506 while (prev > 1 && f->words[prev - 1].hy &&
507 fmt_bestdep(f, prev - 1) == nreq)
508 prev--;
509 while (next < pos && f->words[next - 1].hy &&
510 fmt_bestdep(f, next) == nreq)
511 next++;
512 /* choosing the best of them */
513 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
514 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
515 if (!f->words[prev - 1].hy)
516 return prev;
517 if (!f->words[next - 1].hy)
518 return next;
519 return best;
522 /* break f->words[0..end] into lines according to fmt_bestpos() */
523 static int fmt_break(struct fmt *f, int end)
525 int beg, ret = 0;
526 beg = fmt_bestpos(f, end);
527 if (beg > 0)
528 ret += fmt_break(f, beg);
529 f->words[beg].gap = 0;
530 if (fmt_extractline(f, beg, end, FMT_LLEN(f), FMT_ADJ(f) ? 3 : 0))
531 return ret;
532 if (beg > 0)
533 fmt_confupdate(f);
534 return ret + (end - beg);
537 /* estimated number of lines until traps or the end of a page */
538 static int fmt_safelines(void)
540 int lnht = MAX(1, n_L) * n_v;
541 return (f_nexttrap() + lnht - 1) / lnht;
544 /* fill the words collected in the buffer */
545 static int fmt_fillwords(struct fmt *f, int br)
547 int nreq; /* the number of lines until a trap */
548 int end; /* the final line ends before this word */
549 int end_head; /* like end, but only the first nreq lines included */
550 int head = 0; /* only nreq first lines have been formatted */
551 int llen; /* line length, taking shrinkable spaces into account */
552 int n, i;
553 if (!FMT_FILL(f))
554 return 0;
555 llen = fmt_wordslen(f, 0, f->nwords) -
556 fmt_spacessum(f, 0, f->nwords) * n_ssh / 100;
557 /* not enough words to fill */
558 if ((f->fillreq <= 0 || f->nwords < f->fillreq) && llen <= FMT_LLEN(f))
559 return 0;
560 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
561 if (nreq > 0 && nreq <= fmt_nlines(f))
562 return 1;
563 /* resetting positions */
564 for (i = 0; i < f->nwords + 1; i++)
565 f->best_pos[i] = -1;
566 end = fmt_breakparagraph(f, f->nwords, br);
567 if (nreq > 0) {
568 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
569 head = end_head < end;
570 end = end_head;
572 /* recursively add lines */
573 n = fmt_break(f, end);
574 f->nwords -= n;
575 f->fillreq -= n;
576 fmt_movewords(f, 0, n, f->nwords);
577 f->filled = n && !f->nwords;
578 if (f->nwords)
579 f->words[0].gap = 0;
580 if (f->nwords) /* apply the new .l and .i */
581 fmt_confupdate(f);
582 return head || n != end;
585 struct fmt *fmt_alloc(void)
587 struct fmt *fmt = xmalloc(sizeof(*fmt));
588 memset(fmt, 0, sizeof(*fmt));
589 return fmt;
592 void fmt_free(struct fmt *fmt)
594 free(fmt);
597 int fmt_wid(struct fmt *fmt)
599 return fmt_wordslen(fmt, 0, fmt->nwords) + fmt_wordgap(fmt);
602 int fmt_morewords(struct fmt *fmt)
604 return fmt_morelines(fmt) || fmt->nwords;
607 int fmt_morelines(struct fmt *fmt)
609 return fmt->l_head != fmt->l_tail;
612 /* suppress the last newline */
613 void fmt_suppressnl(struct fmt *fmt)
615 if (fmt->nls) {
616 fmt->nls--;
617 fmt->nls_sup = 1;