fmt: consider the number of spaces while filling paragraphs
[neatroff.git] / fmt.c
blobf3bde683b743ee167755689626dbd04cb0bc395f
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "roff.h"
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 static int fmt_fillwords(struct fmt *f, int br);
25 struct word {
26 char *s;
27 int wid; /* word's width */
28 int elsn, elsp; /* els_neg and els_pos */
29 int gap; /* the space before this word */
30 int hy; /* hyphen width if inserted after this word */
31 int str; /* does the space before it stretch */
34 struct line {
35 struct sbuf sbuf;
36 int wid, li, ll;
37 int elsn, elsp;
40 struct fmt {
41 /* queued words */
42 struct word words[NWORDS];
43 int nwords;
44 /* queued lines */
45 struct line lines[NLINES];
46 int l_head, l_tail;
47 /* for paragraph adjustment */
48 long best[NWORDS];
49 int best_pos[NWORDS];
50 int best_dep[NWORDS];
51 /* current line */
52 int gap; /* space before the next word */
53 int nls; /* newlines before the next word */
54 int nls_sup; /* suppressed newlines */
55 int li, ll; /* current line indentation and length */
56 int filled; /* filled all words in the last fmt_fill() */
57 int eos; /* last word ends a sentence */
58 int fillreq; /* fill after the last word (\p) */
61 /* .ll, .in and .ti are delayed until the partial line is output */
62 static void fmt_confupdate(struct fmt *f)
64 f->ll = n_l;
65 f->li = n_ti >= 0 ? n_ti : n_i;
66 n_ti = -1;
69 static int fmt_confchanged(struct fmt *f)
71 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i);
74 /* move words inside an fmt struct */
75 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
77 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
80 /* move words from the buffer to s */
81 static int fmt_wordscopy(struct fmt *f, int beg, int end,
82 struct sbuf *s, int *els_neg, int *els_pos)
84 struct word *wcur;
85 int w = 0;
86 int i;
87 *els_neg = 0;
88 *els_pos = 0;
89 for (i = beg; i < end; i++) {
90 wcur = &f->words[i];
91 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
92 sbuf_append(s, wcur->s);
93 w += wcur->wid + wcur->gap;
94 if (wcur->elsn < *els_neg)
95 *els_neg = wcur->elsn;
96 if (wcur->elsp > *els_pos)
97 *els_pos = wcur->elsp;
98 free(wcur->s);
100 if (beg < end) {
101 wcur = &f->words[end - 1];
102 if (wcur->hy)
103 sbuf_append(s, "\\(hy");
104 w += wcur->hy;
106 return w;
109 static int fmt_nlines(struct fmt *f)
111 if (f->l_tail <= f->l_head)
112 return f->l_head - f->l_tail;
113 return NLINES - f->l_tail + f->l_head;
116 /* the total width of the specified words in f->words[] */
117 static int fmt_wordslen(struct fmt *f, int beg, int end)
119 int i, w = 0;
120 for (i = beg; i < end; i++)
121 w += f->words[i].wid + f->words[i].gap;
122 return beg < end ? w + f->words[end - 1].hy : 0;
125 /* the number of stretchable spaces in f */
126 static int fmt_spaces(struct fmt *f, int beg, int end)
128 int i, n = 0;
129 for (i = beg + 1; i < end; i++)
130 if (f->words[i].str)
131 n++;
132 return n;
135 /* the amount of stretchable spaces in f */
136 static int fmt_spacessum(struct fmt *f, int beg, int end)
138 int i, n = 0;
139 for (i = beg + 1; i < end; i++)
140 if (f->words[i].str)
141 n += f->words[i].gap;
142 return n;
145 /* return the next line in the buffer */
146 int fmt_nextline(struct fmt *f, struct sbuf *sbuf, int *w,
147 int *li, int *ll, int *els_neg, int *els_pos)
149 struct line *l;
150 l = &f->lines[f->l_tail];
151 if (f->l_head == f->l_tail)
152 return 1;
153 *li = l->li;
154 *ll = l->ll;
155 *w = l->wid;
156 *els_neg = l->elsn;
157 *els_pos = l->elsp;
158 sbuf_append(sbuf, sbuf_buf(&l->sbuf));
159 sbuf_done(&l->sbuf);
160 f->l_tail = (f->l_tail + 1) % NLINES;
161 return 0;
164 static struct line *fmt_mkline(struct fmt *f)
166 struct line *l = &f->lines[f->l_head];
167 if ((f->l_head + 1) % NLINES == f->l_tail)
168 return NULL;
169 f->l_head = (f->l_head + 1) % NLINES;
170 l->li = f->li;
171 l->ll = f->ll;
172 sbuf_init(&l->sbuf);
173 return l;
176 static int fmt_extractline(struct fmt *f, int beg, int end, int llen, int spread)
178 int fmt_div, fmt_rem;
179 int w, i, nspc;
180 struct line *l;
181 if (!(l = fmt_mkline(f)))
182 return 1;
183 w = fmt_wordslen(f, beg, end);
184 nspc = fmt_spaces(f, beg, end);
185 /* stretch if (spread & 1) and shrink if (spread & 2) */
186 if (nspc && ((spread & 1 && w < llen) || (spread & 2 && w > llen))) {
187 fmt_div = (llen - w) / nspc;
188 fmt_rem = (llen - w) % nspc;
189 if (fmt_rem < 0) {
190 fmt_div--;
191 fmt_rem += nspc;
193 for (i = beg + 1; i < end; i++)
194 if (f->words[i].str)
195 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
197 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
198 return 0;
201 static int fmt_sp(struct fmt *f)
203 if (fmt_fillwords(f, 1))
204 return 1;
205 if (fmt_extractline(f, 0, f->nwords, FMT_LLEN(f) * n_pmll / 100,
206 FMT_ADJ(f) && n_j & AD_P))
207 return 1;
208 f->filled = 0;
209 f->nls--;
210 f->nls_sup = 0;
211 f->nwords = 0;
212 f->fillreq = 0;
213 return 0;
216 /* fill as many lines as possible; if br, put the remaining words in a line */
217 int fmt_fill(struct fmt *f, int br)
219 if (fmt_fillwords(f, br))
220 return 1;
221 if (br) {
222 f->filled = 0;
223 if (f->nwords)
224 if (fmt_sp(f))
225 return 1;
227 return 0;
230 void fmt_space(struct fmt *fmt)
232 fmt->gap += font_swid(dev_font(n_f), n_s, n_ss);
235 int fmt_newline(struct fmt *f)
237 f->gap = 0;
238 if (!FMT_FILL(f)) {
239 f->nls++;
240 fmt_sp(f);
241 return 0;
243 if (f->nls >= 1)
244 if (fmt_sp(f))
245 return 1;
246 if (f->nls == 0 && !f->filled && !f->nwords)
247 fmt_sp(f);
248 f->nls++;
249 return 0;
252 /* format the paragraph after the next word (\p) */
253 int fmt_fillreq(struct fmt *f)
255 if (f->fillreq > 0)
256 if (fmt_fillwords(f, 0))
257 return 1;
258 f->fillreq = f->nwords + 1;
259 return 0;
262 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
263 int hy, int str, int gap)
265 int len = strlen(wb_buf(wb));
266 word->s = xmalloc(len + 1);
267 memcpy(word->s, wb_buf(wb), len + 1);
268 word->wid = wb_wid(wb);
269 word->elsn = wb->els_neg;
270 word->elsp = wb->els_pos;
271 word->hy = hy ? wb_dashwid(wb) : 0;
272 word->str = str;
273 word->gap = gap;
276 /* find explicit hyphenation positions: dashes, \: and \% */
277 static int fmt_hyphmarks(char *word, int *hyidx, int *hyins)
279 char d[ILNLEN];
280 char *s = word;
281 int c, n = 0;
282 while ((c = escread(&s, d)) > 0)
284 if (c < 0 || !strcmp(c_hc, d))
285 return -1;
286 while ((c = escread(&s, d)) >= 0 && n < NHYPHSWORD) {
287 if (!c && !strcmp(c_hc, d)) {
288 hyins[n] = 1;
289 hyidx[n++] = s - word;
291 if (!c && (!strcmp(c_bp, d) || c_isdash(d))) {
292 hyins[n] = 0;
293 hyidx[n++] = s - word;
296 return n;
299 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
301 int hyidx[NHYPHSWORD];
302 int hyins[NHYPHSWORD] = {0};
303 char *src = wb_buf(wb);
304 struct wb wbc;
305 char *beg;
306 char *end;
307 int n, i;
308 int cf, cs, cm;
309 n = fmt_hyphmarks(src, hyidx, hyins);
310 if (n <= 0) {
311 fmt_wb2word(f, &f->words[f->nwords++], wb, 0, 1, gap);
312 return;
314 wb_init(&wbc);
315 for (i = 0; i <= n; i++) {
316 beg = src + (i > 0 ? hyidx[i - 1] : 0);
317 end = src + (i < n ? hyidx[i] : strlen(src));
318 wb_catstr(&wbc, beg, end);
319 fmt_wb2word(f, &f->words[f->nwords++], &wbc,
320 i < n && hyins[i], i == 0, i == 0 ? gap : 0);
321 /* restoring wbc */
322 wb_fnszget(&wbc, &cs, &cf, &cm);
323 wb_reset(&wbc);
324 wb_fnszset(&wbc, cs, cf, cm);
326 wb_done(&wbc);
329 /* the amount of space necessary before the next word */
330 static int fmt_wordgap(struct fmt *f)
332 int nls = f->nls || f->nls_sup;
333 int swid = font_swid(dev_font(n_f), n_s, n_ss);
334 if (f->eos && f->nwords)
335 if ((nls && !f->gap) || (!nls && f->gap == 2 * swid))
336 return swid + font_swid(dev_font(n_f), n_s, n_sss);
337 return (nls && !f->gap && f->nwords) ? swid : f->gap;
340 /* insert wb into fmt */
341 int fmt_word(struct fmt *f, struct wb *wb)
343 if (wb_empty(wb))
344 return 0;
345 if (f->nwords + NHYPHSWORD >= NWORDS || fmt_confchanged(f))
346 if (fmt_fillwords(f, 0))
347 return 1;
348 if (FMT_FILL(f) && f->nls && f->gap)
349 if (fmt_sp(f))
350 return 1;
351 if (!f->nwords) /* apply the new .l and .i */
352 fmt_confupdate(f);
353 f->gap = fmt_wordgap(f);
354 f->eos = wb_eos(wb);
355 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
356 f->filled = 0;
357 f->nls = 0;
358 f->nls_sup = 0;
359 f->gap = 0;
360 return 0;
363 /* the cost of putting lwid words in a line of length llen */
364 static long FMT_COST(int llen, int lwid, int swid, int nspc)
366 /* the ratio that the stretchable spaces of the line should be spread */
367 long ratio = abs((llen - lwid) * 100l / (swid ? swid : 1));
368 /* assigning a cost of 100 to each space stretching 100 percent */
369 return (ratio < 4000 ? ratio * ratio : 16000000) / 100l * (nspc ? nspc : 4);
372 /* the cost of formatting last lines; should prevent widows */
373 static long FMT_LCOST(int llen, int lwid, int swid, int nspc)
375 if (!n_pmll || lwid >= llen * n_pmll / 100)
376 return 0;
377 return FMT_COST(llen, llen * (100 - n_pmll) / 100 + lwid, swid, nspc);
380 /* the cost of putting a line break before word pos */
381 static long fmt_findcost(struct fmt *f, int pos)
383 int i, pen = 0;
384 long cur;
385 int llen = MAX(1, FMT_LLEN(f));
386 int lwid = 0; /* current line length */
387 int swid = 0; /* amount of stretchable spaces */
388 int nspc = 0; /* number of stretchable spaces */
389 if (pos <= 0)
390 return 0;
391 if (f->best_pos[pos] >= 0)
392 return f->best[pos];
393 i = pos - 1;
394 lwid = 0;
395 if (f->words[i].hy) /* the last word is hyphenated */
396 lwid += f->words[i].hy;
397 if (f->words[i].hy)
398 pen = n_hyp;
399 while (i >= 0) {
400 lwid += f->words[i].wid;
401 if (i + 1 < pos)
402 lwid += f->words[i + 1].gap;
403 if (i + 1 < pos && f->words[i + 1].str) {
404 swid += f->words[i + 1].gap;
405 nspc++;
407 if (lwid - (swid * n_ssh / 100) > llen)
408 if (pos - i > 1)
409 break;
410 cur = fmt_findcost(f, i) + FMT_COST(llen, lwid, swid, nspc) +
411 pen * (nspc ? nspc : 1);
412 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
413 f->best_pos[pos] = i;
414 f->best_dep[pos] = f->best_dep[i] + 1;
415 f->best[pos] = cur;
417 i--;
419 return f->best[pos];
422 static int fmt_bestpos(struct fmt *f, int pos)
424 fmt_findcost(f, pos);
425 return MAX(0, f->best_pos[pos]);
428 static int fmt_bestdep(struct fmt *f, int pos)
430 fmt_findcost(f, pos);
431 return MAX(0, f->best_dep[pos]);
434 /* return the last filled word */
435 static int fmt_breakparagraph(struct fmt *f, int pos, int br)
437 int i;
438 int best = -1;
439 long cost, best_cost = 0;
440 int llen = FMT_LLEN(f);
441 int lwid = 0; /* current line length */
442 int swid = 0; /* amount of stretchable spaces */
443 int nspc = 0; /* number of stretchable spaces */
444 if (f->fillreq > 0 && f->fillreq <= f->nwords) {
445 fmt_findcost(f, f->fillreq);
446 return f->fillreq;
448 if (pos > 0 && f->words[pos - 1].wid >= llen) {
449 fmt_findcost(f, pos);
450 return pos;
452 i = pos - 1;
453 lwid = 0;
454 if (f->words[i].hy) /* the last word is hyphenated */
455 lwid += f->words[i].hy;
456 while (i >= 0) {
457 lwid += f->words[i].wid;
458 if (i + 1 < pos)
459 lwid += f->words[i + 1].gap;
460 if (i + 1 < pos && f->words[i + 1].str) {
461 swid += f->words[i + 1].gap;
462 nspc++;
464 if (lwid > llen && i + 1 < pos)
465 break;
466 cost = fmt_findcost(f, i) +
467 (br ? FMT_LCOST(llen, lwid, swid, nspc) : 0);
468 if (best < 0 || cost < best_cost) {
469 best = i;
470 best_cost = cost;
472 i--;
474 return best;
477 /* extract the first nreq formatted lines before the word at pos */
478 static int fmt_head(struct fmt *f, int nreq, int pos)
480 int best = pos; /* best line break for nreq-th line */
481 int prev, next; /* best line breaks without hyphenation */
482 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
483 return pos;
484 /* finding the optimal line break for nreq-th line */
485 while (best > 0 && fmt_bestdep(f, best) > nreq)
486 best = fmt_bestpos(f, best);
487 prev = best;
488 next = best;
489 /* finding closest line breaks without hyphenation */
490 while (prev > 1 && f->words[prev - 1].hy &&
491 fmt_bestdep(f, prev - 1) == nreq)
492 prev--;
493 while (next < pos && f->words[next - 1].hy &&
494 fmt_bestdep(f, next + 1) == nreq)
495 next++;
496 /* choosing the best of them */
497 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
498 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
499 if (!f->words[prev - 1].hy)
500 return prev;
501 if (!f->words[next - 1].hy)
502 return next;
503 return best;
506 /* break f->words[0..end] into lines according to fmt_bestpos() */
507 static int fmt_break(struct fmt *f, int end)
509 int beg, ret = 0;
510 beg = fmt_bestpos(f, end);
511 if (beg > 0)
512 ret += fmt_break(f, beg);
513 f->words[beg].gap = 0;
514 if (fmt_extractline(f, beg, end, FMT_LLEN(f), FMT_ADJ(f) ? 3 : 0))
515 return ret;
516 if (beg > 0)
517 fmt_confupdate(f);
518 return ret + (end - beg);
521 /* estimated number of lines until traps or the end of a page */
522 static int fmt_safelines(void)
524 int lnht = MAX(1, n_L) * n_v;
525 return (f_nexttrap() + lnht - 1) / lnht;
528 /* fill the words collected in the buffer */
529 static int fmt_fillwords(struct fmt *f, int br)
531 int nreq; /* the number of lines until a trap */
532 int end; /* the final line ends before this word */
533 int end_head; /* like end, but only the first nreq lines included */
534 int head = 0; /* only nreq first lines have been formatted */
535 int llen; /* line length, taking shrinkable spaces into account */
536 int n, i;
537 if (!FMT_FILL(f))
538 return 0;
539 llen = fmt_wordslen(f, 0, f->nwords) -
540 fmt_spacessum(f, 0, f->nwords) * n_ssh / 100;
541 /* not enough words to fill */
542 if ((f->fillreq <= 0 || f->nwords < f->fillreq) && llen <= FMT_LLEN(f))
543 return 0;
544 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
545 if (nreq > 0 && nreq <= fmt_nlines(f))
546 return 1;
547 /* resetting positions */
548 for (i = 0; i < f->nwords + 1; i++)
549 f->best_pos[i] = -1;
550 end = fmt_breakparagraph(f, f->nwords, br);
551 if (nreq > 0) {
552 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
553 head = end_head < end;
554 end = end_head;
556 /* recursively add lines */
557 n = fmt_break(f, end);
558 f->nwords -= n;
559 f->fillreq -= n;
560 fmt_movewords(f, 0, n, f->nwords);
561 f->filled = n && !f->nwords;
562 if (f->nwords)
563 f->words[0].gap = 0;
564 if (f->nwords) /* apply the new .l and .i */
565 fmt_confupdate(f);
566 return head || n != end;
569 struct fmt *fmt_alloc(void)
571 struct fmt *fmt = xmalloc(sizeof(*fmt));
572 memset(fmt, 0, sizeof(*fmt));
573 return fmt;
576 void fmt_free(struct fmt *fmt)
578 free(fmt);
581 int fmt_wid(struct fmt *fmt)
583 return fmt_wordslen(fmt, 0, fmt->nwords) + fmt_wordgap(fmt);
586 int fmt_morewords(struct fmt *fmt)
588 return fmt_morelines(fmt) || fmt->nwords;
591 int fmt_morelines(struct fmt *fmt)
593 return fmt->l_head != fmt->l_tail;
596 /* suppress the last newline */
597 void fmt_suppressnl(struct fmt *fmt)
599 if (fmt->nls) {
600 fmt->nls--;
601 fmt->nls_sup = 1;