reg: \n(.H and \n(.V
[neatroff.git] / fmt.c
blob6464e43378f7bec7ceeef219bb0a18900787df0d
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "roff.h"
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 struct word {
24 char *s;
25 int wid; /* word's width */
26 int elsn, elsp; /* els_neg and els_pos */
27 int gap; /* the space before this word */
28 int hy; /* hyphen width if inserted after this word */
29 int str; /* does the space before it stretch */
32 struct line {
33 struct sbuf sbuf;
34 int wid, li, ll;
35 int elsn, elsp;
38 struct fmt {
39 /* queued words */
40 struct word words[NWORDS];
41 int nwords;
42 /* queued lines */
43 struct line lines[NLINES];
44 int l_head, l_tail;
45 /* for paragraph adjustment */
46 long best[NWORDS];
47 int best_pos[NWORDS];
48 int best_dep[NWORDS];
49 /* current line */
50 int gap; /* space before the next word */
51 int nls; /* newlines before the next word */
52 int nls_sup; /* suppressed newlines */
53 int li, ll; /* current line indentation and length */
54 int filled; /* filled all words in the last fmt_fill() */
55 int eos; /* last word ends a sentence */
56 int fillreq; /* fill after the last word (\p) */
59 /* .ll, .in and .ti are delayed until the partial line is output */
60 static void fmt_confupdate(struct fmt *f)
62 f->ll = n_l;
63 f->li = n_ti >= 0 ? n_ti : n_i;
64 n_ti = -1;
67 static int fmt_confchanged(struct fmt *f)
69 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i);
72 /* move words inside an fmt struct */
73 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
75 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
78 /* move words from the buffer to s */
79 static int fmt_wordscopy(struct fmt *f, int beg, int end,
80 struct sbuf *s, int *els_neg, int *els_pos)
82 struct word *wcur;
83 int w = 0;
84 int i;
85 *els_neg = 0;
86 *els_pos = 0;
87 for (i = beg; i < end; i++) {
88 wcur = &f->words[i];
89 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
90 sbuf_append(s, wcur->s);
91 w += wcur->wid + wcur->gap;
92 if (wcur->elsn < *els_neg)
93 *els_neg = wcur->elsn;
94 if (wcur->elsp > *els_pos)
95 *els_pos = wcur->elsp;
96 free(wcur->s);
98 if (beg < end) {
99 wcur = &f->words[end - 1];
100 if (wcur->hy)
101 sbuf_append(s, "\\(hy");
102 w += wcur->hy;
104 return w;
107 static int fmt_nlines(struct fmt *f)
109 if (f->l_tail <= f->l_head)
110 return f->l_head - f->l_tail;
111 return NLINES - f->l_tail + f->l_head;
114 /* the total width of the specified words in f->words[] */
115 static int fmt_wordslen(struct fmt *f, int beg, int end)
117 int i, w = 0;
118 for (i = beg; i < end; i++)
119 w += f->words[i].wid + f->words[i].gap;
120 return beg < end ? w + f->words[end - 1].hy : 0;
123 /* the number of stretchable spaces in f */
124 static int fmt_spaces(struct fmt *f, int beg, int end)
126 int i, n = 0;
127 for (i = beg + 1; i < end; i++)
128 if (f->words[i].str)
129 n++;
130 return n;
133 /* the amount of stretchable spaces in f */
134 static int fmt_spacessum(struct fmt *f, int beg, int end)
136 int i, n = 0;
137 for (i = beg + 1; i < end; i++)
138 if (f->words[i].str)
139 n += f->words[i].gap;
140 return n;
143 /* return the next line in the buffer */
144 int fmt_nextline(struct fmt *f, struct sbuf *sbuf, int *w,
145 int *li, int *ll, int *els_neg, int *els_pos)
147 struct line *l;
148 l = &f->lines[f->l_tail];
149 if (f->l_head == f->l_tail)
150 return 1;
151 *li = l->li;
152 *ll = l->ll;
153 *w = l->wid;
154 *els_neg = l->elsn;
155 *els_pos = l->elsp;
156 sbuf_append(sbuf, sbuf_buf(&l->sbuf));
157 sbuf_done(&l->sbuf);
158 f->l_tail = (f->l_tail + 1) % NLINES;
159 return 0;
162 static struct line *fmt_mkline(struct fmt *f)
164 struct line *l = &f->lines[f->l_head];
165 if ((f->l_head + 1) % NLINES == f->l_tail)
166 return NULL;
167 f->l_head = (f->l_head + 1) % NLINES;
168 l->li = f->li;
169 l->ll = f->ll;
170 sbuf_init(&l->sbuf);
171 return l;
174 static int fmt_sp(struct fmt *f)
176 struct line *l;
177 if (fmt_fill(f))
178 return 1;
179 l = fmt_mkline(f);
180 if (!l)
181 return 1;
182 f->filled = 0;
183 f->nls--;
184 f->nls_sup = 0;
185 l->wid = fmt_wordscopy(f, 0, f->nwords, &l->sbuf, &l->elsn, &l->elsp);
186 f->nwords = 0;
187 f->fillreq = 0;
188 return 0;
191 int fmt_br(struct fmt *f)
193 if (fmt_fill(f))
194 return 1;
195 f->filled = 0;
196 if (f->nwords)
197 fmt_sp(f);
198 return 0;
201 void fmt_space(struct fmt *fmt)
203 fmt->gap += font_swid(dev_font(n_f), n_s, n_ss);
206 int fmt_newline(struct fmt *f)
208 f->gap = 0;
209 if (!FMT_FILL(f)) {
210 f->nls++;
211 fmt_sp(f);
212 return 0;
214 if (f->nls >= 1)
215 if (fmt_sp(f))
216 return 1;
217 if (f->nls == 0 && !f->filled && !f->nwords)
218 fmt_sp(f);
219 f->nls++;
220 return 0;
223 /* format the paragraph after the next word (\p) */
224 int fmt_fillreq(struct fmt *f)
226 if (f->fillreq > 0)
227 if (fmt_fill(f))
228 return 1;
229 f->fillreq = f->nwords + 1;
230 return 0;
233 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
234 int hy, int str, int gap)
236 int len = strlen(wb_buf(wb));
237 word->s = xmalloc(len + 1);
238 memcpy(word->s, wb_buf(wb), len + 1);
239 word->wid = wb_wid(wb);
240 word->elsn = wb->els_neg;
241 word->elsp = wb->els_pos;
242 word->hy = hy ? wb_dashwid(wb) : 0;
243 word->str = str;
244 word->gap = gap;
247 /* find explicit hyphenation positions: dashes, \: and \% */
248 static int fmt_hyphmarks(char *word, int *hyidx, int *hyins)
250 char d[ILNLEN];
251 char *s = word;
252 int c, n = 0;
253 while ((c = escread(&s, d)) > 0)
255 if (c < 0 || !strcmp(c_hc, d))
256 return -1;
257 while ((c = escread(&s, d)) >= 0 && n < NHYPHSWORD) {
258 if (!c && !strcmp(c_hc, d)) {
259 hyins[n] = 1;
260 hyidx[n++] = s - word;
262 if (!c && (!strcmp(c_bp, d) || c_isdash(d))) {
263 hyins[n] = 0;
264 hyidx[n++] = s - word;
267 return n;
270 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
272 int hyidx[NHYPHSWORD];
273 int hyins[NHYPHSWORD] = {0};
274 char *src = wb_buf(wb);
275 struct wb wbc;
276 char *beg;
277 char *end;
278 int n, i;
279 int cf, cs, cm;
280 n = fmt_hyphmarks(src, hyidx, hyins);
281 if (n <= 0) {
282 fmt_wb2word(f, &f->words[f->nwords++], wb, 0, 1, gap);
283 return;
285 wb_init(&wbc);
286 for (i = 0; i <= n; i++) {
287 beg = src + (i > 0 ? hyidx[i - 1] : 0);
288 end = src + (i < n ? hyidx[i] : strlen(src));
289 wb_catstr(&wbc, beg, end);
290 fmt_wb2word(f, &f->words[f->nwords++], &wbc,
291 i < n && hyins[i], i == 0, i == 0 ? gap : 0);
292 /* restoring wbc */
293 wb_fnszget(&wbc, &cs, &cf, &cm);
294 wb_reset(&wbc);
295 wb_fnszset(&wbc, cs, cf, cm);
297 wb_done(&wbc);
300 /* the amount of space necessary before the next word */
301 static int fmt_wordgap(struct fmt *f)
303 int nls = f->nls || f->nls_sup;
304 int swid = font_swid(dev_font(n_f), n_s, n_ss);
305 if (f->eos && f->nwords)
306 if ((nls && !f->gap) || (!nls && f->gap == 2 * swid))
307 return swid + font_swid(dev_font(n_f), n_s, n_sss);
308 return (nls && !f->gap && f->nwords) ? swid : f->gap;
311 /* insert wb into fmt */
312 int fmt_word(struct fmt *f, struct wb *wb)
314 if (wb_empty(wb))
315 return 0;
316 if (f->nwords + NHYPHSWORD >= NWORDS || fmt_confchanged(f))
317 if (fmt_fill(f))
318 return 1;
319 if (FMT_FILL(f) && f->nls && f->gap)
320 if (fmt_sp(f))
321 return 1;
322 if (!f->nwords) /* apply the new .l and .i */
323 fmt_confupdate(f);
324 f->gap = fmt_wordgap(f);
325 f->eos = wb_eos(wb);
326 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
327 f->filled = 0;
328 f->nls = 0;
329 f->nls_sup = 0;
330 f->gap = 0;
331 return 0;
334 /* assuming an empty line has cost 10000; take care of integer overflow */
335 #define POW2(x) ((x) * (x))
336 #define FMT_COST(lwid, llen, pen) (POW2(((llen) - (lwid)) * 1000l / (llen)) / 100l + (pen) * 10l)
338 /* the cost of putting a line break before word pos */
339 static long fmt_findcost(struct fmt *f, int pos)
341 int i, pen = 0;
342 long cur;
343 int lwid = 0; /* current line length */
344 int swid = 0; /* amount of spaces */
345 int llen = MAX(1, FMT_LLEN(f));
346 if (pos <= 0)
347 return 0;
348 if (f->best_pos[pos] >= 0)
349 return f->best[pos];
350 i = pos - 1;
351 lwid = 0;
352 if (f->words[i].hy) /* the last word is hyphenated */
353 lwid += f->words[i].hy;
354 if (f->words[i].hy)
355 pen = n_hyp;
356 while (i >= 0) {
357 lwid += f->words[i].wid;
358 if (i + 1 < pos)
359 lwid += f->words[i + 1].gap;
360 if (i + 1 < pos && f->words[i + 1].str)
361 swid += f->words[i + 1].gap;
362 if (lwid - (swid * n_ssh / 100) > llen)
363 if (pos - i > 1)
364 break;
365 cur = fmt_findcost(f, i) + FMT_COST(lwid, llen, pen);
366 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
367 f->best_pos[pos] = i;
368 f->best_dep[pos] = f->best_dep[i] + 1;
369 f->best[pos] = cur;
371 i--;
373 return f->best[pos];
376 static int fmt_bestpos(struct fmt *f, int pos)
378 fmt_findcost(f, pos);
379 return MAX(0, f->best_pos[pos]);
382 static int fmt_bestdep(struct fmt *f, int pos)
384 fmt_findcost(f, pos);
385 return MAX(0, f->best_dep[pos]);
388 /* return the last filled word */
389 static int fmt_breakparagraph(struct fmt *f, int pos)
391 int i;
392 int best = -1;
393 int llen = FMT_LLEN(f);
394 int lwid = 0;
395 if (f->fillreq > 0 && f->fillreq <= f->nwords) {
396 fmt_findcost(f, f->fillreq);
397 return f->fillreq;
399 if (pos > 0 && f->words[pos - 1].wid >= llen) {
400 fmt_findcost(f, pos);
401 return pos;
403 i = pos - 1;
404 lwid = 0;
405 if (f->words[i].hy) /* the last word is hyphenated */
406 lwid += f->words[i].hy;
407 while (i >= 0) {
408 lwid += f->words[i].wid;
409 if (i + 1 < pos)
410 lwid += f->words[i + 1].gap;
411 if (lwid > llen && i + 1 < pos)
412 break;
413 if (best < 0 || fmt_findcost(f, i) < fmt_findcost(f, best))
414 best = i;
415 i--;
417 return best;
420 /* extract the first nreq formatted lines before the word at pos */
421 static int fmt_head(struct fmt *f, int nreq, int pos)
423 int best = pos; /* best line break for nreq-th line */
424 int prev, next; /* best line breaks without hyphenation */
425 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
426 return pos;
427 /* finding the optimal line break for nreq-th line */
428 while (best > 0 && fmt_bestdep(f, best) > nreq)
429 best = fmt_bestpos(f, best);
430 prev = best;
431 next = best;
432 /* finding closest line breaks without hyphenation */
433 while (prev > 1 && f->words[prev - 1].hy &&
434 fmt_bestdep(f, prev - 1) == nreq)
435 prev--;
436 while (next < pos && f->words[next - 1].hy &&
437 fmt_bestdep(f, next + 1) == nreq)
438 next++;
439 /* choosing the best of them */
440 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
441 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
442 if (!f->words[prev - 1].hy)
443 return prev;
444 if (!f->words[next - 1].hy)
445 return next;
446 return best;
449 /* break f->words[0..end] into lines according to fmt_bestpos() */
450 static int fmt_break(struct fmt *f, int end)
452 int llen, fmt_div, fmt_rem, beg;
453 int w, i, nspc;
454 struct line *l;
455 int ret = 0;
456 beg = fmt_bestpos(f, end);
457 if (beg > 0)
458 ret += fmt_break(f, beg);
459 l = fmt_mkline(f);
460 if (!l)
461 return ret;
462 llen = FMT_LLEN(f);
463 f->words[beg].gap = 0;
464 w = fmt_wordslen(f, beg, end);
465 nspc = fmt_spaces(f, beg, end);
466 if (FMT_ADJ(f) && nspc) {
467 fmt_div = (llen - w) / nspc;
468 fmt_rem = (llen - w) % nspc;
469 if (fmt_rem < 0) {
470 fmt_div--;
471 fmt_rem += nspc;
473 for (i = beg + 1; i < end; i++)
474 if (f->words[i].str)
475 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
477 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
478 if (beg > 0)
479 fmt_confupdate(f);
480 return ret + (end - beg);
483 /* estimated number of lines until traps or the end of a page */
484 static int fmt_safelines(void)
486 int lnht = MAX(1, n_L) * n_v;
487 return (f_nexttrap() + lnht - 1) / lnht;
490 /* fill the words collected in the buffer */
491 int fmt_fill(struct fmt *f)
493 int nreq; /* the number of lines until a trap */
494 int end; /* the final line ends before this word */
495 int end_head; /* like end, but only the first nreq lines included */
496 int head = 0; /* only nreq first lines have been formatted */
497 int llen; /* line length, taking shrinkable spaces into account */
498 int n, i;
499 if (!FMT_FILL(f))
500 return 0;
501 llen = fmt_wordslen(f, 0, f->nwords) -
502 fmt_spacessum(f, 0, f->nwords) * n_ssh / 100;
503 /* not enough words to fill */
504 if ((f->fillreq <= 0 || f->nwords < f->fillreq) && llen <= FMT_LLEN(f))
505 return 0;
506 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
507 if (nreq > 0 && nreq <= fmt_nlines(f))
508 return 1;
509 /* resetting positions */
510 for (i = 0; i < f->nwords + 1; i++)
511 f->best_pos[i] = -1;
512 end = fmt_breakparagraph(f, f->nwords);
513 if (nreq > 0) {
514 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
515 head = end_head < end;
516 end = end_head;
518 /* recursively add lines */
519 n = fmt_break(f, end);
520 f->nwords -= n;
521 f->fillreq -= n;
522 fmt_movewords(f, 0, n, f->nwords);
523 f->filled = n && !f->nwords;
524 if (f->nwords)
525 f->words[0].gap = 0;
526 if (f->nwords) /* apply the new .l and .i */
527 fmt_confupdate(f);
528 return head || n != end;
531 struct fmt *fmt_alloc(void)
533 struct fmt *fmt = xmalloc(sizeof(*fmt));
534 memset(fmt, 0, sizeof(*fmt));
535 return fmt;
538 void fmt_free(struct fmt *fmt)
540 free(fmt);
543 int fmt_wid(struct fmt *fmt)
545 return fmt_wordslen(fmt, 0, fmt->nwords) + fmt_wordgap(fmt);
548 int fmt_morewords(struct fmt *fmt)
550 return fmt_morelines(fmt) || fmt->nwords;
553 int fmt_morelines(struct fmt *fmt)
555 return fmt->l_head != fmt->l_tail;
558 /* suppress the last newline */
559 void fmt_suppressnl(struct fmt *fmt)
561 if (fmt->nls) {
562 fmt->nls--;
563 fmt->nls_sup = 1;