fmt: new hyphenation support with penalties
[neatroff.git] / fmt.c
blob0c89890a7c2e8fdc476925e40edd4ead7bd935f2
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after hyphenating some of them), and, if
6 * requested, adjusting the space between words in a line. In this
7 * file the first step is referred to as filling.
9 * Inputs are specified via these functions:
10 * + fmt_word(): for appending space-separated words.
11 * + fmt_space(): for appending spaces.
12 * + fmt_newline(): for appending new lines.
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include "roff.h"
20 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li)
21 #define FMT_FILL(f) (!n_ce && n_u)
22 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 #define FMT_SWID(f) (spacewid(n_f, n_s))
25 struct word {
26 char *s;
27 int wid; /* word's width */
28 int elsn, elsp; /* els_neg and els_pos */
29 int gap; /* the space before this word */
30 int hy; /* hyphen width if inserted after this word */
31 int str; /* does the spece before it stretch */
34 struct line {
35 struct sbuf sbuf;
36 int wid, li, ll;
37 int elsn, elsp;
40 struct fmt {
41 /* queued words */
42 struct word words[NWORDS];
43 int nwords;
44 /* queued lines */
45 struct line lines[NLINES];
46 int l_head, l_tail;
47 /* for paragraph adjustment */
48 long best[NWORDS];
49 int best_pos[NWORDS];
50 /* current line */
51 int gap; /* space before the next word */
52 int nls; /* newlines before the next word */
53 int li, ll; /* current line indentation and length */
54 int filled; /* filled all words in the last fmt_fill() */
55 int eos; /* last word ends a sentence */
58 /* .ll, .in and .ti are delayed until the partial line is output */
59 static void fmt_confupdate(struct fmt *f)
61 f->ll = n_l;
62 f->li = n_ti >= 0 ? n_ti : n_i;
63 n_ti = -1;
66 static int fmt_confchanged(struct fmt *f)
68 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i);
71 /* move words inside an fmt struct */
72 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
74 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
77 /* move words from the buffer to s */
78 static int fmt_wordscopy(struct fmt *f, int beg, int end,
79 struct sbuf *s, int *els_neg, int *els_pos)
81 struct word *wcur;
82 int w = 0;
83 int i;
84 *els_neg = 0;
85 *els_pos = 0;
86 for (i = beg; i < end; i++) {
87 wcur = &f->words[i];
88 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
89 sbuf_append(s, wcur->s);
90 w += wcur->wid + wcur->gap;
91 if (wcur->elsn < *els_neg)
92 *els_neg = wcur->elsn;
93 if (wcur->elsp > *els_pos)
94 *els_pos = wcur->elsp;
95 free(wcur->s);
97 if (beg < end) {
98 wcur = &f->words[end - 1];
99 if (wcur->hy)
100 sbuf_append(s, "\\(hy");
101 w += wcur->hy;
103 return w;
106 /* the total width of the specified words in f->words[] */
107 static int fmt_wordslen(struct fmt *f, int beg, int end)
109 int i, w = 0;
110 for (i = beg; i < end; i++)
111 w += f->words[i].wid + f->words[i].gap;
112 return beg < end ? w + f->words[end - 1].hy : 0;
115 /* the number stretchable spaces in f */
116 static int fmt_spaces(struct fmt *f, int beg, int end)
118 int i, n = 0;
119 for (i = beg + 1; i < end; i++)
120 if (f->words[i].str)
121 n++;
122 return n;
125 /* return the next line in the buffer */
126 int fmt_nextline(struct fmt *f, struct sbuf *sbuf, int *w,
127 int *li, int *ll, int *els_neg, int *els_pos)
129 struct line *l;
130 l = &f->lines[f->l_tail];
131 if (f->l_head == f->l_tail)
132 return 1;
133 *li = l->li;
134 *ll = l->ll;
135 *w = l->wid;
136 *els_neg = l->elsn;
137 *els_pos = l->elsp;
138 sbuf_append(sbuf, sbuf_buf(&l->sbuf));
139 sbuf_done(&l->sbuf);
140 f->l_tail = (f->l_tail + 1) % NLINES;
141 return 0;
144 static struct line *fmt_mkline(struct fmt *f)
146 struct line *l = &f->lines[f->l_head];
147 if ((f->l_head + 1) % NLINES == f->l_tail)
148 return NULL;
149 f->l_head = (f->l_head + 1) % NLINES;
150 l->li = f->li;
151 l->ll = f->ll;
152 sbuf_init(&l->sbuf);
153 return l;
156 static int fmt_sp(struct fmt *f)
158 struct line *l;
159 fmt_fill(f, 0);
160 l = fmt_mkline(f);
161 if (!l)
162 return 1;
163 f->filled = 0;
164 f->nls--;
165 l->wid = fmt_wordscopy(f, 0, f->nwords, &l->sbuf, &l->elsn, &l->elsp);
166 f->nwords = 0;
167 return 0;
170 void fmt_br(struct fmt *f)
172 fmt_fill(f, 0);
173 f->filled = 0;
174 if (f->nwords)
175 fmt_sp(f);
178 void fmt_space(struct fmt *fmt)
180 fmt->gap += FMT_SWID(fmt);
183 void fmt_newline(struct fmt *f)
185 f->nls++;
186 f->gap = 0;
187 if (!FMT_FILL(f)) {
188 fmt_sp(f);
189 return;
191 if (f->nls == 1 && !f->filled && !f->nwords)
192 fmt_sp(f);
193 if (f->nls > 1) {
194 if (!f->filled)
195 fmt_sp(f);
196 fmt_sp(f);
200 /* copy word buffer wb in fmt->words[i] */
201 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
203 int hyidx[NHYPHS];
204 int hywid[NHYPHS];
205 int hydash[NHYPHS];
206 struct word *w;
207 char *beg, *end;
208 char *src = wb_buf(wb);
209 int n, i;
210 n = wb_hyph(src, hyidx, hywid, hydash, n_hy);
211 for (i = 0; i <= n; i++) {
212 w = &f->words[f->nwords++];
213 beg = src + (i > 0 ? hyidx[i - 1] : 0);
214 end = src + (i < n ? hyidx[i] : strlen(src));
215 w->s = malloc(end - beg + 1);
216 memcpy(w->s, beg, end - beg);
217 w->s[end - beg] = '\0';
218 if (n) {
219 w->wid = (i < n ? hywid[i] : wb_wid(wb)) -
220 (i > 0 ? hywid[i - 1] : 0);
221 } else {
222 w->wid = wb_wid(wb);
224 w->elsn = wb->els_neg;
225 w->elsp = wb->els_pos;
226 w->hy = i < n ? hydash[i] : 0;
227 w->str = i == 0;
228 w->gap = i == 0 ? gap : 0;
232 /* insert wb into fmt */
233 void fmt_word(struct fmt *f, struct wb *wb)
235 if (f->nwords == NWORDS || fmt_confchanged(f))
236 fmt_fill(f, 0);
237 if (wb_empty(wb) || f->nwords == NWORDS)
238 return;
239 if (FMT_FILL(f) && f->nls && f->gap)
240 fmt_sp(f);
241 if (!f->nwords) /* apply the new .l and .i */
242 fmt_confupdate(f);
243 if (f->nls && !f->gap && f->nwords >= 1)
244 f->gap = (f->nwords && f->eos) ? FMT_SWID(f) * 2 : FMT_SWID(f);
245 f->eos = wb_eos(wb);
246 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
247 f->filled = 0;
248 f->nls = 0;
249 f->gap = 0;
252 /* assuming an empty line has cost 10000; take care of integer overflow */
253 #define POW2(x) ((x) * (x))
254 #define FMT_COST(lwid, llen, pen) (POW2(((llen) - (lwid)) * 1000l / (llen)) / 100l + (pen) * 10l)
256 /* the cost of putting a line break before word pos */
257 static long fmt_findcost(struct fmt *f, int pos)
259 int i, pen = 0;
260 long cur;
261 int lwid = 0;
262 int llen = FMT_LLEN(f);
263 if (pos <= 0)
264 return 0;
265 if (f->best_pos[pos] >= 0)
266 return f->best[pos];
267 i = pos - 1;
268 lwid = 0;
269 if (f->words[i].hy) /* the last word is hyphenated */
270 lwid += f->words[i].hy;
271 if (f->words[i].hy)
272 pen = n_hyp;
273 while (i >= 0) {
274 lwid += f->words[i].wid;
275 if (i + 1 < pos)
276 lwid += f->words[i + 1].gap;
277 if (lwid > llen && pos - i > 1)
278 break;
279 cur = fmt_findcost(f, i) + FMT_COST(lwid, llen, pen);
280 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
281 f->best_pos[pos] = i;
282 f->best[pos] = cur;
284 i--;
286 return f->best[pos];
289 static int fmt_bestpos(struct fmt *f, int pos)
291 fmt_findcost(f, pos);
292 return MAX(0, f->best_pos[pos]);
295 /* return the last filled word */
296 static int fmt_breakparagraph(struct fmt *f, int pos, int all)
298 int i;
299 long best = 0;
300 int best_i = -1;
301 int llen = FMT_LLEN(f);
302 int lwid = 0;
303 if (all || (pos > 0 && f->words[pos - 1].wid >= llen)) {
304 fmt_findcost(f, pos);
305 return pos;
307 i = pos - 1;
308 lwid = 0;
309 if (f->words[i].hy) /* the last word is hyphenated */
310 lwid += f->words[i].hy;
311 while (i >= 0) {
312 lwid += f->words[i].wid;
313 if (i + 1 < pos)
314 lwid += f->words[i + 1].gap;
315 if (lwid > llen && pos - i > 1)
316 break;
317 if (best_i < 0 || fmt_findcost(f, i) < best) {
318 best_i = i;
319 best = fmt_findcost(f, i);
321 i--;
323 return best_i;
326 /* break f->words[0..end] into lines according to fmt_bestpos() */
327 static int fmt_break(struct fmt *f, int end)
329 int llen, fmt_div, fmt_rem, beg;
330 int w, i, nspc;
331 struct line *l;
332 int ret = 0;
333 beg = fmt_bestpos(f, end);
334 if (beg > 0)
335 ret += fmt_break(f, beg);
336 l = fmt_mkline(f);
337 if (!l)
338 return ret;
339 llen = FMT_LLEN(f);
340 f->words[beg].gap = 0;
341 w = fmt_wordslen(f, beg, end);
342 nspc = fmt_spaces(f, beg, end);
343 if (FMT_ADJ(f) && nspc) {
344 fmt_div = (llen - w) / nspc;
345 fmt_rem = (llen - w) % nspc;
346 for (i = beg + 1; i < end; i++)
347 if (f->words[i].str)
348 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
350 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
351 if (beg > 0)
352 fmt_confupdate(f);
353 return ret + (end - beg);
356 int fmt_fill(struct fmt *f, int all)
358 int end, n, i;
359 if (!FMT_FILL(f))
360 return 0;
361 /* not enough words to fill */
362 if (!all && fmt_wordslen(f, 0, f->nwords) <= FMT_LLEN(f))
363 return 0;
364 /* resetting positions */
365 for (i = 0; i < f->nwords + 1; i++)
366 f->best_pos[i] = -1;
367 end = fmt_breakparagraph(f, f->nwords, all);
368 /* recursively add lines */
369 n = fmt_break(f, end);
370 f->nwords -= n;
371 fmt_movewords(f, 0, n, f->nwords);
372 f->filled = n && !f->nwords;
373 if (f->nwords)
374 f->words[0].gap = 0;
375 if (f->nwords) /* apply the new .l and .i */
376 fmt_confupdate(f);
377 return n != end;
380 struct fmt *fmt_alloc(void)
382 struct fmt *fmt = malloc(sizeof(*fmt));
383 memset(fmt, 0, sizeof(*fmt));
384 return fmt;
387 void fmt_free(struct fmt *fmt)
389 free(fmt);
392 int fmt_wid(struct fmt *fmt)
394 return fmt_wordslen(fmt, 0, fmt->nwords) +
395 (fmt->nls ? FMT_SWID(fmt) : fmt->gap);
398 int fmt_morewords(struct fmt *fmt)
400 return fmt_morelines(fmt) || fmt->nwords;
403 int fmt_morelines(struct fmt *fmt)
405 return fmt->l_head != fmt->l_tail;