char: break escape sequence reading functions
[neatroff.git] / fmt.c
blob0b3172f5d46d6fd71997ea3905e4690ea8660d3e
1 /*
2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after hyphenating some of them),
6 * and, if requested, adjusting the space between words in a line.
7 * In this file the first step is referred to as filling.
9 * Functions like fmt_word() return nonzero on failure, which
10 * means the call should be repeated after fetching previously
11 * formatted lines via fmt_nextline().
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include "roff.h"
18 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li)
19 #define FMT_FILL(f) (!n_ce && n_u)
20 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
22 struct word {
23 char *s;
24 int wid; /* word's width */
25 int elsn, elsp; /* els_neg and els_pos */
26 int gap; /* the space before this word */
27 int hy; /* hyphen width if inserted after this word */
28 int str; /* does the space before it stretch */
31 struct line {
32 struct sbuf sbuf;
33 int wid, li, ll;
34 int elsn, elsp;
37 struct fmt {
38 /* queued words */
39 struct word words[NWORDS];
40 int nwords;
41 /* queued lines */
42 struct line lines[NLINES];
43 int l_head, l_tail;
44 /* for paragraph adjustment */
45 long best[NWORDS];
46 int best_pos[NWORDS];
47 int best_dep[NWORDS];
48 /* current line */
49 int gap; /* space before the next word */
50 int nls; /* newlines before the next word */
51 int nls_sup; /* suppressed newlines */
52 int li, ll; /* current line indentation and length */
53 int filled; /* filled all words in the last fmt_fill() */
54 int eos; /* last word ends a sentence */
55 int fillreq; /* fill after the last word (\p) */
58 /* .ll, .in and .ti are delayed until the partial line is output */
59 static void fmt_confupdate(struct fmt *f)
61 f->ll = n_l;
62 f->li = n_ti >= 0 ? n_ti : n_i;
63 n_ti = -1;
66 static int fmt_confchanged(struct fmt *f)
68 return f->ll != n_l || f->li != (n_ti >= 0 ? n_ti : n_i);
71 /* move words inside an fmt struct */
72 static void fmt_movewords(struct fmt *a, int dst, int src, int len)
74 memmove(a->words + dst, a->words + src, len * sizeof(a->words[0]));
77 /* move words from the buffer to s */
78 static int fmt_wordscopy(struct fmt *f, int beg, int end,
79 struct sbuf *s, int *els_neg, int *els_pos)
81 struct word *wcur;
82 int w = 0;
83 int i;
84 *els_neg = 0;
85 *els_pos = 0;
86 for (i = beg; i < end; i++) {
87 wcur = &f->words[i];
88 sbuf_printf(s, "%ch'%du'", c_ec, wcur->gap);
89 sbuf_append(s, wcur->s);
90 w += wcur->wid + wcur->gap;
91 if (wcur->elsn < *els_neg)
92 *els_neg = wcur->elsn;
93 if (wcur->elsp > *els_pos)
94 *els_pos = wcur->elsp;
95 free(wcur->s);
97 if (beg < end) {
98 wcur = &f->words[end - 1];
99 if (wcur->hy)
100 sbuf_append(s, "\\(hy");
101 w += wcur->hy;
103 return w;
106 static int fmt_nlines(struct fmt *f)
108 if (f->l_tail <= f->l_head)
109 return f->l_head - f->l_tail;
110 return NLINES - f->l_tail + f->l_head;
113 /* the total width of the specified words in f->words[] */
114 static int fmt_wordslen(struct fmt *f, int beg, int end)
116 int i, w = 0;
117 for (i = beg; i < end; i++)
118 w += f->words[i].wid + f->words[i].gap;
119 return beg < end ? w + f->words[end - 1].hy : 0;
122 /* the number of stretchable spaces in f */
123 static int fmt_spaces(struct fmt *f, int beg, int end)
125 int i, n = 0;
126 for (i = beg + 1; i < end; i++)
127 if (f->words[i].str)
128 n++;
129 return n;
132 /* the amount of stretchable spaces in f */
133 static int fmt_spacessum(struct fmt *f, int beg, int end)
135 int i, n = 0;
136 for (i = beg + 1; i < end; i++)
137 if (f->words[i].str)
138 n += f->words[i].gap;
139 return n;
142 /* return the next line in the buffer */
143 int fmt_nextline(struct fmt *f, struct sbuf *sbuf, int *w,
144 int *li, int *ll, int *els_neg, int *els_pos)
146 struct line *l;
147 l = &f->lines[f->l_tail];
148 if (f->l_head == f->l_tail)
149 return 1;
150 *li = l->li;
151 *ll = l->ll;
152 *w = l->wid;
153 *els_neg = l->elsn;
154 *els_pos = l->elsp;
155 sbuf_append(sbuf, sbuf_buf(&l->sbuf));
156 sbuf_done(&l->sbuf);
157 f->l_tail = (f->l_tail + 1) % NLINES;
158 return 0;
161 static struct line *fmt_mkline(struct fmt *f)
163 struct line *l = &f->lines[f->l_head];
164 if ((f->l_head + 1) % NLINES == f->l_tail)
165 return NULL;
166 f->l_head = (f->l_head + 1) % NLINES;
167 l->li = f->li;
168 l->ll = f->ll;
169 sbuf_init(&l->sbuf);
170 return l;
173 static int fmt_sp(struct fmt *f)
175 struct line *l;
176 if (fmt_fill(f))
177 return 1;
178 l = fmt_mkline(f);
179 if (!l)
180 return 1;
181 f->filled = 0;
182 f->nls--;
183 f->nls_sup = 0;
184 l->wid = fmt_wordscopy(f, 0, f->nwords, &l->sbuf, &l->elsn, &l->elsp);
185 f->nwords = 0;
186 f->fillreq = 0;
187 return 0;
190 int fmt_br(struct fmt *f)
192 if (fmt_fill(f))
193 return 1;
194 f->filled = 0;
195 if (f->nwords)
196 fmt_sp(f);
197 return 0;
200 void fmt_space(struct fmt *fmt)
202 fmt->gap += N_SS(n_f, n_s);
205 int fmt_newline(struct fmt *f)
207 f->gap = 0;
208 if (!FMT_FILL(f)) {
209 f->nls++;
210 fmt_sp(f);
211 return 0;
213 if (f->nls >= 1)
214 if (fmt_sp(f))
215 return 1;
216 if (f->nls == 0 && !f->filled && !f->nwords)
217 fmt_sp(f);
218 f->nls++;
219 return 0;
222 /* format the paragraph after the next word (\p) */
223 int fmt_fillreq(struct fmt *f)
225 if (f->fillreq > 0)
226 if (fmt_fill(f))
227 return 1;
228 f->fillreq = f->nwords + 1;
229 return 0;
232 static void fmt_wb2word(struct fmt *f, struct word *word, struct wb *wb,
233 int hy, int str, int gap)
235 int len = strlen(wb_buf(wb));
236 word->s = xmalloc(len + 1);
237 memcpy(word->s, wb_buf(wb), len + 1);
238 word->wid = wb_wid(wb);
239 word->elsn = wb->els_neg;
240 word->elsp = wb->els_pos;
241 word->hy = hy ? wb_dashwid(wb) : 0;
242 word->str = str;
243 word->gap = gap;
246 static void fmt_insertword(struct fmt *f, struct wb *wb, int gap)
248 int hyidx[NHYPHSWORD];
249 int hyins[NHYPHSWORD] = {0};
250 char *src = wb_buf(wb);
251 struct wb wbc;
252 char *beg;
253 char *end;
254 int n, i;
255 int cf, cs, cm;
256 int hy = 0; /* insert hyphens */
257 n = wb_hyphmark(src, hyidx, hyins);
258 if (!n && n_hy && (n = wb_hyph(src, hyidx, n_hy)) > 0)
259 hy = 1;
260 if (n <= 0) {
261 fmt_wb2word(f, &f->words[f->nwords++], wb, 0, 1, gap);
262 return;
264 wb_init(&wbc);
265 for (i = 0; i <= n; i++) {
266 beg = src + (i > 0 ? hyidx[i - 1] : 0);
267 end = src + (i < n ? hyidx[i] : strlen(src));
268 wb_catstr(&wbc, beg, end);
269 fmt_wb2word(f, &f->words[f->nwords++], &wbc,
270 i < n && (hy || hyins[i]), i == 0, i == 0 ? gap : 0);
271 /* restoring wbc */
272 wb_fnszget(&wbc, &cs, &cf, &cm);
273 wb_reset(&wbc);
274 wb_fnszset(&wbc, cs, cf, cm);
276 wb_done(&wbc);
279 /* the amount of space necessary before the next word */
280 static int fmt_wordgap(struct fmt *f)
282 int nls = f->nls || f->nls_sup;
283 if (f->eos && f->nwords)
284 if ((nls && !f->gap) || (!nls && f->gap == 2 * N_SS(n_f, n_s)))
285 return N_SS(n_f, n_s) + N_SSS(n_f, n_s);
286 return (nls && !f->gap && f->nwords) ? N_SS(n_f, n_s) : f->gap;
289 /* insert wb into fmt */
290 int fmt_word(struct fmt *f, struct wb *wb)
292 if (wb_empty(wb))
293 return 0;
294 if (f->nwords + NHYPHSWORD >= NWORDS || fmt_confchanged(f))
295 if (fmt_fill(f))
296 return 1;
297 if (FMT_FILL(f) && f->nls && f->gap)
298 if (fmt_sp(f))
299 return 1;
300 if (!f->nwords) /* apply the new .l and .i */
301 fmt_confupdate(f);
302 f->gap = fmt_wordgap(f);
303 f->eos = wb_eos(wb);
304 fmt_insertword(f, wb, f->filled ? 0 : f->gap);
305 f->filled = 0;
306 f->nls = 0;
307 f->nls_sup = 0;
308 f->gap = 0;
309 return 0;
312 /* assuming an empty line has cost 10000; take care of integer overflow */
313 #define POW2(x) ((x) * (x))
314 #define FMT_COST(lwid, llen, pen) (POW2(((llen) - (lwid)) * 1000l / (llen)) / 100l + (pen) * 10l)
316 /* the cost of putting a line break before word pos */
317 static long fmt_findcost(struct fmt *f, int pos)
319 int i, pen = 0;
320 long cur;
321 int lwid = 0; /* current line length */
322 int swid = 0; /* amount of spaces */
323 int llen = MAX(1, FMT_LLEN(f));
324 if (pos <= 0)
325 return 0;
326 if (f->best_pos[pos] >= 0)
327 return f->best[pos];
328 i = pos - 1;
329 lwid = 0;
330 if (f->words[i].hy) /* the last word is hyphenated */
331 lwid += f->words[i].hy;
332 if (f->words[i].hy)
333 pen = n_hyp;
334 while (i >= 0) {
335 lwid += f->words[i].wid;
336 if (i + 1 < pos)
337 lwid += f->words[i + 1].gap;
338 if (i + 1 < pos && f->words[i + 1].str)
339 swid += f->words[i + 1].gap;
340 if (lwid - (swid * n_ssh / 100) > llen)
341 if (pos - i > 1)
342 break;
343 cur = fmt_findcost(f, i) + FMT_COST(lwid, llen, pen);
344 if (f->best_pos[pos] < 0 || cur < f->best[pos]) {
345 f->best_pos[pos] = i;
346 f->best_dep[pos] = f->best_dep[i] + 1;
347 f->best[pos] = cur;
349 i--;
351 return f->best[pos];
354 static int fmt_bestpos(struct fmt *f, int pos)
356 fmt_findcost(f, pos);
357 return MAX(0, f->best_pos[pos]);
360 static int fmt_bestdep(struct fmt *f, int pos)
362 fmt_findcost(f, pos);
363 return MAX(0, f->best_dep[pos]);
366 /* return the last filled word */
367 static int fmt_breakparagraph(struct fmt *f, int pos)
369 int i;
370 int best = -1;
371 int llen = FMT_LLEN(f);
372 int lwid = 0;
373 if (f->fillreq > 0 && f->fillreq <= f->nwords) {
374 fmt_findcost(f, f->fillreq);
375 return f->fillreq;
377 if (pos > 0 && f->words[pos - 1].wid >= llen) {
378 fmt_findcost(f, pos);
379 return pos;
381 i = pos - 1;
382 lwid = 0;
383 if (f->words[i].hy) /* the last word is hyphenated */
384 lwid += f->words[i].hy;
385 while (i >= 0) {
386 lwid += f->words[i].wid;
387 if (i + 1 < pos)
388 lwid += f->words[i + 1].gap;
389 if (lwid > llen && i + 1 < pos)
390 break;
391 if (best < 0 || fmt_findcost(f, i) < fmt_findcost(f, best))
392 best = i;
393 i--;
395 return best;
398 /* extract the first nreq formatted lines before the word at pos */
399 static int fmt_head(struct fmt *f, int nreq, int pos)
401 int best = pos; /* best line break for nreq-th line */
402 int prev, next; /* best line breaks without hyphenation */
403 if (nreq <= 0 || fmt_bestdep(f, pos) < nreq)
404 return pos;
405 /* finding the optimal line break for nreq-th line */
406 while (best > 0 && fmt_bestdep(f, best) > nreq)
407 best = fmt_bestpos(f, best);
408 prev = best;
409 next = best;
410 /* finding closest line breaks without hyphenation */
411 while (prev > 1 && f->words[prev - 1].hy &&
412 fmt_bestdep(f, prev - 1) == nreq)
413 prev--;
414 while (next < pos && f->words[next - 1].hy &&
415 fmt_bestdep(f, next + 1) == nreq)
416 next++;
417 /* choosing the best of them */
418 if (!f->words[prev - 1].hy && !f->words[next - 1].hy)
419 return fmt_findcost(f, prev) <= fmt_findcost(f, next) ? prev : next;
420 if (!f->words[prev - 1].hy)
421 return prev;
422 if (!f->words[next - 1].hy)
423 return next;
424 return best;
427 /* break f->words[0..end] into lines according to fmt_bestpos() */
428 static int fmt_break(struct fmt *f, int end)
430 int llen, fmt_div, fmt_rem, beg;
431 int w, i, nspc;
432 struct line *l;
433 int ret = 0;
434 beg = fmt_bestpos(f, end);
435 if (beg > 0)
436 ret += fmt_break(f, beg);
437 l = fmt_mkline(f);
438 if (!l)
439 return ret;
440 llen = FMT_LLEN(f);
441 f->words[beg].gap = 0;
442 w = fmt_wordslen(f, beg, end);
443 nspc = fmt_spaces(f, beg, end);
444 if (FMT_ADJ(f) && nspc) {
445 fmt_div = (llen - w) / nspc;
446 fmt_rem = (llen - w) % nspc;
447 if (fmt_rem < 0) {
448 fmt_div--;
449 fmt_rem += nspc;
451 for (i = beg + 1; i < end; i++)
452 if (f->words[i].str)
453 f->words[i].gap += fmt_div + (fmt_rem-- > 0);
455 l->wid = fmt_wordscopy(f, beg, end, &l->sbuf, &l->elsn, &l->elsp);
456 if (beg > 0)
457 fmt_confupdate(f);
458 return ret + (end - beg);
461 /* estimated number of lines until traps or the end of a page */
462 static int fmt_safelines(void)
464 int lnht = MAX(1, n_L) * n_v;
465 return (f_nexttrap() + lnht - 1) / lnht;
468 /* fill the words collected in the buffer */
469 int fmt_fill(struct fmt *f)
471 int nreq; /* the number of lines until a trap */
472 int end; /* the final line ends before this word */
473 int end_head; /* like end, but only the first nreq lines included */
474 int head = 0; /* only nreq first lines have been formatted */
475 int llen; /* line length, taking shrinkable spaces into account */
476 int n, i;
477 if (!FMT_FILL(f))
478 return 0;
479 llen = fmt_wordslen(f, 0, f->nwords) -
480 fmt_spacessum(f, 0, f->nwords) * n_ssh / 100;
481 /* not enough words to fill */
482 if ((f->fillreq <= 0 || f->nwords < f->fillreq) && llen <= FMT_LLEN(f))
483 return 0;
484 nreq = (n_hy & HY_LAST) ? fmt_safelines() : 0;
485 if (nreq > 0 && nreq <= fmt_nlines(f))
486 return 1;
487 /* resetting positions */
488 for (i = 0; i < f->nwords + 1; i++)
489 f->best_pos[i] = -1;
490 end = fmt_breakparagraph(f, f->nwords);
491 if (nreq > 0) {
492 end_head = fmt_head(f, nreq - fmt_nlines(f), end);
493 head = end_head < end;
494 end = end_head;
496 /* recursively add lines */
497 n = fmt_break(f, end);
498 f->nwords -= n;
499 f->fillreq -= n;
500 fmt_movewords(f, 0, n, f->nwords);
501 f->filled = n && !f->nwords;
502 if (f->nwords)
503 f->words[0].gap = 0;
504 if (f->nwords) /* apply the new .l and .i */
505 fmt_confupdate(f);
506 return head || n != end;
509 struct fmt *fmt_alloc(void)
511 struct fmt *fmt = xmalloc(sizeof(*fmt));
512 memset(fmt, 0, sizeof(*fmt));
513 return fmt;
516 void fmt_free(struct fmt *fmt)
518 free(fmt);
521 int fmt_wid(struct fmt *fmt)
523 return fmt_wordslen(fmt, 0, fmt->nwords) + fmt_wordgap(fmt);
526 int fmt_morewords(struct fmt *fmt)
528 return fmt_morelines(fmt) || fmt->nwords;
531 int fmt_morelines(struct fmt *fmt)
533 return fmt->l_head != fmt->l_tail;
536 /* suppress the last newline */
537 void fmt_suppressnl(struct fmt *fmt)
539 if (fmt->nls) {
540 fmt->nls--;
541 fmt->nls_sup = 1;