2 * line formatting buffer for line adjustment and hyphenation
4 * The line formatting buffer does two main functions: breaking
5 * words into lines (possibly after breaking them at their
6 * hyphenation points), and, if requested, adjusting the space
7 * between words in a line. In this file the first step is
8 * referred to as filling.
10 * Functions like fmt_word() return nonzero on failure, which
11 * means the call should be repeated after fetching previously
12 * formatted lines via fmt_nextline().
19 #define FMT_LLEN(f) MAX(0, (f)->ll - (f)->li - (f)->lI)
20 #define FMT_FILL(f) (!n_ce && n_u)
21 #define FMT_ADJ(f) (n_u && !n_na && !n_ce && (n_j & AD_B) == AD_B)
23 static int fmt_fillwords(struct fmt
*f
, int br
);
27 int wid
; /* word's width */
28 int elsn
, elsp
; /* els_neg and els_pos */
29 int gap
; /* the space before this word */
30 int hy
; /* hyphen width if inserted after this word */
31 int str
; /* does the space before it stretch */
32 int cost
; /* the extra cost of line break after this word */
33 int swid
; /* space width after this word (i.e., \w' ') */
45 int words_n
, words_sz
;
48 int lines_head
, lines_tail
, lines_sz
;
49 /* for paragraph adjustment */
54 int gap
; /* space before the next word */
55 int nls
; /* newlines before the next word */
56 int nls_sup
; /* suppressed newlines */
57 int li
, ll
, lI
; /* current line indentation and length */
58 int filled
; /* filled all words in the last fmt_fill() */
59 int eos
; /* last word ends a sentence */
60 int fillreq
; /* fill after the last word (\p) */
63 /* .ll, .in and .ti are delayed until the partial line is output */
64 static void fmt_confupdate(struct fmt
*f
)
67 f
->li
= n_ti
>= 0 ? n_ti
: n_i
;
68 f
->lI
= n_tI
>= 0 ? n_tI
: n_I
;
73 static int fmt_confchanged(struct fmt
*f
)
75 return f
->ll
!= n_l
|| f
->li
!= (n_ti
>= 0 ? n_ti
: n_i
) ||
76 f
->lI
!= (n_tI
>= 0 ? n_tI
: n_I
);
79 /* move words inside an fmt struct */
80 static void fmt_movewords(struct fmt
*a
, int dst
, int src
, int len
)
82 memmove(a
->words
+ dst
, a
->words
+ src
, len
* sizeof(a
->words
[0]));
85 /* move words from the buffer to s */
86 static int fmt_wordscopy(struct fmt
*f
, int beg
, int end
,
87 struct sbuf
*s
, int *els_neg
, int *els_pos
)
94 for (i
= beg
; i
< end
; i
++) {
96 sbuf_printf(s
, "%ch'%du'", c_ec
, wcur
->gap
);
97 sbuf_append(s
, wcur
->s
);
98 w
+= wcur
->wid
+ wcur
->gap
;
99 if (wcur
->elsn
< *els_neg
)
100 *els_neg
= wcur
->elsn
;
101 if (wcur
->elsp
> *els_pos
)
102 *els_pos
= wcur
->elsp
;
106 wcur
= &f
->words
[end
- 1];
108 sbuf_append(s
, "\\(hy");
114 static int fmt_nlines(struct fmt
*f
)
116 return f
->lines_head
- f
->lines_tail
;
119 /* the total width of the specified words in f->words[] */
120 static int fmt_wordslen(struct fmt
*f
, int beg
, int end
)
123 for (i
= beg
; i
< end
; i
++)
124 w
+= f
->words
[i
].wid
+ f
->words
[i
].gap
;
125 return beg
< end
? w
+ f
->words
[end
- 1].hy
: 0;
128 /* the number of stretchable spaces in f */
129 static int fmt_spaces(struct fmt
*f
, int beg
, int end
)
132 for (i
= beg
+ 1; i
< end
; i
++)
138 /* the amount of stretchable spaces in f */
139 static int fmt_spacessum(struct fmt
*f
, int beg
, int end
)
142 for (i
= beg
+ 1; i
< end
; i
++)
144 n
+= f
->words
[i
].gap
;
148 /* return the next line in the buffer */
149 char *fmt_nextline(struct fmt
*f
, int *w
,
150 int *li
, int *lI
, int *ll
, int *els_neg
, int *els_pos
)
153 if (f
->lines_head
== f
->lines_tail
)
155 l
= &f
->lines
[f
->lines_tail
++];
162 return sbuf_out(&l
->sbuf
);
165 static struct line
*fmt_mkline(struct fmt
*f
)
168 if (f
->lines_head
== f
->lines_tail
) {
172 if (f
->lines_head
== f
->lines_sz
) {
174 f
->lines
= mextend(f
->lines
, f
->lines_head
,
175 f
->lines_sz
, sizeof(f
->lines
[0]));
177 l
= &f
->lines
[f
->lines_head
++];
185 static void fmt_keshideh(struct fmt
*f
, int beg
, int end
, int wid
);
187 /* extract words from beg to end; shrink or stretch spaces if needed */
188 static int fmt_extractline(struct fmt
*f
, int beg
, int end
, int str
)
190 int fmt_div
, fmt_rem
;
191 int w
, i
, nspc
, llen
;
193 if (!(l
= fmt_mkline(f
)))
196 w
= fmt_wordslen(f
, beg
, end
);
197 if (str
&& FMT_ADJ(f
) && n_j
& AD_K
) {
198 fmt_keshideh(f
, beg
, end
, llen
- w
);
199 w
= fmt_wordslen(f
, beg
, end
);
201 nspc
= fmt_spaces(f
, beg
, end
);
202 if (nspc
&& FMT_ADJ(f
) && (llen
< w
|| str
)) {
203 fmt_div
= (llen
- w
) / nspc
;
204 fmt_rem
= (llen
- w
) % nspc
;
209 for (i
= beg
+ 1; i
< end
; i
++)
211 f
->words
[i
].gap
+= fmt_div
+ (fmt_rem
-- > 0);
213 l
->wid
= fmt_wordscopy(f
, beg
, end
, &l
->sbuf
, &l
->elsn
, &l
->elsp
);
217 static int fmt_sp(struct fmt
*f
)
219 if (fmt_fillwords(f
, 1))
221 if (fmt_extractline(f
, 0, f
->words_n
, 0))
231 /* fill as many lines as possible; if br, put the remaining words in a line */
232 int fmt_fill(struct fmt
*f
, int br
)
234 if (fmt_fillwords(f
, br
))
245 void fmt_space(struct fmt
*fmt
)
247 fmt
->gap
+= font_swid(dev_font(n_f
), n_s
, n_ss
);
250 int fmt_newline(struct fmt
*f
)
261 if (f
->nls
== 0 && !f
->filled
&& !f
->words_n
)
267 /* format the paragraph after the next word (\p) */
268 int fmt_fillreq(struct fmt
*f
)
271 if (fmt_fillwords(f
, 0))
273 f
->fillreq
= f
->words_n
+ 1;
277 static void fmt_wb2word(struct fmt
*f
, struct word
*word
, struct wb
*wb
,
278 int hy
, int str
, int gap
, int cost
)
280 int len
= strlen(wb_buf(wb
));
281 word
->s
= xmalloc(len
+ 1);
282 memcpy(word
->s
, wb_buf(wb
), len
+ 1);
283 word
->wid
= wb_wid(wb
);
284 word
->elsn
= wb
->els_neg
;
285 word
->elsp
= wb
->els_pos
;
286 word
->hy
= hy
? wb_hywid(wb
) : 0;
290 word
->swid
= wb_swid(wb
);
293 /* find explicit break positions: dashes, \:, \%, and \~ */
294 static int fmt_hyphmarks(char *word
, int *hyidx
, int *hyins
, int *hygap
)
300 while ((c
= escread(&s
, &d
)) > 0)
302 if (c
< 0 || !strcmp(c_hc
, d
))
304 while ((c
= escread(&s
, &d
)) >= 0 && n
< NHYPHSWORD
) {
306 if (!strcmp(c_hc
, d
)) {
308 hyidx
[n
++] = s
- word
;
312 hyidx
[n
++] = s
- word
;
314 if (!strcmp(c_nb
, d
)) {
316 hyidx
[n
++] = s
- word
;
321 /* cannot break the end of a word */
322 while (n
> 0 && hyidx
[n
- 1] == lastchar
)
327 static struct word
*fmt_mkword(struct fmt
*f
)
329 if (f
->words_n
== f
->words_sz
) {
331 f
->words
= mextend(f
->words
, f
->words_n
,
332 f
->words_sz
, sizeof(f
->words
[0]));
334 return &f
->words
[f
->words_n
++];
337 static void fmt_insertword(struct fmt
*f
, struct wb
*wb
, int gap
)
339 int hyidx
[NHYPHSWORD
]; /* sub-word boundaries */
340 int hyins
[NHYPHSWORD
] = {0}; /* insert dash */
341 int hygap
[NHYPHSWORD
] = {0}; /* stretchable no-break space */
342 char *src
= wb_buf(wb
);
348 n
= fmt_hyphmarks(src
, hyidx
, hyins
, hygap
);
350 fmt_wb2word(f
, fmt_mkword(f
), wb
, 0, 1, gap
, wb_cost(wb
));
353 /* update f->fillreq considering the new sub-words */
354 if (f
->fillreq
== f
->words_n
+ 1)
358 for (i
= 0; i
<= n
; i
++) {
359 int ihy
= i
< n
&& hyins
[i
]; /* dash width */
360 int istr
= i
== 0 || hygap
[i
- 1]; /* stretchable */
361 int igap
; /* gap width */
362 int icost
; /* hyphenation cost */
363 beg
= src
+ (i
> 0 ? hyidx
[i
- 1] : 0);
364 end
= src
+ (i
< n
? hyidx
[i
] : strlen(src
));
365 if (i
< n
&& hygap
[i
]) /* remove \~ */
367 wb_catstr(&wbc
, beg
, end
);
368 wb_fnszget(&wbc
, &cf
, &cs
, &cm
, &ccd
);
369 icost
= i
== n
? wb_cost(&wbc
) : hygap
[i
] * 10000000;
370 igap
= i
== 0 ? gap
: hygap
[i
- 1] * wb_swid(&wbc
);
371 fmt_wb2word(f
, fmt_mkword(f
), &wbc
, ihy
, istr
, igap
, icost
);
373 wb_fnszset(&wbc
, cf
, cs
, cm
, ccd
); /* restoring wbc */
378 /* the amount of space necessary before the next word */
379 static int fmt_wordgap(struct fmt
*f
)
381 int nls
= f
->nls
|| f
->nls_sup
;
382 int swid
= font_swid(dev_font(n_f
), n_s
, n_ss
);
383 if (f
->eos
&& f
->words_n
)
384 if ((nls
&& !f
->gap
) || (!nls
&& f
->gap
== 2 * swid
))
385 return swid
+ font_swid(dev_font(n_f
), n_s
, n_sss
);
386 return (nls
&& !f
->gap
&& f
->words_n
) ? swid
: f
->gap
;
389 /* insert wb into fmt */
390 int fmt_word(struct fmt
*f
, struct wb
*wb
)
394 if (fmt_confchanged(f
))
395 if (fmt_fillwords(f
, 0))
397 if (FMT_FILL(f
) && f
->nls
&& f
->gap
)
400 if (!f
->words_n
) /* apply the new .l and .i */
402 f
->gap
= fmt_wordgap(f
);
404 fmt_insertword(f
, wb
, f
->filled
? 0 : f
->gap
);
412 /* insert keshideh characters */
413 static void fmt_keshideh(struct fmt
*f
, int beg
, int end
, int wid
)
416 int kw
, i
= 0, c
= 0;
421 for (c
= 0; c
< 2; c
++) {
422 for (i
= end
- 1 - c
; i
>= beg
; i
-= 2) {
425 kw
= wb_keshideh(w
->s
, &wb
, wid
);
428 w
->s
= xmalloc(strlen(wb_buf(&wb
)) + 1);
429 strcpy(w
->s
, wb_buf(&wb
));
430 w
->wid
= wb_wid(&wb
);
440 /* approximate 8 * sqrt(cost) */
441 static long scaledown(long cost
)
445 for (i
= 0; i
< 14; i
++)
446 ret
+= ((cost
>> (i
* 2)) & 3) << (i
+ 3);
447 return ret
< (1 << 13) ? ret
: (1 << 13);
450 /* the cost of putting lwid words in a line of length llen */
451 static long FMT_COST(int llen
, int lwid
, int swid
, int nspc
)
453 /* the ratio that the stretchable spaces of the line should be spread */
454 long ratio
= labs((llen
- lwid
) * 100l / (swid
? swid
: 1));
455 /* ratio too large; scaling it down */
457 ratio
= 4000 + scaledown(ratio
- 4000);
458 /* assigning a cost of 100 to each space stretching 100 percent */
459 return ratio
* ratio
/ 100l * (nspc
? nspc
: 1);
462 /* the number of hyphenations in consecutive lines ending at pos */
463 static int fmt_hydepth(struct fmt
*f
, int pos
)
466 while (pos
> 0 && f
->words
[pos
- 1].hy
&& ++n
< 5)
467 pos
= f
->best_pos
[pos
];
471 static long hycost(int depth
)
473 if (n_hlm
> 0 && depth
> n_hlm
)
476 return n_hycost
+ n_hycost2
+ n_hycost3
;
478 return n_hycost
+ n_hycost2
;
479 return depth
? n_hycost
: 0;
482 /* the cost of putting a line break before word pos */
483 static long fmt_findcost(struct fmt
*f
, int pos
)
487 int llen
= MAX(1, FMT_LLEN(f
));
488 int lwid
= 0; /* current line length */
489 int swid
= 0; /* amount of stretchable spaces */
490 int nspc
= 0; /* number of stretchable spaces */
491 int dwid
= 0; /* equal to swid, unless swid is zero */
494 if (f
->best_pos
[pos
] >= 0)
495 return f
->best
[pos
] + f
->words
[pos
- 1].cost
;
496 lwid
= f
->words
[pos
- 1].hy
; /* non-zero if the last word is hyphenated */
497 hyphenated
= f
->words
[pos
- 1].hy
!= 0;
500 lwid
+= f
->words
[i
].wid
;
502 lwid
+= f
->words
[i
+ 1].gap
;
503 if (i
+ 1 < pos
&& f
->words
[i
+ 1].str
) {
504 swid
+= f
->words
[i
+ 1].gap
;
507 if (lwid
> llen
+ swid
* n_ssh
/ 100 && i
+ 1 < pos
)
510 if (!dwid
&& i
> 0) /* no stretchable spaces */
511 dwid
= f
->words
[i
- 1].swid
;
512 cur
= fmt_findcost(f
, i
) + FMT_COST(llen
, lwid
, dwid
, nspc
);
514 cur
+= hycost(1 + fmt_hydepth(f
, i
));
515 if (f
->best_pos
[pos
] < 0 || cur
< f
->best
[pos
]) {
516 f
->best_pos
[pos
] = i
;
517 f
->best_dep
[pos
] = f
->best_dep
[i
] + 1;
522 return f
->best
[pos
] + f
->words
[pos
- 1].cost
;
525 static int fmt_bestpos(struct fmt
*f
, int pos
)
527 fmt_findcost(f
, pos
);
528 return MAX(0, f
->best_pos
[pos
]);
531 static int fmt_bestdep(struct fmt
*f
, int pos
)
533 fmt_findcost(f
, pos
);
534 return MAX(0, f
->best_dep
[pos
]);
537 /* return the last filled word */
538 static int fmt_breakparagraph(struct fmt
*f
, int pos
, int br
)
542 long cost
, best_cost
= 0;
543 int llen
= FMT_LLEN(f
);
544 int lwid
= 0; /* current line length */
545 int swid
= 0; /* amount of stretchable spaces */
546 int nspc
= 0; /* number of stretchable spaces */
547 if (f
->fillreq
> 0 && f
->fillreq
<= f
->words_n
) {
548 fmt_findcost(f
, f
->fillreq
);
551 if (pos
> 0 && f
->words
[pos
- 1].wid
>= llen
) {
552 fmt_findcost(f
, pos
);
557 if (f
->words
[i
].hy
) /* the last word is hyphenated */
558 lwid
+= f
->words
[i
].hy
;
560 lwid
+= f
->words
[i
].wid
;
562 lwid
+= f
->words
[i
+ 1].gap
;
563 if (i
+ 1 < pos
&& f
->words
[i
+ 1].str
) {
564 swid
+= f
->words
[i
+ 1].gap
;
567 if (lwid
> llen
&& i
+ 1 < pos
)
569 cost
= fmt_findcost(f
, i
);
570 /* the cost of formatting short lines; should prevent widows */
571 if (br
&& n_pmll
&& lwid
< llen
* n_pmll
/ 100) {
572 int pmll
= llen
* n_pmll
/ 100;
573 cost
+= (long) n_pmllcost
* (pmll
- lwid
) / pmll
;
575 if (best
< 0 || cost
< best_cost
) {
584 /* extract the first nreq formatted lines before the word at pos */
585 static int fmt_head(struct fmt
*f
, int nreq
, int pos
, int nohy
)
587 int best
= pos
; /* best line break for nreq-th line */
588 int prev
, next
; /* best line breaks without hyphenation */
589 if (nreq
<= 0 || fmt_bestdep(f
, pos
) < nreq
)
591 /* finding the optimal line break for nreq-th line */
592 while (best
> 0 && fmt_bestdep(f
, best
) > nreq
)
593 best
= fmt_bestpos(f
, best
);
598 /* finding closest line breaks without hyphenation */
599 while (prev
> 1 && f
->words
[prev
- 1].hy
&&
600 fmt_bestdep(f
, prev
- 1) == nreq
)
602 while (next
< pos
&& f
->words
[next
- 1].hy
&&
603 fmt_bestdep(f
, next
) == nreq
)
605 /* choosing the best of them */
606 if (!f
->words
[prev
- 1].hy
&& !f
->words
[next
- 1].hy
)
607 return fmt_findcost(f
, prev
) <= fmt_findcost(f
, next
) ? prev
: next
;
608 if (!f
->words
[prev
- 1].hy
)
610 if (!f
->words
[next
- 1].hy
)
615 /* break f->words[0..end] into lines according to fmt_bestpos() */
616 static int fmt_break(struct fmt
*f
, int end
)
619 beg
= fmt_bestpos(f
, end
);
621 ret
+= fmt_break(f
, beg
);
622 f
->words
[beg
].gap
= 0;
623 if (fmt_extractline(f
, beg
, end
, 1))
627 return ret
+ (end
- beg
);
630 /* estimated number of lines until traps or the end of a page */
631 static int fmt_safelines(void)
633 int lnht
= MAX(1, n_L
) * n_v
;
634 return n_v
> 0 ? (f_nexttrap() + lnht
- 1) / lnht
: 1000;
637 /* fill the words collected in the buffer */
638 static int fmt_fillwords(struct fmt
*f
, int br
)
640 int nreq
; /* the number of lines until a trap */
641 int end
; /* the final line ends before this word */
642 int end_head
; /* like end, but only the first nreq lines included */
643 int head
= 0; /* only nreq first lines have been formatted */
644 int llen
; /* line length, taking shrinkable spaces into account */
648 llen
= fmt_wordslen(f
, 0, f
->words_n
) -
649 fmt_spacessum(f
, 0, f
->words_n
) * n_ssh
/ 100;
650 /* not enough words to fill */
651 if ((f
->fillreq
<= 0 || f
->words_n
< f
->fillreq
) && llen
<= FMT_LLEN(f
))
653 /* lines until a trap or page end */
654 nreq
= fmt_safelines();
655 /* if line settings are changed, output a single line */
656 if (fmt_confchanged(f
))
658 /* enough lines are collected already */
659 if (nreq
> 0 && nreq
<= fmt_nlines(f
))
661 /* resetting positions */
662 f
->best
= malloc((f
->words_n
+ 1) * sizeof(f
->best
[0]));
663 f
->best_pos
= malloc((f
->words_n
+ 1) * sizeof(f
->best_pos
[0]));
664 f
->best_dep
= malloc((f
->words_n
+ 1) * sizeof(f
->best_dep
[0]));
665 memset(f
->best
, 0, (f
->words_n
+ 1) * sizeof(f
->best
[0]));
666 memset(f
->best_dep
, 0, (f
->words_n
+ 1) * sizeof(f
->best_dep
[0]));
667 for (i
= 0; i
< f
->words_n
+ 1; i
++)
669 end
= fmt_breakparagraph(f
, f
->words_n
, br
);
671 int nohy
= 0; /* do not hyphenate the last line */
672 if (n_hy
& HY_LAST
&& nreq
== fmt_nlines(f
))
674 end_head
= fmt_head(f
, nreq
- fmt_nlines(f
), end
, nohy
);
675 head
= end_head
< end
;
678 /* recursively add lines */
679 n
= end
> 0 ? fmt_break(f
, end
) : 0;
682 fmt_movewords(f
, 0, n
, f
->words_n
);
683 f
->filled
= n
&& !f
->words_n
;
686 if (f
->words_n
) /* apply the new .l and .i */
694 return head
|| n
!= end
;
697 struct fmt
*fmt_alloc(void)
699 struct fmt
*fmt
= xmalloc(sizeof(*fmt
));
700 memset(fmt
, 0, sizeof(*fmt
));
704 void fmt_free(struct fmt
*fmt
)
711 int fmt_wid(struct fmt
*fmt
)
713 return fmt_wordslen(fmt
, 0, fmt
->words_n
) + fmt_wordgap(fmt
);
716 int fmt_morewords(struct fmt
*fmt
)
718 return fmt_morelines(fmt
) || fmt
->words_n
;
721 int fmt_morelines(struct fmt
*fmt
)
723 return fmt
->lines_head
!= fmt
->lines_tail
;
726 /* suppress the last newline */
727 void fmt_suppressnl(struct fmt
*fmt
)