hyph: allow utf-8 hyphenation patterns
[neatroff.git] / roff.h
blobf7c7652b52dc71e1bb68dbab5fc4801274af5a64
/*
 * Most functions and variables in neatroff are prefixed with tokens
 * that indicate their purpose, such as:
 *
 * + tr_xyz: the implementation of troff request .xyz (mostly tr.c)
 * + in_xyz: input layer (in.c)
 * + cp_xyz: copy-mode interpretation layer (cp.c)
 * + ren_xyz: rendering characters into lines (ren.c)
 * + out_xyz: output layer for generating troff output (out.c)
 * + dev_xyz: output devices (dev.c)
 * + num_xyz: number registers (reg.c)
 * + str_xyz: string registers (reg.c)
 * + env_xyz: environments (reg.c)
 * + eval_xyz: integer expression evaluation (eval.c)
 * + font_xyz: fonts (font.c)
 * + sbuf_xyz: variable length string buffers (sbuf.c)
 * + wb_xyz: word buffers (wb.c)
 * + fmt_xyz: line formatting buffers (fmt.c)
 * + n_xyz: builtin number register xyz
 * + c_xyz: characters for requests like hc and mc
 */
/*
 * Predefined array limits.
 *
 * Compile-time capacities used to size the fixed arrays declared in this
 * header (for example struct font and struct wb) and, presumably, tables
 * in the implementation files.  GNLEN and RNLEN are aliases of NMLEN, so
 * changing NMLEN changes all three.
 */
#define PATHLEN		1024	/* path length */
#define NFILES		16	/* number of input files */
#define NFONTS		32	/* number of fonts */
#define NGLYPHS		1024	/* glyphs in fonts */
#define NLIGS		128	/* number of font ligatures */
#define NKERNS		1024	/* number of font kerning pairs */
#define FNLEN		32	/* font name length */
#define NMLEN		32	/* macro/register/environment/glyph name length */
#define GNLEN		NMLEN	/* glyph name length */
#define RNLEN		NMLEN	/* register/macro name */
#define ILNLEN		1000	/* line limit of input files */
#define LNLEN		4000	/* line buffer length (ren.c/out.c) */
#define NWORDS		1024	/* number of queued words in formatting buffer */
#define NLINES		32	/* number of queued lines in formatting buffer */
#define NARGS		16	/* number of macro arguments */
#define NPREV		16	/* environment stack depth */
#define NTRAPS		1024	/* number of traps per page */
#define NIES		128	/* number of nested .ie commands */
#define NTABS		16	/* number of tab stops */
#define NCMAPS		512	/* number of character translations (.tr) */
#define NSSTR		32	/* number of nested sstr_push() calls */
#define NFIELDS		32	/* number of fields */
#define MAXFRAC		100000	/* maximum value of the fractional part */
#define LIGLEN		4	/* length of ligatures */
#define NCDEFS		128	/* number of character definitions (.char) */
#define NHYPHS		16384	/* hyphenation dictionary/patterns (.hw) */
#define NHYPHSWORD	16	/* number of hyphenations per word */
/*
 * Converting scales: lengths expressed in device units.
 *
 * These are expressions, not constants: each use reads dev_res (and, for
 * SC_EM, the n_s register macro), so the value follows the current
 * device resolution and n_s.  Division is integer division.
 */
#define SC_IN		(dev_res)	/* inch in units */
#define SC_PT		(SC_IN / 72)	/* point in units */
#define SC_EM		(n_s * SC_IN / 72)	/* n_s points in units (presumably one em) */
/*
 * Escape sequences, grouped by argument syntax.  Each string lists the
 * escape letters that use the corresponding form.
 */
#define ESC_Q	"bCDhHlLNoRSvwxX?"	/* \X'ccc' quoted escape sequences */
#define ESC_P	"*fgkmns"		/* \Xc \X(cc \X[ccc] escape sequences */
/*
 * Generic helpers.
 *
 * MIN/MAX evaluate one of their arguments twice; do not pass expressions
 * with side effects (e.g. MIN(i++, n)).
 * LEN yields the element count of a true array; it gives a meaningless
 * result when applied to a pointer (including array-typed parameters).
 */
#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) < (b) ? (b) : (a))
#define LEN(a)		(sizeof(a) / sizeof((a)[0]))
/*
 * Special characters.
 *
 * c_ec, c_cc and c_c2 are global ints defined elsewhere.  c_hc, c_mc,
 * c_tc and c_lc are not variables: they expand to calls of the env_*()
 * accessors, which return char *.
 */
extern int c_ec;		/* escape character (\) */
extern int c_cc;		/* basic control character (.) */
extern int c_c2;		/* no-break control character (') */
#define c_ni	4		/* non-interpreted copy-mode escape */
#define c_hc	env_hc()	/* hyphenation character */
#define c_mc	env_mc()	/* margin character (.mc) */
#define c_tc	env_tc()
#define c_lc	env_lc()
#define c_bp	"\\:"		/* zero-width word break point */
/*
 * Number registers (num_*, implemented in reg.c) and integer expression
 * evaluation (eval*, implemented in eval.c).  Registers are addressed by
 * the integer id obtained from map().
 */
int num_get(int id, int inc);
void num_set(int id, int val);
void num_inc(int id, int val);
void num_del(int id);
char *num_str(int id);
char *num_getfmt(int id);
void num_setfmt(int id, char *fmt);
int *nreg(int id);	/* pointer to the register's value; used by the n_* macros */
int eval(char *s, int unit);
int eval_up(char **s, int unit);
int eval_re(char *s, int orig, int unit);
/*
 * String registers (reg.c), addressed by the integer id obtained from
 * map().  The str_dset()/str_dget() pair stores and retrieves an opaque
 * void * instead of a string.
 */
void str_set(int id, char *s);
void str_dset(int id, void *d);
char *str_get(int id);
void *str_dget(int id);
void str_rm(int id);
void str_rn(int src, int dst);
/* saving and restoring registers before and after printing diverted lines */
void odiv_beg(void);
void odiv_end(void);
/*
 * Environments (reg.c).
 *
 * env_hc(), env_mc(), env_tc() and env_lc() back the c_hc, c_mc, c_tc
 * and c_lc macros.
 */
void env_init(void);
void env_done(void);
struct fmt *env_fmt(void);
struct wb *env_wb(void);
char *env_hc(void);
char *env_mc(void);
char *env_tc(void);
char *env_lc(void);
int tab_next(int pos);
int tab_type(int pos);
/* device related variables (defined elsewhere; declared here only) */
extern int dev_res;	/* read by SC_IN as the number of units per inch */
extern int dev_uwid;	/* divisor used by DEVWID() */
extern int dev_hor;
extern int dev_ver;
120 struct glyph {
121 char id[GNLEN]; /* device-dependent glyph identifier */
122 char name[GNLEN]; /* the first character mapped to this glyph */
123 struct font *font; /* glyph font */
124 int wid; /* character width */
125 int type; /* character type; ascender/descender */
126 int llx, lly, urx, ury; /* character bounding box */
129 struct font {
130 char name[FNLEN];
131 char fontname[FNLEN];
132 struct glyph glyphs[NGLYPHS];
133 int nglyphs;
134 int spacewid;
135 int special;
136 int cs, bd; /* for .cs and .bd requests */
137 /* charset section characters */
138 char c[NGLYPHS][GNLEN]; /* character names in charset */
139 struct glyph *g[NGLYPHS]; /* character glyphs in charset */
140 struct glyph *g_map[NGLYPHS]; /* character remapped via font_map() */
141 int n; /* number of characters in charset */
142 /* glyph table based on the first character of their id fields in glyphs[] */
143 int ghead[256]; /* glyph list heads */
144 int gnext[NGLYPHS]; /* next item in glyph lists */
145 /* character table based on the first character of glyph names in c[] */
146 int chead[256]; /* character list heads */
147 int cnext[NGLYPHS]; /* next item in character lists */
148 /* font ligatures (lg*) */
149 char lg[NLIGS][LIGLEN * GNLEN]; /* ligatures */
150 int lgn; /* number of ligatures in lg[] */
151 /* kerning pair table per glyph (kn*) */
152 int knhead[NGLYPHS]; /* kerning pairs of glyphs[] */
153 int knnext[NKERNS]; /* next item in knhead[] list */
154 int knpair[NKERNS]; /* kerning pair 2nd glyphs */
155 int knval[NKERNS]; /* font pairwise kerning value */
156 int knn; /* number of kerning pairs */
/* output device functions (dev.c) */
int dev_open(char *dir, char *dev);
void dev_close(void);
int dev_mnt(int pos, char *id, char *name);
int dev_pos(char *id);
struct font *dev_font(int pos);
int dev_fontpos(struct font *fn);
/* per-font values for the .cs and .bd requests; fn is an int here, not a struct font * */
void dev_setcs(int fn, int cs);
int dev_getcs(int fn);
void dev_setbd(int fn, int bd);
int dev_getbd(int fn);
/* font-related functions (font.c) */
struct font *font_open(char *path);
void font_close(struct font *fn);
struct glyph *font_glyph(struct font *fn, char *id);
struct glyph *font_find(struct font *fn, char *name);
int font_lig(struct font *fn, char **c, int n);
int font_kern(struct font *fn, char *c1, char *c2);
int font_islig(struct font *fn, char *s);
int font_map(struct font *fn, char *name, struct glyph *gl);
int font_mapped(struct font *fn, char *name);
/* glyph handling functions; fn is an int here, not a struct font * */
struct glyph *dev_glyph(char *c, int fn);
int charwid(int fn, int sz, int wid);

/*
 * convert wid in device unitwidth size to size sz
 *
 * Adding dev_uwid / 2 before the integer division rounds to the nearest
 * unit for non-negative products.
 */
#define DEVWID(sz, wid)		(((wid) * (sz) + (dev_uwid / 2)) / dev_uwid)
/*
 * the amount of word and sentence space for the given font and size
 *
 * n_ss and n_sss scale the font's spacewid in twelfths; the + 6 rounds
 * the division by 12 to nearest.  fn is evaluated twice.
 */
#define N_SS(fn, sz)		(charwid((fn), (sz), (dev_font(fn)->spacewid * n_ss + 6) / 12))
#define N_SSS(fn, sz)		(charwid((fn), (sz), (dev_font(fn)->spacewid * n_sss + 6) / 12))
/*
 * different layers of neatroff
 *
 * Each *_next() function returns the next item as an int.
 */
int in_next(void);		/* input layer */
int cp_next(void);		/* copy-mode layer */
int tr_next(void);		/* troff layer */

void in_push(char *s, char **args);
void in_pushnl(char *s, char **args);
void in_so(char *path);		/* .so request */
void in_nx(char *path);		/* .nx request */
void in_ex(void);		/* .ex request */
void in_lf(char *path, int ln);	/* .lf request */
void in_queue(char *path);	/* queue the given input file */
char *in_arg(int i);		/* look up argument */
int in_nargs(void);		/* number of arguments */
void in_back(int c);		/* push back input character */
int in_top(void);		/* the first pushed-back character */
char *in_filename(void);	/* current filename */
int in_lnum(void);		/* current line number */

void cp_blk(int skip);		/* skip or read the next line or block */
void cp_copymode(int mode);	/* do not interpret \w and \E */
#define cp_back		in_back	/* cp.c is stateless */
int tr_nextreq(void);		/* read the next troff request */
/* variable length string buffer; manipulate it through the sbuf_*() functions */
struct sbuf {
	char *s;		/* allocated buffer */
	int sz;			/* buffer size */
	int n;			/* length of the string stored in s */
};
/* variable length string buffer operations (sbuf.c) */
void sbuf_init(struct sbuf *sbuf);
void sbuf_done(struct sbuf *sbuf);
char *sbuf_buf(struct sbuf *sbuf);
void sbuf_add(struct sbuf *sbuf, int c);
void sbuf_append(struct sbuf *sbuf, char *s);
void sbuf_printf(struct sbuf *sbuf, char *s, ...);
void sbuf_putnl(struct sbuf *sbuf);
void sbuf_cut(struct sbuf *sbuf, int n);
int sbuf_len(struct sbuf *sbuf);
int sbuf_empty(struct sbuf *sbuf);
234 /* word buffer */
235 struct wb {
236 struct sbuf sbuf;
237 int f, s, m; /* the last output font and size */
238 int r_f, r_s, r_m; /* current font and size; use n_f and n_s if -1 */
239 int part; /* partial input (\c) */
240 int els_neg, els_pos; /* extra line spacing */
241 int h, v; /* buffer vertical and horizontal positions */
242 int ct, sb, st; /* \w registers */
243 int llx, lly, urx, ury; /* bounding box */
244 int icleft_ll; /* len after the pending left italic correction */
245 /* saving previous characters added via wb_put() */
246 char prev_c[LIGLEN][GNLEN];
247 int prev_l[LIGLEN]; /* sbuf_len(&wb->sbuf) before wb_put() calls */
248 int prev_h[LIGLEN]; /* wb->h before wb_put() calls */
249 int prev_n; /* number of characters in prev_c[] */
250 int prev_ll; /* sbuf_len(&wb->sbuf) after the last wb_put() */
/* word buffer operations (wb.c) */
void wb_init(struct wb *wb);
void wb_done(struct wb *wb);
void wb_hmov(struct wb *wb, int n);
void wb_vmov(struct wb *wb, int n);
void wb_els(struct wb *wb, int els);
void wb_etc(struct wb *wb, char *x);
void wb_put(struct wb *wb, char *c);
void wb_putexpand(struct wb *wb, char *c);
int wb_part(struct wb *wb);
void wb_setpart(struct wb *wb);
void wb_drawl(struct wb *wb, int c, int h, int v);
void wb_drawc(struct wb *wb, int c, int r);
void wb_drawe(struct wb *wb, int c, int h, int v);
void wb_drawa(struct wb *wb, int c, int h1, int v1, int h2, int v2);
void wb_drawxbeg(struct wb *wb, int c);
void wb_drawxdot(struct wb *wb, int h, int v);
void wb_drawxend(struct wb *wb);
void wb_italiccorrection(struct wb *wb);
void wb_italiccorrectionleft(struct wb *wb);
void wb_cat(struct wb *wb, struct wb *src);
void wb_catstr(struct wb *wb, char *beg, char *end);
int wb_hyphmark(char *word, int *hyidx, int *hyins);
int wb_hyph(char *word, int *hyidx, int flg);
int wb_wid(struct wb *wb);
int wb_dashwid(struct wb *wb);
int wb_empty(struct wb *wb);
int wb_eos(struct wb *wb);
void wb_wconf(struct wb *wb, int *ct, int *st, int *sb,
		int *llx, int *lly, int *urx, int *ury);
int wb_lig(struct wb *wb, char *c);
int wb_kern(struct wb *wb, char *c);
void wb_reset(struct wb *wb);
char *wb_buf(struct wb *wb);
void wb_fnszget(struct wb *wb, int *fn, int *sz, int *m);
void wb_fnszset(struct wb *wb, int fn, int sz, int m);
/* character translation (.tr) */
void cmap_add(char *c1, char *c2);
char *cmap_map(char *c);
/* character definition (.char) */
char *cdef_map(char *c, int fn);
int cdef_expand(struct wb *wb, char *c, int fn);
/*
 * hyphenation flags
 *
 * Distinct bits, so they can be OR-ed together; bit 0x01 is not defined
 * here.
 */
#define HY_LAST		0x02	/* do not hyphenate last lines */
#define HY_FINAL2	0x04	/* do not hyphenate the final two characters */
#define HY_FIRST2	0x08	/* do not hyphenate the first two characters */

void hyphenate(char *hyphs, char *word, int flg);
/*
 * adjustment types
 *
 * AD_L, AD_R and AD_P are single-bit flags; AD_B equals AD_L | AD_R and
 * serves as a mask; AD_C is the value with neither margin flag set.
 */
#define AD_C		0	/* center */
#define AD_L		1	/* adjust left margin (flag) */
#define AD_R		2	/* adjust right margin (flag) */
#define AD_B		3	/* adjust both margin (mask) */
#define AD_P		4	/* paragraph-at-once adjustment (flag) */
/*
 * line formatting (fmt.c)
 *
 * struct fmt is not defined in this header, so it is only used through
 * pointers here.
 */
struct fmt *fmt_alloc(void);
void fmt_free(struct fmt *fmt);
int fmt_wid(struct fmt *fmt);
void fmt_space(struct fmt *fmt);
void fmt_suppressnl(struct fmt *fmt);
int fmt_word(struct fmt *fmt, struct wb *wb);
int fmt_newline(struct fmt *fmt);
int fmt_fillreq(struct fmt *f);
int fmt_br(struct fmt *fmt);
int fmt_fill(struct fmt *fmt);
int fmt_morelines(struct fmt *fmt);
int fmt_morewords(struct fmt *fmt);
int fmt_nextline(struct fmt *fmt, struct sbuf *sbuf, int *w,
		int *li, int *ll, int *els_neg, int *els_pos);
/*
 * rendering (ren.c)
 *
 * Functions taking next/back read their input through that callback
 * pair: next() returns the next character, back() pushes one back.
 */
int render(void);				/* the main loop */
int ren_parse(struct wb *wb, char *c);
int ren_char(struct wb *wb, int (*next)(void), void (*back)(int));
int ren_wid(int (*next)(void), void (*back)(int));
void ren_tl(int (*next)(void), void (*back)(int));
void ren_hline(struct wb *wb, int l, char *c);	/* horizontal line */
void ren_hlcmd(struct wb *wb, char *arg);	/* \l */
void ren_vlcmd(struct wb *wb, char *arg);	/* \L */
void ren_bcmd(struct wb *wb, char *arg);	/* \b */
void ren_ocmd(struct wb *wb, char *arg);	/* \o */
void ren_dcmd(struct wb *wb, char *arg);	/* \D */
/* out.c */
void out_line(char *s);			/* output rendered line */
void out(char *s, ...);			/* output troff cmd */
/*
 * troff commands
 *
 * tr_xyz() implements troff request .xyz; every handler has the same
 * signature and receives the request's arguments in args.
 */
void tr_ab(char **args);
void tr_bp(char **args);
void tr_br(char **args);
void tr_ce(char **args);
void tr_ch(char **args);
void tr_cl(char **args);
void tr_di(char **args);
void tr_divbeg(char **args);
void tr_divend(char **args);
void tr_dt(char **args);
void tr_em(char **args);
void tr_ev(char **args);
void tr_fc(char **args);
void tr_fi(char **args);
void tr_fp(char **args);
void tr_fspecial(char **args);
void tr_ft(char **args);
void tr_hpf(char **args);
void tr_hpfa(char **args);
void tr_hw(char **args);
void tr_in(char **args);
void tr_ll(char **args);
void tr_mk(char **args);
void tr_ne(char **args);
void tr_nf(char **args);
void tr_ns(char **args);
void tr_os(char **args);
void tr_pn(char **args);
void tr_ps(char **args);
void tr_rs(char **args);
void tr_rt(char **args);
void tr_sp(char **args);
void tr_sv(char **args);
void tr_ta(char **args);
void tr_ti(char **args);
void tr_wh(char **args);
void tr_popren(char **args);

void tr_init(void);
int tr_readargs(char **args, struct sbuf *sbuf,
		int (*next)(void), void (*back)(int));
/*
 * helpers
 *
 * Several readers come in two flavours: the *next* variants pull input
 * through a next()/back() callback pair, the *read* variants advance a
 * char ** cursor over an in-memory string.
 */
void errmsg(char *msg, ...);
void errdie(char *msg);
int utf8len(int c);
int utf8next(char *s, int (*next)(void));
int utf8read(char **s, char *d);
int utf8one(char *s);
int charnext(char *c, int (*next)(void), void (*back)(int));
int charread(char **s, char *c);
int charnext_delim(char *c, int (*next)(void), void (*back)(int), char *delim);
int charread_delim(char **s, char *c, char *delim);
void charnext_str(char *d, char *c);
void argnext(char *d, int cmd, int (*next)(void), void (*back)(int));
void argread(char **sp, char *d, int cmd);
int escread(char **s, char *d);
/* string streams; nested next()/back() interface for string buffers */
void sstr_push(char *s);
char *sstr_pop(void);
int sstr_next(void);
void sstr_back(int c);
/*
 * internal commands
 *
 * Two-byte strings: the control byte \07 followed by one letter.
 */
#define TR_DIVBEG	"\07<"	/* diversion begins */
#define TR_DIVEND	"\07>"	/* diversion ends */
#define TR_POPREN	"\07P"	/* exit render_rec() */
/*
 * mapping register, macro and environment names to indices
 *
 * DOTMAP(c2) gives the index of the name ".<c2>" directly as the
 * character code c2, without a map() call.
 */
#define NREGS		4096	/* maximum number of mapped names */
#define DOTMAP(c2)	(c2)	/* optimized mapping for ".x" names */

int map(char *s);		/* map name s to an index */
char *map_name(int id);		/* return the name mapped to id */
/*
 * colors
 *
 * A color is packed into an int as 0xRRGGBB.  CLR_R/G/B extract one
 * 8-bit channel; CLR_RGB packs three channels and does not mask its
 * arguments, so each must already be in the range 0..255.
 */
#define CLR_R(c)		(((c) >> 16) & 0xff)
#define CLR_G(c)		(((c) >> 8) & 0xff)
#define CLR_B(c)		((c) & 0xff)
#define CLR_RGB(r, g, b)	(((r) << 16) | ((g) << 8) | (b))

char *clr_str(int c);
int clr_get(char *s);
/*
 * builtin number registers; n_X for .X register
 *
 * Each macro expands to (*nreg(id)), an int lvalue that can be read and
 * assigned.  Single-letter ".x" registers get their id from DOTMAP();
 * the others call map() with the register name on every use.
 */
#define n_a		(*nreg(DOTMAP('a')))
#define n_cp		(*nreg(DOTMAP('C')))
#define n_d		(*nreg(DOTMAP('d')))
#define n_f		(*nreg(DOTMAP('f')))
#define n_h		(*nreg(DOTMAP('h')))
#define n_i		(*nreg(DOTMAP('i')))
#define n_it		(*nreg(map(".it")))	/* .it trap macro */
#define n_itn		(*nreg(map(".itn")))	/* .it lines left */
#define n_j		(*nreg(DOTMAP('j')))
#define n_l		(*nreg(DOTMAP('l')))
#define n_L		(*nreg(DOTMAP('L')))
#define n_n		(*nreg(DOTMAP('n')))
#define n_nI		(*nreg(map(".nI")))	/* i for .nm */
#define n_nm		(*nreg(map(".nm")))	/* .nm enabled */
#define n_nM		(*nreg(map(".nM")))	/* m for .nm */
#define n_nn		(*nreg(map(".nn")))	/* remaining .nn */
#define n_nS		(*nreg(map(".nS")))	/* s for .nm */
#define n_m		(*nreg(DOTMAP('m')))
#define n_mc		(*nreg(map(".mc")))	/* .mc enabled */
#define n_mcn		(*nreg(map(".mcn")))	/* .mc distance */
#define n_o		(*nreg(DOTMAP('o')))
#define n_p		(*nreg(DOTMAP('p')))
#define n_s		(*nreg(DOTMAP('s')))
#define n_u		(*nreg(DOTMAP('u')))
#define n_v		(*nreg(DOTMAP('v')))
#define n_ct		(*nreg(map("ct")))
#define n_dl		(*nreg(map("dl")))
#define n_dn		(*nreg(map("dn")))
#define n_ln		(*nreg(map("ln")))
#define n_nl		(*nreg(map("nl")))
#define n_sb		(*nreg(map("sb")))
#define n_st		(*nreg(map("st")))
#define n_pg		(*nreg(map("%")))	/* % */
#define n_lb		(*nreg(map(".b0")))	/* input line beg */
#define n_ce		(*nreg(map(".ce")))	/* .ce remaining */
#define n_f0		(*nreg(map(".f0")))	/* last .f */
#define n_lg		(*nreg(map(".lg")))	/* .lg mode */
#define n_hy		(*nreg(map(".hy")))	/* .hy mode */
#define n_hyp		(*nreg(map(".hyp")))	/* hyphenation penalty */
#define n_i0		(*nreg(map(".i0")))	/* last .i */
#define n_ti		(*nreg(map(".ti")))	/* pending .ti */
#define n_kn		(*nreg(map(".kern")))	/* .kn mode */
#define n_l0		(*nreg(map(".l0")))	/* last .l */
#define n_L0		(*nreg(map(".L0")))	/* last .L */
#define n_m0		(*nreg(map(".m0")))	/* last .m */
#define n_mk		(*nreg(map(".mk")))	/* .mk internal register */
#define n_na		(*nreg(map(".na")))	/* .na mode */
#define n_ns		(*nreg(map(".ns")))	/* .ns mode */
#define n_o0		(*nreg(map(".o0")))	/* last .o */
#define n_ss		(*nreg(map(".ss")))	/* word space (.ss) */
#define n_sss		(*nreg(map(".sss")))	/* sentence space (.ss) */
#define n_ssh		(*nreg(map(".ssh")))	/* word space compression (.ssh) */
#define n_s0		(*nreg(map(".s0")))	/* last .s */
#define n_sv		(*nreg(map(".sv")))	/* .sv value */
#define n_lt		(*nreg(map(".lt")))	/* .lt value */
#define n_t0		(*nreg(map(".lt0")))	/* previous .lt value */
#define n_v0		(*nreg(map(".v0")))	/* last .v */
#define n_llx		(*nreg(map("bbllx")))	/* \w bounding box */
#define n_lly		(*nreg(map("bblly")))	/* \w bounding box */
#define n_urx		(*nreg(map("bburx")))	/* \w bounding box */
#define n_ury		(*nreg(map("bbury")))	/* \w bounding box */
/* functions for implementing read-only registers; each returns the value of the register named on its line */
int f_nexttrap(void);	/* .t */
int f_divreg(void);	/* .z */
int f_hpos(void);	/* .k */