font: unmap glyphs with .fmap
[neatroff.git] / hyph.c
blob4d5e1f150e20aafa80552f4c48332492cc0aae44
1 /* hyphenation */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "roff.h"
6 #include "hyen.h"
8 #define HYPATLEN (NHYPHS * 16) /* size of the pattern and dictionary buffers (hypats[], hynums[], hwword[], hwhyph[]) */
/* forward declarations; both are defined with the .hcode code below */
10 static void hcode_strcpy(char *d, char *s, int *map, int dots);
11 static int hcode_mapchar(char *s);
13 /* the hyphenation dictionary (.hw); filled by hw_add() and queried by hw_lookup() */
15 static char hwword[HYPATLEN]; /* NUL-terminated .hw words with '-' removed, stored back to back */
16 static char hwhyph[HYPATLEN]; /* parallel to hwword[]: nonzero at the character that followed a '-' */
17 static int hwword_len; /* used length of hwword[] (and of hwhyph[]) */
18 static struct dict *hwdict; /* map words to their index in hwoff[] */
19 static int hwoff[NHYPHS]; /* the offset of each word in hwword[] */
20 static int hw_n; /* the number of dictionary words */
22 /* read a single character from s into d; return the number of characters read */
23 static int hy_cget(char *d, char *s)
25 int i = 0;
26 if (s[0] != '\\')
27 return utf8read(&s, d);
28 if (s[1] == '[') {
29 s += 2;
30 while (*s && *s != ']' && i < GNLEN - 1)
31 d[i++] = *s++;
32 d[i] = '\0';
33 return *s ? i + 3 : i + 2;
35 if (s[1] == '(') {
36 s += 2;
37 i += utf8read(&s, d + i);
38 i += utf8read(&s, d + i);
39 return 2 + i;
41 if (s[1] == 'C') {
42 int q = s[2];
43 s += 3;
44 while (*s && *s != q && i < GNLEN - 1)
45 d[i++] = *s++;
46 d[i] = '\0';
47 return *s ? i + 4 : i + 3;
49 *d++ = *s++;
50 return 1 + utf8read(&s, d);
53 /* append character s to d; return the number of characters written */
54 int hy_cput(char *d, char *s)
56 if (!s[0] || !s[1] || utf8one(s))
57 strcpy(d, s);
58 else if (s[0] == '\\')
59 strcpy(d, s);
60 else if (!s[2])
61 snprintf(d, GNLEN, "\\[%s]", s);
62 return strlen(d);
65 /* insert word s into hwword[] and hwhyph[] */
66 static void hw_add(char *s)
68 char *p = hwword + hwword_len;
69 char *n = hwhyph + hwword_len;
70 int len = strlen(s) + 1;
71 int i = 0, c;
72 if (hw_n == NHYPHS || hwword_len + len > sizeof(hwword))
73 return;
74 memset(n, 0, len);
75 while ((c = (unsigned char) *s++)) {
76 if (c == '-')
77 n[i] = 1;
78 else
79 p[i++] = c;
81 p[i] = '\0';
82 hwoff[hw_n] = hwword_len;
83 dict_put(hwdict, hwword + hwoff[hw_n], hw_n);
84 hwword_len += i + 1;
85 hw_n++;
88 static int hw_lookup(char *word, char *hyph)
90 char word2[WORDLEN] = {0};
91 char *hyph2;
92 int map[WORDLEN] = {0};
93 int off = 0;
94 int i, j, idx = -1;
95 hcode_strcpy(word2, word, map, 0);
96 while (word2[off] == '.') /* skip unknown characters at the front */
97 off++;
98 i = dict_prefix(hwdict, word2 + off, &idx);
99 if (i < 0)
100 return 1;
101 hyph2 = hwhyph + hwoff[i];
102 for (j = 0; word2[j + off]; j++)
103 if (hyph2[j])
104 hyph[map[j + off]] = hyph2[j];
105 return 0;
108 void tr_hw(char **args)
110 char word[WORDLEN];
111 char *c;
112 int i;
113 for (i = 1; i < NARGS && args[i]; i++) {
114 char *s = args[i];
115 char *d = word;
116 while (d - word < WORDLEN - GNLEN && !escread(&s, &c)) {
117 if (strcmp("-", c))
118 hcode_mapchar(c);
119 d += hy_cput(d, c);
121 hw_add(word);
125 /* the tex hyphenation algorithm; patterns are stored by hy_add() and matched by hy_find() */
127 static int hyinit; /* set once hyphenate() or tr_hpfa() has loaded the hyphenation data */
128 static char hypats[HYPATLEN]; /* NUL-terminated patterns with their digits removed, stored back to back */
129 static char hynums[HYPATLEN]; /* parallel to hypats[]: the digit that preceded each pattern character */
130 static int hypats_len; /* used hypats[] and hynums[] length */
131 static struct dict *hydict; /* map patterns to their index in hyoff[] */
132 static int hyoff[NHYPHS]; /* the offset of each pattern in hypats[] */
133 static int hy_n; /* the number of patterns */
135 /* find the patterns matching s and update hyphenation values in n */
136 static void hy_find(char *s, char *n)
138 int plen;
139 char *p, *np;
140 int i, j;
141 int idx = -1;
142 while ((i = dict_prefix(hydict, s, &idx)) >= 0) {
143 p = hypats + hyoff[i];
144 np = hynums + (p - hypats);
145 plen = strlen(p) + 1;
146 for (j = 0; j < plen; j++)
147 if (n[j] < np[j])
148 n[j] = np[j];
152 /* mark the hyphenation points of word in hyph */
153 static void hy_dohyph(char *hyph, char *word, int flg)
155 char w[WORDLEN] = {0}; /* cleaned-up word[]; "Abc" -> ".abc." */
156 char n[WORDLEN] = {0}; /* the hyphenation value for w[] */
157 int c[WORDLEN]; /* start of the i-th character in w */
158 int wmap[WORDLEN] = {0}; /* w[i] corresponds to word[wmap[i]] */
159 char ch[GNLEN];
160 int nc = 0;
161 int i, wlen;
162 hcode_strcpy(w, word, wmap, 1);
163 wlen = strlen(w);
164 for (i = 0; i < wlen - 1; i += hy_cget(ch, w + i))
165 c[nc++] = i;
166 for (i = 0; i < nc - 1; i++)
167 hy_find(w + c[i], n + c[i]);
168 memset(hyph, 0, wlen * sizeof(hyph[0]));
169 for (i = 3; i < nc - 2; i++)
170 if (n[c[i]] % 2 && w[c[i - 1]] != '.' && w[c[i]] != '.' &&
171 w[c[i - 2]] != '.' && w[c[i + 1]] != '.' &&
172 (~flg & HY_FINAL2 || w[c[i + 2]] != '.') &&
173 (~flg & HY_FIRST2 || w[c[i - 3]] != '.'))
174 hyph[wmap[c[i]]] = 1;
177 /* insert pattern s into hypats[] and hynums[] */
178 static void hy_add(char *s)
180 char *p = hypats + hypats_len;
181 char *n = hynums + hypats_len;
182 int len = strlen(s) + 1;
183 int i = 0, c;
184 if (hy_n >= NHYPHS || hypats_len + len >= sizeof(hypats))
185 return;
186 memset(n, 0, len);
187 while ((c = (unsigned char) *s++)) {
188 if (c >= '0' && c <= '9')
189 n[i] = c - '0';
190 else
191 p[i++] = c;
193 p[i] = '\0';
194 hyoff[hy_n] = hypats_len;
195 dict_put(hydict, hypats + hyoff[hy_n], hy_n);
196 hypats_len += i + 1;
197 hy_n++;
200 /* .hcode request: character mappings applied before hyphenation */
201 static struct dict *hcodedict; /* map source characters to their index in hcodesrc[] and hcodedst[] */
202 static char hcodesrc[NHCODES][GNLEN]; /* the characters to be mapped */
203 static char hcodedst[NHCODES][GNLEN]; /* their replacements */
204 static int hcode_n; /* the number of mappings */
206 /* replace the character in s after .hcode mapping; returns s's new length */
207 static int hcode_mapchar(char *s)
209 int i = dict_get(hcodedict, s);
210 if (i >= 0)
211 strcpy(s, hcodedst[i]);
212 else if (!s[1])
213 *s = isalpha((unsigned char) *s) ? tolower((unsigned char) *s) : '.';
214 return strlen(s);
217 /* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
218 static void hcode_strcpy(char *d, char *s, int *map, int dots)
220 char c[GNLEN];
221 int di = 0, si = 0;
222 if (dots)
223 d[di++] = '.';
224 while (di < WORDLEN - GNLEN && s[si]) {
225 map[di] = si;
226 si += hy_cget(c, s + si);
227 hcode_mapchar(c);
228 di += hy_cput(d + di, c);
230 if (dots)
231 d[di++] = '.';
232 d[di] = '\0';
235 static void hcode_add(char *c1, char *c2)
237 int i = dict_get(hcodedict, c1);
238 if (i >= 0) {
239 strcpy(hcodedst[i], c2);
240 } else if (hcode_n < NHCODES) {
241 strcpy(hcodesrc[hcode_n], c1);
242 strcpy(hcodedst[hcode_n], c2);
243 dict_put(hcodedict, hcodesrc[hcode_n], hcode_n);
244 hcode_n++;
248 void tr_hcode(char **args)
250 char c1[GNLEN], c2[GNLEN];
251 char *s = args[1];
252 while (s && charread(&s, c1) >= 0 && charread(&s, c2) >= 0)
253 hcode_add(c1, c2);
256 static void hyph_readpatterns(char *s)
258 char word[WORDLEN];
259 char *d;
260 while (*s) {
261 d = word;
262 while (*s && !isspace((unsigned char) *s))
263 *d++ = *s++;
264 *d = '\0';
265 hy_add(word);
266 while (*s && isspace((unsigned char) *s))
267 s++;
271 static void hyph_readexceptions(char *s)
273 char word[WORDLEN];
274 char *d;
275 while (*s) {
276 d = word;
277 while (*s && !isspace((unsigned char) *s))
278 *d++ = *s++;
279 *d = '\0';
280 hw_add(word);
281 while (*s && isspace((unsigned char) *s))
282 s++;
286 void hyphenate(char *hyph, char *word, int flg)
288 if (!hyinit) {
289 hyinit = 1;
290 hyph_readpatterns(en_patterns);
291 hyph_readexceptions(en_exceptions);
293 if (hw_lookup(word, hyph))
294 hy_dohyph(hyph, word, flg);
297 void tr_hpfa(char **args)
299 char tok[ILNLEN], c1[ILNLEN], c2[ILNLEN];
300 FILE *filp;
301 hyinit = 1;
302 /* load english hyphenation patterns with no arguments */
303 if (!args[1]) {
304 hyph_readpatterns(en_patterns);
305 hyph_readexceptions(en_exceptions);
307 /* reading patterns */
308 if (args[1] && (filp = fopen(args[1], "r"))) {
309 while (fscanf(filp, "%s", tok) == 1)
310 if (strlen(tok) < WORDLEN)
311 hy_add(tok);
312 fclose(filp);
314 /* reading exceptions */
315 if (args[2] && (filp = fopen(args[2], "r"))) {
316 while (fscanf(filp, "%s", tok) == 1)
317 if (strlen(tok) < WORDLEN)
318 hw_add(tok);
319 fclose(filp);
321 /* reading hcode mappings */
322 if (args[3] && (filp = fopen(args[3], "r"))) {
323 while (fscanf(filp, "%s", tok) == 1) {
324 char *s = tok;
325 if (utf8read(&s, c1) && utf8read(&s, c2) && !*s)
326 hcode_add(c2, c1); /* inverting */
328 fclose(filp);
332 void hyph_init(void)
334 hwdict = dict_make(-1, 0, 1);
335 hydict = dict_make(-1, 0, 1);
336 hcodedict = dict_make(-1, 0, 1);
339 void hyph_done(void)
341 if (hwdict)
342 dict_free(hwdict);
343 if (hydict)
344 dict_free(hydict);
345 if (hcodedict)
346 dict_free(hcodedict);
349 void tr_hpf(char **args)
351 /* reseting the patterns */
352 hypats_len = 0;
353 hy_n = 0;
354 dict_free(hydict);
355 /* reseting the dictionary */
356 hwword_len = 0;
357 hw_n = 0;
358 dict_free(hwdict);
359 /* reseting hcode mappings */
360 hcode_n = 0;
361 dict_free(hcodedict);
362 /* reading */
363 hyph_init();
364 tr_hpfa(args);