tr: do not alter the line number in .chop
[neatroff.git] / hyph.c
blobc23619601b1f894060fbde0f23d084f1f1d20cbd
1 /* hyphenation */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "roff.h"
6 #include "hyen.h"
8 #define HYPATLEN (NHYPHS * 16) /* hyphenation pattern length */
10 static void hcode_strcpy(char *d, char *s, int *map, int dots);
12 /* the hyphenation dictionary (.hw) */
14 static char hwword[HYPATLEN]; /* buffer for .hw words */
15 static char hwhyph[HYPATLEN]; /* buffer for .hw hyphenations */
16 static int hwword_len; /* used hwword[] length */
17 static struct dict *hwdict; /* map words to their index in hwoff[] */
18 static int hwoff[NHYPHS]; /* the offset of words in hwword[] */
19 static int hw_n; /* the number of dictionary words */
21 /* insert word s into hwword[] and hwhyph[] */
22 static void hw_add(char *s)
24 char *p = hwword + hwword_len;
25 char *n = hwhyph + hwword_len;
26 int len = strlen(s) + 1;
27 int i = 0, c;
28 if (hw_n == NHYPHS || hwword_len + len > sizeof(hwword))
29 return;
30 memset(n, 0, len);
31 while ((c = (unsigned char) *s++)) {
32 if (c == '-')
33 n[i] = 1;
34 else
35 p[i++] = c;
37 p[i] = '\0';
38 hwoff[hw_n] = hwword_len;
39 dict_put(hwdict, hwword + hwoff[hw_n], hw_n);
40 hwword_len += i + 1;
41 hw_n++;
44 static int hw_lookup(char *word, char *hyph)
46 char word2[WORDLEN] = {0};
47 char *hyph2;
48 int map[WORDLEN] = {0};
49 int i, j, idx = -1;
50 hcode_strcpy(word2, word, map, 0);
51 i = dict_prefix(hwdict, word2, &idx);
52 if (i < 0)
53 return 1;
54 hyph2 = hwhyph + hwoff[i];
55 for (j = 0; word2[j]; j++)
56 if (hyph2[j])
57 hyph[map[j]] = hyph2[j];
58 return 0;
61 void tr_hw(char **args)
63 int i;
64 for (i = 1; i < NARGS && args[i]; i++)
65 hw_add(args[i]);
68 /* the tex hyphenation algorithm */
70 static int hyinit; /* hyphenation data initialized */
71 static char hypats[HYPATLEN]; /* hyphenation patterns */
72 static char hynums[HYPATLEN]; /* hyphenation pattern numbers */
73 static int hypats_len; /* used hypats[] and hynums[] length */
74 static struct dict *hydict; /* map patterns to their index in hyoff[] */
75 static int hyoff[NHYPHS]; /* the offset of this pattern in hypats[] */
76 static int hy_n; /* the number of patterns */
78 /* find the patterns matching s and update hyphenation values in n */
79 static void hy_find(char *s, char *n)
81 int plen;
82 char *p, *np;
83 int i, j;
84 int idx = -1;
85 while ((i = dict_prefix(hydict, s, &idx)) >= 0) {
86 p = hypats + hyoff[i];
87 np = hynums + (p - hypats);
88 plen = strlen(p) + 1;
89 for (j = 0; j < plen; j++)
90 if (n[j] < np[j])
91 n[j] = np[j];
95 /* mark the hyphenation points of word in hyph */
96 static void hy_dohyph(char *hyph, char *word, int flg)
98 char n[WORDLEN] = {0};
99 char w[WORDLEN] = {0};
100 int c[WORDLEN]; /* start of the i-th character in w */
101 int wmap[WORDLEN] = {0}; /* word[wmap[i]] is w[i] */
102 int nc = 0;
103 int i, wlen;
104 hcode_strcpy(w, word, wmap, 1);
105 wlen = strlen(w);
106 for (i = 0; i < wlen - 1; i += utf8len((unsigned char) w[i]))
107 c[nc++] = i;
108 for (i = 0; i < nc - 1; i++)
109 hy_find(w + c[i], n + c[i]);
110 memset(hyph, 0, wlen * sizeof(hyph[0]));
111 for (i = 3; i < nc - 2; i++)
112 if (n[c[i]] % 2 && w[c[i - 1]] != '.' &&
113 w[c[i - 2]] != '.' && w[c[i + 1]] != '.' &&
114 (~flg & HY_FINAL2 || w[c[i + 2]] != '.') &&
115 (~flg & HY_FIRST2 || w[c[i - 3]] != '.'))
116 hyph[wmap[c[i]]] = 1;
119 /* insert pattern s into hypats[] and hynums[] */
120 static void hy_add(char *s)
122 char *p = hypats + hypats_len;
123 char *n = hynums + hypats_len;
124 int len = strlen(s) + 1;
125 int i = 0, c;
126 if (hy_n >= NHYPHS || hypats_len + len >= sizeof(hypats))
127 return;
128 memset(n, 0, len);
129 while ((c = (unsigned char) *s++)) {
130 if (c >= '0' && c <= '9')
131 n[i] = c - '0';
132 else
133 p[i++] = c;
135 p[i] = '\0';
136 hyoff[hy_n] = hypats_len;
137 dict_put(hydict, hypats + hyoff[hy_n], hy_n);
138 hypats_len += i + 1;
139 hy_n++;
142 /* .hcode request */
143 static struct dict *hcodedict;
144 static char hcodesrc[NHCODES][GNLEN];
145 static char hcodedst[NHCODES][GNLEN];
146 static int hcode_n;
148 /* replace the character in s after .hcode mapping; returns s's new length */
149 static int hcode_mapchar(char *s)
151 int i = dict_get(hcodedict, s);
152 if (i >= 0)
153 strcpy(s, hcodedst[i]);
154 else if (isalpha((unsigned char) *s))
155 *s = tolower(*s);
156 return strlen(s);
159 /* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
160 static void hcode_strcpy(char *d, char *s, int *map, int dots)
162 int di = 0, si = 0, len;
163 if (dots)
164 d[di++] = '.';
165 while (di < WORDLEN - GNLEN && s[si]) {
166 len = utf8len((unsigned char) s[si]);
167 map[di] = si;
168 memcpy(d + di, s + si, len);
169 si += len;
170 di += hcode_mapchar(d + di);
172 if (dots)
173 d[di++] = '.';
174 d[di] = '\0';
177 static void hcode_add(char *c1, char *c2)
179 int i = dict_get(hcodedict, c1);
180 if (i >= 0) {
181 strcpy(hcodedst[i], c2);
182 } else if (hcode_n < NHCODES) {
183 strcpy(hcodesrc[hcode_n], c1);
184 strcpy(hcodedst[hcode_n], c2);
185 dict_put(hcodedict, hcodesrc[hcode_n], hcode_n);
186 hcode_n++;
190 void tr_hcode(char **args)
192 char c1[GNLEN], c2[GNLEN];
193 char *s = args[1];
194 while (s && utf8read(&s, c1) && utf8read(&s, c2))
195 hcode_add(c1, c2);
198 static void hyph_readpatterns(char *s)
200 char word[WORDLEN];
201 char *d;
202 while (*s) {
203 d = word;
204 while (*s && !isspace((unsigned char) *s))
205 *d++ = *s++;
206 *d = '\0';
207 hy_add(word);
208 while (*s && isspace((unsigned char) *s))
209 s++;
213 static void hyph_readexceptions(char *s)
215 char word[WORDLEN];
216 char *d;
217 while (*s) {
218 d = word;
219 while (*s && !isspace((unsigned char) *s))
220 *d++ = *s++;
221 *d = '\0';
222 hw_add(word);
223 while (*s && isspace((unsigned char) *s))
224 s++;
228 void hyphenate(char *hyph, char *word, int flg)
230 if (!hyinit) {
231 hyinit = 1;
232 hyph_readpatterns(en_patterns);
233 hyph_readexceptions(en_exceptions);
235 if (hw_lookup(word, hyph))
236 hy_dohyph(hyph, word, flg);
239 void tr_hpfa(char **args)
241 char tok[ILNLEN], c1[ILNLEN], c2[ILNLEN];
242 FILE *filp;
243 hyinit = 1;
244 /* load english hyphenation patterns with no arguments */
245 if (!args[1]) {
246 hyph_readpatterns(en_patterns);
247 hyph_readexceptions(en_exceptions);
249 /* reading patterns */
250 if (args[1] && (filp = fopen(args[1], "r"))) {
251 while (fscanf(filp, "%s", tok) == 1)
252 if (strlen(tok) < WORDLEN)
253 hy_add(tok);
254 fclose(filp);
256 /* reading exceptions */
257 if (args[2] && (filp = fopen(args[2], "r"))) {
258 while (fscanf(filp, "%s", tok) == 1)
259 if (strlen(tok) < WORDLEN)
260 hw_add(tok);
261 fclose(filp);
263 /* reading hcode mappings */
264 if (args[3] && (filp = fopen(args[3], "r"))) {
265 while (fscanf(filp, "%s", tok) == 1) {
266 char *s = tok;
267 if (utf8read(&s, c1) && utf8read(&s, c2) && !*s)
268 hcode_add(c2, c1); /* inverting */
270 fclose(filp);
274 void hyph_init(void)
276 hwdict = dict_make(-1, 0, 1);
277 hydict = dict_make(-1, 0, 1);
278 hcodedict = dict_make(-1, 0, 1);
281 void hyph_done(void)
283 if (hwdict)
284 dict_free(hwdict);
285 if (hydict)
286 dict_free(hydict);
287 if (hcodedict)
288 dict_free(hcodedict);
291 void tr_hpf(char **args)
293 /* reseting the patterns */
294 hypats_len = 0;
295 hy_n = 0;
296 dict_free(hydict);
297 /* reseting the dictionary */
298 hwword_len = 0;
299 hw_n = 0;
300 dict_free(hwdict);
301 /* reseting hcode mappings */
302 hcode_n = 0;
303 dict_free(hcodedict);
304 /* reading */
305 hyph_init();
306 tr_hpfa(args);