tr: clean up macrobody()
[neatroff.git] / hyph.c
blob3348c27f8a9676613232782f47a2185d4961649f
1 /* hyphenation */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "roff.h"
6 #include "hyen.h"
8 #define HYPATLEN (NHYPHS * 16) /* hyphenation pattern length */
10 static void hcode_strcpy(char *d, char *s, int *map, int dots);
12 /* the hyphenation dictionary (.hw) */
14 static char hwword[HYPATLEN]; /* buffer for .hw words */
15 static char hwhyph[HYPATLEN]; /* buffer for .hw hyphenations */
16 static int hwword_len; /* used hwword[] length */
17 static struct dict hwdict; /* map words to their index in hwoff[] */
18 static int hwoff[NHYPHS]; /* the offset of words in hwword[] */
19 static int hw_n; /* the number of dictionary words */
21 static void hw_add(char *word)
23 char *s = word;
24 char *d = hwword + hwword_len;
25 int c, i;
26 if (hw_n == LEN(hwoff) || hwword_len + 128 > sizeof(hwword))
27 return;
28 i = hw_n++;
29 while ((c = *s++)) {
30 if (c == '-')
31 hwhyph[d - hwword] = 1;
32 else
33 *d++ = c;
35 *d++ = '\0';
36 hwoff[i] = hwword_len;
37 hwword_len = d - hwword;
38 dict_put(&hwdict, hwword + hwoff[i], i);
41 static int hw_lookup(char *word, char *hyph)
43 char word2[WORDLEN] = {0};
44 char *hyph2;
45 int map[WORDLEN] = {0};
46 int i, j, idx = -1;
47 hcode_strcpy(word2, word, map, 0);
48 i = dict_prefix(&hwdict, word2, &idx);
49 if (i < 0)
50 return 1;
51 hyph2 = hwhyph + hwoff[i];
52 for (j = 0; word2[j]; j++)
53 if (hyph2[j])
54 hyph[map[j]] = hyph2[j];
55 return 0;
58 void tr_hw(char **args)
60 int i;
61 for (i = 1; i < NARGS && args[i]; i++)
62 hw_add(args[i]);
65 /* the tex hyphenation algorithm */
67 static int hyinit; /* hyphenation data initialized */
68 static char hypats[HYPATLEN]; /* hyphenation patterns */
69 static char hynums[HYPATLEN]; /* hyphenation pattern numbers */
70 static int hypats_len; /* used hypats[] and hynums[] length */
71 static struct dict hydict; /* map patterns to their index in hyoff[] */
72 static int hyoff[NHYPHS]; /* the offset of this pattern in hypats[] */
73 static int hy_n; /* the number of patterns */
75 /* find the patterns matching s and update hyphenation values in n */
76 static void hy_find(char *s, char *n)
78 int plen;
79 char *p, *np;
80 int i, j;
81 int idx = -1;
82 while ((i = dict_prefix(&hydict, s, &idx)) >= 0) {
83 p = hypats + hyoff[i];
84 np = hynums + (p - hypats);
85 plen = strlen(p);
86 for (j = 0; j < plen; j++)
87 if (n[j] < np[j])
88 n[j] = np[j];
92 /* mark the hyphenation points of word in hyph */
93 static void hy_dohyph(char *hyph, char *word, int flg)
95 char n[WORDLEN] = {0};
96 char w[WORDLEN] = {0};
97 int c[WORDLEN]; /* start of the i-th character in w */
98 int wmap[WORDLEN] = {0}; /* word[wmap[i]] is w[i] */
99 int nc = 0;
100 int i, wlen;
101 hcode_strcpy(w, word, wmap, 1);
102 wlen = strlen(w);
103 for (i = 0; i < wlen - 1; i += utf8len((unsigned int) w[i]))
104 c[nc++] = i;
105 for (i = 0; i < nc - 1; i++)
106 hy_find(w + c[i], n + c[i]);
107 memset(hyph, 0, wlen * sizeof(hyph[0]));
108 for (i = 3; i < nc - 2; i++)
109 if (n[i] % 2 && w[c[i - 1]] != '.' && w[c[i - 2]] != '.' && w[c[i + 1]] != '.')
110 hyph[wmap[c[i]]] = (~flg & HY_FINAL2 || w[c[i + 2]] != '.') &&
111 (~flg & HY_FIRST2 || w[c[i - 3]] != '.');
114 /* insert pattern s into hypats[] and hynums[] */
115 static void hy_add(char *s)
117 char *p = hypats + hypats_len;
118 char *n = hynums + hypats_len;
119 int i = 0, idx;
120 if (hy_n >= NHYPHS || hypats_len + 64 >= sizeof(hypats))
121 return;
122 idx = hy_n++;
123 while (*s) {
124 if (*s >= '0' && *s <= '9')
125 n[i] = *s++ - '0';
126 else
127 p[i++] = *s++;
129 p[i] = '\0';
130 hyoff[idx] = hypats_len;
131 dict_put(&hydict, hypats + hyoff[idx], idx);
132 hypats_len += i + 1;
135 /* .hcode request */
136 static struct dict hcodedict;
137 static char hcodesrc[NHCODES][GNLEN];
138 static char hcodedst[NHCODES][GNLEN];
139 static int hcode_n;
141 /* replace the character in s after .hcode mapping; returns s's new length */
142 static int hcode_mapchar(char *s)
144 int i = dict_get(&hcodedict, s);
145 if (i >= 0)
146 strcpy(s, hcodedst[i]);
147 else if (isalpha((unsigned char) *s))
148 *s = tolower(*s);
149 return strlen(s);
/*
 * copy s to d after .hcode mappings; s[map[j]] corresponds to d[j]
 *
 * map[] is set only at the offset in d where each copied character
 * starts.  With dots set, d is enclosed in a leading and a trailing
 * '.'.  The size of d is not checked; the caller must make it large
 * enough for the mapped string.
 */
static void hcode_strcpy(char *d, char *s, int *map, int dots)
{
	int di = 0, si = 0, len, n;
	if (dots)
		d[di++] = '.';
	while (s[si]) {
		n = utf8len((unsigned char) s[si]);
		/* never step over the end of s, even for a truncated sequence */
		for (len = 1; len < n && s[si + len]; len++)
			;
		map[di] = si;
		memcpy(d + di, s + si, len);
		/* hcode_mapchar() needs a string; do not rely on d being zero-filled */
		d[di + len] = '\0';
		si += len;
		di += hcode_mapchar(d + di);
	}
	if (dots)
		d[di++] = '.';
	d[di] = '\0';
}
170 void tr_hcode(char **args)
172 char c1[GNLEN], c2[GNLEN];
173 char *s = args[1];
174 int i;
175 while (s && charread(&s, c1) >= 0 && charread(&s, c2) >= 0) {
176 i = dict_get(&hcodedict, c1);
177 if (i >= 0) {
178 strcpy(hcodedst[i], c2);
179 } else if (hcode_n < NHCODES) {
180 strcpy(hcodesrc[hcode_n], c1);
181 strcpy(hcodedst[hcode_n], c2);
182 dict_put(&hcodedict, hcodesrc[hcode_n], hcode_n);
183 hcode_n++;
188 static void hyph_readpatterns(char *s)
190 char word[WORDLEN];
191 char *d;
192 while (*s) {
193 d = word;
194 while (*s && !isspace((unsigned char) *s))
195 *d++ = *s++;
196 *d = '\0';
197 hy_add(word);
198 while (*s && isspace((unsigned char) *s))
199 s++;
203 static void hyph_readexceptions(char *s)
205 char word[WORDLEN];
206 char *d;
207 while (*s) {
208 d = word;
209 while (*s && !isspace((unsigned char) *s))
210 *d++ = *s++;
211 *d = '\0';
212 hw_add(word);
213 while (*s && isspace((unsigned char) *s))
214 s++;
218 void hyphenate(char *hyph, char *word, int flg)
220 if (!hyinit) {
221 hyinit = 1;
222 hyph_readpatterns(en_patterns);
223 hyph_readexceptions(en_exceptions);
225 if (hw_lookup(word, hyph))
226 hy_dohyph(hyph, word, flg);
229 void tr_hpfa(char **args)
231 char tok[ILNLEN];
232 FILE *filp;
233 hyinit = 1;
234 /* load english hyphenation patterns with no arguments */
235 if (!args[1]) {
236 hyph_readpatterns(en_patterns);
237 hyph_readexceptions(en_exceptions);
239 /* reading patterns */
240 if (args[1]) {
241 filp = fopen(args[1], "r");
242 while (fscanf(filp, "%s", tok) == 1)
243 if (strlen(tok) < WORDLEN)
244 hy_add(tok);
245 fclose(filp);
247 /* reading exceptions */
248 if (args[2]) {
249 filp = fopen(args[1], "r");
250 while (fscanf(filp, "%s", tok) == 1)
251 if (strlen(tok) < WORDLEN)
252 hw_add(tok);
253 fclose(filp);
257 void hyph_init(void)
259 dict_init(&hwdict, NHYPHS, -1, 0, 1);
260 dict_init(&hydict, NHYPHS, -1, 0, 1);
261 dict_init(&hcodedict, NHYPHS, -1, 0, 1);
264 void tr_hpf(char **args)
266 /* reseting the patterns */
267 hypats_len = 0;
268 hy_n = 0;
269 dict_done(&hydict);
270 /* reseting the dictionary */
271 hwword_len = 0;
272 hw_n = 0;
273 dict_done(&hwdict);
274 /* reading */
275 hyph_init();
276 tr_hpfa(args);