8 #define HYPATLEN (NHYPHS * 16) /* byte size of the hwword[], hwhyph[], hypats[] and hynums[] buffers: 16 bytes for each of NHYPHS entries */
10 static void hcode_strcpy(char *d
, char *s
, int *map
, int dots
); /* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
11 static int hcode_mapchar(char *s
); /* replace the character in s after .hcode mapping; returns s's new length */
13 /* the hyphenation dictionary (.hw): words and their hyphenation marks are kept in two buffers addressed by the same offsets */
15 static char hwword
[HYPATLEN
]; /* buffer for .hw words; hw_add() writes at hwword + hwword_len */
16 static char hwhyph
[HYPATLEN
]; /* buffer for .hw hyphenations; addressed with the same offsets as hwword[] */
17 static int hwword_len
; /* used hwword[] length; also the write position in hwhyph[] */
18 static struct dict
*hwdict
; /* map words (keys point into hwword[]) to their index in hwoff[] */
19 static int hwoff
[NHYPHS
]; /* the offset of words in hwword[] (and of their marks in hwhyph[]) */
20 static int hw_n
; /* the number of dictionary words; hw_add() checks it against NHYPHS */
22 /* read a single character from s into d; return the number of characters read */
23 static int hy_cget(char *d
, char *s
)
27 return utf8read(&s
, d
);
30 while (*s
&& *s
!= ']' && i
< GNLEN
- 1)
33 return *s
? i
+ 3 : i
+ 2;
37 i
+= utf8read(&s
, d
+ i
);
38 i
+= utf8read(&s
, d
+ i
);
44 while (*s
&& *s
!= q
&& i
< GNLEN
- 1)
47 return *s
? i
+ 4 : i
+ 3;
50 return 1 + utf8read(&s
, d
);
53 /* append character s to d; return the number of characters written */
54 int hy_cput(char *d
, char *s
)
56 if (!s
[0] || !s
[1] || utf8one(s
))
58 else if (s
[0] == '\\')
61 snprintf(d
, GNLEN
, "\\[%s]", s
);
65 /* insert word s into hwword[] and hwhyph[] */
66 static void hw_add(char *s
)
68 char *p
= hwword
+ hwword_len
;
69 char *n
= hwhyph
+ hwword_len
;
70 int len
= strlen(s
) + 1;
72 if (hw_n
== NHYPHS
|| hwword_len
+ len
> sizeof(hwword
))
75 while ((c
= (unsigned char) *s
++)) {
82 hwoff
[hw_n
] = hwword_len
;
83 dict_put(hwdict
, hwword
+ hwoff
[hw_n
], hw_n
);
88 static int hw_lookup(char *word
, char *hyph
)
90 char word2
[WORDLEN
] = {0};
92 int map
[WORDLEN
] = {0};
95 hcode_strcpy(word2
, word
, map
, 0);
96 while (word2
[off
] == '.') /* skip unknown characters at the front */
98 i
= dict_prefix(hwdict
, word2
+ off
, &idx
);
101 hyph2
= hwhyph
+ hwoff
[i
];
102 for (j
= 0; word2
[j
+ off
]; j
++)
104 hyph
[map
[j
+ off
]] = hyph2
[j
];
108 void tr_hw(char **args
)
113 for (i
= 1; i
< NARGS
&& args
[i
]; i
++) {
116 while (d
- word
< WORDLEN
- GNLEN
&& !escread(&s
, &c
)) {
125 /* the TeX hyphenation algorithm */
127 static int hyinit
; /* hyphenation data initialized */
128 static char hypats
[HYPATLEN
]; /* hyphenation patterns; hy_add() writes at hypats + hypats_len */
129 static char hynums
[HYPATLEN
]; /* hyphenation pattern numbers; hynums[k] belongs to hypats[k] */
130 static int hypats_len
; /* used hypats[] and hynums[] length */
131 static struct dict
*hydict
; /* map patterns (keys point into hypats[]) to their index in hyoff[] */
132 static int hyoff
[NHYPHS
]; /* the offset of this pattern in hypats[] */
133 static int hy_n
; /* the number of patterns; hy_add() checks it against NHYPHS */
135 /* find the patterns matching s and update hyphenation values in n */
136 static void hy_find(char *s
, char *n
)
142 while ((i
= dict_prefix(hydict
, s
, &idx
)) >= 0) {
143 p
= hypats
+ hyoff
[i
];
144 np
= hynums
+ (p
- hypats
);
145 plen
= strlen(p
) + 1;
146 for (j
= 0; j
< plen
; j
++)
152 /* mark the hyphenation points of word in hyph */
153 static void hy_dohyph(char *hyph
, char *word
, int flg
)
155 char w
[WORDLEN
] = {0}; /* cleaned-up word[]; "Abc" -> ".abc." */
156 char n
[WORDLEN
] = {0}; /* the hyphenation value for w[] */
157 int c
[WORDLEN
]; /* start of the i-th character in w */
158 int wmap
[WORDLEN
] = {0}; /* w[i] corresponds to word[wmap[i]] */
162 hcode_strcpy(w
, word
, wmap
, 1);
164 for (i
= 0; i
< wlen
- 1; i
+= hy_cget(ch
, w
+ i
))
166 for (i
= 0; i
< nc
- 1; i
++)
167 hy_find(w
+ c
[i
], n
+ c
[i
]);
168 memset(hyph
, 0, wlen
* sizeof(hyph
[0]));
169 for (i
= 3; i
< nc
- 2; i
++)
170 if (n
[c
[i
]] % 2 && w
[c
[i
- 1]] != '.' && w
[c
[i
]] != '.' &&
171 w
[c
[i
- 2]] != '.' && w
[c
[i
+ 1]] != '.' &&
172 (~flg
& HY_FINAL2
|| w
[c
[i
+ 2]] != '.') &&
173 (~flg
& HY_FIRST2
|| w
[c
[i
- 3]] != '.'))
174 hyph
[wmap
[c
[i
]]] = 1;
177 /* insert pattern s into hypats[] and hynums[] */
178 static void hy_add(char *s
)
180 char *p
= hypats
+ hypats_len
;
181 char *n
= hynums
+ hypats_len
;
182 int len
= strlen(s
) + 1;
184 if (hy_n
>= NHYPHS
|| hypats_len
+ len
>= sizeof(hypats
))
187 while ((c
= (unsigned char) *s
++)) {
188 if (c
>= '0' && c
<= '9')
194 hyoff
[hy_n
] = hypats_len
;
195 dict_put(hydict
, hypats
+ hyoff
[hy_n
], hy_n
);
201 static struct dict
*hcodedict
; /* maps each hcodesrc[] entry to its index */
202 static char hcodesrc
[NHCODES
][GNLEN
]; /* source characters registered through hcode_add() */
203 static char hcodedst
[NHCODES
][GNLEN
]; /* hcodedst[i] is the replacement for hcodesrc[i] */
206 /* replace the character in s after .hcode mapping; returns s's new length */
207 static int hcode_mapchar(char *s
)
209 int i
= dict_get(hcodedict
, s
);
211 strcpy(s
, hcodedst
[i
]);
213 *s
= isalpha((unsigned char) *s
) ? tolower((unsigned char) *s
) : '.';
217 /* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
218 static void hcode_strcpy(char *d
, char *s
, int *map
, int dots
)
224 while (di
< WORDLEN
- GNLEN
&& s
[si
]) {
226 si
+= hy_cget(c
, s
+ si
);
228 di
+= hy_cput(d
+ di
, c
);
235 static void hcode_add(char *c1
, char *c2
)
237 int i
= dict_get(hcodedict
, c1
);
239 strcpy(hcodedst
[i
], c2
);
240 } else if (hcode_n
< NHCODES
) {
241 strcpy(hcodesrc
[hcode_n
], c1
);
242 strcpy(hcodedst
[hcode_n
], c2
);
243 dict_put(hcodedict
, hcodesrc
[hcode_n
], hcode_n
);
248 void tr_hcode(char **args
)
250 char c1
[GNLEN
], c2
[GNLEN
];
252 while (s
&& charread(&s
, c1
) >= 0 && charread(&s
, c2
) >= 0)
256 static void hyph_readpatterns(char *s
)
262 while (*s
&& !isspace((unsigned char) *s
))
266 while (*s
&& isspace((unsigned char) *s
))
271 static void hyph_readexceptions(char *s
)
277 while (*s
&& !isspace((unsigned char) *s
))
281 while (*s
&& isspace((unsigned char) *s
))
286 void hyphenate(char *hyph
, char *word
, int flg
)
290 hyph_readpatterns(en_patterns
);
291 hyph_readexceptions(en_exceptions
);
293 if (hw_lookup(word
, hyph
))
294 hy_dohyph(hyph
, word
, flg
);
297 void tr_hpfa(char **args
)
299 char tok
[ILNLEN
], c1
[ILNLEN
], c2
[ILNLEN
];
302 /* load English hyphenation patterns when no arguments are given */
304 hyph_readpatterns(en_patterns
);
305 hyph_readexceptions(en_exceptions
);
307 /* reading patterns */
308 if (args
[1] && (filp
= fopen(args
[1], "r"))) {
309 while (fscanf(filp
, "%s", tok
) == 1)
310 if (strlen(tok
) < WORDLEN
)
314 /* reading exceptions */
315 if (args
[2] && (filp
= fopen(args
[2], "r"))) {
316 while (fscanf(filp
, "%s", tok
) == 1)
317 if (strlen(tok
) < WORDLEN
)
321 /* reading hcode mappings */
322 if (args
[3] && (filp
= fopen(args
[3], "r"))) {
323 while (fscanf(filp
, "%s", tok
) == 1) {
325 if (utf8read(&s
, c1
) && utf8read(&s
, c2
) && !*s
)
326 hcode_add(c2
, c1
); /* inverting */
334 hwdict
= dict_make(-1, 0, 1);
335 hydict
= dict_make(-1, 0, 1);
336 hcodedict
= dict_make(-1, 0, 1);
346 dict_free(hcodedict
);
349 void tr_hpf(char **args
)
351 /* resetting the patterns */
355 /* resetting the dictionary */
359 /* resetting hcode mappings */
361 dict_free(hcodedict
);