8 #define HYPATLEN (NHYPHS * 16) /* hyphenation pattern length */
10 static void hcode_strcpy(char *d
, char *s
, int *map
, int dots
);
11 static int hcode_mapchar(char *s
);
13 /* the hyphenation dictionary (.hw) */
15 static char hwword
[HYPATLEN
]; /* buffer for .hw words */
16 static char hwhyph
[HYPATLEN
]; /* buffer for .hw hyphenations */
17 static int hwword_len
; /* used hwword[] length */
18 static struct dict
*hwdict
; /* map words to their index in hwoff[] */
19 static int hwoff
[NHYPHS
]; /* the offset of words in hwword[] */
20 static int hw_n
; /* the number of dictionary words */
22 /* read a single character from s into d; return the number of characters read */
/*
 * Only part of this function is visible in this view.  The visible returns
 * show several input forms: a direct utf8read() of s, a name scanned up to
 * ']', a pair of utf8read() calls, a name scanned up to a delimiter q, and
 * a final form that returns one more than utf8read() reports.
 */
23 static int hy_cget(char *d
, char *s
)
27 return utf8read(&s
, d
);
/* stop at the end of s, at ']', or once i reaches GNLEN - 1 */
30 while (*s
&& *s
!= ']' && i
< GNLEN
- 1)
/*
 * *s is nonzero both when the scan stopped at ']' and when it stopped
 * because i reached GNLEN - 1.
 * NOTE(review): in the second case *s is presumably not ']', yet the
 * larger count is still returned -- confirm how over-long names should
 * be counted.
 */
33 return *s
? i
+ 3 : i
+ 2;
/* two consecutive utf8read() calls, each adding its result to i */
37 i
+= utf8read(&s
, d
+ i
);
38 i
+= utf8read(&s
, d
+ i
);
/* stop at the end of s, at the delimiter q, or once i reaches GNLEN - 1 */
44 while (*s
&& *s
!= q
&& i
< GNLEN
- 1)
/*
 * NOTE(review): as with the ']' form above, *s is also nonzero when the
 * scan stopped at the GNLEN - 1 limit rather than at q -- confirm.
 */
47 return *s
? i
+ 4 : i
+ 3;
50 return 1 + utf8read(&s
, d
);
53 /* append character s to d; return the number of characters written */
54 int hy_cput(char *d
, char *s
)
56 if (!s
[0] || !s
[1] || utf8one(s
))
58 else if (s
[0] == '\\')
61 snprintf(d
, GNLEN
, "\\[%s]", s
);
65 /* insert word s into hwword[] and hwhyph[] */
/*
 * p and n start at the same offset (hwword_len) in hwword[] and hwhyph[].
 * That offset is stored in hwoff[hw_n], and the word at that offset is
 * passed to dict_put() on hwdict together with the index hw_n.
 *
 * NOTE(review): the capacity guard below uses "==" and ">" whereas the
 * matching guard in hy_add() uses ">=" for both tests; in addition,
 * hwword_len + len is an int compared against the unsigned result of
 * sizeof.  Confirm that the difference between the two guards is intended.
 * What the guard does when it fires is not visible in this view
 * (presumably the word is rejected).
 */
66 static void hw_add(char *s
)
68 char *p
= hwword
+ hwword_len
;
69 char *n
= hwhyph
+ hwword_len
;
/* len includes the terminating NUL of s */
70 int len
= strlen(s
) + 1;
72 if (hw_n
== NHYPHS
|| hwword_len
+ len
> sizeof(hwword
))
/* walk s byte by byte; the cast keeps bytes >= 0x80 from going negative */
75 while ((c
= (unsigned char) *s
++)) {
82 hwoff
[hw_n
] = hwword_len
;
83 dict_put(hwdict
, hwword
+ hwoff
[hw_n
], hw_n
);
88 static int hw_lookup(char *word
, char *hyph
)
90 char word2
[WORDLEN
] = {0};
92 int map
[WORDLEN
] = {0};
95 hcode_strcpy(word2
, word
, map
, 0);
96 while (word2
[off
] == '.') /* skip unknown characters at the front */
98 i
= dict_prefix(hwdict
, word2
+ off
, &idx
);
101 hyph2
= hwhyph
+ hwoff
[i
];
102 for (j
= 0; word2
[j
+ off
]; j
++)
104 hyph
[map
[j
+ off
]] = hyph2
[j
];
108 void tr_hw(char **args
)
113 for (i
= 1; i
< NARGS
&& args
[i
]; i
++) {
116 while (d
- word
< WORDLEN
- GNLEN
&& !escread(&s
, &c
)) {
125 /* the tex hyphenation algorithm */
127 static int hyinit
; /* hyphenation data initialized */
128 static char hypats
[HYPATLEN
]; /* hyphenation patterns */
129 static char hynums
[HYPATLEN
]; /* hyphenation pattern numbers */
130 static int hypats_len
; /* used hypats[] and hynums[] length */
131 static struct dict
*hydict
; /* map patterns to their index in hyoff[] */
132 static int hyoff
[NHYPHS
]; /* the offset of this pattern in hypats[] */
133 static int hy_n
; /* the number of patterns */
135 /* find the patterns matching s and update hyphenation values in n */
136 static void hy_find(char *s
, char *n
)
142 while ((i
= dict_prefix(hydict
, s
, &idx
)) >= 0) {
143 p
= hypats
+ hyoff
[i
];
144 np
= hynums
+ (p
- hypats
);
145 plen
= strlen(p
) + 1;
146 for (j
= 0; j
< plen
; j
++)
152 /* mark the hyphenation points of word in hyph */
153 static void hy_dohyph(char *hyph
, char *word
, int flg
)
155 char w
[WORDLEN
] = {0}; /* cleaned-up word[]; "Abc" -> ".abc." */
156 char n
[WORDLEN
] = {0}; /* the hyphenation value for w[] */
157 int c
[WORDLEN
]; /* start of the i-th character in w */
158 int wmap
[WORDLEN
] = {0}; /* w[i] corresponds to word[wmap[i]] */
162 hcode_strcpy(w
, word
, wmap
, 1);
164 for (i
= 0; i
< wlen
- 1; i
+= hy_cget(ch
, w
+ i
))
166 for (i
= 0; i
< nc
- 1; i
++)
167 hy_find(w
+ c
[i
], n
+ c
[i
]);
168 memset(hyph
, 0, wlen
* sizeof(hyph
[0]));
169 for (i
= 3; i
< nc
- 2; i
++)
170 if (n
[c
[i
]] % 2 && w
[c
[i
- 1]] != '.' && w
[c
[i
]] != '.' &&
171 w
[c
[i
- 2]] != '.' && w
[c
[i
+ 1]] != '.' &&
172 (~flg
& HY_FINAL2
|| w
[c
[i
+ 2]] != '.') &&
173 (~flg
& HY_FIRST2
|| w
[c
[i
- 3]] != '.'))
174 hyph
[wmap
[c
[i
]]] = 1;
177 /* insert pattern s into hypats[] and hynums[] */
/*
 * p and n start at the same offset (hypats_len) in hypats[] and hynums[].
 * That offset is stored in hyoff[hy_n], and the pattern at that offset is
 * passed to dict_put() on hydict together with the index hy_n.
 *
 * NOTE(review): the capacity guard below uses ">=" for both tests whereas
 * the matching guard in hw_add() uses "==" and ">"; hypats_len + len is
 * also an int compared against the unsigned result of sizeof.  Confirm
 * which form is intended.  What the guard does when it fires is not
 * visible in this view (presumably the pattern is rejected).
 */
178 static void hy_add(char *s
)
180 char *p
= hypats
+ hypats_len
;
181 char *n
= hynums
+ hypats_len
;
/* len includes the terminating NUL of s */
182 int len
= strlen(s
) + 1;
184 if (hy_n
>= NHYPHS
|| hypats_len
+ len
>= sizeof(hypats
))
/* walk s byte by byte; the cast keeps bytes >= 0x80 from going negative */
187 while ((c
= (unsigned char) *s
++)) {
/* ASCII digits are singled out; presumably they feed hynums[] -- confirm */
188 if (c
>= '0' && c
<= '9')
194 hyoff
[hy_n
] = hypats_len
;
195 dict_put(hydict
, hypats
+ hyoff
[hy_n
], hy_n
);
201 static struct dict
*hcodedict
;
202 static char hcodesrc
[NHCODES
][GNLEN
];
203 static char hcodedst
[NHCODES
][GNLEN
];
206 /* replace the character in s after .hcode mapping; returns s's new length */
207 static int hcode_mapchar(char *s
)
209 int i
= dict_get(hcodedict
, s
);
211 strcpy(s
, hcodedst
[i
]);
213 *s
= isalpha((unsigned char) *s
) ? tolower((unsigned char) *s
) : '.';
217 /* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
218 static void hcode_strcpy(char *d
, char *s
, int *map
, int dots
)
224 while (di
< WORDLEN
- GNLEN
&& s
[si
]) {
226 si
+= hy_cget(c
, s
+ si
);
228 di
+= hy_cput(d
+ di
, c
);
235 static void hcode_add(char *c1
, char *c2
)
237 int i
= dict_get(hcodedict
, c1
);
239 strcpy(hcodedst
[i
], c2
);
240 } else if (hcode_n
< NHCODES
) {
241 strcpy(hcodesrc
[hcode_n
], c1
);
242 strcpy(hcodedst
[hcode_n
], c2
);
243 dict_put(hcodedict
, hcodesrc
[hcode_n
], hcode_n
);
248 void tr_hcode(char **args
)
250 char c1
[GNLEN
], c2
[GNLEN
];
252 while (s
&& charread(&s
, c1
) >= 0 && charread(&s
, c2
) >= 0)
256 static void hyph_readpatterns(char *s
)
262 while (*s
&& !isspace((unsigned char) *s
))
266 while (*s
&& isspace((unsigned char) *s
))
271 static void hyph_readexceptions(char *s
)
277 while (*s
&& !isspace((unsigned char) *s
))
281 while (*s
&& isspace((unsigned char) *s
))
286 void hyphenate(char *hyph
, char *word
, int flg
)
290 hyph_readpatterns(en_patterns
);
291 hyph_readexceptions(en_exceptions
);
293 if (hw_lookup(word
, hyph
))
294 hy_dohyph(hyph
, word
, flg
);
297 /* lowercase-uppercase character mapping */
298 static char *hycase
[][2] = {
299 {"a", "A"}, {"á", "Á"}, {"à", "À"}, {"ă", "Ă"}, {"â", "Â"},
300 {"ǎ", "Ǎ"}, {"å", "Å"}, {"ä", "Ä"}, {"ã", "Ã"}, {"ą", "Ą"},
301 {"ā", "Ā"}, {"æ", "Æ"}, {"ǽ", "Ǽ"}, {"b", "B"}, {"c", "C"},
302 {"ć", "Ć"}, {"ĉ", "Ĉ"}, {"č", "Č"}, {"ç", "Ç"}, {"d", "D"},
303 {"ď", "Ď"}, {"đ", "Đ"}, {"ḍ", "Ḍ"}, {"ð", "Ð"}, {"e", "E"},
304 {"é", "É"}, {"è", "È"}, {"ê", "Ê"}, {"ě", "Ě"}, {"ë", "Ë"},
305 {"ė", "Ė"}, {"ę", "Ę"}, {"ē", "Ē"}, {"f", "F"}, {"g", "G"},
306 {"ğ", "Ğ"}, {"ĝ", "Ĝ"}, {"ģ", "Ģ"}, {"h", "H"}, {"ĥ", "Ĥ"},
307 {"ḥ", "Ḥ"}, {"ḫ", "Ḫ"}, {"i", "I"}, {"ı", "I"}, {"í", "Í"},
308 {"ì", "Ì"}, {"î", "Î"}, {"ǐ", "Ǐ"}, {"ï", "Ï"}, {"į", "Į"},
309 {"ī", "Ī"}, {"j", "J"}, {"ĵ", "Ĵ"}, {"k", "K"}, {"ķ", "Ķ"},
310 {"l", "L"}, {"ľ", "Ľ"}, {"ł", "Ł"}, {"ļ", "Ļ"}, {"ḷ", "Ḷ"},
311 {"m", "M"}, {"ṁ", "Ṁ"}, {"ṃ", "Ṃ"}, {"n", "N"}, {"ń", "Ń"},
312 {"ň", "Ň"}, {"ñ", "Ñ"}, {"ṅ", "Ṅ"}, {"ņ", "Ņ"}, {"ṇ", "Ṇ"},
313 {"œ", "Œ"}, {"o", "O"}, {"ó", "Ó"}, {"ò", "Ò"}, {"ô", "Ô"},
314 {"ǒ", "Ǒ"}, {"ö", "Ö"}, {"ő", "Ő"}, {"õ", "Õ"}, {"ø", "Ø"},
315 {"ō", "Ō"}, {"p", "P"}, {"q", "Q"}, {"r", "R"}, {"ŕ", "Ŕ"},
316 {"ř", "Ř"}, {"s", "S"}, {"ś", "Ś"}, {"ŝ", "Ŝ"}, {"š", "Š"},
317 {"ş", "Ş"}, {"ṣ", "Ṣ"}, {"t", "T"}, {"ť", "Ť"}, {"ț", "Ț"},
318 {"ṭ", "Ṭ"}, {"u", "U"}, {"ú", "Ú"}, {"ù", "Ù"}, {"ŭ", "Ŭ"},
319 {"û", "Û"}, {"ǔ", "Ǔ"}, {"ů", "Ů"}, {"ü", "Ü"}, {"ǘ", "Ǘ"},
320 {"ǜ", "Ǜ"}, {"ǚ", "Ǚ"}, {"ǖ", "Ǖ"}, {"ű", "Ű"}, {"ų", "Ų"},
321 {"ū", "Ū"}, {"v", "V"}, {"w", "W"}, {"x", "X"}, {"y", "Y"},
322 {"ý", "Ý"}, {"z", "Z"}, {"ź", "Ź"}, {"ž", "Ž"}, {"ż", "Ż"},
323 {"þ", "Þ"}, {"α", "Α"}, {"ά", "Ά"}, {"β", "Β"}, {"ϐ", "Β"},
324 {"γ", "Γ"}, {"δ", "Δ"}, {"ϫ", "Ϫ"}, {"ε", "Ε"}, {"έ", "Έ"},
325 {"ζ", "Ζ"}, {"ϩ", "Ϩ"}, {"η", "Η"}, {"ή", "Ή"}, {"θ", "Θ"},
326 {"ι", "Ι"}, {"ί", "Ί"}, {"ϊ", "Ϊ"}, {"κ", "Κ"}, {"ϧ", "Ϧ"},
327 {"λ", "Λ"}, {"μ", "Μ"}, {"ν", "Ν"}, {"ξ", "Ξ"}, {"ο", "Ο"},
328 {"ό", "Ό"}, {"π", "Π"}, {"ρ", "Ρ"}, {"ϲ", "Ϲ"}, {"σ", "Σ"},
329 {"ς", "Σ"}, {"ϭ", "Ϭ"}, {"τ", "Τ"}, {"ϯ", "Ϯ"}, {"υ", "Υ"},
330 {"ύ", "Ύ"}, {"ϋ", "Ϋ"}, {"φ", "Φ"}, {"ϥ", "Ϥ"}, {"χ", "Χ"},
331 {"ψ", "Ψ"}, {"ϣ", "Ϣ"}, {"ω", "Ω"}, {"ώ", "Ώ"}, {"а", "А"},
332 {"ӓ", "Ӓ"}, {"б", "Б"}, {"в", "В"}, {"г", "Г"}, {"ґ", "Ґ"},
333 {"д", "Д"}, {"ђ", "Ђ"}, {"е", "Е"}, {"ѐ", "Ѐ"}, {"є", "Є"},
334 {"ё", "Ё"}, {"ж", "Ж"}, {"з", "З"}, {"ѕ", "Ѕ"}, {"и", "И"},
335 {"ѝ", "Ѝ"}, {"ӥ", "Ӥ"}, {"і", "І"}, {"ї", "Ї"}, {"й", "Й"},
336 {"ј", "Ј"}, {"к", "К"}, {"л", "Л"}, {"љ", "Љ"}, {"м", "М"},
337 {"н", "Н"}, {"њ", "Њ"}, {"ᲂ", "О"}, {"о", "О"}, {"ӧ", "Ӧ"},
338 {"ө", "Ө"}, {"п", "П"}, {"р", "Р"}, {"с", "С"}, {"т", "Т"},
339 {"ћ", "Ћ"}, {"у", "У"}, {"ӱ", "Ӱ"}, {"ү", "Ү"}, {"ў", "Ў"},
340 {"ф", "Ф"}, {"х", "Х"}, {"ц", "Ц"}, {"ч", "Ч"}, {"џ", "Џ"},
341 {"ш", "Ш"}, {"щ", "Щ"}, {"ᲆ", "Ъ"}, {"ъ", "Ъ"}, {"ы", "Ы"},
342 {"ӹ", "Ӹ"}, {"ь", "Ь"}, {"э", "Э"}, {"ӭ", "Ӭ"}, {"ю", "Ю"},
343 {"я", "Я"}, {"ա", "Ա"}, {"բ", "Բ"}, {"գ", "Գ"}, {"դ", "Դ"},
344 {"ե", "Ե"}, {"զ", "Զ"}, {"է", "Է"}, {"ը", "Ը"}, {"թ", "Թ"},
345 {"ժ", "Ժ"}, {"ի", "Ի"}, {"լ", "Լ"}, {"խ", "Խ"}, {"ծ", "Ծ"},
346 {"կ", "Կ"}, {"հ", "Հ"}, {"ձ", "Ձ"}, {"ղ", "Ղ"}, {"ճ", "Ճ"},
347 {"մ", "Մ"}, {"յ", "Յ"}, {"ն", "Ն"}, {"շ", "Շ"}, {"ո", "Ո"},
348 {"չ", "Չ"}, {"պ", "Պ"}, {"ջ", "Ջ"}, {"ռ", "Ռ"}, {"ս", "Ս"},
349 {"վ", "Վ"}, {"տ", "Տ"}, {"ր", "Ր"}, {"ց", "Ց"}, {"փ", "Փ"},
350 {"ք", "Ք"}, {"օ", "Օ"},
/*
 * Load hyphenation data named by the request arguments: args[1] is opened
 * for patterns, args[2] for exceptions and args[3] for hcode mappings.
 * When args[3] is the string "-", the lowercase-uppercase pairs of
 * hycase[] are additionally registered through hcode_add().
 *
 * NOTE(review): tok is declared as char tok[128] but every fscanf() below
 * reads it with "%128s".  A %s field width does not count the terminating
 * NUL, so a 128-character token stores 129 bytes and overflows tok by one.
 * The width should be at most 127 (or tok should be enlarged).
 */
353 void tr_hpfa(char **args
)
355 char tok
[128], c1
[GNLEN
], c2
[GNLEN
];
358 /* load English hyphenation patterns with no arguments */
360 hyph_readpatterns(en_patterns
);
361 hyph_readexceptions(en_exceptions
);
363 /* reading patterns */
364 if (args
[1] && (filp
= fopen(args
[1], "r"))) {
365 while (fscanf(filp
, "%128s", tok
) == 1)
/* tokens of WORDLEN bytes or more are not accepted */
366 if (strlen(tok
) < WORDLEN
)
370 /* reading exceptions */
371 if (args
[2] && (filp
= fopen(args
[2], "r"))) {
372 while (fscanf(filp
, "%128s", tok
) == 1)
/* tokens of WORDLEN bytes or more are not accepted */
373 if (strlen(tok
) < WORDLEN
)
377 /* reading hcode mappings */
378 if (args
[3] && (filp
= fopen(args
[3], "r"))) {
379 while (fscanf(filp
, "%128s", tok
) == 1) {
/*
 * a mapping is registered only when both utf8read() calls succeed and
 * nothing is left in s afterwards (s presumably starts at tok -- confirm)
 */
381 if (utf8read(&s
, c1
) && utf8read(&s
, c2
) && !*s
)
382 hcode_add(c2
, c1
); /* inverting */
386 /* lowercase-uppercase character hcode mappings */
387 if (args
[3] && !strcmp("-", args
[3])) {
/* hycase[i][0] is the lowercase form and hycase[i][1] the uppercase form */
389 for (i
= 0; i
< LEN(hycase
); i
++)
390 hcode_add(hycase
[i
][1], hycase
[i
][0]);
396 hwdict
= dict_make(-1, 0, 2);
397 hydict
= dict_make(-1, 0, 2);
398 hcodedict
= dict_make(-1, 0, 1);
408 dict_free(hcodedict
);
411 void tr_hpf(char **args
)
413 /* resetting the patterns */
417 /* resetting the dictionary */
421 /* resetting hcode mappings */
423 dict_free(hcodedict
);