trfn: be more strict about uXYZW and uniXYZW glyph names
[neatmkfn.git] / trfn_ch.h
blob f01f1953aaf15b0064da0b4be9dba89f5cafa3f8
/*
 * ligatures with unicode aliases
 *
 * Each row is {ligature character, ligature name}.  The character is
 * written as explicit UTF-8 bytes so that text filters which expand
 * ligatures into plain letters cannot silently turn a row into a
 * useless self-mapping such as {"ff", "ff"}.
 */
static char *ligs_utf8[][2] = {
	{"\xef\xac\x80", "ff"},		/* U+FB00 */
	{"\xef\xac\x81", "fi"},		/* U+FB01 */
	{"\xef\xac\x82", "fl"},		/* U+FB02 */
	{"\xef\xac\x83", "ffi"},	/* U+FB03 */
	{"\xef\xac\x84", "ffl"},	/* U+FB04 */
	{"\xef\xac\x86", "st"},		/* U+FB06 */
};
/*
 * these are not ligatures
 *
 * Two-letter names that look like ligature names but are not.
 */
static char *ligs_exceptions[] = {
	"ga", "aq", "ti", "ha",
};
/*
 * AGL exceptions
 *
 * Each row is a pair of strings; presumably the first is replaced by the
 * second when glyph names are resolved -- verify against the lookup code.
 * The Omega key is spelled as UTF-8 bytes because OHM SIGN (U+2126) is
 * canonically equivalent to GREEK CAPITAL LETTER OMEGA (U+03A9): any
 * Unicode normalization of this file would otherwise collapse the row
 * into a self-mapping.
 */
static char *agl_exceptions[][2] = {
	{"Δ", "∆"},			/* Delta -> Deltagreek */
	{"\xe2\x84\xa6", "Ω"},		/* Omega -> Omegagreek; key is U+2126 */
	{"µ", "μ"},			/* mu -> mugreek */
	{"‘", "`"},			/* quoteleft */
	{"`", "ga"},			/* grave */
	{"’", "'"},			/* quoteright */
	{"'", "aq"},			/* quotesingle */
	{"~", "ti"},			/* asciitilde; using tilde for ~ */
	{"^", "ha"},			/* asciicircum; using circumflex for ^ */
};
/*
 * troff aliases
 *
 * The first string of each row is a character (UTF-8) or a glyph name;
 * the remaining strings are its alternative troff names.  Rows shorter
 * than eight strings are padded with NULL by the compiler.
 *
 * Keys that are private-use code points, ligatures, or characters with a
 * canonical decomposition are written as explicit UTF-8 bytes: they are
 * invisible or unstable in most editors and text filters, and an empty
 * or rewritten key makes the row unusable.
 */
static char *alts[][8] = {
	{"'", "cq"},
	{"+", "pl"},
	{"-", "hy"},
	{"/", "sl"},
	{"=", "eq"},
	{"\"", "dq"},
	{"\\", "bs", "rs"},
	{"_", "ru", "ul"},
	{"`", "oq"},
	{"aq"},
	{"|", "or"},
	{"¡", "!!", "r!"},
	{"¢", "c|", "ct"},
	{"£", "L-", "ps"},
	{"¤", "xo", "cr"},
	{"¥", "Y-", "yn"},
	{"¦", "||"},
	{"§", "so", "sc"},
	{"©", "co"},
	{"ª", "a_"},
	{"«", "<<", "Fo"},
	{"¬", "-,", "no"},
	{"®", "ro", "rg"},
	{"°", "0^", "de"},
	{"±", "+-"},
	{"²", "2^"},
	{"³", "3^"},
	{"µ", "/u"},
	{"¶", "P!", "pg"},
	{"·", ".^"},
	{"¹", "1^"},
	{"º", "o_"},
	{"»", ">>", "Fc"},
	{"¼", "14"},
	{"½", "12"},
	{"¾", "34"},
	{"¿", "??", "r?"},
	{"À", "A`"},
	{"Á", "A'"},
	{"Â", "A^"},
	{"Ã", "A~"},
	{"Ä", "A:", "A\""},
	{"Å", "A*"},
	{"Æ", "AE"},
	{"Ç", "C,"},
	{"È", "E`"},
	{"É", "E'"},
	{"Ê", "E^"},
	{"Ë", "E:"},
	{"Ì", "I`"},
	{"Í", "I'"},
	{"Î", "I^"},
	{"Ï", "I:"},
	{"Ð", "D-"},
	{"Ñ", "N~"},
	{"Ò", "O`"},
	{"Ó", "O'"},
	{"Ô", "O^"},
	{"Õ", "O~"},
	{"Ö", "O:"},
	{"×", "xx", "mu"},
	{"Ø", "O/"},
	{"Ù", "U`"},
	{"Ú", "U'"},
	{"Û", "U^"},
	{"Ü", "U:"},
	{"Ý", "Y'"},
	{"Þ", "TH"},
	{"ß", "ss"},
	{"à", "a`"},
	{"á", "a'"},
	{"â", "a^"},
	{"ã", "a~"},
	{"ä", "a:"},
	{"å", "a*"},
	{"æ", "ae"},
	{"ç", "c,"},
	{"è", "e`"},
	{"é", "e'"},
	{"ê", "e^"},
	{"ë", "e:"},
	{"ì", "i`"},
	{"í", "i'"},
	{"î", "i^"},
	{"ï", "i:"},
	{"ð", "d-"},
	{"ñ", "n~"},
	{"ò", "o`"},
	{"ó", "o'"},
	{"ô", "o^"},
	{"õ", "o~"},
	{"ö", "o:"},
	{"÷", "di", "-:"},
	{"ø", "o/"},
	{"ù", "u`"},
	{"ú", "u'"},
	{"û", "u^"},
	{"ü", "u:"},
	{"ý", "y'"},
	{"þ", "th"},
	{"ÿ", "y:"},
	{"Č", "C<"},
	{"č", "c<"},
	{"Ď", "D<"},
	{"ď", "d<"},
	{"ě", "e<"},
	{"ň", "n<"},
	{"Ő", "O\""},
	{"ő", "o\""},
	{"Ř", "R<"},
	{"ř", "r<"},
	{"Š", "S<"},
	{"š", "s<"},
	{"Ť", "T<"},
	{"ť", "t<"},
	{"Ů", "U*"},
	{"ů", "u*"},
	{"Ű", "U\""},
	{"ű", "u\""},
	{"Ÿ", "Y:"},
	{"Ž", "Z<"},
	{"ž", "z<"},
	{"ƒ", "fn"},
	{"¸", ",,", ",a"},
	{"´", "aa", "\\'"},
	{"¯", "-a"},
	{"¨", "\"\"", ":a"},
	{"ga", "\\`"},
	{"ˆ", "^", "^a"},
	{"ˇ", "va"},
	{"˘", "Ua"},
	{"˙", ".a"},
	{"˚", "oa"},
	{"˛", "Ca"},
	{"˝", "\"a"},
	{"˜", "~"},
	{"Α", "*A"},
	{"Β", "*B"},
	{"Γ", "*G"},
	{"Ε", "*E"},
	{"Ζ", "*Z"},
	{"Η", "*Y"},
	{"Θ", "*H"},
	{"Ι", "*I"},
	{"Κ", "*K"},
	{"Λ", "*L"},
	{"Μ", "*M"},
	{"Ν", "*N"},
	{"Ξ", "*C"},
	{"Ο", "*O"},
	{"Π", "*P"},
	{"Ρ", "*R"},
	{"Σ", "*S"},
	{"Τ", "*T"},
	{"Υ", "*U"},
	{"Φ", "*F"},
	{"Χ", "*X"},
	{"Ψ", "*Q"},
	{"Ω", "*W"},
	{"α", "*a"},
	{"β", "*b"},
	{"γ", "*g"},
	{"δ", "*d"},
	{"ε", "*e"},
	{"ζ", "*z"},
	{"η", "*y"},
	{"θ", "*h"},
	{"ι", "*i"},
	{"κ", "*k"},
	{"λ", "*l"},
	{"μ", "*m"},
	{"ν", "*n"},
	{"ξ", "*c"},
	{"ο", "*o"},
	{"π", "*p"},
	{"ρ", "*r"},
	{"ς", "ts"},
	{"σ", "*s"},
	{"τ", "*t"},
	{"υ", "*u"},
	{"φ", "*f"},
	{"χ", "*x"},
	{"ψ", "*q"},
	{"ω", "*w"},
	{"–", "en", "\\-"},
	{"—", "em", "--"},
	{"‚", "bq"},
	{"“", "``", "lq"},
	{"”", "''", "rq"},
	{"†", "dg"},
	{"‡", "dd"},
	{"•", "bu"},
	{"…", "el"},
	{"‰", "%0"},
	{"′", "fm"},
	{"‹", "fo"},
	{"›", "fc"},
	{"⁄", "fr"},
	{"ℑ", "If"},
	{"ℛ", "ws"},
	{"ℜ", "Rf"},
	{"ℵ", "af"},
	{"←", "<-"},
	{"↑", "ua"},
	{"→", "->"},
	{"↓", "da"},
	{"↔", "ab", "<>"},
	{"↵", "CR"},
	{"∀", "fa"},
	{"∂", "pd"},
	{"∃", "te"},
	{"∅", "es"},
	{"∆", "*D"},
	{"∇", "gr"},
	{"∈", "mo"},
	{"∉", "!m"},
	{"∋", "st"},
	{"∏", "pr"},
	{"∑", "su"},
	{"−", "mi"},
	{"∓", "-+"},
	{"∗", "**"},
	{"√", "sr"},
	{"∝", "pt"},
	{"∞", "if"},
	{"∠", "an"},
	{"∧", "l&"},
	{"∨", "l|"},
	{"∩", "ca"},
	{"∪", "cu"},
	{"∫", "is"},
	{"∴", "tf"},
	{"∼", "ap"},
	{"≅", "cg", "=~"},
	{"≈", "~~"},
	{"≠", "!="},
	{"≡", "=="},
	{"≤", "<="},
	{"≥", ">="},
	{"⊂", "sb"},
	{"⊃", "sp"},
	{"⊄", "!b"},
	{"⊆", "ib"},
	{"⊇", "ip"},
	{"⊕", "O+"},
	{"⊗", "Ox"},
	{"⊥", "pp"},
	{"⋅", "c."},
	{"\xe2\x8c\xa9", "b<"},		/* U+2329 angleleft */
	{"\xe2\x8c\xaa", "b>"},		/* U+232A angleright */
	{"◊", "lz"},
	{"○", "ci"},
	{"⟨", "la"},
	{"⟩", "ra"},
	{"\xef\x9b\x99", "co"},		/* U+F6D9 copyrightserif */
	{"\xef\x9b\x9a", "rg"},		/* U+F6DA registerserif */
	{"\xef\x9b\x9b", "tm"},		/* U+F6DB trademarkserif */
	{"\xef\xa3\xa5", "rn"},		/* U+F8E5 radicalex */
	{"\xef\xa3\xa6", "av"},		/* U+F8E6 arrowvertex */
	{"\xef\xa3\xa7", "ah"},		/* U+F8E7 arrowhorizex */
	{"\xef\xa3\xa8", "RG"},		/* U+F8E8 registersans */
	{"\xef\xa3\xa9", "CO"},		/* U+F8E9 copyrightsans */
	{"\xef\xa3\xaa", "TM"},		/* U+F8EA trademarksans */
	{"\xef\xa3\xab", "LT"},		/* U+F8EB parenlefttp */
	{"\xef\xa3\xac", "br", "LX"},	/* U+F8EC parenleftex */
	{"\xef\xa3\xad", "LB"},		/* U+F8ED parenleftbt */
	{"⎛", "LT"},
	{"⎜", "LX"},
	{"⎝", "LB"},
	{"\xef\xa3\xae", "lc"},		/* U+F8EE bracketlefttp */
	{"\xef\xa3\xaf", "lx"},		/* U+F8EF bracketleftex */
	{"\xef\xa3\xb0", "lf"},		/* U+F8F0 bracketleftbt */
	{"⎡", "lc"},
	{"⎢", "lx"},
	{"⎣", "lf"},
	{"\xef\xa3\xb1", "lt"},		/* U+F8F1 bracelefttp */
	{"\xef\xa3\xb2", "lk"},		/* U+F8F2 braceleftmid */
	{"\xef\xa3\xb3", "lb"},		/* U+F8F3 braceleftbt */
	{"\xef\xa3\xb4", "bv", "|"},	/* U+F8F4 braceex */
	{"⎧", "lt"},
	{"⎨", "lk"},
	{"⎩", "lb"},
	{"⎪", "bv"},
	{"\xef\xa3\xb6", "RT"},		/* U+F8F6 parenrighttp */
	{"\xef\xa3\xb7", "RX"},		/* U+F8F7 parenrightex */
	{"\xef\xa3\xb8", "RB"},		/* U+F8F8 parenrightbt */
	{"⎞", "RT"},
	{"⎟", "RX"},
	{"⎠", "RB"},
	{"\xef\xa3\xb9", "rc"},		/* U+F8F9 bracketrighttp */
	{"\xef\xa3\xba", "rx"},		/* U+F8FA bracketrightex */
	{"\xef\xa3\xbb", "rf"},		/* U+F8FB bracketrightbt */
	{"⎤", "rc"},
	{"⎥", "rx"},
	{"⎦", "rf"},
	{"\xef\xa3\xbc", "rt"},		/* U+F8FC bracerighttp */
	{"\xef\xa3\xbd", "rk"},		/* U+F8FD bracerightmid */
	{"\xef\xa3\xbe", "rb"},		/* U+F8FE bracerightbt */
	{"⎫", "rt"},
	{"⎬", "rk"},
	{"⎭", "rb"},
	{"\xef\xac\x80", "ff"},		/* U+FB00 */
	{"\xef\xac\x81", "fi"},		/* U+FB01 */
	{"\xef\xac\x82", "fl"},		/* U+FB02 */
	{"\xef\xac\x83", "ffi", "Fi"},	/* U+FB03 */
	{"\xef\xac\x84", "ffl", "Fl"},	/* U+FB04 */
	{"\xef\xac\x86", "st"},		/* U+FB06 */
};
/*
 * different shapes of arabic and farsi characters
 *
 * Each row names a character and lists its base code point followed by
 * the code points of its contextual presentation forms.  A zero (or an
 * omitted trailing field) means the character has no such form.
 */
static struct achar {
	char *name;		/* character name */
	unsigned c;		/* base code point */
	unsigned s;		/* isolated form */
	unsigned i;		/* initial form */
	unsigned m;		/* medial form */
	unsigned f;		/* final form */
} achars[] = {
	{"hamza", 0x0621, 0xfe80},
	{"alefwithmaddaabove", 0x0622, 0xfe81, 0, 0, 0xfe82},
	{"alefwithhamzaabove", 0x0623, 0xfe83, 0, 0, 0xfe84},
	{"wawwithhamzaabove", 0x0624, 0xfe85, 0, 0, 0xfe86},
	{"alefwithhamzabelow", 0x0625, 0xfe87, 0, 0, 0xfe88},
	{"yehwithhamzaabove", 0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},
	{"alef", 0x0627, 0xfe8d, 0, 0, 0xfe8e},
	{"arabicalef", 0x0627},
	{"beh", 0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},
	{"tehmarbuta", 0x0629, 0xfe93, 0, 0, 0xfe94},
	{"teh", 0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},
	{"theh", 0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},
	{"jeem", 0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},
	{"hah", 0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},
	{"khah", 0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},
	{"dal", 0x062f, 0xfea9, 0, 0, 0xfeaa},
	{"thal", 0x0630, 0xfeab, 0, 0, 0xfeac},
	{"reh", 0x0631, 0xfead, 0, 0, 0xfeae},
	{"zain", 0x0632, 0xfeaf, 0, 0, 0xfeb0},
	{"seen", 0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},
	{"sheen", 0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},
	{"sad", 0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},
	{"dad", 0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},
	{"tah", 0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},
	{"zah", 0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},
	{"ain", 0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},
	{"ghain", 0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},
	{"tatweel", 0x0640},
	{"feh", 0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},
	{"qaf", 0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},
	{"kaf", 0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},
	{"lam", 0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},
	{"meem", 0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},
	{"noon", 0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},
	{"heh", 0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},
	{"waw", 0x0648, 0xfeed, 0, 0, 0xfeee},
	{"alefmaksura", 0x0649, 0xfeef, 0, 0, 0xfef0},
	{"yeh", 0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},
	{"fathatan", 0x064b, 0xfe70},
	{"dammatan", 0x064c, 0xfe72},
	{"kasratan", 0x064d, 0xfe74},
	{"fatha", 0x064e, 0xfe76, 0, 0xfe77, 0},
	{"damma", 0x064f, 0xfe78, 0, 0xfe79, 0},
	{"kasra", 0x0650, 0xfe7a, 0, 0xfe7b, 0},
	/* medial shadda is U+FE7D; U+FE7C is the isolated form */
	{"shadda", 0x0651, 0xfe7c, 0, 0xfe7d, 0},
	{"sukun", 0x0652, 0xfe7e, 0, 0xfe7f, 0},
	{"peh", 0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},
	{"tcheh", 0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},
	{"jeh", 0x0698, 0xfb8a, 0, 0, 0xfb8b},
	{"keheh", 0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},
	{"gaf", 0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},
	{"farsiyeh", 0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},
	{"lamwithalef", 0xfefb, 0xfefb, 0, 0, 0xfefc},
	{"arabiccomma", 0x060c},
	{"arabicsemicolon", 0x061b},
	{"arabicquestionmark", 0x061f},
	{"arabicindicdigitzero", 0x0660},
	{"arabicindicdigitone", 0x0661},
	{"arabicindicdigittwo", 0x0662},
	{"arabicindicdigitthree", 0x0663},
	{"arabicindicdigitfour", 0x0664},
	{"arabicindicdigitfive", 0x0665},
	{"arabicindicdigitsix", 0x0666},
	{"arabicindicdigitseven", 0x0667},
	{"arabicindicdigiteight", 0x0668},
	{"arabicindicdigitnine", 0x0669},
	{"arabicpercentsign", 0x066a},
	{"extendedarabicindicdigitzero", 0x06f0},
	{"extendedarabicindicdigitone", 0x06f1},
	{"extendedarabicindicdigittwo", 0x06f2},
	{"extendedarabicindicdigitthree", 0x06f3},
	{"extendedarabicindicdigitfour", 0x06f4},
	{"extendedarabicindicdigitfive", 0x06f5},
	{"extendedarabicindicdigitsix", 0x06f6},
	{"extendedarabicindicdigitseven", 0x06f7},
	{"extendedarabicindicdigiteight", 0x06f8},
	{"extendedarabicindicdigitnine", 0x06f9},
	{"zeronojoin", 0x200c},
	{"zerojoin", 0x200d},
};
429 static int ctype_ascii[128] = {
430 ['!'] = 2, ['"'] = 2, ['#'] = 2, ['$'] = 2, ['%'] = 2,
431 ['&'] = 2, ['\''] = 2, ['('] = 3, [')'] = 3, ['*'] = 2,
432 ['+'] = 0, [','] = 1, ['-'] = 0, ['.'] = 0, ['/'] = 2,
433 ['0'] = 2, ['1'] = 2, ['2'] = 2, ['3'] = 2, ['4'] = 2,
434 ['5'] = 2, ['6'] = 2, ['7'] = 2, ['8'] = 2, ['9'] = 2,
435 [':'] = 0, [';'] = 1, ['<'] = 0, ['='] = 0, ['>'] = 0,
436 ['?'] = 2, ['@'] = 3, ['A'] = 2, ['B'] = 2, ['C'] = 2,
437 ['D'] = 2, ['E'] = 2, ['F'] = 2, ['G'] = 2, ['H'] = 2,
438 ['I'] = 2, ['J'] = 3, ['K'] = 2, ['L'] = 2, ['M'] = 2,
439 ['N'] = 2, ['O'] = 2, ['P'] = 2, ['Q'] = 3, ['R'] = 2,
440 ['S'] = 2, ['T'] = 2, ['U'] = 2, ['V'] = 2, ['W'] = 2,
441 ['X'] = 2, ['Y'] = 2, ['Z'] = 2, ['['] = 3, ['\\'] = 2,
442 [']'] = 3, ['^'] = 2, ['_'] = 1, ['a'] = 0, ['b'] = 2,
443 ['c'] = 0, ['d'] = 2, ['e'] = 0, ['f'] = 2, ['g'] = 1,
444 ['h'] = 2, ['i'] = 2, ['j'] = 3, ['k'] = 2, ['l'] = 2,
445 ['m'] = 0, ['n'] = 0, ['o'] = 0, ['p'] = 1, ['q'] = 1,
446 ['r'] = 0, ['s'] = 0, ['t'] = 2, ['u'] = 0, ['v'] = 0,
447 ['w'] = 0, ['x'] = 0, ['y'] = 1, ['z'] = 0, ['{'] = 3,
448 ['|'] = 3, ['}'] = 3, ['~'] = 0,