/*
 * otf: for each rule report its script
 * [neatmkfn.git] / trfn_ch.h
 * blob 2a53be37c9fe067cec8209aef05dc4895a7bd3e1
 */
/*
 * Ligatures with Unicode aliases.
 *
 * Each row pairs the UTF-8 encoding of a precomposed Latin ligature
 * (first column) with the plain-letter ligature name (second column).
 * The code points are spelled as byte escapes so that text normalisation
 * cannot fold them back into their component letters.
 */
static char *ligs_utf8[][2] = {
	{"\xef\xac\x80", "ff"},		/* U+FB00 LATIN SMALL LIGATURE FF */
	{"\xef\xac\x81", "fi"},		/* U+FB01 LATIN SMALL LIGATURE FI */
	{"\xef\xac\x82", "fl"},		/* U+FB02 LATIN SMALL LIGATURE FL */
	{"\xef\xac\x83", "ffi"},	/* U+FB03 LATIN SMALL LIGATURE FFI */
	{"\xef\xac\x84", "ffl"},	/* U+FB04 LATIN SMALL LIGATURE FFL */
	/* NOTE(review): U+FB06 assumed for "st"; confirm it is not U+FB05 (long s + t) */
	{"\xef\xac\x86", "st"},		/* U+FB06 LATIN SMALL LIGATURE ST */
};
/*
 * AGL exceptions.
 *
 * Each row is { ASCII character, two-letter name }.  The per-row notes
 * give the glyph name being overridden and the glyph used for the plain
 * ASCII character instead.
 */
static char *agl_exceptions[][2] = {
	{"`", "ga"},		/* grave; using quoteleft for ` */
	{"'", "aq"},		/* quotesingle; using quoteright for ' */
	{"~", "ti"},		/* asciitilde; using tilde for ~ */
	{"^", "ha"},		/* asciicircum; using circumflex for ^ */
};
/*
 * troff aliases.
 *
 * Each row starts with a key string, followed by up to seven alternative
 * names for it; slots that are not initialised are NULL.  Code points that
 * text normalisation would fold into another character (the Latin
 * ligatures and the ohm sign) are written as UTF-8 byte escapes.
 *
 * NOTE(review): every row whose key is the empty string ("") appears to
 * have lost its key character (presumably non-printing or private-use
 * code points); they are kept as found and should be restored from the
 * upstream file.
 */
static char *alts[][8] = {
	{"’", "'", "cq"},
	{"+", "pl"},
	{"-", "hy"},
	{"/", "sl"},
	{"=", "eq"},
	{"\"", "dq"},
	{"\\", "bs", "rs"},
	{"_", "ru", "ul"},
	{"‘", "`", "oq"},
	{"|", "or"},
	{"¡", "!!", "r!"},
	{"¢", "c|", "ct"},
	{"£", "L-", "ps"},
	{"¤", "xo", "cr"},
	{"¥", "Y-", "yn"},
	{"¦", "||"},
	{"§", "so", "sc"},
	{"©", "co"},
	{"ª", "a_"},
	{"«", "<<", "Fo"},
	{"¬", "-,", "no"},
	{"®", "ro", "rg"},
	{"°", "0^", "de"},
	{"±", "+-"},
	{"²", "2^"},
	{"³", "3^"},
	{"¶", "P!", "pg"},
	{"·", ".^"},
	{"¹", "1^"},
	{"º", "o_"},
	{"»", ">>", "Fc"},
	{"¼", "14"},
	{"½", "12"},
	{"¾", "34"},
	{"¿", "??", "r?"},
	{"À", "A`"},
	{"Á", "A'"},
	{"Â", "A^"},
	{"Ã", "A~"},
	{"Ä", "A:", "A\""},
	{"Å", "A*"},
	{"Æ", "AE"},
	{"Ç", "C,"},
	{"È", "E`"},
	{"É", "E'"},
	{"Ê", "E^"},
	{"Ë", "E:"},
	{"Ì", "I`"},
	{"Í", "I'"},
	{"Î", "I^"},
	{"Ï", "I:"},
	{"Ð", "D-"},
	{"Ñ", "N~"},
	{"Ò", "O`"},
	{"Ó", "O'"},
	{"Ô", "O^"},
	{"Õ", "O~"},
	{"Ö", "O:"},
	{"×", "xx", "mu"},
	{"Ø", "O/"},
	{"Ù", "U`"},
	{"Ú", "U'"},
	{"Û", "U^"},
	{"Ü", "U:"},
	{"Ý", "Y'"},
	{"Þ", "TH"},
	{"ß", "ss"},
	{"à", "a`"},
	{"á", "a'"},
	{"â", "a^"},
	{"ã", "a~"},
	{"ä", "a:"},
	{"å", "a*"},
	{"æ", "ae"},
	{"ç", "c,"},
	{"è", "e`"},
	{"é", "e'"},
	{"ê", "e^"},
	{"ë", "e:"},
	{"ì", "i`"},
	{"í", "i'"},
	{"î", "i^"},
	{"ï", "i:"},
	{"ð", "d-"},
	{"ñ", "n~"},
	{"ò", "o`"},
	{"ó", "o'"},
	{"ô", "o^"},
	{"õ", "o~"},
	{"ö", "o:"},
	{"÷", "di", "-:"},
	{"ø", "o/"},
	{"ù", "u`"},
	{"ú", "u'"},
	{"û", "u^"},
	{"ü", "u:"},
	{"ý", "y'"},
	{"þ", "th"},
	{"ÿ", "y:"},
	{"Č", "C<"},
	{"č", "c<"},
	{"Ď", "D<"},
	{"ď", "d<"},
	{"ě", "e<"},
	{"ň", "n<"},
	{"Ő", "O\""},
	{"ő", "o\""},
	{"Ř", "R<"},
	{"ř", "r<"},
	{"Š", "S<"},
	{"š", "s<"},
	{"Ť", "T<"},
	{"ť", "t<"},
	{"Ů", "U*"},
	{"ů", "u*"},
	{"Ű", "U\""},
	{"ű", "u\""},
	{"Ÿ", "Y:"},
	{"Ž", "Z<"},
	{"ž", "z<"},
	{"ƒ", "fn"},
	{"¸", ",,", ",a"},
	{"´", "aa", "\\'"},
	{"¯", "-a"},
	{"¨", "\"\"", ":a"},
	{"ga", "\\`"},
	{"ˆ", "^", "^a"},
	{"ˇ", "va"},
	{"˘", "Ua"},
	{"˙", ".a"},
	{"˚", "oa"},
	{"˛", "Ca"},
	{"˝", "\"a"},
	{"˜", "~"},
	{"Α", "*A"},
	{"Β", "*B"},
	{"Γ", "*G"},
	{"Ε", "*E"},
	{"Ζ", "*Z"},
	{"Η", "*Y"},
	{"Θ", "*H"},
	{"Ι", "*I"},
	{"Κ", "*K"},
	{"Λ", "*L"},
	{"Μ", "*M"},
	{"Ν", "*N"},
	{"Ξ", "*C"},
	{"Ο", "*O"},
	{"Π", "*P"},
	{"Ρ", "*R"},
	{"Σ", "*S"},
	{"Τ", "*T"},
	{"Υ", "*U"},
	{"Φ", "*F"},
	{"Χ", "*X"},
	{"Ψ", "*Q"},
	/* Greek capital omega and U+2126 OHM SIGN alias each other, like mu/micro below */
	{"Ω", "*W", "\xe2\x84\xa6"},
	{"\xe2\x84\xa6", "Ω", "*W"},
	{"α", "*a"},
	{"β", "*b"},
	{"γ", "*g"},
	{"δ", "*d"},
	{"ε", "*e"},
	{"ζ", "*z"},
	{"η", "*y"},
	{"θ", "*h"},
	{"ι", "*i"},
	{"κ", "*k"},
	{"λ", "*l"},
	{"μ", "*m", "µ", "/u"},
	{"µ", "/u", "*m", "μ"},
	{"ν", "*n"},
	{"ξ", "*c"},
	{"ο", "*o"},
	{"π", "*p"},
	{"ρ", "*r"},
	{"ς", "ts"},
	{"σ", "*s"},
	{"τ", "*t"},
	{"υ", "*u"},
	{"φ", "*f"},
	{"χ", "*x"},
	{"ψ", "*q"},
	{"ω", "*w"},
	{"–", "en", "\\-"},
	{"—", "em", "--"},
	{"‚", "bq"},
	{"“", "``", "lq"},
	{"”", "''", "rq"},
	{"†", "dg"},
	{"‡", "dd"},
	{"•", "bu"},
	{"…", "el"},
	{"‰", "%0"},
	{"′", "fm"},
	{"‹", "fo"},
	{"›", "fc"},
	{"⁄", "fr"},
	{"ℑ", "If"},
	{"ℛ", "ws"},
	{"ℜ", "Rf"},
	{"ℵ", "af"},
	{"←", "<-"},
	{"↑", "ua"},
	{"→", "->"},
	{"↓", "da"},
	{"↔", "ab", "<>"},
	{"↵", "CR"},
	{"∀", "fa"},
	{"∂", "pd"},
	{"∃", "te"},
	{"∅", "es"},
	{"∆", "*D", "Δ"},
	{"Δ", "∆", "*D"},
	{"∇", "gr"},
	{"∈", "mo"},
	{"∉", "!m"},
	{"∋", "st"},
	{"∏", "pr"},
	{"∑", "su"},
	{"−", "mi"},
	{"∓", "-+"},
	{"∗", "**"},
	{"√", "sr"},
	{"∝", "pt"},
	{"∞", "if"},
	{"∠", "an"},
	{"∧", "l&"},
	{"∨", "l|"},
	{"∩", "ca"},
	{"∪", "cu"},
	{"∫", "is"},
	{"∴", "tf"},
	{"∼", "ap"},
	{"≅", "cg", "=~"},
	{"≈", "~~"},
	{"≠", "!="},
	{"≡", "=="},
	{"≤", "<="},
	{"≥", ">="},
	{"⊂", "sb"},
	{"⊃", "sp"},
	{"⊄", "!b"},
	{"⊆", "ib"},
	{"⊇", "ip"},
	{"⊕", "O+"},
	{"⊗", "Ox"},
	{"⊥", "pp"},
	{"⋅", "c."},
	{"〈", "b<"},
	{"〉", "b>"},
	{"◊", "lz"},
	{"○", "ci"},
	{"⟨", "la"},
	{"⟩", "ra"},
	/* NOTE(review): empty keys from here on look like lost characters -- confirm against upstream */
	{"", "co"},
	{"", "rg"},
	{"", "tm"},
	{"", "rn"},
	{"", "av"},
	{"", "ah"},
	{"", "RG"},
	{"", "CO"},
	{"", "TM"},
	{"", "LT"},
	{"", "br", "LX"},
	{"", "LB"},
	{"⎛", "LT"},
	{"⎜", "LX"},
	{"⎝", "LB"},
	{"", "lc"},
	{"", "lx"},
	{"", "lf"},
	{"⎡", "lc"},
	{"⎢", "lx"},
	{"⎣", "lf"},
	{"", "lt"},
	{"", "lk"},
	{"", "lb"},
	{"", "bv", "|"},
	{"⎧", "lt"},
	{"⎨", "lk"},
	{"⎩", "lb"},
	{"⎪", "bv"},
	{"", "RT"},
	{"", "RX"},
	{"", "RB"},
	{"⎞", "RT"},
	{"⎟", "RX"},
	{"⎠", "RB"},
	{"", "rc"},
	{"", "rx"},
	{"", "rf"},
	{"⎤", "rc"},
	{"⎥", "rx"},
	{"⎦", "rf"},
	{"", "rt"},
	{"", "rk"},
	{"", "rb"},
	{"⎫", "rt"},
	{"⎬", "rk"},
	{"⎭", "rb"},
	/* precomposed Latin ligatures, U+FB00..U+FB06 */
	{"\xef\xac\x80", "ff"},
	{"\xef\xac\x81", "fi"},
	{"\xef\xac\x82", "fl"},
	{"\xef\xac\x83", "ffi", "Fi"},
	{"\xef\xac\x84", "ffl", "Fl"},
	/* NOTE(review): U+FB06 assumed for "st"; confirm it is not U+FB05 */
	{"\xef\xac\x86", "st"},
	{"ﺀ", "ء"},
	{"ﺁ", "آ"},
	{"ﺃ", "أ"},
	{"ﺅ", "ؤ"},
	{"ﺇ", "إ"},
	{"ﺉ", "ئ"},
	{"ﺍ", "ا"},
	{"ﺏ", "ب"},
	{"ﺓ", "ة"},
	{"ﺕ", "ت"},
	{"ﺙ", "ث"},
	{"ﺝ", "ج"},
	{"ﺡ", "ح"},
	{"ﺥ", "خ"},
	{"ﺩ", "د"},
	{"ﺫ", "ذ"},
	{"ﺭ", "ر"},
	{"ﺯ", "ز"},
	{"ﺱ", "س"},
	{"ﺵ", "ش"},
	{"ﺹ", "ص"},
	{"ﺽ", "ض"},
	{"ﻁ", "ط"},
	{"ﻅ", "ظ"},
	{"ﻉ", "ع"},
	{"ﻍ", "غ"},
	{"ـ", "ـ"},
	{"ﻑ", "ف"},
	{"ﻕ", "ق"},
	{"ﻙ", "ك"},
	{"ﻝ", "ل"},
	{"ﻡ", "م"},
	{"ﻥ", "ن"},
	{"ﻩ", "ه"},
	{"ﻭ", "و"},
	{"ﻯ", "ى"},
	{"ﻱ", "ي"},
	{"ﭖ", "پ"},
	{"ﭺ", "چ"},
	{"ﮊ", "ژ"},
	{"ﮎ", "ک"},
	{"ﮒ", "گ"},
	{"ﯼ", "ی"},
};
/*
 * Different shapes of Arabic and Farsi characters.
 *
 * Each entry names a character and lists the code points of its
 * contextual shapes; a shape that is not listed is 0.  Judging by the
 * values, the shape columns hold Unicode Arabic presentation forms.
 */
static struct achar {
	char *name;	/* glyph name */
	unsigned c;	/* base code point */
	unsigned s;	/* isolated (standalone) form */
	unsigned i;	/* initial form */
	unsigned m;	/* medial form */
	unsigned f;	/* final form */
} achars[] = {
	{"hamza", 0x0621, 0xfe80},
	{"alefwithmaddaabove", 0x0622, 0xfe81, 0, 0, 0xfe82},
	{"alefwithhamzaabove", 0x0623, 0xfe83, 0, 0, 0xfe84},
	{"wawwithhamzaabove", 0x0624, 0xfe85, 0, 0, 0xfe86},
	{"alefwithhamzabelow", 0x0625, 0xfe87, 0, 0, 0xfe88},
	{"yehwithhamzaabove", 0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},
	{"alef", 0x0627, 0xfe8d, 0, 0, 0xfe8e},
	{"arabicalef", 0x0627},
	{"beh", 0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},
	{"tehmarbuta", 0x0629, 0xfe93, 0, 0, 0xfe94},
	{"teh", 0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},
	{"theh", 0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},
	{"jeem", 0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},
	{"hah", 0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},
	{"khah", 0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},
	{"dal", 0x062f, 0xfea9, 0, 0, 0xfeaa},
	{"thal", 0x0630, 0xfeab, 0, 0, 0xfeac},
	{"reh", 0x0631, 0xfead, 0, 0, 0xfeae},
	{"zain", 0x0632, 0xfeaf, 0, 0, 0xfeb0},
	{"seen", 0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},
	{"sheen", 0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},
	{"sad", 0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},
	{"dad", 0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},
	{"tah", 0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},
	{"zah", 0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},
	{"ain", 0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},
	{"ghain", 0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},
	{"tatweel", 0x0640},
	{"feh", 0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},
	{"qaf", 0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},
	{"kaf", 0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},
	{"lam", 0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},
	{"meem", 0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},
	{"noon", 0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},
	{"heh", 0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},
	{"waw", 0x0648, 0xfeed, 0, 0, 0xfeee},
	{"alefmaksura", 0x0649, 0xfeef, 0, 0, 0xfef0},
	{"yeh", 0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},
	{"fathatan", 0x064b, 0xfe70},
	{"dammatan", 0x064c, 0xfe72},
	{"kasratan", 0x064d, 0xfe74},
	{"fatha", 0x064e, 0xfe76, 0, 0xfe77, 0},
	{"damma", 0x064f, 0xfe78, 0, 0xfe79, 0},
	{"kasra", 0x0650, 0xfe7a, 0, 0xfe7b, 0},
	/* medial shadda is U+FE7D; U+FE7C is the isolated form */
	{"shadda", 0x0651, 0xfe7c, 0, 0xfe7d, 0},
	{"sukun", 0x0652, 0xfe7e, 0, 0xfe7f, 0},
	{"peh", 0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},
	{"tcheh", 0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},
	{"jeh", 0x0698, 0xfb8a, 0, 0, 0xfb8b},
	{"keheh", 0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},
	{"gaf", 0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},
	{"farsiyeh", 0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},
	/* the ligature has no base letter of its own, so c repeats the isolated form */
	{"lamwithalef", 0xfefb, 0xfefb, 0, 0, 0xfefc},
	{"arabiccomma", 0x060c},
	{"arabicsemicolon", 0x061b},
	{"arabicquestionmark", 0x061f},
	{"arabicindicdigitzero", 0x0660},
	{"arabicindicdigitone", 0x0661},
	{"arabicindicdigittwo", 0x0662},
	{"arabicindicdigitthree", 0x0663},
	{"arabicindicdigitfour", 0x0664},
	{"arabicindicdigitfive", 0x0665},
	{"arabicindicdigitsix", 0x0666},
	{"arabicindicdigitseven", 0x0667},
	{"arabicindicdigiteight", 0x0668},
	{"arabicindicdigitnine", 0x0669},
	{"arabicpercentsign", 0x066a},
	{"extendedarabicindicdigitzero", 0x06f0},
	{"extendedarabicindicdigitone", 0x06f1},
	{"extendedarabicindicdigittwo", 0x06f2},
	{"extendedarabicindicdigitthree", 0x06f3},
	{"extendedarabicindicdigitfour", 0x06f4},
	{"extendedarabicindicdigitfive", 0x06f5},
	{"extendedarabicindicdigitsix", 0x06f6},
	{"extendedarabicindicdigitseven", 0x06f7},
	{"extendedarabicindicdigiteight", 0x06f8},
	{"extendedarabicindicdigitnine", 0x06f9},
	{"zeronojoin", 0x200c},
	{"zerojoin", 0x200d},
};
461 static int ctype_ascii[128] = {
462 ['!'] = 2, ['"'] = 2, ['#'] = 2, ['$'] = 2, ['%'] = 2,
463 ['&'] = 2, ['\''] = 2, ['('] = 3, [')'] = 3, ['*'] = 2,
464 ['+'] = 0, [','] = 1, ['-'] = 0, ['.'] = 0, ['/'] = 2,
465 ['0'] = 2, ['1'] = 2, ['2'] = 2, ['3'] = 2, ['4'] = 2,
466 ['5'] = 2, ['6'] = 2, ['7'] = 2, ['8'] = 2, ['9'] = 2,
467 [':'] = 0, [';'] = 1, ['<'] = 0, ['='] = 0, ['>'] = 0,
468 ['?'] = 2, ['@'] = 3, ['A'] = 2, ['B'] = 2, ['C'] = 2,
469 ['D'] = 2, ['E'] = 2, ['F'] = 2, ['G'] = 2, ['H'] = 2,
470 ['I'] = 2, ['J'] = 3, ['K'] = 2, ['L'] = 2, ['M'] = 2,
471 ['N'] = 2, ['O'] = 2, ['P'] = 2, ['Q'] = 3, ['R'] = 2,
472 ['S'] = 2, ['T'] = 2, ['U'] = 2, ['V'] = 2, ['W'] = 2,
473 ['X'] = 2, ['Y'] = 2, ['Z'] = 2, ['['] = 3, ['\\'] = 2,
474 [']'] = 3, ['^'] = 2, ['_'] = 1, ['a'] = 0, ['b'] = 2,
475 ['c'] = 0, ['d'] = 2, ['e'] = 0, ['f'] = 2, ['g'] = 1,
476 ['h'] = 2, ['i'] = 2, ['j'] = 3, ['k'] = 2, ['l'] = 2,
477 ['m'] = 0, ['n'] = 0, ['o'] = 0, ['p'] = 1, ['q'] = 1,
478 ['r'] = 0, ['s'] = 0, ['t'] = 2, ['u'] = 0, ['v'] = 0,
479 ['w'] = 0, ['x'] = 0, ['y'] = 1, ['z'] = 0, ['{'] = 3,
480 ['|'] = 3, ['}'] = 3, ['~'] = 0,