1 /* ligatures with unicode aliases */
2 static char *ligs_utf8
[][2] = {
10 /* these are not ligatures */
/*
 * Strings excluded from ligature handling.  The entries visible here are
 * the two-letter names that agl_exceptions uses as replacements for the
 * ASCII grave, quotesingle, tilde and circumflex characters.
 */
11 static char *ligs_exceptions
[] = {
12 "ga", "aq", "ti", "ha",
/*
 * One pair of strings per row.  Judging by the per-row comments, the first
 * string is what a glyph name would otherwise resolve to and the second is
 * the string used in its place -- TODO confirm the direction against the
 * code that reads this table.  "agl" presumably refers to the Adobe Glyph
 * List; verify.
 */
16 static char *agl_exceptions
[][2] = {
17 {"∆", "Δ"}, /* Delta -> Deltagreek */
18 {"Ω", "Ω"}, /* Omega -> Omegagreek; NOTE(review): the two literals render alike, verify their bytes differ */
19 {"‘", "`"}, /* quoteleft */
20 {"`", "ga"}, /* grave */
21 {"’", "'"}, /* quoteright */
22 {"'", "aq"}, /* quotesingle */
23 {"~", "ti"}, /* asciitilde; using tilde for ~ */
24 {"^", "ha"}, /* asciicircum; using circumflex for ^ */
28 static char *alts
[][8] = {
319 /* different shapes of arabic and farsi characters */
/*
 * Table of Arabic and Farsi characters and their presentation forms.
 *
 * The field declarations are not visible in this view.  From the values,
 * each row appears to be: glyph name, base code point, then the isolated,
 * initial, medial and final presentation forms (compare "beh": U+FE8F,
 * U+FE91, U+FE92, U+FE90) -- TODO confirm against the struct definition.
 * A 0, or an omitted trailing field, means the character has no such form.
 */
320 static struct achar
{
328 {"hamza", 0x0621, 0xfe80},
329 {"alefwithmaddaabove", 0x0622, 0xfe81, 0, 0, 0xfe82},
330 {"alefwithhamzaabove", 0x0623, 0xfe83, 0, 0, 0xfe84},
331 {"wawwithhamzaabove", 0x0624, 0xfe85, 0, 0, 0xfe86},
332 {"alefwithhamzabelow", 0x0625, 0xfe87, 0, 0, 0xfe88},
333 {"yehwithhamzaabove", 0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},
334 {"alef", 0x0627, 0xfe8d, 0, 0, 0xfe8e},
335 {"arabicalef", 0x0627}, /* same code point as "alef", no presentation forms listed */
336 {"beh", 0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},
337 {"tehmarbuta", 0x0629, 0xfe93, 0, 0, 0xfe94},
338 {"teh", 0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},
339 {"theh", 0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},
340 {"jeem", 0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},
341 {"hah", 0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},
342 {"khah", 0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},
343 {"dal", 0x062f, 0xfea9, 0, 0, 0xfeaa},
344 {"thal", 0x0630, 0xfeab, 0, 0, 0xfeac},
345 {"reh", 0x0631, 0xfead, 0, 0, 0xfeae},
346 {"zain", 0x0632, 0xfeaf, 0, 0, 0xfeb0},
347 {"seen", 0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},
348 {"sheen", 0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},
349 {"sad", 0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},
350 {"dad", 0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},
351 {"tah", 0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},
352 {"zah", 0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},
353 {"ain", 0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},
354 {"ghain", 0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},
356 {"feh", 0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},
357 {"qaf", 0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},
358 {"kaf", 0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},
359 {"lam", 0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},
360 {"meem", 0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},
361 {"noon", 0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},
362 {"heh", 0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},
363 {"waw", 0x0648, 0xfeed, 0, 0, 0xfeee},
364 {"alefmaksura", 0x0649, 0xfeef, 0, 0, 0xfef0},
365 {"yeh", 0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},
366 {"fathatan", 0x064b, 0xfe70},
367 {"dammatan", 0x064c, 0xfe72},
368 {"kasratan", 0x064d, 0xfe74},
369 {"fatha", 0x064e, 0xfe76, 0, 0xfe77, 0},
370 {"damma", 0x064f, 0xfe78, 0, 0xfe79, 0},
371 {"kasra", 0x0650, 0xfe7a, 0, 0xfe7b, 0},
372 {"shadda", 0x0651, 0xfe7c, 0, 0xfe7d, 0}, /* medial form is U+FE7D; U+FE7C is the isolated form */
373 {"sukun", 0x0652, 0xfe7e, 0, 0xfe7f, 0},
374 {"peh", 0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},
375 {"tcheh", 0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},
376 {"jeh", 0x0698, 0xfb8a, 0, 0, 0xfb8b},
377 {"keheh", 0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},
378 {"gaf", 0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},
379 {"farsiyeh", 0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},
380 {"lamwithalef", 0xfefb, 0xfefb, 0, 0, 0xfefc},
381 {"arabiccomma", 0x060c},
382 {"arabicsemicolon", 0x061b},
383 {"arabicquestionmark", 0x061f},
384 {"arabicindicdigitzero", 0x0660},
385 {"arabicindicdigitone", 0x0661},
386 {"arabicindicdigittwo", 0x0662},
387 {"arabicindicdigitthree", 0x0663},
388 {"arabicindicdigitfour", 0x0664},
389 {"arabicindicdigitfive", 0x0665},
390 {"arabicindicdigitsix", 0x0666},
391 {"arabicindicdigitseven", 0x0667},
392 {"arabicindicdigiteight", 0x0668},
393 {"arabicindicdigitnine", 0x0669},
394 {"arabicpercentsign", 0x066a},
395 {"extendedarabicindicdigitzero", 0x06f0},
396 {"extendedarabicindicdigitone", 0x06f1},
397 {"extendedarabicindicdigittwo", 0x06f2},
398 {"extendedarabicindicdigitthree", 0x06f3},
399 {"extendedarabicindicdigitfour", 0x06f4},
400 {"extendedarabicindicdigitfive", 0x06f5},
401 {"extendedarabicindicdigitsix", 0x06f6},
402 {"extendedarabicindicdigitseven", 0x06f7},
403 {"extendedarabicindicdigiteight", 0x06f8},
404 {"extendedarabicindicdigitnine", 0x06f9},
405 {"zeronojoin", 0x200c},
406 {"zerojoin", 0x200d},
/*
 * Per-character type code for the 128 ASCII characters, indexed by the
 * character value.  Characters without an explicit entry are
 * zero-initialized.
 *
 * NOTE(review): the values look like the troff font-description character
 * type (0 = neither, 1 = descender, 2 = ascender, 3 = both): 'g', 'p', 'q'
 * and 'y' are 1, capitals and digits are 2, 'j' and the brackets are 3.
 * Confirm against the code that consumes this table.
 */
409 int ctype_ascii
[128] = {
410 ['!'] = 2, ['"'] = 2, ['#'] = 2, ['$'] = 2, ['%'] = 2,
411 ['&'] = 2, ['\''] = 2, ['('] = 3, [')'] = 3, ['*'] = 2,
412 ['+'] = 0, [','] = 1, ['-'] = 0, ['.'] = 0, ['/'] = 2,
413 ['0'] = 2, ['1'] = 2, ['2'] = 2, ['3'] = 2, ['4'] = 2,
414 ['5'] = 2, ['6'] = 2, ['7'] = 2, ['8'] = 2, ['9'] = 2,
415 [':'] = 0, [';'] = 1, ['<'] = 0, ['='] = 0, ['>'] = 0,
416 ['?'] = 2, ['@'] = 3, ['A'] = 2, ['B'] = 2, ['C'] = 2,
417 ['D'] = 2, ['E'] = 2, ['F'] = 2, ['G'] = 2, ['H'] = 2,
418 ['I'] = 2, ['J'] = 3, ['K'] = 2, ['L'] = 2, ['M'] = 2,
419 ['N'] = 2, ['O'] = 2, ['P'] = 2, ['Q'] = 3, ['R'] = 2,
420 ['S'] = 2, ['T'] = 2, ['U'] = 2, ['V'] = 2, ['W'] = 2,
421 ['X'] = 2, ['Y'] = 2, ['Z'] = 2, ['['] = 3, ['\\'] = 2,
422 [']'] = 3, ['^'] = 2, ['_'] = 1, ['a'] = 0, ['b'] = 2,
423 ['c'] = 0, ['d'] = 2, ['e'] = 0, ['f'] = 2, ['g'] = 1,
424 ['h'] = 2, ['i'] = 2, ['j'] = 3, ['k'] = 2, ['l'] = 2,
425 ['m'] = 0, ['n'] = 0, ['o'] = 0, ['p'] = 1, ['q'] = 1,
426 ['r'] = 0, ['s'] = 0, ['t'] = 2, ['u'] = 0, ['v'] = 0,
427 ['w'] = 0, ['x'] = 0, ['y'] = 1, ['z'] = 0, ['{'] = 3,
428 ['|'] = 3, ['}'] = 3, ['~'] = 0,