beta-0.89.2
[luatex.git] / source / libs / poppler / poppler-src / poppler / gen-unicode-tables.py
blob282f6e525180bd9fcd4a2737c93d6acb3580ec5d
# Generates the C source for the Unicode NFKD decomposition tables and writes
# it to standard output.
#
# Two arrays are emitted:
#   decomp_table      one { character, length, offset } record per code point
#                     whose NFKD form differs from the code point itself;
#   decomp_expansion  the concatenated decomposition sequences that the
#                     (offset, length) pairs of decomp_table index into.
#
# The table contents come from the `unicodedata` module, so they reflect the
# Unicode version bundled with the interpreter running this script.

import unicodedata

# The script historically targeted Python 2 (`unichr`, `xrange`); fall back to
# the Python 3 spellings so it runs unchanged on either interpreter.
try:
    _chr = unichr
    _range = xrange
except NameError:
    _chr = chr
    _range = range

# Last code point examined (inclusive).
UNICODE_LAST_CHAR_PART1 = 0x2FAFF

# Precomposed Hangul syllables: 19 leading consonants * 21 vowels * 28
# trailing consonants, starting at U+AC00.  They are left out of the tables;
# presumably the consumer decomposes them arithmetically -- confirm against
# the C code that uses the generated header.
HANGUL_S_BASE = 0xAC00
HANGUL_S_COUNT = 19 * 21 * 28

# Text emitted verbatim at the top of the generated file.
HEADER = """// Generated by gen-unicode-tables.py
typedef struct {
Unicode character;
int length;
int offset;
} decomposition;
"""


def build_decomposition_tables(first=0, last=UNICODE_LAST_CHAR_PART1):
    """Collect NFKD decompositions for the code points first..last (inclusive).

    Returns a pair ``(decomp_table, decomp_expansion)``:

    * ``decomp_table`` is a list of ``(code_point, length, offset)`` tuples,
      one per code point whose NFKD form is not the code point itself.
    * ``decomp_expansion`` is a list of ``(sequence, offset)`` tuples, where
      ``sequence`` is a tuple of code points and ``offset`` is its position
      in the flattened expansion array.

    Identical sequences are stored once and shared by every code point that
    decomposes to them.  Precomposed Hangul syllables are skipped.
    """
    decomp_table = []
    decomp_expansion = []
    offset_of = {}    # decomposition sequence -> offset already assigned
    next_offset = 0   # running length of the flattened expansion array
    hangul_end = HANGUL_S_BASE + HANGUL_S_COUNT

    # `last + 1`: the bound names the last character, so it must be included.
    for code_point in _range(first, last + 1):
        if HANGUL_S_BASE <= code_point < hangul_end:
            continue
        sequence = tuple(
            map(ord, unicodedata.normalize("NFKD", _chr(code_point))))
        if sequence == (code_point,):
            continue  # no decomposition, nothing to record
        offset = offset_of.get(sequence)
        if offset is None:
            # First time this sequence is seen: append it to the expansion.
            offset = next_offset
            offset_of[sequence] = offset
            decomp_expansion.append((sequence, offset))
            next_offset += len(sequence)
        decomp_table.append((code_point, len(sequence), offset))
    return decomp_table, decomp_expansion


def format_decomp_table(decomp_table):
    """Return the C definition of ``decomp_table`` for the given records."""
    rows = ", \n".join(
        " { 0x%x, %d, %d }" % (character, length, offset)
        for character, length, offset in decomp_table)
    return "static const decomposition decomp_table[] = {\n%s\n};\n" % rows


def format_decomp_expansion(decomp_expansion):
    """Return the C definition of ``decomp_expansion`` for the sequences."""
    rows = ", \n".join(
        " %s /* offset %d */ " % (", ".join("0x%x" % c for c in sequence),
                                  offset)
        for sequence, offset in decomp_expansion)
    return "static const Unicode decomp_expansion[] = {\n%s\n};\n" % rows


def main():
    """Write the complete generated C source to standard output."""
    decomp_table, decomp_expansion = build_decomposition_tables()
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(HEADER)
    print("#define DECOMP_TABLE_LENGTH %d\n" % len(decomp_table))
    print(format_decomp_table(decomp_table))
    print(format_decomp_expansion(decomp_expansion))


if __name__ == "__main__":
    main()