3 """usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
6 * https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt
7 * https://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
8 * https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
13 if len (sys
.argv
) != 4:
16 files
= [open (x
, encoding
='utf-8') for x
in sys
.argv
[1:]]
18 headers
= [[files
[0].readline (), files
[0].readline ()], [files
[2].readline (), files
[2].readline ()]]
19 headers
.append (["UnicodeData.txt does not have a header."])
20 while files
[0].readline ().find ('##################') < 0:
32 fields
= [x
.strip () for x
in line
.split (';')]
36 uu
= fields
[0].split ('..')
37 start
= int (uu
[0], 16)
45 for u
in range (start
, end
+ 1):
48 def print_joining_table(f
):
56 fields
= [x
.strip () for x
in line
.split (';')]
60 u
= int (fields
[0], 16)
62 if fields
[3] in ["ALAPH", "DALATH RISH"]:
63 value
= "JOINING_GROUP_" + fields
[3].replace(' ', '_')
65 value
= "JOINING_TYPE_" + fields
[2]
69 for value
in sorted (set ([v
for v
in values
.values ()] + ['JOINING_TYPE_X'])):
70 short
= ''.join(x
[0] for x
in value
.split('_')[2:])
71 assert short
not in short_value
.values()
72 short_value
[value
] = short
75 for value
,short
in short_value
.items():
76 print ("#define %s %s" % (short
, value
))
78 uu
= sorted(values
.keys())
80 all_blocks
= set([blocks
[u
] for u
in uu
])
85 if u
- last
<= 1+16*5:
92 print ("static const uint8_t joining_table[] =")
96 for start
,end
in ranges
:
99 print ("#define joining_offset_0x%04xu %d" % (start
, offset
))
101 for u
in range(start
, end
+1):
103 block
= blocks
.get(u
, last_block
)
104 value
= values
.get(u
, "JOINING_TYPE_X")
106 if block
!= last_block
or u
== start
:
109 if block
in all_blocks
:
110 print ("\n /* %s */" % block
)
112 print ("\n /* FILLER */")
116 print (" /* %04X */" % (u
//32*32), " " * (u
% 32), end
="")
120 print (" /* %04X */ " % u
, end
="")
121 print ("%s," % short_value
[value
], end
="")
124 offset
+= end
- start
+ 1
126 occupancy
= num
* 100. / offset
127 print ("}; /* Table items: %d; occupancy: %d%% */" % (offset
, occupancy
))
132 print ("static unsigned int")
133 print ("joining_type (hb_codepoint_t u)")
135 print (" switch (u >> %d)" % page_bits
)
137 pages
= set([u
>>page_bits
for u
in [s
for s
,e
in ranges
]+[e
for s
,e
in ranges
]])
138 for p
in sorted(pages
):
139 print (" case 0x%0Xu:" % p
)
140 for (start
,end
) in ranges
:
141 if p
not in [start
>>page_bits
, end
>>page_bits
]: continue
142 offset
= "joining_offset_0x%04xu" % start
143 print (" if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return joining_table[u - 0x%04Xu + %s];" % (start
, end
, start
, offset
))
152 for value
,short
in short_value
.items():
153 print ("#undef %s" % (short
))
156 def print_shaping_table(f
):
163 fields
= [x
.strip () for x
in line
.split (';')]
164 if fields
[5][0:1] != '<':
167 items
= fields
[5].split (' ')
168 shape
, items
= items
[0][1:-1], tuple (int (x
, 16) for x
in items
[1:])
170 if not shape
in ['initial', 'medial', 'isolated', 'final']:
173 c
= int (fields
[0], 16)
175 # We only care about lam-alef ligatures
176 if len (items
) != 2 or items
[0] != 0x0644 or items
[1] not in [0x0622, 0x0623, 0x0625, 0x0627]:
181 if items
not in ligatures
:
182 ligatures
[items
] = {}
183 ligatures
[items
][shape
] = c
186 if items
[0] not in names
:
187 names
[items
[0]] = fields
[1]
189 names
[items
[0]] = os
.path
.commonprefix ([names
[items
[0]], fields
[1]]).strip ()
190 if items
[0] not in shapes
:
191 shapes
[items
[0]] = {}
192 shapes
[items
[0]][shape
] = c
195 print ("static const uint16_t shaping_table[][4] =")
198 keys
= shapes
.keys ()
199 min_u
, max_u
= min (keys
), max (keys
)
200 for u
in range (min_u
, max_u
+ 1):
201 s
= [shapes
[u
][shape
] if u
in shapes
and shape
in shapes
[u
] else 0
202 for shape
in ['initial', 'medial', 'final', 'isolated']]
203 value
= ', '.join ("0x%04Xu" % c
for c
in s
)
204 print (" {%s}, /* U+%04X %s */" % (value
, u
, names
[u
] if u
in names
else ""))
208 print ("#define SHAPING_TABLE_FIRST 0x%04Xu" % min_u
)
209 print ("#define SHAPING_TABLE_LAST 0x%04Xu" % max_u
)
213 for pair
in ligatures
.keys ():
214 for shape
in ligatures
[pair
]:
215 c
= ligatures
[pair
][shape
]
216 if shape
== 'isolated':
217 liga
= (shapes
[pair
[0]]['initial'], shapes
[pair
[1]]['final'])
218 elif shape
== 'final':
219 liga
= (shapes
[pair
[0]]['medial'], shapes
[pair
[1]]['final'])
221 raise Exception ("Unexpected shape", shape
)
222 if liga
[0] not in ligas
:
224 ligas
[liga
[0]].append ((liga
[1], c
))
225 max_i
= max (len (ligas
[l
]) for l
in ligas
)
227 print ("static const struct ligature_set_t {")
228 print (" uint16_t first;")
229 print (" struct ligature_pairs_t {")
230 print (" uint16_t second;")
231 print (" uint16_t ligature;")
232 print (" } ligatures[%d];" % max_i
)
233 print ("} ligature_table[] =")
235 for first
in sorted (ligas
.keys ()):
237 print (" { 0x%04Xu, {" % (first
))
238 for liga
in ligas
[first
]:
239 print (" { 0x%04Xu, 0x%04Xu }, /* %s */" % (liga
[0], liga
[1], names
[liga
[1]]))
247 print ("/* == Start of generated table == */")
249 print (" * The following table is generated by running:")
251 print (" * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt")
253 print (" * on files with these headers:")
257 print (" * %s" % (l
.strip()))
260 print ("#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
261 print ("#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
264 read_blocks (files
[2])
265 print_joining_table (files
[0])
266 print_shaping_table (files
[1])
269 print ("#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */")
271 print ("/* == End of generated table == */")