app-i18n/mozc: Initial commit - add fcitx5 support
[gentoo-zh.git] / app-i18n / mozc / files / mozc-2.23.2815.102-python-3_3.patch
blob a5c5a2dc8038db657c2ac12be89a4b9eeb5e734e
1 https://github.com/google/mozc/issues/462
3 --- /src/dictionary/gen_pos_map.py
4 +++ /src/dictionary/gen_pos_map.py
5 @@ -39,7 +39,7 @@
6 from build_tools import code_generator_util
9 -HEADER = """// Copyright 2009 Google Inc. All Rights Reserved.
10 +HEADER = b"""// Copyright 2009 Google Inc. All Rights Reserved.
11 // Author: keni
13 #ifndef MOZC_DICTIONARY_POS_MAP_H_
14 @@ -48,13 +48,13 @@
15 // POS conversion rules
16 const POSMap kPOSMap[] = {
17 """
18 -FOOTER = """};
19 +FOOTER = b"""};
21 #endif // MOZC_DICTIONARY_POS_MAP_H_
22 """
24 def ParseUserPos(user_pos_file):
25 - with open(user_pos_file, 'r') as stream:
26 + with open(user_pos_file, 'rb') as stream:
27 stream = code_generator_util.SkipLineComment(stream)
28 stream = code_generator_util.ParseColumnStream(stream, num_column=2)
29 return dict((key, enum_value) for key, enum_value in stream)
30 @@ -64,7 +64,7 @@
31 user_pos_map = ParseUserPos(user_pos_file)
33 result = {}
34 - with open(third_party_pos_map_file, 'r') as stream:
35 + with open(third_party_pos_map_file, 'rb') as stream:
36 stream = code_generator_util.SkipLineComment(stream)
37 for columns in code_generator_util.ParseColumnStream(stream, num_column=2):
38 third_party_pos_name, mozc_pos = (columns + [None])[:2]
39 @@ -78,7 +78,7 @@
40 result[third_party_pos_name] = mozc_pos
42 # Create mozc_pos to mozc_pos map.
43 - for key, value in user_pos_map.iteritems():
44 + for key, value in user_pos_map.items():
45 if key in result:
46 assert (result[key] == value)
47 continue
48 @@ -94,10 +94,10 @@
49 if value is None:
50 # Invalid PosType.
51 value = (
52 - 'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
53 + b'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
54 else:
55 - value = '::mozc::user_dictionary::UserDictionary::' + value
56 - output.write(' { %s, %s },\n' % (key, value))
57 + value = b'::mozc::user_dictionary::UserDictionary::' + value
58 + output.write(b' { %s, %s },\n' % (key, value))
59 output.write(FOOTER)
62 @@ -121,7 +121,7 @@
63 pos_map = GeneratePosMap(options.third_party_pos_map_file,
64 options.user_pos_file)
66 - with open(options.output, 'w') as stream:
67 + with open(options.output, 'wb') as stream:
68 OutputPosMap(pos_map, stream)
71 --- /src/dictionary/gen_pos_rewrite_rule.py
72 +++ /src/dictionary/gen_pos_rewrite_rule.py
73 @@ -46,29 +46,34 @@
76 def LoadRewriteMapRule(filename):
77 - fh = open(filename)
78 + fh = open(filename, 'rb')
79 rule = []
80 for line in fh:
81 - line = line.rstrip('\n')
82 - if not line or line.startswith('#'):
83 + line = line.rstrip(b'\n')
84 + if not line or line.startswith(b'#'):
85 continue
86 fields = line.split()
87 rule.append([fields[0], fields[1]])
88 + fh.close()
89 return rule
92 def ReadPOSID(id_file, special_pos_file):
93 pos_list = []
95 - for line in open(id_file, 'r'):
96 + fh = open(id_file, 'rb')
97 + for line in fh:
98 fields = line.split()
99 pos_list.append(fields[1])
100 + fh.close()
102 - for line in open(special_pos_file, 'r'):
103 - if len(line) <= 1 or line[0] == '#':
104 + fh = open(special_pos_file, 'rb')
105 + for line in fh:
106 + if len(line) <= 1 or line[0:1] == b'#':
107 continue
108 fields = line.split()
109 pos_list.append(fields[0])
110 + fh.close()
112 return pos_list
114 @@ -112,7 +117,7 @@
115 ids.append(id)
117 with open(opts.output, 'wb') as f:
118 - f.write(''.join(chr(id) for id in ids))
119 + f.write(''.join(chr(id) for id in ids).encode('utf-8'))
122 if __name__ == '__main__':
123 --- /src/dictionary/gen_suffix_data.py
124 +++ /src/dictionary/gen_suffix_data.py
125 @@ -52,10 +52,10 @@
126 opts = _ParseOptions()
128 result = []
129 - with open(opts.input, 'r') as stream:
130 + with open(opts.input, 'rb') as stream:
131 for line in stream:
132 - line = line.rstrip('\r\n')
133 - fields = line.split('\t')
134 + line = line.rstrip(b'\r\n')
135 + fields = line.split(b'\t')
136 key = fields[0]
137 lid = int(fields[1])
138 rid = int(fields[2])
139 @@ -63,7 +63,7 @@
140 value = fields[4]
142 if key == value:
143 - value = ''
144 + value = b''
146 result.append((key, value, lid, rid, cost))
148 --- /src/dictionary/gen_user_pos_data.py
149 +++ /src/dictionary/gen_user_pos_data.py
150 @@ -64,7 +64,7 @@
151 f.write(struct.pack('<H', conjugation_id))
153 serialized_string_array_builder.SerializeToFile(
154 - sorted(string_index.iterkeys()), output_string_array)
155 + sorted(x.encode('utf-8') for x in string_index.keys()), output_string_array)
158 def ParseOptions():
159 @@ -100,7 +100,7 @@
161 if options.output_pos_list:
162 serialized_string_array_builder.SerializeToFile(
163 - [pos for (pos, _) in user_pos.data], options.output_pos_list)
164 + [pos.encode('utf-8') for (pos, _) in user_pos.data], options.output_pos_list)
167 if __name__ == '__main__':
168 --- /src/dictionary/gen_zip_code_seed.py
169 +++ /src/dictionary/gen_zip_code_seed.py
170 @@ -83,7 +83,7 @@
171 address = unicodedata.normalize('NFKC', self.address)
172 line = '\t'.join([zip_code, '0', '0', str(ZIP_CODE_COST),
173 address, ZIP_CODE_LABEL])
174 - print line.encode('utf-8')
175 + print(line.encode('utf-8'))
178 def ProcessZipCodeCSV(file_name):
179 @@ -105,26 +105,26 @@
181 def ReadZipCodeEntries(zip_code, level1, level2, level3):
182 """Read zip code entries."""
183 - return [ZipEntry(zip_code, u''.join([level1, level2, town]))
184 + return [ZipEntry(zip_code, ''.join([level1, level2, town]))
185 for town in ParseTownName(level3)]
188 def ReadJigyosyoEntry(zip_code, level1, level2, level3, name):
189 """Read jigyosyo entry."""
190 return ZipEntry(zip_code,
191 - u''.join([level1, level2, level3, u' ', name]))
192 + ''.join([level1, level2, level3, ' ', name]))
195 def ParseTownName(level3):
196 """Parse town name."""
197 - if level3.find(u'以下に掲載がない場合') != -1:
198 + if level3.find('以下に掲載がない場合') != -1:
199 return ['']
201 assert CanParseAddress(level3), ('failed to be merged %s'
202 % level3.encode('utf-8'))
204 # We ignore additional information here.
205 - level3 = re.sub(u'(.*)', u'', level3, re.U)
206 + level3 = re.sub('(.*)', '', level3, re.U)
208 # For 地割, we have these cases.
209 # XX1地割
210 @@ -134,7 +134,7 @@
211 # XX第1地割、XX第2地割、
212 # XX第1地割〜XX第2地割、
213 # We simply use XX for them.
214 - chiwari_match = re.match(u'(\D*?)第?\d+地割.*', level3, re.U)
215 + chiwari_match = re.match('(\D*?)第?\d+地割.*', level3, re.U)
216 if chiwari_match:
217 town = chiwari_match.group(1)
218 return [town]
219 @@ -144,21 +144,21 @@
220 # -> XX町YY and (XX町)ZZ
221 # YY、ZZ
222 # -> YY and ZZ
223 - chou_match = re.match(u'(.*町)?(.*)', level3, re.U)
224 + chou_match = re.match('(.*町)?(.*)', level3, re.U)
225 if chou_match:
226 - chou = u''
227 + chou = ''
228 if chou_match.group(1):
229 chou = chou_match.group(1)
230 rests = chou_match.group(2)
231 - return [chou + rest for rest in rests.split(u'、')]
232 + return [chou + rest for rest in rests.split('、')]
234 return [level3]
237 def CanParseAddress(address):
238 """Return true for valid address."""
239 - return (address.find(u'(') == -1 or
240 - address.find(u')') != -1)
241 + return (address.find('(') == -1 or
242 + address.find(')') != -1)
245 def ParseOptions():
246 --- /src/dictionary/zip_code_util.py
247 +++ /src/dictionary/zip_code_util.py
248 @@ -86,11 +86,11 @@
251 _SPECIAL_CASES = [
252 - SpecialMergeZip(u'5900111', u'大阪府', u'堺市中区', [u'三原台']),
253 - SpecialMergeZip(u'8710046', u'大分県', u'中津市',
254 - [u'金谷', u'西堀端', u'東堀端', u'古金谷']),
255 - SpecialMergeZip(u'9218046', u'石川県', u'金沢市',
256 - [u'大桑町', u'三小牛町']),
257 + SpecialMergeZip('5900111', '大阪府', '堺市中区', ['三原台']),
258 + SpecialMergeZip('8710046', '大分県', '中津市',
259 + ['金谷', '西堀端', '東堀端', '古金谷']),
260 + SpecialMergeZip('9218046', '石川県', '金沢市',
261 + ['大桑町', '三小牛町']),
265 --- /src/gui/character_pad/data/gen_cp932_map.py
266 +++ /src/gui/character_pad/data/gen_cp932_map.py
267 @@ -32,7 +32,6 @@
269 import re
270 import sys
271 -import string
273 kUnicodePat = re.compile(r'0x[0-9A-Fa-f]{2,4}')
274 def IsValidUnicode(n):
275 @@ -42,28 +41,29 @@
276 fh = open(sys.argv[1])
277 result = {}
278 for line in fh.readlines():
279 - if line[0] is '#':
280 + if line[0] == '#':
281 continue
282 - array = string.split(line)
283 + array = line.split()
284 sjis = array[0]
285 ucs2 = array[1]
286 if eval(sjis) < 32 or not IsValidUnicode(ucs2):
287 continue
288 result.setdefault(ucs2, sjis)
289 + fh.close()
291 keys = sorted(result.keys())
293 - print "struct CP932MapData {"
294 - print " unsigned int ucs4;"
295 - print " unsigned short int sjis;"
296 - print "};"
297 - print ""
298 - print "static const size_t kCP932MapDataSize = %d;" % (len(keys))
299 - print "static const CP932MapData kCP932MapData[] = {"
300 + print("struct CP932MapData {")
301 + print(" unsigned int ucs4;")
302 + print(" unsigned short int sjis;")
303 + print("};")
304 + print("")
305 + print("static const size_t kCP932MapDataSize = %d;" % (len(keys)))
306 + print("static const CP932MapData kCP932MapData[] = {")
307 for n in keys:
308 - print " { %s, %s }," % (n ,result[n])
309 - print " { 0, 0 }";
310 - print "};"
311 + print(" { %s, %s }," % (n ,result[n]))
312 + print(" { 0, 0 }");
313 + print("};")
315 if __name__ == "__main__":
316 main()
317 --- /src/gui/character_pad/data/gen_local_character_map.py
318 +++ /src/gui/character_pad/data/gen_local_character_map.py
319 @@ -30,7 +30,6 @@
321 __author__ = "taku"
323 -import string
324 import re
325 import sys
327 @@ -43,9 +42,9 @@
328 fh = open(filename)
329 result = []
330 for line in fh.readlines():
331 - if line[0] is '#':
332 + if line[0] == '#':
333 continue
334 - array = string.split(line)
335 + array = line.split()
336 jis = array[0].replace('0x', '')
337 ucs2 = array[1].replace('0x', '')
338 if len(jis) == 2:
339 @@ -53,6 +52,7 @@
341 if IsValidUnicode(ucs2):
342 result.append([jis, ucs2])
343 + fh.close()
345 return ["JISX0201", result]
347 @@ -60,13 +60,14 @@
348 fh = open(filename)
349 result = []
350 for line in fh.readlines():
351 - if line[0] is '#':
352 + if line[0] == '#':
353 continue
354 array = line.split()
355 jis = array[1].replace('0x', '')
356 ucs2 = array[2].replace('0x', '')
357 if IsValidUnicode(ucs2):
358 result.append([jis, ucs2])
359 + fh.close()
361 return ["JISX0208", result]
363 @@ -74,13 +75,14 @@
364 fh = open(filename)
365 result = []
366 for line in fh.readlines():
367 - if line[0] is '#':
368 + if line[0] == '#':
369 continue
370 array = line.split()
371 jis = array[0].replace('0x', '')
372 ucs2 = array[1].replace('0x', '')
373 if IsValidUnicode(ucs2):
374 result.append([jis, ucs2])
375 + fh.close()
377 return ["JISX0212", result]
379 @@ -88,7 +90,7 @@
380 fh = open(filename)
381 result = []
382 for line in fh.readlines():
383 - if line[0] is '#':
384 + if line[0] == '#':
385 continue
386 array = line.split()
387 sjis = array[0].replace('0x', '')
388 @@ -100,19 +102,20 @@
390 if IsValidUnicode(ucs2):
391 result.append([sjis, ucs2])
392 + fh.close()
394 return ["CP932", result]
396 def Output(arg):
397 name = arg[0]
398 result = arg[1]
399 - print "static const size_t k%sMapSize = %d;" % (name, len(result))
400 - print "static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name)
401 + print("static const size_t k%sMapSize = %d;" % (name, len(result)))
402 + print("static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name))
403 for n in result:
404 - print " { 0x%s, 0x%s }," % (n[0] ,n[1])
405 - print " { 0, 0 }";
406 - print "};"
407 - print ""
408 + print(" { 0x%s, 0x%s }," % (n[0] ,n[1]))
409 + print(" { 0, 0 }");
410 + print("};")
411 + print("")
413 if __name__ == "__main__":
414 Output(LoadJISX0201(sys.argv[1]))
415 --- /src/gui/character_pad/data/gen_unicode_blocks.py
416 +++ /src/gui/character_pad/data/gen_unicode_blocks.py
417 @@ -33,13 +33,13 @@
418 import sys
419 import re
421 -re = re.compile('^(.....?)\.\.(.....?); (.+)')
422 +re = re.compile(r'^(.....?)\.\.(.....?); (.+)')
424 def main():
425 - print "static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {"
426 + print("static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {")
427 fh = open(sys.argv[1])
428 for line in fh.readlines():
429 - if line[0] is '#':
430 + if line[0] == '#':
431 continue
432 m = re.match(line)
433 if m is not None:
434 @@ -47,11 +47,12 @@
435 end = int(m.group(2), 16)
436 name = m.group(3)
437 if start <= 0x2FFFF and end <= 0x2FFFF:
438 - print " { \"%s\", { %d, %d } }," % (name, start, end)
439 + print(" { \"%s\", { %d, %d } }," % (name, start, end))
440 + fh.close()
442 - print " { NULL, { 0, 0 } }"
443 - print "};"
444 - print ""
445 + print(" { NULL, { 0, 0 } }")
446 + print("};")
447 + print("")
449 if __name__ == "__main__":
450 main()
451 --- /src/gui/character_pad/data/gen_unicode_data.py
452 +++ /src/gui/character_pad/data/gen_unicode_data.py
453 @@ -46,18 +46,19 @@
454 code = int(code, 16)
455 if code < 0x2FFFF:
456 results.append(" { %d, \"%s\" }," % (code, desc))
457 + fh.close()
459 - print "struct UnicodeData {";
460 - print " char32 ucs4;";
461 - print " const char *description;";
462 - print "};";
463 - print ""
464 - print "static const size_t kUnicodeDataSize = %d;" % (len(results))
465 - print "static const UnicodeData kUnicodeData[] = {";
466 + print("struct UnicodeData {");
467 + print(" char32 ucs4;");
468 + print(" const char *description;");
469 + print("};");
470 + print("")
471 + print("static const size_t kUnicodeDataSize = %d;" % (len(results)))
472 + print("static const UnicodeData kUnicodeData[] = {");
473 for line in results:
474 - print line;
475 - print " { 0, NULL }";
476 - print "};";
477 + print(line);
478 + print(" { 0, NULL }");
479 + print("};");
481 if __name__ == "__main__":
482 main()
483 --- /src/gui/character_pad/data/gen_unihan_data.py
484 +++ /src/gui/character_pad/data/gen_unihan_data.py
485 @@ -31,35 +31,34 @@
486 __author__ = "taku"
488 import re
489 -import string
490 import sys
491 rs = {}
493 def Escape(n):
494 - if n is not "NULL":
495 + if n != "NULL":
496 return "\"%s\"" % (n)
497 else:
498 return "NULL"
500 def GetCode(n):
501 - if n is not "NULL":
502 - n = string.replace(n, '0-', 'JIS X 0208: 0x')
503 - n = string.replace(n, '1-', 'JIS X 0212: 0x')
504 - n = string.replace(n, '3-', 'JIS X 0213: 0x')
505 - n = string.replace(n, '4-', 'JIS X 0213: 0x')
506 - n = string.replace(n, 'A-', 'Vendors Ideographs: 0x')
507 - n = string.replace(n, '3A', 'JIS X 0213 2000: 0x')
508 + if n != "NULL":
509 + n = n.replace('0-', 'JIS X 0208: 0x')
510 + n = n.replace('1-', 'JIS X 0212: 0x')
511 + n = n.replace('3-', 'JIS X 0213: 0x')
512 + n = n.replace('4-', 'JIS X 0213: 0x')
513 + n = n.replace('A-', 'Vendors Ideographs: 0x')
514 + n = n.replace('3A', 'JIS X 0213 2000: 0x')
515 return "\"%s\"" % n
516 else:
517 return "NULL"
519 def GetRadical(n):
520 pat = re.compile(r'^(\d+)\.')
521 - if n is not "NULL":
522 + if n != "NULL":
523 m = pat.match(n)
524 if m:
525 result = rs[m.group(1)]
526 - return "\"%s\"" % (result.encode('string_escape'))
527 + return "\"%s\"" % result
528 else:
529 return "NULL"
530 else:
531 @@ -73,6 +72,7 @@
532 id = array[1]
533 radical = array[2]
534 rs[id] = radical
535 + fh.close()
537 dic = {}
538 pat = re.compile(r'^U\+(\S+)\s+(kTotalStrokes|kJapaneseKun|kJapaneseOn|kRSUnicode|kIRG_JSource)\t(.+)')
539 @@ -86,23 +86,24 @@
540 n = int(m.group(1), 16)
541 if n <= 65536:
542 dic.setdefault(key, {}).setdefault(field, value)
543 + fh.close()
545 keys = sorted(dic.keys())
547 - print "struct UnihanData {";
548 - print " unsigned int ucs4;";
549 + print("struct UnihanData {");
550 + print(" unsigned int ucs4;");
551 # Since the total strokes defined in Unihan data is Chinese-based
552 # number, we can't use it.
553 # print " unsigned char total_strokes;";
554 - print " const char *japanese_kun;";
555 - print " const char *japanese_on;";
556 + print(" const char *japanese_kun;");
557 + print(" const char *japanese_on;");
558 # Since the radical information defined in Unihan data is Chinese-based
559 # number, we can't use it.
560 # print " const char *radical;";
561 - print " const char *IRG_jsource;";
562 - print "};"
563 - print "static const size_t kUnihanDataSize = %d;" % (len(keys))
564 - print "static const UnihanData kUnihanData[] = {"
565 + print(" const char *IRG_jsource;");
566 + print("};")
567 + print("static const size_t kUnihanDataSize = %d;" % (len(keys)))
568 + print("static const UnihanData kUnihanData[] = {")
570 for key in keys:
571 total_strokes = dic[key].get("kTotalStrokes", "0")
572 @@ -111,9 +112,9 @@
573 rad = GetRadical(dic[key].get("kRSUnicode", "NULL"))
574 code = GetCode(dic[key].get("kIRG_JSource", "NULL"))
575 # print " { 0x%s, %s, %s, %s, %s, %s }," % (key, total_strokes, kun, on, rad, code)
576 - print " { 0x%s, %s, %s, %s }," % (key, kun, on, code)
577 + print(" { 0x%s, %s, %s, %s }," % (key, kun, on, code))
579 - print "};"
580 + print("};")
582 if __name__ == "__main__":
583 main()