app-i18n/mozc: Initial commit - add fcitx5 support
[gentoo-zh.git] / app-i18n / mozc / files / mozc-2.23.2815.102-python-3_4.patch
blob 41d2bf9eeb9025023802dc911aaa14137dd584b2
1 https://github.com/google/mozc/issues/462
3 --- /src/prediction/gen_zero_query_data.py
4 +++ /src/prediction/gen_zero_query_data.py
5 @@ -59,20 +59,20 @@
6 Returns:
7 A integer indicating parsed pua.
8 """
9 - if not s or s[0] == '>':
10 + if not s or s[0:1] == b'>':
11 return 0
12 return int(s, 16)
15 def NormalizeString(string):
16 return unicodedata.normalize(
17 - 'NFKC', string.decode('utf-8')).encode('utf-8').replace('~', '〜')
18 + 'NFKC', string.decode('utf-8')).replace('~', '〜').encode('utf-8')
21 def RemoveTrailingNumber(string):
22 if not string:
23 - return ''
24 - return re.sub(r'^([^0-9]+)[0-9]+$', r'\1', string)
25 + return b''
26 + return re.sub(br'^([^0-9]+)[0-9]+$', br'\1', string)
29 def GetReadingsFromDescription(description):
30 @@ -84,19 +84,19 @@
31 # - ビル・建物
32 # \xE3\x83\xBB : "・"
33 return [RemoveTrailingNumber(token) for token
34 - in re.split(r'(?:\(|\)|/|\xE3\x83\xBB)+', normalized)]
35 + in re.split(br'(?:\(|\)|/|\xE3\x83\xBB)+', normalized)]
38 def ReadEmojiTsv(stream):
39 """Reads emoji data from stream and returns zero query data."""
40 zero_query_dict = defaultdict(list)
41 stream = code_generator_util.SkipLineComment(stream)
42 - for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
43 + for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
44 if len(columns) != 13:
45 - logging.critical('format error: %s', '\t'.join(columns))
46 + logging.critical('format error: %s', b'\t'.join(columns))
47 sys.exit(1)
49 - code_points = columns[0].split(' ')
50 + code_points = columns[0].split(b' ')
52 # Emoji code point.
53 emoji = columns[1]
54 @@ -114,12 +114,12 @@
55 # - Composite emoji which has multiple code point.
56 # NOTE: Some Unicode 6.0 emoji don't have PUA, and it is also omitted.
57 # TODO(hsumita): Check the availability of such emoji and enable it.
58 - logging.info('Skip %s', ' '.join(code_points))
59 + logging.info('Skip %s', b' '.join(code_points))
60 continue
62 reading_list = []
63 # \xe3\x80\x80 is a full-width space
64 - for reading in re.split(r'(?: |\xe3\x80\x80)+', NormalizeString(readings)):
65 + for reading in re.split(br'(?: |\xe3\x80\x80)+', NormalizeString(readings)):
66 if not reading:
67 continue
68 reading_list.append(reading)
69 @@ -158,15 +158,15 @@
70 zero_query_dict = defaultdict(list)
72 for line in input_stream:
73 - if line.startswith('#'):
74 + if line.startswith(b'#'):
75 continue
76 - line = line.rstrip('\r\n')
77 + line = line.rstrip(b'\r\n')
78 if not line:
79 continue
81 - tokens = line.split('\t')
82 + tokens = line.split(b'\t')
83 key = tokens[0]
84 - values = tokens[1].split(',')
85 + values = tokens[1].split(b',')
87 for value in values:
88 zero_query_dict[key].append(
89 @@ -179,16 +179,16 @@
90 """Reads emoticon data from stream and returns zero query data."""
91 zero_query_dict = defaultdict(list)
92 stream = code_generator_util.SkipLineComment(stream)
93 - for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
94 + for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
95 if len(columns) != 3:
96 - logging.critical('format error: %s', '\t'.join(columns))
97 + logging.critical('format error: %s', b'\t'.join(columns))
98 sys.exit(1)
100 emoticon = columns[0]
101 readings = columns[2]
103 # \xe3\x80\x80 is a full-width space
104 - for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
105 + for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
106 if not reading:
107 continue
108 zero_query_dict[reading].append(
109 @@ -202,9 +202,9 @@
110 """Reads emoji data from stream and returns zero query data."""
111 zero_query_dict = defaultdict(list)
112 stream = code_generator_util.SkipLineComment(stream)
113 - for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
114 + for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
115 if len(columns) < 3:
116 - logging.warning('format error: %s', '\t'.join(columns))
117 + logging.warning('format error: %s', b'\t'.join(columns))
118 continue
120 symbol = columns[1]
121 @@ -222,7 +222,7 @@
122 continue
124 # \xe3\x80\x80 is a full-width space
125 - for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
126 + for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
127 if not reading:
128 continue
129 zero_query_dict[reading].append(
130 @@ -247,7 +247,7 @@
132 def IsValidKeyForZeroQuery(key):
133 """Returns if the key is valid for zero query trigger."""
134 - is_ascii = all(ord(char) < 128 for char in key)
135 + is_ascii = all(char < 128 for char in key)
136 return not is_ascii
139 @@ -301,13 +301,13 @@
141 def main():
142 options = ParseOptions()
143 - with open(options.input_rule, 'r') as input_stream:
144 + with open(options.input_rule, 'rb') as input_stream:
145 zero_query_rule_dict = ReadZeroQueryRuleData(input_stream)
146 - with open(options.input_symbol, 'r') as input_stream:
147 + with open(options.input_symbol, 'rb') as input_stream:
148 zero_query_symbol_dict = ReadSymbolTsv(input_stream)
149 - with open(options.input_emoji, 'r') as input_stream:
150 + with open(options.input_emoji, 'rb') as input_stream:
151 zero_query_emoji_dict = ReadEmojiTsv(input_stream)
152 - with open(options.input_emoticon, 'r') as input_stream:
153 + with open(options.input_emoticon, 'rb') as input_stream:
154 zero_query_emoticon_dict = ReadEmoticonTsv(input_stream)
156 merged_zero_query_dict = MergeZeroQueryData(
157 --- /src/prediction/gen_zero_query_number_data.py
158 +++ /src/prediction/gen_zero_query_number_data.py
159 @@ -41,15 +41,15 @@
160 zero_query_dict = defaultdict(list)
162 for line in input_stream:
163 - if line.startswith('#'):
164 + if line.startswith(b'#'):
165 continue
166 - line = line.rstrip('\r\n')
167 + line = line.rstrip(b'\r\n')
168 if not line:
169 continue
171 - tokens = line.split('\t')
172 + tokens = line.split(b'\t')
173 key = tokens[0]
174 - values = tokens[1].split(',')
175 + values = tokens[1].split(b',')
177 for value in values:
178 zero_query_dict[key].append(
179 @@ -71,7 +71,7 @@
181 def main():
182 options = ParseOption()
183 - with open(options.input, 'r') as input_stream:
184 + with open(options.input, 'rb') as input_stream:
185 zero_query_dict = ReadZeroQueryNumberData(input_stream)
186 util.WriteZeroQueryData(zero_query_dict,
187 options.output_token_array,
188 --- /src/prediction/gen_zero_query_util.py
189 +++ /src/prediction/gen_zero_query_util.py
190 @@ -69,7 +69,7 @@
191 output_string_array):
192 # Collect all the strings and assing index in ascending order
193 string_index = {}
194 - for key, entry_list in zero_query_dict.iteritems():
195 + for key, entry_list in zero_query_dict.items():
196 string_index[key] = 0
197 for entry in entry_list:
198 string_index[entry.value] = 0
199 --- /src/rewriter/gen_counter_suffix_array.py
200 +++ /src/rewriter/gen_counter_suffix_array.py
201 @@ -43,7 +43,7 @@
202 with codecs.open(id_file, 'r', encoding='utf-8') as stream:
203 stream = code_generator_util.ParseColumnStream(stream, num_column=2)
204 for pos_id, pos_name in stream:
205 - if pos_name.startswith(u'名詞,接尾,助数詞'):
206 + if pos_name.startswith('名詞,接尾,助数詞'):
207 pos_ids.add(pos_id)
208 return pos_ids
210 --- /src/rewriter/gen_emoji_rewriter_data.py
211 +++ /src/rewriter/gen_emoji_rewriter_data.py
212 @@ -74,19 +74,19 @@
213 the glyph (in other words, it has alternative (primary) code point, which
214 doesn't lead '>' and that's why we'll ignore it).
216 - if not s or s[0] == '>':
217 + if not s or s[0:1] == b'>':
218 return None
219 return int(s, 16)
222 -_FULLWIDTH_RE = re.compile(ur'[！-～]') # U+FF01 - U+FF5E
223 +_FULLWIDTH_RE = re.compile(r'[！-～]') # U+FF01 - U+FF5E
226 def NormalizeString(string):
227 """Normalize full width ascii characters to half width characters."""
228 - offset = ord(u'Ａ') - ord(u'A')
229 - return _FULLWIDTH_RE.sub(lambda x: unichr(ord(x.group(0)) - offset),
230 - unicode(string, 'utf-8')).encode('utf-8')
231 + offset = ord('Ａ') - ord('A')
232 + return _FULLWIDTH_RE.sub(lambda x: chr(ord(x.group(0)) - offset),
233 + string.decode('utf-8')).encode('utf-8')
236 def ReadEmojiTsv(stream):
237 @@ -96,14 +96,14 @@
238 token_dict = defaultdict(list)
240 stream = code_generator_util.SkipLineComment(stream)
241 - for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
242 + for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
243 if len(columns) != 13:
244 - logging.critical('format error: %s', '\t'.join(columns))
245 + logging.critical('format error: %s', b'\t'.join(columns))
246 sys.exit(1)
248 - code_points = columns[0].split(' ')
249 + code_points = columns[0].split(b' ')
250 # Emoji code point.
251 - emoji = columns[1] if columns[1] else ''
252 + emoji = columns[1] if columns[1] else b''
253 android_pua = ParseCodePoint(columns[2])
254 docomo_pua = ParseCodePoint(columns[3])
255 softbank_pua = ParseCodePoint(columns[4])
256 @@ -112,10 +112,10 @@
257 readings = columns[6]
259 # [7]: Name defined in Unicode. It is ignored in current implementation.
260 - utf8_description = columns[8] if columns[8] else ''
261 - docomo_description = columns[9] if columns[9] else ''
262 - softbank_description = columns[10] if columns[10] else ''
263 - kddi_description = columns[11] if columns[11] else ''
264 + utf8_description = columns[8] if columns[8] else b''
265 + docomo_description = columns[9] if columns[9] else b''
266 + softbank_description = columns[10] if columns[10] else b''
267 + kddi_description = columns[11] if columns[11] else b''
269 if not android_pua or len(code_points) > 1:
270 # Skip some emoji, which is not supported on old devices.
271 @@ -123,7 +123,7 @@
272 # - Composite emoji which has multiple code point.
273 # NOTE: Some Unicode 6.0 emoji don't have PUA, and it is also omitted.
274 # TODO(hsumita): Check the availability of such emoji and enable it.
275 - logging.info('Skip %s', ' '.join(code_points))
276 + logging.info('Skip %s', b' '.join(code_points))
277 continue
279 # Check consistency between carrier PUA codes and descriptions for Android
280 @@ -132,7 +132,7 @@
281 (bool(softbank_pua) != bool(softbank_description)) or
282 (bool(kddi_pua) != bool(kddi_description))):
283 logging.warning('carrier PUA and description conflict: %s',
284 - '\t'.join(columns))
285 + b'\t'.join(columns))
286 continue
288 # Check if the character is usable on Android.
289 @@ -140,7 +140,7 @@
290 android_pua = 0 # Replace None with 0.
292 if not emoji and not android_pua:
293 - logging.info('Skip: %s', '\t'.join(columns))
294 + logging.info('Skip: %s', b'\t'.join(columns))
295 continue
297 index = len(emoji_data_list)
298 @@ -149,7 +149,7 @@
299 kddi_description))
301 # \xe3\x80\x80 is a full-width space
302 - for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
303 + for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
304 if reading:
305 token_dict[NormalizeString(reading)].append(index)
307 @@ -159,7 +159,7 @@
308 def OutputData(emoji_data_list, token_dict,
309 token_array_file, string_array_file):
310 """Output token and string arrays to files."""
311 - sorted_token_dict = sorted(token_dict.iteritems())
312 + sorted_token_dict = sorted(token_dict.items())
314 strings = {}
315 for reading, _ in sorted_token_dict:
316 @@ -171,7 +171,7 @@
317 strings[docomo_description] = 0
318 strings[softbank_description] = 0
319 strings[kddi_description] = 0
320 - sorted_strings = sorted(strings.iterkeys())
321 + sorted_strings = sorted(strings.keys())
322 for index, s in enumerate(sorted_strings):
323 strings[s] = index
325 @@ -205,7 +205,7 @@
327 def main():
328 options = ParseOptions()
329 - with open(options.input, 'r') as input_stream:
330 + with open(options.input, 'rb') as input_stream:
331 (emoji_data_list, token_dict) = ReadEmojiTsv(input_stream)
333 OutputData(emoji_data_list, token_dict,
334 --- /src/rewriter/gen_reading_correction_data.py
335 +++ /src/rewriter/gen_reading_correction_data.py
336 @@ -63,7 +63,7 @@
337 def WriteData(input_path, output_value_array_path, output_error_array_path,
338 output_correction_array_path):
339 outputs = []
340 - with open(input_path) as input_stream:
341 + with open(input_path, 'rb') as input_stream:
342 input_stream = code_generator_util.SkipLineComment(input_stream)
343 input_stream = code_generator_util.ParseColumnStream(input_stream,
344 num_column=3)
345 @@ -73,7 +73,7 @@
347 # In order to lookup the entries via |error| with binary search,
348 # sort outputs here.
349 - outputs.sort(lambda x, y: cmp(x[1], y[1]) or cmp(x[0], y[0]))
350 + outputs.sort(key=lambda x: (x[1], x[0]))
352 serialized_string_array_builder.SerializeToFile(
353 [value for (value, _, _) in outputs], output_value_array_path)
354 --- /src/rewriter/gen_single_kanji_rewriter_data.py
355 +++ /src/rewriter/gen_single_kanji_rewriter_data.py
356 @@ -52,7 +52,7 @@
357 stream = code_generator_util.ParseColumnStream(stream, num_column=2)
358 outputs = list(stream)
359 # For binary search by |key|, sort outputs here.
360 - outputs.sort(lambda x, y: cmp(x[0], y[0]))
361 + outputs.sort(key=lambda x: x[0])
363 return outputs
365 @@ -72,7 +72,7 @@
366 variant_items.append([target, original, len(variant_types) - 1])
368 # For binary search by |target|, sort variant items here.
369 - variant_items.sort(lambda x, y: cmp(x[0], y[0]))
370 + variant_items.sort(key=lambda x: x[0])
372 return (variant_types, variant_items)
374 @@ -151,10 +151,10 @@
375 def main():
376 options = _ParseOptions()
378 - with open(options.single_kanji_file, 'r') as single_kanji_stream:
379 + with open(options.single_kanji_file, 'rb') as single_kanji_stream:
380 single_kanji = ReadSingleKanji(single_kanji_stream)
382 - with open(options.variant_file, 'r') as variant_stream:
383 + with open(options.variant_file, 'rb') as variant_stream:
384 variant_info = ReadVariant(variant_stream)
386 WriteSingleKanji(single_kanji,
387 --- /src/session/gen_session_stress_test_data.py
388 +++ /src/session/gen_session_stress_test_data.py
389 @@ -50,24 +50,26 @@
391 result = ''
392 for c in s:
393 - hexstr = hex(ord(c))
394 + hexstr = hex(c)
395 # because hexstr contains '0x', remove the prefix and add our prefix
396 result += '\\x' + hexstr[2:]
397 return result
399 def GenerateHeader(file):
400 try:
401 - print "const char *kTestSentences[] = {"
402 - for line in open(file, "r"):
403 - if line.startswith('#'):
404 + print("const char *kTestSentences[] = {")
405 + fh = open(file, "rb")
406 + for line in fh:
407 + if line.startswith(b'#'):
408 continue
409 - line = line.rstrip('\r\n')
410 + line = line.rstrip(b'\r\n')
411 if not line:
412 continue
413 - print " \"%s\"," % escape_string(line)
414 - print "};"
415 + print(" \"%s\"," % escape_string(line))
416 + fh.close()
417 + print("};")
418 except:
419 - print "cannot open %s" % (file)
420 + print("cannot open %s" % (file))
421 sys.exit(1)
423 def main():
424 --- /src/unix/ibus/gen_mozc_xml.py
425 +++ /src/unix/ibus/gen_mozc_xml.py
426 @@ -74,7 +74,7 @@
429 def OutputXmlElement(param_dict, element_name, value):
430 - print ' <%s>%s</%s>' % (element_name, (value % param_dict), element_name)
431 + print(' <%s>%s</%s>' % (element_name, (value % param_dict), element_name))
434 def OutputXml(param_dict, component, engine_common, engines, setup_arg):
435 @@ -90,26 +90,26 @@
436 engines: A dictionary from a property name to a list of property values of
437 engines. For example, {'name': ['mozc-jp', 'mozc', 'mozc-dv']}.
439 - print '<component>'
440 - for key in component:
441 + print('<component>')
442 + for key in sorted(component):
443 OutputXmlElement(param_dict, key, component[key])
444 - print '<engines>'
445 + print('<engines>')
446 for i in range(len(engines['name'])):
447 - print '<engine>'
448 - for key in engine_common:
449 + print('<engine>')
450 + for key in sorted(engine_common):
451 OutputXmlElement(param_dict, key, engine_common[key])
452 if setup_arg:
453 OutputXmlElement(param_dict, 'setup', ' '.join(setup_arg))
454 - for key in engines:
455 + for key in sorted(engines):
456 OutputXmlElement(param_dict, key, engines[key][i])
457 - print '</engine>'
458 - print '</engines>'
459 - print '</component>'
460 + print('</engine>')
461 + print('</engines>')
462 + print('</component>')
465 def OutputCppVariable(param_dict, prefix, variable_name, value):
466 - print 'const char k%s%s[] = "%s";' % (prefix, variable_name.capitalize(),
467 - (value % param_dict))
468 + print('const char k%s%s[] = "%s";' % (prefix, variable_name.capitalize(),
469 + (value % param_dict)))
472 def OutputCpp(param_dict, component, engine_common, engines):
473 @@ -122,18 +122,18 @@
474 engines: ditto.
476 guard_name = 'MOZC_UNIX_IBUS_MAIN_H_'
477 - print CPP_HEADER % (guard_name, guard_name)
478 - for key in component:
479 + print(CPP_HEADER % (guard_name, guard_name))
480 + for key in sorted(component):
481 OutputCppVariable(param_dict, 'Component', key, component[key])
482 - for key in engine_common:
483 + for key in sorted(engine_common):
484 OutputCppVariable(param_dict, 'Engine', key, engine_common[key])
485 - for key in engines:
486 - print 'const char* kEngine%sArray[] = {' % key.capitalize()
487 + for key in sorted(engines):
488 + print('const char* kEngine%sArray[] = {' % key.capitalize())
489 for i in range(len(engines[key])):
490 - print '"%s",' % (engines[key][i] % param_dict)
491 - print '};'
492 - print 'const size_t kEngineArrayLen = %s;' % len(engines['name'])
493 - print CPP_FOOTER % guard_name
494 + print('"%s",' % (engines[key][i] % param_dict))
495 + print('};')
496 + print('const size_t kEngineArrayLen = %s;' % len(engines['name']))
497 + print(CPP_FOOTER % guard_name)
500 def CheckIBusVersion(options, minimum_version):
501 --- /src/usage_stats/gen_stats_list.py
502 +++ /src/usage_stats/gen_stats_list.py
503 @@ -37,23 +37,24 @@
505 def GetStatsNameList(filename):
506 stats = []
507 - for line in open(filename, 'r'):
508 - stat = line.strip()
509 - if not stat or stat[0] == '#':
510 - continue
511 - stats.append(stat)
512 + with open(filename, 'r') as file:
513 + for line in file:
514 + stat = line.strip()
515 + if not stat or stat[0] == '#':
516 + continue
517 + stats.append(stat)
518 return stats
521 def main():
522 stats_list = GetStatsNameList(sys.argv[1])
523 - print '// This header file is generated by gen_stats_list.py'
524 + print('// This header file is generated by gen_stats_list.py')
525 for stats in stats_list:
526 - print 'const char k%s[] = "%s";' % (stats, stats)
527 - print 'const char *kStatsList[] = {'
528 + print('const char k%s[] = "%s";' % (stats, stats))
529 + print('const char *kStatsList[] = {')
530 for stats in stats_list:
531 - print ' k%s,' % (stats)
532 - print '};'
533 + print(' k%s,' % (stats))
534 + print('};')
537 if __name__ == '__main__':