app-i18n/mozc: Initial commit - add fcitx5 support
[gentoo-zh.git] / app-i18n / mozc / files / mozc-2.23.2815.102-python-3_4.patch
blob 41d2bf9eeb9025023802dc911aaa14137dd584b2
1 https://github.com/google/mozc/issues/462
3 --- /src/prediction/gen_zero_query_data.py
4 +++ /src/prediction/gen_zero_query_data.py
5 @@ -59,20 +59,20 @@
6 Returns:
7 A integer indicating parsed pua.
8 """
9 - if not s or s[0] == '>':
10 + if not s or s[0:1] == b'>':
11 return 0
12 return int(s, 16)
15 def NormalizeString(string):
16 return unicodedata.normalize(
17 - 'NFKC', string.decode('utf-8')).encode('utf-8').replace('~', '〜')
18 + 'NFKC', string.decode('utf-8')).replace('~', '〜').encode('utf-8')
21 def RemoveTrailingNumber(string):
22 if not string:
23 - return ''
24 - return re.sub(r'^([^0-9]+)[0-9]+$', r'\1', string)
25 + return b''
26 + return re.sub(br'^([^0-9]+)[0-9]+$', br'\1', string)
29 def GetReadingsFromDescription(description):
30 @@ -84,19 +84,19 @@
31 # - ビル・建物
32 # \xE3\x83\xBB : "・"
33 return [RemoveTrailingNumber(token) for token
34 - in re.split(r'(?:\(|\)|/|\xE3\x83\xBB)+', normalized)]
35 + in re.split(br'(?:\(|\)|/|\xE3\x83\xBB)+', normalized)]
38 def ReadEmojiTsv(stream):
39 """Reads emoji data from stream and returns zero query data."""
40 zero_query_dict = defaultdict(list)
41 stream = code_generator_util.SkipLineComment(stream)
42 - for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
43 + for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
44 if len(columns) != 13:
45 - logging.critical('format error: %s', '\t'.join(columns))
46 + logging.critical('format error: %s', b'\t'.join(columns))
47 sys.exit(1)
49 - code_points = columns[0].split(' ')
50 + code_points = columns[0].split(b' ')
52 # Emoji code point.
53 emoji = columns[1]
54 @@ -114,12 +114,12 @@
55 # - Composite emoji which has multiple code point.
56 # NOTE: Some Unicode 6.0 emoji don't have PUA, and it is also omitted.
57 # TODO(hsumita): Check the availability of such emoji and enable it.
58 - logging.info('Skip %s', ' '.join(code_points))
59 + logging.info('Skip %s', b' '.join(code_points))
60 continue
62 reading_list = []
63 # \xe3\x80\x80 is a full-width space
64 - for reading in re.split(r'(?: |\xe3\x80\x80)+', NormalizeString(readings)):
65 + for reading in re.split(br'(?: |\xe3\x80\x80)+', NormalizeString(readings)):
66 if not reading:
67 continue
68 reading_list.append(reading)
69 @@ -158,15 +158,15 @@
70 zero_query_dict = defaultdict(list)
72 for line in input_stream:
73 - if line.startswith('#'):
74 + if line.startswith(b'#'):
75 continue
76 - line = line.rstrip('\r\n')
77 + line = line.rstrip(b'\r\n')
78 if not line:
79 continue
81 - tokens = line.split('\t')
82 + tokens = line.split(b'\t')
83 key = tokens[0]
84 - values = tokens[1].split(',')
85 + values = tokens[1].split(b',')
87 for value in values:
88 zero_query_dict[key].append(
89 @@ -179,16 +179,16 @@
90 """Reads emoticon data from stream and returns zero query data."""
91 zero_query_dict = defaultdict(list)
92 stream = code_generator_util.SkipLineComment(stream)
93 - for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
94 + for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
95 if len(columns) != 3:
96 - logging.critical('format error: %s', '\t'.join(columns))
97 + logging.critical('format error: %s', b'\t'.join(columns))
98 sys.exit(1)
100 emoticon = columns[0]
101 readings = columns[2]
103 # \xe3\x80\x80 is a full-width space
104 - for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
105 + for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
106 if not reading:
107 continue
108 zero_query_dict[reading].append(
109 @@ -202,9 +202,9 @@
110 """Reads emoji data from stream and returns zero query data."""
111 zero_query_dict = defaultdict(list)
112 stream = code_generator_util.SkipLineComment(stream)
113 - for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
114 + for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
115 if len(columns) < 3:
116 - logging.warning('format error: %s', '\t'.join(columns))
117 + logging.warning('format error: %s', b'\t'.join(columns))
118 continue
120 symbol = columns[1]
121 @@ -222,7 +222,7 @@
122 continue
124 # \xe3\x80\x80 is a full-width space
125 - for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
126 + for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
127 if not reading:
128 continue
129 zero_query_dict[reading].append(
130 @@ -247,7 +247,7 @@
132 def IsValidKeyForZeroQuery(key):
133 """Returns if the key is valid for zero query trigger."""
134 - is_ascii = all(ord(char) < 128 for char in key)
135 + is_ascii = all(char < 128 for char in key)
136 return not is_ascii
139 @@ -301,13 +301,13 @@
141 def main():
142 options = ParseOptions()
143 - with open(options.input_rule, 'r') as input_stream:
144 + with open(options.input_rule, 'rb') as input_stream:
145 zero_query_rule_dict = ReadZeroQueryRuleData(input_stream)
146 - with open(options.input_symbol, 'r') as input_stream:
147 + with open(options.input_symbol, 'rb') as input_stream:
148 zero_query_symbol_dict = ReadSymbolTsv(input_stream)
149 - with open(options.input_emoji, 'r') as input_stream:
150 + with open(options.input_emoji, 'rb') as input_stream:
151 zero_query_emoji_dict = ReadEmojiTsv(input_stream)
152 - with open(options.input_emoticon, 'r') as input_stream:
153 + with open(options.input_emoticon, 'rb') as input_stream:
154 zero_query_emoticon_dict = ReadEmoticonTsv(input_stream)
156 merged_zero_query_dict = MergeZeroQueryData(
157 --- /src/prediction/gen_zero_query_number_data.py
158 +++ /src/prediction/gen_zero_query_number_data.py
159 @@ -41,15 +41,15 @@
160 zero_query_dict = defaultdict(list)
162 for line in input_stream:
163 - if line.startswith('#'):
164 + if line.startswith(b'#'):
165 continue
166 - line = line.rstrip('\r\n')
167 + line = line.rstrip(b'\r\n')
168 if not line:
169 continue
171 - tokens = line.split('\t')
172 + tokens = line.split(b'\t')
173 key = tokens[0]
174 - values = tokens[1].split(',')
175 + values = tokens[1].split(b',')
177 for value in values:
178 zero_query_dict[key].append(
179 @@ -71,7 +71,7 @@
181 def main():
182 options = ParseOption()
183 - with open(options.input, 'r') as input_stream:
184 + with open(options.input, 'rb') as input_stream:
185 zero_query_dict = ReadZeroQueryNumberData(input_stream)
186 util.WriteZeroQueryData(zero_query_dict,
187 options.output_token_array,
188 --- /src/prediction/gen_zero_query_util.py
189 +++ /src/prediction/gen_zero_query_util.py
190 @@ -69,7 +69,7 @@
191 output_string_array):
192 # Collect all the strings and assing index in ascending order
193 string_index = {}
194 - for key, entry_list in zero_query_dict.iteritems():
195 + for key, entry_list in zero_query_dict.items():
196 string_index[key] = 0
197 for entry in entry_list:
198 string_index[entry.value] = 0
199 --- /src/rewriter/gen_counter_suffix_array.py
200 +++ /src/rewriter/gen_counter_suffix_array.py
201 @@ -43,7 +43,7 @@
202 with codecs.open(id_file, 'r', encoding='utf-8') as stream:
203 stream = code_generator_util.ParseColumnStream(stream, num_column=2)
204 for pos_id, pos_name in stream:
205 - if pos_name.startswith(u'名詞,接尾,助数詞'):
206 + if pos_name.startswith('名詞,接尾,助数詞'):
207 pos_ids.add(pos_id)
208 return pos_ids
210 --- /src/rewriter/gen_emoji_rewriter_data.py
211 +++ /src/rewriter/gen_emoji_rewriter_data.py
212 @@ -74,19 +74,19 @@
213 the glyph (in other words, it has alternative (primary) code point, which
214 doesn't lead '>' and that's why we'll ignore it).
216 - if not s or s[0] == '>':
217 + if not s or s[0:1] == b'>':
218 return None
219 return int(s, 16)
222 -_FULLWIDTH_RE = re.compile(ur'[！-～]') # U+FF01 - U+FF5E
223 +_FULLWIDTH_RE = re.compile(r'[！-～]') # U+FF01 - U+FF5E
226 def NormalizeString(string):
227 """Normalize full width ascii characters to half width characters."""
228 - offset = ord(u'Ａ') - ord(u'A')
229 - return _FULLWIDTH_RE.sub(lambda x: unichr(ord(x.group(0)) - offset),
230 - unicode(string, 'utf-8')).encode('utf-8')
231 + offset = ord('Ａ') - ord('A')
232 + return _FULLWIDTH_RE.sub(lambda x: chr(ord(x.group(0)) - offset),
233 + string.decode('utf-8')).encode('utf-8')
236 def ReadEmojiTsv(stream):
237 @@ -96,14 +96,14 @@
238 token_dict = defaultdict(list)
240 stream = code_generator_util.SkipLineComment(stream)
241 - for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
242 + for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
243 if len(columns) != 13:
244 - logging.critical('format error: %s', '\t'.join(columns))
245 + logging.critical('format error: %s', b'\t'.join(columns))
246 sys.exit(1)
248 - code_points = columns[0].split(' ')
249 + code_points = columns[0].split(b' ')
250 # Emoji code point.
251 - emoji = columns[1] if columns[1] else ''
252 + emoji = columns[1] if columns[1] else b''
253 android_pua = ParseCodePoint(columns[2])
254 docomo_pua = ParseCodePoint(columns[3])
255 softbank_pua = ParseCodePoint(columns[4])
256 @@ -112,10 +112,10 @@
257 readings = columns[6]
259 # [7]: Name defined in Unicode. It is ignored in current implementation.
260 - utf8_description = columns[8] if columns[8] else ''
261 - docomo_description = columns[9] if columns[9] else ''
262 - softbank_description = columns[10] if columns[10] else ''
263 - kddi_description = columns[11] if columns[11] else ''
264 + utf8_description = columns[8] if columns[8] else b''
265 + docomo_description = columns[9] if columns[9] else b''
266 + softbank_description = columns[10] if columns[10] else b''
267 + kddi_description = columns[11] if columns[11] else b''
269 if not android_pua or len(code_points) > 1:
270 # Skip some emoji, which is not supported on old devices.
271 @@ -123,7 +123,7 @@
272 # - Composite emoji which has multiple code point.
273 # NOTE: Some Unicode 6.0 emoji don't have PUA, and it is also omitted.
274 # TODO(hsumita): Check the availability of such emoji and enable it.
275 - logging.info('Skip %s', ' '.join(code_points))
276 + logging.info('Skip %s', b' '.join(code_points))
277 continue
279 # Check consistency between carrier PUA codes and descriptions for Android
280 @@ -132,7 +132,7 @@
281 (bool(softbank_pua) != bool(softbank_description)) or
282 (bool(kddi_pua) != bool(kddi_description))):
283 logging.warning('carrier PUA and description conflict: %s',
284 - '\t'.join(columns))
285 + b'\t'.join(columns))
286 continue
288 # Check if the character is usable on Android.
289 @@ -140,7 +140,7 @@
290 android_pua = 0 # Replace None with 0.
292 if not emoji and not android_pua:
293 - logging.info('Skip: %s', '\t'.join(columns))
294 + logging.info('Skip: %s', b'\t'.join(columns))
295 continue
297 index = len(emoji_data_list)
298 @@ -149,7 +149,7 @@
299 kddi_description))
301 # \xe3\x80\x80 is a full-width space
302 - for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
303 + for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
304 if reading:
305 token_dict[NormalizeString(reading)].append(index)
307 @@ -159,7 +159,7 @@
308 def OutputData(emoji_data_list, token_dict,
309 token_array_file, string_array_file):
310 """Output token and string arrays to files."""
311 - sorted_token_dict = sorted(token_dict.iteritems())
312 + sorted_token_dict = sorted(token_dict.items())
314 strings = {}
315 for reading, _ in sorted_token_dict:
316 @@ -171,7 +171,7 @@
317 strings[docomo_description] = 0
318 strings[softbank_description] = 0
319 strings[kddi_description] = 0
320 - sorted_strings = sorted(strings.iterkeys())
321 + sorted_strings = sorted(strings.keys())
322 for index, s in enumerate(sorted_strings):
323 strings[s] = index
325 @@ -205,7 +205,7 @@
327 def main():
328 options = ParseOptions()
329 - with open(options.input, 'r') as input_stream:
330 + with open(options.input, 'rb') as input_stream:
331 (emoji_data_list, token_dict) = ReadEmojiTsv(input_stream)
333 OutputData(emoji_data_list, token_dict,
334 --- /src/rewriter/gen_reading_correction_data.py
335 +++ /src/rewriter/gen_reading_correction_data.py
336 @@ -63,7 +63,7 @@
337 def WriteData(input_path, output_value_array_path, output_error_array_path,
338 output_correction_array_path):
339 outputs = []
340 - with open(input_path) as input_stream:
341 + with open(input_path, 'rb') as input_stream:
342 input_stream = code_generator_util.SkipLineComment(input_stream)
343 input_stream = code_generator_util.ParseColumnStream(input_stream,
344 num_column=3)
345 @@ -73,7 +73,7 @@
347 # In order to lookup the entries via |error| with binary search,
348 # sort outputs here.
349 - outputs.sort(lambda x, y: cmp(x[1], y[1]) or cmp(x[0], y[0]))
350 + outputs.sort(key=lambda x: (x[1], x[0]))
352 serialized_string_array_builder.SerializeToFile(
353 [value for (value, _, _) in outputs], output_value_array_path)
354 --- /src/rewriter/gen_single_kanji_rewriter_data.py
355 +++ /src/rewriter/gen_single_kanji_rewriter_data.py
356 @@ -52,7 +52,7 @@
357 stream = code_generator_util.ParseColumnStream(stream, num_column=2)
358 outputs = list(stream)
359 # For binary search by |key|, sort outputs here.
360 - outputs.sort(lambda x, y: cmp(x[0], y[0]))
361 + outputs.sort(key=lambda x: x[0])
363 return outputs
365 @@ -72,7 +72,7 @@
366 variant_items.append([target, original, len(variant_types) - 1])
368 # For binary search by |target|, sort variant items here.
369 - variant_items.sort(lambda x, y: cmp(x[0], y[0]))
370 + variant_items.sort(key=lambda x: x[0])
372 return (variant_types, variant_items)
374 @@ -151,10 +151,10 @@
375 def main():
376 options = _ParseOptions()
378 - with open(options.single_kanji_file, 'r') as single_kanji_stream:
379 + with open(options.single_kanji_file, 'rb') as single_kanji_stream:
380 single_kanji = ReadSingleKanji(single_kanji_stream)
382 - with open(options.variant_file, 'r') as variant_stream:
383 + with open(options.variant_file, 'rb') as variant_stream:
384 variant_info = ReadVariant(variant_stream)
386 WriteSingleKanji(single_kanji,
387 --- /src/session/gen_session_stress_test_data.py
388 +++ /src/session/gen_session_stress_test_data.py
389 @@ -50,24 +50,26 @@
391 result = ''
392 for c in s:
393 - hexstr = hex(ord(c))
394 + hexstr = hex(c)
395 # because hexstr contains '0x', remove the prefix and add our prefix
396 result += '\\x' + hexstr[2:]
397 return result
399 def GenerateHeader(file):
400 try:
401 - print "const char *kTestSentences[] = {"
402 - for line in open(file, "r"):
403 - if line.startswith('#'):
404 + print("const char *kTestSentences[] = {")
405 + fh = open(file, "rb")
406 + for line in fh:
407 + if line.startswith(b'#'):
408 continue
409 - line = line.rstrip('\r\n')
410 + line = line.rstrip(b'\r\n')
411 if not line:
412 continue
413 - print " \"%s\"," % escape_string(line)
414 - print "};"
415 + print(" \"%s\"," % escape_string(line))
416 + fh.close()
417 + print("};")
418 except:
419 - print "cannot open %s" % (file)
420 + print("cannot open %s" % (file))
421 sys.exit(1)
423 def main():
424 --- /src/unix/ibus/gen_mozc_xml.py
425 +++ /src/unix/ibus/gen_mozc_xml.py
426 @@ -74,7 +74,7 @@
429 def OutputXmlElement(param_dict, element_name, value):
430 - print ' <%s>%s</%s>' % (element_name, (value % param_dict), element_name)
431 + print(' <%s>%s</%s>' % (element_name, (value % param_dict), element_name))
434 def OutputXml(param_dict, component, engine_common, engines, setup_arg):
435 @@ -90,26 +90,26 @@
436 engines: A dictionary from a property name to a list of property values of
437 engines. For example, {'name': ['mozc-jp', 'mozc', 'mozc-dv']}.
439 - print '<component>'
440 - for key in component:
441 + print('<component>')
442 + for key in sorted(component):
443 OutputXmlElement(param_dict, key, component[key])
444 - print '<engines>'
445 + print('<engines>')
446 for i in range(len(engines['name'])):
447 - print '<engine>'
448 - for key in engine_common:
449 + print('<engine>')
450 + for key in sorted(engine_common):
451 OutputXmlElement(param_dict, key, engine_common[key])
452 if setup_arg:
453 OutputXmlElement(param_dict, 'setup', ' '.join(setup_arg))
454 - for key in engines:
455 + for key in sorted(engines):
456 OutputXmlElement(param_dict, key, engines[key][i])
457 - print '</engine>'
458 - print '</engines>'
459 - print '</component>'
460 + print('</engine>')
461 + print('</engines>')
462 + print('</component>')
465 def OutputCppVariable(param_dict, prefix, variable_name, value):
466 - print 'const char k%s%s[] = "%s";' % (prefix, variable_name.capitalize(),
467 - (value % param_dict))
468 + print('const char k%s%s[] = "%s";' % (prefix, variable_name.capitalize(),
469 + (value % param_dict)))
472 def OutputCpp(param_dict, component, engine_common, engines):
473 @@ -122,18 +122,18 @@
474 engines: ditto.
476 guard_name = 'MOZC_UNIX_IBUS_MAIN_H_'
477 - print CPP_HEADER % (guard_name, guard_name)
478 - for key in component:
479 + print(CPP_HEADER % (guard_name, guard_name))
480 + for key in sorted(component):
481 OutputCppVariable(param_dict, 'Component', key, component[key])
482 - for key in engine_common:
483 + for key in sorted(engine_common):
484 OutputCppVariable(param_dict, 'Engine', key, engine_common[key])
485 - for key in engines:
486 - print 'const char* kEngine%sArray[] = {' % key.capitalize()
487 + for key in sorted(engines):
488 + print('const char* kEngine%sArray[] = {' % key.capitalize())
489 for i in range(len(engines[key])):
490 - print '"%s",' % (engines[key][i] % param_dict)
491 - print '};'
492 - print 'const size_t kEngineArrayLen = %s;' % len(engines['name'])
493 - print CPP_FOOTER % guard_name
494 + print('"%s",' % (engines[key][i] % param_dict))
495 + print('};')
496 + print('const size_t kEngineArrayLen = %s;' % len(engines['name']))
497 + print(CPP_FOOTER % guard_name)
500 def CheckIBusVersion(options, minimum_version):
501 --- /src/usage_stats/gen_stats_list.py
502 +++ /src/usage_stats/gen_stats_list.py
503 @@ -37,23 +37,24 @@
505 def GetStatsNameList(filename):
506 stats = []
507 - for line in open(filename, 'r'):
508 - stat = line.strip()
509 - if not stat or stat[0] == '#':
510 - continue
511 - stats.append(stat)
512 + with open(filename, 'r') as file:
513 + for line in file:
514 + stat = line.strip()
515 + if not stat or stat[0] == '#':
516 + continue
517 + stats.append(stat)
518 return stats
521 def main():
522 stats_list = GetStatsNameList(sys.argv[1])
523 - print '// This header file is generated by gen_stats_list.py'
524 + print('// This header file is generated by gen_stats_list.py')
525 for stats in stats_list:
526 - print 'const char k%s[] = "%s";' % (stats, stats)
527 - print 'const char *kStatsList[] = {'
528 + print('const char k%s[] = "%s";' % (stats, stats))
529 + print('const char *kStatsList[] = {')
530 for stats in stats_list:
531 - print ' k%s,' % (stats)
532 - print '};'
533 + print(' k%s,' % (stats))
534 + print('};')
537 if __name__ == '__main__':