Tools/unicode/genwincodec.py

   1 """This script generates a Python codec module from a Windows Code Page.
   2
   3 It uses the function MultiByteToWideChar to generate a decoding table.
   4 """
   5
   6 import ctypes
   7 from ctypes import wintypes
   8 from gencodec import codegen
   9 import unicodedata
  10
  11 def genwinmap(codepage):
  12     MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
  13     MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
  14                                     wintypes.LPCSTR, ctypes.c_int,
  15                                     wintypes.LPWSTR, ctypes.c_int]
  16     MultiByteToWideChar.restype = ctypes.c_int
  17
  18     enc2uni = {}
  19
  20     for i in range(32) + [127]:
  21         enc2uni[i] = (i, 'CONTROL CHARACTER')
  22
  23     for i in range(256):
  24         buf = ctypes.create_unicode_buffer(2)
  25         ret = MultiByteToWideChar(
  26             codepage, 0,
  27             chr(i), 1,
  28             buf, 2)
  29         assert ret == 1, "invalid code page"
  30         assert buf[1] == '\x00'
  31         try:
  32             name = unicodedata.name(buf[0])
  33         except ValueError:
  34             try:
  35                 name = enc2uni[i][1]
  36             except KeyError:
  37                 name = ''
  38
  39         enc2uni[i] = (ord(buf[0]), name)
  40
  41     return enc2uni
  42
  43 def genwincodec(codepage):
  44     import platform
  45     map = genwinmap(codepage)
  46     encodingname = 'cp%d' % codepage
  47     code = codegen("", map, encodingname)
  48     # Replace first lines with our own docstring
  49     code = '''\
  50 """Python Character Mapping Codec %s generated on Windows:
  51 %s with the command:
  52   python Tools/unicode/genwincodec.py %s
  53 """#"
  54 ''' % (encodingname, ' '.join(platform.win32_ver()), codepage
  55       ) + code.split('"""#"', 1)[1]
  56
  57     print code
  58
  59 if __name__ == '__main__':
  60     import sys
  61     genwincodec(int(sys.argv[1]))