Issue #7142: Fix uses of unicode in memoryview objects
[python.git] / Tools / unicode / genwincodec.py
blob 32dcadabf245d067aab384135afa6f65419ac0a9
1 """This script generates a Python codec module from a Windows Code Page.
3 It uses the function MultiByteToWideChar to generate a decoding table.
4 """
6 import ctypes
7 from ctypes import wintypes
8 from gencodec import codegen
9 import unicodedata
def genwinmap(codepage):
    """Build the byte -> Unicode decoding map of a Windows code page.

    Each byte value 0..255 is decoded on its own with the Win32 function
    MultiByteToWideChar (reached through ctypes.windll, so this only
    works on Windows).

    codepage -- numeric code page identifier, passed as the first
                argument of MultiByteToWideChar.

    Returns a dict mapping every byte value to a tuple
    (code point, character name).  The name is taken from
    unicodedata.name(); when the character has no name there, bytes
    0..31 and 127 get 'CONTROL CHARACTER' and all other bytes get ''.

    Raises AssertionError if a byte does not convert to exactly one
    wide character.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    enc2uni = {}

    # Default names for the control range; they are only kept for bytes
    # whose decoded character has no name in unicodedata.
    # list() keeps the concatenation valid when range() is not a list.
    for i in list(range(32)) + [127]:
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for i in range(256):
        # Room for one wide character plus the terminating NUL.
        buf = ctypes.create_unicode_buffer(2)
        # LPCSTR needs a byte string; bytes(bytearray([i])) yields the
        # single byte i on both Python 2 (str) and Python 3 (bytes).
        ret = MultiByteToWideChar(
            codepage, 0,
            bytes(bytearray([i])), 1,
            buf, 2)
        assert ret == 1, "invalid code page"
        assert buf[1] == '\x00'
        try:
            name = unicodedata.name(buf[0])
        except ValueError:
            # Unnamed character: fall back to the default stored above,
            # or to an empty name when there is none for this byte.
            name = enc2uni.get(i, (None, ''))[1]

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
def genwincodec(codepage):
    """Print the source code of a codec module for a Windows code page.

    The map returned by genwinmap() is passed to gencodec.codegen()
    under the encoding name 'cp<codepage>'.  The beginning of the
    generated source, up to and including the first docstring end
    marker, is replaced by a docstring recording the Windows version
    and the command used; the result is written to standard output.

    codepage -- numeric code page identifier (an integer).
    """
    import platform
    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    code = codegen("", decoding_map, encodingname)
    # Replace first lines with our own docstring.  The split marker is a
    # closing triple quote followed by #" ; only the text after its first
    # occurrence is kept from the generated source.
    code = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, ' '.join(platform.win32_ver()), codepage
      ) + code.split('"""#"', 1)[1]

    # Parenthesised single argument: same output whether print is a
    # statement or a function.
    print(code)
if __name__ == '__main__':
    import sys
    # The first command-line argument is the numeric code page; without
    # it, stop with a usage message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit('usage: python Tools/unicode/genwincodec.py CODEPAGE')
    genwincodec(int(sys.argv[1]))