1 """ Encoding Aliases Support
3 This module is used by the encodings package search function to
4 map encoding names to module names.
6 Note that the search function normalizes the encoding names before
7 doing the lookup, so the mapping will have to map normalized
8 encoding names to module names.
12 The following aliases dictionary contains mappings of all IANA
13 character set names for which the Python core library provides
14 codecs. In addition to these, a few Python-specific codec
15 aliases have also been added.
20 # Please keep this list sorted alphabetically by value (the codec module name)!
24 'ansi_x3.4_1968' : 'ascii',
25 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name
26 'ansi_x3.4_1986' : 'ascii',
30 'iso646_us' : 'ascii',
31 'iso_646.irv_1991' : 'ascii',
37 #'base64' : 'base64_codec',
38 #'base_64' : 'base64_codec',
45 'big5_hkscs' : 'big5hkscs',
46 'hkscs' : 'big5hkscs',
54 'ebcdic_cp_ca' : 'cp037',
55 'ebcdic_cp_nl' : 'cp037',
56 'ebcdic_cp_us' : 'cp037',
57 'ebcdic_cp_wt' : 'cp037',
63 'csibm1026' : 'cp1026',
72 'windows_1250' : 'cp1250',
76 'windows_1251' : 'cp1251',
80 'windows_1252' : 'cp1252',
84 'windows_1253' : 'cp1253',
88 'windows_1254' : 'cp1254',
92 'windows_1255' : 'cp1255',
96 'windows_1256' : 'cp1256',
100 'windows_1257' : 'cp1257',
104 'windows_1258' : 'cp1258',
108 'csibm424' : 'cp424',
109 'ebcdic_cp_he' : 'cp424',
114 'cspc8codepage437' : 'cp437',
119 'csibm500' : 'cp500',
120 'ebcdic_cp_be' : 'cp500',
121 'ebcdic_cp_ch' : 'cp500',
126 'cspc775baltic' : 'cp775',
131 'cspc850multilingual' : 'cp850',
136 'cspcp852' : 'cp852',
141 'csibm855' : 'cp855',
146 'csibm857' : 'cp857',
151 'csibm860' : 'cp860',
157 'csibm861' : 'cp861',
162 'cspc862latinhebrew' : 'cp862',
167 'csibm863' : 'cp863',
172 'csibm864' : 'cp864',
177 'csibm865' : 'cp865',
182 'csibm866' : 'cp866',
188 'csibm869' : 'cp869',
195 'ms_kanji' : 'cp932',
207 'jisx0213' : 'euc_jis_2004',
208 'eucjis2004' : 'euc_jis_2004',
209 'euc_jis2004' : 'euc_jis_2004',
212 'eucjisx0213' : 'euc_jisx0213',
222 'ksc5601' : 'euc_kr',
223 'ks_c_5601' : 'euc_kr',
224 'ks_c_5601_1987' : 'euc_kr',
225 'ksx1001' : 'euc_kr',
226 'ks_x_1001' : 'euc_kr',
229 'gb18030_2000' : 'gb18030',
232 'chinese' : 'gb2312',
233 'csiso58gb231280' : 'gb2312',
236 'eucgb2312_cn' : 'gb2312',
237 'gb2312_1980' : 'gb2312',
238 'gb2312_80' : 'gb2312',
239 'iso_ir_58' : 'gb2312',
247 #'hex' : 'hex_codec',
250 'roman8' : 'hp_roman8',
252 'csHPRoman8' : 'hp_roman8',
260 'csiso2022jp' : 'iso2022_jp',
261 'iso2022jp' : 'iso2022_jp',
262 'iso_2022_jp' : 'iso2022_jp',
265 'iso2022jp_1' : 'iso2022_jp_1',
266 'iso_2022_jp_1' : 'iso2022_jp_1',
269 'iso2022jp_2' : 'iso2022_jp_2',
270 'iso_2022_jp_2' : 'iso2022_jp_2',
272 # iso2022_jp_2004 codec
273 'iso_2022_jp_2004' : 'iso2022_jp_2004',
274 'iso2022jp_2004' : 'iso2022_jp_2004',
277 'iso2022jp_3' : 'iso2022_jp_3',
278 'iso_2022_jp_3' : 'iso2022_jp_3',
280 # iso2022_jp_ext codec
281 'iso2022jp_ext' : 'iso2022_jp_ext',
282 'iso_2022_jp_ext' : 'iso2022_jp_ext',
285 'csiso2022kr' : 'iso2022_kr',
286 'iso2022kr' : 'iso2022_kr',
287 'iso_2022_kr' : 'iso2022_kr',
290 'csisolatin6' : 'iso8859_10',
291 'iso_8859_10' : 'iso8859_10',
292 'iso_8859_10_1992' : 'iso8859_10',
293 'iso_ir_157' : 'iso8859_10',
295 'latin6' : 'iso8859_10',
298 'thai' : 'iso8859_11',
299 'iso_8859_11' : 'iso8859_11',
300 'iso_8859_11_2001' : 'iso8859_11',
303 'iso_8859_13' : 'iso8859_13',
305 'latin7' : 'iso8859_13',
308 'iso_8859_14' : 'iso8859_14',
309 'iso_8859_14_1998' : 'iso8859_14',
310 'iso_celtic' : 'iso8859_14',
311 'iso_ir_199' : 'iso8859_14',
313 'latin8' : 'iso8859_14',
316 'iso_8859_15' : 'iso8859_15',
318 'latin9' : 'iso8859_15',
321 'iso_8859_16' : 'iso8859_16',
322 'iso_8859_16_2001' : 'iso8859_16',
323 'iso_ir_226' : 'iso8859_16',
324 'l10' : 'iso8859_16',
325 'latin10' : 'iso8859_16',
328 'csisolatin2' : 'iso8859_2',
329 'iso_8859_2' : 'iso8859_2',
330 'iso_8859_2_1987' : 'iso8859_2',
331 'iso_ir_101' : 'iso8859_2',
333 'latin2' : 'iso8859_2',
336 'csisolatin3' : 'iso8859_3',
337 'iso_8859_3' : 'iso8859_3',
338 'iso_8859_3_1988' : 'iso8859_3',
339 'iso_ir_109' : 'iso8859_3',
341 'latin3' : 'iso8859_3',
344 'csisolatin4' : 'iso8859_4',
345 'iso_8859_4' : 'iso8859_4',
346 'iso_8859_4_1988' : 'iso8859_4',
347 'iso_ir_110' : 'iso8859_4',
349 'latin4' : 'iso8859_4',
352 'csisolatincyrillic' : 'iso8859_5',
353 'cyrillic' : 'iso8859_5',
354 'iso_8859_5' : 'iso8859_5',
355 'iso_8859_5_1988' : 'iso8859_5',
356 'iso_ir_144' : 'iso8859_5',
359 'arabic' : 'iso8859_6',
360 'asmo_708' : 'iso8859_6',
361 'csisolatinarabic' : 'iso8859_6',
362 'ecma_114' : 'iso8859_6',
363 'iso_8859_6' : 'iso8859_6',
364 'iso_8859_6_1987' : 'iso8859_6',
365 'iso_ir_127' : 'iso8859_6',
368 'csisolatingreek' : 'iso8859_7',
369 'ecma_118' : 'iso8859_7',
370 'elot_928' : 'iso8859_7',
371 'greek' : 'iso8859_7',
372 'greek8' : 'iso8859_7',
373 'iso_8859_7' : 'iso8859_7',
374 'iso_8859_7_1987' : 'iso8859_7',
375 'iso_ir_126' : 'iso8859_7',
378 'csisolatinhebrew' : 'iso8859_8',
379 'hebrew' : 'iso8859_8',
380 'iso_8859_8' : 'iso8859_8',
381 'iso_8859_8_1988' : 'iso8859_8',
382 'iso_ir_138' : 'iso8859_8',
385 'csisolatin5' : 'iso8859_9',
386 'iso_8859_9' : 'iso8859_9',
387 'iso_8859_9_1989' : 'iso8859_9',
388 'iso_ir_148' : 'iso8859_9',
390 'latin5' : 'iso8859_9',
397 'cskoi8r' : 'koi8_r',
401 # Note that the latin_1 codec is implemented internally in C and a
402 # lot faster than the charmap codec iso8859_1 which uses the same
403 # encoding. This is why we discourage the use of the iso8859_1
404 # codec and alias it to latin_1 instead.
408 'csisolatin1' : 'latin_1',
409 'ibm819' : 'latin_1',
410 'iso8859' : 'latin_1',
411 'iso8859_1' : 'latin_1',
412 'iso_8859_1' : 'latin_1',
413 'iso_8859_1_1987' : 'latin_1',
414 'iso_ir_100' : 'latin_1',
417 'latin1' : 'latin_1',
420 'maccyrillic' : 'mac_cyrillic',
423 'macgreek' : 'mac_greek',
426 'maciceland' : 'mac_iceland',
429 'maccentraleurope' : 'mac_latin2',
430 'maclatin2' : 'mac_latin2',
433 'macroman' : 'mac_roman',
436 'macturkish' : 'mac_turkish',
442 'csptcp154' : 'ptcp154',
445 'cyrillic-asian' : 'ptcp154',
447 ## quopri_codec codec
448 #'quopri' : 'quopri_codec',
449 #'quoted_printable' : 'quopri_codec',
450 #'quotedprintable' : 'quopri_codec',
456 'csshiftjis' : 'shift_jis',
457 'shiftjis' : 'shift_jis',
458 'sjis' : 'shift_jis',
459 's_jis' : 'shift_jis',
461 # shift_jis_2004 codec
462 'shiftjis2004' : 'shift_jis_2004',
463 'sjis_2004' : 'shift_jis_2004',
464 's_jis_2004' : 'shift_jis_2004',
466 # shift_jisx0213 codec
467 'shiftjisx0213' : 'shift_jisx0213',
468 'sjisx0213' : 'shift_jisx0213',
469 's_jisx0213' : 'shift_jisx0213',
475 'tis620' : 'tis_620',
476 'tis_620_0' : 'tis_620',
477 'tis_620_2529_0' : 'tis_620',
478 'tis_620_2529_1' : 'tis_620',
479 'iso_ir_166' : 'tis_620',
486 'unicodebigunmarked' : 'utf_16_be',
487 'utf_16be' : 'utf_16_be',
490 'unicodelittleunmarked' : 'utf_16_le',
491 'utf_16le' : 'utf_16_le',
498 'utf_32be' : 'utf_32_be',
501 'utf_32le' : 'utf_32_le',
506 'unicode_1_1_utf_7' : 'utf_7',
512 'utf8_ucs2' : 'utf_8',
513 'utf8_ucs4' : 'utf_8',
519 #'zip' : 'zlib_codec',
520 #'zlib' : 'zlib_codec',
522 # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
523 'x_mac_japanese' : 'shift_jis',
524 'x_mac_korean' : 'euc_kr',
525 'x_mac_simp_chinese' : 'gb2312',
526 'x_mac_trad_chinese' : 'big5',