* BUGS: Use new binding of view-emacs-problems.
[emacs.git] / lisp / international / mule-conf.el
blob8bd9301e1b7aaa16d4026f931e20e63464b1b69e
1 ;;; mule-conf.el --- configure multilingual environment
3 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4 ;; 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
5 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
8 ;; Copyright (C) 2003
9 ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 ;; Registration Number H13PRO009
12 ;; Keywords: i18n, mule, multilingual, character set, coding system
14 ;; This file is part of GNU Emacs.
16 ;; GNU Emacs is free software: you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation, either version 3 of the License, or
19 ;; (at your option) any later version.
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
29 ;;; Commentary:
31 ;; This file defines the Emacs charsets and some basic coding systems.
32 ;; Other coding systems are defined in the files in directory
33 ;; lisp/language.
35 ;;; Code:
37 ;;; Remarks
39 ;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
40 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
41 ;; http://www.ecma.ch/.
43 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
44 ;; MS Windows, which are presumably the only charsets we really need
45 ;; to worry about on such systems:
46 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
47 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
48 ;; 1258, 874, 932, 936, 949, 950
50 ;;; Definitions of character sets.
52 ;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are already
53 ;; defined in charset.c as below:
55 ;; (define-charset 'ascii
56 ;; ""
57 ;; :dimension 1
58 ;; :code-space [0 127]
59 ;; :iso-final-char ?B
60 ;; :ascii-compatible-p t
61 ;; :emacs-mule-id 0
62 ;; :code-offset 0)
64 ;; (define-charset 'unicode
65 ;; ""
66 ;; :dimension 3
67 ;; :code-space [0 255 0 255 0 16]
68 ;; :ascii-compatible-p t
69 ;; :code-offset 0)
71 ;; (define-charset 'emacs
72 ;; ""
73 ;; :dimension 3
74 ;; :code-space [0 255 0 255 0 63]
75 ;; :ascii-compatible-p t
76 ;; :supplementary-p t
77 ;; :code-offset 0)
79 ;; (define-charset 'eight-bit
80 ;; ""
81 ;; :dimension 1
82 ;; :code-space [128 255]
83 ;; :code-offset #x3FFF80)
85 ;; We now set :docstring, :short-name, and :long-name properties.
;; Attach the :docstring, :short-name and :long-name properties.
;; Each table entry is (CHARSET PROPERTY VALUE) and is handed to
;; `put-charset-property' in the order listed.
(dolist (entry '((ascii      :docstring  "ASCII (ISO646 IRV)")
                 (ascii      :short-name "ASCII")
                 (ascii      :long-name  "ASCII (ISO646 IRV)")
                 (iso-8859-1 :docstring  "Latin-1 (ISO/IEC 8859-1)")
                 (iso-8859-1 :short-name "Latin-1")
                 (iso-8859-1 :long-name  "Latin-1")
                 (unicode    :docstring  "Unicode (ISO10646)")
                 (unicode    :short-name "Unicode")
                 (unicode    :long-name  "Unicode (ISO10646)")
                 (emacs      :docstring
                             "Full Emacs charset (excluding eight bit chars)")
                 (emacs      :short-name "Emacs")
                 (emacs      :long-name  "Emacs")
                 (eight-bit  :docstring  "Raw bytes 128-255")
                 (eight-bit  :short-name "Raw bytes")))
  (apply #'put-charset-property entry))
115 (define-charset-alias 'ucs 'unicode)
117 (define-charset 'latin-iso8859-1
118 "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
119 :short-name "RHP of Latin-1"
120 :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
121 :iso-final-char ?A
122 :emacs-mule-id 129
123 :code-space [32 127]
124 :code-offset 160)
126 ;; Name perhaps not ideal, but is XEmacs-compatible.
127 (define-charset 'control-1
128 "8-bit control code (0x80..0x9F)"
129 :short-name "8-bit control code"
130 :code-space [128 159]
131 :code-offset 128)
133 (define-charset 'eight-bit-control
134 "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
135 :short-name "Raw bytes 0x80..0x9F"
136 :supplementary-p t
137 :code-space [128 159]
138 :code-offset #x3FFF80) ; see character.h
140 (define-charset 'eight-bit-graphic
141 "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
142 :short-name "Raw bytes 0xA0..0xFF"
143 :supplementary-p t
144 :code-space [160 255]
145 :code-offset #x3FFFA0) ; see character.h
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define an 8-bit charset SYMBOL plus, optionally, its ISO-2022 right half.

SYMBOL is defined with code space [0 255], marked ASCII compatible and
loaded from the map MAP; NAME is its description and long name, and
NICKNAME its short name.

If ISO-SYMBOL is non-nil, it is defined as the subset of SYMBOL covering
codes 160..255 shifted by -128, with code space [32 127], ISO final
character ISO-FINAL and emacs-mule id EMACS-MULE-ID.  Its description
ends in \": ISO-IR-N\" when ISO-IR (a number N) is non-nil.

Each argument is spliced into the expansion wherever it is used, so an
argument form may be evaluated more than once."
  `(progn
     ;; The full 8-bit charset.
     (define-charset ,symbol
       ,name
       :long-name ,name
       :short-name ,nickname
       :code-space [0 255]
       :ascii-compatible-p t
       :map ,map)
     ;; The 96-character right-hand part, only when a name was given.
     (when ,iso-symbol
       (define-charset ,iso-symbol
         (if (null ,iso-ir)
             (format "Right-Hand Part of %s (%s)" ,name ,nickname)
           (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                   ,name ,nickname ,iso-ir))
         :long-name (format "RHP of %s (%s)" ,name ,nickname)
         :short-name (format "RHP of %s" ,name)
         :code-space [32 127]
         :iso-final-char ,iso-final
         :emacs-mule-id ,emacs-mule-id
         :subset (list ,symbol 160 255 -128)))))
171 (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
172 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
174 (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
175 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
177 (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
178 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
180 (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
181 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
183 (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
184 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
186 (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
187 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
189 (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
190 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
192 (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
193 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
195 (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
196 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
198 ;; http://www.nectec.or.th/it-standards/iso8859-11/
199 ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
200 ;; plus nbsp
201 (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
202 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
204 ;; 8859-12 doesn't (yet?) exist.
206 (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
207 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
209 (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
210 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
212 (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
213 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
215 (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
216 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
218 ;; No point in keeping it around.
219 (fmakunbound 'define-iso-single-byte-charset)
221 ;; Can this be shared with 8859-11?
222 ;; N.b. not all of these are defined unicodes.
223 (define-charset 'thai-tis620
224 "TIS620.2533"
225 :short-name "TIS620.2533"
226 :iso-final-char ?T
227 :emacs-mule-id 133
228 :code-space [32 127]
229 :code-offset #x0E00)
231 ;; Fixme: doc for this, c.f. above
232 (define-charset 'tis620-2533
233 "TIS620.2533"
234 :short-name "TIS620.2533"
235 :ascii-compatible-p t
236 :code-space [0 255]
237 :superset '(ascii eight-bit-control (thai-tis620 . 128)))
239 (define-charset 'jisx0201
240 "JISX0201"
241 :short-name "JISX0201"
242 :code-space [0 #xDF]
243 :map "JISX0201")
245 (define-charset 'latin-jisx0201
246 "Roman Part of JISX0201.1976"
247 :short-name "JISX0201 Roman"
248 :long-name "Japanese Roman (JISX0201.1976)"
249 :iso-final-char ?J
250 :emacs-mule-id 138
251 :supplementary-p t
252 :code-space [33 126]
253 :subset '(jisx0201 33 126 0))
255 (define-charset 'katakana-jisx0201
256 "Katakana Part of JISX0201.1976"
257 :short-name "JISX0201 Katakana"
258 :long-name "Japanese Katakana (JISX0201.1976)"
259 :iso-final-char ?I
260 :emacs-mule-id 137
261 :supplementary-p t
262 :code-space [33 126]
263 :subset '(jisx0201 161 254 -128))
265 (define-charset 'chinese-gb2312
266 "GB2312 Chinese simplified: ISO-IR-58"
267 :short-name "GB2312"
268 :long-name "GB2312: ISO-IR-58"
269 :iso-final-char ?A
270 :emacs-mule-id 145
271 :code-space [33 126 33 126]
272 :code-offset #x110000
273 :unify-map "GB2312")
275 (define-charset 'chinese-gbk
276 "GBK Chinese simplified."
277 :short-name "GBK"
278 :code-space [#x40 #xFE #x81 #xFE]
279 :code-offset #x160000
280 :unify-map "GBK")
281 (define-charset-alias 'cp936 'chinese-gbk)
282 (define-charset-alias 'windows-936 'chinese-gbk)
284 (define-charset 'chinese-cns11643-1
285 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
286 :short-name "CNS11643-1"
287 :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
288 :iso-final-char ?G
289 :emacs-mule-id 149
290 :code-space [33 126 33 126]
291 :code-offset #x114000
292 :unify-map "CNS-1")
294 (define-charset 'chinese-cns11643-2
295 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
296 :short-name "CNS11643-2"
297 :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
298 :iso-final-char ?H
299 :emacs-mule-id 150
300 :code-space [33 126 33 126]
301 :code-offset #x118000
302 :unify-map "CNS-2")
304 (define-charset 'chinese-cns11643-3
305 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
306 :short-name "CNS11643-3"
307 :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
308 :iso-final-char ?I
309 :code-space [33 126 33 126]
310 :emacs-mule-id 246
311 :code-offset #x11C000
312 :unify-map "CNS-3")
314 (define-charset 'chinese-cns11643-4
315 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
316 :short-name "CNS11643-4"
317 :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
318 :iso-final-char ?J
319 :emacs-mule-id 247
320 :code-space [33 126 33 126]
321 :code-offset #x120000
322 :unify-map "CNS-4")
324 (define-charset 'chinese-cns11643-5
325 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
326 :short-name "CNS11643-5"
327 :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
328 :iso-final-char ?K
329 :emacs-mule-id 248
330 :code-space [33 126 33 126]
331 :code-offset #x124000
332 :unify-map "CNS-5")
334 (define-charset 'chinese-cns11643-6
335 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
336 :short-name "CNS11643-6"
337 :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
338 :iso-final-char ?L
339 :emacs-mule-id 249
340 :code-space [33 126 33 126]
341 :code-offset #x128000
342 :unify-map "CNS-6")
344 (define-charset 'chinese-cns11643-7
345 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
346 :short-name "CNS11643-7"
347 :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
348 :iso-final-char ?M
349 :emacs-mule-id 250
350 :code-space [33 126 33 126]
351 :code-offset #x12C000
352 :unify-map "CNS-7")
354 (define-charset 'big5
355 "Big5 (Chinese traditional)"
356 :short-name "Big5"
357 :code-space [#x40 #xFE #xA1 #xFE]
358 :code-offset #x130000
359 :unify-map "BIG5")
360 ;; Fixme: AKA cp950 according to
361 ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
362 ;; that correct?
;; Level-1 half of Big5.  The long name states the same range
;; (A141-C67E) as the docstring.
(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67E"
  :iso-final-char ?0
  :emacs-mule-id 152
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "BIG5-1")
375 (define-charset 'chinese-big5-2
376 "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
377 :short-name "Big5 (Level-2)"
378 :long-name "Big5 (Level-2) C940-FEFE"
379 :iso-final-char ?1
380 :emacs-mule-id 153
381 :supplementary-p t
382 :code-space [#x21 #x7E #x21 #x7E]
383 :code-offset #x137800
384 :unify-map "BIG5-2")
386 (define-charset 'japanese-jisx0208
387 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
388 :short-name "JISX0208"
389 :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
390 :iso-final-char ?B
391 :emacs-mule-id 146
392 :code-space [33 126 33 126]
393 :code-offset #x140000
394 :unify-map "JISX0208")
396 (define-charset 'japanese-jisx0208-1978
397 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
398 :short-name "JISX0208.1978"
399 :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
400 :iso-final-char ?@
401 :emacs-mule-id 144
402 :code-space [33 126 33 126]
403 :code-offset #x144000
404 :unify-map "JISC6226")
406 (define-charset 'japanese-jisx0212
407 "JISX0212 Japanese supplement: ISO-IR-159"
408 :short-name "JISX0212"
409 :long-name "JISX0212 (Japanese): ISO-IR-159"
410 :iso-final-char ?D
411 :emacs-mule-id 148
412 :code-space [33 126 33 126]
413 :code-offset #x148000
414 :unify-map "JISX0212")
416 ;; Note that jisx0213 contains characters not in Unicode (3.2?). It's
417 ;; arguable whether it should have a unify-map.
418 (define-charset 'japanese-jisx0213-1
419 "JISX0213.2000 Plane 1 (Japanese)"
420 :short-name "JISX0213-1"
421 :iso-final-char ?O
422 :emacs-mule-id 151
423 :unify-map "JISX2131"
424 :code-space [33 126 33 126]
425 :code-offset #x14C000)
427 (define-charset 'japanese-jisx0213-2
428 "JISX0213.2000 Plane 2 (Japanese)"
429 :short-name "JISX0213-2"
430 :iso-final-char ?P
431 :emacs-mule-id 254
432 :unify-map "JISX2132"
433 :code-space [33 126 33 126]
434 :code-offset #x150000)
436 (define-charset 'japanese-jisx0213-a
437 "JISX0213.2004 adds these characters to JISX0213.2000."
438 :short-name "JISX0213A"
439 :dimension 2
440 :code-space [33 126 33 126]
441 :supplementary-p t
442 :map "JISX213A")
444 (define-charset 'japanese-jisx0213.2004-1
445 "JISX0213.2004 Plane1 (Japanese)"
446 :short-name "JISX0213.2004-1"
447 :dimension 2
448 :code-space [33 126 33 126]
449 :iso-final-char ?Q
450 :superset '(japanese-jisx0213-a japanese-jisx0213-1))
452 (define-charset 'katakana-sjis
453 "Katakana part of Shift-JIS"
454 :dimension 1
455 :code-space [#xA1 #xDF]
456 :subset '(jisx0201 #xA1 #xDF 0)
457 :supplementary-p t)
459 (define-charset 'cp932-2-byte
460 "2-byte part of CP932"
461 :dimension 2
462 :map "CP932-2BYTE"
463 :code-space [#x40 #xFC #x81 #xFC]
464 :supplementary-p t)
466 (define-charset 'cp932
467 "CP932 (Microsoft shift-jis)"
468 :code-space [#x00 #xFF #x00 #xFE]
469 :short-name "CP932"
470 :superset '(ascii katakana-sjis cp932-2-byte))
472 (define-charset 'korean-ksc5601
473 "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
474 :short-name "KSC5601"
475 :long-name "KSC5601 (Korean): ISO-IR-149"
476 :iso-final-char ?C
477 :emacs-mule-id 147
478 :code-space [33 126 33 126]
479 :code-offset #x279f94 ; ... #x27c217
480 :unify-map "KSC5601")
482 (define-charset 'big5-hkscs
483 "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
484 :short-name "Big5"
485 :code-space [#x40 #xFE #xA1 #xFE]
486 :code-offset #x27c218 ; ... #x280839
487 :unify-map "BIG5-HKSCS")
489 ;; Fixme: Korean cp949/UHC
491 (define-charset 'chinese-sisheng
492 "SiSheng characters for PinYin/ZhuYin"
493 :short-name "SiSheng"
494 :long-name "SiSheng (PinYin/ZhuYin)"
495 :iso-final-char ?0
496 :emacs-mule-id 160
497 :code-space [33 126]
498 :unify-map "MULE-sisheng"
499 :supplementary-p t
500 :code-offset #x200000)
502 ;; A subset of the 1989 version of IPA. It consists of the consonant
503 ;; signs used in English, French, German and Italian, and all vowels
504 ;; signs in the table. [says old MULE doc]
505 (define-charset 'ipa
506 "IPA (International Phonetic Association)"
507 :short-name "IPA"
508 :iso-final-char ?0
509 :emacs-mule-id 161
510 :unify-map "MULE-ipa"
511 :code-space [32 127]
512 :supplementary-p t
513 :code-offset #x200080)
515 (define-charset 'viscii
516 "VISCII1.1"
517 :short-name "VISCII"
518 :long-name "VISCII 1.1"
519 :code-space [0 255]
520 :map "VISCII")
522 (define-charset 'vietnamese-viscii-lower
523 "VISCII1.1 lower-case"
524 :short-name "VISCII lower"
525 :long-name "VISCII lower-case"
526 :iso-final-char ?1
527 :emacs-mule-id 162
528 :code-space [32 127]
529 :code-offset #x200200
530 :supplementary-p t
531 :unify-map "MULE-lviscii")
533 (define-charset 'vietnamese-viscii-upper
534 "VISCII1.1 upper-case"
535 :short-name "VISCII upper"
536 :long-name "VISCII upper-case"
537 :iso-final-char ?2
538 :emacs-mule-id 163
539 :code-space [32 127]
540 :code-offset #x200280
541 :supplementary-p t
542 :unify-map "MULE-uviscii")
544 (define-charset 'vscii
545 "VSCII1.1 (TCVN-5712 VN1)"
546 :short-name "VSCII"
547 :code-space [0 255]
548 :map "VSCII")
550 (define-charset-alias 'tcvn-5712 'vscii)
552 ;; Fixme: see note in tcvn.map about combining characters
553 (define-charset 'vscii-2
554 "VSCII-2 (TCVN-5712 VN2)"
555 :code-space [0 255]
556 :map "VSCII-2")
558 (define-charset 'koi8-r
559 "KOI8-R"
560 :short-name "KOI8-R"
561 :ascii-compatible-p t
562 :code-space [0 255]
563 :map "KOI8-R")
565 (define-charset-alias 'koi8 'koi8-r)
567 (define-charset 'alternativnyj
568 "ALTERNATIVNYJ"
569 :short-name "alternativnyj"
570 :ascii-compatible-p t
571 :code-space [0 255]
572 :map "ALTERNATIVNYJ")
574 (define-charset 'cp866
575 "CP866"
576 :short-name "cp866"
577 :ascii-compatible-p t
578 :code-space [0 255]
579 :map "IBM866")
580 (define-charset-alias 'ibm866 'cp866)
582 (define-charset 'koi8-u
583 "KOI8-U"
584 :short-name "KOI8-U"
585 :ascii-compatible-p t
586 :code-space [0 255]
587 :map "KOI8-U")
589 (define-charset 'koi8-t
590 "KOI8-T"
591 :short-name "KOI8-T"
592 :ascii-compatible-p t
593 :code-space [0 255]
594 :map "KOI8-T")
596 (define-charset 'georgian-ps
597 "GEORGIAN-PS"
598 :short-name "GEORGIAN-PS"
599 :ascii-compatible-p t
600 :code-space [0 255]
601 :map "KA-PS")
603 (define-charset 'georgian-academy
604 "GEORGIAN-ACADEMY"
605 :short-name "GEORGIAN-ACADEMY"
606 :ascii-compatible-p t
607 :code-space [0 255]
608 :map "KA-ACADEMY")
610 (define-charset 'windows-1250
611 "WINDOWS-1250 (Central Europe)"
612 :short-name "WINDOWS-1250"
613 :ascii-compatible-p t
614 :code-space [0 255]
615 :map "CP1250")
616 (define-charset-alias 'cp1250 'windows-1250)
618 (define-charset 'windows-1251
619 "WINDOWS-1251 (Cyrillic)"
620 :short-name "WINDOWS-1251"
621 :ascii-compatible-p t
622 :code-space [0 255]
623 :map "CP1251")
624 (define-charset-alias 'cp1251 'windows-1251)
626 (define-charset 'windows-1252
627 "WINDOWS-1252 (Latin I)"
628 :short-name "WINDOWS-1252"
629 :ascii-compatible-p t
630 :code-space [0 255]
631 :map "CP1252")
632 (define-charset-alias 'cp1252 'windows-1252)
634 (define-charset 'windows-1253
635 "WINDOWS-1253 (Greek)"
636 :short-name "WINDOWS-1253"
637 :ascii-compatible-p t
638 :code-space [0 255]
639 :map "CP1253")
640 (define-charset-alias 'cp1253 'windows-1253)
642 (define-charset 'windows-1254
643 "WINDOWS-1254 (Turkish)"
644 :short-name "WINDOWS-1254"
645 :ascii-compatible-p t
646 :code-space [0 255]
647 :map "CP1254")
648 (define-charset-alias 'cp1254 'windows-1254)
650 (define-charset 'windows-1255
651 "WINDOWS-1255 (Hebrew)"
652 :short-name "WINDOWS-1255"
653 :ascii-compatible-p t
654 :code-space [0 255]
655 :map "CP1255")
656 (define-charset-alias 'cp1255 'windows-1255)
658 (define-charset 'windows-1256
659 "WINDOWS-1256 (Arabic)"
660 :short-name "WINDOWS-1256"
661 :ascii-compatible-p t
662 :code-space [0 255]
663 :map "CP1256")
664 (define-charset-alias 'cp1256 'windows-1256)
666 (define-charset 'windows-1257
667 "WINDOWS-1257 (Baltic)"
668 :short-name "WINDOWS-1257"
669 :ascii-compatible-p t
670 :code-space [0 255]
671 :map "CP1257")
672 (define-charset-alias 'cp1257 'windows-1257)
674 (define-charset 'windows-1258
675 "WINDOWS-1258 (Viet Nam)"
676 :short-name "WINDOWS-1258"
677 :ascii-compatible-p t
678 :code-space [0 255]
679 :map "CP1258")
680 (define-charset-alias 'cp1258 'windows-1258)
682 (define-charset 'next
683 "NEXT"
684 :short-name "NEXT"
685 :ascii-compatible-p t
686 :code-space [0 255]
687 :map "NEXTSTEP")
689 (define-charset 'cp1125
690 "CP1125"
691 :short-name "CP1125"
692 :code-space [0 255]
693 :ascii-compatible-p t
694 :map "CP1125")
695 (define-charset-alias 'ruscii 'cp1125)
696 ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
697 (define-charset-alias 'cp866u 'cp1125)
699 ;; Fixme: Cf. iconv, http://czyborra.com/charsets/codepages.html
700 ;; shows this as not ASCII compatible, with various graphics in
701 ;; 0x01-0x1F.
702 (define-charset 'cp437
703 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
704 :short-name "CP437"
705 :code-space [0 255]
706 :ascii-compatible-p t
707 :map "IBM437")
709 (define-charset 'cp720
710 "CP720 (Arabic)"
711 :short-name "CP720"
712 :code-space [0 255]
713 :ascii-compatible-p t
714 :map "CP720")
716 (define-charset 'cp737
717 "CP737 (PC Greek)"
718 :short-name "CP737"
719 :code-space [0 255]
720 :ascii-compatible-p t
721 :map "CP737")
723 (define-charset 'cp775
724 "CP775 (PC Baltic)"
725 :short-name "CP775"
726 :code-space [0 255]
727 :ascii-compatible-p t
728 :map "CP775")
730 (define-charset 'cp851
731 "CP851 (Greek)"
732 :short-name "CP851"
733 :code-space [0 255]
734 :ascii-compatible-p t
735 :map "IBM851")
737 (define-charset 'cp852
738 "CP852 (MS-DOS Latin-2)"
739 :short-name "CP852"
740 :code-space [0 255]
741 :ascii-compatible-p t
742 :map "IBM852")
744 (define-charset 'cp855
745 "CP855 (IBM Cyrillic)"
746 :short-name "CP855"
747 :code-space [0 255]
748 :ascii-compatible-p t
749 :map "IBM855")
751 (define-charset 'cp857
752 "CP857 (IBM Turkish)"
753 :short-name "CP857"
754 :code-space [0 255]
755 :ascii-compatible-p t
756 :map "IBM857")
758 (define-charset 'cp858
759 "CP858 (Multilingual Latin I + Euro)"
760 :short-name "CP858"
761 :code-space [0 255]
762 :ascii-compatible-p t
763 :map "CP858")
764 (define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
766 (define-charset 'cp860
767 "CP860 (MS-DOS Portuguese)"
768 :short-name "CP860"
769 :code-space [0 255]
770 :ascii-compatible-p t
771 :map "IBM860")
773 (define-charset 'cp861
774 "CP861 (MS-DOS Icelandic)"
775 :short-name "CP861"
776 :code-space [0 255]
777 :ascii-compatible-p t
778 :map "IBM861")
780 (define-charset 'cp862
781 "CP862 (PC Hebrew)"
782 :short-name "CP862"
783 :code-space [0 255]
784 :ascii-compatible-p t
785 :map "IBM862")
787 (define-charset 'cp863
788 "CP863 (MS-DOS Canadian French)"
789 :short-name "CP863"
790 :code-space [0 255]
791 :ascii-compatible-p t
792 :map "IBM863")
794 (define-charset 'cp864
795 "CP864 (PC Arabic)"
796 :short-name "CP864"
797 :code-space [0 255]
798 :ascii-compatible-p t
799 :map "IBM864")
801 (define-charset 'cp865
802 "CP865 (MS-DOS Nordic)"
803 :short-name "CP865"
804 :code-space [0 255]
805 :ascii-compatible-p t
806 :map "IBM865")
808 (define-charset 'cp869
809 "CP869 (IBM Modern Greek)"
810 :short-name "CP869"
811 :code-space [0 255]
812 :ascii-compatible-p t
813 :map "IBM869")
815 (define-charset 'cp874
816 "CP874 (IBM Thai)"
817 :short-name "CP874"
818 :code-space [0 255]
819 :ascii-compatible-p t
820 :map "IBM874")
822 ;; For Arabic, we need three different types of character sets.
823 ;; Digits are of direction left-to-right and of width 1-column.
824 ;; Others are of direction right-to-left and of width 1-column or
825 ;; 2-column.
826 (define-charset 'arabic-digit
827 "Arabic digit"
828 :short-name "Arabic digit"
829 :iso-final-char ?2
830 :emacs-mule-id 164
831 :supplementary-p t
832 :code-space [34 42]
833 :code-offset #x0600)
835 (define-charset 'arabic-1-column
836 "Arabic 1-column"
837 :short-name "Arabic 1-col"
838 :long-name "Arabic 1-column"
839 :iso-final-char ?3
840 :emacs-mule-id 165
841 :supplementary-p t
842 :code-space [33 126]
843 :code-offset #x200100)
845 (define-charset 'arabic-2-column
846 "Arabic 2-column"
847 :short-name "Arabic 2-col"
848 :long-name "Arabic 2-column"
849 :iso-final-char ?4
850 :emacs-mule-id 224
851 :supplementary-p t
852 :code-space [33 126]
853 :code-offset #x200180)
855 ;; Lao script.
856 ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
857 ;; Not all of them are defined unicodes.
858 (define-charset 'lao
859 "Lao characters (ISO10646 0E81..0EDF)"
860 :short-name "Lao"
861 :iso-final-char ?1
862 :emacs-mule-id 167
863 :supplementary-p t
864 :code-space [33 126]
865 :code-offset #x0E81)
867 (define-charset 'mule-lao
868 "Lao characters (ISO10646 0E81..0EDF)"
869 :short-name "Lao"
870 :code-space [0 255]
871 :supplementary-p t
872 :superset '(ascii eight-bit-control (lao . 128)))
875 ;; Indian scripts. Symbolic charset for data exchange. Glyphs are
876 ;; not assigned. They are automatically converted to each Indian
877 ;; script which IS-13194 supports.
879 (define-charset 'indian-is13194
880 "Generic Indian charset for data exchange with IS 13194"
881 :short-name "IS 13194"
882 :long-name "Indian IS 13194"
883 :iso-final-char ?5
884 :emacs-mule-id 225
885 :supplementary-p t
886 :code-space [33 126]
887 :code-offset #x180000)
;; Define one 256-code charset per script for the CDAC fonts and then
;; for the AKRUTI fonts.  The charsets are laid out back to back:
;; the first starts at #x180100 and each following one starts #x100
;; code points after its predecessor.
(let ((next-offset #x180100))
  (dolist (script '(devanagari sanskrit bengali tamil telugu assamese
                    oriya kannada malayalam gujarati punjabi))
    (let ((script-name (capitalize (symbol-name script))))
      (define-charset (intern (format "%s-cdac" script))
        (format "Glyphs of %s script for CDAC font. Subset of `indian-glyph'."
                script-name)
        :short-name (format "CDAC %s glyphs" script-name)
        :code-space [0 255]
        :code-offset next-offset
        :supplementary-p t))
    (setq next-offset (+ #x100 next-offset)))

  (dolist (script '(devanagari bengali punjabi gujarati
                    oriya tamil telugu kannada malayalam))
    (let ((script-name (capitalize (symbol-name script))))
      (define-charset (intern (format "%s-akruti" script))
        (format "Glyphs of %s script for AKRUTI font. Subset of `indian-glyph'."
                script-name)
        :short-name (format "AKRUTI %s glyphs" script-name)
        :code-space [0 255]
        :code-offset next-offset
        :supplementary-p t))
    (setq next-offset (+ #x100 next-offset))))
912 (define-charset 'indian-glyph
913 "Glyphs for Indian characters."
914 :short-name "Indian glyph"
915 :iso-final-char ?4
916 :emacs-mule-id 240
917 :supplementary-p t
918 :code-space [32 127 32 127]
919 :code-offset #x180100)
921 ;; Actual Glyph for 1-column width.
922 (define-charset 'indian-1-column
923 "Indian charset for 1-column width glyphs."
924 :short-name "Indian 1-col"
925 :long-name "Indian 1 Column"
926 :iso-final-char ?6
927 :emacs-mule-id 251
928 :supplementary-p t
929 :code-space [33 126 33 126]
930 :code-offset #x184000)
932 ;; Actual Glyph for 2-column width.
933 (define-charset 'indian-2-column
934 "Indian charset for 2-column width glyphs."
935 :short-name "Indian 2-col"
936 :long-name "Indian 2 Column"
937 :iso-final-char ?5
938 :emacs-mule-id 251
939 :supplementary-p t
940 :code-space [33 126 33 126]
941 :code-offset #x184000)
;; 2-column Tibetan charset.  It uses the same code space and code
;; offset as `tibetan-1-column'.  Each property is given exactly once.
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :unify-map "MULE-tibetan"
  :supplementary-p t
  :code-space [33 126 33 37]
  :code-offset #x190000)
955 (define-charset 'tibetan-1-column
956 "Tibetan 1 column glyph"
957 :short-name "Tibetan 1-col"
958 :long-name "Tibetan 1 column"
959 :iso-final-char ?8
960 :emacs-mule-id 241
961 :supplementary-p t
962 :code-space [33 126 33 37]
963 :code-offset #x190000)
965 ;; Subsets of Unicode.
966 (define-charset 'mule-unicode-2500-33ff
967 "Unicode characters of the range U+2500..U+33FF."
968 :short-name "Unicode subset 2"
969 :long-name "Unicode subset (U+2500..U+33FF)"
970 :iso-final-char ?2
971 :emacs-mule-id 242
972 :supplementary-p t
973 :code-space [#x20 #x7f #x20 #x47]
974 :code-offset #x2500)
;; Subset of Unicode covering U+E000..U+FFFF.  The code space is
;; larger than the range, so :max-code cuts it off at U+FFFF.
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000
  :max-code 30015)                      ; #x753F, i.e. U+FFFF
987 (define-charset 'mule-unicode-0100-24ff
988 "Unicode characters of the range U+0100..U+24FF."
989 :short-name "Unicode subset"
990 :long-name "Unicode subset (U+0100..U+24FF)"
991 :iso-final-char ?1
992 :emacs-mule-id 244
993 :supplementary-p t
994 :code-space [#x20 #x7F #x20 #x7F]
995 :code-offset #x100)
997 (define-charset 'unicode-bmp
998 "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
999 :short-name "Unicode BMP"
1000 :code-space [0 255 0 255]
1001 :code-offset 0)
;; Plane 1 of Unicode as a 2-dimensional charset.  The short name
;; carries no trailing blank, matching `unicode-bmp', `unicode-sip'
;; and `unicode-ssp'.
(define-charset 'unicode-smp
  "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
  :short-name "Unicode SMP"
  :code-space [0 255 0 255]
  :code-offset #x10000)
1009 (define-charset 'unicode-sip
1010 "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
1011 :short-name "Unicode SIP"
1012 :code-space [0 255 0 255]
1013 :code-offset #x20000)
1015 (define-charset 'unicode-ssp
1016 "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
1017 :short-name "Unicode SSP"
1018 :code-space [0 255 0 255]
1019 :code-offset #xE0000)
1021 (define-charset 'ethiopic
1022 "Ethiopic characters for Amharic and Tigrigna."
1023 :short-name "Ethiopic"
1024 :long-name "Ethiopic characters"
1025 :iso-final-char ?3
1026 :emacs-mule-id 245
1027 :supplementary-p t
1028 :unify-map "MULE-ethiopic"
1029 :code-space [33 126 33 126]
1030 :code-offset #x1A0000)
1032 (define-charset 'mac-roman
1033 "Mac Roman charset"
1034 :short-name "Mac Roman"
1035 :ascii-compatible-p t
1036 :code-space [0 255]
1037 :map "MACINTOSH")
1039 ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
1040 (define-charset 'ebcdic-us
1041 "US version of EBCDIC"
1042 :short-name "EBCDIC-US"
1043 :code-space [0 255]
1044 :mime-charset 'ebcdic-us
1045 :map "EBCDICUS")
1047 (define-charset 'ebcdic-uk
1048 "UK version of EBCDIC"
1049 :short-name "EBCDIC-UK"
1050 :code-space [0 255]
1051 :mime-charset 'ebcdic-uk
1052 :map "EBCDICUK")
1054 (define-charset 'ibm1047
1055 ;; Says groff:
1056 "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
1057 :short-name "IBM1047"
1058 :code-space [0 255]
1059 :mime-charset 'ibm1047
1060 :map "IBM1047")
1061 (define-charset-alias 'cp1047 'ibm1047)
;; ASCII-compatible 8-bit charset loaded from the "HP-ROMAN8" map.
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "HP-ROMAN8")
1070 ;; To make a coding system with this, a pre-write-conversion should
1071 ;; account for the commented-out multi-valued code points in
1072 ;; stdenc.map.
1073 (define-charset 'adobe-standard-encoding
1074 "Adobe `standard encoding' used in PostScript"
1075 :short-name "ADOBE-STANDARD-ENCODING"
1076 :code-space [#x20 255]
1077 :map "stdenc")
1079 (define-charset 'symbol
1080 "Adobe symbol encoding used in PostScript"
1081 :short-name "ADOBE-SYMBOL"
1082 :code-space [#x20 255]
1083 :map "symbol")
1085 (define-charset 'ibm850
1086 "DOS codepage 850 (Latin-1)"
1087 :short-name "IBM850"
1088 :ascii-compatible-p t
1089 :code-space [0 255]
1090 :map "IBM850")
1091 (define-charset-alias 'cp850 'ibm850)
1093 (define-charset 'mik
1094 "Bulgarian DOS codepage"
1095 :short-name "MIK"
1096 :ascii-compatible-p t
1097 :code-space [0 255]
1098 :map "MIK")
;; Single-byte, ASCII-compatible charset mapped via "PTCP154".  The
;; charset symbol is `ptcp154', but the short name and MIME charset
;; use the "PT154" spelling.
1100 (define-charset 'ptcp154
1101 "`Paratype' codepage (Asian Cyrillic)"
1102 :short-name "PT154"
1103 :ascii-compatible-p t
1104 :code-space [0 255]
1105 :mime-charset 'pt154
1106 :map "PTCP154")
;; Two alternative names for the same charset.
1107 (define-charset-alias 'pt154 'ptcp154)
1108 (define-charset-alias 'cp154 'ptcp154)
;; Two-byte piece of GB18030.  In :code-space the first min/max pair
;; (#x40..#xFE) is the trailing byte and the second pair (#x81..#xFE)
;; the leading byte, which agrees with the 0x814E..0xFEFE range quoted
;; in the docstring.  Marked supplementary; mapped via "GB180302".
1110 (define-charset 'gb18030-2-byte
1111 "GB18030 2-byte (0x814E..0xFEFE)"
1112 :code-space [#x40 #xFE #x81 #xFE]
1113 :supplementary-p t
1114 :map "GB180302")
;; Four-byte piece of GB18030 for BMP characters: four min/max pairs,
;; one per byte (#x30..#x39, #x81..#xFE, #x30..#x39, #x81..#x84).
;; Marked supplementary; mapped via "GB180304".
1116 (define-charset 'gb18030-4-byte-bmp
1117 "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
1118 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
1119 :supplementary-p t
1120 :map "GB180304")
;; Four-byte piece of GB18030 that uses no map file: it is defined by
;; :code-offset #x10000 instead.  :min-code and :max-code narrow the
;; rectangular :code-space to the range given in the docstring; each
;; is written as a (HIGH-16-BITS . LOW-16-BITS) cons.
1122 (define-charset 'gb18030-4-byte-smp
1123 "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
1124 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
1125 :min-code '(#x9030 . #x8130)
1126 :max-code '(#xE332 . #x9A35)
1127 :supplementary-p t
1128 :code-offset #x10000)
;; Four-byte GB18030 range between the BMP and SMP pieces.  Defined by
;; :code-offset (no map file); :min-code/:max-code narrow the
;; rectangular :code-space to the range given in the docstring.
;; The closing parenthesis sits on its own line because the last
;; attribute line ends in a comment.
1130 (define-charset 'gb18030-4-byte-ext-1
1131 "GB18030 4-byte (0x8431A530-0x8F39FE39)"
1132 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
1133 :min-code '(#x8431 . #xA530)
1134 :max-code '(#x8F39 . #xFE39)
1135 :supplementary-p t
1136 :code-offset #x200000 ; ... #x22484B
1137 )
;; Four-byte GB18030 range above the SMP piece.  Defined by
;; :code-offset (no map file), starting right after the last code
;; noted for `gb18030-4-byte-ext-1' (#x22484B).
;; The closing parenthesis sits on its own line because the last
;; attribute line ends in a comment.
1139 (define-charset 'gb18030-4-byte-ext-2
1140 "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
1141 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
1142 :min-code '(#xE332 . #x9A36)
1143 :max-code '(#xFE39 . #xFE39)
1144 :supplementary-p t
1145 :code-offset #x22484C ; ... #x279f93
1146 )
;; The complete GB18030 charset, defined as a superset of `ascii' and
;; the five gb18030-* piece charsets rather than by its own map or
;; offset.  The code range runs from 0 up to 0xFE39FE39.
1148 (define-charset 'gb18030
1149 "GB18030"
1150 :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
1151 :min-code 0
1152 :max-code '(#xFE39 . #xFE39)
1153 :superset '(ascii gb18030-2-byte
1154 gb18030-4-byte-bmp gb18030-4-byte-smp
1155 gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
;; Two-dimensional charset with both bytes in 33..126, defined by
;; :code-offset #x27A000 (no map file is given).
1157 (define-charset 'chinese-cns11643-15
1158 "CNS11643 Plane 15 Chinese Traditional"
1159 :short-name "CNS11643-15"
1160 :long-name "CNS11643-15 (Chinese traditional)"
1161 :code-space [33 126 33 126]
1162 :code-offset #x27A000)
;; Call `unify-charset' once for each charset below, with no optional
;; arguments.
;; NOTE(review): presumably this unifies each charset's characters
;; with Unicode using the charset's own unify map -- confirm against
;; the `unify-charset' docstring.
1164 (unify-charset 'chinese-gb2312)
1165 (unify-charset 'chinese-gbk)
1166 (unify-charset 'chinese-cns11643-1)
1167 (unify-charset 'chinese-cns11643-2)
1168 (unify-charset 'chinese-cns11643-3)
1169 (unify-charset 'chinese-cns11643-4)
1170 (unify-charset 'chinese-cns11643-5)
1171 (unify-charset 'chinese-cns11643-6)
1172 (unify-charset 'chinese-cns11643-7)
1173 (unify-charset 'big5)
1174 (unify-charset 'chinese-big5-1)
1175 (unify-charset 'chinese-big5-2)
1176 (unify-charset 'big5-hkscs)
1177 (unify-charset 'korean-ksc5601)
1178 (unify-charset 'vietnamese-viscii-lower)
1179 (unify-charset 'vietnamese-viscii-upper)
1180 (unify-charset 'chinese-sisheng)
1181 (unify-charset 'ipa)
1182 (unify-charset 'tibetan)
1183 (unify-charset 'ethiopic)
1184 (unify-charset 'japanese-jisx0208-1978)
1185 (unify-charset 'japanese-jisx0208)
1186 (unify-charset 'japanese-jisx0212)
1187 (unify-charset 'japanese-jisx0213-1)
1188 (unify-charset 'japanese-jisx0213-2)
1191 ;; These are tables for translating characters on decoding and
1192 ;; encoding.
1193 ;; Fixme: these aren't used now -- should they be?
;; Both variables are explicitly set to nil here, i.e. no standard
;; translation table is installed for either direction.
1194 (setq standard-translation-table-for-decode nil)
1196 (setq standard-translation-table-for-encode nil)
1198 ;;; Make fundamental coding systems.
1200 ;; The coding systems `no-conversion' and `undecided' are already
1201 ;; defined in coding.c as below:
1203 ;; (define-coding-system 'no-conversion
1204 ;; "..."
1205 ;; :coding-type 'raw-text
1206 ;; ...)
1207 ;; (define-coding-system 'undecided
1208 ;; "..."
1209 ;; :coding-type 'undecided
1210 ;; ...)
;; Convenience aliases: `binary' names `no-conversion', while `unix',
;; `dos' and `mac' name the three EOL-specific variants of
;; `undecided'.
1212 (define-coding-system-alias 'binary 'no-conversion)
1213 (define-coding-system-alias 'unix 'undecided-unix)
1214 (define-coding-system-alias 'dos 'undecided-dos)
1215 (define-coding-system-alias 'mac 'undecided-mac)
;; `raw-text' and `no-conversion-multibyte' (below) share the
;; `raw-text' coding type.  Only `raw-text' sets :for-unibyte, and it
;; leaves :eol-type unspecified.
1217 (define-coding-system 'raw-text
1218 "Raw text, which means text contains random 8-bit codes.
1219 Encoding text with this coding system produces the actual byte
1220 sequence of the text in buffers and strings. An exception is made for
1221 characters from the `eight-bit' character set. Each of them is encoded
1222 into a single byte.
1224 When you visit a file with this coding, the file is read into a
1225 unibyte buffer as is (except for EOL format), thus each byte of a file
1226 is treated as a character."
1227 :coding-type 'raw-text
1228 :for-unibyte t
1229 :mnemonic ?t)
;; Same coding type as `raw-text', but without :for-unibyte and with
;; the EOL type pinned to `unix'.
1231 (define-coding-system 'no-conversion-multibyte
1232 "Like `no-conversion' but don't read a file into a unibyte buffer."
1233 :coding-type 'raw-text
1234 :eol-type 'unix
1235 :mnemonic ?=)
;; Charset-based coding system whose charset list contains only
;; `iso-8859-1'.
1237 (define-coding-system 'iso-latin-1
1238 "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
1239 :coding-type 'charset
1240 :mnemonic ?1
1241 :charset-list '(iso-8859-1)
1242 :mime-charset 'iso-8859-1)
;; Also reachable under the MIME name and the short name `latin-1'.
1244 (define-coding-system-alias 'iso-8859-1 'iso-latin-1)
1245 (define-coding-system-alias 'latin-1 'iso-latin-1)
1247 ;; Coding systems not specific to each language environment.
;; Note that :charset-list is the bare symbol `emacs-mule' here, not
;; an explicit list of charsets.
1249 (define-coding-system 'emacs-mule
1250 "Emacs 21 internal format used in buffer and string."
1251 :coding-type 'emacs-mule
1252 :charset-list 'emacs-mule
1253 :mnemonic ?M)
;; Three UTF-8 variants sharing coding type, mnemonic and charset list.
;; They differ in :bom, and only plain `utf-8' declares a MIME charset.
;; Plain `utf-8': no :bom attribute.
1255 (define-coding-system 'utf-8
1256 "UTF-8 (no signature (BOM))"
1257 :coding-type 'utf-8
1258 :mnemonic ?U
1259 :charset-list '(unicode)
1260 :mime-charset 'utf-8)
;; :bom t -- the signature variant.
1262 (define-coding-system 'utf-8-with-signature
1263 "UTF-8 (with signature (BOM))"
1264 :coding-type 'utf-8
1265 :mnemonic ?U
1266 :charset-list '(unicode)
1267 :bom t)
;; :bom is a cons naming the with-signature and the plain coding
;; system, in that order.
1269 (define-coding-system 'utf-8-auto
1270 "UTF-8 (auto-detect signature (BOM))"
1271 :coding-type 'utf-8
1272 :mnemonic ?U
1273 :charset-list '(unicode)
1274 :bom '(utf-8-with-signature . utf-8))
;; `mule-utf-8' is an alias for plain `utf-8'.
1276 (define-coding-system-alias 'mule-utf-8 'utf-8)
;; Uses the `utf-8' coding type, but with the `emacs' charset in its
;; charset list instead of `unicode'.
1278 (define-coding-system 'utf-8-emacs
1279 "Support for all Emacs characters (including non-Unicode characters)."
1280 :coding-type 'utf-8
1281 :mnemonic ?U
1282 :charset-list '(emacs))
1284 ;; The encoding used internally. This encoding is meant to be able to save
1285 ;; any multibyte buffer without losing information. It can change between
1286 ;; Emacs releases, though, so should only be used for internal files.
;; The alias targets the `-unix' EOL variant of `utf-8-emacs'.
1287 (define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
;; BOM-less UTF-16 pair: the two definitions differ only in :endian
;; and in the MIME charset name.  Both are flagged
;; :mime-text-unsuitable.
1289 (define-coding-system 'utf-16le
1290 "UTF-16LE (little endian, no signature (BOM))."
1291 :coding-type 'utf-16
1292 :mnemonic ?U
1293 :charset-list '(unicode)
1294 :endian 'little
1295 :mime-text-unsuitable t
1296 :mime-charset 'utf-16le)
;; Big-endian counterpart of `utf-16le'.
1298 (define-coding-system 'utf-16be
1299 "UTF-16BE (big endian, no signature (BOM))."
1300 :coding-type 'utf-16
1301 :mnemonic ?U
1302 :charset-list '(unicode)
1303 :endian 'big
1304 :mime-text-unsuitable t
1305 :mime-charset 'utf-16be)
;; UTF-16 with signature (:bom t), fixed little-endian byte order.
;; The MIME charset is plain `utf-16', not `utf-16le'.
1307 (define-coding-system 'utf-16le-with-signature
1308 "UTF-16 (little endian, with signature (BOM))."
1309 :coding-type 'utf-16
1310 :mnemonic ?U
1311 :charset-list '(unicode)
1312 :bom t
1313 :endian 'little
1314 :mime-text-unsuitable t
1315 :mime-charset 'utf-16)
;; Big-endian counterpart; "signature" in the docstring refers to the
;; BOM, as in the little-endian variant.
1317 (define-coding-system 'utf-16be-with-signature
1318 "UTF-16 (big endian, with signature)."
1319 :coding-type 'utf-16
1320 :mnemonic ?U
1321 :charset-list '(unicode)
1322 :bom t
1323 :endian 'big
1324 :mime-text-unsuitable t
1325 :mime-charset 'utf-16)
;; Generic UTF-16: :bom is a cons naming the little-endian and the
;; big-endian with-signature coding systems, and :endian is `big'.
1327 (define-coding-system 'utf-16
1328 "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
1329 :coding-type 'utf-16
1330 :mnemonic ?U
1331 :charset-list '(unicode)
1332 :bom '(utf-16le-with-signature . utf-16be-with-signature)
1333 :endian 'big
1334 :mime-text-unsuitable t
1335 :mime-charset 'utf-16)
1337 ;; Backwards compatibility (old names, also used by Mule-UCS). We
1338 ;; prefer the MIME names.
;; Note that the hyphenated old names map to the *-with-signature
;; variants, not to the BOM-less `utf-16le' / `utf-16be'.
1339 (define-coding-system-alias 'utf-16-le 'utf-16le-with-signature)
1340 (define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
;; Generic ISO 2022 coding systems.  :designation is a four-element
;; vector, one slot per graphic register G0..G3; a nil slot means the
;; register is unused.
;; NOTE(review): per the `define-coding-system' docstring, the first
;; list element is the charset designated initially (nil = none), and
;; `t', 94 and 96 admit any charset, any 94-charset and any
;; 96-charset respectively -- confirm.
;; Only G0 is used; it starts as ASCII.
1343 (define-coding-system 'iso-2022-7bit
1344 "ISO 2022 based 7-bit encoding using only G0."
1345 :coding-type 'iso-2022
1346 :mnemonic ?J
1347 :charset-list 'iso-2022
1348 :designation [(ascii t) nil nil nil]
1349 :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
;; G0 starts as ASCII; G2 has no initial charset and carries a 96
;; entry.  Adds the `single-shift' flag.
1351 (define-coding-system 'iso-2022-7bit-ss2
1352 "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
1353 :coding-type 'iso-2022
1354 :mnemonic ?$
1355 :charset-list 'iso-2022
1356 :designation [(ascii 94) nil (nil 96) nil]
1357 :flags '(short ascii-at-eol ascii-at-cntl 7-bit
1358 designation single-shift composition))
;; Uses G1 (not G2) for the 96 entry and `locking-shift' instead of
;; `single-shift'; unlike the two definitions above, the `short' flag
;; is absent.
1360 (define-coding-system 'iso-2022-7bit-lock
1361 "ISO-2022 coding system using Locking-Shift for 96-charset."
1362 :coding-type 'iso-2022
1363 :mnemonic ?&
1364 :charset-list 'iso-2022
1365 :designation [(ascii 94) (nil 96) nil nil]
1366 :flags '(ascii-at-eol ascii-at-cntl 7-bit
1367 designation locking-shift composition))
1369 (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
;; Unlike the generic ISO 2022 coding systems, this one lists its
;; charsets explicitly and assigns specific CJK charsets to G1, G2
;; and G3.  Its flags include both `locking-shift' and `single-shift'
;; plus `init-bol', but neither `designation' nor `composition'.
1371 (define-coding-system 'iso-2022-7bit-lock-ss2
1372 "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
1373 :coding-type 'iso-2022
1374 :mnemonic ?i
1375 :charset-list '(ascii
1376 japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
1377 korean-ksc5601
1378 chinese-gb2312
1379 chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1380 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1381 chinese-cns11643-7)
1382 :designation [(ascii 94)
1383 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
1384 (nil chinese-cns11643-2)
1385 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
1386 chinese-cns11643-6 chinese-cns11643-7)]
1387 :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
1388 single-shift init-bol))
;; `iso-2022-cjk' is an alias for this coding system.
1390 (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
;; Same designation vector as `iso-2022-7bit-ss2', but the flag list
;; has neither `7-bit' nor `short'.
1392 (define-coding-system 'iso-2022-8bit-ss2
1393 "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
1394 :coding-type 'iso-2022
1395 :mnemonic ?@
1396 :charset-list 'iso-2022
1397 :designation [(ascii 94) nil (nil 96) nil]
1398 :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
;; ISO 2022 based coding system with G0 starting as ASCII and G1
;; starting as `latin-iso8859-1'.  The flags include `long-form' and
;; `composition'.
1400 (define-coding-system 'compound-text
1401 "Compound text based generic encoding for decoding unknown messages.
1403 This coding system does not support extended segments of CTEXT."
1404 :coding-type 'iso-2022
1405 :mnemonic ?x
1406 :charset-list 'iso-2022
1407 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1408 :flags '(ascii-at-eol ascii-at-cntl long-form
1409 designation locking-shift single-shift composition)
1410 ;; Fixme: this isn't a valid MIME charset and has to be
1411 ;; special-cased elsewhere -- fx
1412 :mime-charset 'x-ctext)
;; Two shorter names for `compound-text'; `x-ctext' matches the
;; :mime-charset value above.
1414 (define-coding-system-alias 'x-ctext 'compound-text)
1415 (define-coding-system-alias 'ctext 'compound-text)
1417 ;; Same as compound-text, but doesn't produce composition escape
1418 ;; sequences. Used in post-read and pre-write conversions of
1419 ;; compound-text-with-extensions, see mule.el. Note that this should
1420 ;; not have a mime-charset property, to prevent it from showing up
1421 ;; close to the beginning of coding systems ordered by priority.
;; Compared with `compound-text', the flag list here omits both
;; `composition' and `long-form', and no :mime-charset is given.
1422 (define-coding-system 'ctext-no-compositions
1423 "Compound text based generic encoding for decoding unknown messages.
1425 Like `compound-text', but does not produce escape sequences for compositions."
1426 :coding-type 'iso-2022
1427 :mnemonic ?x
1428 :charset-list 'iso-2022
1429 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1430 :flags '(ascii-at-eol ascii-at-cntl
1431 designation locking-shift single-shift))
;; Same designation vector as `compound-text', without the
;; `composition' flag.  It also hooks in
;; `ctext-post-read-conversion' and `ctext-pre-write-conversion'.
;; No :mime-charset is given.
1433 (define-coding-system 'compound-text-with-extensions
1434 "Compound text encoding with ICCCM Extended Segment extensions.
1436 See the variable `ctext-non-standard-encodings-alist' for the
1437 detail about how extended segments are handled.
1439 This coding system should be used only for X selections. It is inappropriate
1440 for decoding and encoding files, process I/O, etc."
1441 :coding-type 'iso-2022
1442 :mnemonic ?x
1443 :charset-list 'iso-2022
1444 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1445 :flags '(ascii-at-eol ascii-at-cntl long-form
1446 designation locking-shift single-shift)
1447 :post-read-conversion 'ctext-post-read-conversion
1448 :pre-write-conversion 'ctext-pre-write-conversion)
;; Aliases that mirror the `x-ctext' / `ctext' naming.
1450 (define-coding-system-alias
1451 'x-ctext-with-extensions 'compound-text-with-extensions)
1452 (define-coding-system-alias
1453 'ctext-with-extensions 'compound-text-with-extensions)
;; Charset-based coding system restricted to `ascii'.  `??' below is
;; the read syntax for the character `?', used as the :default-char.
1455 (define-coding-system 'us-ascii
1456 "Encode ASCII as-is and encode non-ASCII characters to `?'."
1457 :coding-type 'charset
1458 :mnemonic ?-
1459 :charset-list '(ascii)
1460 :default-char ??
1461 :mime-charset 'us-ascii)
;; `iso-safe' is an alias for `us-ascii'.
1463 (define-coding-system-alias 'iso-safe 'us-ascii)
;; Both UTF-7 variants are declared with the `utf-8' coding type plus
;; a pair of pre-write / post-read conversion functions.
1465 (define-coding-system 'utf-7
1466 "UTF-7 encoding of Unicode (RFC 2152)."
1467 :coding-type 'utf-8
1468 :mnemonic ?U
1469 :mime-charset 'utf-7
1470 :charset-list '(unicode)
1471 :pre-write-conversion 'utf-7-pre-write-conversion
1472 :post-read-conversion 'utf-7-post-read-conversion)
;; IMAP flavour: its own conversion functions, a lowercase mnemonic,
;; and no :mime-charset.
1474 (define-coding-system 'utf-7-imap
1475 "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
1476 :coding-type 'utf-8
1477 :mnemonic ?u
1478 :charset-list '(unicode)
1479 :pre-write-conversion 'utf-7-imap-pre-write-conversion
1480 :post-read-conversion 'utf-7-imap-post-read-conversion)
1482 ;; Use us-ascii for terminal output if some other coding system is not
1483 ;; specified explicitly.
;; The argument is the `us-ascii' coding system defined earlier in
;; this file.
1484 (set-safe-terminal-coding-system-internal 'us-ascii)
1486 ;; The other coding-systems are defined in each language specific
1487 ;; files under lisp/language.
1489 ;; Normally, set coding system to `undecided' before reading a file.
1490 ;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1491 ;; but we regard them as containing multibyte characters.
1492 ;; Tar files are not decoded at all, but we treat them as raw bytes.
;; Each entry is (FILENAME-REGEXP . VALUE).  VALUE is a coding system,
;; a (DECODING . ENCODING) pair, or a function that chooses the
;; coding system.  The final entry has an empty regexp, which matches
;; every file name and so acts as the catch-all default.
;; NOTE(review): entries are presumably tried in order with the first
;; match winning -- confirm against `find-operation-coding-system'.
1494 (setq file-coding-system-alist
1495 '(("\\.elc\\'" . utf-8-emacs)
1496 ("\\.utf\\(-8\\)?\\'" . utf-8)
1497 ("\\.xml\\'" . xml-find-file-coding-system)
1498 ;; We use raw-text for reading loaddefs.el so that if it
1499 ;; happens to have DOS or Mac EOLs, they are converted to
1500 ;; newlines. This is required to make the special treatment
1501 ;; of the "\ newline" combination in loaddefs.el, which marks
1502 ;; the beginning of a doc string, work.
;; The dot before "el" is escaped so that only a literal
;; "loaddefs.el" matches.
1503 ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
1504 ("\\.tar\\'" . (no-conversion . no-conversion))
1505 ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
1506 ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
1507 ("" . (undecided . nil))))
1510 ;;; Setting coding categories and their priorities.
1512 ;; This setting is just for reading Emacs Lisp source files which
1513 ;; contain multilingual text while dumping Emacs. More appropriate
1514 ;; values are set by the command `set-language-environment' for each
1515 ;; language environment.
;; Put `iso-latin-1', `utf-8' and `iso-2022-7bit', in that order, at
;; the front of the coding-system priority list.  The closing
;; parenthesis sits on its own line.
1517 (set-coding-system-priority
1518 'iso-latin-1
1519 'utf-8
1520 'iso-2022-7bit
1521 )
1524 ;;; Miscellaneous settings.
1526 ;; Make all multibyte characters self-insert.
;; The target is the second element of `global-map' (assumed to be
;; the keymap's char-table -- TODO confirm).  The range is the cons
;; (128 . (max-char)), i.e. every character code from 128 upward.
1527 (set-char-table-range (nth 1 global-map)
1528 (cons 128 (max-char))
1529 'self-insert-command)
;; Set the entries for octal codes 221 through 226 (#x91..#x96) of
;; `latin-extra-code-table' to t.
1531 (aset latin-extra-code-table ?\221 t)
1532 (aset latin-extra-code-table ?\222 t)
1533 (aset latin-extra-code-table ?\223 t)
1534 (aset latin-extra-code-table ?\224 t)
1535 (aset latin-extra-code-table ?\225 t)
1536 (aset latin-extra-code-table ?\226 t)
1538 ;; The old code-pages library is obsoleted by coding systems based on
1539 ;; the charsets defined in this file but might be required by user
1540 ;; code.
;; Declaring the feature here lets a `(require 'code-pages)' in user
;; code succeed without loading a separate library.
1541 (provide 'code-pages)
1543 ;; Local variables:
1544 ;; no-byte-compile: t
1545 ;; End:
1547 ;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5
1548 ;;; mule-conf.el ends here