Update copyright year to 2014 by running admin/update-copyright.
[emacs.git] / lisp / international / mule-conf.el
blob7ae4426ccc56f4acd936feb1475a216b0f85b4dd
1 ;;; mule-conf.el --- configure multilingual environment
3 ;; Copyright (C) 1997-2014 Free Software Foundation, Inc.
4 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
6 ;; Registration Number H14PRO021
7 ;; Copyright (C) 2003
8 ;; National Institute of Advanced Industrial Science and Technology (AIST)
9 ;; Registration Number H13PRO009
11 ;; Keywords: i18n, mule, multilingual, character set, coding system
13 ;; This file is part of GNU Emacs.
15 ;; GNU Emacs is free software: you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation, either version 3 of the License, or
18 ;; (at your option) any later version.
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ;; GNU General Public License for more details.
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 ;;; Commentary:
30 ;; This file defines the Emacs charsets and some basic coding systems.
31 ;; Other coding systems are defined in the files in directory
32 ;; lisp/language.
34 ;;; Code:
36 ;;; Remarks
38 ;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
39 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
40 ;; http://www.ecma.ch/.
42 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
43 ;; MS Windows, which are presumably the only charsets we really need
44 ;; to worry about on such systems:
45 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
46 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
47 ;; 1258, 874, 932, 936, 949, 950
49 ;;; Definitions of character sets.
51 ;; The charsets `ascii', `unicode' and `eight-bit' are already defined
52 ;; in charset.c as below:
54 ;; (define-charset 'ascii
55 ;; ""
56 ;; :dimension 1
57 ;; :code-space [0 127]
58 ;; :iso-final-char ?B
59 ;; :ascii-compatible-p t
60 ;; :emacs-mule-id 0
61 ;; :code-offset 0)
63 ;; (define-charset 'unicode
64 ;; ""
65 ;; :dimension 3
66 ;; :code-space [0 255 0 255 0 16]
67 ;; :ascii-compatible-p t
68 ;; :code-offset 0)
70 ;; (define-charset 'emacs
71 ;; ""
72 ;; :dimension 3
73 ;; :code-space [0 255 0 255 0 63]
74 ;; :ascii-compatible-p t
75 ;; :supplementary-p t
76 ;; :code-offset 0)
78 ;; (define-charset 'eight-bit
79 ;; ""
80 ;; :dimension 1
81 ;; :code-space [128 255]
82 ;; :code-offset #x3FFF80)
84 ;; We now set :docstring, :short-name, and :long-name properties.
;; Attach descriptive properties to the charsets that already exist
;; before this file is loaded.  Each table entry has the form
;; (CHARSET PROPERTY VALUE ...); the properties are stored in the
;; order in which they are listed.
(dolist (entry '((ascii
                  :docstring "ASCII (ISO646 IRV)"
                  :short-name "ASCII"
                  :long-name "ASCII (ISO646 IRV)")
                 (iso-8859-1
                  :docstring "Latin-1 (ISO/IEC 8859-1)"
                  :short-name "Latin-1"
                  :long-name "Latin-1")
                 (unicode
                  :docstring "Unicode (ISO10646)"
                  :short-name "Unicode"
                  :long-name "Unicode (ISO10646)")
                 (emacs
                  :docstring "Full Emacs charset (excluding eight bit chars)"
                  :short-name "Emacs"
                  :long-name "Emacs")
                 ;; `eight-bit' gets no :long-name here.
                 (eight-bit
                  :docstring "Raw bytes 128-255"
                  :short-name "Raw bytes")))
  (let ((charset (car entry))
        (props (cdr entry)))
    (while props
      (put-charset-property charset (car props) (cadr props))
      (setq props (cddr props)))))
114 (define-charset-alias 'ucs 'unicode)
116 (define-charset 'latin-iso8859-1
117 "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
118 :short-name "RHP of Latin-1"
119 :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
120 :iso-final-char ?A
121 :emacs-mule-id 129
122 :code-space [32 127]
123 :code-offset 160)
125 ;; Name perhaps not ideal, but is XEmacs-compatible.
126 (define-charset 'control-1
127 "8-bit control code (0x80..0x9F)"
128 :short-name "8-bit control code"
129 :code-space [128 159]
130 :code-offset 128)
132 (define-charset 'eight-bit-control
133 "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
134 :short-name "Raw bytes 0x80..0x9F"
135 :supplementary-p t
136 :code-space [128 159]
137 :code-offset #x3FFF80) ; see character.h
139 (define-charset 'eight-bit-graphic
140 "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
141 :short-name "Raw bytes 0xA0..0xFF"
142 :supplementary-p t
143 :code-space [160 255]
144 :code-offset #x3FFFA0) ; see character.h
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define the 8-bit charset SYMBOL and, optionally, its right-hand part.

All arguments are forms that are evaluated when the expansion runs.
SYMBOL, ISO-SYMBOL, NAME, NICKNAME and ISO-IR are each evaluated
exactly once, even though the expansion uses them several times.

SYMBOL is defined as an ASCII-compatible charset with code space
[0 255], using NAME as its docstring and long name, NICKNAME as its
short name and MAP as its :map.

If ISO-SYMBOL is non-nil, it is additionally defined as the subset of
SYMBOL made of codes 160..255 shifted by -128, with code space
[32 127], ISO-FINAL as :iso-final-char and EMACS-MULE-ID as
:emacs-mule-id.  When ISO-IR is non-nil, it is appended to the
docstring of ISO-SYMBOL as \"ISO-IR-<number>\"."
  ;; Uninterned symbols keep the temporary bindings from clashing with
  ;; anything the argument forms or `define-charset' might refer to.
  (let ((cs (make-symbol "charset"))
        (rhp (make-symbol "iso-charset"))
        (doc (make-symbol "name"))
        (nick (make-symbol "nickname"))
        (ir (make-symbol "iso-ir")))
    `(let ((,cs ,symbol)
           (,rhp ,iso-symbol)
           (,doc ,name)
           (,nick ,nickname)
           (,ir ,iso-ir))
       (define-charset ,cs
         ,doc
         :short-name ,nick
         :long-name ,doc
         :ascii-compatible-p t
         :code-space [0 255]
         :map ,map)
       (if ,rhp
           (define-charset ,rhp
             (if ,ir
                 (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                         ,doc ,nick ,ir)
               (format "Right-Hand Part of %s (%s)" ,doc ,nick))
             :short-name (format "RHP of %s" ,doc)
             :long-name (format "RHP of %s (%s)" ,doc ,nick)
             :iso-final-char ,iso-final
             :emacs-mule-id ,emacs-mule-id
             :code-space [32 127]
             :subset (list ,cs 160 255 -128))))))
170 (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
171 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
173 (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
174 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
176 (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
177 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
179 (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
180 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
182 (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
183 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
185 (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
186 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
188 (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
189 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
191 (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
192 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
194 (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
195 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
197 ;; http://www.nectec.or.th/it-standards/iso8859-11/
198 ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
199 ;; plus nbsp
200 (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
201 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
203 ;; 8859-12 doesn't (yet?) exist.
205 (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
206 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
208 (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
209 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
211 (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
212 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
214 (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
215 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
217 ;; No point in keeping it around.
218 (fmakunbound 'define-iso-single-byte-charset)
220 ;; Can this be shared with 8859-11?
221 ;; N.b. not all of these are defined in Unicode.
222 (define-charset 'thai-tis620
223 "TIS620.2533"
224 :short-name "TIS620.2533"
225 :iso-final-char ?T
226 :emacs-mule-id 133
227 :code-space [32 127]
228 :code-offset #x0E00)
230 ;; Fixme: doc for this, c.f. above
231 (define-charset 'tis620-2533
232 "TIS620.2533"
233 :short-name "TIS620.2533"
234 :ascii-compatible-p t
235 :code-space [0 255]
236 :superset '(ascii eight-bit-control (thai-tis620 . 128)))
238 (define-charset 'jisx0201
239 "JISX0201"
240 :short-name "JISX0201"
241 :code-space [0 #xDF]
242 :map "JISX0201")
244 (define-charset 'latin-jisx0201
245 "Roman Part of JISX0201.1976"
246 :short-name "JISX0201 Roman"
247 :long-name "Japanese Roman (JISX0201.1976)"
248 :iso-final-char ?J
249 :emacs-mule-id 138
250 :supplementary-p t
251 :code-space [33 126]
252 :subset '(jisx0201 33 126 0))
254 (define-charset 'katakana-jisx0201
255 "Katakana Part of JISX0201.1976"
256 :short-name "JISX0201 Katakana"
257 :long-name "Japanese Katakana (JISX0201.1976)"
258 :iso-final-char ?I
259 :emacs-mule-id 137
260 :supplementary-p t
261 :code-space [33 126]
262 :subset '(jisx0201 161 254 -128))
264 (define-charset 'chinese-gb2312
265 "GB2312 Chinese simplified: ISO-IR-58"
266 :short-name "GB2312"
267 :long-name "GB2312: ISO-IR-58"
268 :iso-final-char ?A
269 :emacs-mule-id 145
270 :code-space [33 126 33 126]
271 :code-offset #x110000
272 :unify-map "GB2312")
274 (define-charset 'chinese-gbk
275 "GBK Chinese simplified."
276 :short-name "GBK"
277 :code-space [#x40 #xFE #x81 #xFE]
278 :code-offset #x160000
279 :unify-map "GBK")
280 (define-charset-alias 'cp936 'chinese-gbk)
281 (define-charset-alias 'windows-936 'chinese-gbk)
283 (define-charset 'chinese-cns11643-1
284 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
285 :short-name "CNS11643-1"
286 :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
287 :iso-final-char ?G
288 :emacs-mule-id 149
289 :code-space [33 126 33 126]
290 :code-offset #x114000
291 :unify-map "CNS-1")
293 (define-charset 'chinese-cns11643-2
294 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
295 :short-name "CNS11643-2"
296 :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
297 :iso-final-char ?H
298 :emacs-mule-id 150
299 :code-space [33 126 33 126]
300 :code-offset #x118000
301 :unify-map "CNS-2")
303 (define-charset 'chinese-cns11643-3
304 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
305 :short-name "CNS11643-3"
306 :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
307 :iso-final-char ?I
308 :code-space [33 126 33 126]
309 :emacs-mule-id 246
310 :code-offset #x11C000
311 :unify-map "CNS-3")
313 (define-charset 'chinese-cns11643-4
314 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
315 :short-name "CNS11643-4"
316 :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
317 :iso-final-char ?J
318 :emacs-mule-id 247
319 :code-space [33 126 33 126]
320 :code-offset #x120000
321 :unify-map "CNS-4")
323 (define-charset 'chinese-cns11643-5
324 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
325 :short-name "CNS11643-5"
326 :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
327 :iso-final-char ?K
328 :emacs-mule-id 248
329 :code-space [33 126 33 126]
330 :code-offset #x124000
331 :unify-map "CNS-5")
333 (define-charset 'chinese-cns11643-6
334 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
335 :short-name "CNS11643-6"
336 :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
337 :iso-final-char ?L
338 :emacs-mule-id 249
339 :code-space [33 126 33 126]
340 :code-offset #x128000
341 :unify-map "CNS-6")
343 (define-charset 'chinese-cns11643-7
344 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
345 :short-name "CNS11643-7"
346 :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
347 :iso-final-char ?M
348 :emacs-mule-id 250
349 :code-space [33 126 33 126]
350 :code-offset #x12C000
351 :unify-map "CNS-7")
353 (define-charset 'big5
354 "Big5 (Chinese traditional)"
355 :short-name "Big5"
356 :code-space [#x40 #xFE #xA1 #xFE]
357 :code-offset #x130000
358 :unify-map "BIG5")
359 ;; Fixme: AKA cp950 according to
360 ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
361 ;; that correct?
;; Level 1 of Big5 as a 94x94 charset.  The range named in :long-name
;; now agrees with the docstring (A141-C67E).
(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67E"
  :iso-final-char ?0
  :emacs-mule-id 152
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "BIG5-1")
374 (define-charset 'chinese-big5-2
375 "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
376 :short-name "Big5 (Level-2)"
377 :long-name "Big5 (Level-2) C940-FEFE"
378 :iso-final-char ?1
379 :emacs-mule-id 153
380 :supplementary-p t
381 :code-space [#x21 #x7E #x21 #x7E]
382 :code-offset #x137800
383 :unify-map "BIG5-2")
385 (define-charset 'japanese-jisx0208
386 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
387 :short-name "JISX0208"
388 :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
389 :iso-final-char ?B
390 :emacs-mule-id 146
391 :code-space [33 126 33 126]
392 :code-offset #x140000
393 :unify-map "JISX0208")
395 (define-charset 'japanese-jisx0208-1978
396 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
397 :short-name "JISX0208.1978"
398 :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
399 :iso-final-char ?@
400 :emacs-mule-id 144
401 :code-space [33 126 33 126]
402 :code-offset #x144000
403 :unify-map "JISC6226")
405 (define-charset 'japanese-jisx0212
406 "JISX0212 Japanese supplement: ISO-IR-159"
407 :short-name "JISX0212"
408 :long-name "JISX0212 (Japanese): ISO-IR-159"
409 :iso-final-char ?D
410 :emacs-mule-id 148
411 :code-space [33 126 33 126]
412 :code-offset #x148000
413 :unify-map "JISX0212")
415 ;; Note that jisx0213 contains characters not in Unicode (3.2?). It's
416 ;; arguable whether it should have a unify-map.
417 (define-charset 'japanese-jisx0213-1
418 "JISX0213.2000 Plane 1 (Japanese)"
419 :short-name "JISX0213-1"
420 :iso-final-char ?O
421 :emacs-mule-id 151
422 :unify-map "JISX2131"
423 :code-space [33 126 33 126]
424 :code-offset #x14C000)
426 (define-charset 'japanese-jisx0213-2
427 "JISX0213.2000 Plane 2 (Japanese)"
428 :short-name "JISX0213-2"
429 :iso-final-char ?P
430 :emacs-mule-id 254
431 :unify-map "JISX2132"
432 :code-space [33 126 33 126]
433 :code-offset #x150000)
435 (define-charset 'japanese-jisx0213-a
436 "JISX0213.2004 adds these characters to JISX0213.2000."
437 :short-name "JISX0213A"
438 :dimension 2
439 :code-space [33 126 33 126]
440 :supplementary-p t
441 :map "JISX213A")
443 (define-charset 'japanese-jisx0213.2004-1
444 "JISX0213.2004 Plane1 (Japanese)"
445 :short-name "JISX0213.2004-1"
446 :dimension 2
447 :code-space [33 126 33 126]
448 :iso-final-char ?Q
449 :superset '(japanese-jisx0213-a japanese-jisx0213-1))
451 (define-charset 'katakana-sjis
452 "Katakana part of Shift-JIS"
453 :dimension 1
454 :code-space [#xA1 #xDF]
455 :subset '(jisx0201 #xA1 #xDF 0)
456 :supplementary-p t)
458 (define-charset 'cp932-2-byte
459 "2-byte part of CP932"
460 :dimension 2
461 :map "CP932-2BYTE"
462 :code-space [#x40 #xFC #x81 #xFC]
463 :supplementary-p t)
465 (define-charset 'cp932
466 "CP932 (Microsoft shift-jis)"
467 :code-space [#x00 #xFF #x00 #xFE]
468 :short-name "CP932"
469 :superset '(ascii katakana-sjis cp932-2-byte))
471 (define-charset 'korean-ksc5601
472 "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
473 :short-name "KSC5601"
474 :long-name "KSC5601 (Korean): ISO-IR-149"
475 :iso-final-char ?C
476 :emacs-mule-id 147
477 :code-space [33 126 33 126]
478 :code-offset #x279f94 ; ... #x27c217
479 :unify-map "KSC5601")
481 (define-charset 'big5-hkscs
482 "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
483 :short-name "Big5"
484 :code-space [#x40 #xFE #xA1 #xFE]
485 :code-offset #x27c218 ; ... #x280839
486 :unify-map "BIG5-HKSCS")
488 (define-charset 'cp949-2-byte
489 "2-byte part of CP949"
490 :dimension 2
491 :map "CP949-2BYTE"
492 :code-space [#x41 #xFE #x81 #xFD]
493 :supplementary-p t)
495 (define-charset 'cp949
496 "CP949 (Korean)"
497 :short-name "CP949"
498 :long-name "CP949 (Korean)"
499 :code-space [#x00 #xFE #x00 #xFD]
500 :superset '(ascii cp949-2-byte))
502 (define-charset 'chinese-sisheng
503 "SiSheng characters for PinYin/ZhuYin"
504 :short-name "SiSheng"
505 :long-name "SiSheng (PinYin/ZhuYin)"
506 :iso-final-char ?0
507 :emacs-mule-id 160
508 :code-space [33 126]
509 :unify-map "MULE-sisheng"
510 :supplementary-p t
511 :code-offset #x200000)
513 ;; A subset of the 1989 version of IPA. It consists of the consonant
514 ;; signs used in English, French, German and Italian, and all vowels
515 ;; signs in the table. [says old MULE doc]
516 (define-charset 'ipa
517 "IPA (International Phonetic Association)"
518 :short-name "IPA"
519 :iso-final-char ?0
520 :emacs-mule-id 161
521 :unify-map "MULE-ipa"
522 :code-space [32 127]
523 :supplementary-p t
524 :code-offset #x200080)
526 (define-charset 'viscii
527 "VISCII1.1"
528 :short-name "VISCII"
529 :long-name "VISCII 1.1"
530 :code-space [0 255]
531 :map "VISCII")
533 (define-charset 'vietnamese-viscii-lower
534 "VISCII1.1 lower-case"
535 :short-name "VISCII lower"
536 :long-name "VISCII lower-case"
537 :iso-final-char ?1
538 :emacs-mule-id 162
539 :code-space [32 127]
540 :code-offset #x200200
541 :supplementary-p t
542 :unify-map "MULE-lviscii")
544 (define-charset 'vietnamese-viscii-upper
545 "VISCII1.1 upper-case"
546 :short-name "VISCII upper"
547 :long-name "VISCII upper-case"
548 :iso-final-char ?2
549 :emacs-mule-id 163
550 :code-space [32 127]
551 :code-offset #x200280
552 :supplementary-p t
553 :unify-map "MULE-uviscii")
555 (define-charset 'vscii
556 "VSCII1.1 (TCVN-5712 VN1)"
557 :short-name "VSCII"
558 :code-space [0 255]
559 :map "VSCII")
561 (define-charset-alias 'tcvn-5712 'vscii)
563 ;; Fixme: see note in tcvn.map about combining characters
564 (define-charset 'vscii-2
565 "VSCII-2 (TCVN-5712 VN2)"
566 :code-space [0 255]
567 :map "VSCII-2")
569 (define-charset 'koi8-r
570 "KOI8-R"
571 :short-name "KOI8-R"
572 :ascii-compatible-p t
573 :code-space [0 255]
574 :map "KOI8-R")
576 (define-charset-alias 'koi8 'koi8-r)
578 (define-charset 'alternativnyj
579 "ALTERNATIVNYJ"
580 :short-name "alternativnyj"
581 :ascii-compatible-p t
582 :code-space [0 255]
583 :map "ALTERNATIVNYJ")
585 (define-charset 'cp866
586 "CP866"
587 :short-name "cp866"
588 :ascii-compatible-p t
589 :code-space [0 255]
590 :map "IBM866")
591 (define-charset-alias 'ibm866 'cp866)
593 (define-charset 'koi8-u
594 "KOI8-U"
595 :short-name "KOI8-U"
596 :ascii-compatible-p t
597 :code-space [0 255]
598 :map "KOI8-U")
600 (define-charset 'koi8-t
601 "KOI8-T"
602 :short-name "KOI8-T"
603 :ascii-compatible-p t
604 :code-space [0 255]
605 :map "KOI8-T")
607 (define-charset 'georgian-ps
608 "GEORGIAN-PS"
609 :short-name "GEORGIAN-PS"
610 :ascii-compatible-p t
611 :code-space [0 255]
612 :map "KA-PS")
614 (define-charset 'georgian-academy
615 "GEORGIAN-ACADEMY"
616 :short-name "GEORGIAN-ACADEMY"
617 :ascii-compatible-p t
618 :code-space [0 255]
619 :map "KA-ACADEMY")
;; Microsoft Windows code pages 1250..1258.  Each one is defined as an
;; ASCII-compatible single-byte charset `windows-<number>' using the
;; map "CP<number>", immediately followed by its alias `cp<number>'.
(dolist (page '((1250 . "Central Europe")
                (1251 . "Cyrillic")
                (1252 . "Latin I")
                (1253 . "Greek")
                (1254 . "Turkish")
                (1255 . "Hebrew")
                (1256 . "Arabic")
                (1257 . "Baltic")
                (1258 . "Viet Nam")))
  (let* ((number (car page))
         (charset (intern (format "windows-%d" number)))
         (title (format "WINDOWS-%d" number)))
    (define-charset charset
      (format "%s (%s)" title (cdr page))
      :short-name title
      :ascii-compatible-p t
      :code-space [0 255]
      :map (format "CP%d" number))
    (define-charset-alias (intern (format "cp%d" number)) charset)))
693 (define-charset 'next
694 "NEXT"
695 :short-name "NEXT"
696 :ascii-compatible-p t
697 :code-space [0 255]
698 :map "NEXTSTEP")
700 (define-charset 'cp1125
701 "CP1125"
702 :short-name "CP1125"
703 :code-space [0 255]
704 :ascii-compatible-p t
705 :map "CP1125")
706 (define-charset-alias 'ruscii 'cp1125)
707 ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
708 (define-charset-alias 'cp866u 'cp1125)
;; DOS/OEM code pages.  Each table entry is
;;   (CHARSET DOCSTRING MAP [ALIAS])
;; and is defined as an ASCII-compatible single-byte charset whose
;; short name is the upper-cased charset name.  An ALIAS, when present,
;; is defined right after its charset.
;;
;; Fixme: Cf. iconv; http://czyborra.com/charsets/codepages.html
;; shows cp437 as not ASCII compatible, with various graphics in
;; 0x01-0x1F.
(dolist (spec '((cp437 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
                       "IBM437")
                (cp720 "CP720 (Arabic)" "CP720")
                (cp737 "CP737 (PC Greek)" "CP737")
                (cp775 "CP775 (PC Baltic)" "CP775")
                (cp851 "CP851 (Greek)" "IBM851")
                (cp852 "CP852 (MS-DOS Latin-2)" "IBM852")
                (cp855 "CP855 (IBM Cyrillic)" "IBM855")
                (cp857 "CP857 (IBM Turkish)" "IBM857")
                ;; IANA has IBM00858/CP00858.
                (cp858 "CP858 (Multilingual Latin I + Euro)" "CP858" cp00858)
                (cp860 "CP860 (MS-DOS Portuguese)" "IBM860")
                (cp861 "CP861 (MS-DOS Icelandic)" "IBM861")
                (cp862 "CP862 (PC Hebrew)" "IBM862")
                (cp863 "CP863 (MS-DOS Canadian French)" "IBM863")
                (cp864 "CP864 (PC Arabic)" "IBM864")
                (cp865 "CP865 (MS-DOS Nordic)" "IBM865")
                (cp869 "CP869 (IBM Modern Greek)" "IBM869")
                (cp874 "CP874 (IBM Thai)" "IBM874")))
  (let ((charset (nth 0 spec))
        (alias (nth 3 spec)))
    (define-charset charset
      (nth 1 spec)
      :short-name (upcase (symbol-name charset))
      :code-space [0 255]
      :ascii-compatible-p t
      :map (nth 2 spec))
    (if alias
        (define-charset-alias alias charset))))
833 ;; For Arabic, we need three different types of character sets.
834 ;; Digits are of direction left-to-right and of width 1-column.
835 ;; Others are of direction right-to-left and of width 1-column or
836 ;; 2-column.
837 (define-charset 'arabic-digit
838 "Arabic digit"
839 :short-name "Arabic digit"
840 :iso-final-char ?2
841 :emacs-mule-id 164
842 :supplementary-p t
843 :code-space [34 42]
844 :code-offset #x0600)
846 (define-charset 'arabic-1-column
847 "Arabic 1-column"
848 :short-name "Arabic 1-col"
849 :long-name "Arabic 1-column"
850 :iso-final-char ?3
851 :emacs-mule-id 165
852 :supplementary-p t
853 :code-space [33 126]
854 :code-offset #x200100)
856 (define-charset 'arabic-2-column
857 "Arabic 2-column"
858 :short-name "Arabic 2-col"
859 :long-name "Arabic 2-column"
860 :iso-final-char ?4
861 :emacs-mule-id 224
862 :supplementary-p t
863 :code-space [33 126]
864 :code-offset #x200180)
866 ;; Lao script.
867 ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
868 ;; Not all of them are defined in Unicode.
869 (define-charset 'lao
870 "Lao characters (ISO10646 0E81..0EDF)"
871 :short-name "Lao"
872 :iso-final-char ?1
873 :emacs-mule-id 167
874 :supplementary-p t
875 :code-space [33 126]
876 :code-offset #x0E81)
878 (define-charset 'mule-lao
879 "Lao characters (ISO10646 0E81..0EDF)"
880 :short-name "Lao"
881 :code-space [0 255]
882 :supplementary-p t
883 :superset '(ascii eight-bit-control (lao . 128)))
886 ;; Indian scripts. Symbolic charset for data exchange. Glyphs are
887 ;; not assigned. They are automatically converted to each Indian
888 ;; script which IS-13194 supports.
890 (define-charset 'indian-is13194
891 "7-bit representation of IS 13194 (ISCII) for Devanagari"
892 :short-name "IS 13194 (DEV)"
893 :long-name "Indian IS 13194 (DEV)"
894 :iso-final-char ?5
895 :emacs-mule-id 225
896 :supplementary-p t
897 :code-space [33 126]
898 :code-offset #x180000
899 :unify-map "MULE-is13194")
;; Glyph charsets for the CDAC and AKRUTI fonts.  Every charset takes
;; the next block of #x100 code points, starting at #x180100: first all
;; `<script>-cdac' charsets, then all `<script>-akruti' charsets, each
;; in the order listed below.
(let ((next-offset #x180100))
  (dolist (glyph-family
           '(("cdac" "CDAC"
              devanagari sanskrit bengali tamil telugu assamese
              oriya kannada malayalam gujarati punjabi)
             ("akruti" "AKRUTI"
              devanagari bengali punjabi gujarati
              oriya tamil telugu kannada malayalam)))
    (let ((suffix (car glyph-family))
          (font-label (cadr glyph-family)))
      (dolist (script (cddr glyph-family))
        (let ((script-name (capitalize (symbol-name script))))
          (define-charset (intern (format "%s-%s" script suffix))
            (format "Glyphs of %s script for %s font. Subset of `indian-glyph'."
                    script-name font-label)
            :short-name (format "%s %s glyphs" font-label script-name)
            :supplementary-p t
            :code-space [0 255]
            :code-offset next-offset))
        (setq next-offset (+ next-offset #x100))))))
924 (define-charset 'indian-glyph
925 "Glyphs for Indian characters."
926 :short-name "Indian glyph"
927 :iso-final-char ?4
928 :emacs-mule-id 240
929 :supplementary-p t
930 :code-space [32 127 32 127]
931 :code-offset #x180100)
;; Charset holding the actual glyphs of 1-column width.
;; NOTE(review): `indian-2-column' in this file uses the same
;; :emacs-mule-id (251) and the same :code-offset (#x184000) --
;; confirm that this overlap is intended.
(define-charset 'indian-1-column
  "Indian charset for 1-column width glyphs."
  :code-space [33 126 33 126]
  :code-offset #x184000
  :supplementary-p t
  :iso-final-char ?6
  :emacs-mule-id 251
  :short-name "Indian 1-col"
  :long-name "Indian 1 Column")
944 ;; Actual Glyph for 2-column width.
945 (define-charset 'indian-2-column
946 "Indian charset for 2-column width glyphs."
947 :short-name "Indian 2-col"
948 :long-name "Indian 2 Column"
949 :iso-final-char ?5
950 :emacs-mule-id 251
951 :supplementary-p t
952 :code-space [33 126 33 126]
953 :code-offset #x184000)
;; Tibetan characters; the short and long names mark this as the
;; 2-column charset.  :iso-final-char is specified exactly once.
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :unify-map "MULE-tibetan"
  :supplementary-p t
  :code-space [33 126 33 37]
  :code-offset #x190000)
967 (define-charset 'tibetan-1-column
968 "Tibetan 1 column glyph"
969 :short-name "Tibetan 1-col"
970 :long-name "Tibetan 1 column"
971 :iso-final-char ?8
972 :emacs-mule-id 241
973 :supplementary-p t
974 :code-space [33 126 33 37]
975 :code-offset #x190000)
977 ;; Subsets of Unicode.
978 (define-charset 'mule-unicode-2500-33ff
979 "Unicode characters of the range U+2500..U+33FF."
980 :short-name "Unicode subset 2"
981 :long-name "Unicode subset (U+2500..U+33FF)"
982 :iso-final-char ?2
983 :emacs-mule-id 242
984 :supplementary-p t
985 :code-space [#x20 #x7f #x20 #x47]
986 :code-offset #x2500)
;; Subset of Unicode covering U+E000..U+FFFF.  The :long-name writes
;; the range in the same "U+xxxx..U+yyyy" form as the docstring.
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000
  :max-code 30015) ; U+FFFF
999 (define-charset 'mule-unicode-0100-24ff
1000 "Unicode characters of the range U+0100..U+24FF."
1001 :short-name "Unicode subset"
1002 :long-name "Unicode subset (U+0100..U+24FF)"
1003 :iso-final-char ?1
1004 :emacs-mule-id 244
1005 :supplementary-p t
1006 :code-space [#x20 #x7F #x20 #x7F]
1007 :code-offset #x100)
1009 (define-charset 'unicode-bmp
1010 "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
1011 :short-name "Unicode BMP"
1012 :code-space [0 255 0 255]
1013 :code-offset 0)
;; Plane 1 of Unicode as a two-dimensional charset.  The short name
;; carries no trailing space, like the other `unicode-*' plane charsets.
(define-charset 'unicode-smp
  "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
  :short-name "Unicode SMP"
  :code-space [0 255 0 255]
  :code-offset #x10000)
1021 (define-charset 'unicode-sip
1022 "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
1023 :short-name "Unicode SIP"
1024 :code-space [0 255 0 255]
1025 :code-offset #x20000)
1027 (define-charset 'unicode-ssp
1028 "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
1029 :short-name "Unicode SSP"
1030 :code-space [0 255 0 255]
1031 :code-offset #xE0000)
1033 (define-charset 'ethiopic
1034 "Ethiopic characters for Amharic and Tigrigna."
1035 :short-name "Ethiopic"
1036 :long-name "Ethiopic characters"
1037 :iso-final-char ?3
1038 :emacs-mule-id 245
1039 :supplementary-p t
1040 :unify-map "MULE-ethiopic"
1041 :code-space [33 126 33 126]
1042 :code-offset #x1A0000)
1044 (define-charset 'mac-roman
1045 "Mac Roman charset"
1046 :short-name "Mac Roman"
1047 :ascii-compatible-p t
1048 :code-space [0 255]
1049 :map "MACINTOSH")
1051 ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
1052 (define-charset 'ebcdic-us
1053 "US version of EBCDIC"
1054 :short-name "EBCDIC-US"
1055 :code-space [0 255]
1056 :mime-charset 'ebcdic-us
1057 :map "EBCDICUS")
1059 (define-charset 'ebcdic-uk
1060 "UK version of EBCDIC"
1061 :short-name "EBCDIC-UK"
1062 :code-space [0 255]
1063 :mime-charset 'ebcdic-uk
1064 :map "EBCDICUK")
1066 (define-charset 'ibm1047
1067 ;; Says groff:
1068 "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
1069 :short-name "IBM1047"
1070 :code-space [0 255]
1071 :mime-charset 'ibm1047
1072 :map "IBM1047")
1073 (define-charset-alias 'cp1047 'ibm1047)
;; ASCII-compatible single-byte charset loaded from the "HP-ROMAN8" map.
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "HP-ROMAN8")
1082 ;; To make a coding system with this, a pre-write-conversion should
1083 ;; account for the commented-out multi-valued code points in
1084 ;; stdenc.map.
1085 (define-charset 'adobe-standard-encoding
1086 "Adobe `standard encoding' used in PostScript"
1087 :short-name "ADOBE-STANDARD-ENCODING"
1088 :code-space [#x20 255]
1089 :map "stdenc")
1091 (define-charset 'symbol
1092 "Adobe symbol encoding used in PostScript"
1093 :short-name "ADOBE-SYMBOL"
1094 :code-space [#x20 255]
1095 :map "symbol")
;; Single-byte, ASCII-compatible charset loaded from the "IBM850" map.
(define-charset 'ibm850
  "DOS codepage 850 (Latin-1)"
  :map "IBM850"
  :code-space [#x00 #xFF]
  :ascii-compatible-p t
  :short-name "IBM850")
;; `cp850' names the same charset.
(define-charset-alias 'cp850 'ibm850)
;; Single-byte, ASCII-compatible charset loaded from the "MIK" map.
(define-charset 'mik
  "Bulgarian DOS codepage"
  :map "MIK"
  :code-space [#x00 #xFF]
  :ascii-compatible-p t
  :short-name "MIK")
;; Single-byte, ASCII-compatible charset loaded from the "PTCP154" map.
(define-charset 'ptcp154
  "`Paratype' codepage (Asian Cyrillic)"
  :map "PTCP154"
  :mime-charset 'pt154
  :code-space [#x00 #xFF]
  :ascii-compatible-p t
  :short-name "PT154")
;; Alternative names for the same charset.
(define-charset-alias 'pt154 'ptcp154)
(define-charset-alias 'cp154 'ptcp154)
;; Two-byte part of GB18030, loaded from the "GB180302" map.
(define-charset 'gb18030-2-byte
  "GB18030 2-byte (0x814E..0xFEFE)"
  :map "GB180302"
  :supplementary-p t
  :code-space [#x40 #xFE #x81 #xFE])
;; Four-byte part of GB18030 for the BMP, loaded from the "GB180304" map.
(define-charset 'gb18030-4-byte-bmp
  "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
  :map "GB180304"
  :supplementary-p t
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84])
;; Four-byte part of GB18030 for the SMP; code points are mapped
;; arithmetically starting at character #x10000.
(define-charset 'gb18030-4-byte-smp
  "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
  :code-offset #x10000
  :supplementary-p t
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
  :min-code '(#x9030 . #x8130)
  :max-code '(#xE332 . #x9A35))
;; Four-byte GB18030 range 0x8431A530-0x8F39FE39, mapped arithmetically
;; starting at character #x200000.  The form is explicitly closed
;; before the trailing comment so that it cannot swallow what follows.
(define-charset 'gb18030-4-byte-ext-1
  "GB18030 4-byte (0x8431A530-0x8F39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
  :min-code '(#x8431 . #xA530)
  :max-code '(#x8F39 . #xFE39)
  :supplementary-p t
  :code-offset #x200000)                ; ... #x22484B
;; Four-byte GB18030 range 0xE3329A36-0xFE39FE39, mapped arithmetically
;; starting at character #x22484C.  The form is explicitly closed
;; before the trailing comment so that it cannot swallow what follows.
(define-charset 'gb18030-4-byte-ext-2
  "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
  :min-code '(#xE332 . #x9A36)
  :max-code '(#xFE39 . #xFE39)
  :supplementary-p t
  :code-offset #x22484C)                ; ... #x279f93
;; The complete GB18030 charset, defined as the superset of ASCII and
;; the 2-byte and 4-byte component charsets.
(define-charset 'gb18030
  "GB18030"
  :superset '(ascii
              gb18030-2-byte
              gb18030-4-byte-bmp
              gb18030-4-byte-smp
              gb18030-4-byte-ext-1
              gb18030-4-byte-ext-2)
  :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
  :min-code 0
  :max-code '(#xFE39 . #xFE39))
;; Two-dimensional charset (94 code points per dimension), mapped
;; arithmetically starting at character #x27A000.
(define-charset 'chinese-cns11643-15
  "CNS11643 Plane 15 Chinese Traditional"
  :long-name "CNS11643-15 (Chinese traditional)"
  :short-name "CNS11643-15"
  :code-offset #x27A000
  :code-space [#x21 #x7E #x21 #x7E])
;; Call `unify-charset' on each of the following charsets, in this order.
(dolist (charset '(chinese-gb2312
                   chinese-gbk
                   chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7
                   big5
                   chinese-big5-1
                   chinese-big5-2
                   big5-hkscs
                   korean-ksc5601
                   vietnamese-viscii-lower
                   vietnamese-viscii-upper
                   chinese-sisheng
                   ipa
                   tibetan
                   ethiopic
                   indian-is13194
                   japanese-jisx0208-1978
                   japanese-jisx0208
                   japanese-jisx0212
                   japanese-jisx0213-1
                   japanese-jisx0213-2))
  (unify-charset charset))
;; Tables for translating characters on decoding and on encoding.
;; Fixme: these aren't used now -- should they be?
(setq standard-translation-table-for-decode nil
      standard-translation-table-for-encode nil)
1211 ;;; Make fundamental coding systems.
;; The coding systems `no-conversion' and `undecided' are already
;; defined in coding.c as follows:
1216 ;; (define-coding-system 'no-conversion
1217 ;; "..."
1218 ;; :coding-type 'raw-text
1219 ;; ...)
1220 ;; (define-coding-system 'undecided
1221 ;; "..."
1222 ;; :coding-type 'undecided
1223 ;; ...)
;; Each entry is (ALIAS . CODING-SYSTEM); the aliases are defined in
;; the order listed.
(dolist (entry '((binary . no-conversion)
                 (unix   . undecided-unix)
                 (dos    . undecided-dos)
                 (mac    . undecided-mac)))
  (define-coding-system-alias (car entry) (cdr entry)))
;; An `undecided'-type coding system with the :prefer-utf-8 property set.
(define-coding-system 'prefer-utf-8
  "Like `undecided' but prefer UTF-8 when appropriate.
On decoding, if the source contains 8-bit codes and they all
are valid UTF-8 sequences, detect the source as UTF-8 encoding
regardless of the coding priority.
On encoding, if the source contains non-ASCII characters, encode them
by UTF-8."
  :prefer-utf-8 t
  :charset-list '(emacs)
  :mnemonic ?-
  :coding-type 'undecided)
;; A `raw-text'-type coding system marked as being for unibyte use.
(define-coding-system 'raw-text
  "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
characters from the `eight-bit' character set. Each of them is encoded
into a single byte.
When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
  :mnemonic ?t
  :for-unibyte t
  :coding-type 'raw-text)
;; A `raw-text'-type coding system with the EOL type fixed to `unix'.
(define-coding-system 'no-conversion-multibyte
  "Like `no-conversion' but don't read a file into a unibyte buffer."
  :mnemonic ?=
  :eol-type 'unix
  :coding-type 'raw-text)
;; A `charset'-type coding system over the `iso-8859-1' charset.
(define-coding-system 'iso-latin-1
  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
  :mime-charset 'iso-8859-1
  :charset-list '(iso-8859-1)
  :mnemonic ?1
  :coding-type 'charset)

;; Other names for `iso-latin-1'.
(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
(define-coding-system-alias 'latin-1 'iso-latin-1)
1272 ;; Coding systems not specific to each language environment.
;; Note that :charset-list is the symbol `emacs-mule' here, not a list.
(define-coding-system 'emacs-mule
  "Emacs 21 internal format used in buffer and string."
  :mnemonic ?M
  :charset-list 'emacs-mule
  :coding-type 'emacs-mule)
;; Plain UTF-8 over the `unicode' charset; no :bom property is given.
(define-coding-system 'utf-8
  "UTF-8 (no signature (BOM))"
  :mime-charset 'utf-8
  :charset-list '(unicode)
  :mnemonic ?U
  :coding-type 'utf-8)
;; UTF-8 over the `unicode' charset with the :bom property set to t.
(define-coding-system 'utf-8-with-signature
  "UTF-8 (with signature (BOM))"
  :bom t
  :charset-list '(unicode)
  :mnemonic ?U
  :coding-type 'utf-8)
;; UTF-8 whose :bom property is the pair
;; (utf-8-with-signature . utf-8).
(define-coding-system 'utf-8-auto
  "UTF-8 (auto-detect signature (BOM))"
  :bom '(utf-8-with-signature . utf-8)
  :charset-list '(unicode)
  :mnemonic ?U
  :coding-type 'utf-8)

;; `mule-utf-8' is another name for `utf-8'.
(define-coding-system-alias 'mule-utf-8 'utf-8)
;; UTF-8-type coding system whose charset list is `(emacs)' rather
;; than `(unicode)'.
(define-coding-system 'utf-8-emacs
  "Support for all Emacs characters (including non-Unicode characters)."
  :charset-list '(emacs)
  :mnemonic ?U
  :coding-type 'utf-8)
;; The encoding used internally.  It is meant to be able to save any
;; multibyte buffer without losing information.  It can change between
;; Emacs releases, though, so it should only be used for internal files.
(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
;; Little-endian UTF-16; no :bom property is given.
(define-coding-system 'utf-16le
  "UTF-16LE (little endian, no signature (BOM))."
  :mime-charset 'utf-16le
  :mime-text-unsuitable t
  :endian 'little
  :charset-list '(unicode)
  :mnemonic ?U
  :coding-type 'utf-16)
;; Big-endian UTF-16; no :bom property is given.
(define-coding-system 'utf-16be
  "UTF-16BE (big endian, no signature (BOM))."
  :mime-charset 'utf-16be
  :mime-text-unsuitable t
  :endian 'big
  :charset-list '(unicode)
  :mnemonic ?U
  :coding-type 'utf-16)
;; Little-endian UTF-16 with the :bom property set to t.
(define-coding-system 'utf-16le-with-signature
  "UTF-16 (little endian, with signature (BOM))."
  :mime-charset 'utf-16
  :mime-text-unsuitable t
  :endian 'little
  :bom t
  :charset-list '(unicode)
  :mnemonic ?U
  :coding-type 'utf-16)
;; Big-endian UTF-16 with the :bom property set to t.
(define-coding-system 'utf-16be-with-signature
  "UTF-16 (big endian, with signature (BOM))."
  :mime-charset 'utf-16
  :mime-text-unsuitable t
  :endian 'big
  :bom t
  :charset-list '(unicode)
  :mnemonic ?U
  :coding-type 'utf-16)
;; UTF-16 whose :bom property is the pair
;; (utf-16le-with-signature . utf-16be-with-signature) and whose
;; :endian is `big'.
(define-coding-system 'utf-16
  "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
  :mime-charset 'utf-16
  :mime-text-unsuitable t
  :endian 'big
  :bom '(utf-16le-with-signature . utf-16be-with-signature)
  :charset-list '(unicode)
  :mnemonic ?U
  :coding-type 'utf-16)
;; Old names kept for backwards compatibility (they are also used by
;; Mule-UCS).  We prefer the MIME names.
(dolist (entry '((utf-16-le . utf-16le-with-signature)
                 (utf-16-be . utf-16be-with-signature)))
  (define-coding-system-alias (car entry) (cdr entry)))
;; Only the first (G0) designation slot is populated; the other three
;; are nil.
(define-coding-system 'iso-2022-7bit
  "ISO 2022 based 7-bit encoding using only G0."
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition)
  :designation [(ascii t) nil nil nil]
  :charset-list 'iso-2022
  :mnemonic ?J
  :coding-type 'iso-2022)
;; The first and third designation slots are populated; the flags
;; include `7-bit' and `single-shift'.
(define-coding-system 'iso-2022-7bit-ss2
  "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit
           designation single-shift composition)
  :designation [(ascii 94) nil (nil 96) nil]
  :charset-list 'iso-2022
  :mnemonic ?$
  :coding-type 'iso-2022)
;; The first two designation slots are populated; the flags include
;; `7-bit' and `locking-shift'.
(define-coding-system 'iso-2022-7bit-lock
  "ISO-2022 coding system using Locking-Shift for 96-charset."
  :flags '(ascii-at-eol ascii-at-cntl 7-bit
           designation locking-shift composition)
  :designation [(ascii 94) (nil 96) nil nil]
  :charset-list 'iso-2022
  :mnemonic ?&
  :coding-type 'iso-2022)

;; `iso-2022-int-1' is another name for the same coding system.
(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
;; Unlike the generic ISO-2022 coding systems, this one lists its
;; charsets explicitly and names charsets in each designation slot.
(define-coding-system 'iso-2022-7bit-lock-ss2
  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
           single-shift init-bol)
  :designation [(ascii 94)
                (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
                (nil chinese-cns11643-2)
                (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
                     chinese-cns11643-6 chinese-cns11643-7)]
  :charset-list '(ascii
                  japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
                  korean-ksc5601
                  chinese-gb2312
                  chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                  chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                  chinese-cns11643-7)
  :mnemonic ?i
  :coding-type 'iso-2022)

;; `iso-2022-cjk' is another name for the same coding system.
(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
;; Same designation as `iso-2022-7bit-ss2'; the flags omit `short'
;; and `7-bit'.
(define-coding-system 'iso-2022-8bit-ss2
  "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
  :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)
  :designation [(ascii 94) nil (nil 96) nil]
  :charset-list 'iso-2022
  :mnemonic ?@
  :coding-type 'iso-2022)
(define-coding-system 'compound-text
  "Compound text based generic encoding.
This coding system is an extension of X's \"Compound Text Encoding\".
It encodes many characters using the normal ISO-2022 designation sequences,
but it doesn't support extended segments of CTEXT."
  ;; Fixme: `x-ctext' isn't a valid MIME charset and has to be
  ;; special-cased elsewhere -- fx
  :mime-charset 'x-ctext
  :flags '(ascii-at-eol ascii-at-cntl long-form
           designation locking-shift single-shift composition)
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :charset-list 'iso-2022
  :mnemonic ?x
  :coding-type 'iso-2022)

;; Other names for `compound-text'.
(define-coding-system-alias 'x-ctext 'compound-text)
(define-coding-system-alias 'ctext 'compound-text)
;; Same as compound-text, except that no composition escape sequences
;; are produced.  It is used in the post-read and pre-write conversions
;; of compound-text-with-extensions; see mule.el.  This coding system
;; should not have a mime-charset property, so that it does not show
;; up close to the beginning of coding systems ordered by priority.
(define-coding-system 'ctext-no-compositions
  "Compound text based generic encoding.
Like `compound-text', but does not produce escape sequences for compositions."
  :flags '(ascii-at-eol ascii-at-cntl
           designation locking-shift single-shift)
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :charset-list 'iso-2022
  :mnemonic ?x
  :coding-type 'iso-2022)
;; ISO-2022-type coding system with post-read and pre-write
;; conversion functions attached.
(define-coding-system 'compound-text-with-extensions
  "Compound text encoding with ICCCM Extended Segment extensions.
See the variables `ctext-standard-encodings' and
`ctext-non-standard-encodings-alist' for the detail about how
extended segments are handled.
This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
  :mime-charset 'x-ctext
  :pre-write-conversion 'ctext-pre-write-conversion
  :post-read-conversion 'ctext-post-read-conversion
  :flags '(ascii-at-eol ascii-at-cntl long-form
           designation locking-shift single-shift)
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :charset-list 'iso-2022
  :mnemonic ?x
  :coding-type 'iso-2022)

;; Other names for `compound-text-with-extensions'.
(define-coding-system-alias
  'x-ctext-with-extensions 'compound-text-with-extensions)
(define-coding-system-alias
  'ctext-with-extensions 'compound-text-with-extensions)
;; A `charset'-type coding system over `ascii' only, with `?' as the
;; default character.
(define-coding-system 'us-ascii
  "Encode ASCII as-is and encode non-ASCII characters to `?'."
  :mime-charset 'us-ascii
  :default-char ??
  :charset-list '(ascii)
  :mnemonic ?-
  :coding-type 'charset)

;; `iso-safe' is another name for `us-ascii'.
(define-coding-system-alias 'iso-safe 'us-ascii)
;; Declared with coding type `utf-8'; pre-write and post-read
;; conversion functions are attached.
(define-coding-system 'utf-7
  "UTF-7 encoding of Unicode (RFC 2152)."
  :post-read-conversion 'utf-7-post-read-conversion
  :pre-write-conversion 'utf-7-pre-write-conversion
  :charset-list '(unicode)
  :mime-charset 'utf-7
  :mnemonic ?U
  :coding-type 'utf-8)
;; Like `utf-7', but with the IMAP conversion functions and no
;; :mime-charset property.
(define-coding-system 'utf-7-imap
  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
  :post-read-conversion 'utf-7-imap-post-read-conversion
  :pre-write-conversion 'utf-7-imap-pre-write-conversion
  :charset-list '(unicode)
  :mnemonic ?u
  :coding-type 'utf-8)
;; Unless some other coding system is specified explicitly, terminal
;; output uses us-ascii.
(set-safe-terminal-coding-system-internal 'us-ascii)
1514 ;; The other coding-systems are defined in each language specific
1515 ;; files under lisp/language.
;; Normally, set coding system to `undecided' before reading a file.
;; Compiled Emacs Lisp files (*.elc) are read as `utf-8-emacs', and
;; Emacs Lisp source files (*.el) with `prefer-utf-8'.
;; Tar files are not decoded at all; we treat them as raw bytes.

;; Each entry is (REGEXP . CODING-SPEC); the regexp strings are passed
;; through `purecopy'.
(setq file-coding-system-alist
      (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
              '(("\\.elc\\'" . utf-8-emacs)
                ("\\.el\\'" . prefer-utf-8)
                ("\\.utf\\(-8\\)?\\'" . utf-8)
                ("\\.xml\\'" . xml-find-file-coding-system)
                ;; We use raw-text for reading loaddefs.el so that if it
                ;; happens to have DOS or Mac EOLs, they are converted to
                ;; newlines.  This is required to make the special treatment
                ;; of the "\ newline" combination in loaddefs.el, which marks
                ;; the beginning of a doc string, work.
                ;; The dot is escaped so that only a literal "loaddefs.el"
                ;; matches.
                ;; NOTE(review): this entry follows the generic "\\.el\\'"
                ;; entry above; if lookups take the first matching entry,
                ;; it is never reached -- confirm the order is intended.
                ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
                ("\\.tar\\'" . (no-conversion . no-conversion))
                ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
                ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
                ("" . (undecided . nil)))))
1540 ;;; Setting coding categories and their priorities.
;; This setting exists only so that Emacs Lisp source files containing
;; multilingual text can be read while dumping Emacs.  More appropriate
;; values are set by the command `set-language-environment' for each
;; language environment.

(set-coding-system-priority
 'iso-latin-1
 'utf-8
 'iso-2022-7bit)
1554 ;;; Miscellaneous settings.
;; Bind every character from 128 up to (max-char) to
;; `self-insert-command' in the char-table that is the second element
;; of `global-map', so that all multibyte characters self-insert.
(let ((keymap-table (nth 1 global-map))
      (multibyte-range (cons 128 (max-char))))
  (set-char-table-range keymap-table multibyte-range 'self-insert-command))
;; Set the `latin-extra-code-table' slots for codes octal 221 through
;; octal 226 to t.
(dolist (code '(?\221 ?\222 ?\223 ?\224 ?\225 ?\226))
  (aset latin-extra-code-table code t))
;; The old code-pages library is obsoleted by coding systems based on
;; the charsets defined in this file.  User code might still require
;; it, so the feature is provided here.
(provide 'code-pages)
1573 ;;; mule-conf.el ends here