Fix quoting in tramp-find-inline-compress for w32
[emacs.git] / lisp / international / mule-conf.el
blobea687f0ae5e2f07bf123be9f4b2cad5b7e9ea786
1 ;;; mule-conf.el --- configure multilingual environment
3 ;; Copyright (C) 1997-2018 Free Software Foundation, Inc.
4 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
6 ;; Registration Number H14PRO021
7 ;; Copyright (C) 2003
8 ;; National Institute of Advanced Industrial Science and Technology (AIST)
9 ;; Registration Number H13PRO009
11 ;; Keywords: i18n, mule, multilingual, character set, coding system
13 ;; This file is part of GNU Emacs.
15 ;; GNU Emacs is free software: you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation, either version 3 of the License, or
18 ;; (at your option) any later version.
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ;; GNU General Public License for more details.
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
28 ;;; Commentary:
30 ;; This file defines the Emacs charsets and some basic coding systems.
31 ;; Other coding systems are defined in the files in directory
32 ;; lisp/language.
34 ;;; Code:
36 ;;; Remarks
38 ;; The ISO-IR registry is maintained by the Information Processing
39 ;; Society of Japan/Information Technology Standards Commission of
40 ;; Japan (IPSJ/ITSCJ) at https://www.itscj.ipsj.or.jp/itscj_english/.
41 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
42 ;; http://www.ecma.ch/.
44 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
45 ;; MS Windows, which are presumably the only charsets we really need
46 ;; to worry about on such systems:
47 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
48 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
49 ;; 1258, 874, 932, 936, 949, 950
51 ;;; Definitions of character sets.
53 ;; The charsets `ascii', `unicode' and `eight-bit' are already defined
54 ;; in charset.c as below:
56 ;; (define-charset 'ascii
57 ;; ""
58 ;; :dimension 1
59 ;; :code-space [0 127]
60 ;; :iso-final-char ?B
61 ;; :ascii-compatible-p t
62 ;; :emacs-mule-id 0
63 ;; :code-offset 0)
65 ;; (define-charset 'unicode
66 ;; ""
67 ;; :dimension 3
68 ;; :code-space [0 255 0 255 0 16]
69 ;; :ascii-compatible-p t
70 ;; :code-offset 0)
72 ;; (define-charset 'emacs
73 ;; ""
74 ;; :dimension 3
75 ;; :code-space [0 255 0 255 0 63]
76 ;; :ascii-compatible-p t
77 ;; :supplementary-p t
78 ;; :code-offset 0)
80 ;; (define-charset 'eight-bit
81 ;; ""
82 ;; :dimension 1
83 ;; :code-space [128 255]
84 ;; :code-offset #x3FFF80)
86 ;; We now set :docstring, :short-name, and :long-name properties.
;; Attach human-readable metadata to `ascii', `iso-8859-1', `unicode'
;; and `emacs': each one gets a :docstring, a :short-name and a
;; :long-name.
88 (put-charset-property
89 'ascii :docstring "ASCII (ISO646 IRV)")
90 (put-charset-property
91 'ascii :short-name "ASCII")
92 (put-charset-property
93 'ascii :long-name "ASCII (ISO646 IRV)")
94 (put-charset-property
95 'iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)")
96 (put-charset-property
97 'iso-8859-1 :short-name "Latin-1")
98 (put-charset-property
99 'iso-8859-1 :long-name "Latin-1")
100 (put-charset-property
101 'unicode :docstring "Unicode (ISO10646)")
102 (put-charset-property
103 'unicode :short-name "Unicode")
104 (put-charset-property
105 'unicode :long-name "Unicode (ISO10646)")
106 (put-charset-property
107 'emacs :docstring "Full Emacs charset (excluding eight bit chars)")
108 (put-charset-property
109 'emacs :short-name "Emacs")
110 (put-charset-property
111 'emacs :long-name "Emacs")
;; `eight-bit' receives only a :docstring and a :short-name here; no
;; :long-name is set for it.
113 (put-charset-property 'eight-bit :docstring "Raw bytes 128-255")
114 (put-charset-property 'eight-bit :short-name "Raw bytes")
;; Make `ucs' an alias of the `unicode' charset.
116 (define-charset-alias 'ucs 'unicode)
;; One-dimensional charset with code points 32..127.  :code-offset 160
;; places code point 32 at character 160, so the 96 code points cover
;; characters 160..255.
118 (define-charset 'latin-iso8859-1
119 "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
120 :short-name "RHP of Latin-1"
121 :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
122 :iso-final-char ?A
123 :emacs-mule-id 129
124 :code-space [32 127]
125 :code-offset 160)
127 ;; Name perhaps not ideal, but is XEmacs-compatible.
;; Code points 128..159; :code-offset equals the first code point, so
;; each code point corresponds to the character with the same number.
128 (define-charset 'control-1
129 "8-bit control code (0x80..0x9F)"
130 :short-name "8-bit control code"
131 :code-space [128 159]
132 :code-offset 128)
;; Same code points as `control-1' (128..159), but placed at
;; :code-offset #x3FFF80 -- the offset quoted for `eight-bit' in the
;; commented-out definition near the top of this file.
134 (define-charset 'eight-bit-control
135 "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
136 :short-name "Raw bytes 0x80..0x9F"
137 :supplementary-p t
138 :code-space [128 159]
139 :code-offset #x3FFF80) ; see character.h
;; Code points 160..255.  :code-offset #x3FFFA0 is #x3FFF80 + 32, so
;; this range starts right after the 32 code points of
;; `eight-bit-control'.
141 (define-charset 'eight-bit-graphic
142 "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
143 :short-name "Raw bytes 0xA0..0xFF"
144 :supplementary-p t
145 :code-space [160 255]
146 :code-offset #x3FFFA0) ; see character.h
;; Define the 8-bit charset SYMBOL and, when ISO-SYMBOL is non-nil, a
;; companion charset ISO-SYMBOL for its right-hand part.
;;
;; SYMBOL is defined as ascii-compatible with code space [0 255] and
;; map MAP.  NAME becomes its docstring and :long-name, NICKNAME its
;; :short-name.
;;
;; ISO-SYMBOL is defined with code space [32 127] as a :subset of
;; SYMBOL: code points 160..255 of SYMBOL, shifted by -128.  ISO-FINAL
;; and EMACS-MULE-ID are passed through as :iso-final-char and
;; :emacs-mule-id.  When ISO-IR is non-nil it is appended to the
;; docstring as "ISO-IR-<number>"; otherwise that suffix is omitted.
;;
;; SYMBOL, ISO-SYMBOL, NAME, NICKNAME and ISO-IR are each spliced into
;; the expansion more than once, so those argument forms are evaluated
;; repeatedly.
148 (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
149 iso-ir iso-final
150 emacs-mule-id map)
151 `(progn
152 (define-charset ,symbol
153 ,name
154 :short-name ,nickname
155 :long-name ,name
156 :ascii-compatible-p t
157 :code-space [0 255]
158 :map ,map)
159 (if ,iso-symbol
160 (define-charset ,iso-symbol
161 (if ,iso-ir
162 (format "Right-Hand Part of %s (%s): ISO-IR-%d"
163 ,name ,nickname ,iso-ir)
164 (format "Right-Hand Part of %s (%s)" ,name ,nickname))
165 :short-name (format "RHP of %s" ,name)
166 :long-name (format "RHP of %s (%s)" ,name ,nickname)
167 :iso-final-char ,iso-final
168 :emacs-mule-id ,emacs-mule-id
169 :code-space [32 127]
170 :subset (list ,symbol 160 255 -128)))))
;; ISO/IEC 8859 parts 2..16.  The positional arguments of each call are
;;   SYMBOL ISO-SYMBOL NAME NICKNAME ISO-IR ISO-FINAL EMACS-MULE-ID MAP
;; Parts 10, 11, 13 and 16 pass nil as EMACS-MULE-ID.
172 (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
173 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
175 (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
176 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
178 (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
179 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
181 (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
182 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
184 (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
185 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
187 (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
188 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
190 (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
191 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
193 (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
194 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
196 (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
197 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
199 ;; http://www.nectec.or.th/it-standards/iso8859-11/
200 ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
201 ;; plus nbsp
202 (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
203 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
205 ;; 8859-12 doesn't (yet?) exist.
207 (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
208 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
210 (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
211 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
213 (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
214 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
216 (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
217 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
219 ;; No point in keeping it around.
220 (fmakunbound 'define-iso-single-byte-charset)
222 ;; Can this be shared with 8859-11?
223 ;; N.b. not all of these are defined in Unicode.
;; Code points 32..127 with :code-offset #x0E00, i.e. code point 32
;; corresponds to character #x0E00 and code point 127 to #x0E5F.
224 (define-charset 'thai-tis620
225 "TIS620.2533"
226 :short-name "TIS620.2533"
227 :iso-final-char ?T
228 :emacs-mule-id 133
229 :code-space [32 127]
230 :code-offset #x0E00)
232 ;; Fixme: doc for this, c.f. above
;; Full 8-bit charset assembled from three parents: `ascii',
;; `eight-bit-control', and `thai-tis620' with its code points shifted
;; up by 128 (so 32..127 become 160..255).
233 (define-charset 'tis620-2533
234 "TIS620.2533"
235 :short-name "TIS620.2533"
236 :ascii-compatible-p t
237 :code-space [0 255]
238 :superset '(ascii eight-bit-control (thai-tis620 . 128)))
;; Map-based charset with code points 0..#xDF.  It is the parent of
;; `latin-jisx0201', `katakana-jisx0201' and `katakana-sjis', which are
;; all declared as :subset of it.
240 (define-charset 'jisx0201
241 "JISX0201"
242 :short-name "JISX0201"
243 :code-space [0 #xDF]
244 :map "JISX0201")
;; Code points 33..126 of `jisx0201', inherited unshifted (:subset
;; offset 0).
246 (define-charset 'latin-jisx0201
247 "Roman Part of JISX0201.1976"
248 :short-name "JISX0201 Roman"
249 :long-name "Japanese Roman (JISX0201.1976)"
250 :iso-final-char ?J
251 :emacs-mule-id 138
252 :supplementary-p t
253 :code-space [33 126]
254 :subset '(jisx0201 33 126 0))
;; Code points 161..254 of `jisx0201', shifted by -128 so that they
;; appear here as 33..126.
;; NOTE(review): the parent range's upper bound 254 is above the
;; maximum #xDF (223) in `jisx0201's declared :code-space, while
;; `katakana-sjis' takes #xA1..#xDF from the same parent -- confirm
;; the intended upper bound.
256 (define-charset 'katakana-jisx0201
257 "Katakana Part of JISX0201.1976"
258 :short-name "JISX0201 Katakana"
259 :long-name "Japanese Katakana (JISX0201.1976)"
260 :iso-final-char ?I
261 :emacs-mule-id 137
262 :supplementary-p t
263 :code-space [33 126]
264 :subset '(jisx0201 161 254 -128))
266 (define-charset 'chinese-gb2312
267 "GB2312 Chinese simplified: ISO-IR-58"
268 :short-name "GB2312"
269 :long-name "GB2312: ISO-IR-58"
270 :iso-final-char ?A
271 :emacs-mule-id 145
272 :code-space [33 126 33 126]
273 :code-offset #x110000
274 :unify-map "GB2312")
276 (define-charset 'chinese-gbk
277 "GBK Chinese simplified."
278 :short-name "GBK"
279 :code-space [#x40 #xFE #x81 #xFE]
280 :code-offset #x160000
281 :unify-map "GBK")
282 (define-charset-alias 'cp936 'chinese-gbk)
283 (define-charset-alias 'windows-936 'chinese-gbk)
285 (define-charset 'chinese-cns11643-1
286 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
287 :short-name "CNS11643-1"
288 :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
289 :iso-final-char ?G
290 :emacs-mule-id 149
291 :code-space [33 126 33 126]
292 :code-offset #x114000
293 :unify-map "CNS-1")
295 (define-charset 'chinese-cns11643-2
296 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
297 :short-name "CNS11643-2"
298 :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
299 :iso-final-char ?H
300 :emacs-mule-id 150
301 :code-space [33 126 33 126]
302 :code-offset #x118000
303 :unify-map "CNS-2")
305 (define-charset 'chinese-cns11643-3
306 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
307 :short-name "CNS11643-3"
308 :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
309 :iso-final-char ?I
310 :code-space [33 126 33 126]
311 :emacs-mule-id 246
312 :code-offset #x11C000
313 :unify-map "CNS-3")
315 (define-charset 'chinese-cns11643-4
316 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
317 :short-name "CNS11643-4"
318 :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
319 :iso-final-char ?J
320 :emacs-mule-id 247
321 :code-space [33 126 33 126]
322 :code-offset #x120000
323 :unify-map "CNS-4")
325 (define-charset 'chinese-cns11643-5
326 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
327 :short-name "CNS11643-5"
328 :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
329 :iso-final-char ?K
330 :emacs-mule-id 248
331 :code-space [33 126 33 126]
332 :code-offset #x124000
333 :unify-map "CNS-5")
335 (define-charset 'chinese-cns11643-6
336 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
337 :short-name "CNS11643-6"
338 :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
339 :iso-final-char ?L
340 :emacs-mule-id 249
341 :code-space [33 126 33 126]
342 :code-offset #x128000
343 :unify-map "CNS-6")
345 (define-charset 'chinese-cns11643-7
346 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
347 :short-name "CNS11643-7"
348 :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
349 :iso-final-char ?M
350 :emacs-mule-id 250
351 :code-space [33 126 33 126]
352 :code-offset #x12C000
353 :unify-map "CNS-7")
355 (define-charset 'big5
356 "Big5 (Chinese traditional)"
357 :short-name "Big5"
358 :code-space [#x40 #xFE #xA1 #xFE]
359 :code-offset #x130000
360 :unify-map "BIG5")
361 ;; Fixme: AKA cp950 according to
362 ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
363 ;; that correct?
;; Level-1 (frequently used) part of Big5, re-encoded into a 94x94
;; code space ([#x21 #x7E] in both dimensions) at :code-offset
;; #x135000.  The Big5 range quoted in :long-name now matches the
;; docstring (A141-C67E); 0x7F is not a valid Big5 trail byte, so the
;; previous "C67F" could not be the end of the range.
365 (define-charset 'chinese-big5-1
366 "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
367 :short-name "Big5 (Level-1)"
368 :long-name "Big5 (Level-1) A141-C67E"
369 :iso-final-char ?0
370 :emacs-mule-id 152
371 :supplementary-p t
372 :code-space [#x21 #x7E #x21 #x7E]
373 :code-offset #x135000
374 :unify-map "BIG5-1")
376 (define-charset 'chinese-big5-2
377 "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
378 :short-name "Big5 (Level-2)"
379 :long-name "Big5 (Level-2) C940-FEFE"
380 :iso-final-char ?1
381 :emacs-mule-id 153
382 :supplementary-p t
383 :code-space [#x21 #x7E #x21 #x7E]
384 :code-offset #x137800
385 :unify-map "BIG5-2")
387 (define-charset 'japanese-jisx0208
388 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
389 :short-name "JISX0208"
390 :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
391 :iso-final-char ?B
392 :emacs-mule-id 146
393 :code-space [33 126 33 126]
394 :code-offset #x140000
395 :unify-map "JISX0208")
397 (define-charset 'japanese-jisx0208-1978
398 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
399 :short-name "JISX0208.1978"
400 :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
401 :iso-final-char ?@
402 :emacs-mule-id 144
403 :code-space [33 126 33 126]
404 :code-offset #x144000
405 :unify-map "JISC6226")
407 (define-charset 'japanese-jisx0212
408 "JISX0212 Japanese supplement: ISO-IR-159"
409 :short-name "JISX0212"
410 :long-name "JISX0212 (Japanese): ISO-IR-159"
411 :iso-final-char ?D
412 :emacs-mule-id 148
413 :code-space [33 126 33 126]
414 :code-offset #x148000
415 :unify-map "JISX0212")
417 ;; Note that jisx0213 contains characters not in Unicode (3.2?). It's
418 ;; arguable whether it should have a unify-map.
419 (define-charset 'japanese-jisx0213-1
420 "JISX0213.2000 Plane 1 (Japanese)"
421 :short-name "JISX0213-1"
422 :iso-final-char ?O
423 :emacs-mule-id 151
424 :unify-map "JISX2131"
425 :code-space [33 126 33 126]
426 :code-offset #x14C000)
428 (define-charset 'japanese-jisx0213-2
429 "JISX0213.2000 Plane 2 (Japanese)"
430 :short-name "JISX0213-2"
431 :iso-final-char ?P
432 :emacs-mule-id 254
433 :unify-map "JISX2132"
434 :code-space [33 126 33 126]
435 :code-offset #x150000)
437 (define-charset 'japanese-jisx0213-a
438 "JISX0213.2004 adds these characters to JISX0213.2000."
439 :short-name "JISX0213A"
440 :dimension 2
441 :code-space [33 126 33 126]
442 :supplementary-p t
443 :map "JISX213A")
445 (define-charset 'japanese-jisx0213.2004-1
446 "JISX0213.2004 Plane1 (Japanese)"
447 :short-name "JISX0213.2004-1"
448 :dimension 2
449 :code-space [33 126 33 126]
450 :iso-final-char ?Q
451 :superset '(japanese-jisx0213-a japanese-jisx0213-1))
453 (define-charset 'katakana-sjis
454 "Katakana part of Shift-JIS"
455 :dimension 1
456 :code-space [#xA1 #xDF]
457 :subset '(jisx0201 #xA1 #xDF 0)
458 :supplementary-p t)
460 (define-charset 'cp932-2-byte
461 "2-byte part of CP932"
462 :dimension 2
463 :map "CP932-2BYTE"
464 :code-space [#x40 #xFC #x81 #xFC]
465 :supplementary-p t)
;; CP932 is composed from three parent charsets, all inherited without
;; an offset: `ascii', `katakana-sjis' (single-byte katakana) and
;; `cp932-2-byte' (the two-byte part).
467 (define-charset 'cp932
468 "CP932 (Microsoft shift-jis)"
469 :code-space [#x00 #xFF #x00 #xFE]
470 :short-name "CP932"
471 :superset '(ascii katakana-sjis cp932-2-byte))
473 (define-charset 'korean-ksc5601
474 "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
475 :short-name "KSC5601"
476 :long-name "KSC5601 (Korean): ISO-IR-149"
477 :iso-final-char ?C
478 :emacs-mule-id 147
479 :code-space [33 126 33 126]
480 :code-offset #x279f94 ; ... #x27c217
481 :unify-map "KSC5601")
;; Occupies the character range starting at #x27c218, immediately
;; after the range annotated on `korean-ksc5601' (which ends at
;; #x27c217).
;; NOTE(review): :short-name is "Big5", identical to the :short-name
;; of the plain `big5' charset -- confirm whether "Big5-HKSCS" was
;; intended.
483 (define-charset 'big5-hkscs
484 "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
485 :short-name "Big5"
486 :code-space [#x40 #xFE #xA1 #xFE]
487 :code-offset #x27c218 ; ... #x280839
488 :unify-map "BIG5-HKSCS")
490 (define-charset 'cp949-2-byte
491 "2-byte part of CP949"
492 :dimension 2
493 :map "CP949-2BYTE"
494 :code-space [#x41 #xFE #x81 #xFD]
495 :supplementary-p t)
497 (define-charset 'cp949
498 "CP949 (Korean)"
499 :short-name "CP949"
500 :long-name "CP949 (Korean)"
501 :code-space [#x00 #xFE #x00 #xFD]
502 :superset '(ascii cp949-2-byte))
504 (define-charset 'chinese-sisheng
505 "SiSheng characters for PinYin/ZhuYin"
506 :short-name "SiSheng"
507 :long-name "SiSheng (PinYin/ZhuYin)"
508 :iso-final-char ?0
509 :emacs-mule-id 160
510 :code-space [33 126]
511 :unify-map "MULE-sisheng"
512 :supplementary-p t
513 :code-offset #x200000)
515 ;; A subset of the 1989 version of IPA. It consists of the consonant
516 ;; signs used in English, French, German and Italian, and all vowel
517 ;; signs in the table. [says old MULE doc]
518 (define-charset 'ipa
519 "IPA (International Phonetic Association)"
520 :short-name "IPA"
521 :iso-final-char ?0
522 :emacs-mule-id 161
523 :unify-map "MULE-ipa"
524 :code-space [32 127]
525 :supplementary-p t
526 :code-offset #x200080)
528 (define-charset 'viscii
529 "VISCII1.1"
530 :short-name "VISCII"
531 :long-name "VISCII 1.1"
532 :code-space [0 255]
533 :map "VISCII")
535 (define-charset 'vietnamese-viscii-lower
536 "VISCII1.1 lower-case"
537 :short-name "VISCII lower"
538 :long-name "VISCII lower-case"
539 :iso-final-char ?1
540 :emacs-mule-id 162
541 :code-space [32 127]
542 :code-offset #x200200
543 :supplementary-p t
544 :unify-map "MULE-lviscii")
546 (define-charset 'vietnamese-viscii-upper
547 "VISCII1.1 upper-case"
548 :short-name "VISCII upper"
549 :long-name "VISCII upper-case"
550 :iso-final-char ?2
551 :emacs-mule-id 163
552 :code-space [32 127]
553 :code-offset #x200280
554 :supplementary-p t
555 :unify-map "MULE-uviscii")
557 (define-charset 'vscii
558 "VSCII1.1 (TCVN-5712 VN1)"
559 :short-name "VSCII"
560 :code-space [0 255]
561 :map "VSCII")
563 (define-charset-alias 'tcvn-5712 'vscii)
565 ;; Fixme: see note in tcvn.map about combining characters
566 (define-charset 'vscii-2
567 "VSCII-2 (TCVN-5712 VN2)"
568 :code-space [0 255]
569 :map "VSCII-2")
571 (define-charset 'koi8-r
572 "KOI8-R"
573 :short-name "KOI8-R"
574 :ascii-compatible-p t
575 :code-space [0 255]
576 :map "KOI8-R")
578 (define-charset-alias 'koi8 'koi8-r)
580 (define-charset 'alternativnyj
581 "ALTERNATIVNYJ"
582 :short-name "alternativnyj"
583 :ascii-compatible-p t
584 :code-space [0 255]
585 :map "ALTERNATIVNYJ")
587 (define-charset 'cp866
588 "CP866"
589 :short-name "cp866"
590 :ascii-compatible-p t
591 :code-space [0 255]
592 :map "IBM866")
593 (define-charset-alias 'ibm866 'cp866)
595 (define-charset 'koi8-u
596 "KOI8-U"
597 :short-name "KOI8-U"
598 :ascii-compatible-p t
599 :code-space [0 255]
600 :map "KOI8-U")
602 (define-charset 'koi8-t
603 "KOI8-T"
604 :short-name "KOI8-T"
605 :ascii-compatible-p t
606 :code-space [0 255]
607 :map "KOI8-T")
609 (define-charset 'georgian-ps
610 "GEORGIAN-PS"
611 :short-name "GEORGIAN-PS"
612 :ascii-compatible-p t
613 :code-space [0 255]
614 :map "KA-PS")
616 (define-charset 'georgian-academy
617 "GEORGIAN-ACADEMY"
618 :short-name "GEORGIAN-ACADEMY"
619 :ascii-compatible-p t
620 :code-space [0 255]
621 :map "KA-ACADEMY")
623 (define-charset 'windows-1250
624 "WINDOWS-1250 (Central Europe)"
625 :short-name "WINDOWS-1250"
626 :ascii-compatible-p t
627 :code-space [0 255]
628 :map "CP1250")
629 (define-charset-alias 'cp1250 'windows-1250)
631 (define-charset 'windows-1251
632 "WINDOWS-1251 (Cyrillic)"
633 :short-name "WINDOWS-1251"
634 :ascii-compatible-p t
635 :code-space [0 255]
636 :map "CP1251")
637 (define-charset-alias 'cp1251 'windows-1251)
639 (define-charset 'windows-1252
640 "WINDOWS-1252 (Latin I)"
641 :short-name "WINDOWS-1252"
642 :ascii-compatible-p t
643 :code-space [0 255]
644 :map "CP1252")
645 (define-charset-alias 'cp1252 'windows-1252)
647 (define-charset 'windows-1253
648 "WINDOWS-1253 (Greek)"
649 :short-name "WINDOWS-1253"
650 :ascii-compatible-p t
651 :code-space [0 255]
652 :map "CP1253")
653 (define-charset-alias 'cp1253 'windows-1253)
655 (define-charset 'windows-1254
656 "WINDOWS-1254 (Turkish)"
657 :short-name "WINDOWS-1254"
658 :ascii-compatible-p t
659 :code-space [0 255]
660 :map "CP1254")
661 (define-charset-alias 'cp1254 'windows-1254)
663 (define-charset 'windows-1255
664 "WINDOWS-1255 (Hebrew)"
665 :short-name "WINDOWS-1255"
666 :ascii-compatible-p t
667 :code-space [0 255]
668 :map "CP1255")
669 (define-charset-alias 'cp1255 'windows-1255)
671 (define-charset 'windows-1256
672 "WINDOWS-1256 (Arabic)"
673 :short-name "WINDOWS-1256"
674 :ascii-compatible-p t
675 :code-space [0 255]
676 :map "CP1256")
677 (define-charset-alias 'cp1256 'windows-1256)
679 (define-charset 'windows-1257
680 "WINDOWS-1257 (Baltic)"
681 :short-name "WINDOWS-1257"
682 :ascii-compatible-p t
683 :code-space [0 255]
684 :map "CP1257")
685 (define-charset-alias 'cp1257 'windows-1257)
687 (define-charset 'windows-1258
688 "WINDOWS-1258 (Viet Nam)"
689 :short-name "WINDOWS-1258"
690 :ascii-compatible-p t
691 :code-space [0 255]
692 :map "CP1258")
693 (define-charset-alias 'cp1258 'windows-1258)
695 (define-charset 'next
696 "NEXT"
697 :short-name "NEXT"
698 :ascii-compatible-p t
699 :code-space [0 255]
700 :map "NEXTSTEP")
702 (define-charset 'cp1125
703 "CP1125"
704 :short-name "CP1125"
705 :code-space [0 255]
706 :ascii-compatible-p t
707 :map "CP1125")
708 (define-charset-alias 'ruscii 'cp1125)
709 ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
710 (define-charset-alias 'cp866u 'cp1125)
712 ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
713 ;; shows this as not ASCII compatible, with various graphics in
714 ;; 0x01-0x1F.
715 (define-charset 'cp437
716 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
717 :short-name "CP437"
718 :code-space [0 255]
719 :ascii-compatible-p t
720 :map "IBM437")
722 (define-charset 'cp720
723 "CP720 (Arabic)"
724 :short-name "CP720"
725 :code-space [0 255]
726 :ascii-compatible-p t
727 :map "CP720")
729 (define-charset 'cp737
730 "CP737 (PC Greek)"
731 :short-name "CP737"
732 :code-space [0 255]
733 :ascii-compatible-p t
734 :map "CP737")
736 (define-charset 'cp775
737 "CP775 (PC Baltic)"
738 :short-name "CP775"
739 :code-space [0 255]
740 :ascii-compatible-p t
741 :map "CP775")
743 (define-charset 'cp851
744 "CP851 (Greek)"
745 :short-name "CP851"
746 :code-space [0 255]
747 :ascii-compatible-p t
748 :map "IBM851")
750 (define-charset 'cp852
751 "CP852 (MS-DOS Latin-2)"
752 :short-name "CP852"
753 :code-space [0 255]
754 :ascii-compatible-p t
755 :map "IBM852")
757 (define-charset 'cp855
758 "CP855 (IBM Cyrillic)"
759 :short-name "CP855"
760 :code-space [0 255]
761 :ascii-compatible-p t
762 :map "IBM855")
764 (define-charset 'cp857
765 "CP857 (IBM Turkish)"
766 :short-name "CP857"
767 :code-space [0 255]
768 :ascii-compatible-p t
769 :map "IBM857")
771 (define-charset 'cp858
772 "CP858 (Multilingual Latin I + Euro)"
773 :short-name "CP858"
774 :code-space [0 255]
775 :ascii-compatible-p t
776 :map "CP858")
777 (define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
779 (define-charset 'cp860
780 "CP860 (MS-DOS Portuguese)"
781 :short-name "CP860"
782 :code-space [0 255]
783 :ascii-compatible-p t
784 :map "IBM860")
786 (define-charset 'cp861
787 "CP861 (MS-DOS Icelandic)"
788 :short-name "CP861"
789 :code-space [0 255]
790 :ascii-compatible-p t
791 :map "IBM861")
793 (define-charset 'cp862
794 "CP862 (PC Hebrew)"
795 :short-name "CP862"
796 :code-space [0 255]
797 :ascii-compatible-p t
798 :map "IBM862")
800 (define-charset 'cp863
801 "CP863 (MS-DOS Canadian French)"
802 :short-name "CP863"
803 :code-space [0 255]
804 :ascii-compatible-p t
805 :map "IBM863")
807 (define-charset 'cp864
808 "CP864 (PC Arabic)"
809 :short-name "CP864"
810 :code-space [0 255]
811 :ascii-compatible-p t
812 :map "IBM864")
814 (define-charset 'cp865
815 "CP865 (MS-DOS Nordic)"
816 :short-name "CP865"
817 :code-space [0 255]
818 :ascii-compatible-p t
819 :map "IBM865")
821 (define-charset 'cp869
822 "CP869 (IBM Modern Greek)"
823 :short-name "CP869"
824 :code-space [0 255]
825 :ascii-compatible-p t
826 :map "IBM869")
828 (define-charset 'cp874
829 "CP874 (IBM Thai)"
830 :short-name "CP874"
831 :code-space [0 255]
832 :ascii-compatible-p t
833 :map "IBM874")
835 ;; For Arabic, we need three different types of character sets.
836 ;; Digits are of direction left-to-right and of width 1-column.
837 ;; Others are of direction right-to-left and of width 1-column or
838 ;; 2-column.
839 (define-charset 'arabic-digit
840 "Arabic digit"
841 :short-name "Arabic digit"
842 :iso-final-char ?2
843 :emacs-mule-id 164
844 :supplementary-p t
845 :code-space [34 42]
846 :code-offset #x0600)
848 (define-charset 'arabic-1-column
849 "Arabic 1-column"
850 :short-name "Arabic 1-col"
851 :long-name "Arabic 1-column"
852 :iso-final-char ?3
853 :emacs-mule-id 165
854 :supplementary-p t
855 :code-space [33 126]
856 :code-offset #x200100)
858 (define-charset 'arabic-2-column
859 "Arabic 2-column"
860 :short-name "Arabic 2-col"
861 :long-name "Arabic 2-column"
862 :iso-final-char ?4
863 :emacs-mule-id 224
864 :supplementary-p t
865 :code-space [33 126]
866 :code-offset #x200180)
868 ;; Lao script.
869 ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
870 ;; Not all of them are defined in Unicode.
;; Code points 33..126 with :code-offset #x0E81: code point 33
;; corresponds to character #x0E81.
;; NOTE(review): 94 code points starting at #x0E81 end at #x0EDE, one
;; short of the 0EDF upper bound quoted in the docstring -- confirm
;; which bound is intended.
871 (define-charset 'lao
872 "Lao characters (ISO10646 0E81..0EDF)"
873 :short-name "Lao"
874 :iso-final-char ?1
875 :emacs-mule-id 167
876 :supplementary-p t
877 :code-space [33 126]
878 :code-offset #x0E81)
880 (define-charset 'mule-lao
881 "Lao characters (ISO10646 0E81..0EDF)"
882 :short-name "Lao"
883 :code-space [0 255]
884 :supplementary-p t
885 :superset '(ascii eight-bit-control (lao . 128)))
888 ;; Indian scripts. Symbolic charset for data exchange. Glyphs are
889 ;; not assigned. They are automatically converted to each Indian
890 ;; script which IS-13194 supports.
892 (define-charset 'indian-is13194
893 "7-bit representation of IS 13194 (ISCII) for Devanagari"
894 :short-name "IS 13194 (DEV)"
895 :long-name "Indian IS 13194 (DEV)"
896 :iso-final-char ?5
897 :emacs-mule-id 225
898 :supplementary-p t
899 :code-space [33 126]
900 :code-offset #x180000
901 :unify-map "MULE-is13194")
;; Generate the per-script glyph charsets `<script>-cdac' and
;; `<script>-akruti'.  Every generated charset has code space [0 255]
;; and is given its own 256-character slice: CODE-OFFSET starts at
;; #x180100 (the same value as `indian-glyph's :code-offset) and is
;; advanced by #x100 after each definition.  The counter is shared by
;; both loops, so the 11 CDAC charsets take #x180100..#x180BFF and the
;; 9 AKRUTI charsets follow at #x180C00..#x1814FF.
903 (let ((code-offset #x180100))
;; CDAC fonts: one charset per script, named by appending "-cdac".
904 (dolist (script '(devanagari sanskrit bengali tamil telugu assamese
905 oriya kannada malayalam gujarati punjabi))
906 (define-charset (intern (format "%s-cdac" script))
907 (format
908 "Glyphs of %s script for CDAC font. Subset of `indian-glyph'."
909 (capitalize (symbol-name script)))
910 :short-name (format "CDAC %s glyphs" (capitalize (symbol-name script)))
911 :supplementary-p t
912 :code-space [0 255]
913 :code-offset code-offset)
914 (setq code-offset (+ code-offset #x100)))
;; AKRUTI fonts: same scheme with an "-akruti" suffix.  The script
;; list is shorter (no sanskrit or assamese) and in a different order.
916 (dolist (script '(devanagari bengali punjabi gujarati
917 oriya tamil telugu kannada malayalam))
918 (define-charset (intern (format "%s-akruti" script))
919 (format
920 "Glyphs of %s script for AKRUTI font. Subset of `indian-glyph'."
921 (capitalize (symbol-name script)))
922 :short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script)))
923 :supplementary-p t
924 :code-space [0 255]
925 :code-offset code-offset)
926 (setq code-offset (+ code-offset #x100))))
;; Two-dimensional charset (code space [32 127] in both dimensions)
;; at :code-offset #x180100.  The generated `*-cdac' and `*-akruti'
;; charsets start at the same offset and describe themselves as
;; subsets of this one.
928 (define-charset 'indian-glyph
929 "Glyphs for Indian characters."
930 :short-name "Indian glyph"
931 :iso-final-char ?4
932 :emacs-mule-id 240
933 :supplementary-p t
934 :code-space [32 127 32 127]
935 :code-offset #x180100)
937 ;; Actual Glyph for 1-column width.
;; Two-dimensional 94x94 charset at :code-offset #x184000.
;; NOTE(review): `indian-2-column' declares the same :emacs-mule-id
;; (251), :code-space and :code-offset; the two definitions differ
;; only in name, descriptive strings and :iso-final-char -- confirm
;; that the shared id and offset are intentional.
938 (define-charset 'indian-1-column
939 "Indian charset for 1-column width glyphs."
940 :short-name "Indian 1-col"
941 :long-name "Indian 1 Column"
942 :iso-final-char ?6
943 :emacs-mule-id 251
944 :supplementary-p t
945 :code-space [33 126 33 126]
946 :code-offset #x184000)
948 ;; Actual Glyph for 2-column width.
;; Two-dimensional 94x94 charset at :code-offset #x184000.
;; NOTE(review): `indian-1-column' declares the same :emacs-mule-id
;; (251), :code-space and :code-offset; the two definitions differ
;; only in name, descriptive strings and :iso-final-char -- confirm
;; that the shared id and offset are intentional.
949 (define-charset 'indian-2-column
950 "Indian charset for 2-column width glyphs."
951 :short-name "Indian 2-col"
952 :long-name "Indian 2 Column"
953 :iso-final-char ?5
954 :emacs-mule-id 251
955 :supplementary-p t
956 :code-space [33 126 33 126]
957 :code-offset #x184000)
;; Two-dimensional charset for Tibetan (code space [33 126 33 37]) at
;; :code-offset #x190000, unified through the "MULE-tibetan" map.
;; :iso-final-char is given exactly once; the property list previously
;; carried the same `:iso-final-char ?7' pair twice.
959 (define-charset 'tibetan
960 "Tibetan characters"
962 :short-name "Tibetan 2-col"
963 :long-name "Tibetan 2 column"
964 :iso-final-char ?7
965 :emacs-mule-id 252
966 :unify-map "MULE-tibetan"
967 :supplementary-p t
968 :code-space [33 126 33 37]
969 :code-offset #x190000)
;; Uses the same :code-space and :code-offset (#x190000) as `tibetan',
;; but has its own :iso-final-char and :emacs-mule-id and declares no
;; :unify-map.
971 (define-charset 'tibetan-1-column
972 "Tibetan 1 column glyph"
973 :short-name "Tibetan 1-col"
974 :long-name "Tibetan 1 column"
975 :iso-final-char ?8
976 :emacs-mule-id 241
977 :supplementary-p t
978 :code-space [33 126 33 37]
979 :code-offset #x190000)
981 ;; Subsets of Unicode.
;; Code space [#x20 #x7f #x20 #x47] holds 96 x 40 = 3840 (#xF00) code
;; points; from :code-offset #x2500 they reach exactly #x33FF.
982 (define-charset 'mule-unicode-2500-33ff
983 "Unicode characters of the range U+2500..U+33FF."
984 :short-name "Unicode subset 2"
985 :long-name "Unicode subset (U+2500..U+33FF)"
986 :iso-final-char ?2
987 :emacs-mule-id 242
988 :supplementary-p t
989 :code-space [#x20 #x7f #x20 #x47]
990 :code-offset #x2500)
;; Code space [#x20 #x7F #x20 #x75] holds 96 x 86 = 8256 code points,
;; more than the 8192 characters in U+E000..U+FFFF, so :max-code
;; (30015 = #x753F) cuts the charset off at the code point that the
;; original comment marks as U+FFFF.
;; :long-name now writes the range as "U+E000..U+FFFF", matching the
;; docstring and the sibling `mule-unicode-*' charsets (it previously
;; read "U+E000+FFFF").
992 (define-charset 'mule-unicode-e000-ffff
993 "Unicode characters of the range U+E000..U+FFFF."
994 :short-name "Unicode subset 3"
995 :long-name "Unicode subset (U+E000..U+FFFF)"
996 :iso-final-char ?3
997 :emacs-mule-id 243
998 :supplementary-p t
999 :code-space [#x20 #x7F #x20 #x75]
1000 :code-offset #xE000
1001 :max-code 30015) ; U+FFFF
;; Code space [#x20 #x7F #x20 #x7F] holds 96 x 96 = 9216 (#x2400) code
;; points; from :code-offset #x100 they reach exactly #x24FF.
1003 (define-charset 'mule-unicode-0100-24ff
1004 "Unicode characters of the range U+0100..U+24FF."
1005 :short-name "Unicode subset"
1006 :long-name "Unicode subset (U+0100..U+24FF)"
1007 :iso-final-char ?1
1008 :emacs-mule-id 244
1009 :supplementary-p t
1010 :code-space [#x20 #x7F #x20 #x7F]
1011 :code-offset #x100)
1013 (define-charset 'unicode-bmp
1014 "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
1015 :short-name "Unicode BMP"
1016 :code-space [0 255 0 255]
1017 :code-offset 0)
;; Two-byte code space [0 255 0 255] (65536 code points) at
;; :code-offset #x10000, i.e. U+10000..U+1FFFF.
;; :short-name no longer carries a trailing space, consistent with
;; "Unicode BMP", "Unicode SIP" and "Unicode SSP".
1019 (define-charset 'unicode-smp
1020 "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
1021 :short-name "Unicode SMP"
1022 :code-space [0 255 0 255]
1023 :code-offset #x10000)
1025 (define-charset 'unicode-sip
1026 "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
1027 :short-name "Unicode SIP"
1028 :code-space [0 255 0 255]
1029 :code-offset #x20000)
1031 (define-charset 'unicode-ssp
1032 "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
1033 :short-name "Unicode SSP"
1034 :code-space [0 255 0 255]
1035 :code-offset #xE0000)
1037 (define-charset 'ethiopic
1038 "Ethiopic characters for Amharic and Tigrigna."
1039 :short-name "Ethiopic"
1040 :long-name "Ethiopic characters"
1041 :iso-final-char ?3
1042 :emacs-mule-id 245
1043 :supplementary-p t
1044 :unify-map "MULE-ethiopic"
1045 :code-space [33 126 33 126]
1046 :code-offset #x1A0000)
1048 (define-charset 'mac-roman
1049 "Mac Roman charset"
1050 :short-name "Mac Roman"
1051 :ascii-compatible-p t
1052 :code-space [0 255]
1053 :map "MACINTOSH")
1055 ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
1056 (define-charset 'ebcdic-us
1057 "US version of EBCDIC"
1058 :short-name "EBCDIC-US"
1059 :code-space [0 255]
1060 :mime-charset 'ebcdic-us
1061 :map "EBCDICUS")
1063 (define-charset 'ebcdic-uk
1064 "UK version of EBCDIC"
1065 :short-name "EBCDIC-UK"
1066 :code-space [0 255]
1067 :mime-charset 'ebcdic-uk
1068 :map "EBCDICUK")
1070 (define-charset 'ibm1047
1071 ;; Says groff:
1072 "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
1073 :short-name "IBM1047"
1074 :code-space [0 255]
1075 :mime-charset 'ibm1047
1076 :map "IBM1047")
1077 (define-charset-alias 'cp1047 'ibm1047)
;; Map-based, ASCII-compatible single-byte charset (code points
;; 0..255) loaded from the "HP-ROMAN8" map.  The docstring spells the
;; vendor name correctly ("Hewlett-Packard").
1079 (define-charset 'hp-roman8
1080 "Encoding used by Hewlett-Packard printer software"
1081 :short-name "HP-ROMAN8"
1082 :ascii-compatible-p t
1083 :code-space [0 255]
1084 :map "HP-ROMAN8")
1086 ;; To make a coding system with this, a pre-write-conversion should
1087 ;; account for the commented-out multi-valued code points in
1088 ;; stdenc.map.
1089 (define-charset 'adobe-standard-encoding
1090 "Adobe `standard encoding' used in PostScript"
1091 :short-name "ADOBE-STANDARD-ENCODING"
1092 :code-space [#x20 255]
1093 :map "stdenc")
;; Single-byte charset over codes #x20..#xFF, populated from the
;; "symbol" map.
(define-charset 'symbol
  "Adobe symbol encoding used in PostScript"
  :short-name "ADOBE-SYMBOL"
  :code-space [#x20 #xFF]
  :map "symbol")
;; Single-byte, ASCII-compatible charset populated from the "IBM850" map.
(define-charset 'ibm850
  "DOS codepage 850 (Latin-1)"
  :short-name "IBM850"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "IBM850")

;; `cp850' names the same charset.
(define-charset-alias 'cp850 'ibm850)
;; Single-byte, ASCII-compatible charset populated from the "MIK" map.
(define-charset 'mik
  "Bulgarian DOS codepage"
  :short-name "MIK"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "MIK")
;; Single-byte, ASCII-compatible charset populated from the "PTCP154"
;; map.  Its MIME name is `pt154'.
(define-charset 'ptcp154
  "ParaType codepage (Asian Cyrillic)"
  :short-name "PT154"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :mime-charset 'pt154
  :map "PTCP154")

;; Additional names for the same charset, registered in this order.
(dolist (alias '(pt154 cp154))
  (define-charset-alias alias 'ptcp154))
;; Two-byte part of GB18030, populated from the "GB180302" map.
(define-charset 'gb18030-2-byte
  "GB18030 2-byte (0x814E..0xFEFE)"
  :code-space [#x40 #xFE                ; low-order byte
               #x81 #xFE]               ; high-order byte
  :supplementary-p t
  :map "GB180302")
;; Four-byte part of GB18030 covering the BMP, populated from the
;; "GB180304" map.  The byte ranges are listed lowest-order byte first.
(define-charset 'gb18030-4-byte-bmp
  "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
  :code-space [#x30 #x39
               #x81 #xFE
               #x30 #x39
               #x81 #x84]
  :supplementary-p t
  :map "GB180304")
;; Four-byte part of GB18030 for the supplementary planes, defined by
;; an offset rather than a map.  The byte ranges are listed
;; lowest-order byte first.
(define-charset 'gb18030-4-byte-smp
  "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
  :code-space [#x30 #x39
               #x81 #xFE
               #x30 #x39
               #x90 #xE3]
  ;; Each bound is written as (HIGH-16-BITS . LOW-16-BITS).
  :min-code '(#x9030 . #x8130)
  :max-code '(#xE332 . #x9A35)
  :supplementary-p t
  :code-offset #x10000)
;; Four-byte GB18030 codes between the BMP block and the SMP block,
;; defined by an offset rather than a map.  The byte ranges are listed
;; lowest-order byte first.
(define-charset 'gb18030-4-byte-ext-1
  "GB18030 4-byte (0x8431A530-0x8F39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
  ;; Each bound is written as (HIGH-16-BITS . LOW-16-BITS).
  :min-code '(#x8431 . #xA530)
  :max-code '(#x8F39 . #xFE39)
  :supplementary-p t
  ;; The closing paren must come before the trailing comment; otherwise
  ;; the comment swallows it and the form is left unterminated.
  :code-offset #x200000)                ; ... #x22484B
;; Four-byte GB18030 codes above the SMP block, defined by an offset
;; rather than a map.  The byte ranges are listed lowest-order byte
;; first.
(define-charset 'gb18030-4-byte-ext-2
  "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
  ;; Each bound is written as (HIGH-16-BITS . LOW-16-BITS).
  :min-code '(#xE332 . #x9A36)
  :max-code '(#xFE39 . #xFE39)
  :supplementary-p t
  ;; The closing paren must come before the trailing comment; otherwise
  ;; the comment swallows it and the form is left unterminated.
  :code-offset #x22484C)                ; ... #x279f93
;; The complete GB18030 charset, assembled as a superset of ASCII and
;; the five partial charsets listed below.
(define-charset 'gb18030
  "GB18030"
  :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
  :min-code 0
  :max-code '(#xFE39 . #xFE39)
  :superset '(ascii
              gb18030-2-byte
              gb18030-4-byte-bmp
              gb18030-4-byte-smp
              gb18030-4-byte-ext-1
              gb18030-4-byte-ext-2))
;; Two-byte charset; each byte ranges over 33..126 (94 positions).
(define-charset 'chinese-cns11643-15
  "CNS11643 Plane 15 Chinese Traditional"
  :short-name "CNS11643-15"
  :long-name "CNS11643-15 (Chinese traditional)"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x27A000
  :unify-map "CNS-F")
;; Call `unify-charset' on each of these charsets, in this order.
(dolist (charset '(chinese-gb2312
                   chinese-gbk
                   chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7
                   chinese-cns11643-15
                   big5
                   chinese-big5-1
                   chinese-big5-2
                   big5-hkscs
                   korean-ksc5601
                   vietnamese-viscii-lower
                   vietnamese-viscii-upper
                   chinese-sisheng
                   ipa
                   tibetan
                   ethiopic
                   indian-is13194
                   japanese-jisx0208-1978
                   japanese-jisx0208
                   japanese-jisx0212
                   japanese-jisx0213-1
                   japanese-jisx0213-2))
  (unify-charset charset))
;; Tables for translating characters when decoding and encoding.
;; Fixme: these aren't used now -- should they be?
(setq standard-translation-table-for-decode nil
      standard-translation-table-for-encode nil)
1217 ;;; Make fundamental coding systems.
;; The coding systems `no-conversion' and `undecided' are already
;; defined in coding.c as below:
1222 ;; (define-coding-system 'no-conversion
1223 ;; "..."
1224 ;; :coding-type 'raw-text
1225 ;; ...)
1226 ;; (define-coding-system 'undecided
1227 ;; "..."
1228 ;; :coding-type 'undecided
1229 ;; ...)
;; Short names for the basic coding systems, as (ALIAS . TARGET) pairs
;; registered in this order.
(dolist (pair '((binary . no-conversion)
                (unix . undecided-unix)
                (dos . undecided-dos)
                (mac . undecided-mac)))
  (define-coding-system-alias (car pair) (cdr pair)))
;; A variant of `undecided' with the :prefer-utf-8 property turned on.
(define-coding-system 'prefer-utf-8
  "Like `undecided' but prefer UTF-8 when appropriate.
On decoding, if the source contains 8-bit codes and they all
are valid UTF-8 sequences, detect the source as UTF-8 encoding
regardless of the coding priority.
On encoding, if the source contains non-ASCII characters, encode them
by UTF-8."
  :coding-type  'undecided
  :mnemonic     ?-
  :charset-list '(emacs)
  :prefer-utf-8 t)
;; Coding system of type `raw-text', marked :for-unibyte.
(define-coding-system 'raw-text
  "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
characters from the `eight-bit' character set. Each of them is encoded
into a single byte.
When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
  :coding-type 'raw-text
  :for-unibyte t
  :mnemonic    ?t)
;; Coding system of type `raw-text' with the EOL type fixed to `unix'.
(define-coding-system 'no-conversion-multibyte
  "Like `no-conversion' but don't read a file into a unibyte buffer."
  :coding-type 'raw-text
  :eol-type    'unix
  :mnemonic    ?=)
;; Charset-based coding system over the single charset `iso-8859-1'.
(define-coding-system 'iso-latin-1
  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
  :coding-type  'charset
  :mnemonic     ?1
  :charset-list '(iso-8859-1)
  :mime-charset 'iso-8859-1)

;; Other names for the same coding system, registered in this order.
(dolist (alias '(iso-8859-1 latin-1))
  (define-coding-system-alias alias 'iso-latin-1))
1278 ;; Coding systems not specific to each language environment.
;; Note that :charset-list is the symbol `emacs-mule' here, not a list
;; of charsets.
(define-coding-system 'emacs-mule
  "Emacs 21 internal format used in buffer and string."
  :coding-type  'emacs-mule
  :charset-list 'emacs-mule
  :mnemonic     ?M)
;; Plain UTF-8: no :bom property is given.
(define-coding-system 'utf-8
  "UTF-8 (no signature (BOM))"
  :coding-type  'utf-8
  :mnemonic     ?U
  :charset-list '(unicode)
  :mime-charset 'utf-8)
;; UTF-8 with the :bom property set to t.
(define-coding-system 'utf-8-with-signature
  "UTF-8 (with signature (BOM))"
  :coding-type  'utf-8
  :mnemonic     ?U
  :charset-list '(unicode)
  :bom          t)
;; UTF-8 whose :bom property is a pair of coding systems:
;; (WITH-SIGNATURE . WITHOUT-SIGNATURE).
(define-coding-system 'utf-8-auto
  "UTF-8 (auto-detect signature (BOM))"
  :coding-type  'utf-8
  :mnemonic     ?U
  :charset-list '(unicode)
  :bom          '(utf-8-with-signature . utf-8))

;; `mule-utf-8' is another name for plain `utf-8'.
(define-coding-system-alias 'mule-utf-8 'utf-8)
;; Same coding type as `utf-8', but with the charset list (emacs)
;; instead of (unicode).
(define-coding-system 'utf-8-emacs
  "Support for all Emacs characters (including non-Unicode characters)."
  :coding-type  'utf-8
  :mnemonic     ?U
  :charset-list '(emacs))

;; `emacs-internal' is the encoding used internally.  It is meant to be
;; able to save any multibyte buffer without losing information.  It
;; can change between Emacs releases, though, so it should only be used
;; for internal files.
(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
;; Little-endian UTF-16; no :bom property is given.
(define-coding-system 'utf-16le
  "UTF-16LE (little endian, no signature (BOM))."
  :coding-type          'utf-16
  :mnemonic             ?U
  :charset-list         '(unicode)
  :endian               'little
  :mime-text-unsuitable t
  :mime-charset         'utf-16le)
;; Big-endian UTF-16; no :bom property is given.
(define-coding-system 'utf-16be
  "UTF-16BE (big endian, no signature (BOM))."
  :coding-type          'utf-16
  :mnemonic             ?U
  :charset-list         '(unicode)
  :endian               'big
  :mime-text-unsuitable t
  :mime-charset         'utf-16be)
;; Little-endian UTF-16 with the :bom property set to t.  The MIME
;; charset is plain `utf-16'.
(define-coding-system 'utf-16le-with-signature
  "UTF-16 (little endian, with signature (BOM))."
  :coding-type          'utf-16
  :mnemonic             ?U
  :charset-list         '(unicode)
  :bom                  t
  :endian               'little
  :mime-text-unsuitable t
  :mime-charset         'utf-16)
;; Big-endian UTF-16 with the :bom property set to t.  The MIME
;; charset is plain `utf-16'.
(define-coding-system 'utf-16be-with-signature
  "UTF-16 (big endian, with signature (BOM))."
  :coding-type          'utf-16
  :mnemonic             ?U
  :charset-list         '(unicode)
  :bom                  t
  :endian               'big
  :mime-text-unsuitable t
  :mime-charset         'utf-16)
;; Generic UTF-16.  Its :bom property is a pair of the little-endian
;; and big-endian signature variants; :endian is `big'.
(define-coding-system 'utf-16
  "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
  :coding-type          'utf-16
  :mnemonic             ?U
  :charset-list         '(unicode)
  :bom                  '(utf-16le-with-signature . utf-16be-with-signature)
  :endian               'big
  :mime-text-unsuitable t
  :mime-charset         'utf-16)

;; Backwards compatibility: these old names were also used by Mule-UCS.
;; The MIME names are preferred.
(dolist (pair '((utf-16-le . utf-16le-with-signature)
                (utf-16-be . utf-16be-with-signature)))
  (define-coding-system-alias (car pair) (cdr pair)))
;; Only the first designation slot is populated; the other three are
;; nil.
(define-coding-system 'iso-2022-7bit
  "ISO 2022 based 7-bit encoding using only G0."
  :coding-type  'iso-2022
  :mnemonic     ?J
  :charset-list 'iso-2022
  :designation  [(ascii t) nil nil nil]
  :flags        '(short
                  ascii-at-eol
                  ascii-at-cntl
                  7-bit
                  designation
                  composition))
;; The first and third designation slots are populated, and the flags
;; include `single-shift'.
(define-coding-system 'iso-2022-7bit-ss2
  "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
  :coding-type  'iso-2022
  :mnemonic     ?$
  :charset-list 'iso-2022
  :designation  [(ascii 94) nil (nil 96) nil]
  :flags        '(short
                  ascii-at-eol
                  ascii-at-cntl
                  7-bit
                  designation
                  single-shift
                  composition))
;; The first two designation slots are populated, and the flags include
;; `locking-shift'.
(define-coding-system 'iso-2022-7bit-lock
  "ISO-2022 coding system using Locking-Shift for 96-charset."
  :coding-type  'iso-2022
  :mnemonic     ?&
  :charset-list 'iso-2022
  :designation  [(ascii 94) (nil 96) nil nil]
  :flags        '(ascii-at-eol
                  ascii-at-cntl
                  7-bit
                  designation
                  locking-shift
                  composition))

;; `iso-2022-int-1' is another name for the same coding system.
(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
;; Unlike the generic ISO-2022 coding systems in this file, this one
;; lists its charsets explicitly instead of using the symbol
;; `iso-2022'.
(define-coding-system 'iso-2022-7bit-lock-ss2
  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
  :coding-type 'iso-2022
  :mnemonic ?i
  :charset-list '(ascii
                  japanese-jisx0208
                  japanese-jisx0208-1978
                  latin-jisx0201
                  korean-ksc5601
                  chinese-gb2312
                  chinese-cns11643-1
                  chinese-cns11643-2
                  chinese-cns11643-3
                  chinese-cns11643-4
                  chinese-cns11643-5
                  chinese-cns11643-6
                  chinese-cns11643-7)
  ;; One element per designation slot, four in all.
  :designation [(ascii 94)
                (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
                (nil chinese-cns11643-2)
                (nil chinese-cns11643-3
                     chinese-cns11643-4
                     chinese-cns11643-5
                     chinese-cns11643-6
                     chinese-cns11643-7)]
  :flags '(short
           ascii-at-eol
           ascii-at-cntl
           7-bit
           locking-shift
           single-shift
           init-bol))

;; `iso-2022-cjk' is another name for the same coding system.
(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
;; Same designation as `iso-2022-7bit-ss2', but the flags include
;; neither `short' nor `7-bit'.
(define-coding-system 'iso-2022-8bit-ss2
  "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
  :coding-type  'iso-2022
  :mnemonic     ?@
  :charset-list 'iso-2022
  :designation  [(ascii 94) nil (nil 96) nil]
  :flags        '(ascii-at-eol
                  ascii-at-cntl
                  designation
                  single-shift
                  composition))
(define-coding-system 'compound-text
  "Compound text based generic encoding.
This coding system is an extension of X's \"Compound Text Encoding\".
It encodes many characters using the normal ISO-2022 designation sequences,
but it doesn't support extended segments of CTEXT."
  :coding-type  'iso-2022
  :mnemonic     ?x
  :charset-list 'iso-2022
  :designation  [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags        '(ascii-at-eol
                  ascii-at-cntl
                  long-form
                  designation
                  locking-shift
                  single-shift
                  composition)
  ;; Fixme: `x-ctext' is not a valid MIME charset, so it has to be
  ;; special-cased elsewhere -- fx
  :mime-charset 'x-ctext)

;; Other names for the same coding system, registered in this order.
(dolist (alias '(x-ctext ctext))
  (define-coding-system-alias alias 'compound-text))
;; Identical to `compound-text' except that it emits no composition
;; escape sequences.  It is used by the post-read and pre-write
;; conversions of compound-text-with-extensions; see mule.el.  It must
;; not be given a mime-charset property, so that it does not show up
;; close to the beginning of coding systems ordered by priority.
(define-coding-system 'ctext-no-compositions
  "Compound text based generic encoding.
Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type  'iso-2022
  :mnemonic     ?x
  :charset-list 'iso-2022
  :designation  [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags        '(ascii-at-eol
                  ascii-at-cntl
                  designation
                  locking-shift
                  single-shift))
(define-coding-system 'compound-text-with-extensions
  "Compound text encoding with ICCCM Extended Segment extensions.
See the variables `ctext-standard-encodings' and
`ctext-non-standard-encodings-alist' for the detail about how
extended segments are handled.
This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
  :coding-type  'iso-2022
  :mnemonic     ?x
  :charset-list 'iso-2022
  :designation  [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags        '(ascii-at-eol
                  ascii-at-cntl
                  long-form
                  designation
                  locking-shift
                  single-shift)
  ;; Conversion hooks, given as function symbols.
  :post-read-conversion 'ctext-post-read-conversion
  :pre-write-conversion 'ctext-pre-write-conversion
  :mime-charset 'x-ctext)

;; Other names for the same coding system, registered in this order.
(dolist (alias '(x-ctext-with-extensions ctext-with-extensions))
  (define-coding-system-alias alias 'compound-text-with-extensions))
;; Charset-based coding system over `ascii' only; :default-char is `?'.
(define-coding-system 'us-ascii
  "Encode ASCII as-is and encode non-ASCII characters to `?'."
  :coding-type  'charset
  :mnemonic     ?-
  :charset-list '(ascii)
  :default-char ??
  :mime-charset 'us-ascii)

;; `iso-safe' is another name for the same coding system.
(define-coding-system-alias 'iso-safe 'us-ascii)
;; UTF-7 is declared with the `utf-8' coding type plus a pair of
;; conversion hooks.
;; NOTE(review): presumably the hooks perform the actual UTF-7
;; transformation; they are not defined in this file.
(define-coding-system 'utf-7
  "UTF-7 encoding of Unicode (RFC 2152)."
  :coding-type 'utf-8
  :mnemonic ?U
  :mime-charset 'utf-7
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-pre-write-conversion
  :post-read-conversion 'utf-7-post-read-conversion)
;; A coding system whose :coding-type is `utf-8' is automatically
;; marked ASCII compatible, but UTF-7 is not (e.g. `+' introduces an
;; encoded sequence), so override the property here (Bug#40407).
(coding-system-put 'utf-7 :ascii-compatible-p nil)
;; The IMAP flavour of UTF-7 is declared with the `utf-8' coding type
;; plus a pair of conversion hooks.
;; NOTE(review): presumably the hooks perform the actual
;; transformation; they are not defined in this file.
(define-coding-system 'utf-7-imap
  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
  :coding-type 'utf-8
  :mnemonic ?u
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-imap-pre-write-conversion
  :post-read-conversion 'utf-7-imap-post-read-conversion)
;; A coding system whose :coding-type is `utf-8' is automatically
;; marked ASCII compatible, but the IMAP variant of UTF-7 is not, so
;; override the property here (Bug#40407).
(coding-system-put 'utf-7-imap :ascii-compatible-p nil)
;; Terminal output falls back to us-ascii unless some other coding
;; system is specified explicitly.
(set-safe-terminal-coding-system-internal (quote us-ascii))
1520 ;; The other coding-systems are defined in each language specific
1521 ;; files under lisp/language.
1523 ;; Normally, set coding system to `undecided' before reading a file.
1524 ;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1525 ;; but we regard them as containing multibyte characters.
1526 ;; Tar files are not decoded at all, but we treat them as raw bytes.
;; Build `file-coding-system-alist' from (REGEXP . VALUE) entries.
;; Only the regexp of each entry is passed through `purecopy'; the
;; value is stored as written.
(setq file-coding-system-alist
      (mapcar (lambda (entry)
                (cons (purecopy (car entry)) (cdr entry)))
              '(("\\.elc\\'" . utf-8-emacs)
                ("\\.el\\'" . prefer-utf-8)
                ("\\.utf\\(-8\\)?\\'" . utf-8)
                ("\\.xml\\'" . xml-find-file-coding-system)
                ;; We use raw-text for reading loaddefs.el so that if it
                ;; happens to have DOS or Mac EOLs, they are converted to
                ;; newlines.  This is required to make the special treatment
                ;; of the "\ newline" combination in loaddefs.el, which marks
                ;; the beginning of a doc string, work.
                ;; The dot is escaped so that only a literal "loaddefs.el"
                ;; matches, not e.g. "loaddefsXel".
                ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
                ("\\.tar\\'" . (no-conversion . no-conversion))
                ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
                ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
                ;; Catch-all entry: the empty regexp matches every name.
                ("" . (undecided . nil)))))
1546 ;;; Setting coding categories and their priorities.
;; This setting exists only so that Emacs Lisp source files which
;; contain multilingual text can be read while dumping Emacs. More
;; appropriate values are set by the command `set-language-environment'
;; for each language environment.
;; Pass these three coding systems, in this order, as the priority
;; list.  The call must be closed after the last argument; without the
;; closing paren the form runs on into the code that follows.
(set-coding-system-priority
 'iso-latin-1
 'utf-8
 'iso-2022-7bit)
1560 ;;; Miscellaneous settings.
;; Bind every character from 128 up to (max-char) to
;; `self-insert-command' in the second element of `global-map', so
;; that all multibyte characters self-insert.
(let ((multibyte-range (cons 128 (max-char))))
  (set-char-table-range (cadr global-map)
                        multibyte-range
                        'self-insert-command))
;; Set the entries for codes ?\221 through ?\226 (inclusive) of
;; `latin-extra-code-table' to t.
(let ((code ?\221))
  (while (<= code ?\226)
    (aset latin-extra-code-table code t)
    (setq code (1+ code))))
;; Coding systems based on the charsets defined in this file make the
;; old code-pages library obsolete.  User code might still require it,
;; so provide the feature here.
(provide (quote code-pages))
1579 ;;; mule-conf.el ends here