An electric test is now passing
[emacs.git] / lisp / international / mule-conf.el
blob69a505d3066df394c18ffc164a4d5a2f801ad247
1 ;;; mule-conf.el --- configure multilingual environment
3 ;; Copyright (C) 1997-2019 Free Software Foundation, Inc.
4 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
6 ;; Registration Number H14PRO021
7 ;; Copyright (C) 2003
8 ;; National Institute of Advanced Industrial Science and Technology (AIST)
9 ;; Registration Number H13PRO009
11 ;; Keywords: i18n, mule, multilingual, character set, coding system
13 ;; This file is part of GNU Emacs.
15 ;; GNU Emacs is free software: you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation, either version 3 of the License, or
18 ;; (at your option) any later version.
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ;; GNU General Public License for more details.
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
28 ;;; Commentary:
30 ;; This file defines the Emacs charsets and some basic coding systems.
31 ;; Other coding systems are defined in the files in directory
32 ;; lisp/language.
34 ;;; Code:
36 ;;; Remarks
38 ;; The ISO-IR registry is maintained by the Information Processing
39 ;; Society of Japan/Information Technology Standards Commission of
40 ;; Japan (IPSJ/ITSCJ) at https://www.itscj.ipsj.or.jp/itscj_english/.
41 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
42 ;; http://www.ecma.ch/.
44 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
45 ;; MS Windows, which are presumably the only charsets we really need
46 ;; to worry about on such systems:
47 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
48 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
49 ;; 1258, 874, 932, 936, 949, 950
51 ;;; Definitions of character sets.
;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are already
;; defined in charset.c as below:
56 ;; (define-charset 'ascii
57 ;; ""
58 ;; :dimension 1
59 ;; :code-space [0 127]
60 ;; :iso-final-char ?B
61 ;; :ascii-compatible-p t
62 ;; :emacs-mule-id 0
63 ;; :code-offset 0)
65 ;; (define-charset 'unicode
66 ;; ""
67 ;; :dimension 3
68 ;; :code-space [0 255 0 255 0 16]
69 ;; :ascii-compatible-p t
70 ;; :code-offset 0)
72 ;; (define-charset 'emacs
73 ;; ""
74 ;; :dimension 3
75 ;; :code-space [0 255 0 255 0 63]
76 ;; :ascii-compatible-p t
77 ;; :supplementary-p t
78 ;; :code-offset 0)
80 ;; (define-charset 'eight-bit
81 ;; ""
82 ;; :dimension 1
83 ;; :code-space [128 255]
84 ;; :code-offset #x3FFF80)
86 ;; We now set :docstring, :short-name, and :long-name properties.
;; Descriptive properties for the predefined charsets.  Each row is
;; (CHARSET PROPERTY VALUE); rows are applied in the order listed.
(dolist (row '((ascii :docstring "ASCII (ISO646 IRV)")
               (ascii :short-name "ASCII")
               (ascii :long-name "ASCII (ISO646 IRV)")
               (iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)")
               (iso-8859-1 :short-name "Latin-1")
               (iso-8859-1 :long-name "Latin-1")
               (unicode :docstring "Unicode (ISO10646)")
               (unicode :short-name "Unicode")
               (unicode :long-name "Unicode (ISO10646)")
               (emacs :docstring
                      "Full Emacs charset (excluding eight bit chars)")
               (emacs :short-name "Emacs")
               (emacs :long-name "Emacs")
               (eight-bit :docstring "Raw bytes 128-255")
               (eight-bit :short-name "Raw bytes")))
  (put-charset-property (nth 0 row) (nth 1 row) (nth 2 row)))

;; `ucs' is an alternative name for `unicode'.
(define-charset-alias 'ucs 'unicode)
;; Right-hand part of Latin-1, addressed with the 96 codes 32..127.
(define-charset 'latin-iso8859-1
  "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :short-name "RHP of Latin-1"
  :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :code-space [32 127]
  :code-offset 160
  :iso-final-char ?A
  :emacs-mule-id 129)

;; Name perhaps not ideal, but is XEmacs-compatible.
(define-charset 'control-1
  "8-bit control code (0x80..0x9F)"
  :short-name "8-bit control code"
  :code-space [#x80 #x9F]
  :code-offset #x80)

;; The two raw-byte charsets below split the byte range 0x80..0xFF.
(define-charset 'eight-bit-control
  "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
  :short-name "Raw bytes 0x80..0x9F"
  :code-space [#x80 #x9F]
  :code-offset #x3FFF80                 ; see character.h
  :supplementary-p t)

(define-charset 'eight-bit-graphic
  "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
  :short-name "Raw bytes 0xA0..0xFF"
  :code-space [#xA0 #xFF]
  :code-offset #x3FFFA0                 ; see character.h
  :supplementary-p t)
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define the single-byte charset SYMBOL and, optionally, its right-hand part.
SYMBOL is defined over code space [0 255] from the map MAP, with NAME
as docstring and long name and NICKNAME as short name.
If ISO-SYMBOL is non-nil, it is defined over code space [32 127] as
the subset of SYMBOL covering codes 160..255 (offset -128), with
ISO-FINAL as its ISO final character and EMACS-MULE-ID as its
emacs-mule id.  If ISO-IR is non-nil, that number is appended to the
docstring of ISO-SYMBOL as \"ISO-IR-N\"."
  `(progn
     (define-charset ,symbol
       ,name
       :short-name ,nickname
       :long-name ,name
       :ascii-compatible-p t
       :code-space [0 255]
       :map ,map)
     ;; The right-hand part is only defined when a name was supplied.
     (when ,iso-symbol
       (define-charset ,iso-symbol
         (if ,iso-ir
             (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                     ,name ,nickname ,iso-ir)
           (format "Right-Hand Part of %s (%s)" ,name ,nickname))
         :short-name (format "RHP of %s" ,name)
         :long-name (format "RHP of %s (%s)" ,name ,nickname)
         :iso-final-char ,iso-final
         :emacs-mule-id ,emacs-mule-id
         :code-space [32 127]
         :subset (list ,symbol 160 255 -128)))))
;; ISO/IEC 8859 parts 2-16.  Arguments, in order: charset symbol,
;; right-hand-part charset symbol, name, nickname, ISO-IR number,
;; ISO final character, emacs-mule id (or nil), map name.
(define-iso-single-byte-charset
 'iso-8859-2 'latin-iso8859-2
 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")

(define-iso-single-byte-charset
 'iso-8859-3 'latin-iso8859-3
 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")

(define-iso-single-byte-charset
 'iso-8859-4 'latin-iso8859-4
 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")

(define-iso-single-byte-charset
 'iso-8859-5 'cyrillic-iso8859-5
 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")

(define-iso-single-byte-charset
 'iso-8859-6 'arabic-iso8859-6
 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")

(define-iso-single-byte-charset
 'iso-8859-7 'greek-iso8859-7
 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")

(define-iso-single-byte-charset
 'iso-8859-8 'hebrew-iso8859-8
 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")

(define-iso-single-byte-charset
 'iso-8859-9 'latin-iso8859-9
 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")

(define-iso-single-byte-charset
 'iso-8859-10 'latin-iso8859-10
 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")

;; http://www.nectec.or.th/it-standards/iso8859-11/
;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
;; plus nbsp
(define-iso-single-byte-charset
 'iso-8859-11 'thai-iso8859-11
 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")

;; 8859-12 doesn't (yet?) exist.

(define-iso-single-byte-charset
 'iso-8859-13 'latin-iso8859-13
 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")

(define-iso-single-byte-charset
 'iso-8859-14 'latin-iso8859-14
 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")

(define-iso-single-byte-charset
 'iso-8859-15 'latin-iso8859-15
 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")

(define-iso-single-byte-charset
 'iso-8859-16 'latin-iso8859-16
 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")

;; The helper macro is not needed past this point, so drop it.
(fmakunbound 'define-iso-single-byte-charset)
;; Can this be shared with 8859-11?
;; N.b. not all of these are defined in Unicode.
(define-charset 'thai-tis620
  "MULE charset for TIS620.2533"
  :short-name "TIS620.2533"
  :code-space [32 127]
  :code-offset #x0E00
  :iso-final-char ?T
  :emacs-mule-id 133)

;; 8-bit form: `ascii' plus `thai-tis620' shifted up by 128.
(define-charset 'tis620-2533
  "TIS620.2533, a.k.a. TIS-620. Like `thai-iso8859-11', but without NBSP."
  :short-name "TIS620.2533"
  :code-space [0 255]
  :ascii-compatible-p t
  :superset '(ascii (thai-tis620 . 128)))
;; The full JISX0201 table; the two charsets after it are subsets of it.
(define-charset 'jisx0201
  "JISX0201"
  :short-name "JISX0201"
  :code-space [#x00 #xDF]
  :map "JISX0201")

(define-charset 'latin-jisx0201
  "Roman Part of JISX0201.1976"
  :short-name "JISX0201 Roman"
  :long-name "Japanese Roman (JISX0201.1976)"
  :code-space [33 126]
  :subset '(jisx0201 33 126 0)
  :iso-final-char ?J
  :emacs-mule-id 138
  :supplementary-p t)

(define-charset 'katakana-jisx0201
  "Katakana Part of JISX0201.1976"
  :short-name "JISX0201 Katakana"
  :long-name "Japanese Katakana (JISX0201.1976)"
  :code-space [33 126]
  :subset '(jisx0201 161 254 -128)
  :iso-final-char ?I
  :emacs-mule-id 137
  :supplementary-p t)
;; Simplified Chinese: GB2312 and its GBK extension.
(define-charset 'chinese-gb2312
  "GB2312 Chinese simplified: ISO-IR-58"
  :short-name "GB2312"
  :long-name "GB2312: ISO-IR-58"
  :code-space [33 126 33 126]
  :code-offset #x110000
  :unify-map "GB2312"
  :iso-final-char ?A
  :emacs-mule-id 145)

(define-charset 'chinese-gbk
  "GBK Chinese simplified."
  :short-name "GBK"
  :unify-map "GBK"
  :code-offset #x160000
  :code-space [#x40 #xFE #x81 #xFE])
;; Code-page names for the same charset.
(define-charset-alias 'cp936 'chinese-gbk)
(define-charset-alias 'windows-936 'chinese-gbk)
;; CNS11643 planes 1 to 7, defined in plane order.  Each row is
;; (PLANE ISO-IR EMACS-MULE-ID WORD), where WORD is the spelling of
;; "traditional" used in that plane's docstring.  For plane N the ISO
;; final character is ?G + (N - 1), the code offset is
;; #x114000 + (N - 1) * #x4000, and the unify map is "CNS-N".
(dolist (row '((1 171 149 "traditional")
               (2 172 150 "traditional")
               (3 183 246 "Traditional")
               (4 184 247 "Traditional")
               (5 185 248 "Traditional")
               (6 186 249 "Traditional")
               (7 187 250 "Traditional")))
  (let ((plane (nth 0 row))
        (iso-ir (nth 1 row)))
    (define-charset (intern (format "chinese-cns11643-%d" plane))
      (format "CNS11643 Plane %d Chinese %s: ISO-IR-%d"
              plane (nth 3 row) iso-ir)
      :short-name (format "CNS11643-%d" plane)
      :long-name (format "CNS11643-%d (Chinese traditional): ISO-IR-%d"
                         plane iso-ir)
      :iso-final-char (+ ?G (1- plane))
      :emacs-mule-id (nth 2 row)
      :code-space [33 126 33 126]
      :code-offset (+ #x114000 (* (1- plane) #x4000))
      :unify-map (format "CNS-%d" plane))))
;; Big5 as a single two-byte charset.
(define-charset 'big5
  "Big5 (Chinese traditional)"
  :short-name "Big5"
  :unify-map "BIG5"
  :code-offset #x130000
  :code-space [#x40 #xFE #xA1 #xFE])
;; Fixme: AKA cp950 according to
;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>.  Is
;; that correct?

;; Big5 split into two 94x94 charsets.
;; NOTE(review): the docstring of `chinese-big5-1' ends its range at
;; C67E but the long name says C67F -- confirm which is intended.
(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67F"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "BIG5-1"
  :iso-final-char ?0
  :emacs-mule-id 152
  :supplementary-p t)

(define-charset 'chinese-big5-2
  "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-2)"
  :long-name "Big5 (Level-2) C940-FEFE"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x137800
  :unify-map "BIG5-2"
  :iso-final-char ?1
  :emacs-mule-id 153
  :supplementary-p t)
;; Japanese 94x94 charsets.
(define-charset 'japanese-jisx0208
  "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
  :short-name "JISX0208"
  :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
  :code-space [33 126 33 126]
  :code-offset #x140000
  :unify-map "JISX0208"
  :iso-final-char ?B
  :emacs-mule-id 146)

(define-charset 'japanese-jisx0208-1978
  "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
  :short-name "JISX0208.1978"
  :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
  :code-space [33 126 33 126]
  :code-offset #x144000
  :unify-map "JISC6226"
  :iso-final-char ?@
  :emacs-mule-id 144)

(define-charset 'japanese-jisx0212
  "JISX0212 Japanese supplement: ISO-IR-159"
  :short-name "JISX0212"
  :long-name "JISX0212 (Japanese): ISO-IR-159"
  :code-space [33 126 33 126]
  :code-offset #x148000
  :unify-map "JISX0212"
  :iso-final-char ?D
  :emacs-mule-id 148)

;; Note that jisx0213 contains characters not in Unicode (3.2?).  It's
;; arguable whether it should have a unify-map.
(define-charset 'japanese-jisx0213-1
  "JISX0213.2000 Plane 1 (Japanese)"
  :short-name "JISX0213-1"
  :code-space [33 126 33 126]
  :code-offset #x14C000
  :unify-map "JISX2131"
  :iso-final-char ?O
  :emacs-mule-id 151)

(define-charset 'japanese-jisx0213-2
  "JISX0213.2000 Plane 2 (Japanese)"
  :short-name "JISX0213-2"
  :code-space [33 126 33 126]
  :code-offset #x150000
  :unify-map "JISX2132"
  :iso-final-char ?P
  :emacs-mule-id 254)

(define-charset 'japanese-jisx0213-a
  "JISX0213.2004 adds these characters to JISX0213.2000."
  :short-name "JISX0213A"
  :dimension 2
  :code-space [33 126 33 126]
  :map "JISX213A"
  :supplementary-p t)

;; Plane 1 of the 2004 edition: the additions plus the 2000 plane 1.
(define-charset 'japanese-jisx0213.2004-1
  "JISX0213.2004 Plane1 (Japanese)"
  :short-name "JISX0213.2004-1"
  :dimension 2
  :code-space [33 126 33 126]
  :superset '(japanese-jisx0213-a japanese-jisx0213-1)
  :iso-final-char ?Q)
;; CP932 is assembled from `ascii' and the two parts defined first.
(define-charset 'katakana-sjis
  "Katakana part of Shift-JIS"
  :dimension 1
  :code-space [#xA1 #xDF]
  :supplementary-p t
  :subset '(jisx0201 #xA1 #xDF 0))

(define-charset 'cp932-2-byte
  "2-byte part of CP932"
  :dimension 2
  :code-space [#x40 #xFC #x81 #xFC]
  :supplementary-p t
  :map "CP932-2BYTE")

(define-charset 'cp932
  "CP932 (Microsoft shift-jis)"
  :short-name "CP932"
  :code-space [#x00 #xFF #x00 #xFE]
  :superset '(ascii katakana-sjis cp932-2-byte))
(define-charset 'korean-ksc5601
  "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
  :short-name "KSC5601"
  :long-name "KSC5601 (Korean): ISO-IR-149"
  :code-space [33 126 33 126]
  :code-offset #x279f94                 ; ... #x27c217
  :unify-map "KSC5601"
  :iso-final-char ?C
  :emacs-mule-id 147)

(define-charset 'big5-hkscs
  "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
  :short-name "Big5"
  :unify-map "BIG5-HKSCS"
  :code-offset #x27c218                 ; ... #x280839
  :code-space [#x40 #xFE #xA1 #xFE])

;; CP949 is `ascii' plus the two-byte part defined here.
(define-charset 'cp949-2-byte
  "2-byte part of CP949"
  :dimension 2
  :code-space [#x41 #xFE #x81 #xFD]
  :supplementary-p t
  :map "CP949-2BYTE")

(define-charset 'cp949
  "CP949 (Korean)"
  :long-name "CP949 (Korean)"
  :short-name "CP949"
  :superset '(ascii cp949-2-byte)
  :code-space [#x00 #xFE #x00 #xFD])
(define-charset 'chinese-sisheng
  "SiSheng characters for PinYin/ZhuYin"
  :short-name "SiSheng"
  :long-name "SiSheng (PinYin/ZhuYin)"
  :code-space [33 126]
  :code-offset #x200000
  :unify-map "MULE-sisheng"
  :iso-final-char ?0
  :emacs-mule-id 160
  :supplementary-p t)

;; A subset of the 1989 version of IPA.  It consists of the consonant
;; signs used in English, French, German and Italian, and all vowels
;; signs in the table.  [says old MULE doc]
(define-charset 'ipa
  "IPA (International Phonetic Association)"
  :short-name "IPA"
  :code-space [32 127]
  :code-offset #x200080
  :unify-map "MULE-ipa"
  :iso-final-char ?0
  :emacs-mule-id 161
  :supplementary-p t)
;; Vietnamese charsets.
(define-charset 'viscii
  "VISCII1.1"
  :short-name "VISCII"
  :long-name "VISCII 1.1"
  :map "VISCII"
  :code-space [0 255])

(define-charset 'vietnamese-viscii-lower
  "VISCII1.1 lower-case"
  :short-name "VISCII lower"
  :long-name "VISCII lower-case"
  :code-space [32 127]
  :code-offset #x200200
  :unify-map "MULE-lviscii"
  :iso-final-char ?1
  :emacs-mule-id 162
  :supplementary-p t)

(define-charset 'vietnamese-viscii-upper
  "VISCII1.1 upper-case"
  :short-name "VISCII upper"
  :long-name "VISCII upper-case"
  :code-space [32 127]
  :code-offset #x200280
  :unify-map "MULE-uviscii"
  :iso-final-char ?2
  :emacs-mule-id 163
  :supplementary-p t)

(define-charset 'vscii
  "VSCII1.1 (TCVN-5712 VN1)"
  :short-name "VSCII"
  :map "VSCII"
  :code-space [0 255])

(define-charset-alias 'tcvn-5712 'vscii)

;; Fixme: see note in tcvn.map about combining characters
(define-charset 'vscii-2
  "VSCII-2 (TCVN-5712 VN2)"
  :map "VSCII-2"
  :code-space [0 255])
;; Cyrillic and Georgian 8-bit charsets, each loaded from a map.
(define-charset 'koi8-r
  "KOI8-R"
  :short-name "KOI8-R"
  :code-space [0 255]
  :map "KOI8-R"
  :ascii-compatible-p t)

(define-charset-alias 'koi8 'koi8-r)

(define-charset 'alternativnyj
  "ALTERNATIVNYJ"
  :short-name "alternativnyj"
  :code-space [0 255]
  :map "ALTERNATIVNYJ"
  :ascii-compatible-p t)

(define-charset 'cp866
  "CP866"
  :short-name "cp866"
  :code-space [0 255]
  :map "IBM866"
  :ascii-compatible-p t)
(define-charset-alias 'ibm866 'cp866)

(define-charset 'koi8-u
  "KOI8-U"
  :short-name "KOI8-U"
  :code-space [0 255]
  :map "KOI8-U"
  :ascii-compatible-p t)

(define-charset 'koi8-t
  "KOI8-T"
  :short-name "KOI8-T"
  :code-space [0 255]
  :map "KOI8-T"
  :ascii-compatible-p t)

(define-charset 'georgian-ps
  "GEORGIAN-PS"
  :short-name "GEORGIAN-PS"
  :code-space [0 255]
  :map "KA-PS"
  :ascii-compatible-p t)

(define-charset 'georgian-academy
  "GEORGIAN-ACADEMY"
  :short-name "GEORGIAN-ACADEMY"
  :code-space [0 255]
  :map "KA-ACADEMY"
  :ascii-compatible-p t)
;; Windows code pages 1250-1258.  Each row is (NUMBER . DESCRIPTION).
;; For number N this defines `windows-N' from the map "CPN" and then
;; immediately adds the alias `cpN', in ascending order of N.
(dolist (page '((1250 . "Central Europe")
                (1251 . "Cyrillic")
                (1252 . "Latin I")
                (1253 . "Greek")
                (1254 . "Turkish")
                (1255 . "Hebrew")
                (1256 . "Arabic")
                (1257 . "Baltic")
                (1258 . "Viet Nam")))
  (let ((charset (intern (format "windows-%d" (car page))))
        (label (format "WINDOWS-%d" (car page))))
    (define-charset charset
      (format "%s (%s)" label (cdr page))
      :short-name label
      :ascii-compatible-p t
      :code-space [0 255]
      :map (format "CP%d" (car page)))
    (define-charset-alias (intern (format "cp%d" (car page))) charset)))
(define-charset 'next
  "NEXT"
  :short-name "NEXT"
  :code-space [0 255]
  :map "NEXTSTEP"
  :ascii-compatible-p t)

(define-charset 'cp1125
  "CP1125"
  :short-name "CP1125"
  :ascii-compatible-p t
  :map "CP1125"
  :code-space [0 255])
(define-charset-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-charset-alias 'cp866u 'cp1125)
;; DOS/IBM code pages.  Each row is (NUMBER DESCRIPTION MAP [ALIAS]).
;; For number N this defines `cpN' with docstring "CPN (DESCRIPTION)"
;; and short name "CPN"; if ALIAS is present it is added right after
;; its charset.
;;
;; Fixme (cp437): C.f. iconv, http://czyborra.com/charsets/codepages.html
;; shows this as not ASCII compatible, with various graphics in
;; 0x01-0x1F.
(dolist (row '((437 "MS-DOS United States, Australia, New Zealand, South Africa"
                    "IBM437")
               (720 "Arabic" "CP720")
               (737 "PC Greek" "CP737")
               (775 "PC Baltic" "CP775")
               (851 "Greek" "IBM851")
               (852 "MS-DOS Latin-2" "IBM852")
               (855 "IBM Cyrillic" "IBM855")
               (857 "IBM Turkish" "IBM857")
               ;; IANA has IBM00858/CP00858
               (858 "Multilingual Latin I + Euro" "CP858" cp00858)
               (860 "MS-DOS Portuguese" "IBM860")
               (861 "MS-DOS Icelandic" "IBM861")
               (862 "PC Hebrew" "IBM862")
               (863 "MS-DOS Canadian French" "IBM863")
               (864 "PC Arabic" "IBM864")
               (865 "MS-DOS Nordic" "IBM865")
               (869 "IBM Modern Greek" "IBM869")
               (874 "IBM Thai" "IBM874")))
  (let ((charset (intern (format "cp%d" (nth 0 row))))
        (label (format "CP%d" (nth 0 row)))
        (alias (nth 3 row)))
    (define-charset charset
      (format "%s (%s)" label (nth 1 row))
      :short-name label
      :code-space [0 255]
      :ascii-compatible-p t
      :map (nth 2 row))
    (when alias
      (define-charset-alias alias charset))))
;; For Arabic, we need three different types of character sets.
;; Digits are of direction left-to-right and of width 1-column.
;; Others are of direction right-to-left and of width 1-column or
;; 2-column.
(define-charset 'arabic-digit
  "Arabic digit"
  :short-name "Arabic digit"
  :code-space [34 42]
  :code-offset #x0600
  :iso-final-char ?2
  :emacs-mule-id 164
  :supplementary-p t)

(define-charset 'arabic-1-column
  "Arabic 1-column"
  :short-name "Arabic 1-col"
  :long-name "Arabic 1-column"
  :code-space [33 126]
  :code-offset #x200100
  :iso-final-char ?3
  :emacs-mule-id 165
  :supplementary-p t)

(define-charset 'arabic-2-column
  "Arabic 2-column"
  :short-name "Arabic 2-col"
  :long-name "Arabic 2-column"
  :code-space [33 126]
  :code-offset #x200180
  :iso-final-char ?4
  :emacs-mule-id 224
  :supplementary-p t)
;; Lao script.
;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
;; Not all of them are defined in Unicode.
(define-charset 'lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :code-space [33 126]
  :code-offset #x0E81
  :iso-final-char ?1
  :emacs-mule-id 167
  :supplementary-p t)

;; 8-bit form: `ascii', `eight-bit-control', and `lao' shifted by 128.
(define-charset 'mule-lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :supplementary-p t
  :superset '(ascii eight-bit-control (lao . 128))
  :code-space [0 255])
;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
;; not assigned.  They are automatically converted to each Indian
;; script which IS-13194 supports.

(define-charset 'indian-is13194
  "7-bit representation of IS 13194 (ISCII) for Devanagari"
  :short-name "IS 13194 (DEV)"
  :long-name "Indian IS 13194 (DEV)"
  :code-space [33 126]
  :code-offset #x180000
  :unify-map "MULE-is13194"
  :iso-final-char ?5
  :emacs-mule-id 225
  :supplementary-p t)
;; Per-script glyph charsets for the CDAC and AKRUTI fonts.  Each font
;; row is (SUFFIX LABEL SCRIPT...).  Charsets are defined in the order
;; listed (all CDAC scripts, then all AKRUTI scripts); each gets a
;; 256-code block, with offsets assigned consecutively from #x180100.
(let ((next-offset #x180100))
  (dolist (font '(("cdac" "CDAC"
                   devanagari sanskrit bengali tamil telugu assamese
                   oriya kannada malayalam gujarati punjabi)
                  ("akruti" "AKRUTI"
                   devanagari bengali punjabi gujarati
                   oriya tamil telugu kannada malayalam)))
    (let ((suffix (nth 0 font))
          (label (nth 1 font)))
      (dolist (script (nthcdr 2 font))
        (let ((script-name (capitalize (symbol-name script))))
          (define-charset (intern (format "%s-%s" script suffix))
            (format
             "Glyphs of %s script for %s font. Subset of `indian-glyph'."
             script-name label)
            :short-name (format "%s %s glyphs" label script-name)
            :supplementary-p t
            :code-space [0 255]
            :code-offset next-offset))
        ;; Advance to the next 256-code block.
        (setq next-offset (+ next-offset #x100))))))
;; 96x96 glyph charset; it starts at the same code offset as the first
;; per-script font charset (#x180100).
(define-charset 'indian-glyph
  "Glyphs for Indian characters."
  :short-name "Indian glyph"
  :code-space [32 127 32 127]
  :code-offset #x180100
  :iso-final-char ?4
  :emacs-mule-id 240
  :supplementary-p t)
;; Actual Glyph for 1-column width.
;; The emacs-mule id must differ from the 251 used by `indian-2-column':
;; the two charsets are distinguished by that id in the emacs-mule
;; encoding.  Compare `tibetan-1-column' (241) and `tibetan' (252).
;; NOTE(review): `indian-glyph' also declares id 240 -- confirm which
;; of the two charsets is meant to own it.
(define-charset 'indian-1-column
  "Indian charset for 1-column width glyphs."
  :short-name "Indian 1-col"
  :long-name "Indian 1 Column"
  :iso-final-char ?6
  :emacs-mule-id 240
  :supplementary-p t
  :code-space [33 126 33 126]
  :code-offset #x184000)
;; Actual Glyph for 2-column width.
(define-charset 'indian-2-column
  "Indian charset for 2-column width glyphs."
  :short-name "Indian 2-col"
  :long-name "Indian 2 Column"
  :code-space [33 126 33 126]
  :code-offset #x184000
  :iso-final-char ?5
  :emacs-mule-id 251
  :supplementary-p t)
;; Two-column Tibetan charset.  `:iso-final-char' used to be given
;; twice with the same value; it is now specified once.
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :unify-map "MULE-tibetan"
  :supplementary-p t
  :code-space [33 126 33 37]
  :code-offset #x190000)
;; One-column Tibetan glyphs; same code space and offset as `tibetan'.
(define-charset 'tibetan-1-column
  "Tibetan 1 column glyph"
  :short-name "Tibetan 1-col"
  :long-name "Tibetan 1 column"
  :code-space [33 126 33 37]
  :code-offset #x190000
  :iso-final-char ?8
  :emacs-mule-id 241
  :supplementary-p t)
;; Subsets of Unicode.
(define-charset 'mule-unicode-2500-33ff
  "Unicode characters of the range U+2500..U+33FF."
  :short-name "Unicode subset 2"
  :long-name "Unicode subset (U+2500..U+33FF)"
  :code-space [#x20 #x7f #x20 #x47]
  :code-offset #x2500
  :iso-final-char ?2
  :emacs-mule-id 242
  :supplementary-p t)
;; The long name used to read "U+E000+FFFF"; it now uses the same
;; "U+XXXX..U+YYYY" range notation as the sibling Unicode subsets.
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000
  ;; 30015 = #x753F, the code point that maps to U+FFFF.
  :max-code 30015)
(define-charset 'mule-unicode-0100-24ff
  "Unicode characters of the range U+0100..U+24FF."
  :short-name "Unicode subset"
  :long-name "Unicode subset (U+0100..U+24FF)"
  :code-space [#x20 #x7F #x20 #x7F]
  :code-offset #x100
  :iso-final-char ?1
  :emacs-mule-id 244
  :supplementary-p t)
;; Individual Unicode planes, each a 256x256 code space placed at the
;; plane's first code point.
(define-charset 'unicode-bmp
  "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
  :short-name "Unicode BMP"
  :code-offset #x00000
  :code-space [#x00 #xFF #x00 #xFF])

;; NOTE(review): the short name below ends in a space, unlike its
;; siblings; kept as is, but it looks unintentional.
(define-charset 'unicode-smp
  "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
  :short-name "Unicode SMP "
  :code-offset #x10000
  :code-space [#x00 #xFF #x00 #xFF])

(define-charset 'unicode-sip
  "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
  :short-name "Unicode SIP"
  :code-offset #x20000
  :code-space [#x00 #xFF #x00 #xFF])

(define-charset 'unicode-ssp
  "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
  :short-name "Unicode SSP"
  :code-offset #xE0000
  :code-space [#x00 #xFF #x00 #xFF])
(define-charset 'ethiopic
  "Ethiopic characters for Amharic and Tigrigna."
  :short-name "Ethiopic"
  :long-name "Ethiopic characters"
  :code-space [33 126 33 126]
  :code-offset #x1A0000
  :unify-map "MULE-ethiopic"
  :iso-final-char ?3
  :emacs-mule-id 245
  :supplementary-p t)

(define-charset 'mac-roman
  "Mac Roman charset"
  :short-name "Mac Roman"
  :code-space [0 255]
  :map "MACINTOSH"
  :ascii-compatible-p t)
;; EBCDIC variants.  None of them is marked ASCII compatible.
;; Fixme: modern EBCDIC variants, e.g. IBM00924?
(define-charset 'ebcdic-us
  "US version of EBCDIC"
  :short-name "EBCDIC-US"
  :mime-charset 'ebcdic-us
  :map "EBCDICUS"
  :code-space [0 255])

(define-charset 'ebcdic-uk
  "UK version of EBCDIC"
  :short-name "EBCDIC-UK"
  :mime-charset 'ebcdic-uk
  :map "EBCDICUK"
  :code-space [0 255])

(define-charset 'ibm038
  "International version of EBCDIC"
  :short-name "IBM038"
  :mime-charset 'ibm038
  :map "IBM038"
  :code-space [0 255])
(define-charset-alias 'ebcdic-int 'ibm038)
(define-charset-alias 'cp038 'ibm038)

(define-charset 'ibm1047
  ;; Says groff:
  "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
  :short-name "IBM1047"
  :mime-charset 'ibm1047
  :map "IBM1047"
  :code-space [0 255])
(define-charset-alias 'cp1047 'ibm1047)
;; HP Roman-8.  The docstring used to misspell the company name as
;; "Hewlet-Packard".
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "HP-ROMAN8")
;; To make a coding system with this, a pre-write-conversion should
;; account for the commented-out multi-valued code points in
;; stdenc.map.
(define-charset 'adobe-standard-encoding
  "Adobe `standard encoding' used in PostScript"
  :short-name "ADOBE-STANDARD-ENCODING"
  :map "stdenc"
  :code-space [#x20 #xFF])

(define-charset 'symbol
  "Adobe symbol encoding used in PostScript"
  :short-name "ADOBE-SYMBOL"
  :map "symbol"
  :code-space [#x20 #xFF])
;;; Single-byte, ASCII-compatible code pages backed by map files.

(define-charset 'ibm850
  "DOS codepage 850 (Latin-1)"
  :short-name "IBM850"
  :code-space [0 255]
  :map "IBM850"
  :ascii-compatible-p t)
(define-charset-alias 'cp850 'ibm850)

(define-charset 'mik
  "Bulgarian DOS codepage"
  :short-name "MIK"
  :code-space [0 255]
  :map "MIK"
  :ascii-compatible-p t)

(define-charset 'ptcp154
  "ParaType codepage (Asian Cyrillic)"
  :short-name "PT154"
  :mime-charset 'pt154
  :code-space [0 255]
  :map "PTCP154"
  :ascii-compatible-p t)
;; Other names under which PTCP154 is known.
(dolist (alias '(pt154 cp154))
  (define-charset-alias alias 'ptcp154))
;;; GB18030 component charsets (part 1).
;; The 2-byte part and the 4-byte BMP part are map-based.  The 4-byte
;; SMP part is an offset charset: its codes between :min-code and
;; :max-code are placed from #x10000 upward.

(define-charset 'gb18030-2-byte
  "GB18030 2-byte (0x814E..0xFEFE)"
  :code-space [#x40 #xFE #x81 #xFE]
  :map "GB180302"
  :supplementary-p t)

(define-charset 'gb18030-4-byte-bmp
  "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
  :map "GB180304"
  :supplementary-p t)

(define-charset 'gb18030-4-byte-smp
  "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
  :min-code '(#x9030 . #x8130)
  :max-code '(#xE332 . #x9A35)
  :code-offset #x10000
  :supplementary-p t)
;;; GB18030 4-byte extension ranges.
;; Both are offset charsets.  The trailing comment on each :code-offset
;; line records the last character code the range reaches.  Because that
;; comment runs to the end of the line, the closing parenthesis of each
;; form has to sit on a line of its own.

(define-charset 'gb18030-4-byte-ext-1
  "GB18030 4-byte (0x8431A530-0x8F39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
  :min-code '(#x8431 . #xA530)
  :max-code '(#x8F39 . #xFE39)
  :supplementary-p t
  :code-offset #x200000 ; ... #x22484B
  )

(define-charset 'gb18030-4-byte-ext-2
  "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
  :min-code '(#xE332 . #x9A36)
  :max-code '(#xFE39 . #xFE39)
  :supplementary-p t
  :code-offset #x22484C ; ... #x279f93
  )
;; The complete GB18030 charset, defined as the superset of ASCII and
;; the five component charsets listed under :superset.
(define-charset 'gb18030
  "GB18030"
  :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
  :min-code 0
  :max-code '(#xFE39 . #xFE39)
  :superset '(ascii
              gb18030-2-byte
              gb18030-4-byte-bmp
              gb18030-4-byte-smp
              gb18030-4-byte-ext-1
              gb18030-4-byte-ext-2))
;; CNS 11643 plane 15: a 94x94 offset charset placed from #x27A000,
;; with "CNS-F" as the map used when the charset is unified.
(define-charset 'chinese-cns11643-15
  "CNS11643 Plane 15 Chinese Traditional"
  :short-name "CNS11643-15"
  :long-name "CNS11643-15 (Chinese traditional)"
  :unify-map "CNS-F"
  :code-space [33 126 33 126]
  :code-offset #x27A000)
;; Unify the following charsets with Unicode, one after another in the
;; order listed.
(mapc #'unify-charset
      '(chinese-gb2312
        chinese-gbk
        chinese-cns11643-1
        chinese-cns11643-2
        chinese-cns11643-3
        chinese-cns11643-4
        chinese-cns11643-5
        chinese-cns11643-6
        chinese-cns11643-7
        chinese-cns11643-15
        big5
        chinese-big5-1
        chinese-big5-2
        big5-hkscs
        korean-ksc5601
        vietnamese-viscii-lower
        vietnamese-viscii-upper
        chinese-sisheng
        ipa
        tibetan
        ethiopic
        indian-is13194
        japanese-jisx0208-1978
        japanese-jisx0208
        japanese-jisx0212
        japanese-jisx0213-1
        japanese-jisx0213-2))
;; Tables for translating characters while decoding and encoding.
;; Both are reset to nil here.
;; FIXME: these aren't used now -- should they be?
(setq standard-translation-table-for-decode nil
      standard-translation-table-for-encode nil)
1225 ;;; Make fundamental coding systems.
;; `no-conversion' and `undecided' need no definition here; coding.c
;; already defines them, in effect like this:
;;
;;   (define-coding-system 'no-conversion
;;     "..."
;;     :coding-type 'raw-text
;;     ...)
;;   (define-coding-system 'undecided
;;     "..."
;;     :coding-type 'undecided
;;     ...)
;;
;; Only convenience aliases for them are added, as (ALIAS . TARGET).
(dolist (pair '((binary . no-conversion)
                (unix   . undecided-unix)
                (dos    . undecided-dos)
                (mac    . undecided-mac)))
  (define-coding-system-alias (car pair) (cdr pair)))
;; A variant of `undecided' (same :coding-type) with the :prefer-utf-8
;; flag set; the docstring describes the resulting behavior.
(define-coding-system 'prefer-utf-8
  "Like `undecided' but prefer UTF-8 when appropriate.
On decoding, if the source contains 8-bit codes and they all
are valid UTF-8 sequences, detect the source as UTF-8 encoding
regardless of the coding priority.
On encoding, if the source contains non-ASCII characters, encode them
by UTF-8."
  :coding-type 'undecided
  :prefer-utf-8 t
  :charset-list '(emacs)
  :mnemonic ?-)
;; Coding system for text containing arbitrary 8-bit codes.  Marked
;; :for-unibyte.  The docstring has two paragraphs (encoding behavior,
;; then file visiting); the blank line between them must be kept so
;; that the help display does not run them together.
(define-coding-system 'raw-text
  "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
characters from the `eight-bit' character set. Each of them is encoded
into a single byte.

When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
  :coding-type 'raw-text
  :for-unibyte t
  :mnemonic ?t)
;; Same :coding-type as `raw-text', but with a fixed Unix EOL type and
;; without the :for-unibyte flag.
(define-coding-system 'no-conversion-multibyte
  "Like `no-conversion' but don't read a file into a unibyte buffer."
  :coding-type 'raw-text
  :mnemonic ?=
  :eol-type 'unix)
;; Charset-based coding system over `iso-8859-1', plus its usual
;; alternative names.
(define-coding-system 'iso-latin-1
  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
  :coding-type 'charset
  :mnemonic ?1
  :mime-charset 'iso-8859-1
  :charset-list '(iso-8859-1))

(dolist (alias '(iso-8859-1 latin-1))
  (define-coding-system-alias alias 'iso-latin-1))
1286 ;; Coding systems not specific to each language environment.
;; The internal text format of Emacs 21, exposed as a coding system.
(define-coding-system 'emacs-mule
  "Emacs 21 internal format used in buffer and string."
  :coding-type 'emacs-mule
  :mnemonic ?M
  :charset-list 'emacs-mule)
;;; UTF-8 and its signature (BOM) variants.
;; The three definitions differ only in :bom (absent, t, or a pair of
;; coding systems); plain `utf-8' is the only one with a MIME charset.

(define-coding-system 'utf-8
  "UTF-8 (no signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :mime-charset 'utf-8
  :charset-list '(unicode))

(define-coding-system 'utf-8-with-signature
  "UTF-8 (with signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :bom t
  :charset-list '(unicode))

(define-coding-system 'utf-8-auto
  "UTF-8 (auto-detect signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :bom '(utf-8-with-signature . utf-8)
  :charset-list '(unicode))
(define-coding-system-alias 'mule-utf-8 'utf-8)

;; Starting with Windows 10, people are trying to set their systems to
;; use UTF-8, so we had better recognize the Windows code page name for
;; it as an alias.  See:
;; https://docs.microsoft.com/en-us/windows/desktop/intl/code-page-identifiers
(define-coding-system-alias 'cp65001 'utf-8)
;; UTF-8 based coding system whose charset list is `emacs' rather than
;; `unicode'.
(define-coding-system 'utf-8-emacs
  "Support for all Emacs characters (including non-Unicode characters)."
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(emacs))

;; `emacs-internal' names the encoding used internally.  It is meant to
;; save any multibyte buffer without losing information.  It may change
;; between Emacs releases, though, so use it only for internal files.
(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
;;; UTF-16 without signature (BOM), one definition per byte order.
;; Both are flagged :mime-text-unsuitable.

(define-coding-system 'utf-16le
  "UTF-16LE (little endian, no signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :mime-charset 'utf-16le
  :mime-text-unsuitable t
  :endian 'little
  :charset-list '(unicode))

(define-coding-system 'utf-16be
  "UTF-16BE (big endian, no signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :mime-charset 'utf-16be
  :mime-text-unsuitable t
  :endian 'big
  :charset-list '(unicode))
;;; UTF-16 with signature (BOM), one definition per byte order.
;; Both use the MIME charset name `utf-16'.

(define-coding-system 'utf-16le-with-signature
  "UTF-16 (little endian, with signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :mime-charset 'utf-16
  :mime-text-unsuitable t
  :endian 'little
  :bom t
  :charset-list '(unicode))

(define-coding-system 'utf-16be-with-signature
  "UTF-16 (big endian, with signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :mime-charset 'utf-16
  :mime-text-unsuitable t
  :endian 'big
  :bom t
  :charset-list '(unicode))
;; Generic UTF-16.  :bom names the pair of signature variants used when
;; detecting the byte order; :endian is `big' for encoding.
(define-coding-system 'utf-16
  "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
  :coding-type 'utf-16
  :mnemonic ?U
  :mime-charset 'utf-16
  :mime-text-unsuitable t
  :endian 'big
  :bom '(utf-16le-with-signature . utf-16be-with-signature)
  :charset-list '(unicode))

;; Old names kept for backwards compatibility (Mule-UCS used them too).
;; The MIME names are the preferred ones.
(define-coding-system-alias 'utf-16-le 'utf-16le-with-signature)
(define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
;;; Generic 7-bit ISO 2022 coding systems.
;; All three accept every charset in the `iso-2022' list; they differ in
;; their :designation vectors and :flags.

(define-coding-system 'iso-2022-7bit
  "ISO 2022 based 7-bit encoding using only G0."
  :coding-type 'iso-2022
  :mnemonic ?J
  :charset-list 'iso-2022
  :designation [(ascii t) nil nil nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))

(define-coding-system 'iso-2022-7bit-ss2
  "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?$
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit
           designation single-shift composition))

(define-coding-system 'iso-2022-7bit-lock
  "ISO-2022 coding system using Locking-Shift for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?&
  :charset-list 'iso-2022
  :designation [(ascii 94) (nil 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl 7-bit
           designation locking-shift composition))

(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
;; Combined CJK coding system.  Unlike the generic ISO 2022 systems it
;; has an explicit charset list, and its designation vector assigns the
;; Korean and Chinese charsets to G1, G2 and G3.
(define-coding-system 'iso-2022-7bit-lock-ss2
  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
  :coding-type 'iso-2022
  :mnemonic ?i
  :charset-list '(ascii
                  japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
                  korean-ksc5601
                  chinese-gb2312
                  chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                  chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                  chinese-cns11643-7)
  :designation [(ascii 94)
                (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
                (nil chinese-cns11643-2)
                (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
                     chinese-cns11643-6 chinese-cns11643-7)]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
           single-shift init-bol))

(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
;; 8-bit counterpart of `iso-2022-7bit-ss2': the same designation
;; vector, but neither `short' nor `7-bit' among the flags.
(define-coding-system 'iso-2022-8bit-ss2
  "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?@
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
;; Generic compound text, as an ISO 2022 coding system.
(define-coding-system 'compound-text
  "Compound text based generic encoding.
This coding system is an extension of X's \"Compound Text Encoding\".
It encodes many characters using the normal ISO-2022 designation sequences,
but it doesn't support extended segments of CTEXT."
  :coding-type 'iso-2022
  :mnemonic ?x
  ;; FIXME: `x-ctext' isn't a valid MIME charset and has to be
  ;; special-cased elsewhere -- fx
  :mime-charset 'x-ctext
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl long-form
           designation locking-shift single-shift composition))

(dolist (alias '(x-ctext ctext))
  (define-coding-system-alias alias 'compound-text))
;; Same as `compound-text', except that the `long-form' and
;; `composition' flags are left out, so no composition escape sequences
;; are produced.  Used in the post-read and pre-write conversions of
;; `compound-text-with-extensions'; see mule.el.  This deliberately has
;; no :mime-charset property, to prevent it from showing up close to
;; the beginning of coding systems ordered by priority.
;;
;; The docstring keeps a blank line after its summary line, so that the
;; summary stands alone and is not fused with the detail sentence.
(define-coding-system 'ctext-no-compositions
  "Compound text based generic encoding.

Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           designation locking-shift single-shift))
;; Compound text with the conversion hooks `ctext-post-read-conversion'
;; and `ctext-pre-write-conversion' attached.  The docstring has three
;; paragraphs (summary, pointer to the controlling variables, usage
;; restriction); the blank lines between them must be kept so that the
;; summary line stands alone in help output.
(define-coding-system 'compound-text-with-extensions
  "Compound text encoding with ICCCM Extended Segment extensions.

See the variables `ctext-standard-encodings' and
`ctext-non-standard-encodings-alist' for the detail about how
extended segments are handled.

This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl long-form
           designation locking-shift single-shift)
  :post-read-conversion 'ctext-post-read-conversion
  :pre-write-conversion 'ctext-pre-write-conversion
  :mime-charset 'x-ctext)

(define-coding-system-alias
  'x-ctext-with-extensions 'compound-text-with-extensions)
(define-coding-system-alias
  'ctext-with-extensions 'compound-text-with-extensions)
;; Charset-based coding system restricted to `ascii', with `?' as the
;; default character.
(define-coding-system 'us-ascii
  "Encode ASCII as-is and encode non-ASCII characters to `?'."
  :coding-type 'charset
  :mnemonic ?-
  :mime-charset 'us-ascii
  :charset-list '(ascii)
  :default-char ??)

(define-coding-system-alias 'iso-safe 'us-ascii)
;;; UTF-7 and its IMAP variant.
;; Both are declared with coding type `utf-8' and rely on their
;; post-read and pre-write conversion functions.  Only plain `utf-7'
;; has a MIME charset.

(define-coding-system 'utf-7
  "UTF-7 encoding of Unicode (RFC 2152)."
  :coding-type 'utf-8
  :mnemonic ?U
  :mime-charset 'utf-7
  :charset-list '(unicode)
  :post-read-conversion 'utf-7-post-read-conversion
  :pre-write-conversion 'utf-7-pre-write-conversion)

(define-coding-system 'utf-7-imap
  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
  :coding-type 'utf-8
  :mnemonic ?u
  :charset-list '(unicode)
  :post-read-conversion 'utf-7-imap-post-read-conversion
  :pre-write-conversion 'utf-7-imap-pre-write-conversion)
;; Unless another coding system is specified explicitly, terminal
;; output uses `us-ascii'.
(set-safe-terminal-coding-system-internal 'us-ascii)
1533 ;; The other coding-systems are defined in each language specific
1534 ;; files under lisp/language.
;; Normally, set coding system to `undecided' before reading a file.
;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
;; but we regard them as containing multibyte characters.
;; Tar files are not decoded at all, but we treat them as raw bytes.
;;
;; Each entry is (REGEXP . VALUE); every REGEXP string is passed
;; through `purecopy'.  The catch-all "" entry comes last.

(setq file-coding-system-alist
      (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
              '(("\\.elc\\'" . utf-8-emacs)
                ("\\.el\\'" . prefer-utf-8)
                ("\\.utf\\(-8\\)?\\'" . utf-8)
                ("\\.xml\\'" . xml-find-file-coding-system)
                ;; We use raw-text for reading loaddefs.el so that if it
                ;; happens to have DOS or Mac EOLs, they are converted to
                ;; newlines.  This is required to make the special treatment
                ;; of the "\ newline" combination in loaddefs.el, which marks
                ;; the beginning of a doc string, work.
                ;; The dot is escaped so that only a file really named
                ;; "loaddefs.el" matches, not e.g. "loaddefs-el".
                ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
                ("\\.tar\\'" . (no-conversion . no-conversion))
                ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
                ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
                ("" . (undecided . nil)))))
;;; Setting coding categories and their priorities.

;; This setting is only there so that Emacs Lisp source files containing
;; multilingual text can be read while dumping Emacs.  More appropriate
;; values are set by the command `set-language-environment' for each
;; language environment.

(set-coding-system-priority
 'iso-latin-1
 'utf-8
 'iso-2022-7bit)
1573 ;;; Miscellaneous settings.
;; Bind every character from 128 up to `max-char' to
;; `self-insert-command' in the char-table that is the second element
;; of `global-map', so that all multibyte characters self-insert.
(let ((keymap-char-table (nth 1 global-map))
      (non-ascii-range (cons 128 (max-char))))
  (set-char-table-range keymap-char-table
                        non-ascii-range
                        'self-insert-command))
;; Set the `latin-extra-code-table' slots for the six consecutive codes
;; ?\221 through ?\226 (octal) to t.
(dolist (code (number-sequence ?\221 ?\226))
  (aset latin-extra-code-table code t))
;; User option holding the words that mean "password" in many languages.
;; The default value is one quoted list: the English forms first, then
;; the translations, each followed by a comment naming its language
;; code(s).  The list must be closed before the docstring, otherwise the
;; docstring and keyword arguments end up inside the quoted list.
(defcustom password-word-equivalents
  '("password" "passcode" "passphrase" "pass phrase" "pin"
    ; These are sorted according to the GNU en_US locale.
    "암호"		; ko
    "パスワード"	; ja
    "ପ୍ରବେଶ ସଙ୍କେତ"	; or
    "ពាក្យសម្ងាត់"		; km
    "adgangskode"	; da
    "contraseña"	; es
    "contrasenya"	; ca
    "geslo"		; sl
    "hasło"		; pl
    "heslo"		; cs, sk
    "iphasiwedi"	; zu
    "jelszó"		; hu
    "lösenord"		; sv
    "lozinka"		; hr, sr
    "mật khẩu"		; vi
    "mot de passe"	; fr
    "parola"		; tr
    "pasahitza"		; eu
    "passord"		; nb
    "passwort"		; de
    "pasvorto"		; eo
    "salasana"		; fi
    "senha"		; pt
    "slaptažodis"	; lt
    "wachtwoord"	; nl
    "كلمة السر"		; ar
    "ססמה"		; he
    "лозинка"		; sr
    "пароль"		; kk, ru, uk
    "गुप्तशब्द"		; mr
    "शब्दकूट"		; hi
    "પાસવર્ડ"		; gu
    "సంకేతపదము"		; te
    "ਪਾਸਵਰਡ"		; pa
    "ಗುಪ್ತಪದ"		; kn
    "கடவுச்சொல்"		; ta
    "അടയാളവാക്ക്"		; ml
    "গুপ্তশব্দ"		; as
    "পাসওয়ার্ড"		; bn_IN
    "රහස්පදය"		; si
    "密码"		; zh_CN
    "密碼"		; zh_TW
    )
  "List of words equivalent to \"password\".
This is used by Shell mode and other parts of Emacs to recognize
password prompts, including prompts in languages other than
English. Different case choices should not be assumed to be
included; callers should bind `case-fold-search' to t."
  :type '(repeat string)
  :version "27.1"
  :group 'processes)
;; Coding systems based on the charsets defined in this file have made
;; the old code-pages library obsolete.  User code might still require
;; it, though, so the feature is provided here.
(provide 'code-pages)
1647 ;;; mule-conf.el ends here