lisp/international/mule-conf.el

   1 ;;; mule-conf.el --- configure multilingual environment
   2
   3 ;; Copyright (C) 1997-2019 Free Software Foundation, Inc.
   4 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   5 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   6 ;;   Registration Number H14PRO021
   7 ;; Copyright (C) 2003
   8 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   9 ;;   Registration Number H13PRO009
  10
  11 ;; Keywords: i18n, mule, multilingual, character set, coding system
  12
  13 ;; This file is part of GNU Emacs.
  14
  15 ;; GNU Emacs is free software: you can redistribute it and/or modify
  16 ;; it under the terms of the GNU General Public License as published by
  17 ;; the Free Software Foundation, either version 3 of the License, or
  18 ;; (at your option) any later version.
  19
  20 ;; GNU Emacs is distributed in the hope that it will be useful,
  21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  23 ;; GNU General Public License for more details.
  24
  25 ;; You should have received a copy of the GNU General Public License
  26 ;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
  27
  28 ;;; Commentary:
  29
  30 ;; This file defines the Emacs charsets and some basic coding systems.
  31 ;; Other coding systems are defined in the files in directory
  32 ;; lisp/language.
  33
  34 ;;; Code:
  35
  36 ;;; Remarks
  37
  38 ;; The ISO-IR registry is maintained by the Information Processing
  39 ;; Society of Japan/Information Technology Standards Commission of
  40 ;; Japan (IPSJ/ITSCJ) at https://www.itscj.ipsj.or.jp/itscj_english/.
  41 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
  42 ;; http://www.ecma.ch/.
  43
  44 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
  45 ;; MS Windows, which are presumably the only charsets we really need
  46 ;; to worry about on such systems:
  47 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
  48 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
  49 ;;                      1258, 874, 932, 936, 949, 950
  50
  51 ;;; Definitions of character sets.
  52
  53 ;; The charsets `ascii', `unicode' and `eight-bit' are already defined
  54 ;; in charset.c as below:
  55 ;;
  56 ;; (define-charset 'ascii
  57 ;;   ""
  58 ;;   :dimension 1
  59 ;;   :code-space [0 127]
  60 ;;   :iso-final-char ?B
  61 ;;   :ascii-compatible-p t
  62 ;;   :emacs-mule-id 0
  63 ;;   :code-offset 0)
  64 ;;
  65 ;; (define-charset 'unicode
  66 ;;   ""
  67 ;;   :dimension 3
  68 ;;   :code-space [0 255 0 255 0 16]
  69 ;;   :ascii-compatible-p t
  70 ;;   :code-offset 0)
  71 ;;
  72 ;; (define-charset 'emacs
  73 ;;   ""
  74 ;;   :dimension 3
  75 ;;   :code-space [0 255 0 255 0 63]
  76 ;;   :ascii-compatible-p t
  77 ;;   :supplementary-p t
  78 ;;   :code-offset 0)
  79 ;;
  80 ;; (define-charset 'eight-bit
  81 ;;   ""
  82 ;;   :dimension 1
  83 ;;   :code-space [128 255]
  84 ;;   :code-offset #x3FFF80)
  85 ;;
  86 ;; We now set :docstring, :short-name, and :long-name properties.
  87
  88 ;; Descriptive properties for `ascii', `iso-8859-1', `unicode', `emacs'
  89 ;; and `eight-bit'; each call below sets a single property.
  90 ;; ascii
  91 (put-charset-property 'ascii :docstring "ASCII (ISO646 IRV)")
  92 (put-charset-property 'ascii :short-name "ASCII")
  93 (put-charset-property 'ascii :long-name "ASCII (ISO646 IRV)")
  94
  95 ;; iso-8859-1
  96 (put-charset-property 'iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)")
  97 (put-charset-property 'iso-8859-1 :short-name "Latin-1")
  98 (put-charset-property 'iso-8859-1 :long-name "Latin-1")
  99
 100 ;; unicode
 101 (put-charset-property 'unicode :docstring "Unicode (ISO10646)")
 102 (put-charset-property 'unicode :short-name "Unicode")
 103 (put-charset-property 'unicode :long-name "Unicode (ISO10646)")
 104
 105 ;; emacs
 106 (put-charset-property
 107  'emacs :docstring "Full Emacs charset (excluding eight bit chars)")
 108 (put-charset-property 'emacs :short-name "Emacs")
 109 (put-charset-property 'emacs :long-name "Emacs")
 110
 111 ;; eight-bit: only :docstring and :short-name are set here, no
 112 ;; :long-name.
 113 (put-charset-property 'eight-bit :docstring "Raw bytes 128-255")
 114 (put-charset-property 'eight-bit :short-name "Raw bytes")
 115
 116 (define-charset-alias 'ucs 'unicode)    ; `ucs' is an alias of `unicode'
 117
 118 (define-charset 'latin-iso8859-1
 119   "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
 120   :short-name "RHP of Latin-1"
 121   :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
 122   :code-space [#x20 #x7F]
 123   :code-offset #xA0                     ; 160: code #x20 maps to U+00A0
 124   :iso-final-char ?A
 125   :emacs-mule-id 129)
 126
 127 ;; Name perhaps not ideal, but is XEmacs-compatible.
 128 (define-charset 'control-1
 129   "8-bit control code (0x80..0x9F)"
 130   :short-name "8-bit control code"
 131   :code-space [#x80 #x9F]
 132   :code-offset #x80)
 133
 134 (define-charset 'eight-bit-control
 135   "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
 136   :short-name "Raw bytes 0x80..0x9F"
 137   :code-space [#x80 #x9F]
 138   :code-offset #x3FFF80                 ; see character.h
 139   :supplementary-p t)
 140
 141 (define-charset 'eight-bit-graphic
 142   "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
 143   :short-name "Raw bytes 0xA0..0xFF"
 144   :code-space [#xA0 #xFF]
 145   :code-offset #x3FFFA0                 ; see character.h
 146   :supplementary-p t)
 147
 148 (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
 149                                                  iso-ir iso-final
 150                                                  emacs-mule-id map)
 151   "Define 8-bit charset SYMBOL from MAP, described by NAME and NICKNAME.
 152 If ISO-SYMBOL is non-nil, also define it as codes 160..255 of SYMBOL
 153 shifted by -128, using ISO-IR, ISO-FINAL and EMACS-MULE-ID."
 154   `(progn
 155      (define-charset ,symbol ,name
 156        :short-name ,nickname :long-name ,name
 157        :ascii-compatible-p t
 158        :code-space [0 255]
 159        :map ,map)
 160      (when ,iso-symbol
 161        (define-charset ,iso-symbol
 162          (if ,iso-ir
 163              (format "Right-Hand Part of %s (%s): ISO-IR-%d"
 164                      ,name ,nickname ,iso-ir)
 165            (format "Right-Hand Part of %s (%s)" ,name ,nickname))
 166          :short-name (format "RHP of %s" ,name)
 167          :long-name (format "RHP of %s (%s)" ,name ,nickname)
 168          :iso-final-char ,iso-final :emacs-mule-id ,emacs-mule-id
 169          :code-space [32 127]
 170          :subset (list ,symbol 160 255 -128)))))
 171
 172 (define-iso-single-byte-charset 'iso-8859-2
 173   'latin-iso8859-2 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
 174
 175 (define-iso-single-byte-charset 'iso-8859-3
 176   'latin-iso8859-3 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
 177
 178 (define-iso-single-byte-charset 'iso-8859-4
 179   'latin-iso8859-4 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
 180
 181 (define-iso-single-byte-charset 'iso-8859-5
 182   'cyrillic-iso8859-5 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
 183
 184 (define-iso-single-byte-charset 'iso-8859-6
 185   'arabic-iso8859-6 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
 186
 187 (define-iso-single-byte-charset 'iso-8859-7
 188   'greek-iso8859-7 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
 189
 190 (define-iso-single-byte-charset 'iso-8859-8
 191   'hebrew-iso8859-8 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
 192
 193 (define-iso-single-byte-charset 'iso-8859-9
 194   'latin-iso8859-9 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
 195
 196 (define-iso-single-byte-charset 'iso-8859-10
 197   'latin-iso8859-10 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
 198
 199 ;; http://www.nectec.or.th/it-standards/iso8859-11/
 200 ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
 201 ;; plus nbsp
 202 (define-iso-single-byte-charset 'iso-8859-11
 203   'thai-iso8859-11 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
 204
 205 ;; 8859-12 doesn't (yet?) exist.
 206
 207 (define-iso-single-byte-charset 'iso-8859-13
 208   'latin-iso8859-13 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
 209
 210 (define-iso-single-byte-charset 'iso-8859-14
 211   'latin-iso8859-14 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
 212
 213 (define-iso-single-byte-charset 'iso-8859-15
 214   'latin-iso8859-15 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
 215
 216 (define-iso-single-byte-charset 'iso-8859-16
 217   'latin-iso8859-16 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
 218
 219 ;; The macro has no further use once the charsets above exist.
 220 (fmakunbound 'define-iso-single-byte-charset)
 221
 222 ;; Can this be shared with 8859-11?
 223 ;; N.b. not all of these are defined in Unicode.
 224 (define-charset 'thai-tis620
 225   "MULE charset for TIS620.2533"
 226   :short-name "TIS620.2533"
 227   :code-space [32 127]
 228   :code-offset #x0E00
 229   :iso-final-char ?T
 230   :emacs-mule-id 133)
 231
 232 (define-charset 'tis620-2533
 233   "TIS620.2533, a.k.a. TIS-620.  Like `thai-iso8859-11', but without NBSP."
 234   :short-name "TIS620.2533"
 235   :ascii-compatible-p t
 236   :code-space [0 255]                   ; 128..159 via `eight-bit-control'
 237   :superset '(ascii eight-bit-control (thai-tis620 . 128)))
 238
 239 (define-charset 'jisx0201
 240   "JISX0201"
 241   :short-name "JISX0201"
 242   :code-space [#x00 #xDF]
 243   :map "JISX0201")
 244
 245 (define-charset 'latin-jisx0201
 246   "Roman Part of JISX0201.1976"
 247   :short-name "JISX0201 Roman"
 248   :long-name "Japanese Roman (JISX0201.1976)"
 249   :code-space [33 126]
 250   :subset '(jisx0201 33 126 0)
 251   :iso-final-char ?J
 252   :emacs-mule-id 138
 253   :supplementary-p t)
 254
 255 (define-charset 'katakana-jisx0201
 256   "Katakana Part of JISX0201.1976"
 257   :short-name "JISX0201 Katakana"
 258   :long-name "Japanese Katakana (JISX0201.1976)"
 259   :code-space [33 126]
 260   :subset '(jisx0201 161 254 -128)
 261   :iso-final-char ?I
 262   :emacs-mule-id 137
 263   :supplementary-p t)
 264
 265 (define-charset 'chinese-gb2312
 266   "GB2312 Chinese simplified: ISO-IR-58"
 267   :short-name "GB2312"
 268   :long-name "GB2312: ISO-IR-58"
 269   :code-space [33 126 33 126]
 270   :code-offset #x110000
 271   :unify-map "GB2312"
 272   :iso-final-char ?A
 273   :emacs-mule-id 145)
 274
 275 (define-charset 'chinese-gbk
 276   "GBK Chinese simplified."
 277   :short-name "GBK"
 278   :code-space [#x40 #xFE #x81 #xFE]
 279   :code-offset #x160000
 280   :unify-map "GBK")
 281 (define-charset-alias 'windows-936 'chinese-gbk)
 282 (define-charset-alias 'cp936 'chinese-gbk)
 283
 284 (define-charset 'chinese-cns11643-1
 285   "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
 286   :short-name "CNS11643-1"
 287   :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
 288   :code-space [33 126 33 126]
 289   :code-offset #x114000
 290   :unify-map "CNS-1"
 291   :iso-final-char ?G
 292   :emacs-mule-id 149)
 293
 294 (define-charset 'chinese-cns11643-2
 295   "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
 296   :short-name "CNS11643-2"
 297   :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
 298   :code-space [33 126 33 126]
 299   :code-offset #x118000
 300   :unify-map "CNS-2"
 301   :iso-final-char ?H
 302   :emacs-mule-id 150)
 303
 304 (define-charset 'chinese-cns11643-3
 305   "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
 306   :short-name "CNS11643-3"
 307   :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
 308   :code-space [33 126 33 126]
 309   :code-offset #x11C000
 310   :unify-map "CNS-3"
 311   :iso-final-char ?I
 312   :emacs-mule-id 246)
 313
 314 (define-charset 'chinese-cns11643-4
 315   "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
 316   :short-name "CNS11643-4"
 317   :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
 318   :code-space [33 126 33 126]
 319   :code-offset #x120000
 320   :unify-map "CNS-4"
 321   :iso-final-char ?J
 322   :emacs-mule-id 247)
 323
 324 (define-charset 'chinese-cns11643-5
 325   "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
 326   :short-name "CNS11643-5"
 327   :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
 328   :code-space [33 126 33 126]
 329   :code-offset #x124000
 330   :unify-map "CNS-5"
 331   :iso-final-char ?K
 332   :emacs-mule-id 248)
 333
 334 (define-charset 'chinese-cns11643-6
 335   "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
 336   :short-name "CNS11643-6"
 337   :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
 338   :code-space [33 126 33 126]
 339   :code-offset #x128000
 340   :unify-map "CNS-6"
 341   :iso-final-char ?L
 342   :emacs-mule-id 249)
 343
 344 (define-charset 'chinese-cns11643-7
 345   "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
 346   :short-name "CNS11643-7"
 347   :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
 348   :code-space [33 126 33 126]
 349   :code-offset #x12C000
 350   :unify-map "CNS-7"
 351   :iso-final-char ?M
 352   :emacs-mule-id 250)
 353
 354 (define-charset 'big5
 355   "Big5 (Chinese traditional)"
 356   :short-name "Big5"
 357   :code-space [#x40 #xFE #xA1 #xFE]
 358   :code-offset #x130000
 359   :unify-map "BIG5")
 360 ;; Fixme: AKA cp950 according to
 361 ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>.  Is
 362 ;; that correct?
 363
 364 (define-charset 'chinese-big5-1
 365   "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
 366   :short-name "Big5 (Level-1)"
 367   :long-name "Big5 (Level-1) A141-C67E"  ; same range as the docstring
 368   :iso-final-char ?0
 369   :emacs-mule-id 152
 370   :supplementary-p t
 371   :code-space [#x21 #x7E #x21 #x7E]
 372   :code-offset #x135000
 373   :unify-map "BIG5-1")
 374
 375 (define-charset 'chinese-big5-2
 376   "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
 377   :short-name "Big5 (Level-2)"
 378   :long-name "Big5 (Level-2) C940-FEFE"
 379   :code-space [#x21 #x7E #x21 #x7E]
 380   :code-offset #x137800
 381   :unify-map "BIG5-2"
 382   :iso-final-char ?1
 383   :emacs-mule-id 153
 384   :supplementary-p t)
 385
 386 (define-charset 'japanese-jisx0208
 387   "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
 388   :short-name "JISX0208"
 389   :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
 390   :code-space [33 126 33 126]
 391   :code-offset #x140000
 392   :unify-map "JISX0208"
 393   :iso-final-char ?B
 394   :emacs-mule-id 146)
 395
 396 (define-charset 'japanese-jisx0208-1978
 397   "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
 398   :short-name "JISX0208.1978"
 399   :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
 400   :code-space [33 126 33 126]
 401   :code-offset #x144000
 402   :unify-map "JISC6226"
 403   :iso-final-char ?@
 404   :emacs-mule-id 144)
 405
 406 (define-charset 'japanese-jisx0212
 407   "JISX0212 Japanese supplement: ISO-IR-159"
 408   :short-name "JISX0212"
 409   :long-name "JISX0212 (Japanese): ISO-IR-159"
 410   :code-space [33 126 33 126]
 411   :code-offset #x148000
 412   :unify-map "JISX0212"
 413   :iso-final-char ?D
 414   :emacs-mule-id 148)
 415
 416 ;; Note that jisx0213 contains characters not in Unicode (3.2?).  It's
 417 ;; arguable whether it should have a unify-map.
 418 (define-charset 'japanese-jisx0213-1
 419   "JISX0213.2000 Plane 1 (Japanese)"
 420   :short-name "JISX0213-1"
 421   :code-space [33 126 33 126]
 422   :code-offset #x14C000
 423   :unify-map "JISX2131"
 424   :iso-final-char ?O
 425   :emacs-mule-id 151)
 426
 427 (define-charset 'japanese-jisx0213-2
 428   "JISX0213.2000 Plane 2 (Japanese)"
 429   :short-name "JISX0213-2"
 430   :code-space [33 126 33 126]
 431   :code-offset #x150000
 432   :unify-map "JISX2132"
 433   :iso-final-char ?P
 434   :emacs-mule-id 254)
 435
 436 (define-charset 'japanese-jisx0213-a
 437   "JISX0213.2004 adds these characters to JISX0213.2000."
 438   :short-name "JISX0213A"
 439   :dimension 2
 440   :code-space [33 126 33 126]
 441   :map "JISX213A"
 442   :supplementary-p t)
 443
 444 (define-charset 'japanese-jisx0213.2004-1
 445   "JISX0213.2004 Plane1 (Japanese)"
 446   :short-name "JISX0213.2004-1"
 447   :dimension 2
 448   :code-space [33 126 33 126]
 449   :superset '(japanese-jisx0213-a japanese-jisx0213-1)
 450   :iso-final-char ?Q)
 451
 452 (define-charset 'katakana-sjis
 453   "Katakana part of Shift-JIS"
 454   :dimension 1
 455   :code-space [#xA1 #xDF]
 456   :subset '(jisx0201 #xA1 #xDF 0)       ; same codes as in `jisx0201'
 457   :supplementary-p t)
 458
 459 (define-charset 'cp932-2-byte
 460   "2-byte part of CP932"
 461   :dimension 2
 462   :code-space [#x40 #xFC #x81 #xFC]
 463   :map "CP932-2BYTE"
 464   :supplementary-p t)
 465
 466 (define-charset 'cp932
 467   "CP932 (Microsoft shift-jis)"
 468   :short-name "CP932"
 469   :code-space [#x00 #xFF #x00 #xFE]
 470   :superset '(ascii katakana-sjis cp932-2-byte))
 471
 472 (define-charset 'korean-ksc5601
 473   "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
 474   :short-name "KSC5601"
 475   :long-name "KSC5601 (Korean): ISO-IR-149"
 476   :code-space [33 126 33 126]
 477   :code-offset #x279f94                 ; ... #x27c217
 478   :unify-map "KSC5601"
 479   :iso-final-char ?C
 480   :emacs-mule-id 147)
 481
 482 (define-charset 'big5-hkscs
 483   "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
 484   :short-name "Big5"
 485   :code-space [#x40 #xFE #xA1 #xFE]
 486   :code-offset #x27c218                 ; ... #x280839
 487   :unify-map "BIG5-HKSCS")
 488
 489 (define-charset 'cp949-2-byte
 490   "2-byte part of CP949"
 491   :dimension 2
 492   :code-space [#x41 #xFE #x81 #xFD]
 493   :map "CP949-2BYTE"
 494   :supplementary-p t)
 495
 496 (define-charset 'cp949
 497   "CP949 (Korean)"
 498   :short-name "CP949"
 499   :long-name "CP949 (Korean)"
 500   :code-space [#x00 #xFE #x00 #xFD]
 501   :superset '(ascii cp949-2-byte))
 502
 503 (define-charset 'chinese-sisheng
 504   "SiSheng characters for PinYin/ZhuYin"
 505   :short-name "SiSheng"
 506   :long-name "SiSheng (PinYin/ZhuYin)"
 507   :code-space [33 126]
 508   :code-offset #x200000
 509   :unify-map "MULE-sisheng"
 510   :iso-final-char ?0
 511   :emacs-mule-id 160
 512   :supplementary-p t)
 513
 514 ;; A subset of the 1989 version of IPA.  It consists of the consonant
 515 ;; signs used in English, French, German and Italian, and all vowels
 516 ;; signs in the table.  [says old MULE doc]
 517 (define-charset 'ipa
 518   "IPA (International Phonetic Association)"
 519   :short-name "IPA"
 520   :code-space [32 127]
 521   :code-offset #x200080
 522   :unify-map "MULE-ipa"
 523   :iso-final-char ?0
 524   :emacs-mule-id 161
 525   :supplementary-p t)
 526
 527 (define-charset 'viscii
 528   "VISCII1.1"
 529   :short-name "VISCII"
 530   :long-name "VISCII 1.1"
 531   :code-space [#x00 #xFF]
 532   :map "VISCII")
 533
 534 (define-charset 'vietnamese-viscii-lower
 535   "VISCII1.1 lower-case"
 536   :short-name "VISCII lower"
 537   :long-name "VISCII lower-case"
 538   :code-space [32 127]
 539   :code-offset #x200200
 540   :unify-map "MULE-lviscii"
 541   :iso-final-char ?1
 542   :emacs-mule-id 162
 543   :supplementary-p t)
 544
 545 (define-charset 'vietnamese-viscii-upper
 546   "VISCII1.1 upper-case"
 547   :short-name "VISCII upper"
 548   :long-name "VISCII upper-case"
 549   :code-space [32 127]
 550   :code-offset #x200280
 551   :unify-map "MULE-uviscii"
 552   :iso-final-char ?2
 553   :emacs-mule-id 163
 554   :supplementary-p t)
 555
 556 (define-charset 'vscii
 557   "VSCII1.1 (TCVN-5712 VN1)"
 558   :short-name "VSCII"
 559   :code-space [#x00 #xFF]
 560   :map "VSCII")
 561
 562 (define-charset-alias 'tcvn-5712 'vscii)
 563
 564 ;; Fixme: see note in tcvn.map about combining characters
 565 (define-charset 'vscii-2
 566   "VSCII-2 (TCVN-5712 VN2)"
 567   :code-space [#x00 #xFF]
 568   :map "VSCII-2")
 569
 570 (define-charset 'koi8-r
 571   "KOI8-R"
 572   :short-name "KOI8-R"
 573   :code-space [#x00 #xFF]
 574   :map "KOI8-R"
 575   :ascii-compatible-p t)
 576
 577 (define-charset-alias 'koi8 'koi8-r)
 578
 579 (define-charset 'alternativnyj
 580   "ALTERNATIVNYJ"
 581   :short-name "alternativnyj"
 582   :code-space [#x00 #xFF]
 583   :map "ALTERNATIVNYJ"
 584   :ascii-compatible-p t)
 585
 586 (define-charset 'cp866
 587   "CP866"
 588   :short-name "cp866"
 589   :code-space [#x00 #xFF]
 590   :map "IBM866"
 591   :ascii-compatible-p t)
 592 (define-charset-alias 'ibm866 'cp866)
 593
 594 (define-charset 'koi8-u
 595   "KOI8-U"
 596   :short-name "KOI8-U"
 597   :code-space [#x00 #xFF]
 598   :map "KOI8-U"
 599   :ascii-compatible-p t)
 600
 601 (define-charset 'koi8-t
 602   "KOI8-T"
 603   :short-name "KOI8-T"
 604   :code-space [#x00 #xFF]
 605   :map "KOI8-T"
 606   :ascii-compatible-p t)
 607
 608 (define-charset 'georgian-ps
 609   "GEORGIAN-PS"
 610   :short-name "GEORGIAN-PS"
 611   :code-space [#x00 #xFF]
 612   :map "KA-PS"
 613   :ascii-compatible-p t)
 614
 615 (define-charset 'georgian-academy
 616   "GEORGIAN-ACADEMY"
 617   :short-name "GEORGIAN-ACADEMY"
 618   :code-space [#x00 #xFF]
 619   :map "KA-ACADEMY"
 620   :ascii-compatible-p t)
 621
 622 (define-charset 'windows-1250
 623   "WINDOWS-1250 (Central Europe)"
 624   :short-name "WINDOWS-1250"
 625   :code-space [#x00 #xFF]
 626   :map "CP1250"
 627   :ascii-compatible-p t)
 628 (define-charset-alias 'cp1250 'windows-1250)
 629
 630 (define-charset 'windows-1251
 631   "WINDOWS-1251 (Cyrillic)"
 632   :short-name "WINDOWS-1251"
 633   :code-space [#x00 #xFF]
 634   :map "CP1251"
 635   :ascii-compatible-p t)
 636 (define-charset-alias 'cp1251 'windows-1251)
 637
 638 (define-charset 'windows-1252
 639   "WINDOWS-1252 (Latin I)"
 640   :short-name "WINDOWS-1252"
 641   :code-space [#x00 #xFF]
 642   :map "CP1252"
 643   :ascii-compatible-p t)
 644 (define-charset-alias 'cp1252 'windows-1252)
 645
 646 (define-charset 'windows-1253
 647   "WINDOWS-1253 (Greek)"
 648   :short-name "WINDOWS-1253"
 649   :code-space [#x00 #xFF]
 650   :map "CP1253"
 651   :ascii-compatible-p t)
 652 (define-charset-alias 'cp1253 'windows-1253)
 653
 654 (define-charset 'windows-1254
 655   "WINDOWS-1254 (Turkish)"
 656   :short-name "WINDOWS-1254"
 657   :code-space [#x00 #xFF]
 658   :map "CP1254"
 659   :ascii-compatible-p t)
 660 (define-charset-alias 'cp1254 'windows-1254)
 661
 662 (define-charset 'windows-1255
 663   "WINDOWS-1255 (Hebrew)"
 664   :short-name "WINDOWS-1255"
 665   :code-space [#x00 #xFF]
 666   :map "CP1255"
 667   :ascii-compatible-p t)
 668 (define-charset-alias 'cp1255 'windows-1255)
 669
 670 (define-charset 'windows-1256
 671   "WINDOWS-1256 (Arabic)"
 672   :short-name "WINDOWS-1256"
 673   :code-space [#x00 #xFF]
 674   :map "CP1256"
 675   :ascii-compatible-p t)
 676 (define-charset-alias 'cp1256 'windows-1256)
 677
 678 (define-charset 'windows-1257
 679   "WINDOWS-1257 (Baltic)"
 680   :short-name "WINDOWS-1257"
 681   :code-space [#x00 #xFF]
 682   :map "CP1257"
 683   :ascii-compatible-p t)
 684 (define-charset-alias 'cp1257 'windows-1257)
 685
 686 (define-charset 'windows-1258
 687   "WINDOWS-1258 (Viet Nam)"
 688   :short-name "WINDOWS-1258"
 689   :code-space [#x00 #xFF]
 690   :map "CP1258"
 691   :ascii-compatible-p t)
 692 (define-charset-alias 'cp1258 'windows-1258)
 693
 694 (define-charset 'next
 695   "NEXT"
 696   :short-name "NEXT"
 697   :code-space [#x00 #xFF]
 698   :map "NEXTSTEP"
 699   :ascii-compatible-p t)
 700
 701 (define-charset 'cp1125
 702   "CP1125"
 703   :short-name "CP1125"
 704   :code-space [#x00 #xFF]
 705   :map "CP1125"
 706   :ascii-compatible-p t)
 707 (define-charset-alias 'ruscii 'cp1125)
 708 ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
 709 (define-charset-alias 'cp866u 'cp1125)
 710
 711 ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
 712 ;; shows this as not ASCII compatible, with various graphics in
 713 ;; 0x01-0x1F.
 714 (define-charset 'cp437
 715   "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
 716   :short-name "CP437"
 717   :code-space [#x00 #xFF]
 718   :map "IBM437"
 719   :ascii-compatible-p t)
 720
 721 (define-charset 'cp720
 722   "CP720 (Arabic)"
 723   :short-name "CP720"
 724   :code-space [#x00 #xFF]
 725   :map "CP720"
 726   :ascii-compatible-p t)
 727
 728 (define-charset 'cp737
 729   "CP737 (PC Greek)"
 730   :short-name "CP737"
 731   :code-space [#x00 #xFF]
 732   :map "CP737"
 733   :ascii-compatible-p t)
 734
 735 (define-charset 'cp775
 736   "CP775 (PC Baltic)"
 737   :short-name "CP775"
 738   :code-space [#x00 #xFF]
 739   :map "CP775"
 740   :ascii-compatible-p t)
 741
 742 (define-charset 'cp851
 743   "CP851 (Greek)"
 744   :short-name "CP851"
 745   :code-space [#x00 #xFF]
 746   :map "IBM851"
 747   :ascii-compatible-p t)
 748
 749 (define-charset 'cp852
 750   "CP852 (MS-DOS Latin-2)"
 751   :short-name "CP852"
 752   :code-space [#x00 #xFF]
 753   :map "IBM852"
 754   :ascii-compatible-p t)
 755
 756 (define-charset 'cp855
 757   "CP855 (IBM Cyrillic)"
 758   :short-name "CP855"
 759   :code-space [#x00 #xFF]
 760   :map "IBM855"
 761   :ascii-compatible-p t)
 762
 763 (define-charset 'cp857
 764   "CP857 (IBM Turkish)"
 765   :short-name "CP857"
 766   :code-space [#x00 #xFF]
 767   :map "IBM857"
 768   :ascii-compatible-p t)
 769
 770 (define-charset 'cp858
 771   "CP858 (Multilingual Latin I + Euro)"
 772   :short-name "CP858"
 773   :code-space [#x00 #xFF]
 774   :map "CP858"
 775   :ascii-compatible-p t)
 776 (define-charset-alias 'cp00858 'cp858)  ; IANA has IBM00858/CP00858
 777
 778 (define-charset 'cp860
 779   "CP860 (MS-DOS Portuguese)"
 780   :short-name "CP860"
 781   :code-space [#x00 #xFF]
 782   :map "IBM860"
 783   :ascii-compatible-p t)
 784
 785 (define-charset 'cp861
 786   "CP861 (MS-DOS Icelandic)"
 787   :short-name "CP861"
 788   :code-space [#x00 #xFF]
 789   :map "IBM861"
 790   :ascii-compatible-p t)
 791
 792 (define-charset 'cp862
 793   "CP862 (PC Hebrew)"
 794   :short-name "CP862"
 795   :code-space [#x00 #xFF]
 796   :map "IBM862"
 797   :ascii-compatible-p t)
 798
 799 (define-charset 'cp863
 800   "CP863 (MS-DOS Canadian French)"
 801   :short-name "CP863"
 802   :code-space [#x00 #xFF]
 803   :map "IBM863"
 804   :ascii-compatible-p t)
 805
 806 (define-charset 'cp864
 807   "CP864 (PC Arabic)"
 808   :short-name "CP864"
 809   :code-space [#x00 #xFF]
 810   :map "IBM864"
 811   :ascii-compatible-p t)
 812
 813 (define-charset 'cp865
 814   "CP865 (MS-DOS Nordic)"
 815   :short-name "CP865"
 816   :code-space [#x00 #xFF]
 817   :map "IBM865"
 818   :ascii-compatible-p t)
 819
 820 (define-charset 'cp869
 821   "CP869 (IBM Modern Greek)"
 822   :short-name "CP869"
 823   :code-space [#x00 #xFF]
 824   :map "IBM869"
 825   :ascii-compatible-p t)
 826
 827 (define-charset 'cp874
 828   "CP874 (IBM Thai)"
 829   :short-name "CP874"
 830   :code-space [#x00 #xFF]
 831   :map "IBM874"
 832   :ascii-compatible-p t)
 833
 834 ;; For Arabic, we need three different types of character sets.
 835 ;; Digits are of direction left-to-right and of width 1-column.
 836 ;; Others are of direction right-to-left and of width 1-column or
 837 ;; 2-column.
 838 (define-charset 'arabic-digit
 839   "Arabic digit"
 840   :short-name "Arabic digit"
 841   :code-space [34 42]
 842   :code-offset #x0600
 843   :iso-final-char ?2
 844   :emacs-mule-id 164
 845   :supplementary-p t)
 846
 847 (define-charset 'arabic-1-column
 848   "Arabic 1-column"
 849   :short-name "Arabic 1-col"
 850   :long-name "Arabic 1-column"
 851   :code-space [33 126]
 852   :code-offset #x200100
 853   :iso-final-char ?3
 854   :emacs-mule-id 165
 855   :supplementary-p t)
 856
 857 (define-charset 'arabic-2-column
 858   "Arabic 2-column"
 859   :short-name "Arabic 2-col"
 860   :long-name "Arabic 2-column"
 861   :code-space [33 126]
 862   :code-offset #x200180
 863   :iso-final-char ?4
 864   :emacs-mule-id 224
 865   :supplementary-p t)
 866
 867 ;; Lao script.
 868 ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
 869 ;; Not all of them are defined in Unicode.
 870 (define-charset 'lao
 871   "Lao characters (ISO10646 0E81..0EDF)"
 872   :short-name "Lao"
 873   :code-space [33 126]
 874   :code-offset #x0E81
 875   :iso-final-char ?1
 876   :emacs-mule-id 167
 877   :supplementary-p t)
 878
 879 (define-charset 'mule-lao
 880   "Lao characters (ISO10646 0E81..0EDF)"
 881   :short-name "Lao"
 882   :code-space [#x00 #xFF]
 883   :superset '(ascii eight-bit-control (lao . 128))
 884   :supplementary-p t)
 885
 886
 887 ;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
 888 ;; not assigned.  They are automatically converted to each Indian
 889 ;; script which IS-13194 supports.
 890
 891 (define-charset 'indian-is13194
 892   "7-bit representation of IS 13194 (ISCII) for Devanagari"
 893   :short-name "IS 13194 (DEV)"
 894   :long-name "Indian IS 13194 (DEV)"
 895   :code-space [33 126]
 896   :code-offset #x180000
 897   :unify-map "MULE-is13194"
 898   :iso-final-char ?5
 899   :emacs-mule-id 225
 900   :supplementary-p t)
 901
 902 (let ((glyph-offset #x180100))
 903   ;; CDAC fonts: one 256-code charset per script, placed back to back.
 904   (dolist (script '(devanagari sanskrit bengali tamil telugu assamese
 905                                oriya kannada malayalam gujarati punjabi))
 906     (let ((script-label (capitalize (symbol-name script))))
 907       (define-charset (intern (format "%s-cdac" script))
 908         (format "Glyphs of %s script for CDAC font.  Subset of `indian-glyph'."
 909                 script-label)
 910         :short-name (format "CDAC %s glyphs" script-label)
 911         :supplementary-p t :code-space [0 255]
 912         :code-offset glyph-offset))
 913     (setq glyph-offset (+ glyph-offset #x100)))
 914
 915   ;; AKRUTI fonts: same layout, continuing after the last CDAC charset.
 916   (dolist (script '(devanagari bengali punjabi gujarati
 917                                oriya tamil telugu kannada malayalam))
 918     (let ((script-label (capitalize (symbol-name script))))
 919       (define-charset (intern (format "%s-akruti" script))
 920         (format "Glyphs of %s script for AKRUTI font.  Subset of `indian-glyph'."
 921                 script-label)
 922         :short-name (format "AKRUTI %s glyphs" script-label)
 923         :supplementary-p t :code-space [0 255]
 924         :code-offset glyph-offset))
 925     (setq glyph-offset (+ glyph-offset #x100))))
 926
 927 (define-charset 'indian-glyph
 928   "Glyphs for Indian characters."
 929   :short-name "Indian glyph"
 930   :code-space [#x20 #x7F #x20 #x7F]
 931   :code-offset #x180100
 932   :iso-final-char ?4
 933   :emacs-mule-id 240
 934   :supplementary-p t)
 935
 936 ;; Actual Glyph for 1-column width.
 937 (define-charset 'indian-1-column
 938   "Indian charset for 1-column width glyphs."
 939   :short-name "Indian 1-col"
 940   :long-name "Indian 1 Column"
 941   :iso-final-char ?6
 942   :emacs-mule-id 240                    ; 251 belongs to `indian-2-column'
 943   :supplementary-p t
 944   :code-space [33 126 33 126]
 945   :code-offset #x184000)
 946
 947 ;; Actual Glyph for 2-column width.
 948 (define-charset 'indian-2-column
 949   "Indian charset for 2-column width glyphs."
 950   :short-name "Indian 2-col"
 951   :long-name "Indian 2 Column"
 952   :code-space [33 126 33 126]
 953   :code-offset #x184000
 954   :iso-final-char ?5
 955   :emacs-mule-id 251
 956   :supplementary-p t)
 957
 958 (define-charset 'tibetan
 959   "Tibetan characters"
 960   ;; 2-column glyphs; `tibetan-1-column' is the 1-column counterpart.
 961   :short-name "Tibetan 2-col"
 962   :long-name "Tibetan 2 column"
 963   :iso-final-char ?7
 964   :emacs-mule-id 252
 965   :unify-map "MULE-tibetan"
 966   :supplementary-p t
 967   :code-space [33 126 33 37]
 968   :code-offset #x190000)
 969
 970 (define-charset 'tibetan-1-column
 971   "Tibetan 1 column glyph"
 972   :short-name "Tibetan 1-col"
 973   :long-name "Tibetan 1 column"
 974   :code-space [33 126 33 37]
 975   :code-offset #x190000
 976   :iso-final-char ?8
 977   :emacs-mule-id 241
 978   :supplementary-p t)
 979
 980 ;; Subsets of Unicode.
 981 (define-charset 'mule-unicode-2500-33ff
 982   "Unicode characters of the range U+2500..U+33FF."
 983   :short-name "Unicode subset 2"
 984   :long-name "Unicode subset (U+2500..U+33FF)"
 985   :code-space [#x20 #x7F #x20 #x47]
 986   :code-offset #x2500
 987   :iso-final-char ?2
 988   :emacs-mule-id 242
 989   :supplementary-p t)
 990
 991 (define-charset 'mule-unicode-e000-ffff
 992   "Unicode characters of the range U+E000..U+FFFF."
 993   :short-name "Unicode subset 3"
 994   :long-name "Unicode subset (U+E000+FFFF)"
 995   :iso-final-char ?3
 996   :emacs-mule-id 243
 997   :supplementary-p t
 998   :code-space [#x20 #x7F #x20 #x75]
 999   :code-offset #xE000
1000   :max-code 30015)                      ; U+FFFF
1001
;; 96 x 96 code points (#x2400 characters) starting at U+0100.
(define-charset 'mule-unicode-0100-24ff
  "Unicode characters of the range U+0100..U+24FF."
  :short-name "Unicode subset"
  :long-name "Unicode subset (U+0100..U+24FF)"
  :iso-final-char ?1  :emacs-mule-id 244
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x7F]
  :code-offset #x0100)
1011
;; One charset per Unicode plane.  Each covers a full 256 x 256 code
;; space, offset by the first code point of its plane.

(define-charset 'unicode-bmp
  "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
  :short-name "Unicode BMP"
  :code-space [0 255 0 255]
  :code-offset 0)

;; The short name used to carry a stray trailing space ("Unicode SMP "),
;; unlike the three sibling planes.
(define-charset 'unicode-smp
  "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
  :short-name "Unicode SMP"
  :code-space [0 255 0 255]
  :code-offset #x10000)

(define-charset 'unicode-sip
  "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
  :short-name "Unicode SIP"
  :code-space [0 255 0 255]
  :code-offset #x20000)

(define-charset 'unicode-ssp
  "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
  :short-name "Unicode SSP"
  :code-space [0 255 0 255]
  :code-offset #xE0000)
1035
;; 94 x 94 Ethiopic charset, unified with Unicode through the
;; "MULE-ethiopic" map.
(define-charset 'ethiopic
  "Ethiopic characters for Amharic and Tigrigna."
  :short-name "Ethiopic"
  :long-name "Ethiopic characters"
  :iso-final-char ?3  :emacs-mule-id 245
  :supplementary-p t
  :unify-map "MULE-ethiopic"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x1A0000)
1046
;; Single-byte, ASCII-compatible charset loaded from the "MACINTOSH" map.
(define-charset 'mac-roman
  "Mac Roman charset"
  :short-name "Mac Roman"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "MACINTOSH")
1053
1054 ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
;; The EBCDIC charsets are all single-byte and map-based.  None of them
;; is declared ASCII-compatible.

(define-charset 'ebcdic-us
  "US version of EBCDIC"
  :short-name "EBCDIC-US"
  :code-space [#x00 #xFF]
  :mime-charset 'ebcdic-us
  :map "EBCDICUS")

(define-charset 'ebcdic-uk
  "UK version of EBCDIC"
  :short-name "EBCDIC-UK"
  :code-space [#x00 #xFF]
  :mime-charset 'ebcdic-uk
  :map "EBCDICUK")

(define-charset 'ibm038
  "International version of EBCDIC"
  :short-name "IBM038"
  :code-space [#x00 #xFF]
  :mime-charset 'ibm038
  :map "IBM038")
;; Alternative names for `ibm038'.
(define-charset-alias 'ebcdic-int 'ibm038)
(define-charset-alias 'cp038 'ibm038)

(define-charset 'ibm1047
  ;; Says groff:
  "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
  :short-name "IBM1047"
  :code-space [#x00 #xFF]
  :mime-charset 'ibm1047
  :map "IBM1047")
;; Alternative name for `ibm1047'.
(define-charset-alias 'cp1047 'ibm1047)
1086
;; Single-byte, ASCII-compatible charset loaded from the "HP-ROMAN8"
;; map.  The docstring used to misspell the vendor name as
;; "Hewlet-Packard".
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "HP-ROMAN8")
1093
1094 ;; To make a coding system with this, a pre-write-conversion should
1095 ;; account for the commented-out multi-valued code points in
1096 ;; stdenc.map.
;; Both Adobe charsets are single-byte, start at code #x20, and are
;; loaded from map files.
(define-charset 'adobe-standard-encoding
  "Adobe `standard encoding' used in PostScript"
  :short-name "ADOBE-STANDARD-ENCODING"
  :code-space [#x20 #xFF]
  :map "stdenc")

(define-charset 'symbol
  "Adobe symbol encoding used in PostScript"
  :short-name "ADOBE-SYMBOL"
  :code-space [#x20 #xFF]
  :map "symbol")
1108
;; Single-byte, ASCII-compatible code pages, each loaded from a map file.

(define-charset 'ibm850
  "DOS codepage 850 (Latin-1)"
  :short-name "IBM850"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "IBM850")
(define-charset-alias 'cp850 'ibm850)

(define-charset 'mik
  "Bulgarian DOS codepage"
  :short-name "MIK"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "MIK")

(define-charset 'ptcp154
  "ParaType codepage (Asian Cyrillic)"
  :short-name "PT154"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :mime-charset 'pt154
  :map "PTCP154")
;; Alternative names for `ptcp154'.
(define-charset-alias 'pt154 'ptcp154)
(define-charset-alias 'cp154 'ptcp154)
1133
;; GB18030 is assembled from component charsets: ASCII, a map-based
;; 2-byte part, a map-based 4-byte part for the BMP, and three
;; offset-based 4-byte ranges.  The `gb18030' charset at the end is the
;; superset of all of them.

(define-charset 'gb18030-2-byte
  "GB18030 2-byte (0x814E..0xFEFE)"
  :code-space [#x40 #xFE #x81 #xFE]
  :supplementary-p t
  :map "GB180302")

(define-charset 'gb18030-4-byte-bmp
  "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
  :supplementary-p t
  :map "GB180304")

;; 4-byte codes in this range are mapped linearly, starting at U+10000.
(define-charset 'gb18030-4-byte-smp
  "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
  :min-code '(#x9030 . #x8130)  :max-code '(#xE332 . #x9A35)
  :supplementary-p t
  :code-offset #x10000)

(define-charset 'gb18030-4-byte-ext-1
  "GB18030 4-byte (0x8431A530-0x8F39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
  :min-code '(#x8431 . #xA530)  :max-code '(#x8F39 . #xFE39)
  :supplementary-p t
  :code-offset #x200000)                ; ... #x22484B

(define-charset 'gb18030-4-byte-ext-2
  "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
  :min-code '(#xE332 . #x9A36)  :max-code '(#xFE39 . #xFE39)
  :supplementary-p t
  :code-offset #x22484C)                ; ... #x279f93

(define-charset 'gb18030
  "GB18030"
  :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
  :min-code 0
  :max-code '(#xFE39 . #xFE39)
  :superset '(ascii
              gb18030-2-byte
              gb18030-4-byte-bmp
              gb18030-4-byte-smp
              gb18030-4-byte-ext-1
              gb18030-4-byte-ext-2))
1180
;; 94 x 94 charset placed at #x27A000 and unified through the "CNS-F" map.
(define-charset 'chinese-cns11643-15
  "CNS11643 Plane 15 Chinese Traditional"
  :short-name  "CNS11643-15"
  :long-name "CNS11643-15 (Chinese traditional)"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x27A000
  :unify-map "CNS-F")
1188
;; Unify the following charsets, in this order.
(dolist (charset '(chinese-gb2312
                   chinese-gbk
                   chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7
                   chinese-cns11643-15
                   big5
                   chinese-big5-1
                   chinese-big5-2
                   big5-hkscs
                   korean-ksc5601
                   vietnamese-viscii-lower
                   vietnamese-viscii-upper
                   chinese-sisheng
                   ipa
                   tibetan
                   ethiopic
                   indian-is13194
                   japanese-jisx0208-1978
                   japanese-jisx0208
                   japanese-jisx0212
                   japanese-jisx0213-1
                   japanese-jisx0213-2))
  (unify-charset charset))
1216
1217 \f
1218 ;; These are tables for translating characters on decoding and
1219 ;; encoding.
1220 ;; Fixme: these aren't used now -- should they be?
;; Start with no standard translation table in either direction.
(setq standard-translation-table-for-decode nil
      standard-translation-table-for-encode nil)
1224 \f
1225 ;;; Make fundamental coding systems.
1226
;; The coding systems `no-conversion' and `undecided' are already
;; defined in coding.c as below:
1229 ;;
1230 ;; (define-coding-system 'no-conversion
1231 ;;   "..."
1232 ;;   :coding-type 'raw-text
1233 ;;   ...)
1234 ;; (define-coding-system 'undecided
1235 ;;   "..."
1236 ;;   :coding-type 'undecided
1237 ;;   ...)
1238
;; `binary' is another name for `no-conversion'; the bare EOL-style
;; names are aliases for the matching `undecided-*' coding systems.
(dolist (pair '((binary . no-conversion)
                (unix   . undecided-unix)
                (dos    . undecided-dos)
                (mac    . undecided-mac)))
  (define-coding-system-alias (car pair) (cdr pair)))
1243
;; A variant of `undecided' with the `:prefer-utf-8' flag set.
(define-coding-system 'prefer-utf-8
  "Like `undecided' but prefer UTF-8 when appropriate.
On decoding, if the source contains 8-bit codes and they all
are valid UTF-8 sequences, detect the source as UTF-8 encoding
regardless of the coding priority.
On encoding, if the source contains non-ASCII characters, encode them
by UTF-8."
  :coding-type 'undecided  :mnemonic ?-
  :charset-list '(emacs)
  :prefer-utf-8 t)
1255
;; The two coding systems of type `raw-text'.  `raw-text' is flagged
;; `:for-unibyte'; `no-conversion-multibyte' is not, and fixes the EOL
;; type to `unix'.

(define-coding-system 'raw-text
  "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings.  An exception is made for
characters from the `eight-bit' character set.  Each of them is encoded
into a single byte.

When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
  :coding-type 'raw-text  :mnemonic ?t
  :for-unibyte t)

(define-coding-system 'no-conversion-multibyte
  "Like `no-conversion' but don't read a file into a unibyte buffer."
  :coding-type 'raw-text  :mnemonic ?=
  :eol-type 'unix)
1275
;; Charset-based coding system for the `iso-8859-1' charset, plus its
;; two alternative names.
(define-coding-system 'iso-latin-1
  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
  :coding-type 'charset  :mnemonic ?1
  :charset-list '(iso-8859-1)
  :mime-charset 'iso-8859-1)

(dolist (alias '(iso-8859-1 latin-1))
  (define-coding-system-alias alias 'iso-latin-1))
1285
1286 ;; Coding systems not specific to each language environment.
1287
;; Coding system of type `emacs-mule'; its charset list is the symbol
;; `emacs-mule' rather than an explicit list.
(define-coding-system 'emacs-mule
  "Emacs 21 internal format used in buffer and string."
  :coding-type 'emacs-mule  :mnemonic ?M
  :charset-list 'emacs-mule)
1293
;; The three UTF-8 flavors differ only in the `:bom' property: absent
;; for `utf-8', t for `utf-8-with-signature', and a pair of coding
;; systems for `utf-8-auto'.

(define-coding-system 'utf-8
  "UTF-8 (no signature (BOM))"
  :coding-type 'utf-8  :mnemonic ?U
  :charset-list '(unicode)
  :mime-charset 'utf-8)

(define-coding-system 'utf-8-with-signature
  "UTF-8 (with signature (BOM))"
  :coding-type 'utf-8  :mnemonic ?U
  :charset-list '(unicode)
  :bom t)

(define-coding-system 'utf-8-auto
  "UTF-8 (auto-detect signature (BOM))"
  :coding-type 'utf-8  :mnemonic ?U
  :charset-list '(unicode)
  :bom '(utf-8-with-signature . utf-8))

(define-coding-system-alias 'mule-utf-8 'utf-8)
;; See this page:
;; https://docs.microsoft.com/en-us/windows/desktop/intl/code-page-identifiers
;; Starting with Windows 10, people are trying to set their systems to
;; use UTF-8, so we had better recognize this alias:
(define-coding-system-alias 'cp65001 'utf-8)
1321
;; Same coding type as `utf-8', but with the `emacs' charset list
;; instead of `unicode'.
(define-coding-system 'utf-8-emacs
  "Support for all Emacs characters (including non-Unicode characters)."
  :coding-type 'utf-8  :mnemonic ?U
  :charset-list '(emacs))

;; `emacs-internal' names the encoding used internally.  It is meant to
;; be able to save any multibyte buffer without losing information, but
;; it can change between Emacs releases, so it should only be used for
;; internal files.
(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
1332
;; UTF-16 variants.  They differ in `:endian' and `:bom'; all are
;; marked `:mime-text-unsuitable'.

(define-coding-system 'utf-16le
  "UTF-16LE (little endian, no signature (BOM))."
  :coding-type 'utf-16  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'little
  :mime-text-unsuitable t
  :mime-charset 'utf-16le)

(define-coding-system 'utf-16be
  "UTF-16BE (big endian, no signature (BOM))."
  :coding-type 'utf-16  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16be)

(define-coding-system 'utf-16le-with-signature
  "UTF-16 (little endian, with signature (BOM))."
  :coding-type 'utf-16  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'little
  :mime-text-unsuitable t
  :mime-charset 'utf-16)

(define-coding-system 'utf-16be-with-signature
  "UTF-16 (big endian, with signature (BOM))."
  :coding-type 'utf-16  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16)

(define-coding-system 'utf-16
  "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
  :coding-type 'utf-16  :mnemonic ?U
  :charset-list '(unicode)
  :bom '(utf-16le-with-signature . utf-16be-with-signature)
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16)

;; Backwards compatibility (old names, also used by Mule-UCS).  We
;; prefer the MIME names.
(define-coding-system-alias 'utf-16-le 'utf-16le-with-signature)
(define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
1385
1386
;; Generic ISO 2022 coding systems.  Each `:designation' vector has four
;; elements, one per graphic register G0..G3.

(define-coding-system 'iso-2022-7bit
  "ISO 2022 based 7-bit encoding using only G0."
  :coding-type 'iso-2022  :mnemonic ?J
  :charset-list 'iso-2022
  :designation [(ascii t) nil nil nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))

(define-coding-system 'iso-2022-7bit-ss2
  "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022  :mnemonic ?$
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit
           designation single-shift composition))

(define-coding-system 'iso-2022-7bit-lock
  "ISO-2022 coding system using Locking-Shift for 96-charset."
  :coding-type 'iso-2022  :mnemonic ?&
  :charset-list 'iso-2022
  :designation [(ascii 94) (nil 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl 7-bit
           designation locking-shift composition))

(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)

(define-coding-system 'iso-2022-7bit-lock-ss2
  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
  :coding-type 'iso-2022  :mnemonic ?i
  :charset-list '(ascii
                  japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
                  korean-ksc5601
                  chinese-gb2312
                  chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                  chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                  chinese-cns11643-7)
  :designation [(ascii 94)
                (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
                (nil chinese-cns11643-2)
                (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
                     chinese-cns11643-6 chinese-cns11643-7)]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
           single-shift init-bol))

(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)

;; The only coding system in this group without the `7-bit' flag.
(define-coding-system 'iso-2022-8bit-ss2
  "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022  :mnemonic ?@
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
1443
(define-coding-system 'compound-text
  "Compound text based generic encoding.
This coding system is an extension of X's \"Compound Text Encoding\".
It encodes many characters using the normal ISO-2022 designation sequences,
but it doesn't support extended segments of CTEXT."
  :coding-type 'iso-2022  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl long-form
           designation locking-shift single-shift composition)
  ;; Fixme: this isn't a valid MIME charset and has to be
  ;; special-cased elsewhere  -- fx
  :mime-charset 'x-ctext)

(dolist (alias '(x-ctext ctext))
  (define-coding-system-alias alias 'compound-text))

;; Same as compound-text, but doesn't produce composition escape
;; sequences.  Used in post-read and pre-write conversions of
;; compound-text-with-extensions, see mule.el.  Note that this should
;; not have a mime-charset property, to prevent it from showing up
;; close to the beginning of coding systems ordered by priority.
(define-coding-system 'ctext-no-compositions
 "Compound text based generic encoding.

Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type 'iso-2022  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           designation locking-shift single-shift))

;; Like `compound-text' without the `composition' flag, plus post-read
;; and pre-write conversion functions.
(define-coding-system 'compound-text-with-extensions
 "Compound text encoding with ICCCM Extended Segment extensions.

See the variables `ctext-standard-encodings' and
`ctext-non-standard-encodings-alist' for the detail about how
extended segments are handled.

This coding system should be used only for X selections.  It is inappropriate
for decoding and encoding files, process I/O, etc."
  :coding-type 'iso-2022  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl long-form
           designation locking-shift single-shift)
  :post-read-conversion 'ctext-post-read-conversion
  :pre-write-conversion 'ctext-pre-write-conversion
  :mime-charset 'x-ctext)

(dolist (alias '(x-ctext-with-extensions ctext-with-extensions))
  (define-coding-system-alias alias 'compound-text-with-extensions))
1501
;; Charset-based coding system limited to `ascii', with `?' as the
;; default character.  `iso-safe' is another name for it.
(define-coding-system 'us-ascii
  "Encode ASCII as-is and encode non-ASCII characters to `?'."
  :coding-type 'charset  :mnemonic ?-
  :charset-list '(ascii)
  :default-char ??
  :mime-charset 'us-ascii)

(define-coding-system-alias 'iso-safe 'us-ascii)
1511
;; Both UTF-7 variants are declared with coding type `utf-8' and rely
;; on pre-write/post-read conversion functions.  Only `utf-7' has a
;; MIME charset.

(define-coding-system 'utf-7
  "UTF-7 encoding of Unicode (RFC 2152)."
  :coding-type 'utf-8  :mnemonic ?U
  :mime-charset 'utf-7
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-pre-write-conversion
  :post-read-conversion 'utf-7-post-read-conversion)

(define-coding-system 'utf-7-imap
  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
  :coding-type 'utf-8  :mnemonic ?u
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-imap-pre-write-conversion
  :post-read-conversion 'utf-7-imap-post-read-conversion)

;; Unless some other coding system is specified explicitly, terminal
;; output uses us-ascii.
(set-safe-terminal-coding-system-internal 'us-ascii)
1532
1533 ;; The other coding-systems are defined in each language specific
1534 ;; files under lisp/language.
1535
1536 ;; Normally, set coding system to `undecided' before reading a file.
1537 ;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1538 ;; but we regard them as containing multibyte characters.
1539 ;; Tar files are not decoded at all, but we treat them as raw bytes.
1540
;; Each entry is (REGEXP . VALUE); the regexp string of every entry is
;; passed through `purecopy', the value is kept as is.
(setq file-coding-system-alist
      (mapcar
       (lambda (entry)
         (cons (purecopy (car entry)) (cdr entry)))
       '(("\\.elc\\'" . utf-8-emacs)
         ("\\.el\\'" . prefer-utf-8)
         ("\\.utf\\(-8\\)?\\'" . utf-8)
         ("\\.xml\\'" . xml-find-file-coding-system)
         ;; We use raw-text for reading loaddefs.el so that if it
         ;; happens to have DOS or Mac EOLs, they are converted to
         ;; newlines.  This is required to make the special treatment
         ;; of the "\ newline" combination in loaddefs.el, which marks
         ;; the beginning of a doc string, work.
         ("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix))
         ("\\.tar\\'" . (no-conversion . no-conversion))
         ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
         ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
         ;; Catch-all entry: the empty regexp matches every file name.
         ("" . (undecided . nil)))))
1557
1558 \f
1559 ;;; Setting coding categories and their priorities.
1560
;; This setting is just for reading Emacs Lisp source files that
;; contain multilingual text while dumping Emacs.  More appropriate
;; values are set by the command `set-language-environment' for each
;; language environment.
1565
;; Highest priority first.
(set-coding-system-priority 'iso-latin-1 'utf-8 'iso-2022-7bit)
1571
1572 \f
1573 ;;; Miscellaneous settings.
1574
;; Make all multibyte characters self-insert: bind every character from
;; 128 up to `max-char' in the char-table of `global-map'.
(let ((keymap-char-table (nth 1 global-map))
      (multibyte-chars (cons 128 (max-char))))
  (set-char-table-range keymap-char-table multibyte-chars 'self-insert-command))
1579
;; Set the entries for codes #o221 through #o226 in
;; `latin-extra-code-table' to t.
(dolist (code '(?\221 ?\222 ?\223 ?\224 ?\225 ?\226))
  (aset latin-extra-code-table code t))
1586
(defcustom password-word-equivalents
  '("password" "passcode" "passphrase" "pass phrase" "pin"
    ;; The remaining entries are sorted according to the GNU en_US
    ;; locale; the trailing comment on each line gives the language(s).
    "암호"            ; ko
    "パスワード"   ; ja
    "ପ୍ରବେଶ ସଙ୍କେତ"     ; or
    "ពាក្យសម្ងាត់"              ; km
    "adgangskode"       ; da
    "contraseña"       ; es
    "contrasenya"       ; ca
    "geslo"             ; sl
    "hasło"            ; pl
    "heslo"             ; cs, sk
    "iphasiwedi"        ; zu
    "jelszó"           ; hu
    "lösenord"         ; sv
    "lozinka"           ; hr, sr
    "mật khẩu"              ; vi
    "mot de passe"      ; fr
    "parola"            ; tr
    "pasahitza"         ; eu
    "passord"           ; nb
    "passwort"          ; de
    "pasvorto"          ; eo
    "salasana"          ; fi
    "senha"             ; pt
    "slaptažodis"      ; lt
    "wachtwoord"        ; nl
    "كلمة السر"         ; ar
    "ססמה"          ; he
    "лозинка"            ; sr
    "пароль"              ; kk, ru, uk
    "गुप्तशब्द"               ; mr
    "शब्दकूट"             ; hi
    "પાસવર્ડ"             ; gu
    "సంకేతపదము"               ; te
    "ਪਾਸਵਰਡ"                ; pa
    "ಗುಪ್ತಪದ"             ; kn
    "கடவுச்சொல்"            ; ta
    "അടയാളവാക്ക്"         ; ml
    "গুপ্তশব্দ"               ; as
    "পাসওয়ার্ড"               ; bn_IN
    "රහස්පදය"             ; si
    "密码"            ; zh_CN
    "密碼")           ; zh_TW
  "List of words equivalent to \"password\".
This is used by Shell mode and other parts of Emacs to recognize
password prompts, including prompts in languages other than
English.  Different case choices should not be assumed to be
included; callers should bind `case-fold-search' to t."
  :group 'processes
  :type '(repeat string)
  :version "27.1")
1641
1642 ;; The old code-pages library is obsoleted by coding systems based on
1643 ;; the charsets defined in this file but might be required by user
1644 ;; code.
1645 (provide 'code-pages)
1646
1647 ;;; mule-conf.el ends here