lisp/international/mule-conf.el

   1 ;;; mule-conf.el --- configure multilingual environment
   2
   3 ;; Copyright (C) 1997-2018 Free Software Foundation, Inc.
   4 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   5 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   6 ;;   Registration Number H14PRO021
   7 ;; Copyright (C) 2003
   8 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   9 ;;   Registration Number H13PRO009
  10
  11 ;; Keywords: i18n, mule, multilingual, character set, coding system
  12
  13 ;; This file is part of GNU Emacs.
  14
  15 ;; GNU Emacs is free software: you can redistribute it and/or modify
  16 ;; it under the terms of the GNU General Public License as published by
  17 ;; the Free Software Foundation, either version 3 of the License, or
  18 ;; (at your option) any later version.
  19
  20 ;; GNU Emacs is distributed in the hope that it will be useful,
  21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  23 ;; GNU General Public License for more details.
  24
  25 ;; You should have received a copy of the GNU General Public License
  26 ;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
  27
  28 ;;; Commentary:
  29
  30 ;; This file defines the Emacs charsets and some basic coding systems.
  31 ;; Other coding systems are defined in the files in directory
  32 ;; lisp/language.
  33
  34 ;;; Code:
  35
  36 ;;; Remarks
  37
  38 ;; The ISO-IR registry is maintained by the Information Processing
  39 ;; Society of Japan/Information Technology Standards Commission of
  40 ;; Japan (IPSJ/ITSCJ) at https://www.itscj.ipsj.or.jp/itscj_english/.
  41 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
  42 ;; http://www.ecma.ch/.
  43
  44 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
  45 ;; MS Windows, which are presumably the only charsets we really need
  46 ;; to worry about on such systems:
  47 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
  48 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
  49 ;;                      1258, 874, 932, 936, 949, 950
  50
  51 ;;; Definitions of character sets.
  52
;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are already
;; defined in charset.c as below:
  55 ;;
  56 ;; (define-charset 'ascii
  57 ;;   ""
  58 ;;   :dimension 1
  59 ;;   :code-space [0 127]
  60 ;;   :iso-final-char ?B
  61 ;;   :ascii-compatible-p t
  62 ;;   :emacs-mule-id 0
  63 ;;   :code-offset 0)
  64 ;;
  65 ;; (define-charset 'unicode
  66 ;;   ""
  67 ;;   :dimension 3
  68 ;;   :code-space [0 255 0 255 0 16]
  69 ;;   :ascii-compatible-p t
  70 ;;   :code-offset 0)
  71 ;;
  72 ;; (define-charset 'emacs
  73 ;;   ""
  74 ;;   :dimension 3
  75 ;;   :code-space [0 255 0 255 0 63]
  76 ;;   :ascii-compatible-p t
  77 ;;   :supplementary-p t
  78 ;;   :code-offset 0)
  79 ;;
  80 ;; (define-charset 'eight-bit
  81 ;;   ""
  82 ;;   :dimension 1
  83 ;;   :code-space [128 255]
  84 ;;   :code-offset #x3FFF80)
  85 ;;
  86 ;; We now set :docstring, :short-name, and :long-name properties.
  87
  88 (put-charset-property
  89  'ascii :docstring "ASCII (ISO646 IRV)")
  90 (put-charset-property
  91  'ascii :short-name "ASCII")
  92 (put-charset-property
  93  'ascii :long-name "ASCII (ISO646 IRV)")
  94 (put-charset-property
  95  'iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)")
  96 (put-charset-property
  97  'iso-8859-1 :short-name "Latin-1")
  98 (put-charset-property
  99  'iso-8859-1 :long-name "Latin-1")
 100 (put-charset-property
 101  'unicode :docstring "Unicode (ISO10646)")
 102 (put-charset-property
 103  'unicode :short-name "Unicode")
 104 (put-charset-property
 105  'unicode :long-name "Unicode (ISO10646)")
 106 (put-charset-property
 107  'emacs :docstring "Full Emacs charset (excluding eight bit chars)")
 108 (put-charset-property
 109  'emacs :short-name "Emacs")
 110 (put-charset-property
 111  'emacs :long-name "Emacs")
 112
 113 (put-charset-property 'eight-bit :docstring "Raw bytes 128-255")
 114 (put-charset-property 'eight-bit :short-name "Raw bytes")
 115
 116 (define-charset-alias 'ucs 'unicode)
 117
;; Right-hand part of Latin-1 as a one-byte, 96-code charset:
;; code points 32..127 with :code-offset 160.
(define-charset 'latin-iso8859-1
  "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :short-name "RHP of Latin-1"
  :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :iso-final-char ?A
  :emacs-mule-id 129
  :code-space [32 127]
  :code-offset 160)

;; Name perhaps not ideal, but is XEmacs-compatible.
;; One-byte charset for codes 128..159 (0x80..0x9F), :code-offset 128.
(define-charset 'control-1
  "8-bit control code (0x80..0x9F)"
  :short-name "8-bit control code"
  :code-space [128 159]
  :code-offset 128)

;; Raw-byte counterpart of the same code range 128..159; it uses the
;; same :code-offset (#x3FFF80) that the header comment of this file
;; shows for `eight-bit'.
(define-charset 'eight-bit-control
  "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
  :short-name "Raw bytes 0x80..0x9F"
  :supplementary-p t
  :code-space [128 159]
  :code-offset #x3FFF80)                ; see character.h

;; Raw bytes for the remaining codes 160..255; the offset #x3FFFA0 is
;; #x3FFF80 plus the 32 codes taken by `eight-bit-control'.
(define-charset 'eight-bit-graphic
  "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
  :short-name "Raw bytes 0xA0..0xFF"
  :supplementary-p t
  :code-space [160 255]
  :code-offset #x3FFFA0)                ; see character.h
 147
 148 (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
 149                                                  iso-ir iso-final
 150                                                  emacs-mule-id map)
 151   `(progn
 152      (define-charset ,symbol
 153        ,name
 154        :short-name ,nickname
 155        :long-name ,name
 156        :ascii-compatible-p t
 157        :code-space [0 255]
 158        :map ,map)
 159      (if ,iso-symbol
 160          (define-charset ,iso-symbol
 161            (if ,iso-ir
 162                (format "Right-Hand Part of %s (%s): ISO-IR-%d"
 163                        ,name ,nickname ,iso-ir)
 164              (format "Right-Hand Part of %s (%s)" ,name ,nickname))
 165            :short-name (format "RHP of %s" ,name)
 166            :long-name (format "RHP of %s (%s)" ,name ,nickname)
 167            :iso-final-char ,iso-final
 168            :emacs-mule-id ,emacs-mule-id
 169            :code-space [32 127]
 170            :subset (list ,symbol 160 255 -128)))))
 171
;; ISO 8859 parts 2..16.  Arguments, in order: SYMBOL ISO-SYMBOL NAME
;; NICKNAME ISO-IR ISO-FINAL EMACS-MULE-ID MAP (see the definition of
;; `define-iso-single-byte-charset').  EMACS-MULE-ID is nil for parts
;; 10, 11, 13 and 16.
(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
  "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")

(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
  "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")

(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
  "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")

(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
  "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")

(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
  "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")

(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
  "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")

(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
  "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")

(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
  "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")

(define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
  "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")

;; http://www.nectec.or.th/it-standards/iso8859-11/
;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
;; plus nbsp
(define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
  "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")

;; 8859-12 doesn't (yet?) exist.

(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
  "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")

(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
  "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")

(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
  "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")

(define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
  "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")

;; No point in keeping it around.
;; The macro is only used by the forms above, so its function
;; definition is removed again once they have run.
(fmakunbound 'define-iso-single-byte-charset)
 221
;; Can this be shared with 8859-11?
;; N.b. not all of these are defined in Unicode.
;; One-byte charset: code points 32..127 with :code-offset #x0E00.
(define-charset 'thai-tis620
  "TIS620.2533"
  :short-name "TIS620.2533"
  :iso-final-char ?T
  :emacs-mule-id 133
  :code-space [32 127]
  :code-offset #x0E00)

;; Fixme: doc for this, cf. above
;; 8-bit charset over codes 0..255, declared as the :superset of
;; `ascii', `eight-bit-control' and `thai-tis620' (the last one listed
;; with an offset of 128).
(define-charset 'tis620-2533
  "TIS620.2533"
  :short-name "TIS620.2533"
  :ascii-compatible-p t
  :code-space [0 255]
  :superset '(ascii eight-bit-control (thai-tis620 . 128)))
 239
;; Whole JISX0201 table, codes 0..#xDF, read from the map "JISX0201".
(define-charset 'jisx0201
  "JISX0201"
  :short-name "JISX0201"
  :code-space [0 #xDF]
  :map "JISX0201")

;; Roman half: codes 33..126 of `jisx0201', taken unshifted (offset 0).
(define-charset 'latin-jisx0201
  "Roman Part of JISX0201.1976"
  :short-name "JISX0201 Roman"
  :long-name "Japanese Roman (JISX0201.1976)"
  :iso-final-char ?J
  :emacs-mule-id  138
  :supplementary-p t
  :code-space [33 126]
  :subset '(jisx0201 33 126 0))

;; Katakana half: codes 161..254 of `jisx0201', shifted by -128 so that
;; they occupy the code space 33..126.
(define-charset 'katakana-jisx0201
  "Katakana Part of JISX0201.1976"
  :short-name "JISX0201 Katakana"
  :long-name "Japanese Katakana (JISX0201.1976)"
  :iso-final-char ?I
  :emacs-mule-id  137
  :supplementary-p t
  :code-space [33 126]
  :subset '(jisx0201 161 254 -128))
 265
;; Two-byte 94x94 charset (both bytes 33..126) at :code-offset #x110000,
;; with the unify map "GB2312".
(define-charset 'chinese-gb2312
  "GB2312 Chinese simplified: ISO-IR-58"
  :short-name "GB2312"
  :long-name "GB2312: ISO-IR-58"
  :iso-final-char ?A
  :emacs-mule-id 145
  :code-space [33 126 33 126]
  :code-offset #x110000
  :unify-map "GB2312")

;; Two-byte charset with code space [#x40 #xFE #x81 #xFE] at
;; :code-offset #x160000; also reachable as `cp936' and `windows-936'.
(define-charset 'chinese-gbk
  "GBK Chinese simplified."
  :short-name "GBK"
  :code-space [#x40 #xFE #x81 #xFE]
  :code-offset #x160000
  :unify-map "GBK")
(define-charset-alias 'cp936 'chinese-gbk)
(define-charset-alias 'windows-936 'chinese-gbk)
 284
;; CNS11643 planes 1..3.  Each plane is a two-byte 94x94 charset (both
;; bytes 33..126); successive planes are #x4000 apart in :code-offset
;; (#x114000, #x118000, #x11C000).
(define-charset 'chinese-cns11643-1
  "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
  :short-name "CNS11643-1"
  :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
  :iso-final-char ?G
  :emacs-mule-id  149
  :code-space [33 126 33 126]
  :code-offset #x114000
  :unify-map "CNS-1")

(define-charset 'chinese-cns11643-2
  "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
  :short-name "CNS11643-2"
  :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
  :iso-final-char ?H
  :emacs-mule-id  150
  :code-space [33 126 33 126]
  :code-offset #x118000
  :unify-map "CNS-2")

(define-charset 'chinese-cns11643-3
  "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
  :short-name  "CNS11643-3"
  :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
  :iso-final-char ?I
  :code-space [33 126 33 126]
  :emacs-mule-id  246
  :code-offset #x11C000
  :unify-map "CNS-3")
 314
 315 (define-charset 'chinese-cns11643-4
 316   "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
 317   :short-name  "CNS11643-4"
 318   :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
 319   :iso-final-char ?J
 320   :emacs-mule-id  247
 321   :code-space [33 126 33 126]
 322   :code-offset #x120000
 323   :unify-map "CNS-4")
 324
 325 (define-charset 'chinese-cns11643-5
 326   "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
 327   :short-name  "CNS11643-5"
 328   :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
 329   :iso-final-char ?K
 330   :emacs-mule-id  248
 331   :code-space [33 126 33 126]
 332   :code-offset #x124000
 333   :unify-map "CNS-5")
 334
 335 (define-charset 'chinese-cns11643-6
 336   "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
 337   :short-name  "CNS11643-6"
 338   :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
 339   :iso-final-char ?L
 340   :emacs-mule-id 249
 341   :code-space [33 126 33 126]
 342   :code-offset #x128000
 343   :unify-map "CNS-6")
 344
 345 (define-charset 'chinese-cns11643-7
 346   "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
 347   :short-name  "CNS11643-7"
 348   :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
 349   :iso-final-char ?M
 350   :emacs-mule-id 250
 351   :code-space [33 126 33 126]
 352   :code-offset #x12C000
 353   :unify-map "CNS-7")
 354
;; Two-byte charset with code space [#x40 #xFE #xA1 #xFE] at
;; :code-offset #x130000, with the unify map "BIG5".
(define-charset 'big5
  "Big5 (Chinese traditional)"
  :short-name "Big5"
  :code-space [#x40 #xFE #xA1 #xFE]
  :code-offset #x130000
  :unify-map "BIG5")
;; Fixme: AKA cp950 according to
;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>.  Is
;; that correct?
 364
 365 (define-charset 'chinese-big5-1
 366   "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
 367   :short-name "Big5 (Level-1)"
 368   :long-name "Big5 (Level-1) A141-C67F"
 369   :iso-final-char ?0
 370   :emacs-mule-id 152
 371   :supplementary-p t
 372   :code-space [#x21 #x7E #x21 #x7E]
 373   :code-offset #x135000
 374   :unify-map "BIG5-1")
 375
;; Second half of Big5 re-packed as a two-byte 94x94 charset (both
;; bytes #x21..#x7E) at :code-offset #x137800.
(define-charset 'chinese-big5-2
  "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-2)"
  :long-name "Big5 (Level-2) C940-FEFE"
  :iso-final-char ?1
  :emacs-mule-id  153
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x137800
  :unify-map "BIG5-2")
 386
;; Three Japanese two-byte 94x94 charsets (both bytes 33..126), placed
;; #x4000 apart at :code-offset #x140000, #x144000 and #x148000.
(define-charset 'japanese-jisx0208
  "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
  :short-name "JISX0208"
  :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
  :iso-final-char ?B
  :emacs-mule-id 146
  :code-space [33 126 33 126]
  :code-offset #x140000
  :unify-map "JISX0208")

;; Note that the unify map is named "JISC6226", after the older
;; standard name given in :long-name.
(define-charset 'japanese-jisx0208-1978
  "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
  :short-name "JISX0208.1978"
  :long-name  "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
  :iso-final-char ?@
  :emacs-mule-id  144
  :code-space [33 126 33 126]
  :code-offset #x144000
  :unify-map "JISC6226")

(define-charset 'japanese-jisx0212
  "JISX0212 Japanese supplement: ISO-IR-159"
  :short-name "JISX0212"
  :long-name "JISX0212 (Japanese): ISO-IR-159"
  :iso-final-char ?D
  :emacs-mule-id 148
  :code-space [33 126 33 126]
  :code-offset #x148000
  :unify-map "JISX0212")
 416
;; Note that jisx0213 contains characters not in Unicode (3.2?).  It's
;; arguable whether it should have a unify-map.
;; Plane 1: two-byte 94x94 charset at :code-offset #x14C000.
(define-charset 'japanese-jisx0213-1
  "JISX0213.2000 Plane 1 (Japanese)"
  :short-name "JISX0213-1"
  :iso-final-char ?O
  :emacs-mule-id  151
  :unify-map "JISX2131"
  :code-space [33 126 33 126]
  :code-offset #x14C000)

;; Plane 2: two-byte 94x94 charset at :code-offset #x150000.
(define-charset 'japanese-jisx0213-2
  "JISX0213.2000 Plane 2 (Japanese)"
  :short-name "JISX0213-2"
  :iso-final-char ?P
  :emacs-mule-id 254
  :unify-map "JISX2132"
  :code-space [33 126 33 126]
  :code-offset #x150000)

;; Map-based (not offset-based) two-byte charset read from "JISX213A".
(define-charset 'japanese-jisx0213-a
  "JISX0213.2004 adds these characters to JISX0213.2000."
  :short-name "JISX0213A"
  :dimension 2
  :code-space [33 126 33 126]
  :supplementary-p t
  :map "JISX213A")

;; Declared as the :superset of `japanese-jisx0213-a' and
;; `japanese-jisx0213-1', in that order.
(define-charset 'japanese-jisx0213.2004-1
  "JISX0213.2004 Plane1 (Japanese)"
  :short-name "JISX0213.2004-1"
  :dimension 2
  :code-space [33 126 33 126]
  :iso-final-char ?Q
  :superset '(japanese-jisx0213-a japanese-jisx0213-1))
 452
;; Codes #xA1..#xDF of `jisx0201', taken unshifted (offset 0).
(define-charset 'katakana-sjis
  "Katakana part of Shift-JIS"
  :dimension 1
  :code-space [#xA1 #xDF]
  :subset '(jisx0201 #xA1 #xDF 0)
  :supplementary-p t)

;; Two-byte charset read from the map "CP932-2BYTE".
(define-charset 'cp932-2-byte
  "2-byte part of CP932"
  :dimension 2
  :map "CP932-2BYTE"
  :code-space [#x40 #xFC #x81 #xFC]
  :supplementary-p t)

;; Declared as the :superset of `ascii', `katakana-sjis' and
;; `cp932-2-byte'.
(define-charset 'cp932
  "CP932 (Microsoft shift-jis)"
  :code-space [#x00 #xFF #x00 #xFE]
  :short-name "CP932"
  :superset '(ascii katakana-sjis cp932-2-byte))
 472
;; Two-byte 94x94 charset.  The trailing comment on :code-offset gives
;; the last character code of the charset's range.
(define-charset 'korean-ksc5601
  "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
  :short-name "KSC5601"
  :long-name "KSC5601 (Korean): ISO-IR-149"
  :iso-final-char ?C
  :emacs-mule-id 147
  :code-space [33 126 33 126]
  :code-offset #x279f94                 ; ... #x27c217
  :unify-map "KSC5601")

;; Starts at #x27c218, right after the range noted for `korean-ksc5601'.
;; NOTE(review): :short-name is "Big5", the same as for charset `big5'
;; -- confirm that this is intended.
(define-charset 'big5-hkscs
  "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
  :short-name "Big5"
  :code-space [#x40 #xFE #xA1 #xFE]
  :code-offset #x27c218                 ; ... #x280839
  :unify-map "BIG5-HKSCS")

;; Two-byte charset read from the map "CP949-2BYTE".
(define-charset 'cp949-2-byte
  "2-byte part of CP949"
  :dimension 2
  :map "CP949-2BYTE"
  :code-space [#x41 #xFE #x81 #xFD]
  :supplementary-p t)

;; Declared as the :superset of `ascii' and `cp949-2-byte'.
(define-charset 'cp949
  "CP949 (Korean)"
  :short-name "CP949"
  :long-name  "CP949 (Korean)"
  :code-space [#x00 #xFE #x00 #xFD]
  :superset '(ascii cp949-2-byte))
 503
;; One-byte charset: code points 33..126 at :code-offset #x200000, with
;; the unify map "MULE-sisheng".
(define-charset 'chinese-sisheng
  "SiSheng characters for PinYin/ZhuYin"
  :short-name "SiSheng"
  :long-name "SiSheng (PinYin/ZhuYin)"
  :iso-final-char ?0
  :emacs-mule-id 160
  :code-space [33 126]
  :unify-map "MULE-sisheng"
  :supplementary-p t
  :code-offset #x200000)

;; A subset of the 1989 version of IPA.  It consists of the consonant
;; signs used in English, French, German and Italian, and all vowel
;; signs in the table.  [says old MULE doc]
;; One-byte charset: code points 32..127 at :code-offset #x200080.
(define-charset 'ipa
  "IPA (International Phonetic Association)"
  :short-name "IPA"
  :iso-final-char ?0
  :emacs-mule-id  161
  :unify-map "MULE-ipa"
  :code-space [32 127]
  :supplementary-p t
  :code-offset #x200080)
 527
;; Full 8-bit table (codes 0..255) read from the map "VISCII".
(define-charset 'viscii
  "VISCII1.1"
  :short-name "VISCII"
  :long-name "VISCII 1.1"
  :code-space [0 255]
  :map "VISCII")

;; One-byte charsets (code points 32..127) for the lower-case and
;; upper-case halves, at :code-offset #x200200 and #x200280.
(define-charset 'vietnamese-viscii-lower
  "VISCII1.1 lower-case"
  :short-name "VISCII lower"
  :long-name "VISCII lower-case"
  :iso-final-char ?1
  :emacs-mule-id  162
  :code-space [32 127]
  :code-offset #x200200
  :supplementary-p t
  :unify-map "MULE-lviscii")

(define-charset 'vietnamese-viscii-upper
  "VISCII1.1 upper-case"
  :short-name "VISCII upper"
  :long-name "VISCII upper-case"
  :iso-final-char ?2
  :emacs-mule-id  163
  :code-space [32 127]
  :code-offset #x200280
  :supplementary-p t
  :unify-map "MULE-uviscii")

;; Full 8-bit table read from the map "VSCII"; `tcvn-5712' is an alias.
(define-charset 'vscii
  "VSCII1.1 (TCVN-5712 VN1)"
  :short-name "VSCII"
  :code-space [0 255]
  :map "VSCII")

(define-charset-alias 'tcvn-5712 'vscii)

;; Fixme: see note in tcvn.map about combining characters
;; Full 8-bit table read from the map "VSCII-2"; no :short-name given.
(define-charset 'vscii-2
  "VSCII-2 (TCVN-5712 VN2)"
  :code-space [0 255]
  :map "VSCII-2")
 570
 571 (define-charset 'koi8-r
 572   "KOI8-R"
 573   :short-name "KOI8-R"
 574   :ascii-compatible-p t
 575   :code-space [0 255]
 576   :map "KOI8-R")
 577
 578 (define-charset-alias 'koi8 'koi8-r)
 579
 580 (define-charset 'alternativnyj
 581   "ALTERNATIVNYJ"
 582   :short-name "alternativnyj"
 583   :ascii-compatible-p t
 584   :code-space [0 255]
 585   :map "ALTERNATIVNYJ")
 586
 587 (define-charset 'cp866
 588   "CP866"
 589   :short-name "cp866"
 590   :ascii-compatible-p t
 591   :code-space [0 255]
 592   :map "IBM866")
 593 (define-charset-alias 'ibm866 'cp866)
 594
 595 (define-charset 'koi8-u
 596   "KOI8-U"
 597   :short-name "KOI8-U"
 598   :ascii-compatible-p t
 599   :code-space [0 255]
 600   :map "KOI8-U")
 601
 602 (define-charset 'koi8-t
 603   "KOI8-T"
 604   :short-name "KOI8-T"
 605   :ascii-compatible-p t
 606   :code-space [0 255]
 607   :map "KOI8-T")
 608
 609 (define-charset 'georgian-ps
 610   "GEORGIAN-PS"
 611   :short-name "GEORGIAN-PS"
 612   :ascii-compatible-p t
 613   :code-space [0 255]
 614   :map "KA-PS")
 615
 616 (define-charset 'georgian-academy
 617   "GEORGIAN-ACADEMY"
 618   :short-name "GEORGIAN-ACADEMY"
 619   :ascii-compatible-p t
 620   :code-space [0 255]
 621   :map "KA-ACADEMY")
 622
 623 (define-charset 'windows-1250
 624   "WINDOWS-1250 (Central Europe)"
 625   :short-name "WINDOWS-1250"
 626   :ascii-compatible-p t
 627   :code-space [0 255]
 628   :map "CP1250")
 629 (define-charset-alias 'cp1250 'windows-1250)
 630
 631 (define-charset 'windows-1251
 632   "WINDOWS-1251 (Cyrillic)"
 633   :short-name "WINDOWS-1251"
 634   :ascii-compatible-p t
 635   :code-space [0 255]
 636   :map "CP1251")
 637 (define-charset-alias 'cp1251 'windows-1251)
 638
 639 (define-charset 'windows-1252
 640   "WINDOWS-1252 (Latin I)"
 641   :short-name "WINDOWS-1252"
 642   :ascii-compatible-p t
 643   :code-space [0 255]
 644   :map "CP1252")
 645 (define-charset-alias 'cp1252 'windows-1252)
 646
 647 (define-charset 'windows-1253
 648   "WINDOWS-1253 (Greek)"
 649   :short-name "WINDOWS-1253"
 650   :ascii-compatible-p t
 651   :code-space [0 255]
 652   :map "CP1253")
 653 (define-charset-alias 'cp1253 'windows-1253)
 654
 655 (define-charset 'windows-1254
 656   "WINDOWS-1254 (Turkish)"
 657   :short-name "WINDOWS-1254"
 658   :ascii-compatible-p t
 659   :code-space [0 255]
 660   :map "CP1254")
 661 (define-charset-alias 'cp1254 'windows-1254)
 662
 663 (define-charset 'windows-1255
 664   "WINDOWS-1255 (Hebrew)"
 665   :short-name "WINDOWS-1255"
 666   :ascii-compatible-p t
 667   :code-space [0 255]
 668   :map "CP1255")
 669 (define-charset-alias 'cp1255 'windows-1255)
 670
 671 (define-charset 'windows-1256
 672   "WINDOWS-1256 (Arabic)"
 673   :short-name "WINDOWS-1256"
 674   :ascii-compatible-p t
 675   :code-space [0 255]
 676   :map "CP1256")
 677 (define-charset-alias 'cp1256 'windows-1256)
 678
 679 (define-charset 'windows-1257
 680   "WINDOWS-1257 (Baltic)"
 681   :short-name "WINDOWS-1257"
 682   :ascii-compatible-p t
 683   :code-space [0 255]
 684   :map "CP1257")
 685 (define-charset-alias 'cp1257 'windows-1257)
 686
 687 (define-charset 'windows-1258
 688   "WINDOWS-1258 (Viet Nam)"
 689   :short-name "WINDOWS-1258"
 690   :ascii-compatible-p t
 691   :code-space [0 255]
 692   :map "CP1258")
 693 (define-charset-alias 'cp1258 'windows-1258)
 694
;; ASCII-compatible 8-bit charset read from the map "NEXTSTEP".
(define-charset 'next
  "NEXT"
  :short-name "NEXT"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "NEXTSTEP")

;; ASCII-compatible 8-bit charset read from the map "CP1125"; also
;; reachable under the aliases `ruscii' and `cp866u'.
(define-charset 'cp1125
  "CP1125"
  :short-name "CP1125"
  :code-space [0 255]
  :ascii-compatible-p t
  :map "CP1125")
(define-charset-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-charset-alias 'cp866u 'cp1125)
 711
 712 ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
 713 ;; shows this as not ASCII compatible, with various graphics in
 714 ;; 0x01-0x1F.
 715 (define-charset 'cp437
 716   "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
 717   :short-name "CP437"
 718   :code-space [0 255]
 719   :ascii-compatible-p t
 720   :map "IBM437")
 721
 722 (define-charset 'cp720
 723   "CP720 (Arabic)"
 724   :short-name "CP720"
 725   :code-space [0 255]
 726   :ascii-compatible-p t
 727   :map "CP720")
 728
 729 (define-charset 'cp737
 730   "CP737 (PC Greek)"
 731   :short-name "CP737"
 732   :code-space [0 255]
 733   :ascii-compatible-p t
 734   :map "CP737")
 735
 736 (define-charset 'cp775
 737   "CP775 (PC Baltic)"
 738   :short-name "CP775"
 739   :code-space [0 255]
 740   :ascii-compatible-p t
 741   :map "CP775")
 742
 743 (define-charset 'cp851
 744   "CP851 (Greek)"
 745   :short-name "CP851"
 746   :code-space [0 255]
 747   :ascii-compatible-p t
 748   :map "IBM851")
 749
 750 (define-charset 'cp852
 751   "CP852 (MS-DOS Latin-2)"
 752   :short-name "CP852"
 753   :code-space [0 255]
 754   :ascii-compatible-p t
 755   :map "IBM852")
 756
 757 (define-charset 'cp855
 758   "CP855 (IBM Cyrillic)"
 759   :short-name "CP855"
 760   :code-space [0 255]
 761   :ascii-compatible-p t
 762   :map "IBM855")
 763
 764 (define-charset 'cp857
 765   "CP857 (IBM Turkish)"
 766   :short-name "CP857"
 767   :code-space [0 255]
 768   :ascii-compatible-p t
 769   :map "IBM857")
 770
 771 (define-charset 'cp858
 772   "CP858 (Multilingual Latin I + Euro)"
 773   :short-name "CP858"
 774   :code-space [0 255]
 775   :ascii-compatible-p t
 776   :map "CP858")
 777 (define-charset-alias 'cp00858 'cp858)  ; IANA has IBM00858/CP00858
 778
 779 (define-charset 'cp860
 780   "CP860 (MS-DOS Portuguese)"
 781   :short-name "CP860"
 782   :code-space [0 255]
 783   :ascii-compatible-p t
 784   :map "IBM860")
 785
 786 (define-charset 'cp861
 787   "CP861 (MS-DOS Icelandic)"
 788   :short-name "CP861"
 789   :code-space [0 255]
 790   :ascii-compatible-p t
 791   :map "IBM861")
 792
 793 (define-charset 'cp862
 794   "CP862 (PC Hebrew)"
 795   :short-name "CP862"
 796   :code-space [0 255]
 797   :ascii-compatible-p t
 798   :map "IBM862")
 799
 800 (define-charset 'cp863
 801   "CP863 (MS-DOS Canadian French)"
 802   :short-name "CP863"
 803   :code-space [0 255]
 804   :ascii-compatible-p t
 805   :map "IBM863")
 806
 807 (define-charset 'cp864
 808   "CP864 (PC Arabic)"
 809   :short-name "CP864"
 810   :code-space [0 255]
 811   :ascii-compatible-p t
 812   :map "IBM864")
 813
 814 (define-charset 'cp865
 815   "CP865 (MS-DOS Nordic)"
 816   :short-name "CP865"
 817   :code-space [0 255]
 818   :ascii-compatible-p t
 819   :map "IBM865")
 820
 821 (define-charset 'cp869
 822   "CP869 (IBM Modern Greek)"
 823   :short-name "CP869"
 824   :code-space [0 255]
 825   :ascii-compatible-p t
 826   :map "IBM869")
 827
 828 (define-charset 'cp874
 829   "CP874 (IBM Thai)"
 830   :short-name "CP874"
 831   :code-space [0 255]
 832   :ascii-compatible-p t
 833   :map "IBM874")
 834
;; For Arabic, we need three different types of character sets.
;; Digits are of direction left-to-right and of width 1-column.
;; Others are of direction right-to-left and of width 1-column or
;; 2-column.

;; Code points 34..42 only, at :code-offset #x0600.
(define-charset 'arabic-digit
  "Arabic digit"
  :short-name "Arabic digit"
  :iso-final-char ?2
  :emacs-mule-id 164
  :supplementary-p t
  :code-space [34 42]
  :code-offset #x0600)

;; Code points 33..126 at :code-offset #x200100.
(define-charset 'arabic-1-column
  "Arabic 1-column"
  :short-name "Arabic 1-col"
  :long-name "Arabic 1-column"
  :iso-final-char ?3
  :emacs-mule-id 165
  :supplementary-p t
  :code-space [33 126]
  :code-offset #x200100)

;; Code points 33..126 at :code-offset #x200180.
(define-charset 'arabic-2-column
  "Arabic 2-column"
  :short-name "Arabic 2-col"
  :long-name "Arabic 2-column"
  :iso-final-char ?4
  :emacs-mule-id 224
  :supplementary-p t
  :code-space [33 126]
  :code-offset #x200180)
 867
;; Lao script.
;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
;; Not all of them are defined in Unicode.
(define-charset 'lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :iso-final-char ?1
  :emacs-mule-id 167
  :supplementary-p t
  :code-space [33 126]
  :code-offset #x0E81)

;; 8-bit charset over codes 0..255, declared as the :superset of
;; `ascii', `eight-bit-control' and `lao' (the last one listed with an
;; offset of 128).  It reuses the docstring and short name of `lao'.
(define-charset 'mule-lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :code-space [0 255]
  :supplementary-p t
  :superset '(ascii eight-bit-control (lao . 128)))
 886
 887
;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
;; not assigned.  They are automatically converted to each Indian
;; script which IS-13194 supports.

;; One-byte charset: code points 33..126 at :code-offset #x180000, with
;; the unify map "MULE-is13194".
(define-charset 'indian-is13194
  "7-bit representation of IS 13194 (ISCII) for Devanagari"
  :short-name "IS 13194 (DEV)"
  :long-name "Indian IS 13194 (DEV)"
  :iso-final-char ?5
  :emacs-mule-id 225
  :supplementary-p t
  :code-space [33 126]
  :code-offset #x180000
  :unify-map "MULE-is13194")
 902
 903 (let ((code-offset #x180100))
 904   (dolist (script '(devanagari sanskrit bengali tamil telugu assamese
 905                                oriya kannada malayalam gujarati punjabi))
 906     (define-charset (intern (format "%s-cdac" script))
 907       (format
 908        "Glyphs of %s script for CDAC font.  Subset of `indian-glyph'."
 909        (capitalize (symbol-name script)))
 910       :short-name (format "CDAC %s glyphs" (capitalize (symbol-name script)))
 911       :supplementary-p t
 912       :code-space [0 255]
 913       :code-offset code-offset)
 914     (setq code-offset (+ code-offset #x100)))
 915
 916   (dolist (script '(devanagari bengali punjabi gujarati
 917                                oriya tamil telugu kannada malayalam))
 918     (define-charset (intern (format "%s-akruti" script))
 919       (format
 920        "Glyphs of %s script for AKRUTI font.  Subset of `indian-glyph'."
 921        (capitalize (symbol-name script)))
 922       :short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script)))
 923       :supplementary-p t
 924       :code-space [0 255]
 925       :code-offset code-offset)
 926     (setq code-offset (+ code-offset #x100))))
 927
;; Two-byte charset (both bytes 32..127) at :code-offset #x180100.
(define-charset 'indian-glyph
  "Glyphs for Indian characters."
  :short-name "Indian glyph"
  :iso-final-char ?4
  :emacs-mule-id 240
  :supplementary-p t
  :code-space [32 127 32 127]
  :code-offset #x180100)

;; Actual Glyph for 1-column width.
(define-charset 'indian-1-column
  "Indian charset for 1-column width glyphs."
  :short-name "Indian 1-col"
  :long-name "Indian 1 Column"
  :iso-final-char ?6
  :emacs-mule-id  251
  :supplementary-p t
  :code-space [33 126 33 126]
  :code-offset #x184000)

;; Actual Glyph for 2-column width.
;; NOTE(review): this form uses the same :emacs-mule-id (251) and the
;; same :code-offset (#x184000) as `indian-1-column'; only the ISO
;; final char and the names differ.  Confirm that this is intended.
(define-charset 'indian-2-column
  "Indian charset for 2-column width glyphs."
  :short-name "Indian 2-col"
  :long-name "Indian 2 Column"
  :iso-final-char ?5
  :emacs-mule-id  251
  :supplementary-p t
  :code-space [33 126 33 126]
  :code-offset #x184000)
 958
 959 (define-charset 'tibetan
 960   "Tibetan characters"
 961   :iso-final-char ?7
 962   :short-name "Tibetan 2-col"
 963   :long-name "Tibetan 2 column"
 964   :iso-final-char ?7
 965   :emacs-mule-id 252
 966   :unify-map "MULE-tibetan"
 967   :supplementary-p t
 968   :code-space [33 126 33 37]
 969   :code-offset #x190000)
 970
;; Two-byte charset: first byte 33..126, second byte 33..37, at
;; :code-offset #x190000.  No :unify-map is given here.
(define-charset 'tibetan-1-column
  "Tibetan 1 column glyph"
  :short-name "Tibetan 1-col"
  :long-name "Tibetan 1 column"
  :iso-final-char ?8
  :emacs-mule-id 241
  :supplementary-p t
  :code-space [33 126 33 37]
  :code-offset #x190000)
 980
;; Subsets of Unicode.

;; Two-byte charset: first byte #x20..#x7f, second byte #x20..#x47, at
;; :code-offset #x2500.
(define-charset 'mule-unicode-2500-33ff
  "Unicode characters of the range U+2500..U+33FF."
  :short-name "Unicode subset 2"
  :long-name "Unicode subset (U+2500..U+33FF)"
  :iso-final-char ?2
  :emacs-mule-id 242
  :supplementary-p t
  :code-space [#x20 #x7f #x20 #x47]
  :code-offset #x2500)
 991
 992 (define-charset 'mule-unicode-e000-ffff
 993   "Unicode characters of the range U+E000..U+FFFF."
 994   :short-name "Unicode subset 3"
 995   :long-name "Unicode subset (U+E000..U+FFFF)"
 996   :iso-final-char ?3
 997   :emacs-mule-id 243
 998   :supplementary-p t
 999   :code-space [#x20 #x7F #x20 #x75]     ; 96 codes per row, rows #x20..#x75
1000   :code-offset #xE000
1001   :max-code 30015)                      ; = #x753F, i.e. U+FFFF; the last row is only partly used
1002
1003 (define-charset 'mule-unicode-0100-24ff
1004   "Unicode characters of the range U+0100..U+24FF."
1005   :short-name "Unicode subset"
1006   :long-name "Unicode subset (U+0100..U+24FF)"
1007   :iso-final-char ?1
1008   :emacs-mule-id 244
1009   :supplementary-p t
1010   :code-space [#x20 #x7F #x20 #x7F]     ; 96 x 96 = 9216 (#x2400) code points
1011   :code-offset #x100)                   ; #x100 + #x2400 - 1 = #x24FF
1012
1013 (define-charset 'unicode-bmp
1014   "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
1015   :short-name "Unicode BMP"
1016   :code-space [0 255 0 255]             ; 256 x 256 = #x10000 code points
1017   :code-offset 0)                       ; plane starts at U+0000
1018
1019 (define-charset 'unicode-smp
1020   "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
1021   :short-name "Unicode SMP"
1022   :code-space [0 255 0 255]             ; 256 x 256 = #x10000 code points
1023   :code-offset #x10000)                 ; plane starts at U+10000
1024
1025 (define-charset 'unicode-sip
1026   "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
1027   :short-name "Unicode SIP"
1028   :code-space [0 255 0 255]             ; 256 x 256 = #x10000 code points
1029   :code-offset #x20000)                 ; plane starts at U+20000
1030
1031 (define-charset 'unicode-ssp
1032   "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
1033   :short-name "Unicode SSP"
1034   :code-space [0 255 0 255]             ; 256 x 256 = #x10000 code points
1035   :code-offset #xE0000)                 ; plane starts at U+E0000
1036
1037 (define-charset 'ethiopic
1038   "Ethiopic characters for Amharic and Tigrigna."
1039   :short-name "Ethiopic"
1040   :long-name "Ethiopic characters"
1041   :iso-final-char ?3                    ; also used by `mule-unicode-e000-ffff', whose code space is 96 x 96
1042   :emacs-mule-id  245
1043   :supplementary-p t
1044   :unify-map "MULE-ethiopic"
1045   :code-space [33 126 33 126]           ; two dimensions, 94 code values each
1046   :code-offset #x1A0000)
1047
1048 (define-charset 'mac-roman
1049   "Mac Roman charset"
1050   :short-name "Mac Roman"
1051   :ascii-compatible-p t
1052   :code-space [0 255]                   ; one dimension, 256 codes
1053   :map "MACINTOSH")                     ; map name; the map itself is not part of this file
1054
1055 ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
1056 (define-charset 'ebcdic-us
1057   "US version of EBCDIC"
1058   :short-name "EBCDIC-US"
1059   :code-space [0 255]                   ; one dimension, 256 codes
1060   :mime-charset 'ebcdic-us              ; same symbol as the charset name
1061   :map "EBCDICUS")                      ; no :ascii-compatible-p here, unlike e.g. `mac-roman'
1062
1063 (define-charset 'ebcdic-uk
1064   "UK version of EBCDIC"
1065   :short-name "EBCDIC-UK"
1066   :code-space [0 255]                   ; one dimension, 256 codes
1067   :mime-charset 'ebcdic-uk              ; same symbol as the charset name
1068   :map "EBCDICUK")                      ; no :ascii-compatible-p here, unlike e.g. `mac-roman'
1069
1070 (define-charset 'ibm1047
1071   ;; The description below is quoted from groff:
1072   "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
1073   :short-name "IBM1047"
1074   :code-space [0 255]                   ; one dimension, 256 codes
1075   :mime-charset 'ibm1047
1076   :map "IBM1047")
1077 (define-charset-alias 'cp1047 'ibm1047) ; `cp1047' is a second name for `ibm1047'
1078
1079 (define-charset 'hp-roman8
1080   "Encoding used by Hewlett-Packard printer software"
1081   :short-name "HP-ROMAN8"
1082   :ascii-compatible-p t
1083   :code-space [0 255]                   ; one dimension, 256 codes
1084   :map "HP-ROMAN8")
1085
1086 ;; To make a coding system with this charset, a pre-write-conversion
1087 ;; would have to account for the commented-out multi-valued code
1088 ;; points in stdenc.map.
1089 (define-charset 'adobe-standard-encoding
1090   "Adobe `standard encoding' used in PostScript"
1091   :short-name "ADOBE-STANDARD-ENCODING"
1092   :code-space [#x20 255]                ; starts at #x20, so codes 0..#x1F are outside the charset
1093   :map "stdenc")
1094
1095 (define-charset 'symbol
1096   "Adobe symbol encoding used in PostScript"
1097   :short-name "ADOBE-SYMBOL"
1098   :code-space [#x20 255]                ; same code space as `adobe-standard-encoding'
1099   :map "symbol")
1100
1101 (define-charset 'ibm850
1102   "DOS codepage 850 (Latin-1)"
1103   :short-name "IBM850"
1104   :ascii-compatible-p t
1105   :code-space [0 255]                   ; one dimension, 256 codes
1106   :map "IBM850")
1107 (define-charset-alias 'cp850 'ibm850)   ; `cp850' is a second name for `ibm850'
1108
1109 (define-charset 'mik
1110   "Bulgarian DOS codepage"
1111   :short-name "MIK"
1112   :ascii-compatible-p t
1113   :code-space [0 255]                   ; one dimension, 256 codes
1114   :map "MIK")
1115
1116 (define-charset 'ptcp154
1117   "ParaType codepage (Asian Cyrillic)"
1118   :short-name "PT154"
1119   :ascii-compatible-p t
1120   :code-space [0 255]                   ; one dimension, 256 codes
1121   :mime-charset 'pt154                  ; differs from the charset name; `pt154' is also an alias below
1122   :map "PTCP154")
1123 (define-charset-alias 'pt154 'ptcp154)
1124 (define-charset-alias 'cp154 'ptcp154)
1125
1126 (define-charset 'gb18030-2-byte
1127   "GB18030 2-byte (0x814E..0xFEFE)"
1128   :code-space [#x40 #xFE #x81 #xFE]     ; NOTE(review): lowest code here is #x8140, docstring says 0x814E -- confirm
1129   :supplementary-p t
1130   :map "GB180302")
1131
1132 (define-charset 'gb18030-4-byte-bmp
1133   "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
1134   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84] ; ranges run from the last byte to the first (cf. docstring)
1135   :supplementary-p t
1136   :map "GB180304")
1137
1138 (define-charset 'gb18030-4-byte-smp
1139   "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
1140   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
1141   :min-code '(#x9030 . #x8130)          ; (HIGH-16-BITS . LOW-16-BITS) of 0x90308130
1142   :max-code '(#xE332 . #x9A35)          ; likewise for 0xE3329A35
1143   :supplementary-p t
1144   :code-offset #x10000)
1145
1146 (define-charset 'gb18030-4-byte-ext-1
1147   "GB18030 4-byte (0x8431A530-0x8F39FE39)"
1148   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
1149   :min-code '(#x8431 . #xA530)          ; (HIGH-16-BITS . LOW-16-BITS) of 0x8431A530
1150   :max-code '(#x8F39 . #xFE39)          ; likewise for 0x8F39FE39
1151   :supplementary-p t
1152   :code-offset #x200000                 ; ... #x22484B (last character code used by this charset)
1153   )
1154
1155 (define-charset 'gb18030-4-byte-ext-2
1156   "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
1157   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
1158   :min-code '(#xE332 . #x9A36)          ; one past the :max-code of `gb18030-4-byte-smp'
1159   :max-code '(#xFE39 . #xFE39)
1160   :supplementary-p t
1161   :code-offset #x22484C                 ; ... #x279f93; starts right after ext-1's last code #x22484B
1162   )
1163
1164 (define-charset 'gb18030
1165   "GB18030"
1166   :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
1167   :min-code 0
1168   :max-code '(#xFE39 . #xFE39)          ; same upper bound as `gb18030-4-byte-ext-2'
1169   :superset '(ascii gb18030-2-byte      ; ASCII plus the five gb18030-* charsets defined above
1170                     gb18030-4-byte-bmp gb18030-4-byte-smp
1171                     gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
1172
1173 (define-charset 'chinese-cns11643-15
1174   "CNS11643 Plane 15 Chinese Traditional"
1175   :short-name  "CNS11643-15"
1176   :long-name "CNS11643-15 (Chinese traditional)"
1177   :code-space [33 126 33 126]           ; two dimensions, 94 code values each
1178   :code-offset #x27A000                 ; above #x279f93, the end of `gb18030-4-byte-ext-2'
1179   :unify-map "CNS-F")
1180
1181 ;; Call `unify-charset' on each of these charsets, in the order listed.
1182 (mapc #'unify-charset
1183       '(chinese-gb2312
1184         chinese-gbk
1185         chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1186         chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1187         chinese-cns11643-7 chinese-cns11643-15
1188         big5 chinese-big5-1 chinese-big5-2 big5-hkscs
1189         korean-ksc5601
1190         vietnamese-viscii-lower vietnamese-viscii-upper
1191         chinese-sisheng
1192         ipa
1193         tibetan
1194         ethiopic
1195         indian-is13194
1196         japanese-jisx0208-1978 japanese-jisx0208 japanese-jisx0212
1197         japanese-jisx0213-1 japanese-jisx0213-2))
1208
1209 \f
1210 ;; Tables for translating characters on decoding and on encoding.
1211 ;; Both are initialized to nil here.
1212 ;; Fixme: these aren't used now -- should they be?
1213 (setq standard-translation-table-for-decode nil
1214       standard-translation-table-for-encode nil)
1216 \f
1217 ;;; Make fundamental coding systems.
1218
1219 ;; The coding systems `no-conversion' and `undecided' are already
1220 ;; defined in coding.c as below:
1221 ;;
1222 ;; (define-coding-system 'no-conversion
1223 ;;   "..."
1224 ;;   :coding-type 'raw-text
1225 ;;   ...)
1226 ;; (define-coding-system 'undecided
1227 ;;   "..."
1228 ;;   :coding-type 'undecided
1229 ;;   ...)
1230
1231 (mapc (lambda (pair)                    ; PAIR is (ALIAS . EXISTING-CODING-SYSTEM)
1232         (define-coding-system-alias (car pair) (cdr pair)))
1233       '((binary . no-conversion) (unix . undecided-unix)
1234         (dos . undecided-dos) (mac . undecided-mac)))
1235
1236 (define-coding-system 'prefer-utf-8
1237   "Like `undecided' but prefer UTF-8 when appropriate.
1238 On decoding, if the source contains 8-bit codes and they all
1239 are valid UTF-8 sequences, detect the source as UTF-8 encoding
1240 regardless of the coding priority.
1241 On encoding, if the source contains non-ASCII characters, encode them
1242 by UTF-8."
1243   :coding-type 'undecided               ; the type the comment above shows for `undecided'
1244   :mnemonic ?-
1245   :charset-list '(emacs)
1246   :prefer-utf-8 t)                      ; turns on the UTF-8 preference described in the docstring
1247
1248 (define-coding-system 'raw-text
1249   "Raw text, which means text contains random 8-bit codes.
1250 Encoding text with this coding system produces the actual byte
1251 sequence of the text in buffers and strings.  An exception is made for
1252 characters from the `eight-bit' character set.  Each of them is encoded
1253 into a single byte.
1254
1255 When you visit a file with this coding, the file is read into a
1256 unibyte buffer as is (except for EOL format), thus each byte of a file
1257 is treated as a character."
1258   :coding-type 'raw-text
1259   :for-unibyte t                        ; goes with the docstring: files are read into a unibyte buffer
1260   :mnemonic ?t)
1261
1262 (define-coding-system 'no-conversion-multibyte
1263   "Like `no-conversion' but don't read a file into a unibyte buffer."
1264   :coding-type 'raw-text                ; same type as `raw-text', but without :for-unibyte
1265   :eol-type 'unix                       ; the EOL type is given explicitly here, unlike `raw-text'
1266   :mnemonic ?=)
1267
1268 (define-coding-system 'iso-latin-1
1269   "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
1270   :coding-type 'charset
1271   :mnemonic ?1
1272   :charset-list '(iso-8859-1)           ; the single charset this coding system lists
1273   :mime-charset 'iso-8859-1)
1274
     ;; `iso-8859-1' and `latin-1' are further names for `iso-latin-1'.
1275 (define-coding-system-alias 'iso-8859-1 'iso-latin-1)
1276 (define-coding-system-alias 'latin-1 'iso-latin-1)
1277
1278 ;; Coding systems not specific to each language environment.
1279
1280 (define-coding-system 'emacs-mule
1281  "Emacs 21 internal format used in buffer and string."
1282  :coding-type 'emacs-mule
1283  :charset-list 'emacs-mule              ; a symbol, not a list -- NOTE(review): presumably "all charsets with an :emacs-mule-id"; confirm
1284  :mnemonic ?M)
1285
1286 (define-coding-system 'utf-8
1287   "UTF-8 (no signature (BOM))"
1288   :coding-type 'utf-8
1289   :mnemonic ?U
1290   :charset-list '(unicode)              ; compare `utf-8-emacs', which lists `emacs' instead
1291   :mime-charset 'utf-8)                 ; no :bom property, matching "no signature" in the docstring
1292
1293 (define-coding-system 'utf-8-with-signature
1294   "UTF-8 (with signature (BOM))"
1295   :coding-type 'utf-8
1296   :mnemonic ?U
1297   :charset-list '(unicode)
1298   :bom t)                               ; the only property that differs from `utf-8', apart from :mime-charset being absent
1299
1300 (define-coding-system 'utf-8-auto
1301   "UTF-8 (auto-detect signature (BOM))"
1302   :coding-type 'utf-8
1303   :mnemonic ?U
1304   :charset-list '(unicode)
1305   :bom '(utf-8-with-signature . utf-8)) ; pair of the with-BOM and the no-BOM coding systems defined above
1306
1307 (define-coding-system-alias 'mule-utf-8 'utf-8) ; `mule-utf-8' is a second name for `utf-8'
1308
1309 (define-coding-system 'utf-8-emacs
1310   "Support for all Emacs characters (including non-Unicode characters)."
1311   :coding-type 'utf-8
1312   :mnemonic ?U
1313   :charset-list '(emacs))               ; `emacs' rather than the `unicode' listed by `utf-8'
1314
1315 ;; The encoding used internally.  This encoding is meant to be able to save
1316 ;; any multibyte buffer without losing information.  It can change between
1317 ;; Emacs releases, though, so it should only be used for internal files.
1318 (define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
1319
1320 (define-coding-system 'utf-16le
1321   "UTF-16LE (little endian, no signature (BOM))."
1322   :coding-type 'utf-16
1323   :mnemonic ?U
1324   :charset-list '(unicode)
1325   :endian 'little                       ; byte order named in the docstring; no :bom property is given
1326   :mime-text-unsuitable t
1327   :mime-charset 'utf-16le)
1328
1329 (define-coding-system 'utf-16be
1330   "UTF-16BE (big endian, no signature (BOM))."
1331   :coding-type 'utf-16
1332   :mnemonic ?U
1333   :charset-list '(unicode)
1334   :endian 'big                          ; byte order named in the docstring; no :bom property is given
1335   :mime-text-unsuitable t
1336   :mime-charset 'utf-16be)
1337
1338 (define-coding-system 'utf-16le-with-signature
1339   "UTF-16 (little endian, with signature (BOM))."
1340   :coding-type 'utf-16
1341   :mnemonic ?U
1342   :charset-list '(unicode)
1343   :bom t                                ; the "with signature" part of the docstring
1344   :endian 'little
1345   :mime-text-unsuitable t
1346   :mime-charset 'utf-16)                ; same MIME name as `utf-16be-with-signature' and `utf-16'
1347
1348 (define-coding-system 'utf-16be-with-signature
1349   "UTF-16 (big endian, with signature (BOM))."
1350   :coding-type 'utf-16
1351   :mnemonic ?U
1352   :charset-list '(unicode)
1353   :bom t                                ; the "with signature" part of the docstring
1354   :endian 'big
1355   :mime-text-unsuitable t
1356   :mime-charset 'utf-16)                ; same MIME name as `utf-16le-with-signature' and `utf-16'
1357
1358 (define-coding-system 'utf-16
1359   "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
1360   :coding-type 'utf-16
1361   :mnemonic ?U
1362   :charset-list '(unicode)
1363   :bom '(utf-16le-with-signature . utf-16be-with-signature) ; the two BOM-carrying coding systems defined above
1364   :endian 'big                          ; "use big endian on encoding", per the docstring
1365   :mime-text-unsuitable t
1366   :mime-charset 'utf-16)
1367
1368 ;; Old names kept for backwards compatibility (Mule-UCS uses them
1369 ;; too).  We prefer the MIME names.
1370 (mapc (lambda (pair)                    ; PAIR is (OLD-NAME . CODING-SYSTEM)
1371         (define-coding-system-alias (car pair) (cdr pair)))
           '((utf-16-le . utf-16le-with-signature)
             (utf-16-be . utf-16be-with-signature)))
1372
1373
1374 (define-coding-system 'iso-2022-7bit
1375   "ISO 2022 based 7-bit encoding using only G0."
1376   :coding-type 'iso-2022
1377   :mnemonic ?J
1378   :charset-list 'iso-2022               ; a symbol rather than an explicit list of charsets
1379   :designation [(ascii t) nil nil nil]  ; four slots; only the first is non-nil ("using only G0")
1380   :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
1381
1382 (define-coding-system 'iso-2022-7bit-ss2
1383   "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
1384   :coding-type 'iso-2022
1385   :mnemonic ?$
1386   :charset-list 'iso-2022
1387   :designation [(ascii 94) nil (nil 96) nil] ; third slot carries the 96 entry (cf. docstring)
1388   :flags '(short ascii-at-eol ascii-at-cntl 7-bit
1389                  designation single-shift composition)) ; `iso-2022-7bit' flags plus `single-shift'
1390
1391 (define-coding-system 'iso-2022-7bit-lock
1392   "ISO-2022 coding system using Locking-Shift for 96-charset."
1393   :coding-type 'iso-2022
1394   :mnemonic ?&
1395   :charset-list 'iso-2022
1396   :designation [(ascii 94) (nil 96) nil nil] ; second slot carries the 96 entry here
1397   :flags '(ascii-at-eol ascii-at-cntl 7-bit
1398                         designation locking-shift composition)) ; no `short' flag, unlike the -ss2 variant
1399
1400 (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
1401
1402 (define-coding-system 'iso-2022-7bit-lock-ss2
1403   "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
1404   :coding-type 'iso-2022
1405   :mnemonic ?i
1406   :charset-list '(ascii                 ; an explicit list, unlike the `iso-2022' symbol used by the variants above
1407                   japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
1408                   korean-ksc5601
1409                   chinese-gb2312
1410                   chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1411                   chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1412                   chinese-cns11643-7)
1413   :designation [(ascii 94)
1414                 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
1415                 (nil chinese-cns11643-2)
1416                 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
1417                      chinese-cns11643-6 chinese-cns11643-7)]
       ;; Unlike the variants above: no `designation' or `composition'
       ;; flag, and `init-bol' is added.
1418   :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
1419                  single-shift init-bol))
1420
1421 (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
1422
1423 (define-coding-system 'iso-2022-8bit-ss2
1424   "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
1425   :coding-type 'iso-2022
1426   :mnemonic ?@
1427   :charset-list 'iso-2022
1428   :designation [(ascii 94) nil (nil 96) nil] ; same designation as `iso-2022-7bit-ss2'
1429   :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) ; omits `short' and `7-bit'
1430
1431 (define-coding-system 'compound-text
1432   "Compound text based generic encoding.
1433 This coding system is an extension of X's \"Compound Text Encoding\".
1434 It encodes many characters using the normal ISO-2022 designation sequences,
1435 but it doesn't support extended segments of CTEXT."
1436   :coding-type 'iso-2022
1437   :mnemonic ?x
1438   :charset-list 'iso-2022
1439   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1440   :flags '(ascii-at-eol ascii-at-cntl long-form
1441                         designation locking-shift single-shift composition)
1442   ;; Fixme: this isn't a valid MIME charset and has to be
1443   ;; special-cased elsewhere  -- fx
1444   :mime-charset 'x-ctext)
1445
     ;; `x-ctext' and `ctext' are further names for `compound-text'.
1446 (define-coding-system-alias  'x-ctext 'compound-text)
1447 (define-coding-system-alias  'ctext 'compound-text)
1448
1449 ;; Same as compound-text, but doesn't produce composition escape
1450 ;; sequences.  Used in post-read and pre-write conversions of
1451 ;; compound-text-with-extensions, see mule.el.  Note that this should
1452 ;; not have a mime-charset property, to prevent it from showing up
1453 ;; close to the beginning of coding systems ordered by priority.
1454 (define-coding-system 'ctext-no-compositions
1455  "Compound text based generic encoding.
1456
1457 Like `compound-text', but does not produce escape sequences for compositions."
1458   :coding-type 'iso-2022
1459   :mnemonic ?x
1460   :charset-list 'iso-2022
1461   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1462   :flags '(ascii-at-eol ascii-at-cntl   ; the `compound-text' flags minus `long-form' and `composition'
1463                         designation locking-shift single-shift))
1464
1465 (define-coding-system 'compound-text-with-extensions
1466  "Compound text encoding with ICCCM Extended Segment extensions.
1467
1468 See the variables `ctext-standard-encodings' and
1469 `ctext-non-standard-encodings-alist' for the detail about how
1470 extended segments are handled.
1471
1472 This coding system should be used only for X selections.  It is inappropriate
1473 for decoding and encoding files, process I/O, etc."
1474   :coding-type 'iso-2022
1475   :mnemonic ?x
1476   :charset-list 'iso-2022
1477   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1478   :flags '(ascii-at-eol ascii-at-cntl long-form ; the `compound-text' flags minus `composition'
1479                         designation locking-shift single-shift)
1480   :post-read-conversion 'ctext-post-read-conversion ; both conversion functions are defined outside this chunk
1481   :pre-write-conversion 'ctext-pre-write-conversion
1482   :mime-charset 'x-ctext)               ; same MIME name as `compound-text'
1483
1484 (define-coding-system-alias
1485   'x-ctext-with-extensions 'compound-text-with-extensions)
1486 (define-coding-system-alias
1487   'ctext-with-extensions 'compound-text-with-extensions)
1488
1489 (define-coding-system 'us-ascii
1490   "Encode ASCII as-is and encode non-ASCII characters to `?'."
1491   :coding-type 'charset
1492   :mnemonic ?-
1493   :charset-list '(ascii)                ; only `ascii' is listed
       ;; The replacement character named in the docstring:
1494   :default-char ??
1495   :mime-charset 'us-ascii)
1496
1497 (define-coding-system-alias 'iso-safe 'us-ascii)
1498
1499 (define-coding-system 'utf-7
1500   "UTF-7 encoding of Unicode (RFC 2152)."
1501   :coding-type 'utf-8                   ; NOTE(review): presumably the UTF-7 step lives in the two conversion functions below -- confirm
1502   :mnemonic ?U
1503   :mime-charset 'utf-7
1504   :charset-list '(unicode)
1505   :pre-write-conversion 'utf-7-pre-write-conversion ; defined outside this chunk
1506   :post-read-conversion 'utf-7-post-read-conversion)
1507
1508 (define-coding-system 'utf-7-imap
1509   "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
1510   :coding-type 'utf-8                   ; same coding type as `utf-7'; no :mime-charset is given here
1511   :mnemonic ?u
1512   :charset-list '(unicode)
1513   :pre-write-conversion 'utf-7-imap-pre-write-conversion ; IMAP-specific functions, defined outside this chunk
1514   :post-read-conversion 'utf-7-imap-post-read-conversion)
1515
1516 ;; Use `us-ascii' (defined above) for terminal output unless some
1517 ;; other coding system is specified explicitly.
1518 (set-safe-terminal-coding-system-internal 'us-ascii)
1519
1520 ;; The other coding systems are defined in the language-specific
1521 ;; files under lisp/language.
1522
1523 ;; Normally, set coding system to `undecided' before reading a file.
1524 ;; Compiled Emacs Lisp files (*.elc) are read as `utf-8-emacs' and
1525 ;; Emacs Lisp source files (*.el) as `prefer-utf-8'.
1526 ;; Tar files are not decoded at all, but we treat them as raw bytes.
1527
     ;; Each entry is (REGEXP . VALUE); the REGEXP string is passed
     ;; through `purecopy', the VALUE is kept as written.
1528 (setq file-coding-system-alist
1529       (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
1530       '(("\\.elc\\'" . utf-8-emacs)
1531         ("\\.el\\'" . prefer-utf-8)
1532         ("\\.utf\\(-8\\)?\\'" . utf-8)
1533         ("\\.xml\\'" . xml-find-file-coding-system)
1534         ;; We use raw-text for reading loaddefs.el so that if it
1535         ;; happens to have DOS or Mac EOLs, they are converted to
1536         ;; newlines.  This is required to make the special treatment
1537         ;; of the "\ newline" combination in loaddefs.el, which marks
1538         ;; the beginning of a doc string, work.
             ;; NOTE(review): the "\\.el\\'" entry earlier in this list
             ;; also matches loaddefs.el; if lookup takes the first
             ;; matching entry, this one is never reached -- confirm.
1539         ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
1540         ("\\.tar\\'" . (no-conversion . no-conversion))
1541         ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
1542         ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
1543         ("" . (undecided . nil)))))
1544
1545 \f
1546 ;;; Setting coding categories and their priorities.
1547
1548 ;; This setting is just for reading Emacs Lisp source files that
1549 ;; contain multilingual text while dumping Emacs.  More appropriate
1550 ;; values are set by the command `set-language-environment' for each
1551 ;; language environment.
1552
1553 (set-coding-system-priority
1554  'iso-latin-1
1555  'utf-8
1556  'iso-2022-7bit
1557  )
1558
1559 \f
1560 ;;; Miscellaneous settings.
1561
1562 ;; Make all multibyte characters self-insert: in the second element
     ;; of `global-map', set every character code from 128 up to
     ;; (max-char) to `self-insert-command'.
1563 (set-char-table-range (nth 1 global-map)
1564                       (cons 128 (max-char))
1565                       'self-insert-command)
1566
1567 ;; Set the `latin-extra-code-table' slots for ?\221 through ?\226 to t.
1568 (mapc (lambda (code) (aset latin-extra-code-table code t))
1569       '(?\221 ?\222 ?\223 ?\224 ?\225 ?\226))
1573
1574 ;; The old code-pages library is obsoleted by coding systems based on
1575 ;; the charsets defined in this file, but user code might still
1576 ;; require it, so provide the `code-pages' feature from here.
1577 (provide 'code-pages)
1578
1579 ;;; mule-conf.el ends here