lisp/international/mule-conf.el

   1 ;;; mule-conf.el --- configure multilingual environment
   2
   3 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   4 ;;   2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation, Inc.
   5 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   6 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   7 ;;   Registration Number H14PRO021
   8 ;; Copyright (C) 2003
   9 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
  10 ;;   Registration Number H13PRO009
  11
  12 ;; Keywords: i18n, mule, multilingual, character set, coding system
  13
  14 ;; This file is part of GNU Emacs.
  15
  16 ;; GNU Emacs is free software: you can redistribute it and/or modify
  17 ;; it under the terms of the GNU General Public License as published by
  18 ;; the Free Software Foundation, either version 3 of the License, or
  19 ;; (at your option) any later version.
  20
  21 ;; GNU Emacs is distributed in the hope that it will be useful,
  22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  24 ;; GNU General Public License for more details.
  25
  26 ;; You should have received a copy of the GNU General Public License
  27 ;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
  28
  29 ;;; Commentary:
  30
  31 ;; This file defines the Emacs charsets and some basic coding systems.
  32 ;; Other coding systems are defined in the files in directory
  33 ;; lisp/language.
  34
  35 ;;; Code:
  36
  37 ;;; Remarks
  38
  39 ;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
  40 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
  41 ;; http://www.ecma.ch/.
  42
  43 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
  44 ;; MS Windows, which are presumably the only charsets we really need
  45 ;; to worry about on such systems:
  46 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
  47 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
  48 ;;                      1258, 874, 932, 936, 949, 950
  49
  50 ;;; Definitions of character sets.
  51
  52 ;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are
  53 ;; already defined in charset.c as below:
  54 ;;
  55 ;; (define-charset 'ascii
  56 ;;   ""
  57 ;;   :dimension 1
  58 ;;   :code-space [0 127]
  59 ;;   :iso-final-char ?B
  60 ;;   :ascii-compatible-p t
  61 ;;   :emacs-mule-id 0
  62 ;;   :code-offset 0)
  63 ;;
  64 ;; (define-charset 'unicode
  65 ;;   ""
  66 ;;   :dimension 3
  67 ;;   :code-space [0 255 0 255 0 16]
  68 ;;   :ascii-compatible-p t
  69 ;;   :code-offset 0)
  70 ;;
  71 ;; (define-charset 'emacs
  72 ;;   ""
  73 ;;   :dimension 3
  74 ;;   :code-space [0 255 0 255 0 63]
  75 ;;   :ascii-compatible-p t
  76 ;;   :supplementary-p t
  77 ;;   :code-offset 0)
  78 ;;
  79 ;; (define-charset 'eight-bit
  80 ;;   ""
  81 ;;   :dimension 1
  82 ;;   :code-space [128 255]
  83 ;;   :code-offset #x3FFF80)
  84 ;;
  85 ;; We now set :docstring, :short-name, and :long-name properties.
  86
  87 (put-charset-property
  88  'ascii :docstring "ASCII (ISO646 IRV)")
  89 (put-charset-property
  90  'ascii :short-name "ASCII")
  91 (put-charset-property
  92  'ascii :long-name "ASCII (ISO646 IRV)")
  93 (put-charset-property
  94  'iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)")
  95 (put-charset-property
  96  'iso-8859-1 :short-name "Latin-1")
  97 (put-charset-property
  98  'iso-8859-1 :long-name "Latin-1")
  99 (put-charset-property
 100  'unicode :docstring "Unicode (ISO10646)")
 101 (put-charset-property
 102  'unicode :short-name "Unicode")
 103 (put-charset-property
 104  'unicode :long-name "Unicode (ISO10646)")
 105 (put-charset-property
 106  'emacs :docstring "Full Emacs charset (excluding eight bit chars)")
 107 (put-charset-property
 108  'emacs :short-name "Emacs")
 109 (put-charset-property
 110  'emacs :long-name "Emacs")
 111
 112 (put-charset-property 'eight-bit :docstring "Raw bytes 128-255")
 113 (put-charset-property 'eight-bit :short-name "Raw bytes")
 114
 115 (define-charset-alias 'ucs 'unicode)
 116
 117 (define-charset 'latin-iso8859-1
 118   "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
 119   :short-name "RHP of Latin-1"
 120   :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
 121   :iso-final-char ?A
 122   :emacs-mule-id 129
 123   :code-space [32 127]
 124   :code-offset 160)
 125
 126 ;; Name perhaps not ideal, but is XEmacs-compatible.
 127 (define-charset 'control-1
 128   "8-bit control code (0x80..0x9F)"
 129   :short-name "8-bit control code"
 130   :code-space [128 159]
 131   :code-offset 128)
 132
 133 (define-charset 'eight-bit-control
 134   "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
 135   :short-name "Raw bytes 0x80..0x9F"
 136   :supplementary-p t
 137   :code-space [128 159]
 138   :code-offset #x3FFF80)                ; see character.h
 139
 140 (define-charset 'eight-bit-graphic
 141   "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
 142   :short-name "Raw bytes 0xA0..0xFF"
 143   :supplementary-p t
 144   :code-space [160 255]
 145   :code-offset #x3FFFA0)                ; see character.h
 146
 147 (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
 148                                                  iso-ir iso-final
 149                                                  emacs-mule-id map)
 150   `(progn
 151      (define-charset ,symbol
 152        ,name
 153        :short-name ,nickname
 154        :long-name ,name
 155        :ascii-compatible-p t
 156        :code-space [0 255]
 157        :map ,map)
 158      (if ,iso-symbol
 159          (define-charset ,iso-symbol
 160            (if ,iso-ir
 161                (format "Right-Hand Part of %s (%s): ISO-IR-%d"
 162                        ,name ,nickname ,iso-ir)
 163              (format "Right-Hand Part of %s (%s)" ,name ,nickname))
 164            :short-name (format "RHP of %s" ,name)
 165            :long-name (format "RHP of %s (%s)" ,name ,nickname)
 166            :iso-final-char ,iso-final
 167            :emacs-mule-id ,emacs-mule-id
 168            :code-space [32 127]
 169            :subset (list ,symbol 160 255 -128)))))
 170
 171 (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
 172   "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
 173
 174 (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
 175   "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
 176
 177 (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
 178   "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
 179
 180 (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
 181   "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
 182
 183 (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
 184   "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
 185
 186 (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
 187   "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
 188
 189 (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
 190   "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
 191
 192 (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
 193   "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
 194
 195 (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
 196   "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
 197
 198 ;; http://www.nectec.or.th/it-standards/iso8859-11/
 199 ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
 200 ;; plus nbsp
 201 (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
 202   "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
 203
 204 ;; 8859-12 doesn't (yet?) exist.
 205
 206 (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
 207   "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
 208
 209 (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
 210   "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
 211
 212 (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
 213   "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
 214
 215 (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
 216   "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
 217
 218 ;; No point in keeping it around.
 219 (fmakunbound 'define-iso-single-byte-charset)
 220
 221 ;; Can this be shared with 8859-11?
 222 ;; N.b. not all of these are defined unicodes.
 223 (define-charset 'thai-tis620
 224   "TIS620.2533"
 225   :short-name "TIS620.2533"
 226   :iso-final-char ?T
 227   :emacs-mule-id 133
 228   :code-space [32 127]
 229   :code-offset #x0E00)
 230
 231 ;; Fixme: doc for this, c.f. above
 232 (define-charset 'tis620-2533
 233   "TIS620.2533"
 234   :short-name "TIS620.2533"
 235   :ascii-compatible-p t
 236   :code-space [0 255]
 237   :superset '(ascii eight-bit-control (thai-tis620 . 128)))
 238
 239 (define-charset 'jisx0201
 240   "JISX0201"
 241   :short-name "JISX0201"
 242   :code-space [0 #xDF]
 243   :map "JISX0201")
 244
 245 (define-charset 'latin-jisx0201
 246   "Roman Part of JISX0201.1976"
 247   :short-name "JISX0201 Roman"
 248   :long-name "Japanese Roman (JISX0201.1976)"
 249   :iso-final-char ?J
 250   :emacs-mule-id  138
 251   :supplementary-p t
 252   :code-space [33 126]
 253   :subset '(jisx0201 33 126 0))
 254
 255 (define-charset 'katakana-jisx0201
 256   "Katakana Part of JISX0201.1976"
 257   :short-name "JISX0201 Katakana"
 258   :long-name "Japanese Katakana (JISX0201.1976)"
 259   :iso-final-char ?I
 260   :emacs-mule-id  137
 261   :supplementary-p t
 262   :code-space [33 126]
 263   :subset '(jisx0201 161 254 -128))
 264
 265 (define-charset 'chinese-gb2312
 266   "GB2312 Chinese simplified: ISO-IR-58"
 267   :short-name "GB2312"
 268   :long-name "GB2312: ISO-IR-58"
 269   :iso-final-char ?A
 270   :emacs-mule-id 145
 271   :code-space [33 126 33 126]
 272   :code-offset #x110000
 273   :unify-map "GB2312")
 274
 275 (define-charset 'chinese-gbk
 276   "GBK Chinese simplified."
 277   :short-name "GBK"
 278   :code-space [#x40 #xFE #x81 #xFE]
 279   :code-offset #x160000
 280   :unify-map "GBK")
 281 (define-charset-alias 'cp936 'chinese-gbk)
 282 (define-charset-alias 'windows-936 'chinese-gbk)
 283
 284 (define-charset 'chinese-cns11643-1
 285   "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
 286   :short-name "CNS11643-1"
 287   :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
 288   :iso-final-char ?G
 289   :emacs-mule-id  149
 290   :code-space [33 126 33 126]
 291   :code-offset #x114000
 292   :unify-map "CNS-1")
 293
 294 (define-charset 'chinese-cns11643-2
 295   "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
 296   :short-name "CNS11643-2"
 297   :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
 298   :iso-final-char ?H
 299   :emacs-mule-id  150
 300   :code-space [33 126 33 126]
 301   :code-offset #x118000
 302   :unify-map "CNS-2")
 303
 304 (define-charset 'chinese-cns11643-3
 305   "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
 306   :short-name  "CNS11643-3"
 307   :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
 308   :iso-final-char ?I
 309   :code-space [33 126 33 126]
 310   :emacs-mule-id  246
 311   :code-offset #x11C000
 312   :unify-map "CNS-3")
 313
 314 (define-charset 'chinese-cns11643-4
 315   "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
 316   :short-name  "CNS11643-4"
 317   :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
 318   :iso-final-char ?J
 319   :emacs-mule-id  247
 320   :code-space [33 126 33 126]
 321   :code-offset #x120000
 322   :unify-map "CNS-4")
 323
 324 (define-charset 'chinese-cns11643-5
 325   "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
 326   :short-name  "CNS11643-5"
 327   :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
 328   :iso-final-char ?K
 329   :emacs-mule-id  248
 330   :code-space [33 126 33 126]
 331   :code-offset #x124000
 332   :unify-map "CNS-5")
 333
 334 (define-charset 'chinese-cns11643-6
 335   "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
 336   :short-name  "CNS11643-6"
 337   :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
 338   :iso-final-char ?L
 339   :emacs-mule-id 249
 340   :code-space [33 126 33 126]
 341   :code-offset #x128000
 342   :unify-map "CNS-6")
 343
 344 (define-charset 'chinese-cns11643-7
 345   "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
 346   :short-name  "CNS11643-7"
 347   :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
 348   :iso-final-char ?M
 349   :emacs-mule-id 250
 350   :code-space [33 126 33 126]
 351   :code-offset #x12C000
 352   :unify-map "CNS-7")
 353
 354 (define-charset 'big5
 355   "Big5 (Chinese traditional)"
 356   :short-name "Big5"
 357   :code-space [#x40 #xFE #xA1 #xFE]
 358   :code-offset #x130000
 359   :unify-map "BIG5")
 360 ;; Fixme: AKA cp950 according to
 361 ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>.  Is
 362 ;; that correct?
 363
 364 (define-charset 'chinese-big5-1
 365   "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
 366   :short-name "Big5 (Level-1)"
 367   :long-name "Big5 (Level-1) A141-C67F"
 368   :iso-final-char ?0
 369   :emacs-mule-id 152
 370   :supplementary-p t
 371   :code-space [#x21 #x7E #x21 #x7E]
 372   :code-offset #x135000
 373   :unify-map "BIG5-1")
 374
 375 (define-charset 'chinese-big5-2
 376   "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
 377   :short-name "Big5 (Level-2)"
 378   :long-name "Big5 (Level-2) C940-FEFE"
 379   :iso-final-char ?1
 380   :emacs-mule-id  153
 381   :supplementary-p t
 382   :code-space [#x21 #x7E #x21 #x7E]
 383   :code-offset #x137800
 384   :unify-map "BIG5-2")
 385
 386 (define-charset 'japanese-jisx0208
 387   "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
 388   :short-name "JISX0208"
 389   :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
 390   :iso-final-char ?B
 391   :emacs-mule-id 146
 392   :code-space [33 126 33 126]
 393   :code-offset #x140000
 394   :unify-map "JISX0208")
 395
 396 (define-charset 'japanese-jisx0208-1978
 397   "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
 398   :short-name "JISX0208.1978"
 399   :long-name  "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
 400   :iso-final-char ?@
 401   :emacs-mule-id  144
 402   :code-space [33 126 33 126]
 403   :code-offset #x144000
 404   :unify-map "JISC6226")
 405
 406 (define-charset 'japanese-jisx0212
 407   "JISX0212 Japanese supplement: ISO-IR-159"
 408   :short-name "JISX0212"
 409   :long-name "JISX0212 (Japanese): ISO-IR-159"
 410   :iso-final-char ?D
 411   :emacs-mule-id 148
 412   :code-space [33 126 33 126]
 413   :code-offset #x148000
 414   :unify-map "JISX0212")
 415
 416 ;; Note that jisx0213 contains characters not in Unicode (3.2?).  It's
 417 ;; arguable whether it should have a unify-map.
 418 (define-charset 'japanese-jisx0213-1
 419   "JISX0213.2000 Plane 1 (Japanese)"
 420   :short-name "JISX0213-1"
 421   :iso-final-char ?O
 422   :emacs-mule-id  151
 423   :unify-map "JISX2131"
 424   :code-space [33 126 33 126]
 425   :code-offset #x14C000)
 426
 427 (define-charset 'japanese-jisx0213-2
 428   "JISX0213.2000 Plane 2 (Japanese)"
 429   :short-name "JISX0213-2"
 430   :iso-final-char ?P
 431   :emacs-mule-id 254
 432   :unify-map "JISX2132"
 433   :code-space [33 126 33 126]
 434   :code-offset #x150000)
 435
 436 (define-charset 'japanese-jisx0213-a
 437   "JISX0213.2004 adds these characters to JISX0213.2000."
 438   :short-name "JISX0213A"
 439   :dimension 2
 440   :code-space [33 126 33 126]
 441   :supplementary-p t
 442   :map "JISX213A")
 443
 444 (define-charset 'japanese-jisx0213.2004-1
 445   "JISX0213.2004 Plane1 (Japanese)"
 446   :short-name "JISX0213.2004-1"
 447   :dimension 2
 448   :code-space [33 126 33 126]
 449   :iso-final-char ?Q
 450   :superset '(japanese-jisx0213-a japanese-jisx0213-1))
 451
 452 (define-charset 'katakana-sjis
 453   "Katakana part of Shift-JIS"
 454   :dimension 1
 455   :code-space [#xA1 #xDF]
 456   :subset '(jisx0201 #xA1 #xDF 0)
 457   :supplementary-p t)
 458
 459 (define-charset 'cp932-2-byte
 460   "2-byte part of CP932"
 461   :dimension 2
 462   :map "CP932-2BYTE"
 463   :code-space [#x40 #xFC #x81 #xFC]
 464   :supplementary-p t)
 465
 466 (define-charset 'cp932
 467   "CP932 (Microsoft shift-jis)"
 468   :code-space [#x00 #xFF #x00 #xFE]
 469   :short-name "CP932"
 470   :superset '(ascii katakana-sjis cp932-2-byte))
 471
 472 (define-charset 'korean-ksc5601
 473   "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
 474   :short-name "KSC5601"
 475   :long-name "KSC5601 (Korean): ISO-IR-149"
 476   :iso-final-char ?C
 477   :emacs-mule-id 147
 478   :code-space [33 126 33 126]
 479   :code-offset #x279f94                 ; ... #x27c217
 480   :unify-map "KSC5601")
 481
 482 (define-charset 'big5-hkscs
 483   "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
 484   :short-name "Big5"
 485   :code-space [#x40 #xFE #xA1 #xFE]
 486   :code-offset #x27c218                 ; ... #x280839
 487   :unify-map "BIG5-HKSCS")
 488
 489 ;; Fixme: Korean cp949/UHC
 490
 491 (define-charset 'chinese-sisheng
 492   "SiSheng characters for PinYin/ZhuYin"
 493   :short-name "SiSheng"
 494   :long-name "SiSheng (PinYin/ZhuYin)"
 495   :iso-final-char ?0
 496   :emacs-mule-id 160
 497   :code-space [33 126]
 498   :unify-map "MULE-sisheng"
 499   :supplementary-p t
 500   :code-offset #x200000)
 501
 502 ;; A subset of the 1989 version of IPA.  It consists of the consonant
 503 ;; signs used in English, French, German and Italian, and all vowels
 504 ;; signs in the table.  [says old MULE doc]
 505 (define-charset 'ipa
 506   "IPA (International Phonetic Association)"
 507   :short-name "IPA"
 508   :iso-final-char ?0
 509   :emacs-mule-id  161
 510   :unify-map "MULE-ipa"
 511   :code-space [32 127]
 512   :supplementary-p t
 513   :code-offset #x200080)
 514
 515 (define-charset 'viscii
 516   "VISCII1.1"
 517   :short-name "VISCII"
 518   :long-name "VISCII 1.1"
 519   :code-space [0 255]
 520   :map "VISCII")
 521
 522 (define-charset 'vietnamese-viscii-lower
 523   "VISCII1.1 lower-case"
 524   :short-name "VISCII lower"
 525   :long-name "VISCII lower-case"
 526   :iso-final-char ?1
 527   :emacs-mule-id  162
 528   :code-space [32 127]
 529   :code-offset #x200200
 530   :supplementary-p t
 531   :unify-map "MULE-lviscii")
 532
 533 (define-charset 'vietnamese-viscii-upper
 534   "VISCII1.1 upper-case"
 535   :short-name "VISCII upper"
 536   :long-name "VISCII upper-case"
 537   :iso-final-char ?2
 538   :emacs-mule-id  163
 539   :code-space [32 127]
 540   :code-offset #x200280
 541   :supplementary-p t
 542   :unify-map "MULE-uviscii")
 543
 544 (define-charset 'vscii
 545   "VSCII1.1 (TCVN-5712 VN1)"
 546   :short-name "VSCII"
 547   :code-space [0 255]
 548   :map "VSCII")
 549
 550 (define-charset-alias 'tcvn-5712 'vscii)
 551
 552 ;; Fixme: see note in tcvn.map about combining characters
 553 (define-charset 'vscii-2
 554   "VSCII-2 (TCVN-5712 VN2)"
 555   :code-space [0 255]
 556   :map "VSCII-2")
 557
 558 (define-charset 'koi8-r
 559   "KOI8-R"
 560   :short-name "KOI8-R"
 561   :ascii-compatible-p t
 562   :code-space [0 255]
 563   :map "KOI8-R")
 564
 565 (define-charset-alias 'koi8 'koi8-r)
 566
 567 (define-charset 'alternativnyj
 568   "ALTERNATIVNYJ"
 569   :short-name "alternativnyj"
 570   :ascii-compatible-p t
 571   :code-space [0 255]
 572   :map "ALTERNATIVNYJ")
 573
 574 (define-charset 'cp866
 575   "CP866"
 576   :short-name "cp866"
 577   :ascii-compatible-p t
 578   :code-space [0 255]
 579   :map "IBM866")
 580 (define-charset-alias 'ibm866 'cp866)
 581
 582 (define-charset 'koi8-u
 583   "KOI8-U"
 584   :short-name "KOI8-U"
 585   :ascii-compatible-p t
 586   :code-space [0 255]
 587   :map "KOI8-U")
 588
 589 (define-charset 'koi8-t
 590   "KOI8-T"
 591   :short-name "KOI8-T"
 592   :ascii-compatible-p t
 593   :code-space [0 255]
 594   :map "KOI8-T")
 595
 596 (define-charset 'georgian-ps
 597   "GEORGIAN-PS"
 598   :short-name "GEORGIAN-PS"
 599   :ascii-compatible-p t
 600   :code-space [0 255]
 601   :map "KA-PS")
 602
 603 (define-charset 'georgian-academy
 604   "GEORGIAN-ACADEMY"
 605   :short-name "GEORGIAN-ACADEMY"
 606   :ascii-compatible-p t
 607   :code-space [0 255]
 608   :map "KA-ACADEMY")
 609
 610 (define-charset 'windows-1250
 611   "WINDOWS-1250 (Central Europe)"
 612   :short-name "WINDOWS-1250"
 613   :ascii-compatible-p t
 614   :code-space [0 255]
 615   :map "CP1250")
 616 (define-charset-alias 'cp1250 'windows-1250)
 617
 618 (define-charset 'windows-1251
 619   "WINDOWS-1251 (Cyrillic)"
 620   :short-name "WINDOWS-1251"
 621   :ascii-compatible-p t
 622   :code-space [0 255]
 623   :map "CP1251")
 624 (define-charset-alias 'cp1251 'windows-1251)
 625
 626 (define-charset 'windows-1252
 627   "WINDOWS-1252 (Latin I)"
 628   :short-name "WINDOWS-1252"
 629   :ascii-compatible-p t
 630   :code-space [0 255]
 631   :map "CP1252")
 632 (define-charset-alias 'cp1252 'windows-1252)
 633
 634 (define-charset 'windows-1253
 635   "WINDOWS-1253 (Greek)"
 636   :short-name "WINDOWS-1253"
 637   :ascii-compatible-p t
 638   :code-space [0 255]
 639   :map "CP1253")
 640 (define-charset-alias 'cp1253 'windows-1253)
 641
 642 (define-charset 'windows-1254
 643   "WINDOWS-1254 (Turkish)"
 644   :short-name "WINDOWS-1254"
 645   :ascii-compatible-p t
 646   :code-space [0 255]
 647   :map "CP1254")
 648 (define-charset-alias 'cp1254 'windows-1254)
 649
 650 (define-charset 'windows-1255
 651   "WINDOWS-1255 (Hebrew)"
 652   :short-name "WINDOWS-1255"
 653   :ascii-compatible-p t
 654   :code-space [0 255]
 655   :map "CP1255")
 656 (define-charset-alias 'cp1255 'windows-1255)
 657
 658 (define-charset 'windows-1256
 659   "WINDOWS-1256 (Arabic)"
 660   :short-name "WINDOWS-1256"
 661   :ascii-compatible-p t
 662   :code-space [0 255]
 663   :map "CP1256")
 664 (define-charset-alias 'cp1256 'windows-1256)
 665
 666 (define-charset 'windows-1257
 667   "WINDOWS-1257 (Baltic)"
 668   :short-name "WINDOWS-1257"
 669   :ascii-compatible-p t
 670   :code-space [0 255]
 671   :map "CP1257")
 672 (define-charset-alias 'cp1257 'windows-1257)
 673
 674 (define-charset 'windows-1258
 675   "WINDOWS-1258 (Viet Nam)"
 676   :short-name "WINDOWS-1258"
 677   :ascii-compatible-p t
 678   :code-space [0 255]
 679   :map "CP1258")
 680 (define-charset-alias 'cp1258 'windows-1258)
 681
 682 (define-charset 'next
 683   "NEXT"
 684   :short-name "NEXT"
 685   :ascii-compatible-p t
 686   :code-space [0 255]
 687   :map "NEXTSTEP")
 688
 689 (define-charset 'cp1125
 690   "CP1125"
 691   :short-name "CP1125"
 692   :code-space [0 255]
 693   :ascii-compatible-p t
 694   :map "CP1125")
 695 (define-charset-alias 'ruscii 'cp1125)
 696 ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
 697 (define-charset-alias 'cp866u 'cp1125)
 698
 699 ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
 700 ;; shows this as not ASCII comptaible, with various graphics in
 701 ;; 0x01-0x1F.
 702 (define-charset 'cp437
 703   "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
 704   :short-name "CP437"
 705   :code-space [0 255]
 706   :ascii-compatible-p t
 707   :map "IBM437")
 708
 709 (define-charset 'cp720
 710   "CP720 (Arabic)"
 711   :short-name "CP720"
 712   :code-space [0 255]
 713   :ascii-compatible-p t
 714   :map "CP720")
 715
 716 (define-charset 'cp737
 717   "CP737 (PC Greek)"
 718   :short-name "CP737"
 719   :code-space [0 255]
 720   :ascii-compatible-p t
 721   :map "CP737")
 722
 723 (define-charset 'cp775
 724   "CP775 (PC Baltic)"
 725   :short-name "CP775"
 726   :code-space [0 255]
 727   :ascii-compatible-p t
 728   :map "CP775")
 729
 730 (define-charset 'cp851
 731   "CP851 (Greek)"
 732   :short-name "CP851"
 733   :code-space [0 255]
 734   :ascii-compatible-p t
 735   :map "IBM851")
 736
 737 (define-charset 'cp852
 738   "CP852 (MS-DOS Latin-2)"
 739   :short-name "CP852"
 740   :code-space [0 255]
 741   :ascii-compatible-p t
 742   :map "IBM852")
 743
 744 (define-charset 'cp855
 745   "CP855 (IBM Cyrillic)"
 746   :short-name "CP855"
 747   :code-space [0 255]
 748   :ascii-compatible-p t
 749   :map "IBM855")
 750
 751 (define-charset 'cp857
 752   "CP857 (IBM Turkish)"
 753   :short-name "CP857"
 754   :code-space [0 255]
 755   :ascii-compatible-p t
 756   :map "IBM857")
 757
 758 (define-charset 'cp858
 759   "CP858 (Multilingual Latin I + Euro)"
 760   :short-name "CP858"
 761   :code-space [0 255]
 762   :ascii-compatible-p t
 763   :map "CP858")
 764 (define-charset-alias 'cp00858 'cp858)  ; IANA has IBM00858/CP00858
 765
 766 (define-charset 'cp860
 767   "CP860 (MS-DOS Portuguese)"
 768   :short-name "CP860"
 769   :code-space [0 255]
 770   :ascii-compatible-p t
 771   :map "IBM860")
 772
 773 (define-charset 'cp861
 774   "CP861 (MS-DOS Icelandic)"
 775   :short-name "CP861"
 776   :code-space [0 255]
 777   :ascii-compatible-p t
 778   :map "IBM861")
 779
 780 (define-charset 'cp862
 781   "CP862 (PC Hebrew)"
 782   :short-name "CP862"
 783   :code-space [0 255]
 784   :ascii-compatible-p t
 785   :map "IBM862")
 786
 787 (define-charset 'cp863
 788   "CP863 (MS-DOS Canadian French)"
 789   :short-name "CP863"
 790   :code-space [0 255]
 791   :ascii-compatible-p t
 792   :map "IBM863")
 793
 794 (define-charset 'cp864
 795   "CP864 (PC Arabic)"
 796   :short-name "CP864"
 797   :code-space [0 255]
 798   :ascii-compatible-p t
 799   :map "IBM864")
 800
 801 (define-charset 'cp865
 802   "CP865 (MS-DOS Nordic)"
 803   :short-name "CP865"
 804   :code-space [0 255]
 805   :ascii-compatible-p t
 806   :map "IBM865")
 807
 808 (define-charset 'cp869
 809   "CP869 (IBM Modern Greek)"
 810   :short-name "CP869"
 811   :code-space [0 255]
 812   :ascii-compatible-p t
 813   :map "IBM869")
 814
 815 (define-charset 'cp874
 816   "CP874 (IBM Thai)"
 817   :short-name "CP874"
 818   :code-space [0 255]
 819   :ascii-compatible-p t
 820   :map "IBM874")
 821
 822 ;; For Arabic, we need three different types of character sets.
 823 ;; Digits are of direction left-to-right and of width 1-column.
 824 ;; Others are of direction right-to-left and of width 1-column or
 825 ;; 2-column.
 826 (define-charset 'arabic-digit
 827   "Arabic digit"
 828   :short-name "Arabic digit"
 829   :iso-final-char ?2
 830   :emacs-mule-id 164
 831   :supplementary-p t
 832   :code-space [34 42]
 833   :code-offset #x0600)
 834
 835 (define-charset 'arabic-1-column
 836   "Arabic 1-column"
 837   :short-name "Arabic 1-col"
 838   :long-name "Arabic 1-column"
 839   :iso-final-char ?3
 840   :emacs-mule-id 165
 841   :supplementary-p t
 842   :code-space [33 126]
 843   :code-offset #x200100)
 844
 845 (define-charset 'arabic-2-column
 846   "Arabic 2-column"
 847   :short-name "Arabic 2-col"
 848   :long-name "Arabic 2-column"
 849   :iso-final-char ?4
 850   :emacs-mule-id 224
 851   :supplementary-p t
 852   :code-space [33 126]
 853   :code-offset #x200180)
 854
 855 ;; Lao script.
 856 ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
 857 ;; Not all of them are defined unicodes.
 858 (define-charset 'lao
 859   "Lao characters (ISO10646 0E81..0EDF)"
 860   :short-name "Lao"
 861   :iso-final-char ?1
 862   :emacs-mule-id 167
 863   :supplementary-p t
 864   :code-space [33 126]
 865   :code-offset #x0E81)
 866
 867 (define-charset 'mule-lao
 868   "Lao characters (ISO10646 0E81..0EDF)"
 869   :short-name "Lao"
 870   :code-space [0 255]
 871   :supplementary-p t
 872   :superset '(ascii eight-bit-control (lao . 128)))
 873
 874
 875 ;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
 876 ;; not assigned.  They are automatically converted to each Indian
 877 ;; script which IS-13194 supports.
 878
 879 (define-charset 'indian-is13194
 880   "Generic Indian charset for data exchange with IS 13194"
 881   :short-name "IS 13194"
 882   :long-name "Indian IS 13194"
 883   :iso-final-char ?5
 884   :emacs-mule-id 225
 885   :supplementary-p t
 886   :code-space [33 126]
 887   :code-offset #x180000)
 888
 889 (let ((code-offset #x180100))
 890   (dolist (script '(devanagari sanskrit bengali tamil telugu assamese
 891                                oriya kannada malayalam gujarati punjabi))
 892     (define-charset (intern (format "%s-cdac" script))
 893       (format "Glyphs of %s script for CDAC font.  Subset of `indian-glyph'."
 894               (capitalize (symbol-name script)))
 895       :short-name (format "CDAC %s glyphs" (capitalize (symbol-name script)))
 896       :supplementary-p t
 897       :code-space [0 255]
 898       :code-offset code-offset)
 899     (setq code-offset (+ code-offset #x100)))
 900
 901   (dolist (script '(devanagari bengali punjabi gujarati
 902                                oriya tamil telugu kannada malayalam))
 903     (define-charset (intern (format "%s-akruti" script))
 904       (format "Glyphs of %s script for AKRUTI font.  Subset of `indian-glyph'."
 905               (capitalize (symbol-name script)))
 906       :short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script)))
 907       :supplementary-p t
 908       :code-space [0 255]
 909       :code-offset code-offset)
 910     (setq code-offset (+ code-offset #x100))))
 911
 912 (define-charset 'indian-glyph
 913   "Glyphs for Indian characters."
 914   :short-name "Indian glyph"
 915   :iso-final-char ?4
 916   :emacs-mule-id 240
 917   :supplementary-p t
 918   :code-space [32 127 32 127]
 919   :code-offset #x180100)
 920
 921 ;; Actual Glyph for 1-column width.
 922 (define-charset 'indian-1-column
 923   "Indian charset for 1-column width glyphs."
 924   :short-name "Indian 1-col"
 925   :long-name "Indian 1 Column"
 926   :iso-final-char ?6
 927   :emacs-mule-id  251
 928   :supplementary-p t
 929   :code-space [33 126 33 126]
 930   :code-offset #x184000)
 931
 932 ;; Actual Glyph for 2-column width.
 933 (define-charset 'indian-2-column
 934   "Indian charset for 2-column width glyphs."
 935   :short-name "Indian 2-col"
 936   :long-name "Indian 2 Column"
 937   :iso-final-char ?5
 938   :emacs-mule-id  251
 939   :supplementary-p t
 940   :code-space [33 126 33 126]
 941   :code-offset #x184000)
 942
 943 (define-charset 'tibetan
 944   "Tibetan characters"
 945   :iso-final-char ?7
 946   :short-name "Tibetan 2-col"
 947   :long-name "Tibetan 2 column"
 948   :iso-final-char ?7
 949   :emacs-mule-id 252
 950   :unify-map "MULE-tibetan"
 951   :supplementary-p t
 952   :code-space [33 126 33 37]
 953   :code-offset #x190000)
 954
 955 (define-charset 'tibetan-1-column
 956   "Tibetan 1 column glyph"
 957   :short-name "Tibetan 1-col"
 958   :long-name "Tibetan 1 column"
 959   :iso-final-char ?8
 960   :emacs-mule-id 241
 961   :supplementary-p t
 962   :code-space [33 126 33 37]
 963   :code-offset #x190000)
 964
 965 ;; Subsets of Unicode.
 966 (define-charset 'mule-unicode-2500-33ff
 967   "Unicode characters of the range U+2500..U+33FF."
 968   :short-name "Unicode subset 2"
 969   :long-name "Unicode subset (U+2500..U+33FF)"
 970   :iso-final-char ?2
 971   :emacs-mule-id 242
 972   :supplementary-p t
 973   :code-space [#x20 #x7f #x20 #x47]
 974   :code-offset #x2500)
 975
 976 (define-charset 'mule-unicode-e000-ffff
 977   "Unicode characters of the range U+E000..U+FFFF."
 978   :short-name "Unicode subset 3"
 979   :long-name "Unicode subset (U+E000+FFFF)"
 980   :iso-final-char ?3
 981   :emacs-mule-id 243
 982   :supplementary-p t
 983   :code-space [#x20 #x7F #x20 #x75]
 984   :code-offset #xE000
 985   :max-code 30015)                      ; U+FFFF
 986
     ;; Supplementary two-byte charset covering U+0100..U+24FF.  The code
     ;; space is 96 x 96 = 9216 code points, exactly the size of that range
     ;; when offset from #x100.
 987 (define-charset 'mule-unicode-0100-24ff
 988   "Unicode characters of the range U+0100..U+24FF."
 989   :short-name "Unicode subset"
 990   :long-name "Unicode subset (U+0100..U+24FF)"
 991   :iso-final-char ?1
 992   :emacs-mule-id 244
 993   :supplementary-p t
 994   :code-space [#x20 #x7F #x20 #x7F]
 995   :code-offset #x100)
 996
     ;; Plane 0 of Unicode as a two-byte charset: both bytes range over
     ;; 0..255 (65536 code points) and the offset is 0, so a code point is
     ;; its own character code.
 997 (define-charset 'unicode-bmp
 998   "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
 999   :short-name "Unicode BMP"
1000   :code-space [0 255 0 255]
1001   :code-offset 0)
1002
1003 (define-charset 'unicode-smp
1004   "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
1005   :short-name "Unicode SMP "
1006   :code-space [0 255 0 255]
1007   :code-offset #x10000)
1008
     ;; Plane 2 of Unicode: full two-byte code space offset by #x20000.
1009 (define-charset 'unicode-sip
1010   "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
1011   :short-name "Unicode SIP"
1012   :code-space [0 255 0 255]
1013   :code-offset #x20000)
1014
     ;; Plane 14 of Unicode: full two-byte code space offset by #xE0000.
1015 (define-charset 'unicode-ssp
1016   "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
1017   :short-name "Unicode SSP"
1018   :code-space [0 255 0 255]
1019   :code-offset #xE0000)
1020
     ;; Supplementary two-byte Ethiopic charset (each byte in 33..126) at
     ;; :code-offset #x1A0000, associated with the "MULE-ethiopic"
     ;; unification map; `unify-charset' is called on it later in this file.
1021 (define-charset 'ethiopic
1022   "Ethiopic characters for Amharic and Tigrigna."
1023   :short-name "Ethiopic"
1024   :long-name "Ethiopic characters"
1025   :iso-final-char ?3
1026   :emacs-mule-id  245
1027   :supplementary-p t
1028   :unify-map "MULE-ethiopic"
1029   :code-space [33 126 33 126]
1030   :code-offset #x1A0000)
1031
     ;; Single-byte, ASCII-compatible charset covering bytes 0..255; the
     ;; character assignments come from the map named "MACINTOSH".
1032 (define-charset 'mac-roman
1033   "Mac Roman charset"
1034   :short-name "Mac Roman"
1035   :ascii-compatible-p t
1036   :code-space [0 255]
1037   :map "MACINTOSH")
1038
1039 ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
     ;; The EBCDIC charsets are single-byte (0..255) and map-based.  Unlike
     ;; the ASCII-based code pages in this section they do not set
     ;; `:ascii-compatible-p'.  Each also declares a `:mime-charset'.
1040 (define-charset 'ebcdic-us
1041   "US version of EBCDIC"
1042   :short-name "EBCDIC-US"
1043   :code-space [0 255]
1044   :mime-charset 'ebcdic-us
1045   :map "EBCDICUS")
1046
     ;; UK counterpart of `ebcdic-us'; only the names and the map differ.
1047 (define-charset 'ebcdic-uk
1048   "UK version of EBCDIC"
1049   :short-name "EBCDIC-UK"
1050   :code-space [0 255]
1051   :mime-charset 'ebcdic-uk
1052   :map "EBCDICUK")
1053
     ;; Another single-byte EBCDIC variant, defined from the "IBM1047" map
     ;; and also reachable under the alias `cp1047'.
1054 (define-charset 'ibm1047
1055   ;; Says groff:
1056   "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
1057   :short-name "IBM1047"
1058   :code-space [0 255]
1059   :mime-charset 'ibm1047
1060   :map "IBM1047")
1061 (define-charset-alias 'cp1047 'ibm1047)
1062
1063 (define-charset 'hp-roman8
1064   "Encoding used by Hewlet-Packard printer software"
1065   :short-name "HP-ROMAN8"
1066   :ascii-compatible-p t
1067   :code-space [0 255]
1068   :map "HP-ROMAN8")
1069
1070 ;; To make a coding system with this, a pre-write-conversion should
1071 ;; account for the commented-out multi-valued code points in
1072 ;; stdenc.map.
     ;; Single-byte charset whose code space starts at #x20 (bytes below
     ;; that are outside the charset); defined from the "stdenc" map.
1073 (define-charset 'adobe-standard-encoding
1074   "Adobe `standard encoding' used in PostScript"
1075   :short-name "ADOBE-STANDARD-ENCODING"
1076   :code-space [#x20 255]
1077   :map "stdenc")
1078
     ;; Single-byte charset over bytes #x20..255, defined from the "symbol"
     ;; map.  Note that the charset is named plain `symbol'.
1079 (define-charset 'symbol
1080   "Adobe symbol encoding used in PostScript"
1081   :short-name "ADOBE-SYMBOL"
1082   :code-space [#x20 255]
1083   :map "symbol")
1084
     ;; Single-byte, ASCII-compatible DOS code page defined from the
     ;; "IBM850" map; also reachable under the alias `cp850'.
1085 (define-charset 'ibm850
1086   "DOS codepage 850 (Latin-1)"
1087   :short-name "IBM850"
1088   :ascii-compatible-p t
1089   :code-space [0 255]
1090   :map "IBM850")
1091 (define-charset-alias 'cp850 'ibm850)
1092
     ;; Single-byte, ASCII-compatible code page defined from the "MIK" map.
1093 (define-charset 'mik
1094   "Bulgarian DOS codepage"
1095   :short-name "MIK"
1096   :ascii-compatible-p t
1097   :code-space [0 255]
1098   :map "MIK")
1099
     ;; Single-byte, ASCII-compatible code page defined from the "PTCP154"
     ;; map.  Its MIME name is `pt154'; `pt154' and `cp154' are also
     ;; registered as charset aliases.
1100 (define-charset 'ptcp154
1101   "`Paratype' codepage (Asian Cyrillic)"
1102   :short-name "PT154"
1103   :ascii-compatible-p t
1104   :code-space [0 255]
1105   :mime-charset 'pt154
1106   :map "PTCP154")
1107 (define-charset-alias 'pt154 'ptcp154)
1108 (define-charset-alias 'cp154 'ptcp154)
1109
     ;; Map-based components of GB18030.  Judging from the ranges quoted in
     ;; the doc strings, the :code-space vectors list the trailing byte's
     ;; range first and the leading byte's range last.
1110 (define-charset 'gb18030-2-byte
1111   "GB18030 2-byte (0x814E..0xFEFE)"
1112   :code-space [#x40 #xFE #x81 #xFE]
1113   :supplementary-p t
1114   :map "GB180302")
1115
     ;; Four-byte part of GB18030 that covers BMP characters, defined from
     ;; the "GB180304" map.
1116 (define-charset 'gb18030-4-byte-bmp
1117   "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
1118   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
1119   :supplementary-p t
1120   :map "GB180304")
1121
     ;; Offset-based four-byte components of GB18030.  Each restricts its
     ;; rectangular :code-space with :min-code/:max-code, written as
     ;; (HIGH16 . LOW16) pairs that spell out the bounds quoted in the doc
     ;; string, and places the range at its own :code-offset.
1122 (define-charset 'gb18030-4-byte-smp
1123   "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
1124   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
1125   :min-code '(#x9030 . #x8130)
1126   :max-code '(#xE332 . #x9A35)
1127   :supplementary-p t
1128   :code-offset #x10000)
1129
     ;; Codes between the BMP part and the SMP part.
1130 (define-charset 'gb18030-4-byte-ext-1
1131   "GB18030 4-byte (0x8431A530-0x8F39FE39)"
1132   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
1133   :min-code '(#x8431 . #xA530)
1134   :max-code '(#x8F39 . #xFE39)
1135   :supplementary-p t
1136   :code-offset #x200000                 ; ... #x22484B
1137   )
1138
     ;; Codes after the SMP part; its offset continues directly after the
     ;; end of `gb18030-4-byte-ext-1' (#x22484B + 1).
1139 (define-charset 'gb18030-4-byte-ext-2
1140   "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
1141   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
1142   :min-code '(#xE332 . #x9A36)
1143   :max-code '(#xFE39 . #xFE39)
1144   :supplementary-p t
1145   :code-offset #x22484C                 ; ... #x279f93
1146   )
1147
     ;; The complete GB18030 charset, declared as the superset of `ascii',
     ;; the two-byte part and the four four-byte parts, which are all
     ;; defined just above except `ascii'.
1148 (define-charset 'gb18030
1149   "GB18030"
1150   :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
1151   :min-code 0
1152   :max-code '(#xFE39 . #xFE39)
1153   :superset '(ascii gb18030-2-byte
1154                     gb18030-4-byte-bmp gb18030-4-byte-smp
1155                     gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
1156
     ;; Two-byte charset (each byte in 33..126) at :code-offset #x27A000.
     ;; No ISO final char, emacs-mule id or map is declared for it here.
1157 (define-charset 'chinese-cns11643-15
1158   "CNS11643 Plane 15 Chinese Traditional"
1159   :short-name  "CNS11643-15"
1160   :long-name "CNS11643-15 (Chinese traditional)"
1161   :code-space [33 126 33 126]
1162   :code-offset #x27A000)
1163
     ;; Call `unify-charset' on each charset that should be unified with
     ;; Unicode.  Of the charsets listed, `tibetan' and `ethiopic' are
     ;; defined with a `:unify-map' in this part of the file; the others
     ;; are defined earlier (presumably with their own unify maps -- verify
     ;; against their definitions).
1164 (unify-charset 'chinese-gb2312)
1165 (unify-charset 'chinese-gbk)
1166 (unify-charset 'chinese-cns11643-1)
1167 (unify-charset 'chinese-cns11643-2)
1168 (unify-charset 'chinese-cns11643-3)
1169 (unify-charset 'chinese-cns11643-4)
1170 (unify-charset 'chinese-cns11643-5)
1171 (unify-charset 'chinese-cns11643-6)
1172 (unify-charset 'chinese-cns11643-7)
1173 (unify-charset 'big5)
1174 (unify-charset 'chinese-big5-1)
1175 (unify-charset 'chinese-big5-2)
1176 (unify-charset 'big5-hkscs)
1177 (unify-charset 'korean-ksc5601)
1178 (unify-charset 'vietnamese-viscii-lower)
1179 (unify-charset 'vietnamese-viscii-upper)
1180 (unify-charset 'chinese-sisheng)
1181 (unify-charset 'ipa)
1182 (unify-charset 'tibetan)
1183 (unify-charset 'ethiopic)
1184 (unify-charset 'japanese-jisx0208-1978)
1185 (unify-charset 'japanese-jisx0208)
1186 (unify-charset 'japanese-jisx0212)
1187 (unify-charset 'japanese-jisx0213-1)
1188 (unify-charset 'japanese-jisx0213-2)
1189
1190 \f
1191 ;; These are tables for translating characters on decoding and
1192 ;; encoding.
1193 ;; Fixme: these aren't used now -- should they be?
     ;; Both variables are explicitly reset to nil, i.e. no standard
     ;; translation table is installed by this file.
1194 (setq standard-translation-table-for-decode nil)
1195
1196 (setq standard-translation-table-for-encode nil)
1197 \f
1198 ;;; Make fundamental coding systems.
1199
1200 ;; The coding systems `no-conversion' and `undecided' are already
1201 ;; defined in coding.c as below:
1202 ;;
1203 ;; (define-coding-system 'no-conversion
1204 ;;   "..."
1205 ;;   :coding-type 'raw-text
1206 ;;   ...)
1207 ;; (define-coding-system 'undecided
1208 ;;   "..."
1209 ;;   :coding-type 'undecided
1210 ;;   ...)
1211
     ;; Convenience aliases: `binary' for `no-conversion', and the bare
     ;; end-of-line names `unix', `dos' and `mac' for the corresponding
     ;; `undecided-*' variants.
1212 (define-coding-system-alias 'binary 'no-conversion)
1213 (define-coding-system-alias 'unix 'undecided-unix)
1214 (define-coding-system-alias 'dos 'undecided-dos)
1215 (define-coding-system-alias 'mac 'undecided-mac)
1216
     ;; `raw-text' uses the `raw-text' coding type with `:for-unibyte' set
     ;; and no explicit `:eol-type' (the doc string notes that EOL format is
     ;; still handled).  Mode-line mnemonic: `t'.
1217 (define-coding-system 'raw-text
1218   "Raw text, which means text contains random 8-bit codes.
1219 Encoding text with this coding system produces the actual byte
1220 sequence of the text in buffers and strings.  An exception is made for
1221 characters from the `eight-bit' character set.  Each of them is encoded
1222 into a single byte.
1223
1224 When you visit a file with this coding, the file is read into a
1225 unibyte buffer as is (except for EOL format), thus each byte of a file
1226 is treated as a character."
1227   :coding-type 'raw-text
1228   :for-unibyte t
1229   :mnemonic ?t)
1230
     ;; Same coding type as `raw-text', but with a fixed `unix' EOL type and
     ;; without `:for-unibyte'.  Mnemonic: `='.
1231 (define-coding-system 'no-conversion-multibyte
1232   "Like `no-conversion' but don't read a file into a unibyte buffer."
1233   :coding-type 'raw-text
1234   :eol-type 'unix
1235   :mnemonic ?=)
1236
     ;; Charset-based coding system over the single charset `iso-8859-1';
     ;; also available under the aliases `iso-8859-1' and `latin-1'.
1237 (define-coding-system 'iso-latin-1
1238   "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
1239   :coding-type 'charset
1240   :mnemonic ?1
1241   :charset-list '(iso-8859-1)
1242   :mime-charset 'iso-8859-1)
1243
1244 (define-coding-system-alias 'iso-8859-1 'iso-latin-1)
1245 (define-coding-system-alias 'latin-1 'iso-latin-1)
1246
1247 ;; Coding systems not specific to each language environment.
1248
     ;; Note that `:charset-list' is the bare symbol `emacs-mule' here, not
     ;; a list of charsets.
1249 (define-coding-system 'emacs-mule
1250  "Emacs 21 internal format used in buffer and string."
1251  :coding-type 'emacs-mule
1252  :charset-list 'emacs-mule
1253  :mnemonic ?M)
1254
     ;; The three UTF-8 variants differ only in `:bom': absent (no
     ;; signature), t (always a signature), or a cons naming the
     ;; with-signature and without-signature coding systems for
     ;; auto-detection.  Only plain `utf-8' declares a `:mime-charset'.
1255 (define-coding-system 'utf-8
1256   "UTF-8 (no signature (BOM))"
1257   :coding-type 'utf-8
1258   :mnemonic ?U
1259   :charset-list '(unicode)
1260   :mime-charset 'utf-8)
1261
1262 (define-coding-system 'utf-8-with-signature
1263   "UTF-8 (with signature (BOM))"
1264   :coding-type 'utf-8
1265   :mnemonic ?U
1266   :charset-list '(unicode)
1267   :bom t)
1268
1269 (define-coding-system 'utf-8-auto
1270   "UTF-8 (auto-detect signature (BOM))"
1271   :coding-type 'utf-8
1272   :mnemonic ?U
1273   :charset-list '(unicode)
1274   :bom '(utf-8-with-signature . utf-8))
1275
     ;; Alias `mule-utf-8' for `utf-8'.
1276 (define-coding-system-alias 'mule-utf-8 'utf-8)
1277
     ;; UTF-8 coding type applied to the `emacs' charset instead of
     ;; `unicode', so non-Unicode Emacs characters are covered as well.
1278 (define-coding-system 'utf-8-emacs
1279   "Support for all Emacs characters (including non-Unicode characters)."
1280   :coding-type 'utf-8
1281   :mnemonic ?U
1282   :charset-list '(emacs))
1283
1284 ;; The encoding used internally.  This encoding is meant to be able to save
1285 ;; any multibyte buffer without losing information.  It can change between
1286 ;; Emacs releases, tho, so should only be used for internal files.
1287 (define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
1288
     ;; UTF-16 family.  All variants share the `utf-16' coding type, the
     ;; `unicode' charset list, mnemonic `U' and `:mime-text-unsuitable';
     ;; they differ in `:endian', `:bom' and `:mime-charset'.
1289 (define-coding-system 'utf-16le
1290   "UTF-16LE (little endian, no signature (BOM))."
1291   :coding-type 'utf-16
1292   :mnemonic ?U
1293   :charset-list '(unicode)
1294   :endian 'little
1295   :mime-text-unsuitable t
1296   :mime-charset 'utf-16le)
1297
1298 (define-coding-system 'utf-16be
1299   "UTF-16BE (big endian, no signature (BOM))."
1300   :coding-type 'utf-16
1301   :mnemonic ?U
1302   :charset-list '(unicode)
1303   :endian 'big
1304   :mime-text-unsuitable t
1305   :mime-charset 'utf-16be)
1306
     ;; The two *-with-signature variants always use a BOM and both declare
     ;; the MIME charset `utf-16'.
1307 (define-coding-system 'utf-16le-with-signature
1308   "UTF-16 (little endian, with signature (BOM))."
1309   :coding-type 'utf-16
1310   :mnemonic ?U
1311   :charset-list '(unicode)
1312   :bom t
1313   :endian 'little
1314   :mime-text-unsuitable t
1315   :mime-charset 'utf-16)
1316
1317 (define-coding-system 'utf-16be-with-signature
1318   "UTF-16 (big endian, with signature)."
1319   :coding-type 'utf-16
1320   :mnemonic ?U
1321   :charset-list '(unicode)
1322   :bom t
1323   :endian 'big
1324   :mime-text-unsuitable t
1325   :mime-charset 'utf-16)
1326
     ;; Generic `utf-16': `:bom' is a cons naming the little- and
     ;; big-endian with-signature variants, and `:endian' is `big'.
1327 (define-coding-system 'utf-16
1328   "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
1329   :coding-type 'utf-16
1330   :mnemonic ?U
1331   :charset-list '(unicode)
1332   :bom '(utf-16le-with-signature . utf-16be-with-signature)
1333   :endian 'big
1334   :mime-text-unsuitable t
1335   :mime-charset 'utf-16)
1336
1337 ;; Backwards compatibility (old names, also used by Mule-UCS).  We
1338 ;; prefer the MIME names.
     ;; Note that the old hyphenated names map to the *with-signature*
     ;; variants, not to `utf-16le'/`utf-16be'.
1339 (define-coding-system-alias 'utf-16-le 'utf-16le-with-signature)
1340 (define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
1341
1342
     ;; `:designation' has four slots; only the first one (G0, per the doc
     ;; string) is populated, the other three are nil.
1343 (define-coding-system 'iso-2022-7bit
1344   "ISO 2022 based 7-bit encoding using only G0."
1345   :coding-type 'iso-2022
1346   :mnemonic ?J
1347   :charset-list 'iso-2022
1348   :designation [(ascii t) nil nil nil]
1349   :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
1350
     ;; Compared with `iso-2022-7bit': the third designation slot accepts
     ;; 96-charsets and the `single-shift' flag is added.
1351 (define-coding-system 'iso-2022-7bit-ss2
1352   "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
1353   :coding-type 'iso-2022
1354   :mnemonic ?$
1355   :charset-list 'iso-2022
1356   :designation [(ascii 94) nil (nil 96) nil]
1357   :flags '(short ascii-at-eol ascii-at-cntl 7-bit
1358                  designation single-shift composition))
1359
     ;; 96-charsets go to the second designation slot and are reached with
     ;; the `locking-shift' flag; unlike the two 7-bit systems above, the
     ;; `short' flag is not set.  Also available as `iso-2022-int-1'.
1360 (define-coding-system 'iso-2022-7bit-lock
1361   "ISO-2022 coding system using Locking-Shift for 96-charset."
1362   :coding-type 'iso-2022
1363   :mnemonic ?&
1364   :charset-list 'iso-2022
1365   :designation [(ascii 94) (nil 96) nil nil]
1366   :flags '(ascii-at-eol ascii-at-cntl 7-bit
1367                         designation locking-shift composition))
1368
1369 (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
1370
     ;; Unlike the generic iso-2022 systems above, this one lists its
     ;; charsets explicitly and pins specific charsets to designation
     ;; slots.  Flags include both `locking-shift' and `single-shift' plus
     ;; `init-bol'; `designation' and `composition' are not set.
     ;; Also available as `iso-2022-cjk'.
1371 (define-coding-system 'iso-2022-7bit-lock-ss2
1372   "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
1373   :coding-type 'iso-2022
1374   :mnemonic ?i
1375   :charset-list '(ascii
1376                   japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
1377                   korean-ksc5601
1378                   chinese-gb2312
1379                   chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1380                   chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1381                   chinese-cns11643-7)
1382   :designation [(ascii 94)
1383                 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
1384                 (nil chinese-cns11643-2)
1385                 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
1386                      chinese-cns11643-6 chinese-cns11643-7)]
1387   :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
1388                  single-shift init-bol))
1389
1390 (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
1391
     ;; Same designation as `iso-2022-7bit-ss2', but the flag list omits
     ;; `short' and `7-bit'.
1392 (define-coding-system 'iso-2022-8bit-ss2
1393   "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
1394   :coding-type 'iso-2022
1395   :mnemonic ?@
1396   :charset-list 'iso-2022
1397   :designation [(ascii 94) nil (nil 96) nil]
1398   :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
1399
     ;; iso-2022 based Compound Text.  The flags include `long-form' and
     ;; `composition'; the MIME charset is the non-standard `x-ctext' (see
     ;; the Fixme below).  Aliases: `x-ctext' and `ctext'.
1400 (define-coding-system 'compound-text
1401   "Compound text based generic encoding for decoding unknown messages.
1402
1403 This coding system does not support extended segments of CTEXT."
1404   :coding-type 'iso-2022
1405   :mnemonic ?x
1406   :charset-list 'iso-2022
1407   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1408   :flags '(ascii-at-eol ascii-at-cntl long-form
1409                         designation locking-shift single-shift composition)
1410   ;; Fixme: this isn't a valid MIME charset and has to be
1411   ;; special-cased elsewhere  -- fx
1412   :mime-charset 'x-ctext)
1413
1414 (define-coding-system-alias  'x-ctext 'compound-text)
1415 (define-coding-system-alias  'ctext 'compound-text)
1416
1417 ;; Same as compound-text, but doesn't produce composition escape
1418 ;; sequences.  Used in post-read and pre-write conversions of
1419 ;; compound-text-with-extensions, see mule.el.  Note that this should
1420 ;; not have a mime-charset property, to prevent it from showing up
1421 ;; close to the beginning of coding systems ordered by priority.
     ;; NOTE(review): besides `composition', the flag list below also omits
     ;; `long-form', which `compound-text' sets -- confirm this is intended.
1422 (define-coding-system 'ctext-no-compositions
1423  "Compound text based generic encoding for decoding unknown messages.
1424
1425 Like `compound-text', but does not produce escape sequences for compositions."
1426   :coding-type 'iso-2022
1427   :mnemonic ?x
1428   :charset-list 'iso-2022
1429   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1430   :flags '(ascii-at-eol ascii-at-cntl
1431                         designation locking-shift single-shift))
1432
     ;; Same designation as `compound-text' and the same flags minus
     ;; `composition'.  Extended segments are handled by the named
     ;; post-read/pre-write conversion functions, which are not defined in
     ;; this part of the file.  Aliases: `x-ctext-with-extensions' and
     ;; `ctext-with-extensions'.
1433 (define-coding-system 'compound-text-with-extensions
1434  "Compound text encoding with ICCCM Extended Segment extensions.
1435
1436 See the variable `ctext-non-standard-encodings-alist' for the
1437 detail about how extended segments are handled.
1438
1439 This coding system should be used only for X selections.  It is inappropriate
1440 for decoding and encoding files, process I/O, etc."
1441   :coding-type 'iso-2022
1442   :mnemonic ?x
1443   :charset-list 'iso-2022
1444   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1445   :flags '(ascii-at-eol ascii-at-cntl long-form
1446                         designation locking-shift single-shift)
1447   :post-read-conversion 'ctext-post-read-conversion
1448   :pre-write-conversion 'ctext-pre-write-conversion)
1449
1450 (define-coding-system-alias
1451   'x-ctext-with-extensions 'compound-text-with-extensions)
1452 (define-coding-system-alias
1453   'ctext-with-extensions 'compound-text-with-extensions)
1454
     ;; Charset-based coding system over `ascii' only; `:default-char' is
     ;; `?', the replacement named in the doc string.  Alias: `iso-safe'.
1455 (define-coding-system 'us-ascii
1456   "Encode ASCII as-is and encode non-ASCII characters to `?'."
1457   :coding-type 'charset
1458   :mnemonic ?-
1459   :charset-list '(ascii)
1460   :default-char ??
1461   :mime-charset 'us-ascii)
1462
1463 (define-coding-system-alias 'iso-safe 'us-ascii)
1464
     ;; Both UTF-7 variants are declared with `:coding-type' `utf-8' and
     ;; name dedicated pre-write/post-read conversion functions, which are
     ;; not defined in this part of the file.
1465 (define-coding-system 'utf-7
1466   "UTF-7 encoding of Unicode (RFC 2152)."
1467   :coding-type 'utf-8
1468   :mnemonic ?U
1469   :mime-charset 'utf-7
1470   :charset-list '(unicode)
1471   :pre-write-conversion 'utf-7-pre-write-conversion
1472   :post-read-conversion 'utf-7-post-read-conversion)
1473
     ;; IMAP flavour: no `:mime-charset', and a lower-case `u' mnemonic.
1474 (define-coding-system 'utf-7-imap
1475   "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
1476   :coding-type 'utf-8
1477   :mnemonic ?u
1478   :charset-list '(unicode)
1479   :pre-write-conversion 'utf-7-imap-pre-write-conversion
1480   :post-read-conversion 'utf-7-imap-post-read-conversion)
1481
1482 ;; Use us-ascii for terminal output if some other coding system is not
1483 ;; specified explicitly.
     ;; `us-ascii' is the coding system defined above in this file.
1484 (set-safe-terminal-coding-system-internal 'us-ascii)
1485
1486 ;; The other coding systems are defined in the language-specific
1487 ;; files under lisp/language.
1488
1489 ;; Normally, set coding system to `undecided' before reading a file.
1490 ;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1491 ;; but we regard them as containing multibyte characters.
1492 ;; Tar files are not decoded at all, but we treat them as raw bytes.
1493
     ;; Each element is (REGEXP . VALUE), where REGEXP is matched against
     ;; the file name.  VALUE is a coding system, a cons
     ;; (DECODING . ENCODING), or a function that returns one of those.
1494 (setq file-coding-system-alist
1495       '(("\\.elc\\'" . utf-8-emacs)
1496         ("\\.utf\\(-8\\)?\\'" . utf-8)
1497         ("\\.xml\\'" . xml-find-file-coding-system)
1498         ;; We use raw-text for reading loaddefs.el so that if it
1499         ;; happens to have DOS or Mac EOLs, they are converted to
1500         ;; newlines.  This is required to make the special treatment
1501         ;; of the "\ newline" combination in loaddefs.el, which marks
1502         ;; the beginning of a doc string, work.
             ;; The dot is escaped so that only a file literally named
             ;; "loaddefs.el" matches, not e.g. "loaddefsXel".
1503         ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
1504         ("\\.tar\\'" . (no-conversion . no-conversion))
1505         ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
1506         ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
             ;; Catch-all: the empty regexp matches every file name.
1507         ("" . (undecided . nil))))
1508
1509 \f
1510 ;;; Setting coding categories and their priorities.
1511
1512 ;; This setting is just for reading Emacs Lisp source files that
1513 ;; contain multilingual text while dumping Emacs.  More appropriate
1514 ;; values are set by the command `set-language-environment' for each
1515 ;; language environment.
1516
     ;; Priority order, highest first: iso-latin-1, utf-8, iso-2022-7bit.
1517 (set-coding-system-priority
1518  'iso-latin-1
1519  'utf-8
1520  'iso-2022-7bit
1521  )
1522
1523 \f
1524 ;;; Miscellaneous settings.
1525
1526 ;; Make all multibyte characters self-insert.
     ;; The second element of `global-map' is taken to be its char-table;
     ;; every character from 128 up to `(max-char)' is bound to
     ;; `self-insert-command' in it.
1527 (set-char-table-range (nth 1 global-map)
1528                       (cons 128 (max-char))
1529                       'self-insert-command)
1530
     ;; Set the entries for bytes #o221..#o226 (#x91..#x96) of
     ;; `latin-extra-code-table' to t.
     ;; NOTE(review): presumably this marks those bytes as acceptable extra
     ;; codes in Latin text -- confirm against the variable's documentation.
1531 (aset latin-extra-code-table ?\221 t)
1532 (aset latin-extra-code-table ?\222 t)
1533 (aset latin-extra-code-table ?\223 t)
1534 (aset latin-extra-code-table ?\224 t)
1535 (aset latin-extra-code-table ?\225 t)
1536 (aset latin-extra-code-table ?\226 t)
1537
1538 ;; The old code-pages library is obsoleted by coding systems based on
1539 ;; the charsets defined in this file but might be required by user
1540 ;; code.
     ;; Providing the feature here lets `(require 'code-pages)' succeed
     ;; once this file has been loaded.
1541 (provide 'code-pages)
1542
1543 ;; Local variables:
1544 ;; no-byte-compile: t
1545 ;; End:
1546
1547 ;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5
1548 ;;; mule-conf.el ends here