lisp/international/mule-conf.el

   1 ;;; mule-conf.el --- configure multilingual environment
   2
   3 ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
   4 ;; Licensed to the Free Software Foundation.
   5 ;; Copyright (C) 2001, 2002
   6 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   7 ;;   Registration Number H13PRO009
   8 ;; Copyright (C) 2002 Free Software Foundation, Inc.
   9
  10 ;; Keywords: i18n, mule, multilingual, character set, coding system
  11
  12 ;; This file is part of GNU Emacs.
  13
  14 ;; GNU Emacs is free software; you can redistribute it and/or modify
  15 ;; it under the terms of the GNU General Public License as published by
  16 ;; the Free Software Foundation; either version 2, or (at your option)
  17 ;; any later version.
  18
  19 ;; GNU Emacs is distributed in the hope that it will be useful,
  20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 ;; GNU General Public License for more details.
  23
  24 ;; You should have received a copy of the GNU General Public License
  25 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  27 ;; Boston, MA 02111-1307, USA.
  28
  29 ;;; Commentary:
  30
  31 ;; This file defines the Emacs charsets and some basic coding systems.
  32 ;; Other coding systems are defined in the files in directory
  33 ;; lisp/language.
  34
  35 ;;; Code:
  36
  37 ;;; Remarks
  38
  39 ;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
  40 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
  41 ;; http://www.ecma.ch/.
  42
  43 ;; FWIW, http://www.microsoft.com/globaldev/reference/ lists the
  44 ;; following for MS Windows, which are presumably the only charsets we
  45 ;; really need to worry about on such systems:
  46 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
  47 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
  48 ;;                      1258, 874, 932, 936, 949, 950
  49
  50 ;;; Definitions of character sets.
  51
  52 ;; The charsets `ascii' and `unicode' are already defined in charset.c
  53 ;; as below:
  54 ;;
  55 ;; (define-charset 'ascii
  56 ;;   ""
  57 ;;   :dimension 1
  58 ;;   :code-space [0 127]
  59 ;;   :iso-final-char ?A
  60 ;;   :ascii-compatible-p t
  61 ;;   :emacs-mule-id 0
  62 ;;   :code-offset 0)
  63 ;;
  64 ;; (define-charset 'unicode
  65 ;;   ""
  66 ;;   :dimension 3
  67 ;;   :code-space [0 255 0 255 0 16]
  68 ;;   :ascii-compatible-p t
  69 ;;   :code-offset 0)
  70 ;;
  71 ;; We now set :docstring, :short-name, and :long-name properties.
  72
  73 (put-charset-property
  74  'ascii :docstring "ASCII (ISO646 IRV)")
  75 (put-charset-property
  76  'ascii :short-name "ASCII")
  77 (put-charset-property
  78  'ascii :long-name "ASCII (ISO646 IRV)")
  79 (put-charset-property
  80  'unicode :docstring "Unicode (ISO10646)")
  81 (put-charset-property
  82  'unicode :short-name "Unicode")
  83 (put-charset-property
  84  'unicode :long-name "Unicode (ISO10646)")
  85
  86 (define-charset-alias 'ucs 'unicode)
  87
  88 (define-charset 'emacs
  89   "Full Emacs characters"
  90   :ascii-compatible-p t
  91   :code-space [ 0 255 0 255 0 63 ]
  92   :code-offset 0
  93   :supplementary-p t)
  94
  95 (define-charset 'iso-8859-1
  96   "Latin-1 (ISO/IEC 8859-1)"
  97   :short-name "Latin-1"
  98   :ascii-compatible-p t
  99   :code-space [0 255]
 100   :code-offset 0)
 101
 102 (define-charset 'latin-iso8859-1
 103   "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
 104   :short-name "RHP of Latin-1"
 105   :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
 106   :iso-final-char ?A
 107   :emacs-mule-id 129
 108   :code-space [32 127]
 109   :code-offset 160)
 110
 111 (define-charset 'eight-bit-control
 112   "8-bit control code (0x80..0x9F)"
 113   :short-name "8-bit control code"
 114   :code-space [128 159]
 115   :code-offset 128)
 116
 117 (define-charset 'eight-bit-graphic
 118   "8-bit graphic code (0xA0..0xFF)"
 119   :short-name "8-bit graphic code"
 120   :code-space [160 255]
 121   :code-offset 160)
 122
 123 (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
 124                                                  iso-ir iso-final
 125                                                  emacs-mule-id map)
       ;; Expand to the charset definitions for one single-byte charset.
       ;; SYMBOL is always defined: an ASCII-compatible charset covering
       ;; codes 0..255, documented by NAME (also used as its long name),
       ;; with NICKNAME as its short name and MAP as its :map.
       ;; When ISO-SYMBOL evaluates to non-nil, a second charset
       ;; ISO-SYMBOL covering codes 32..127 is defined with SYMBOL as its
       ;; parent (offset 128), using ISO-FINAL and EMACS-MULE-ID.  When
       ;; ISO-IR is non-nil its number is appended to that docstring.
       ;; Every argument is spliced into the expansion more than once and
       ;; is therefore evaluated more than once; all calls in this file
       ;; pass constants.
 126   `(progn
 127      (define-charset ,symbol
 128        ,name
 129        :short-name ,nickname
 130        :long-name ,name
 131        :ascii-compatible-p t
 132        :code-space [0 255]
 133        :map ,map)
 134      (if ,iso-symbol
 135          (define-charset ,iso-symbol
 136            (if ,iso-ir
 137                (format "Right-Hand Part of %s (%s): ISO-IR-%d"
 138                        ,name ,nickname ,iso-ir)
 139              (format "Right-Hand Part of %s (%s)" ,name ,nickname))
                 ;; NOTE(review): the short name is built from NAME, whereas
                 ;; the hand-written latin-iso8859-1 uses its nickname
                 ;; ("RHP of Latin-1") -- confirm which form is intended.
 140            :short-name (format "RHP of %s" ,name)
 141            :long-name (format "RHP of %s (%s)" ,name ,nickname)
 142            :iso-final-char ,iso-final
 143            :emacs-mule-id ,emacs-mule-id
 144            :code-space [32 127]
 145            :parents (list (cons ,symbol 128))))))
 146
 147 (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
 148   "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
 149
 150 (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
 151   "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
 152
 153 (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
 154   "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
 155
 156 (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
 157   "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
 158
 159 (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
 160   "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
 161
 162 (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
 163   "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
 164
 165 (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
 166   "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
 167
 168 (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
 169   "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
 170
 171 (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
 172   "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
 173
 174 ;; http://www.nectec.or.th/it-standards/iso8859-11/
 175 ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
 176 ;; plus nbsp
 177 (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
 178   "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
 179
 180 ;; 8859-12 doesn't (yet?) exist.
 181
 182 (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
 183   "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
 184
 185 (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
 186   "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
 187
 188 (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
 189   "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
 190
 191 (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
 192   "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
 193
 194 ;; No point in keeping it around.
 195 (fmakunbound 'define-iso-single-byte-charset)
 196
 197 ;; Can this be shared with 8859-11?
 198 (define-charset 'thai-tis620
 199   "TIS620.2533"
 200   :short-name "TIS620.2533"
 201   :iso-final-char ?T
 202   :emacs-mule-id 133
 203   :code-space [32 127]
 204   :code-offset #x0E00)
 205
 206 ;; Fixme: doc for this, c.f. above
 207 (define-charset 'tis620-2533
 208   "TIS620.2533"
 209   :short-name "TIS620.2533"
 210   :ascii-compatible-p t
 211   :code-space [0 255]
 212   :parents '(ascii eight-bit-control (thai-tis620 . -128)))
 213
 214 (define-charset 'jisx0201
 215   "JISX0201"
 216   :short-name "JISX0201"
 217   :code-space [33 254]
 218   :map "jisx0201")
 219
 220 (define-charset 'latin-jisx0201
 221   "Roman Part of JISX0201.1976"
 222   :short-name "JISX0201 Roman"
 223   :long-name "Japanese Roman (JISX0201.1976)"
 224   :iso-final-char ?J
 225   :emacs-mule-id  138
 226   :code-space [33 126]
 227   :parents '(jisx0201))
 228
 229 (define-charset 'katakana-jisx0201
 230   "Katakana Part of JISX0201.1976"
 231   :short-name "JISX0201 Katakana"
 232   :long-name "Japanese Katakana (JISX0201.1976)"
 233   :iso-final-char ?I
 234   :emacs-mule-id  137
 235   :code-space [33 126]
 236   :parents '((jisx0201 . #x80)))
 237
 238 (define-charset 'chinese-gb2312
 239   "GB2312 Chinese simplified: ISO-IR-58"
 240   :short-name "GB2312"
 241   :long-name "GB2312: ISO-IR-58"
 242   :iso-final-char ?A
 243   :emacs-mule-id 145
 244   :code-space [33 126 33 126]
 245   :code-offset #x110000
 246   :unify-map "gb2312-1980")
 247
 248 (define-charset 'chinese-gbk
 249   "GBK Chinese simplified."
 250   :short-name "GBK"
 251   :code-space [#x40 #xFE #x81 #xFE]
 252   :code-offset #x160000
 253   :unify-map "gbk")
 254 (define-charset-alias 'cp936 'chinese-gbk)
 255
 256 (define-charset 'chinese-cns11643-1
 257   "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
 258   :short-name "CNS11643-1"
 259   :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
 260   :iso-final-char ?G
 261   :emacs-mule-id  149
 262   :code-space [33 126 33 126]
 263   :code-offset #x114000
 264   :unify-map "cns11643-1")
 265
 266 (define-charset 'chinese-cns11643-2
 267   "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
 268   :short-name "CNS11643-2"
 269   :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
 270   :iso-final-char ?H
 271   :emacs-mule-id  150
 272   :code-space [33 126 33 126]
 273   :code-offset #x118000
 274   :unify-map "cns11643-2")
 275
 276 (define-charset 'chinese-cns11643-3
 277   "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
 278   :short-name  "CNS11643-3"
 279   :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
 280   :iso-final-char ?I
 281   :code-space [33 126 33 126]
 282   :emacs-mule-id  246
 283   :code-offset #x11C000)
 284
 285 (define-charset 'chinese-cns11643-4
 286   "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
 287   :short-name  "CNS11643-4"
 288   :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
 289   :iso-final-char ?J
 290   :emacs-mule-id  247
 291   :code-space [33 126 33 126]
 292   :code-offset #x120000)
 293
 294 (define-charset 'chinese-cns11643-5
 295   "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
 296   :short-name  "CNS11643-5"
 297   :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
 298   :iso-final-char ?K
 299   :emacs-mule-id  248
 300   :code-space [33 126 33 126]
 301   :code-offset #x124000)
 302
 303 (define-charset 'chinese-cns11643-6
 304   "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
 305   :short-name  "CNS11643-6"
 306   :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
 307   :iso-final-char ?L
 308   :emacs-mule-id 249
 309   :code-space [33 126 33 126]
 310   :code-offset #x128000)
 311
 312 (define-charset 'chinese-cns11643-7
 313   "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
 314   :short-name  "CNS11643-7"
 315   :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
 316   :iso-final-char ?M
 317   :emacs-mule-id 250
 318   :code-space [33 126 33 126]
 319   :code-offset #x12C000)
 320
 321 (define-charset 'big5
 322   "Big5 (Chinese traditional)"
 323   :short-name "Big5"
 324   :code-space [#x40 #xFE #xA1 #xFE]
 325   :code-offset #x130000
 326   :unify-map "big5")
 327 ;; Fixme: AKA cp950 according to
 328 ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>.  Is
 329 ;; that correct?
 330
 331 (define-charset 'chinese-big5-1
       ;; Two-byte charset (94x94 code space) for the frequently used
       ;; part of Big5; unified through the "big5-1" map.
 332   "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
 333   :short-name "Big5 (Level-1)"
 334   :long-name "Big5 (Level-1) A141-C67E"  ; same range as the docstring
 335   :iso-final-char ?0
 336   :emacs-mule-id 152
 337   :code-space [#x21 #x7E #x21 #x7E]
 338   :code-offset #x135000
 339   :unify-map "big5-1")
 340
 341 (define-charset 'chinese-big5-2
 342   "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
 343   :short-name "Big5 (Level-2)"
 344   :long-name "Big5 (Level-2) C940-FEFE"
 345   :iso-final-char ?1
 346   :emacs-mule-id  153
 347   :code-space [#x21 #x7E #x21 #x7E]
 348   :code-offset #x137800
 349   :unify-map "big5-2")
 350
 351 (define-charset 'japanese-jisx0208
 352   "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
 353   :short-name "JISX0208"
 354   :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
 355   :iso-final-char ?B
 356   :emacs-mule-id 146
 357   :code-space [33 126 33 126]
 358   :code-offset #x140000
 359   :unify-map "jisx0208-1990")
 360
 361 (define-charset 'japanese-jisx0208-1978
 362   "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
 363   :short-name "JISX0208.1978"
 364   :long-name  "JISX0208.1978 (Japanese): ISO-IR-42"
 365   :iso-final-char ?@
 366   :emacs-mule-id  144
 367   :code-space [33 126 33 126]
 368   :code-offset #x144000
 369   :unify-map "jisx0208-1978")
 370
 371 (define-charset 'japanese-jisx0212
 372   "JISX0212 Japanese supplement: ISO-IR-159"
 373   :short-name "JISX0212"
 374   :long-name "JISX0212 (Japanese): ISO-IR-159"
 375   :iso-final-char ?D
 376   :emacs-mule-id 148
 377   :code-space [33 126 33 126]
 378   :code-offset #x148000
 379   :unify-map "jisx0212-1990")
 380
 381 (define-charset 'japanese-jisx0213-1
 382   "JISX0213 Plane 1 (Japanese)"
 383   :short-name "JISX0213-1"
 384   :iso-final-char ?O
 385   :emacs-mule-id  151
 386   :code-space [33 126 33 126]
 387   :code-offset #x14C000)
 388
 389 (define-charset 'japanese-jisx0213-2
 390   "JISX0213 Plane 2 (Japanese)"
 391   :short-name "JISX0213-2"
 392   :iso-final-char ?P
 393   :emacs-mule-id 254
 394   :code-space [33 126 33 126]
 395   :code-offset #x150000)
 396
 397 (define-charset 'korean-ksc5601
 398   "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
 399   :short-name "KSC5601"
 400   :long-name "KSC5601 (Korean): ISO-IR-149"
 401   :iso-final-char ?C
 402   :emacs-mule-id 147
 403   :code-space [33 126 33 126]
 404   :map "ksc5601-1987")
 405
 406 ;; Fixme: Korean cp949/UHC
 407
 408 (define-charset 'chinese-sisheng
 409   "SiSheng characters for PinYin/ZhuYin"
 410   :short-name "SiSheng"
 411   :long-name "SiSheng (PinYin/ZhuYin)"
 412   :iso-final-char ?0
 413   :emacs-mule-id 160
 414   :code-space [33 126]
 415   :unify-map "sisheng"
 416   :code-offset #x200000)
 417
 418 (define-charset 'ipa
 419   "IPA (International Phonetic Association)"
 420   :short-name "IPA"
 421   :iso-final-char ?0
 422   :emacs-mule-id  161
 423   :unify-map "ipa"
 424   :code-space [32 127]
 425   :code-offset #x200080)
 426
 427 (define-charset 'viscii
 428   "VISCII1.1"
 429   :short-name "VISCII"
 430   :long-name "VISCII 1.1"
 431   :code-space [0 255]
 432   :map "viscii")
 433
 434 (define-charset 'vietnamese-viscii-lower
 435   "VISCII1.1 lower-case"
 436   :short-name "VISCII lower"
 437   :long-name "VISCII lower-case"
 438   :iso-final-char ?1
 439   :emacs-mule-id  162
 440   :code-space [32 127]
 441   :code-offset #x200200
 442   :unify-map "viscii-lower")
 443
 444 (define-charset 'vietnamese-viscii-upper
 445   "VISCII1.1 upper-case"
 446   :short-name "VISCII upper"
 447   :long-name "VISCII upper-case"
 448   :iso-final-char ?2
 449   :emacs-mule-id  163
 450   :code-space [32 127]
 451   :code-offset #x200280
 452   :unify-map "viscii-upper")
 453
 454 (define-charset 'vscii
 455   "VSCII1.1"
 456   :short-name "VSCII"
 457   :code-space [0 255]
 458   :map "vscii")
 459
 460 (define-charset 'koi8-r
 461   "KOI8-R"
 462   :short-name "KOI8-R"
 463   :ascii-compatible-p t
 464   :code-space [0 255]
 465   :map "koi8-r")
 466
 467 (define-charset-alias 'koi8 'koi8-r)
 468
 469 (define-charset 'alternativnyj
 470   "ALTERNATIVNYJ"
 471   :short-name "alternativnyj"
 472   :ascii-compatible-p t
 473   :code-space [0 255]
 474   :map "ibm866")
 475 ;; Fixme: http://czyborra.com/charsets/cyrillic.html says the
 476 ;; following, but the iconv map for cp866 isn't the same as his chart
 477 ;; for alternativnyj.  I can't find anything that looks like an
 478 ;; official definition of alternativnyj.
 479 (define-charset-alias 'cp866 'alternativnyj)
 480 (define-charset-alias 'ibm866 'alternativnyj)
 481
 482 (define-charset 'koi8-u
 483   "KOI8-U"
 484   :short-name "KOI8-U"
 485   :ascii-compatible-p t
 486   :code-space [0 255]
 487   :map "koi8-u")
 488
 489 (define-charset 'koi8-t
 490   "KOI8-T"
 491   :short-name "KOI8-T"
 492   :ascii-compatible-p t
 493   :code-space [0 255]
 494   :map "koi8-t")
 495
 496 (define-charset 'georgian-ps
 497   "GEORGIAN-PS"
 498   :short-name "GEORGIAN-PS"
 499   :ascii-compatible-p t
 500   :code-space [0 255]
 501   :map "georgian-ps")
 502
 503 (define-charset 'windows-1250
 504   "WINDOWS-1250 (Central Europe)"
 505   :short-name "WINDOWS-1250"
 506   :ascii-compatible-p t
 507   :code-space [0 255]
 508   :map "windows-1250")
 509 (define-charset-alias 'cp1250 'windows-1250)
 510
 511 (define-charset 'windows-1251
 512   "WINDOWS-1251 (Cyrillic)"
 513   :short-name "WINDOWS-1251"
 514   :ascii-compatible-p t
 515   :code-space [0 255]
 516   :map "windows-1251")
 517 (define-charset-alias 'cp1251 'windows-1251)
 518
 519 (define-charset 'windows-1252
 520   "WINDOWS-1252 (Latin I)"
 521   :short-name "WINDOWS-1252"
 522   :ascii-compatible-p t
 523   :code-space [0 255]
 524   :map "windows-1252")
 525 (define-charset-alias 'cp1252 'windows-1252)
 526
 527 (define-charset 'windows-1253
 528   "WINDOWS-1253 (Greek)"
 529   :short-name "WINDOWS-1253"
 530   :ascii-compatible-p t
 531   :code-space [0 255]
 532   :map "windows-1253")
 533 (define-charset-alias 'cp1253 'windows-1253)
 534
 535 (define-charset 'windows-1254
 536   "WINDOWS-1254 (Turkish)"
 537   :short-name "WINDOWS-1254"
 538   :ascii-compatible-p t
 539   :code-space [0 255]
 540   :map "windows-1254")
 541 (define-charset-alias 'cp1254 'windows-1254)
 542
 543 (define-charset 'windows-1255
 544   "WINDOWS-1255 (Hebrew)"
 545   :short-name "WINDOWS-1255"
 546   :ascii-compatible-p t
 547   :code-space [0 255]
 548   :map "windows-1255")
 549 (define-charset-alias 'cp1255 'windows-1255)
 550
 551 (define-charset 'windows-1256
 552   "WINDOWS-1256 (Arabic)"
 553   :short-name "WINDOWS-1256"
 554   :ascii-compatible-p t
 555   :code-space [0 255]
 556   :map "windows-1256")
 557 (define-charset-alias 'cp1256 'windows-1256)
 558
 559 (define-charset 'windows-1257
 560   "WINDOWS-1257 (Baltic)"
 561   :short-name "WINDOWS-1257"
 562   :ascii-compatible-p t
 563   :code-space [0 255]
 564   :map "windows-1257")
 565 (define-charset-alias 'cp1257 'windows-1257)
 566
 567 (define-charset 'windows-1258
 568   "WINDOWS-1258 (Viet Nam)"
 569   :short-name "WINDOWS-1258"
 570   :ascii-compatible-p t
 571   :code-space [0 255]
 572   :map "windows-1258")
 573 (define-charset-alias 'cp1258 'windows-1258)
 574
 575 (define-charset 'next
 576   "NEXT"
 577   :short-name "NEXT"
 578   :ascii-compatible-p t
 579   :code-space [0 255]
 580   :map "next")
 581
 582 (define-charset 'cp1125
 583   "CP1125"
 584   :short-name "CP1125"
 585   :code-space [0 255]
 586   :map "cp1125")
 587 (define-charset-alias 'ruscii 'cp1125)
 588 ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
 589 (define-charset-alias 'cp866u 'cp1125)
 590
 591 ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
 592 ;; shows this as not ASCII compatible, with various graphics in
 593 ;; 0x01-0x1F.
 594 (define-charset 'cp437
 595   "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
 596   :short-name "CP437"
 597   :code-space [0 255]
 598   :ascii-compatible-p t
 599   :map "cp437")
 600
 601 (define-charset 'cp720
 602   "CP720 (Arabic)"
 603   :short-name "CP720"
 604   :code-space [0 255]
 605   :ascii-compatible-p t
 606   :map "cp720")
 607
 608 (define-charset 'cp737
 609   "CP737 (PC Greek)"
 610   :short-name "CP737"
 611   :code-space [0 255]
 612   :ascii-compatible-p t
 613   :map "cp737")
 614
 615 (define-charset 'cp775
 616   "CP775 (PC Baltic)"
 617   :short-name "CP775"
 618   :code-space [0 255]
 619   :ascii-compatible-p t
 620   :map "cp775")
 621
 622 (define-charset 'cp851
 623   "CP851"
 624   :short-name "CP851"
 625   :code-space [0 255]
 626   :ascii-compatible-p t
 627   :map "cp851")
 628
 629 (define-charset 'cp852
 630   "CP852 (MS-DOS Latin-2)"
 631   :short-name "CP852"
 632   :code-space [0 255]
 633   :ascii-compatible-p t
 634   :map "cp852")
 635
 636 (define-charset 'cp855
 637   "CP855 (IBM Cyrillic)"
 638   :short-name "CP855"
 639   :code-space [0 255]
 640   :ascii-compatible-p t
 641   :map "cp855")
 642
 643 (define-charset 'cp857
 644   "CP857 (IBM Turkish)"
 645   :short-name "CP857"
 646   :code-space [0 255]
 647   :ascii-compatible-p t
 648   :map "cp857")
 649
 650 (define-charset 'cp858
 651   "CP858 (Multilingual Latin I + Euro)"
 652   :short-name "CP858"
 653   :code-space [0 255]
 654   :ascii-compatible-p t
 655   :map "cp858")
 656 (define-charset-alias 'cp00858 'cp858)  ; IANA has IBM00858/CP00858
 657
 658 (define-charset 'cp860
 659   "CP860 (MS-DOS Portuguese)"
 660   :short-name "CP860"
 661   :code-space [0 255]
 662   :ascii-compatible-p t
 663   :map "cp860")
 664
 665 (define-charset 'cp861
 666   "CP861 (MS-DOS Icelandic)"
 667   :short-name "CP861"
 668   :code-space [0 255]
 669   :ascii-compatible-p t
 670   :map "cp861")
 671
 672 (define-charset 'cp862
 673   "CP862 (PC Hebrew)"
 674   :short-name "CP862"
 675   :code-space [0 255]
 676   :ascii-compatible-p t
 677   :map "cp862")
 678
 679 (define-charset 'cp863
 680   "CP863 (MS-DOS Canadian French)"
 681   :short-name "CP863"
 682   :code-space [0 255]
 683   :ascii-compatible-p t
 684   :map "cp863")
 685
 686 (define-charset 'cp864
 687   "CP864 (PC Arabic)"
 688   :short-name "CP864"
 689   :code-space [0 255]
 690   :ascii-compatible-p t
 691   :map "cp864")
 692
 693 (define-charset 'cp865
 694   "CP865 (MS-DOS Nordic)"
 695   :short-name "CP865"
 696   :code-space [0 255]
 697   :ascii-compatible-p t
 698   :map "cp865")
 699
 700 (define-charset 'cp869
 701   "CP869 (IBM Modern Greek)"
 702   :short-name "CP869"
 703   :code-space [0 255]
 704   :ascii-compatible-p t
 705   :map "cp869")
 706
 707 (define-charset 'cp874
 708   "CP874 (IBM Thai)"
 709   :short-name "CP874"
 710   :code-space [0 255]
 711   :ascii-compatible-p t
 712   :map "cp874")
 713
 714 ;; For Arabic, we need three different types of character sets.
 715 ;; Digits are of direction left-to-right and of width 1-column.
 716 ;; Others are of direction right-to-left and of width 1-column or
 717 ;; 2-column.
 718 (define-charset 'arabic-digit
 719   "Arabic digit"
 720   :short-name "Arabic digit"
 721   :iso-final-char ?2
 722   :emacs-mule-id 164
 723   :code-space [34 42]
 724   :code-offset #x0600)
 725
 726 (define-charset 'arabic-1-column
 727   "Arabic 1-column"
 728   :short-name "Arabic 1-col"
 729   :long-name "Arabic 1-column"
 730   :iso-final-char ?3
 731   :emacs-mule-id 165
 732   :code-space [33 126]
 733   :code-offset #x200100)
 734
 735 (define-charset 'arabic-2-column
 736   "Arabic 2-column"
 737   :short-name "Arabic 2-col"
 738   :long-name "Arabic 2-column"
 739   :iso-final-char ?4
 740   :emacs-mule-id 224
 741   :code-space [33 126]
 742   :code-offset #x200180)
 743
 744 ;; Lao script.
 745 ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
 746 (define-charset 'lao
 747   "Lao characters (ISO10646 0E81..0EDF)"
 748   :short-name "Lao"
 749   :iso-final-char ?1
 750   :emacs-mule-id 167
 751   :code-space [33 126]
 752   :code-offset #x0E81)
 753
 754 (define-charset 'mule-lao
 755   "Lao characters (ISO10646 0E81..0EDF)"
 756   :short-name "Lao"
 757   :code-space [0 255]
 758   :parents '(ascii eight-bit-control (lao . -128)))
 759
 760
 761 ;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
 762 ;; not assigned.  They are automatically converted to each Indian
 763 ;; script which IS-13194 supports.
 764
 765 (define-charset 'indian-is13194
 766   "Generic Indian charset for data exchange with IS 13194"
 767   :short-name "IS 13194"
 768   :long-name "Indian IS 13194"
 769   :iso-final-char ?5
 770   :emacs-mule-id 225
 771   :code-space [33 126]
 772   :unify-map "is13194"
 773   :code-offset #x180000)
 774
 775 (define-charset  'indian-glyph
       ;; Two-dimensional charset: both code-space ranges are 32..127.
 776   "Glyphs for Indian characters."
 777   :short-name "Indian glyph"
 778   :iso-final-char ?4
 779   :emacs-mule-id 240  ; NOTE(review): indian-1-column also uses 240 -- confirm
 780   :code-space [32 127 32 127]
 781   :code-offset #x180100)
 782
 783 ;; Actual Glyph for 1-column width.
 784 (define-charset 'indian-1-column
 785   "Indian charset for 1-column width glyphs"
 786   :short-name "Indian 1-col"
 787   :long-name "Indian 1 Column"
 788   :iso-final-char ?6
 789   :emacs-mule-id  240
 790   :code-space [33 126 33 126]
 791   :code-offset #x184000)
 792
 793 ;; Actual Glyph for 2-column width.
 794 (define-charset 'indian-2-column
 795   "Indian charset for 2-column width glyphs"
 796   :short-name "Indian 2-col"
 797   :long-name "Indian 2 Column"
 798   :iso-final-char ?5
 799   :emacs-mule-id  251
 800   :code-space [33 126 33 126]
 801   :parents '(indian-1-column))
 802
 803 (define-charset 'tibetan
       ;; Two-byte charset (94x94 code space) unified through the
       ;; "tibetan" map.  Each property is given exactly once.
 804   "Tibetan characters"
 806   :short-name "Tibetan 2-col"
 807   :long-name "Tibetan 2 column"
 808   :iso-final-char ?7
 809   :emacs-mule-id 252
 810   :unify-map "tibetan"
 811   :code-space [33 126 33 126]
 812   :code-offset #x190000)
 813
 814 (define-charset 'tibetan-1-column
 815   "Tibetan 1 column glyph"
 816   :short-name "Tibetan 1-col"
 817   :long-name "Tibetan 1 column"
 818   :iso-final-char ?8
 819   :emacs-mule-id 241
 820   :code-space [33 126 33 37]
 821   :parents '(tibetan))
 822
 823 ;; Subsets of Unicode.
 824 (define-charset 'mule-unicode-2500-33ff
 825   "Unicode characters of the range U+2500..U+33FF."
 826   :short-name "Unicode subset 2"
 827   :long-name "Unicode subset (U+2500..U+33FF)"
 828   :iso-final-char ?2
 829   :emacs-mule-id 242
 830   :code-space [#x20 #x7f #x20 #x47]
 831   :code-offset #x2500)
 832
 833 (define-charset 'mule-unicode-e000-ffff
       ;; Subset charset starting at code offset #xE000; the long name
       ;; spells the range the same way as the docstring and as the
       ;; other mule-unicode-* subsets.
 834   "Unicode characters of the range U+E000..U+FFFF."
 835   :short-name "Unicode subset 3"
 836   :long-name "Unicode subset (U+E000..U+FFFF)"
 837   :iso-final-char ?3
 838   :emacs-mule-id 243
 839   :code-space [#x20 #x7F #x20 #x75]
 840   :code-offset #xE000)
 841
 842 (define-charset 'mule-unicode-0100-24ff
 843   "Unicode characters of the range U+0100..U+24FF."
 844   :short-name "Unicode subset"
 845   :long-name "Unicode subset (U+0100..U+24FF)"
 846   :iso-final-char ?1
 847   :emacs-mule-id 244
 848   :code-space [#x20 #x7F #x20 #x7F]
 849   :code-offset #x100)
 850
 851 (define-charset 'ethiopic
 852   "Ethiopic characters for Amharic and Tigrigna."
 853   :short-name "Ethiopic"
 854   :long-name "Ethiopic characters"
 855   :iso-final-char ?3
 856   :emacs-mule-id  245
 857   :unify-map "ethiopic"
 858   :code-space [33 126 33 126]
 859   :code-offset #x1A0000)
 860
 861 (define-charset 'mac-roman
 862   "Mac Roman charset"
 863   :short-name "Mac Roman"
 864   :ascii-compatible-p t
 865   :code-space [0 255]
 866   :map "mac-roman")
 867
 868 ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
 869 (define-charset 'ebcdic-us
 870   "US version of EBCDIC"
 871   :short-name "EBCDIC-US"
 872   :code-space [0 255]
 873   :mime-charset 'ebcdic-us
 874   :map "ebcdic-us")
 875
 876 (define-charset 'ebcdic-uk
 877   "UK version of EBCDIC"
 878   :short-name "EBCDIC-UK"
 879   :code-space [0 255]
 880   :mime-charset 'ebcdic-uk
 881   :map "ebcdic-uk")
 882
 883 (define-charset 'hp-roman8
       ;; ASCII-compatible single-byte charset loaded from the
       ;; "hp-roman8" map.
 884   "Encoding used by Hewlett-Packard printer software"
 885   :short-name "HP-ROMAN8"
 886   :ascii-compatible-p t
 887   :code-space [0 255]
 888   :map "hp-roman8")
 889
 890 ;; To make a coding system with this, a pre-write-conversion should
 891 ;; account for the commented-out multi-valued code points in
 892 ;; stdenc.map.
 893 (define-charset 'adobe-standard-encoding
 894   "Adobe `standard encoding' used in PostScript"
 895   :short-name "ADOBE-STANDARD-ENCODING"
 896   :code-space [#x20 255]
 897   :map "stdenc")
 898
 899 (define-charset 'symbol
 900   "Adobe symbol encoding used in PostScript"
 901   :short-name "ADOBE-SYMBOL"
 902   :code-space [#x20 255]
 903   :map "symbol")
 904
 905 (define-charset 'ibm850
 906   "DOS codepage 850 (Latin-1)"
 907   :short-name "IBM850"
 908   :ascii-compatible-p t
 909   :code-space [0 255]
 910   :map "ibm850")
 911 (define-charset-alias 'cp850 'ibm850)
 912
 913 (define-charset 'gb18030-2-byte
 914   "GB18030 2-byte (0x814E..0xFEFE)"
 915   :code-space [#x40 #xFE #x81 #xFE]
 916   :supplementary-p t
 917   :map "gb18030-2")
 918
 919 (define-charset 'gb18030-4-byte-bmp
 920   "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
 921   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
 922   :supplementary-p t
 923   :map "gb18030-4")
 924
 925 (define-charset 'gb18030-4-byte-smp
 926   "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
 927   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
 928   :min-code '(#x9030 . #x8130)
 929   :max-code '(#xE332 . #x9A35)
 930   :supplementary-p t
 931   :code-offset #x10000)
 932
 933 (define-charset 'gb18030-4-byte-ext-1
 934   "GB18030 4-byte (0x8431A530-0x8F39FE39)"
 935   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
 936   :min-code '(#x8431 . #xA530)
 937   :max-code '(#x8F39 . #xFE39)
 938   :supplementary-p t
 939   :code-offset #x200000                 ; ... #x22484B
 940   )
 941
 942 (define-charset 'gb18030-4-byte-ext-2
 943   "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
 944   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
 945   :min-code '(#xE332 . #x9A36)
 946   :max-code '(#xFE39 . #xFE39)
 947   :supplementary-p t
 948   :code-offset #X22484C                 ; ... #x279f93
 949   )
 950
 951 (define-charset 'gb18030
       ;; Union charset: it has no map or code offset of its own and is
       ;; composed of `ascii' plus the five gb18030-* charsets listed
       ;; under :parents.  The code space has four byte positions.
 952   "GB18030"
 953   :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
 954   :min-code 0
       ;; NOTE(review): presumably the cons holds the high and low 16-bit
       ;; halves of 0xFE39FE39, matching gb18030-4-byte-ext-2 -- confirm.
 955   :max-code '(#xFE39 . #xFE39)
 956   :parents '(ascii gb18030-2-byte
 957                    gb18030-4-byte-bmp gb18030-4-byte-smp
 958                    gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
 959
 960 (unify-charset 'chinese-gb2312)
 961 (unify-charset 'chinese-gbk)
 962 (unify-charset 'chinese-cns11643-1)
 963 (unify-charset 'chinese-cns11643-2)
 964 (unify-charset 'big5)
 965 (unify-charset 'chinese-big5-1)
 966 (unify-charset 'chinese-big5-2)
 967 (unify-charset 'vietnamese-viscii-lower)
 968 (unify-charset 'vietnamese-viscii-upper)
 969 ;; (unify-charset 'chinese-sisheng)
 970 ;; (unify-charset 'ipa)
 971 ;; (unify-charset 'indian-is13194)
 972 ;; (unify-charset 'tibetan)
 973 ;; (unify-charset 'ethiopic)
 974 ;; (unify-charset 'japanese-jisx0208-1978)
 975 ;; (unify-charset 'japanese-jisx0208)
 976 ;; (unify-charset 'japanese-jisx0212)
 977
 978 \f
 979 ;; These are tables for translating characters on decoding and
 980 ;; encoding.
 981 (setq standard-translation-table-for-decode nil)
 982
 983 (setq standard-translation-table-for-encode nil)
 984
 985 (defvar translation-table-for-input nil
 986   "If non-nil, a char table used to translate characters from input methods.
 987 \(Currently only used by Quail.)")
 988 \f
 989 ;;; Make fundamental coding systems.
 990
 991 ;; The coding system `no-conversion' is already defined in coding.c as
 992 ;; below:
 993 ;;
 994 ;; (define-coding-system 'no-conversion
 995 ;;   "Do no conversion."
 996 ;;   :coding-type 'raw-text
 997 ;;   :mnemonic ?=)
 998
 999 (define-coding-system-alias 'binary 'no-conversion)
1000
1001 (define-coding-system 'raw-text
1002  "Raw text, which means text contains random 8-bit codes.
1003 Encoding text with this coding system produces the actual byte
1004 sequence of the text in buffers and strings.  An exception is made for
1005 eight-bit-control characters.  Each of them is encoded into a single
1006 byte.
1007
1008 When you visit a file with this coding, the file is read into a
1009 unibyte buffer as is (except for EOL format), thus each byte of a file
1010 is treated as a character."
1011  :coding-type 'raw-text                ; same :coding-type as the `no-conversion' spec quoted above
1012  :mnemonic ?t)
1013
1014 (define-coding-system 'undecided
1015   "No conversion on encoding, automatic conversion on decoding."
1016   :coding-type 'undecided
1017   :mnemonic ?-
1018   :charset-list '(ascii))
1019
1020 (define-coding-system-alias 'unix 'undecided-unix) ; NOTE(review): undecided-unix/-dos/-mac are not defined in
1021 (define-coding-system-alias 'dos 'undecided-dos)   ; this part of the file -- presumably EOL variants created
1022 (define-coding-system-alias 'mac 'undecided-mac)   ; together with `undecided'; confirm
1023
1024 (define-coding-system 'iso-latin-1
1025   "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
1026   :coding-type 'iso-2022
1027   :mnemonic ?1
1028   :charset-list '(ascii latin-iso8859-1)
1029   :designation [ascii latin-iso8859-1 nil nil] ; first two slots preset (presumably G0/G1); last two unused
1030   :mime-charset 'iso-8859-1)
1031
1032 (define-coding-system-alias 'iso-8859-1 'iso-latin-1) ; the MIME name also works as a coding-system name
1033 (define-coding-system-alias 'latin-1 'iso-latin-1)
1034
1035 ;; Coding systems not specific to each language environment.
1036
1037 (define-coding-system 'emacs-mule
1038  "Emacs 21 internal format used in buffer and string."
1039  :coding-type 'emacs-mule
1040  :charset-list 'emacs-mule              ; a symbol rather than a list of charsets (cf. 'iso-2022 below)
1041  :mnemonic ?M)
1042
1043 (define-coding-system 'utf-8
1044   "UTF-8."
1045   :coding-type 'utf-8
1046   :mnemonic ?U
1047   :charset-list '(unicode))             ; restricted to the `unicode' charset; no :mime-charset is given
1048
1049 (define-coding-system-alias 'mule-utf-8 'utf-8) ; second name for the same coding system
1050
1051 (define-coding-system 'utf-8-emacs
1052   "Support for all Emacs characters (including non-Unicode characters)."
1053   :coding-type 'utf-8                   ; same :coding-type as `utf-8'; only the charset list differs
1054   :mnemonic ?U
1055   :charset-list '(emacs)
1056   :mime-charset 'utf-8)                 ; NOTE(review): `utf-8' itself sets no :mime-charset -- confirm this is intended
1057
1058 (define-coding-system 'utf-16
1059   "UTF-16"
1060   :coding-type 'utf-16                  ; neither :bom nor :endian is given, unlike the four variants below
1061   :mnemonic ?U
1062   :charset-list '(unicode)
1063   :mime-charset 'utf-16)
1064
1065 (define-coding-system 'utf-16-le-nosig
1066   "UTF-16, little endian, no signature."
1067   :coding-type 'utf-16
1068   :mnemonic ?U
1069   :charset-list '(unicode)
1070   :endian 'little)                      ; no :bom and no :mime-charset for the signature-less variant
1071
1072 (define-coding-system 'utf-16-be-nosig
1073   "UTF-16, big endian, no signature."
1074   :coding-type 'utf-16
1075   :mnemonic ?U
1076   :charset-list '(unicode)
1077   :endian 'big)                         ; no :bom and no :mime-charset for the signature-less variant
1078
1079 (define-coding-system 'utf-16-le
1080   "UTF-16, little endian, with signature."
1081   :coding-type 'utf-16
1082   :mnemonic ?U
1083   :charset-list '(unicode)
1084   :bom t                                ; the "signature" of the docstring
1085   :endian 'little
1086   :mime-charset 'utf-16-le)             ; NOTE(review): RFC 2781 defines the UTF-16LE label without a BOM -- confirm
1087
1088 (define-coding-system 'utf-16-be
1089   "UTF-16, big endian, with signature."
1090   :coding-type 'utf-16
1091   :mnemonic ?U
1092   :charset-list '(unicode)
1093   :bom t                                ; the "signature" of the docstring
1094   :endian 'big
1095   :mime-charset 'utf-16-be)             ; NOTE(review): RFC 2781 defines the UTF-16BE label without a BOM -- confirm
1096
1097 (define-coding-system 'iso-2022-7bit
1098   "ISO 2022 based 7-bit encoding using only G0."
1099   :coding-type 'iso-2022
1100   :mnemonic ?J
1101   :charset-list 'iso-2022
1102   :designation [(ascii t) nil nil nil]  ; only the first slot is populated, matching "only G0"
1103   :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
1104
1105 (define-coding-system 'iso-2022-7bit-ss2
1106   "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
1107   :coding-type 'iso-2022
1108   :mnemonic ?$
1109   :charset-list 'iso-2022
1110   :designation [(ascii 94) nil (nil 96) nil] ; 96-charsets go to the third slot (presumably G2, reached via SS2)
1111   :flags '(short ascii-at-eol ascii-at-cntl 7-bit
1112                  designation single-shift composition))
1113
1114 (define-coding-system 'iso-2022-7bit-lock
1115   "ISO-2022 coding system using Locking-Shift for 96-charset."
1116   :coding-type 'iso-2022
1117   :mnemonic ?&
1118   :charset-list 'iso-2022
1119   :designation [(ascii 94) (nil 96) nil nil] ; 96-charsets go to the second slot here, not the third
1120   :flags '(ascii-at-eol ascii-at-cntl 7-bit  ; no `short' flag, unlike the other 7-bit systems above
1121                         designation locking-shift composition))
1122
1123 (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
1124
1125 (define-coding-system 'iso-2022-7bit-lock-ss2
1126   "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
1127   :coding-type 'iso-2022
1128   :mnemonic ?i
1129   :charset-list '(ascii                 ; explicit list; the sibling systems use the symbol 'iso-2022
1130                   japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
1131                   korean-ksc5601
1132                   chinese-gb2312
1133                   chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1134                   chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1135                   chinese-cns11643-7)
1136   :designation [(ascii 94)
1137                 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
1138                 (nil chinese-cns11643-2)
1139                 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
1140                      chinese-cns11643-6 chinese-cns11643-7)]
1141   :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift ; both shift styles; no `designation'
1142                  single-shift init-bol))                       ; or `composition' flag, but `init-bol' added
1143
1144 (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
1145
1146 (define-coding-system 'iso-2022-8bit-ss2
1147   "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
1148   :coding-type 'iso-2022
1149   :mnemonic ?@
1150   :charset-list 'iso-2022
1151   :designation [(ascii 94) nil (nil 96) nil] ; same designation as iso-2022-7bit-ss2
1152   :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) ; iso-2022-7bit-ss2 flags minus `short' and `7-bit'
1153
1154 (define-coding-system 'compound-text
1155   "Compound text based generic encoding for decoding unknown messages.
1156
1157 This coding system does not support ICCCM Extended Segments."
1158   :coding-type 'iso-2022
1159   :mnemonic ?x
1160   :charset-list 'iso-2022
1161   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1162   :flags '(ascii-at-eol ascii-at-cntl   ; no `7-bit' and no `short' flag
1163                         designation locking-shift single-shift composition)
1164   ;; Fixme: this isn't a valid MIME charset and has to be
1165   ;; special-cased elsewhere  -- fx
1166   :mime-charset 'x-ctext)
1167
1168 (define-coding-system-alias  'x-ctext 'compound-text) ; the :mime-charset value doubles as an alias
1169 (define-coding-system-alias  'ctext 'compound-text)
1170
1171 ;; Same as compound-text, but doesn't produce composition escape
1172 ;; sequences.  Used in post-read and pre-write conversions of
1173 ;; compound-text-with-extensions, see mule.el.  Note that this should
1174 ;; not have a mime-charset property, to prevent it from showing up
1175 ;; close to the beginning of coding systems ordered by priority.
1176 (define-coding-system 'ctext-no-compositions
1177  "Compound text based generic encoding for decoding unknown messages.
1178
1179 Like `compound-text', but does not produce escape sequences for compositions."
1180   :coding-type 'iso-2022
1181   :mnemonic ?x
1182   :charset-list 'iso-2022
1183   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1184   :flags '(ascii-at-eol ascii-at-cntl   ; the `compound-text' flags minus `composition'
1185                         designation locking-shift single-shift))
1186
1187 (define-coding-system 'compound-text-with-extensions
1188  "Compound text encoding with ICCCM Extended Segment extensions.
1189
1190 This coding system should be used only for X selections.  It is inappropriate
1191 for decoding and encoding files, process I/O, etc."
1192   :coding-type 'raw-text                ; raw-text here; the ctext handling lives in the two hooks below
1193   :mnemonic ?x
1194   :post-read-conversion 'ctext-post-read-conversion ; neither function is defined in this part of the file
1195   :pre-write-conversion 'ctext-pre-write-conversion) ; (the ctext-no-compositions comment points at mule.el)
1196
1197 (define-coding-system-alias
1198   'x-ctext-with-extensions 'compound-text-with-extensions)
1199 (define-coding-system-alias
1200   'ctext-with-extensions 'compound-text-with-extensions)
1201
1202 (define-coding-system 'us-ascii
1203   "Convert all characters but ASCII to `?'."
1204   :coding-type 'charset
1205   :mnemonic ?-                          ; same mnemonic character as `undecided'
1206   :charset-list '(ascii)
1207   :default-char ??                      ; the `?' substitute named in the docstring
1208   :mime-charset 'us-ascii)
1209
1210 (define-coding-system-alias 'iso-safe 'us-ascii)
1211
1212 ;; Use us-ascii for terminal output if some other coding system is not
1213 ;; specified explicitly.
1214 (set-safe-terminal-coding-system-internal 'us-ascii)
1215
1216 ;; The other coding-systems are defined in each language specific
1217 ;; files under lisp/language.
1218
1219 ;; Normally, set coding system to `undecided' before reading a file.
1220 ;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1221 ;; but we regard them as containing multibyte characters.
1222 ;; Tar files are not decoded at all, but we treat them as raw bytes.
1223
1224 (setq file-coding-system-alist
1225       '(("\\.elc\\'" . (emacs-mule . emacs-mule))
1226         ("\\.utf\\(-8\\)?\\'" . utf-8)
1227         ;; This is the defined default for XML documents.  It may be
1228         ;; overridden by a charset specification in the header.  That
1229         ;; should be grokked by the auto-coding mechanism, but rms
1230         ;; vetoed that.  -- fx
1231         ("\\.xml\\'" . utf-8)
1232         ;; We use raw-text for reading loaddefs.el so that if it
1233         ;; happens to have DOS or Mac EOLs, they are converted to
1234         ;; newlines.  This is required to make the special treatment
1235         ;; of the "\ newline" combination in loaddefs.el, which marks
1236         ;; the beginning of a doc string, work.
1237         ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix)) ; literal dot, like the other patterns
1238         ("\\.tar\\'" . (no-conversion . no-conversion))
1239         ("" . (undecided . nil))))      ; empty regexp: catch-all entry, kept last
1240
1241 \f
1242 ;;; Setting coding categories and their priorities.
1243
1244 ;; This setting is just for reading Emacs Lisp source files which
1245 ;; contain multilingual text while dumping Emacs.  More appropriate
1246 ;; values are set by the command `set-language-environment' for each
1247 ;; language environment.
1248
1249 (set-coding-system-priority
1250  'iso-latin-1                           ; all three systems are defined earlier in this file;
1251  'utf-8                                 ; presumably listed highest priority first -- confirm
1252  'iso-2022-7bit
1253  )
1254
1255 \f
1256 ;;; Miscellaneous settings.
1257
1258 ;; Make all multibyte characters self-insert.
1259 (set-char-table-range (nth 1 global-map) ; second element of the keymap list -- presumably its char-table
1260                       (cons 128 (max-char)) ; every character code from 128 up to (max-char)
1261                       'self-insert-command)
1262
1263 (aset latin-extra-code-table ?\222 t)  ; set the entry for code ?\222 (octal) to t
1264
1265 ;; Move least specific charsets to end of priority list
1266
1267 (apply #'set-charset-priority           ; called with every charset except `unicode' and `emacs';
1268        (delq 'unicode (delq 'emacs (charset-priority-list)))) ; presumably the omitted two then rank last -- confirm
1269
1270 ;; The old code-pages library is obsoleted by coding systems based on
1271 ;; the charsets defined in this file but might be required by user
1272 ;; code.
1273 (provide 'code-pages)                   ; note: the feature provided is `code-pages', not `mule-conf'
1274
1275 ;; Local variables:
1276 ;; no-byte-compile: t
1277 ;; End:
1278
1279 ;;; mule-conf.el ends here