Tweak previous vz-bzr change yet again
[emacs.git] / lisp / language / cyrillic.el
blob33c5fbbf3f101bda573109268d837de42b0ab7cf
1 ;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1997-1998, 2001-2012 Free Software Foundation, Inc.
4 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
8 ;; Copyright (C) 2003
9 ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 ;; Registration Number H13PRO009
12 ;; Author: Kenichi Handa <handa@etl.go.jp>
13 ;; Keywords: multilingual, Cyrillic, i18n
15 ;; This file is part of GNU Emacs.
17 ;; GNU Emacs is free software: you can redistribute it and/or modify
18 ;; it under the terms of the GNU General Public License as published by
19 ;; the Free Software Foundation, either version 3 of the License, or
20 ;; (at your option) any later version.
22 ;; GNU Emacs is distributed in the hope that it will be useful,
23 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 ;; GNU General Public License for more details.
27 ;; You should have received a copy of the GNU General Public License
28 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
30 ;;; Commentary:
32 ;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ
33 ;; are converted to Unicode internally. See
34 ;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info
35 ;; on Cyrillic charsets, see
36 ;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and
37 ;; Alternativnyj coding systems should live in code-pages.el, but
38 ;; they've always been preloaded and the coding system autoload
39 ;; mechanism didn't get accepted, so they have to stay here and
40 ;; duplicate code-pages stuff.
42 ;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
43 ;; apart from codepoints 160 (NBSP, cf. U+0400), 173 (soft hyphen,
44 ;; cf. U+040D) and 253 (section sign, cf. U+045D). The KOI-8 and
45 ;; Alternativnyj coding systems encode both 8859-5 and Unicode.
46 ;; ucs-tables.el provides unification for cyrillic-iso-8bit.
48 ;; Customizing `utf-fragment-on-decoding' allows decoding characters
49 ;; from KOI and Alternativnyj into 8859-5 where that's possible.
50 ;; cyrillic-iso8859-5 characters take half as much space in the buffer
51 ;; as the mule-unicode-0100-24ff equivalents, though that's probably
52 ;; not normally a big deal.
54 ;;; Code:
56 ;; Cyrillic (general)
58 ;; ISO-8859-5 stuff
;; Charset-based coding system for ISO-8859-5, followed by an alias
;; that makes it reachable under the name `iso-8859-5' as well.
(define-coding-system 'cyrillic-iso-8bit
  "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
  :coding-type 'charset
  :mnemonic ?5
  :charset-list (list 'iso-8859-5)
  :mime-charset 'iso-8859-5)

(define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit)
;; Register the "Cyrillic-ISO" language environment under the
;; "Cyrillic" parent.  Every entry points at the ISO-8859-5 charset
;; or at the `cyrillic-iso-8bit' coding system.
(set-language-info-alist
 "Cyrillic-ISO"
 (list (list 'charset 'iso-8859-5)
       (list 'coding-system 'cyrillic-iso-8bit)
       (list 'coding-priority 'cyrillic-iso-8bit)
       (cons 'input-method "cyrillic-yawerty") ; fixme
       (cons 'nonascii-translation 'iso-8859-5)
       (cons 'unibyte-display 'cyrillic-iso-8bit)
       (list 'features 'cyril-util)
       (cons 'sample-text
             "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
       (cons 'documentation "Support for Cyrillic ISO-8859-5."))
 (list "Cyrillic"))
81 ;; KOI-8R stuff
;; Charset-based coding system for KOI8-R.
(define-coding-system 'cyrillic-koi8
  "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)."
  :coding-type 'charset
  ;; The mnemonic used to be ?K, which is strictly speaking the more
  ;; accurate choice, but ?K is also taken by Korean.  People using
  ;; koi8 for languages other than Russian will have to forgive us.
  :mnemonic ?R
  :charset-list (list 'koi8)
  :mime-charset 'koi8-r)

;; Alternative names for the same coding system, defined in the
;; order koi8-r, koi8, cp878.
(dolist (alias '(koi8-r koi8 cp878))
  (define-coding-system-alias alias 'cyrillic-koi8))
;; Register the "Cyrillic-KOI8" language environment under the
;; "Cyrillic" parent.  The alist is a constant, so a plain quote is
;; sufficient.
(set-language-info-alist
 "Cyrillic-KOI8"
 '((charset koi8)
   (coding-system cyrillic-koi8)
   (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
   (ctext-non-standard-encodings "koi8-r")
   (nonascii-translation . koi8)
   (input-method . "russian-typewriter")
   (features cyril-util)
   (unibyte-display . cyrillic-koi8)
   (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
   (documentation . "Support for Cyrillic KOI8-R."))
 '("Cyrillic"))
;; Register the "Russian" language environment under the "Cyrillic"
;; parent.  It uses the `cyrillic-koi8' coding system and the
;; "russian-computer" input method.
(set-language-info-alist
 "Russian"
 (list (list 'charset 'cyrillic-iso8859-5)
       ;; The translation table is looked up when this form is
       ;; evaluated.
       ;; NOTE(review): `get' yields nil unless something outside this
       ;; file has put a `translation-table' property on that symbol
       ;; -- confirm the table is still defined.
       (cons 'nonascii-translation
             (get 'cyrillic-koi8-r-nonascii-translation-table
                  'translation-table))
       (list 'coding-system 'cyrillic-koi8)
       (list 'coding-priority 'cyrillic-koi8 'cyrillic-iso-8bit)
       (cons 'input-method "russian-computer")
       (list 'features 'cyril-util)
       (cons 'unibyte-display 'cyrillic-koi8)
       (cons 'sample-text
             "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
       (cons 'documentation "\
Support for Russian using koi8-r and the russian-computer input method.")
       (cons 'tutorial "TUTORIAL.ru"))
 (list "Cyrillic"))
;; Charset-based coding system for KOI8-U.
(define-coding-system 'koi8-u
  "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
  :coding-type 'charset
  :mnemonic ?U
  :charset-list (list 'koi8-u)
  :mime-charset 'koi8-u)
;; Register the "Ukrainian" language environment under the
;; "Cyrillic" parent, based on the `koi8-u' charset and coding system.
;; Note that "Ukrainian" is registered a second time near the end of
;; this file.
(set-language-info-alist
 "Ukrainian"
 '((charset koi8-u)
   (coding-system koi8-u)
   (coding-priority koi8-u)
   (nonascii-translation . koi8-u)
   (input-method . "ukrainian-computer")
   (documentation . "Support for Ukrainian with KOI8-U character set."))
 '("Cyrillic"))
143 ;;; ALTERNATIVNYJ stuff
;; Charset-based coding system for ALTERNATIVNYJ; no MIME charset is
;; declared for it.  The alias provides the shorter name.
(define-coding-system 'cyrillic-alternativnyj
  "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
  :coding-type 'charset
  :mnemonic ?A
  :charset-list (list 'alternativnyj))

(define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)
;; Register the "Cyrillic-ALT" language environment under the
;; "Cyrillic" parent, based on the `alternativnyj' charset.
(set-language-info-alist
 "Cyrillic-ALT"
 '((charset alternativnyj)
   (coding-system cyrillic-alternativnyj)
   (coding-priority cyrillic-alternativnyj)
   (nonascii-translation . alternativnyj)
   (input-method . "russian-typewriter")
   (features cyril-util)
   (unibyte-display . cyrillic-alternativnyj)
   (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
   (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
 '("Cyrillic"))
;; Charset-based coding system for CP866, backed by the `ibm866'
;; charset.
(define-coding-system 'cp866
  "CP866 encoding for Cyrillic."
  :coding-type 'charset
  :mnemonic ?*
  :charset-list (list 'ibm866)
  :mime-charset 'cp866)
;; Charset-based coding system for KOI8-U.
;; NOTE(review): this form repeats, argument for argument, the
;; `koi8-u' definition that appears earlier in this file (just before
;; the first "Ukrainian" entry).  It looks redundant -- confirm
;; whether one of the two can be dropped.
(define-coding-system 'koi8-u
  "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
  :coding-type 'charset
  :mnemonic ?U
  :charset-list (list 'koi8-u)
  :mime-charset 'koi8-u)
;; Charset-based coding system for KOI8-T.
(define-coding-system 'koi8-t
  "KOI8-T 8-bit encoding for Cyrillic"
  :coding-type 'charset
  :mnemonic ?*
  :charset-list (list 'koi8-t)
  :mime-charset 'koi8-t)
;; Charset-based coding system for windows-1251, also reachable under
;; the alias `cp1251'.
(define-coding-system 'windows-1251
  "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)"
  :coding-type 'charset
  :mnemonic ?b
  :charset-list (list 'windows-1251)
  :mime-charset 'windows-1251)

(define-coding-system-alias 'cp1251 'windows-1251)
;; Charset-based coding system for cp1125; no MIME charset is
;; declared for it.
(define-coding-system 'cp1125
  "cp1125 8-bit encoding for Cyrillic"
  :coding-type 'charset
  :mnemonic ?*
  :charset-list (list 'cp1125))

(define-coding-system-alias 'ruscii 'cp1125)

;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-coding-system-alias 'cp866u 'cp1125)
;; Charset-based coding system for DOS codepage 855, also reachable
;; under the alias `ibm855'.
(define-coding-system 'cp855
  "DOS codepage 855 (Russian)"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list (list 'cp855)
  :mime-charset 'cp855)

(define-coding-system-alias 'ibm855 'cp855)
;; Charset-based coding system for the Bulgarian DOS codepage (MIK);
;; no MIME charset is declared for it.
(define-coding-system 'mik
  "Bulgarian DOS codepage"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list (list 'mik))
;; Charset-based coding system for the PT154 codepage; no MIME
;; charset is declared for it.  The docstring previously misspelled
;; the vendor name as "Parattype".
(define-coding-system 'pt154
  "Paratype Asian Cyrillic codepage"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list '(pt154))
223 ;; (set-language-info-alist
224 ;; "Windows-1251" `((coding-system windows-1251)
225 ;; (coding-priority windows-1251)
226 ;; (input-method . "russian-typewriter") ; fixme?
227 ;; (features code-pages)
228 ;; (documentation . "Support for windows-1251 character set."))
229 ;; '("Cyrillic"))
;; Register the "Tajik" language environment under the "Cyrillic"
;; parent, based on the `koi8-t' charset and coding system.
(set-language-info-alist
 "Tajik" '((coding-system koi8-t)
           (coding-priority koi8-t)
           ;; Use the same charset name as the `charset' entry below
           ;; and as the `koi8-t' coding system's :charset-list; every
           ;; other language environment in this file pairs
           ;; `nonascii-translation' with its own charset in the same
           ;; way.  This entry used to name `cyrillic-koi8-t', which
           ;; nothing else in this file defines.
           (nonascii-translation . koi8-t)
           (charset koi8-t)
           (input-method . "russian-typewriter") ; fixme?
           (features code-pages)
           (documentation . "Support for Tajik using KOI8-T."))
 '("Cyrillic"))
;; Register the "Bulgarian" language environment under the
;; "Cyrillic" parent, based on windows-1251 and the "bulgarian-bds"
;; input method.
(set-language-info-alist
 "Bulgarian"
 '((coding-system windows-1251)
   (coding-priority windows-1251)
   (nonascii-translation . windows-1251)
   (charset windows-1251)
   (ctext-non-standard-encodings "microsoft-cp1251")
   (input-method . "bulgarian-bds")
   (documentation
    . "Support for Bulgarian with windows-1251 character set."))
 '("Cyrillic"))
;; Register the "Belarusian" language environment under the
;; "Cyrillic" parent, based on windows-1251 and the "belarusian"
;; input method.
(set-language-info-alist
 "Belarusian"
 '((coding-system windows-1251)
   (coding-priority windows-1251)
   (nonascii-translation . windows-1251)
   (charset windows-1251)
   (ctext-non-standard-encodings "microsoft-cp1251")
   (input-method . "belarusian")
   ;; The documentation string spans two lines; the backslash keeps
   ;; the parenthesis at the start of the second line from confusing
   ;; Emacs's defun detection.
   (documentation
    . "Support for Belarusian with windows-1251 character set.
\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
 '("Cyrillic"))
;; Second registration of the "Ukrainian" language environment.
;; It supplies only the coding system, coding priority, input method
;; and documentation keys; an earlier form in this file also
;; registers "Ukrainian", with `charset' and `nonascii-translation'
;; entries as well.
(set-language-info-alist
 "Ukrainian"
 '((coding-system koi8-u)
   (coding-priority koi8-u)
   (input-method . "ukrainian-computer")
   (documentation . "Support for Ukrainian with koi8-u character set."))
 '("Cyrillic"))

;; Announce the `cyrillic' feature.
(provide 'cyrillic)
274 ;;; cyrillic.el ends here