1 ;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 ;; 2005, 2006, 2007, 2008, 2009, 2010
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H14PRO021
10 ;; National Institute of Advanced Industrial Science and Technology (AIST)
11 ;; Registration Number H13PRO009
13 ;; Author: Kenichi Handa <handa@etl.go.jp>
14 ;; Keywords: multilingual, Cyrillic, i18n
16 ;; This file is part of GNU Emacs.
18 ;; GNU Emacs is free software: you can redistribute it and/or modify
19 ;; it under the terms of the GNU General Public License as published by
20 ;; the Free Software Foundation, either version 3 of the License, or
21 ;; (at your option) any later version.
23 ;; GNU Emacs is distributed in the hope that it will be useful,
24 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 ;; GNU General Public License for more details.
28 ;; You should have received a copy of the GNU General Public License
29 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
33 ;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ
34 ;; are converted to Unicode internally. See
35 ;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info
36 ;; on Cyrillic charsets, see
37 ;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and
38 ;; Alternativnyj coding systems should live in code-pages.el, but
39 ;; they've always been preloaded and the coding system autoload
40 ;; mechanism didn't get accepted, so they have to stay here and
41 ;; duplicate code-pages stuff.
43 ;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
44 ;; apart from codepoints 160 (NBSP, c.f. U+0400), 173 (soft hyphen,
;; c.f. U+040D) and 253 (section sign, c.f. U+045D).  The KOI-8 and
46 ;; Alternativnyj coding systems encode both 8859-5 and Unicode.
47 ;; ucs-tables.el provides unification for cyrillic-iso-8bit.
49 ;; Customizing `utf-fragment-on-decoding' allows decoding characters
50 ;; from KOI and Alternativnyj into 8859-5 where that's possible.
51 ;; cyrillic-iso8859-5 characters take half as much space in the buffer
52 ;; as the mule-unicode-0100-24ff equivalents, though that's probably
53 ;; not normally a big deal.
;; ISO-8859-5: single-byte Cyrillic coding system backed by the
;; `iso-8859-5' charset, together with its MIME-style alias.
(define-coding-system 'cyrillic-iso-8bit
  "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
  ;; `:coding-type' and `:mnemonic' are required attributes of
  ;; `define-coding-system'.  The other single-charset coding systems
  ;; in this file all use the `charset' type.
  :coding-type 'charset
  :mnemonic ?5
  :charset-list '(iso-8859-5)
  :mime-charset 'iso-8859-5)

(define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit)
;; Language environment for text encoded in ISO-8859-5.
;; Each alist element is (KEY . INFO); see `set-language-info-alist'.
(set-language-info-alist
 "Cyrillic-ISO" '((charset iso-8859-5)
                  (coding-system cyrillic-iso-8bit)
                  (coding-priority cyrillic-iso-8bit)
                  (input-method . "cyrillic-yawerty") ; fixme
                  (nonascii-translation . iso-8859-5)
                  (unibyte-display . cyrillic-iso-8bit)
                  (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
                  (documentation . "Support for Cyrillic ISO-8859-5.")))
;; KOI8-R coding system and the alternative names it is known by.
(define-coding-system 'cyrillic-koi8
  "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)."
  ;; `:coding-type' and `:mnemonic' are required attributes.
  :coding-type 'charset
  ;; We used to use ?K.  It is true that ?K is more strictly correct,
  ;; but it is also used for Korean.  So people who use koi8 for
  ;; languages other than Russian will have to forgive us.
  :mnemonic ?R
  ;; Same charset that the "Cyrillic-KOI8" language environment names.
  :charset-list '(koi8)
  :mime-charset 'koi8-r)

(define-coding-system-alias 'koi8-r 'cyrillic-koi8)
(define-coding-system-alias 'koi8 'cyrillic-koi8)
(define-coding-system-alias 'cp878 'cyrillic-koi8)
;; Language environment for text encoded in KOI8-R.
;; ISO-8859-5 is listed as the second coding priority.
(set-language-info-alist
 "Cyrillic-KOI8" `((charset koi8)
                   (coding-system cyrillic-koi8)
                   (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
                   (ctext-non-standard-encodings "koi8-r")
                   (nonascii-translation . koi8)
                   (input-method . "russian-typewriter")
                   (features cyril-util)
                   (unibyte-display . cyrillic-koi8)
                   (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
                   (documentation . "Support for Cyrillic KOI8-R.")))
;; "Russian" language environment: KOI8-R first, ISO-8859-5 second,
;; with the russian-computer input method and the Russian tutorial.
(set-language-info-alist
 "Russian" `((charset cyrillic-iso8859-5)
             (nonascii-translation
              ;; `get' needs both the symbol and the property name; a
              ;; translation table is kept on the symbol's
              ;; `translation-table' property.
              . ,(get 'cyrillic-koi8-r-nonascii-translation-table
                      'translation-table))
             (coding-system cyrillic-koi8)
             (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
             (input-method . "russian-computer")
             (features cyril-util)
             (unibyte-display . cyrillic-koi8)
             (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
             (documentation . "\
Support for Russian using koi8-r and the russian-computer input method.")
             (tutorial . "TUTORIAL.ru")))
;; KOI8-U: single-byte coding system backed by the `koi8-u' charset.
(define-coding-system 'koi8-u
  "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?U
  :charset-list '(koi8-u)
  :mime-charset 'koi8-u)
;; "Ukrainian" language environment based on KOI8-U.
(set-language-info-alist
 "Ukrainian" `((charset koi8-u)
               (coding-system koi8-u)
               (coding-priority koi8-u)
               (nonascii-translation . koi8-u)
               (input-method . "ukrainian-computer")
               ;; The doc string must be the value of a `documentation'
               ;; key; a bare trailing ". STRING" would make the alist
               ;; an improper list.
               (documentation
                . "Support for Ukrainian with KOI8-U character set.")))
144 ;;; ALTERNATIVNYJ stuff
;; ALTERNATIVNYJ coding system, backed by the `alternativnyj' charset,
;; plus a short alias.  No MIME charset is declared for it.
(define-coding-system 'cyrillic-alternativnyj
  "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?A
  :charset-list '(alternativnyj))

(define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)
;; Language environment for text encoded in ALTERNATIVNYJ.
(set-language-info-alist
 "Cyrillic-ALT" `((charset alternativnyj)
                  (coding-system cyrillic-alternativnyj)
                  (coding-priority cyrillic-alternativnyj)
                  (nonascii-translation . alternativnyj)
                  (input-method . "russian-typewriter")
                  (features cyril-util)
                  (unibyte-display . cyrillic-alternativnyj)
                  (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
                  (documentation . "Support for Cyrillic ALTERNATIVNYJ.")))
;; CP866: single-byte coding system backed by the `ibm866' charset.
(define-coding-system 'cp866
  "CP866 encoding for Cyrillic."
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?*
  :charset-list '(ibm866)
  :mime-charset 'cp866)
;; KOI8-U: single-byte coding system backed by the `koi8-u' charset.
;; NOTE(review): `koi8-u' is also defined earlier in this file with
;; the same attributes, so this definition looks redundant -- confirm
;; before removing either one.
(define-coding-system 'koi8-u
  "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?U
  :charset-list '(koi8-u)
  :mime-charset 'koi8-u)
;; KOI8-T: single-byte coding system backed by the `koi8-t' charset.
;; It is the coding system of the "Tajik" language environment.
(define-coding-system 'koi8-t
  "KOI8-T 8-bit encoding for Cyrillic"
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?*
  :charset-list '(koi8-t)
  :mime-charset 'koi8-t)
;; windows-1251: single-byte coding system backed by the
;; `windows-1251' charset, also reachable as `cp1251'.
(define-coding-system 'windows-1251
  "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)"
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?b
  :charset-list '(windows-1251)
  :mime-charset 'windows-1251)
(define-coding-system-alias 'cp1251 'windows-1251)
;; cp1125: single-byte coding system backed by the `cp1125' charset,
;; with two alternative names.  No MIME charset is declared for it.
(define-coding-system 'cp1125
  "cp1125 8-bit encoding for Cyrillic"
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?*
  :charset-list '(cp1125))
(define-coding-system-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-coding-system-alias 'cp866u 'cp1125)
;; cp855: single-byte coding system backed by the `cp855' charset,
;; also reachable as `ibm855'.
(define-coding-system 'cp855
  "DOS codepage 855 (Russian)"
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?*
  :charset-list '(cp855)
  :mime-charset 'cp855)
(define-coding-system-alias 'ibm855 'cp855)
;; MIK: single-byte coding system backed by the `mik' charset.
;; No MIME charset is declared for it.
(define-coding-system 'mik
  "Bulgarian DOS codepage"
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?*
  :charset-list '(mik))
;; PT154: single-byte coding system backed by the `pt154' charset.
;; No MIME charset is declared for it.
(define-coding-system 'pt154
  "ParaType Asian Cyrillic codepage"
  :coding-type 'charset
  ;; `:mnemonic' is a required attribute of `define-coding-system'.
  :mnemonic ?*
  :charset-list '(pt154))
224 ;; (set-language-info-alist
225 ;; "Windows-1251" `((coding-system windows-1251)
226 ;; (coding-priority windows-1251)
227 ;; (input-method . "russian-typewriter") ; fixme?
228 ;; (features code-pages)
229 ;; (documentation . "Support for windows-1251 character set."))
;; "Tajik" language environment based on KOI8-T.
(set-language-info-alist
 "Tajik" `((coding-system koi8-t)
           (coding-priority koi8-t)
           (nonascii-translation . cyrillic-koi8-t)
           (input-method . "russian-typewriter") ; fixme?
           (features code-pages)
           (documentation . "Support for Tajik using KOI8-T.")))
;; "Bulgarian" language environment based on windows-1251.
(set-language-info-alist
 "Bulgarian" `((coding-system windows-1251)
               (coding-priority windows-1251)
               (nonascii-translation . windows-1251)
               (charset windows-1251)
               (ctext-non-standard-encodings "microsoft-cp1251")
               (input-method . "bulgarian-bds")
               ;; The doc string must be the value of a `documentation'
               ;; key; a bare trailing ". STRING" would make the alist
               ;; an improper list.
               (documentation
                . "Support for Bulgarian with windows-1251 character set.")))
;; "Belarusian" language environment based on windows-1251.
(set-language-info-alist
 "Belarusian" `((coding-system windows-1251)
                (coding-priority windows-1251)
                (nonascii-translation . windows-1251)
                (charset windows-1251)
                (ctext-non-standard-encodings "microsoft-cp1251")
                (input-method . "belarusian")
                ;; The doc string must be the value of a `documentation'
                ;; key; a bare trailing ". STRING" would make the alist
                ;; an improper list.
                (documentation
                 . "Support for Belarusian with windows-1251 character set.
\(The name Belarusian replaced Byelorussian in the early 1990s.)")))
;; "Ukrainian" language environment based on KOI8-U.
;; NOTE(review): a "Ukrainian" entry is also registered earlier in
;; this file; confirm that both registrations are intended.
(set-language-info-alist
 "Ukrainian" `((coding-system koi8-u)
               (coding-priority koi8-u)
               (input-method . "ukrainian-computer")
               ;; The doc string must be the value of a `documentation'
               ;; key; a bare trailing ". STRING" would make the alist
               ;; an improper list.
               (documentation
                . "Support for Ukrainian with koi8-u character set.")))
275 ;; arch-tag: bda71ae0-ba41-4cb6-a6e0-1dff542313d3
276 ;;; cyrillic.el ends here