1 ;;; chinese.el --- support for Chinese -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 ;; 2005, 2006, 2007, 2008
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H14PRO021
10 ;; Keywords: multilingual, Chinese
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 3, or (at your option)
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are supported.
36 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)."
44 (nil chinese-gb2312 chinese-cns11643-1
)
45 (nil chinese-cns11643-2
)
47 nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
49 '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2
)
50 (mime-charset . iso-2022-cn
)))
;; Make `chinese-iso-7bit' an alias of the `iso-2022-cn' coding system.
(define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn)
56 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)."
58 (nil chinese-gb2312 chinese-cns11643-1
)
59 (nil chinese-cns11643-2
)
60 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
61 chinese-cns11643-6 chinese-cns11643-7
)
62 nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
64 '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2
65 chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
66 chinese-cns11643-6 chinese-cns11643-7
)
67 (mime-charset . iso-2022-cn-ext
)))
70 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
71 ;;; Chinese GB2312 (simplified)
72 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
75 'chinese-iso-8bit
2 ?c
76 "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)."
77 '(ascii chinese-gb2312 nil nil
78 nil ascii-eol ascii-cntl nil nil nil nil
)
79 '((safe-charsets ascii chinese-gb2312
)
80 (mime-charset . gb2312
)))
;; Alternative names for the GB2312 EUC coding system.  Every symbol in
;; the list becomes an alias of `chinese-iso-8bit'; the aliases are
;; registered in the order listed.
(dolist (alias '(cn-gb-2312 euc-china euc-cn cn-gb gb2312 cp936))
  (define-coding-system-alias alias 'chinese-iso-8bit))
91 "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)."
93 '((safe-charsets ascii chinese-gb2312
)
94 (mime-charset . hz-gb-2312
)
95 (post-read-conversion . post-read-decode-hz
)
96 (pre-write-conversion . pre-write-encode-hz
)))
;; Alternative names for the HZ coding system: each symbol in the list
;; becomes an alias of `chinese-hz'.
(dolist (alias '(hz-gb-2312 hz))
  (define-coding-system-alias alias 'chinese-hz))
101 (defun post-read-decode-hz (len)
103 (buffer-modified-p (buffer-modified-p))
104 last-coding-system-used
)
106 (decode-hz-region pos
(+ pos len
))
107 (set-buffer-modified-p buffer-modified-p
))))
109 (defun pre-write-encode-hz (from to
)
110 (let ((buf (current-buffer)))
111 (set-buffer (generate-new-buffer " *temp*"))
114 (insert-buffer-substring buf from to
))
115 (let (last-coding-system-used)
116 (encode-hz-region 1 (point-max)))
119 (set-language-info-alist
120 "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng
)
121 (coding-system chinese-iso-8bit iso-2022-cn chinese-hz
)
122 (coding-priority chinese-iso-8bit chinese-big5 iso-2022-cn
)
123 (input-method .
"chinese-py-punct")
124 (features china-util
)
125 (sample-text .
"Chinese (\e$AVPND\e(B,\e$AFUM(;0\e(B,\e$A::So\e(B) \e$ADc:C\e(B")
126 (documentation .
"Support for Chinese GB2312 character set.")
127 (tutorial .
"TUTORIAL.cn"))
130 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
131 ;; Chinese BIG5 (traditional)
132 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
136 "BIG5 8-bit encoding for Chinese (MIME:Big5)."
138 '((safe-charsets ascii chinese-big5-1 chinese-big5-2
)
139 (mime-charset . big5
)
140 (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char
)
141 (chinese-big5-2 "BIG5" encode-big5-char
))))
;; Alternative names for the Big5 coding system: each symbol in the
;; list becomes an alias of `chinese-big5'.
(dolist (alias '(big5 cn-big5 cp950))
  (define-coding-system-alias alias 'chinese-big5))
147 ;; Big5 font requires special encoding.
148 (define-ccl-program ccl-encode-big5-font
150 ;; In: R0:chinese-big5-1 or chinese-big5-2
151 ;; R1:position code 1
152 ;; R2:position code 2
153 ;; Out: R1:font code point 1
154 ;; R2:font code point 2
155 ((r2 = ((((r1 - ?
\x21) * 94) + r2
) - ?
\x21))
156 (if (r0 == ,(charset-id 'chinese-big5-2
)) (r2 += 6280))
157 (r1 = ((r2 / 157) + ?
\xA1))
159 (if (r2 < ?
\x3F) (r2 += ?
\x40) (r2 += ?
\x62))))
160 "CCL program to encode a Big5 code to code point of Big5 font.")
;; Prepend a ("big5" . <value of `ccl-encode-big5-font'>) entry to
;; `font-ccl-encoder-alist'.  The variable is evaluated here, so the
;; entry holds its current value rather than the symbol.
(push (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist)
165 (set-language-info-alist
166 "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2
)
167 (coding-system chinese-big5 chinese-iso-7bit
)
168 (coding-priority chinese-big5 iso-2022-cn chinese-iso-8bit
)
169 (input-method .
"chinese-py-punct-b5")
170 (features china-util
)
171 (sample-text .
"Cantonese (\e$(0GnM$\e(B,\e$(0N]0*Hd\e(B) \e$(0*/=(\e(B, \e$(0+$)p\e(B")
172 (documentation .
"Support for Chinese Big5 character set.")
173 (tutorial .
"TUTORIAL.zh"))
176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
177 ;; Chinese CNS11643 (traditional)
178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defvar big5-to-cns (make-translation-table)
  "Translation table for encoding to `euc-tw'.")

;; china-util may have been loaded earlier and already registered the
;; table; register it here only when the symbol does not yet carry a
;; `translation-table' property.
(or (get 'big5-to-cns 'translation-table)
    (define-translation-table 'big5-to-cns big5-to-cns))
186 (define-ccl-program ccl-decode-euc-tw
187 ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding;
188 ;; CNS planes 2 to 7 always need four bytes. In internal encoding of
189 ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need
190 ;; four bytes. Thus a buffer magnification value of 2 (for both
191 ;; encoding and decoding) is sufficient.
193 ;; we don't have enough registers to hold all charset-ids
194 ((r4 = ,(charset-id 'chinese-cns11643-1
))
195 (r5 = ,(charset-id 'chinese-cns11643-2
))
196 (r6 = ,(charset-id 'chinese-cns11643-3
))
212 ;; OK, we have a plane
214 ;; invalid first byte
218 ;; invalid second byte
226 (r1 = ,(charset-id 'chinese-cns11643-4
))
227 (r1 = ,(charset-id 'chinese-cns11643-5
))
228 (r1 = ,(charset-id 'chinese-cns11643-6
))
229 (r1 = ,(charset-id 'chinese-cns11643-7
)))
230 (r2 = ((((r2 -
#x80
) << 7) + r3
) -
#x80
))
231 (write-multibyte-character r1 r2
)
235 ;; invalid first byte
238 ;; invalid second byte
242 ((r1 = ((((r0 -
#x80
) << 7) + r1
) -
#x80
))
243 (write-multibyte-character r4 r1
)
245 "CCL program to decode EUC-TW encoding."
248 (define-ccl-program ccl-encode-euc-tw
250 ;; we don't have enough registers to hold all charset-ids
251 ((r2 = ,(charset-id 'ascii
))
252 (r3 = ,(charset-id 'chinese-big5-1
))
253 (r4 = ,(charset-id 'chinese-big5-2
))
254 (r5 = ,(charset-id 'chinese-cns11643-1
))
255 (r6 = ,(charset-id 'chinese-cns11643-2
))
257 (read-multibyte-character r0 r1
)
260 (;; Big 5 encoded characters are first translated to CNS
262 (translate-character big5-to-cns r0 r1
)
264 (translate-character big5-to-cns r0 r1
)))
269 (if (r0 == ,(charset-id 'chinese-cns11643-3
))
271 (if (r0 == ,(charset-id 'chinese-cns11643-4
))
273 (if (r0 == ,(charset-id 'chinese-cns11643-5
))
275 (if (r0 == ,(charset-id 'chinese-cns11643-6
))
277 (if (r0 == ,(charset-id 'chinese-cns11643-7
))
279 ;; not CNS. We use a dummy character which
280 ;; can't occur in EUC-TW encoding to indicate
282 (write-repeat #xFF
))))))))))
284 ;; single shift and CNS plane
287 (write ((r1 >> 7) + #x80
))
288 (write ((r1 %
#x80
) + #x80
))
290 "CCL program to encode EUC-TW encoding."
293 (defun euc-tw-pre-write-conversion (beg end
)
294 "Semi-dummy pre-write function effectively to autoload china-util."
295 ;; Ensure translation table is loaded.
296 (require 'china-util
)
297 ;; Don't do this again.
298 (coding-system-put 'euc-tw
'pre-write-conversion nil
)
303 "ISO 2022 based EUC encoding for Chinese CNS11643.
304 Big5 encoding is accepted for input also (which is then converted to CNS)."
305 '(ccl-decode-euc-tw . ccl-encode-euc-tw
)
306 '((safe-charsets ascii
316 (valid-codes (0 .
255))
317 (pre-write-conversion . euc-tw-pre-write-conversion
)))
;; Make `euc-taiwan' an alias of the `euc-tw' coding system.
(define-coding-system-alias 'euc-taiwan 'euc-tw)
321 (set-language-info-alist
322 "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2
323 chinese-cns11643-3 chinese-cns11643-4
324 chinese-cns11643-5 chinese-cns11643-6
326 (coding-system iso-2022-cn euc-tw
)
327 (coding-priority iso-2022-cn euc-tw chinese-big5
329 (features china-util
)
330 (input-method .
"chinese-cns-quick")
332 Support for Chinese CNS character sets. Note that the EUC-TW coding system
333 accepts Big5 for input also (which is then converted to CNS)."))
336 (set-language-info-alist
337 "Chinese-EUC-TW" '((charset chinese-cns11643-1 chinese-cns11643-2
338 chinese-cns11643-3 chinese-cns11643-4
339 chinese-cns11643-5 chinese-cns11643-6
340 chinese-cns11643-7 chinese-big5-1 chinese-big5-2
)
341 (coding-system euc-tw iso-2022-cn
)
342 (coding-priority euc-tw chinese-big5 iso-2022-cn
344 (features china-util
)
345 (input-method .
"chinese-cns-quick")
347 Support for Chinese, prefering the EUC-TW character set. Note that
348 the EUC-TW coding system accepts Big5 for input also (which is then
349 converted to CNS)."))
354 ;;; arch-tag: b82fcf7a-84f6-4e0b-b38c-1742dac0e09f
355 ;;; chinese.el ends here