(occur-read-primary-args): Pass default to read-from-minibuffer.
[emacs.git] / lisp / language / chinese.el
blobe3a1938b7908aeacae61c27ce59dc93996950b9b
1 ;;; chinese.el --- support for Chinese -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
6 ;; Keywords: multilingual, Chinese
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
25 ;;; Commentary:
27 ;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are
28 ;; supported.
30 ;;; Code:
32 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33 ;;; Chinese (general)
34 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ISO-2022-CN: a type 2 (ISO 2022) coding system shown with mnemonic `C'.
;; The flag list starts with the four charset designation slots, followed
;; by the option flags, in the order expected by `make-coding-system'.
(let ((cn-flags
       '(ascii
         (nil chinese-gb2312 chinese-cns11643-1)
         (nil chinese-cns11643-2)
         nil
         nil ascii-eol ascii-cntl seven locking-shift single-shift
         nil nil nil
         init-bol))
      (cn-props
       '((safe-charsets ascii chinese-gb2312
                        chinese-cns11643-1 chinese-cns11643-2)
         (mime-charset . iso-2022-cn))))
  (make-coding-system
   'iso-2022-cn 2 ?C
   "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)."
   cn-flags
   cn-props))

;; `chinese-iso-7bit' is another name for the same coding system.
(define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn)
;; ISO-2022-CN-EXT: like ISO-2022-CN, but the fourth designation slot also
;; accepts CNS planes 3 to 7, and those planes are listed as safe charsets.
(let ((cn-ext-flags
       '(ascii
         (nil chinese-gb2312 chinese-cns11643-1)
         (nil chinese-cns11643-2)
         (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
              chinese-cns11643-6 chinese-cns11643-7)
         nil ascii-eol ascii-cntl seven locking-shift single-shift
         nil nil nil
         init-bol))
      (cn-ext-props
       '((safe-charsets ascii chinese-gb2312
                        chinese-cns11643-1 chinese-cns11643-2
                        chinese-cns11643-3 chinese-cns11643-4
                        chinese-cns11643-5 chinese-cns11643-6
                        chinese-cns11643-7)
         (mime-charset . iso-2022-cn-ext))))
  (make-coding-system
   'iso-2022-cn-ext 2 ?C
   "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)."
   cn-ext-flags
   cn-ext-props))
66 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
67 ;;; Chinese GB2312 (simplified)
68 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; EUC encoding of GB2312: a type 2 coding system shown with mnemonic `c'.
(let ((gb-flags
       '(ascii chinese-gb2312 nil nil
               nil ascii-eol ascii-cntl nil nil nil nil))
      (gb-props
       '((safe-charsets ascii chinese-gb2312)
         (mime-charset . gb2312))))
  (make-coding-system
   'chinese-iso-8bit 2 ?c
   "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)."
   gb-flags
   gb-props))

;; Alternative names for `chinese-iso-8bit', registered in the same order
;; as before.
(dolist (alias '(cn-gb-2312 euc-china euc-cn cn-gb gb2312 cp936))
  (define-coding-system-alias alias 'chinese-iso-8bit))
;; HZ: a type 0 coding system shown with mnemonic `z'.  No flags are given;
;; the conversion is delegated to the post-read and pre-write functions
;; named in the property list.
(let ((hz-props
       '((safe-charsets ascii chinese-gb2312)
         (mime-charset . hz-gb-2312)
         (post-read-conversion . post-read-decode-hz)
         (pre-write-conversion . pre-write-encode-hz))))
  (make-coding-system
   'chinese-hz 0 ?z
   "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)."
   nil
   hz-props))

;; Alternative names for `chinese-hz'.
(dolist (alias '(hz-gb-2312 hz))
  (define-coding-system-alias alias 'chinese-hz))
(defun post-read-decode-hz (len)
  "Decode the LEN characters following point from HZ.
Calls `decode-hz-region' on the region from point to point + LEN and
returns its value.  The buffer's modified flag is put back to what it
was before decoding, and `last-coding-system-used' is bound locally
around the call."
  (let* ((hz-start (point))
         (hz-end (+ hz-start len))
         (was-modified (buffer-modified-p))
         ;; Local binding, so assignments made while decoding stay local.
         last-coding-system-used
         (result (decode-hz-region hz-start hz-end)))
    (set-buffer-modified-p was-modified)
    result))
(defun pre-write-encode-hz (from to)
  "Encode text to HZ in a fresh temporary buffer before writing.
FROM is either a string, which is inserted as is, or a buffer position,
in which case the text between FROM and TO of the buffer that was
current on entry is copied.  The copy is encoded with
`encode-hz-region' while `last-coding-system-used' is bound locally.
The temporary buffer is left current on return; the value is always nil."
  (let ((origin (current-buffer))
        (scratch (generate-new-buffer " *temp*")))
    ;; Deliberately no `save-excursion': the new buffer stays current.
    (set-buffer scratch)
    (cond ((stringp from) (insert from))
          (t (insert-buffer-substring origin from to)))
    (let (last-coding-system-used)
      (encode-hz-region 1 (point-max)))
    nil))
;; Language environment for simplified Chinese (GB2312), filed under the
;; "Chinese" parent group.
(let ((gb-info
       '((charset chinese-gb2312 chinese-sisheng)
         (coding-system chinese-iso-8bit iso-2022-cn chinese-hz)
         (coding-priority chinese-iso-8bit chinese-big5 iso-2022-cn)
         (input-method . "chinese-py-punct")
         (features china-util)
         (sample-text . "Chinese (\e$AVPND\e(B,\e$AFUM(;0\e(B,\e$A::So\e(B) \e$ADc:C\e(B")
         (documentation . "Support for Chinese GB2312 character set.")
         (tutorial . "TUTORIAL.cn"))))
  (set-language-info-alist "Chinese-GB" gb-info '("Chinese")))
126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
127 ;;; Chinese BIG5 (traditional)
128 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BIG5: a type 3 coding system shown with mnemonic `B'.
;; The fifth argument of `make-coding-system' is FLAGS and the sixth is
;; PROPERTIES.  FLAGS is passed explicitly as nil so that the alist below
;; is received as PROPERTIES; without it the alist would be taken as FLAGS
;; and the safe charsets, MIME charset and charset origins would never be
;; recorded.
(make-coding-system
 'chinese-big5 3 ?B
 "BIG5 8-bit encoding for Chinese (MIME:Big5)."
 nil
 '((safe-charsets ascii chinese-big5-1 chinese-big5-2)
   (mime-charset . big5)
   (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char)
                         (chinese-big5-2 "BIG5" encode-big5-char))))

;; Alternative names for `chinese-big5'.
(define-coding-system-alias 'big5 'chinese-big5)
(define-coding-system-alias 'cn-big5 'chinese-big5)
(define-coding-system-alias 'cp950 'chinese-big5)
143 ;; Big5 font requires special encoding.
;; The program below turns a charset id plus two position codes into the
;; two-byte code point used by a Big5 font:
;;   1. r2 becomes the linear index (r1 - #x21) * 94 + (r2 - #x21);
;;   2. 6280 is added when the charset is chinese-big5-2 (presumably the
;;      number of code points covered by chinese-big5-1 -- TODO confirm);
;;   3. the index is divided by 157: the quotient plus #xA1 is the first
;;      byte, the remainder is the offset of the second byte;
;;   4. offsets below #x3F are shifted to start at #x40, all other offsets
;;      are shifted to start at #xA1.
;; NOTE(review): the body uses comma unquoting around charset-id, yet no
;; enclosing backquote is visible and the parentheses close one level too
;; many before the docstring.  A line introducing the backquoted program
;; list appears to be missing after the define-ccl-program line -- TODO
;; restore it from the upstream file.
144 (define-ccl-program ccl-encode-big5-font
146 ;; In: R0:chinese-big5-1 or chinese-big5-2
147 ;; R1:position code 1
148 ;; R2:position code 2
149 ;; Out: R1:font code point 1
150 ;; R2:font code point 2
151 ((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21))
152 (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280))
153 (r1 = ((r2 / 157) + ?\xA1))
154 (r2 %= 157)
155 (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62))))
156 "CCL program to encode a Big5 code to code point of Big5 font.")
;; Put the Big5 font encoder at the front of `font-ccl-encoder-alist',
;; keyed by the string "big5".
(push (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist)
;; Language environment for traditional Chinese in Big5, filed under the
;; "Chinese" parent group.
(let ((big5-info
       '((charset chinese-big5-1 chinese-big5-2)
         (coding-system chinese-big5 chinese-iso-7bit)
         (coding-priority chinese-big5 iso-2022-cn chinese-iso-8bit)
         (input-method . "chinese-py-punct-b5")
         (features china-util)
         (sample-text . "Cantonese (\e$(0GnM$\e(B,\e$(0N]0*Hd\e(B) \e$(0*/=(\e(B, \e$(0+$)p\e(B")
         (documentation . "Support for Chinese Big5 character set.")
         (tutorial . "TUTORIAL.zh"))))
  (set-language-info-alist "Chinese-BIG5" big5-info '("Chinese")))
172 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
173 ;;; Chinese CNS11643 (traditional)
174 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defvar big5-to-cns (make-translation-table)
  "Translation table for encoding to `euc-tw'.")
;; Register the table under its symbol only when no `translation-table'
;; property is present yet; china-util may already have done this if it
;; was loaded before.
(or (get 'big5-to-cns 'translation-table)
    (define-translation-table 'big5-to-cns big5-to-cns))
;; EUC-TW decoder.  Register use visible in the body: r0 to r3 receive the
;; bytes read from the input; r4, r5 and r6 hold the charset ids of CNS
;; planes 1, 2 and 3; r1 and r2 are later reused for the charset id and the
;; combined code passed to write-multibyte-character.
;; Bytes that do not form a valid sequence are written out unchanged.
;; NOTE(review): the body uses comma unquoting around charset-id, yet no
;; enclosing backquote and no magnification value (mentioned in the comment
;; below) are visible, and nothing closes the form after its docstring.
;; Lines appear to have been lost -- TODO restore them from the upstream
;; file.
182 (define-ccl-program ccl-decode-euc-tw
183 ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding;
184 ;; CNS planes 2 to 7 always need four bytes. In internal encoding of
185 ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need
186 ;; four bytes. Thus a buffer magnification value of 2 (for both
187 ;; encoding and decoding) is sufficient.
189 ;; we don't have enough registers to hold all charset-ids
190 ((r4 = ,(charset-id 'chinese-cns11643-1))
191 (r5 = ,(charset-id 'chinese-cns11643-2))
192 (r6 = ,(charset-id 'chinese-cns11643-3))
193 (loop
194 (read-if (r0 < #x80)
195 ;; ASCII
196 (write-repeat r0)
197 ;; not ASCII
198 (if (r0 == #x8E)
199 ;; single shift
200 (read-if (r1 < #xA1)
201 ;; invalid byte
202 ((write r0)
203 (write-repeat r1))
204 (if (r1 > #xA7)
205 ;; invalid plane
206 ((write r0)
207 (write-repeat r1))
208 ;; OK, we have a plane
209 (read-if (r2 < #xA1)
210 ;; invalid first byte
211 ((write r0 r1)
212 (write-repeat r2))
213 (read-if (r3 < #xA1)
214 ;; invalid second byte
215 ((write r0 r1 r2)
216 (write-repeat r3))
217 ;; CNS 1-7, finally
;; r1 is within #xA1..#xA7 here, so r1 - #xA1 is the zero-based plane
;; index; each branch arm loads the charset id of that plane into r1.
218 ((branch (r1 - #xA1)
219 (r1 = r4)
220 (r1 = r5)
221 (r1 = r6)
222 (r1 = ,(charset-id 'chinese-cns11643-4))
223 (r1 = ,(charset-id 'chinese-cns11643-5))
224 (r1 = ,(charset-id 'chinese-cns11643-6))
225 (r1 = ,(charset-id 'chinese-cns11643-7)))
;; Pack the two code bytes into one value: r2 minus #x80 becomes the
;; upper seven bits, r3 minus #x80 the lower seven bits.
226 (r2 = ((((r2 - #x80) << 7) + r3) - #x80))
227 (write-multibyte-character r1 r2)
228 (repeat))))))
229 ;; standard EUC
230 (if (r0 < #xA1)
231 ;; invalid first byte
232 (write-repeat r0)
233 (read-if (r1 < #xA1)
234 ;; invalid second byte
235 ((write r0)
236 (write-repeat r1))
237 ;; CNS 1, finally
;; Same packing as above, applied to r0 and r1; the charset is always
;; plane 1 (r4).
238 ((r1 = ((((r0 - #x80) << 7) + r1) - #x80))
239 (write-multibyte-character r4 r1)
240 (repeat)))))))))
241 "CCL program to decode EUC-TW encoding."
;; EUC-TW encoder.  Register use visible in the body: r2 to r6 hold the
;; charset ids of ascii, chinese-big5-1, chinese-big5-2 and CNS planes 1
;; and 2; each loop iteration reads one character into r0 (charset id) and
;; r1 (code).
;; Per character: ASCII is written as is; Big5 characters are first passed
;; through the big5-to-cns translation; the charset id in r0 is then
;; replaced by a plane byte from #xA1 to #xA7; any plane other than #xA1
;; is announced with #x8E followed by the plane byte; finally the code in
;; r1 is written as two bytes, each offset by #x80.
;; NOTE(review): the body uses comma unquoting around charset-id, yet no
;; enclosing backquote is visible, and nothing closes the form after its
;; docstring.  Lines appear to have been lost -- TODO restore them from the
;; upstream file.
244 (define-ccl-program ccl-encode-euc-tw
246 ;; we don't have enough registers to hold all charset-ids
247 ((r2 = ,(charset-id 'ascii))
248 (r3 = ,(charset-id 'chinese-big5-1))
249 (r4 = ,(charset-id 'chinese-big5-2))
250 (r5 = ,(charset-id 'chinese-cns11643-1))
251 (r6 = ,(charset-id 'chinese-cns11643-2))
252 (loop
253 (read-multibyte-character r0 r1)
254 (if (r0 == r2)
255 (write-repeat r1)
256 (;; Big 5 encoded characters are first translated to CNS
257 (if (r0 == r3)
258 (translate-character big5-to-cns r0 r1)
259 (if (r0 == r4)
260 (translate-character big5-to-cns r0 r1)))
;; Map the charset id now in r0 to the EUC-TW plane byte.
261 (if (r0 == r5)
262 (r0 = #xA1)
263 (if (r0 == r6)
264 (r0 = #xA2)
265 (if (r0 == ,(charset-id 'chinese-cns11643-3))
266 (r0 = #xA3)
267 (if (r0 == ,(charset-id 'chinese-cns11643-4))
268 (r0 = #xA4)
269 (if (r0 == ,(charset-id 'chinese-cns11643-5))
270 (r0 = #xA5)
271 (if (r0 == ,(charset-id 'chinese-cns11643-6))
272 (r0 = #xA6)
273 (if (r0 == ,(charset-id 'chinese-cns11643-7))
274 (r0 = #xA7)
275 ;; not CNS. We use a dummy character which
276 ;; can't occur in EUC-TW encoding to indicate
277 ;; this.
278 (write-repeat #xFF))))))))))
;; Plane 1 is written as two bytes only; every other plane gets the
;; two-byte prefix below.
279 (if (r0 != #xA1)
280 ;; single shift and CNS plane
281 ((write #x8E)
282 (write r0)))
;; Split the code in r1 into its upper part (shifted right by seven bits)
;; and its lower seven bits, each offset by #x80.
283 (write ((r1 >> 7) + #x80))
284 (write ((r1 % #x80) + #x80))
285 (repeat))))
286 "CCL program to encode EUC-TW encoding."
(defun euc-tw-pre-write-conversion (beg end)
  "One-shot pre-write hook for `euc-tw' that loads china-util.
BEG and END are accepted but not used.  After requiring `china-util'
the function clears the `pre-write-conversion' property of `euc-tw', so
it is not called again through that property.  Always returns nil."
  (prog1 nil
    ;; Loading china-util makes sure the translation table is available.
    (require 'china-util)
    ;; Unhook: the work above only needs to happen once.
    (coding-system-put 'euc-tw 'pre-write-conversion nil)))
;; EUC-TW: a type 4 coding system shown with mnemonic `Z'.  The flags are
;; the pair of CCL programs used for decoding and encoding.
(let ((euc-tw-ccl '(ccl-decode-euc-tw . ccl-encode-euc-tw))
      (euc-tw-props
       '((safe-charsets ascii
                        chinese-big5-1 chinese-big5-2
                        chinese-cns11643-1 chinese-cns11643-2
                        chinese-cns11643-3 chinese-cns11643-4
                        chinese-cns11643-5 chinese-cns11643-6
                        chinese-cns11643-7)
         (valid-codes (0 . 255))
         (pre-write-conversion . euc-tw-pre-write-conversion))))
  (make-coding-system
   'euc-tw 4 ?Z
   "ISO 2022 based EUC encoding for Chinese CNS11643.
Big5 encoding is accepted for input also (which is then converted to CNS)."
   euc-tw-ccl
   euc-tw-props))

;; `euc-taiwan' is another name for the same coding system.
(define-coding-system-alias 'euc-taiwan 'euc-tw)
;; Language environment for traditional Chinese in CNS11643, filed under
;; the "Chinese" parent group.  ISO-2022-CN comes first in both the
;; coding system list and the priority list.
(let ((cns-info
       '((charset chinese-cns11643-1 chinese-cns11643-2
                  chinese-cns11643-3 chinese-cns11643-4
                  chinese-cns11643-5 chinese-cns11643-6
                  chinese-cns11643-7)
         (coding-system iso-2022-cn euc-tw)
         (coding-priority iso-2022-cn euc-tw chinese-big5
                          chinese-iso-8bit)
         (features china-util)
         (input-method . "chinese-cns-quick")
         (documentation . "\
Support for Chinese CNS character sets. Note that the EUC-TW coding system
accepts Big5 for input also (which is then converted to CNS)."))))
  (set-language-info-alist "Chinese-CNS" cns-info '("Chinese")))
;; Language environment for traditional Chinese that puts EUC-TW first in
;; both the coding system list and the priority list; it also lists the
;; two Big5 charsets.  Filed under the "Chinese" parent group.
;; The documentation string is shown to users, so its spelling matters
;; ("preferring", not "prefering").
(set-language-info-alist
 "Chinese-EUC-TW" '((charset chinese-cns11643-1 chinese-cns11643-2
                             chinese-cns11643-3 chinese-cns11643-4
                             chinese-cns11643-5 chinese-cns11643-6
                             chinese-cns11643-7 chinese-big5-1 chinese-big5-2)
                    (coding-system euc-tw iso-2022-cn)
                    (coding-priority euc-tw chinese-big5 iso-2022-cn
                                     chinese-iso-8bit)
                    (features china-util)
                    (input-method . "chinese-cns-quick")
                    (documentation . "\
Support for Chinese, preferring the EUC-TW character set. Note that
the EUC-TW coding system accepts Big5 for input also (which is then
converted to CNS)."))
 '("Chinese"))
;; Announce the chinese feature so that a later require of it finds the
;; file already loaded.
348 (provide 'chinese)
350 ;;; arch-tag: b82fcf7a-84f6-4e0b-b38c-1742dac0e09f
351 ;;; chinese.el ends here