1 ;;; japan-util.el --- utilities for Japanese
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
6 ;; Keywords: mule, multilingual, Japanese
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (defvar sentence-end-save nil
)
30 (defun setup-japanese-environment ()
31 "Setup multilingual environment (MULE) for Japanese."
33 (set-language-environment "Japanese"))
36 (defun setup-japanese-environment-internal ()
37 (cond ((eq system-type
'ms-dos
)
38 (prefer-coding-system 'japanese-shift-jis
))
39 ((eq system-type
'usg-unix-v
)
40 (prefer-coding-system 'japanese-iso-8bit
)))
41 (setq sentence-end-save sentence-end
)
42 (setq sentence-end
(concat sentence-end
"\\|[\e$B!#!)!*\e(B]")))
44 (defun exit-japanese-environment ()
45 (setq sentence-end sentence-end-save
))
47 (defconst japanese-kana-table
48 '((?
\e$B$
"\e(B ?\e$B%"\e(B ?
\e(I1\e(B) (?
\e$B$$
\e(B ?
\e$B%$
\e(B ?
\e(I2\e(B) (?
\e$B$
&\e(B ?
\e$B%
&\e(B ?
\e(I3\e(B) (?
\e$B$
(\e(B ?
\e$B%
(\e(B ?
\e(I4\e(B) (?
\e$B$
*\e(B ?
\e$B%
*\e(B ?
\e(I5\e(B)
49 (?
\e$B$
+\e(B ?
\e$B%
+\e(B ?
\e(I6\e(B) (?
\e$B$-
\e(B ?
\e$B%-
\e(B ?
\e(I7\e(B) (?
\e$B$
/\e(B ?
\e$B%
/\e(B ?
\e(I8\e(B) (?
\e$B$
1\e(B ?
\e$B%
1\e(B ?
\e(I9\e(B) (?
\e$B$
3\e(B ?
\e$B%
3\e(B ?
\e(I:\e(B)
50 (?
\e$B$
5\e(B ?
\e$B%
5\e(B ?
\e(I;\e(B) (?\e$B$7\e(B ?\e$B%7\e(B ?\e(I<\e(B) (?\e$B$9\e(B ?\e$B%9\e(B ?\e(I=\e(B) (?\e$B$;\e(B ?\e$B%;\e(B ?\e(I>\e(B) (?\e$B$=\e(B ?\e$B%=\e(B ?\e(I?\e(B)
51 (?
\e$B$?
\e(B ?
\e$B%?
\e(B ?
\e(I@\e(B) (?
\e$B$A
\e(B ?
\e$B%A
\e(B ?
\e(IA\e(B) (?
\e$B$D
\e(B ?
\e$B%D
\e(B ?
\e(IB\e(B) (?
\e$B$F
\e(B ?
\e$B%F
\e(B ?
\e(IC\e(B) (?
\e$B$H
\e(B ?
\e$B%H
\e(B ?
\e(ID\e(B)
52 (?
\e$B$J
\e(B ?
\e$B%J
\e(B ?
\e(IE\e(B) (?
\e$B$K
\e(B ?
\e$B%K
\e(B ?
\e(IF\e(B) (?
\e$B$L
\e(B ?
\e$B%L
\e(B ?
\e(IG\e(B) (?
\e$B$M
\e(B ?
\e$B%M
\e(B ?
\e(IH\e(B) (?
\e$B$N
\e(B ?
\e$B%N
\e(B ?
\e(II\e(B)
53 (?
\e$B$O
\e(B ?
\e$B%O
\e(B ?
\e(IJ\e(B) (?
\e$B$R
\e(B ?
\e$B%R
\e(B ?
\e(IK\e(B) (?
\e$B$U
\e(B ?
\e$B%U
\e(B ?
\e(IL\e(B) (?
\e$B$X
\e(B ?
\e$B%X
\e(B ?
\e(IM\e(B) (?
\e$B$
[\e(B ?
\e$B%
[\e(B ?
\e(IN\e(B)
54 (?
\e$B$^
\e(B ?
\e$B%^
\e(B ?
\e(IO\e(B) (?
\e$B$_
\e(B ?
\e$B%_
\e(B ?
\e(IP\e(B) (?
\e$B$
`\e(B ?
\e$B%
`\e(B ?
\e(IQ\e(B) (?
\e$B$a
\e(B ?
\e$B%a
\e(B ?
\e(IR\e(B) (?
\e$B$b
\e(B ?
\e$B%b
\e(B ?
\e(IS\e(B)
55 (?
\e$B$d
\e(B ?
\e$B%d
\e(B ?
\e(IT\e(B) (?
\e$B$f
\e(B ?
\e$B%f
\e(B ?
\e(IU\e(B) (?
\e$B$h
\e(B ?
\e$B%h
\e(B ?
\e(IV\e(B)
56 (?
\e$B$i
\e(B ?
\e$B%i
\e(B ?
\e(IW\e(B) (?
\e$B$j
\e(B ?
\e$B%j
\e(B ?
\e(IX\e(B) (?
\e$B$k
\e(B ?
\e$B%k
\e(B ?
\e(IY\e(B) (?
\e$B$l
\e(B ?
\e$B%l
\e(B ?
\e(IZ\e(B) (?
\e$B$m
\e(B ?
\e$B%m
\e(B ?
\e(I[\e(B)
57 (?
\e$B$o
\e(B ?
\e$B%o
\e(B ?
\e(I\
\e(B) (?
\e$B$p
\e(B ?
\e$B%p
\e(B "\e(I2\e(B") (?
\e$B$q
\e(B ?
\e$B%q
\e(B "\e(I4\e(B") (?
\e$B$r
\e(B ?
\e$B%r
\e(B ?
\e(I&\e(B)
58 (?
\e$B$s
\e(B ?
\e$B%s
\e(B ?
\e(I]\e(B)
59 (?
\e$B$
,\e(B ?
\e$B%
,\e(B "\e(I6^\e(B") (?
\e$B$.
\e(B ?
\e$B%.
\e(B "\e(I7^\e(B") (?
\e$B$
0\e(B ?
\e$B%
0\e(B "\e(I8^\e(B") (?
\e$B$
2\e(B ?
\e$B%
2\e(B "\e(I9^\e(B") (?
\e$B$
4\e(B ?
\e$B%
4\e(B "\e(I:^\e(B")
60 (?
\e$B$
6\e(B ?
\e$B%
6\e(B "\e(I;^\e(B") (?
\e$B$
8\e(B ?
\e$B%
8\e(B "\e(I<^\e(B") (?
\e$B$
:\e(B ?
\e$B%
:\e(B "\e(I=^\e(B") (?
\e$B$
<\e(B ?
\e$B%
<\e(B "\e(I>^\e(B") (?
\e$B$
>\e(B ?
\e$B%
>\e(B "\e(I?^\e(B")
61 (?
\e$B$
@\e(B ?
\e$B%
@\e(B "\e(I@^\e(B") (?
\e$B$B
\e(B ?
\e$B%B
\e(B "\e(IA^\e(B") (?
\e$B$E
\e(B ?
\e$B%E
\e(B "\e(IB^\e(B") (?
\e$B$G
\e(B ?
\e$B%G
\e(B "\e(IC^\e(B") (?
\e$B$I
\e(B ?
\e$B%I
\e(B "\e(ID^\e(B")
62 (?
\e$B$P
\e(B ?
\e$B%P
\e(B "\e(IJ^\e(B") (?
\e$B$S
\e(B ?
\e$B%S
\e(B "\e(IK^\e(B") (?
\e$B$V
\e(B ?
\e$B%V
\e(B "\e(IL^\e(B") (?
\e$B$Y
\e(B ?
\e$B%Y
\e(B "\e(IM^\e(B") (?
\e$B$\
\e(B ?
\e$B%\
\e(B "\e(IN^\e(B")
63 (?
\e$B$Q
\e(B ?
\e$B%Q
\e(B "\e(IJ_\e(B") (?
\e$B$T
\e(B ?
\e$B%T
\e(B "\e(IK_\e(B") (?
\e$B$W
\e(B ?
\e$B%W
\e(B "\e(IL_\e(B") (?
\e$B$Z
\e(B ?
\e$B%Z
\e(B "\e(IM_\e(B") (?
\e$B$
]\e(B ?
\e$B%
]\e(B "\e(IN_\e(B")
64 (?
\e$B$
!\e(B ?
\e$B%
!\e(B ?
\e(I'\e(B) (?
\e$B$
#\e(B ?
\e$B%
#\e(B ?
\e(I(\e(B) (?
\e$B$%
\e(B ?
\e$B%%
\e(B ?
\e(I)\e(B) (?
\e$B$
'\e(B ?
\e$B%
'\e(B ?
\e(I*\e(B) (?
\e$B$
)\e(B ?
\e$B%
)\e(B ?
\e(I+\e(B)
65 (?
\e$B$C
\e(B ?
\e$B%C
\e(B ?
\e(I/\e(B)
66 (?
\e$B$c
\e(B ?
\e$B%c
\e(B ?
\e(I,\e(B) (?
\e$B$e
\e(B ?
\e$B%e
\e(B ?
\e(I-\e(B) (?
\e$B$g
\e(B ?
\e$B%g
\e(B ?
\e(I.
\e(B)
67 (?
\e$B$n
\e(B ?
\e$B%n
\e(B "\e(I\\e(B")
68 ("\e$B$&!+\e(B" ?
\e$B%t
\e(B "\e(I3^\e(B") (nil ?
\e$B%u
\e(B "\e(I6\e(B") (nil ?
\e$B%v
\e(B "\e(I9\e(B"))
69 "Japanese JISX0208 Kana character table.
70 Each element is of the form (HIRAGANA KATAKANA HANKAKU-KATAKANA), where
71 HIRAGANA and KATAKANA belong to `japanese-jisx0208',
72 HANKAKU-KATAKANA belongs to `japanese-jisx0201-kana'.")
74 ;; Put properties 'katakana, 'hiragana, and 'jix0201 to each Japanese
75 ;; kana characters for conversion among them.
76 (let ((l japanese-kana-table
)
77 slot hiragana katakana jisx0201
)
80 hiragana
(car slot
) katakana
(nth 1 slot
) jisx0201
(nth 2 slot
)
83 (if (stringp hiragana
)
84 (if (> (length hiragana
) 1)
85 (let ((hira (aref hiragana
0)))
86 (put-char-code-property
87 hira
'kana-composition
88 (cons (cons (aref hiragana
1) katakana
)
89 (get-char-code-property hira
'kana-composition
)))))
90 (put-char-code-property hiragana
'katakana katakana
)
91 (put-char-code-property hiragana
'jisx0201 jisx0201
)))
92 (when (integerp katakana
)
93 (put-char-code-property katakana
'hiragana hiragana
)
94 (put-char-code-property katakana
'jisx0201 jisx0201
))
96 (if (stringp jisx0201
)
97 (if (> (length jisx0201
) 1)
98 (let ((kana (aref jisx0201
0)))
99 (put-char-code-property
100 kana
'kana-composition
101 (cons (cons (aref jisx0201
1) katakana
)
102 (get-char-code-property kana
'kana-composition
)))))
103 (put-char-code-property jisx0201
'hiragana hiragana
)
104 (put-char-code-property jisx0201
'katakana katakana
)
105 (put-char-code-property jisx0201
'jisx0208 katakana
)))))
107 (defconst japanese-symbol-table
108 '((?\
\e$B
!!\e(B ?\
) (?
\e$B
!$
\e(B ?
, ?
\e(I$
\e(B) (?
\e$B
!%
\e(B ?. ?
\e(I!\e(B) (?
\e$B
!"\e(B ?, ?\e(I$\e(B) (?\e$B!#\e(B ?. ?\e(I!\e(B) (?\e$B!&\e(B nil ?\e(I%\e(B)
109 (?\e$B!'\e(B ?:) (?\e$B!(\e(B ?\;) (?\e$B!)\e(B ??) (?\e$B!*\e(B ?!) (?\e$B!+\e(B nil ?\e(I^\e(B) (?\e$B!,\e(B nil ?\e(I_\e(B)
110 (?\e$B!-\e(B ?') (?\e$B!.\e(B ?`) (?\e$B!0\e(B ?^) (?\e$B!2\e(B ?_) (?\e$B!<\e(B ?-) (?\e$B!=\e(B ?-) (?\e$B!>\e(B ?-)
111 (?\e$B!?\e(B ?/) (?\e$B!@\e(B ?\\) (?\e$B!A\e(B ?~) (?\e$B!C\e(B ?|) (?\e$B!F\e(B ?`) (?\e$B!G\e(B ?') (?\e$B!H\e(B ?\") (?\e$B!I\e(B ?\")
112 (?\\e$B!J\e(B ?\() (?\\e$B!K\e(B ?\)) (?\\e$B!N\e(B ?[) (?\\e$B!O\e(B ?]) (?\\e$B!P\e(B ?{) (?\\e$B!Q\e(B ?})
113 (?\e$B!R\e(B ?<) (?\e$B!S\e(B ?>) (?\e$B!\\e(B ?+) (?\e$B!]\e(B ?-) (?\e$B!a\e(B ?=) (?\e$B!c\e(B ?<) (?\e$B!d\e(B ?>)
114 (?\e$B!l\e(B ?') (?\e$B!m\e(B ?\") (?\e$B!o\e(B ?\\) (?\e$B!p\e(B ?$) (?\e$B!s\e(B ?%) (?\e$B!t\e(B ?#) (?\e$B!u\e(B ?&) (?\e$B!v\e(B ?*)
116 "Japanese JISX0208 symbol character table.
117 Each element is of the form
(SYMBOL ASCII HANKAKU
), where SYMBOL
118 belongs to
`japanese-jisx0208
', ASCII belongs to
`ascii
', and HANKAKU
119 belongs to
`japanese-jisx0201-kana
'.
")
121 ;; Put properties 'jisx0208, 'jisx0201, and 'ascii to each Japanese
122 ;; symbol and ASCII characters for conversion among them.
123 (let ((l japanese-symbol-table)
124 slot jisx0208 ascii jisx0201)
127 jisx0208 (car slot) ascii (nth 1 slot) jisx0201 (nth 2 slot)
131 (put-char-code-property jisx0208 'ascii ascii)
132 (put-char-code-property ascii 'jisx0208 jisx0208)))
135 (put-char-code-property jisx0208 'jisx0201 jisx0201)
136 (put-char-code-property jisx0201 'jisx0208 jisx0208)))))
138 (defconst japanese-alpha-numeric-table
139 '((?\e$B#0\e(B . ?0) (?\e$B#1\e(B . ?1) (?\e$B#2\e(B . ?2) (?\e$B#3\e(B . ?3) (?\e$B#4\e(B . ?4)
140 (?\e$B#5\e(B . ?5) (?\e$B#6\e(B . ?6) (?\e$B#7\e(B . ?7) (?\e$B#8\e(B . ?8) (?\e$B#9\e(B . ?9)
141 (?\e$B#A\e(B . ?A) (?\e$B#B\e(B . ?B) (?\e$B#C\e(B . ?C) (?\e$B#D\e(B . ?D) (?\e$B#E\e(B . ?E)
142 (?\e$B#F\e(B . ?F) (?\e$B#G\e(B . ?G) (?\e$B#H\e(B . ?H) (?\e$B#I\e(B . ?I) (?\e$B#J\e(B . ?J)
143 (?\e$B#K\e(B . ?K) (?\e$B#L\e(B . ?L) (?\e$B#M\e(B . ?M) (?\e$B#N\e(B . ?N) (?\e$B#O\e(B . ?O)
144 (?\e$B#P\e(B . ?P) (?\e$B#Q\e(B . ?Q) (?\e$B#R\e(B . ?R) (?\e$B#S\e(B . ?S) (?\e$B#T\e(B . ?T)
145 (?\e$B#U\e(B . ?U) (?\e$B#V\e(B . ?V) (?\e$B#W\e(B . ?W) (?\e$B#X\e(B . ?X) (?\e$B#Y\e(B . ?Y) (?\e$B#Z\e(B . ?Z)
146 (?\e$B#a\e(B . ?a) (?\e$B#b\e(B . ?b) (?\e$B#c\e(B . ?c) (?\e$B#d\e(B . ?d) (?\e$B#e\e(B . ?e)
147 (?\e$B#f\e(B . ?f) (?\e$B#g\e(B . ?g) (?\e$B#h\e(B . ?h) (?\e$B#i\e(B . ?i) (?\e$B#j\e(B . ?j)
148 (?\e$B#k\e(B . ?k) (?\e$B#l\e(B . ?l) (?\e$B#m\e(B . ?m) (?\e$B#n\e(B . ?n) (?\e$B#o\e(B . ?o)
149 (?\e$B#p\e(B . ?p) (?\e$B#q\e(B . ?q) (?\e$B#r\e(B . ?r) (?\e$B#s\e(B . ?s) (?\e$B#t\e(B . ?t)
150 (?\e$B#u\e(B . ?u) (?\e$B#v\e(B . ?v) (?\e$B#w\e(B . ?w) (?\e$B#x\e(B . ?x) (?\e$B#y\e(B . ?y) (?\e$B#z\e(B . ?z))
151 "Japanese JISX0208 alpha numeric character table.
152 Each element is of the form
(ALPHA-NUMERIC ASCII
), where ALPHA-NUMERIC
153 belongs to
`japanese-jisx0208
', ASCII belongs to
`ascii
'.
")
155 ;; Put properties 'jisx0208 and 'ascii to each Japanese alpha numeric
156 ;; and ASCII characters for conversion between them.
157 (let ((l japanese-alpha-numeric-table)
161 jisx0208 (car slot) ascii (cdr slot)
163 (put-char-code-property jisx0208 'ascii ascii)
164 (put-char-code-property ascii 'jisx0208 jisx0208)))
166 ;; Convert string STR by FUNC and return a resulting string.
167 (defun japanese-string-conversion (str func &rest args)
168 (let ((buf (get-buffer-create " *Japanese work
*")))
173 (apply func 1 (point) args)
177 (defun japanese-katakana (obj &optional hankaku)
178 "Convert argument to Katakana and return that.
179 The argument may be a character or string. The result has the same type.
180 The argument object is not altered--the value is a copy.
181 Optional argument HANKAKU t means to convert to
`hankaku
' Katakana
182 \
(`japanese-jisx0201-kana
'), in which case return value
183 may be a string even if OBJ is a character if two Katakanas are
184 necessary to represent OBJ.
"
186 (japanese-string-conversion obj 'japanese-katakana-region hankaku)
187 (or (get-char-code-property obj (if hankaku 'jisx0201 'katakana))
191 (defun japanese-hiragana (obj)
192 "Convert argument to Hiragana and return that.
193 The argument may be a character or string. The result has the same type.
194 The argument object is not altered--the value is a copy.
"
196 (japanese-string-conversion obj 'japanese-hiragana-region)
197 (or (get-char-code-property obj 'hiragana)
201 (defun japanese-hankaku (obj &optional ascii-only)
202 "Convert argument to
`hankaku
' and return that.
203 The argument may be a character or string. The result has the same type.
204 The argument object is not altered--the value is a copy.
205 Optional argument ASCII-ONLY non-nil means to return only ASCII character.
"
207 (japanese-string-conversion obj 'japanese-hankaku-region ascii-only)
208 (or (get-char-code-property obj 'ascii)
209 (and (not ascii-only)
210 (get-char-code-property obj 'jisx0201))
214 (defun japanese-zenkaku (obj)
215 "Convert argument to
`zenkaku
' and return that.
216 The argument may be a character or string. The result has the same type.
217 The argument object is not altered--the value is a copy.
"
219 (japanese-string-conversion obj 'japanese-zenkaku-region)
220 (or (get-char-code-property obj 'jisx0208)
223 (defun japanese-replace-region (from to string)
224 "Replace the region specified by FROM and TO to STRING.
"
227 (delete-char (- to from)))
230 (defun japanese-katakana-region (from to &optional hankaku)
231 "Convert Japanese
`hiragana
' chars in the region to
`katakana
' chars.
232 Optional argument HANKAKU t means to convert to
`hankaku katakana
' character
233 of which charset is
`japanese-jisx0201-kana
'.
"
236 (narrow-to-region from to)
238 (goto-char (point-min))
239 (while (re-search-forward "\\cH
\\|
\\cK
" nil t)
240 (let* ((kana (preceding-char))
241 (composition (get-char-code-property kana 'kana-composition))
243 (if (and composition (setq slot (assq (following-char) composition)))
244 (japanese-replace-region (match-beginning 0) (1+ (point))
246 (let ((kata (get-char-code-property
247 kana (if hankaku 'jisx0201 'katakana))))
249 (japanese-replace-region (match-beginning 0) (point)
254 (defun japanese-hiragana-region (from to)
255 "Convert Japanese
`katakana
' chars in the region to
`hiragana
' chars.
"
258 (narrow-to-region from to)
260 (goto-char (point-min))
261 (while (re-search-forward "\\cK
\\|
\\ck
" nil t)
262 (let* ((kata (preceding-char))
263 (composition (get-char-code-property kata 'kana-composition))
265 (if (and composition (setq slot (assq (following-char) composition)))
266 (japanese-replace-region (match-beginning 0) (1+ (point))
267 (get-char-code-property
268 (cdr slot) 'hiragana))
269 (let ((hira (get-char-code-property kata 'hiragana)))
271 (japanese-replace-region (match-beginning 0) (point)
275 (defun japanese-hankaku-region (from to &optional ascii-only)
276 "Convert Japanese
`zenkaku
' chars in the region to
`hankaku
' chars.
277 `Zenkaku
' chars belong to
`japanese-jisx0208
'
278 `Hankaku
' chars belong to
`ascii
' or
`japanese-jisx0201-kana
'.
279 Optional argument ASCII-ONLY non-nil means to convert only to ASCII char.
"
282 (narrow-to-region from to)
284 (goto-char (point-min))
285 (while (re-search-forward "\\cj
" nil t)
286 (let* ((zenkaku (preceding-char))
287 (hankaku (or (get-char-code-property zenkaku 'ascii)
288 (and (not ascii-only)
289 (get-char-code-property zenkaku 'jisx0201)))))
291 (japanese-replace-region (match-beginning 0) (match-end 0)
295 (defun japanese-zenkaku-region (from to &optional katakana-only)
296 "Convert hankaku
' chars in the region to Japanese
`zenkaku
' chars.
297 `Zenkaku
' chars belong to
`japanese-jisx0208
'
298 `Hankaku
' chars belong to
`ascii
' or
`japanese-jisx0201-kana
'.
299 Optional argument KATAKANA-ONLY non-nil means to convert only KATAKANA char.
"
302 (narrow-to-region from to)
304 (goto-char (point-min))
305 (while (or (and katakana-only
306 (re-search-forward "\\ck
" nil t))
307 (and (not katakana-only)
308 (re-search-forward "\\ca
\\|
\\ck
" nil t)))
309 (let* ((hankaku (preceding-char))
310 (composition (get-char-code-property hankaku 'kana-composition))
312 (if (and composition (setq slot (assq (following-char) composition)))
313 (japanese-replace-region (match-beginning 0) (1+ (point))
315 (let ((zenkaku (japanese-zenkaku hankaku)))
317 (japanese-replace-region (match-beginning 0) (match-end 0)
321 (defun read-hiragana-string (prompt &optional initial-input)
322 "Read a Hiragana string from the minibuffer
, prompting with string PROMPT.
323 If non-nil
, second arg INITIAL-INPUT is a string to insert before reading.
"
324 (read-multilingual-string prompt initial-input "japanese-hiragana
"))
327 (provide 'japan-util)
329 ;;; japan-util.el ends here