1 ;;; ja-dic-utl.el --- utilities for handling Japanese dictionary (SKK-JISYO.L)
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
6 ;; Registration Number H14PRO021
8 ;; Keywords: i18n, mule, multilingual, Japanese
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 ;; This file provides a generic function to look up a Japanese
28 ;; dictionary of SKK format.
30 ;; SKK is a free Japanese input method running on Mule created by
31 ;; Masahiko Sato <masahiko@sato.riec.tohoku.ac.jp>. The Emacs Lisp
32 ;; library kkc.el provides a facility to convert a Japanese kana
33 ;; string to a kanji-kana-mixed string by using SKK's dictionary.
35 ;; The original SKK dictionary SKK-JISYO.L is converted to ja-dic.el
36 ;; by ja-dic-cnv.el. We get entries of the dictionary in four
37 ;; variables (listed below) by loading that file (or byte-compiled
38 ;; version ja-dic.elc).
42 ;; The following four variables are set by loading ja-dic.el[c].
43 (defvar skkdic-okuri-ari nil
44 "Nested alist for OKURI-ARI entries of SKK dictionary.
This is nil until it is set by loading ja-dic.el[c].")
46 (defvar skkdic-postfix nil
47 "Nested alist for SETSUBIJI (postfix) entries of SKK dictionary.
This is nil until it is set by loading ja-dic.el[c].
`skkdic-lookup-key' uses the car of this value as a number when
computing its starting `break' position for postfix lookups.")
49 (defvar skkdic-prefix nil
50 "Nested alist for SETTOUJI (prefix) entries of SKK dictionary.
This is nil until it is set by loading ja-dic.el[c].
`skkdic-lookup-key' uses the car of this value as a number when
computing its starting `break' position for prefix lookups.")
52 (defvar skkdic-okuri-nasi nil
53 "Nested alist for OKURI-NASI entries of SKK dictionary.
This is nil until it is set by loading ja-dic.el[c].
`skkdic-lookup-key' tries to load the ja-dic library when this
value is nil.")
55 (defconst skkdic-okurigana-table
56 '((?
\e$B$
!\e(B . ?a
) (?
\e$B$
"\e(B . ?a) (?\e$B$#\e(B . ?i) (?\e$B$$\e(B . ?i) (?\e$B$%\e(B . ?u)
57 (?\e$B$&\e(B . ?u) (?\e$B$'\e(B . ?e) (?\e$B$(\e(B . ?e) (?\e$B$)\e(B . ?o) (?\e$B$*\e(B . ?o)
58 (?\e$B$+\e(B . ?k) (?\e$B$,\e(B . ?g) (?\e$B$-\e(B . ?k) (?\e$B$.\e(B . ?g) (?\e$B$/\e(B . ?k)
59 (?\e$B$0\e(B . ?g) (?\e$B$1\e(B . ?k) (?\e$B$2\e(B . ?g) (?\e$B$3\e(B . ?k) (?\e$B$4\e(B . ?g)
60 (?\e$B$5\e(B . ?s) (?\e$B$6\e(B . ?z) (?\e$B$7\e(B . ?s) (?\e$B$8\e(B . ?j) (?\e$B$9\e(B . ?s)
61 (?\e$B$:\e(B . ?z) (?\e$B$;\e(B . ?s) (?\e$B$<\e(B . ?z) (?\e$B$=\e(B . ?s) (?\e$B$>\e(B . ?z)
62 (?\e$B$?\e(B . ?t) (?\e$B$@\e(B . ?d) (?\e$B$A\e(B . ?t) (?\e$B$B\e(B . ?d) (?\e$B$C\e(B . ?t)
63 (?\e$B$D\e(B . ?t) (?\e$B$E\e(B . ?d) (?\e$B$F\e(B . ?t) (?\e$B$G\e(B . ?d) (?\e$B$H\e(B . ?t) (?\e$B$I\e(B . ?d)
64 (?\e$B$J\e(B . ?n) (?\e$B$K\e(B . ?n) (?\e$B$L\e(B . ?n) (?\e$B$M\e(B . ?n) (?\e$B$N\e(B . ?n)
65 (?\e$B$O\e(B . ?h) (?\e$B$P\e(B . ?b) (?\e$B$Q\e(B . ?p) (?\e$B$R\e(B . ?h) (?\e$B$S\e(B . ?b)
66 (?\e$B$T\e(B . ?p) (?\e$B$U\e(B . ?h) (?\e$B$V\e(B . ?b) (?\e$B$W\e(B . ?p) (?\e$B$X\e(B . ?h)
67 (?\e$B$Y\e(B . ?b) (?\e$B$Z\e(B . ?p) (?\e$B$[\e(B . ?h) (?\e$B$\\e(B . ?b) (?\e$B$]\e(B . ?p)
68 (?\e$B$^\e(B . ?m) (?\e$B$_\e(B . ?m) (?\e$B$`\e(B . ?m) (?\e$B$a\e(B . ?m) (?\e$B$b\e(B . ?m)
69 (?\e$B$c\e(B . ?y) (?\e$B$d\e(B . ?y) (?\e$B$e\e(B . ?y) (?\e$B$f\e(B . ?y) (?\e$B$g\e(B . ?y) (?\e$B$h\e(B . ?y)
70 (?\e$B$i\e(B . ?r) (?\e$B$j\e(B . ?r) (?\e$B$k\e(B . ?r) (?\e$B$l\e(B . ?r) (?\e$B$m\e(B . ?r)
71 (?\e$B$o\e(B . ?w) (?\e$B$p\e(B . ?w) (?\e$B$q\e(B . ?w) (?\e$B$r\e(B . ?w)
74 "Alist of Okuriganas vs trailing ASCII letters in OKURI-ARI entry.
")
76 (defun skkdic-merge-head-and-tail (heads tails postfix)
81 (>= (length (car heads)) min-len))
85 (>= (length (car tail)) min-len))
86 (setq l (cons (concat (car heads) (car tail)) l)))
87 (setq tail (cdr tail)))))
88 (setq heads (cdr heads)))
;; Cons (FIRST . LAST) of the characters decoded from the
;; `japanese-jisx0208' code points #x2421 and #x247E.
;; `skkdic-lookup-key' treats a character inside this inclusive range
;; as a Hiragana character.
91 (defconst skkdic-jisx0208-hiragana-block
92 (cons (decode-char 'japanese-jisx0208 #x2421)
93 (decode-char 'japanese-jisx0208 #x247E)))
95 (defun skkdic-lookup-key (seq len &optional postfix prefer-noun)
96 "Return a list of conversion string for sequence SEQ of length LEN.
98 SEQ is a vector of Kana characters to be converted by SKK dictionary.
99 If LEN is shorter than the length of SEQ, the first LEN keys in SEQ
100 are taken into account.
102 Optional 3rd arg POSTFIX non-nil means SETSUBIJI (postfix) are also
103 considered to find conversion strings.
105 Optional 4th arg PREFER-NOUN non-nil means that the conversions
106 without okurigana are placed at the head of the returned list.
"
107 (or skkdic-okuri-nasi
109 (load-library "ja-dic
/ja-dic
")
111 (with-output-to-temp-buffer "*Help
*"
112 (princ "The library
`ja-dic
' can
't be loaded.
114 The most common case is that you have not yet installed the library
115 included in LEIM
(Libraries of Emacs Input Method
) which is
116 distributed separately from Emacs.
118 LEIM is available from the same ftp directory as Emacs.
"))
119 (signal (car err) (cdr err)))))
121 (let ((vec (make-vector len 0))
124 ;; At first, generate vector VEC from SEQ for looking up SKK
125 ;; alists. Nth element in VEC corresponds to Nth element in SEQ.
126 ;; The values are decided as follows.
127 ;; If SEQ[N] is `\e$B!<\e(B', VEC[N] is 0,
128 ;; else if SEQ[N] is a Hiragana character, VEC[N] is:
129 ;; ((The 2nd position code of SEQ[N]) - 32),
130 ;; else VEC[N] is 128.
132 (let ((ch (aref seq i))
134 (cond ((= ch ?\e$B!<\e(B)
136 ((and (>= ch (car skkdic-jisx0208-hiragana-block))
137 (<= ch (cdr skkdic-jisx0208-hiragana-block)))
138 (setq code (encode-char ch 'japanese-jisx0208))
140 (aset vec i (- (logand code #xFF) 32))
146 ;; Search OKURI-NASI entries.
147 (setq entry (lookup-nested-alist vec skkdic-okuri-nasi len 0 t))
148 (if (consp (car entry))
149 (setq entry (copy-sequence (car entry)))
153 ;; Search OKURI-NASI entries with postfixes.
154 (let ((break (max (- len (car skkdic-postfix)) 1))
155 entry-head entry-postfix entry2)
157 (if (and (setq entry-head
158 (lookup-nested-alist vec skkdic-okuri-nasi
160 (consp (car entry-head))
162 (lookup-nested-alist vec skkdic-postfix
164 (consp (car entry-postfix))
165 (setq entry2 (skkdic-merge-head-and-tail
166 (car entry-head) (car entry-postfix) t)))
169 (setq entry entry2)))
170 (setq break (1+ break)))))
172 ;; Search OKURI-NASI entries with prefixes.
173 (let ((break (min (car skkdic-prefix) (- len 2)))
174 entry-prefix entry-tail entry2)
176 (if (and (setq entry-prefix
177 (lookup-nested-alist vec skkdic-prefix break 0 t))
178 (consp (car entry-prefix))
180 (lookup-nested-alist vec skkdic-okuri-nasi len break t))
181 (consp (car entry-tail))
182 (setq entry2 (skkdic-merge-head-and-tail
183 (car entry-prefix) (car entry-tail) nil)))
187 (setq entry entry2))))
188 (setq break (1- break))))
190 ;; Search OKURI-ARI entries.
191 (let ((okurigana (assq (aref seq (1- len)) skkdic-okurigana-table))
195 (setq orig-element (aref vec (1- len)))
196 (aset vec (1- len) (- (cdr okurigana)))
197 (if (and (setq entry2 (lookup-nested-alist vec skkdic-okuri-ari
199 (consp (car entry2)))
201 (setq entry2 (copy-sequence (car entry2)))
203 (okuri (char-to-string (aref seq (1- len)))))
205 (setcar l (concat (car l) okuri))
210 (setq entry2 (nreverse entry2))
213 (setq entry (nreverse entry2))))))
214 (aset vec (1- len) orig-element))))
;; Announce the `ja-dic-utl' feature so that `require' can find it.
219 (provide 'ja-dic-utl)
222 ;; coding: iso-2022-7bit
225 ;;; ja-dic-utl.el ends here