1 ;;; ja-dic-utl.el --- utilities for handling Japanese dictionary (SKK-JISYO.L)
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 ;; 2005, 2006, 2007, 2008
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
6 ;; Registration Number H14PRO021
8 ;; Keywords: mule, multilingual, Japanese
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 3, or (at your option)
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 ;; Boston, MA 02110-1301, USA.
29 ;; This file provides a generic function to look up a Japanese
30 ;; dictionary of SKK format.
32 ;; SKK is a free Japanese input method running on Mule created by
33 ;; Masahiko Sato <masahiko@sato.riec.tohoku.ac.jp>. The Emacs Lisp
34 ;; library kkc.el provides a facility to convert a Japanese kana
35 ;; string to a kanji-kana-mixed string by using SKK's dictionary.
37 ;; The original SKK dictionary SKK-JISYO.L is converted to ja-dic.el
38 ;; by ja-dic-cnv.el. We get entries of the dictionary in four
39 ;; variables (listed below) by loading that file (or byte-compiled
40 ;; version ja-dic.elc).
44 ;; The following four variables are set by loading ja-dic.el[c].
(defvar skkdic-okuri-ari
  nil
  "Nested alist holding the OKURI-ARI entries of the SKK dictionary.
The value is nil until the library ja-dic.el[c] is loaded, which sets it.")
(defvar skkdic-postfix
  nil
  "Nested alist holding the SETSUBIJI (postfix) entries of the SKK dictionary.
The value is nil until the library ja-dic.el[c] is loaded, which sets it.
`skkdic-lookup-key' reads the car of the loaded value as a number when
deciding where a key may be split into a head and a postfix.")
(defvar skkdic-prefix
  nil
  "Nested alist holding the SETTOUJI (prefix) entries of the SKK dictionary.
The value is nil until the library ja-dic.el[c] is loaded, which sets it.
`skkdic-lookup-key' reads the car of the loaded value as a number when
deciding where a key may be split into a prefix and a tail.")
(defvar skkdic-okuri-nasi
  nil
  "Nested alist holding the OKURI-NASI entries of the SKK dictionary.
The value is nil until the library ja-dic.el[c] is loaded, which sets it.
`skkdic-lookup-key' tries to load that library when this is still nil.")
57 (defconst skkdic-okurigana-table
58 '((?
\e$B$
!\e(B . ?a
) (?
\e$B$
"\e(B . ?a) (?\e$B$#\e(B . ?i) (?\e$B$$\e(B . ?i) (?\e$B$%\e(B . ?u)
59 (?\e$B$&\e(B . ?u) (?\e$B$'\e(B . ?e) (?\e$B$(\e(B . ?e) (?\e$B$)\e(B . ?o) (?\e$B$*\e(B . ?o)
60 (?\e$B$+\e(B . ?k) (?\e$B$,\e(B . ?g) (?\e$B$-\e(B . ?k) (?\e$B$.\e(B . ?g) (?\e$B$/\e(B . ?k)
61 (?\e$B$0\e(B . ?g) (?\e$B$1\e(B . ?k) (?\e$B$2\e(B . ?g) (?\e$B$3\e(B . ?k) (?\e$B$4\e(B . ?g)
62 (?\e$B$5\e(B . ?s) (?\e$B$6\e(B . ?z) (?\e$B$7\e(B . ?s) (?\e$B$8\e(B . ?j) (?\e$B$9\e(B . ?s)
63 (?\e$B$:\e(B . ?z) (?\e$B$;\e(B . ?s) (?\e$B$<\e(B . ?z) (?\e$B$=\e(B . ?s) (?\e$B$>\e(B . ?z)
64 (?\e$B$?\e(B . ?t) (?\e$B$@\e(B . ?d) (?\e$B$A\e(B . ?t) (?\e$B$B\e(B . ?d) (?\e$B$C\e(B . ?t)
65 (?\e$B$D\e(B . ?t) (?\e$B$E\e(B . ?d) (?\e$B$F\e(B . ?t) (?\e$B$G\e(B . ?d) (?\e$B$H\e(B . ?t) (?\e$B$I\e(B . ?d)
66 (?\e$B$J\e(B . ?n) (?\e$B$K\e(B . ?n) (?\e$B$L\e(B . ?n) (?\e$B$M\e(B . ?n) (?\e$B$N\e(B . ?n)
67 (?\e$B$O\e(B . ?h) (?\e$B$P\e(B . ?b) (?\e$B$Q\e(B . ?p) (?\e$B$R\e(B . ?h) (?\e$B$S\e(B . ?b)
68 (?\e$B$T\e(B . ?p) (?\e$B$U\e(B . ?h) (?\e$B$V\e(B . ?b) (?\e$B$W\e(B . ?p) (?\e$B$X\e(B . ?h)
69 (?\e$B$Y\e(B . ?b) (?\e$B$Z\e(B . ?p) (?\e$B$[\e(B . ?h) (?\e$B$\\e(B . ?b) (?\e$B$]\e(B . ?p)
70 (?\e$B$^\e(B . ?m) (?\e$B$_\e(B . ?m) (?\e$B$`\e(B . ?m) (?\e$B$a\e(B . ?m) (?\e$B$b\e(B . ?m)
71 (?\e$B$c\e(B . ?y) (?\e$B$d\e(B . ?y) (?\e$B$e\e(B . ?y) (?\e$B$f\e(B . ?y) (?\e$B$g\e(B . ?y) (?\e$B$h\e(B . ?y)
72 (?\e$B$i\e(B . ?r) (?\e$B$j\e(B . ?r) (?\e$B$k\e(B . ?r) (?\e$B$l\e(B . ?r) (?\e$B$m\e(B . ?r)
73 (?\e$B$o\e(B . ?w) (?\e$B$p\e(B . ?w) (?\e$B$q\e(B . ?w) (?\e$B$r\e(B . ?w)
76 "Alist of Okuriganas vs trailing ASCII letters in OKURI-ARI entry.
")
78 (defun skkdic-merge-head-and-tail (heads tails postfix)
83 (>= (length (car heads)) min-len))
87 (>= (length (car tail)) min-len))
88 (setq l (cons (concat (car heads) (car tail)) l)))
89 (setq tail (cdr tail)))))
90 (setq heads (cdr heads)))
(defconst skkdic-jisx0208-hiragana-block
  ;; JIS X 0208 row 4 assigns Hiragana only to the code points #x2421
  ;; through #x2473; #x2474..#x247E are unassigned.  `decode-char'
  ;; returns nil for a code point the charset does not contain, and a
  ;; nil bound would make the `<=' range test in `skkdic-lookup-key'
  ;; signal an error.  So the block ends at the last assigned Hiragana
  ;; rather than at the last cell of the row.
  (cons (decode-char 'japanese-jisx0208 #x2421)
        (decode-char 'japanese-jisx0208 #x2473))
  "Cons (FIRST . LAST) of the first and last JIS X 0208 Hiragana characters.
`skkdic-lookup-key' treats a character CH as Hiragana when
FIRST <= CH <= LAST.")
97 (defun skkdic-lookup-key (seq len &optional postfix prefer-noun)
98 "Return a list of conversion string for sequence SEQ of length LEN.
100 SEQ is a vector of Kana characters to be converted by SKK dictionary.
If LEN is shorter than the length of SEQ, the first LEN keys in SEQ
are taken into account.
Optional 3rd arg POSTFIX non-nil means SETSUBIJI (postfix) are also
considered to find conversion strings.
Optional 4th arg PREFER-NOUN non-nil means that the conversions
without okurigana are placed at the head of the returned list."
109 (or skkdic-okuri-nasi
111 (load-library "ja-dic
/ja-dic
")
113 (with-output-to-temp-buffer "*Help
*"
114 (princ "The library
`ja-dic
' can
't be loaded.
116 The most common case is that you have not yet installed the library
117 included in LEIM
(Libraries of Emacs Input Method
) which is
118 distributed separately from Emacs.
120 LEIM is available from the same ftp directory as Emacs.
"))
121 (signal (car err) (cdr err)))))
123 (let ((vec (make-vector len 0))
126 ;; At first, generate vector VEC from SEQ for looking up SKK
127 ;; alists. Nth element in VEC corresponds to Nth element in SEQ.
128 ;; The values are decided as follows.
129 ;; If SEQ[N] is `\e$B!<\e(B', VEC[N] is 0,
130 ;; else if SEQ[N] is a Hiragana character, VEC[N] is:
131 ;; ((The 2nd position code of SEQ[N]) - 32),
132 ;; else VEC[N] is 128.
134 (let ((ch (aref seq i))
136 (cond ((= ch ?\e$B!<\e(B)
138 ((and (>= ch (car skkdic-jisx0208-hiragana-block))
139 (<= ch (cdr skkdic-jisx0208-hiragana-block)))
140 (setq code (encode-char ch 'japanese-jisx0208))
142 (aset vec i (- (logand code #xFF) 32))
148 ;; Search OKURI-NASI entries.
149 (setq entry (lookup-nested-alist vec skkdic-okuri-nasi len 0 t))
150 (if (consp (car entry))
151 (setq entry (copy-sequence (car entry)))
155 ;; Search OKURI-NASI entries with postfixes.
156 (let ((break (max (- len (car skkdic-postfix)) 1))
157 entry-head entry-postfix entry2)
159 (if (and (setq entry-head
160 (lookup-nested-alist vec skkdic-okuri-nasi
162 (consp (car entry-head))
164 (lookup-nested-alist vec skkdic-postfix
166 (consp (car entry-postfix))
167 (setq entry2 (skkdic-merge-head-and-tail
168 (car entry-head) (car entry-postfix) t)))
171 (setq entry entry2)))
172 (setq break (1+ break)))))
174 ;; Search OKURI-NASI entries with prefixes.
175 (let ((break (min (car skkdic-prefix) (- len 2)))
176 entry-prefix entry-tail entry2)
178 (if (and (setq entry-prefix
179 (lookup-nested-alist vec skkdic-prefix break 0 t))
180 (consp (car entry-prefix))
182 (lookup-nested-alist vec skkdic-okuri-nasi len break t))
183 (consp (car entry-tail))
184 (setq entry2 (skkdic-merge-head-and-tail
185 (car entry-prefix) (car entry-tail) nil)))
189 (setq entry entry2))))
190 (setq break (1- break))))
192 ;; Search OKURI-ARI entries.
193 (let ((okurigana (assq (aref seq (1- len)) skkdic-okurigana-table))
197 (setq orig-element (aref vec (1- len)))
198 (aset vec (1- len) (- (cdr okurigana)))
199 (if (and (setq entry2 (lookup-nested-alist vec skkdic-okuri-ari
201 (consp (car entry2)))
203 (setq entry2 (copy-sequence (car entry2)))
205 (okuri (char-to-string (aref seq (1- len)))))
207 (setcar l (concat (car l) okuri))
212 (setq entry2 (nreverse entry2))
215 (setq entry (nreverse entry2))))))
216 (aset vec (1- len) orig-element))))
221 (provide 'ja-dic-utl)
224 ;; coding: iso-2022-7bit
227 ;;; arch-tag: df2218fa-469c-40f6-bace-7f89a053f9c0
228 ;;; ja-dic-utl.el ends here