(Vthrow_on_input): New variable.
[emacs.git] / lisp / language / knd-util.el
blob51d2fa4f4d92b4c31e3dbbef7d054a7f13208801
1 ;;; knd-util.el --- Support for composing Kannada characters
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
5 ;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org>
6 ;; Keywords: multilingual, Kannada
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
25 ;; Created: Jul. 14. 2003
27 ;;; Commentary:
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Kannada script characters.
32 ;;; Code:
34 ;;;###autoload
36 ;; Kannada Composable Pattern
37 ;; C .. Consonants
38 ;; V .. Vowel
39 ;; H .. Virama
40 ;; M .. Matra
41 ;; A .. Vowel modifier (presumably anusvara/visarga; used in rule 1 below)
42 ;; (N .. Zerowidth Non Joiner)
43 ;; (J .. Zerowidth Joiner. )
44 ;; 1. vowel
45 ;; V(A)?
46 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
47 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?)?
(defconst kannada-consonant
  "[\e$,1>u\e(B-\e$,1?9\e(B]"
  "Regexp (a single character class) matching one Kannada consonant.")
(defconst kannada-consonant-needs-twirl
  "[\e$,1>u>w\e(B-\e$,1>{>}\e(B-\e$,1>~? \e(B-\e$,1?"?$\e(B-\e$,1?+?-?0?3\e(B-\e$,1?9\e(B]\\(\e$,1?M\e(B[\e$,1>u\e(B-\e$,1?9\e(B]\\)*[\e$,1?A?B?C?D>b\e(B]?$"
  "Regexp used by `kannada-compose-syllable-region' to decide whether
an extra \"twirl\" glyph must be appended to the syllable's glyphs.
It matches one of the listed consonants, any number of
virama+consonant pairs, and an optional trailing sign, at end of line.")
(defconst kannada-composable-pattern
  ;; The regexp is assembled from three named fragments so that the
  ;; "up to four leading consonant+virama pairs" structure is visible.
  (let* ((consonant "[\e$,1>u\e(B-\e$,1?9\e(B]")
         (virama "\e$,1?M\e(B")
         ;; One optional, non-capturing "consonant + virama" pair.
         (optional-pair (concat "\\(?:" consonant virama "\\)?")))
    (concat
     ;; Alternative 1: a stand-alone vowel (or the lone sign in the
     ;; second character class).
     "\\([\e$,1>b\e(B-\e$,1>t?`>l\e(B]\\)\\|[\e$,1>c\e(B]"
     ;; Alternative 2: a consonant syllable.
     "\\|\\("
     "\\(?:" optional-pair optional-pair optional-pair
     consonant virama "\\)?"
     consonant "\\(?:" virama "\\|[\e$,1?>\e(B-\e$,1?M?U?C\e(B]?\\)?"
     "\\)"))
  "Regexp matching a composable sequence of Kannada characters.")
64 ;;;###autoload
(defun kannada-compose-region (from to)
  "Compose each Kannada syllable found between FROM and TO.
The region is scanned for matches of `kannada-composable-pattern',
and every match is handed to `kannada-compose-syllable-region'.
Point and any narrowing in effect are restored afterwards."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (syllable-start syllable-end)
        (while (re-search-forward kannada-composable-pattern nil t)
          (setq syllable-start (match-beginning 0)
                syllable-end (match-end 0))
          (kannada-compose-syllable-region syllable-start syllable-end))))))
74 ;;;###autoload
(defun kannada-compose-string (string)
  "Return STRING with its Kannada syllables composed.
STRING is first passed through `decompose-string', then inserted
into a temporary buffer, composed there with
`kannada-compose-region', and the buffer contents are returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-region (point-min) (point-max))
      (buffer-string))))
81 ;;;###autoload
(defun kannada-post-read-conversion (len)
  "Compose the Kannada text in the LEN characters following point.
The buffer's modified flag is put back to its previous value after
composing.  Return the size of the processed (narrowed) region."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (kannada-compose-region (point-min) (point-max))
        ;; Composing must not make the buffer look edited.
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
(defun kannada-range (from to)
  "Make the list of the integers of range FROM to TO.
Both ends are included; the result is nil when FROM is greater than TO."
  (let ((n to)
        (acc nil))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (setq acc (cons n acc)
            n (1- n)))
    acc))
(defun kannada-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys are collected, sorted longest first, and passed to
`regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key val) (setq keys (cons key keys))) hashtbl)
    (regexp-opt
     (sort keys (lambda (a b) (> (length a) (length b)))))))
(defun kannada-regexp-of-hashtbl-vals (hashtbl)
  "Return a regular expression that matches all values in hashtable HASHTBL.
The values are collected, sorted longest first, and passed to
`regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration."
  (let ((max-specpdl-size 1000)
        (vals nil))
    (maphash (lambda (key val) (setq vals (cons val vals))) hashtbl)
    (regexp-opt
     (sort vals (lambda (a b) (> (length a) (length b)))))))
(defun kannada-composition-function (from to pattern &optional string)
  "Compose Kannada characters in REGION, or STRING if specified.
Assume that the REGION or STRING must fully match the composable
PATTERN regexp.  PATTERN itself is not consulted here.
Return the length of the region, TO minus FROM."
  (cond (string (kannada-compose-syllable-string string))
        (t (kannada-compose-syllable-region from to)))
  (- to from))
124 ;; Register a function to compose Kannada characters.
;; Every code point from U+0C80 to U+0CFF gets the same entry:
;; (PATTERN . FUNCTION) using the Kannada pattern and composer.
(dolist (ucs (kannada-range #x0c80 #x0cff))
  (aset composition-function-table (decode-char 'ucs ucs)
        (list (cons kannada-composable-pattern
                    'kannada-composition-function))))
132 ;; Notes on conversion steps.
134 ;; 1. chars to glyphs
136 ;; Rules will not be applied to a virama appearing at the end of the
137 ;; text. Also, the preceding/following "r" will be treated as a special case.
139 ;; 2. glyphs reordering.
141 ;; The glyphs are split by virama, and each glyph group is
142 ;; re-ordered in the following order.
144 ;; Note that `consonant-glyph' mentioned here does not contain the
145 ;; vertical bar (right modifier) attached at the right of the
146 ;; consonant.
148 ;; If the glyph-group contains right modifier,
149 ;; (1) consonant-glyphs/vowels
150 ;; (2) spacing
151 ;; (3) right modifier (may be matra)
152 ;; (4) top matra
153 ;; (5) preceding "r"
154 ;; (7) following "r"
155 ;; (8) bottom matra or virama.
157 ;; Otherwise,
158 ;; (1) consonant-glyph/vowels, with nukta sign
159 ;; (3) left matra
160 ;; (4) top matra
161 ;; (5) preceding "r"
162 ;; (7) following "r"
163 ;; (8) bottom matra or virama.
164 ;; (2) spacing
166 ;; 3. glyph to glyph
168 ;; For better display, some glyph display would be tuned.
170 ;; 4. Composition.
172 ;; left modifiers will be attached at the left.
173 ;; others will be attached right.
175 ;; Problem::
176 ;; Can we generalize this method to other Indian scripts?
(defvar knd-char-glyph
  '(("\e$,1>e\e(B" . "\e$,43@\e(B")
    ("\e$,1>f\e(B" . "\e$,43A\e(B")
    ("\e$,1?>\e(B" . "\e$,44{\e(B")
    ("\e$,1>g\e(B" . "\e$,43B\e(B")
    ("\e$,1??\e(B" . nil)
    ("\e$,1>h\e(B" . "\e$,43C\e(B")
    ("\e$,1?@\e(B" . nil)
    ("\e$,1>i\e(B" . "\e$,43D\e(B")
    ("\e$,1?A\e(B" . "\\e$,44\x7f\e(B")
    ("\e$,1>j\e(B" . "\e$,43E\e(B")
    ("\e$,1?B\e(B" . "\\e$,45 \e(B")
    ("\e$,1>k\e(B" . "\e$,43F4\x7f\e(B")
    ("\e$,1?C\e(B" . "\\e$,45$\e(B")
    ("\e$,1?`\e(B" . "\e$,43F5 \e(B")
    ("\e$,1?D\e(B" . "\\e$,45%\e(B")
    ;;("\e$,1>l\e(B" . nil) ; not implemented.
    ;;("\e$,1?a\e(B" . nil)
    ("\e$,1>n\e(B" . "\e$,43G\e(B")
    ("\e$,1>o\e(B" . "\e$,43H\e(B")
    ("\e$,1>p\e(B" . "\e$,43I\e(B")
    ("\e$,1?F\e(B" . "\\e$,45&\e(B")
    ("\e$,1?G\e(B" . "\\e$,45&4~\e(B")
    ("\e$,1?H\e(B" . "\\e$,45&5'\e(B")
    ("\e$,1>r\e(B" . "\e$,43J\e(B")
    ("\e$,1?J\e(B" . "\e$,45&5 \e(B")
    ("\e$,1>s\e(B" . "\e$,43K\e(B")
    ("\e$,1?K\e(B" . "\\e$,45&5 4~\e(B")
    ("\e$,1>t\e(B" . "\e$,43L\e(B")
    ("\e$,1?L\e(B" . "\\e$,45(\e(B")
    ("\e$,1>b\e(B" . "\e$,43M\e(B")
    ("\e$,1>c\e(B" . "\e$,43N\e(B")
    ;; One row per consonant from here on: the consonant with virama,
    ;; the bare consonant, then consonant+matra combinations that have
    ;; their own glyph sequence.
    ("\e$,1>u?M\e(B" . "\e$,43O5)\e(B") ("\e$,1>u\e(B" . "\e$,43O\e(B") ("\e$,1>u??\e(B" . "\e$,43P\e(B") ("\e$,1>u?@\e(B" . "\e$,43P4~\e(B")
    ("\e$,1>v?M\e(B" . "\e$,43S5)\e(B") ("\e$,1>v\e(B" . "\e$,43S\e(B") ("\e$,1>v??\e(B" . "\e$,43T\e(B") ("\e$,1>v?@\e(B" . "\e$,43T4~\e(B") ("\e$,1>v?F\e(B" . "\e$,43S5&\e(B") ("\e$,1>v?G\e(B" . "\e$,43S5&4~\e(B") ("\e$,1>v?H\e(B" . "\e$,43S5&5'\e(B") ("\e$,1>v?J\e(B" . "\e$,43S5&5&5 \e(B") ("\e$,1>v?K\e(B" . "\e$,43S5&5&5 4~\e(B") ("\e$,1>v?L\e(B" . "\e$,43S5(\e(B")
    ("\e$,1>w?M\e(B" . "\e$,43V5)\e(B") ("\e$,1>w\e(B" . "\e$,43V\e(B") ("\e$,1>w??\e(B" . "\e$,43W\e(B") ("\e$,1>w?@\e(B" . "\e$,43W4~\e(B")
    ("\e$,1>x?M\e(B" . "\e$,43Y5)\e(B") ("\e$,1>x\e(B" . "\e$,43Y\e(B") ("\e$,1>x??\e(B" . "\e$,43Z\e(B") ("\e$,1>x?@\e(B" . "\e$,43Z4~\e(B")
    ("\e$,1>y?M\e(B" . "\e$,43\5)\e(B") ("\e$,1>y\e(B" . "\e$,43\\e(B")
    ("\e$,1>z?M\e(B" . "\e$,43^5)\e(B") ("\e$,1>z\e(B" . "\e$,43^\e(B") ("\e$,1>z??\e(B" . "\e$,43_\e(B") ("\e$,1>z?@\e(B" . "\e$,43_4~\e(B")
    ("\e$,1>{?M\e(B" . "\e$,43a5)\e(B") ("\e$,1>{\e(B" . "\e$,43a\e(B") ("\e$,1>{??\e(B" . "\e$,43b\e(B") ("\e$,1>{?@\e(B" . "\e$,43b4~\e(B")
    ("\e$,1>|?M\e(B" . "\e$,43d5)\e(B") ("\e$,1>|\e(B" . "\e$,43d\e(B") ("\e$,1>|??\e(B" . "\e$,43f\e(B") ("\e$,1>|?@\e(B" . "\e$,43f4~\e(B") ("\e$,1>|?F\e(B" . "\e$,43e5&\e(B") ("\e$,1>|?G\e(B" . "\e$,43e5&4~\e(B") ("\e$,1>|?H\e(B" . "\e$,43e5&5'\e(B") ("\e$,1>|?J\e(B" . "\e$,43e5&5&5 \e(B") ("\e$,1>|?K\e(B" . "\e$,43e5&5&5 4~\e(B") ("\e$,1>|?L\e(B" . "\e$,43e5(\e(B")
    ("\e$,1>}?M\e(B" . "\e$,44a4z3h4\x7f5)\e(B") ("\e$,1>}\e(B" . "\e$,44a4z3h4\x7f\e(B") ("\e$,1>}??\e(B" . "\e$,44b3h4\x7f\e(B") ("\e$,1>}?@\e(B" . "\e$,44b3h4\x7f4~\e(B") ("\e$,1>}?B\e(B". "\e$,44a4z3h5 \e(B") ("\e$,1>}?J\e(B". "\e$,44a5&3h5 \e(B") ("\e$,1>}?K\e(B". "\e$,44a5&3h5 4~\e(B")
    ("\e$,1>~?M\e(B" . "\e$,43j5)\e(B") ("\e$,1>~\e(B" . "\e$,43j\e(B")
    ;; The third entry of this row used to be keyed by the consonant of
    ;; a later row (which defines that key again), so this consonant's
    ;; own "+aa" combination was never reachable.  It is now keyed by
    ;; this row's consonant, like the parallel rows.
    ("\e$,1>\x7f?M\e(B" . "\e$,43m5)\e(B") ("\e$,1>\x7f\e(B" . "\e$,43l\e(B") ("\e$,1>\x7f?>\e(B" . "\e$,43m4{\e(B") ("\e$,1>\x7f??\e(B" . "\e$,43n\e(B") ("\e$,1>\x7f?@\e(B" . "\e$,43n4~\e(B") ("\e$,1>\x7f?F\e(B" . "\e$,43m5&\e(B") ("\e$,1>\x7f?G\e(B" . "\e$,43m5&4~\e(B") ("\e$,1>\x7f?H\e(B" . "\e$,43m5&5'\e(B") ("\e$,1>\x7f?J\e(B" . "\e$,43m5&5&5 \e(B") ("\e$,1>\x7f?K\e(B" . "\e$,43m5&5&5 4~\e(B") ("\e$,1>\x7f?L\e(B" . "\e$,43m5(\e(B")
    ("\e$,1? ?M\e(B" . "\e$,43p5)\e(B") ("\e$,1? \e(B" . "\e$,43p\e(B") ("\e$,1? ??\e(B" . "\e$,43q\e(B") ("\e$,1? ?@\e(B" . "\e$,43q4~\e(B")
    ("\e$,1?!?M\e(B" . "\e$,43s5)\e(B") ("\e$,1?!\e(B" . "\e$,43s\e(B") ("\e$,1?!??\e(B" . "\e$,43t\e(B") ("\e$,1?!?@\e(B" . "\e$,43t4~\e(B")
    ("\e$,1?"?M\e(B" . "\e$,43v5)\e(B") ("\e$,1?"\e(B" . "\e$,43v\e(B") ("\e$,1?"??\e(B" . "\e$,43w\e(B") ("\e$,1?"?@\e(B" . "\e$,43w4~\e(B")
    ("\e$,1?#?M\e(B" . "\e$,43z5)\e(B") ("\e$,1?#\e(B" . "\e$,43y\e(B") ("\e$,1?#?>\e(B" . "\e$,43z4{\e(B") ("\e$,1?#??\e(B" . "\e$,43{\e(B") ("\e$,1?#?@\e(B" . "\e$,43{4~\e(B") ("\e$,1?#?F\e(B" . "\e$,43z5&\e(B") ("\e$,1?#?G\e(B" . "\e$,43z5&4~\e(B") ("\e$,1?#?H\e(B" . "\e$,43z5&5'\e(B") ("\e$,1?#?J\e(B" . "\e$,43z5&5&5 \e(B") ("\e$,1?#?K\e(B" . "\e$,43z5&5&5 4~\e(B") ("\e$,1?#?L\e(B" . "\e$,43z5(\e(B")
    ("\e$,1?$?M\e(B" . "\e$,43}5)\e(B") ("\e$,1?$\e(B" . "\e$,43}\e(B") ("\e$,1?$??\e(B" . "\e$,43~\e(B") ("\e$,1?$?@\e(B" . "\e$,43~4~\e(B")
    ("\e$,1?%?M\e(B" . "\e$,44B5)\e(B") ("\e$,1?%\e(B" . "\e$,44B\e(B") ("\e$,1?%??\e(B" . "\e$,44C\e(B") ("\e$,1?%?@\e(B" . "\e$,44C4~\e(B")
    ("\e$,1?&?M\e(B" . "\e$,44E5)\e(B") ("\e$,1?&\e(B" . "\e$,44E\e(B") ("\e$,1?&??\e(B" . "\e$,44F\e(B") ("\e$,1?&?@\e(B" . "\e$,44F4~\e(B")
    ("\e$,1?'?M\e(B" . "\e$,44H5)\e(B") ("\e$,1?'\e(B" . "\e$,44H\e(B") ("\e$,1?'??\e(B" . "\e$,44I\e(B") ("\e$,1?'?@\e(B" . "\e$,44I4~\e(B")
    ("\e$,1?(?M\e(B" . "\e$,44K5)\e(B") ("\e$,1?(\e(B" . "\e$,44K\e(B") ("\e$,1?(??\e(B" . "\e$,44L\e(B") ("\e$,1?(?@\e(B" . "\e$,44L4~\e(B")
    ("\e$,1?*?M\e(B" . "\e$,44N5)\e(B") ("\e$,1?*\e(B" . "\e$,44N\e(B") ("\e$,1?*??\e(B" . "\e$,44O\e(B") ("\e$,1?*?@\e(B" . "\e$,44O4~\e(B") ("\e$,1?*?A\e(B" . "\e$,44N5"\e(B") ("\e$,1?*?B\e(B" . "\e$,44N5#\e(B") ("\e$,1?*?J\e(B" . "\e$,44N5&5#\e(B") ("\e$,1?*?K\e(B" . "\e$,44N5&5#4~\e(B")
    ("\e$,1?+?M\e(B" . "\e$,44Q5)\e(B") ("\e$,1?+\e(B" . "\e$,44Q\e(B") ("\e$,1?+??\e(B" . "\e$,44R\e(B") ("\e$,1?+?@\e(B" . "\e$,44R4~\e(B") ("\e$,1?+?A\e(B" . "\e$,44Q5"\e(B") ("\e$,1?+?B\e(B" . "\e$,44Q5#\e(B") ("\e$,1?+?J\e(B" . "\e$,44Q5&5#\e(B") ("\e$,1?+?K\e(B" . "\e$,44Q5&5#4~\e(B")
    ("\e$,1?,?M\e(B" . "\e$,44W5)\e(B") ("\e$,1?,\e(B" . "\e$,44V\e(B") ("\e$,1?,?>\e(B". "\e$,44W4{\e(B") ("\e$,1?,??\e(B" . "\e$,44X\e(B") ("\e$,1?,?@\e(B" . "\e$,44X4~\e(B") ("\e$,1?,?F\e(B" . "\e$,44W5&\e(B") ("\e$,1?,?G\e(B" . "\e$,44W5&4~\e(B") ("\e$,1?,?H\e(B" . "\e$,44W5&5'\e(B") ("\e$,1?,?J\e(B" . "\e$,44W5&5&5 \e(B") ("\e$,1?,?K\e(B" . "\e$,44W5&5&5 4~\e(B") ("\e$,1?,?L\e(B" . "\e$,44W5(\e(B")
    ("\e$,1?-?M\e(B" . "\e$,44Z5)\e(B") ("\e$,1?-\e(B" . "\e$,44Z\e(B") ("\e$,1?-??\e(B" . "\e$,44[\e(B") ("\e$,1?-?@\e(B" . "\e$,44[4~\e(B")
    ("\e$,1?.?M\e(B" . "\e$,44h5!5)\e(B") ("\e$,1?.\e(B" . "\e$,44h4z4\x7f\e(B") ("\e$,1?.?>\e(B" . "\e$,44h4z5!4{\e(B") ("\e$,1?.??\e(B" . "\e$,44i4\x7f\e(B") ("\e$,1?.?@\e(B" . "\e$,44i4\x7f4~\e(B") ("\e$,1?.?J\e(B". "\e$,44h5&5 \e(B") ("\e$,1?.?K\e(B". "\e$,44h5&5 4~\e(B")
    ("\e$,1?/?M\e(B" . "\e$,44^4z5!5)\e(B") ("\e$,1?/\e(B" . "\e$,44^4z4\x7f\e(B") ("\e$,1?/?>\e(B" . "\e$,44^4z5!4{\e(B")("\e$,1?/??\e(B" . "\e$,44_4\x7f\e(B") ("\e$,1?/?@\e(B" . "\e$,44_4\x7f4~\e(B") ("\e$,1?/?J\e(B" . "\e$,44^5&5 \e(B") ("\e$,1?/?K\e(B" . "\e$,44^5&5 4~\e(B")
    ("\e$,1?0?M\e(B" . "\e$,44a5)\e(B") ("\e$,1?0\e(B" . "\e$,44a\e(B") ("\e$,1?0??\e(B" . "\e$,44b\e(B") ("\e$,1?0?@\e(B" . "\e$,44b4~\e(B")
    ;; NOTE(review): the next row repeats the previous one verbatim; it
    ;; is harmless, but may have been meant for another consonant -- confirm.
    ("\e$,1?0?M\e(B" . "\e$,44a5)\e(B") ("\e$,1?0\e(B" . "\e$,44a\e(B") ("\e$,1?0??\e(B" . "\e$,44b\e(B") ("\e$,1?0?@\e(B" . "\e$,44b4~\e(B")
    ("\e$,1?2?M\e(B" . "\e$,44e5)\e(B") ("\e$,1?2\e(B" . "\e$,44d\e(B") ("\e$,1?2?>\e(B" . "\e$,44e4{\e(B") ("\e$,1?2??\e(B" . "\e$,44f\e(B") ("\e$,1?2?@\e(B" . "\e$,44f4~\e(B") ("\e$,1?2?F\e(B" . "\e$,44e5&\e(B") ("\e$,1?2?G\e(B" . "\e$,44e5&4~\e(B") ("\e$,1?2?H\e(B" . "\e$,44e5&5'\e(B") ("\e$,1?2?J\e(B" . "\e$,44e5&5&5 \e(B") ("\e$,1?2?K\e(B" . "\e$,44e5&5&5 4~\e(B") ("\e$,1?2?L\e(B" . "\e$,44e5(\e(B")
    ("\e$,1?5?M\e(B" . "\e$,44h5)\e(B") ("\e$,1?5\e(B" . "\e$,44h\e(B") ("\e$,1?5??\e(B" . "\e$,44i\e(B") ("\e$,1?5?@\e(B" . "\e$,44i4~\e(B") ("\e$,1?5?A\e(B" . "\e$,44h5"\e(B") ("\e$,1?5?B\e(B" . "\e$,44h5#\e(B") ("\e$,1?5?J\e(B" . "\e$,44h5&5#\e(B") ("\e$,1?5?K\e(B" . "\e$,44h5&5#4~\e(B")
    ("\e$,1?6?M\e(B" . "\e$,44k5)\e(B") ("\e$,1?6\e(B" . "\e$,44k\e(B") ("\e$,1?6??\e(B" . "\e$,44l\e(B") ("\e$,1?6?@\e(B" . "\e$,44l4~\e(B")
    ("\e$,1?7?M\e(B" . "\e$,44n5)\e(B") ("\e$,1?7\e(B" . "\e$,44n\e(B") ("\e$,1?7??\e(B" . "\e$,44o\e(B") ("\e$,1?7?@\e(B" . "\e$,44o4~\e(B")
    ("\e$,1?8?M\e(B" . "\e$,44q5)\e(B") ("\e$,1?8\e(B" . "\e$,44q\e(B") ("\e$,1?8??\e(B" . "\e$,44r\e(B") ("\e$,1?8?@\e(B" . "\e$,44r4~\e(B")
    ("\e$,1?9?M\e(B" . "\e$,44t5)\e(B") ("\e$,1?9\e(B" . "\e$,44t\e(B") ("\e$,1?9??\e(B" . "\e$,44u\e(B") ("\e$,1?9?@\e(B" . "\e$,44u4~\e(B")
    ("\e$,1?3?M\e(B" . "\e$,44w5)\e(B") ("\e$,1?3\e(B" . "\e$,44w\e(B") ("\e$,1?3??\e(B" . "\e$,44x\e(B") ("\e$,1?3?@\e(B" . "\e$,44x4~\e(B"))
  "Kannada characters to glyphs conversion table.
Default value contains only the basic rules.
Each element is (CHARS . GLYPHS): CHARS is a string of one or more
Kannada characters, GLYPHS the glyph string that replaces it, or nil.
The table is loaded into `knd-char-glyph-hash' in order, so when a
key occurs more than once the last entry wins.")
(defvar knd-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `knd-char-glyph': character string -> glyph string.")

(defvar knd-char-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-char-glyph-hash)
  "Regexp matching any key of `knd-char-glyph-hash'.")
(defvar knd-conjunct-glyph
  '(("\e$,1>u\e(B" . "\e$,43Q\e(B") ("\e$,1>v\e(B" . "\e$,43U\e(B") ("\e$,1>w\e(B" . "\e$,43X\e(B") ("\e$,1>x\e(B" . "\e$,43[\e(B") ("\e$,1>y\e(B" . "\e$,43]\e(B")
    ("\e$,1>z\e(B" . "\e$,43`\e(B") ("\e$,1>{\e(B" . "\e$,43c\e(B") ("\e$,1>|\e(B" . "\e$,43g\e(B") ("\e$,1>}\e(B" . "\e$,43i\e(B") ("\e$,1>~\e(B" . "\e$,43k\e(B")
    ("\e$,1>\x7f\e(B" . "\e$,43o\e(B") ("\e$,1? \e(B" . "\e$,43r\e(B") ("\e$,1?!\e(B" . "\e$,43u\e(B") ("\e$,1?"\e(B" . "\e$,43x\e(B") ("\e$,1?#\e(B" . "\e$,43|\e(B")
    ("\e$,1?$\e(B" . "\e$,44A\e(B") ("\e$,1?%\e(B" . "\e$,44D\e(B") ("\e$,1?&\e(B" . "\e$,44G\e(B") ("\e$,1?'\e(B" . "\e$,44J\e(B") ("\e$,1?(\e(B" . "\e$,44M\e(B")
    ("\e$,1?*\e(B" . "\e$,44P\e(B") ("\e$,1?+\e(B" . "\e$,44U\e(B") ("\e$,1?,\e(B" . "\e$,44Y\e(B") ("\e$,1?-\e(B" . "\e$,44\\e(B") ("\e$,1?.\e(B" . "\e$,44]\e(B")
    ("\e$,1?/\e(B" . "\e$,44`\e(B") ("\e$,1?0\e(B" . "\e$,44c\e(B") ("\e$,1?2\e(B" . "\e$,44g\e(B") ("\e$,1?3\e(B" . "\e$,44y\e(B") ("\e$,1?5\e(B" . "\e$,44j\e(B")
    ("\e$,1?6\e(B" . "\e$,44m\e(B") ("\e$,1?7\e(B" . "\e$,44p\e(B") ("\e$,1?8\e(B" . "\e$,44s\e(B") ("\e$,1?9\e(B" . "\e$,44v\e(B"))
  "Kannada characters to conjunct glyphs conversion table.
Each element is (CONSONANT . GLYPH), both one-character strings.
`kannada-compose-syllable-region' looks a consonant up here when it
is not the first consonant of the syllable.")
(defvar knd-conjunct-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-conjunct-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `knd-conjunct-glyph': consonant -> conjunct glyph.")

(defvar knd-conjunct-glyph-regexp
  (kannada-regexp-of-hashtbl-vals knd-conjunct-glyph-hash)
  "Regexp matching any value (conjunct glyph) of `knd-conjunct-glyph-hash'.")
;; Give the first character of every conjunct glyph string the
;; `reference-point' property (5 . 3).
(dolist (entry knd-conjunct-glyph)
  (put-char-code-property (aref (cdr entry) 0) 'reference-point '(5 . 3)))
281 ;; glyph-to-glyph conversion table.
282 ;; it is supposed that glyphs are ordered in
283 ;; [consonant/nukta] - [matra/virama] - [preceding-r] - [anuswar].
(defvar knd-glyph-glyph
  '(("\e$,45$4A\e(B" . "\e$,45*\e(B")
    ("\e$,45'4A\e(B" . "\e$,45+\e(B")
    ("\e$,44A3g\e(B" . "\e$,45,\e(B")
    ("\e$,45$3Q\e(B" . "\e$,45-\e(B"))
  "Glyph-to-glyph conversion table.
Each element is (GLYPHS . REPLACEMENT): a glyph sequence and the
single glyph string substituted for it during composition.")
(defvar knd-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-glyph-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `knd-glyph-glyph': glyph sequence -> replacement.")

(defvar knd-glyph-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-glyph-glyph-hash)
  "Regexp matching any key of `knd-glyph-glyph-hash'.")
(defun knd-charseq (from &optional to)
  "Return the list of Kannada glyph characters for codes FROM through TO.
TO defaults to FROM, giving a one-element list.  Each code is
converted with `indian-glyph-char' for the `kannada' script."
  (let ((codes (kannada-range from (or to from))))
    (mapcar (lambda (code) (indian-glyph-char code 'kannada))
            codes)))
(defvar knd-glyph-cv
  ;; Each element below is an argument list for `knd-charseq':
  ;; (FROM TO) for a range, (CODE) for a single glyph.  The resulting
  ;; lists are appended in the order given.
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#x40 #x50) (#x52 #x54) (#x56 #x57) (#x59 #x5a)
                   (#x5c) (#x5e #x5f) (#x61 #x62) (#x64 #x66)
                   (#x6a) (#x6c #x6e) (#x70 #x71) (#x73 #x74)
                   (#x76 #x77) (#x79 #x7b) (#x7d #x7e) (#xa2 #xa3)
                   (#xa5 #xa6) (#xa8 #xa9) (#xab #xac) (#xae #xaf)
                   (#xb1 #xb2) (#xb6 #xb8) (#xb6 #xb8) (#xba #xbb)
                   (#xbe #xbf) (#xc1 #xc2) (#xc4 #xc6) (#xc8 #xc9)
                   (#xcb #xcc) (#xce #xcf) (#xd1 #xd2) (#xd4 #xd5)
                   (#xd7 #xd8) (#xc3))))
  "Kannada Consonants/Vowels/Nukta Glyphs")
(defvar knd-glyph-space
  (knd-charseq #xb3 #xb4)
  "Kannada Spacing Glyphs")

(defvar knd-glyph-right-modifier
  ;; Argument lists for `knd-charseq', appended in order.
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#xdb #xdd) (#xdf) (#xe0 #xe3) (#xe9))))
  "Kannada Modifiers attached at the right side.")

(defvar knd-glyph-right-modifier-regexp
  (concat "[" knd-glyph-right-modifier "]")
  "Character-class regexp built from `knd-glyph-right-modifier'.")

(defvar knd-glyph-jha-tail
  (knd-charseq #x68)
  "Kannada tail for jha.")
(defvar knd-glyph-top-matra
  ;; Argument lists for `knd-charseq', appended in order.
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#xda) (#xdd) (#xe6) (#xe8))))
  "Kannada Matras attached at the top side.")

(defvar knd-glyph-bottom-matra
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#xe4 #xe5) (#xe7))))
  "Kannada Matras attached at the bottom.")

(defvar knd-glyph-end-marks
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#x25) (#x4d #x4e) (#xde))))
  "Kannada end marks: arkavattu, virama, au and diirghaa.")
(defvar knd-glyph-bottom-modifier
  ;; Argument lists for `knd-charseq': (CODE) for a single glyph,
  ;; (FROM TO) for a range.  The lists are appended in order.
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#x51) (#x55) (#x58) (#x5b) (#x5d) (#x60)
                   (#x63) (#x67) (#x69) (#x6b) (#x6f) (#x72)
                   (#x75) (#x78) (#x7c) (#xa1) (#xa4) (#xa7)
                   (#xaa) (#xad) (#xb0) (#xb5) (#xb9) (#xbc #xbd)
                   (#xc0) (#xc3) (#xc7) (#xca) (#xcd) (#xd0)
                   (#xd3) (#xd6) (#xd9) (#xea #xef))))
  "Kannada Modifiers attached at the bottom.")
(defvar knd-glyph-order
  `((,knd-glyph-cv . 1)
    (,knd-glyph-top-matra . 2)
    (,knd-glyph-jha-tail . 3)
    (,knd-glyph-right-modifier . 4)
    (,knd-glyph-space . 5)
    (,knd-glyph-bottom-modifier . 5)
    (,knd-glyph-bottom-matra . 6)
    (,knd-glyph-end-marks . 7))
  "Alist of (GLYPH-LIST . ORDER) giving each glyph class its sort rank.
ORDER becomes the `composition-order' property of every glyph in
GLYPH-LIST; `kannada-compose-syllable-region' sorts the glyphs of a
syllable by that property.  Spacing glyphs and bottom modifiers
share rank 5.")
;; Record each glyph's rank from `knd-glyph-order' as its
;; `composition-order' property.  A glyph listed in several classes
;; keeps the rank of the last class that lists it.
(dolist (class knd-glyph-order)
  (let ((order (cdr class)))
    (dolist (glyph (car class))
      (put-char-code-property glyph 'composition-order order))))
(defun kannada-compose-syllable-string (string)
  "Return STRING, taken as one Kannada syllable, in composed form.
STRING is passed through `decompose-string', inserted into a
temporary buffer and composed as a whole with
`kannada-compose-syllable-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
445 ;; kch
(defun kannada-compose-syllable-region (from to)
  "Compose kannada syllable in region FROM to TO.
The characters of the region are converted to glyphs through
`knd-char-glyph-hash' and `knd-conjunct-glyph-hash', adjusted through
`knd-glyph-glyph-hash', sorted by their `composition-order'
property, interleaved with reference points, and finally installed
on the region with `compose-region'.
If no character of the region has a glyph, the region is left
uncomposed."
  (let ((glyph-str nil) (cons-num 0) (glyph-str-list nil)
        (last-virama nil) (preceding-r nil)
        (last-char (char-before to)) match-str pos
        glyph-block split-pos (conj nil) (rest nil))
    (save-excursion
      (save-restriction
        ;;; *** char-to-glyph conversion ***
        ;; Special rule 1. -- Last virama must be preserved.
        (if (eq last-char ?\e$,1?M\e(B)
            (progn
              (setq last-virama t)
              (narrow-to-region from (1- to)))
          (narrow-to-region from to))
        (goto-char (point-min))
        ;; Special rule 2. -- preceding "r virama" must be modifier.
        (when (looking-at "\e$,1?0?M\e(B.")
          (setq preceding-r t)
          (goto-char (+ 2 (point))))
        ;; remove conjunct consonants
        (while (re-search-forward knd-char-glyph-regexp nil t)
          (setq match-str (match-string 0))
          ;; Every consonant after the first is moved out of the main
          ;; string: its conjunct glyph is collected in CONJ, and the
          ;; virama that joined it is dropped from REST.
          (if (and (string-match kannada-consonant match-str)
                   (> cons-num 0))
              (progn
                (setq conj (concat conj (gethash (match-string 0 match-str)
                                                 knd-conjunct-glyph-hash)))
                (setq match-str (replace-match "" t nil match-str))
                (if (string-match "\e$,1?M\e(B" rest)
                    (setq rest (replace-match "" t nil rest)))))
          (setq rest (concat rest match-str))
          ;; count the number of consonant-glyphs.
          (if (string-match kannada-consonant match-str)
              (setq cons-num (1+ cons-num))))
        ;; translate the rest characters into glyphs
        ;; REST is still nil when nothing in the region matched the
        ;; table; `string-match' would signal an error on it.
        (setq pos 0)
        (while (and rest (string-match knd-char-glyph-regexp rest pos))
          (setq match-str (match-string 0 rest))
          (setq pos (match-end 0))
          (setq glyph-str
                (concat glyph-str (gethash match-str knd-char-glyph-hash))))

        ;; Append the conjunct glyphs.  Done unconditionally so that
        ;; GLYPH-STR is always a string from here on, possibly empty.
        (setq glyph-str (concat glyph-str conj))
        (if last-virama (setq glyph-str (concat glyph-str "\e$,45)\e(B"))
          (goto-char (point-min))
          (if (re-search-forward kannada-consonant-needs-twirl nil t)
              (setq glyph-str (concat glyph-str "\e$,44z\e(B"))))
        ;; preceding-r must be attached
        (if preceding-r
            (setq glyph-str (concat glyph-str "\e$,43%\e(B")))
        ;;; *** glyph-to-glyph conversion ***
        (when (string-match knd-glyph-glyph-regexp glyph-str)
          (setq glyph-str
                (replace-match (gethash (match-string 0 glyph-str)
                                        knd-glyph-glyph-hash)
                               nil t glyph-str)))
        ;;; *** glyph reordering ***
        ;; Cut GLYPH-STR after each virama glyph (or after its last
        ;; character) and sort every piece by composition order.
        (while (setq split-pos (string-match "\e$,45)\e(B\\|.$" glyph-str))
          (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
          (setq glyph-str (substring glyph-str (1+ split-pos)))
          (setq
           glyph-block
           (sort (string-to-list glyph-block)
                 (function (lambda (x y)
                             (< (get-char-code-property x 'composition-order)
                                (get-char-code-property y 'composition-order))))))
          (setq glyph-str-list (nconc glyph-str-list glyph-block)))
        ;;; *** insert space glyphs for kerning ***
        (if (> cons-num 0)
            (let ((curr glyph-str-list) (prev nil) (last-bott nil) bott co)
              (while curr
                (setq co (get-char-code-property
                          (car curr) 'composition-order)
                      bott (or (eq co 5) (eq co 6)))
                ;; Two bottom-attached glyphs in a row get a spacing
                ;; glyph spliced in between them.
                (if (and bott last-bott)
                    (setcdr prev (cons ?\e$,44T\e(B curr)))
                (setq last-bott bott prev curr curr (cdr curr)))))
        ;; concatenate and attach reference-points.
        ;; Each glyph is preceded by its reference point; the `cdr'
        ;; drops the one in front of the first glyph.
        (setq glyph-str
              (cdr
               (apply
                'nconc
                (mapcar
                 (function (lambda (x)
                             (list
                              (or (get-char-code-property x 'reference-point)
                                  '(5 . 3)) ;; default reference point.
                              x)))
                 glyph-str-list))))))
    ;; Nothing to install when no glyph was produced.
    (when glyph-str
      (compose-region from to glyph-str))))
541 (provide 'knd-util)
543 ;;; arch-tag: 78d32230-a960-46a5-b622-61ed6ffcf8fc
544 ;;; knd-util.el ends here