1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
25 ;; Created: Nov. 08. 2002
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Tamil script characters.
34 ;; Tamil Composable Pattern
44 ;; 2. syllable : only ligature-formed pattern forms composition.
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
;; Regexp character class with a single range; going by the name it
;; matches one Tamil consonant.  The two endpoints of the range are
;; written in the iso-2022-7bit coding declared on the first line of
;; this file.
52 (defconst tamil-consonant
53 "[\e$,1<5\e(B-\e$,1<Y\e(B]")
;; Regexp describing one composable run of Tamil text.  It is the
;; pattern `tamil-compose-region' searches for with `re-search-forward'
;; and the pattern stored, paired with `tamil-composition-function',
;; in `composition-function-table'.
;; NOTE(review): in this copy the line that opens the value form
;; (listing line 56) is missing and the string pieces are broken across
;; lines; compare against a pristine copy before editing the regexp.
55 (defconst tamil-composable-pattern
57 "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
58 "[\e$,1<"<#\e(B]\\|
" ;; vowel modifier considered independent
59 "\\(\\(?
:\\(?
:\e$
,1<5<m
<W
\e(B\\)\\|
[\e$
,1<5\e(B-\e$
,1<Y
\e(B]\\)[\e$
,1<m
<^
\e(B-\e$
,1<l
\e(B]?
\\)\\|
"
60 "\\(\e$
,1<W
<m
<P
<`\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.
")
;; Compose the Tamil text between FROM and TO.  The visible steps are:
;; narrow to FROM..TO, move to the start of the narrowed region, then
;; for every match of `tamil-composable-pattern' call
;; `tamil-compose-syllable-region' starting at the beginning of the
;; match.
;; NOTE(review): listing lines 65-67 and 72 are missing from this copy,
;; so the docstring, any enclosing forms, the second argument of the
;; call and the closing parentheses are not visible here.
64 (defun tamil-compose-region (from to)
68 (narrow-to-region from to)
69 (goto-char (point-min))
70 (while (re-search-forward tamil-composable-pattern nil t)
71 (tamil-compose-syllable-region (match-beginning 0)
;; String counterpart of `tamil-compose-region': insert the result of
;; `decompose-string' on STRING into the current buffer and run
;; `tamil-compose-region' over the whole accessible portion.
;; NOTE(review): listing lines 74 and 77-78 are missing; presumably they
;; set up a scratch buffer and return its contents -- confirm against a
;; complete copy.
73 (defun tamil-compose-string (string)
75 (insert (decompose-string string))
76 (tamil-compose-region (point-min) (point-max))
;; Compose the LEN characters that follow point.  The modified flag of
;; the buffer is remembered first and restored after composing, so the
;; composition itself does not mark the buffer as modified.  The value
;; of the last form is the size of the narrowed region after
;; composition.
;; NOTE(review): listing lines 81-82 are missing; the extra closing
;; parentheses on the last line indicate two enclosing forms that are
;; not visible in this copy.
80 (defun tamil-post-read-conversion (len)
83 (let ((buffer-modified-p (buffer-modified-p)))
;; Restrict all work to the LEN characters starting at point.
84 (narrow-to-region (point) (+ (point) len))
85 (tamil-compose-region (point-min) (point-max))
86 (set-buffer-modified-p buffer-modified-p)
87 (- (point-max) (point-min))))))
(defun tamil-range (from to)
  "Return the list of the integers from FROM to TO, inclusive.
The list is in ascending order.  Return nil when FROM is greater
than TO."
  ;; `result' has to be bound locally: without this `let' the closing
  ;; parentheses of the definition do not balance and the accumulator
  ;; would be a free variable shared between calls.
  (let (result)
    ;; Count TO downwards and push onto the front, so the finished list
    ;; comes out ascending without a final reversal.
    (while (<= from to)
      (setq result (cons to result)
            to (1- to)))
    result))
;; Build a regexp from the keys of HASHTBL.  In the visible lines,
;; `max-specpdl-size' is bound to 1000 around the work, the `maphash'
;; lambda pushes every key onto `dummy', and the final lambda is a
;; two-argument predicate that is true when its first argument is the
;; longer one.
;; NOTE(review): listing lines 97-99 and 101 are missing.  Presumably
;; they bind `dummy', sort the collected keys with the predicate and
;; turn the sorted list into a regexp -- confirm against a complete
;; copy.
94 (defun tamil-regexp-of-hashtbl-keys (hashtbl)
95 "Return a regular expression that matches all keys in hashtable HASHTBL.
"
96 (let ((max-specpdl-size 1000))
100 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
102 (function (lambda (x y) (> (length x) (length y))))))))
;; Function paired with `tamil-composable-pattern' in
;; `composition-function-table'.  When STRING is non-nil it is composed
;; with `tamil-compose-syllable-string'; otherwise the buffer text
;; between FROM and TO is composed with `tamil-compose-syllable-region'.
;; PATTERN is not referenced in the lines visible here.
;; NOTE(review): listing lines 109 and 112-113 are missing, so the end
;; of the docstring and the return value are not visible in this copy.
106 (defun tamil-composition-function (from to pattern &optional string)
107 "Compose Tamil characters in REGION
, or STRING if specified.
108 Assume that the REGION or STRING must fully match the composable
110 (if string (tamil-compose-syllable-string string)
111 (tamil-compose-syllable-region from to))
114 ;; Register a function to compose Tamil characters.
;; The lambda is applied to each code point of the list built on the
;; last line (#x0b82, #x0b83 and the range #x0b85..#x0bb9).  For each
;; one it decodes the code point to a character and stores the
;; one-element list ((tamil-composable-pattern . tamil-composition-function))
;; in that character's slot of `composition-function-table'.
;; NOTE(review): the line that opens this form (listing line 115) is
;; missing from this copy.
;; NOTE(review): `nconc' is given a quoted constant list as its first
;; argument and modifies it destructively; `append' would avoid
;; mutating a literal.
116 (function (lambda (ucs)
117 (aset composition-function-table (decode-char 'ucs ucs)
118 (list (cons tamil-composable-pattern
119 'tamil-composition-function)))))
120 (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
122 ;; Notes on conversion steps.
124 ;; 1. chars to glyphs
125 ;; Simple replacement of characters to glyphs is done.
127 ;; 2. glyphs reordering.
128 ;; the following glyphs, "\e$,4)j\e(B", "\e$,4)k\e(B" and "\e$,4)l\e(B", go to the front.
130 ;; 3. glyphs to glyphs
131 ;; reordered vowels are ligatured to consonants.
134 ;; left modifiers will be attached at the left.
135 ;; others will be attached right.
;; Table for the chars-to-glyphs step: an alist whose entries have the
;; form (CHARS . GLYPHS), where CHARS is a Tamil character sequence and
;; GLYPHS the glyph string that stands for it.  Both sides are written
;; in the iso-2022-7bit coding declared on the first line of this file.
;; Multi-character keys such as consonant + sign sequences sit next to
;; the single-character keys they extend.
;; NOTE(review): presumably this is the list loaded into
;; `tml-char-glyph-hash'; the line naming the list there is missing
;; from this copy.
137 (defvar tml-char-glyph
139 ;;("\e$
,1<"\e(B" .
"")
140 ("\e$,1<#\e(B" .
"\e$,4*G\e(B")
141 ;; Independent Vowels
142 ("\e$,1<%\e(B" .
"\e$,4*<\e(B")
143 ("\e$,1<&\e(B" .
"\e$,4*=\e(B")
144 ("\e$,1<'\e(B" .
"\e$,4*>\e(B")
145 ("\e$,1<(\e(B" .
"\e$,4*?\e(B")
146 ("\e$,1<)\e(B" .
"\e$,4*@\e(B")
147 ("\e$,1<*\e(B" .
"\e$,4*A\e(B")
148 ("\e$,1<.\e(B" .
"\e$,4*B\e(B")
149 ("\e$,1</\e(B" .
"\e$,4*C\e(B")
150 ("\e$,1<0\e(B" .
"\e$,4*D\e(B")
151 ("\e$,1<2\e(B" .
"\e$,4*E\e(B")
152 ("\e$,1<3\e(B" .
"\e$,4*F\e(B")
153 ("\e$,1<4\e(B" .
"\e$,4*E*W\e(B")
155 ("\e$,1<5<m<W<m\e(B" .
"\e$,4):\e(B") ; ks.
156 ("\e$,1<5<m<W\e(B" .
"\e$,4*^\e(B") ; ks
157 ("\e$,1<5\e(B" .
"\e$,4*H\e(B")
159 ("\e$,1<9\e(B" .
"\e$,4*I\e(B")
160 ("\e$,1<:\e(B" .
"\e$,4*J\e(B")
161 ("\e$,1<<\e(B" .
"\e$,4*\\e(B")
162 ("\e$,1<<<m\e(B" .
"\e$,4)8\e(B")
163 ("\e$,1<>\e(B" .
"\e$,4*K\e(B")
164 ("\e$,1<?\e(B" .
"\e$,4*L\e(B")
165 ("\e$,1<C\e(B" .
"\e$,4*M\e(B")
166 ("\e$,1<D\e(B" .
"\e$,4*N\e(B")
167 ("\e$,1<H\e(B" .
"\e$,4*O\e(B")
168 ("\e$,1<I\e(B" .
"\e$,4*Y\e(B")
169 ("\e$,1<I<m\e(B" .
"\e$,4)a\e(B")
170 ("\e$,1<J\e(B" .
"\e$,4*P\e(B")
171 ("\e$,1<N\e(B" .
"\e$,4*Q\e(B")
172 ("\e$,1<O\e(B" .
"\e$,4*R\e(B")
173 ("\e$,1<P\e(B" .
"\e$,4*S\e(B")
174 ("\e$,1<Q\e(B" .
"\e$,4*X\e(B")
175 ("\e$,1<R\e(B" .
"\e$,4*T\e(B")
176 ("\e$,1<S\e(B" .
"\e$,4*W\e(B")
177 ("\e$,1<T\e(B" .
"\e$,4*V\e(B")
178 ("\e$,1<U\e(B" .
"\e$,4*U\e(B")
179 ("\e$,1<W\e(B" .
"\e$,4*[\e(B")
180 ("\e$,1<W<m\e(B" .
"\e$,4)7\e(B")
181 ("\e$,1<W<m<P<`\e(B" .
"\e$,4*_\e(B")
182 ("\e$,1<X\e(B" .
"\e$,4*Z\e(B")
183 ("\e$,1<X<m\e(B" .
"\e$,4)6\e(B")
184 ("\e$,1<Y\e(B" .
"\e$,4*]\e(B")
185 ("\e$,1<Y<m\e(B" .
"\e$,4)9\e(B")
187 ;; Dependent vowel signs
188 ("\e$,1<^\e(B" .
"\e$,4)c\e(B")
189 ("\e$,1<_\e(B" .
"\e$,4)d\e(B")
190 ("\e$,1<`\e(B" .
"\e$,4)f\e(B")
191 ("\e$,1<a\e(B" .
"\e$,4)g\e(B")
192 ("\e$,1<b\e(B" .
"\e$,4)h\e(B")
193 ("\e$,1<f\e(B" .
"\e$,4)j\e(B")
194 ("\e$,1<g\e(B" .
"\e$,4)k\e(B")
195 ("\e$,1<h\e(B" .
"\e$,4)l\e(B")
196 ("\e$,1<j\e(B" .
"\e$,4)j)c\e(B")
197 ("\e$,1<k\e(B" .
"\e$,4)k)c\e(B")
198 ("\e$,1<l\e(B" .
"\e$,4)j*W\e(B")
201 ("\e$,1<m\e(B" .
"\e$,4)b\e(B")
202 ("\e$,1<w\e(B" .
"nil") ;; not supported?
;; Hash table created with the `equal' test.  The lambda stores the car
;; of each element as key and its cdr as value.
;; `tamil-compose-syllable-region' looks matched text up in this table.
;; NOTE(review): listing lines 208-209 are missing, so the list being
;; mapped over (presumably `tml-char-glyph') and the form that returns
;; the table are not visible here.
205 (defvar tml-char-glyph-hash
206 (let* ((hash (make-hash-table :test
'equal
)))
207 (mapc (function (lambda (x) (puthash (car x
) (cdr x
) hash
)))
;; Regexp produced by `tamil-regexp-of-hashtbl-keys' from the keys of
;; `tml-char-glyph-hash'.  `tamil-compose-syllable-region' searches the
;; buffer for it during the char-glyph conversion.
211 (defvar tml-char-glyph-regexp
212 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash
))
214 ;; Tamil glyph sequences need to be reordered.
;; Character class of glyphs; going by the name these are the consonant
;; glyphs.  It is spliced into the first group of the reordering regexp
;; in `tml-glyph-reordering-regexp-list'.
216 (defvar tml-consonants-regexp
217 "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]")
;; Character class of three glyphs.  `tamil-compose-syllable-region'
;; matches the glyph string against it first and only attempts the
;; reordering when one of them is present.
219 (defvar tml-glyph-reorder-key-glyphs
"[\e$,4)j)k)l\e(B]")
;; Regexp and replacement used for glyph reordering.  The regexp
;; captures one glyph of `tml-consonants-regexp' as group 1 followed by
;; one of three glyphs as group 2; the replacement text \2\1 swaps the
;; two groups.  `tamil-compose-syllable-region' reads the regexp with
;; `car' and the replacement with `cdr'.
;; NOTE(review): the line that opens the value form (listing line 222)
;; is missing from this copy; the car/cdr usage suggests a single cons.
221 (defvar tml-glyph-reordering-regexp-list
223 (concat "\\(" tml-consonants-regexp
"\\)\\([\e$,4)j)k)l\e(B]\\)") "\\2\\1"))
225 ;; Tamil vowel modifiers to be ligatured.
;; Table for the glyphs-to-glyphs step: an alist whose entries have the
;; form (GLYPHS . LIGATURE), where GLYPHS is a two-glyph sequence and
;; LIGATURE the single glyph that replaces it.  The trailing comment on
;; each entry gives a romanized name for the syllable.
;; NOTE(review): presumably this is the list loaded into
;; `tml-glyph-glyph-hash'; the line naming the list there is missing
;; from this copy.
226 (defvar tml-glyph-glyph
228 ("\e$,4*H)d\e(B" .
"\e$,4(a\e(B") ; ki
229 ("\e$,4*^)d\e(B" .
"\e$,4(v\e(B") ; ksi
230 ("\e$,4*^)f\e(B" .
"\e$,4)2\e(B") ; ksi~
231 ("\e$,4*I)d\e(B" .
"\e$,4(b\e(B") ; n^i
232 ("\e$,4*J)d\e(B" .
"\e$,4(c\e(B") ; ci
233 ("\e$,4*K)d\e(B" .
"\e$,4(d\e(B") ; n~i
234 ("\e$,4*L)d\e(B" .
"\e$,4)n\e(B") ; t.i
235 ("\e$,4*M)d\e(B" .
"\e$,4(e\e(B") ; n.i
236 ("\e$,4*N)d\e(B" .
"\e$,4(f\e(B") ; ti
237 ("\e$,4*O)d\e(B" .
"\e$,4(g\e(B") ; ni
238 ("\e$,4*P)d\e(B" .
"\e$,4(h\e(B") ; pi
239 ("\e$,4*Q)d\e(B" .
"\e$,4(i\e(B") ; mi
240 ("\e$,4*R)d\e(B" .
"\e$,4(j\e(B") ; yi
241 ("\e$,4*S)d\e(B" .
"\e$,4(k\e(B") ; ri
242 ("\e$,4*T)d\e(B" .
"\e$,4(l\e(B") ; li
243 ("\e$,4*U)d\e(B" .
"\e$,4(m\e(B") ; vi
244 ("\e$,4*V)d\e(B" .
"\e$,4(n\e(B") ; l_i
245 ("\e$,4*W)d\e(B" .
"\e$,4(o\e(B") ; l.i
246 ("\e$,4*X)d\e(B" .
"\e$,4(p\e(B") ; r_i
247 ("\e$,4*Y)d\e(B" .
"\e$,4(q\e(B") ; n_i
248 ("\e$,4*Z)d\e(B" .
"\e$,4(r\e(B") ; si
249 ("\e$,4*[)d\e(B" .
"\e$,4(s\e(B") ; s'i
250 ("\e$,4*\)d\e(B" .
"\e$,4(t\e(B") ; ji
251 ("\e$,4*])d\e(B" .
"\e$,4(u\e(B") ; hi
253 ("\e$,4*H)f\e(B" .
"\e$,4(w\e(B") ; ki~
254 ("\e$,4*I)f\e(B" .
"\e$,4(x\e(B") ; n^i~
255 ("\e$,4*J)f\e(B" .
"\e$,4(y\e(B") ; ci~
256 ("\e$,4*K)f\e(B" .
"\e$,4(z\e(B") ; n~i~
257 ("\e$,4*L)f\e(B" .
"\e$,4)o\e(B") ; t.i~
258 ("\e$,4*M)f\e(B" .
"\e$,4)!\e(B") ; n.i~
259 ("\e$,4*N)f\e(B" .
"\e$,4)"\e(B") ; ti~
260 ("\e$
,4*O
)f
\e(B" . "\e$
,4)#\e(B") ; ni~
261 ("\e$
,4*P
)f
\e(B" . "\e$
,4)$
\e(B") ; pi~
262 ("\e$
,4*Q
)f
\e(B" . "\e$
,4)%
\e(B") ; mi~
263 ("\e$
,4*R
)f
\e(B" . "\e$
,4)&\e(B") ; yi~
264 ("\e$
,4*S
)f
\e(B" . "\e$
,4)'\e(B") ; ri~
265 ("\e$
,4*T
)f
\e(B" . "\e$
,4)(\e(B") ; li~
266 ("\e$
,4*U
)f
\e(B" . "\e$
,4))\e(B") ; vi~
267 ("\e$
,4*V
)f
\e(B" . "\e$
,4)*\e(B") ; l_i~
268 ("\e$
,4*W
)f
\e(B" . "\e$
,4)+\e(B") ; l.i~
269 ("\e$
,4*X
)f
\e(B" . "\e$
,4),\e(B") ; r_i~
270 ("\e$
,4*Y
)f
\e(B" . "\e$
,4)-
\e(B") ; n_i~
271 ("\e$
,4*Z
)f
\e(B" . "\e$
,4).
\e(B") ; si~
272 ("\e$
,4*[)f
\e(B" . "\e$
,4)/\e(B") ; s'i~
273 ("\e$
,4*\
)f
\e(B" . "\e$
,4)0\e(B") ; ji~
274 ("\e$
,4*])f
\e(B" . "\e$
,4)1\e(B") ; hi~
276 ("\e$
,4*H
)g
\e(B" . "\e$
,4)p
\e(B") ; ku
277 ("\e$
,4*I
)g
\e(B" . "\e$
,4)q
\e(B") ; n^u
278 ("\e$
,4*J
)g
\e(B" . "\e$
,4)r
\e(B") ; cu
279 ("\e$
,4*K
)g
\e(B" . "\e$
,4)s
\e(B") ; n~u
280 ("\e$
,4*L
)g
\e(B" . "\e$
,4)t
\e(B") ; t.u
281 ("\e$
,4*M
)g
\e(B" . "\e$
,4)u
\e(B") ; n.u
282 ("\e$
,4*N
)g
\e(B" . "\e$
,4)v
\e(B") ; tu
283 ("\e$
,4*O
)g
\e(B" . "\e$
,4)x
\e(B") ; nu
284 ("\e$
,4*P
)g
\e(B" . "\e$
,4)y
\e(B") ; pu
285 ("\e$
,4*Q
)g
\e(B" . "\e$
,4)z
\e(B") ; mu
286 ("\e$
,4*R
)g
\e(B" . "\e$
,4){\e(B") ; yu
287 ("\e$
,4*S
)g
\e(B" . "\e$
,4)|
\e(B") ; ru
288 ("\e$
,4*T
)g
\e(B" . "\e$
,4)}\e(B") ; lu
289 ("\e$
,4*U
)g
\e(B" . "\e$
,4)~
\e(B") ; vu
290 ("\e$
,4*V
)g
\e(B" . "\e$
,4)\x7f\e(B") ; l_u
291 ("\e$
,4*W
)g
\e(B" . "\e$
,4* \e(B") ; l.u
292 ("\e$
,4*X
)g
\e(B" . "\e$
,4*!\e(B") ; r_u
293 ("\e$
,4*Y
)g
\e(B" . "\e$
,4*"\e(B") ; n_u
295 ("\e$,4*H)h\e(B" .
"\e$,4*#\e(B") ; ku~
296 ("\e$,4*I)h\e(B" .
"\e$,4*$\e(B") ; n^u~
297 ("\e$,4*J)h\e(B" .
"\e$,4*%\e(B") ; cu~
298 ("\e$,4*K)h\e(B" .
"\e$,4*&\e(B") ; n~u~
299 ("\e$,4*L)h\e(B" .
"\e$,4*'\e(B") ; t.u~
300 ("\e$,4*M)h\e(B" .
"\e$,4*(\e(B") ; n.u~
301 ("\e$,4*N)h\e(B" .
"\e$,4*)\e(B") ; tu~
302 ("\e$,4*O)h\e(B" .
"\e$,4*+\e(B") ; nu~
303 ("\e$,4*P)h\e(B" .
"\e$,4*,\e(B") ; pu~
304 ("\e$,4*Q)h\e(B" .
"\e$,4*-\e(B") ; mu~
305 ("\e$,4*R)h\e(B" .
"\e$,4*.\e(B") ; yu~
306 ("\e$,4*S)h\e(B" .
"\e$,4*/\e(B") ; ru~
307 ("\e$,4*T)h\e(B" .
"\e$,4*6\e(B") ; lu~
308 ("\e$,4*U)h\e(B" .
"\e$,4*7\e(B") ; vu~
309 ("\e$,4*V)h\e(B" .
"\e$,4*8\e(B") ; l_u~
310 ("\e$,4*W)h\e(B" .
"\e$,4*9\e(B") ; l.u~
311 ("\e$,4*X)h\e(B" .
"\e$,4*:\e(B") ; r_u~
312 ("\e$,4*Y)h\e(B" .
"\e$,4*;\e(B") ; n_u~
;; Hash table created with the `equal' test.  The lambda stores the car
;; of each element as key and its cdr as value.
;; `tamil-compose-syllable-region' looks matched glyph sequences up in
;; this table.
;; NOTE(review): listing lines 318-319 are missing, so the list being
;; mapped over (presumably `tml-glyph-glyph') and the form that returns
;; the table are not visible here.
315 (defvar tml-glyph-glyph-hash
316 (let* ((hash (make-hash-table :test
'equal
)))
317 (mapc (function (lambda (x) (puthash (car x
) (cdr x
) hash
)))
;; Regexp produced by `tamil-regexp-of-hashtbl-keys' from the keys of
;; `tml-glyph-glyph-hash'.  `tamil-compose-syllable-region' matches the
;; glyph string against it during the glyph-glyph conversion.
321 (defvar tml-glyph-glyph-regexp
322 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash
))
;; String counterpart of `tamil-compose-syllable-region': insert the
;; result of `decompose-string' on STRING into the current buffer and
;; compose the whole accessible portion as one syllable.
;; NOTE(review): listing lines 325 and 328 are missing; presumably they
;; set up a scratch buffer and return its contents -- confirm against a
;; complete copy.
324 (defun tamil-compose-syllable-string (string)
326 (insert (decompose-string string
))
327 (tamil-compose-syllable-region (point-min) (point-max))
;; Compose the text between FROM and TO as one syllable.  The visible
;; steps are:
;;   1. Narrow to FROM..TO and scan it for `tml-char-glyph-regexp';
;;      each match is looked up in `tml-char-glyph-hash' and the result
;;      is appended to the glyph string.
;;   2. If the glyph string contains one of the
;;      `tml-glyph-reorder-key-glyphs', apply the regexp and replacement
;;      held in `tml-glyph-reordering-regexp-list' to it.
;;   3. If the glyph string matches `tml-glyph-glyph-regexp', replace
;;      the match with its entry from `tml-glyph-glyph-hash'.
;;   4. Attach reference points (the default shown is (5 . 3)) and call
;;      `compose-region' on FROM..TO with the result.
;; `glyph-reorder-regexps' is bound by the `let' but is not referenced
;; in the lines visible here.
;; NOTE(review): listing lines 333-334, 340, 342, 345-346, 352, 354,
;; 356-361 and 363 are missing from this copy, so several assignments
;; and enclosing forms cannot be seen; do not restructure this function
;; without a complete copy.
330 (defun tamil-compose-syllable-region (from to
)
331 "Compose tamil syllable in region FROM to TO."
332 (let (glyph-str match-str glyph-reorder-regexps
)
335 (narrow-to-region from to
)
336 (goto-char (point-min))
337 ;; char-glyph-conversion
338 (while (re-search-forward tml-char-glyph-regexp nil t
)
339 (setq match-str
(match-string 0))
341 (concat glyph-str
(gethash match-str tml-char-glyph-hash
))))
343 (when (string-match tml-glyph-reorder-key-glyphs glyph-str
)
344 (if (string-match (car tml-glyph-reordering-regexp-list
)
347 (replace-match (cdr tml-glyph-reordering-regexp-list
)
348 nil nil glyph-str
))))
349 ;; glyph-glyph-conversion
350 (when (string-match tml-glyph-glyph-regexp glyph-str
)
351 (setq match-str
(match-string 0 glyph-str
))
353 (replace-match (gethash match-str tml-glyph-glyph-hash
)
355 ;; concatenate and attach reference-points.
362 (lambda (x) (list '(5 .
3) x
))) ;; default ref. point.
364 (compose-region from to glyph-str
)))))
368 ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e
369 ;;; tml-util.el ends here