New file.
[emacs.git] / lisp / language / tml-util.el
blobbb8c8f19e04eac6b6e072e16008f95df2546dfbd
1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
25 ;; Created: Nov. 08. 2002
27 ;;; Commentary:
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Tamil script characters.
32 ;;; Code:
34 ;; Tamil Composable Pattern
35 ;; C .. Consonants
36 ;; V .. Vowel
37 ;; H .. Pulli
38 ;; M .. Matra
39 ;; V .. Vowel
40 ;; A .. Anuswar
41 ;; D .. Chandrabindu
42 ;; 1. vowel
43 ;; V
44 ;; 2. syllable : only ligature-formed pattern forms composition.
45 ;; (CkHCs|C)(H|M)?
46 ;; 3. sri special
47 ;; (CsHCrVi)
49 ;; oririnal
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
52 (defconst tamil-consonant
53 "[\e$,1<5\e(B-\e$,1<Y\e(B]")
55 (defconst tamil-composable-pattern
56 (concat
57 "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
58 "[\e$,1<"<#\e(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:\e$,1<5<m<W\e(B\\)\\|[\e$,1<5\e(B-\e$,1<Y\e(B]\\)[\e$,1<m<^\e(B-\e$,1<l\e(B]?\\)\\|"
60 "\\(\e$,1<W<m<P<`\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
63 ;;;###autoload
64 (defun tamil-compose-region (from to)
65 (interactive "r")
66 (save-excursion
67 (save-restriction
68 (narrow-to-region from to)
69 (goto-char (point-min))
70 (while (re-search-forward tamil-composable-pattern nil t)
71 (tamil-compose-syllable-region (match-beginning 0)
72 (match-end 0))))))
73 (defun tamil-compose-string (string)
74 (with-temp-buffer
75 (insert (decompose-string string))
76 (tamil-compose-region (point-min) (point-max))
77 (buffer-string)))
79 (defun tamil-post-read-conversion (len)
80 (save-excursion
81 (save-restriction
82 (let ((buffer-modified-p (buffer-modified-p)))
83 (narrow-to-region (point) (+ (point) len))
84 (tamil-compose-region (point-min) (point-max))
85 (set-buffer-modified-p buffer-modified-p)
86 (- (point-max) (point-min))))))
88 (defun tamil-range (from to)
89 "Make the list of the integers of range FROM to TO."
90 (let (result)
91 (while (<= from to) (setq result (cons to result) to (1- to))) result))
93 (defun tamil-regexp-of-hashtbl-keys (hashtbl)
94 "Return a regular expression that matches all keys in hashtable HASHTBL."
95 (let ((max-specpdl-size 1000))
96 (regexp-opt
97 (sort
98 (let (dummy)
99 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
100 dummy)
101 (function (lambda (x y) (> (length x) (length y))))))))
104 ;;;###autoload
105 (defun tamil-composition-function (from to pattern &optional string)
106 "Compose Tamil characters in REGION, or STRING if specified.
107 Assume that the REGION or STRING must fully match the composable
108 PATTERN regexp."
109 (if string (tamil-compose-syllable-string string)
110 (tamil-compose-syllable-region from to))
111 (- to from))
113 ;; Register a function to compose Tamil characters.
114 (mapc
115 (function (lambda (ucs)
116 (aset composition-function-table (decode-char 'ucs ucs)
117 (list (cons tamil-composable-pattern
118 'tamil-composition-function)))))
119 (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
121 ;; Notes on conversion steps.
123 ;; 1. chars to glyphs
124 ;; Simple replacement of characters to glyphs is done.
126 ;; 2. glyphs reordering.
127 ;; following "\e$,4)j\e(B", "\e$,4)k\e(B", "\e$,4)l\e(B" goes to the front.
129 ;; 3. glyphs to glyphs
130 ;; reordered vowels are ligatured to consonants.
132 ;; 4. Composition.
133 ;; left modifiers will be attached at the left.
134 ;; others will be attached right.
136 (defvar tml-char-glyph
137 '(;; various signs
138 ;;("\e$,1<"\e(B" . "")
139 ("\e$,1<#\e(B" . "\e$,4*G\e(B")
140 ;; Independent Vowels
141 ("\e$,1<%\e(B" . "\e$,4*<\e(B")
142 ("\e$,1<&\e(B" . "\e$,4*=\e(B")
143 ("\e$,1<'\e(B" . "\e$,4*>\e(B")
144 ("\e$,1<(\e(B" . "\e$,4*?\e(B")
145 ("\e$,1<)\e(B" . "\e$,4*@\e(B")
146 ("\e$,1<*\e(B" . "\e$,4*A\e(B")
147 ("\e$,1<.\e(B" . "\e$,4*B\e(B")
148 ("\e$,1</\e(B" . "\e$,4*C\e(B")
149 ("\e$,1<0\e(B" . "\e$,4*D\e(B")
150 ("\e$,1<2\e(B" . "\e$,4*E\e(B")
151 ("\e$,1<3\e(B" . "\e$,4*F\e(B")
152 ("\e$,1<4\e(B" . "\e$,4*E*W\e(B")
153 ;; Consonants
154 ("\e$,1<5<m<W<m\e(B" . "\e$,4):\e(B") ; ks.
155 ("\e$,1<5<m<W\e(B" . "\e$,4*^\e(B") ; ks
156 ("\e$,1<5\e(B" . "\e$,4*H\e(B")
158 ("\e$,1<9\e(B" . "\e$,4*I\e(B")
159 ("\e$,1<:\e(B" . "\e$,4*J\e(B")
160 ("\e$,1<<\e(B" . "\e$,4*\\e(B")
161 ("\e$,1<<<m\e(B" . "\e$,4)8\e(B")
162 ("\e$,1<>\e(B" . "\e$,4*K\e(B")
163 ("\e$,1<?\e(B" . "\e$,4*L\e(B")
164 ("\e$,1<C\e(B" . "\e$,4*M\e(B")
165 ("\e$,1<D\e(B" . "\e$,4*N\e(B")
166 ("\e$,1<H\e(B" . "\e$,4*O\e(B")
167 ("\e$,1<I\e(B" . "\e$,4*Y\e(B")
168 ("\e$,1<I<m\e(B" . "\e$,4)a\e(B")
169 ("\e$,1<J\e(B" . "\e$,4*P\e(B")
170 ("\e$,1<N\e(B" . "\e$,4*Q\e(B")
171 ("\e$,1<O\e(B" . "\e$,4*R\e(B")
172 ("\e$,1<P\e(B" . "\e$,4*S\e(B")
173 ("\e$,1<Q\e(B" . "\e$,4*X\e(B")
174 ("\e$,1<R\e(B" . "\e$,4*T\e(B")
175 ("\e$,1<S\e(B" . "\e$,4*W\e(B")
176 ("\e$,1<T\e(B" . "\e$,4*V\e(B")
177 ("\e$,1<U\e(B" . "\e$,4*U\e(B")
178 ("\e$,1<W\e(B" . "\e$,4*[\e(B")
179 ("\e$,1<W<m\e(B" . "\e$,4)7\e(B")
180 ("\e$,1<W<m<P<`\e(B" . "\e$,4*_\e(B")
181 ("\e$,1<X\e(B" . "\e$,4*Z\e(B")
182 ("\e$,1<X<m\e(B" . "\e$,4)6\e(B")
183 ("\e$,1<Y\e(B" . "\e$,4*]\e(B")
184 ("\e$,1<Y<m\e(B" . "\e$,4)9\e(B")
186 ;; Dependent vowel signs
187 ("\e$,1<^\e(B" . "\e$,4)c\e(B")
188 ("\e$,1<_\e(B" . "\e$,4)d\e(B")
189 ("\e$,1<`\e(B" . "\e$,4)f\e(B")
190 ("\e$,1<a\e(B" . "\e$,4)g\e(B")
191 ("\e$,1<b\e(B" . "\e$,4)h\e(B")
192 ("\e$,1<f\e(B" . "\e$,4)j\e(B")
193 ("\e$,1<g\e(B" . "\e$,4)k\e(B")
194 ("\e$,1<h\e(B" . "\e$,4)l\e(B")
195 ("\e$,1<j\e(B" . "\e$,4)j)c\e(B")
196 ("\e$,1<k\e(B" . "\e$,4)k)c\e(B")
197 ("\e$,1<l\e(B" . "\e$,4)j*W\e(B")
199 ;; Various signs
200 ("\e$,1<m\e(B" . "\e$,4)b\e(B")
201 ("\e$,1<w\e(B" . "nil") ;; not supported?
204 (defvar tml-char-glyph-hash
205 (let* ((hash (make-hash-table :test 'equal)))
206 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
207 tml-char-glyph)
208 hash))
210 (defvar tml-char-glyph-regexp
211 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
213 ;; Tamil languages needed to be reordered.
215 (defvar tml-consonants-regexp
216 "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]")
218 (defvar tml-glyph-reorder-key-glyphs "[\e$,4)j)k)l\e(B]")
220 (defvar tml-glyph-reordering-regexp-list
221 (cons
222 (concat "\\(" tml-consonants-regexp "\\)\\([\e$,4)j)k)l\e(B]\\)") "\\2\\1"))
224 ;; Tamil vowel modifiers to be ligatured.
225 (defvar tml-glyph-glyph
227 ("\e$,4*H)d\e(B" . "\e$,4(a\e(B") ; ki
228 ("\e$,4*^)d\e(B" . "\e$,4(v\e(B") ; ksi
229 ("\e$,4*^)f\e(B" . "\e$,4)2\e(B") ; ksi~
230 ("\e$,4*I)d\e(B" . "\e$,4(b\e(B") ; n^i
231 ("\e$,4*J)d\e(B" . "\e$,4(c\e(B") ; ci
232 ("\e$,4*K)d\e(B" . "\e$,4(d\e(B") ; n~i
233 ("\e$,4*L)d\e(B" . "\e$,4)n\e(B") ; t.i
234 ("\e$,4*M)d\e(B" . "\e$,4(e\e(B") ; n.i
235 ("\e$,4*N)d\e(B" . "\e$,4(f\e(B") ; ti
236 ("\e$,4*O)d\e(B" . "\e$,4(g\e(B") ; ni
237 ("\e$,4*P)d\e(B" . "\e$,4(h\e(B") ; pi
238 ("\e$,4*Q)d\e(B" . "\e$,4(i\e(B") ; mi
239 ("\e$,4*R)d\e(B" . "\e$,4(j\e(B") ; yi
240 ("\e$,4*S)d\e(B" . "\e$,4(k\e(B") ; ri
241 ("\e$,4*T)d\e(B" . "\e$,4(l\e(B") ; li
242 ("\e$,4*U)d\e(B" . "\e$,4(m\e(B") ; vi
243 ("\e$,4*V)d\e(B" . "\e$,4(n\e(B") ; l_i
244 ("\e$,4*W)d\e(B" . "\e$,4(o\e(B") ; l.i
245 ("\e$,4*X)d\e(B" . "\e$,4(p\e(B") ; r_i
246 ("\e$,4*Y)d\e(B" . "\e$,4(q\e(B") ; n_i
247 ("\e$,4*Z)d\e(B" . "\e$,4(r\e(B") ; si
248 ("\e$,4*[)d\e(B" . "\e$,4(s\e(B") ; s'i
249 ("\e$,4*\)d\e(B" . "\e$,4(t\e(B") ; ji
250 ("\e$,4*])d\e(B" . "\e$,4(u\e(B") ; hi
252 ("\e$,4*H)f\e(B" . "\e$,4(w\e(B") ; ki~
253 ("\e$,4*I)f\e(B" . "\e$,4(x\e(B") ; n^i~
254 ("\e$,4*J)f\e(B" . "\e$,4(y\e(B") ; ci~
255 ("\e$,4*K)f\e(B" . "\e$,4(z\e(B") ; n~i~
256 ("\e$,4*L)f\e(B" . "\e$,4)o\e(B") ; t.i~
257 ("\e$,4*M)f\e(B" . "\e$,4)!\e(B") ; n.i~
258 ("\e$,4*N)f\e(B" . "\e$,4)"\e(B") ; ti~
259 ("\e$,4*O)f\e(B" . "\e$,4)#\e(B") ; ni~
260 ("\e$,4*P)f\e(B" . "\e$,4)$\e(B") ; pi~
261 ("\e$,4*Q)f\e(B" . "\e$,4)%\e(B") ; mi~
262 ("\e$,4*R)f\e(B" . "\e$,4)&\e(B") ; yi~
263 ("\e$,4*S)f\e(B" . "\e$,4)'\e(B") ; ri~
264 ("\e$,4*T)f\e(B" . "\e$,4)(\e(B") ; li~
265 ("\e$,4*U)f\e(B" . "\e$,4))\e(B") ; vi~
266 ("\e$,4*V)f\e(B" . "\e$,4)*\e(B") ; l_i~
267 ("\e$,4*W)f\e(B" . "\e$,4)+\e(B") ; l.i~
268 ("\e$,4*X)f\e(B" . "\e$,4),\e(B") ; r_i~
269 ("\e$,4*Y)f\e(B" . "\e$,4)-\e(B") ; n_i~
270 ("\e$,4*Z)f\e(B" . "\e$,4).\e(B") ; si~
271 ("\e$,4*[)f\e(B" . "\e$,4)/\e(B") ; s'i~
272 ("\e$,4*\)f\e(B" . "\e$,4)0\e(B") ; ji~
273 ("\e$,4*])f\e(B" . "\e$,4)1\e(B") ; hi~
275 ("\e$,4*H)g\e(B" . "\e$,4)p\e(B") ; ku
276 ("\e$,4*I)g\e(B" . "\e$,4)q\e(B") ; n^u
277 ("\e$,4*J)g\e(B" . "\e$,4)r\e(B") ; cu
278 ("\e$,4*K)g\e(B" . "\e$,4)s\e(B") ; n~u
279 ("\e$,4*L)g\e(B" . "\e$,4)t\e(B") ; t.u
280 ("\e$,4*M)g\e(B" . "\e$,4)u\e(B") ; n.u
281 ("\e$,4*N)g\e(B" . "\e$,4)v\e(B") ; tu
282 ("\e$,4*O)g\e(B" . "\e$,4)x\e(B") ; nu
283 ("\e$,4*P)g\e(B" . "\e$,4)y\e(B") ; pu
284 ("\e$,4*Q)g\e(B" . "\e$,4)z\e(B") ; mu
285 ("\e$,4*R)g\e(B" . "\e$,4){\e(B") ; yu
286 ("\e$,4*S)g\e(B" . "\e$,4)|\e(B") ; ru
287 ("\e$,4*T)g\e(B" . "\e$,4)}\e(B") ; lu
288 ("\e$,4*U)g\e(B" . "\e$,4)~\e(B") ; vu
289 ("\e$,4*V)g\e(B" . "\e$,4)\x7f\e(B") ; l_u
290 ("\e$,4*W)g\e(B" . "\e$,4* \e(B") ; l.u
291 ("\e$,4*X)g\e(B" . "\e$,4*!\e(B") ; r_u
292 ("\e$,4*Y)g\e(B" . "\e$,4*"\e(B") ; n_u
294 ("\e$,4*H)h\e(B" . "\e$,4*#\e(B") ; ku~
295 ("\e$,4*I)h\e(B" . "\e$,4*$\e(B") ; n^u~
296 ("\e$,4*J)h\e(B" . "\e$,4*%\e(B") ; cu~
297 ("\e$,4*K)h\e(B" . "\e$,4*&\e(B") ; n~u~
298 ("\e$,4*L)h\e(B" . "\e$,4*'\e(B") ; t.u~
299 ("\e$,4*M)h\e(B" . "\e$,4*(\e(B") ; n.u~
300 ("\e$,4*N)h\e(B" . "\e$,4*)\e(B") ; tu~
301 ("\e$,4*O)h\e(B" . "\e$,4*+\e(B") ; nu~
302 ("\e$,4*P)h\e(B" . "\e$,4*,\e(B") ; pu~
303 ("\e$,4*Q)h\e(B" . "\e$,4*-\e(B") ; mu~
304 ("\e$,4*R)h\e(B" . "\e$,4*.\e(B") ; yu~
305 ("\e$,4*S)h\e(B" . "\e$,4*/\e(B") ; ru~
306 ("\e$,4*T)h\e(B" . "\e$,4*6\e(B") ; lu~
307 ("\e$,4*U)h\e(B" . "\e$,4*7\e(B") ; vu~
308 ("\e$,4*V)h\e(B" . "\e$,4*8\e(B") ; l_u~
309 ("\e$,4*W)h\e(B" . "\e$,4*9\e(B") ; l.u~
310 ("\e$,4*X)h\e(B" . "\e$,4*:\e(B") ; r_u~
311 ("\e$,4*Y)h\e(B" . "\e$,4*;\e(B") ; n_u~
314 (defvar tml-glyph-glyph-hash
315 (let* ((hash (make-hash-table :test 'equal)))
316 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
317 tml-glyph-glyph)
318 hash))
320 (defvar tml-glyph-glyph-regexp
321 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
323 (defun tamil-compose-syllable-string (string)
324 (with-temp-buffer
325 (insert (decompose-string string))
326 (tamil-compose-syllable-region (point-min) (point-max))
327 (buffer-string)))
329 (defun tamil-compose-syllable-region (from to)
330 "Compose tamil syllable in region FROM to TO."
331 (let (glyph-str match-str glyph-reorder-regexps)
332 (save-excursion
333 (save-restriction
334 (narrow-to-region from to)
335 (goto-char (point-min))
336 ;; char-glyph-conversion
337 (while (re-search-forward tml-char-glyph-regexp nil t)
338 (setq match-str (match-string 0))
339 (setq glyph-str
340 (concat glyph-str (gethash match-str tml-char-glyph-hash))))
341 ;; glyph reordering
342 (when (string-match tml-glyph-reorder-key-glyphs glyph-str)
343 (if (string-match (car tml-glyph-reordering-regexp-list)
344 glyph-str)
345 (setq glyph-str
346 (replace-match (cdr tml-glyph-reordering-regexp-list)
347 nil nil glyph-str))))
348 ;; glyph-glyph-conversion
349 (when (string-match tml-glyph-glyph-regexp glyph-str)
350 (setq match-str (match-string 0 glyph-str))
351 (setq glyph-str
352 (replace-match (gethash match-str tml-glyph-glyph-hash)
353 nil nil glyph-str)))
354 ;; concatenate and attach reference-points.
355 (setq glyph-str
356 (cdr
357 (apply
358 'nconc
359 (mapcar
360 (function
361 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
362 glyph-str))))
363 (compose-region from to glyph-str)))))
365 (provide 'tml-util)
367 ;;; tml-util.el ends here