(latexenc-find-file-coding-system): Don't inherit the EOL part of the
[emacs.git] / lisp / language / tml-util.el
blob84fa0d01b0ed90c05cbd58f4f4187e808b2d0525
1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
25 ;; Created: Nov. 08. 2002
27 ;;; Commentary:
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Tamil script characters.
32 ;;; Code:
34 ;; Tamil Composable Pattern
35 ;; C .. Consonants
36 ;; V .. Vowel
37 ;; H .. Pulli
38 ;; M .. Matra
39 ;; V .. Vowel
40 ;; A .. Anuswar
41 ;; D .. Chandrabindu
42 ;; 1. vowel
43 ;; V
44 ;; 2. syllable : only ligature-formed pattern forms composition.
45 ;; (CkHCs|C)(H|M)?
46 ;; 3. sri special
47 ;; (CsHCrVi)
49 ;; oririnal
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
52 (defconst tamil-consonant
53 "[\e$,1<5\e(B-\e$,1<Y\e(B]")
55 (defconst tamil-composable-pattern
56 (concat
57 "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
58 "[\e$,1<"<#\e(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:\e$,1<5<m<W\e(B\\)\\|[\e$,1<5\e(B-\e$,1<Y\e(B]\\)[\e$,1<m<^\e(B-\e$,1<l\e(B]?\\)\\|"
60 "\\(\e$,1<W<m<P<`\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
63 ;;;###autoload
64 (defun tamil-compose-region (from to)
65 (interactive "r")
66 (save-excursion
67 (save-restriction
68 (narrow-to-region from to)
69 (goto-char (point-min))
70 (while (re-search-forward tamil-composable-pattern nil t)
71 (tamil-compose-syllable-region (match-beginning 0)
72 (match-end 0))))))
73 (defun tamil-compose-string (string)
74 (with-temp-buffer
75 (insert (decompose-string string))
76 (tamil-compose-region (point-min) (point-max))
77 (buffer-string)))
79 ;;;###autoload
80 (defun tamil-post-read-conversion (len)
81 (save-excursion
82 (save-restriction
83 (let ((buffer-modified-p (buffer-modified-p)))
84 (narrow-to-region (point) (+ (point) len))
85 (tamil-compose-region (point-min) (point-max))
86 (set-buffer-modified-p buffer-modified-p)
87 (- (point-max) (point-min))))))
89 (defun tamil-range (from to)
90 "Make the list of the integers of range FROM to TO."
91 (let (result)
92 (while (<= from to) (setq result (cons to result) to (1- to))) result))
94 (defun tamil-regexp-of-hashtbl-keys (hashtbl)
95 "Return a regular expression that matches all keys in hashtable HASHTBL."
96 (let ((max-specpdl-size 1000))
97 (regexp-opt
98 (sort
99 (let (dummy)
100 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
101 dummy)
102 (function (lambda (x y) (> (length x) (length y))))))))
105 ;;;###autoload
106 (defun tamil-composition-function (from to pattern &optional string)
107 "Compose Tamil characters in REGION, or STRING if specified.
108 Assume that the REGION or STRING must fully match the composable
109 PATTERN regexp."
110 (if string (tamil-compose-syllable-string string)
111 (tamil-compose-syllable-region from to))
112 (- to from))
114 ;; Register a function to compose Tamil characters.
115 (mapc
116 (function (lambda (ucs)
117 (aset composition-function-table (decode-char 'ucs ucs)
118 (list (cons tamil-composable-pattern
119 'tamil-composition-function)))))
120 (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
122 ;; Notes on conversion steps.
124 ;; 1. chars to glyphs
125 ;; Simple replacement of characters to glyphs is done.
127 ;; 2. glyphs reordering.
128 ;; following "\e$,4)j\e(B", "\e$,4)k\e(B", "\e$,4)l\e(B" goes to the front.
130 ;; 3. glyphs to glyphs
131 ;; reordered vowels are ligatured to consonants.
133 ;; 4. Composition.
134 ;; left modifiers will be attached at the left.
135 ;; others will be attached right.
137 (defvar tml-char-glyph
138 '(;; various signs
139 ;;("\e$,1<"\e(B" . "")
140 ("\e$,1<#\e(B" . "\e$,4*G\e(B")
141 ;; Independent Vowels
142 ("\e$,1<%\e(B" . "\e$,4*<\e(B")
143 ("\e$,1<&\e(B" . "\e$,4*=\e(B")
144 ("\e$,1<'\e(B" . "\e$,4*>\e(B")
145 ("\e$,1<(\e(B" . "\e$,4*?\e(B")
146 ("\e$,1<)\e(B" . "\e$,4*@\e(B")
147 ("\e$,1<*\e(B" . "\e$,4*A\e(B")
148 ("\e$,1<.\e(B" . "\e$,4*B\e(B")
149 ("\e$,1</\e(B" . "\e$,4*C\e(B")
150 ("\e$,1<0\e(B" . "\e$,4*D\e(B")
151 ("\e$,1<2\e(B" . "\e$,4*E\e(B")
152 ("\e$,1<3\e(B" . "\e$,4*F\e(B")
153 ("\e$,1<4\e(B" . "\e$,4*E*W\e(B")
154 ;; Consonants
155 ("\e$,1<5<m<W<m\e(B" . "\e$,4):\e(B") ; ks.
156 ("\e$,1<5<m<W\e(B" . "\e$,4*^\e(B") ; ks
157 ("\e$,1<5\e(B" . "\e$,4*H\e(B")
159 ("\e$,1<9\e(B" . "\e$,4*I\e(B")
160 ("\e$,1<:\e(B" . "\e$,4*J\e(B")
161 ("\e$,1<<\e(B" . "\e$,4*\\e(B")
162 ("\e$,1<<<m\e(B" . "\e$,4)8\e(B")
163 ("\e$,1<>\e(B" . "\e$,4*K\e(B")
164 ("\e$,1<?\e(B" . "\e$,4*L\e(B")
165 ("\e$,1<C\e(B" . "\e$,4*M\e(B")
166 ("\e$,1<D\e(B" . "\e$,4*N\e(B")
167 ("\e$,1<H\e(B" . "\e$,4*O\e(B")
168 ("\e$,1<I\e(B" . "\e$,4*Y\e(B")
169 ("\e$,1<I<m\e(B" . "\e$,4)a\e(B")
170 ("\e$,1<J\e(B" . "\e$,4*P\e(B")
171 ("\e$,1<N\e(B" . "\e$,4*Q\e(B")
172 ("\e$,1<O\e(B" . "\e$,4*R\e(B")
173 ("\e$,1<P\e(B" . "\e$,4*S\e(B")
174 ("\e$,1<Q\e(B" . "\e$,4*X\e(B")
175 ("\e$,1<R\e(B" . "\e$,4*T\e(B")
176 ("\e$,1<S\e(B" . "\e$,4*W\e(B")
177 ("\e$,1<T\e(B" . "\e$,4*V\e(B")
178 ("\e$,1<U\e(B" . "\e$,4*U\e(B")
179 ("\e$,1<W\e(B" . "\e$,4*[\e(B")
180 ("\e$,1<W<m\e(B" . "\e$,4)7\e(B")
181 ("\e$,1<W<m<P<`\e(B" . "\e$,4*_\e(B")
182 ("\e$,1<X\e(B" . "\e$,4*Z\e(B")
183 ("\e$,1<X<m\e(B" . "\e$,4)6\e(B")
184 ("\e$,1<Y\e(B" . "\e$,4*]\e(B")
185 ("\e$,1<Y<m\e(B" . "\e$,4)9\e(B")
187 ;; Dependent vowel signs
188 ("\e$,1<^\e(B" . "\e$,4)c\e(B")
189 ("\e$,1<_\e(B" . "\e$,4)d\e(B")
190 ("\e$,1<`\e(B" . "\e$,4)f\e(B")
191 ("\e$,1<a\e(B" . "\e$,4)g\e(B")
192 ("\e$,1<b\e(B" . "\e$,4)h\e(B")
193 ("\e$,1<f\e(B" . "\e$,4)j\e(B")
194 ("\e$,1<g\e(B" . "\e$,4)k\e(B")
195 ("\e$,1<h\e(B" . "\e$,4)l\e(B")
196 ("\e$,1<j\e(B" . "\e$,4)j)c\e(B")
197 ("\e$,1<k\e(B" . "\e$,4)k)c\e(B")
198 ("\e$,1<l\e(B" . "\e$,4)j*W\e(B")
200 ;; Various signs
201 ("\e$,1<m\e(B" . "\e$,4)b\e(B")
202 ("\e$,1<w\e(B" . "nil") ;; not supported?
205 (defvar tml-char-glyph-hash
206 (let* ((hash (make-hash-table :test 'equal)))
207 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
208 tml-char-glyph)
209 hash))
211 (defvar tml-char-glyph-regexp
212 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
214 ;; Tamil languages needed to be reordered.
216 (defvar tml-consonants-regexp
217 "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]")
219 (defvar tml-glyph-reorder-key-glyphs "[\e$,4)j)k)l\e(B]")
221 (defvar tml-glyph-reordering-regexp-list
222 (cons
223 (concat "\\(" tml-consonants-regexp "\\)\\([\e$,4)j)k)l\e(B]\\)") "\\2\\1"))
225 ;; Tamil vowel modifiers to be ligatured.
226 (defvar tml-glyph-glyph
228 ("\e$,4*H)d\e(B" . "\e$,4(a\e(B") ; ki
229 ("\e$,4*^)d\e(B" . "\e$,4(v\e(B") ; ksi
230 ("\e$,4*^)f\e(B" . "\e$,4)2\e(B") ; ksi~
231 ("\e$,4*I)d\e(B" . "\e$,4(b\e(B") ; n^i
232 ("\e$,4*J)d\e(B" . "\e$,4(c\e(B") ; ci
233 ("\e$,4*K)d\e(B" . "\e$,4(d\e(B") ; n~i
234 ("\e$,4*L)d\e(B" . "\e$,4)n\e(B") ; t.i
235 ("\e$,4*M)d\e(B" . "\e$,4(e\e(B") ; n.i
236 ("\e$,4*N)d\e(B" . "\e$,4(f\e(B") ; ti
237 ("\e$,4*O)d\e(B" . "\e$,4(g\e(B") ; ni
238 ("\e$,4*P)d\e(B" . "\e$,4(h\e(B") ; pi
239 ("\e$,4*Q)d\e(B" . "\e$,4(i\e(B") ; mi
240 ("\e$,4*R)d\e(B" . "\e$,4(j\e(B") ; yi
241 ("\e$,4*S)d\e(B" . "\e$,4(k\e(B") ; ri
242 ("\e$,4*T)d\e(B" . "\e$,4(l\e(B") ; li
243 ("\e$,4*U)d\e(B" . "\e$,4(m\e(B") ; vi
244 ("\e$,4*V)d\e(B" . "\e$,4(n\e(B") ; l_i
245 ("\e$,4*W)d\e(B" . "\e$,4(o\e(B") ; l.i
246 ("\e$,4*X)d\e(B" . "\e$,4(p\e(B") ; r_i
247 ("\e$,4*Y)d\e(B" . "\e$,4(q\e(B") ; n_i
248 ("\e$,4*Z)d\e(B" . "\e$,4(r\e(B") ; si
249 ("\e$,4*[)d\e(B" . "\e$,4(s\e(B") ; s'i
250 ("\e$,4*\)d\e(B" . "\e$,4(t\e(B") ; ji
251 ("\e$,4*])d\e(B" . "\e$,4(u\e(B") ; hi
253 ("\e$,4*H)f\e(B" . "\e$,4(w\e(B") ; ki~
254 ("\e$,4*I)f\e(B" . "\e$,4(x\e(B") ; n^i~
255 ("\e$,4*J)f\e(B" . "\e$,4(y\e(B") ; ci~
256 ("\e$,4*K)f\e(B" . "\e$,4(z\e(B") ; n~i~
257 ("\e$,4*L)f\e(B" . "\e$,4)o\e(B") ; t.i~
258 ("\e$,4*M)f\e(B" . "\e$,4)!\e(B") ; n.i~
259 ("\e$,4*N)f\e(B" . "\e$,4)"\e(B") ; ti~
260 ("\e$,4*O)f\e(B" . "\e$,4)#\e(B") ; ni~
261 ("\e$,4*P)f\e(B" . "\e$,4)$\e(B") ; pi~
262 ("\e$,4*Q)f\e(B" . "\e$,4)%\e(B") ; mi~
263 ("\e$,4*R)f\e(B" . "\e$,4)&\e(B") ; yi~
264 ("\e$,4*S)f\e(B" . "\e$,4)'\e(B") ; ri~
265 ("\e$,4*T)f\e(B" . "\e$,4)(\e(B") ; li~
266 ("\e$,4*U)f\e(B" . "\e$,4))\e(B") ; vi~
267 ("\e$,4*V)f\e(B" . "\e$,4)*\e(B") ; l_i~
268 ("\e$,4*W)f\e(B" . "\e$,4)+\e(B") ; l.i~
269 ("\e$,4*X)f\e(B" . "\e$,4),\e(B") ; r_i~
270 ("\e$,4*Y)f\e(B" . "\e$,4)-\e(B") ; n_i~
271 ("\e$,4*Z)f\e(B" . "\e$,4).\e(B") ; si~
272 ("\e$,4*[)f\e(B" . "\e$,4)/\e(B") ; s'i~
273 ("\e$,4*\)f\e(B" . "\e$,4)0\e(B") ; ji~
274 ("\e$,4*])f\e(B" . "\e$,4)1\e(B") ; hi~
276 ("\e$,4*H)g\e(B" . "\e$,4)p\e(B") ; ku
277 ("\e$,4*I)g\e(B" . "\e$,4)q\e(B") ; n^u
278 ("\e$,4*J)g\e(B" . "\e$,4)r\e(B") ; cu
279 ("\e$,4*K)g\e(B" . "\e$,4)s\e(B") ; n~u
280 ("\e$,4*L)g\e(B" . "\e$,4)t\e(B") ; t.u
281 ("\e$,4*M)g\e(B" . "\e$,4)u\e(B") ; n.u
282 ("\e$,4*N)g\e(B" . "\e$,4)v\e(B") ; tu
283 ("\e$,4*O)g\e(B" . "\e$,4)x\e(B") ; nu
284 ("\e$,4*P)g\e(B" . "\e$,4)y\e(B") ; pu
285 ("\e$,4*Q)g\e(B" . "\e$,4)z\e(B") ; mu
286 ("\e$,4*R)g\e(B" . "\e$,4){\e(B") ; yu
287 ("\e$,4*S)g\e(B" . "\e$,4)|\e(B") ; ru
288 ("\e$,4*T)g\e(B" . "\e$,4)}\e(B") ; lu
289 ("\e$,4*U)g\e(B" . "\e$,4)~\e(B") ; vu
290 ("\e$,4*V)g\e(B" . "\e$,4)\x7f\e(B") ; l_u
291 ("\e$,4*W)g\e(B" . "\e$,4* \e(B") ; l.u
292 ("\e$,4*X)g\e(B" . "\e$,4*!\e(B") ; r_u
293 ("\e$,4*Y)g\e(B" . "\e$,4*"\e(B") ; n_u
295 ("\e$,4*H)h\e(B" . "\e$,4*#\e(B") ; ku~
296 ("\e$,4*I)h\e(B" . "\e$,4*$\e(B") ; n^u~
297 ("\e$,4*J)h\e(B" . "\e$,4*%\e(B") ; cu~
298 ("\e$,4*K)h\e(B" . "\e$,4*&\e(B") ; n~u~
299 ("\e$,4*L)h\e(B" . "\e$,4*'\e(B") ; t.u~
300 ("\e$,4*M)h\e(B" . "\e$,4*(\e(B") ; n.u~
301 ("\e$,4*N)h\e(B" . "\e$,4*)\e(B") ; tu~
302 ("\e$,4*O)h\e(B" . "\e$,4*+\e(B") ; nu~
303 ("\e$,4*P)h\e(B" . "\e$,4*,\e(B") ; pu~
304 ("\e$,4*Q)h\e(B" . "\e$,4*-\e(B") ; mu~
305 ("\e$,4*R)h\e(B" . "\e$,4*.\e(B") ; yu~
306 ("\e$,4*S)h\e(B" . "\e$,4*/\e(B") ; ru~
307 ("\e$,4*T)h\e(B" . "\e$,4*6\e(B") ; lu~
308 ("\e$,4*U)h\e(B" . "\e$,4*7\e(B") ; vu~
309 ("\e$,4*V)h\e(B" . "\e$,4*8\e(B") ; l_u~
310 ("\e$,4*W)h\e(B" . "\e$,4*9\e(B") ; l.u~
311 ("\e$,4*X)h\e(B" . "\e$,4*:\e(B") ; r_u~
312 ("\e$,4*Y)h\e(B" . "\e$,4*;\e(B") ; n_u~
315 (defvar tml-glyph-glyph-hash
316 (let* ((hash (make-hash-table :test 'equal)))
317 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
318 tml-glyph-glyph)
319 hash))
321 (defvar tml-glyph-glyph-regexp
322 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
324 (defun tamil-compose-syllable-string (string)
325 (with-temp-buffer
326 (insert (decompose-string string))
327 (tamil-compose-syllable-region (point-min) (point-max))
328 (buffer-string)))
330 (defun tamil-compose-syllable-region (from to)
331 "Compose tamil syllable in region FROM to TO."
332 (let (glyph-str match-str glyph-reorder-regexps)
333 (save-excursion
334 (save-restriction
335 (narrow-to-region from to)
336 (goto-char (point-min))
337 ;; char-glyph-conversion
338 (while (re-search-forward tml-char-glyph-regexp nil t)
339 (setq match-str (match-string 0))
340 (setq glyph-str
341 (concat glyph-str (gethash match-str tml-char-glyph-hash))))
342 ;; glyph reordering
343 (when (string-match tml-glyph-reorder-key-glyphs glyph-str)
344 (if (string-match (car tml-glyph-reordering-regexp-list)
345 glyph-str)
346 (setq glyph-str
347 (replace-match (cdr tml-glyph-reordering-regexp-list)
348 nil nil glyph-str))))
349 ;; glyph-glyph-conversion
350 (when (string-match tml-glyph-glyph-regexp glyph-str)
351 (setq match-str (match-string 0 glyph-str))
352 (setq glyph-str
353 (replace-match (gethash match-str tml-glyph-glyph-hash)
354 nil nil glyph-str)))
355 ;; concatenate and attach reference-points.
356 (setq glyph-str
357 (cdr
358 (apply
359 'nconc
360 (mapcar
361 (function
362 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
363 glyph-str))))
364 (compose-region from to glyph-str)))))
366 (provide 'tml-util)
368 ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e
369 ;;; tml-util.el ends here