Add emacs-xtra.
[emacs.git] / lisp / language / mlm-util.el
blob27910714e26069f6b7599fea31ae10f275c8a965
1 ;;; mlm-util.el --- support for composing malayalam characters -*-coding: iso-2022-7bit;-*-
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Malayalam
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
25 ;; Created: Feb. 11. 2003
27 ;;; Commentary:
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Malayalam script characters.
32 ;;; Code:
34 ;; Malayalam Composable Pattern
35 ;; C .. Consonants
36 ;; V .. Vowel
37 ;; H .. Halant
38 ;; M .. Matra
39 ;; V .. Vowel
40 ;; A .. Anuswar
41 ;; D .. Chandrabindu
42 ;; (N .. Zerowidth Non Joiner)
43 ;; (J .. Zerowidth Joiner. )
44 ;; 1. vowel
45 ;; V(A|visargam)?
46 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
47 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
49 (defconst malayalam-consonant
50 "[\e$,1@5\e(B-\e$,1@Y\e(B]")
52 (defconst malayalam-composable-pattern
53 (concat
54 "\\([\e$,1@%\e(B-\e$,1@4\e(B][\e$,1@"\e(B]?\\)\\|\e$,1@#\e(B"
55 "\\|\\("
56 "\\(?:\\(?:[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?\\(?:[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?\\(?:[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?"
57 "[\e$,1@5\e(B-\e$,1@Y\e(B]\\(?:\e$,1@m\e(B\\|[\e$,1@^\e(B-\e$,1@c@f@g@h@j@j@k@l\e(B]?[\e$,1@"@m\e(B]?\\)?"
58 "\\)")
59 "Regexp matching a composable sequence of Malayalam characters.")
61 ;;;###autoload
62 (defun malayalam-compose-region (from to)
63 (interactive "r")
64 (save-excursion
65 (save-restriction
66 (narrow-to-region from to)
67 (goto-char (point-min))
68 (while (re-search-forward malayalam-composable-pattern nil t)
69 (malayalam-compose-syllable-region (match-beginning 0)
70 (match-end 0))))))
71 (defun malayalam-compose-string (string)
72 (with-temp-buffer
73 (insert (decompose-string string))
74 (malayalam-compose-region (point-min) (point-max))
75 (buffer-string)))
77 ;;;###autoload
78 (defun malayalam-post-read-conversion (len)
79 (save-excursion
80 (save-restriction
81 (let ((buffer-modified-p (buffer-modified-p)))
82 (narrow-to-region (point) (+ (point) len))
83 (malayalam-compose-region (point-min) (point-max))
84 (set-buffer-modified-p buffer-modified-p)
85 (- (point-max) (point-min))))))
87 (defun malayalam-range (from to)
88 "Make the list of the integers of range FROM to TO."
89 (let (result)
90 (while (<= from to) (setq result (cons to result) to (1- to))) result))
92 (defun malayalam-regexp-of-hashtbl-keys (hashtbl)
93 "Return a regular expression that matches all keys in hashtable HASHTBL."
94 (let ((max-specpdl-size 1000))
95 (regexp-opt
96 (sort
97 (let (dummy)
98 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
99 dummy)
100 (function (lambda (x y) (> (length x) (length y))))))))
103 ;;;###autoload
104 (defun malayalam-composition-function (from to pattern &optional string)
105 "Compose Malayalam characters in REGION, or STRING if specified.
106 Assume that the REGION or STRING must fully match the composable
107 PATTERN regexp."
108 (if string (malayalam-compose-syllable-string string)
109 (malayalam-compose-syllable-region from to))
110 (- to from))
112 ;; Register a function to compose Malayalam characters.
113 (mapc
114 (function (lambda (ucs)
115 (aset composition-function-table (decode-char 'ucs ucs)
116 (list (cons malayalam-composable-pattern
117 'malayalam-composition-function)))))
118 (nconc '(#x0d02 #x0d03) (malayalam-range #x0d05 #x0d39)))
120 ;; Notes on conversion steps.
122 ;; 1. chars to glyphs
124 ;; Simple replacement of characters to glyphs is done.
126 ;; 2. glyphs reordering.
128 ;; Two special reordering rule takes place.
129 ;; a. following "\e$,46[\e(B" goes to the front.
130 ;; b. following "\e$,46S6S\e(B", "\e$,46S\e(B" or "\e$,46T\e(B" goes to the front.
131 ;; This reordering occurs only to the last cluster of consonants.
132 ;; Preceding consonants with halant characters are not affected.
134 ;; 3. Composition.
136 ;; left modifiers will be attached at the left.
137 ;; others will be attached right.
139 (defvar mlm-char-glyph
140 '(;; various signs
141 ("\e$,1@"\e(B" . "\e$,46W\e(B")
142 ("\e$,1@#\e(B" . "\e$,46X\e(B")
143 ;; Independent Vowels
144 ("\e$,1@%\e(B" . "\e$,46!\e(B")
145 ("\e$,1@&\e(B" . "\e$,46"\e(B")
146 ("\e$,1@'\e(B" . "\e$,46#\e(B")
147 ("\e$,1@(\e(B" . "\e$,46#6U\e(B")
148 ("\e$,1@)\e(B" . "\e$,46$\e(B")
149 ("\e$,1@*\e(B" . "\e$,46$6U\e(B")
150 ("\e$,1@+\e(B" . "\e$,46%\e(B")
151 ("\e$,1@,\e(B" . "nil") ;; not in present use, not supported.
152 ("\e$,1@.\e(B" . "\e$,46&\e(B")
153 ("\e$,1@/\e(B" . "\e$,46'\e(B")
154 ("\e$,1@0\e(B" . "\e$,46S6&\e(B")
155 ("\e$,1@2\e(B" . "\e$,46(\e(B")
156 ("\e$,1@3\e(B" . "\e$,46(6M\e(B")
157 ("\e$,1@4\e(B" . "\e$,46(6U\e(B")
158 ;; Consonants
159 ("\e$,1@5\e(B" . "\e$,46)\e(B")
160 ("\e$,1@5@m@5\e(B" . "\e$,47!\e(B")
161 ("\e$,1@5@m@S\e(B" . "\e$,47"\e(B")
162 ("\e$,1@5@m@W\e(B" . "\e$,47#\e(B")
163 ("\e$,1@5@m@?\e(B" . "\e$,47N\e(B")
164 ("\e$,1@5@m@D\e(B" . "\e$,47`\e(B")
165 ("\e$,1@5@a\e(B" . "\e$,47f\e(B")
166 ("\e$,1@5@m@5@a\e(B" . "\e$,47g\e(B")
167 ("\e$,1@5@a\e(B" . "\e$,47f\e(B")
168 ("\e$,1@5@m@5@a\e(B" . "\e$,47g\e(B")
170 ("\e$,1@6\e(B" . "\e$,46*\e(B")
172 ("\e$,1@7\e(B" . "\e$,46+\e(B")
173 ("\e$,1@7@m@7\e(B" . "\e$,47$\e(B")
174 ("\e$,1@7@m@R\e(B" . "\e$,47%\e(B")
175 ("\e$,1@7@m@N\e(B" . "\e$,47\\e(B")
176 ("\e$,1@7@m@H\e(B" . "\e$,47a\e(B")
178 ("\e$,1@8\e(B" . "\e$,46,\e(B")
180 ("\e$,1@9\e(B" . "\e$,46-\e(B")
181 ("\e$,1@9@m@5\e(B" . "\e$,47&\e(B")
182 ("\e$,1@9@m@9\e(B" . "\e$,47'\e(B")
183 ("\e$,1@9@m@5@a\e(B" . "\e$,47h\e(B")
185 ("\e$,1@:\e(B" . "\e$,46.\e(B")
186 ("\e$,1@:@m@:\e(B" . "\e$,47(\e(B") ;; duplicate
187 ("\e$,1@:@m@;\e(B" . "\e$,47Q\e(B")
189 ("\e$,1@;\e(B" . "\e$,46/\e(B")
191 ("\e$,1@<\e(B" . "\e$,460\e(B")
192 ("\e$,1@<@m@<\e(B" . "\e$,47V\e(B")
193 ("\e$,1@<@m@>\e(B" . "\e$,47Z\e(B")
195 ("\e$,1@=\e(B" . "\e$,461\e(B")
197 ("\e$,1@>\e(B" . "\e$,462\e(B")
198 ("\e$,1@>@m@:\e(B" . "\e$,47)\e(B")
199 ("\e$,1@>@m@>\e(B" . "\e$,47*\e(B")
201 ("\e$,1@?\e(B" . "\e$,463\e(B")
202 ("\e$,1@?@m@?\e(B" . "\e$,47+\e(B")
204 ("\e$,1@@\e(B" . "\e$,464\e(B")
205 ("\e$,1@A\e(B" . "\e$,465\e(B")
206 ("\e$,1@A@m@A\e(B" . "\e$,47M\e(B")
207 ("\e$,1@B\e(B" . "\e$,466\e(B")
209 ("\e$,1@C\e(B" . "\e$,467\e(B")
210 ("\e$,1@C@a@m\e(B" . "\e$,47,\e(B") ;; half consonant
211 ("\e$,1@C@m@?\e(B" . "\e$,47-\e(B")
212 ("\e$,1@C@m@C\e(B" . "\e$,47.\e(B")
213 ("\e$,1@C@m@N\e(B" . "\e$,47W\e(B")
214 ("\e$,1@C@m@A\e(B" . "\e$,47^\e(B")
215 ("\e$,1@C@a\e(B" . "\e$,47i\e(B")
217 ("\e$,1@D\e(B" . "\e$,468\e(B")
218 ("\e$,1@D@m@D\e(B" . "\e$,47/\e(B")
219 ("\e$,1@D@m@E\e(B" . "\e$,470\e(B")
220 ("\e$,1@D@m@X\e(B" . "\e$,47U\e(B")
221 ("\e$,1@D@m@M\e(B" . "\e$,47[\e(B")
222 ("\e$,1@D@m@N\e(B" . "\e$,47_\e(B")
224 ("\e$,1@E\e(B" . "\e$,469\e(B")
226 ("\e$,1@F\e(B" . "\e$,46:\e(B")
227 ("\e$,1@F@m@F\e(B" . "\e$,471\e(B")
228 ("\e$,1@F@m@G\e(B" . "\e$,472\e(B")
230 ("\e$,1@G\e(B" . "\e$,46;\e(B")
232 ("\e$,1@H\e(B" . "\e$,46<\e(B")
233 ("\e$,1@H@a@m\e(B" . "\e$,473\e(B") ;; half consonant
234 ("\e$,1@H@m@D\e(B" . "\e$,474\e(B")
235 ("\e$,1@H@m@F\e(B" . "\e$,475\e(B")
236 ("\e$,1@H@m@H\e(B" . "\e$,476\e(B")
237 ("\e$,1@H@m@N\e(B" . "\e$,477\e(B")
238 ("\e$,1@H@m@G\e(B" . "\e$,47T\e(B")
239 ("\e$,1@H@m@E\e(B" . "\e$,47Y\e(B")
240 ("\e$,1@H@m@Q\e(B" . "\e$,47b\e(B")
241 ("\e$,1@H@a\e(B" . "\e$,47k\e(B")
242 ("\e$,1@H@m@H@a\e(B" . "\e$,47l\e(B")
244 ("\e$,1@J\e(B" . "\e$,46=\e(B")
245 ("\e$,1@J@m@J\e(B" . "\e$,478\e(B") ;; duplicate
246 ("\e$,1@J@m@R\e(B" . "\e$,479\e(B") ;; lakar
248 ("\e$,1@K\e(B" . "\e$,46>\e(B")
250 ("\e$,1@L\e(B" . "\e$,46?\e(B")
251 ("\e$,1@L@m@L\e(B" . "\e$,47:\e(B") ;; duplicate
252 ("\e$,1@L@m@R\e(B" . "\e$,47;\e(B") ;; lakar
253 ("\e$,1@L@m@G\e(B" . "\e$,47O\e(B")
254 ("\e$,1@L@m@F\e(B" . "\e$,47P\e(B")
256 ("\e$,1@M\e(B" . "\e$,46@\e(B")
258 ("\e$,1@N\e(B" . "\e$,46A\e(B")
259 ("\e$,1@N@m@J\e(B" . "\e$,47<\e(B")
260 ("\e$,1@N@m@N\e(B" . "\e$,47=\e(B")
261 ("\e$,1@N@m@R\e(B" . "\e$,47>\e(B") ;; lakar
263 ("\e$,1@O\e(B" . "\e$,46B\e(B")
264 ("\e$,1@O@m@O\e(B" . "\e$,47?\e(B") ;; duplicate
265 ("\e$,1@O@m@5@m@5\e(B" . "\e$,47m\e(B")
267 ("\e$,1@P\e(B" . "\e$,46C\e(B")
268 ("\e$,1@P@a@m\e(B" . "\e$,47@\e(B")
269 ("\e$,1@P@a\e(B" . "\e$,47j\e(B")
271 ("\e$,1@Q\e(B" . "\e$,46D\e(B")
272 ("\e$,1@Q@m\e(B" . "\e$,47@\e(B") ;; same glyph as "\e$,1@P@m\e(B"
273 ("\e$,1@Q@a@m\e(B" . "\e$,47@\e(B") ;; same glyph as "\e$,1@P@m\e(B"
274 ;;("\e$,1@Q@m@Q\e(B" . "\e$,47A\e(B")
275 ("\e$,1@Q@m@Q\e(B" . "\e$,47d\e(B")
277 ("\e$,1@R\e(B" . "\e$,46E\e(B")
278 ("\e$,1@R@a@m\e(B" . "\e$,47B\e(B")
279 ("\e$,1@R@m@R\e(B" . "\e$,47C\e(B") ;; lakar
280 ("\e$,1@R@m@J\e(B" . "\e$,47e\e(B")
282 ("\e$,1@S\e(B" . "\e$,46F\e(B")
283 ("\e$,1@S@a@m\e(B" . "\e$,47D\e(B")
284 ("\e$,1@S@m@S\e(B" . "\e$,47E\e(B")
286 ("\e$,1@T\e(B" . "\e$,46G\e(B")
288 ("\e$,1@U\e(B" . "\e$,46H\e(B")
289 ("\e$,1@U@m@U\e(B" . "\e$,47F\e(B")
291 ("\e$,1@V\e(B" . "\e$,46I\e(B")
292 ("\e$,1@V@m@R\e(B" . "\e$,47G\e(B")
293 ("\e$,1@V@m@V\e(B" . "\e$,47H\e(B")
294 ("\e$,1@V@m@:\e(B" . "\e$,47]\e(B")
296 ("\e$,1@W\e(B" . "\e$,46J\e(B")
297 ("\e$,1@W@m@?\e(B" . "\e$,47c\e(B")
299 ("\e$,1@X\e(B" . "\e$,46K\e(B")
300 ("\e$,1@X@m@R\e(B" . "\e$,47I\e(B")
301 ("\e$,1@X@m@X\e(B" . "\e$,47J\e(B")
302 ("\e$,1@X@m@Q@m@Q\e(B" . "\e$,47L\e(B")
303 ("\e$,1@X@m@E\e(B" . "\e$,47X\e(B")
305 ("\e$,1@Y\e(B" . "\e$,46L\e(B")
306 ("\e$,1@Y@m@R\e(B" . "\e$,47K\e(B")
307 ("\e$,1@Y@m@N\e(B" . "\e$,47R\e(B")
308 ("\e$,1@Y@m@H\e(B" . "\e$,47S\e(B")
310 ;; Dependent vowel signs
311 ("\e$,1@^\e(B" . "\e$,46M\e(B")
312 ("\e$,1@_\e(B" . "\e$,46N\e(B")
313 ("\e$,1@`\e(B" . "\e$,46O\e(B")
314 ("\e$,1@a\e(B" . "\e$,46P\e(B")
315 ("\e$,1@b\e(B" . "\e$,46Q\e(B")
316 ("\e$,1@c\e(B" . "\e$,46R\e(B")
317 ("\e$,1@f\e(B" . "\e$,46S\e(B")
318 ("\e$,1@g\e(B" . "\e$,46T\e(B")
319 ("\e$,1@h\e(B" . "\e$,46S6S\e(B")
320 ("\e$,1@j\e(B" . "\e$,46S6M\e(B")
321 ("\e$,1@k\e(B" . "\e$,46T6M\e(B")
322 ("\e$,1@l\e(B" . "\e$,46U\e(B")
323 ;; Various signs
324 ("\e$,1@m\e(B" . "\e$,46V\e(B")
325 ("\e$,1@m@O\e(B" . "\e$,46Y\e(B") ;; yakar
326 ("\e$,1@m@O@a\e(B" . "\e$,46\\e(B") ;; yakar + u
327 ("\e$,1@m@O@b\e(B" . "\e$,46]\e(B") ;; yakar + uu
328 ("\e$,1@m@U\e(B" . "\e$,46Z\e(B") ;; vakar modifier
329 ("\e$,1@m@P\e(B" . "\e$,46[\e(B") ;; rakar modifier is the same to rra modifier.
330 ("\e$,1@m@P@m\e(B" . "\e$,46R\e(B") ;; halant + rakar + halant
331 ("\e$,1@m@Q\e(B" . "\e$,46[\e(B") ;; rrakar modifier
332 ("\e$,1@m@Q@m\e(B" . "\e$,46R\e(B") ;; halant + rrakar + halant
333 ("\e$,1@m@m\e(B" . "\e$,46V\e(B") ;; double omission sign to stop forming half consonant.
334 ("\e$,1@w\e(B" . "\e$,46U\e(B") ;; not in present use, already at 0D4C.
337 (defvar mlm-char-glyph-hash
338 (let* ((hash (make-hash-table :test 'equal)))
339 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
340 mlm-char-glyph)
341 hash))
343 (defvar mlm-char-glyph-regexp
344 (malayalam-regexp-of-hashtbl-keys mlm-char-glyph-hash))
346 ;; Malayalam languages needed to be reordered in a complex mannar.
348 (defvar mlm-consonants
349 (concat
350 "\e$,46)6*6+6,6-6.6/606162636465666768696:6;6<6=6>6?6@6A6B6C6D6E6F6G6H6I6J6K6L\e(B"
351 "\e$,47!7"7#7$7%7&7'7(7)7*7+7,7-7.7/707172737475767778797:7;7<7=7>7?7@7A7B7C7D7E7F7G7H7I7J7K7L7M7N7O7P7Q7R7S7T7U7V7W7X7Y7Z7[7\7]7^7_7`7a7b7c7d7e\e(B"
354 (defvar mlm-consonants-regexp
355 (concat "\\(\e$,46[\e(B?[" mlm-consonants "][\e$,46Y6Z\e(B]?\\)"))
357 (defvar mlm-glyph-reorder-key-glyphs "[\e$,46[6S6T\e(B]")
359 (defvar mlm-glyph-reordering-regexp-list
360 `((,(concat "\\([" mlm-consonants "][\e$,46Y6Z\e(B]?\\)\e$,46[\e(B") . "\e$,46[\e(B\\1")
361 (,(concat mlm-consonants-regexp "\e$,46S6S\e(B") . "\e$,46S6S\e(B\\1")
362 (,(concat mlm-consonants-regexp "\e$,46S\e(B") . "\e$,46S\e(B\\1")
363 (,(concat mlm-consonants-regexp "\e$,46T\e(B") . "\e$,46T\e(B\\1")))
365 (defun malayalam-compose-syllable-string (string)
366 (with-temp-buffer
367 (insert (decompose-string string))
368 (malayalam-compose-syllable-region (point-min) (point-max))
369 (buffer-string)))
371 (defun malayalam-compose-syllable-region (from to)
372 "Compose malayalam syllable in region FROM to TO."
373 (let (glyph-str
374 match-str
375 glyph-reorder-regexps
376 glyph-reorder-replace
377 glyph-reorder-regexp)
378 (save-excursion
379 (save-restriction
380 (narrow-to-region from to)
381 (goto-char (point-min))
382 ;; char-glyph-conversion
383 (while (re-search-forward mlm-char-glyph-regexp nil t)
384 (setq match-str (match-string 0))
385 (setq glyph-str
386 (concat glyph-str (gethash match-str mlm-char-glyph-hash))))
387 (when (string-match mlm-glyph-reorder-key-glyphs glyph-str)
388 ;; glyph reordering
389 (setq glyph-reorder-regexps mlm-glyph-reordering-regexp-list)
390 (while glyph-reorder-regexps
391 (setq glyph-reorder-regexp (caar glyph-reorder-regexps))
392 (setq glyph-reorder-replace (cdar glyph-reorder-regexps))
393 (setq glyph-reorder-regexps (cdr glyph-reorder-regexps))
394 (if (string-match glyph-reorder-regexp glyph-str)
395 (setq glyph-str
396 (replace-match glyph-reorder-replace nil nil
397 glyph-str)))))
398 ;; concatenate and attach reference-points.
399 (setq glyph-str
400 (cdr
401 (apply
402 'nconc
403 (mapcar
404 (function
405 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
406 glyph-str))))
407 (compose-region from to glyph-str)))))
409 (provide 'mlm-util)
411 ;;; arch-tag: 7f25ee67-8f9d-49f2-837b-35c412c00eba
412 ;;; devan-util.el ends here