1 ;;; devan-util.el --- Support for Devanagari Script Composition
3 ;; Copyright (C) 1996 Free Software Foundation, Inc.
5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
7 ;; Keywords: multilingual, Indian, Devanagari
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
29 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
30 ;; 1997.3.24 fixed some bugs.
33 ;; Decompose the input characters and process them on the character basis.
35 ;; Devanagari script composition rules and related programs.
40 ;;; Steps toward composition of Devanagari Characters.
46 (defun indian-to-devanagari (char)
47 "Convert IS 13194 character CHAR to Devanagari basic characters.
48 If CHAR is not IS 13194, return CHAR as is."
49 (let ((charcodes (split-char char
)))
50 (if (eq (car charcodes
) 'indian-is13194
)
51 (make-char 'indian-2-column ?
\x21 (nth 1 charcodes
))
55 (defun devanagari-to-indian (char)
56 "Convert Devanagari basic character CHAR to IS 13194 characters.
57 If CHAR is not Devanagari basic character, return CHAR as is."
58 (let ((charcodes (split-char char
)))
59 (if (and (eq (car charcodes
) 'indian-2-column
)
60 (= (nth 1 charcodes
) ?
\x21))
61 (make-char 'indian-is13194
(nth 2 charcodes
))
65 (defun indian-to-devanagari-region (from to
)
66 "Convert IS 13194 characters in region to Devanagari basic characters.
67 When called from a program, expects two arguments,
68 positions (integers or markers) specifying the region."
73 (let ((char (following-char)))
74 (if (eq (char-charset char
) 'indian-is13194
)
77 (insert (indian-to-devanagari char
)))
81 (defun devanagari-to-indian-region (from to
)
82 "Convert Devanagari basic characters in region to Indian characters.
83 When called from a program, expects two arguments,
84 positions (integers or markers) specifying the region."
89 (let ((char (following-char)))
90 (if (eq (char-charset char
) 'indian-2-column
)
93 (insert (devanagari-to-indian char
)))
97 (defun indian-to-devanagari-string (string)
98 "Convert Indian characters in STRING to Devanagari Basic characters."
99 (let* ((len (length string
))
101 (vec (make-vector len
0)))
103 (aset vec i
(indian-to-devanagari (aref string i
)))
107 ;; Phase 0 - Determine whether the characters can be composed.
110 ;;; Regular expressions to split characters for composition.
113 ;; Indian script word contains one or more syllables.
114 ;; In BNF, it can be expressed as follows:
116 ;; Word ::= {Syllable} [Cons-Syllable]
117 ;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable
118 ;; Vowel-Syllable ::= V[D]
119 ;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D]
120 ;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons
121 ;; Pure-Cons ::= Full-Cons H
122 ;; Full-Cons ::= C [N]
124 ;; {} repeat, [] optional
126 ;; C - Consonant (\e$(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E\e(B
127 ;; \e$(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X\e(B)
128 ;; N - Nukta (\e$(5!i\e(B)
129 ;; H - Halant(\e$(5!h\e(B) or Virama
130 ;; V - Vowel (\e$(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2#&#'#*\e(B)
131 ;; ("\e$(5#&#'#*\e(B" can be obtained by IS13194 vowels with nukta.)
132 ;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu (\e$(5!!!"\e(B)
133 ;; (Visarga (\e$(5!#\e(B) is excluded.)
134 ;; M - Matra (\e$(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g#K#L#M\e(B)
135 ;; ("\e$(5#K#L#M\e(B" can be obtained by IS13194 matras with nukta.)
137 ;; In Emacs, one syllable of Indian language is considered to be one
138 ;; composite glyph. If we expand the above expression for
139 ;; cons-vowel-syllable, it would be:
141 ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] [D]
143 ;; Therefore, in the worst case, one syllable may contain the
144 ;; following characters.
146 ;; C N H C N H C N H C N H C N M D
148 ;; An example is the Sanskrit word "kArtsnya", where five consecutive
149 ;; consonants appear.
151 ;; On the other hand, consonant-syllable, which appears at the end of
152 ;; the word, would have the following expression:
154 ;; [C [N] H] [C [N] H] [C [N] H] C [N] H
156 ;; This is acceptable BEFORE a proper consonant-syllable is input. A
157 ;; string which doesn't match the above expression is invalid and
158 ;; thus must be fixed.
161 ;; A third case can be considered: an acceptable syllable to which
162 ;; no further code can be added.
164 ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] D
166 ;; However, to make editing possible even in this condition, we will
167 ;; not treat this case specially.
170 ;; Currently, it seems that only the following consonants can have
171 ;; a Nukta sign attached.
172 ;; (\e$(5!3!4!5!:!?!@!I\e(B)
173 ;; Therefore, [\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B? can be re-written as
174 ;; \\([\e$(5!3!4!5!:!?!@!I\e(B]\e$(5!i\e(B\\)\\|[\e$(5!3\e(B-\e$(5!X\e(B]
176 (defconst devanagari-full-cons
177 "\\(\\([\e$(5!3!4!5!:!?!@!I\e(B]\e$(5!i\e(B\\)\\|[\e$(5!3\e(B-\e$(5!X$.$E"%
\e(B]\\)"
178 "Devanagari full consonant
")
180 (defconst devanagari-pure-cons
181 (concat "\\(" devanagari-full-cons "\e$
(5!h
\e(B\\)")
182 "Devanagari pure consonant
")
184 (defconst devanagari-matra
185 "\\(\\([\e$
(5!_
![!\
\e(B]\e$
(5!i
\e(B\\)\\|
[\e$
(5!Z
\e(B-\e$
(5!g
#K
#L
#M
\e(B]\\)"
186 "Devanagari Matra Signs.
'\e$
(5#K
#L
#M
\e(B' can also be created from the combination
187 of
'\e$
(5!_
![!\
\e(B' and nukta sign.
")
189 (defconst devanagari-vowel
190 "\\(\\([\e$
(5!*!&!'\e(B]\e$
(5!i
\e(B\\)\\|
[\e$
(5!$
\e(B-\e$
(5!2#&#'#*\e(B]\\)"
191 "Devanagari Vowels.
'\e$
(5#&#'#*\e(B' can also be created from the combination
192 of
'\e$
(5!*!&!'\e(B' and nukta sign.
")
194 (defconst devanagari-vowel-syllable
195 (concat devanagari-vowel "[\e$
(5!!!"\e(B]?")
196 "Devanagari vowel syllable.")
;; Regexp for a consonant-only syllable (Cons-Syllable in the BNF above):
;; three optional `devanagari-pure-cons' units (each followed by "?")
;; and then one mandatory `devanagari-pure-cons' unit.  The trailing "$"
;; anchors the match at the end of the line or string.
198 (defconst devanagari-cons-syllable
199 (concat devanagari-pure-cons
"?" devanagari-pure-cons
"?"
200 devanagari-pure-cons
"?" devanagari-pure-cons
"$")
201 "Devanagari consonant syllable")
203 (defconst devanagari-cons-vowel-syllable
205 devanagari-pure-cons
"?" devanagari-pure-cons
"?"
206 devanagari-pure-cons
"?" devanagari-pure-cons
"\\)?"
207 devanagari-full-cons devanagari-matra
"?[\e$(5!!!"\e(B]?
")
208 "Devanagari consonant vowel syllable.
")
211 ;; Digits and virams must also be processed, separately from syllables.
213 ;; In IS 13194, Avagrah is obtained by Nukta after Viram, and
214 ;; OM is obtained by Nukta after Chandrabindu
217 (defconst devanagari-digit-viram-visarga
218 "[\e$
(5!q
\e(B-\e$
(5!z
!j
!#\e(B]")
220 (defconst devanagari-other-sign
221 "\\([\e$
(5!!!j
\e(B]\e$
(5!i
\e(B\\)\\|
\\([\e$
(5#!#J
\e(B]\\)")
;; Top-level regexp built as an alternation of five sub-regexps, each
;; wrapped in its own \\( ... \\) group, tried in this order:
;;   1. `devanagari-cons-syllable'
;;   2. `devanagari-vowel-syllable'
;;   3. `devanagari-cons-vowel-syllable'
;;   4. `devanagari-other-sign'
;;   5. `devanagari-digit-viram-visarga'
223 (defconst devanagari-composite-glyph-unit
224 (concat "\\(" devanagari-cons-syllable
225 "\\)\\|
\\(" devanagari-vowel-syllable
226 "\\)\\|
\\(" devanagari-cons-vowel-syllable
227 "\\)\\|
\\(" devanagari-other-sign
228 "\\)\\|
\\(" devanagari-digit-viram-visarga "\\)")
229 "Regexp matching to Devanagari string to be composed form one glyph.
")
231 ;;(put-charset-property charset-devanagari-1-column
232 ;; 'char-to-glyph 'devanagari-compose-string)
233 ;;(put-charset-property charset-devanagari-2-column
234 ;; 'char-to-glyph 'devanagari-compose-string)
238 ;;(string-match devanagari-cons-vowel-syllable-examine "\e$
(5!X
![\e(B") => 0
239 ;;(string-match devanagari-cons-vowel-syllable-examine "\e$
(5!F
!h
!D
!\
\e(B") => 0
240 ;;(string-match devanagari-cons-vowel-syllable-examine "\e$
(5!X
![!F
!h
!D
!\
\e(B") => 0
243 ;; Steps toward the composition
244 ;; Converting Character Codes to Composite Glyph.
246 ;; Example : \e$(5!X![\e(B/\e$(5!F!h!D!\\e(B
248 ;; First, convert Characters to appropriate glyphs.
250 ;; => \e$(5!X![\e(B/\e$(5"F
!D
!\
\e(B
252 ;; Then, determine the base glyph, apply-orders and apply-rules.
254 ;; => \e$(5!X\e(B (ml.mr) \e$(5![\e(B / \e$(5!D\e(B (ml.mr) \e$(5"F\e(B (mr ml) \e$(5!\\e(B
256 ;; Finally, convert 2-column glyphs to 1-column glyphs
257 ;; if such glyphs exist.
259 ;; => \e$(6!X\e(B (ml.mr) \e$(6![\e(B / \e$(6!D\e(B (ml.mr) \e$(6"F\e(B (mr ml) \e$(6!\\e(B
261 ;; Compose the glyph.
263 ;; => \e4\e$(6!Xt%![\e0!X![\e1\e(B/\e4\e$(6!Dt%"Fv#!\\e0!D"F!\\e1\e(B
264 ;; => \e4\e$(6!Xt%![\e0!X![\e1\e4!Dt%"Fv#!\\e0!D"F!\\e1\e(B
268 ;; Phase 1: Converting Character Code to Glyph Code.
272 ;; There may be many rules that you may want to suppress.
273 ;; In that case, please comment out that rule.
275 ;; RULES WILL BE EVALUATED FROM FIRST TO LAST.
276 ;; PUT MORE SPECIFIC RULES FIRST.
279 ;; Prepare separate, specific lists of rules for each language
280 ;; that adopts the Devanagari script.
283 (defconst devanagari-char-to-glyph-rules
286 ;; `r' at the top of syllable and followed by other consonants.
287 ;; ("[^\e$(5!h\e(B]\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5"p\e(B")
288 ("^\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5"p
\e(B")
291 ("\\(\e$
(5!3!h
!B
!h
!O
!h
!M
\e(B\\)" "\e$
(5$
!\e(B" sanskrit)
292 ("\\(\e$
(5!3!h
!B
!h
!T
\e(B\\)" "\e$
(5$
"\e(B" sanskrit
)
293 ("\\(\e$(5!3!h!B!h!M\e(B\\)" "\e$(5$#\e(B" sanskrit
)
294 ("\\(\e$(5!3!h!F!h!M\e(B\\)" "\e$(5$$\e(B")
295 ("\\(\e$(5!3!h!O!h!M\e(B\\)" "\e$(5$%\e(B")
296 ("\\(\e$(5!3!h!O\e(B\\)" "\e$(5"#\e(B") ; Post "r
"
297 ("\\(\e$
(5!3!h
!T
!h
!M
\e(B\\)" "\e$
(5$
&\e(B" sanskrit)
298 ("\\(\e$
(5!3!h
\e(B\\)\e$
(5!3!h
\e(B[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"3\e(B") ; Special Half Form
299 ("\\(\e$(5!3!h!3\e(B\\)" "\e$(5$'\e(B")
300 ("\\(\e$(5!3!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"3\e(B") ; Special Rules for "k-tr
"
301 ("\\(\e$
(5!3!h
!B
\e(B\\)" "\e$
(5$
(\e(B")
302 ("\\(\e$
(5!3!h
!F
\e(B\\)" "\e$
(5$
)\e(B")
303 ("\\(\e$
(5!3!h
!L
\e(B\\)" "\e$
(5$
*\e(B")
304 ("\\(\e$
(5!3!h
!M
\e(B\\)" "\e$
(5$
+\e(B")
305 ("\\(\e$
(5!3!h
!Q
\e(B\\)" "\e$
(5$
,\e(B")
306 ("\\(\e$
(5!3!h
!T
\e(B\\)" "\e$
(5$-
\e(B")
307 ("\\(\e$
(5!3!h
!V
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"l\e(B") ; Half Form
308 ("\\(\e$(5$.!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"l
\e(B") ; Half Form
309 ("\\(\e$
(5!3!h
!V
\e(B\\)" "\e$
(5$.
\e(B")
310 ("\\(\e$
(5!3!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"3\e(B") ; Half Form
311 ("\\(\e$(5!3!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"s
\e(B") ; Nukta Half Form
312 ("\\(\e$
(5!3!i
\e(B\\)" "\e$
(5#3\e(B") ; Nukta
313 ("\\(\e$
(5!4!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"4\e(B") ; Half Form
314 ("\\(\e$(5!4!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"t
\e(B") ; Nukta Half Form
315 ("\\(\e$
(5!4!i
\e(B\\)" "\e$
(5#4\e(B") ; Nukta
316 ("\\(\e$
(5!5!h
!O
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"`\e(B") ; Half Form
317 ("\\(\e$(5!5!h!O\e(B\\)" "\e$(5"$
\e(B") ; Post "r
"
318 ("\\(\e$
(5!5!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"5\e(B") ; Half Form
319 ("\\(\e$(5!5!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"u
\e(B") ; Nukta Half Form
320 ("\\(\e$
(5!5!i
\e(B\\)" "\e$
(5#5\e(B") ; Nukta
321 ("\\(\e$
(5!6!h
!F
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"a\e(B") ; Half Form
322 ("\\(\e$(5!6!h!F\e(B\\)" "\e$(5$/\e(B")
324 ("\\(\e$(5!6!h!O\e(B\\)" "\e$(5!6"q
\e(B") ; Post "r
"
325 ("\\(\e$
(5!6!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"6\e(B") ; Half Form
326 ("\\(\e$(5!7!h!3!h!B!h!M\e(B\\)" "\e$(5$0\e(B" sanskrit
)
327 ("\\(\e$(5!7!h!3!h!V!h!T\e(B\\)" "\e$(5$1\e(B" sanskrit
)
328 ("\\(\e$(5!7!h!3!h!B\e(B\\)" "\e$(5$2\e(B" sanskrit
)
329 ("\\(\e$(5!7!h!3!h!V\e(B\\)" "\e$(5$3\e(B" sanskrit
)
330 ("\\(\e$(5!7!h!3!h!O\e(B\\)" "\e$(5$9"q
\e(B") ; Special Rule. May be precomposed font needed.
331 ("\\(\e$
(5!7!h
!6!h
!O
\e(B\\)" "\e$
(5$
4\e(B" sanskrit)
332 ("\\(\e$
(5!7!h
!3!h
!M
\e(B\\)" "\e$
(5$
5\e(B" sanskrit)
333 ("\\(\e$
(5!7!h
!4!h
!M
\e(B\\)" "\e$
(5$
6\e(B" sanskrit)
334 ("\\(\e$
(5!7!h
!5!h
!M
\e(B\\)" "\e$
(5$
7\e(B" sanskrit)
335 ("\\(\e$
(5!7!h
!6!h
!M
\e(B\\)" "\e$
(5$
8\e(B" sanskrit)
336 ("\\(\e$
(5!7!h
!3\e(B\\)" "\e$
(5$
9\e(B")
337 ("\\(\e$
(5!7!h
!4\e(B\\)" "\e$
(5$
:\e(B")
338 ("\\(\e$
(5!7!h
!5!h
!O
\e(B\\)" "\e$
(5$
;"q\e(B") ; Special Rule. May be precomposed font needed.
339 ("\\(\e$(5!7!h!5\e(B\\)" "\e$(5$;\e(B")
340 ("\\(\e$(5!7!h!6\e(B\\)" "\e$(5$<\e(B")
341 ("\\(\e$(5!7!h!7\e(B\\)" "\e$(5$=\e(B")
342 ("\\(\e$(5!7!h!F\e(B\\)" "\e$(5$>\e(B")
343 ("\\(\e$(5!7!h!L\e(B\\)" "\e$(5$?\e(B")
344 ("\\(\e$(5!7!h!M\e(B\\)" "\e$(5$@\e(B")
345 ("\\(\e$(5!8!h\e(B\\)[\e$(5!8!<\e(B]\e$(5!h\e(B" "\e$(5"8\e(B") ; Half Form
346 ("\\(\e$
(5!8!h
!8\e(B\\)" "\e$
(5$A
\e(B")
347 ("\\(\e$
(5!8!h
!<\e(B\\)" "\e$
(5$B
\e(B")
348 ("\\(\e$
(5!8!h
!O
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"8"q
\e(B") ; Half Form Post "r
"
349 ("\\(\e$
(5!8!h
!O
\e(B\\)" "\e$
(5!8"q\e(B") ; Post "r"
350 ("\\(\e$(5!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"8\e(B") ; Half Form
351 ("\\(\e$
(5!9!h
!M
\e(B\\)" "\e$
(5$C
\e(B")
352 ("\\(\e$
(5!:!h
!O
\e(B\\)" "\e$
(5$D
\e(B")
353 ("\\(\e$
(5!:!h
!<!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"m\e(B") ; Half Form
354 ("\\(\e$(5!:!h!<\e(B\\)" "\e$(5$E\e(B")
355 ("\\(\e$(5!:!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5":\e(B") ; Half Form
356 ("\\(\e$
(5!:!i
!h
!O
\e(B\\)" "\e$
(5"!\e(B") ; Nukta Post "r"
357 ("\\(\e$(5!:!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"z
\e(B") ; Nukta Half Form
358 ("\\(\e$
(5!:!i
\e(B\\)" "\e$
(5#:\e(B") ; Nukta
359 ("\\(\e$
(5!;!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5";\e(B") ; Half Form
360 ("\\(\e$(5!<!h\e(B\\)\e$(5!8!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Special Half Form
361 ("\\(\e$
(5!<!h
!8\e(B\\)" "\e$
(5$F
\e(B")
362 ("\\(\e$
(5!<!h
\e(B\\)\e$
(5!:!h
\e(B[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"<\e(B") ; Special Half Form
363 ("\\(\e$(5!<!h!:\e(B\\)" "\e$(5$G\e(B")
364 ("\\(\e$(5!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Half Form
365 ("\\(\e$
(5!=!h
!3\e(B\\)" "\e$
(5$H
\e(B")
366 ("\\(\e$
(5!=!h
!=\e(B\\)" "\e$
(5$I
\e(B")
367 ("\\(\e$
(5!=!h
!>\e(B\\)" "\e$
(5$J
\e(B")
368 ("\\(\e$
(5!=!h
!M
\e(B\\)" "\e$
(5$K
\e(B")
369 ("\\(\e$
(5!>!h
!M
\e(B\\)" "\e$
(5$L
\e(B")
370 ("\\(\e$
(5!?
!h
!5!h
!M
\e(B\\)" "\e$
(5$M
\e(B" sanskrit)
371 ("\\(\e$
(5!?
!h
!6!h
!O
\e(B\\)" "\e$
(5$N
\e(B" sanskrit)
372 ("\\(\e$
(5!?
!h
!O
!h
!M
\e(B\\)" "\e$
(5$O
\e(B")
373 ("\\(\e$
(5!?
!h
!5\e(B\\)" "\e$
(5$P
\e(B")
374 ("\\(\e$
(5!?
!h
!6\e(B\\)" "\e$
(5$Q
\e(B")
375 ("\\(\e$
(5!?
!h
!?
\e(B\\)" "\e$
(5$R
\e(B")
376 ("\\(\e$
(5!?
!h
!L
\e(B\\)" "\e$
(5$S
\e(B")
377 ("\\(\e$
(5!?
!h
!M
\e(B\\)" "\e$
(5$T
\e(B")
378 ("\\(\e$
(5!?
!i
\e(B\\)" "\e$
(5#?
\e(B") ; Nukta
379 ("\\(\e$
(5!@!h
!M
\e(B\\)" "\e$
(5$
`\e(B")
380 ("\\(\e$
(5!@!i
\e(B\\)" "\e$
(5#@\e(B") ; Nukta
381 ("\\(\e$
(5!A
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"A\e(B") ; Half Form
382 ("\\(\e$(5!B!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"B
\e(B") ; Special Rule for "t-tr
"
383 ("\\(\e$
(5!B
!h
!B
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"c\e(B") ; Half Form
384 ("\\(\e$(5!B!h!B\e(B\\)" "\e$(5$a\e(B")
385 ("\\(\e$(5!B!h!F\e(B\\)" "\e$(5$b\e(B")
386 ("\\(\e$(5!B!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"d
\e(B") ; Half Form Post "r
"
387 ("\\(\e$
(5!B
!h
!O
\e(B\\)" "\e$
(5"%\e(B") ; Post "r"
388 ("\\(\e$(5!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"B
\e(B") ; Half Form
389 ("\\(\e$
(5!C
!h
!O
\e(B\\)" "\e$
(5!C
"q\e(B") ; Post "r"
390 ("\\(\e$(5!C!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"C
\e(B") ; Half Form
391 ("\\(\e$
(5!D
!h
!D
!h
!M
\e(B\\)" "\e$
(5$c
\e(B")
392 ("\\(\e$
(5!D
!h
!E
!h
!M
\e(B\\)" "\e$
(5$d
\e(B")
393 ("\\(\e$
(5!D
!h
!K
!h
!M
\e(B\\)" "\e$
(5$e
\e(B")
394 ("\\(\e$
(5!D
!h
!K
!h
!O
\e(B\\)" "\e$
(5$r
"r\e(B") ; Special Case for "dbhr" ; ***
395 ("\\(\e$(5!D!h!O!h!M\e(B\\)" "\e$(5$f\e(B")
396 ("\\(\e$(5!D!h!T!h!M\e(B\\)" "\e$(5$g\e(B")
397 ("\\(\e$(5!D!h!5!h!O\e(B\\)" "\e$(5$h\e(B")
398 ("\\(\e$(5!D!h!6!h!O\e(B\\)" "\e$(5$i\e(B")
399 ("\\(\e$(5!D!h!D!h!T\e(B\\)" "\e$(5$j\e(B")
400 ("\\(\e$(5!D!h!E!h!T\e(B\\)" "\e$(5$k\e(B")
401 ("\\(\e$(5!D!h\e(B\\)\e$(5!E!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5!D!h\e(B") ; Special Half Form (for ddhra)
402 ("\\(\e$(5!D!h!5\e(B\\)" "\e$(5$l\e(B")
403 ("\\(\e$(5!D!h!6\e(B\\)" "\e$(5$m\e(B")
404 ("\\(\e$(5!D!h!D\e(B\\)" "\e$(5$n\e(B")
405 ("\\(\e$(5!D!h!E\e(B\\)" "\e$(5$o\e(B")
406 ("\\(\e$(5!D!h!F\e(B\\)" "\e$(5$p\e(B")
407 ("\\(\e$(5!D!h\e(B\\)\e$(5!J!h\e(B" "\e$(5!D!h\e(B") ; Suppressing "db-"
408 ("\\(\e$(5!D!h!J\e(B\\)" "\e$(5$q\e(B")
409 ("\\(\e$(5!D!h!K\e(B\\)" "\e$(5$r\e(B")
410 ("\\(\e$(5!D!h!L\e(B\\)" "\e$(5$s\e(B")
411 ("\\(\e$(5!D!h!M\e(B\\)" "\e$(5$t\e(B")
412 ("\\(\e$(5!D!h!T\e(B\\)" "\e$(5$u\e(B")
413 ("\\(\e$(5!E!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"e
\e(B") ; Half Form
414 ("\\(\e$
(5!E
!h
!F
\e(B\\)" "\e$
(5$v
\e(B")
415 ("\\(\e$
(5!E
!h
!O
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"f\e(B") ; Half Form Post "r"
416 ("\\(\e$(5!E!h!O\e(B\\)" "\e$(5!E"q
\e(B") ; Post "r
"
417 ("\\(\e$
(5!E
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"E\e(B") ; Half Form
418 ("\\(\e$(5!F!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"k
\e(B") ; Half Form
419 ("\\(\e$
(5!F
!h
!F
\e(B\\)" "\e$
(5$w
\e(B")
420 ("\\(\e$
(5!F
!h
!O
\e(B\\)" "\e$
(5!F
"q\e(B")
421 ("\\(\e$(5!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"F
\e(B") ; Half Form
422 ("\\(\e$
(5!G
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"G\e(B") ; Nukta Half Form
423 ("\\(\e$(5!H!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"H
\e(B") ; Special Rule for "p-tr
"
424 ("\\(\e$
(5!H
!h
!B
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"g\e(B") ; Half Form
425 ("\\(\e$(5!H!h!B\e(B\\)" "\e$(5$x\e(B")
426 ("\\(\e$(5!H!h!F\e(B\\)" "\e$(5$y\e(B")
427 ("\\(\e$(5!H!h!Q\e(B\\)" "\e$(5$z\e(B")
428 ("\\(\e$(5!H!h!O\e(B\\)" "\e$(5"&\e(B") ; Post "r
"
429 ("\\(\e$
(5!H
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"H\e(B") ; Half Form
430 ("\\(\e$(5!I!h!O\e(B\\)" "\e$(5"'\e(B") ; Post "r
"
431 ("\\(\e$
(5!I
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"I\e(B") ; Half Form
432 ("\\(\e$(5!I!i!h!O\e(B\\)" "\e$(5""\e(B") ; Nukta Post "r"
433 ("\\(\e$(5!I!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"y
\e(B") ; Nukta Half Form
434 ("\\(\e$
(5!I
!i
\e(B\\)" "\e$
(5#I
\e(B") ; Nukta
435 ("\\(\e$
(5!J
!h
\e(B\\)\e$
(5!F
!h
\e(B[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"J\e(B") ; Special Half Form
436 ("\\(\e$(5!J!h!F\e(B\\)" "\e$(5${\e(B")
437 ("\\(\e$(5!J!h\e(B\\)\e$(5!J!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J
\e(B") ; Special Half Form
438 ("\\(\e$
(5!J
!h
!J
\e(B\\)" "\e$
(5$|
\e(B")
439 ("\\(\e$
(5!J
!h
\e(B\\)\e$
(5!T
!h
\e(B[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"J\e(B") ; Special Half Form
440 ("\\(\e$(5!J!h!T\e(B\\)" "\e$(5$}\e(B")
441 ("\\(\e$(5!J!h!O\e(B\\)" "\e$(5!J"q
\e(B") ; Post "r
"
442 ("\\(\e$
(5!J
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"J\e(B") ; Half Form
443 ("\\(\e$(5!K!h!F\e(B\\)" "\e$(5$~\e(B")
444 ("\\(\e$(5!K!h!O\e(B\\)" "\e$(5!K"q
\e(B") ; Post "r
"
445 ("\\(\e$
(5!K
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"K\e(B") ; Half Form
446 ("\\(\e$(5!L!h!F\e(B\\)" "\e$(5#P\e(B")
447 ("\\(\e$(5!L!h!Q\e(B\\)" "\e$(5#Q\e(B")
448 ("\\(\e$(5!L!h!O\e(B\\)" "\e$(5!L"q
\e(B") ; Post "r
"
449 ("\\(\e$
(5!L
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"L\e(B") ; Half Form
450 ("\\(\e$(5!M!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"M
\e(B") ; Half Form
451 ("\\(\e$
(5!N
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"N\e(B") ; Half Form
452 ;; special form for "ru".
453 ("\\(\e$(5!O!]\e(B\\)" "\e$(5",\e(B")
454 ("\\(\e$
(5!O
!^
\e(B\\)" "\e$
(5"-\e(B")
455 ("\\(\e$(5!P!]\e(B\\)" "\e$(5".
\e(B")
456 ("\\(\e$
(5!P
!^
\e(B\\)" "\e$
(5"/\e(B")
458 ("\\(\e$(5!Q!h!Q\e(B\\)" "\e$(5#`\e(B" sanskrit
)
459 ("\\(\e$(5!Q!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"Q
\e(B") ; Half Form
460 ("\\(\e$
(5!R
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"R\e(B") ; Half Form
461 ("\\(\e$(5!S!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"S
\e(B") ; Half Form
462 ("\\(\e$
(5!T
!h
!F
\e(B\\)" "\e$
(5#a
\e(B")
463 ("\\(\e$
(5!T
!h
!T
\e(B\\)" "\e$
(5#b
\e(B")
464 ("\\(\e$
(5!T
!h
!O
\e(B\\)" "\e$
(5!T
"q\e(B") ; Post "r"
465 ("\\(\e$(5!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"T
\e(B") ; Half Form
466 ("\\(\e$
(5!U
!h
!8!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"h\e(B") ; Half Form
467 ("\\(\e$(5!U!h!8\e(B\\)" "\e$(5#c\e(B")
468 ("\\(\e$(5!U!h!F\e(B\\)" "\e$(5#d\e(B")
469 ("\\(\e$(5!U!h!J\e(B\\)" "\e$(5#e\e(B")
470 ("\\(\e$(5!U!h!Q\e(B\\)" "\e$(5#f\e(B")
471 ("\\(\e$(5!U!h\e(B\\)\e$(5!T!h!O\e(B" "\e$(5"U
\e(B") ; Special Half Form
472 ("\\(\e$
(5!U
!h
!T
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"j\e(B") ; Half Form
473 ; ("\\(\e$(5!U!h!T\e(B\\)" "\e$(5#g\e(B")
474 ("\\(\e$(5!U!h!O!h!T\e(B\\)" "\e$(5#g\e(B")
475 ("\\(\e$(5!U!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"i
\e(B") ; Half Form
476 ("\\(\e$
(5!U
!h
!O
\e(B\\)" "\e$
(5")\e(B") ; Post "r"
477 ("\\(\e$(5!U!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"U
\e(B") ; Half Form
478 ("\\(\e$
(5!V
!h
!=!h
!O
!h
!M
\e(B\\)" "\e$
(5#h
\e(B")
479 ("\\(\e$
(5!V
!h
!=!h
!M
\e(B\\)" "\e$
(5#i
\e(B")
480 ("\\(\e$
(5!V
!h
!=!h
!T
\e(B\\)" "\e$
(5#j
\e(B")
481 ("\\(\e$
(5!V
!h
!=\e(B\\)" "\e$
(5#k
\e(B")
482 ("\\(\e$
(5!V
!h
!>\e(B\\)" "\e$
(5#l
\e(B")
483 ("\\(\e$
(5!V
!h
!O
\e(B\\)" "\e$
(5!V
"q\e(B") ; Post "r"
484 ("\\(\e$(5!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"V
\e(B") ; Half Form
485 ("\\(\e$
(5!W
!h
!F
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"W"F
\e(B") ; Special Half Form
486 ("\\(\e$
(5!W
!h
!F
\e(B\\)" "\e$
(5#m
\e(B")
487 ("\\(\e$
(5!W
!h
!O
\e(B\\)" "\e$
(5#n
\e(B")
488 ("\\(\e$
(5!W
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!N
!P
\e(B-\e$
(5!X
\e(B]" "\e$
(5"W\e(B") ; Half Form
489 ("\\(\e$(5!X!h!A\e(B\\)" "\e$(5#p\e(B")
490 ("\\(\e$(5!X!h!F\e(B\\)" "\e$(5#q\e(B")
491 ("\\(\e$(5!X!h!L\e(B\\)" "\e$(5#r\e(B")
492 ("\\(\e$(5!X!h!M\e(B\\)" "\e$(5#s\e(B")
493 ("\\(\e$(5!X!h!O\e(B\\)" "\e$(5#t\e(B")
494 ("\\(\e$(5!X!h!Q\e(B\\)" "\e$(5#u\e(B")
495 ("\\(\e$(5!X!h!T\e(B\\)" "\e$(5#v\e(B")
496 ;; Special Ligature Rules
497 ("\\(\e$(5!X!_\e(B\\)" "\e$(5#R\e(B")
499 ;; For consonants other than listed above, glyph-composition will
500 ;; be applied. If the consonant which is preceding "\e$(5!O\e(B" does not
501 ;; have the vertical line (such as "\e$(5!?\e(B"), "\e$(5"r\e(B" is put beneath the
504 ("[\e$(5!7!9!=!>!?!@!D!O!P!R!S!X\e(B]\\(\e$(5!h!O\e(B\\)" "\e$(5"r
\e(B")
505 ("[\e$
(5!6!8!C
!E
!F
!H
!J
!K
!L
!M
!T
!V
\e(B]\\(\e$
(5!h
!O
\e(B\\)" "\e$
(5"q\e(B")
506 ("\e$(5!?!i\e(B\\(\e$(5!h!O\e(B\\)" "\e$(5"r
\e(B")
507 ("\e$
(5!@!i
\e(B\\(\e$
(5!h
!O
\e(B\\)" "\e$
(5"r\e(B")
509 ;; Nukta with Non-Consonants
510 ("\\(\e$(5!!!i\e(B\\)" "\e$(5#!\e(B")
511 ("\\(\e$(5!&!i\e(B\\)" "\e$(5#&\e(B")
512 ("\\(\e$(5!'!i\e(B\\)" "\e$(5#'\e(B")
513 ("\\(\e$(5!*!i\e(B\\)" "\e$(5#*\e(B")
514 ("\\(\e$(5![!i\e(B\\)" "\e$(5#L\e(B")
515 ("\\(\e$(5!\!i\e(B\\)" "\e$(5#M\e(B")
516 ("\\(\e$(5!_!i\e(B\\)" "\e$(5#K\e(B")
517 ("\\(\e$(5!j!i\e(B\\)" "\e$(5#J\e(B")
519 ;; Special rule for "r + some vowels"
520 ("\\(\e$(5!O!_!i\e(B\\)" "\e$(5#*"p
\e(B")
521 ("\\(\e$
(5!O
![!i
\e(B\\)" "\e$
(5#&"p\e(B")
522 ("\\(\e$(5!O!\!i\e(B\\)" "\e$(5#'"p
\e(B")
523 ("\\(\e$
(5!O
!_
\e(B\\)" "\e$
(5!*"p\e(B")
524 ;; If everything fails, "y" will connect to the front consonant.
525 ("\\(\e$(5!h!M\e(B\\)" "\e$(5"]\e(B")
527 "Alist of regexps of Devanagari character sequences vs composed characters.
")
529 (let ((rules devanagari-char-to-glyph-rules))
531 (let ((rule (car rules))
532 (chars) (char) (glyphs) (glyph))
533 (setq rules (cdr rules))
534 (string-match "\\\\(\\(.
+\\)\\\\)" (car rule))
535 (setq chars (substring (car rule) (match-beginning 1) (match-end 1)))
536 (setq char (string-to-char chars))
537 (setq glyphs (cdr rule))
538 (setq glyph (string-to-char (car glyphs)))
539 (put-char-code-property
541 ;; We don't "cons
" it since priority is top to down.
542 (append (get-char-code-property char 'char-to-glyph) (list rule)))
544 (if (and (< ?\e(5z\e(B glyph) ; Glyphs only.
545 (null (get-char-code-property glyph 'glyph-to-char)))
546 ; One glyph may correspond to multiple characters,
547 ; e.g., a surrounding vowel in Tamil,
548 ; but for Devanagari we impose this restriction
549 ; to ensure that one glyph corresponds to one char.
550 (put-char-code-property
552 (cons (list (car glyphs) chars)
553 (get-char-code-property glyph 'glyph-to-char)
557 ;; Function used in both characters-to-glyphs conversion and
558 ;; glyphs-to-characters conversion.
561 (defun max-match-len (regexp)
562 "Return the maximum length of text that can match the pattern REGEXP.
563 Only
[...
] pattern of regexp is recognized.
"
566 (while (string-match "\\[\\([^\
]]\\)+\\]" regexp index)
567 (setq len (+ len (- (match-beginning 0) index) 1)
568 index (match-end 0)))
571 ;; Return t iff at least one member appears in both LIST1 and LIST2.
572 (defun intersecting-p (list1 list2)
574 (while (and list1 (not found))
575 (if (memq (car list1) list2)
577 (setq list1 (cdr list1))))
580 (defun string-conversion-by-rule (source symbol &rest specs)
581 "Convert string SOURCE by rules stored in SYMBOL property of each character.
582 The remaining arguments forms a list SPECS that restricts applicable rules.
584 The rules has the form
((REGEXP STR RULE-SPEC ...
) ...
).
585 Each character sequence in STRING that matches REGEXP is
588 If SPECS is nil
, only rules with no RULE-SPECs is applied. Otherwise
589 rules with no RULE-SPECS and rules that have at least one member of
590 SPECS in RULE-SPECs is applied.
592 Rules are tested in the order of the list
, thus more specific rules
593 should be placed in front of less specific rules.
595 If rule is given in the forms of regexp
'...
\\(...
\\)...
', a character
596 sequence that matches the pattern inside of the parenthesis is the
597 subject of the match. Otherwise
, the entire expression is the subject
601 (while (< pos (length source))
603 (rules (get-char-code-property
605 (substring source pos)) symbol)))
607 (let* ((rule (car rules))
609 (replace-str (car (cdr rule)))
610 (rule-specs (cdr (cdr rule)))
612 (if (not (or (null rule-specs)
613 (intersecting-p specs rule-specs)))
614 (setq rules (cdr rules))
615 (if (null (string-match "\\\\(.
+\\\\)" regexp))
617 (setq regexp (concat "\\(" regexp "\\)"))
618 (setq search-pos pos))
619 (setq search-pos (- pos (max-match-len
621 (string-match "^
[^
\\\\]*" regexp)
623 (if (< search-pos 0) (setq search-pos 0))
624 (if (string-match regexp source search-pos)
625 (if (= (match-beginning 1) pos)
627 (setq dst-str (concat dst-str replace-str))
628 (setq rules nil) ; Get out of the loop.
630 ;; proceed `pos' for replaced characters.
631 (setq pos (match-end 1)))
632 (setq rules (cdr rules)))
633 (setq rules (cdr rules))))))
634 ;; proceed to next position
636 (setq dst-str (concat dst-str (substring source pos (1+ pos)))
642 ;; Convert Character Code to Glyph Code
646 (defun char-to-glyph-devanagari (string &rest langs)
647 "Convert Devanagari characters in STRING to Devanagari glyphs.
648 Ligatures and special rules are processed.
"
650 'string-conversion-by-rule
651 (append (list string 'char-to-glyph) langs)))
654 ;;(char-to-glyph-devanagari "\e$
(5!X
![!F
!h
!D
!\
\e(B") => "\e$
(5!X
!["F!D!\\e(B"
655 ;;(char-to-glyph-devanagari "\e$(5!O!Z!V!h!=!h!O![!M\e(B") => ???
658 ;; Phase 2: Compose Glyphs to form One Glyph.
661 ;; Each list consists of glyph, application-priority and application-direction.
663 ;; Glyphs will be ordered from low priority number to high priority number.
664 ;; If application-priority is omitted, it is assumed to be 0.
665 ;; If application-direction is omitted, it is assumed to be '(mr . ml).
667 (defconst devanagari-composition-rules
668 '((?
\e$
(5!!\e(B 0 (tr . br
))
669 (?
\e$
(5!"\e(B 0 (mr . mr))
726 (?\e$(5![\e(B 0 (ml . mr))
728 (?\e$(5!]\e(B 0 (br . tr))
729 (?\e$(5!^\e(B 0 (br . tr))
730 (?\e$(5!_\e(B 0 (br . tr))
731 (?\e$(5!`\e(B 0 (mr . mr)) ; (tc . bc)
732 (?\e$(5!a\e(B 0 (mr . mr))
733 (?\e$(5!b\e(B 0 (mr . mr))
734 (?\e$(5!c\e(B 0 (mr . mr))
739 (?\e$(5!h\e(B 0 (br . tr))
740 (?\e$(5!i\e(B 0 (br . tr))
841 (?\e$(5"p
\e(B 10 (mr . mr
))
842 (?
\e$
(5"q\e(B 0 (br . br))
843 (?\e$(5"r
\e(B 0 (br . tr
))
898 (?\e$(5#K\e(B 0 (br . tr))
899 (?\e$(5#L\e(B 0 (br . tr))
900 (?\e$(5#M\e(B 0 (br . tr))
1046 ;; Determine composition priority and rule of the array of Glyphs.
1047 ;; Sort the glyphs with their priority.
1049 (defun devanagari-reorder-glyphs-for-composition (string start end
)
1051 (ordered-glyphs nil
))
1053 (let ((glyph (aref string pos
)))
1055 (setq ordered-glyphs
1056 (append ordered-glyphs
1057 (list (assq glyph devanagari-composition-rules
))))))
1058 (sort ordered-glyphs
'(lambda (x y
) (< (car (cdr x
)) (car (cdr y
)))))))
1060 ! ;;(devanagari-compose-to-one-glyph "\e$(5"5!X![\e(B") => "\e4\e$(6!Xv#"5t%![\e0!X"5![\e1\e(B"
1062 (defun devanagari-compose-to-one-glyph (devanagari-string)
1063 (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition
1064 devanagari-string
0 (length devanagari-string
)))
1065 ;; List of glyphs to be composed.
1066 (cmp-glyph-list (list (car (car o-glyph-list
))))
1067 (o-glyph-list (cdr o-glyph-list
)))
1069 (let* ((o-glyph (car o-glyph-list
))
1070 (glyph (if (< 2 (length o-glyph
))
1071 ;; default composition
1072 (list (car (cdr (cdr o-glyph
))) (car o-glyph
))
1073 ;; composition with a specified rule
1074 (list '(mr . ml
) (car o-glyph
)))))
1075 (setq o-glyph-list
(cdr o-glyph-list
))
1076 (setq cmp-glyph-list
(append cmp-glyph-list glyph
))))
1077 ;; Before applying compose-chars, convert glyphs to
1078 ;; 1-column width if possible.
1079 (setq cmp-glyph-list
(devanagari-wide-to-narrow cmp-glyph-list
))
1080 (if (= (length cmp-glyph-list
) 1) (char-to-string (car cmp-glyph-list
))
1081 (apply 'compose-chars cmp-glyph-list
))))
1083 (defun devanagari-composition-component (string &optional start end
)
1084 (or start
(setq start
0))
1085 (or end
(setq end
(length string
)))
1086 (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition
1088 ;; List of glyphs to be composed.
1089 (cmp-glyph-list (list (car (car o-glyph-list
)))))
1090 (setq o-glyph-list
(cdr o-glyph-list
))
1092 (let* ((o-glyph (car o-glyph-list
))
1093 (glyph (if (< 2 (length o-glyph
))
1094 ;; default composition
1095 (list (car (cdr (cdr o-glyph
))) (car o-glyph
))
1096 ;; composition with a specified rule
1097 (list '(mr . ml
) (car o-glyph
)))))
1098 (setq o-glyph-list
(cdr o-glyph-list
))
1099 (setq cmp-glyph-list
(append cmp-glyph-list glyph
))))
1100 ;; Convert glyphs to 1-column width if possible.
1101 (devanagari-wide-to-narrow cmp-glyph-list
)))
1103 ;; Utility function for Phase 2.5
1105 ;; Check whether GLYPH is a Devanagari vertical modifier or not.
1106 ;; If it is a vertical modifier, whether it should be 1-column shape or not
1107 ;; depends on previous non-vertical modifier.
;; Return non-nil if GLYPH occurs in the string literal below.
;; The one-character string made from GLYPH is passed as the REGEXP
;; argument of `string-match' and the literal is the string that is
;; searched, so this works as a membership test on that literal.
;; NOTE(review): GLYPH is used as a regexp here; presumably callers only
;; pass glyph codes that are not regexp-special characters -- confirm.
1108 (defun devanagari-vertical-modifier-p (glyph)
1109 (string-match (char-to-string glyph
)
1110 "[\e$(5!"!]!^
!_
!`!a
!b
!c
!h
!i
"p"q
"r#K#L#M\e(B]"))
;; Test GLYPH by passing its one-character string as the REGEXP
;; argument of `string-match'.
;; NOTE(review): the only string literal visible in this span is the
;; commented-out one; the literal that is actually searched is not
;; shown here -- confirm which character set is in effect.
1112 (defun devanagari-non-vertical-modifier-p (glyph)
1113 (string-match (char-to-string glyph
)
1114 ; "[\e$(5!Z![!\!d!e!f!g\e(B]"))
(defun devanagari-wide-to-narrow-char (char)
  "Convert Devanagari character CHAR to the corresponding narrow character.
The narrow character is the value associated with CHAR in the alist
`devanagari-1-column-char'.
If there's no corresponding narrow character, return CHAR as is."
  ;; `assq' yields nil for an unknown CHAR, and (cdr nil) is nil, so
  ;; the `or' falls through to CHAR itself.
  (or (cdr (assq char devanagari-1-column-char))
      char))
1124 ;; Phase 2.5 Convert appropriate character to 1-column shape.
;; This is temporary and should be removed once Emacs supports
;; variable-width characters.
1129 ;; This will convert the composing glyphs (2 column glyphs)
1130 ;; to narrow (1 column) glyphs if they exist.
1132 ;; devanagari-wide-to-narrow-old converts glyphs simply.
1133 ;; devanagari-wide-to-narrow takes care of upper/lower apply-glyphs
1134 ;; with 2 column base-glyph.
1136 ;; Execution Examples
1137 ;;(devanagari-wide-to-narrow '(?\e$(5!3\e(B (ml . ml) ?\e$(5!a\e(B))
1138 ;;(devanagari-wide-to-narrow '(?\e$(5!F\e(B (ml . ml) ?\e$(5!a\e(B))
(defun devanagari-wide-to-narrow (src-list)
  "Return a copy of SRC-LIST with glyphs narrowed where possible.
This simply starts `devanagari-wide-to-narrow-iter' on SRC-LIST with
its 2-column flag set to t."
  (let ((initially-2-column t))
    (devanagari-wide-to-narrow-iter src-list initially-2-column)))
(defun devanagari-wide-to-narrow-iter (src-list 2-col-glyph)
  "Return a new list made from SRC-LIST with glyphs narrowed where possible.
SRC-LIST mixes glyph codes (numbers) with other elements; elements
that are not numbers are copied unchanged.

2-COL-GLYPH is the state carried along the list.  It becomes nil after
an ordinary glyph that has an entry in `devanagari-1-column-char', and
t after an ordinary glyph that has none.  Glyphs are handled like this:

- a glyph satisfying `devanagari-non-vertical-modifier-p' is always
  converted with `devanagari-wide-to-narrow-char';
- a glyph satisfying `devanagari-vertical-modifier-p' is kept as is
  when 2-COL-GLYPH is non-nil and converted otherwise;
- any other glyph is converted when it has a narrow counterpart."
  (if (null src-list)
      '()
    (let ((glyph (car src-list))
          (tail (cdr src-list)))
      (cond
       ;; Not a glyph code: pass it through, state unchanged.
       ((not (numberp glyph))
        (cons glyph
              (devanagari-wide-to-narrow-iter tail 2-col-glyph)))
       ;; Glyphs processed regardless of the value of 2-COL-GLYPH.
       ((devanagari-non-vertical-modifier-p glyph)
        (cons (devanagari-wide-to-narrow-char glyph)
              (devanagari-wide-to-narrow-iter tail 2-col-glyph)))
       ;; Glyphs whose shape depends on the value of 2-COL-GLYPH.
       ((devanagari-vertical-modifier-p glyph)
        (if 2-col-glyph
            (cons glyph
                  (devanagari-wide-to-narrow-iter tail t))
          (cons (devanagari-wide-to-narrow-char glyph)
                (devanagari-wide-to-narrow-iter tail 2-col-glyph))))
       ;; Ordinary glyph: it decides the state for what follows.
       (t
        (if (cdr (assq glyph devanagari-1-column-char))
            (cons (devanagari-wide-to-narrow-char glyph)
                  (devanagari-wide-to-narrow-iter tail nil))
          (cons glyph
                (devanagari-wide-to-narrow-iter tail t))))))))
1176 ;; Decomposition of composite sequence.
(defun devanagari-decompose-string (str)
  "Decompose Devanagari string STR.
`decompose-string' is applied to a copy of STR made with
`copy-sequence'; the value of that call is returned."
  (let ((copy (copy-sequence str)))
    (decompose-string copy)))
(defun devanagari-decompose-region (from to)
  "Decompose the text between FROM and TO with `decompose-region'.
When called interactively, FROM and TO are the bounds of the region."
  (interactive "r")
  (decompose-region from to))
(defun devanagari-compose-string (str &rest langs)
  "Return a copy of STR with its Devanagari glyph units composed.
STR itself is not modified; the work is done on a copy made with
`copy-sequence'.

Each successive match of `devanagari-composite-glyph-unit' is handled
in turn: `char-to-glyph-devanagari' is applied to the matched text
followed by LANGS, the result is turned into components by
`devanagari-composition-component', and `compose-string' is called on
the matched range with those components."
  (setq str (copy-sequence str))
  (let ((idx 0))
    (while (string-match devanagari-composite-glyph-unit str idx)
      (let* ((match-b (match-beginning 0))
             (match-e (match-end 0))
             (cmps (devanagari-composition-component
                    (apply
                     'char-to-glyph-devanagari
                     (cons (substring str match-b match-e) langs)))))
        (compose-string str match-b match-e cmps)
        ;; Continue the search after the unit just handled.
        (setq idx match-e))))
  str)
(defun devanagari-compose-region (from to &rest langs)
  "Compose the Devanagari glyph units found between FROM and TO.
When called interactively, FROM and TO are the bounds of the region.

The buffer is narrowed to FROM..TO and searched from its start for
`devanagari-composite-glyph-unit'.  For each match,
`char-to-glyph-devanagari' is applied to the matched text followed by
LANGS, the result is turned into components by
`devanagari-composition-component', and `compose-region' is called on
the matched range with those components.  Point and the narrowing in
effect on entry are restored afterwards."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward devanagari-composite-glyph-unit nil t)
        (let* ((match-b (match-beginning 0))
               (match-e (match-end 0))
               (cmps (devanagari-composition-component
                      (apply
                       'char-to-glyph-devanagari
                       (cons (buffer-substring match-b match-e) langs)))))
          (compose-region match-b match-e cmps))))))
1224 ;; For pre-write and post-read conversion
(defun devanagari-compose-from-is13194-region (from to)
  "Compose IS 13194 characters in the region to Devanagari characters.
When called interactively, FROM and TO are the bounds of the region.

With the buffer narrowed to FROM..TO, run
`indian-to-devanagari-region' and then `devanagari-compose-region' on
the whole accessible portion.  Return the size of that portion after
the conversion.  Point and the narrowing in effect on entry are
restored afterwards."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (indian-to-devanagari-region (point-min) (point-max))
      (devanagari-compose-region (point-min) (point-max))
      ;; Length of the converted text.
      (- (point-max) (point-min)))))
(defun in-is13194-devanagari-post-read-conversion (len)
  "Compose the LEN characters that follow point.
Call `devanagari-compose-from-is13194-region' on the range that starts
at point and extends LEN positions forward, and return its value."
  (let* ((start (point))
         (end (+ start len)))
    (devanagari-compose-from-is13194-region start end)))
(defun devanagari-decompose-to-is13194-region (from to)
  "Decompose Devanagari characters in the region to IS 13194 characters.
When called interactively, FROM and TO are the bounds of the region.

With the buffer narrowed to FROM..TO, run
`devanagari-decompose-region' and then `devanagari-to-indian-region'
on the whole accessible portion.  Point and the narrowing in effect on
entry are restored afterwards."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (devanagari-decompose-region (point-min) (point-max))
      (devanagari-to-indian-region (point-min) (point-max)))))
(defun in-is13194-devanagari-pre-write-conversion (from to)
  "Put the text to be written into a temporary buffer as IS 13194 text.
FROM is either a string, which is then the text itself, or a buffer
position; in the latter case the text is the range FROM..TO of the
buffer that is current on entry.

A new buffer named \" *temp*\" is generated and made current, the text
is inserted into it, and `devanagari-decompose-to-is13194-region' is
run over all of it.  The temporary buffer is left current.
Return nil."
  (let ((old-buf (current-buffer)))
    (set-buffer (generate-new-buffer " *temp*"))
    (if (stringp from)
        (insert from)
      (insert-buffer-substring old-buf from to))
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    ;; Should return nil as annotations.
    nil))
1263 ;; For input/output of ITRANS
(defun devanagari-encode-itrans-region (from to)
  "Encode the Devanagari text between FROM and TO with ITRANS.
When called interactively, FROM and TO are the bounds of the region.

With the buffer narrowed to FROM..TO, run
`devanagari-decompose-to-is13194-region' and then
`indian-encode-itrans-region' on the whole accessible portion.  The
narrowing in effect on entry is restored afterwards."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    (indian-encode-itrans-region (point-min) (point-max))))
(defun devanagari-decode-itrans-region (from to)
  "Decode the ITRANS text between FROM and TO into Devanagari.
When called interactively, FROM and TO are the bounds of the region.

With the buffer narrowed to FROM..TO, run
`indian-decode-itrans-region' and then
`devanagari-compose-from-is13194-region' on the whole accessible
portion.  The narrowing in effect on entry is restored afterwards."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (indian-decode-itrans-region (point-min) (point-max))
    (devanagari-compose-from-is13194-region (point-min) (point-max))))
;; Announce the `devan-util' feature.
(provide 'devan-util)
;;; devan-util.el ends here