1 ;;; ind-util.el --- Transliteration and Misc. Tools for Indian Languages -*- coding: utf-8-emacs; -*-
3 ;; Copyright (C) 2001-2017 Free Software Foundation, Inc.
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Devanagari
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;; This file provides conversion between UCS and various
26 ;; transliteration schemes, such as ITRANS, kyoto-harvard and aiba
27 ;; methods. It also provides conversion between IS 13194 and UCS.
28 ;; Finally, this program provides compatibility support for the
29 ;; old implementation of the Devanagari script.
35 ;; The following definitions provide various transliteration schemes (such as
36 ;; ITRANS, Kyoto-Harvard, and Aiba) for Indian scripts. They are also
37 ;; used in quail/indian.el for typing Indian scripts in Emacs.
(defun indian-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression matching any key of HASHTBL.
HASHTBL is a hash table whose keys are strings.  The keys are
collected into a list and handed to `regexp-opt', so the result
matches each key literally."
  ;; KEYS has to be bound locally: pushing onto an unbound variable
  ;; signals `void-variable', and pushing onto a global one would
  ;; accumulate keys across calls.
  (let (keys)
    (maphash (lambda (key _val) (push key keys)) hashtbl)
    ;; `maphash' itself returns nil, so the regexp must be built and
    ;; returned explicitly.
    (regexp-opt keys)))
47 (defvar indian-dev-base-table
50 (?अ nil
) (?आ ?ा
) (?इ ?ि
) (?ई ?ी
) (?उ ?ु
) (?ऊ ?ू
)
51 (?ऋ ?ृ
) (?ऌ ?ॢ
) (?ऍ ?ॅ
) (?ऎ ?ॆ
) (?ए ?े
) (?ऐ ?ै
)
52 (?ऑ ?ॉ
) (?ऒ ?ॊ
) (?ओ ?ो
) (?औ ?ौ
) (?ॠ ?ॄ
) (?ॡ ?ॣ
))
53 (;; CONSONANTS (currently 42, including special cases)
54 ?क ?ख ?ग ?घ ?ङ
;; GUTTURALS
55 ?च ?छ ?ज ?झ ?ञ
;; PALATALS
56 ?ट ?ठ ?ड ?ढ ?ण
;; CEREBRALS
57 ?त ?थ ?द ?ध ?न ?ऩ
;; DENTALS
58 ?प ?फ ?ब ?भ ?म
;; LABIALS
59 ?य ?र ?ऱ ?ल ?ळ ?ऴ ?व
;; SEMIVOWELS
60 ?श ?ष ?स ?ह
;; SIBILANTS
61 ?क़ ?ख़ ?ग़ ?ज़ ?ड़ ?ढ़ ?फ़ ?य़
;; NUKTAS
66 ?० ?१ ?२ ?३ ?४ ?५ ?६ ?७ ?८ ?९
)
67 (;; Inscript-extra (4) (#, $, ^, *, ])
68 "्र" "र्" "त्र" "श्र" "़")))
70 ;; Punjabi is also known as Gurmukhi.
71 (defvar indian-pnj-base-table
74 (?ਅ nil
) (?ਆ ?ਾ
) (?ਇ ?ਿ
) (?ਈ ?ੀ
) (?ਉ ?ੁ
) (?ਊ ?ੂ
)
75 nil nil nil nil
(?ਏ ?ੇ
) (?ਐ ?ੈ
)
76 nil nil
(?ਓ ?ੋ
) (?ਔ ?ੌ
) nil nil
)
78 ?ਕ ?ਖ ?ਗ ?ਘ ?ਙ
;; GUTTURALS
79 ?ਚ ?ਛ ?ਜ ?ਝ ?ਞ
;; PALATALS
80 ?ਟ ?ਠ ?ਡ ?ਢ ?ਣ
;; CEREBRALS
81 ?ਤ ?ਥ ?ਦ ?ਧ ?ਨ nil
;; DENTALS
82 ?ਪ ?ਫ ?ਬ ?ਭ ?ਮ
;; LABIALS
83 ?ਯ ?ਰ nil ?ਲ ?ਲ਼ nil ?ਵ
;; SEMIVOWELS
84 ?ਸ਼ nil ?ਸ ?ਹ
;; SIBILANTS
85 nil ?ਖ਼ ?ਗ਼ ?ਜ਼ ?ੜ nil ?ਫ਼ nil
;; NUKTAS
88 nil ?ਂ nil nil ?੍ nil nil
) ;; ek onkar, etc.
90 ?੦ ?੧ ?੨ ?੩ ?੪ ?੫ ?੬ ?੭ ?੮ ?੯
)
91 (;; Inscript-extra (4) (#, $, ^, *, ])
92 "੍ਰ" "ਰ੍" "ਤ੍ਰ" "ਸ਼੍ਰ" "਼")))
94 (defvar indian-gjr-base-table
97 (?અ nil
) (?આ ?ા
) (?ઇ ?િ
) (?ઈ ?ી
) (?ઉ ?ુ
) (?ઊ ?ૂ
)
98 (?ઋ ?ૃ
) nil
(?ઍ ?ૅ
) nil
(?એ ?ે
) (?ઐ ?ૈ
)
99 (?ઑ ?ૉ
) nil
(?ઓ ?ો
) (?ઔ ?ૌ
) (?ૠ ?ૄ
) nil
)
101 ?ક ?ખ ?ગ ?ઘ ?ઙ
;; GUTTURALS
102 ?ચ ?છ ?જ ?ઝ ?ઞ
;; PALATALS
103 ?ટ ?ઠ ?ડ ?ઢ ?ણ
;; CEREBRALS
104 ?ત ?થ ?દ ?ધ ?ન nil
;; DENTALS
105 ?પ ?ફ ?બ ?ભ ?મ
;; LABIALS
106 ?ય ?ર nil ?લ ?ળ nil ?વ
;; SEMIVOWELS
107 ?શ ?ષ ?સ ?હ
;; SIBILANTS
108 nil nil nil nil nil nil nil nil
;; NUKTAS
111 ?ઁ ?ં ?ઃ ?ઽ ?્ ?ૐ nil
)
113 ?૦ ?૧ ?૨ ?૩ ?૪ ?૫ ?૬ ?૭ ?૮ ?૯
)
114 (;; Inscript-extra (4) (#, $, ^, *, ])
115 "્ર" "ર્" "ત્ર" "શ્ર" "઼")))
117 (defvar indian-ori-base-table
120 (?ଅ nil
) (?ଆ ?ା
) (?ଇ ?ି
) (?ଈ ?ୀ
) (?ଉ ?ୁ
) (?ଊ ?ୂ
)
121 (?ଋ ?ୃ
) (?ଌ nil
) nil nil
(?ଏ ?େ
) (?ଐ ?ୈ
)
122 nil nil
(?ଓ ?ୋ
) (?ଔ ?ୌ
) (?ୠ nil
) (?ୡ nil
))
124 ?କ ?ଖ ?ଗ ?ଘ ?ଙ
;; GUTTURALS
125 ?ଚ ?ଛ ?ଜ ?ଝ ?ଞ
;; PALATALS
126 ?ଟ ?ଠ ?ଡ ?ଢ ?ଣ
;; CEREBRALS
127 ?ତ ?ଥ ?ଦ ?ଧ ?ନ nil
;; DENTALS
128 ?ପ ?ଫ ?ବ ?ଭ ?ମ
;; LABIALS
129 ?ଯ ?ର nil ?ଲ ?ଳ nil nil
;; SEMIVOWELS
130 ?ଶ ?ଷ ?ସ ?ହ
;; SIBILANTS
131 nil nil nil nil ?ଡ଼ ?ଢ଼ nil ?ୟ
;; NUKTAS
134 ?ଁ ?ଂ ?ଃ ?ଽ ?୍ nil nil
)
136 ?୦ ?୧ ?୨ ?୩ ?୪ ?୫ ?୬ ?୭ ?୮ ?୯
)
137 (;; Inscript-extra (4) (#, $, ^, *, ])
138 "୍ର" "ର୍" "ତ୍ର" "ଶ୍ର" "଼")))
140 (defvar indian-bng-base-table
143 (?অ nil
) (?আ ?া
) (?ই ?ি
) (?ঈ ?ী
) (?উ ?ু
) (?ঊ ?ূ
)
144 (?ঋ ?ৃ
) (?ঌ ?ৢ
) nil nil
(?এ ?ে
) (?ঐ ?ৈ
)
145 nil nil
(?ও ?ো
) (?ঔ ?ৌ
) (?ৠ ?ৄ
) (?ৡ ?ৣ
))
147 ?ক ?খ ?গ ?ঘ ?ঙ
;; GUTTURALS
148 ?চ ?ছ ?জ ?ঝ ?ঞ
;; PALATALS
149 ?ট ?ঠ ?ড ?ঢ ?ণ
;; CEREBRALS
150 ?ত ?থ ?দ ?ধ ?ন nil
;; DENTALS
151 ?প ?ফ ?ব ?ভ ?ম
;; LABIALS
152 ?য ?র nil ?ল nil nil nil
;; SEMIVOWELS
153 ?শ ?ষ ?স ?হ
;; SIBILANTS
154 nil nil nil nil ?ড় ?ঢ় nil ?য়
;; NUKTAS
157 ?ঁ ?ং ?ঃ nil ?্ nil nil
)
159 ?০ ?১ ?২ ?৩ ?৪ ?৫ ?৬ ?৭ ?৮ ?৯
)
160 (;; Inscript-extra (4) (#, $, ^, *, ])
161 "্র" "র্" "ত্র" "শ্র" "়")))
163 (defvar indian-asm-base-table
166 (?অ nil
) (?আ ?া
) (?ই ?ি
) (?ঈ ?ী
) (?উ ?ু
) (?ঊ ?ূ
)
167 (?ঋ ?ৃ
) (?ঌ ?ৢ
) nil nil
(?এ ?ে
) (?ঐ ?ৈ
)
168 nil nil
(?ও ?ো
) (?ঔ ?ৌ
) (?ৠ ?ৄ
) (?ৡ ?ৣ
))
170 ?ক ?খ ?গ ?ঘ ?ঙ
;; GUTTURALS
171 ?চ ?ছ ?জ ?ঝ ?ঞ
;; PALATALS
172 ?ট ?ঠ ?ড ?ঢ ?ণ
;; CEREBRALS
173 ?ত ?থ ?দ ?ধ ?ন nil
;; DENTALS
174 ?প ?ফ ?ব ?ভ ?ম
;; LABIALS
175 ?য ?ৰ nil ?ল nil nil ?ৱ
;; SEMIVOWELS
176 ?শ ?ষ ?স ?হ
;; SIBILANTS
177 nil nil nil nil ?ড় ?ঢ় nil ?য়
;; NUKTAS
180 ?ঁ ?ং ?ঃ nil ?্ nil nil
)
182 ?০ ?১ ?২ ?৩ ?৪ ?৫ ?৬ ?৭ ?৮ ?৯
)
183 (;; Inscript-extra (4) (#, $, ^, *, ])
184 "্ৰ" "ৰ্" "ত্ৰ" "শ্ৰ" "়")))
186 (defvar indian-tlg-base-table
189 (?అ nil
) (?ఆ ?ా
) (?ఇ ?ి
) (?ఈ ?ీ
) (?ఉ ?ు
) (?ఊ ?ూ
)
190 (?ఋ ?ృ
) (?ఌ nil
) nil
(?ఏ ?ే
) (?ఎ ?ె
) (?ఐ ?ై
)
191 nil
(?ఓ ?ో
) (?ఒ ?ొ
) (?ఔ ?ౌ
) (?ౠ ?ౄ
) (?ౡ nil
))
193 ?క ?ఖ ?గ ?ఘ ?ఙ
;; GUTTURALS
194 ?చ ?ఛ ?జ ?ఝ ?ఞ
;; PALATALS
195 ?ట ?ఠ ?డ ?ఢ ?ణ
;; CEREBRALS
196 ?త ?థ ?ద ?ధ ?న nil
;; DENTALS
197 ?ప ?ఫ ?బ ?భ ?మ
;; LABIALS
198 ?య ?ర ?ఱ ?ల ?ళ nil ?వ
;; SEMIVOWELS
199 ?శ ?ష ?స ?హ
;; SIBILANTS
200 nil nil nil nil nil nil nil nil
;; NUKTAS
203 ?ఁ ?ం ?ః nil ?్ nil nil
)
205 ?౦ ?౧ ?౨ ?౩ ?౪ ?౫ ?౬ ?౭ ?౮ ?౯
)
206 (;; Inscript-extra (4) (#, $, ^, *, ])
207 "్ర" "ర్" "త్ర" "శ్ర" nil
)))
209 (defvar indian-knd-base-table
212 (?ಅ nil
) (?ಆ ?ಾ
) (?ಇ ?ಿ
) (?ಈ ?ೀ
) (?ಉ ?ು
) (?ಊ ?ೂ
)
213 (?ಋ ?ೃ
) (?ಌ nil
) nil
(?ಏ ?ೇ
) (?ಎ ?ೆ
) (?ಐ ?ೈ
)
214 nil
(?ಓ ?ೋ
) (?ಒ ?ೊ
) (?ಔ ?ೌ
) (?ೠ ?ೄ
) (?ೡ nil
))
216 ?ಕ ?ಖ ?ಗ ?ಘ ?ಙ
;; GUTTURALS
217 ?ಚ ?ಛ ?ಜ ?ಝ ?ಞ
;; PALATALS
218 ?ಟ ?ಠ ?ಡ ?ಢ ?ಣ
;; CEREBRALS
219 ?ತ ?ಥ ?ದ ?ಧ ?ನ nil
;; DENTALS
220 ?ಪ ?ಫ ?ಬ ?ಭ ?ಮ
;; LABIALS
221 ?ಯ ?ರ ?ಱ ?ಲ ?ಳ nil ?ವ
;; SEMIVOWELS
222 ?ಶ ?ಷ ?ಸ ?ಹ
;; SIBILANTS
223 nil nil nil nil nil nil ?ೞ nil
;; NUKTAS
226 nil ?ಂ ?ಃ nil ?್ nil nil
)
228 ?೦ ?೧ ?೨ ?೩ ?೪ ?೫ ?೬ ?೭ ?೮ ?೯
)
229 (;; Inscript-extra (4) (#, $, ^, *, ])
230 "್ರ" "ರ್" "ತ್ರ" "ಶ್ರ" nil
)))
232 (defvar indian-mlm-base-table
235 (?അ nil
) (?ആ ?ാ
) (?ഇ ?ി
) (?ഈ ?ീ
) (?ഉ ?ു
) (?ഊ ?ൂ
)
236 (?ഋ ?ൃ
) (?ഌ nil
) nil
(?ഏ ?േ
) (?എ ?െ
) (?ഐ ?ൈ
)
237 nil
(?ഓ ?ോ
) (?ഒ ?ൊ
) (?ഔ ?ൌ
) nil nil
)
239 ?ക ?ഖ ?ഗ ?ഘ ?ങ
;; GUTTURALS
240 ?ച ?ഛ ?ജ ?ഝ ?ഞ
;; PALATALS
241 ?ട ?ഠ ?ഡ ?ഢ ?ണ
;; CEREBRALS
242 ?ത ?ഥ ?ദ ?ധ ?ന nil
;; DENTALS
243 ?പ ?ഫ ?ബ ?ഭ ?മ
;; LABIALS
244 ?യ ?ര ?റ ?ല ?ള ?ഴ ?വ
;; SEMIVOWELS
245 ?ശ ?ഷ ?സ ?ഹ
;; SIBILANTS
246 nil nil nil nil nil nil nil nil
;; NUKTAS
249 nil ?ം ?ഃ nil ?് nil nil
)
251 ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯
)
252 (;; Inscript-extra (4) (#, $, ^, *, ])
253 "്ര" "ര്" "ത്ര" "ശ്ര" nil
)))
255 (defvar indian-tml-base-table
258 (?அ nil
) (?ஆ ?ா
) (?இ ?ி
) (?ஈ ?ீ
) (?உ ?ு
) (?ஊ ?ூ
)
259 nil nil nil
(?ஏ ?ே
) (?எ ?ெ
) (?ஐ ?ை
)
260 nil
(?ஓ ?ோ
) (?ஒ ?ொ
) (?ஔ ?ௌ
) nil nil
)
262 ?க nil nil nil ?ங
;; GUTTURALS
263 ?ச nil ?ஜ nil ?ஞ
;; PALATALS
264 ?ட nil nil nil ?ண
;; CEREBRALS
265 ?த nil nil nil ?ந ?ன
;; DENTALS
266 ?ப nil nil nil ?ம
;; LABIALS
267 ?ய ?ர ?ற ?ல ?ள ?ழ ?வ
;; SEMIVOWELS
268 nil ?ஷ ?ஸ ?ஹ
;; SIBILANTS
269 nil nil nil nil nil nil nil nil
;; NUKTAS
272 nil ?ஂ ?ஃ nil ?் nil nil
)
274 ?௦ ?௧ ?௨ ?௩ ?௪ ?௫ ?௬ ?௭ ?௮ ?௯
)
275 (;; Inscript-extra (4) (#, $, ^, *, ])
276 "்ர" "ர்" "த்ர" nil nil
)))
(defvar indian-base-table-to-language-alist
  '((indian-dev-base-table . "Devanagari")
    (indian-pnj-base-table . "Punjabi")
    ;; Gujarati has its own base table and ITRANS hash in this file,
    ;; so it needs an entry here like every other script.
    (indian-gjr-base-table . "Gujarati")
    (indian-ori-base-table . "Oriya")
    (indian-bng-base-table . "Bengali")
    (indian-asm-base-table . "Assamese")
    (indian-tlg-base-table . "Telugu")
    (indian-knd-base-table . "Kannada")
    (indian-mlm-base-table . "Malayalam")
    (indian-tml-base-table . "Tamil"))
  "Alist mapping base-table variables to language names.
Each element has the form (BASE-TABLE-SYMBOL . LANGUAGE-NAME), where
BASE-TABLE-SYMBOL is the name of one of the `indian-*-base-table'
variables and LANGUAGE-NAME is a string.")
289 (defvar indian-itrans-v5-table
290 '(;; for encode/decode
292 "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U")
293 ("RRi" "R^i") ("LLi" "L^i") (".c" "e.c") "E" "e" "ai"
294 "o.c" "O" "o" "au" ("RRI" "R^I") ("LLI" "L^I"))
296 "k" "kh" "g" "gh" ("~N" "N^")
297 "ch" ("Ch" "chh") "j" "jh" ("~n" "JN")
298 "T" "Th" "D" "Dh" "N"
299 "t" "th" "d" "dh" "n" "nh"
300 "p" "ph" "b" "bh" "m"
301 "y" "r" "rh" "l" ("L" "ld") nil
("v" "w")
302 "sh" ("Sh" "shh") "s" "h"
303 "q" "K" "G" ("J" "z") ".D" ".Dh" "f" ("Y" "yh")
306 ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") "..")))
308 (defvar indian-itrans-v5-table-for-tamil
309 '(;; for encode/decode
311 "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U")
312 ("RRi" "R^i") ("LLi" "L^i") (".c" "e.c") "E" "e" "ai"
313 "o.c" "O" "o" "au" ("RRI" "R^I") ("LLI" "L^I"))
315 "k" "kh" "g" "gh" ("~N" "N^")
316 "ch" ("Ch" "chh") "j" "jh" ("~n" "JN")
317 "T" "Th" "D" "Dh" "N"
318 "t" "th" "d" "dh" "n" "nh"
319 "p" "ph" "b" "bh" "m"
320 "y" "r" "rh" "l" ("L" "ld") ("J" "z") ("v" "w")
321 "sh" ("Sh" "shh") "s" "h"
322 "q" "K" "G" nil
".D" ".Dh" "f" ("Y" "yh")
325 ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") "..")))
327 (defvar indian-kyoto-harvard-table
328 '(;; for encode/decode
330 "a" ("A" "aa") "i" ("I" "ii") "u" ("U" "uu")
331 "R" ("L" "lR") nil nil
"e" "ai"
332 nil nil
"o" "au" ("q" "RR" "Q") ("E" "LL" "lRR"))
334 "k" "kh" "g" "gh" "G"
335 "c" "ch" "j" "jh" "J"
336 "T" "Th" "D" "Dh" "N"
337 "t" "th" "d" "dh" "n" nil
338 "p" "ph" "b" "bh" "m"
339 "y" "r" nil
"l" "L" nil
"v"
340 ("z" "Z") "S" "s" "h"
341 nil nil nil nil nil nil nil nil
344 nil
"M" "H" "'" nil
"." nil
)))
346 (defvar indian-harvard-table
347 '(;; for encode/decode
349 "a" ("A" "aa") "i" ("I" "ii") "u" ("U" "uu")
350 "R" ("L" "lR") nil nil
"e" "ai"
351 nil nil
"o" "au" ("RR" "q" "Q") ("LL" "E" "lRR"))
353 "k" "kh" "g" "gh" "G"
354 "c" "ch" "j" "jh" "J"
355 "T" "Th" "D" "Dh" "N"
356 "t" "th" "d" "dh" "n" nil
357 "p" "ph" "b" "bh" "m"
358 "y" "r" nil
"l" "L" nil
"v"
359 ("z" "Z") "S" "s" "h"
360 nil nil nil nil nil nil nil nil
363 nil
"M" "H" "'" nil
"." nil
)))
365 (defvar indian-tokyo-table
366 '(;; for encode/decode
368 "a" ("A" "aa") "i" ("I" "ii") "u" ("U" "uu")
369 "R" ("L" "lR") nil nil
"e" "ai"
370 nil nil
"o" "au" ("Q" "RR" "q") ("E" "LL" "lRR"))
372 "k" "kh" "g" "gh" "G"
373 "c" "ch" "j" "jh" "J"
374 "T" "Th" "D" "Dh" "N"
375 "t" "th" "d" "dh" "n" nil
376 "p" "ph" "b" "bh" "m"
377 "y" "r" nil
"l" "L" nil
"v"
378 ("Z" "z") "S" "s" "h"
379 nil nil nil nil nil nil nil nil
382 nil
"M" "H" "'" nil
"." nil
)))
384 (defvar indian-aiba-table
385 '(;; for encode/decode
387 "a" "aa" "i" "ii" "u" "uu"
388 ".r" ".l" nil nil
"e" "ai"
389 nil nil
"o" "au" "~r" "~l")
391 "k" "kh" "g" "gh" "^n"
392 "c" "ch" "j" "jh" "~n"
393 ".t" ".th" ".d" ".dh" ".n"
394 "t" "th" "d" "dh" "n" nil
395 "p" "ph" "b" "bh" "m"
396 "y" "r" nil
"l" nil nil
"v"
398 nil nil nil nil nil nil nil nil
401 nil
".m" ".h" "'" nil
"." nil
)))
403 (defun combinatorial (head &rest tail
)
406 (mapcar (lambda (y) (mapcar (lambda (x) (cons x y
)) head
))
407 (apply 'combinatorial tail
)))
408 (mapcar 'list head
)))
(defun indian--puthash-char (char trans-char hashtbls)
  "Register CHAR and its transliteration(s) TRANS-CHAR in HASHTBLS.
HASHTBLS is a cons (ENCODE-HASH . DECODE-HASH).
CHAR is nil, a character, or a string; a character is converted to a
one-character string before being stored.
TRANS-CHAR is nil, a string, or a list of strings.
When both CHAR and TRANS-CHAR are non-nil, ENCODE-HASH maps CHAR to
the first element of TRANS-CHAR, and DECODE-HASH maps every element
of TRANS-CHAR back to CHAR.  If either is nil, nothing is stored."
  (let ((encode-hash (car hashtbls))    ; char -> trans
        (decode-hash (cdr hashtbls)))   ; trans -> char
    (when (and char trans-char)
      ;; Normalize both arguments so the code below only deals with a
      ;; string key and a list of transliteration strings.
      (when (stringp trans-char)
        (setq trans-char (list trans-char)))
      (when (characterp char)
        (setq char (char-to-string char)))
      ;; Only the first transliteration is used when encoding ...
      (puthash char (car trans-char) encode-hash)
      ;; ... but every alternative spelling decodes to CHAR.
      (dolist (trans trans-char)
        (puthash trans char decode-hash)))))
(defun indian--map (f l1 l2)
  "Call F on successive pairs of elements taken from L1 and L2.
F is called with two arguments, the Nth element of L1 and the Nth
element of L2, purely for side effects.  Iteration stops when L1 is
exhausted; if L2 is shorter than L1, F receives nil for the missing
elements.  The return value is always nil."
  ;; `pop' only rebinds the local parameters, so the caller's lists
  ;; are not modified.
  (while l1
    (funcall f (pop l1) (pop l2))))
427 (defun indian--puthash-v (v trans-v hashtbls
)
430 (indian--puthash-char (car v
) trans-v hashtbls
))
433 (defun indian--puthash-c (c trans-c halant hashtbls
)
436 (if (characterp c
) (setq c
(char-to-string c
)))
437 (indian--puthash-char (concat c halant
) trans-c hashtbls
))
440 (defun indian--puthash-m (m trans-m hashtbls
)
443 (indian--puthash-char m trans-m hashtbls
))
446 (defun indian--puthash-cv (c trans-c v trans-v hashtbls
)
451 (when (and c trans-c v trans-v
)
452 (if (characterp c
) (setq c
(char-to-string c
)))
453 (setq v
(if (characterp (cadr v
)) (char-to-string (cadr v
)) ""))
454 (if (stringp trans-c
) (setq trans-c
(list trans-c
)))
455 (if (stringp trans-v
) (setq trans-v
(list trans-v
)))
456 (indian--puthash-char
458 (mapcar (lambda (x) (apply 'concat x
))
459 (combinatorial trans-c trans-v
))
464 (defun indian-make-hash (table trans-table
)
465 "Indian Transliteration Hash for decode/encode"
466 (let* ((encode-hash (make-hash-table :test
'equal
))
467 (decode-hash (make-hash-table :test
'equal
))
468 (hashtbls (cons encode-hash decode-hash
))
469 (vowels (elt table
0))
470 (consonants (elt table
1))
472 (digits (elt table
3))
473 (halant (char-to-string (elt misc
4)))
474 (trans-vowels (elt trans-table
0))
475 (trans-consonants (elt trans-table
1))
476 (trans-misc (elt trans-table
2))
477 (trans-digits '("0" "1" "2" "3" "4" "5" "6" "7" "8" "9")))
478 (indian--puthash-v vowels trans-vowels hashtbls
)
479 (indian--puthash-c consonants trans-consonants halant hashtbls
)
480 (indian--puthash-cv consonants trans-consonants
481 vowels trans-vowels hashtbls
)
482 (indian--puthash-m misc trans-misc hashtbls
)
483 (indian--puthash-m digits trans-digits hashtbls
)
;; Transliteration hash for Devanagari and ITRANS v5, produced by
;; `indian-make-hash' from `indian-dev-base-table' and
;; `indian-itrans-v5-table'.  Consumed by
;; `indian-dev-itrans-v5-encode-region' and
;; `indian-dev-itrans-v5-decode-region'.
486 (defvar indian-dev-itrans-v5-hash
487 (indian-make-hash indian-dev-base-table
488 indian-itrans-v5-table
))
;; Transliteration hash for Devanagari and Kyoto-Harvard, produced by
;; `indian-make-hash' from `indian-dev-base-table' and
;; `indian-kyoto-harvard-table'.  Consumed by
;; `indian-dev-kyoto-harvard-encode-region' and
;; `indian-dev-kyoto-harvard-decode-region'.
489 (defvar indian-dev-kyoto-harvard-hash
490 (indian-make-hash indian-dev-base-table
491 indian-kyoto-harvard-table
))
492 (defvar indian-dev-aiba-hash
493 (indian-make-hash indian-dev-base-table
;; Transliteration hash for Punjabi (Gurmukhi) and ITRANS v5, produced
;; by `indian-make-hash' from `indian-pnj-base-table' and
;; `indian-itrans-v5-table'.
496 (defvar indian-pnj-itrans-v5-hash
497 (indian-make-hash indian-pnj-base-table
498 indian-itrans-v5-table
))
;; Transliteration hash for Gujarati and ITRANS v5, produced by
;; `indian-make-hash' from `indian-gjr-base-table' and
;; `indian-itrans-v5-table'.
500 (defvar indian-gjr-itrans-v5-hash
501 (indian-make-hash indian-gjr-base-table
502 indian-itrans-v5-table
))
;; Transliteration hash for Oriya and ITRANS v5, produced by
;; `indian-make-hash' from `indian-ori-base-table' and
;; `indian-itrans-v5-table'.
504 (defvar indian-ori-itrans-v5-hash
505 (indian-make-hash indian-ori-base-table
506 indian-itrans-v5-table
))
;; Transliteration hash for Bengali and ITRANS v5, produced by
;; `indian-make-hash' from `indian-bng-base-table' and
;; `indian-itrans-v5-table'.
508 (defvar indian-bng-itrans-v5-hash
509 (indian-make-hash indian-bng-base-table
510 indian-itrans-v5-table
))
;; Transliteration hash for Assamese and ITRANS v5, produced by
;; `indian-make-hash' from `indian-asm-base-table' and
;; `indian-itrans-v5-table'.
512 (defvar indian-asm-itrans-v5-hash
513 (indian-make-hash indian-asm-base-table
514 indian-itrans-v5-table
))
;; Transliteration hash for Telugu and ITRANS v5, produced by
;; `indian-make-hash' from `indian-tlg-base-table' and
;; `indian-itrans-v5-table'.
516 (defvar indian-tlg-itrans-v5-hash
517 (indian-make-hash indian-tlg-base-table
518 indian-itrans-v5-table
))
;; Transliteration hash for Kannada and ITRANS v5, produced by
;; `indian-make-hash' from `indian-knd-base-table' and
;; `indian-itrans-v5-table'.
520 (defvar indian-knd-itrans-v5-hash
521 (indian-make-hash indian-knd-base-table
522 indian-itrans-v5-table
))
;; Transliteration hash for Malayalam and ITRANS v5, produced by
;; `indian-make-hash' from `indian-mlm-base-table' and
;; `indian-itrans-v5-table'.
524 (defvar indian-mlm-itrans-v5-hash
525 (indian-make-hash indian-mlm-base-table
526 indian-itrans-v5-table
))
;; Transliteration hash for Tamil and ITRANS v5, produced by
;; `indian-make-hash' from `indian-tml-base-table'.  Unlike the other
;; scripts, Tamil is paired with its own transliteration table,
;; `indian-itrans-v5-table-for-tamil'.
528 (defvar indian-tml-itrans-v5-hash
529 (indian-make-hash indian-tml-base-table
530 indian-itrans-v5-table-for-tamil
))
533 (defmacro indian-translate-region
(from to hashtable encode-p
)
536 (let ((regexp ,(indian-regexp-of-hashtbl-keys
537 (if encode-p
(car (eval hashtable
))
538 (cdr (eval hashtable
))))))
539 (narrow-to-region from to
)
540 (goto-char (point-min))
541 (while (re-search-forward regexp nil t
)
542 (let ((matchstr (gethash (match-string 0)
546 (if matchstr
(replace-match matchstr
))))))))
(defun indian-dev-itrans-v5-encode-region (from to)
  "Convert Devanagari text between FROM and TO to ITRANS v5.
Matches in the region are replaced via `indian-translate-region'
using the encode table (the car) of `indian-dev-itrans-v5-hash'.
When called interactively, operate on the region."
  ;; "r" supplies point and mark as FROM and TO, so the function can be
  ;; invoked with M-x as well as from Lisp.
  (interactive "r")
  (indian-translate-region
   from to indian-dev-itrans-v5-hash t))
(defun indian-dev-itrans-v5-decode-region (from to)
  "Convert ITRANS v5 text between FROM and TO to Devanagari.
Matches in the region are replaced via `indian-translate-region'
using the decode table (the cdr) of `indian-dev-itrans-v5-hash'.
When called interactively, operate on the region."
  ;; "r" supplies point and mark as FROM and TO, so the function can be
  ;; invoked with M-x as well as from Lisp.
  (interactive "r")
  (indian-translate-region
   from to indian-dev-itrans-v5-hash nil))
(defun indian-dev-kyoto-harvard-encode-region (from to)
  "Convert Devanagari text between FROM and TO to Kyoto-Harvard.
Matches in the region are replaced via `indian-translate-region'
using the encode table (the car) of `indian-dev-kyoto-harvard-hash'.
When called interactively, operate on the region."
  ;; "r" supplies point and mark as FROM and TO, so the function can be
  ;; invoked with M-x as well as from Lisp.
  (interactive "r")
  (indian-translate-region
   from to indian-dev-kyoto-harvard-hash t))
(defun indian-dev-kyoto-harvard-decode-region (from to)
  "Convert Kyoto-Harvard text between FROM and TO to Devanagari.
Matches in the region are replaced via `indian-translate-region'
using the decode table (the cdr) of `indian-dev-kyoto-harvard-hash'.
When called interactively, operate on the region."
  ;; "r" supplies point and mark as FROM and TO, so the function can be
  ;; invoked with M-x as well as from Lisp.
  (interactive "r")
  (indian-translate-region
   from to indian-dev-kyoto-harvard-hash nil))
(defun indian-dev-aiba-encode-region (from to)
  "Convert Devanagari text between FROM and TO to Aiba transliteration.
Matches in the region are replaced via `indian-translate-region'
using the encode table (the car) of `indian-dev-aiba-hash'.
When called interactively, operate on the region."
  ;; "r" supplies point and mark as FROM and TO, so the function can be
  ;; invoked with M-x as well as from Lisp.
  (interactive "r")
  (indian-translate-region
   from to indian-dev-aiba-hash t))
(defun indian-dev-aiba-decode-region (from to)
  "Convert Aiba transliteration between FROM and TO to Devanagari.
Matches in the region are replaced via `indian-translate-region'
using the decode table (the cdr) of `indian-dev-aiba-hash'.
When called interactively, operate on the region."
  ;; "r" supplies point and mark as FROM and TO, so the function can be
  ;; invoked with M-x as well as from Lisp.
  (interactive "r")
  (indian-translate-region
   from to indian-dev-aiba-hash nil))
583 ;;; IS 13194 utilities
585 ;; The following code provides conversion between IS 13194 (ISCII) and UCS.
588 ;;Unicode vs IS13194 ;; only Devanagari is supported now.
589 ((ucs-devanagari-to-is13194-alist
590 '((?
\x0900 .
"[U+0900]")
594 (?
\x0904 .
"[U+0904]")
648 (?
\x093a .
"[U+093a]")
649 (?
\x093b .
"[U+093b]")
668 (?
\x094e .
"[U+094e]")
669 (?
\x094f .
"[U+094f]")
673 (?
\x0953 .
"[DEVANAGARI GRAVE ACCENT]")
674 (?
\x0954 .
"[DEVANAGARI ACUTE ACCENT]")
675 (?
\x0955 .
"[U+0955]")
676 (?
\x0956 .
"[U+0956]")
677 (?
\x0957 .
"[U+0957]")
702 (?
\x0970 .
"[U+0970]")
703 (?
\x0971 .
"[U+0971]")
704 (?
\x0972 .
"[U+0972]")
705 (?
\x0973 .
"[U+0973]")
706 (?
\x0974 .
"[U+0974]")
707 (?
\x0975 .
"[U+0975]")
708 (?
\x0976 .
"[U+0976]")
709 (?
\x0977 .
"[U+0977]")
710 (?
\x0978 .
"[U+0978]")
711 (?
\x0979 .
"[U+0979]")
712 (?
\x097a .
"[U+097a]")
713 (?
\x097b .
"[U+097b]")
714 (?
\x097c .
"[U+097c]")
715 (?
\x097d .
"[U+097d]")
716 (?
\x097e .
"[U+097e]")
717 (?
\x097f .
"[U+097f]")))
718 (ucs-bengali-to-is13194-alist nil
)
719 (ucs-assamese-to-is13194-alist nil
)
720 (ucs-gurmukhi-to-is13194-alist nil
)
721 (ucs-gujarati-to-is13194-alist nil
)
722 (ucs-oriya-to-is13194-alist nil
)
723 (ucs-tamil-to-is13194-alist nil
)
724 (ucs-telugu-to-is13194-alist nil
)
725 (ucs-malayalam-to-is13194-alist nil
)
726 (ucs-kannada-to-is13194-alist nil
))
727 (dolist (script '(devanagari bengali assamese gurmukhi gujarati
728 oriya tamil telugu malayalam kannada
))
729 (let ((hashtable (intern (concat "is13194-to-ucs-"
730 (symbol-name script
) "-hashtbl" )))
731 (regexp (intern (concat "is13194-to-ucs-"
732 (symbol-name script
) "-regexp"))))
733 (set hashtable
(make-hash-table :test
'equal
:size
128))
734 (dolist (x (eval (intern (concat "ucs-" (symbol-name script
)
735 "-to-is13194-alist"))))
736 (put-char-code-property (car x
) 'script script
)
737 (put-char-code-property (car x
) 'iscii
(cdr x
))
738 (puthash (cdr x
) (char-to-string (car x
)) (eval hashtable
)))
739 (set regexp
(indian-regexp-of-hashtbl-keys (eval hashtable
))))))
;; Script symbol used as the starting `current-repertory' in
;; `ucs-to-iscii-region' and `iscii-to-ucs-region', and spliced into
;; the DEF and RMN entries of `is13194-repertory-to-ucs-script'.
741 (defvar is13194-default-repertory
'devanagari
)
743 (defvar is13194-repertory-to-ucs-script
744 `((DEF ?
\x40 ,is13194-default-repertory
)
745 (RMN ?
\x41 ,is13194-default-repertory
)
746 (DEV ?
\x42 devanagari
)
753 (MLM ?
\x49 malayalam
)
755 (PNJ ?
\x4b gurmukhi
)))
757 ;; for guiding find-variable function.
;; Explicit declarations for the per-script IS 13194 -> UCS variables.
;; The same symbols are built with `intern' and assigned with `set'
;; by the `dolist' over scripts earlier in this file, so each script
;; has one `-hashtbl' and one `-regexp' variable.  `iscii-to-ucs-region'
;; looks them up by name from the current repertory.
758 (defvar is13194-to-ucs-devanagari-hashtbl nil
)
759 (defvar is13194-to-ucs-devanagari-regexp nil
)
760 (defvar is13194-to-ucs-bengali-hashtbl nil
)
761 (defvar is13194-to-ucs-bengali-regexp nil
)
762 (defvar is13194-to-ucs-assamese-hashtbl nil
)
763 (defvar is13194-to-ucs-assamese-regexp nil
)
764 (defvar is13194-to-ucs-gurmukhi-hashtbl nil
)
765 (defvar is13194-to-ucs-gurmukhi-regexp nil
)
766 (defvar is13194-to-ucs-gujarati-hashtbl nil
)
767 (defvar is13194-to-ucs-gujarati-regexp nil
)
768 (defvar is13194-to-ucs-oriya-hashtbl nil
)
769 (defvar is13194-to-ucs-oriya-regexp nil
)
770 (defvar is13194-to-ucs-tamil-hashtbl nil
)
771 (defvar is13194-to-ucs-tamil-regexp nil
)
772 (defvar is13194-to-ucs-telugu-hashtbl nil
)
773 (defvar is13194-to-ucs-telugu-regexp nil
)
774 (defvar is13194-to-ucs-malayalam-hashtbl nil
)
775 (defvar is13194-to-ucs-malayalam-regexp nil
)
776 (defvar is13194-to-ucs-kannada-hashtbl nil
)
777 (defvar is13194-to-ucs-kannada-regexp nil
)
(defvar ucs-to-is13194-regexp
  ;; A single character class spanning U+0900..U+097F; only Devanagari
  ;; is supported now.
  (string ?\[ #x0900 ?- #x097f ?\])
  "Regexp matching one character in the range U+0900 to U+097F.
`ucs-to-iscii-region' searches for this regexp to find the
characters it converts.")
785 (defun ucs-to-iscii-region (from to
)
786 "Converts the indian UCS characters in the region to ISCII.
787 Returns new end position."
789 ;; only Devanagari is supported now.
792 (narrow-to-region from to
)
793 (goto-char (point-min))
794 (let* ((current-repertory is13194-default-repertory
))
795 (while (re-search-forward ucs-to-is13194-regexp nil t
)
797 (get-char-code-property (string-to-char (match-string 0))
801 (defun iscii-to-ucs-region (from to
)
802 "Converts the ISCII characters in the region to UCS.
803 Returns new end position."
805 ;; only Devanagari is supported now.
808 (narrow-to-region from to
)
809 (goto-char (point-min))
810 (let* ((current-repertory is13194-default-repertory
)
812 (intern (concat "is13194-to-ucs-"
813 (symbol-name current-repertory
) "-hashtbl")))
815 (intern (concat "is13194-to-ucs-"
816 (symbol-name current-repertory
) "-regexp")))
817 (re (eval current-regexp
))
818 (hash (eval current-hashtable
)))
819 (while (re-search-forward re nil t
)
820 (replace-match (gethash (match-string 0) hash
""))))
824 (defun indian-compose-region (from to
)
825 "Compose the region according to `composition-function-table'."
829 (let ((pos from
) newpos func
(max to
))
830 (narrow-to-region from to
)
832 (setq func
(aref composition-function-table
(char-after pos
)))
834 (setq newpos
(funcall func pos nil
)
835 pos
(if (and (integerp newpos
) (> newpos pos
))
837 (setq pos
(1+ pos
))))))))
840 (defun indian-compose-string (string)
843 (indian-compose-region (point-min) (point-max))
847 (defun in-is13194-post-read-conversion (len)
848 (let ((pos (point)) endpos
)
849 (setq endpos
(iscii-to-ucs-region pos
(+ pos len
)))
853 (defun in-is13194-pre-write-conversion (from to
)
854 (let ((buf (current-buffer)))
855 (set-buffer (generate-new-buffer " *temp*"))
858 (insert-buffer-substring buf from to
))
859 (ucs-to-iscii-region (point-min) (point-max))
865 ;;; Backward Compatibility support programs
867 ;; The following provides conversion from the old implementation of the
868 ;; Emacs Devanagari script to UCS.
870 (defconst indian-2-colum-to-ucs
872 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
890 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
908 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
926 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
943 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
957 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
969 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
983 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
995 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1011 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1021 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1036 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1046 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1055 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1062 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1070 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1075 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1092 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1101 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1118 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1136 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1154 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1161 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1179 ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f
1197 (defconst indian-2-column-to-ucs-regexp
;; Declare that char-tables of subtype `indian-2-column-to-ucs-chartable'
;; carry one extra slot.  Slot 0 is filled with an alist via
;; `set-char-table-extra-slot' when the table is built, and read back
;; with `char-table-extra-slot' in `indian-2-column-to-ucs-region'.
1200 (put 'indian-2-column-to-ucs-chartable
'char-table-extra-slots
1)
1201 (defconst indian-2-column-to-ucs-chartable
1202 (let ((table (make-char-table 'indian-2-column-to-ucs-chartable
))
1204 (dolist (elt indian-2-colum-to-ucs
)
1205 (if (= (length (car elt
)) 1)
1206 (aset table
(aref (car elt
) 0) (cdr elt
))
1207 (setq alist
(cons elt alist
))))
1208 (set-char-table-extra-slot table
0 alist
)
1212 (defun indian-2-column-to-ucs-region (from to
)
1213 "Convert old Emacs Devanagari characters to UCS."
1218 (alist (char-table-extra-slot indian-2-column-to-ucs-chartable
0)))
1219 (narrow-to-region from to
)
1220 (decompose-region from to
)
1221 (goto-char (point-min))
1222 (while (re-search-forward indian-2-column-to-ucs-regexp nil t
)
1223 (let ((len (- (match-end 0) (match-beginning 0)))
1226 (setq subst
(aref indian-2-column-to-ucs-chartable
1227 (char-after (match-beginning 0))))
1228 (setq subst
(cdr (assoc (match-string 0) alist
))))
1229 (replace-match (if subst subst
"?"))))
1230 (indian-compose-region (point-min) (point-max))))))
1234 ;;; ind-util.el ends here