1 ;;; indian.el --- Indian languages support -*- coding: utf-8; -*-
3 ;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
9 ;; Maintainer: Kenichi Handa <handa@m17n.org>
10 ;; KAWABATA, Taichi <kawabata@m17n.org>
11 ;; Keywords: multilingual, i18n, Indian
13 ;; This file is part of GNU Emacs.
15 ;; GNU Emacs is free software: you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation, either version 3 of the License, or
18 ;; (at your option) any later version.
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ;; GNU General Public License for more details.
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
30 ;; This file contains definitions of Indian language environments, and
31 ;; setups for displaying the scripts used there.
;; Define the coding system `in-is13194-devanagari'.
;; It is declared with coding type `iso-2022'; the designation vector
;; puts `ascii' in its first slot and `indian-is13194' in its second,
;; and the charset list names the same two charsets.  Text is passed
;; through `in-is13194-post-read-conversion' after decoding and
;; `in-is13194-pre-write-conversion' before encoding.
;; NOTE(review): neither conversion function is defined in this part
;; of the file -- confirm where they live.
35 (define-coding-system 'in-is13194-devanagari
36 "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
37 :coding-type
'iso-2022
39 :designation
[ascii indian-is13194 nil nil
]
40 :charset-list
'(ascii indian-is13194
)
41 :post-read-conversion
'in-is13194-post-read-conversion
42 :pre-write-conversion
'in-is13194-pre-write-conversion
)
;; Let the short name `devanagari' refer to the coding system
;; `in-is13194-devanagari'.
44 (define-coding-system-alias 'devanagari 'in-is13194-devanagari)
;; Register the "Devanagari" language environment: charset `unicode',
;; coding priority `utf-8', input method "dev-aiba".
;; The last two lines are the tail of the environment's description
;; text.
;; NOTE(review): the start of that description entry is not visible in
;; this part of the file -- confirm against the full source.
46 (set-language-info-alist
47 "Devanagari" '((charset unicode
)
49 (coding-priority utf-8
)
50 (input-method .
"dev-aiba")
52 Such languages using Devanagari script as Hindi and Marathi
53 are supported in this language environment."))
;; Register the "Bengali" language environment: charset `unicode',
;; coding priority `utf-8', input method "bengali-itrans".
;; The last two lines are the tail of the environment's description
;; text.
;; NOTE(review): the start of that description entry is not visible in
;; this part of the file -- confirm against the full source.
56 (set-language-info-alist
57 "Bengali" '((charset unicode
)
59 (coding-priority utf-8
)
60 (input-method .
"bengali-itrans")
62 Such languages using Bengali script as Bengali and Assamese
63 are supported in this language environment."))
;; Register the "Punjabi" language environment: charset `unicode',
;; coding priority `utf-8', input method "punjabi-itrans".
;; The last line is the tail of the environment's description text.
;; NOTE(review): the start of that description entry is not visible in
;; this part of the file -- confirm against the full source.
66 (set-language-info-alist
67 "Punjabi" '((charset unicode
)
69 (coding-priority utf-8
)
70 (input-method .
"punjabi-itrans")
72 North Indian language Punjabi is supported in this language environment."))
;; Register the "Gujarati" language environment: charset `unicode',
;; coding priority `utf-8', input method "gujarati-itrans".
;; The last line is the tail of the environment's description text.
;; NOTE(review): the start of that description entry is not visible in
;; this part of the file -- confirm against the full source.
75 (set-language-info-alist
76 "Gujarati" '((charset unicode
)
78 (coding-priority utf-8
)
79 (input-method .
"gujarati-itrans")
81 North Indian language Gujarati is supported in this language environment."))
;; Register the "Oriya" language environment: charset `unicode',
;; coding priority `utf-8', input method "oriya-itrans".
;; The last two lines are the tail of the environment's description
;; text.
;; NOTE(review): the start of that description entry is not visible in
;; this part of the file -- confirm against the full source.
84 (set-language-info-alist
85 "Oriya" '((charset unicode
)
87 (coding-priority utf-8
)
88 (input-method .
"oriya-itrans")
90 Such languages using Oriya script as Oriya, Khonti, and Santali
91 are supported in this language environment."))
;; Register the "Tamil" language environment: charset `unicode',
;; coding priority `utf-8', input method "tamil-itrans".
;; The last line is the tail of the environment's description text.
;; NOTE(review): the start of that description entry is not visible in
;; this part of the file -- confirm against the full source.
94 (set-language-info-alist
95 "Tamil" '((charset unicode
)
97 (coding-priority utf-8
)
98 (input-method .
"tamil-itrans")
100 South Indian Language Tamil is supported in this language environment."))
;; Register the "Telugu" language environment: charset `unicode',
;; coding system `utf-8', coding priority `utf-8', input method
;; "telugu-itrans".
;; The last line is the tail of the environment's description text.
;; NOTE(review): the start of that description entry is not visible in
;; this part of the file -- confirm against the full source.
103 (set-language-info-alist
104 "Telugu" '((charset unicode
)
105 (coding-system utf-8
)
106 (coding-priority utf-8
)
107 (input-method .
"telugu-itrans")
109 South Indian Language Telugu is supported in this language environment."))
;; Register the "Kannada" language environment: charset `unicode',
;; coding system `utf-8', coding priority `utf-8', input method
;; "kannada-itrans", plus a sample text string.
;; The coding system is spelled `utf-8', the same name the Telugu and
;; Malayalam entries use, instead of the older `mule-utf-8' name.
;; The last line is part of the environment's description text.
;; NOTE(review): the start and the end of that description entry are
;; not visible in this part of the file -- confirm against the full
;; source.
112 (set-language-info-alist
113 "Kannada" '((charset unicode
)
114 (coding-system utf-8
)
115 (coding-priority utf-8
)
116 (input-method .
"kannada-itrans")
117 (sample-text .
"Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ")
119 Kannada language and script is supported in this language
;; Register the "Malayalam" language environment: charset `unicode',
;; coding system `utf-8', coding priority `utf-8', input method
;; "malayalam-itrans".
;; The last line is the tail of the environment's description text.
;; NOTE(review): the start of that description entry is not visible in
;; this part of the file -- confirm against the full source.
123 (set-language-info-alist
124 "Malayalam" '((charset unicode
)
125 (coding-system utf-8
)
126 (coding-priority utf-8
)
127 (input-method .
"malayalam-itrans")
129 South Indian language Malayalam is supported in this language environment."))
;; indian-compose-regexp REGEXP TABLE
;; Rewrites REGEXP with `replace-regexp-in-string', using the car of a
;; TABLE entry as the pattern to look for and its cdr as the
;; replacement.  `case-fold-search' is bound to nil for the duration,
;; so mnemonics are matched case-sensitively ("a" and "A" stay
;; distinct).  The two trailing `t' arguments are FIXEDCASE and
;; LITERAL, so the replacement text is inserted verbatim.
;; NOTE(review): the form that binds `elt' to each TABLE entry and the
;; expression that yields the function's value are not visible in this
;; part of the file -- confirm against the full source.
132 ;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is
133 ;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).
135 (defun indian-compose-regexp (regexp table
)
136 (let ((case-fold-search nil
))
138 (setq regexp
(replace-regexp-in-string (car elt
) (cdr elt
) regexp t t
)))
;; `devanagari-composable-pattern': regexp constant for Devanagari.
;; The alist maps one-letter mnemonics to the character or character
;; class each stands for (block U+0900..U+097F plus ZWNJ and ZWJ).
;; The regexp strings further down are written in terms of those
;; mnemonics and are handed to `indian-compose-regexp' for expansion.
;; NOTE(review): the form that binds the alist, the form that joins the
;; regexp strings, and some of the alternatives announced by the
;; comments are not visible in this part of the file -- confirm against
;; the full source.
141 (defconst devanagari-composable-pattern
143 '(("a" .
"[\u0900-\u0902]") ; vowel modifier (above)
144 ("A" .
"\u0903") ; vowel modifier (post)
145 ("V" .
"[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel
146 ("C" .
"[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant
147 ("R" .
"\u0930") ; RA
148 ("n" .
"\u093C") ; NUKTA
149 ("v" .
"[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign
150 ("H" .
"\u094D") ; HALANT
151 ("s" .
"[\u0951-\u0952]") ; stress sign
152 ("t" .
"[\u0953-\u0954]") ; accent
153 ("N" .
"\u200C") ; ZWNJ
154 ("J" .
"\u200D") ; ZWJ
155 ("X" .
"[\u0900-\u097F]")))) ; all coverage
156 (indian-compose-regexp
158 ;; syllables with an independent vowel, or
159 "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|"
160 ;; consonant-based syllables, or
161 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
162 ;; special consonant form, or
164 ;; any other singleton characters
167 "Regexp matching a composable sequence of Devanagari characters.")
;; `bengali-composable-pattern': regexp constant for Bengali.
;; The alist maps one-letter mnemonics to the character or character
;; class each stands for (block U+0980..U+09FF plus ZWNJ and ZWJ).
;; The regexp strings further down are written in terms of those
;; mnemonics and are handed to `indian-compose-regexp' for expansion.
;; NOTE(review): the form that binds the alist, the form that joins the
;; regexp strings, and some of the alternatives announced by the
;; comments are not visible in this part of the file -- confirm against
;; the full source.
169 (defconst bengali-composable-pattern
171 '(("a" .
"\u0981") ; SIGN CANDRABINDU
172 ("A" .
"[\u0982-\u0983]") ; SIGN ANUSVARA .. VISARGA
173 ("V" .
"[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel
174 ("C" .
"[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
175 ("B" .
"[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA
176 ("R" .
"[\u09B0\u09F0]") ; RA
177 ("n" .
"\u09BC") ; NUKTA
178 ("v" .
"[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign
179 ("H" .
"\u09CD") ; HALANT
180 ("T" .
"\u09CE") ; KHANDA TA
181 ("N" .
"\u200C") ; ZWNJ
182 ("J" .
"\u200D") ; ZWJ
183 ("X" .
"[\u0980-\u09FF]")))) ; all coverage
184 (indian-compose-regexp
186 ;; syllables with an independent vowel, or
187 "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
188 ;; consonant-based syllables, or
189 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
190 ;; other syllables with an independent vowel, or
192 ;; special consonant form, or
194 ;; any other singleton characters
197 "Regexp matching a composable sequence of Bengali characters.")
;; `gurmukhi-composable-pattern': regexp constant for Gurmukhi.
;; The alist maps one-letter mnemonics to the character or character
;; class each stands for (block U+0A00..U+0A7F plus ZWNJ and ZWJ).
;; The regexp strings further down are written in terms of those
;; mnemonics and are handed to `indian-compose-regexp' for expansion.
;; The "Y" class ends its range with an escaped \u0A30, so it covers
;; exactly U+0A2F, U+0A30, U+0A35 and U+0A39 (YA, RA, VA, HA).  Without
;; the backslash the range end is read as the plain ASCII text `u0A30'
;; and RA is left out of the class.
;; NOTE(review): the form that binds the alist, the form that joins the
;; regexp strings, and some of the alternatives announced by the
;; comments are not visible in this part of the file -- confirm against
;; the full source.
199 (defconst gurmukhi-composable-pattern
201 '(("a" .
"[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI
202 ("A" .
"\u0A03") ; SIGN VISARGA
203 ("V" .
"[\u0A05-\u0A14]") ; independent vowel
204 ("C" .
"[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant
205 ("Y" .
"[\u0A2F-\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA
206 ("n" .
"\u0A3C") ; NUKTA
207 ("v" .
"[\u0A3E-\u0A4C]") ; vowel sign
208 ("H" .
"\u0A4D") ; VIRAMA
209 ("N" .
"\u200C") ; ZWNJ
210 ("J" .
"\u200D") ; ZWJ
211 ("X" .
"[\u0A00-\u0A7F]")))) ; all coverage
212 (indian-compose-regexp
214 ;; consonant-based syllables, or
215 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
216 ;; syllables with an independent vowel, or
217 "Vn?\\(?:J?HY\\)?v*n?a?A?\\|"
218 ;; special consonant form, or
220 ;; any other singleton characters
223 "Regexp matching a composable sequence of Gurmukhi characters.")
;; `gujarati-composable-pattern': regexp constant for Gujarati.
;; The alist maps one-letter mnemonics to the character or character
;; class each stands for (block U+0A80..U+0AFF plus ZWNJ and ZWJ).
;; The regexp strings further down are written in terms of those
;; mnemonics and are handed to `indian-compose-regexp' for expansion.
;; NOTE(review): the form that binds the alist, the form that joins the
;; regexp strings, and some of the alternatives announced by the
;; comments are not visible in this part of the file -- confirm against
;; the full source.
225 (defconst gujarati-composable-pattern
227 '(("a" .
"[\u0A81-\u0A82]") ; SIGN CANDRABINDU .. ANUSVARA
228 ("A" .
"\u0A83") ; SIGN VISARGA
229 ("V" .
"[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel
230 ("C" .
"[\u0A95-\u0AB9]") ; consonant
231 ("R" .
"\u0AB0") ; RA
232 ("n" .
"\u0ABC") ; NUKTA
233 ("v" .
"[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign
234 ("H" .
"\u0ACD") ; VIRAMA
235 ("N" .
"\u200C") ; ZWNJ
236 ("J" .
"\u200D") ; ZWJ
237 ("X" .
"[\u0A80-\u0AFF]")))) ; all coverage
238 (indian-compose-regexp
240 ;; syllables with an independent vowel, or
241 "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|"
242 ;; consonant-based syllables, or
243 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
244 ;; special consonant form, or
246 ;; any other singleton characters
249 "Regexp matching a composable sequence of Gujarati characters.")
;; `oriya-composable-pattern': regexp constant for Oriya.
;; The alist maps one-letter mnemonics to the character or character
;; class each stands for (block U+0B00..U+0B7F plus ZWNJ and ZWJ).
;; The regexp strings further down are written in terms of those
;; mnemonics and are handed to `indian-compose-regexp' for expansion.
;; NOTE(review): the form that binds the alist, the form that joins the
;; regexp strings, and some of the alternatives announced by the
;; comments are not visible in this part of the file -- confirm against
;; the full source.
251 (defconst oriya-composable-pattern
253 '(("a" .
"\u0B01") ; SIGN CANDRABINDU
254 ("A" .
"[\u0B02-\u0B03]") ; SIGN ANUSVARA .. VISARGA
255 ("V" .
"[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel
256 ("C" .
"[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant
257 ("B" .
"[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form
258 ("R" .
"\u0B30") ; RA
259 ("n" .
"\u0B3C") ; NUKTA
260 ("v" .
"[\u0B3E-\u0B4C\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign
261 ("H" .
"\u0B4D") ; VIRAMA
262 ("N" .
"\u200C") ; ZWNJ
263 ("J" .
"\u200D") ; ZWJ
264 ("X" .
"[\u0B00-\u0B7F]")))) ; all coverage
265 (indian-compose-regexp
267 ;; syllables with an independent vowel, or
268 "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
269 ;; consonant-based syllables, or
270 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
271 ;; special consonant form, or
273 ;; any other singleton characters
276 "Regexp matching a composable sequence of Oriya characters.")
;; `tamil-composable-pattern': regexp constant for Tamil.
;; The alist maps one-letter mnemonics to the character or character
;; class each stands for (block U+0B80..U+0BFF plus ZWNJ and ZWJ).
;; The regexp string further down is written in terms of those
;; mnemonics and is handed to `indian-compose-regexp' for expansion.
;; NOTE(review): the form that binds the alist, the form that joins the
;; regexp strings, and some of the alternatives announced by the
;; comments are not visible in this part of the file -- confirm against
;; the full source.
278 (defconst tamil-composable-pattern
280 '(("a" .
"\u0B82") ; SIGN ANUSVARA
281 ("V" .
"[\u0B85-\u0B94]") ; independent vowel
282 ("C" .
"[\u0B95-\u0BB9]") ; consonant
283 ("v" .
"[\u0BBE-\u0BCC\u0BD7]") ; vowel sign
284 ("H" .
"\u0BCD") ; VIRAMA
285 ("N" .
"\u200C") ; ZWNJ
286 ("J" .
"\u200D") ; ZWJ
287 ("X" .
"[\u0B80-\u0BFF]")))) ; all coverage
288 (indian-compose-regexp
290 ;; consonant-based syllables, or
291 "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
292 ;; syllables with an independent vowel, or
294 ;; any other singleton characters
297 "Regexp matching a composable sequence of Tamil characters.")
;; `telugu-composable-pattern': regexp constant for Telugu.
;; The alist maps one-letter mnemonics to the character or character
;; class each stands for (block U+0C00..U+0C7F plus ZWNJ and ZWJ).
;; The regexp strings further down are written in terms of those
;; mnemonics and are handed to `indian-compose-regexp' for expansion.
;; NOTE(review): the form that binds the alist, the form that joins the
;; regexp strings, and some of the alternatives announced by the
;; comments are not visible in this part of the file -- confirm against
;; the full source.
299 (defconst telugu-composable-pattern
301 '(("a" .
"[\u0C01-\u0C03]") ; SIGN CANDRABINDU .. VISARGA
302 ("V" .
"[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel
303 ("C" .
"[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant
304 ("v" .
"[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]") ; vowel sign
305 ("H" .
"\u0C4D") ; VIRAMA
306 ("N" .
"\u200C") ; ZWNJ
307 ("J" .
"\u200D") ; ZWJ
308 ("X" .
"[\u0C00-\u0C7F]")))) ; all coverage
309 (indian-compose-regexp
311 ;; consonant-based syllables, or
312 "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
313 ;; syllables with an independent vowel, or
314 "V\\(?:J?HC\\)?v*a?\\|"
315 ;; special consonant form, or
317 ;; any other singleton characters
320 "Regexp matching a composable sequence of Telugu characters.")
;; `kannada-composable-pattern': regexp constant for Kannada.
;; The alist maps one-letter mnemonics to the character or character
;; class each stands for (block U+0C80..U+0CFF plus ZWNJ and ZWJ).
;; The regexp strings further down are written in terms of those
;; mnemonics and are handed to `indian-compose-regexp' for expansion.
;; NOTE(review): the form that binds the alist, the form that joins the
;; regexp strings, and some of the alternatives announced by the
;; comments are not visible in this part of the file -- confirm against
;; the full source.
322 (defconst kannada-composable-pattern
324 '(("A" .
"[\u0C82-\u0C83]") ; SIGN ANUSVARA .. VISARGA
325 ("V" .
"[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel
326 ("C" .
"[\u0C95-\u0CB9\u0CDE]") ; consonant
327 ("R" .
"\u0CB0") ; RA
328 ("n" .
"\u0CBC") ; NUKTA
329 ("v" .
"[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign
330 ("H" .
"\u0CCD") ; VIRAMA
331 ("N" .
"\u200C") ; ZWNJ
332 ("J" .
"\u200D") ; ZWJ
333 ("X" .
"[\u0C80-\u0CFF]")))) ; all coverage
334 (indian-compose-regexp
336 ;; syllables with an independent vowel, or
337 "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|"
338 ;; consonant-based syllables, or
339 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|"
340 ;; special consonant form, or
342 ;; any other singleton characters
345 "Regexp matching a composable sequence of Kannada characters.")
;; `malayalam-composable-pattern': regexp constant for Malayalam.
;; The alist maps one-letter mnemonics to the character or character
;; class each stands for (block U+0D00..U+0D7F plus ZWNJ and ZWJ).
;; The regexp strings further down are written in terms of those
;; mnemonics and are handed to `indian-compose-regexp' for expansion.
;; NOTE(review): the independent-vowel alternative uses the lazy
;; quantifier `v*?' where the sibling patterns in this file use `v*' or
;; `v?'.  A lazy `*?' prefers the shortest match, so trailing vowel
;; signs may be left out of the match -- confirm whether `v*' was
;; intended.
;; NOTE(review): the form that binds the alist, the form that joins the
;; regexp strings, and some of the alternatives announced by the
;; comments are not visible in this part of the file -- confirm against
;; the full source.
347 (defconst malayalam-composable-pattern
349 '(("A" .
"[\u0D02-\u0D03]") ; SIGN ANUSVARA .. VISARGA
350 ("V" .
"[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
351 ("C" .
"[\u0D15-\u0D39]") ; consonant
352 ("Y" .
"[\u0D2F-\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA
353 ("v" .
"[\u0D3E-\u0D4C\u0D57\u0D62-\u0D63]") ; postbase matra
354 ("H" .
"\u0D4D") ; SIGN VIRAMA
355 ("N" .
"\u200C") ; ZWNJ
356 ("J" .
"\u200D") ; ZWJ
357 ("X" .
"[\u0D00-\u0D7F]")))) ; all coverage
358 (indian-compose-regexp
360 ;; consonant-based syllables, or
361 "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|"
362 ;; syllables with an independent vowel, or
363 "V\\(?:J?HY\\)?v*?A?\\|"
364 ;; special consonant form, or
366 ;; any other singleton characters
369 "Regexp matching a composable sequence of Malayalam characters.")
;; Install the composable patterns into `composition-function-table'.
;; `script-regexp-alist' pairs each script symbol with the value of its
;; `*-composable-pattern' constant (spliced in by the backquote).
;; The inner code looks VAL up in that alist with `assq' and, for KEY,
;; stores a one-element list holding the vector
;; [PATTERN 0 font-shape-gstring], where PATTERN is the cdr of the
;; matching alist entry.
;; NOTE(review): the forms that bind `key' and `val', and any guard on
;; `slot' being non-nil, are not visible in this part of the file --
;; confirm against the full source.
371 (let ((script-regexp-alist
372 `((devanagari .
,devanagari-composable-pattern
)
373 (bengali .
,bengali-composable-pattern
)
374 (gurmukhi .
,gurmukhi-composable-pattern
)
375 (gujarati .
,gujarati-composable-pattern
)
376 (oriya .
,oriya-composable-pattern
)
377 (tamil .
,tamil-composable-pattern
)
378 (telugu .
,telugu-composable-pattern
)
379 (kannada .
,kannada-composable-pattern
)
380 (malayalam .
,malayalam-composable-pattern
))))
383 (let ((slot (assq val script-regexp-alist
)))
385 (set-char-table-range
386 composition-function-table key
387 (list (vector (cdr slot
) 0 'font-shape-gstring
))))))
392 ;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f
393 ;;; indian.el ends here