1 ;;; indian.el --- Indian languages support -*- coding: utf-8; -*-
3 ;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
9 ;; Maintainer: Kenichi Handa <handa@m17n.org>
10 ;; KAWABATA, Taichi <kawabata@m17n.org>
11 ;; Keywords: multilingual, i18n, Indian
13 ;; This file is part of GNU Emacs.
15 ;; GNU Emacs is free software: you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation, either version 3 of the License, or
18 ;; (at your option) any later version.
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ;; GNU General Public License for more details.
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
30 ;; This file contains definitions of Indian language environments, and
31 ;; setups for displaying the scripts used there.
;; ISO-2022 based 8-bit coding system: ASCII is designated to G0 and the
;; `indian-is13194' charset to G1.  Decoded text is handed to
;; `in-is13194-post-read-conversion' and text to be encoded is first
;; passed through `in-is13194-pre-write-conversion'.
(define-coding-system 'in-is13194-devanagari
  "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
  :coding-type 'iso-2022
  ;; `:mnemonic' is a required attribute of `define-coding-system'; it is
  ;; the character shown in the mode line for this coding system.
  :mnemonic ?D
  :designation [ascii indian-is13194 nil nil]
  :charset-list '(ascii indian-is13194)
  :post-read-conversion 'in-is13194-post-read-conversion
  :pre-write-conversion 'in-is13194-pre-write-conversion)
;; Let `devanagari' be used as a short name for the IS 13194 coding system.
(define-coding-system-alias 'devanagari 'in-is13194-devanagari)
;; Language environment for the Devanagari script: Unicode charset, UTF-8
;; preferred for coding, "dev-aiba" as the default input method.
;; NOTE(review): no PARENTS (menu group) argument is visible for this form;
;; restore it if the original specified one.
(set-language-info-alist
 "Devanagari" '((charset unicode)
                (coding-system utf-8)
                (coding-priority utf-8)
                (input-method . "dev-aiba")
                (documentation . "\
Such languages using Devanagari script as Hindi and Marathi
are supported in this language environment.")))
;; Language environment for the Bengali script: Unicode charset, UTF-8
;; preferred for coding, "bengali-itrans" as the default input method.
;; NOTE(review): no PARENTS (menu group) argument is visible for this form;
;; restore it if the original specified one.
(set-language-info-alist
 "Bengali" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "bengali-itrans")
             (documentation . "\
Such languages using Bengali script as Bengali and Assamese
are supported in this language environment.")))
;; Language environment for Punjabi: Unicode charset, UTF-8 preferred for
;; coding, "punjabi-itrans" as the default input method.
;; NOTE(review): no PARENTS (menu group) argument is visible for this form;
;; restore it if the original specified one.
(set-language-info-alist
 "Punjabi" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "punjabi-itrans")
             (documentation . "\
North Indian language Punjabi is supported in this language environment.")))
;; Language environment for Gujarati: Unicode charset, UTF-8 preferred for
;; coding, "gujarati-itrans" as the default input method.
;; NOTE(review): no PARENTS (menu group) argument is visible for this form;
;; restore it if the original specified one.
(set-language-info-alist
 "Gujarati" '((charset unicode)
              (coding-system utf-8)
              (coding-priority utf-8)
              (input-method . "gujarati-itrans")
              (documentation . "\
North Indian language Gujarati is supported in this language environment.")))
;; Language environment for the Oriya script: Unicode charset, UTF-8
;; preferred for coding, "oriya-itrans" as the default input method.
;; NOTE(review): no PARENTS (menu group) argument is visible for this form;
;; restore it if the original specified one.
(set-language-info-alist
 "Oriya" '((charset unicode)
           (coding-system utf-8)
           (coding-priority utf-8)
           (input-method . "oriya-itrans")
           (documentation . "\
Such languages using Oriya script as Oriya, Khonti, and Santali
are supported in this language environment.")))
;; Language environment for Tamil: Unicode charset, UTF-8 preferred for
;; coding, "tamil-itrans" as the default input method.
;; NOTE(review): no PARENTS (menu group) argument is visible for this form;
;; restore it if the original specified one.
(set-language-info-alist
 "Tamil" '((charset unicode)
           (coding-system utf-8)
           (coding-priority utf-8)
           (input-method . "tamil-itrans")
           (documentation . "\
South Indian Language Tamil is supported in this language environment.")))
;; Language environment for Telugu: Unicode charset, UTF-8 for coding,
;; "telugu-itrans" as the default input method.
;; NOTE(review): no PARENTS (menu group) argument is visible for this form;
;; restore it if the original specified one.
(set-language-info-alist
 "Telugu" '((charset unicode)
            (coding-system utf-8)
            (coding-priority utf-8)
            (input-method . "telugu-itrans")
            (documentation . "\
South Indian Language Telugu is supported in this language environment.")))
;; Language environment for Kannada: Unicode charset, UTF-8 for coding,
;; "kannada-itrans" as the default input method, plus a sample greeting.
;; The coding system is spelled `utf-8', as in the other Indian language
;; environments in this file, rather than `mule-utf-8'.
;; NOTE(review): the end of the documentation string was cut off; the
;; closing word "environment." is reconstructed from the sibling entries.
;; NOTE(review): no PARENTS (menu group) argument is visible for this form;
;; restore it if the original specified one.
(set-language-info-alist
 "Kannada" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "kannada-itrans")
             (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ")
             (documentation . "\
Kannada language and script is supported in this language
environment.")))
;; Language environment for Malayalam: Unicode charset, UTF-8 for coding,
;; "malayalam-itrans" as the default input method.
;; NOTE(review): no PARENTS (menu group) argument is visible for this form;
;; restore it if the original specified one.
(set-language-info-alist
 "Malayalam" '((charset unicode)
               (coding-system utf-8)
               (coding-priority utf-8)
               (input-method . "malayalam-itrans")
               (documentation . "\
South Indian language Malayalam is supported in this language environment.")))
132 ;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is
133 ;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).
(defun indian-compose-regexp (regexp table)
  "Return REGEXP with its mnemonics expanded according to TABLE.
TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).  The entries
are applied one after another, in order, to the whole of REGEXP.
MNEMONIC-STRING is matched as a regexp with case significant, and
REPLACEMENT-STRING is inserted literally (no case conversion and no
backslash substitution)."
  ;; Mnemonics such as \"N\" and \"n\" must stay distinct, so disable case
  ;; folding for the duration of the replacements.
  (let ((case-fold-search nil))
    (dolist (elt table)
      (setq regexp
            (replace-regexp-in-string (car elt) (cdr elt) regexp t t)))
    regexp))
;; The pattern is assembled from fragments: an independent vowel with an
;; optional sign, a lone visarga/danda, or a consonant syllable made of up
;; to four consonant+virama pairs, a base consonant, and an optional
;; virama or vowel sign with an optional nasal sign.
;; NOTE(review): the "\\|\\(" and "\\)" fragments joining the consonant
;; syllable to the preceding alternatives are reconstructed; confirm the
;; grouping against the original.
(defconst devanagari-composable-pattern
  (concat
   "\\([अ-औॠॡ][ँं]?\\)\\|[ः।]"
   "\\|\\("
   "\\(?:\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?[क-हक़-य़]्\\)?"
   "[क-हक़-य़]\\(?:्\\|[ा-्ॢॣ]?[ंँ]?\\)?"
   "\\)")
  "Regexp matching a composable sequence of Devanagari characters.")
;; The pattern is assembled from alternatives joined by "\\|".  No
;; alternative is left empty, so the regexp never matches the empty string.
;; NOTE(review): the original fragments end in "\\|", which suggests more
;; alternatives existed before and after these two (for example one for
;; independent vowels); restore them if so.
(defconst tamil-composable-pattern
  (concat
   "[ஂஃ]\\|" ;; vowel modifier considered independent
   "\\(\\(?:\\(?:க்ஷ\\)\\|[க-ஹ]\\)[்ா-ௌ]?\\)")
  "Regexp matching a composable sequence of Tamil characters.")
;; The pattern is assembled from fragments: a single character from the
;; first bracket set, a lone visarga, or a consonant syllable made of up to
;; four consonant+virama pairs, a base consonant, and an optional virama or
;; dependent sign.
;; NOTE(review): the "\\|\\(" and "\\)" fragments joining the consonant
;; syllable to the preceding alternatives are reconstructed; confirm the
;; grouping against the original.
(defconst kannada-composable-pattern
  (concat
   "\\([ಂ-ಔೠಌ]\\)\\|[ಃ]"
   "\\|\\("
   "\\(?:\\(?:[ಕ-ಹ]್\\)?\\(?:[ಕ-ಹ]್\\)?\\(?:[ಕ-ಹ]್\\)?[ಕ-ಹ]್\\)?"
   "[ಕ-ಹ]\\(?:್\\|[ಾ-್ೕೃ]?\\)?"
   "\\)")
  "Regexp matching a composable sequence of Kannada characters.")
;; The regexp is written with one-letter mnemonics and then expanded by
;; `indian-compose-regexp' using TABLE below.
(defconst malayalam-composable-pattern
  (let ((table '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
                 ("C" . "[\u0D15-\u0D39]")     ; consonant
                 ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra
                 ("p" . "[\u0D3E-\u0D44\u0D57]") ; postbase matra
                 ("b" . "[\u0D62-\u0D63]")     ; belowbase matra
                 ("a" . "[\u0D02-\u0D03]")     ; abovebase sign
                 ("H" . "്")                    ; virama sign
                 ("N" . "\u200D")              ; ZWJ
                 ("J" . "\u200C")              ; ZWNJ
                 ("X" . "[\u0D00-\u0D7F]"))))  ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables
      "\\(CJ?HJ?\\)*C\\(H[NJ]?\\|m?b?p?a?\\)\\|"
      ;; syllables with an independent vowel
      ;; NOTE(review): "n" has no entry in TABLE, so "n?" stays a literal
      ;; optional ASCII n after expansion; confirm whether that is intended.
      "V\\(J?HC\\)?m?b?p?n?a?\\|"
      ;; special consonant form
      ;; NOTE(review): the pattern for this alternative is not present
      ;; here; restore it from the original if one existed.
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Malayalam characters.")
;; Register a composition rule for each Indian script.  For every entry of
;; `char-script-table' whose script has a regexp in SCRIPT-REGEXP-ALIST,
;; install a rule in `composition-function-table' that matches the regexp
;; with no look-back and shapes the match with `font-shape-gstring'.
;; NOTE(review): the iteration over `char-script-table' is reconstructed
;; from how KEY (a character or range) and VAL (a script symbol) are used;
;; confirm against the original.
(let ((script-regexp-alist
       `((devanagari . "[\x900-\x97F\x200C\x200D]+")
         (bengali . "[\x980-\x9FF\x200C\x200D]+")
         (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+")
         (gujarati . "[\xA80-\xAFF\x200C\x200D]+")
         (oriya . "[\xB00-\xB7F\x200C\x200D]+")
         (tamil . "[\xB80-\xBFF\x200C\x200D]+")
         (telugu . "[\xC00-\xC7F\x200C\x200D]+")
         (kannada . "[\xC80-\xCFF\x200C\x200D]+")
         (malayalam . ,malayalam-composable-pattern))))
  (map-char-table
   (lambda (key val)
     (let ((slot (assq val script-regexp-alist)))
       ;; Scripts without an entry in the alist are left untouched.
       (when slot
         (set-char-table-range
          composition-function-table key
          (list (vector (cdr slot) 0 'font-shape-gstring))))))
   char-script-table))
212 ;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f
213 ;;; indian.el ends here