1 ;;; indian.el --- Support for Indian Languages
3 ;; Copyright (C) 1995 Free Software Foundation, Inc.
5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
7 ;; Keywords: multilingual, Indian
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
29 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
31 ;; For Indian, the character set IS 13194 is supported.
33 ;; IS 13194 does not specifically assign a glyph to each character.
34 ;; The following code is not specific to any one Indian language.
36 ;; Eventually, this code will support generic information about
49 ;; In this file, charsets other than charset-ascii and charset-indian-is13194
50 ;; should not be used except in the comment.
54 ;; The following is what you see when you refer to the Emacs
55 ;; representations of IS 13194 characters. However, this is merely
56 ;; a tentative appearance, and you must convert them with the
57 ;; indian-to-xxxxxx (specific script) function to use them.
58 ;; Devanagari is no exception to this rule.
60 ;; 0xa0 //\e(5!"#$%&'()*+,-./\e(B
61 ;; 0xb0 \e(50123456789:;<=>?\e(B
62 ;; 0xc0 \e(5@ABCDEFGHIJKLMNO\e(B
63 ;; 0xd0 \e(5PQRSTUVWXYZ[\]^_\e(B
64 ;; 0xe0 \e(5`abcdefghijklmno\e(B
65 ;; 0xf0 \e(5pqrstuvwxyz{|}~\e(B//
67 ;; Note - In IS 13194, several symbols are obtained by special
68 ;; combination of several characters and Nukta sign.
70 ;; Sanskrit Vowel R -> \e(5*\e(B + \e(5i\e(B
71 ;; Sanskrit Vowel L -> \e(5&\e(B + \e(5i\e(B
72 ;; Sanskrit Vowel LL -> \e(5'\e(B + \e(5i\e(B
73 ;; Sanskrit Avagrah -> \e(5j\e(B + \e(5i\e(B
74 ;; OM -> \e(5!\e(B + \e(5i\e(B
76 ;; Note - IS 13194 defines ATR(0xEF) and EXT(0xF0), but they are
79 ;; Note - the above characters DO NOT represent any script. For
80 ;; example, if you want to obtain Devanagari character, you must do
81 ;; something like the following.
83 ;; (char-to-string (indian-to-devanagari ?\e(5$\e(B))
87 '(;; chars syntax category
88 ("\e(5!"#\e(B" "w
" ?7) ; vowel-modifying diacritical mark
89 ; chandrabindu, anuswar, visarga
90 ("\e(5$
\e(B-\e(52\e(B" "w
" ?5) ; independent vowel
91 ("\e(53\e(B-\e(5X\e(B" "w
" ?0) ; consonant
92 ("\e(5Z\e(B-\e(5g\e(B" "w
" ?8) ; matra
93 ("\e(5q\e(B-\e(5z\e(B" "w
" ?6) ; digit
95 elm chars len syntax category to ch i)
97 (setq elm (car deflist))
104 (if (= (aref chars i) ?-)
107 (setq ch (sref chars i)
110 (modify-syntax-entry ch syntax)
111 (modify-category-entry ch category)
113 (setq i (+ i (char-bytes to))))
114 (setq deflist (cdr deflist))))
119 ;; ITRANS is one of the most popular methods of exchanging Indian scripts
120 ;; electronically. Here is the table to convert between ITRANS code and
123 (defvar indian-itrans-consonant-alist
131 ("chh
" . "\e(59\e(B")
145 ("nh" .
"\e(5G\e(B") ; For transcription of non-Devanagari Languages.
152 ("yh" .
"\e(5N\e(B") ; For transcription of non-Devanagari Languages.
154 ("rh" .
"\e(5P\e(B") ; For transcription of non-Devanagari Languages.
158 ("shh" .
"\e(5V\e(B")
163 ("ksh" .
"\e$(5!3!h!V\e(B")
164 ("GY" .
"***GY***") ; Must check out later.
165 ;; special consonants
171 (".D" .
"\e(5?i\e(B")
172 (".Dh" .
"\e(5@i\e(B")
175 (defvar indian-itrans-vowel-sign-alist
177 ;; Special treatment unique to IS 13194 Transliteration
180 ;; Matra (Vowel Sign)
189 ("R^i" .
"\e(5_\e(B") ; These must be checked out later.
190 ("R^I" .
"\e(5_i\e(B")
191 ("L^i" .
"\e(5[i\e(B")
192 ("L^I" .
"\e(5\i\e(B")
193 ("E" .
"\e(5`\e(B") ; For transcription of non-Devanangri Languages.
196 ;; ("e.c" . "\e(5c\e(B") ; Tentatively suppressed.
197 ("O" .
"\e(5d\e(B") ; For transcription of non-Devanagari Languages.
200 ;; ("o.c" . "\e(5g\e(B") ; Tentatively suppressed.
204 ;; Independent vowels and other signs.
207 (defvar indian-itrans-other-letters-alist
218 ("R^i" .
"\e(5*\e(B")
219 ("R^I" .
"\e(5*i\e(B")
220 ("L^i" .
"\e(5&i\e(B")
221 ("L^I" .
"\e(5'i\e(B")
222 ("E" .
"\e(5+\e(B") ; For transcription of non-Devanagari Languages.
225 ;; ("e.c" . "\e(5.\e(B") ; Candra E
226 ("O" .
"\e(5/\e(B") ; For transcription of non-Devanagari Languages.
229 ;; ("o.c" . "\e(52\e(B") ; Candra O
232 ("AUM" .
"\e(5!i\e(B")
233 ("OM" .
"\e(5!i\e(B")
234 (".r" .
"\e(5Oh\e(B")
237 (".h
" . "\e(5h\e(B") ; Halant
239 (".a
" . "\e(5ji\e(B") ; Avagrah
252 ;; Regular expression matching single Indian character represented
;; The value is assembled with `concat' from three alternatives, listed in
;; this order:
;;   1. MISC                       -- M, H, a digit, AUM, OM, or a "." code
;;   2. optional CONSONANT + VOWEL -- a vowel, optionally preceded by a consonant
;;   3. CONSONANT alone
;; CONSONANT, VOWEL and MISC each contain top-level alternation, so every
;; use is wrapped in LPRE ... RPRE (a regexp group) before being combined
;; with ORRE (the alternation operator).
255 (defvar indian-itrans-regexp
256 (let ((consonant "\\([cs]hh?\\)\\|[kgjTDnpbyr]h?\\|\\(N\\^?\\)\\|\\(jN\\)\\|[mvqKGzfs]\\|\\(ld?\\)\\|\\(ksh\\)\\|\\(GY\\)\\|\\(\\.Dh?\\)")
257 (vowel "\\(a[aiu]\\)\\|\\(ii\\)\\|\\(uu\\)\\|\\([RL]\\^[iI]\\)\\|[AIEOeoaiu]")
;; NOTE(review): backslash is not special inside a bracket expression, so
;; the set following the dot in MISC also contains a literal backslash in
;; addition to r, n, N, h, "." and a -- confirm whether that is intended.
258 (misc "[MH0-9]\\|\\(AUM\\)\\|\\(OM\\)\\|\\(\\.[rnNh\\.a]\\)")
259 (lpre "\\(") (rpre "\\)") (orre "\\|"))
260 (concat lpre misc rpre orre
261 lpre lpre consonant rpre "?" lpre vowel rpre rpre orre
262 lpre consonant rpre )))
265 ;; Regular expression matching single ITRANS unit for IS 13194 characters.
268 (defvar itrans-indian-regexp
269 (let ((vowel "[\e(5$\e(B-\e(52\e(B]")
270 (consonant "[\e(53\e(B-\e(5X\e(B]")
271 (matra "[\e(5Z\e(B-\e(5g\e(B]")
272 (misc "[\e(5q\e(B-\e(5z\e(B]")
273 (lpre "\\(") (rpre "\\)") (orre "\\|"))
275 lpre consonant matra "?" rpre orre
279 ;; IS13194 - ITRANS conversion table for string matching above regexp.
282 (defvar indian-itrans-alist
283 (let ((cl indian-itrans-consonant-alist)
284 (ml indian-itrans-other-letters-alist) rules)
286 (let ((vl indian-itrans-vowel-sign-alist))
289 (cons (cons (concat (car (car cl)) (car (car vl)))
290 (concat (cdr (car cl)) (cdr (car vl))))
295 (setq rules (cons (cons (car (car ml))
302 ;; Utility program to convert from ITRANS to IS 13194 in specified region.
305 (defun indian-decode-itrans-region (from to)
306 "Convert `ITRANS' mnemonics of the current region to Indian characters.
307 When called from a program, expects two arguments,
308 positions (integers or markers) specifying the stretch of the region."
;; Restrict all searching and editing below to the text between FROM and TO.
311 (narrow-to-region from to)
312 (goto-char (point-min))
;; Pass 1: for every match of `indian-itrans-regexp', take the matched
;; ITRANS text, look up its replacement CH in `indian-itrans-alist', and
;; delete the matched text.  (The lines that use CH are not visible in this
;; view; presumably CH is inserted in place of the deleted text -- confirm.)
313 (while (re-search-forward indian-itrans-regexp nil t)
314 (let* ((itrans (buffer-substring (match-beginning 0) (match-end 0)))
315 (ch (cdr (assoc itrans indian-itrans-alist))))
318 (delete-region (match-beginning 0) (match-end 0))
;; Pass 2: rescan from the start of the narrowed region and delete the text
;; matched by group 1 (the first character of the pattern) wherever the
;; character that follows it matches the bracket expression.
;; NOTE(review): the category construct is not recognised inside a bracket
;; expression, so the trailing `[^...]' here is a complemented set that
;; excludes just three characters: backslash, `c' and `0'.  If the intent is
;; "followed by a character that is NOT in category ?0", the backslash
;; construct for "not in category" (capital C followed by 0) outside
;; brackets would express that -- confirm against the category table
;; before changing.
320 (goto-char (point-min))
321 (while (re-search-forward "\\(\e(5h\e(B\\)[^\\c0]" nil t)
322 (delete-region (match-beginning 1) (match-end 1)))))
325 ;; Utility program to convert from IS 13194 to ITRANS in specified region.
328 (defun indian-encode-itrans-region (from to)
329 "Convert indian region to ITRANS mnemonics."
;; Restrict all searching and editing below to the text between FROM and TO.
332 (narrow-to-region from to)
333 (goto-char (point-min))
;; For every match of `itrans-indian-regexp', take the matched text and do a
;; reverse lookup with `rassoc' in `indian-itrans-alist' (the same table the
;; decoder searches with `assoc'), binding CH to the key of the entry whose
;; value equals the matched text; then delete the matched text.  (The lines
;; that use CH are not visible in this view; presumably CH is inserted in
;; place of the deleted text -- confirm.)
334 (while (re-search-forward itrans-indian-regexp nil t)
335 (let* ((indian (buffer-substring (match-beginning 0) (match-end 0)))
336 (ch (car (rassoc indian indian-itrans-alist))))
339 (delete-region (match-beginning 0) (match-end 0))
;; After the loop, move point back to the start of the narrowed region.
341 (goto-char (point-min))))
343 ;;; indian.el ends here