1 ;;; indian.el --- Support for Indian Languages -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1995 Free Software Foundation, Inc.
5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
7 ;; Keywords: multilingual, Indian
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
29 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
31 ;; For Indian, the character set IS 13194 is supported.
33 ;; IS 13194 does not specifically assign glyphs to each character.
34 ;; The following code is not specific to any one Indian language.
36 ;; Eventually, this code will support generic information about
49 ;; In this file, charsets other than charset-ascii and charset-indian-is13194
50 ;; should not be used except in the comment.
54 ;; The following is what you see when you refer to the Emacs
55 ;; representations of IS 13194 characters. However, this is merely
56 ;; a tentative appearance, and you must convert them with the
57 ;; indian-to-xxxxxx (specific script) function to use them.
58 ;; Devanagari is no exception to this rule.
60 ;; 0xa0 //\e(5!"#$%&'()*+,-./\e(B
61 ;; 0xb0 \e(50123456789:;<=>?\e(B
62 ;; 0xc0 \e(5@ABCDEFGHIJKLMNO\e(B
63 ;; 0xd0 \e(5PQRSTUVWXYZ[\]^_\e(B
64 ;; 0xe0 \e(5`abcdefghijklmno\e(B
65 ;; 0xf0 \e(5pqrstuvwxyz{|}~\e(B//
67 ;; Note - In IS 13194, several symbols are obtained by a special
68 ;; combination of several characters and the Nukta sign.
70 ;; Sanskrit Vowel R -> \e(5*\e(B + \e(5i\e(B
71 ;; Sanskrit Vowel L -> \e(5&\e(B + \e(5i\e(B
72 ;; Sanskrit Vowel LL -> \e(5'\e(B + \e(5i\e(B
73 ;; Sanskrit Avagrah -> \e(5j\e(B + \e(5i\e(B
74 ;; OM -> \e(5!\e(B + \e(5i\e(B
76 ;; Note - IS 13194 defines ATR(0xEF) and EXT(0xF0), but they are
79 ;; Note - the above characters DO NOT represent any script. For
80 ;; example, if you want to obtain a Devanagari character, you must do
81 ;; something like the following.
83 ;; (char-to-string (indian-to-devanagari ?\e(5$\e(B))
88 ;; ITRANS is one of the most popular methods to exchange Indian scripts
89 ;; electronically. Here is the table to convert between ITRANS code and
92 (defvar indian-itrans-consonant-alist
100 ("chh" .
"\e(59\e(B")
114 ("nh" .
"\e(5G\e(B") ; For transcription of non-Devanagari Languages.
121 ("yh" .
"\e(5N\e(B") ; For transcription of non-Devanagari Languages.
123 ("rh" .
"\e(5P\e(B") ; For transcription of non-Devanagari Languages.
127 ("shh" .
"\e(5V\e(B")
132 ("ksh" .
"\e$(5!3!h!V\e(B")
133 ("GY" .
"***GY***") ; Must check out later.
134 ;; special consonants
140 (".D" .
"\e(5?i\e(B")
141 (".Dh" .
"\e(5@i\e(B")
144 (defvar indian-itrans-vowel-sign-alist
146 ;; Special treatment unique to IS 13194 Transliteration
149 ;; Matra (Vowel Sign)
158 ("R^i" .
"\e(5_\e(B") ; These must be checked out later.
159 ("R^I" .
"\e(5_i\e(B")
160 ("L^i" .
"\e(5[i\e(B")
161 ("L^I" .
"\e(5\i\e(B")
162 ("E" .
"\e(5`\e(B") ; For transcription of non-Devanangri Languages.
165 ;; ("e.c" . "\e(5c\e(B") ; Tentatively suppressed.
166 ("O" .
"\e(5d\e(B") ; For transcription of non-Devanagari Languages.
169 ;; ("o.c" . "\e(5g\e(B") ; Tentatively suppressed.
173 ;; Independent vowels and other signs.
176 (defvar indian-itrans-other-letters-alist
187 ("R^i" .
"\e(5*\e(B")
188 ("R^I" .
"\e(5*i\e(B")
189 ("L^i" .
"\e(5&i\e(B")
190 ("L^I" .
"\e(5'i\e(B")
191 ("E" .
"\e(5+\e(B") ; For transcription of non-Devanagari Languages.
194 ;; ("e.c" . "\e(5.\e(B") ; Candra E
195 ("O" .
"\e(5/\e(B") ; For transcription of non-Devanagari Languages.
198 ;; ("o.c" . "\e(52\e(B") ; Candra O
201 ("AUM" .
"\e(5!i\e(B")
202 ("OM" .
"\e(5!i\e(B")
203 (".r" .
"\e(5Oh\e(B")
206 (".h
" . "\e(5h\e(B") ; Halant
208 (".a
" . "\e(5ji\e(B") ; Avagrah
221 ;; Regular expression matching single Indian character represented
224 (defvar indian-itrans-regexp
225 (let ((consonant "\\([cs]hh?\\)\\|[kgjTDnpbyr]h?\\|\\(N\\^?\\)\\|\\(jN\\)\\|[mvqKGzfs]\\|\\(ld?\\)\\|\\(ksh\\)\\|\\(GY\\)\\|\\(\\.Dh?\\)")
226 (vowel "\\(a[aiu]\\)\\|\\(ii\\)\\|\\(uu\\)\\|\\([RL]\\^[iI]\\)\\|[AIEOeoaiu]")
227 (misc "[MH0-9]\\|\\(AUM\\)\\|\\(OM\\)\\|\\(\\.[rnNh\\.a]\\)")
228 (lpre "\\(") (rpre "\\)") (orre "\\|"))
229 (concat lpre misc rpre orre
230 lpre lpre consonant rpre "?" lpre vowel rpre rpre orre
231 lpre consonant rpre )))
234 ;; Regular expression matching single ITRANS unit for IS 13194 characters.
237 (defvar itrans-indian-regexp
238 (let ((vowel "[\e(5$\e(B-\e(52\e(B]")
239 (consonant "[\e(53\e(B-\e(5X\e(B]")
240 (matra "[\e(5Z\e(B-\e(5g\e(B]")
241 (misc "[\e(5q\e(B-\e(5z\e(B]")
242 (lpre "\\(") (rpre "\\)") (orre "\\|"))
244 lpre consonant matra "?" rpre orre
248 ;; IS13194 - ITRANS conversion table for string matching above regexp.
251 (defvar indian-itrans-alist
252 (let ((cl indian-itrans-consonant-alist)
253 (ml indian-itrans-other-letters-alist) rules)
255 (let ((vl indian-itrans-vowel-sign-alist))
258 (cons (cons (concat (car (car cl)) (car (car vl)))
259 (concat (cdr (car cl)) (cdr (car vl))))
264 (setq rules (cons (cons (car (car ml))
271 ;; Utility program to convert from ITRANS to IS 13194 in specified region.
274 (defun indian-decode-itrans-region (from to)
275 "Convert `ITRANS' mnemonics of the current region to Indian characters.
276 When called from a program, expects two arguments,
277 positions (integers or markers) specifying the stretch of the region."
280 (narrow-to-region from to)
281 (goto-char (point-min))
282 (while (re-search-forward indian-itrans-regexp nil t)
283 (let* ((itrans (buffer-substring (match-beginning 0) (match-end 0)))
284 (ch (cdr (assoc itrans indian-itrans-alist))))
287 (delete-region (match-beginning 0) (match-end 0))
289 (goto-char (point-min))
290 (while (re-search-forward "\\(\e(5h\e(B\\)[^\\c0]" nil t)
291 (delete-region (match-beginning 1) (match-end 1)))))
294 ;; Utility program to convert from IS 13194 to ITRANS in specified region.
297 (defun indian-encode-itrans-region (from to)
298 "Convert indian region to ITRANS mnemonics."
301 (narrow-to-region from to)
302 (goto-char (point-min))
303 (while (re-search-forward itrans-indian-regexp nil t)
304 (let* ((indian (buffer-substring (match-beginning 0) (match-end 0)))
305 (ch (car (rassoc indian indian-itrans-alist))))
308 (delete-region (match-beginning 0) (match-end 0))
310 (goto-char (point-min))))
312 ;;; indian.el ends here