(GETPGRP_NO_ARG): If Glibc 2.1 or later,
[emacs.git] / lisp / language / indian.el
blob809ff48ae9df288226b100f1967d219cef24ee54
1 ;;; indian.el --- Support for Indian Languages
3 ;; Copyright (C) 1995 Free Software Foundation, Inc.
5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
7 ;; Keywords: multilingual, Indian
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
26 ;;; Commentary:
28 ;; History:
29 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
31 ;; For Indian, the character set IS 13194 is supported.
33 ;; IS 13194 does not specifically assign a glyph to each character.
34 ;; The following code is not specific to any one Indian language.
36 ;; Eventually, this code will support generic information about
37 ;; the following scripts.
39 ;; Devanagari
40 ;; Bengali
41 ;; Gurmukhi
42 ;; Gujarati
43 ;; Oriya
44 ;; Tamil
45 ;; Telugu
46 ;; Kannada
47 ;; Malayalam
49 ;; In this file, charsets other than charset-ascii and charset-indian-is13194
50 ;; should not be used except in the comment.
52 ;;; Code:
54 ;; The following is what you see when you refer to the Emacs
55 ;; representations of IS 13194 characters. However, this is merely a
56 ;; tentative appearance, and you must convert them with an
57 ;; indian-to-xxxxxx (specific script) function to use them.
58 ;; Devanagari is no exception to this rule.
60 ;; 0xa0 //\e(5!"#$%&'()*+,-./\e(B
61 ;; 0xb0 \e(50123456789:;<=>?\e(B
62 ;; 0xc0 \e(5@ABCDEFGHIJKLMNO\e(B
63 ;; 0xd0 \e(5PQRSTUVWXYZ[\]^_\e(B
64 ;; 0xe0 \e(5`abcdefghijklmno\e(B
65 ;; 0xf0 \e(5pqrstuvwxyz{|}~\e(B//
67 ;; Note - In IS 13194, several symbols are obtained by special
68 ;; combination of several characters and Nukta sign.
70 ;; Sanskrit Vowel R -> \e(5*\e(B + \e(5i\e(B
71 ;; Sanskrit Vowel L -> \e(5&\e(B + \e(5i\e(B
72 ;; Sanskrit Vowel LL -> \e(5'\e(B + \e(5i\e(B
73 ;; Sanskrit Avagrah -> \e(5j\e(B + \e(5i\e(B
74 ;; OM -> \e(5!\e(B + \e(5i\e(B
76 ;; Note - IS 13194 defines ATR(0xEF) and EXT(0xF0), but they are
77 ;; not used in Emacs.
79 ;; Note - the above characters DO NOT represent any script. For
80 ;; example, if you want to obtain Devanagari character, you must do
81 ;; something like the following.
83 ;; (char-to-string (indian-to-devanagari ?\e(5$\e(B))
84 ;; "\e$(5!$\e(B"
;; Give every IS 13194 character listed in DEFLIST its syntax class and
;; character category.  Each DEFLIST element is (CHARS SYNTAX CATEGORY):
;; CHARS is a string of single characters and "FROM-TO" ranges, SYNTAX
;; is handed to `modify-syntax-entry' and CATEGORY to
;; `modify-category-entry' for each character covered.
;; NOTE(review): the \e(5...\e(B spans are kept exactly as they appear
;; in this listing (escape-sequence renderings of IS 13194 characters).
(let ((deflist
        '(;; chars syntax category
          ("\e(5!"#\e(B" "w" ?7) ; vowel-modifying diacritical mark
                                    ; chandrabindu, anuswar, visarga
          ("\e(5$\e(B-\e(52\e(B" "w" ?5) ; independent vowel
          ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
          ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
          ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
          ))
      elm chars len syntax category to ch i)
  (while deflist
    (setq elm (car deflist))
    (setq chars (car elm)
          len (length chars)
          syntax (nth 1 elm)
          category (nth 2 elm)
          i 0)
    (while (< i len)
      (if (= (aref chars i) ?-)
          ;; "-": the next character is the upper bound of a range.  CH
          ;; still holds the value the previous pass left it at, i.e.
          ;; one past the character that preceded the "-".
          (setq i (1+ i)
                to (sref chars i))
        ;; A single character is treated as a one-element range.
        (setq ch (sref chars i)
              to ch))
      (while (<= ch to)
        (modify-syntax-entry ch syntax)
        (modify-category-entry ch category)
        (setq ch (1+ ch)))
      ;; Advance I by the `char-bytes' length of the character just read.
      (setq i (+ i (char-bytes to))))
    (setq deflist (cdr deflist))))
117 ;;; ITRANS
119 ;; ITRANS is one of the most popular methods of exchanging Indian scripts
120 ;; electronically. Here is the table to convert between ITRANS code and
121 ;; IS 13194 code.
(defvar indian-itrans-consonant-alist
  '(
    ("k" . "\e(53\e(B")
    ("kh" . "\e(54\e(B")
    ("g" . "\e(55\e(B")
    ("gh" . "\e(56\e(B")
    ("N^" . "\e(57\e(B")
    ("ch" . "\e(58\e(B")
    ("chh" . "\e(59\e(B")
    ("j" . "\e(5:\e(B")
    ("jh" . "\e(5;\e(B")
    ("JN" . "\e(5<\e(B")
    ("T" . "\e(5=\e(B")
    ("Th" . "\e(5>\e(B")
    ("D" . "\e(5?\e(B")
    ("Dh" . "\e(5@\e(B")
    ("N" . "\e(5A\e(B")
    ("t" . "\e(5B\e(B")
    ("th" . "\e(5C\e(B")
    ("d" . "\e(5D\e(B")
    ("dh" . "\e(5E\e(B")
    ("n" . "\e(5F\e(B")
    ("nh" . "\e(5G\e(B") ; For transcription of non-Devanagari Languages.
    ("p" . "\e(5H\e(B")
    ("ph" . "\e(5I\e(B")
    ("b" . "\e(5J\e(B")
    ("bh" . "\e(5K\e(B")
    ("m" . "\e(5L\e(B")
    ("y" . "\e(5M\e(B")
    ("yh" . "\e(5N\e(B") ; For transcription of non-Devanagari Languages.
    ("r" . "\e(5O\e(B")
    ("rh" . "\e(5P\e(B") ; For transcription of non-Devanagari Languages.
    ("l" . "\e(5Q\e(B")
    ("v" . "\e(5T\e(B")
    ("sh" . "\e(5U\e(B")
    ("shh" . "\e(5V\e(B")
    ("s" . "\e(5W\e(B")
    ("h" . "\e(5X\e(B")
    ("ld" . "\e(5R\e(B")
    ("L" . "\e(5R\e(B")
    ("ksh" . "\e$(5!3!h!V\e(B")
    ("GY" . "***GY***") ; Must check out later.
    ;; special consonants
    ("q" . "\e(53i\e(B")
    ("K" . "\e(54i\e(B")
    ("G" . "\e(55i\e(B")
    ("z" . "\e(5:i\e(B")
    ("f" . "\e(5Ii\e(B")
    (".D" . "\e(5?i\e(B")
    (".Dh" . "\e(5@i\e(B")
    )
  "Alist of (ITRANS . IS13194) pairs for consonants.
Each car is an ITRANS consonant mnemonic; each cdr is the IS 13194
string it converts to.  Keys are case-sensitive.")
(defvar indian-itrans-vowel-sign-alist
  '(
    ;; Special treatment unique to IS 13194 Transliteration
    ("" . "\e(5h\e(B")
    ("a" . "")
    ;; Matra (Vowel Sign)
    ("aa" . "\e(5Z\e(B")
    ("A" . "\e(5Z\e(B")
    ("i" . "\e(5[\e(B")
    ("ii" . "\e(5\\e(B")
    ("I" . "\e(5\\e(B")
    ("u" . "\e(5]\e(B")
    ("uu" . "\e(5^\e(B")
    ("U" . "\e(5^\e(B")
    ("R^i" . "\e(5_\e(B") ; These must be checked out later.
    ("R^I" . "\e(5_i\e(B")
    ("L^i" . "\e(5[i\e(B")
    ("L^I" . "\e(5\i\e(B")
    ("E" . "\e(5`\e(B") ; For transcription of non-Devanagari Languages.
    ("e" . "\e(5a\e(B")
    ("ai" . "\e(5b\e(B")
    ;; ("e.c" . "\e(5c\e(B") ; Tentatively suppressed.
    ("O" . "\e(5d\e(B") ; For transcription of non-Devanagari Languages.
    ("o" . "\e(5e\e(B")
    ("au" . "\e(5f\e(B")
    ;; ("o.c" . "\e(5g\e(B") ; Tentatively suppressed.
    )
  "Alist of (ITRANS . IS13194) pairs for vowels that follow a consonant.
Each car is an ITRANS vowel mnemonic; each cdr is the IS 13194 vowel
sign appended to the consonant.  The empty mnemonic maps to the sign
for a consonant with no vowel, and \"a\" maps to the empty string.")
204 ;; Independent vowels and other signs.
;; NOTE(review): the \e(5...\e(B spans are kept exactly as they appear
;; in this listing (escape-sequence renderings of IS 13194 characters).
(defvar indian-itrans-other-letters-alist
  '(
    ("a" . "\e(5$\e(B")
    ("aa" . "\e(5%\e(B")
    ("A" . "\e(5%\e(B")
    ("i" . "\e(5&\e(B")
    ("ii" . "\e(5'\e(B")
    ("I" . "\e(5'\e(B")
    ("u" . "\e(5(\e(B")
    ("uu" . "\e(5)\e(B")
    ("U" . "\e(5)\e(B")
    ("R^i" . "\e(5*\e(B")
    ("R^I" . "\e(5*i\e(B")
    ("L^i" . "\e(5&i\e(B")
    ("L^I" . "\e(5'i\e(B")
    ("E" . "\e(5+\e(B") ; For transcription of non-Devanagari Languages.
    ("e" . "\e(5,\e(B")
    ("ai" . "\e(5-\e(B")
    ;; ("e.c" . "\e(5.\e(B") ; Candra E
    ("O" . "\e(5/\e(B") ; For transcription of non-Devanagari Languages.
    ("o" . "\e(50\e(B")
    ("au" . "\e(51\e(B")
    ;; ("o.c" . "\e(52\e(B") ; Candra O
    ;; "M" is the anusvara, the same sign as ".n".  It used to map to
    ;; the string of the independent vowel "a", which also made the
    ;; reverse lookup of that vowel return "M".
    ("M" . "\e(5"\e(B")
    ("H" . "\e(5#\e(B")
    ("AUM" . "\e(5!i\e(B")
    ("OM" . "\e(5!i\e(B")
    (".r" . "\e(5Oh\e(B")
    (".n" . "\e(5"\e(B")
    (".N" . "\e(5!\e(B")
    (".h" . "\e(5h\e(B") ; Halant
    (".." . "\e(5j\e(B")
    (".a" . "\e(5ji\e(B") ; Avagrah
    ("0" . "\e(5q\e(B")
    ("1" . "\e(5r\e(B")
    ("2" . "\e(5s\e(B")
    ("3" . "\e(5t\e(B")
    ("4" . "\e(5u\e(B")
    ("5" . "\e(5v\e(B")
    ("6" . "\e(5w\e(B")
    ("7" . "\e(5x\e(B")
    ("8" . "\e(5y\e(B")
    ("9" . "\e(5z\e(B")
    )
  "Alist of (ITRANS . IS13194) pairs for independent vowels and other signs.
Each car is an ITRANS mnemonic; each cdr is the IS 13194 string it
converts to.  Digits are included.  Keys are case-sensitive.")
252 ;; Regular expression matching single Indian character represented
253 ;; by ITRANS.
(defvar indian-itrans-regexp
  ;; The consonant expression must offer a case-sensitive route to every
  ;; key of `indian-itrans-consonant-alist': "t", "th", "d", "dh", "h",
  ;; "L" and "JN" are therefore spelled out explicitly.
  (let ((consonant "\\([cs]hh?\\)\\|[kgjTDtdnpbyr]h?\\|\\(N\\^?\\)\\|\\(JN\\)\\|[mvqKGzfshL]\\|\\(ld?\\)\\|\\(ksh\\)\\|\\(GY\\)\\|\\(\\.Dh?\\)")
        (vowel "\\(a[aiu]\\)\\|\\(ii\\)\\|\\(uu\\)\\|\\([RL]\\^[iI]\\)\\|[AIEOeoaiu]")
        (misc "[MH0-9]\\|\\(AUM\\)\\|\\(OM\\)\\|\\(\\.[rnNh\\.a]\\)")
        (lpre "\\(") (rpre "\\)") (orre "\\|"))
    (concat lpre misc rpre orre
            lpre lpre consonant rpre "?" lpre vowel rpre rpre orre
            lpre consonant rpre))
  "Regular expression matching one Indian character written in ITRANS.
The alternatives are tried in this order: a miscellaneous sign or
digit, an optional consonant followed by a vowel, a bare consonant.
ITRANS is case-sensitive, so search with `case-fold-search' bound to nil.")

;; Regular expression matching the IS 13194 characters that make up a
;; single ITRANS unit.

(defvar itrans-indian-regexp
  (let ((vowel "[\e(5$\e(B-\e(52\e(B]")
        (consonant "[\e(53\e(B-\e(5X\e(B]")
        (matra "[\e(5Z\e(B-\e(5g\e(B]")
        (misc "[\e(5q\e(B-\e(5z\e(B]")
        (lpre "\\(") (rpre "\\)") (orre "\\|"))
    (concat misc orre
            lpre consonant matra "?" rpre orre
            vowel))
  "Regular expression matching IS 13194 text that forms one ITRANS unit.
It matches a digit, a consonant optionally followed by a matra, or an
independent vowel.")

;; IS 13194 - ITRANS conversion table for strings matching the regexps
;; above.

(defvar indian-itrans-alist
  (let ((cl indian-itrans-consonant-alist)
        (ml indian-itrans-other-letters-alist) rules)
    ;; Every consonant combined with every vowel sign.
    (while cl
      (let ((vl indian-itrans-vowel-sign-alist))
        (while vl
          (setq rules
                (cons (cons (concat (car (car cl)) (car (car vl)))
                            (concat (cdr (car cl)) (cdr (car vl))))
                      rules))
          (setq vl (cdr vl))))
      (setq cl (cdr cl)))
    ;; Independent vowels and other signs are consed on last, so
    ;; `assoc' and `rassoc' reach them before any consonant rule.
    (while ml
      (setq rules (cons (cons (car (car ml))
                              (cdr (car ml)))
                        rules))
      (setq ml (cdr ml)))
    rules)
  "Alist of (ITRANS . IS13194) pairs used for conversion in both directions.
It holds every consonant of `indian-itrans-consonant-alist' joined with
every entry of `indian-itrans-vowel-sign-alist', plus the entries of
`indian-itrans-other-letters-alist'.")

;; Utility program to convert from ITRANS to IS 13194 in specified region.

(defun indian-decode-itrans-region (from to)
  "Convert `ITRANS' mnemonics of the current region to Indian characters.
When called from a program, expects two arguments,
positions (integers or markers) specifying the stretch of the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; ITRANS distinguishes "T" from "t", "N" from "n", and so on, and
    ;; the table lookup below is exact.  Scan case-sensitively as well;
    ;; otherwise "[MH0-9]" would claim "m" and "h" before the
    ;; consonant+vowel alternative gets a chance.
    (let ((case-fold-search nil))
      (goto-char (point-min))
      (while (re-search-forward indian-itrans-regexp nil t)
        (let* ((itrans (buffer-substring (match-beginning 0) (match-end 0)))
               (ch (cdr (assoc itrans indian-itrans-alist))))
          ;; Matches with no table entry are left untouched.
          (if ch
              (progn
                (delete-region (match-beginning 0) (match-end 0))
                (insert ch)))))
      (goto-char (point-min))
      ;; A bare consonant was converted with a trailing halant.  Drop
      ;; that halant unless a consonant (category ?0) follows it.
      ;; "\\C0" is the negated category construct; a bracket expression
      ;; cannot express a category.
      (while (re-search-forward "\\(\e(5h\e(B\\)\\C0" nil t)
        (delete-region (match-beginning 1) (match-end 1))))))
325 ;; Utility program to convert from IS 13194 to ITRANS in specified region.
(defun indian-encode-itrans-region (from to)
  "Convert indian region to ITRANS mnemonics."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    ;; Replace each matched IS 13194 unit by the first mnemonic that
    ;; `indian-itrans-alist' associates with it.  Units with no entry
    ;; are left as they are.
    (while (re-search-forward itrans-indian-regexp nil t)
      (let* ((start (match-beginning 0))
             (end (match-end 0))
             (mnemonic (car (rassoc (buffer-substring start end)
                                    indian-itrans-alist))))
        (and mnemonic
             (progn
               (delete-region start end)
               (insert mnemonic)))))
    ;; Leave point at the start of the converted region.
    (goto-char (point-min))))
343 ;;; indian.el ends here