1 ;;; thai-util.el --- utilities for Thai -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
6 ;; Keywords: mule, multilingual, thai
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
29 ;; Setting information of Thai characters.
(defconst thai-category-table (make-category-table)
  "Category table used when composing Thai text.
It defines the categories `c' (consonant), `v' (upper/lower vowel)
and `t' (tone), which `thai-composition-pattern' refers to.")
;; Declare the three categories used by the composition regexp
;; (\cc, \cv and \ct) in the Thai-specific category table.
(let ((defs '((?c . "Thai consonant")
              (?v . "Thai upper/lower vowel")
              (?t . "Thai tone"))))
  (while defs
    (define-category (car (car defs)) (cdr (car defs)) thai-category-table)
    (setq defs (cdr defs))))
;; The general composing rules are as follows:
;;
;;                          T
;;       V        T         V                  T
;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C
;;                                   v         v
;;
;; where C: consonant, V: vowel upper, v: vowel lower, T: tone mark.
(defvar thai-composition-pattern "\\cc\\(\\ct\\|\\cv\\ct?\\)"
  "Regular expression matching a Thai composite sequence.
A match is one character of category `c' followed by either one
character of category `t', or one character of category `v' and an
optional character of category `t'.  The categories are those of
`thai-category-table', so that table must be current while matching.")
;; Register the phonetic type, the name and (where relevant) the
;; category of every Thai character.
;;
;; The table below has one (PHONETIC-TYPE NAME) entry per TIS-620 code,
;; in code order from 0xA1 to 0xFE.  Every assigned letter is registered
;; under both of its character codes, with identical properties:
;;   - the `thai-tis620' character, position code = TIS-620 code - 128;
;;   - the Unicode character U+0E00 + (TIS-620 code - 0xA0).
;; Unassigned TIS-620 slots (type `invalid') are registered for the
;; `thai-tis620' character only.  Deriving both characters from the code
;; keeps the two sets in sync and avoids encoded character literals.
(let ((l '((consonant "LETTER KO KAI")                              ; 0xA1
           (consonant "LETTER KHO KHAI")                            ; 0xA2
           (consonant "LETTER KHO KHUAT")                           ; 0xA3
           (consonant "LETTER KHO KHWAI")                           ; 0xA4
           (consonant "LETTER KHO KHON")                            ; 0xA5
           (consonant "LETTER KHO RAKHANG")                         ; 0xA6
           (consonant "LETTER NGO NGU")                             ; 0xA7
           (consonant "LETTER CHO CHAN")                            ; 0xA8
           (consonant "LETTER CHO CHING")                           ; 0xA9
           (consonant "LETTER CHO CHANG")                           ; 0xAA
           (consonant "LETTER SO SO")                               ; 0xAB
           (consonant "LETTER CHO CHOE")                            ; 0xAC
           (consonant "LETTER YO YING")                             ; 0xAD
           (consonant "LETTER DO CHADA")                            ; 0xAE
           (consonant "LETTER TO PATAK")                            ; 0xAF
           (consonant "LETTER THO THAN")                            ; 0xB0
           (consonant "LETTER THO NANGMONTHO")                      ; 0xB1
           (consonant "LETTER THO PHUTHAO")                         ; 0xB2
           (consonant "LETTER NO NEN")                              ; 0xB3
           (consonant "LETTER DO DEK")                              ; 0xB4
           (consonant "LETTER TO TAO")                              ; 0xB5
           (consonant "LETTER THO THUNG")                           ; 0xB6
           (consonant "LETTER THO THAHAN")                          ; 0xB7
           (consonant "LETTER THO THONG")                           ; 0xB8
           (consonant "LETTER NO NU")                               ; 0xB9
           (consonant "LETTER BO BAIMAI")                           ; 0xBA
           (consonant "LETTER PO PLA")                              ; 0xBB
           (consonant "LETTER PHO PHUNG")                           ; 0xBC
           (consonant "LETTER FO FA")                               ; 0xBD
           (consonant "LETTER PHO PHAN")                            ; 0xBE
           (consonant "LETTER FO FAN")                              ; 0xBF
           (consonant "LETTER PHO SAMPHAO")                         ; 0xC0
           (consonant "LETTER MO MA")                               ; 0xC1
           (consonant "LETTER YO YAK")                              ; 0xC2
           (consonant "LETTER RO RUA")                              ; 0xC3
           (vowel-base "LETTER RU (Pali vowel letter)")             ; 0xC4
           (consonant "LETTER LO LING")                             ; 0xC5
           (vowel-base "LETTER LU (Pali vowel letter)")             ; 0xC6
           (consonant "LETTER WO WAEN")                             ; 0xC7
           (consonant "LETTER SO SALA")                             ; 0xC8
           (consonant "LETTER SO RUSI")                             ; 0xC9
           (consonant "LETTER SO SUA")                              ; 0xCA
           (consonant "LETTER HO HIP")                              ; 0xCB
           (consonant "LETTER LO CHULA")                            ; 0xCC
           (consonant "LETTER O ANG")                               ; 0xCD
           (consonant "LETTER HO NOK HUK")                          ; 0xCE
           (special "PAI YAN NOI (abbreviation)")                   ; 0xCF
           (vowel-base "VOWEL SIGN SARA A")                         ; 0xD0
           (vowel-upper "VOWEL SIGN MAI HAN-AKAT N/S-T")            ; 0xD1
           (vowel-base "VOWEL SIGN SARA AA")                        ; 0xD2
           (vowel-base "VOWEL SIGN SARA AM")                        ; 0xD3
           (vowel-upper "VOWEL SIGN SARA I N/S-T")                  ; 0xD4
           (vowel-upper "VOWEL SIGN SARA II N/S-T")                 ; 0xD5
           (vowel-upper "VOWEL SIGN SARA UE N/S-T")                 ; 0xD6
           (vowel-upper "VOWEL SIGN SARA UEE N/S-T")                ; 0xD7
           (vowel-lower "VOWEL SIGN SARA U N/S-B")                  ; 0xD8
           (vowel-lower "VOWEL SIGN SARA UU N/S-B")                 ; 0xD9
           (vowel-lower "VOWEL SIGN PHINTHU N/S-B (Pali virama)")   ; 0xDA
           (invalid nil)                                            ; 0xDB
           (invalid nil)                                            ; 0xDC
           (invalid nil)                                            ; 0xDD
           (invalid nil)                                            ; 0xDE
           (special "BAHT SIGN (currency symbol)")                  ; 0xDF
           (vowel-base "VOWEL SIGN SARA E")                         ; 0xE0
           (vowel-base "VOWEL SIGN SARA AE")                        ; 0xE1
           (vowel-base "VOWEL SIGN SARA O")                         ; 0xE2
           (vowel-base "VOWEL SIGN SARA MAI MUAN")                  ; 0xE3
           (vowel-base "VOWEL SIGN SARA MAI MALAI")                 ; 0xE4
           (vowel-base "LAK KHANG YAO")                             ; 0xE5
           (special "MAI YAMOK (repetion)")                         ; 0xE6 (sic)
           (vowel-upper "VOWEL SIGN MAI TAI KHU N/S-T")             ; 0xE7
           (tone "TONE MAI EK N/S-T")                               ; 0xE8
           (tone "TONE MAI THO N/S-T")                              ; 0xE9
           (tone "TONE MAI TRI N/S-T")                              ; 0xEA
           (tone "TONE MAI CHATTAWA N/S-T")                         ; 0xEB
           (tone "THANTHAKHAT N/S-T (cancellation mark)")           ; 0xEC
           (tone "NIKKHAHIT N/S-T (final nasal)")                   ; 0xED
           (vowel-upper "YAMAKKAN N/S-T")                           ; 0xEE
           (special "FONRMAN")                                      ; 0xEF (sic)
           (special "DIGIT ZERO")                                   ; 0xF0
           (special "DIGIT ONE")                                    ; 0xF1
           (special "DIGIT TWO")                                    ; 0xF2
           (special "DIGIT THREE")                                  ; 0xF3
           (special "DIGIT FOUR")                                   ; 0xF4
           (special "DIGIT FIVE")                                   ; 0xF5
           (special "DIGIT SIX")                                    ; 0xF6
           (special "DIGIT SEVEN")                                  ; 0xF7
           (special "DIGIT EIGHT")                                  ; 0xF8
           (special "DIGIT NINE")                                   ; 0xF9
           (special "ANGKHANKHU (ellipsis)")                        ; 0xFA
           (special "KHOMUT (beginning of religious texts)")        ; 0xFB
           (invalid nil)                                            ; 0xFC
           (invalid nil)                                            ; 0xFD
           (invalid nil)                                            ; 0xFE
           ))
      ;; TIS-620 code of the entry at the head of L.
      (code #xA1)
      elm)
  (while l
    (setq elm (car l) l (cdr l))
    (let* ((ptype (car elm))
           (name (nth 1 elm))
           ;; Characters that receive this entry's properties.  `delq'
           ;; drops the Unicode variant if it cannot be represented.
           (chars (delq nil
                        (list (make-char 'thai-tis620 (- code 128))
                              (if (not (eq ptype 'invalid))
                                  (decode-char 'ucs
                                               (+ #x0E00 (- code #xA0))))))))
      (while chars
        (let ((char (car chars)))
          (put-char-code-property char 'phonetic-type ptype)
          ;; Only the types that appear in `thai-composition-pattern'
          ;; get a category; `vowel-base', `special' and `invalid'
          ;; characters are left uncategorized.
          (cond ((eq ptype 'consonant)
                 (modify-category-entry char ?c thai-category-table))
                ((memq ptype '(vowel-upper vowel-lower))
                 (modify-category-entry char ?v thai-category-table))
                ((eq ptype 'tone)
                 (modify-category-entry char ?t thai-category-table)))
          (put-char-code-property char 'name name))
        (setq chars (cdr chars))))
    (setq code (1+ code))))
(defun thai-compose-region (beg end)
  "Compose Thai characters in the region.
When called from a program, expects two arguments,
positions (integers or markers) specifying the region.

Every match of `thai-composition-pattern' between BEG and END is
composed.  Point is left after the last composed sequence, or at the
start of the region if nothing matched."
  (interactive "r")
  ;; Restore the caller's narrowing once the region has been processed.
  (save-restriction
    (narrow-to-region beg end)
    (goto-char (point-min))
    ;; The pattern uses the categories of the Thai table, so that table
    ;; must be current while searching.
    (with-category-table thai-category-table
      (while (re-search-forward thai-composition-pattern nil t)
        (compose-region (match-beginning 0) (match-end 0))))))
(defun thai-compose-string (string)
  "Compose Thai characters in STRING and return the resulting string.
Every match of `thai-composition-pattern' in STRING is composed in
place; the value returned is STRING itself."
  ;; The pattern uses the categories of the Thai table, so that table
  ;; must be current while matching.
  (with-category-table thai-category-table
    (let ((idx 0)
          end)
      (while (setq idx (string-match thai-composition-pattern string idx))
        ;; Capture the match end once, before anything else runs.
        (setq end (match-end 0))
        (compose-string string idx end)
        ;; Resume scanning right after the sequence just composed.
        (setq idx end))))
  string)
(defun thai-compose-buffer ()
  "Compose Thai characters in the current buffer.
This applies `thai-compose-region' to the accessible portion of the
buffer, from `point-min' to `point-max'."
  (interactive)
  (thai-compose-region (point-min) (point-max)))
(defun thai-post-read-conversion (len)
  "Compose the LEN characters of Thai text that follow point.
Return LEN.  Composing does not insert or delete characters, so the
length of the text is unchanged."
  (thai-compose-region (point) (+ (point) len))
  ;; A post-read-conversion function must return the resulting length.
  len)
(defun thai-composition-function (from to pattern &optional string)
  "Compose Thai text in the region FROM and TO.
The text matches the regular expression PATTERN.
Optional 4th argument STRING, if non-nil, is a string containing text
to compose; in that case FROM and TO are indices into STRING, otherwise
they are positions in the current buffer.

PATTERN is accepted as part of the calling convention; this function
does not use it.

The return value is number of composed characters."
  (if (< (1+ from) to)
      (progn
        ;; Exactly one of the two targets is composed, never both.
        (if string
            (compose-string string from to)
          (compose-region from to))
        (- to from))
    ;; Fewer than two characters: nothing to combine.  Report 1 --
    ;; presumably so that the caller still advances; confirm against the
    ;; composition-function calling convention.
    1))
298 ;;; arch-tag: 59425d6a-8cf9-4e06-a6ab-8ab7dc7a7a97
299 ;;; thai-util.el ends here