1 ;;; thai-util.el --- utilities for Thai -*- coding: utf-8; -*-
3 ;; Copyright (C) 2000-2012 Free Software Foundation, Inc.
4 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
9 ;; Keywords: mule, multilingual, Thai, i18n
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;; Value-less `defvar': it only declares the variable as dynamically
;; bound for this file; it assigns no value and attaches no docstring.
(defvar thai-auto-composition-mode)
32 ;; Setting information of Thai characters.
;; Private category table used when matching Thai composite sequences.
(defconst thai-category-table (make-category-table))

;; Register every category mnemonic together with its description in
;; `thai-category-table'.  The entries are processed in the order
;; listed: c, v, t, u, I, U.
(dolist (category-spec '((?c . "Thai consonant")
                         (?v . "Thai upper/lower vowel")
                         (?t . "Thai tone mark")
                         (?u . "Thai tone mark and upper sign")
                         (?I . "THAI CHARACTER SARA I")
                         (?U . "THAI CHARACTER THANTHAKHAT")))
  (define-category (car category-spec) (cdr category-spec)
    thai-category-table))
;; The general composing rules are as follows.  A consonant followed by
;; one of the mark sequences below is composed into a single cluster
;; whose base is the consonant:
;;   CV -> C, CU -> C, CVT -> C, Cv -> C, CvU -> C
;; where C: consonant, V: vowel upper, v: vowel lower,
;;       T: tone mark, U: tone mark and upper sign.
;; Special rule: The sign `์' can be put on the vowel `ิ'.
;; The regexp is assembled from its three alternatives so each one can
;; be annotated; the category letters are those defined in
;; `thai-category-table'.
(defvar thai-composition-pattern
  (concat
   ;; A consonant followed by: a `u' character, or SARA I + THANTHAKHAT,
   ;; or a vowel with an optional tone mark.
   "\\cc\\(\\cu\\|\\cI\\cU\\|\\cv\\ct?\\)"
   ;; A vowel followed by a tone mark, with no leading consonant.
   "\\|\\cv\\ct"
   ;; SARA I followed by THANTHAKHAT, with no leading consonant.
   "\\|\\cI\\cU")
  "Regular expression matching a Thai composite sequence.")
;; Table of Thai characters and the loop that registers them.
;; Each table entry is (CHAR PHONETIC-TYPE NAME); the trailing hex value
;; in the comment numbers the entries consecutively from 0xA1.
;; For every entry the loop below stores PHONETIC-TYPE and NAME as char
;; code properties and adds CHAR to categories of `thai-category-table'.
58 (let ((l '((?ก consonant
"LETTER KO KAI") ; 0xA1
59 (?ข consonant
"LETTER KHO KHAI") ; 0xA2
60 (?ฃ consonant
"LETTER KHO KHUAT") ; 0xA3
61 (?ค consonant
"LETTER KHO KHWAI") ; 0xA4
62 (?ฅ consonant
"LETTER KHO KHON") ; 0xA5
63 (?ฆ consonant
"LETTER KHO RAKHANG") ; 0xA6
64 (?ง consonant
"LETTER NGO NGU") ; 0xA7
65 (?จ consonant
"LETTER CHO CHAN") ; 0xA8
66 (?ฉ consonant
"LETTER CHO CHING") ; 0xA9
67 (?ช consonant
"LETTER CHO CHANG") ; 0xAA
68 (?ซ consonant
"LETTER SO SO") ; 0xAB
69 (?ฌ consonant
"LETTER CHO CHOE") ; 0xAC
70 (?ญ consonant
"LETTER YO YING") ; 0xAD
71 (?ฎ consonant
"LETTER DO CHADA") ; 0xAE
72 (?ฏ consonant
"LETTER TO PATAK") ; 0xAF
73 (?ฐ consonant
"LETTER THO THAN") ; 0xB0
74 (?ฑ consonant
"LETTER THO NANGMONTHO") ; 0xB1
75 (?ฒ consonant
"LETTER THO PHUTHAO") ; 0xB2
76 (?ณ consonant
"LETTER NO NEN") ; 0xB3
77 (?ด consonant
"LETTER DO DEK") ; 0xB4
78 (?ต consonant
"LETTER TO TAO") ; 0xB5
79 (?ถ consonant
"LETTER THO THUNG") ; 0xB6
80 (?ท consonant
"LETTER THO THAHAN") ; 0xB7
81 (?ธ consonant
"LETTER THO THONG") ; 0xB8
82 (?น consonant
"LETTER NO NU") ; 0xB9
83 (?บ consonant
"LETTER BO BAIMAI") ; 0xBA
84 (?ป consonant
"LETTER PO PLA") ; 0xBB
85 (?ผ consonant
"LETTER PHO PHUNG") ; 0xBC
86 (?ฝ consonant
"LETTER FO FA") ; 0xBD
87 (?พ consonant
"LETTER PHO PHAN") ; 0xBE
88 (?ฟ consonant
"LETTER FO FAN") ; 0xBF
89 (?ภ consonant
"LETTER PHO SAMPHAO") ; 0xC0
90 (?ม consonant
"LETTER MO MA") ; 0xC1
91 (?ย consonant
"LETTER YO YAK") ; 0xC2
92 (?ร consonant
"LETTER RO RUA") ; 0xC3
93 (?ฤ vowel-base
"LETTER RU (Pali vowel letter)") ; 0xC4
94 (?ล consonant
"LETTER LO LING") ; 0xC5
95 (?ฦ vowel-base
"LETTER LU (Pali vowel letter)") ; 0xC6
96 (?ว consonant
"LETTER WO WAEN") ; 0xC7
97 (?ศ consonant
"LETTER SO SALA") ; 0xC8
98 (?ษ consonant
"LETTER SO RUSI") ; 0xC9
99 (?ส consonant
"LETTER SO SUA") ; 0xCA
100 (?ห consonant
"LETTER HO HIP") ; 0xCB
101 (?ฬ consonant
"LETTER LO CHULA") ; 0xCC
102 (?อ consonant
"LETTER O ANG") ; 0xCD
103 (?ฮ consonant
"LETTER HO NOK HUK") ; 0xCE
104 (?ฯ special
"PAI YAN NOI (abbreviation)") ; 0xCF
105 (?ะ vowel-base
"VOWEL SIGN SARA A") ; 0xD0
106 (?ั vowel-upper
"VOWEL SIGN MAI HAN-AKAT N/S-T") ; 0xD1
107 (?า vowel-base
"VOWEL SIGN SARA AA") ; 0xD2
108 (?ำ vowel-base
"VOWEL SIGN SARA AM") ; 0xD3
109 (?ิ vowel-upper
"VOWEL SIGN SARA I N/S-T") ; 0xD4
110 (?ี vowel-upper
"VOWEL SIGN SARA II N/S-T") ; 0xD5
111 (?ึ vowel-upper
"VOWEL SIGN SARA UE N/S-T") ; 0xD6
112 (?ื vowel-upper
"VOWEL SIGN SARA UEE N/S-T") ; 0xD7
113 (?ุ vowel-lower
"VOWEL SIGN SARA U N/S-B") ; 0xD8
114 (?ู vowel-lower
"VOWEL SIGN SARA UU N/S-B") ; 0xD9
115 (?ฺ vowel-lower
"VOWEL SIGN PHINTHU N/S-B (Pali virama)") ; 0xDA
;; The next four entries are the unassigned positions between 0xDA and
;; 0xDF; they are marked `invalid' and carry no name.
116 (? invalid nil
) ; 0xDB
117 (? invalid nil
) ; 0xDC
118 (? invalid nil
) ; 0xDD
119 (? invalid nil
) ; 0xDE
120 (?฿ special
"BAHT SIGN (currency symbol)") ; 0xDF
121 (?เ vowel-base
"VOWEL SIGN SARA E") ; 0xE0
122 (?แ vowel-base
"VOWEL SIGN SARA AE") ; 0xE1
123 (?โ vowel-base
"VOWEL SIGN SARA O") ; 0xE2
124 (?ใ vowel-base
"VOWEL SIGN SARA MAI MUAN") ; 0xE3
125 (?ไ vowel-base
"VOWEL SIGN SARA MAI MALAI") ; 0xE4
126 (?ๅ vowel-base
"LAK KHANG YAO") ; 0xE5
127 (?ๆ special
"MAI YAMOK (repetition)") ; 0xE6
128 (?็ sign-upper
"VOWEL SIGN MAI TAI KHU N/S-T") ; 0xE7
129 (?่ tone
"TONE MAI EK N/S-T") ; 0xE8
130 (?้ tone
"TONE MAI THO N/S-T") ; 0xE9
131 (?๊ tone
"TONE MAI TRI N/S-T") ; 0xEA
132 (?๋ tone
"TONE MAI CHATTAWA N/S-T") ; 0xEB
133 (?์ sign-upper
"THANTHAKHAT N/S-T (cancellation mark)") ; 0xEC
134 (?ํ sign-upper
"NIKKHAHIT N/S-T (final nasal)") ; 0xED
135 (?๎ sign-upper
"YAMAKKAN N/S-T") ; 0xEE
;; NOTE(review): Unicode names this character "THAI CHARACTER FONGMAN";
;; "FONRMAN" below looks like a typo.  The string is stored as the
;; `name' char code property, so confirm before changing it.
136 (?๏ special
"FONRMAN") ; 0xEF
137 (?๐ special
"DIGIT ZERO") ; 0xF0
138 (?๑ special
"DIGIT ONE") ; 0xF1
139 (?๒ special
"DIGIT TWO") ; 0xF2
140 (?๓ special
"DIGIT THREE") ; 0xF3
141 (?๔ special
"DIGIT FOUR") ; 0xF4
142 (?๕ special
"DIGIT FIVE") ; 0xF5
143 (?๖ special
"DIGIT SIX") ; 0xF6
144 (?๗ special
"DIGIT SEVEN") ; 0xF7
145 (?๘ special
"DIGIT EIGHT") ; 0xF8
146 (?๙ special
"DIGIT NINE") ; 0xF9
147 (?๚ special
"ANGKHANKHU (ellipsis)") ; 0xFA
148 (?๛ special
"KHOMUT (beginning of religious texts)") ; 0xFB
;; Unassigned positions at the end of the table.
149 (? invalid nil
) ; 0xFC
150 (? invalid nil
) ; 0xFD
151 (? invalid nil
) ; 0xFE
;; NOTE(review): the embedded listing numbers jump from 151 to 155, so
;; the end of the table, the binding of `elm' and the head of the loop
;; over `l' are not visible in this copy.
;; Pop the next entry off `l' into `elm'.
155 (setq elm
(car l
) l
(cdr l
))
;; The first element of the entry is the character itself.
156 (let ((char (car elm
))
;; NOTE(review): `ptype' is used below but its binding is not visible
;; (listing line 157 is missing); presumably it is the entry's second
;; element -- confirm.
158 (put-char-code-property char
'phonetic-type ptype
)
;; Consonants get category `c'.
159 (cond ((eq ptype
'consonant
)
160 (modify-category-entry char ?c thai-category-table
))
;; Upper and lower vowels get category `v'.
161 ((memq ptype
'(vowel-upper vowel-lower
))
162 (modify-category-entry char ?v thai-category-table
)
;; NOTE(review): the test guarding the next form (listing line 163) is
;; not visible here.
164 ;; Give category `I' to "SARA I".
165 (modify-category-entry char ?I thai-category-table
)))
;; NOTE(review): the head of this clause (listing line 166) is not
;; visible; the forms add categories `t' and `u'.
167 (modify-category-entry char ?t thai-category-table
)
168 (modify-category-entry char ?u thai-category-table
))
;; Upper signs get category `u'.
169 ((eq ptype
'sign-upper
)
170 (modify-category-entry char ?u thai-category-table
)
;; NOTE(review): the test guarding the next form (listing line 171) is
;; not visible here.
172 ;; Give category `U' to "THANTHAKHAT".
173 (modify-category-entry char ?U thai-category-table
))))
;; Store the entry's third element as the `name' property of CHAR.
174 (put-char-code-property char
'name
(nth 2 elm
)))))
;; Compose the span BEG..END, using `compose-string' on STRING or
;; `compose-region' on the current buffer.
;; CATEGORY-SET and STRING are optional.
;; NOTE(review): the embedded listing numbers skip 177-178, 182 and 186,
;; so the defaulting of CATEGORY-SET and the tests that choose between
;; the string and buffer calls are not visible in this copy.
176 (defun thai-compose-syllable (beg end
&optional category-set string
)
;; Category set of the first character, read from STRING when it is
;; given and from the buffer otherwise.
179 (char-category-set (if string
(aref string beg
) (char-after beg
)))))
;; Category `c' is set when the first character is a consonant.
180 (if (aref category-set ?c
)
181 ;; Starting with a consonant. We do relative composition.
183 (compose-string string beg end
)
184 (compose-region beg end
))
185 ;; Vowel tone sequence.
;; Both calls pass an explicit component list: first character, the
;; rule (Bc . Bc), then the character at BEG + 1.
187 (compose-string string beg end
(list (aref string beg
) '(Bc . Bc
)
188 (aref string
(1+ beg
))))
189 (compose-region beg end
(list (char-after beg
) '(Bc . Bc
)
190 (char-after (1+ beg
))))))
;; Compose every match of `thai-composition-pattern' between BEG and END.
194 (defun thai-compose-region (beg end
)
195 "Compose Thai characters in the region.
196 When called from a program, expects two arguments,
197 positions (integers or markers) specifying the region."
;; NOTE(review): the embedded listing numbers skip 198-200, 207 and 210,
;; so the binding of `pos', any enclosing forms and the body of the
;; `if' below are not visible in this copy.
;; Restrict the search to the region and start from its beginning.
201 (narrow-to-region beg end
)
202 (goto-char (point-min))
;; The pattern uses the categories of `thai-category-table'.
203 (with-category-table thai-category-table
204 (while (re-search-forward thai-composition-pattern nil t
)
;; BEG and END are reused to hold the bounds of the current match.
205 (setq beg
(match-beginning 0) end
(match-end 0))
;; True when `pos' lies strictly inside the matched sequence.
206 (if (and (> pos beg
) (< pos end
))
;; Compose the match, passing the category set of its first character.
208 (thai-compose-syllable beg end
209 (char-category-set (char-after beg
))))))
(defun thai-compose-string (string)
  "Compose Thai characters in STRING and return the resulting string."
  (with-category-table thai-category-table
    ;; IDX is the index from which the next search starts.  It must be
    ;; bound locally; without the binding the loop would read and set a
    ;; free variable.
    (let ((idx 0))
      (while (setq idx (string-match thai-composition-pattern string idx))
        ;; Remember the end of the match before calling out, so the
        ;; resume position does not depend on the callee leaving the
        ;; match data untouched.
        (let ((end (match-end 0)))
          (thai-compose-syllable idx end nil string)
          (setq idx end)))))
  ;; Return STRING itself, as the docstring promises.
  string)
(defun thai-compose-buffer ()
  "Compose Thai characters in the current buffer."
  ;; Declared interactive so the function is a command and can be run
  ;; with M-x; it takes no arguments, so no interactive spec is needed.
  (interactive)
  ;; Delegate to the region version over the accessible buffer portion.
  (thai-compose-region (point-min) (point-max)))
;; Composition function taking a glyph-string GSTRING.
;; NOTE(review): the embedded listing numbers skip 236-237 and 240-242,
;; so the bindings of `i' and `glyph', the loop body and any use of
;; `last-char' are not visible in this copy.
229 (defun thai-composition-function (gstring)
;; A single-character GSTRING is handed to `compose-gstring-for-graphic'.
230 (if (= (lgstring-char-len gstring
) 1)
231 (compose-gstring-for-graphic gstring
)
;; Otherwise try `font-shape-gstring' first; the `let' below runs only
;; when that returns nil.
232 (or (font-shape-gstring gstring
)
233 (let ((glyph-len (lgstring-glyph-len gstring
))
;; The last character of GSTRING.
234 (last-char (lgstring-char gstring
235 (1- (lgstring-char-len gstring
))))
;; Loop while I is below GLYPH-LEN and slot I of GSTRING holds a glyph.
238 (while (and (< i glyph-len
)
239 (setq glyph
(lgstring-glyph gstring i
)))
;; Compose glyphs from index 0 up to I, passing 0.1 as the final argument.
243 (compose-glyph-string-relative gstring
0 i
0.1)))))
245 ;; Thai-word-mode requires functions in the feature `thai-word'.
(defvar thai-word-mode-map
  (let ((map (make-sparse-keymap)))
    ;; Remap the standard word commands to their Thai-aware versions.
    (define-key map [remap forward-word] 'thai-forward-word)
    (define-key map [remap backward-word] 'thai-backward-word)
    (define-key map [remap kill-word] 'thai-kill-word)
    (define-key map [remap backward-kill-word] 'thai-backward-kill-word)
    (define-key map [remap transpose-words] 'thai-transpose-words)
    ;; The `let' must evaluate to the keymap itself; otherwise the
    ;; variable would receive the value of the last `define-key' call.
    map)
  "Keymap for `thai-word-mode'.")
;; Global minor mode in custom group `mule'.  Its body changes the `|'
;; category of the `thai-tis620' characters and that charset's
;; `fill-find-break-point-function' property.
;; NOTE(review): the embedded listing numbers skip 272 and 275-276, so
;; the head of the disabling clause and the end of the form are not
;; visible in this copy.
258 (define-minor-mode thai-word-mode
259 "Minor mode to make word-oriented commands aware of Thai words.
260 With a prefix argument ARG, enable the mode if ARG is positive,
261 and disable it otherwise. If called from Lisp, enable the mode
262 if ARG is omitted or nil. The commands affected are
263 \\[forward-word], \\[backward-word], \\[kill-word], \\[backward-kill-word],
264 \\[transpose-words], and \\[fill-paragraph]."
265 :global t
:group
'mule
;; First clause: runs when the mode variable is non-nil.
266 (cond (thai-word-mode
267 ;; This enables linebreak between Thai characters.
268 (modify-category-entry (make-char 'thai-tis620
) ?|
)
269 ;; This enables linebreak at a Thai word boundary.
270 (put-charset-property 'thai-tis620
'fill-find-break-point-function
271 'thai-fill-find-break-point
))
;; The same call with a non-nil RESET argument (the trailing t), which
;; removes category `|' instead of adding it.
273 (modify-category-entry (make-char 'thai-tis620
) ?| nil t
)
;; NOTE(review): the value stored by this call is not visible here.
274 (put-charset-property 'thai-tis620
'fill-find-break-point-function
277 ;; Function to call on entering the Thai language environment.
;; NOTE(review): only the `defun' header is visible in this copy; the
;; body (listing lines 279-280) is missing.
278 (defun setup-thai-language-environment-internal ()
281 ;; Function to call on exiting the Thai language environment.
;; NOTE(review): only the `defun' header is visible in this copy; the
;; body (listing lines 283 onward) is missing.
282 (defun exit-thai-language-environment-internal ()
288 ;;; thai-util.el ends here