1 ;;; viet-util.el --- utilities for Vietnamese -*- coding: utf-8; -*-
3 ;; Copyright (C) 1998, 2001-2013 Free Software Foundation, Inc.
4 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
9 ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 ;; Registration Number H13PRO009
12 ;; Keywords: mule, multilingual, Vietnamese
14 ;; This file is part of GNU Emacs.
16 ;; GNU Emacs is free software: you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation, either version 3 of the License, or
19 ;; (at your option) any later version.
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
31 ;; Vietnamese uses ASCII characters and an additional 134 unique
32 ;; characters (these are Latin letters with various diacritical and
33 ;; tone marks). As far as I know, Vietnamese now has 5 different ways
34 ;; of representing these characters: VISCII, TCVN-5712, VPS, VIQR,
35 ;; and Unicode. VISCII, TCVN-5712 and VPS are simple 1-byte codes
36 ;; which assign the 134 unique characters to the control-code area
37 ;; (0x00..0x1F) and the right-half area (0x80..0xFF). VIQR is a mnemonic
38 ;; encoding that represents diacritical marks by trailing ASCII characters.
;; NOTE(review): no initial value or docstring is visible for this
;; variable in this view.  Presumably it holds a translation table for
;; the non-ASCII VISCII characters -- confirm against the full file.
43 (defvar viet-viscii-nonascii-translation-table
)
;; Thin wrapper: delegates to `encode-char' with the `viscii' charset
;; and returns whatever that call returns for CHAR.
46 (defun viet-encode-viscii-char (char)
47 "Return VISCII character code of CHAR if appropriate."
48 (encode-char char
'viscii
))
50 ;; VIQR is a mnemonic encoding specification for Vietnamese.
51 ;; It represents diacritical marks by ASCII characters as follows:
52 ;; ------------+----------+---------
53 ;; mark | mnemonic | example
54 ;; ------------+----------+---------
55 ;; breve | ( | a( -> ă
56 ;; circumflex | ^ | a^ -> â
57 ;; horn | + | o+ -> ơ
58 ;; ------------+----------+---------
59 ;; acute | ' | a' -> á
60 ;; grave | ` | a` -> à
61 ;; hook above | ? | a? -> ả
62 ;; tilde | ~ | a~ -> ã
63 ;; dot below | . | a. -> ạ
64 ;; ------------+----------+---------
65 ;; d bar | dd | dd -> đ
66 ;; ------------+----------+---------
;; Each visible entry is a cons (CHAR . STRING).  The alist is searched
;; by CHAR with `assq' when encoding and by STRING with `rassoc' when
;; decoding (see the two region functions in this file).
;; NOTE(review): only the escape entries are visible in this view; the
;; entries for the accented Vietnamese letters are not shown here.
68 (defvar viet-viqr-alist
;; The entries below map each ASCII character that VIQR uses as a mark
;; (plus `d' and the backslash itself) to its backslash-escaped spelling.
209 ;; escape from composition
210 (?\
( .
"\\(") ; breve (left parenthesis)
211 (?^ .
"\\^") ; circumflex (caret)
212 (?
+ .
"\\+") ; horn (plus sign)
213 (?
' .
"\\'") ; acute (apostrophe)
214 (?
` .
"\\`") ; grave (backquote)
215 (?? .
"\\?") ; hook above (question mark)
216 (?~ .
"\\~") ; tilde (tilde)
217 (?. .
"\\.") ; dot below (period)
218 (?d .
"\\d") ; d-bar (d)
219 (?
\\ .
"\\\\") ; literal backslash
221 "Alist of Vietnamese characters vs corresponding `VIQR' string.")
223 ;; Regular expression matching a single Vietnamese character represented
;; in VIQR.  It has two top-level alternatives:
;;   1. a vowel (aeiouy, either case) followed by either
;;        - an optional shape mark, one of ( ^ +, and then a tone mark,
;;          one of ' ` ? ~ . ; or
;;        - a shape mark alone;
;;   2. two letters `d' in any combination of upper and lower case.
225 (defconst viqr-regexp
226 "[aeiouyAEIOUY]\\([(^+]?['`?~.]\\|[(^+]\\)\\|[Dd][Dd]")
;; Restricts editing to FROM..TO, moves to the start of that restriction
;; and then visits every match of `viqr-regexp' up to its end.
;; NOTE(review): this form is not closed in this view.  What is done with
;; CH after the lookup, and whether the restriction/point are saved and
;; restored, cannot be seen here.
229 (defun viet-decode-viqr-region (from to
)
230 "Convert `VIQR' mnemonics of the current region to Vietnamese characters.
231 When called from a program, expects two arguments,
232 positions (integers or markers) specifying the stretch of the region."
235 (narrow-to-region from to
)
236 (goto-char (point-min))
;; A nil bound with a non-nil NOERROR argument makes the search return
;; nil, ending the loop, once no further match exists.
237 (while (re-search-forward viqr-regexp nil t
)
;; VIQR is the matched mnemonic text; CH is the key of the alist entry
;; whose value equals that text (nil when there is no such entry).
238 (let* ((viqr (buffer-substring (match-beginning 0) (match-end 0)))
239 (ch (car (rassoc viqr viet-viqr-alist
))))
;; Remove the matched mnemonic text from the buffer.
242 (delete-region (match-beginning 0) (match-end 0))
;; Convenience wrapper: applies `viet-decode-viqr-region' to the span
;; from (point-min) to (point-max).
246 (defun viet-decode-viqr-buffer ()
247 "Convert `VIQR' mnemonics of the current buffer to Vietnamese characters."
249 (viet-decode-viqr-region (point-min) (point-max)))
;; Restricts editing to FROM..TO, moves to the start of that restriction
;; and then visits every character matched by the category regexp "\\cv".
;; NOTE(review): this form is not closed in this view.  What is done with
;; VIQR after the lookup cannot be seen here.
252 (defun viet-encode-viqr-region (from to
)
253 "Convert Vietnamese characters of the current region to `VIQR' mnemonics.
254 When called from a program, expects two arguments,
255 positions (integers or markers) specifying the stretch of the region."
258 (narrow-to-region from to
)
259 (goto-char (point-min))
;; "\\cv" matches one character belonging to category `v' (presumably
;; the Vietnamese category -- confirm against the category table).
260 (while (re-search-forward "\\cv" nil t
)
;; Point is just after the match, so CH is the character that was just
;; matched; VIQR is the alist value stored under CH (nil if absent).
261 (let* ((ch (preceding-char))
262 (viqr (cdr (assq ch viet-viqr-alist
))))
;; Convenience wrapper: applies `viet-encode-viqr-region' to the span
;; from (point-min) to (point-max).
269 (defun viet-encode-viqr-buffer ()
270 "Convert Vietnamese characters of the current buffer to `VIQR' mnemonics."
272 (viet-encode-viqr-region (point-min) (point-max)))
;; Decodes the LEN characters that start at point and returns the length
;; of that text after decoding.
;; NOTE(review): the closing parentheses at the end imply enclosing forms
;; between the argument list and `narrow-to-region' that are not visible
;; in this view.
275 (defun viqr-post-read-conversion (len)
278 (narrow-to-region (point) (+ (point) len
))
;; Remember the buffer's modified flag so that decoding does not change it.
279 (let ((buffer-modified-p (buffer-modified-p)))
280 (viet-decode-viqr-region (point-min) (point-max))
281 (set-buffer-modified-p buffer-modified-p
)
;; The size of the narrowed region is the length of the decoded text.
282 (- (point-max) (point-min))))))
;; Copies the text FROM..TO of the current buffer into a freshly created
;; " *temp*" buffer, makes that buffer current, and VIQR-encodes all of it.
;; NOTE(review): this form is not closed in this view, and the extra
;; closing parenthesis after `insert-buffer-substring' implies an
;; enclosing form that is not visible here.
285 (defun viqr-pre-write-conversion (from to
)
;; Keep a handle on the original buffer before switching away from it.
286 (let ((old-buf (current-buffer)))
287 (set-buffer (generate-new-buffer " *temp*"))
290 (insert-buffer-substring old-buf from to
))
291 (viet-encode-viqr-region (point-min) (point-max))
292 ;; Should return nil as annotations.
298 ;;; viet-util.el ends here