1 ;;; ethio-util.el --- utilities for Ethiopic -*- coding: utf-8; -*-
3 ;; Copyright (C) 1997, 1998, 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H14PRO021
9 ;; Copyright (C) 2005, 2006
10 ;; National Institute of Advanced Industrial Science and Technology (AIST)
11 ;; Registration Number: H15PRO110
13 ;; Keywords: mule, multilingual, Ethiopic
15 ;; This file is part of GNU Emacs.
17 ;; GNU Emacs is free software; you can redistribute it and/or modify
18 ;; it under the terms of the GNU General Public License as published by
19 ;; the Free Software Foundation; either version 3, or (at your option)
22 ;; GNU Emacs is distributed in the hope that it will be useful,
23 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 ;; GNU General Public License for more details.
27 ;; You should have received a copy of the GNU General Public License
28 ;; along with GNU Emacs; see the file COPYING. If not, write to the
29 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
30 ;; Boston, MA 02110-1301, USA.
32 ;; Author: TAKAHASHI Naoto <ntakahas@m17n.org>
;; Value-less declarations of two rmail variables.  They only mark the
;; symbols as special; no value is assigned here.
;; NOTE(review): presumably present to silence byte-compiler
;; free-variable warnings -- no reference to either variable is visible
;; in this part of the file; confirm before removing.
40 (defvar rmail-current-message
)
41 (defvar rmail-message-vector
)
43 ;; Information for exiting Ethiopic environment.
;; Alist of (KEY . PREVIOUS-GLOBAL-BINDING) pairs.
;; `setup-ethiopic-environment-internal' pushes one pair for every key
;; it rebinds; `exit-ethiopic-environment' restores each binding with
;; `global-set-key' and pops the pair, leaving the list empty.
44 (defvar exit-ethiopic-environment-data nil
)
47 (defun setup-ethiopic-environment-internal ()
48 (let ((key-bindings '((" " . ethio-insert-space
)
49 ([?\S-
] . ethio-insert-ethio-space
)
50 ;; ([?\C-'] . ethio-gemination)
51 ([f3] . ethio-fidel-to-sera-buffer)
52 ([S-f3] . ethio-fidel-to-sera-region)
53 ([C-f3] . ethio-fidel-to-sera-marker)
54 ([f4] . ethio-sera-to-fidel-buffer)
55 ([S-f4] . ethio-sera-to-fidel-region)
56 ([C-f4] . ethio-sera-to-fidel-marker)
57 ([S-f5] . ethio-toggle-punctuation)
58 ([S-f6] . ethio-modify-vowel)
59 ([S-f7] . ethio-replace-space)
60 ;; ([S-f8] . ethio-input-special-character) ; deprecated
61 ([C-f9] . ethio-toggle-space)
62 ([S-f9] . ethio-replace-space) ; as requested
66 (setq kb (car (car key-bindings)))
67 (setq exit-ethiopic-environment-data
68 (cons (cons kb (global-key-binding kb))
69 exit-ethiopic-environment-data))
70 (global-set-key kb (cdr (car key-bindings)))
71 (setq key-bindings (cdr key-bindings))))
73 (add-hook 'find-file-hook 'ethio-find-file)
74 (add-hook 'write-file-functions 'ethio-write-file)
75 (add-hook 'after-save-hook 'ethio-find-file))
(defun exit-ethiopic-environment ()
  "Exit Ethiopic language environment.
Restore every global key binding saved in
`exit-ethiopic-environment-data', emptying that list, and remove the
Ethiopic file conversion functions from `find-file-hook',
`write-file-functions' and `after-save-hook'."
  ;; Each entry is (KEY . BINDING-TO-RESTORE).  The entry is dropped from
  ;; the list only after its key has been rebound.
  (while exit-ethiopic-environment-data
    (let* ((saved (car exit-ethiopic-environment-data))
           (key (car saved))
           (old-binding (cdr saved)))
      (global-set-key key old-binding))
    (setq exit-ethiopic-environment-data
          (cdr exit-ethiopic-environment-data)))

  ;; Detach the hooks that transliterate files on visiting and saving.
  (remove-hook 'find-file-hook 'ethio-find-file)
  (remove-hook 'write-file-functions 'ethio-write-file)
  (remove-hook 'after-save-hook 'ethio-find-file))
90 ;; ETHIOPIC UTILITY FUNCTIONS
93 ;; If the filename ends in ".sera", editing is done in fidel
94 ;; but file I/O is done in SERA.
96 ;; If the filename ends in ".java", editing is done in fidel
97 ;; but file I/O is done in the \uXXXX style, where XXXX is
98 ;; the Unicode codepoint for the Ethiopic character.
100 ;; If the filename ends in ".tex", editing is done in fidel
101 ;; but file I/O is done in EthioTeX format.
;; The two language options below are read by `ethio-prefer-amharic-p'.
;; `ethio-sera-to-fidel-region' let-binds both to their current values
;; because tilde escapes met during a conversion may change them; the
;; let-binding restores the user's settings when that function exits.
107 (defvar ethio-primary-language 'tigrigna
108 "*Symbol that defines the primary language in SERA --> FIDEL conversion.
109 The value should be one of: `tigrigna', `amharic' or `english'.")
111 (defvar ethio-secondary-language 'english
112 "*Symbol that defines the secondary language in SERA --> FIDEL conversion.
113 The value should be one of: `tigrigna', `amharic' or `english'.")
115 (defvar ethio-use-colon-for-colon nil
116 "*Non-nil means associate ASCII colon with Ethiopic colon.
117 If nil, associate ASCII colon with Ethiopic word separator, i.e., two
118 vertically stacked dots. All SERA <--> FIDEL converters refer this
;; A function with the same name as this variable is defined later in
;; the file.  `ethio-adjust-robin' calls that function with this
;; variable's value, and the function then remaps "?" and "`?" in the
;; "ethiopic-sera" robin package via `robin-modify-package'.
121 (defvar ethio-use-three-dot-question nil
122 "*Non-nil means associate ASCII question mark with Ethiopic old style question mark (three vertically stacked dots).
123 If nil, associate ASCII question mark with Ethiopic stylized question
124 mark. All SERA <--> FIDEL converters refer this variable.")
126 (defvar ethio-quote-vowel-always nil
127 "*Non-nil means always put an apostrophe before an isolated vowel (except at word initial) in FIDEL --> SERA conversion.
128 If nil, put an apostrophe only between a 6th-form consonant and an
;; Checked by `ethio-fidel-to-sera-region': only when this is nil does
;; that function scan the freshly converted text for "W'".
131 (defvar ethio-W-sixth-always nil
132 "*Non-nil means convert the Wu-form of a 12-form consonant to \"W'\" instead of \"Wu\" in FIDEL --> SERA conversion.")
134 (defvar ethio-numeric-reduction 0
135 "*Degree of reduction in converting Ethiopic digits into Arabic digits.
137 For example, ({10}{9}{100}{80}{7}) is converted into:
138 `10`9`100`80`7 if `ethio-numeric-reduction' is 0,
139 `109100807 if `ethio-numeric-reduction' is 1,
140 `10900807 if `ethio-numeric-reduction' is 2.")
;; `ethio-fidel-to-java-buffer' selects its `format' control string from
;; this flag: the %4x (lowercase) form when non-nil, the %4X (uppercase)
;; form otherwise.
142 (defvar ethio-java-save-lowercase nil
143 "*Non-nil means save Ethiopic characters in lowercase hex numbers to Java files.
144 If nil, use uppercases.")
(defun ethio-prefer-amharic-p ()
  "Return non-nil if Amharic should be preferred over Tigrigna.
Amharic is preferred when `ethio-primary-language' is `amharic', or
when the primary language is not `tigrigna' and
`ethio-secondary-language' is `amharic'."
  (cond
   ;; The primary language decides whenever it is an Ethiopic one.
   ((eq ethio-primary-language 'amharic) t)
   ((eq ethio-primary-language 'tigrigna) nil)
   ;; Otherwise fall back on the secondary language.
   (t (eq ethio-secondary-language 'amharic))))
151 (defun ethio-prefer-amharic (arg)
154 (robin-modify-package "ethiopic-sera" "'a" ?አ)
155 (robin-modify-package "ethiopic-sera" "a" "አ")
156 (robin-modify-package "ethiopic-sera" "'A" ?ኣ)
157 (robin-modify-package "ethiopic-sera" "A" "ኣ"))
158 (robin-modify-package "ethiopic-sera" "'A" ?አ)
159 (robin-modify-package "ethiopic-sera" "A" "አ")
160 (robin-modify-package "ethiopic-sera" "'a" ?ኣ)
161 (robin-modify-package "ethiopic-sera" "a" "ኣ")))
163 (defun ethio-use-colon-for-colon (arg)
166 (robin-modify-package "ethiopic-sera" ":" ?፥)
167 (robin-modify-package "ethiopic-sera" "`:" ?፡))
168 (robin-modify-package "ethiopic-sera" " : " ?፡)
169 (robin-modify-package "ethiopic-sera" ":" "፡")
170 (robin-modify-package "ethiopic-sera" "-:" ?፥)))
172 (defun ethio-use-three-dot-question (arg)
175 (robin-modify-package "ethiopic-sera" "?" ?፧)
176 (robin-modify-package "ethiopic-sera" "`?" ??))
177 (robin-modify-package "ethiopic-sera" "?" ??)
178 (robin-modify-package "ethiopic-sera" "`?" ?፧)))
(defun ethio-adjust-robin ()
  "Apply the user's preferences to the \"ethiopic-sera\" robin package.
Call, in this order, the functions `ethio-prefer-amharic',
`ethio-use-colon-for-colon' and `ethio-use-three-dot-question', each of
which rewrites rules of that package with `robin-modify-package'."
  ;; Language preference, as computed from the primary and secondary
  ;; language settings.
  (let ((amharic-first (ethio-prefer-amharic-p)))
    (ethio-prefer-amharic amharic-first))
  ;; The next two names are both a variable and a function: the function
  ;; is called with the current value of the variable.
  (ethio-use-colon-for-colon ethio-use-colon-for-colon)
  (ethio-use-three-dot-question ethio-use-three-dot-question))

;; Run the adjustment from `robin-activate-hook'.
(add-hook 'robin-activate-hook 'ethio-adjust-robin)
192 (defun ethio-sera-to-fidel-buffer (&optional secondary force)
193 "Convert the current buffer from SERA to FIDEL.
195 The variable `ethio-primary-language' specifies the primary
196 language and `ethio-secondary-language' specifies the secondary.
198 If the 1st optional argument SECONDARY is non-nil, assume the
199 buffer begins with the secondary language; otherwise with the
202 If the 2nd optional argument FORCE is non-nil, perform conversion
203 even if the buffer is read-only.
205 See also the descriptions of the variables
206 `ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
209 (ethio-sera-to-fidel-region (point-min) (point-max) secondary force))
;; Both variables below are declared without a value.  The SERA <-->
;; FIDEL converters test each with `boundp' before reading it, and
;; `ethio-find-file' / `ethio-write-file' let-bind the first one to t
;; around their .html conversions.
211 ;; To avoid byte-compiler warnings. It should never be set globally.
212 (defvar ethio-sera-being-called-by-w3)
213 ;; This variable will be bound by some third-party package.
214 (defvar sera-being-called-by-w3)
217 (defun ethio-sera-to-fidel-region (begin end &optional secondary force)
218 "Convert the characters in region from SERA to FIDEL.
220 The variable `ethio-primary-language' specifies the primary
221 language and `ethio-secondary-language' specifies the secondary.
223 If the 3rd argument SECONDARY is given and non-nil, assume the
224 region begins with the secondary language; otherwise with the
227 If the 4th argument FORCE is given and non-nil, perform
228 conversion even if the buffer is read-only.
230 See also the descriptions of the variables
231 `ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
234 (if (and buffer-read-only
236 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
239 (let ((ethio-primary-language ethio-primary-language)
240 (ethio-secondary-language ethio-secondary-language)
241 ;; The above two variables may be changed temporarily by tilde
242 ;; escapes during conversion. We bind them to the variables
243 ;; of the same names so that the original values are restored
244 ;; when this function exits.
245 (buffer-read-only nil)
246 (lang (if secondary ethio-secondary-language ethio-primary-language))
249 (ethio-use-colon-for-colon ethio-use-colon-for-colon)
250 (ethio-use-three-dot-question ethio-use-three-dot-question)
253 (narrow-to-region begin end)
254 (goto-char (point-min))
259 (ethio-prefer-amharic t)
260 (ethio-sera-to-fidel-region-ethio 'amharic))
262 (ethio-prefer-amharic nil)
263 (ethio-sera-to-fidel-region-ethio 'tigrigna))
265 (ethio-sera-to-fidel-region-noethio))))
268 (if (eq lang ethio-primary-language)
269 ethio-secondary-language
270 ethio-primary-language)
273 ;; Restore user's preference.
274 (ethio-adjust-robin))
276 (defun ethio-sera-to-fidel-region-noethio ()
277 "Return next language as symbol: amharic, tigrigna, toggle or nil."
281 ;; No more "\", i.e. nothing to do.
282 ((not (search-forward "\\" nil 0))
285 ;; Hereafter point is put after a "\".
286 ;; First delete that "\", then check the following chars.
289 ((progn (delete-char -1) (setq lflag (ethio-process-language-flag)))
292 ;; "\\" : leave the second "\" and continue in the same language.
293 ((= (following-char) ?\\)
297 ;; "\ " : delete the following " " and toggle the language.
298 ((= (following-char) 32)
302 ;; A "\" but not a special sequence: simply toggle the language.
306 (defun ethio-sera-to-fidel-region-ethio (lang)
307 "Return next language as symbol: amharic, tigrigna, toggle or nil."
311 (if (re-search-forward "\\(`[1-9][0-9]*\\)\\|[\\<&]" nil t)
314 (robin-convert-region (point-min) (point-max) "ethiopic-sera")
315 (goto-char (point-max)))
319 ((= (following-char) ?`)
321 (ethio-process-digits)
325 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
326 ethio-sera-being-called-by-w3)
327 (and (boundp 'sera-being-called-by-w3)
328 sera-being-called-by-w3))
329 (search-forward (if (= (following-char) ?<) ">" ";") nil 0)
336 ;; Now we must be looking at a "\".
337 ;; First delete that "\", then check the following chars.
339 ((progn (delete-char 1) (= (following-char) 32))
343 ((looking-at "[,.;:'`?\\]+")
344 (goto-char (match-end 0))
347 ((/= (following-char) ?~)
350 ;; Now we must be looking at a "~".
352 ((setq lflag (ethio-process-language-flag))
355 ;; Delete the following "~" and check the following chars.
357 ((progn (delete-char 1) (looking-at "! ?"))
359 (if (re-search-forward "\\\\~! ?" nil 0)
365 (ethio-use-colon-for-colon t)
370 (ethio-use-colon-for-colon nil)
375 (ethio-use-three-dot-question t)
378 ((looking-at "\\? ?")
380 (ethio-use-three-dot-question nil)
383 ;; Unknown tilde escape. Recover the deleted chars.
388 (defun ethio-process-language-flag nil
389 "Process a language flag of the form \"~lang\" or \"~lang1~lang2\".
391 If looking at \"~lang1~lang2\", set `ethio-primary-language' and
392 `ethio-secondary-language' based on \"lang1\" and \"lang2\".
393 Then delete the language flag \"~lang1~lang2\" from the buffer.
394 Return value is the new primary language.
396 If looking at \"~lang\", delete that language flag \"~lang\" from
397 the buffer and return that language. In this case
398 `ethio-primary-language' and `ethio-secondary-language' are left
401 If an unsupported language flag is found, just return nil without
409 "~\\([a-z][a-z][a-z]?\\)~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
410 (setq lang1 (ethio-flag-to-language (match-string 1)))
411 (setq lang2 (ethio-flag-to-language (match-string 2))))
412 (setq ethio-primary-language lang1
413 ethio-secondary-language lang2)
414 (delete-region (point) (match-end 2))
415 (if (= (following-char) 32)
417 ethio-primary-language)
420 ((and (looking-at "~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
421 (setq lang1 (ethio-flag-to-language (match-string 1))))
422 (delete-region (point) (match-end 1))
423 (if (= (following-char) 32)
431 (defun ethio-flag-to-language (flag)
433 ((or (string= flag "en") (string= flag "eng")) 'english)
434 ((or (string= flag "ti") (string= flag "tir")) 'tigrigna)
435 ((or (string= flag "am") (string= flag "amh")) 'amharic)
438 (defun ethio-process-digits nil
439 "Convert Arabic digits to Ethiopic digits."
441 (while (and (>= (setq ch (following-char)) ?1)
445 ;; count up following zeros
447 (while (= (following-char) ?0)
453 ;; first digit is 10, 20, ..., or 90
455 (insert (aref [?፲ ?፳ ?፴ ?፵ ?፶ ?፷ ?፸ ?፹ ?፺] (- ch ?1)))
458 ;; first digit is 2, 3, ..., or 9
460 (insert (aref [?፪ ?፫ ?፬ ?፭ ?፮ ?፯ ?፰ ?፱] (- ch ?2))))
471 (insert-char ?፼ (/ z 4)))))
474 (defun ethio-sera-to-fidel-marker (&optional force)
475 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from SERA to FIDEL.
476 Assume that each region begins with `ethio-primary-language'.
477 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
479 (if (and buffer-read-only
481 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
484 (goto-char (point-min))
485 (while (search-forward "<sera>" nil t)
486 (ethio-sera-to-fidel-region
488 (if (search-forward "</sera>" nil t)
498 (defun ethio-language-to-flag (lang)
500 ((eq lang 'english) "eng")
501 ((eq lang 'tigrigna) "tir")
502 ((eq lang 'amharic) "amh")
506 (defun ethio-fidel-to-sera-buffer (&optional secondary force)
507 "Replace all the FIDEL characters in the current buffer to the SERA format.
508 The variable `ethio-primary-language' specifies the primary
509 language and `ethio-secondary-language' specifies the secondary.
511 If the 1st optional argument SECONDARY is non-nil, try to convert the
512 region so that it begins with the secondary language; otherwise with the
515 If the 2nd optional argument FORCE is non-nil, convert even if the
518 See also the descriptions of the variables
519 `ethio-use-colon-for-colon', `ethio-use-three-dot-question',
520 `ethio-quote-vowel-always' and `ethio-numeric-reduction'."
523 (ethio-fidel-to-sera-region (point-min) (point-max) secondary force))
526 (defun ethio-fidel-to-sera-region (begin end &optional secondary force)
527 "Replace all the FIDEL characters in the region to the SERA format.
529 The variable `ethio-primary-language' specifies the primary
530 language and `ethio-secondary-language' specifies the secondary.
532 If the 3rd argument SECONDARY is given and non-nil, convert
533 the region so that it begins with the secondary language; otherwise with
534 the primary language.
536 If the 4th argument FORCE is given and non-nil, convert even if the
539 See also the descriptions of the variables
540 `ethio-use-colon-for-colon', `ethio-use-three-dot-question',
541 `ethio-quote-vowel-always' and `ethio-numeric-reduction'."
544 (if (and buffer-read-only
546 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
550 (narrow-to-region begin end)
552 (let ((buffer-read-only nil)
554 ethio-secondary-language
555 ethio-primary-language))
556 (flag (if (ethio-prefer-amharic-p) "\\~amh " "\\~tir "))
559 (goto-char (point-min))
561 (unless (eq mode 'english)
562 (setq mode 'ethiopic))
563 (if (and (eq mode 'english) (looking-at "\\ce"))
564 (setq mode 'ethiopic))
565 (if (and (eq mode 'ethiopic) (looking-at "\\Ce"))
566 (setq mode 'english))
567 (insert (if (eq mode 'english) "\\~eng " flag))
571 (if (eq mode 'english)
573 (if (re-search-forward "\\(\\ce\\|\\\\\\)" nil 0)
576 ((eq (following-char) ?\\)
581 (setq mode 'ethiopic))))
583 ;; If we reach here, mode is ethiopic.
585 (if (re-search-forward "[a-z,.;:'`?\\<&]" nil 0)
588 (narrow-to-region p (point))
589 (robin-invert-region (point-min) (point-max) "ethiopic-sera")
591 ;; ethio-quote-vowel-always
592 (goto-char (point-min))
593 (while (re-search-forward "'[eauio]" nil t)
596 (setq ch (preceding-char))
597 (if (or (and (>= ch ?a) (<= ch ?z))
598 (and (>= ch ?A) (<= ch ?Z)))
599 (if (and (not ethio-quote-vowel-always)
600 (memq ch '(?e ?a ?u ?i ?o ?E ?A ?I)))
604 ;; ethio-W-sixth-always
605 (unless ethio-W-sixth-always
606 (goto-char (point-min))
607 (while (search-forward "W'" nil t)
611 ;; ethio-numeric-reduction
612 (when (> ethio-numeric-reduction 0)
613 (goto-char (point-min))
614 (while (re-search-forward "\\([0-9]\\)`\\([0-9]\\)" nil t)
615 (replace-match "\\1\\2")
617 (when (= ethio-numeric-reduction 2)
618 (goto-char (point-min))
619 (while (re-search-forward "\\([0-9]\\)1\\(0+\\)" nil t)
620 (replace-match "\\1\\2")))
622 (goto-char (point-max)))
625 ((looking-at "[a-z]")
627 (setq mode 'english))
628 ((looking-at "[,.;:'`\\]+")
630 (goto-char (1+ (match-end 0))))
631 ((= (following-char) ??)
632 (if ethio-use-three-dot-question
636 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
637 ethio-sera-being-called-by-w3)
638 (and (boundp 'sera-being-called-by-w3)
639 sera-being-called-by-w3))
640 (search-forward (if (= (following-char) ?<) ">" "&") nil 0)
641 (forward-char 1)))))))))
644 (defun ethio-fidel-to-sera-marker (&optional force)
645 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from FIDEL to SERA.
646 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
649 (if (and buffer-read-only
651 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
654 (goto-char (point-min))
655 (while (search-forward "<sera>" nil t)
656 (ethio-fidel-to-sera-region
658 (if (search-forward "</sera>" nil t)
665 ;; vowel modification
669 (defun ethio-modify-vowel nil
670 "Modify the vowel of the FIDEL that is under the cursor."
673 (let ((consonant (ethio-get-consonant (following-char)))
676 (error "") ; not an Ethiopic char
677 (setq vowel (read-char "Modify vowel to: "))
679 (if (and (string= consonant "'") (= vowel ?W))
682 (narrow-to-region (point) (point))
683 (insert consonant vowel)
684 (robin-convert-region (point-min) (point-max) "ethiopic-sera"))))))
686 (defun ethio-get-consonant (ch)
687 "Return the consonant part of CH's SERA spelling in ethiopic-sera."
688 (let ((sera (get-char-code-property ch 'ethiopic-sera)))
691 ((= ch ?ኧ) "'") ; Only this has two vowel letters.
694 (if (memq (preceding-char) '(?e ?u ?i ?a ?o ?E ?I ?A ?'))
696 (buffer-substring (point-min) (point-max)))))))
703 (defun ethio-replace-space (ch begin end)
704 "Replace ASCII spaces with Ethiopic word separators in the region.
706 In the specified region, replace word separators surrounded by two
707 Ethiopic characters, depending on the first argument CH, which should
710 If CH = 1, word separator will be replaced with an ASCII space.
711 If CH = 2, with two ASCII spaces.
712 If CH = 3, with the Ethiopic colon-like word separator.
714 The 2nd and 3rd arguments BEGIN and END specify the region."
716 (interactive "*cReplace spaces to: 1 (sg col), 2 (dbl col), 3 (Ethiopic)\nr")
717 (if (not (memq ch '(?1 ?2 ?3)))
721 (narrow-to-region begin end)
725 ;; an Ethiopic word separator --> an ASCII space
726 (goto-char (point-min))
727 (while (search-forward "፡" nil t)
730 ;; two ASCII spaces between Ethiopic characters --> an ASCII space
731 (goto-char (point-min))
732 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
733 (replace-match "\\1 \\2")
737 ;; An Ethiopic word separator --> two ASCII spaces
738 (goto-char (point-min))
739 (while (search-forward "፡" nil t)
742 ;; An ASCII space between Ethiopic characters --> two ASCII spaces
743 (goto-char (point-min))
744 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
745 (replace-match "\\1 \\2")
749 ;; One or two ASCII spaces between Ethiopic characters
750 ;; --> An Ethiopic word separator
751 (goto-char (point-min))
752 (while (re-search-forward "\\(\\ce\\) ?\\(\\ce\\)" nil t)
753 (replace-match "\\1፡\\2")
756 ;; Three or more ASCII spaces between Ethiopic characters
757 ;; --> An Ethiopic word separator + (N - 2) ASCII spaces
758 (goto-char (point-min))
759 (while (re-search-forward "\\(\\ce\\) \\( +\\ce\\)" nil t)
760 (replace-match "\\1፡\\2")
761 (forward-char -1)))))))
767 ;; This function is deprecated.
769 (defun ethio-input-special-character (arg)
770 "This function is deprecated."
771 (interactive "*cInput number: 1. 2. 3. 4. 5.")
791 (defun ethio-fidel-to-tex-buffer nil
792 "Convert each fidel characters in the current buffer into a fidel-tex command."
794 (let ((buffer-read-only nil)
797 ;; Special treatment for geminated characters.
798 ;; Geminated characters la", etc. change into \geminateG{\laG}, etc.
799 (goto-char (point-min))
800 (while (re-search-forward "፟\\|" nil t)
801 (setq comp (find-composition (match-beginning 0)))
803 (replace-match "\\\\geminateG{}" t)
804 (decompose-region (car comp) (cadr comp))
807 (insert "\\geminateG{")
811 ;; Special Ethiopic punctuations.
812 (goto-char (point-min))
813 (while (re-search-forward "\\ce[»\\.\\?]\\|«\\ce" nil t)
815 ((= (setq ch (preceding-char)) ?\»)
817 (insert "\\rquoteG"))
830 ;; Ethiopic characters to TeX macros
831 (robin-invert-region (point-min) (point-max) "ethiopic-tex")
833 (goto-char (point-min))
834 (set-buffer-modified-p nil)))
837 (defun ethio-tex-to-fidel-buffer nil
838 "Convert fidel-tex commands in the current buffer into fidel chars."
840 (let ((buffer-read-only nil)
843 ;; TeX macros to Ethiopic characters
844 (robin-convert-region (point-min) (point-max) "ethiopic-tex")
846 ;; compose geminated characters
847 (goto-char (point-min))
848 (while (re-search-forward "\\\\geminateG{\\(\\ce?\\)}" nil t)
849 (replace-match "\\1፟"))
851 ;; remove redundant braces, if any
852 (goto-char (point-min))
853 (while (re-search-forward "{\\(\\ce\\)}" nil t)
854 (replace-match "\\1"))
856 (goto-char (point-min))
857 (set-buffer-modified-p nil)))
864 (defun ethio-fidel-to-java-buffer nil
865 "Convert Ethiopic characters into the Java escape sequences.
867 Each escape sequence is of the form \\uXXXX, where XXXX is the
868 character's codepoint (in hex) in Unicode.
870 If `ethio-java-save-lowercase' is non-nil, use [0-9a-f].
871 Otherwise, [0-9A-F]."
874 (goto-char (point-min))
875 (while (re-search-forward "[ሀ-፼]" nil t)
876 (setq ucode (preceding-char))
877 (delete-backward-char 1)
879 (format (if ethio-java-save-lowercase "\\u%4x" "\\u%4X")
883 (defun ethio-java-to-fidel-buffer nil
884 "Convert the Java escape sequences into corresponding Ethiopic characters."
885 (let ((case-fold-search t)
887 (goto-char (point-min))
888 (while (re-search-forward "\\\\u\\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\\)" nil t)
889 (setq ucode (read (concat "#x" (match-string 1))))
890 (when (and (>= ucode #x1200) (<= ucode #x137f))
891 (replace-match (char-to-string ucode))))))
898 (defun ethio-find-file nil
899 "Transliterate file content into Ethiopic dependig on filename suffix."
902 ((string-match "\\.sera$" (buffer-file-name))
904 (ethio-sera-to-fidel-buffer nil 'force)
905 (set-buffer-modified-p nil)))
907 ((string-match "\\.html$" (buffer-file-name))
908 (let ((ethio-sera-being-called-by-w3 t))
910 (ethio-sera-to-fidel-marker 'force)
911 (goto-char (point-min))
912 (while (re-search-forward "&[lr]aquo;" nil t)
913 (if (= (char-after (1+ (match-beginning 0))) ?l)
915 (replace-match "»")))
916 (set-buffer-modified-p nil))))
918 ((string-match "\\.tex$" (buffer-file-name))
920 (ethio-tex-to-fidel-buffer)
921 (set-buffer-modified-p nil)))
923 ((string-match "\\.java$" (buffer-file-name))
925 (ethio-java-to-fidel-buffer)
926 (set-buffer-modified-p nil)))
932 (defun ethio-write-file nil
933 "Transliterate Ethiopic characters in ASCII depending on the file extension."
936 ((string-match "\\.sera$" (buffer-file-name))
938 (ethio-fidel-to-sera-buffer nil 'force)
939 (goto-char (point-min))
940 (ethio-record-user-preference)
941 (set-buffer-modified-p nil)))
943 ((string-match "\\.html$" (buffer-file-name))
945 (let ((ethio-sera-being-called-by-w3 t))
946 (ethio-fidel-to-sera-marker 'force)
947 (goto-char (point-min))
948 (while (re-search-forward "[«»]" nil t)
949 (replace-match (if (= (preceding-char) ?«) "«" "»")))
950 (goto-char (point-min))
951 (if (search-forward "<sera>" nil t)
952 (ethio-record-user-preference))
953 (set-buffer-modified-p nil))))
955 ((string-match "\\.tex$" (buffer-file-name))
957 (ethio-fidel-to-tex-buffer)
958 (set-buffer-modified-p nil)))
960 ((string-match "\\.java$" (buffer-file-name))
962 (ethio-fidel-to-java-buffer)
963 (set-buffer-modified-p nil)))
(defun ethio-record-user-preference nil
  "Insert at point two tilde escapes recording the punctuation options.
The first escape reflects `ethio-use-colon-for-colon' and the second
reflects `ethio-use-three-dot-question'.  `ethio-write-file' calls this
after converting a buffer to SERA."
  (let ((colon-escape
         (if ethio-use-colon-for-colon "\\~-: " "\\~`: "))
        (question-escape
         (if ethio-use-three-dot-question "\\~`| " "\\~? ")))
    (insert colon-escape question-escape)))
973 ;; Ethiopic word separator vs. ASCII space
;; Non-nil means `ethio-insert-space' inserts ASCII spaces (character
;; 32).  `ethio-toggle-space' flips the value.  The variable is made
;; automatically buffer-local just below its definition.
976 (defvar ethio-prefer-ascii-space t)
977 (make-variable-buffer-local 'ethio-prefer-ascii-space)
979 (defun ethio-toggle-space nil
980 "Toggle ASCII space and Ethiopic separator for keyboard input."
982 (setq ethio-prefer-ascii-space
983 (not ethio-prefer-ascii-space)))
985 (defun ethio-insert-space (arg)
986 "Insert ASCII spaces or Ethiopic word separators depending on context.
988 If the current word separator (indicated in mode-line) is the ASCII space,
989 insert an ASCII space. With ARG, insert that many ASCII spaces.
991 If the current word separator is the colon-like Ethiopic word
992 separator and the point is preceded by `an Ethiopic punctuation mark
993 followed by zero or more ASCII spaces', then insert also an ASCII
994 space. With ARG, insert that many ASCII spaces.
996 Otherwise, insert a colon-like Ethiopic word separator. With ARG, insert that
997 many Ethiopic word separators."
1001 (ethio-prefer-ascii-space
1002 (insert-char 32 arg))
1004 (skip-chars-backward " ")
1005 (memq (preceding-char)
1006 '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ? ? ? ? ?)))
1007 (insert-char 32 arg))
1009 (insert-char ?፡ arg))))
1012 (defun ethio-insert-ethio-space (arg)
1013 "Insert the Ethiopic word delimiter (the colon-like character).
1014 With ARG, insert that many delimiters."
1016 (insert-char ?፡ arg))
1023 (defun ethio-composition-function (pos to font-object string)
1025 (let ((pattern "\\ce\\(፟\\|\\)"))
1028 (eq (string-match pattern string pos) pos))
1029 (prog1 (match-end 0)
1030 (compose-string string pos (match-end 0))))
1031 (if (>= pos (point-min))
1034 (if (looking-at pattern)
1035 (prog1 (match-end 0)
1036 (compose-region pos (match-end 0)))))))))
1038 ;; This function is not used any more.
1039 (defun ethio-gemination nil
1040 "Compose the character before the point with the Ethiopic gemination mark.
1041 If the character is already composed, decompose it and remove the gemination
1044 (let ((ch (preceding-char)))
1046 ((and (= ch ?) (find-composition (1- (point))))
1047 (decompose-region (- (point) 2) (point)))
1048 ((and (>= ch #x1200) (<= ch #x137f))
1050 (compose-region (- (point) 2) (point)))
1058 (robin-define-package "ethiopic-sera"
1059 "SERA transliteration system for Ethiopic."
1065 ("hE" ?ሄ) ("hee" "ሄ")
1069 ("le" ?ለ) ("Le" "ለ")
1070 ("lu" ?ሉ) ("Lu" "ሉ")
1071 ("li" ?ሊ) ("Li" "ሊ")
1072 ("la" ?ላ) ("La" "ላ")
1073 ("lE" ?ሌ) ("LE" "ሌ") ("lee" "ሌ") ("Lee" "ሌ")
1075 ("lo" ?ሎ) ("Lo" "ሎ")
1076 ("lWa" ?ሏ) ("LWa" "ሏ") ("lW" "ሏ") ("LW" "ሏ")
1082 ("HE" ?ሔ) ("Hee" "ሔ")
1085 ("HWa" ?ሗ) ("HW" "ሗ")
1087 ("me" ?መ) ("Me" "መ")
1088 ("mu" ?ሙ) ("Mu" "ሙ")
1089 ("mi" ?ሚ) ("Mi" "ሚ")
1090 ("ma" ?ማ) ("Ma" "ማ")
1091 ("mE" ?ሜ) ("ME" "ሜ") ("mee" "ሜ") ("Mee" "ሜ")
1093 ("mo" ?ሞ) ("Mo" "ሞ")
1094 ("mWa" ?ሟ) ("MWa" "ሟ") ("mW" "ሟ") ("MW" "ሟ")
1096 ("`se" ?ሠ) ("sse" "ሠ") ("s2e" "ሠ")
1097 ("`su" ?ሡ) ("ssu" "ሡ") ("s2u" "ሡ")
1098 ("`si" ?ሢ) ("ssi" "ሢ") ("s2i" "ሢ")
1099 ("`sa" ?ሣ) ("ssa" "ሣ") ("s2a" "ሣ")
1100 ("`sE" ?ሤ) ("ssE" "ሤ") ("s2E" "ሤ")
1101 ("`see" "ሤ") ("ssee" "ሤ") ("s2ee" "ሤ")
1102 ("`s" ?ሥ) ("ss" "ሥ") ("s2" "ሥ")
1103 ("`so" ?ሦ) ("sso" "ሦ") ("s2o" "ሦ")
1104 ("`sWa" ?ሧ) ("ssWa" "ሧ") ("s2Wa" "ሧ")
1105 ("`sW" "ሧ") ("ssW" "ሧ") ("s2W" "ሧ")
1107 ("re" ?ረ) ("Re" "ረ")
1108 ("ru" ?ሩ) ("Ru" "ሩ")
1109 ("ri" ?ሪ) ("Ri" "ሪ")
1110 ("ra" ?ራ) ("Ra" "ራ")
1111 ("rE" ?ሬ) ("RE" "ሬ") ("ree" "ሬ") ("Ree" "ሬ")
1113 ("ro" ?ሮ) ("Ro" "ሮ")
1114 ("rWa" ?ሯ) ("RWa" "ሯ") ("rW" "ሯ") ("RW" "ሯ")
1120 ("sE" ?ሴ) ("see" "ሴ")
1123 ("sWa" ?ሷ) ("sW" "ሷ")
1129 ("xE" ?ሼ) ("xee" "ሼ")
1132 ("xWa" ?ሿ) ("xW" "ሿ")
1138 ("qE" ?ቄ) ("qee" "ቄ")
1143 ("qWa" ?ቋ) ("qW" "ቋ")
1144 ("qWE" ?ቌ) ("qWee" "ቌ")
1145 ("qW'" ?ቍ) ("qWu" "ቍ")
1151 ("QE" ?ቔ) ("Qee" "ቔ")
1156 ("QWa" ?ቛ) ("QW" "ቛ")
1157 ("QWE" ?ቜ) ("QWee" "ቜ")
1158 ("QW'" ?ቝ) ("QWu" "ቝ")
1160 ("be" ?በ) ("Be" "በ")
1161 ("bu" ?ቡ) ("Bu" "ቡ")
1162 ("bi" ?ቢ) ("Bi" "ቢ")
1163 ("ba" ?ባ) ("Ba" "ባ")
1164 ("bE" ?ቤ) ("BE" "ቤ") ("bee" "ቤ") ("Bee" "ቤ")
1166 ("bo" ?ቦ) ("Bo" "ቦ")
1167 ("bWa" ?ቧ) ("BWa" "ቧ") ("bW" "ቧ") ("BW" "ቧ")
1169 ("ve" ?ቨ) ("Ve" "ቨ")
1170 ("vu" ?ቩ) ("Vu" "ቩ")
1171 ("vi" ?ቪ) ("Vi" "ቪ")
1172 ("va" ?ቫ) ("Va" "ቫ")
1173 ("vE" ?ቬ) ("VE" "ቬ") ("vee" "ቬ") ("Vee" "ቬ")
1175 ("vo" ?ቮ) ("Vo" "ቮ")
1176 ("vWa" ?ቯ) ("VWa" "ቯ") ("vW" "ቯ") ("VW" "ቯ")
1182 ("tE" ?ቴ) ("tee" "ቴ")
1185 ("tWa" ?ቷ) ("tW" "ቷ")
1191 ("cE" ?ቼ) ("cee" "ቼ")
1194 ("cWa" ?ቿ) ("cW" "ቿ")
1196 ("`he" ?ኀ) ("hhe" "ኀ") ("h2e" "ኀ")
1197 ("`hu" ?ኁ) ("hhu" "ኁ") ("h2u" "ኁ")
1198 ("`hi" ?ኂ) ("hhi" "ኂ") ("h2i" "ኂ")
1199 ("`ha" ?ኃ) ("hha" "ኃ") ("h2a" "ኃ")
1200 ("`hE" ?ኄ) ("hhE" "ኄ") ("h2E" "ኄ")
1201 ("`hee" "ኄ") ("hhee" "ኄ") ("h2ee" "ኄ")
1202 ("`h" ?ኅ) ("hh" "ኅ") ("h2" "ኅ")
1203 ("`ho" ?ኆ) ("hho" "ኆ") ("h2o" "ኆ")
1204 ("`hWe" ?ኈ) ("hhWe" "ኈ") ("h2We" "ኈ") ("hWe" "ኈ")
1205 ("`hWi" ?ኊ) ("hhWi" "ኊ") ("h2Wi" "ኊ") ("hWi" "ኊ")
1206 ("`hWa" ?ኋ) ("hhWa" "ኋ") ("h2Wa" "ኋ") ("hWa" "ኋ")
1207 ("`hW" "ኋ") ("hhW" "ኋ") ("h2W" "ኋ")
1208 ("`hWE" ?ኌ) ("hhWE" "ኌ") ("h2WE" "ኌ") ("hWE" "ኌ")
1209 ("`hWee" "ኌ") ("hhWee" "ኌ") ("h2Wee" "ኌ") ("hWee" "ኌ")
1210 ("`hW'" ?ኍ) ("hhW'" "ኍ") ("h2W'" "ኍ") ("hW'" "ኍ")
1211 ("`hWu" "ኍ") ("hhWu" "ኍ") ("h2Wu" "ኍ") ("hWu" "ኍ")
1217 ("nE" ?ኔ) ("nee" "ኔ")
1220 ("nWa" ?ኗ) ("nW" "ኗ")
1226 ("NE" ?ኜ) ("Nee" "ኜ")
1229 ("NWa" ?ኟ) ("NW" "ኟ")
1232 ("'u" ?ኡ) ("u" "ኡ") ("'U" "ኡ") ("U" "ኡ")
1236 ("'I" ?እ) ("I" "እ") ("'e" "እ") ("e" "እ")
1237 ("'o" ?ኦ) ("o" "ኦ") ("'O" "ኦ") ("O" "ኦ")
1238 ("'ea" ?ኧ) ("ea" "ኧ")
1244 ("kE" ?ኬ) ("kee" "ኬ")
1249 ("kWa" ?ኳ) ("kW" "ኳ")
1250 ("kWE" ?ኴ) ("kWee" "ኴ")
1251 ("kW'" ?ኵ) ("kWu" "ኵ")
1257 ("KE" ?ኼ) ("Kee" "ኼ")
1262 ("KWa" ?ዃ) ("KW" "ዃ")
1263 ("KWE" ?ዄ) ("KWee" "ዄ")
1264 ("KW'" ?ዅ) ("KWu" "ዅ")
1270 ("wE" ?ዌ) ("wee" "ዌ")
1274 ("`e" ?ዐ) ("ae" "ዐ") ("aaa" "ዐ") ("e2" "ዐ")
1275 ("`u" ?ዑ) ("uu" "ዑ") ("u2" "ዑ") ("`U" "ዑ") ("UU" "ዑ") ("U2" "ዑ")
1276 ("`i" ?ዒ) ("ii" "ዒ") ("i2" "ዒ")
1277 ("`a" ?ዓ) ("aa" "ዓ") ("a2" "ዓ") ("`A" "ዓ") ("AA" "ዓ") ("A2" "ዓ")
1278 ("`E" ?ዔ) ("EE" "ዔ") ("E2" "ዔ")
1279 ("`I" ?ዕ) ("II" "ዕ") ("I2" "ዕ") ("ee" "ዕ")
1280 ("`o" ?ዖ) ("oo" "ዖ") ("o2" "ዖ") ("`O" "ዖ") ("OO" "ዖ") ("O2" "ዖ")
1286 ("zE" ?ዜ) ("zee" "ዜ")
1289 ("zWa" ?ዟ) ("zW" "ዟ")
1295 ("ZE" ?ዤ) ("Zee" "ዤ")
1298 ("ZWa" ?ዧ) ("ZW" "ዧ")
1300 ("ye" ?የ) ("Ye" "የ")
1301 ("yu" ?ዩ) ("Yu" "ዩ")
1302 ("yi" ?ዪ) ("Yi" "ዪ")
1303 ("ya" ?ያ) ("Ya" "ያ")
1304 ("yE" ?ዬ) ("YE" "ዬ") ("yee" "ዬ") ("Yee" "ዬ")
1306 ("yo" ?ዮ) ("Yo" "ዮ")
1312 ("dE" ?ዴ) ("dee" "ዴ")
1315 ("dWa" ?ዷ) ("dW" "ዷ")
1321 ("DE" ?ዼ) ("Dee" "ዼ")
1324 ("DWa" ?ዿ) ("DW" "ዿ")
1326 ("je" ?ጀ) ("Je" "ጀ")
1327 ("ju" ?ጁ) ("Ju" "ጁ")
1328 ("ji" ?ጂ) ("Ji" "ጂ")
1329 ("ja" ?ጃ) ("Ja" "ጃ")
1330 ("jE" ?ጄ) ("JE" "ጄ") ("jee" "ጄ") ("Jee" "ጄ")
1332 ("jo" ?ጆ) ("Jo" "ጆ")
1333 ("jWa" ?ጇ) ("jW" "ጇ") ("JWa" "ጇ") ("JW" "ጇ")
1339 ("gE" ?ጌ) ("gee" "ጌ")
1344 ("gWa" ?ጓ) ("gW" "ጓ")
1345 ("gWE" ?ጔ) ("gWee" "ጔ")
1346 ("gW'" ?ጕ) ("gWu" "ጕ")
1352 ("GE" ?ጜ) ("Gee" "ጜ")
1360 ("TE" ?ጤ) ("Tee" "ጤ")
1363 ("TWa" ?ጧ) ("TW" "ጧ")
1369 ("CE" ?ጬ) ("Cee" "ጬ")
1372 ("CWa" ?ጯ) ("CW" "ጯ")
1378 ("PE" ?ጴ) ("Pee" "ጴ")
1381 ("PWa" ?ጷ) ("PW" "ጷ")
1387 ("SE" ?ጼ) ("See" "ጼ")
1390 ("SWa" ?ጿ) ("`SWa" "ጿ") ("SSWa" "ጿ") ("S2Wa" "ጿ")
1391 ("SW" "ጿ") ("`SW" "ጿ") ("SSW" "ጿ") ("S2W" "ጿ")
1393 ("`Se" ?ፀ) ("SSe" "ፀ") ("S2e" "ፀ")
1394 ("`Su" ?ፁ) ("SSu" "ፁ") ("S2u" "ፁ")
1395 ("`Si" ?ፂ) ("SSi" "ፂ") ("S2i" "ፂ")
1396 ("`Sa" ?ፃ) ("SSa" "ፃ") ("S2a" "ፃ")
1397 ("`SE" ?ፄ) ("SSE" "ፄ") ("S2E" "ፄ")
1398 ("`See" "ፄ") ("SSee" "ፄ") ("S2ee" "ፄ")
1399 ("`S" ?ፅ) ("SS" "ፅ") ("S2" "ፅ")
1400 ("`So" ?ፆ) ("SSo" "ፆ") ("S2o" "ፆ")
1402 ("fe" ?ፈ) ("Fe" "ፈ")
1403 ("fu" ?ፉ) ("Fu" "ፉ")
1404 ("fi" ?ፊ) ("Fi" "ፊ")
1405 ("fa" ?ፋ) ("Fa" "ፋ")
1406 ("fE" ?ፌ) ("FE" "ፌ") ("fee" "ፌ") ("Fee" "ፌ")
1408 ("fo" ?ፎ) ("Fo" "ፎ")
1409 ("fWa" ?ፏ) ("FWa" "ፏ") ("fW" "ፏ") ("FW" "ፏ")
1415 ("pE" ?ፔ) ("pee" "ፔ")
1418 ("pWa" ?ፗ) ("pW" "ፗ")
1420 ("rYa" ?ፘ) ("RYa" "ፘ") ("rY" "ፘ") ("RY" "ፘ")
1421 ("mYa" ?ፙ) ("MYa" "ፙ") ("mY" "ፙ") ("MY" "ፙ")
1422 ("fYa" ?ፚ) ("FYa" "ፚ") ("fY" "ፚ") ("FY" "ፚ")
1424 (" : " ?፡) (":" "፡") ("`:" "፡")
1430 ("`?" ?፧) ("??" "፧")
1431 (":|:" ?፨) ("**" "፨")
1433 ;; Explicit syllable delimiter
1436 ;; Quick ASCII input
;; Register the "ethiopic-sera" robin package as an input method.
;; Arguments, in order: input method name, language environment,
;; activation function, title string, description.
1501 (register-input-method
1502 "ethiopic-sera" "Ethiopic"
1503 'robin-use-package "et" "An input method for Ethiopic.")
1505 (robin-define-package "ethiopic-tex"
1506 "TeX transliteration system for Ethiopic."
1508 ("\\heG" ?ሀ) ; U+1200 ..
1525 ("\\HeG" ?ሐ) ; U+1210 ..
1542 ("\\sseG" ?ሠ) ; U+1220 ..
1559 ("\\seG" ?ሰ) ; U+1230 ..
1576 ("\\qeG" ?ቀ) ; U+1240 ..
1593 ("\\QeG" ?ቐ) ; U+1250 ..
1610 ("\\beG" ?በ) ; U+1260 ..
1627 ("\\teG" ?ተ) ; U+1270 ..
1644 ("\\hheG" ?ኀ) ; U+1280 ..
1661 ("\\neG" ?ነ) ; U+1290 ..
1678 ("\\eG" ?አ) ; U+12A0 ..
1695 ("\\kWeG" ?ኰ) ; U+12B0 ..
1712 ("\\KWeG" ?ዀ) ; U+12C0 ..
1729 ("\\eeG" ?ዐ) ; U+12D0 ..
1746 ("\\ZeG" ?ዠ) ; U+12E0 ..
1763 ("\\deG" ?ደ) ; U+12F0 ..
1780 ("\\jeG" ?ጀ) ; U+1300 ..
1797 ("\\gWeG" ?ጐ) ; U+1310 ..
1814 ("\\TeG" ?ጠ) ; U+1320 ..
1831 ("\\PeG" ?ጰ) ; U+1330 ..
1848 ("\\SSeG" ?ፀ) ; U+1340 ..
1865 ("\\peG" ?ፐ) ; U+1350 ..
1882 ;; reserved ; U+1360 ..
1899 ("\\smntG" ?፰) ; U+1370 ..
1917 ;; private extension
1920 ("\\yWaG" ?) ; U+1A00EF (was U+12EF)
1922 ("\\GWaG" ?) ; U+1A011F (was U+131F)
1924 ("\\qqeG" ?) ; U+1A0180 .. (was U+1380 ..)
1941 ("\\kkeG" ?) ; U+1A0190 .. (was U+1390 ..)
1958 ("\\XeG" ?) ; U+1A01A0 .. (was U+13A0 ..)
1975 ("\\ggeG" ?) ; U+1A01B0 .. (was U+13B0 ..)
1992 ("\\ornamentG" ?) ; U+1A01C0 .. (was U+FDF0 ..)
2002 ;; Gemination () is handled in a special way.
2005 ;; Assign reverse conversion to Fidel chars.
2006 ;; Then override forward conversion with ASCII chars.
2007 ;; ASCII chars should not have reverse conversions.
2008 ("\\dotG" ?) ("\\dotG" ".")
2009 ("\\lquoteG" ?) ("\\lquoteG" "«")
2010 ("\\rquoteG" ?) ("\\rquoteG" "»")
2011 ("\\qmarkG" ?) ("\\qmarkG" "?")
2014 ;; New characters in Unicode 4.1.
2016 ;; In forward conversion, these characters override the old private
2017 ;; extensions above. The old private extensions still keep their
2018 ;; reverse conversion.
2074 ;; The ethiopic-tex package is not used for keyboard input, therefore
2075 ;; not registered with the register-input-method function.
2077 (provide 'ethio-util)
2079 ;;; ethio-util.el ends here
2081 ;;; arch-tag: c8feb3d6-39bf-4b0a-b6ef-26f03fbc8140
2082 ;;; ethio-util.el ends here