1 ;;; ethio-util.el --- utilities for Ethiopic -*- coding: utf-8-emacs; -*-
3 ;; Copyright (C) 1997-1998, 2002-2016 Free Software Foundation, Inc.
4 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
5 ;; 2006, 2007, 2008, 2009, 2010, 2011
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
8 ;; Copyright (C) 2005, 2006
9 ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 ;; Registration Number: H15PRO110
12 ;; Keywords: mule, multilingual, Ethiopic
14 ;; This file is part of GNU Emacs.
16 ;; GNU Emacs is free software: you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation, either version 3 of the License, or
19 ;; (at your option) any later version.
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
29 ;; Author: TAKAHASHI Naoto <ntakahas@m17n.org>
37 ;; Information for exiting Ethiopic environment.
(defvar exit-ethiopic-environment-data nil
  "Global key bindings to restore when leaving the Ethiopic environment.
Each element is a cons cell (KEY . BINDING), pushed by
`setup-ethiopic-environment-internal' and consumed by
`exit-ethiopic-environment'.")
(defun setup-ethiopic-environment-internal ()
  "Install the Ethiopic key bindings and file-conversion hooks.
Every global binding that is replaced is first saved as a
\(KEY . OLD-BINDING) cell on `exit-ethiopic-environment-data', so that
`exit-ethiopic-environment' can put it back."
  (let ((key-bindings '((" " . ethio-insert-space)
                        ;; Shift-space; written with \s so the key cannot
                        ;; be damaged by whitespace reflowing.
                        ([?\S-\s] . ethio-insert-ethio-space)
                        ;; ([?\C-'] . ethio-gemination)
                        ([f3] . ethio-fidel-to-sera-buffer)
                        ([S-f3] . ethio-fidel-to-sera-region)
                        ([C-f3] . ethio-fidel-to-sera-marker)
                        ([f4] . ethio-sera-to-fidel-buffer)
                        ([S-f4] . ethio-sera-to-fidel-region)
                        ([C-f4] . ethio-sera-to-fidel-marker)
                        ;; ([S-f5] . ethio-toggle-punctuation)
                        ([S-f6] . ethio-modify-vowel)
                        ([S-f7] . ethio-replace-space)
                        ;; ([S-f8] . ethio-input-special-character) ; deprecated
                        ([C-f9] . ethio-toggle-space)
                        ([S-f9] . ethio-replace-space)))) ; as requested
    (dolist (binding key-bindings)
      (let ((kb (car binding)))
        ;; Remember the binding being shadowed before replacing it.
        (push (cons kb (global-key-binding kb))
              exit-ethiopic-environment-data)
        (global-set-key kb (cdr binding)))))

  (add-hook 'find-file-hook 'ethio-find-file)
  (add-hook 'write-file-functions 'ethio-write-file)
  (add-hook 'after-save-hook 'ethio-find-file))
(defun exit-ethiopic-environment ()
  "Exit Ethiopic language environment.
Restore every global key binding saved on
`exit-ethiopic-environment-data', emptying that list as it goes, and
remove the Ethiopic file-conversion hooks."
  (while exit-ethiopic-environment-data
    (let ((saved (car exit-ethiopic-environment-data)))
      (global-set-key (car saved) (cdr saved)))
    ;; Drop the entry only after its binding has been restored.
    (pop exit-ethiopic-environment-data))

  (remove-hook 'find-file-hook #'ethio-find-file)
  (remove-hook 'write-file-functions #'ethio-write-file)
  (remove-hook 'after-save-hook #'ethio-find-file))
84 ;; ETHIOPIC UTILITY FUNCTIONS
87 ;; If the filename ends in ".sera", editing is done in fidel
88 ;; but file I/O is done in SERA.
90 ;; If the filename ends in ".java", editing is done in fidel
91 ;; but file I/O is done in the \uXXXX style, where XXXX is
92 ;; the Unicode codepoint for the Ethiopic character.
94 ;; If the filename ends in ".tex", editing is done in fidel
95 ;; but file I/O is done in EthioTeX format.
(defvar ethio-primary-language 'tigrigna
  "Primary language assumed in SERA --> FIDEL conversion.
The value is a symbol and should be one of: `tigrigna', `amharic' or
`english'.")
(defvar ethio-secondary-language 'english
  "Secondary language assumed in SERA --> FIDEL conversion.
The value is a symbol and should be one of: `tigrigna', `amharic' or
`english'.")
(defvar ethio-use-colon-for-colon nil
  "Non-nil means associate ASCII colon with Ethiopic colon.
If nil, associate ASCII colon with Ethiopic word separator, i.e., two
vertically stacked dots.  All SERA <--> FIDEL converters refer to this
variable.")
(defvar ethio-use-three-dot-question nil
  "Non-nil means ASCII question mark stands for the old style Ethiopic one.
The old style Ethiopic question mark is three vertically stacked dots.
If nil, associate ASCII question mark with Ethiopic stylized question
mark.  All SERA <--> FIDEL converters refer to this variable.")
(defvar ethio-quote-vowel-always nil
  "Non-nil means always quote an isolated vowel in FIDEL --> SERA conversion.
That is, put an apostrophe before every isolated vowel (except at word
initial).  If nil, put an apostrophe only between a 6th-form consonant
and an isolated vowel.")
(defvar ethio-W-sixth-always nil
  "Non-nil means write the Wu-form of a 12-form consonant as \"W'\".
If nil, FIDEL --> SERA conversion writes it as \"Wu\" instead.")
(defvar ethio-numeric-reduction 0
  "Degree of reduction in converting Ethiopic digits into Arabic digits.
The value should be 0, 1 or 2.

For example, ({10}{9}{100}{80}{7}) is converted into:
    \\=`10\\=`9\\=`100\\=`80\\=`7  if `ethio-numeric-reduction' is 0,
    \\=`109100807  if `ethio-numeric-reduction' is 1,
    \\=`10900807  if `ethio-numeric-reduction' is 2.")
(defvar ethio-java-save-lowercase nil
  "Non-nil means write Ethiopic characters to Java files in lowercase hex.
If nil, the hex digits of the \\uXXXX escapes are written in uppercase.")
(defun ethio-prefer-amharic-p ()
  "Return non-nil if Amharic conventions take precedence over Tigrigna.
Amharic wins when it is the primary language, or when the primary
language is not Tigrigna and the secondary language is Amharic."
  (cond
   ((eq ethio-primary-language 'amharic) t)
   ((eq ethio-primary-language 'tigrigna) nil)
   (t (eq ethio-secondary-language 'amharic))))
(defun ethio-prefer-amharic (arg)
  "Select which glottal vowels plain \"a\" and \"A\" produce.
This rewrites the corresponding rules of the \"ethiopic-sera\" robin
package.  With non-nil ARG, \"a\" and \"'a\" give the first-form
glottal vowel while \"A\" and \"'A\" give the fourth-form one; with
nil ARG the two assignments are swapped."
  (if arg
      (progn
        (robin-modify-package "ethiopic-sera" "'a" ?አ)
        (robin-modify-package "ethiopic-sera" "a" "አ")
        (robin-modify-package "ethiopic-sera" "'A" ?ኣ)
        (robin-modify-package "ethiopic-sera" "A" "ኣ"))
    (robin-modify-package "ethiopic-sera" "'A" ?አ)
    (robin-modify-package "ethiopic-sera" "A" "አ")
    (robin-modify-package "ethiopic-sera" "'a" ?ኣ)
    (robin-modify-package "ethiopic-sera" "a" "ኣ")))
(defun ethio-use-colon-for-colon (arg)
  "Set how the ASCII colon is treated by the \"ethiopic-sera\" package.
With non-nil ARG, a plain colon stands for the Ethiopic colon and a
backquoted colon for the Ethiopic word separator.  With nil ARG, a
plain colon (with or without surrounding spaces) stands for the word
separator and \"-:\" for the Ethiopic colon."
  (if arg
      (progn
        (robin-modify-package "ethiopic-sera" ":" ?፥)
        (robin-modify-package "ethiopic-sera" "`:" ?፡))
    (robin-modify-package "ethiopic-sera" " : " ?፡)
    (robin-modify-package "ethiopic-sera" ":" "፡")
    (robin-modify-package "ethiopic-sera" "-:" ?፥)))
(defun ethio-use-three-dot-question (arg)
  "Set how the ASCII question mark is treated by \"ethiopic-sera\".
With non-nil ARG, a plain question mark stands for the Ethiopic
three-dot question mark and a backquoted one stays an ASCII question
mark.  With nil ARG the two roles are swapped."
  (if arg
      (progn
        (robin-modify-package "ethiopic-sera" "?" ?፧)
        (robin-modify-package "ethiopic-sera" "`?" ??))
    (robin-modify-package "ethiopic-sera" "?" ??)
    (robin-modify-package "ethiopic-sera" "`?" ?፧)))
(defun ethio-adjust-robin ()
  "Make the \"ethiopic-sera\" rules agree with the user's preferences.
The preferences consulted are the language order (through
`ethio-prefer-amharic-p') and the variables `ethio-use-colon-for-colon'
and `ethio-use-three-dot-question'."
  (let ((amharic (ethio-prefer-amharic-p)))
    (ethio-prefer-amharic amharic))
  (ethio-use-colon-for-colon ethio-use-colon-for-colon)
  (ethio-use-three-dot-question ethio-use-three-dot-question))

;; Re-apply the preferences each time a robin package is activated.
(add-hook 'robin-activate-hook #'ethio-adjust-robin)
(defun ethio-sera-to-fidel-buffer (&optional secondary force)
  "Convert the current buffer from SERA to FIDEL.

The variable `ethio-primary-language' specifies the primary
language and `ethio-secondary-language' specifies the secondary.

If the 1st optional argument SECONDARY is non-nil, assume the
buffer begins with the secondary language; otherwise with the
primary language.

If the 2nd optional argument FORCE is non-nil, perform conversion
even if the buffer is read-only.

See also the descriptions of the variables
`ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
  ;; `setup-ethiopic-environment-internal' binds this function to [f4],
  ;; and only commands may be bound to keys.
  (interactive)
  (ethio-sera-to-fidel-region (point-min) (point-max) secondary force))
205 ;; To avoid byte-compiler warnings. It should never be set globally.
;; `ethio-find-file' and `ethio-write-file' let-bind it to t while they
;; convert .html files; the converters test it with `boundp' first.
206 (defvar ethio-sera-being-called-by-w3)
207 ;; This variable will be bound by some third-party package.
;; It is checked by the converters alongside the variable above.
208 (defvar sera-being-called-by-w3)
;; Region-level SERA --> FIDEL driver.  As far as this copy shows, it
;; rebinds the language and punctuation preference variables so that
;; changes made by tilde escapes are undone on exit, narrows to
;; BEGIN..END, dispatches to `ethio-sera-to-fidel-region-ethio' (after
;; selecting the Amharic or Tigrigna rules with `ethio-prefer-amharic')
;; or to `ethio-sera-to-fidel-region-noethio', and finally calls
;; `ethio-adjust-robin' to restore the user's robin rules.
;; NOTE(review): lines of this definition are missing in this copy (the
;; embedded numbering skips).  There is no `interactive' spec although
;; the function is bound to [S-f4], the `let' binding list is never
;; closed, and the loop/`cond' heads around the language dispatch are
;; absent.  Restore the body from upstream before relying on it.
211 (defun ethio-sera-to-fidel-region (begin end &optional secondary force)
212 "Convert the characters in region from SERA to FIDEL.
214 The variable `ethio-primary-language' specifies the primary
215 language and `ethio-secondary-language' specifies the secondary.
217 If the 3rd argument SECONDARY is given and non-nil, assume the
218 region begins with the secondary language; otherwise with the
221 If the 4th argument FORCE is given and non-nil, perform
222 conversion even if the buffer is read-only.
224 See also the descriptions of the variables
225 `ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
228 (if (and buffer-read-only
230 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
233 (let ((ethio-primary-language ethio-primary-language)
234 (ethio-secondary-language ethio-secondary-language)
235 ;; The above two variables may be changed temporarily by tilde
236 ;; escapes during conversion. We bind them to the variables
237 ;; of the same names so that the original values are restored
238 ;; when this function exits.
239 (buffer-read-only nil)
240 (lang (if secondary ethio-secondary-language ethio-primary-language))
243 (ethio-use-colon-for-colon ethio-use-colon-for-colon)
244 (ethio-use-three-dot-question ethio-use-three-dot-question)
247 (narrow-to-region begin end)
248 (goto-char (point-min))
253 (ethio-prefer-amharic t)
254 (ethio-sera-to-fidel-region-ethio 'amharic))
256 (ethio-prefer-amharic nil)
257 (ethio-sera-to-fidel-region-ethio 'tigrigna))
259 (ethio-sera-to-fidel-region-noethio))))
262 (if (eq lang ethio-primary-language)
263 ethio-secondary-language
264 ethio-primary-language)
267 ;; Restore user's preference.
268 (ethio-adjust-robin))
;; Scans a stretch of non-Ethiopic text for the next backslash and
;; decides from what follows it whether the language changes: a language
;; flag (handled by `ethio-process-language-flag'), a doubled backslash,
;; a backslash plus space, or a lone backslash.
;; NOTE(review): only the tests of the conditional clauses survive in
;; this copy; the `let' that should bind `lflag', the `cond' head and
;; every clause body/return value are missing.  Restore from upstream.
270 (defun ethio-sera-to-fidel-region-noethio ()
271 "Return next language as symbol: amharic, tigrigna, toggle or nil."
275 ;; No more "\", i.e. nothing to do.
276 ((not (search-forward "\\" nil 0))
279 ;; Hereafter point is put after a "\".
280 ;; First delete that "\", then check the following chars.
283 ((progn (delete-char -1) (setq lflag (ethio-process-language-flag)))
286 ;; "\\" : leave the second "\" and continue in the same language.
287 ((= (following-char) ?\\)
291 ;; "\ " : delete the following " " and toggle the language.
292 ((= (following-char) 32)
296 ;; A "\" but not a special sequence: simply toggle the language.
;; Converts a stretch of Ethiopic SERA text.  Visible steps: search for
;; the next special sequence (a backquote followed by digits, or one of
;; "\", "<", "&"), convert text through the "ethiopic-sera" robin
;; package, hand backquoted numbers to `ethio-process-digits', skip
;; markup up to ">" or ";" when called on behalf of w3, and interpret
;; backslash and tilde escapes (language flags, colon and question mark
;; preferences, and the "~!" comment-like escape).
;; NOTE(review): this copy has lost many lines (the embedded numbering
;; skips): the variable bindings, the `cond' heads, the tests of the
;; colon/question clauses and all return values are absent.  Restore the
;; body from upstream before relying on it.
300 (defun ethio-sera-to-fidel-region-ethio (lang)
301 "Return next language as symbol: amharic, tigrigna, toggle or nil."
305 (if (re-search-forward "\\(`[1-9][0-9]*\\)\\|[\\<&]" nil t)
308 (robin-convert-region (point-min) (point-max) "ethiopic-sera")
309 (goto-char (point-max)))
313 ((= (following-char) ?`)
315 (ethio-process-digits)
319 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
320 ethio-sera-being-called-by-w3)
321 (and (boundp 'sera-being-called-by-w3)
322 sera-being-called-by-w3))
323 (search-forward (if (= (following-char) ?<) ">" ";") nil 0)
330 ;; Now we must be looking at a "\".
331 ;; First delete that "\", then check the following chars.
333 ((progn (delete-char 1) (= (following-char) 32))
337 ((looking-at "[,.;:'`?\\]+")
338 (goto-char (match-end 0))
341 ((/= (following-char) ?~)
344 ;; Now we must be looking at a "~".
346 ((setq lflag (ethio-process-language-flag))
349 ;; Delete the following "~" and check the following chars.
351 ((progn (delete-char 1) (looking-at "! ?"))
353 (if (re-search-forward "\\\\~! ?" nil 0)
359 (ethio-use-colon-for-colon t)
364 (ethio-use-colon-for-colon nil)
369 (ethio-use-three-dot-question t)
372 ((looking-at "\\? ?")
374 (ethio-use-three-dot-question nil)
377 ;; Unknown tilde escape. Recover the deleted chars.
;; Parses a language flag at point.  A flag name is two or three
;; lowercase letters and must be followed by a space, tab, newline or
;; backslash; names are translated with `ethio-flag-to-language', and a
;; flag is only accepted when that translation is non-nil.
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the docstring is never closed, the `let' binding `lang1' and
;; `lang2', the `cond'/`looking-at' heads and the tail of the second
;; clause are missing.  Restore the body from upstream.
382 (defun ethio-process-language-flag nil
383 "Process a language flag of the form \"~lang\" or \"~lang1~lang2\".
385 If looking at \"~lang1~lang2\", set `ethio-primary-language' and
386 `ethio-secondary-language' based on \"lang1\" and \"lang2\".
387 Then delete the language flag \"~lang1~lang2\" from the buffer.
388 Return value is the new primary language.
390 If looking at \"~lang\", delete that language flag \"~lang\" from
391 the buffer and return that language. In this case
392 `ethio-primary-language' and `ethio-secondary-language' are left
395 If an unsupported language flag is found, just return nil without
403 "~\\([a-z][a-z][a-z]?\\)~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
404 (setq lang1 (ethio-flag-to-language (match-string 1)))
405 (setq lang2 (ethio-flag-to-language (match-string 2))))
406 (setq ethio-primary-language lang1
407 ethio-secondary-language lang2)
408 (delete-region (point) (match-end 2))
409 (if (= (following-char) 32)
411 ethio-primary-language)
414 ((and (looking-at "~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
415 (setq lang1 (ethio-flag-to-language (match-string 1))))
416 (delete-region (point) (match-end 1))
417 (if (= (following-char) 32)
(defun ethio-flag-to-language (flag)
  "Return the language symbol that the SERA language flag FLAG names.
FLAG is a string.  \"en\" and \"eng\" give `english', \"ti\" and \"tir\"
give `tigrigna', \"am\" and \"amh\" give `amharic'.  Return nil for any
other flag, which callers treat as \"unsupported\"."
  (cond
   ((or (string= flag "en") (string= flag "eng")) 'english)
   ((or (string= flag "ti") (string= flag "tir")) 'tigrigna)
   ((or (string= flag "am") (string= flag "amh")) 'amharic)
   (t nil)))
;; Visible pieces: the leading digit is read with `following-char' and
;; must be at least ?1, the zeros that follow it are counted, the digit
;; is replaced through one of two lookup vectors (tens 10..90 indexed
;; from ?1, units 2..9 indexed from ?2), and the ten-thousand sign is
;; inserted (/ z 4) times.
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the bindings of `ch' and `z', the loop bodies and the
;; conditions selecting each vector are missing.  Restore from upstream.
432 (defun ethio-process-digits nil
433 "Convert Arabic digits to Ethiopic digits."
435 (while (and (>= (setq ch (following-char)) ?1)
439 ;; count up following zeros
441 (while (= (following-char) ?0)
447 ;; first digit is 10, 20, ..., or 90
449 (insert (aref [?፲ ?፳ ?፴ ?፵ ?፶ ?፷ ?፸ ?፹ ?፺] (- ch ?1)))
452 ;; first digit is 2, 3, ..., or 9
454 (insert (aref [?፪ ?፫ ?፬ ?፭ ?፮ ?፯ ?፰ ?፱] (- ch ?2))))
465 (insert-char ?፼ (/ z 4)))))
;; Walks the buffer from its beginning, and for each "<sera>" marker
;; calls `ethio-sera-to-fidel-region' on the text up to the matching
;; "</sera>".
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the `interactive' spec (the function is bound to [C-f4]), the
;; FORCE test and error branch, and the arguments passed to
;; `ethio-sera-to-fidel-region' are missing.  Restore from upstream.
468 (defun ethio-sera-to-fidel-marker (&optional force)
469 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from SERA to FIDEL.
470 Assume that each region begins with `ethio-primary-language'.
471 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
473 (if (and buffer-read-only
475 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
478 (goto-char (point-min))
479 (while (search-forward "<sera>" nil t)
480 (ethio-sera-to-fidel-region
482 (if (search-forward "</sera>" nil t)
(defun ethio-language-to-flag (lang)
  "Return the three-letter SERA flag name for the language symbol LANG.
`english' gives \"eng\", `tigrigna' gives \"tir\" and `amharic' gives
\"amh\".  Return nil for any other value."
  (cond
   ((eq lang 'english) "eng")
   ((eq lang 'tigrigna) "tir")
   ((eq lang 'amharic) "amh")
   (t nil)))
(defun ethio-fidel-to-sera-buffer (&optional secondary force)
  "Replace all the FIDEL characters in the current buffer to the SERA format.
The variable `ethio-primary-language' specifies the primary
language and `ethio-secondary-language' specifies the secondary.

If the 1st optional argument SECONDARY is non-nil, try to convert the
region so that it begins with the secondary language; otherwise with the
primary language.

If the 2nd optional argument FORCE is non-nil, convert even if the
buffer is read-only.

See also the descriptions of the variables
`ethio-use-colon-for-colon', `ethio-use-three-dot-question',
`ethio-quote-vowel-always' and `ethio-numeric-reduction'."
  ;; `setup-ethiopic-environment-internal' binds this function to [f3],
  ;; and only commands may be bound to keys.
  (interactive)
  (ethio-fidel-to-sera-region (point-min) (point-max) secondary force))
;; Region-level FIDEL --> SERA driver.  Visible steps: narrow to
;; BEGIN..END; choose the Ethiopic language flag ("\~amh " or "\~tir ")
;; with `ethio-prefer-amharic-p' and use "\~eng " for English; then
;; alternate between an English mode, which searches for the next
;; Ethiopic character or backslash, and an Ethiopic mode, which inverts
;; each run of Ethiopic text through the "ethiopic-sera" robin package
;; and post-processes it according to `ethio-quote-vowel-always',
;; `ethio-W-sixth-always' and `ethio-numeric-reduction'.
;; NOTE(review): this copy has lost many lines (the embedded numbering
;; skips): the `interactive' spec (the function is bound to [S-f3]), the
;; bindings of `mode', `p' and `ch', the main loop head and several
;; clause bodies are missing.  Restore from upstream.
;; NOTE(review): in the final clause the w3 branch searches for "&"
;; where the SERA --> FIDEL counterpart searches for ";".  With point on
;; an "&" that only steps over it; confirm which terminator is intended.
520 (defun ethio-fidel-to-sera-region (begin end &optional secondary force)
521 "Replace all the FIDEL characters in the region to the SERA format.
523 The variable `ethio-primary-language' specifies the primary
524 language and `ethio-secondary-language' specifies the secondary.
526 If the 3rd argument SECONDARY is given and non-nil, convert
527 the region so that it begins with the secondary language; otherwise with
528 the primary language.
530 If the 4th argument FORCE is given and non-nil, convert even if the
533 See also the descriptions of the variables
534 `ethio-use-colon-for-colon', `ethio-use-three-dot-question',
535 `ethio-quote-vowel-always' and `ethio-numeric-reduction'."
538 (if (and buffer-read-only
540 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
544 (narrow-to-region begin end)
546 (let ((buffer-read-only nil)
548 ethio-secondary-language
549 ethio-primary-language))
550 (flag (if (ethio-prefer-amharic-p) "\\~amh " "\\~tir "))
553 (goto-char (point-min))
555 (unless (eq mode 'english)
556 (setq mode 'ethiopic))
557 (if (and (eq mode 'english) (looking-at "\\ce"))
558 (setq mode 'ethiopic))
559 (if (and (eq mode 'ethiopic) (looking-at "\\Ce"))
560 (setq mode 'english))
561 (insert (if (eq mode 'english) "\\~eng " flag))
565 (if (eq mode 'english)
567 (if (re-search-forward "\\(\\ce\\|\\\\\\)" nil 0)
570 ((eq (following-char) ?\\)
575 (setq mode 'ethiopic))))
577 ;; If we reach here, mode is ethiopic.
579 (if (re-search-forward "[a-z,.;:'`?\\<&]" nil 0)
582 (narrow-to-region p (point))
583 (robin-invert-region (point-min) (point-max) "ethiopic-sera")
585 ;; ethio-quote-vowel-always
586 (goto-char (point-min))
587 (while (re-search-forward "'[eauio]" nil t)
590 (setq ch (preceding-char))
591 (if (or (and (>= ch ?a) (<= ch ?z))
592 (and (>= ch ?A) (<= ch ?Z)))
593 (if (and (not ethio-quote-vowel-always)
594 (memq ch '(?e ?a ?u ?i ?o ?E ?A ?I)))
598 ;; ethio-W-sixth-always
599 (unless ethio-W-sixth-always
600 (goto-char (point-min))
601 (while (search-forward "W'" nil t)
605 ;; ethio-numeric-reduction
606 (when (> ethio-numeric-reduction 0)
607 (goto-char (point-min))
608 (while (re-search-forward "\\([0-9]\\)`\\([0-9]\\)" nil t)
609 (replace-match "\\1\\2")
611 (when (= ethio-numeric-reduction 2)
612 (goto-char (point-min))
613 (while (re-search-forward "\\([0-9]\\)1\\(0+\\)" nil t)
614 (replace-match "\\1\\2")))
616 (goto-char (point-max)))
619 ((looking-at "[a-z]")
621 (setq mode 'english))
622 ((looking-at "[,.;:'`\\]+")
624 (goto-char (1+ (match-end 0))))
625 ((= (following-char) ??)
626 (if ethio-use-three-dot-question
630 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
631 ethio-sera-being-called-by-w3)
632 (and (boundp 'sera-being-called-by-w3)
633 sera-being-called-by-w3))
634 (search-forward (if (= (following-char) ?<) ">" "&") nil 0)
635 (forward-char 1)))))))))
;; Walks the buffer from its beginning, and for each "<sera>" marker
;; calls `ethio-fidel-to-sera-region' on the text up to the matching
;; "</sera>".
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the `interactive' spec (the function is bound to [C-f3]), the
;; FORCE test and error branch, and the arguments passed to
;; `ethio-fidel-to-sera-region' are missing.  Restore from upstream.
638 (defun ethio-fidel-to-sera-marker (&optional force)
639 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from FIDEL to SERA.
640 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
643 (if (and buffer-read-only
645 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
648 (goto-char (point-min))
649 (while (search-forward "<sera>" nil t)
650 (ethio-fidel-to-sera-region
652 (if (search-forward "</sera>" nil t)
659 ;; vowel modification
;; Visible steps: obtain the consonant part of the character after
;; point with `ethio-get-consonant', read the new vowel with
;; `read-char', then insert consonant + vowel into an empty narrowed
;; region and convert that through the "ethiopic-sera" robin package.
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the `interactive' spec (the function is bound to [S-f6]), the
;; binding of `vowel', the test guarding the `error' call and the branch
;; taken for the "'" + W combination are missing.  Restore from upstream.
663 (defun ethio-modify-vowel nil
664 "Modify the vowel of the FIDEL that is under the cursor."
667 (let ((consonant (ethio-get-consonant (following-char)))
670 (error "") ; not an Ethiopic char
671 (setq vowel (read-char "Modify vowel to: "))
673 (if (and (string= consonant "'") (= vowel ?W))
676 (narrow-to-region (point) (point))
677 (insert consonant vowel)
678 (robin-convert-region (point-min) (point-max) "ethiopic-sera"))))))
;; Looks up the SERA spelling of CH in its `ethiopic-sera' char-code
;; property and strips a trailing vowel letter (or apostrophe) from it;
;; the one character spelled with two vowel letters is special-cased to
;; return "'".
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the `cond' head, the clause for characters without a SERA
;; spelling and the temporary-buffer setup are missing.  Restore from
;; upstream.
680 (defun ethio-get-consonant (ch)
681 "Return the consonant part of CH's SERA spelling in ethiopic-sera."
682 (let ((sera (get-char-code-property ch 'ethiopic-sera)))
685 ((= ch ?ኧ) "'") ; Only this has two vowel letters.
688 (if (memq (preceding-char) '(?e ?u ?i ?a ?o ?E ?I ?A ?'))
690 (buffer-substring (point-min) (point-max)))))))
;; Interactive command: CH is read as a character (interactive code
;; "c") and BEGIN/END come from the region.  CH is validated against the
;; characters ?1, ?2 and ?3, so "CH = 1" in the docstring means the
;; digit character, not the integer.
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the error branch, the `cond' that selects among the three
;; modes and the bodies of several loops are missing.
;; NOTE(review): runs of spaces appear to have been collapsed in this
;; copy: the pattern and replacement for "two ASCII spaces" and for
;; "two ASCII spaces" output both show a single space, which makes those
;; replacements no-ops as written.  Restore the literals from upstream.
697 (defun ethio-replace-space (ch begin end)
698 "Replace ASCII spaces with Ethiopic word separators in the region.
700 In the specified region, replace word separators surrounded by two
701 Ethiopic characters, depending on the first argument CH, which should
704 If CH = 1, word separator will be replaced with an ASCII space.
705 If CH = 2, with two ASCII spaces.
706 If CH = 3, with the Ethiopic colon-like word separator.
708 The 2nd and 3rd arguments BEGIN and END specify the region."
710 (interactive "*cReplace spaces to: 1 (sg col), 2 (dbl col), 3 (Ethiopic)\nr")
711 (if (not (memq ch '(?1 ?2 ?3)))
715 (narrow-to-region begin end)
719 ;; an Ethiopic word separator --> an ASCII space
720 (goto-char (point-min))
721 (while (search-forward "፡" nil t)
724 ;; two ASCII spaces between Ethiopic characters --> an ASCII space
725 (goto-char (point-min))
726 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
727 (replace-match "\\1 \\2")
731 ;; An Ethiopic word separator --> two ASCII spaces
732 (goto-char (point-min))
733 (while (search-forward "፡" nil t)
736 ;; An ASCII space between Ethiopic characters --> two ASCII spaces
737 (goto-char (point-min))
738 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
739 (replace-match "\\1 \\2")
743 ;; One or two ASCII spaces between Ethiopic characters
744 ;; --> An Ethiopic word separator
745 (goto-char (point-min))
746 (while (re-search-forward "\\(\\ce\\) ?\\(\\ce\\)" nil t)
747 (replace-match "\\1፡\\2")
750 ;; Three or more ASCII spaces between Ethiopic characters
751 ;; --> An Ethiopic word separator + (N - 2) ASCII spaces
752 (goto-char (point-min))
753 (while (re-search-forward "\\(\\ce\\) \\( +\\ce\\)" nil t)
754 (replace-match "\\1፡\\2")
755 (forward-char -1)))))))
761 ;; This function is deprecated.
;; Deprecated command; ARG is read as a character (interactive code "c").
;; NOTE(review): the body that acts on ARG and the closing parenthesis
;; are missing in this copy (the embedded numbering skips), and the
;; prompt seems to have lost the characters it offered after each
;; number.  Restore from upstream or remove the command.
763 (defun ethio-input-special-character (arg)
764 "This function is deprecated."
765 (interactive "*cInput number: 1. 2. 3. 4. 5.")
;; Visible steps: with the buffer made writable, rewrite geminated
;; characters as \geminateG{...} (decomposing any composition found),
;; rewrite special punctuation next to Ethiopic characters (for example
;; a closing guillemet becomes \rquoteG), invert the remaining Ethiopic
;; characters through the "ethiopic-tex" robin package, then move to the
;; beginning of the buffer and clear the modified flag.
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the bindings of `comp' and `ch', parts of the gemination loop
;; and most punctuation clauses are missing.
;; NOTE(review): the second alternative of the gemination regexp is
;; empty here, presumably a private-use character that did not survive
;; transcoding; an empty alternative matches everywhere.  Verify against
;; upstream.
785 (defun ethio-fidel-to-tex-buffer nil
786 "Convert each fidel characters in the current buffer into a fidel-tex command."
788 (let ((buffer-read-only nil)
791 ;; Special treatment for geminated characters.
792 ;; Geminated characters la", etc. change into \geminateG{\laG}, etc.
793 (goto-char (point-min))
794 (while (re-search-forward "፟\\|" nil t)
795 (setq comp (find-composition (match-beginning 0)))
797 (replace-match "\\\\geminateG{}" t)
798 (decompose-region (car comp) (cadr comp))
801 (insert "\\geminateG{")
805 ;; Special Ethiopic punctuation.
806 (goto-char (point-min))
807 (while (re-search-forward "\\ce[»\\.\\?]\\|«\\ce" nil t)
809 ((= (setq ch (preceding-char)) ?\»)
811 (insert "\\rquoteG"))
824 ;; Ethiopic characters to TeX macros
825 (robin-invert-region (point-min) (point-max) "ethiopic-tex")
827 (goto-char (point-min))
828 (set-buffer-modified-p nil)))
(defun ethio-tex-to-fidel-buffer nil
  "Convert fidel-tex commands in the current buffer into fidel chars.
The conversion is done even if the buffer is read-only.  Afterwards
point is at the beginning of the buffer and the buffer is marked as
unmodified."
  (let ((buffer-read-only nil))

    ;; TeX macros to Ethiopic characters
    (robin-convert-region (point-min) (point-max) "ethiopic-tex")

    ;; compose geminated characters: \geminateG{X} becomes X followed by
    ;; the gemination mark
    (goto-char (point-min))
    (while (re-search-forward "\\\\geminateG{\\(\\ce?\\)}" nil t)
      (replace-match "\\1፟"))

    ;; remove redundant braces, if any
    (goto-char (point-min))
    (while (re-search-forward "{\\(\\ce\\)}" nil t)
      (replace-match "\\1"))

    (goto-char (point-min))
    (set-buffer-modified-p nil)))
(defun ethio-fidel-to-java-buffer nil
  "Convert Ethiopic characters into the Java escape sequences.

Each escape sequence is of the form \\uXXXX, where XXXX is the
character's codepoint (in hex) in Unicode.

If `ethio-java-save-lowercase' is non-nil, use [0-9a-f].
Otherwise, [0-9A-F]."
  ;; Every character in the matched range is at least #x1000, so the
  ;; width-4 format always yields exactly four hex digits.
  (let ((escape-format (if ethio-java-save-lowercase "\\u%4x" "\\u%4X")))
    (goto-char (point-min))
    (while (re-search-forward "[ሀ-፼]" nil t)
      ;; The replacement contains a backslash, so it must be inserted
      ;; literally and without case conversion.
      (replace-match (format escape-format (preceding-char)) t t))))
(defun ethio-java-to-fidel-buffer nil
  "Convert the Java escape sequences into corresponding Ethiopic characters.
Only escapes whose codepoint lies in the range U+1200..U+137F are
replaced; any other \\uXXXX sequence is left as it is.  Hex digits are
accepted in either case."
  (let ((case-fold-search t)
        ucode)
    (goto-char (point-min))
    (while (re-search-forward "\\\\u\\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\\)" nil t)
      (setq ucode (string-to-number (match-string 1) 16))
      (when (and (>= ucode #x1200) (<= ucode #x137f))
        ;; Insert the character as is: no case conversion and no
        ;; interpretation of backslashes in the replacement.
        (replace-match (char-to-string ucode) t t)))))
;; Hook function (added to `find-file-hook' and `after-save-hook' by
;; `setup-ethiopic-environment-internal').  It dispatches on the suffix
;; of the visited file name: .sera buffers are converted with
;; `ethio-sera-to-fidel-buffer', .html buffers have their <sera> regions
;; converted (with `ethio-sera-being-called-by-w3' bound to t) and their
;; &laquo;/&raquo; entities replaced, .tex buffers go through
;; `ethio-tex-to-fidel-buffer' and .java buffers through
;; `ethio-java-to-fidel-buffer'.  Each branch clears the modified flag.
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the `cond' head, the wrapper form opening each branch and the
;; replacement used for the "l" entity are missing.  Restore from
;; upstream.
892 (defun ethio-find-file nil
893 "Transliterate file content into Ethiopic depending on filename suffix."
896 ((string-match "\\.sera$" (buffer-file-name))
898 (ethio-sera-to-fidel-buffer nil 'force)
899 (set-buffer-modified-p nil)))
901 ((string-match "\\.html$" (buffer-file-name))
902 (let ((ethio-sera-being-called-by-w3 t))
904 (ethio-sera-to-fidel-marker 'force)
905 (goto-char (point-min))
906 (while (re-search-forward "&[lr]aquo;" nil t)
907 (if (= (char-after (1+ (match-beginning 0))) ?l)
909 (replace-match "»")))
910 (set-buffer-modified-p nil))))
912 ((string-match "\\.tex$" (buffer-file-name))
914 (ethio-tex-to-fidel-buffer)
915 (set-buffer-modified-p nil)))
917 ((string-match "\\.java$" (buffer-file-name))
919 (ethio-java-to-fidel-buffer)
920 (set-buffer-modified-p nil)))
;; Hook function (added to `write-file-functions' by
;; `setup-ethiopic-environment-internal').  It dispatches on the suffix
;; of the visited file name: .sera buffers are converted with
;; `ethio-fidel-to-sera-buffer' and get the user's preferences recorded
;; at the top, .html buffers have their <sera> regions converted and the
;; preferences recorded after the first "<sera>", .tex buffers go through
;; `ethio-fidel-to-tex-buffer' and .java buffers through
;; `ethio-fidel-to-java-buffer'.  Each branch clears the modified flag.
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the `cond' head, the wrapper form opening each branch and the
;; function's final return value are missing.
;; NOTE(review): in the .html branch the guillemets are replaced by
;; themselves, which is a no-op.  `ethio-find-file' turns the
;; &laquo;/&raquo; entities into guillemets, so the replacements here
;; were presumably those entity strings before this copy was decoded;
;; verify against upstream.
926 (defun ethio-write-file nil
927 "Transliterate Ethiopic characters in ASCII depending on the file extension."
930 ((string-match "\\.sera$" (buffer-file-name))
932 (ethio-fidel-to-sera-buffer nil 'force)
933 (goto-char (point-min))
934 (ethio-record-user-preference)
935 (set-buffer-modified-p nil)))
937 ((string-match "\\.html$" (buffer-file-name))
939 (let ((ethio-sera-being-called-by-w3 t))
940 (ethio-fidel-to-sera-marker 'force)
941 (goto-char (point-min))
942 (while (re-search-forward "[«»]" nil t)
943 (replace-match (if (= (preceding-char) ?«) "«" "»")))
944 (goto-char (point-min))
945 (if (search-forward "<sera>" nil t)
946 (ethio-record-user-preference))
947 (set-buffer-modified-p nil))))
949 ((string-match "\\.tex$" (buffer-file-name))
951 (ethio-fidel-to-tex-buffer)
952 (set-buffer-modified-p nil)))
954 ((string-match "\\.java$" (buffer-file-name))
956 (ethio-fidel-to-java-buffer)
957 (set-buffer-modified-p nil)))
(defun ethio-record-user-preference nil
  "Insert at point the tilde escapes recording the user's preferences.
Two escapes are inserted, each followed by a space: one for
`ethio-use-colon-for-colon' and one for `ethio-use-three-dot-question'."
  (let ((colon-escape (if ethio-use-colon-for-colon "\\~-: " "\\~`: "))
        (question-escape (if ethio-use-three-dot-question "\\~`| " "\\~? ")))
    (insert colon-escape question-escape)))
967 ;; Ethiopic word separator vs. ASCII space
(defvar ethio-prefer-ascii-space t
  "Non-nil means `ethio-insert-space' inserts ASCII spaces.
If nil, it inserts the Ethiopic word separator where appropriate.
Toggled by `ethio-toggle-space'.  Automatically buffer-local when set.")
(make-variable-buffer-local 'ethio-prefer-ascii-space)
(defun ethio-toggle-space nil
  "Toggle ASCII space and Ethiopic separator for keyboard input.
This flips `ethio-prefer-ascii-space' and returns its new value."
  ;; `setup-ethiopic-environment-internal' binds this function to
  ;; [C-f9], and only commands may be bound to keys.
  (interactive)
  (setq ethio-prefer-ascii-space
        (not ethio-prefer-ascii-space)))
(defun ethio-insert-space (arg)
  "Insert ASCII spaces or Ethiopic word separators depending on context.

If the current word separator (indicated in mode-line) is the ASCII space,
insert an ASCII space.  With ARG, insert that many ASCII spaces.

If the current word separator is the colon-like Ethiopic word
separator and the point is preceded by `an Ethiopic punctuation mark
followed by zero or more ASCII spaces', then insert also an ASCII
space.  With ARG, insert that many ASCII spaces.

Otherwise, insert a colon-like Ethiopic word separator.  With ARG, insert that
many Ethiopic word separators."
  ;; Bound to the space key by `setup-ethiopic-environment-internal', so
  ;; this has to be a command; ARG is the numeric prefix argument.
  (interactive "*p")
  (cond
   (ethio-prefer-ascii-space
    (insert-char 32 arg))
   ;; Look back over any spaces without moving point.
   ((save-excursion
      (skip-chars-backward " ")
      (memq (preceding-char)
            ;; NOTE(review): the list once ended with five more
            ;; punctuation characters that are not representable in
            ;; Unicode and are unreadable in the copy this was restored
            ;; from; add them back from upstream if they are needed.
            '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨)))
    (insert-char 32 arg))
   (t
    (insert-char ?፡ arg))))
(defun ethio-insert-ethio-space (arg)
  "Insert the Ethiopic word delimiter (the colon-like character).
With ARG, insert that many delimiters."
  ;; Bound to Shift-space by `setup-ethiopic-environment-internal', so
  ;; this has to be a command; ARG is the numeric prefix argument.
  (interactive "*p")
  (insert-char ?፡ arg))
;; Composes an Ethiopic character with a following gemination mark.
;; Visible behaviour: when the pattern matches in STRING exactly at POS,
;; compose that part of the string and return the end of the match;
;; otherwise, for a buffer position, compose the matching buffer text and
;; return the end of the match.
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the docstring, the test choosing between the string and
;; buffer cases and the `save-excursion'/`goto-char' before `looking-at'
;; are missing.  TO and FONT-OBJECT are not used in what remains.
;; NOTE(review): the second alternative of the pattern is empty here,
;; presumably a private-use character that did not survive transcoding;
;; as written the group also matches the empty string.  Verify against
;; upstream.
1017 (defun ethio-composition-function (pos to font-object string)
1019 (let ((pattern "\\ce\\(፟\\|\\)"))
1022 (eq (string-match pattern string pos) pos))
1023 (prog1 (match-end 0)
1024 (compose-string string pos (match-end 0))))
1025 (if (>= pos (point-min))
1028 (if (looking-at pattern)
1029 (prog1 (match-end 0)
1030 (compose-region pos (match-end 0)))))))))
1032 ;; This function is not used any more.
;; Visible behaviour: if the character before point is the gemination
;; mark and belongs to a composition, decompose the last two characters;
;; if it is in the range #x1200..#x137f, compose the last two characters.
;; NOTE(review): this copy has lost lines (the embedded numbering
;; skips): the end of the docstring, the `cond' head, the deletion or
;; insertion of the mark and the fallback clause are missing.
;; NOTE(review): the character literal compared with `ch' in the first
;; clause is empty here, presumably a private-use character lost in
;; transcoding, so that clause now reads the closing parenthesis as the
;; character.  Verify against upstream.
1033 (defun ethio-gemination nil
1034 "Compose the character before the point with the Ethiopic gemination mark.
1035 If the character is already composed, decompose it and remove the gemination
1038 (let ((ch (preceding-char)))
1040 ((and (= ch ?) (find-composition (1- (point))))
1041 (decompose-region (- (point) 2) (point)))
1042 ((and (>= ch #x1200) (<= ch #x137f))
1044 (compose-region (- (point) 2) (point)))
1052 (robin-define-package "ethiopic-sera"
1053 "SERA transliteration system for Ethiopic."
1059 ("hE" ?ሄ) ("hee" "ሄ")
1063 ("le" ?ለ) ("Le" "ለ")
1064 ("lu" ?ሉ) ("Lu" "ሉ")
1065 ("li" ?ሊ) ("Li" "ሊ")
1066 ("la" ?ላ) ("La" "ላ")
1067 ("lE" ?ሌ) ("LE" "ሌ") ("lee" "ሌ") ("Lee" "ሌ")
1069 ("lo" ?ሎ) ("Lo" "ሎ")
1070 ("lWa" ?ሏ) ("LWa" "ሏ") ("lW" "ሏ") ("LW" "ሏ")
1076 ("HE" ?ሔ) ("Hee" "ሔ")
1079 ("HWa" ?ሗ) ("HW" "ሗ")
1081 ("me" ?መ) ("Me" "መ")
1082 ("mu" ?ሙ) ("Mu" "ሙ")
1083 ("mi" ?ሚ) ("Mi" "ሚ")
1084 ("ma" ?ማ) ("Ma" "ማ")
1085 ("mE" ?ሜ) ("ME" "ሜ") ("mee" "ሜ") ("Mee" "ሜ")
1087 ("mo" ?ሞ) ("Mo" "ሞ")
1088 ("mWa" ?ሟ) ("MWa" "ሟ") ("mW" "ሟ") ("MW" "ሟ")
1090 ("`se" ?ሠ) ("sse" "ሠ") ("s2e" "ሠ")
1091 ("`su" ?ሡ) ("ssu" "ሡ") ("s2u" "ሡ")
1092 ("`si" ?ሢ) ("ssi" "ሢ") ("s2i" "ሢ")
1093 ("`sa" ?ሣ) ("ssa" "ሣ") ("s2a" "ሣ")
1094 ("`sE" ?ሤ) ("ssE" "ሤ") ("s2E" "ሤ")
1095 ("`see" "ሤ") ("ssee" "ሤ") ("s2ee" "ሤ")
1096 ("`s" ?ሥ) ("ss" "ሥ") ("s2" "ሥ")
1097 ("`so" ?ሦ) ("sso" "ሦ") ("s2o" "ሦ")
1098 ("`sWa" ?ሧ) ("ssWa" "ሧ") ("s2Wa" "ሧ")
1099 ("`sW" "ሧ") ("ssW" "ሧ") ("s2W" "ሧ")
1101 ("re" ?ረ) ("Re" "ረ")
1102 ("ru" ?ሩ) ("Ru" "ሩ")
1103 ("ri" ?ሪ) ("Ri" "ሪ")
1104 ("ra" ?ራ) ("Ra" "ራ")
1105 ("rE" ?ሬ) ("RE" "ሬ") ("ree" "ሬ") ("Ree" "ሬ")
1107 ("ro" ?ሮ) ("Ro" "ሮ")
1108 ("rWa" ?ሯ) ("RWa" "ሯ") ("rW" "ሯ") ("RW" "ሯ")
1114 ("sE" ?ሴ) ("see" "ሴ")
1117 ("sWa" ?ሷ) ("sW" "ሷ")
1123 ("xE" ?ሼ) ("xee" "ሼ")
1126 ("xWa" ?ሿ) ("xW" "ሿ")
1132 ("qE" ?ቄ) ("qee" "ቄ")
1137 ("qWa" ?ቋ) ("qW" "ቋ")
1138 ("qWE" ?ቌ) ("qWee" "ቌ")
1139 ("qW'" ?ቍ) ("qWu" "ቍ")
1145 ("QE" ?ቔ) ("Qee" "ቔ")
1150 ("QWa" ?ቛ) ("QW" "ቛ")
1151 ("QWE" ?ቜ) ("QWee" "ቜ")
1152 ("QW'" ?ቝ) ("QWu" "ቝ")
1154 ("be" ?በ) ("Be" "በ")
1155 ("bu" ?ቡ) ("Bu" "ቡ")
1156 ("bi" ?ቢ) ("Bi" "ቢ")
1157 ("ba" ?ባ) ("Ba" "ባ")
1158 ("bE" ?ቤ) ("BE" "ቤ") ("bee" "ቤ") ("Bee" "ቤ")
1160 ("bo" ?ቦ) ("Bo" "ቦ")
1161 ("bWa" ?ቧ) ("BWa" "ቧ") ("bW" "ቧ") ("BW" "ቧ")
1163 ("ve" ?ቨ) ("Ve" "ቨ")
1164 ("vu" ?ቩ) ("Vu" "ቩ")
1165 ("vi" ?ቪ) ("Vi" "ቪ")
1166 ("va" ?ቫ) ("Va" "ቫ")
1167 ("vE" ?ቬ) ("VE" "ቬ") ("vee" "ቬ") ("Vee" "ቬ")
1169 ("vo" ?ቮ) ("Vo" "ቮ")
1170 ("vWa" ?ቯ) ("VWa" "ቯ") ("vW" "ቯ") ("VW" "ቯ")
1176 ("tE" ?ቴ) ("tee" "ቴ")
1179 ("tWa" ?ቷ) ("tW" "ቷ")
1185 ("cE" ?ቼ) ("cee" "ቼ")
1188 ("cWa" ?ቿ) ("cW" "ቿ")
1190 ("`he" ?ኀ) ("hhe" "ኀ") ("h2e" "ኀ")
1191 ("`hu" ?ኁ) ("hhu" "ኁ") ("h2u" "ኁ")
1192 ("`hi" ?ኂ) ("hhi" "ኂ") ("h2i" "ኂ")
1193 ("`ha" ?ኃ) ("hha" "ኃ") ("h2a" "ኃ")
1194 ("`hE" ?ኄ) ("hhE" "ኄ") ("h2E" "ኄ")
1195 ("`hee" "ኄ") ("hhee" "ኄ") ("h2ee" "ኄ")
1196 ("`h" ?ኅ) ("hh" "ኅ") ("h2" "ኅ")
1197 ("`ho" ?ኆ) ("hho" "ኆ") ("h2o" "ኆ")
1198 ("`hWe" ?ኈ) ("hhWe" "ኈ") ("h2We" "ኈ") ("hWe" "ኈ")
1199 ("`hWi" ?ኊ) ("hhWi" "ኊ") ("h2Wi" "ኊ") ("hWi" "ኊ")
1200 ("`hWa" ?ኋ) ("hhWa" "ኋ") ("h2Wa" "ኋ") ("hWa" "ኋ")
1201 ("`hW" "ኋ") ("hhW" "ኋ") ("h2W" "ኋ")
1202 ("`hWE" ?ኌ) ("hhWE" "ኌ") ("h2WE" "ኌ") ("hWE" "ኌ")
1203 ("`hWee" "ኌ") ("hhWee" "ኌ") ("h2Wee" "ኌ") ("hWee" "ኌ")
1204 ("`hW'" ?ኍ) ("hhW'" "ኍ") ("h2W'" "ኍ") ("hW'" "ኍ")
1205 ("`hWu" "ኍ") ("hhWu" "ኍ") ("h2Wu" "ኍ") ("hWu" "ኍ")
1211 ("nE" ?ኔ) ("nee" "ኔ")
1214 ("nWa" ?ኗ) ("nW" "ኗ")
1220 ("NE" ?ኜ) ("Nee" "ኜ")
1223 ("NWa" ?ኟ) ("NW" "ኟ")
1226 ("'u" ?ኡ) ("u" "ኡ") ("'U" "ኡ") ("U" "ኡ")
1230 ("'I" ?እ) ("I" "እ") ("'e" "እ") ("e" "እ")
1231 ("'o" ?ኦ) ("o" "ኦ") ("'O" "ኦ") ("O" "ኦ")
1232 ("'ea" ?ኧ) ("ea" "ኧ")
1238 ("kE" ?ኬ) ("kee" "ኬ")
1243 ("kWa" ?ኳ) ("kW" "ኳ")
1244 ("kWE" ?ኴ) ("kWee" "ኴ")
1245 ("kW'" ?ኵ) ("kWu" "ኵ")
1251 ("KE" ?ኼ) ("Kee" "ኼ")
1256 ("KWa" ?ዃ) ("KW" "ዃ")
1257 ("KWE" ?ዄ) ("KWee" "ዄ")
1258 ("KW'" ?ዅ) ("KWu" "ዅ")
1264 ("wE" ?ዌ) ("wee" "ዌ")
1268 ("`e" ?ዐ) ("ae" "ዐ") ("aaa" "ዐ") ("e2" "ዐ")
1269 ("`u" ?ዑ) ("uu" "ዑ") ("u2" "ዑ") ("`U" "ዑ") ("UU" "ዑ") ("U2" "ዑ")
1270 ("`i" ?ዒ) ("ii" "ዒ") ("i2" "ዒ")
1271 ("`a" ?ዓ) ("aa" "ዓ") ("a2" "ዓ") ("`A" "ዓ") ("AA" "ዓ") ("A2" "ዓ")
1272 ("`E" ?ዔ) ("EE" "ዔ") ("E2" "ዔ")
1273 ("`I" ?ዕ) ("II" "ዕ") ("I2" "ዕ") ("ee" "ዕ")
1274 ("`o" ?ዖ) ("oo" "ዖ") ("o2" "ዖ") ("`O" "ዖ") ("OO" "ዖ") ("O2" "ዖ")
1280 ("zE" ?ዜ) ("zee" "ዜ")
1283 ("zWa" ?ዟ) ("zW" "ዟ")
1289 ("ZE" ?ዤ) ("Zee" "ዤ")
1292 ("ZWa" ?ዧ) ("ZW" "ዧ")
1294 ("ye" ?የ) ("Ye" "የ")
1295 ("yu" ?ዩ) ("Yu" "ዩ")
1296 ("yi" ?ዪ) ("Yi" "ዪ")
1297 ("ya" ?ያ) ("Ya" "ያ")
1298 ("yE" ?ዬ) ("YE" "ዬ") ("yee" "ዬ") ("Yee" "ዬ")
1300 ("yo" ?ዮ) ("Yo" "ዮ")
1306 ("dE" ?ዴ) ("dee" "ዴ")
1309 ("dWa" ?ዷ) ("dW" "ዷ")
1315 ("DE" ?ዼ) ("Dee" "ዼ")
1318 ("DWa" ?ዿ) ("DW" "ዿ")
1320 ("je" ?ጀ) ("Je" "ጀ")
1321 ("ju" ?ጁ) ("Ju" "ጁ")
1322 ("ji" ?ጂ) ("Ji" "ጂ")
1323 ("ja" ?ጃ) ("Ja" "ጃ")
1324 ("jE" ?ጄ) ("JE" "ጄ") ("jee" "ጄ") ("Jee" "ጄ")
1326 ("jo" ?ጆ) ("Jo" "ጆ")
1327 ("jWa" ?ጇ) ("jW" "ጇ") ("JWa" "ጇ") ("JW" "ጇ")
1333 ("gE" ?ጌ) ("gee" "ጌ")
1338 ("gWa" ?ጓ) ("gW" "ጓ")
1339 ("gWE" ?ጔ) ("gWee" "ጔ")
1340 ("gW'" ?ጕ) ("gWu" "ጕ")
1346 ("GE" ?ጜ) ("Gee" "ጜ")
1354 ("TE" ?ጤ) ("Tee" "ጤ")
1357 ("TWa" ?ጧ) ("TW" "ጧ")
1363 ("CE" ?ጬ) ("Cee" "ጬ")
1366 ("CWa" ?ጯ) ("CW" "ጯ")
1372 ("PE" ?ጴ) ("Pee" "ጴ")
1375 ("PWa" ?ጷ) ("PW" "ጷ")
1381 ("SE" ?ጼ) ("See" "ጼ")
1384 ("SWa" ?ጿ) ("`SWa" "ጿ") ("SSWa" "ጿ") ("S2Wa" "ጿ")
1385 ("SW" "ጿ") ("`SW" "ጿ") ("SSW" "ጿ") ("S2W" "ጿ")
1387 ("`Se" ?ፀ) ("SSe" "ፀ") ("S2e" "ፀ")
1388 ("`Su" ?ፁ) ("SSu" "ፁ") ("S2u" "ፁ")
1389 ("`Si" ?ፂ) ("SSi" "ፂ") ("S2i" "ፂ")
1390 ("`Sa" ?ፃ) ("SSa" "ፃ") ("S2a" "ፃ")
1391 ("`SE" ?ፄ) ("SSE" "ፄ") ("S2E" "ፄ")
1392 ("`See" "ፄ") ("SSee" "ፄ") ("S2ee" "ፄ")
1393 ("`S" ?ፅ) ("SS" "ፅ") ("S2" "ፅ")
1394 ("`So" ?ፆ) ("SSo" "ፆ") ("S2o" "ፆ")
1396 ("fe" ?ፈ) ("Fe" "ፈ")
1397 ("fu" ?ፉ) ("Fu" "ፉ")
1398 ("fi" ?ፊ) ("Fi" "ፊ")
1399 ("fa" ?ፋ) ("Fa" "ፋ")
1400 ("fE" ?ፌ) ("FE" "ፌ") ("fee" "ፌ") ("Fee" "ፌ")
1402 ("fo" ?ፎ) ("Fo" "ፎ")
1403 ("fWa" ?ፏ) ("FWa" "ፏ") ("fW" "ፏ") ("FW" "ፏ")
1409 ("pE" ?ፔ) ("pee" "ፔ")
1412 ("pWa" ?ፗ) ("pW" "ፗ")
1414 ("rYa" ?ፘ) ("RYa" "ፘ") ("rY" "ፘ") ("RY" "ፘ")
1415 ("mYa" ?ፙ) ("MYa" "ፙ") ("mY" "ፙ") ("MY" "ፙ")
1416 ("fYa" ?ፚ) ("FYa" "ፚ") ("fY" "ፚ") ("FY" "ፚ")
1418 (" : " ?፡) (":" "፡") ("`:" "፡")
1424 ("`?" ?፧) ("??" "፧")
1425 (":|:" ?፨) ("**" "፨")
1427 ;; Explicit syllable delimiter
1430 ;; Quick ASCII input
;; Register the "ethiopic-sera" robin package as an input method for the
;; "Ethiopic" language environment; `robin-use-package' is the function
;; that activates it and "et" is its title.
1495 (register-input-method
1496 "ethiopic-sera" "Ethiopic"
1497 'robin-use-package "et" "An input method for Ethiopic.")
1499 (robin-define-package "ethiopic-tex"
1500 "TeX transliteration system for Ethiopic."
1502 ("\\heG" ?ሀ) ; U+1200 ..
1519 ("\\HeG" ?ሐ) ; U+1210 ..
1536 ("\\sseG" ?ሠ) ; U+1220 ..
1553 ("\\seG" ?ሰ) ; U+1230 ..
1570 ("\\qeG" ?ቀ) ; U+1240 ..
1587 ("\\QeG" ?ቐ) ; U+1250 ..
1604 ("\\beG" ?በ) ; U+1260 ..
1621 ("\\teG" ?ተ) ; U+1270 ..
1638 ("\\hheG" ?ኀ) ; U+1280 ..
1655 ("\\neG" ?ነ) ; U+1290 ..
1672 ("\\eG" ?አ) ; U+12A0 ..
1689 ("\\kWeG" ?ኰ) ; U+12B0 ..
1706 ("\\KWeG" ?ዀ) ; U+12C0 ..
1723 ("\\eeG" ?ዐ) ; U+12D0 ..
1740 ("\\ZeG" ?ዠ) ; U+12E0 ..
1757 ("\\deG" ?ደ) ; U+12F0 ..
1774 ("\\jeG" ?ጀ) ; U+1300 ..
1791 ("\\gWeG" ?ጐ) ; U+1310 ..
1808 ("\\TeG" ?ጠ) ; U+1320 ..
1825 ("\\PeG" ?ጰ) ; U+1330 ..
1842 ("\\SSeG" ?ፀ) ; U+1340 ..
1859 ("\\peG" ?ፐ) ; U+1350 ..
1876 ;; reserved ; U+1360 ..
1893 ("\\smntG" ?፰) ; U+1370 ..
1911 ;; private extension
1914 ("\\yWaG" ?) ; U+1A00EF (was U+12EF)
1916 ("\\GWaG" ?) ; U+1A011F (was U+131F)
1918 ("\\qqeG" ?) ; U+1A0180 .. (was U+1380 ..)
1935 ("\\kkeG" ?) ; U+1A0190 .. (was U+1390 ..)
1952 ("\\XeG" ?) ; U+1A01A0 .. (was U+13A0 ..)
1969 ("\\ggeG" ?) ; U+1A01B0 .. (was U+13B0 ..)
1986 ("\\ornamentG" ?) ; U+1A01C0 .. (was U+FDF0 ..)
1996 ;; Gemination () is handled in a special way.
1999 ;; Assign reverse conversion to Fidel chars.
2000 ;; Then override forward conversion with ASCII chars.
2001 ;; ASCII chars should not have reverse conversions.
2002 ("\\dotG" ?) ("\\dotG" ".")
2003 ("\\lquoteG" ?) ("\\lquoteG" "«")
2004 ("\\rquoteG" ?) ("\\rquoteG" "»")
2005 ("\\qmarkG" ?) ("\\qmarkG" "?")
2008 ;; New characters in Unicode 4.1.
2010 ;; In forward conversion, these characters override the old private
2011 ;; extensions above. The old private extensions still keep their
2012 ;; reverse conversion.
2068 ;; The ethiopic-tex package is not used for keyboard input, therefore
2069 ;; not registered with the register-input-method function.
2071 (provide 'ethio-util)
2073 ;;; ethio-util.el ends here