1 ;;; ethio-util.el --- utilities for Ethiopic -*- coding: utf-8; -*-
3 ;; Copyright (C) 1997, 1998, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
6 ;; 2006, 2007, 2008, 2009
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H14PRO021
9 ;; Copyright (C) 2005, 2006
10 ;; National Institute of Advanced Industrial Science and Technology (AIST)
11 ;; Registration Number: H15PRO110
13 ;; Keywords: mule, multilingual, Ethiopic
15 ;; This file is part of GNU Emacs.
17 ;; GNU Emacs is free software: you can redistribute it and/or modify
18 ;; it under the terms of the GNU General Public License as published by
19 ;; the Free Software Foundation, either version 3 of the License, or
20 ;; (at your option) any later version.
22 ;; GNU Emacs is distributed in the hope that it will be useful,
23 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 ;; GNU General Public License for more details.
27 ;; You should have received a copy of the GNU General Public License
28 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
30 ;; Author: TAKAHASHI Naoto <ntakahas@m17n.org>
;; Rmail variables, declared here without a value; this declaration
;; does not assign them.  (Presumably they are only declared to keep
;; the byte compiler quiet -- confirm against the code that uses them.)
(defvar rmail-current-message)
(defvar rmail-message-vector)
41 ;; Information for exiting Ethiopic environment.
(defvar exit-ethiopic-environment-data nil
  "Saved global key bindings to restore on leaving the Ethiopic environment.
Each element is a cons (KEY . BINDING), where BINDING is what
`global-key-binding' returned for KEY before the Ethiopic binding
was installed.  `exit-ethiopic-environment' consumes this list.")
45 (defun setup-ethiopic-environment-internal ()
46 (let ((key-bindings '((" " . ethio-insert-space
)
47 ([?\S-
] . ethio-insert-ethio-space
)
48 ;; ([?\C-'] . ethio-gemination)
49 ([f3] . ethio-fidel-to-sera-buffer)
50 ([S-f3] . ethio-fidel-to-sera-region)
51 ([C-f3] . ethio-fidel-to-sera-marker)
52 ([f4] . ethio-sera-to-fidel-buffer)
53 ([S-f4] . ethio-sera-to-fidel-region)
54 ([C-f4] . ethio-sera-to-fidel-marker)
55 ([S-f5] . ethio-toggle-punctuation)
56 ([S-f6] . ethio-modify-vowel)
57 ([S-f7] . ethio-replace-space)
58 ;; ([S-f8] . ethio-input-special-character) ; deprecated
59 ([C-f9] . ethio-toggle-space)
60 ([S-f9] . ethio-replace-space) ; as requested
64 (setq kb (car (car key-bindings)))
65 (setq exit-ethiopic-environment-data
66 (cons (cons kb (global-key-binding kb))
67 exit-ethiopic-environment-data))
68 (global-set-key kb (cdr (car key-bindings)))
69 (setq key-bindings (cdr key-bindings))))
71 (add-hook 'find-file-hook 'ethio-find-file)
72 (add-hook 'write-file-functions 'ethio-write-file)
73 (add-hook 'after-save-hook 'ethio-find-file))
(defun exit-ethiopic-environment ()
  "Leave the Ethiopic language environment.
Restore every global key binding recorded in
`exit-ethiopic-environment-data', emptying that list, and then
remove the Ethiopic functions from the file hooks."
  ;; Each saved entry is (KEY . OLD-BINDING).  The entry is dropped
  ;; from the list only after its binding has been put back.
  (while exit-ethiopic-environment-data
    (let ((saved (car exit-ethiopic-environment-data)))
      (global-set-key (car saved) (cdr saved)))
    (pop exit-ethiopic-environment-data))

  ;; Undo the hook registrations, in the same order they are listed
  ;; for installation.
  (remove-hook 'find-file-hook #'ethio-find-file)
  (remove-hook 'write-file-functions #'ethio-write-file)
  (remove-hook 'after-save-hook #'ethio-find-file))
88 ;; ETHIOPIC UTILITY FUNCTIONS
91 ;; If the filename ends in ".sera", editing is done in fidel
92 ;; but file I/O is done in SERA.
94 ;; If the filename ends in ".java", editing is done in fidel
95 ;; but file I/O is done in the \uXXXX style, where XXXX is
96 ;; the Unicode codepoint for the Ethiopic character.
98 ;; If the filename ends in ".tex", editing is done in fidel
99 ;; but file I/O is done in EthioTeX format.
(defvar ethio-primary-language 'tigrigna
  "*Primary language assumed by the SERA --> FIDEL converters.
The value must be one of the symbols `tigrigna', `amharic' or
`english'.")
(defvar ethio-secondary-language 'english
  "*Secondary language assumed by the SERA --> FIDEL converters.
The value must be one of the symbols `tigrigna', `amharic' or
`english'.")
113 (defvar ethio-use-colon-for-colon nil
114 "*Non-nil means associate ASCII colon with Ethiopic colon.
115 If nil, associate ASCII colon with Ethiopic word separator, i.e., two
vertically stacked dots. All SERA <--> FIDEL converters refer to this
(defvar ethio-use-three-dot-question nil
  "*Non-nil means map ASCII question mark to the old style Ethiopic one.
The old style Ethiopic question mark is three vertically stacked
dots.  If nil, associate ASCII question mark with Ethiopic
stylized question mark.  All SERA <--> FIDEL converters refer to
this variable.")
124 (defvar ethio-quote-vowel-always nil
125 "*Non-nil means always put an apostrophe before an isolated vowel (except at word initial) in FIDEL --> SERA conversion.
126 If nil, put an apostrophe only between a 6th-form consonant and an
(defvar ethio-W-sixth-always nil
  "*Non-nil means write the Wu-form of a 12-form consonant as \"W'\".
If nil, it is written as \"Wu\" instead.  This applies to the
FIDEL --> SERA conversion.")
132 (defvar ethio-numeric-reduction 0
133 "*Degree of reduction in converting Ethiopic digits into Arabic digits.
135 For example, ({10}{9}{100}{80}{7}) is converted into:
136 `10`9`100`80`7 if `ethio-numeric-reduction' is 0,
137 `109100807 if `ethio-numeric-reduction' is 1,
138 `10900807 if `ethio-numeric-reduction' is 2.")
(defvar ethio-java-save-lowercase nil
  "*Non-nil means use lowercase hex digits for Ethiopic in Java files.
If nil, the hex numbers of the saved Ethiopic characters are
written with uppercase digits.")
(defun ethio-prefer-amharic-p ()
  "Return t if Amharic takes precedence over Tigrigna, nil otherwise.
Amharic is preferred when `ethio-primary-language' is `amharic',
or when the primary language is not `tigrigna' and
`ethio-secondary-language' is `amharic'."
  (cond
   ;; The primary language alone decides when it is one of the two
   ;; Ethiopic languages.
   ((eq ethio-primary-language 'amharic) t)
   ((eq ethio-primary-language 'tigrigna) nil)
   ;; Otherwise fall back on the secondary language.
   (t (eq ethio-secondary-language 'amharic))))
149 (defun ethio-prefer-amharic (arg)
152 (robin-modify-package "ethiopic-sera" "'a" ?አ)
153 (robin-modify-package "ethiopic-sera" "a" "አ")
154 (robin-modify-package "ethiopic-sera" "'A" ?ኣ)
155 (robin-modify-package "ethiopic-sera" "A" "ኣ"))
156 (robin-modify-package "ethiopic-sera" "'A" ?አ)
157 (robin-modify-package "ethiopic-sera" "A" "አ")
158 (robin-modify-package "ethiopic-sera" "'a" ?ኣ)
159 (robin-modify-package "ethiopic-sera" "a" "ኣ")))
161 (defun ethio-use-colon-for-colon (arg)
164 (robin-modify-package "ethiopic-sera" ":" ?፥)
165 (robin-modify-package "ethiopic-sera" "`:" ?፡))
166 (robin-modify-package "ethiopic-sera" " : " ?፡)
167 (robin-modify-package "ethiopic-sera" ":" "፡")
168 (robin-modify-package "ethiopic-sera" "-:" ?፥)))
170 (defun ethio-use-three-dot-question (arg)
173 (robin-modify-package "ethiopic-sera" "?" ?፧)
174 (robin-modify-package "ethiopic-sera" "`?" ??))
175 (robin-modify-package "ethiopic-sera" "?" ??)
176 (robin-modify-package "ethiopic-sera" "`?" ?፧)))
(defun ethio-adjust-robin ()
  "Apply the user's current preferences to the conversion rules.
Call `ethio-prefer-amharic' with the result of
`ethio-prefer-amharic-p', then call the functions
`ethio-use-colon-for-colon' and `ethio-use-three-dot-question'
with the values of the variables of the same names."
  (let ((amharic (ethio-prefer-amharic-p)))
    (ethio-prefer-amharic amharic))
  (ethio-use-colon-for-colon ethio-use-colon-for-colon)
  (ethio-use-three-dot-question ethio-use-three-dot-question))

;; Re-apply the preferences each time `robin-activate-hook' runs.
(add-hook 'robin-activate-hook #'ethio-adjust-robin)
190 (defun ethio-sera-to-fidel-buffer (&optional secondary force)
191 "Convert the current buffer from SERA to FIDEL.
193 The variable `ethio-primary-language' specifies the primary
194 language and `ethio-secondary-language' specifies the secondary.
196 If the 1st optional argument SECONDARY is non-nil, assume the
197 buffer begins with the secondary language; otherwise with the
200 If the 2nd optional argument FORCE is non-nil, perform conversion
201 even if the buffer is read-only.
203 See also the descriptions of the variables
204 `ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
207 (ethio-sera-to-fidel-region (point-min) (point-max) secondary force))
;; To avoid byte-compiler warnings.  It should never be set globally.
;; Within this file it is let-bound to t around the marker conversions
;; done for ".html" files, and the region converters test it, guarded
;; by `boundp', when they meet "<" or "&".
(defvar ethio-sera-being-called-by-w3)
;; This variable will be bound by some third-party package.  The
;; region converters test it in the same way as the variable above.
(defvar sera-being-called-by-w3)
215 (defun ethio-sera-to-fidel-region (begin end &optional secondary force)
216 "Convert the characters in region from SERA to FIDEL.
218 The variable `ethio-primary-language' specifies the primary
219 language and `ethio-secondary-language' specifies the secondary.
221 If the 3rd argument SECONDARY is given and non-nil, assume the
222 region begins with the secondary language; otherwise with the
225 If the 4th argument FORCE is given and non-nil, perform
226 conversion even if the buffer is read-only.
228 See also the descriptions of the variables
229 `ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
232 (if (and buffer-read-only
234 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
237 (let ((ethio-primary-language ethio-primary-language)
238 (ethio-secondary-language ethio-secondary-language)
239 ;; The above two variables may be changed temporarily by tilde
240 ;; escapes during conversion. We bind them to the variables
241 ;; of the same names so that the original values are restored
242 ;; when this function exits.
243 (buffer-read-only nil)
244 (lang (if secondary ethio-secondary-language ethio-primary-language))
247 (ethio-use-colon-for-colon ethio-use-colon-for-colon)
248 (ethio-use-three-dot-question ethio-use-three-dot-question)
251 (narrow-to-region begin end)
252 (goto-char (point-min))
257 (ethio-prefer-amharic t)
258 (ethio-sera-to-fidel-region-ethio 'amharic))
260 (ethio-prefer-amharic nil)
261 (ethio-sera-to-fidel-region-ethio 'tigrigna))
263 (ethio-sera-to-fidel-region-noethio))))
266 (if (eq lang ethio-primary-language)
267 ethio-secondary-language
268 ethio-primary-language)
271 ;; Restore user's preference.
272 (ethio-adjust-robin))
274 (defun ethio-sera-to-fidel-region-noethio ()
275 "Return next language as symbol: amharic, tigrigna, toggle or nil."
279 ;; No more "\", i.e. nothing to do.
280 ((not (search-forward "\\" nil 0))
283 ;; Hereafter point is put after a "\".
284 ;; First delete that "\", then check the following chars.
287 ((progn (delete-char -1) (setq lflag (ethio-process-language-flag)))
290 ;; "\\" : leave the second "\" and continue in the same language.
291 ((= (following-char) ?\\)
295 ;; "\ " : delete the following " " and toggle the language.
296 ((= (following-char) 32)
300 ;; A "\" but not a special sequence: simply toggle the language.
304 (defun ethio-sera-to-fidel-region-ethio (lang)
305 "Return next language as symbol: amharic, tigrigna, toggle or nil."
309 (if (re-search-forward "\\(`[1-9][0-9]*\\)\\|[\\<&]" nil t)
312 (robin-convert-region (point-min) (point-max) "ethiopic-sera")
313 (goto-char (point-max)))
317 ((= (following-char) ?`)
319 (ethio-process-digits)
323 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
324 ethio-sera-being-called-by-w3)
325 (and (boundp 'sera-being-called-by-w3)
326 sera-being-called-by-w3))
327 (search-forward (if (= (following-char) ?<) ">" ";") nil 0)
334 ;; Now we must be looking at a "\".
335 ;; First delete that "\", then check the following chars.
337 ((progn (delete-char 1) (= (following-char) 32))
341 ((looking-at "[,.;:'`?\\]+")
342 (goto-char (match-end 0))
345 ((/= (following-char) ?~)
348 ;; Now we must be looking at a "~".
350 ((setq lflag (ethio-process-language-flag))
353 ;; Delete the following "~" and check the following chars.
355 ((progn (delete-char 1) (looking-at "! ?"))
357 (if (re-search-forward "\\\\~! ?" nil 0)
363 (ethio-use-colon-for-colon t)
368 (ethio-use-colon-for-colon nil)
373 (ethio-use-three-dot-question t)
376 ((looking-at "\\? ?")
378 (ethio-use-three-dot-question nil)
381 ;; Unknown tilde escape. Recover the deleted chars.
386 (defun ethio-process-language-flag nil
387 "Process a language flag of the form \"~lang\" or \"~lang1~lang2\".
389 If looking at \"~lang1~lang2\", set `ethio-primary-language' and
390 `ethio-secondary-language' based on \"lang1\" and \"lang2\".
391 Then delete the language flag \"~lang1~lang2\" from the buffer.
392 Return value is the new primary language.
394 If looking at \"~lang\", delete that language flag \"~lang\" from
395 the buffer and return that language. In this case
396 `ethio-primary-language' and `ethio-secondary-language' are left
399 If an unsupported language flag is found, just return nil without
407 "~\\([a-z][a-z][a-z]?\\)~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
408 (setq lang1 (ethio-flag-to-language (match-string 1)))
409 (setq lang2 (ethio-flag-to-language (match-string 2))))
410 (setq ethio-primary-language lang1
411 ethio-secondary-language lang2)
412 (delete-region (point) (match-end 2))
413 (if (= (following-char) 32)
415 ethio-primary-language)
418 ((and (looking-at "~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
419 (setq lang1 (ethio-flag-to-language (match-string 1))))
420 (delete-region (point) (match-end 1))
421 (if (= (following-char) 32)
429 (defun ethio-flag-to-language (flag)
431 ((or (string= flag "en") (string= flag "eng")) 'english)
432 ((or (string= flag "ti") (string= flag "tir")) 'tigrigna)
433 ((or (string= flag "am") (string= flag "amh")) 'amharic)
436 (defun ethio-process-digits nil
437 "Convert Arabic digits to Ethiopic digits."
439 (while (and (>= (setq ch (following-char)) ?1)
443 ;; count up following zeros
445 (while (= (following-char) ?0)
451 ;; first digit is 10, 20, ..., or 90
453 (insert (aref [?፲ ?፳ ?፴ ?፵ ?፶ ?፷ ?፸ ?፹ ?፺] (- ch ?1)))
456 ;; first digit is 2, 3, ..., or 9
458 (insert (aref [?፪ ?፫ ?፬ ?፭ ?፮ ?፯ ?፰ ?፱] (- ch ?2))))
469 (insert-char ?፼ (/ z 4)))))
472 (defun ethio-sera-to-fidel-marker (&optional force)
473 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from SERA to FIDEL.
474 Assume that each region begins with `ethio-primary-language'.
475 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
477 (if (and buffer-read-only
479 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
482 (goto-char (point-min))
483 (while (search-forward "<sera>" nil t)
484 (ethio-sera-to-fidel-region
486 (if (search-forward "</sera>" nil t)
496 (defun ethio-language-to-flag (lang)
498 ((eq lang 'english) "eng")
499 ((eq lang 'tigrigna) "tir")
500 ((eq lang 'amharic) "amh")
504 (defun ethio-fidel-to-sera-buffer (&optional secondary force)
505 "Replace all the FIDEL characters in the current buffer to the SERA format.
506 The variable `ethio-primary-language' specifies the primary
507 language and `ethio-secondary-language' specifies the secondary.
509 If the 1st optional argument SECONDARY is non-nil, try to convert the
510 region so that it begins with the secondary language; otherwise with the
513 If the 2nd optional argument FORCE is non-nil, convert even if the
516 See also the descriptions of the variables
517 `ethio-use-colon-for-colon', `ethio-use-three-dot-question',
518 `ethio-quote-vowel-always' and `ethio-numeric-reduction'."
521 (ethio-fidel-to-sera-region (point-min) (point-max) secondary force))
524 (defun ethio-fidel-to-sera-region (begin end &optional secondary force)
525 "Replace all the FIDEL characters in the region to the SERA format.
527 The variable `ethio-primary-language' specifies the primary
528 language and `ethio-secondary-language' specifies the secondary.
530 If the 3rd argument SECONDARY is given and non-nil, convert
531 the region so that it begins with the secondary language; otherwise with
532 the primary language.
534 If the 4th argument FORCE is given and non-nil, convert even if the
537 See also the descriptions of the variables
538 `ethio-use-colon-for-colon', `ethio-use-three-dot-question',
539 `ethio-quote-vowel-always' and `ethio-numeric-reduction'."
542 (if (and buffer-read-only
544 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
548 (narrow-to-region begin end)
550 (let ((buffer-read-only nil)
552 ethio-secondary-language
553 ethio-primary-language))
554 (flag (if (ethio-prefer-amharic-p) "\\~amh " "\\~tir "))
557 (goto-char (point-min))
559 (unless (eq mode 'english)
560 (setq mode 'ethiopic))
561 (if (and (eq mode 'english) (looking-at "\\ce"))
562 (setq mode 'ethiopic))
563 (if (and (eq mode 'ethiopic) (looking-at "\\Ce"))
564 (setq mode 'english))
565 (insert (if (eq mode 'english) "\\~eng " flag))
569 (if (eq mode 'english)
571 (if (re-search-forward "\\(\\ce\\|\\\\\\)" nil 0)
574 ((eq (following-char) ?\\)
579 (setq mode 'ethiopic))))
581 ;; If we reach here, mode is ethiopic.
583 (if (re-search-forward "[a-z,.;:'`?\\<&]" nil 0)
586 (narrow-to-region p (point))
587 (robin-invert-region (point-min) (point-max) "ethiopic-sera")
;; ethio-quote-vowel-always
590 (goto-char (point-min))
591 (while (re-search-forward "'[eauio]" nil t)
594 (setq ch (preceding-char))
595 (if (or (and (>= ch ?a) (<= ch ?z))
596 (and (>= ch ?A) (<= ch ?Z)))
597 (if (and (not ethio-quote-vowel-always)
598 (memq ch '(?e ?a ?u ?i ?o ?E ?A ?I)))
602 ;; ethio-W-sixth-always
603 (unless ethio-W-sixth-always
604 (goto-char (point-min))
605 (while (search-forward "W'" nil t)
609 ;; ethio-numeric-reduction
610 (when (> ethio-numeric-reduction 0)
611 (goto-char (point-min))
612 (while (re-search-forward "\\([0-9]\\)`\\([0-9]\\)" nil t)
613 (replace-match "\\1\\2")
615 (when (= ethio-numeric-reduction 2)
616 (goto-char (point-min))
617 (while (re-search-forward "\\([0-9]\\)1\\(0+\\)" nil t)
618 (replace-match "\\1\\2")))
620 (goto-char (point-max)))
623 ((looking-at "[a-z]")
625 (setq mode 'english))
626 ((looking-at "[,.;:'`\\]+")
628 (goto-char (1+ (match-end 0))))
629 ((= (following-char) ??)
630 (if ethio-use-three-dot-question
634 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
635 ethio-sera-being-called-by-w3)
636 (and (boundp 'sera-being-called-by-w3)
637 sera-being-called-by-w3))
638 (search-forward (if (= (following-char) ?<) ">" "&") nil 0)
639 (forward-char 1)))))))))
642 (defun ethio-fidel-to-sera-marker (&optional force)
643 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from FIDEL to SERA.
644 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
647 (if (and buffer-read-only
649 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
652 (goto-char (point-min))
653 (while (search-forward "<sera>" nil t)
654 (ethio-fidel-to-sera-region
656 (if (search-forward "</sera>" nil t)
663 ;; vowel modification
667 (defun ethio-modify-vowel nil
668 "Modify the vowel of the FIDEL that is under the cursor."
671 (let ((consonant (ethio-get-consonant (following-char)))
674 (error "") ; not an Ethiopic char
675 (setq vowel (read-char "Modify vowel to: "))
677 (if (and (string= consonant "'") (= vowel ?W))
680 (narrow-to-region (point) (point))
681 (insert consonant vowel)
682 (robin-convert-region (point-min) (point-max) "ethiopic-sera"))))))
684 (defun ethio-get-consonant (ch)
685 "Return the consonant part of CH's SERA spelling in ethiopic-sera."
686 (let ((sera (get-char-code-property ch 'ethiopic-sera)))
689 ((= ch ?ኧ) "'") ; Only this has two vowel letters.
692 (if (memq (preceding-char) '(?e ?u ?i ?a ?o ?E ?I ?A ?'))
694 (buffer-substring (point-min) (point-max)))))))
701 (defun ethio-replace-space (ch begin end)
702 "Replace ASCII spaces with Ethiopic word separators in the region.
704 In the specified region, replace word separators surrounded by two
705 Ethiopic characters, depending on the first argument CH, which should
708 If CH = 1, word separator will be replaced with an ASCII space.
709 If CH = 2, with two ASCII spaces.
710 If CH = 3, with the Ethiopic colon-like word separator.
712 The 2nd and 3rd arguments BEGIN and END specify the region."
714 (interactive "*cReplace spaces to: 1 (sg col), 2 (dbl col), 3 (Ethiopic)\nr")
715 (if (not (memq ch '(?1 ?2 ?3)))
719 (narrow-to-region begin end)
723 ;; an Ethiopic word separator --> an ASCII space
724 (goto-char (point-min))
725 (while (search-forward "፡" nil t)
728 ;; two ASCII spaces between Ethiopic characters --> an ASCII space
729 (goto-char (point-min))
730 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
731 (replace-match "\\1 \\2")
735 ;; An Ethiopic word separator --> two ASCII spaces
736 (goto-char (point-min))
737 (while (search-forward "፡" nil t)
740 ;; An ASCII space between Ethiopic characters --> two ASCII spaces
741 (goto-char (point-min))
742 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
743 (replace-match "\\1 \\2")
747 ;; One or two ASCII spaces between Ethiopic characters
748 ;; --> An Ethiopic word separator
749 (goto-char (point-min))
750 (while (re-search-forward "\\(\\ce\\) ?\\(\\ce\\)" nil t)
751 (replace-match "\\1፡\\2")
754 ;; Three or more ASCII spaces between Ethiopic characters
755 ;; --> An Ethiopic word separator + (N - 2) ASCII spaces
756 (goto-char (point-min))
757 (while (re-search-forward "\\(\\ce\\) \\( +\\ce\\)" nil t)
758 (replace-match "\\1፡\\2")
759 (forward-char -1)))))))
765 ;; This function is deprecated.
767 (defun ethio-input-special-character (arg)
768 "This function is deprecated."
769 (interactive "*cInput number: 1. 2. 3. 4. 5.")
789 (defun ethio-fidel-to-tex-buffer nil
790 "Convert each fidel characters in the current buffer into a fidel-tex command."
792 (let ((buffer-read-only nil)
795 ;; Special treatment for geminated characters.
796 ;; Geminated characters la", etc. change into \geminateG{\laG}, etc.
797 (goto-char (point-min))
798 (while (re-search-forward "፟\\|" nil t)
799 (setq comp (find-composition (match-beginning 0)))
801 (replace-match "\\\\geminateG{}" t)
802 (decompose-region (car comp) (cadr comp))
805 (insert "\\geminateG{")
809 ;; Special Ethiopic punctuations.
810 (goto-char (point-min))
811 (while (re-search-forward "\\ce[»\\.\\?]\\|«\\ce" nil t)
813 ((= (setq ch (preceding-char)) ?\»)
815 (insert "\\rquoteG"))
828 ;; Ethiopic characters to TeX macros
829 (robin-invert-region (point-min) (point-max) "ethiopic-tex")
831 (goto-char (point-min))
832 (set-buffer-modified-p nil)))
835 (defun ethio-tex-to-fidel-buffer nil
836 "Convert fidel-tex commands in the current buffer into fidel chars."
838 (let ((buffer-read-only nil)
841 ;; TeX macros to Ethiopic characters
842 (robin-convert-region (point-min) (point-max) "ethiopic-tex")
844 ;; compose geminated characters
845 (goto-char (point-min))
846 (while (re-search-forward "\\\\geminateG{\\(\\ce?\\)}" nil t)
847 (replace-match "\\1፟"))
849 ;; remove redundant braces, if any
850 (goto-char (point-min))
851 (while (re-search-forward "{\\(\\ce\\)}" nil t)
852 (replace-match "\\1"))
854 (goto-char (point-min))
855 (set-buffer-modified-p nil)))
862 (defun ethio-fidel-to-java-buffer nil
863 "Convert Ethiopic characters into the Java escape sequences.
865 Each escape sequence is of the form \\uXXXX, where XXXX is the
866 character's codepoint (in hex) in Unicode.
868 If `ethio-java-save-lowercase' is non-nil, use [0-9a-f].
869 Otherwise, [0-9A-F]."
872 (goto-char (point-min))
873 (while (re-search-forward "[ሀ-፼]" nil t)
874 (setq ucode (preceding-char))
875 (delete-backward-char 1)
877 (format (if ethio-java-save-lowercase "\\u%4x" "\\u%4X")
881 (defun ethio-java-to-fidel-buffer nil
882 "Convert the Java escape sequences into corresponding Ethiopic characters."
883 (let ((case-fold-search t)
885 (goto-char (point-min))
886 (while (re-search-forward "\\\\u\\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\\)" nil t)
887 (setq ucode (read (concat "#x" (match-string 1))))
888 (when (and (>= ucode #x1200) (<= ucode #x137f))
889 (replace-match (char-to-string ucode))))))
896 (defun ethio-find-file nil
"Transliterate file content into Ethiopic depending on filename suffix."
900 ((string-match "\\.sera$" (buffer-file-name))
902 (ethio-sera-to-fidel-buffer nil 'force)
903 (set-buffer-modified-p nil)))
905 ((string-match "\\.html$" (buffer-file-name))
906 (let ((ethio-sera-being-called-by-w3 t))
908 (ethio-sera-to-fidel-marker 'force)
909 (goto-char (point-min))
910 (while (re-search-forward "&[lr]aquo;" nil t)
911 (if (= (char-after (1+ (match-beginning 0))) ?l)
913 (replace-match "»")))
914 (set-buffer-modified-p nil))))
916 ((string-match "\\.tex$" (buffer-file-name))
918 (ethio-tex-to-fidel-buffer)
919 (set-buffer-modified-p nil)))
921 ((string-match "\\.java$" (buffer-file-name))
923 (ethio-java-to-fidel-buffer)
924 (set-buffer-modified-p nil)))
930 (defun ethio-write-file nil
931 "Transliterate Ethiopic characters in ASCII depending on the file extension."
934 ((string-match "\\.sera$" (buffer-file-name))
936 (ethio-fidel-to-sera-buffer nil 'force)
937 (goto-char (point-min))
938 (ethio-record-user-preference)
939 (set-buffer-modified-p nil)))
941 ((string-match "\\.html$" (buffer-file-name))
943 (let ((ethio-sera-being-called-by-w3 t))
944 (ethio-fidel-to-sera-marker 'force)
945 (goto-char (point-min))
946 (while (re-search-forward "[«»]" nil t)
947 (replace-match (if (= (preceding-char) ?«) "«" "»")))
948 (goto-char (point-min))
949 (if (search-forward "<sera>" nil t)
950 (ethio-record-user-preference))
951 (set-buffer-modified-p nil))))
953 ((string-match "\\.tex$" (buffer-file-name))
955 (ethio-fidel-to-tex-buffer)
956 (set-buffer-modified-p nil)))
958 ((string-match "\\.java$" (buffer-file-name))
960 (ethio-fidel-to-java-buffer)
961 (set-buffer-modified-p nil)))
(defun ethio-record-user-preference ()
  "Insert SERA tilde escapes recording the punctuation preferences.
One escape is chosen by `ethio-use-colon-for-colon' and one by
`ethio-use-three-dot-question'; both are inserted at point, the
colon escape first."
  (let ((colon-escape (if ethio-use-colon-for-colon "\\~-: " "\\~`: "))
        (question-escape (if ethio-use-three-dot-question "\\~`| " "\\~? ")))
    (insert colon-escape question-escape)))
971 ;; Ethiopic word separator vs. ASCII space
(defvar ethio-prefer-ascii-space t
  "Non-nil means `ethio-insert-space' inserts ASCII spaces.
`ethio-toggle-space' flips this value.  The variable becomes
buffer-local whenever it is set.")
(make-variable-buffer-local 'ethio-prefer-ascii-space)
977 (defun ethio-toggle-space nil
978 "Toggle ASCII space and Ethiopic separator for keyboard input."
980 (setq ethio-prefer-ascii-space
981 (not ethio-prefer-ascii-space)))
983 (defun ethio-insert-space (arg)
984 "Insert ASCII spaces or Ethiopic word separators depending on context.
986 If the current word separator (indicated in mode-line) is the ASCII space,
987 insert an ASCII space. With ARG, insert that many ASCII spaces.
989 If the current word separator is the colon-like Ethiopic word
990 separator and the point is preceded by `an Ethiopic punctuation mark
991 followed by zero or more ASCII spaces', then insert also an ASCII
992 space. With ARG, insert that many ASCII spaces.
994 Otherwise, insert a colon-like Ethiopic word separator. With ARG, insert that
995 many Ethiopic word separators."
999 (ethio-prefer-ascii-space
1000 (insert-char 32 arg))
1002 (skip-chars-backward " ")
1003 (memq (preceding-char)
1004 '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ? ? ? ? ?)))
1005 (insert-char 32 arg))
1007 (insert-char ?፡ arg))))
1010 (defun ethio-insert-ethio-space (arg)
1011 "Insert the Ethiopic word delimiter (the colon-like character).
1012 With ARG, insert that many delimiters."
1014 (insert-char ?፡ arg))
1021 (defun ethio-composition-function (pos to font-object string)
1023 (let ((pattern "\\ce\\(፟\\|\\)"))
1026 (eq (string-match pattern string pos) pos))
1027 (prog1 (match-end 0)
1028 (compose-string string pos (match-end 0))))
1029 (if (>= pos (point-min))
1032 (if (looking-at pattern)
1033 (prog1 (match-end 0)
1034 (compose-region pos (match-end 0)))))))))
1036 ;; This function is not used any more.
1037 (defun ethio-gemination nil
1038 "Compose the character before the point with the Ethiopic gemination mark.
1039 If the character is already composed, decompose it and remove the gemination
1042 (let ((ch (preceding-char)))
1044 ((and (= ch ?) (find-composition (1- (point))))
1045 (decompose-region (- (point) 2) (point)))
1046 ((and (>= ch #x1200) (<= ch #x137f))
1048 (compose-region (- (point) 2) (point)))
1056 (robin-define-package "ethiopic-sera"
1057 "SERA transliteration system for Ethiopic."
1063 ("hE" ?ሄ) ("hee" "ሄ")
1067 ("le" ?ለ) ("Le" "ለ")
1068 ("lu" ?ሉ) ("Lu" "ሉ")
1069 ("li" ?ሊ) ("Li" "ሊ")
1070 ("la" ?ላ) ("La" "ላ")
1071 ("lE" ?ሌ) ("LE" "ሌ") ("lee" "ሌ") ("Lee" "ሌ")
1073 ("lo" ?ሎ) ("Lo" "ሎ")
1074 ("lWa" ?ሏ) ("LWa" "ሏ") ("lW" "ሏ") ("LW" "ሏ")
1080 ("HE" ?ሔ) ("Hee" "ሔ")
1083 ("HWa" ?ሗ) ("HW" "ሗ")
1085 ("me" ?መ) ("Me" "መ")
1086 ("mu" ?ሙ) ("Mu" "ሙ")
1087 ("mi" ?ሚ) ("Mi" "ሚ")
1088 ("ma" ?ማ) ("Ma" "ማ")
1089 ("mE" ?ሜ) ("ME" "ሜ") ("mee" "ሜ") ("Mee" "ሜ")
1091 ("mo" ?ሞ) ("Mo" "ሞ")
1092 ("mWa" ?ሟ) ("MWa" "ሟ") ("mW" "ሟ") ("MW" "ሟ")
1094 ("`se" ?ሠ) ("sse" "ሠ") ("s2e" "ሠ")
1095 ("`su" ?ሡ) ("ssu" "ሡ") ("s2u" "ሡ")
1096 ("`si" ?ሢ) ("ssi" "ሢ") ("s2i" "ሢ")
1097 ("`sa" ?ሣ) ("ssa" "ሣ") ("s2a" "ሣ")
1098 ("`sE" ?ሤ) ("ssE" "ሤ") ("s2E" "ሤ")
1099 ("`see" "ሤ") ("ssee" "ሤ") ("s2ee" "ሤ")
1100 ("`s" ?ሥ) ("ss" "ሥ") ("s2" "ሥ")
1101 ("`so" ?ሦ) ("sso" "ሦ") ("s2o" "ሦ")
1102 ("`sWa" ?ሧ) ("ssWa" "ሧ") ("s2Wa" "ሧ")
1103 ("`sW" "ሧ") ("ssW" "ሧ") ("s2W" "ሧ")
1105 ("re" ?ረ) ("Re" "ረ")
1106 ("ru" ?ሩ) ("Ru" "ሩ")
1107 ("ri" ?ሪ) ("Ri" "ሪ")
1108 ("ra" ?ራ) ("Ra" "ራ")
1109 ("rE" ?ሬ) ("RE" "ሬ") ("ree" "ሬ") ("Ree" "ሬ")
1111 ("ro" ?ሮ) ("Ro" "ሮ")
1112 ("rWa" ?ሯ) ("RWa" "ሯ") ("rW" "ሯ") ("RW" "ሯ")
1118 ("sE" ?ሴ) ("see" "ሴ")
1121 ("sWa" ?ሷ) ("sW" "ሷ")
1127 ("xE" ?ሼ) ("xee" "ሼ")
1130 ("xWa" ?ሿ) ("xW" "ሿ")
1136 ("qE" ?ቄ) ("qee" "ቄ")
1141 ("qWa" ?ቋ) ("qW" "ቋ")
1142 ("qWE" ?ቌ) ("qWee" "ቌ")
1143 ("qW'" ?ቍ) ("qWu" "ቍ")
1149 ("QE" ?ቔ) ("Qee" "ቔ")
1154 ("QWa" ?ቛ) ("QW" "ቛ")
1155 ("QWE" ?ቜ) ("QWee" "ቜ")
1156 ("QW'" ?ቝ) ("QWu" "ቝ")
1158 ("be" ?በ) ("Be" "በ")
1159 ("bu" ?ቡ) ("Bu" "ቡ")
1160 ("bi" ?ቢ) ("Bi" "ቢ")
1161 ("ba" ?ባ) ("Ba" "ባ")
1162 ("bE" ?ቤ) ("BE" "ቤ") ("bee" "ቤ") ("Bee" "ቤ")
1164 ("bo" ?ቦ) ("Bo" "ቦ")
1165 ("bWa" ?ቧ) ("BWa" "ቧ") ("bW" "ቧ") ("BW" "ቧ")
1167 ("ve" ?ቨ) ("Ve" "ቨ")
1168 ("vu" ?ቩ) ("Vu" "ቩ")
1169 ("vi" ?ቪ) ("Vi" "ቪ")
1170 ("va" ?ቫ) ("Va" "ቫ")
1171 ("vE" ?ቬ) ("VE" "ቬ") ("vee" "ቬ") ("Vee" "ቬ")
1173 ("vo" ?ቮ) ("Vo" "ቮ")
1174 ("vWa" ?ቯ) ("VWa" "ቯ") ("vW" "ቯ") ("VW" "ቯ")
1180 ("tE" ?ቴ) ("tee" "ቴ")
1183 ("tWa" ?ቷ) ("tW" "ቷ")
1189 ("cE" ?ቼ) ("cee" "ቼ")
1192 ("cWa" ?ቿ) ("cW" "ቿ")
1194 ("`he" ?ኀ) ("hhe" "ኀ") ("h2e" "ኀ")
1195 ("`hu" ?ኁ) ("hhu" "ኁ") ("h2u" "ኁ")
1196 ("`hi" ?ኂ) ("hhi" "ኂ") ("h2i" "ኂ")
1197 ("`ha" ?ኃ) ("hha" "ኃ") ("h2a" "ኃ")
1198 ("`hE" ?ኄ) ("hhE" "ኄ") ("h2E" "ኄ")
1199 ("`hee" "ኄ") ("hhee" "ኄ") ("h2ee" "ኄ")
1200 ("`h" ?ኅ) ("hh" "ኅ") ("h2" "ኅ")
1201 ("`ho" ?ኆ) ("hho" "ኆ") ("h2o" "ኆ")
1202 ("`hWe" ?ኈ) ("hhWe" "ኈ") ("h2We" "ኈ") ("hWe" "ኈ")
1203 ("`hWi" ?ኊ) ("hhWi" "ኊ") ("h2Wi" "ኊ") ("hWi" "ኊ")
1204 ("`hWa" ?ኋ) ("hhWa" "ኋ") ("h2Wa" "ኋ") ("hWa" "ኋ")
1205 ("`hW" "ኋ") ("hhW" "ኋ") ("h2W" "ኋ")
1206 ("`hWE" ?ኌ) ("hhWE" "ኌ") ("h2WE" "ኌ") ("hWE" "ኌ")
1207 ("`hWee" "ኌ") ("hhWee" "ኌ") ("h2Wee" "ኌ") ("hWee" "ኌ")
1208 ("`hW'" ?ኍ) ("hhW'" "ኍ") ("h2W'" "ኍ") ("hW'" "ኍ")
1209 ("`hWu" "ኍ") ("hhWu" "ኍ") ("h2Wu" "ኍ") ("hWu" "ኍ")
1215 ("nE" ?ኔ) ("nee" "ኔ")
1218 ("nWa" ?ኗ) ("nW" "ኗ")
1224 ("NE" ?ኜ) ("Nee" "ኜ")
1227 ("NWa" ?ኟ) ("NW" "ኟ")
1230 ("'u" ?ኡ) ("u" "ኡ") ("'U" "ኡ") ("U" "ኡ")
1234 ("'I" ?እ) ("I" "እ") ("'e" "እ") ("e" "እ")
1235 ("'o" ?ኦ) ("o" "ኦ") ("'O" "ኦ") ("O" "ኦ")
1236 ("'ea" ?ኧ) ("ea" "ኧ")
1242 ("kE" ?ኬ) ("kee" "ኬ")
1247 ("kWa" ?ኳ) ("kW" "ኳ")
1248 ("kWE" ?ኴ) ("kWee" "ኴ")
1249 ("kW'" ?ኵ) ("kWu" "ኵ")
1255 ("KE" ?ኼ) ("Kee" "ኼ")
1260 ("KWa" ?ዃ) ("KW" "ዃ")
1261 ("KWE" ?ዄ) ("KWee" "ዄ")
1262 ("KW'" ?ዅ) ("KWu" "ዅ")
1268 ("wE" ?ዌ) ("wee" "ዌ")
1272 ("`e" ?ዐ) ("ae" "ዐ") ("aaa" "ዐ") ("e2" "ዐ")
1273 ("`u" ?ዑ) ("uu" "ዑ") ("u2" "ዑ") ("`U" "ዑ") ("UU" "ዑ") ("U2" "ዑ")
1274 ("`i" ?ዒ) ("ii" "ዒ") ("i2" "ዒ")
1275 ("`a" ?ዓ) ("aa" "ዓ") ("a2" "ዓ") ("`A" "ዓ") ("AA" "ዓ") ("A2" "ዓ")
1276 ("`E" ?ዔ) ("EE" "ዔ") ("E2" "ዔ")
1277 ("`I" ?ዕ) ("II" "ዕ") ("I2" "ዕ") ("ee" "ዕ")
1278 ("`o" ?ዖ) ("oo" "ዖ") ("o2" "ዖ") ("`O" "ዖ") ("OO" "ዖ") ("O2" "ዖ")
1284 ("zE" ?ዜ) ("zee" "ዜ")
1287 ("zWa" ?ዟ) ("zW" "ዟ")
1293 ("ZE" ?ዤ) ("Zee" "ዤ")
1296 ("ZWa" ?ዧ) ("ZW" "ዧ")
1298 ("ye" ?የ) ("Ye" "የ")
1299 ("yu" ?ዩ) ("Yu" "ዩ")
1300 ("yi" ?ዪ) ("Yi" "ዪ")
1301 ("ya" ?ያ) ("Ya" "ያ")
1302 ("yE" ?ዬ) ("YE" "ዬ") ("yee" "ዬ") ("Yee" "ዬ")
1304 ("yo" ?ዮ) ("Yo" "ዮ")
1310 ("dE" ?ዴ) ("dee" "ዴ")
1313 ("dWa" ?ዷ) ("dW" "ዷ")
1319 ("DE" ?ዼ) ("Dee" "ዼ")
1322 ("DWa" ?ዿ) ("DW" "ዿ")
1324 ("je" ?ጀ) ("Je" "ጀ")
1325 ("ju" ?ጁ) ("Ju" "ጁ")
1326 ("ji" ?ጂ) ("Ji" "ጂ")
1327 ("ja" ?ጃ) ("Ja" "ጃ")
1328 ("jE" ?ጄ) ("JE" "ጄ") ("jee" "ጄ") ("Jee" "ጄ")
1330 ("jo" ?ጆ) ("Jo" "ጆ")
1331 ("jWa" ?ጇ) ("jW" "ጇ") ("JWa" "ጇ") ("JW" "ጇ")
1337 ("gE" ?ጌ) ("gee" "ጌ")
1342 ("gWa" ?ጓ) ("gW" "ጓ")
1343 ("gWE" ?ጔ) ("gWee" "ጔ")
1344 ("gW'" ?ጕ) ("gWu" "ጕ")
1350 ("GE" ?ጜ) ("Gee" "ጜ")
1358 ("TE" ?ጤ) ("Tee" "ጤ")
1361 ("TWa" ?ጧ) ("TW" "ጧ")
1367 ("CE" ?ጬ) ("Cee" "ጬ")
1370 ("CWa" ?ጯ) ("CW" "ጯ")
1376 ("PE" ?ጴ) ("Pee" "ጴ")
1379 ("PWa" ?ጷ) ("PW" "ጷ")
1385 ("SE" ?ጼ) ("See" "ጼ")
1388 ("SWa" ?ጿ) ("`SWa" "ጿ") ("SSWa" "ጿ") ("S2Wa" "ጿ")
1389 ("SW" "ጿ") ("`SW" "ጿ") ("SSW" "ጿ") ("S2W" "ጿ")
1391 ("`Se" ?ፀ) ("SSe" "ፀ") ("S2e" "ፀ")
1392 ("`Su" ?ፁ) ("SSu" "ፁ") ("S2u" "ፁ")
1393 ("`Si" ?ፂ) ("SSi" "ፂ") ("S2i" "ፂ")
1394 ("`Sa" ?ፃ) ("SSa" "ፃ") ("S2a" "ፃ")
1395 ("`SE" ?ፄ) ("SSE" "ፄ") ("S2E" "ፄ")
1396 ("`See" "ፄ") ("SSee" "ፄ") ("S2ee" "ፄ")
1397 ("`S" ?ፅ) ("SS" "ፅ") ("S2" "ፅ")
1398 ("`So" ?ፆ) ("SSo" "ፆ") ("S2o" "ፆ")
1400 ("fe" ?ፈ) ("Fe" "ፈ")
1401 ("fu" ?ፉ) ("Fu" "ፉ")
1402 ("fi" ?ፊ) ("Fi" "ፊ")
1403 ("fa" ?ፋ) ("Fa" "ፋ")
1404 ("fE" ?ፌ) ("FE" "ፌ") ("fee" "ፌ") ("Fee" "ፌ")
1406 ("fo" ?ፎ) ("Fo" "ፎ")
1407 ("fWa" ?ፏ) ("FWa" "ፏ") ("fW" "ፏ") ("FW" "ፏ")
1413 ("pE" ?ፔ) ("pee" "ፔ")
1416 ("pWa" ?ፗ) ("pW" "ፗ")
1418 ("rYa" ?ፘ) ("RYa" "ፘ") ("rY" "ፘ") ("RY" "ፘ")
1419 ("mYa" ?ፙ) ("MYa" "ፙ") ("mY" "ፙ") ("MY" "ፙ")
1420 ("fYa" ?ፚ) ("FYa" "ፚ") ("fY" "ፚ") ("FY" "ፚ")
1422 (" : " ?፡) (":" "፡") ("`:" "፡")
1428 ("`?" ?፧) ("??" "፧")
1429 (":|:" ?፨) ("**" "፨")
1431 ;; Explicit syllable delimiter
1434 ;; Quick ASCII input
;; Register the "ethiopic-sera" package as an input method for the
;; "Ethiopic" language environment, activated by `robin-use-package'.
(register-input-method "ethiopic-sera"
                       "Ethiopic"
                       #'robin-use-package
                       "et"
                       "An input method for Ethiopic.")
1503 (robin-define-package "ethiopic-tex"
1504 "TeX transliteration system for Ethiopic."
1506 ("\\heG" ?ሀ) ; U+1200 ..
1523 ("\\HeG" ?ሐ) ; U+1210 ..
1540 ("\\sseG" ?ሠ) ; U+1220 ..
1557 ("\\seG" ?ሰ) ; U+1230 ..
1574 ("\\qeG" ?ቀ) ; U+1240 ..
1591 ("\\QeG" ?ቐ) ; U+1250 ..
1608 ("\\beG" ?በ) ; U+1260 ..
1625 ("\\teG" ?ተ) ; U+1270 ..
1642 ("\\hheG" ?ኀ) ; U+1280 ..
1659 ("\\neG" ?ነ) ; U+1290 ..
1676 ("\\eG" ?አ) ; U+12A0 ..
1693 ("\\kWeG" ?ኰ) ; U+12B0 ..
1710 ("\\KWeG" ?ዀ) ; U+12C0 ..
1727 ("\\eeG" ?ዐ) ; U+12D0 ..
1744 ("\\ZeG" ?ዠ) ; U+12E0 ..
1761 ("\\deG" ?ደ) ; U+12F0 ..
1778 ("\\jeG" ?ጀ) ; U+1300 ..
1795 ("\\gWeG" ?ጐ) ; U+1310 ..
1812 ("\\TeG" ?ጠ) ; U+1320 ..
1829 ("\\PeG" ?ጰ) ; U+1330 ..
1846 ("\\SSeG" ?ፀ) ; U+1340 ..
1863 ("\\peG" ?ፐ) ; U+1350 ..
1880 ;; reserved ; U+1360 ..
1897 ("\\smntG" ?፰) ; U+1370 ..
1915 ;; private extension
1918 ("\\yWaG" ?) ; U+1A00EF (was U+12EF)
1920 ("\\GWaG" ?) ; U+1A011F (was U+131F)
1922 ("\\qqeG" ?) ; U+1A0180 .. (was U+1380 ..)
1939 ("\\kkeG" ?) ; U+1A0190 .. (was U+1390 ..)
1956 ("\\XeG" ?) ; U+1A01A0 .. (was U+13A0 ..)
1973 ("\\ggeG" ?) ; U+1A01B0 .. (was U+13B0 ..)
1990 ("\\ornamentG" ?) ; U+1A01C0 .. (was U+FDF0 ..)
2000 ;; Gemination () is handled in a special way.
2003 ;; Assign reverse conversion to Fidel chars.
2004 ;; Then override forward conversion with ASCII chars.
2005 ;; ASCII chars should not have reverse conversions.
2006 ("\\dotG" ?) ("\\dotG" ".")
2007 ("\\lquoteG" ?) ("\\lquoteG" "«")
2008 ("\\rquoteG" ?) ("\\rquoteG" "»")
2009 ("\\qmarkG" ?) ("\\qmarkG" "?")
2012 ;; New characters in Unicode 4.1.
2014 ;; In forward conversion, these characters override the old private
2015 ;; extensions above. The old private extensions still keep their
2016 ;; reverse conversion.
2072 ;; The ethiopic-tex package is not used for keyboard input, therefore
2073 ;; not registered with the register-input-method function.
2075 (provide 'ethio-util)
2077 ;;; ethio-util.el ends here
2079 ;; arch-tag: c8feb3d6-39bf-4b0a-b6ef-26f03fbc8140
2080 ;;; ethio-util.el ends here