1 ;;; ogonek.el --- change the encoding of Polish diacritics
3 ;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 ;; Free Software Foundation, Inc.
6 ;; Author: W{\l}odek Bzyl, Ryszard Kubiak
7 ;; Maintainer: rysiek@ipipan.gda.pl (Ryszard Kubiak)
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 3, or (at your option)
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 ;; Boston, MA 02110-1301, USA.
29 ;; To use this library load it using
30 ;; M-x load-library [enter] ogonek
31 ;; Then, you may get a short info by calling one of
32 ;; M-x ogonek-jak -- in Polish
33 ;; M-x ogonek-how -- in English "
38 "Change the encoding of Polish diacritic characters."
42 (defconst ogonek-name-encoding-alist
43 '(("ascii" .
(?A ?C ?E ?L ?N ?O ?S ?Z ?Z
44 ?a ?c ?e ?l ?n ?o ?s ?z ?z
))
45 ("iso8859-2" .
(161 198 202 163 209 211 166 172 175
46 177 230 234 179 241 243 182 188 191))
47 ("mazovia" .
(143 149 144 156 165 163 152 160 161
48 134 141 145 146 164 162 158 166 167))
49 ("windows-EE" .
(165 198 202 163 209 211 140 143 175
50 185 230 234 179 241 243 156 159 191))
51 ("windows-PL" .
(165 198 202 163 209 211 140 143 175
52 185 230 234 179 241 243 156 159 191))
53 ("latin-2" .
(164 143 168 157 227 224 151 141 189
54 165 134 169 136 228 162 152 171 190))
55 ("CP852" .
(164 143 168 157 227 224 151 141 189
56 165 134 169 136 228 162 152 171 190))
57 ("MeX" .
(129 130 134 138 139 211 145 153 155
58 161 162 166 170 171 243 177 185 187))
59 ("CorelDraw" .
(197 242 201 163 209 211 255 225 237
60 229 236 230 198 241 243 165 170 186))
61 ("Amiga" .
(194 202 203 206 207 211 212 218 219
62 226 234 235 238 239 243 244 250 251))
63 ("Mac" .
(132 140 162 252 193 238 229 143 251
64 136 141 171 184 196 151 230 144 253))
66 "The constant `ogonek-name-encoding-alist' is a list of (NAME.LIST) pairs.
67 Each LIST contains codes for 18 Polish diacritic characters. The codes
68 are given in the following order:
69 Aogonek Cacute Eogonek Lslash Nacute Oacute Sacute Zacute Zdotaccent
70 aogonek cacute eogonek lslash nacute oacute sacute zacute zdotaccent.")
72 ;; ------ A Little Info in Polish ---------------
74 (defconst ogonek-informacja
75 " FUNKCJE INTERAKCYJNE UDOST/EPNIANE PRZEZ BIBLIOTEK/E `ogonek'.
77 Je/sli czytasz ten tekst, to albo przegl/adasz plik /xr/od/lowy
78 biblioteki `ogonek.el', albo wywo/la/le/s polecenie `ogonek-jak'.
79 W drugim przypadku mo/zesz usun/a/c tekst z ekranu, stosuj/ac
80 polecenie `M-x kill-buffer'.
82 Niniejsza biblioteka dostarcza funkcji do zmiany kodowania polskich
83 znak/ow diakrytycznych. Funkcje te mo/zna pogrupowa/c nast/epuj/aco.
85 1. `ogonek-recode-region' oraz `ogonek-recode-buffer'
86 przekodowuj/a zaznaczony fragment wzgl/ednie ca/ly buffor.
87 Po wywo/laniu interakcyjnym funkcji zadawane s/a
88 pytania o parametry przekodowania: nazw/e kodowania
89 w tek/scie /xr/od/lowym i nazw/e kodowania docelowego.
90 Poni/zsze przyk/lady powinny wyja/sni/c, jakich parametr/ow
91 oczekuj/a wymienione funkcje:
93 (ogonek-recode-region (poczatek-fragmentu) (koniec-fragmentu)
94 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)
96 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)
98 2. `ogonek-prefixify-region' oraz `ogonek-prefixify-buffer'
99 s/lu/z/a do wprowadzania notacji prefiksowej.
101 (ogonek-prefixify-region (poczatek-fragmentu) (koniec-fragmentu)
102 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)
103 (ogonek-prefixify-buffer
104 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)
106 3. `ogonek-deprefixify-region' oraz `ogonek-deprefixify-buffer'
107 s/lu/z/a do usuwania notacji prefiksowej.
109 (ogonek-deprefixify-region (poczatek-fragmentu) (koniec-fragmentu)
110 znak-prefiksu nazwa-kodowania-docelowa)
111 (ogonek-prefixify-buffer
112 znak-prefiksu nazwa-kodowania-docelowa)
114 U/zycie klawisza TAB w trybie interakcyjnym powoduje wy/swietlenie
115 listy dopuszczalnych nazw kod/ow, pami/etanych w sta/lej
116 `ogonek-name-encoding-alist'.
118 Funkcje biblioteki odwo/luj/a si/e do pi/eciu zmiennych, kt/ore
119 przechowuj/a podpowiedzi do zadawanych pyta/n. Nazwy tych zmiennych
120 oraz ich warto/sci domy/slne s/a nast/epuj/ace:
122 ogonek-from-encoding iso8859-2
123 ogonek-to-encoding ascii
125 ogonek-prefix-from-encoding iso8859-2
126 ogonek-prefix-to-encoding iso8859-2
128 Powy/zsze warto/sci domy/slne mo/zna zmieni/c przez umieszczenie w pliku
129 konfiguracyjnym `~/.emacs' odpowiednich przypisa/n, na przyk/lad:
131 (setq ogonek-prefix-char ?/)
132 (setq ogonek-prefix-to-encoding \"iso8859-2\")
134 Zamiast wczytywania ca/lej biblioteki `ogonek.el' mo/zna w pliku
135 `~/.emacs' za/z/ada/c wczytania wybranych funkcji, na dodatek dopiero
136 w chwili ich rzeczywistego u/zycia:
138 (autoload 'ogonek-jak \"ogonek\")
139 (autoload 'ogonek-recode-region \"ogonek\")
140 (autoload 'ogonek-prefixify-region \"ogonek\")
141 (autoload 'ogonek-deprefixify-region \"ogonek\")
143 Cz/esto wyst/epuj/ace kombinacje wywo/la/n funkcji mo/zna dla wygody
144 skr/oci/c i przypisa/c klawiszom. Oto praktyczne przyk/lady:
146 (defun deprefixify-iso8859-2-region (start end)
148 (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
149 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d
151 (defun mazovia-to-iso8859-2 (start end)
153 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
154 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r
156 (defun prefixify-iso8859-2-region (start end)
158 (ogonek-prefixify-region start end \"iso8859-2\" ?/))
159 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p
161 Ka/zd/a operacj/e przekodowania mo/zna w ca/lo/sci odwo/la/c
162 przez wykonanie polecenia `undo'.")
165 "Display `ogonek-informacja' in an auxiliary *ogonek-jak* buffer."
167 (set-buffer (get-buffer-create " *ogonek-jak*"))
168 (insert ogonek-informacja
)
169 (switch-to-buffer " *ogonek-jak*")
170 (goto-char (point-min)))
172 ;; ------ A Little Info in English --------
174 (defconst ogonek-information
175 " THE INTERACTIVE FUNCTIONS PROVIDED BY THE LIBRARY `ogonek'.
177 If you read this text then you are either looking at the library's
178 source text or you have called the `ogonek-how' command. In the
179 latter case you may remove this text using `M-x kill-buffer'.
181 The library provides functions for changing the encoding of Polish
182 diacritic characters, the ones with an `ogonek' below or above them.
183 The functions come in the following groups.
185 1. `ogonek-recode-region' and `ogonek-recode-buffer' to change
186 between one-character encodings, such as `iso-8859-2', `mazovia',
187 plain `ascii' or `TeX'. As the names suggest you may recode
188 either the entire current buffer or just a marked region
189 in it. You may use the functions interactively as commands.
190 Once you call a command you will be asked about the code
191 currently used in your text and the target encoding, the one
192 you want to get. The following example shows a non-interactive
193 use of the functions in a program. This also illustrates what
194 type of parameters the functions expect to be called with:
196 (ogonek-recode-region
197 (region-beginning) (region-end) from-code-name to-code-name)
198 (ogonek-recode-buffer from-code-name to-code-name)
200 2. `ogonek-prefixify-region' and `ogonek-prefixify-buffer' for
201 introducing prefix notation:
203 (ogonek-prefixify-region
204 (region-beginning) (region-end) from-code-name prefix-char)
205 (ogonek-prefixify-buffer from-code-name prefix-char)
207 3. `ogonek-deprefixify-region' and `ogonek-deprefixify-buffer' for
208 removing prefix notation:
210 (ogonek-deprefixify-region
211 (region-beginning) (region-end) prefix-char to-code-name)
212 (ogonek-prefixify-buffer prefix-char to-code-name)
214 The TAB character used in interactive mode makes `emacs'
215 display the list of encodings recognized by the library. The list
216 is stored in the constant `ogonek-name-encoding-alist'.
218 The `ogonek' functions refer to five variables in which the suggested
219 answers to dialogue questions are stored. The variables and their
222 ogonek-from-encoding iso8859-2
223 ogonek-to-encoding ascii
225 ogonek-prefix-from-encoding iso8859-2
226 ogonek-prefix-to-encoding iso8859-2
228 The above default values can be changed by placing appropriate settings
229 in the '~/.emacs' file:
231 (setq ogonek-prefix-char ?/)
232 (setq ogonek-prefix-to-encoding \"iso8859-2\")
234 Instead of loading the whole library `ogonek' it may be better to
235 autoload the needed functions, for example by placing in `~/.emacs':
237 (autoload 'ogonek-how \"ogonek\")
238 (autoload 'ogonek-recode-region \"ogonek\")
239 (autoload 'ogonek-prefixify-region \"ogonek\")
240 (autoload 'ogonek-deprefixify-region \"ogonek\")
242 The most frequent function calls can be abbreviated and assigned to
243 keyboard keys. Here are a few practical examples:
245 (defun deprefixify-iso8859-2-region (start end)
247 (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
248 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d
250 (defun mazovia-to-iso8859-2 (start end)
252 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
253 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r
255 (defun prefixify-iso8859-2-region (start end)
257 (ogonek-prefixify-region start end \"iso8859-2\" ?/))
258 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p
260 Each recoding operation can be called off using the `undo' command.")
263 "Display `ogonek-information' in an auxiliary *recode-how* buffer."
265 (set-buffer (get-buffer-create " *ogonek-how*"))
266 (insert ogonek-information
)
267 (switch-to-buffer " *ogonek-how*")
268 (goto-char (point-min)))
270 ;; ---- Variables keeping the suggested answers to dialogue questions -----
271 (defvar ogonek-encoding-choices
273 (mapcar (lambda (x) (list 'const
(car x
)))
274 ogonek-name-encoding-alist
))
275 "List of ogonek encodings. Used only for customization.")
276 (defcustom ogonek-from-encoding
"iso8859-2"
277 "*Encoding in the source file of recoding."
278 :type ogonek-encoding-choices
280 (defcustom ogonek-to-encoding
"ascii"
281 "*Encoding in the target file of recoding."
282 :type ogonek-encoding-choices
284 (defcustom ogonek-prefix-char ?
/
285 "*Prefix character for prefix encodings."
288 (defcustom ogonek-prefix-from-encoding
"iso8859-2"
289 "*Encoding in the source file subject to prefixifation."
290 :type ogonek-encoding-choices
292 (defcustom ogonek-prefix-to-encoding
"iso8859-2"
293 "*Encoding in the target file subject to deprefixifation."
294 :type ogonek-encoding-choices
297 ;; ---- Auxiliary functions for reading parameters in interactive mode ----
299 (defun ogonek-read-encoding (prompt default-name-var
)
300 "Read encoding name with completion based on `ogonek-name-encoding-alist'.
301 Store the name in the parameter-variable DEFAULT-NAME-VAR.
302 PROMPT is a string to be shown when the user is asked for a name."
305 (format "%s (default %s): " prompt
(eval default-name-var
))
306 ogonek-name-encoding-alist nil t
)))
307 ;; change the default name to the one just read
308 (set default-name-var
309 (if (string= encoding
"") (eval default-name-var
) encoding
))
310 ;; return the new default as the name you read
311 (eval default-name-var
)))
313 (defun ogonek-read-prefix (prompt default-prefix-var
)
314 "Read a prefix character for prefix notation.
315 The result is stored in the variable DEFAULT-PREFIX-VAR.
316 PROMPT is a string to be shown when the user is asked for a new prefix."
319 (format "%s (default %s): " prompt
320 (char-to-string (eval default-prefix-var
))))))
321 (if (> (length prefix-string
) 1)
322 (error "! Only one character expected")
323 ;; set the default prefix character to the one just read
324 (set default-prefix-var
325 (if (string= prefix-string
"")
326 (eval default-prefix-var
)
327 (string-to-char prefix-string
)))
328 ;; the new default prefix is the function's result:
329 (eval default-prefix-var
))))
331 (defun ogonek-lookup-encoding (encoding)
332 "Pick up an association for ENCODING in `ogonek-name-encoding-alist'.
333 Before returning a result test whether the string ENCODING is in
334 the list `ogonek-name-encoding-alist'"
335 (let ((code-list (assoc encoding ogonek-name-encoding-alist
)))
337 (error "! Name `%s' not known in `ogonek-name-encoding-alist'"
341 ;; ---- An auxiliary function for zipping two lists of equal length ----
343 (defun ogonek-zip-lists (xs ys
)
344 "Build a list of pairs from lists XS and YS of the same length."
347 (setq pairs
(cons (cons (car xs
) (car ys
)) pairs
))
350 ;; `pairs' are the function's result
353 ;; ---- An auxiliary function building a one-to-one recoding table -----
355 (defun ogonek-build-table (recoding-pairs)
356 "Build a table required by Emacs's `translate-region' function.
357 RECODING-PAIRS is a list of character pairs for which recoding
359 By using the built-in `translate-region' function
360 we gain better performance compared to converting characters
361 by a hand-written routine as it is done for prefix encodings."
362 (let ((table (make-string 256 0))
367 ;; make changes in `table' according to `recoding-pairs'
368 (while recoding-pairs
369 (aset table
(car (car recoding-pairs
)) (cdr (car recoding-pairs
)))
370 (setq recoding-pairs
(cdr recoding-pairs
)))
371 ;; return the table just built
374 ;; ---- Commands for one-to-one recoding -------------------------------
376 (defun ogonek-recode-region (start end from-encoding to-encoding
)
377 "Recode text in a marked region in one-to-one manner.
378 When called interactively ask the user for the names of the FROM-
380 (interactive (progn (barf-if-buffer-read-only)
384 (ogonek-read-encoding "From code" 'ogonek-from-encoding
)
385 (ogonek-read-encoding "To code" 'ogonek-to-encoding
))))
391 (ogonek-lookup-encoding from-encoding
)
392 (ogonek-lookup-encoding to-encoding
))))))
394 (defun ogonek-recode-buffer (from-encoding to-encoding
)
395 "Call `ogonek-recode-region' on the entire buffer.
396 When called interactively ask the user for the names of the FROM-
398 (interactive (progn (barf-if-buffer-read-only)
400 (ogonek-read-encoding "From code" 'ogonek-from-encoding
)
401 (ogonek-read-encoding "To code" 'ogonek-to-encoding
))))
402 (ogonek-recode-region
403 (point-min) (point-max) from-encoding to-encoding
))
405 ;; ---- Recoding with prefix notation -------------------------------
407 (defconst ogonek-prefix-code
'(?A ?C ?E ?L ?N ?O ?S ?X ?Z
408 ?a ?c ?e ?l ?n ?o ?s ?x ?z
))
410 (defun ogonek-prefixify-region (start end from-encoding prefix-char
)
411 "In a region, replace FROM-encoded Polish characters with PREFIX pairs.
412 A PREFIX pair generated consists of PREFIX-CHAR and the respective
413 character listed in the `ogonek-prefix-code' constant.
414 PREFIX-CHAR itself gets doubled."
415 (interactive (progn (barf-if-buffer-read-only)
419 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding
)
420 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char
))))
422 ((from-code (ogonek-lookup-encoding from-encoding
))
423 (to-code ogonek-prefix-code
)
424 (recoding-pairs ; `ogonek-prefix-char' added for doubling
426 (cons prefix-char from-code
)
427 (cons prefix-char to-code
))))
430 (while (< (point) end
)
431 (let ((pair (assoc (following-char) recoding-pairs
)))
433 ;; not a Polish character -- skip it
435 ;; Polish character -- insert a prefix pair instead
437 (insert ogonek-prefix-char
)
439 ;; the region is now one character longer
440 (setq end
(1+ end
))))))))
442 (defun ogonek-prefixify-buffer (from-encoding prefix-char
)
443 "Call `ogonek-prefixify-region' on the entire buffer."
444 (interactive (progn (barf-if-buffer-read-only)
446 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding
)
447 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char
))))
448 (ogonek-prefixify-region
449 (point-min) (point-max) from-encoding prefix-char
))
451 (defun ogonek-deprefixify-region (start end prefix-char to-encoding
)
452 "In a region, replace PREFIX pairs with their corresponding TO-encodings.
453 PREFIX-CHAR followed by a Polish character from the `ogonek-prefix-code'
454 list is replaced with the corresponding TO-encoded character. A doubled
455 PREFIX-CHAR gets replaced with a single one. A combination of PREFIX-CHAR
456 followed by a non-Polish character, that is one not listed in the
457 `ogonek-prefix-code' constant, is left unchanged."
458 (interactive (progn (barf-if-buffer-read-only)
459 (list (region-beginning)
462 "Prefix character" 'ogonek-prefix-char
)
463 (ogonek-read-encoding
464 "To code" 'ogonek-prefix-to-encoding
))))
466 ((from-code ogonek-prefix-code
)
467 (to-code (ogonek-lookup-encoding to-encoding
))
470 (cons prefix-char from-code
)
471 (cons prefix-char to-code
))))
474 (while (< (point) end
)
476 (if (or (not (= (preceding-char) prefix-char
)) (= (point) end
))
477 ;; non-prefix character or the end-of-region -- do nothing
479 ;; now, we can check the next character
480 (let ((pair (assoc (following-char) recoding-pairs
)))
482 ;; `following-char' is not a Polish character nor it is
483 ;; `prefix-char' since the one is among `recoding-pairs'
485 ;; else prefix followed by a Polish character has been found
486 ;; replace it by the corresponding Polish character
490 ;; the region got shorter by one character
491 (setq end
(1- end
)))))))))
493 (defun ogonek-deprefixify-buffer (prefix-char to-encoding
)
494 "Call `ogonek-deprefixify-region' on the entire buffer."
495 (interactive (progn (barf-if-buffer-read-only)
497 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char
)
498 (ogonek-read-encoding "To code" 'ogonek-prefix-to-encoding
))))
499 (ogonek-deprefixify-region
500 (point-min) (point-max) prefix-char to-encoding
))
504 ;;; arch-tag: 672d7744-28ac-412b-965e-06a27e50d1d7
505 ;;; ogonek.el ends here