1 ;;; ogonek.el --- change the encoding of Polish diacritics
3 ;; Copyright (C) 1997-1998, 2001-2012 Free Software Foundation, Inc.
5 ;; Author: W{\l}odek Bzyl
7 ;; Maintainer: Ryszard Kubiak <rysiek@ipipan.gda.pl>
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 ;; To use this library load it using
28 ;; M-x load-library [enter] ogonek
29 ;; Then, you may get a short info by calling one of
30 ;; M-x ogonek-jak -- in Polish
;;   M-x ogonek-how   -- in English
;; Customization group that collects every `ogonek-' user option.
(defgroup ogonek nil
  "Change the encoding of Polish diacritic characters."
  :prefix "ogonek-"
  :group 'i18n)
(defconst ogonek-name-encoding-alist
  ;; Every entry is (NAME . CODES); CODES always holds 18 character codes,
  ;; upper-case letters in the first row and lower-case ones in the second.
  '(("ascii" .
     (?A ?C ?E ?L ?N ?O ?S ?Z ?Z
      ?a ?c ?e ?l ?n ?o ?s ?z ?z))
    ("iso8859-2" .
     (161 198 202 163 209 211 166 172 175
      177 230 234 179 241 243 182 188 191))
    ("mazovia" .
     (143 149 144 156 165 163 152 160 161
      134 141 145 146 164 162 158 166 167))
    ("windows-EE" .
     (165 198 202 163 209 211 140 143 175
      185 230 234 179 241 243 156 159 191))
    ("windows-PL" .
     (165 198 202 163 209 211 140 143 175
      185 230 234 179 241 243 156 159 191))
    ("latin-2" .
     (164 143 168 157 227 224 151 141 189
      165 134 169 136 228 162 152 171 190))
    ("CP852" .
     (164 143 168 157 227 224 151 141 189
      165 134 169 136 228 162 152 171 190))
    ("MeX" .
     (129 130 134 138 139 211 145 153 155
      161 162 166 170 171 243 177 185 187))
    ("CorelDraw" .
     (197 242 201 163 209 211 255 225 237
      229 236 230 198 241 243 165 170 186))
    ("Amiga" .
     (194 202 203 206 207 211 212 218 219
      226 234 235 238 239 243 244 250 251))
    ("Mac" .
     (132 140 162 252 193 238 229 143 251
      136 141 171 184 196 151 230 144 253)))
  "The constant `ogonek-name-encoding-alist' is a list of (NAME.LIST) pairs.
Each LIST contains codes for 18 Polish diacritic characters. The codes
are given in the following order:
 Aogonek Cacute Eogonek Lslash Nacute Oacute Sacute Zacute Zdotaccent
 aogonek cacute eogonek lslash nacute oacute sacute zacute zdotaccent.")
70 ;; ------ A Little Info in Polish ---------------
;; Polish help text shown by `ogonek-jak'.  The text itself is written in
;; prefix notation with `/' as the prefix character (e.g. `/e', `/l', `/x').
(defconst ogonek-informacja
  " FUNKCJE INTERAKCYJNE UDOST/EPNIANE PRZEZ BIBLIOTEK/E `ogonek'.

Je/sli czytasz ten tekst, to albo przegl/adasz plik /xr/od/lowy
biblioteki `ogonek.el', albo wywo/la/le/s polecenie `ogonek-jak'.
W drugim przypadku mo/zesz usun/a/c tekst z ekranu, stosuj/ac
polecenie `M-x kill-buffer'.

Niniejsza biblioteka dostarcza funkcji do zmiany kodowania polskich
znak/ow diakrytycznych. Funkcje te mo/zna pogrupowa/c nast/epuj/aco.

 1. `ogonek-recode-region' oraz `ogonek-recode-buffer'
    przekodowuj/a zaznaczony fragment wzgl/ednie ca/ly buffor.
    Po wywo/laniu interakcyjnym funkcji zadawane s/a
    pytania o parametry przekodowania: nazw/e kodowania
    w tek/scie /xr/od/lowym i nazw/e kodowania docelowego.
    Poni/zsze przyk/lady powinny wyja/sni/c, jakich parametr/ow
    oczekuj/a wymienione funkcje:

      (ogonek-recode-region (poczatek-fragmentu) (koniec-fragmentu)
         nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)
      (ogonek-recode-buffer
         nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)

 2. `ogonek-prefixify-region' oraz `ogonek-prefixify-buffer'
    s/lu/z/a do wprowadzania notacji prefiksowej.

      (ogonek-prefixify-region (poczatek-fragmentu) (koniec-fragmentu)
         nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)
      (ogonek-prefixify-buffer
         nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)

 3. `ogonek-deprefixify-region' oraz `ogonek-deprefixify-buffer'
    s/lu/z/a do usuwania notacji prefiksowej.

      (ogonek-deprefixify-region (poczatek-fragmentu) (koniec-fragmentu)
         znak-prefiksu nazwa-kodowania-docelowa)
      (ogonek-deprefixify-buffer
         znak-prefiksu nazwa-kodowania-docelowa)

 U/zycie klawisza TAB w trybie interakcyjnym powoduje wy/swietlenie
 listy dopuszczalnych nazw kod/ow, pami/etanych w sta/lej
 `ogonek-name-encoding-alist'.

 Funkcje biblioteki odwo/luj/a si/e do pi/eciu zmiennych, kt/ore
 przechowuj/a podpowiedzi do zadawanych pyta/n. Nazwy tych zmiennych
 oraz ich warto/sci domy/slne s/a nast/epuj/ace:

   ogonek-from-encoding           iso8859-2
   ogonek-to-encoding             ascii
   ogonek-prefix-char             /
   ogonek-prefix-from-encoding    iso8859-2
   ogonek-prefix-to-encoding      iso8859-2

 Powy/zsze warto/sci domy/slne mo/zna zmieni/c przez umieszczenie w pliku
 konfiguracyjnym `~/.emacs' odpowiednich przypisa/n, na przyk/lad:

   (setq ogonek-prefix-char ?/)
   (setq ogonek-prefix-to-encoding \"iso8859-2\")

 Zamiast wczytywania ca/lej biblioteki `ogonek.el' mo/zna w pliku
 `~/.emacs' za/z/ada/c wczytania wybranych funkcji, na dodatek dopiero
 w chwili ich rzeczywistego u/zycia:

   (autoload 'ogonek-jak \"ogonek\")
   (autoload 'ogonek-recode-region \"ogonek\")
   (autoload 'ogonek-prefixify-region \"ogonek\")
   (autoload 'ogonek-deprefixify-region \"ogonek\")

 Cz/esto wyst/epuj/ace kombinacje wywo/la/n funkcji mo/zna dla wygody
 skr/oci/c i przypisa/c klawiszom. Oto praktyczne przyk/lady:

   (defun deprefixify-iso8859-2-region (start end)
     (interactive \"*r\")
     (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
   (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d

   (defun mazovia-to-iso8859-2 (start end)
     (interactive \"*r\")
     (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
   (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r

   (defun prefixify-iso8859-2-region (start end)
     (interactive \"*r\")
     (ogonek-prefixify-region start end \"iso8859-2\" ?/))
   (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p

 Ka/zd/a operacj/e przekodowania mo/zna w ca/lo/sci odwo/la/c
 przez wykonanie polecenia `undo'.")
(defun ogonek-jak ()
  "Display `ogonek-informacja' in an auxiliary *ogonek-jak* buffer.
The buffer is emptied first, so calling the command repeatedly does not
pile up copies of the text.  Point is left at the beginning of the buffer."
  (interactive)
  (switch-to-buffer (get-buffer-create " *ogonek-jak*"))
  ;; Start from a clean buffer; it may survive from an earlier call.
  (erase-buffer)
  (insert ogonek-informacja)
  (goto-char (point-min)))
170 ;; ------ A Little Info in English --------
;; English help text shown by `ogonek-how'.
(defconst ogonek-information
  " THE INTERACTIVE FUNCTIONS PROVIDED BY THE LIBRARY `ogonek'.

If you read this text then you are either looking at the library's
source text or you have called the `ogonek-how' command. In the
latter case you may remove this text using `M-x kill-buffer'.

The library provides functions for changing the encoding of Polish
diacritic characters, the ones with an `ogonek' below or above them.
The functions come in the following groups.

 1. `ogonek-recode-region' and `ogonek-recode-buffer' to change
    between one-character encodings, such as `iso-8859-2', `mazovia',
    plain `ascii' or `TeX'. As the names suggest you may recode
    either the entire current buffer or just a marked region
    in it. You may use the functions interactively as commands.
    Once you call a command you will be asked about the code
    currently used in your text and the target encoding, the one
    you want to get. The following example shows a non-interactive
    use of the functions in a program. This also illustrates what
    type of parameters the functions expect to be called with:

      (ogonek-recode-region
        (region-beginning) (region-end) from-code-name to-code-name)
      (ogonek-recode-buffer from-code-name to-code-name)

 2. `ogonek-prefixify-region' and `ogonek-prefixify-buffer' for
    introducing prefix notation:

      (ogonek-prefixify-region
        (region-beginning) (region-end) from-code-name prefix-char)
      (ogonek-prefixify-buffer from-code-name prefix-char)

 3. `ogonek-deprefixify-region' and `ogonek-deprefixify-buffer' for
    removing prefix notation:

      (ogonek-deprefixify-region
        (region-beginning) (region-end) prefix-char to-code-name)
      (ogonek-deprefixify-buffer prefix-char to-code-name)

 The TAB character used in interactive mode makes `emacs'
 display the list of encodings recognized by the library. The list
 is stored in the constant `ogonek-name-encoding-alist'.

 The `ogonek' functions refer to five variables in which the suggested
 answers to dialogue questions are stored. The variables and their
 default values are:

   ogonek-from-encoding           iso8859-2
   ogonek-to-encoding             ascii
   ogonek-prefix-char             /
   ogonek-prefix-from-encoding    iso8859-2
   ogonek-prefix-to-encoding      iso8859-2

 The above default values can be changed by placing appropriate settings
 in the `~/.emacs' file:

   (setq ogonek-prefix-char ?/)
   (setq ogonek-prefix-to-encoding \"iso8859-2\")

 Instead of loading the whole library `ogonek' it may be better to
 autoload the needed functions, for example by placing in `~/.emacs':

   (autoload 'ogonek-how \"ogonek\")
   (autoload 'ogonek-recode-region \"ogonek\")
   (autoload 'ogonek-prefixify-region \"ogonek\")
   (autoload 'ogonek-deprefixify-region \"ogonek\")

 The most frequent function calls can be abbreviated and assigned to
 keyboard keys. Here are a few practical examples:

   (defun deprefixify-iso8859-2-region (start end)
     (interactive \"*r\")
     (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
   (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d

   (defun mazovia-to-iso8859-2 (start end)
     (interactive \"*r\")
     (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
   (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r

   (defun prefixify-iso8859-2-region (start end)
     (interactive \"*r\")
     (ogonek-prefixify-region start end \"iso8859-2\" ?/))
   (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p

 Each recoding operation can be called off using the `undo' command.")
(defun ogonek-how ()
  "Display `ogonek-information' in an auxiliary *ogonek-how* buffer.
The buffer is emptied first, so calling the command repeatedly does not
pile up copies of the text.  Point is left at the beginning of the buffer."
  (interactive)
  (switch-to-buffer (get-buffer-create " *ogonek-how*"))
  ;; Start from a clean buffer; it may survive from an earlier call.
  (erase-buffer)
  (insert ogonek-information)
  (goto-char (point-min)))
268 ;; ---- Variables keeping the suggested answers to dialogue questions -----
(defvar ogonek-encoding-choices
  ;; Build the customize type (choice (const NAME1) (const NAME2) ...)
  ;; from the names registered in `ogonek-name-encoding-alist'.
  (cons 'choice
        (mapcar (lambda (entry) (list 'const (car entry)))
                ogonek-name-encoding-alist))
  "List of ogonek encodings. Used only for customization.")
;; Default answer offered by the recode commands for the source encoding.
(defcustom ogonek-from-encoding "iso8859-2"
  "Encoding in the source file of recoding."
  :type ogonek-encoding-choices
  :group 'ogonek)
;; Default answer offered by the recode commands for the target encoding.
(defcustom ogonek-to-encoding "ascii"
  "Encoding in the target file of recoding."
  :type ogonek-encoding-choices
  :group 'ogonek)
;; Default answer offered when a prefix character is asked for.
(defcustom ogonek-prefix-char ?/
  "Prefix character for prefix encodings."
  :type 'character
  :group 'ogonek)
;; Default answer offered by the prefixify commands for the source encoding.
(defcustom ogonek-prefix-from-encoding "iso8859-2"
  "Encoding in the source file subject to prefixation."
  :type ogonek-encoding-choices
  :group 'ogonek)
;; Default answer offered by the deprefixify commands for the target encoding.
(defcustom ogonek-prefix-to-encoding "iso8859-2"
  "Encoding in the target file subject to deprefixation."
  :type ogonek-encoding-choices
  :group 'ogonek)
295 ;; ---- Auxiliary functions for reading parameters in interactive mode ----
(defun ogonek-read-encoding (prompt default-name-var)
  "Read encoding name with completion based on `ogonek-name-encoding-alist'.
Store the name in the parameter-variable DEFAULT-NAME-VAR.
PROMPT is a string to be shown when the user is asked for a name.

DEFAULT-NAME-VAR is a symbol; its current value is shown as the default
and is kept when the user enters an empty string.  The value stored in
DEFAULT-NAME-VAR after the call is returned."
  (let ((encoding
         (completing-read
          (format "%s (default %s): " prompt (symbol-value default-name-var))
          ogonek-name-encoding-alist nil t)))
    ;; change the default name to the one just read
    (set default-name-var
         (if (string= encoding "")
             (symbol-value default-name-var)
           encoding))
    ;; return the new default as the name you read
    (symbol-value default-name-var)))
(defun ogonek-read-prefix (prompt default-prefix-var)
  "Read a prefix character for prefix notation.
The result is stored in the variable DEFAULT-PREFIX-VAR.
PROMPT is a string to be shown when the user is asked for a new prefix.

DEFAULT-PREFIX-VAR is a symbol whose value is a character; that value is
shown as the default and is kept when the user enters an empty string.
Signal an error if more than one character is entered.  The value stored
in DEFAULT-PREFIX-VAR after the call is returned."
  (let ((prefix-string
         (read-string
          (format "%s (default %s): " prompt
                  (char-to-string (symbol-value default-prefix-var))))))
    (if (> (length prefix-string) 1)
        (error "! Only one character expected")
      ;; set the default prefix character to the one just read
      (set default-prefix-var
           (if (string= prefix-string "")
               (symbol-value default-prefix-var)
             (string-to-char prefix-string)))
      ;; the new default prefix is the function's result:
      (symbol-value default-prefix-var))))
(defun ogonek-lookup-encoding (encoding)
  "Pick up an association for ENCODING in `ogonek-name-encoding-alist'.
Before returning a result test whether the string ENCODING is in
the list `ogonek-name-encoding-alist'.
Return the list of character codes stored under ENCODING; signal an
error when ENCODING is not a known name."
  (let ((code-list (assoc encoding ogonek-name-encoding-alist)))
    (if (null code-list)
        (error "! Name `%s' not known in `ogonek-name-encoding-alist'"
               encoding)
      (cdr code-list))))
339 ;; ---- An auxiliary function for zipping two lists of equal length ----
(defun ogonek-zip-lists (xs ys)
  "Build a list of pairs from lists XS and YS of the same length.
Each pair is a cons (X . Y) of the elements found at the same position.
The pairs are accumulated at the front of the result, so they come out
in reverse order relative to XS and YS."
  (let ((pairs nil))
    (while xs
      (push (cons (car xs) (car ys)) pairs)
      (setq xs (cdr xs)
            ys (cdr ys)))
    ;; `pairs' are the function's result
    pairs))
351 ;; ---- An auxiliary function building a one-to-one recoding table -----
(defun ogonek-build-table (recoding-pairs)
  "Build a table required by Emacs's `translate-region' function.
RECODING-PAIRS is a list of character pairs for which recoding
is not an identity.
By using the built-in `translate-region' function
we gain better performance compared to converting characters
by a hand-written routine as it is done for prefix encodings.

The result is a string of 256 characters in which position N holds the
replacement for character code N.  Pairs are applied in list order, so
for a repeated source code the last pair in RECODING-PAIRS wins."
  (let ((table (make-string 256 0)))
    ;; build an identity table
    (dotimes (code 256)
      (aset table code code))
    ;; make changes in `table' according to `recoding-pairs'
    (dolist (pair recoding-pairs)
      (aset table (car pair) (cdr pair)))
    ;; return the table just built
    table))
372 ;; ---- Commands for one-to-one recoding -------------------------------
(defun ogonek-recode-region (start end from-encoding to-encoding)
  "Recode text in a marked region in one-to-one manner.
When called interactively ask the user for the names of the FROM-
and TO- encodings.

START and END delimit the region.  FROM-ENCODING and TO-ENCODING are
names known to `ogonek-name-encoding-alist'; an unknown name signals
an error."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (region-beginning)
                       (region-end)
                       (ogonek-read-encoding "From code" 'ogonek-from-encoding)
                       (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
  (save-excursion
    (translate-region
     start end
     (ogonek-build-table
      (ogonek-zip-lists
       (ogonek-lookup-encoding from-encoding)
       (ogonek-lookup-encoding to-encoding))))))
(defun ogonek-recode-buffer (from-encoding to-encoding)
  "Call `ogonek-recode-region' on the entire buffer.
When called interactively ask the user for the names of the FROM-
and TO- encodings.
FROM-ENCODING and TO-ENCODING are passed on unchanged."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (ogonek-read-encoding "From code" 'ogonek-from-encoding)
                       (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
  (ogonek-recode-region
   (point-min) (point-max) from-encoding to-encoding))
403 ;; ---- Recoding with prefix notation -------------------------------
(defconst ogonek-prefix-code
  (list
   ;; upper-case letters
   ?A ?C ?E ?L ?N ?O ?S ?X ?Z
   ;; lower-case letters
   ?a ?c ?e ?l ?n ?o ?s ?x ?z)
  "Characters that follow the prefix character in prefix notation.
The list has 18 elements, nine upper-case letters followed by nine
lower-case ones.")
(defun ogonek-prefixify-region (start end from-encoding prefix-char)
  "In a region, replace FROM-encoded Polish characters with PREFIX pairs.
A PREFIX pair generated consists of PREFIX-CHAR and the respective
character listed in the `ogonek-prefix-code' constant.
PREFIX-CHAR itself gets doubled.

START and END delimit the region.  FROM-ENCODING is a name known to
`ogonek-name-encoding-alist'; an unknown name signals an error."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (region-beginning)
                       (region-end)
                       (ogonek-read-encoding
                        "From code" 'ogonek-prefix-from-encoding)
                       (ogonek-read-prefix
                        "Prefix character" 'ogonek-prefix-char))))
  (let* ((from-code (ogonek-lookup-encoding from-encoding))
         (to-code ogonek-prefix-code)
         ;; PREFIX-CHAR is paired with itself so that it gets doubled
         (recoding-pairs
          (ogonek-zip-lists
           (cons prefix-char from-code)
           (cons prefix-char to-code))))
    (save-excursion
      (goto-char start)
      (while (< (point) end)
        (let ((pair (assoc (following-char) recoding-pairs)))
          (if (null pair)
              ;; not a Polish character -- skip it
              (forward-char 1)
            ;; Polish character -- insert a prefix pair instead.
            ;; Use the PREFIX-CHAR argument, not the user option
            ;; `ogonek-prefix-char', so that non-interactive callers
            ;; get the prefix they asked for.
            (delete-char 1)
            (insert prefix-char)
            (insert (cdr pair))
            ;; the region is now one character longer
            (setq end (1+ end))))))))
(defun ogonek-prefixify-buffer (from-encoding prefix-char)
  "Call `ogonek-prefixify-region' on the entire buffer.
FROM-ENCODING and PREFIX-CHAR are passed on unchanged.  When called
interactively ask the user for both of them."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (ogonek-read-encoding
                        "From code" 'ogonek-prefix-from-encoding)
                       (ogonek-read-prefix
                        "Prefix character" 'ogonek-prefix-char))))
  (ogonek-prefixify-region
   (point-min) (point-max) from-encoding prefix-char))
(defun ogonek-deprefixify-region (start end prefix-char to-encoding)
  "In a region, replace PREFIX pairs with their corresponding TO-encodings.
PREFIX-CHAR followed by a Polish character from the `ogonek-prefix-code'
list is replaced with the corresponding TO-encoded character. A doubled
PREFIX-CHAR gets replaced with a single one. A combination of PREFIX-CHAR
followed by a non-Polish character, that is one not listed in the
`ogonek-prefix-code' constant, is left unchanged.

START and END delimit the region.  TO-ENCODING is a name known to
`ogonek-name-encoding-alist'; an unknown name signals an error."
  (interactive (progn (barf-if-buffer-read-only)
                      (list (region-beginning)
                            (region-end)
                            (ogonek-read-prefix
                             "Prefix character" 'ogonek-prefix-char)
                            (ogonek-read-encoding
                             "To code" 'ogonek-prefix-to-encoding))))
  (let* ((from-code ogonek-prefix-code)
         (to-code (ogonek-lookup-encoding to-encoding))
         ;; PREFIX-CHAR is paired with itself so that a doubled prefix
         ;; collapses into a single one
         (recoding-pairs
          (ogonek-zip-lists
           (cons prefix-char from-code)
           (cons prefix-char to-code))))
    (save-excursion
      (goto-char start)
      (while (< (point) end)
        (forward-char 1)
        (if (or (not (= (preceding-char) prefix-char)) (= (point) end))
            ;; non-prefix character or the end-of-region -- do nothing
            ()
          ;; now, we can check the next character
          (let ((pair (assoc (following-char) recoding-pairs)))
            (if (null pair)
                ;; `following-char' is not a Polish character nor it is
                ;; `prefix-char' since the one is among `recoding-pairs'
                (forward-char 1)
              ;; else prefix followed by a Polish character has been found
              ;; replace it by the corresponding Polish character
              (backward-char 1)
              (delete-char 2)
              (insert (cdr pair))
              ;; the region got shorter by one character
              (setq end (1- end)))))))))
(defun ogonek-deprefixify-buffer (prefix-char to-encoding)
  "Call `ogonek-deprefixify-region' on the entire buffer.
PREFIX-CHAR and TO-ENCODING are passed on unchanged.  When called
interactively ask the user for both of them."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (ogonek-read-prefix
                        "Prefix character" 'ogonek-prefix-char)
                       (ogonek-read-encoding
                        "To code" 'ogonek-prefix-to-encoding))))
  (ogonek-deprefixify-region
   (point-min) (point-max) prefix-char to-encoding))
502 ;;; ogonek.el ends here