1 ;;; ogonek.el --- change the encoding of Polish diacritics
3 ;; Copyright (C) 1997 Free Software Foundation, Inc.
5 ;; Author: W{\l}odek Bzyl, Ryszard Kubiak
6 ;; Maintainer: rysiek@ipipan.gda.pl (Ryszard Kubiak)
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
28 ;; To use this library load it using
29 ;; M-x load-library [enter] ogonek
30 ;; Then, you may get a short info by calling one of
31 ;; M-x ogonek-jak -- in Polish
32 ;; M-x ogonek-how -- in English "
;; Customization group collecting the user options of this library.
;; NOTE(review): the group name `ogonek' and the parent group `i18n' are
;; reconstructed from the library name and purpose -- confirm.
(defgroup ogonek nil
  "Change the encoding of Polish diacritic characters."
  :prefix "ogonek-"
  :group 'i18n)
;; Table of the known one-character encodings.  Every entry is
;; (NAME . CODES) where CODES has exactly 18 elements; see the docstring
;; for the order.  "windows-EE"/"windows-PL" and "latin-2"/"CP852" carry
;; identical code lists, i.e. they are aliases of each other.
(defconst ogonek-name-encoding-alist
  '(("ascii" .
     (?A ?C ?E ?L ?N ?O ?S ?Z ?Z
      ?a ?c ?e ?l ?n ?o ?s ?z ?z))
    ("iso8859-2" .
     (161 198 202 163 209 211 166 172 175
      177 230 234 179 241 243 182 188 191))
    ("mazovia" .
     (143 149 144 156 165 163 152 160 161
      134 141 145 146 164 162 158 166 167))
    ("windows-EE" .
     (165 198 202 163 209 211 140 143 175
      185 230 234 179 241 243 156 159 191))
    ("windows-PL" .
     (165 198 202 163 209 211 140 143 175
      185 230 234 179 241 243 156 159 191))
    ("latin-2" .
     (164 143 168 157 227 224 151 141 189
      165 134 169 136 228 162 152 171 190))
    ("CP852" .
     (164 143 168 157 227 224 151 141 189
      165 134 169 136 228 162 152 171 190))
    ("MeX" .
     (129 130 134 138 139 211 145 153 155
      161 162 166 170 171 243 177 185 187))
    ("CorelDraw" .
     (197 242 201 163 209 211 255 225 237
      229 236 230 198 241 243 165 170 186))
    ("Amiga" .
     (194 202 203 206 207 211 212 218 219
      226 234 235 238 239 243 244 250 251))
    ("Mac" .
     (132 140 162 252 193 238 229 143 251
      136 141 171 184 196 151 230 144 253)))
  "The constant `ogonek-name-encoding-alist' is a list of (NAME.LIST) pairs.
Each LIST contains codes for 18 Polish diacritic characters.  The codes
are given in the following order:
  Aogonek Cacute Eogonek Lslash Nacute Oacute Sacute Zacute Zdotaccent
  aogonek cacute eogonek lslash nacute oacute sacute zacute zdotaccent.")
71 ;; ------ A Little Info in Polish ---------------
;; Help text in Polish, shown by the `ogonek-jak' command.  The text is
;; written in `/' prefix notation (e.g. `/e', `/l').  The code examples
;; inside the string are indented so that no open parenthesis appears in
;; column 0 of a string.
;; NOTE(review): blank lines, indentation and the lines
;; `(ogonek-recode-buffer', `ogonek-prefix-char' and `(interactive ...)'
;; were missing from the garbled text and have been restored from the
;; surrounding context -- confirm against the upstream file.
(defconst ogonek-informacja
  "  FUNKCJE INTERAKCYJNE UDOST/EPNIANE PRZEZ BIBLIOTEK/E `ogonek'.

Je/sli czytasz ten tekst, to albo przegl/adasz plik /xr/od/lowy
biblioteki `ogonek.el', albo wywo/la/le/s polecenie `ogonek-jak'.
W drugim przypadku mo/zesz usun/a/c tekst z ekranu, stosuj/ac
polecenie `M-x kill-buffer'.

Niniejsza biblioteka dostarcza funkcji do zmiany kodowania polskich
znak/ow diakrytycznych. Funkcje te mo/zna pogrupowa/c nast/epuj/aco.

 1. `ogonek-recode-region' oraz `ogonek-recode-buffer'
    przekodowuj/a zaznaczony fragment wzgl/ednie ca/ly bufor.
    Po wywo/laniu interakcyjnym funkcji zadawane s/a
    pytania o parametry przekodowania: nazw/e kodowania
    w tek/scie /xr/od/lowym i nazw/e kodowania docelowego.
    Poni/zsze przyk/lady powinny wyja/sni/c, jakich parametr/ow
    oczekuj/a wymienione funkcje:

      (ogonek-recode-region (poczatek-fragmentu) (koniec-fragmentu)
         nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)
      (ogonek-recode-buffer
         nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)

 2. `ogonek-prefixify-region' oraz `ogonek-prefixify-buffer'
    s/lu/z/a do wprowadzania notacji prefiksowej.

      (ogonek-prefixify-region (poczatek-fragmentu) (koniec-fragmentu)
         nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)
      (ogonek-prefixify-buffer
         nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)

 3. `ogonek-deprefixify-region' oraz `ogonek-deprefixify-buffer'
    s/lu/z/a do usuwania notacji prefiksowej.

      (ogonek-deprefixify-region (poczatek-fragmentu) (koniec-fragmentu)
         znak-prefiksu nazwa-kodowania-docelowa)
      (ogonek-deprefixify-buffer
         znak-prefiksu nazwa-kodowania-docelowa)

U/zycie klawisza TAB w trybie interakcyjnym powoduje wy/swietlenie
listy dopuszczalnych nazw kod/ow, pami/etanych w sta/lej
`ogonek-name-encoding-alist'.

Funkcje biblioteki odwo/luj/a si/e do pi/eciu zmiennych, kt/ore
przechowuj/a podpowiedzi do zadawanych pyta/n. Nazwy tych zmiennych
oraz ich warto/sci domy/slne s/a nast/epuj/ace:

      ogonek-from-encoding          iso8859-2
      ogonek-to-encoding            ascii
      ogonek-prefix-char            /
      ogonek-prefix-from-encoding   iso8859-2
      ogonek-prefix-to-encoding     iso8859-2

Powy/zsze warto/sci domy/slne mo/zna zmieni/c przez umieszczenie w pliku
konfiguracyjnym `~/.emacs' odpowiednich przypisa/n, na przyk/lad:

      (setq ogonek-prefix-char ?/)
      (setq ogonek-prefix-to-encoding \"iso8859-2\")

Zamiast wczytywania ca/lej biblioteki `ogonek.el' mo/zna w pliku
`~/.emacs' za/z/ada/c wczytania wybranych funkcji, na dodatek dopiero
w chwili ich rzeczywistego u/zycia:

      (autoload 'ogonek-jak \"ogonek\")
      (autoload 'ogonek-recode-region \"ogonek\")
      (autoload 'ogonek-prefixify-region \"ogonek\")
      (autoload 'ogonek-deprefixify-region \"ogonek\")

Cz/esto wyst/epuj/ace kombinacje wywo/la/n funkcji mo/zna dla wygody
skr/oci/c i przypisa/c klawiszom. Oto praktyczne przyk/lady:

      (defun deprefixify-iso8859-2-region (start end)
        (interactive \"*r\")
        (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
      (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d

      (defun mazovia-to-iso8859-2 (start end)
        (interactive \"*r\")
        (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
      (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r

      (defun prefixify-iso8859-2-region (start end)
        (interactive \"*r\")
        (ogonek-prefixify-region start end \"iso8859-2\" ?/))
      (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p

Ka/zd/a operacj/e przekodowania mo/zna w ca/lo/sci odwo/la/c
przez wykonanie polecenia `undo'."
  "Help text in Polish describing the interactive `ogonek' functions.")
(defun ogonek-jak ()
  "Display `ogonek-informacja' in an auxiliary *ogonek-jak* buffer.
The buffer is emptied first, so calling the command repeatedly does
not accumulate several copies of the text."
  (interactive)
  (let ((help-buffer (get-buffer-create " *ogonek-jak*")))
    (with-current-buffer help-buffer
      (erase-buffer)
      (insert ogonek-informacja))
    (switch-to-buffer help-buffer)
    ;; `beginning-of-buffer' is meant for interactive use (it sets the
    ;; mark and prints a message); from Lisp just move point.
    (goto-char (point-min))))
171 ;; ------ A Little Info in English --------
;; Help text in English, shown by the `ogonek-how' command.  The code
;; examples inside the string are indented so that no open parenthesis
;; appears in column 0 of a string.
;; NOTE(review): blank lines, indentation, the phrase "default values
;; are as follows:", the `ogonek-prefix-char' row and the
;; `(interactive ...)' lines were missing from the garbled text and have
;; been restored from the surrounding context -- confirm against the
;; upstream file.
(defconst ogonek-information
  "  THE INTERACTIVE FUNCTIONS PROVIDED BY THE LIBRARY `ogonek'.

If you read this text then you are either looking at the library's
source text or you have called the `ogonek-how' command. In the
latter case you may remove this text using `M-x kill-buffer'.

The library provides functions for changing the encoding of Polish
diacritic characters, the ones with an `ogonek' below or above them.
The functions come in the following groups.

 1. `ogonek-recode-region' and `ogonek-recode-buffer' to change
    between one-character encodings, such as `iso8859-2', `mazovia',
    plain `ascii' or `MeX'. As the names suggest you may recode
    either the entire current buffer or just a marked region
    in it. You may use the functions interactively as commands.
    Once you call a command you will be asked about the code
    currently used in your text and the target encoding, the one
    you want to get. The following example shows a non-interactive
    use of the functions in a program. This also illustrates what
    type of parameters the functions expect to be called with:

      (ogonek-recode-region
        (region-beginning) (region-end) from-code-name to-code-name)
      (ogonek-recode-buffer from-code-name to-code-name)

 2. `ogonek-prefixify-region' and `ogonek-prefixify-buffer' for
    introducing prefix notation:

      (ogonek-prefixify-region
        (region-beginning) (region-end) from-code-name prefix-char)
      (ogonek-prefixify-buffer from-code-name prefix-char)

 3. `ogonek-deprefixify-region' and `ogonek-deprefixify-buffer' for
    removing prefix notation:

      (ogonek-deprefixify-region
        (region-beginning) (region-end) prefix-char to-code-name)
      (ogonek-deprefixify-buffer prefix-char to-code-name)

The TAB character used in interactive mode makes `emacs'
display the list of encodings recognized by the library. The list
is stored in the constant `ogonek-name-encoding-alist'.

The `ogonek' functions refer to five variables in which the suggested
answers to dialogue questions are stored. The variables and their
default values are as follows:

      ogonek-from-encoding          iso8859-2
      ogonek-to-encoding            ascii
      ogonek-prefix-char            /
      ogonek-prefix-from-encoding   iso8859-2
      ogonek-prefix-to-encoding     iso8859-2

The above default values can be changed by placing appropriate settings
in the `~/.emacs' file:

      (setq ogonek-prefix-char ?/)
      (setq ogonek-prefix-to-encoding \"iso8859-2\")

Instead of loading the whole library `ogonek' it may be better to
autoload the needed functions, for example by placing in `~/.emacs':

      (autoload 'ogonek-how \"ogonek\")
      (autoload 'ogonek-recode-region \"ogonek\")
      (autoload 'ogonek-prefixify-region \"ogonek\")
      (autoload 'ogonek-deprefixify-region \"ogonek\")

The most frequent function calls can be abbreviated and assigned to
keyboard keys. Here are a few practical examples:

      (defun deprefixify-iso8859-2-region (start end)
        (interactive \"*r\")
        (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
      (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d

      (defun mazovia-to-iso8859-2 (start end)
        (interactive \"*r\")
        (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
      (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r

      (defun prefixify-iso8859-2-region (start end)
        (interactive \"*r\")
        (ogonek-prefixify-region start end \"iso8859-2\" ?/))
      (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p

Each recoding operation can be called off using the `undo' command."
  "Help text in English describing the interactive `ogonek' functions.")
(defun ogonek-how ()
  "Display `ogonek-information' in an auxiliary *ogonek-how* buffer.
The buffer is emptied first, so calling the command repeatedly does
not accumulate several copies of the text."
  (interactive)
  (let ((help-buffer (get-buffer-create " *ogonek-how*")))
    (with-current-buffer help-buffer
      (erase-buffer)
      (insert ogonek-information))
    (switch-to-buffer help-buffer)
    ;; `beginning-of-buffer' is meant for interactive use (it sets the
    ;; mark and prints a message); from Lisp just move point.
    (goto-char (point-min))))
269 ;; ---- Variables keeping the suggested answers to dialogue questions -----
;; Customization type shared by the encoding-name options: a `choice'
;; widget with one `const' alternative per name found in
;; `ogonek-name-encoding-alist'.
(defvar ogonek-encoding-choices
  (cons 'choice
        (mapcar (lambda (entry) (list 'const (car entry)))
                ogonek-name-encoding-alist))
  "List of ogonek encodings.  Used only for customization.")
;; Default answer offered when a recode command asks for the source
;; encoding; must be one of the names in `ogonek-name-encoding-alist'.
(defcustom ogonek-from-encoding "iso8859-2"
  "*Encoding in the source file of recoding."
  :type ogonek-encoding-choices
  :group 'ogonek)
;; Default answer offered when a recode command asks for the target
;; encoding; must be one of the names in `ogonek-name-encoding-alist'.
(defcustom ogonek-to-encoding "ascii"
  "*Encoding in the target file of recoding."
  :type ogonek-encoding-choices
  :group 'ogonek)
;; The character literal must be written as `?/' on one line: a `?'
;; followed by a line break reads as the newline character.
(defcustom ogonek-prefix-char ?/
  "*Prefix character for prefix encodings."
  :type 'character
  :group 'ogonek)
;; Default answer offered when a prefixify command asks for the source
;; encoding; must be one of the names in `ogonek-name-encoding-alist'.
(defcustom ogonek-prefix-from-encoding "iso8859-2"
  "*Encoding in the source file subject to prefixification."
  :type ogonek-encoding-choices
  :group 'ogonek)
;; Default answer offered when a deprefixify command asks for the target
;; encoding; must be one of the names in `ogonek-name-encoding-alist'.
(defcustom ogonek-prefix-to-encoding "iso8859-2"
  "*Encoding in the target file subject to deprefixification."
  :type ogonek-encoding-choices
  :group 'ogonek)
296 ;; ---- Auxiliary functions for reading parameters in interactive mode ----
(defun ogonek-read-encoding (prompt default-name-var)
  "Read encoding name with completion based on `ogonek-name-encoding-alist'.
Store the name in the parameter-variable DEFAULT-NAME-VAR.
PROMPT is a string to be shown when the user is asked for a name.

DEFAULT-NAME-VAR is a symbol whose value is the current default name.
Empty input keeps that default.  The value returned is the (possibly
updated) value of DEFAULT-NAME-VAR."
  (let ((encoding
         (completing-read
          (format "%s (default %s): " prompt (symbol-value default-name-var))
          ogonek-name-encoding-alist nil t)))
    ;; change the default name to the one just read
    (set default-name-var
         (if (string= encoding "")
             (symbol-value default-name-var)
           encoding))
    ;; return the new default as the name you read
    (symbol-value default-name-var)))
(defun ogonek-read-prefix (prompt default-prefix-var)
  "Read a prefix character for prefix notation.
The result is stored in the variable DEFAULT-PREFIX-VAR.
PROMPT is a string to be shown when the user is asked for a new prefix.

DEFAULT-PREFIX-VAR is a symbol whose value is the current default
character.  Empty input keeps that default; input longer than one
character signals an error.  The value returned is the (possibly
updated) value of DEFAULT-PREFIX-VAR."
  (let ((prefix-string
         (read-string
          (format "%s (default %s): " prompt
                  (char-to-string (symbol-value default-prefix-var))))))
    (if (> (length prefix-string) 1)
        (error "! Only one character expected")
      ;; set the default prefix character to the one just read
      (set default-prefix-var
           (if (string= prefix-string "")
               (symbol-value default-prefix-var)
             (string-to-char prefix-string)))
      ;; the new default prefix is the function's result:
      (symbol-value default-prefix-var))))
(defun ogonek-lookup-encoding (encoding)
  "Pick up an association for ENCODING in `ogonek-name-encoding-alist'.
Before returning a result test whether the string ENCODING is in
the list `ogonek-name-encoding-alist'.

Return the list of character codes stored under ENCODING.  Signal an
error if ENCODING is not a known name."
  (let ((code-list (assoc encoding ogonek-name-encoding-alist)))
    (if (null code-list)
        (error "! Name `%s' not known in `ogonek-name-encoding-alist'"
               encoding)
      ;; drop the name, keep only the codes
      (cdr code-list))))
340 ;; ---- An auxiliary function for zipping two lists of equal length ----
(defun ogonek-zip-lists (xs ys)
  "Build a list of pairs from lists XS and YS of the same length.
Each pair is (X . Y) with X and Y taken from the same position.
The pairs are accumulated at the front of the result, so they come
out in reverse order of the input lists."
  (let ((pairs nil))
    (while xs
      (setq pairs (cons (cons (car xs) (car ys)) pairs))
      (setq xs (cdr xs))
      (setq ys (cdr ys)))
    ;; `pairs' are the function's result
    pairs))
352 ;; ---- An auxiliary function building a one-to-one recoding table -----
(defun ogonek-build-table (recoding-pairs)
  "Build a table required by Emacs's `translate-region' function.
RECODING-PAIRS is a list of character pairs for which recoding
is not an identity; each pair has the form (FROM . TO).
By using the built-in `translate-region' function
we gain better performance compared to converting characters
by a hand-written routine as it is done for prefix encodings.

Return a 256-element string in which position FROM holds TO and
every other position holds its own index."
  (let ((table (make-string 256 0))
        (i 0))
    ;; first make an identity table
    (while (< i 256)
      (aset table i i)
      (setq i (1+ i)))
    ;; make changes in `table' according to `recoding-pairs'
    (while recoding-pairs
      (aset table (car (car recoding-pairs)) (cdr (car recoding-pairs)))
      (setq recoding-pairs (cdr recoding-pairs)))
    ;; return the table just built
    table))
373 ;; ---- Commands for one-to-one recoding -------------------------------
(defun ogonek-recode-region (start end from-encoding to-encoding)
  "Recode text in a marked region in one-to-one manner.
When called interactively ask the user for the names of the FROM-
and TO- encodings.

START and END delimit the region.  FROM-ENCODING and TO-ENCODING are
names listed in `ogonek-name-encoding-alist'; an unknown name signals
an error."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (region-beginning)
                       (region-end)
                       (ogonek-read-encoding "From code" 'ogonek-from-encoding)
                       (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
  (save-excursion
    (translate-region
     start end
     ;; pair up the codes of both encodings position by position and
     ;; turn the pairs into a translation table
     (ogonek-build-table
      (ogonek-zip-lists
       (ogonek-lookup-encoding from-encoding)
       (ogonek-lookup-encoding to-encoding))))))
(defun ogonek-recode-buffer (from-encoding to-encoding)
  "Call `ogonek-recode-region' on the entire buffer.
When called interactively ask the user for the names of the FROM-
and TO- encodings.

FROM-ENCODING and TO-ENCODING are names listed in
`ogonek-name-encoding-alist'."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (ogonek-read-encoding "From code" 'ogonek-from-encoding)
                       (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
  (ogonek-recode-region
   (point-min) (point-max) from-encoding to-encoding))
404 ;; ---- Recoding with prefix notation -------------------------------
;; Letters written after the prefix character in prefix notation.  The
;; list has 18 elements, capitals first and then the same nine letters
;; in lower case.
(defconst ogonek-prefix-code
  (append (string-to-list "ACELNOSXZ")
          (string-to-list "acelnosxz")))
(defun ogonek-prefixify-region (start end from-encoding prefix-char)
  "In a region, replace FROM-encoded Polish characters with PREFIX pairs.
A PREFIX pair generated consists of PREFIX-CHAR and the respective
character listed in the `ogonek-prefix-code' constant.
PREFIX-CHAR itself gets doubled.

START and END delimit the region.  FROM-ENCODING is a name listed in
`ogonek-name-encoding-alist'."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (region-beginning)
                       (region-end)
                       (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding)
                       (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char))))
  (let*
      ((from-code (ogonek-lookup-encoding from-encoding))
       (to-code ogonek-prefix-code)
       (recoding-pairs ; PREFIX-CHAR added for doubling
        (ogonek-zip-lists
         (cons prefix-char from-code)
         (cons prefix-char to-code))))
    (save-excursion
      (goto-char start)
      (while (< (point) end)
        (let ((pair (assoc (following-char) recoding-pairs)))
          (if (null pair)
              ;; not a Polish character -- skip it
              (forward-char 1)
            ;; Polish character -- insert a prefix pair instead
            (delete-char 1)
            ;; use the PREFIX-CHAR argument, not the global default
            ;; `ogonek-prefix-char', so that non-interactive callers
            ;; get the prefix they asked for
            (insert prefix-char)
            (insert (cdr pair))
            ;; the region is now one character longer
            (setq end (1+ end))))))))
(defun ogonek-prefixify-buffer (from-encoding prefix-char)
  "Call `ogonek-prefixify-region' on the entire buffer.
FROM-ENCODING is a name listed in `ogonek-name-encoding-alist' and
PREFIX-CHAR is the character that introduces a prefix pair."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding)
                       (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char))))
  (ogonek-prefixify-region
   (point-min) (point-max) from-encoding prefix-char))
(defun ogonek-deprefixify-region (start end prefix-char to-encoding)
  "In a region, replace PREFIX pairs with their corresponding TO-encodings.
PREFIX-CHAR followed by a Polish character from the `ogonek-prefix-code'
list is replaced with the corresponding TO-encoded character.  A doubled
PREFIX-CHAR gets replaced with a single one.  A combination of PREFIX-CHAR
followed by a non-Polish character, that is one not listed in the
`ogonek-prefix-code' constant, is left unchanged.

START and END delimit the region.  TO-ENCODING is a name listed in
`ogonek-name-encoding-alist'."
  (interactive (progn (barf-if-buffer-read-only)
                      (list (region-beginning)
                            (region-end)
                            (ogonek-read-prefix
                             "Prefix character" 'ogonek-prefix-char)
                            (ogonek-read-encoding
                             "To code" 'ogonek-prefix-to-encoding))))
  (let*
      ((from-code ogonek-prefix-code)
       (to-code (ogonek-lookup-encoding to-encoding))
       (recoding-pairs ; PREFIX-CHAR added so a doubled prefix collapses
        (ogonek-zip-lists
         (cons prefix-char from-code)
         (cons prefix-char to-code))))
    (save-excursion
      (goto-char start)
      (while (< (point) end)
        (forward-char 1)
        (if (or (not (= (preceding-char) prefix-char)) (= (point) end))
            ;; non-prefix character or the end-of-region -- do nothing
            nil
          ;; now, we can check the next character
          (let ((pair (assoc (following-char) recoding-pairs)))
            (if (null pair)
                ;; `following-char' is not a Polish character nor it is
                ;; `prefix-char' since the one is among `recoding-pairs'
                (forward-char 1)
              ;; else prefix followed by a Polish character has been found
              ;; replace it by the corresponding Polish character
              (backward-char 1)
              (delete-char 2)
              (insert (cdr pair))
              ;; the region got shorter by one character
              (setq end (1- end)))))))))
(defun ogonek-deprefixify-buffer (prefix-char to-encoding)
  "Call `ogonek-deprefixify-region' on the entire buffer.
PREFIX-CHAR is the character that introduces a prefix pair and
TO-ENCODING is a name listed in `ogonek-name-encoding-alist'."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char)
                       (ogonek-read-encoding "To code" 'ogonek-prefix-to-encoding))))
  (ogonek-deprefixify-region
   (point-min) (point-max) prefix-char to-encoding))
503 ;;; ogonek.el ends here