1 ;;; ogonek.el --- change the encoding of Polish diacritics
3 ;; Copyright (C) 1997-1998, 2001-2016 Free Software Foundation, Inc.
7 ;; Maintainer: Ryszard Kubiak <rysiek@ipipan.gda.pl>
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 ;; To use this library load it using
28 ;; M-x load-library [enter] ogonek
29 ;; Then, you may get a short info by calling one of
30 ;; M-x ogonek-jak -- in Polish
31 ;; M-x ogonek-how -- in English "
;; NOTE(review): only the docstring of this form survived in the
;; reviewed text; the group name, prefix and parent group below are
;; assumptions -- confirm against the upstream file.
(defgroup ogonek nil
  "Change the encoding of Polish diacritic characters."
  :prefix "ogonek-"
  :group 'i18n)
(defconst ogonek-name-encoding-alist
  '(("ascii" .
     (?A ?C ?E ?L ?N ?O ?S ?Z ?Z
      ?a ?c ?e ?l ?n ?o ?s ?z ?z))
    ("iso8859-2" .
     (161 198 202 163 209 211 166 172 175
      177 230 234 179 241 243 182 188 191))
    ("mazovia" .
     (143 149 144 156 165 163 152 160 161
      134 141 145 146 164 162 158 166 167))
    ("windows-EE" .
     (165 198 202 163 209 211 140 143 175
      185 230 234 179 241 243 156 159 191))
    ("windows-PL" .
     (165 198 202 163 209 211 140 143 175
      185 230 234 179 241 243 156 159 191))
    ("latin-2" .
     (164 143 168 157 227 224 151 141 189
      165 134 169 136 228 162 152 171 190))
    ("CP852" .
     (164 143 168 157 227 224 151 141 189
      165 134 169 136 228 162 152 171 190))
    ("MeX" .
     (129 130 134 138 139 211 145 153 155
      161 162 166 170 171 243 177 185 187))
    ("CorelDraw" .
     (197 242 201 163 209 211 255 225 237
      229 236 230 198 241 243 165 170 186))
    ("Amiga" .
     (194 202 203 206 207 211 212 218 219
      226 234 235 238 239 243 244 250 251))
    ("Mac" .
     (132 140 162 252 193 238 229 143 251
      136 141 171 184 196 151 230 144 253)))
  "Alist of (NAME . LIST) pairs describing the known encodings.
NAME is the encoding name as a string.  Each LIST contains the codes
of the 18 Polish diacritic characters in that encoding, given in the
following order:
  Aogonek Cacute Eogonek Lslash Nacute Oacute Sacute Zacute Zdotaccent
  aogonek cacute eogonek lslash nacute oacute sacute zacute zdotaccent.")
70 ;; ------ A Little Info in Polish ---------------
;; The help text is written in the library's own prefix notation
;; (`/' followed by a letter stands for a Polish diacritic character).
;; It is passed through `substitute-command-keys' before display, hence
;; the `\\[...]' and `\\=' sequences.
(defconst ogonek-informacja
  "   FUNKCJE INTERAKCYJNE UDOST/EPNIANE PRZEZ BIBLIOTEK/E `ogonek'.

Je/sli czytasz ten tekst, to albo przegl/adasz plik /xr/od/lowy
biblioteki `ogonek.el', albo wywo/la/le/s polecenie `ogonek-jak'.
W drugim przypadku mo/zesz usun/a/c tekst z ekranu, stosuj/ac
polecenie `\\[kill-buffer]'.

Niniejsza biblioteka dostarcza funkcji do zmiany kodowania polskich
znak/ow diakrytycznych. Funkcje te mo/zna pogrupowa/c nast/epuj/aco.

 1. `ogonek-recode-region' oraz `ogonek-recode-buffer'
    przekodowuj/a zaznaczony fragment wzgl/ednie ca/ly buffor.
    Po wywo/laniu interakcyjnym funkcji zadawane s/a
    pytania o parametry przekodowania: nazw/e kodowania
    w tek/scie /xr/od/lowym i nazw/e kodowania docelowego.
    Poni/zsze przyk/lady powinny wyja/sni/c, jakich parametr/ow
    oczekuj/a wymienione funkcje:

      (ogonek-recode-region (poczatek-fragmentu) (koniec-fragmentu)
         nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)
      (ogonek-recode-buffer
         nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)

 2. `ogonek-prefixify-region' oraz `ogonek-prefixify-buffer'
    s/lu/z/a do wprowadzania notacji prefiksowej.

      (ogonek-prefixify-region (poczatek-fragmentu) (koniec-fragmentu)
         nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)
      (ogonek-prefixify-buffer
         nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)

 3. `ogonek-deprefixify-region' oraz `ogonek-deprefixify-buffer'
    s/lu/z/a do usuwania notacji prefiksowej.

      (ogonek-deprefixify-region (poczatek-fragmentu) (koniec-fragmentu)
         znak-prefiksu nazwa-kodowania-docelowa)
      (ogonek-deprefixify-buffer
         znak-prefiksu nazwa-kodowania-docelowa)

U/zycie klawisza TAB w trybie interakcyjnym powoduje wy/swietlenie
listy dopuszczalnych nazw kod/ow, pami/etanych w sta/lej
`ogonek-name-encoding-alist'.

Funkcje biblioteki odwo/luj/a si/e do pi/eciu zmiennych, kt/ore
przechowuj/a podpowiedzi do zadawanych pyta/n. Nazwy tych zmiennych
oraz ich warto/sci domy/slne s/a nast/epuj/ace:

  ogonek-from-encoding           iso8859-2
  ogonek-to-encoding             ascii
  ogonek-prefix-char             /
  ogonek-prefix-from-encoding    iso8859-2
  ogonek-prefix-to-encoding      iso8859-2

Powy/zsze warto/sci domy/slne mo/zna zmieni/c przez umieszczenie w pliku
konfiguracyjnym `~/.emacs' odpowiednich przypisa/n, na przyk/lad:

  (setq ogonek-prefix-char ?/)
  (setq ogonek-prefix-to-encoding \"iso8859-2\")

Zamiast wczytywania ca/lej biblioteki `ogonek.el' mo/zna w pliku
`~/.emacs' za/z/ada/c wczytania wybranych funkcji, na dodatek dopiero
w chwili ich rzeczywistego u/zycia:

  (autoload \\='ogonek-jak \"ogonek\")
  (autoload \\='ogonek-recode-region \"ogonek\")
  (autoload \\='ogonek-prefixify-region \"ogonek\")
  (autoload \\='ogonek-deprefixify-region \"ogonek\")

Cz/esto wyst/epuj/ace kombinacje wywo/la/n funkcji mo/zna dla wygody
skr/oci/c i przypisa/c klawiszom. Oto praktyczne przyk/lady:

  (defun deprefixify-iso8859-2-region (start end)
    (interactive \"*r\")
    (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
  (global-set-key \"\\C-cd\" \\='deprefixify-iso8859-2-region) ; ctrl-c d

  (defun mazovia-to-iso8859-2 (start end)
    (interactive \"*r\")
    (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
  (global-set-key \"\\C-cr\" \\='mazovia-to-iso8859-2) ; ctrl-c r

  (defun prefixify-iso8859-2-region (start end)
    (interactive \"*r\")
    (ogonek-prefixify-region start end \"iso8859-2\" ?/))
  (global-set-key \"\\C-cp\" \\='prefixify-iso8859-2-region) ; ctrl-c p

Ka/zd/a operacj/e przekodowania mo/zna w ca/lo/sci odwo/la/c
przez wykonanie polecenia `undo'."
  "Polish help text for the `ogonek' library, shown by `ogonek-jak'.")
(defun ogonek-jak ()
  "Display `ogonek-informacja' in an auxiliary *ogonek-jak* buffer.
The buffer is emptied first, so calling the command repeatedly does
not accumulate several copies of the help text."
  (interactive)
  (let ((help-buffer (get-buffer-create " *ogonek-jak*")))
    (with-current-buffer help-buffer
      (erase-buffer)
      ;; Expand `\\[command]' sequences into the user's key bindings.
      (insert (substitute-command-keys ogonek-informacja)))
    (switch-to-buffer help-buffer)
    (goto-char (point-min))))
170 ;; ------ A Little Info in English --------
;; The help text is passed through `substitute-command-keys' before
;; display, hence the `\\[...]' and `\\=' sequences.
(defconst ogonek-information
  "   THE INTERACTIVE FUNCTIONS PROVIDED BY THE LIBRARY `ogonek'.

If you read this text then you are either looking at the library's
source text or you have called the `ogonek-how' command. In the
latter case you may remove this text using `\\[kill-buffer]'.

The library provides functions for changing the encoding of Polish
diacritic characters, the ones with an `ogonek' below or above them.
The functions come in the following groups.

 1. `ogonek-recode-region' and `ogonek-recode-buffer' to change
    between one-character encodings, such as `iso8859-2', `mazovia',
    plain `ascii' or `MeX'. As the names suggest you may recode
    either the entire current buffer or just a marked region
    in it. You may use the functions interactively as commands.
    Once you call a command you will be asked about the code
    currently used in your text and the target encoding, the one
    you want to get. The following example shows a non-interactive
    use of the functions in a program. This also illustrates what
    type of parameters the functions expect to be called with:

      (ogonek-recode-region
        (region-beginning) (region-end) from-code-name to-code-name)
      (ogonek-recode-buffer from-code-name to-code-name)

 2. `ogonek-prefixify-region' and `ogonek-prefixify-buffer' for
    introducing prefix notation:

      (ogonek-prefixify-region
        (region-beginning) (region-end) from-code-name prefix-char)
      (ogonek-prefixify-buffer from-code-name prefix-char)

 3. `ogonek-deprefixify-region' and `ogonek-deprefixify-buffer' for
    removing prefix notation:

      (ogonek-deprefixify-region
        (region-beginning) (region-end) prefix-char to-code-name)
      (ogonek-deprefixify-buffer prefix-char to-code-name)

The TAB character used in interactive mode makes `emacs'
display the list of encodings recognized by the library. The list
is stored in the constant `ogonek-name-encoding-alist'.

The `ogonek' functions refer to five variables in which the suggested
answers to dialogue questions are stored. The variables and their
default values are as follows:

  ogonek-from-encoding           iso8859-2
  ogonek-to-encoding             ascii
  ogonek-prefix-char             /
  ogonek-prefix-from-encoding    iso8859-2
  ogonek-prefix-to-encoding      iso8859-2

The above default values can be changed by placing appropriate settings
in your init file, for example:

  (setq ogonek-prefix-char ?/)
  (setq ogonek-prefix-to-encoding \"iso8859-2\")

Instead of loading the whole library `ogonek' it may be better
to autoload the needed functions, for example by adding the
following lines to your init file:

  (autoload \\='ogonek-how \"ogonek\")
  (autoload \\='ogonek-recode-region \"ogonek\")
  (autoload \\='ogonek-prefixify-region \"ogonek\")
  (autoload \\='ogonek-deprefixify-region \"ogonek\")

The most frequent function calls can be abbreviated and assigned to
keyboard keys. Here are a few practical examples:

  (defun deprefixify-iso8859-2-region (start end)
    (interactive \"*r\")
    (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
  (global-set-key \"\\C-cd\" \\='deprefixify-iso8859-2-region) ; ctrl-c d

  (defun mazovia-to-iso8859-2 (start end)
    (interactive \"*r\")
    (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
  (global-set-key \"\\C-cr\" \\='mazovia-to-iso8859-2) ; ctrl-c r

  (defun prefixify-iso8859-2-region (start end)
    (interactive \"*r\")
    (ogonek-prefixify-region start end \"iso8859-2\" ?/))
  (global-set-key \"\\C-cp\" \\='prefixify-iso8859-2-region) ; ctrl-c p

Each recoding operation can be called off using the `undo' command."
  "English help text for the `ogonek' library, shown by `ogonek-how'.")
(defun ogonek-how ()
  "Display `ogonek-information' in an auxiliary *ogonek-how* buffer.
The buffer is emptied first, so calling the command repeatedly does
not accumulate several copies of the help text."
  (interactive)
  (let ((help-buffer (get-buffer-create " *ogonek-how*")))
    (with-current-buffer help-buffer
      (erase-buffer)
      ;; Expand `\\[command]' sequences into the user's key bindings.
      (insert (substitute-command-keys ogonek-information)))
    (switch-to-buffer help-buffer)
    (goto-char (point-min))))
269 ;; ---- Variables keeping the suggested answers to dialogue questions -----
(defvar ogonek-encoding-choices
  ;; Build the customization type `(choice (const NAME) ...)' with one
  ;; `const' alternative per encoding name known to the library.
  (cons 'choice
        (mapcar (lambda (entry) (list 'const (car entry)))
                ogonek-name-encoding-alist))
  "List of ogonek encodings.  Used only for customization.")
;; Default answer offered when a recoding command asks for the source
;; encoding.  No explicit `:group' is given here; the option is filed
;; under the group most recently defined in this file, if any.
(defcustom ogonek-from-encoding "iso8859-2"
  "Encoding in the source file of recoding."
  :type ogonek-encoding-choices)
;; Default answer offered when a recoding command asks for the target
;; encoding.  No explicit `:group' is given here; the option is filed
;; under the group most recently defined in this file, if any.
(defcustom ogonek-to-encoding "ascii"
  "Encoding in the target file of recoding."
  :type ogonek-encoding-choices)
;; Default prefix character offered by the prefix-notation commands.
;; No explicit `:group' is given here; the option is filed under the
;; group most recently defined in this file, if any.
(defcustom ogonek-prefix-char ?/
  "Prefix character for prefix encodings."
  :type 'character)
;; Default answer offered when a prefixify command asks for the source
;; encoding.  No explicit `:group' is given here; the option is filed
;; under the group most recently defined in this file, if any.
(defcustom ogonek-prefix-from-encoding "iso8859-2"
  "Encoding in the source file subject to prefixation."
  :type ogonek-encoding-choices)
;; Default answer offered when a deprefixify command asks for the
;; target encoding.  No explicit `:group' is given here; the option is
;; filed under the group most recently defined in this file, if any.
(defcustom ogonek-prefix-to-encoding "iso8859-2"
  "Encoding in the target file subject to deprefixation."
  :type ogonek-encoding-choices)
296 ;; ---- Auxiliary functions for reading parameters in interactive mode ----
(defun ogonek-read-encoding (prompt default-name-var)
  "Read encoding name with completion based on `ogonek-name-encoding-alist'.
PROMPT is a string to be shown when the user is asked for a name.
DEFAULT-NAME-VAR is a symbol naming the variable that holds the
default answer.  An empty answer selects that default; any other
answer is stored in the variable and becomes the new default.
Return the encoding name finally held by DEFAULT-NAME-VAR."
  (let* ((default (symbol-value default-name-var))
         (encoding
          (completing-read
           (format "%s (default %s): " prompt default)
           ogonek-name-encoding-alist nil t nil nil default)))
    ;; change the default name to the one just read
    (set default-name-var
         (if (string= encoding "") default encoding))
    ;; return the new default as the name you read
    (symbol-value default-name-var)))
(defun ogonek-read-prefix (prompt default-prefix-var)
  "Read a prefix character for prefix notation.
PROMPT is a string to be shown when the user is asked for a new prefix.
DEFAULT-PREFIX-VAR is a symbol naming the variable that holds the
default prefix character.  An empty answer selects that default; a
one-character answer is stored in the variable and becomes the new
default.  Signal an error if more than one character is entered.
Return the character finally held by DEFAULT-PREFIX-VAR."
  (let* ((default (symbol-value default-prefix-var))
         (prefix-string
          (read-string
           (format "%s (default %s): " prompt
                   (char-to-string default)))))
    (if (> (length prefix-string) 1)
        (error "! Only one character expected")
      ;; set the default prefix character to the one just read
      (set default-prefix-var
           (if (string= prefix-string "")
               default
             (string-to-char prefix-string)))
      ;; the new default prefix is the function's result:
      (symbol-value default-prefix-var))))
(defun ogonek-lookup-encoding (encoding)
  "Pick up an association for ENCODING in `ogonek-name-encoding-alist'.
Return the list of character codes associated with the string
ENCODING.  Signal an error if ENCODING is not a name listed in
`ogonek-name-encoding-alist'."
  (let ((code-list (assoc encoding ogonek-name-encoding-alist)))
    (if (null code-list)
        (error "! Name `%s' not known in `ogonek-name-encoding-alist'"
               encoding)
      (cdr code-list))))
340 ;; ---- An auxiliary function for zipping two lists of equal length ----
(defun ogonek-zip-lists (xs ys)
  "Build a list of pairs from lists XS and YS of the same length.
Each pair is a cons (X . Y) of the elements found at the same
position in XS and YS.  Pairs are prepended as they are built, so
the result lists them in the reverse of the input order."
  (let ((pairs nil))
    (while xs
      (setq pairs (cons (cons (car xs) (car ys)) pairs))
      (setq xs (cdr xs)
            ys (cdr ys)))
    ;; `pairs' are the function's result
    pairs))
352 ;; ---- An auxiliary function building a one-to-one recoding table -----
(defun ogonek-build-table (recoding-pairs)
  "Build a table required by Emacs's `translate-region' function.
RECODING-PAIRS is a list of (FROM . TO) character pairs describing
the codes that have to be changed; every code not mentioned there
is mapped to itself.  When a FROM code occurs more than once, the
pair that comes last in RECODING-PAIRS wins.
By using the built-in `translate-region' function
we gain better performance compared to converting characters
by a hand-written routine as it is done for prefix encodings."
  (let ((table (make-string 256 0)))
    ;; first make an identity table
    (dotimes (code 256)
      (aset table code code))
    ;; make changes in `table' according to `recoding-pairs'
    (dolist (pair recoding-pairs)
      (aset table (car pair) (cdr pair)))
    ;; return the table just built
    table))
373 ;; ---- Commands for one-to-one recoding -------------------------------
(defun ogonek-recode-region (start end from-encoding to-encoding)
  "Recode text in a marked region in one-to-one manner.
START and END delimit the region.  FROM-ENCODING and TO-ENCODING are
encoding names listed in `ogonek-name-encoding-alist'.
When called interactively ask the user for the names of the FROM-
and TO- encodings."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (region-beginning)
                       (region-end)
                       (ogonek-read-encoding "From code" 'ogonek-from-encoding)
                       (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
  (save-excursion
    (translate-region
     start end
     ;; Pair up the codes of the two encodings position by position and
     ;; turn the pairs into a translation table.
     (ogonek-build-table
      (ogonek-zip-lists
       (ogonek-lookup-encoding from-encoding)
       (ogonek-lookup-encoding to-encoding))))))
(defun ogonek-recode-buffer (from-encoding to-encoding)
  "Call `ogonek-recode-region' on the entire buffer.
FROM-ENCODING and TO-ENCODING are passed on unchanged.
When called interactively ask the user for the names of the FROM-
and TO- encodings."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (ogonek-read-encoding "From code" 'ogonek-from-encoding)
                       (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
  (ogonek-recode-region
   (point-min) (point-max) from-encoding to-encoding))
404 ;; ---- Recoding with prefix notation -------------------------------
(defconst ogonek-prefix-code
  ;; Spell the 18 letters out as a string and explode it into a list of
  ;; characters.
  (append "ACELNOSXZ"
          "acelnosxz"
          nil)
  "Letters that follow the prefix character in prefix notation.
This is a list of 18 characters: the nine upper-case letters
A C E L N O S X Z followed by the same nine letters in lower case.")
(defun ogonek-prefixify-region (start end from-encoding prefix-char)
  "In a region, replace FROM-encoded Polish characters with PREFIX pairs.
START and END delimit the region.  FROM-ENCODING is an encoding name
listed in `ogonek-name-encoding-alist'.
A PREFIX pair generated consists of PREFIX-CHAR and the respective
character listed in the `ogonek-prefix-code' constant.
PREFIX-CHAR itself gets doubled."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (region-beginning)
                       (region-end)
                       (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding)
                       (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char))))
  (let*
      ((from-code (ogonek-lookup-encoding from-encoding))
       (to-code ogonek-prefix-code)
       (recoding-pairs  ; PREFIX-CHAR maps to itself, for doubling
        (ogonek-zip-lists
         (cons prefix-char from-code)
         (cons prefix-char to-code))))
    (save-excursion
      (goto-char start)
      (while (< (point) end)
        (let ((pair (assoc (following-char) recoding-pairs)))
          (if (null pair)
              ;; not a Polish character -- skip it
              (forward-char 1)
            ;; Polish character -- insert a prefix pair instead.
            ;; Use the PREFIX-CHAR argument, not the global default, so
            ;; that non-interactive callers get the prefix they asked for.
            (delete-char 1)
            (insert prefix-char)
            (insert (cdr pair))
            ;; the region is now one character longer
            (setq end (1+ end))))))))
(defun ogonek-prefixify-buffer (from-encoding prefix-char)
  "Call `ogonek-prefixify-region' on the entire buffer.
FROM-ENCODING and PREFIX-CHAR are passed on unchanged.
When called interactively ask the user for both of them."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding)
                       (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char))))
  (ogonek-prefixify-region
   (point-min) (point-max) from-encoding prefix-char))
(defun ogonek-deprefixify-region (start end prefix-char to-encoding)
  "In a region, replace PREFIX pairs with their corresponding TO-encodings.
START and END delimit the region.  TO-ENCODING is an encoding name
listed in `ogonek-name-encoding-alist'.
PREFIX-CHAR followed by a Polish character from the `ogonek-prefix-code'
list is replaced with the corresponding TO-encoded character.  A doubled
PREFIX-CHAR gets replaced with a single one.  A combination of PREFIX-CHAR
followed by a non-Polish character, that is one not listed in the
`ogonek-prefix-code' constant, is left unchanged."
  (interactive (progn (barf-if-buffer-read-only)
                      (list (region-beginning)
                            (region-end)
                            (ogonek-read-prefix
                             "Prefix character" 'ogonek-prefix-char)
                            (ogonek-read-encoding
                             "To code" 'ogonek-prefix-to-encoding))))
  (let*
      ((from-code ogonek-prefix-code)
       (to-code (ogonek-lookup-encoding to-encoding))
       (recoding-pairs  ; PREFIX-CHAR maps to itself, for un-doubling
        (ogonek-zip-lists
         (cons prefix-char from-code)
         (cons prefix-char to-code))))
    (save-excursion
      (goto-char start)
      (while (< (point) end)
        (forward-char 1)
        ;; A non-prefix character, or a prefix that is the very last
        ;; character of the region, needs no action.
        (when (and (= (preceding-char) prefix-char)
                   (/= (point) end))
          ;; now, we can check the next character
          (let ((pair (assoc (following-char) recoding-pairs)))
            (if (null pair)
                ;; `following-char' is not a Polish character nor it is
                ;; `prefix-char' since the one is among `recoding-pairs'
                (forward-char 1)
              ;; else prefix followed by a Polish character has been found
              ;; replace it by the corresponding Polish character
              (backward-char 1)
              (delete-char 2)
              (insert (cdr pair))
              ;; the region got shorter by one character
              (setq end (1- end)))))))))
(defun ogonek-deprefixify-buffer (prefix-char to-encoding)
  "Call `ogonek-deprefixify-region' on the entire buffer.
PREFIX-CHAR and TO-ENCODING are passed on unchanged.
When called interactively ask the user for both of them."
  (interactive (progn (barf-if-buffer-read-only)
                      (list
                       (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char)
                       (ogonek-read-encoding "To code" 'ogonek-prefix-to-encoding))))
  (ogonek-deprefixify-region
   (point-min) (point-max) prefix-char to-encoding))
503 ;;; ogonek.el ends here