1 ;;; european.el --- support for European languages -*- coding: utf-8; -*-
3 ;; Copyright (C) 1997-1998, 2000-2013 Free Software Foundation, Inc.
4 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
9 ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 ;; Registration Number H13PRO009
12 ;; Keywords: multilingual, European
14 ;; This file is part of GNU Emacs.
16 ;; GNU Emacs is free software: you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation, either version 3 of the License, or
19 ;; (at your option) any later version.
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
31 ;; For European scripts, all the ISO Latin character sets are
32 ;; supported, along with various others.
36 ;; Latin-1 (ISO-8859-1)
;; Register the generic "Latin-1" language environment: charset, usable
;; coding systems and their priority, default input method, a greeting
;; sample, and the description shown to the user.
(set-language-info-alist
 "Latin-1"
 '((charset iso-8859-1)
   (coding-system iso-latin-1 iso-latin-9 windows-1252)
   (coding-priority iso-latin-1)
   (nonascii-translation . iso-8859-1)
   (unibyte-display . iso-latin-1)
   (input-method . "latin-1-prefix")
   (sample-text
    . "Hello, Hej, Tere, Hei, Bonjour, Grüß Gott, Ciao, ¡Hola!")
   (documentation . "\
This language environment is a generic one for the Latin-1 (ISO-8859-1)
character set which supports the following European languages:
Albanian, Basque, Breton, Catalan, Danish, Dutch, English, Faeroese,
Finnish, French (with restrictions -- see Latin-9), Frisian, Galician,
German, Greenlandic, Icelandic, Irish Gaelic (new orthography),
Italian, Latin, Luxemburgish, Norwegian, Portuguese, Rhaeto-Romanic,
Scottish Gaelic, Spanish, and Swedish.
We also have specific language environments for the following languages:
For German, \"German\".
For French, \"French\".
For Italian, \"Italian\".
For Slovenian, \"Slovenian\".
For Spanish, \"Spanish\".
Latin-1 also covers several written languages outside Europe, including
Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans.")))
68 ;; Latin-2 (ISO-8859-2)
;; ISO-8859-2 as a charset-based coding system, with its alias names.
;; `:coding-type' is given explicitly, matching the sibling definitions
;; in this file that pair it with `:charset-list'.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'iso-latin-2
  "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)."
  :coding-type 'charset
  :charset-list '(iso-8859-2)
  :mime-charset 'iso-8859-2)

(define-coding-system-alias 'iso-8859-2 'iso-latin-2)
(define-coding-system-alias 'latin-2 'iso-latin-2)
;; Register the generic "Latin-2" language environment.
;; NOTE(review): the language list inside the description ends with a
;; comma, so its final entry appears to be missing -- confirm upstream.
(set-language-info-alist
 "Latin-2"
 '((charset iso-8859-2)
   (coding-system iso-latin-2 windows-1250)
   (coding-priority iso-latin-2)
   (nonascii-translation . iso-8859-2)
   (unibyte-display . iso-latin-2)
   (input-method . "latin-2-prefix")
   (documentation . "\
This language environment is a generic one for the Latin-2 (ISO-8859-2)
character set which supports the following languages:
Albanian, Czech, English, German, Hungarian, Polish, Romanian,
Serbo-Croatian or Croatian, Slovak, Slovene, Sorbian (upper and lower),
We also have specific language environments for the following languages:
For Croatian, \"Croatian\".
For Polish, \"Polish\".
For Romanian, \"Romanian\".
For Slovak, \"Slovak\".")))
102 ;; Latin-3 (ISO-8859-3)
;; ISO-8859-3 as a charset-based coding system, with its alias names.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'iso-latin-3
  "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)."
  :coding-type 'charset
  :charset-list '(iso-8859-3)
  :mime-charset 'iso-8859-3)

(dolist (alias '(iso-8859-3 latin-3))
  (define-coding-system-alias alias 'iso-latin-3))
;; Register the generic "Latin-3" language environment.
(set-language-info-alist
 "Latin-3"
 '((charset iso-8859-3)
   (coding-system iso-latin-3)
   (coding-priority iso-latin-3)
   (nonascii-translation . iso-8859-3)
   (unibyte-display . iso-latin-3)
   (input-method . "latin-3-prefix")
   (documentation . "\
These languages are supported with the Latin-3 (ISO-8859-3) character set:
Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
German, Italian, Maltese, Spanish, and Turkish.")))
128 ;; Latin-4 (ISO-8859-4)
;; ISO-8859-4 as a charset-based coding system, with its alias names.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'iso-latin-4
  "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)."
  :coding-type 'charset
  :charset-list '(iso-8859-4)
  :mime-charset 'iso-8859-4)

(dolist (alias '(iso-8859-4 latin-4))
  (define-coding-system-alias alias 'iso-latin-4))
;; Register the generic "Latin-4" language environment.  The coding
;; systems are named through the `iso-8859-4' alias.
(set-language-info-alist
 "Latin-4"
 '((charset iso-8859-4)
   (coding-system iso-8859-4)
   (coding-priority iso-8859-4)
   (nonascii-translation . iso-8859-4)
   (unibyte-display . iso-8859-4)
   (input-method . "latin-4-postfix")
   (documentation . "\
These languages are supported with the Latin-4 (ISO-8859-4) character set:
Danish, English, Estonian, Finnish, German, Greenlandic, Latvian,
Lithuanian, Norwegian, and Sami.")))
154 ;; Latin-5 (ISO-8859-9)
;; ISO-8859-9 as a charset-based coding system, with its alias names.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'iso-latin-5
  "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)."
  :coding-type 'charset
  :charset-list '(iso-8859-9)
  :mime-charset 'iso-8859-9)

(dolist (alias '(iso-8859-9 latin-5))
  (define-coding-system-alias alias 'iso-latin-5))
;; Register the generic "Latin-5" language environment.
;; The description uses a real line break between its two sentences; a
;; backslash-newline inside a string is dropped by the reader and would
;; join them as "Latin-5.See also".
(set-language-info-alist
 "Latin-5"
 '((charset iso-8859-9)
   (coding-system iso-latin-5)
   (coding-priority iso-latin-5)
   (nonascii-translation . iso-8859-9)
   (unibyte-display . iso-latin-5)
   (input-method . "latin-5-postfix")
   (documentation . "Support for Latin-5.
See also the Turkish environment.")))
178 ;; Latin-6 (ISO-8859-10)
;; ISO-8859-10 as a charset-based coding system, with its alias names.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'iso-latin-6
  "ISO 2022 based 8-bit encoding for Latin-6 (MIME:ISO-8859-10)."
  :coding-type 'charset
  :charset-list '(iso-8859-10)
  :mime-charset 'iso-8859-10)

(dolist (alias '(iso-8859-10 latin-6))
  (define-coding-system-alias alias 'iso-latin-6))
;; Register the generic "Latin-6" language environment.
(set-language-info-alist
 "Latin-6"
 '((charset iso-8859-10)
   (coding-system iso-latin-6)
   (coding-priority iso-latin-6)
   (nonascii-translation . iso-8859-10)
   (unibyte-display . iso-latin-6)
   ;; Fixme: input method.
   (documentation . "Support for generic Latin-6 (Northern European).")))
201 ;; Latin-7 (ISO-8859-13)
;; ISO-8859-13 as a charset-based coding system, with its alias names.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'iso-latin-7
  "ISO 2022 based 8-bit encoding for Latin-7 (MIME:ISO-8859-13)."
  :coding-type 'charset
  :charset-list '(iso-8859-13)
  :mime-charset 'iso-8859-13)

(dolist (alias '(iso-8859-13 latin-7))
  (define-coding-system-alias alias 'iso-latin-7))
;; Register the generic "Latin-7" language environment.
(set-language-info-alist
 "Latin-7"
 '((charset iso-8859-13)
   (coding-system iso-latin-7)
   (coding-priority iso-latin-7)
   (nonascii-translation . iso-8859-13)
   (unibyte-display . iso-latin-7)
   ;; Fixme: input method.
   (documentation . "Support for generic Latin-7 (Baltic Rim).")))
223 ;; Latin-8 (ISO-8859-14)
;; ISO-8859-14 as a charset-based coding system, with its alias names.
(define-coding-system 'iso-latin-8
  "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)."
  :coding-type 'charset
  ;; `W' for `Welsh', since `C' for `Celtic' is taken.
  :mnemonic ?W
  :charset-list '(iso-8859-14)
  :mime-charset 'iso-8859-14)

(define-coding-system-alias 'iso-8859-14 'iso-latin-8)
(define-coding-system-alias 'latin-8 'iso-latin-8)
;; Register the generic "Latin-8" (Celtic) language environment.
(set-language-info-alist
 "Latin-8"
 '((charset iso-8859-14)
   (coding-system iso-latin-8)
   (coding-priority iso-latin-8)
   (nonascii-translation . iso-8859-14)
   (unibyte-display . iso-latin-8)
   (input-method . "latin-8-prefix")
   ;; Fixme: Welsh/Ga{e}lic greetings
   (sample-text . "ḃ ŵ Ŷ")
   (documentation . "\
This language environment is a generic one for the Latin-8 (ISO-8859-14)
character set which supports the Celtic languages, including those not
covered by other ISO-8859 character sets:
Welsh, Manx Gaelic and Irish Gaelic (old orthography).")))
252 ;; Latin-9 (ISO-8859-15)
;; ISO-8859-15 as a charset-based coding system.  It is reachable under
;; three further names, including the `latin-0' nickname.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'iso-latin-9
  "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)."
  :coding-type 'charset
  :charset-list '(iso-8859-15)
  :mime-charset 'iso-8859-15)

(dolist (alias '(iso-8859-15 latin-9 latin-0))
  (define-coding-system-alias alias 'iso-latin-9))
;; Register the generic "Latin-9" language environment.
(set-language-info-alist
 "Latin-9"
 '((charset iso-8859-15)
   (coding-system iso-latin-9)
   (coding-priority iso-latin-9)
   (nonascii-translation . iso-8859-15)
   (unibyte-display . iso-latin-9)
   (input-method . "latin-9-prefix")
   (documentation . "\
This language environment is a generic one for the Latin-9 (ISO-8859-15)
character set which supports the same languages as Latin-1 with the
addition of the Euro sign and some additional French and Finnish letters.
Latin-9 is sometimes nicknamed `Latin-0'.")))
;; Register the "Esperanto" language environment (Latin-3 based, with
;; its own tutorial).
;; NOTE(review): `nonascii-translation' names `latin-iso8859-3' here,
;; whereas the "Latin-3" environment uses `iso-8859-3' -- confirm which
;; charset is intended.
(set-language-info-alist
 "Esperanto"
 '((tutorial . "TUTORIAL.eo")
   (coding-system iso-latin-3)
   (coding-priority iso-latin-3)
   (nonascii-translation . latin-iso8859-3)
   (unibyte-syntax . "latin-3")
   (unibyte-display . iso-latin-3)
   (input-method . "latin-3-prefix")
   (documentation
    . "Support for Esperanto with ISO-8859-3 character set.")))
;; Windows code pages, each defined as a charset-based coding system
;; with a `cpNNNN' alias.
;; NOTE(review): none of these gives a `:mnemonic' property -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'windows-1250
  "windows-1250 (Central European) encoding (MIME: WINDOWS-1250)"
  :coding-type 'charset
  :charset-list '(windows-1250)
  :mime-charset 'windows-1250)
(define-coding-system-alias 'cp1250 'windows-1250)

(define-coding-system 'windows-1252
  "windows-1252 (Western European) encoding (MIME: WINDOWS-1252)"
  :coding-type 'charset
  :charset-list '(windows-1252)
  :mime-charset 'windows-1252)
(define-coding-system-alias 'cp1252 'windows-1252)

(define-coding-system 'windows-1254
  "windows-1254 (Turkish) encoding (MIME: WINDOWS-1254)"
  :coding-type 'charset
  :charset-list '(windows-1254)
  :mime-charset 'windows-1254)
(define-coding-system-alias 'cp1254 'windows-1254)

(define-coding-system 'windows-1257
  "windows-1257 (Baltic) encoding (MIME: WINDOWS-1257)"
  :coding-type 'charset
  :charset-list '(windows-1257)
  :mime-charset 'windows-1257)
(define-coding-system-alias 'cp1257 'windows-1257)
;; DOS code pages, each defined as a charset-based coding system.  All
;; but cp858 also get an `ibmNNN' alias.
;; NOTE(review): none of these gives a `:mnemonic' property -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'cp775
  "DOS codepage 775 (PC Baltic, MS-DOS Baltic Rim)"
  :coding-type 'charset
  :charset-list '(cp775)
  :mime-charset 'cp775)
(define-coding-system-alias 'ibm775 'cp775)

(define-coding-system 'cp850
  "DOS codepage 850 (Western European)"
  :coding-type 'charset
  :charset-list '(cp850)
  :mime-charset 'cp850)
(define-coding-system-alias 'ibm850 'cp850)

(define-coding-system 'cp852
  "DOS codepage 852 (Slavic)"
  :coding-type 'charset
  :charset-list '(cp852)
  :mime-charset 'cp852)
(define-coding-system-alias 'ibm852 'cp852)

(define-coding-system 'cp857
  "DOS codepage 857 (Turkish)"
  :coding-type 'charset
  :charset-list '(cp857)
  :mime-charset 'cp857)
(define-coding-system-alias 'ibm857 'cp857)

(define-coding-system 'cp858
  "Codepage 858 (Multilingual Latin I + Euro)"
  :coding-type 'charset
  :charset-list '(cp858)
  :mime-charset 'cp858)

(define-coding-system 'cp860
  "DOS codepage 860 (Portuguese)"
  :coding-type 'charset
  :charset-list '(cp860)
  :mime-charset 'cp860)
(define-coding-system-alias 'ibm860 'cp860)

(define-coding-system 'cp861
  "DOS codepage 861 (Icelandic)"
  :coding-type 'charset
  :charset-list '(cp861)
  :mime-charset 'cp861)
(define-coding-system-alias 'ibm861 'cp861)

(define-coding-system 'cp863
  "DOS codepage 863 (French Canadian)"
  :coding-type 'charset
  :charset-list '(cp863)
  :mime-charset 'cp863)
(define-coding-system-alias 'ibm863 'cp863)

(define-coding-system 'cp865
  "DOS codepage 865 (Norwegian/Danish)"
  :coding-type 'charset
  :charset-list '(cp865)
  :mime-charset 'cp865)
(define-coding-system-alias 'ibm865 'cp865)
;; DOS code page 437 as a charset-based coding system, with its
;; `ibm437' alias.  `define-coding-system' takes the docstring as its
;; second argument, so it must come before the keyword properties.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'cp437
  "DOS codepage 437"
  :coding-type 'charset
  :charset-list '(cp437)
  :mime-charset 'cp437)
(define-coding-system-alias 'ibm437 'cp437)
;; Register the "Dutch" language environment: Latin-1 settings plus the
;; Dutch tutorial and input method.
(set-language-info-alist
 "Dutch"
 '((tutorial . "TUTORIAL.nl")
   (coding-system iso-latin-1 iso-latin-9)
   (coding-priority iso-latin-1)
   (nonascii-translation . iso-8859-1)
   (unibyte-display . iso-latin-1)
   (input-method . "dutch")
   (sample-text . "Er is een aantal manieren waarop je dit kan doen")
   (documentation . "\
This language environment is almost the same as Latin-1,
but it selects the Dutch tutorial and input method.")))
;; Register the "German" language environment: Latin-1 settings plus the
;; German tutorial and the "german-postfix" input method.
(set-language-info-alist
 "German"
 '((tutorial . "TUTORIAL.de")
   (coding-system iso-latin-1 iso-latin-9)
   (coding-priority iso-latin-1)
   (nonascii-translation . iso-8859-1)
   (input-method . "german-postfix")
   (unibyte-display . iso-latin-1)
   (sample-text . "\
German (Deutsch Nord) Guten Tag
German (Deutsch Süd) Grüß Gott")
   (documentation . "\
This language environment is almost the same as Latin-1,
but sets the default input method to \"german-postfix\".
Additionally, it selects the German tutorial.")))
;; Register the "French" language environment: Latin-1 settings plus the
;; French tutorial.
(set-language-info-alist
 "French"
 '((tutorial . "TUTORIAL.fr")
   (coding-system iso-latin-1 iso-latin-9)
   (coding-priority iso-latin-1)
   (nonascii-translation . iso-8859-1)
   (unibyte-display . iso-latin-1)
   (input-method . "latin-1-prefix")
   (sample-text . "French (Français) Bonjour, Salut")
   (documentation . "\
This language environment is almost the same as Latin-1,
but it selects the French tutorial and input method.")))
;; Register the "Italian" language environment: Latin-1 settings plus
;; the Italian tutorial and the "italian-postfix" input method.
(set-language-info-alist
 "Italian"
 '((tutorial . "TUTORIAL.it")
   (coding-system iso-latin-1 iso-latin-9)
   (coding-priority iso-latin-1)
   (nonascii-translation . iso-8859-1)
   (unibyte-display . iso-latin-1)
   (input-method . "italian-postfix")
   (sample-text . "Salve, ciao!")
   (documentation . "\
This language environment is almost the same as Latin-1,
but sets the default input method to \"italian-postfix\".
Additionally, it selects the Italian tutorial.")))
;; Register the "Slovenian" language environment: Latin-2 settings plus
;; the Slovenian tutorial and input method.
(set-language-info-alist
 "Slovenian"
 '((charset iso-8859-2)
   (coding-system iso-8859-2 windows-1250)
   (coding-priority iso-8859-2)
   (nonascii-translation . iso-8859-2)
   (input-method . "slovenian")
   (unibyte-display . iso-8859-2)
   (tutorial . "TUTORIAL.sl")
   (sample-text . "Želimo vam uspešen dan!")
   (documentation . "\
This language environment is almost the same as Latin-2,
but it selects the Slovenian tutorial and input method.")))
;; Register the "Spanish" language environment: Latin-1 settings plus
;; the Spanish tutorial and the "spanish-postfix" input method.
(set-language-info-alist
 "Spanish"
 '((tutorial . "TUTORIAL.es")
   (coding-system iso-latin-1 iso-latin-9)
   (coding-priority iso-latin-1)
   (input-method . "spanish-postfix")
   (nonascii-translation . iso-8859-1)
   (unibyte-display . iso-latin-1)
   (sample-text . "Spanish (Español) ¡Hola!")
   (documentation . "\
This language environment is almost the same as Latin-1,
but it sets the default input method to \"spanish-postfix\",
and it selects the Spanish tutorial.")))
495 ;; For Turkish, the character set ISO-8859-9 (Latin-5) is used. But,
496 ;; before the introduction of ISO-8859-9 in 1988, ISO-8859-3 (Latin-3)
497 ;; was used for Turkish. Those who use Latin-3 for Turkish should use
498 ;; "Latin-3" language environment.
;; Register the "Turkish" language environment.
;; Entering the environment installs the Turkish case rules
;; (`setup-function'); leaving it restores the ordinary ones
;; (`exit-function').  The two hooks need distinct keys: the same key
;; given twice can only hold one of the two functions.
(set-language-info-alist
 "Turkish"
 '((charset iso-8859-9)
   (coding-system iso-latin-5 windows-1254 iso-latin-3)
   (coding-priority iso-latin-5)
   (nonascii-translation . iso-8859-9)
   (unibyte-display . iso-latin-5)
   (input-method . "turkish-postfix")
   (sample-text . "Turkish (Türkçe) Merhaba")
   (setup-function . turkish-case-conversion-enable)
   (exit-function . turkish-case-conversion-disable)
   (documentation . "Support for Turkish.
Differs from the Latin-5 environment in using the `turkish-postfix' input
method and applying Turkish case rules for the characters i, I, ı, İ.")))
(defun turkish-case-conversion-enable ()
  "Install Turkish case pairs in the standard case table.
`İ' is paired with `i', and `I' with `ı'."
  (set-case-syntax-pair ?İ ?i (standard-case-table))
  (set-case-syntax-pair ?I ?ı (standard-case-table)))
(defun turkish-case-conversion-disable ()
  "Restore non-Turkish case conversion in the standard case table.
`I' is paired with `i' again, and `İ' and `ı' are each set up on
their own with word syntax."
  (set-case-syntax-pair ?I ?i (standard-case-table))
  (set-case-syntax ?İ "w" (standard-case-table))
  (set-case-syntax ?ı "w" (standard-case-table)))
527 ;; Polish ISO 8859-2 environment.
528 ;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl>
529 ;; Keywords: multilingual, Polish
;; Register the "Polish" language environment (ISO 8859-2 based).
;; NOTE(review): no `documentation' entry is visible for this
;; environment -- confirm whether one should be supplied.
(set-language-info-alist
 "Polish"
 '((charset iso-8859-2)
   (coding-system iso-8859-2 windows-1250)
   (coding-priority iso-8859-2)
   (input-method . "polish-slash")
   (nonascii-translation . iso-8859-2)
   (unibyte-display . iso-8859-2)
   (tutorial . "TUTORIAL.pl")
   (sample-text . "Pójdź, kiń-że tę chmurność w głąb flaszy")))
;; Register the "Welsh" language environment.
(set-language-info-alist
 "Welsh"
 '((coding-system utf-8 latin-8)   ; the input method is Unicode-based
   (coding-priority utf-8 latin-8)
   (nonascii-translation . iso-8859-14)
   (input-method . "welsh")
   (documentation . "Support for Welsh, using Unicode.")))
;; Further settings for the "Latin-6" language environment.  The
;; `nonascii-translation' value is computed when this form is evaluated:
;; it is whatever the `translation-table' property of
;; `decode-iso-latin-6' holds at that moment.
(set-language-info-alist
 "Latin-6"
 `((coding-system latin-6)
   (coding-priority latin-6)
   (nonascii-translation . ,(get 'decode-iso-latin-6 'translation-table))
   (input-method . "latin-prefix")
   (features code-pages)
   (documentation . "Support for Latin-6.")))
;; Further settings for the "Latin-7" language environment.
(set-language-info-alist
 "Latin-7"
 '((coding-system latin-7)
   (coding-priority latin-7)
   (nonascii-translation . iso-8859-13)
   (input-method . "latin-prefix")
   (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian.")))
;; Register the "Lithuanian" language environment (Latin-7 based).
(set-language-info-alist
 "Lithuanian"
 '((coding-system latin-7 windows-1257)
   (coding-priority latin-7)
   (nonascii-translation . iso-8859-13)
   (input-method . "lithuanian-keyboard")
   (documentation . "Support for Lithuanian.")))
;; Register the "Latvian" language environment (Latin-7 based).
(set-language-info-alist
 "Latvian"
 '((coding-system latin-7 windows-1257)
   (coding-priority latin-7)
   (nonascii-translation . iso-8859-13)
   (input-method . "latvian-keyboard")
   (documentation . "Support for Latvian.")))
;; Register the "Swedish" language environment (Latin-1 based, with the
;; Swedish tutorial).
(set-language-info-alist
 "Swedish"
 '((tutorial . "TUTORIAL.sv")
   (coding-system iso-latin-1)
   (coding-priority iso-latin-1)
   (nonascii-translation . iso-8859-1)
   (unibyte-display . iso-latin-1)
   (sample-text . "Goddag Hej")
   (documentation . "Support for Swedish")))
;; Register the "Croatian" language environment (Latin-2 based).
(set-language-info-alist
 "Croatian"
 '((charset iso-8859-2)
   (coding-system iso-8859-2)
   (coding-priority iso-8859-2)
   (input-method . "croatian")
   (nonascii-translation . iso-8859-2)
   (unibyte-display . iso-8859-2)
   (documentation . "Support for Croatian with Latin-2 encoding.")))
;; Register the "Brazilian Portuguese" language environment (Latin-1
;; based, with its own tutorial).
(set-language-info-alist
 "Brazilian Portuguese"
 '((tutorial . "TUTORIAL.pt_BR")
   (coding-system iso-latin-1 iso-latin-9)
   (coding-priority iso-latin-1)
   (nonascii-translation . iso-8859-1)
   (unibyte-display . iso-8859-1)
   (input-method . "latin-1-prefix")
   (documentation . "Support for Brazilian Portuguese.")))
;; Mac Roman as a charset-based coding system; `macintosh' is both its
;; MIME charset name and an alias.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'mac-roman
  "Mac Roman Encoding (MIME:MACINTOSH)."
  :coding-type 'charset
  :charset-list '(mac-roman)
  :mime-charset 'macintosh)
(define-coding-system-alias 'macintosh 'mac-roman)
;; The `next' charset as a charset-based coding system.
;; `define-coding-system' takes the docstring as its second argument,
;; so it must come before the keyword properties.
;; NOTE(review): `:mime-charset' is restored by analogy with the sibling
;; definitions in this file, and no `:mnemonic' is given -- confirm both.
(define-coding-system 'next
  "NeXTstep encoding"
  :coding-type 'charset
  :charset-list '(next)
  :mime-charset 'next)
;; HP roman-8 as a charset-based coding system, with its `roman8' alias.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'hp-roman8
  "Hewlett-Packard roman-8 encoding (MIME:ROMAN-8)"
  :coding-type 'charset
  :charset-list '(hp-roman8)
  :mime-charset 'hp-roman8)
(define-coding-system-alias 'roman8 'hp-roman8)
;; Adobe's PostScript `standard' encoding as a charset-based coding
;; system.
;; NOTE(review): no `:mnemonic' property is given here -- confirm
;; whether `define-coding-system' needs one.
(define-coding-system 'adobe-standard-encoding
  "Adobe `standard' encoding for PostScript"
  :coding-type 'charset
  :charset-list '(adobe-standard-encoding)
  :mime-charset 'adobe-standard-encoding)
650 ;;; european.el ends here