1 ;;; codepages.el --- coding systems for assorted codepages -*-coding: utf-8;-*-
3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
5 ;; Author: Dave Love <fx@gnu.org>
8 ;; This file is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; This file is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GNU Emacs; see the file COPYING. If not, write to
20 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 ;; Boston, MA 02111-1307, USA.
25 ;; Definitions of miscellaneous 8-bit coding systems based on ASCII,
26 ;; mainly for PC `code pages'. They are decoded into Latin-1 and
27 ;; mule-unicode character sets rather than (lossily) into single iso8859
28 ;; charsets. A utility function `cp-make-coding-system' derives them
29 ;; from simple tables.
31 ;; Those covered are: cp437, cp775, cp850, cp851, cp852, cp855, cp857,
32 ;; cp860, cp861, cp862, cp863, cp864, cp865, cp866, cp869, cp874,
33 ;; windows-1250, windows-1251, windows-1252, windows-1253,
34 ;; windows-1254, windows-1255, windows-1256, windows-1257,
35 ;; windows-1258, next, koi8-r, koi8-u.
37 ;; Note that koi8-r and cp866 (alternativnyj) clash with the
38 ;; iso8859-5-based versions in cyrillic.el. A few CPs from
39 ;; codepage.el aren't covered (in the absence of translation tables to
42 ;; Compile this to avoid loading `ccl' at runtime.
46 ;; Re-written to allow nil in the vector.
;; Build a translation char-table from the decoding vector VEC, together
;; with a second char-table (`rev-table') holding the reverse mapping.
;; For each index I the visible code reads CH = (aref vec i) and stores I
;; under CH in `rev-table'; `rev-table' is then attached to `table' as
;; extra slot 0.
;; NOTE(review): some lines of this definition are not visible in this
;; extract (the loop over I and, presumably, the store of CH into
;; `table') -- confirm against the complete file.
47 (defun make-translation-table-from-vector (vec)
48 "Make translation table from decoding vector VEC.
49 VEC is an array of 256 elements to map unibyte codes to multibyte
50 characters. Elements may be nil for undefined code points.
51 See also the variable `nonascii-translation-table'."
52 (let ((table (make-char-table 'translation-table
))
53 (rev-table (make-char-table 'translation-table
))
56 (setq ch
(aref vec i
))
;; Reverse direction: character CH maps back to code I.
60 (aset rev-table ch i
))))
;; Keep the reverse table reachable from the forward table (extra slot 0).
61 (set-char-table-extra-slot table
0 rev-table
)
;; Expand the 128-long vector V into a 256-element vector and hand it to
;; `make-translation-table-from-vector'.  In the visible code some
;; indices I are mapped to themselves and others take element (- i 128)
;; of V.
;; NOTE(review): the loop headers are not visible in this extract;
;; presumably the identity mapping covers 0..127 and V fills 128..255 --
;; confirm against the complete file.
64 (defun cp-make-translation-table (v)
65 "Return a translation table made from 128-long vector V.
66 V comprises characters encodable by mule-utf-8."
67 (let ((encoding-vector (make-vector 256 0))
;; Identity entry: code I decodes to character I.
71 (aset encoding-vector i i
)
;; Entry taken from V, which is indexed from 0, hence the offset of 128.
74 (aset encoding-vector i
(aref v
(- i
128)))
76 (make-translation-table-from-vector encoding-vector
)))
;; Collect ranges of valid codes as (START . END) conses pushed onto
;; `pairs'.  The scan starts at code 128 with a range 0..127 already
;; open, and element (- i 128) of V decides whether code I is valid:
;; a non-nil element starts or extends the current range.
;; NOTE(review): the `let' opener, the loop header, the update of `end',
;; the reset of `start' and the return value are not visible in this
;; extract -- confirm against the complete file.
78 (defun cp-valid-codes (v)
79 "Derive a valid-codes list for translation vector V.
80 See `make-coding-system'."
82 (i 128) ; index into v
83 (start 0) ; start of a valid range
84 (end 127)) ; end of a valid range
86 (if (aref v
(- i
128)) ; start or extend range
;; Only open a new range when none is currently open.
89 (unless start
(setq start i
)))
;; Record a finished range.
91 (push (cons start end
) pairs
))
;; Record the range that is still open once scanning is over.
94 (if start
(push (cons start end
) pairs
))
97 ;; Macro to allow the ccl compilation at byte-compile time, avoiding
;; Macro `cp-make-coding-system': define coding system NAME from the
;; 128-long vector V.
;;
;; At macro-expansion time it interns the symbols `encode-NAME' and
;; `decode-NAME' and builds two CCL fragments.  The decoding fragment
;; picks a charset id for the byte in r1 (ascii when r1 < 128, otherwise
;; eight-bit-control or eight-bit-graphic), translates through the
;; decoder table and writes the multibyte character.  The encoding
;; fragment reads a multibyte character, translates through the encoder
;; table and writes r1.
;;
;; The expansion binds `translation-table' to
;; (cp-make-translation-table V), registers it as the decoder
;; translation table, registers its extra slot 0 as the encoder table,
;; and supplies NAME, 4, the mnemonic (default ?D), the doc string
;; (default "NAME encoding"), the decoder/encoder pair and a property
;; list (safe-chars, valid-codes, mime-charset).
;;
;; NOTE(review): several lines are not visible in this extract: the end
;; of the docstring, the forms wrapping the CCL fragments, the test
;; choosing between the two eight-bit charsets, and the head of the call
;; that receives NAME (presumably `make-coding-system') -- confirm
;; against the complete file.
;; NOTE(review): `cp-valid-codes' documents its argument as a translation
;; vector and reads element (- i 128) of it, but the expansion passes the
;; char-table `translation-table' instead of V -- confirm this is
;; intended, since the offsets would then address codes 0..127.
;; NOTE(review): the docstring has two typos, "for and 8-bit" (for an)
;; and "charactert" (character); the string is left untouched here.
99 (defmacro cp-make-coding-system
(name v
&optional doc-string mnemonic
)
100 "Make coding system NAME for and 8-bit, extended-ASCII character set.
101 V is a 128-long vector of characters to translate the upper half of
102 the charactert set. DOC-STRING and MNEMONIC are used as the
103 corresponding args of `make-coding-system'. If MNEMONIC isn't given,
105 (let* ((encoder (intern (format "encode-%s" name
)))
106 (decoder (intern (format "decode-%s" name
)))
112 (if (r1 < 128) ;; ASCII
113 (r0 = ,(charset-id 'ascii
))
115 (r0 = ,(charset-id 'eight-bit-control
))
116 (r0 = ,(charset-id 'eight-bit-graphic
))))
117 (translate-character ,decoder r0 r1
)
118 (write-multibyte-character r0 r1
)
124 (read-multibyte-character r0 r1
)
125 (translate-character ,encoder r0 r1
)
126 (write-repeat r1
)))))))
127 `(let ((translation-table (cp-make-translation-table ,v
)))
128 (define-translation-table ',decoder translation-table
)
129 (define-translation-table ',encoder
130 (char-table-extra-slot translation-table
0))
132 ',name
4 ,(or mnemonic ?D
)
133 (or ,doc-string
(format "%s encoding" ',name
))
134 (cons ,ccl-decoder
,ccl-encoder
)
135 (list (cons 'safe-chars
(get ',encoder
'translation-table
))
136 (cons 'valid-codes
(cp-valid-codes translation-table
))
137 (cons 'mime-charset
',name
))))))
140 ;; These tables were mostly derived by running something like
141 ;; `recode -f cpxxx/..utf-8' on a binary file filled by
142 ;; `(dotimes (i 128) (insert ?? ?\\ (+ 128 i) ?\n))' and then
143 ;; exchanging the ?\� entries for nil. iconv was used instead in at
146 (cp-make-coding-system
279 (cp-make-coding-system
411 (cp-make-coding-system
543 (cp-make-coding-system
675 (cp-make-coding-system
807 (cp-make-coding-system
939 (cp-make-coding-system
1071 (cp-make-coding-system
1203 (cp-make-coding-system
1335 (cp-make-coding-system
1467 (cp-make-coding-system
1599 (cp-make-coding-system
1731 (cp-make-coding-system
1863 ;; This should be the same as cyrillic-alternativnyj,
1864 ;; (<URL:http://czyborra.com/charsets/cyrillic.html>), but code point
1865 ;; 255 in the cyrillic.el alternativnyj table is `№', i.e. point 240
1866 ;; in 8859-5, not no-break space as below; `№' should be at point 252.
1867 (cp-make-coding-system
1998 "CP866 (Cyrillic Alternativnyj) encoding using Unicode.")
2000 (cp-make-coding-system
2132 (cp-make-coding-system
2263 (cp-make-coding-system
2393 "CP1250/Windows-1250 Encoding")
2395 (cp-make-coding-system
2525 "CP1251/Windows-1251 Encoding")
2527 (cp-make-coding-system
2657 "CP1252/Windows-1252 (Western) Encoding")
2659 (cp-make-coding-system
2789 "CP1253/Windows-1253 Encoding")
2791 (cp-make-coding-system
2921 "CP1254/Windows-1254 Encoding")
2923 (cp-make-coding-system
3053 "CP1255/Windows-1255 Encoding")
3055 (cp-make-coding-system
3185 "CP1256/Windows-1256 Encoding")
3187 (cp-make-coding-system
3317 "CP1257/Windows-1257 Encoding")
3319 (cp-make-coding-system
3449 "CP1258/Windows-1258 Encoding")
3451 (cp-make-coding-system
3581 "NeXTstep encoding." ?N
)
3583 (cp-make-coding-system
3713 "Cyrillic KOI8-U (Ukranian) encoding.")
3715 ;; Unicode-based, not cyrillic-iso8859-5 based (and incomplete) like the
3716 ;; standard version.
3717 (cp-make-coding-system
3847 "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R) using Unicode.")
;; Set the `mime-charset' property of `cyrillic-koi8' to `koi8-r'.
;; NOTE(review): presumably this overrides the value installed when the
;; coding system was defined just above -- confirm against the complete
;; file.
3848 (coding-system-put 'cyrillic-koi8
'mime-charset
'koi8-r
)
;; Make each `cpNNNN' name an alias for the matching `windows-NNNN'
;; coding system, for 1250 through 1258.
3850 (define-coding-system-alias 'cp1250
'windows-1250
)
3851 (define-coding-system-alias 'cp1251
'windows-1251
)
3852 (define-coding-system-alias 'cp1252
'windows-1252
)
3853 (define-coding-system-alias 'cp1253
'windows-1253
)
3854 (define-coding-system-alias 'cp1254
'windows-1254
)
3855 (define-coding-system-alias 'cp1255
'windows-1255
)
3856 (define-coding-system-alias 'cp1256
'windows-1256
)
3857 (define-coding-system-alias 'cp1257
'windows-1257
)
3858 (define-coding-system-alias 'cp1258
'windows-1258
)
3860 ;; Use Unicode font under Windows. Jason Rumney fecit.
;; Register charset info for "iso10646-1" only when the function
;; `w32-add-charset-info' exists and the variable
;; `w32-unicode-charset-defined' is not bound; otherwise do nothing.
3861 (if (and (fboundp 'w32-add-charset-info
)
3862 (not (boundp 'w32-unicode-charset-defined
)))
3863 (w32-add-charset-info "iso10646-1" 'w32-charset-ansi t
))
;; Announce the `codepages' feature so `require' can find it.
3865 (provide 'codepages
)
3866 ;;; codepages.el ends here