1 ;;; codepage.el --- MS-DOS/MS-Windows specific coding systems
3 ;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
4 ;; Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
6 ;; Registration Number H14PRO021
8 ;; Author: Eli Zaretskii
10 ;; Keywords: i18n ms-dos ms-windows codepage
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
31 ;; Special coding systems for DOS/Windows codepage support.
33 ;; These coding systems perform conversion from the DOS/Windows
34 ;; codepage encoding to one of the ISO-8859 character sets. Each
35 ;; codepage has its corresponding ISO-8859 charset, chosen so as to be
36 ;; able to convert all (or most) of the characters. The idea is that
37 ;; Emacs internally works with the usual MULE charsets, and the
38 ;; conversion to and from the DOS codepage is performed on I/O only.
39 ;; See term/internal.el for the complementary setup of the DOS
40 ;; terminal display and input methods.
42 ;; Thanks to Ken'ichi Handa <handa@etl.go.jp> for writing the CCL
43 ;; encoders/decoders, and for help in debugging this code.
;; Forward declaration only: no value is assigned here.  The code below
;; reads this variable solely under an (eq system-type 'ms-dos) guard.
(defvar dos-unsupported-char-glyph)
;; Build the coding system CODING from a decoding and an encoding
;; translation table (see the docstring below for the parameters).
;; NOTE(review): this listing is incomplete -- the embedded original
;; line numbers skip (50, 54, 59, 64-65, 67-68, 71-73, 77-80, 89-90,
;; 92, 96-98, 101-102, 104-105), so the end of the parameter list, the
;; CCL decoder/encoder bindings and the head of the final constructor
;; call are not visible here.  Restore them from a pristine copy before
;; changing any logic.
49 (defun cp-coding-system-for-codepage-1 (coding mnemonic iso-name
51 "Make coding system CODING for a DOS codepage using translation tables.
52 MNEMONIC is a character to be displayed on mode line for the coding system.
53 ISO-NAME is the name of the ISO-8859 charset which corresponds to this
55 DECODER is a translation table for converting characters in the DOS codepage
56 encoding to Emacs multibyte characters.
57 ENCODER is a translation table for encoding Emacs multibyte characters into
58 external DOS codepage codes."
;; Local state: CODING's name as a string, the replacement byte `undef'
;; and a fresh `safe-chars' char-table.
60 (let* ((coding-name (symbol-name coding
))
;; On MS-DOS with `dos-unsupported-char-glyph' set, `undef' is the low
;; 8 bits of that glyph; the fallback values are on lines not shown.
61 (undef (if (eq system-type
'ms-dos
)
62 (if dos-unsupported-char-glyph
63 (logand dos-unsupported-char-glyph
255)
66 (safe-chars (make-char-table 'safe-chars
))
;; Decoder side: the visible fragment translates through DECODER with
;; r0 preset to the ASCII charset id, then writes the multibyte result.
69 ;; The 4 here supplies the buf_magnification parameter
70 ;; for the CCL program. A multibyte character may take
74 ((r0 = ,(charset-id 'ascii
))
75 (translate-character ,decoder r0 r1
)
76 (write-multibyte-character r0 r1
))
;; Encoder side: non-ASCII characters are translated through ENCODER;
;; a result in japanese-jisx0208 is special-cased (body not shown,
;; presumably substituting `undef' -- confirm).
81 ;; The 2 here supplies the buf_magnification parameter for
82 ;; the CCL program. Since the -dos coding system generates
83 ;; \r\n for each \n, a factor of 2 covers even the worst case
84 ;; of empty lines with a single \n.
85 `(2 (loop (read-multibyte-character r0 r1
)
86 (if (r0 != ,(charset-id 'ascii
))
87 ((translate-character ,encoder r0 r1
)
88 (if (r0 == ,(charset-id 'japanese-jisx0208
))
91 (write-repeat r1
))))))
93 ;; Set elements of safe multibyte characters for this codepage
94 ;; to t in the char-table safe-chars.
;; The table is read from DECODER's `translation-table' property; the
;; loop bounds for `i' are on lines not shown.
95 (let ((tbl (get decoder
'translation-table
))
99 (setq ch
(aref tbl i
))
;; Only positions that actually decode to a character are marked safe.
100 (if ch
(aset safe-chars ch t
))
103 ;; Make coding system CODING.
;; Visible arguments: a description string, the (DECODER . ENCODER) CCL
;; pair, and a property list limiting valid codes to 0..255.
106 (concat "8-bit encoding of " (symbol-name iso-name
)
107 " characters using IBM codepage " coding-name
)
108 (cons ccl-decoder ccl-encoder
)
109 `((safe-charsets ascii eight-bit-control eight-bit-graphic
,iso-name
)
110 (safe-chars .
,safe-chars
)
111 (valid-codes (0 .
255)))))))
;; Invert one of the cpNNN-decode-table vectors into a 256-element
;; vector indexed by codepage byte.
;; NOTE(review): this listing is incomplete -- original lines 118,
;; 122-123, 125-131 and 133-134 are absent, so the loop header, the
;; handling of nil table entries and the return value are not visible.
113 (defun cp-decoding-vector-for-codepage (table charset offset
)
114 "Create a vector for decoding IBM PC characters using conversion table
115 TABLE into an ISO-8859 character set CHARSET whose first non-ASCII
116 character is generated by (make-char CHARSET OFFSET)."
117 (let* ((len (length table
))
;; Same MS-DOS-only glyph selection as in the other functions of this
;; file; presumably this is the initializer of `undefined-char' used
;; below -- confirm against the missing binding line.
119 (if (eq system-type
'ms-dos
)
120 (if dos-unsupported-char-glyph
121 (logand dos-unsupported-char-glyph
255)
;; Every slot starts out as the "undefined" character.
124 (vec1 (make-vector 256 undefined-char
))
;; Slot (aref TABLE i) receives the CHARSET character at code OFFSET+i,
;; i.e. the table's index-to-byte mapping is turned around.
132 (aset vec1
(aref table i
) (make-char charset
(+ i offset
))))
136 ;;; You don't think I created all these tables below by hand, do you?
137 ;;; The following Awk script will create the table for cp850-to-Latin-1
138 ;;; conversion from the RFC 1345 file (the other tables are left as an exercise):
140 ;;; BEGIN { n_pages = 11;
141 ;;; pn["IBM437"] = 0; pn["IBM850"] = 1; pn["IBM851"] = 2;
142 ;;; pn["IBM852"] = 3; pn["IBM855"] = 4; pn["IBM860"] = 5;
143 ;;; pn["IBM861"] = 6; pn["IBM862"] = 7; pn["IBM863"] = 8;
144 ;;; pn["IBM864"] = 9; pn["IBM865"] = 10;
146 ;;; $1 == "&charset" { charset = $2; }
147 ;;; $1 == "&code" { code = $2; }
149 ;;; if ((charset ~ /^IBM(437|8(5[0125]|6[0-5]))$/) || (charset ~ /^ISO_8859-1/))
151 ;;; for (i = 1; i <= NF; i++)
152 ;;; chars[charset,code++] = $i;
157 ;;; for (i = 160; i < 256; i++)
159 ;;; c = chars["ISO_8859-1:1987",i];
160 ;;; if (c == "??") # skip unused positions
163 ;;; if ((i - 159)%16 == 0)
170 ;;; for (combined in chars)
172 ;;; candidate = chars[combined];
173 ;;; split (combined, separate, SUBSEP);
174 ;;; if (separate[1] == "IBM850" && candidate == c)
177 ;;; map[separate[1]] = separate[2];
180 ;;; printf " %s", map["IBM850"];
181 ;;; if ((i - 159)%16 == 0)
186 ;;; WARNING WARNING WARNING!!!
188 ;;; If you want to get fancy with these tables, remember that the inverse
189 ;;; tables, created by `cp-decoding-vector-for-codepage' above, are installed
190 ;;; on MS-DOS as nonascii-translation-table (see `dos-codepage-setup' in
191 ;;; term/internal.el). Therefore, you should NOT put any codes below 128 in
192 ;;; these tables! Otherwise, various Emacs commands and functions will
193 ;;; mysteriously fail! For example, a typical screwup is to map the Latin-N
194 ;;; acute accent character to the apostrophe, and have all regexps which
195 ;;; end with "\\'" begin to fail (e.g., the automatic setting of the major
196 ;;; mode by file name extension will stop working).
198 ;;; You HAVE BEEN warned!
;; US/English/PC-8/IBM-2.  This doesn't support Latin-1 characters very
;; well, but why not use what we can salvage?
(defvar cp437-decode-table
  ;; Nth element is the code of a cp437 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp437 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil 166 174 170 196 nil nil
   248 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil nil nil nil nil 154 nil nil 225
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 437 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp437-decode-table
	  '(charset latin-iso8859-1 language "Latin-1" offset 160))
;; Multilingual (Latin-1)
(defvar cp850-decode-table
  ;; Nth element is the code of a cp850 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp850 glyph.
  [
   255 173 189 156 207 190 221 245 249 184 166 174 170 240 169 238
   248 241 253 252 239 230 244 250 247 251 167 175 172 171 243 168
   183 181 182 199 142 143 146 128 212 144 210 211 222 214 215 216
   209 165 227 224 226 229 153 158 157 235 233 234 154 237 232 225
   133 160 131 198 132 134 145 135 138 130 136 137 141 161 140 139
   208 164 149 162 147 228 148 246 155 151 163 150 129 236 231 152]
  "Table for converting ISO-8859-1 characters into codepage 850 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp850-decode-table
	  '(charset latin-iso8859-1 language "Latin-1" offset 160))
;; Multilingual (Latin-9)
(defvar cp858-decode-table
  ;; Nth element is the code of a cp858 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-15 (+ N 160)).
  ;; The element nil means there's no corresponding cp858 glyph.
  [
   255 173 189 156 213 190 221 245 249 184 166 174 170 240 169 238
   248 241 253 252 239 230 244 250 247 251 167 175 172 171 243 168
   183 181 182 199 142 143 146 128 212 144 210 211 222 214 215 216
   209 165 227 224 226 229 153 158 157 235 233 234 154 237 232 225
   133 160 131 198 132 134 145 135 138 130 136 137 141 161 140 139
   208 164 149 162 147 228 148 246 155 151 163 150 129 236 231 152]
  "Table for converting ISO-8859-15 characters into codepage 858 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp858-decode-table
	  '(charset latin-iso8859-15 language "Latin-9" offset 160))
(defvar cp851-decode-table
  ;; Nth element is the code of a cp851 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp851 glyph.
  [
   255 nil nil 156 nil nil nil 245 249 nil nil 174 nil 240 nil nil
   248 241 nil nil 239 nil 134 nil 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 184 183 189 190
   198 199 nil 207 208 209 210 211 212 213 nil nil 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 851 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp851-decode-table
	  '(charset greek-iso8859-7 language "Greek" offset 160))
;; Slavic/Eastern Europe (Latin-2)
(defvar cp852-decode-table
  ;; Nth element is the code of a cp852 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-2 (+ N 160)).
  [
   255 164 244 157 207 149 151 245 249 230 184 155 141 240 166 189
   248 165 247 136 239 150 152 243 242 231 173 156 171 241 167 190
   232 181 182 198 142 145 143 128 172 144 168 211 183 214 215 210
   209 227 213 224 226 138 153 158 252 222 233 235 154 237 221 225
   234 160 131 199 132 146 134 135 159 130 169 137 216 161 140 212
   208 228 229 162 147 139 148 246 253 133 163 251 129 236 238 250]
  "Table for converting ISO-8859-2 characters into codepage 852 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp852-decode-table
	  '(charset latin-iso8859-2 language "Latin-2" offset 160))
(defvar cp855-decode-table
  ;; Nth element is the code of a cp855 glyph for the multibyte
  ;; character created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  [
   255 133 129 131 135 137 139 141 143 145 147 149 151 240 153 155
   161 163 236 173 167 169 234 244 184 190 199 209 211 213 215 221
   226 228 230 232 171 182 165 252 246 250 159 242 238 248 157 224
   160 162 235 172 166 168 233 243 183 189 198 208 210 212 214 216
   225 227 229 231 170 181 164 251 245 249 158 241 237 247 156 222
   239 132 128 130 134 136 138 140 142 144 146 148 150 253 152 154]
  "Table for converting ISO-8859-5 characters into codepage 855 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp855-decode-table
	  '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160))
(defvar cp857-decode-table
  ;; Elements are cp857 glyph codes for characters of latin-iso8859-3;
  ;; nil means there's no corresponding cp857 glyph.
  ;; NOTE(review): this table holds 89 elements (rows of 14 and 15),
  ;; not 96 like its siblings.  Consumers index it by position
  ;; (element N <-> code N + offset), so confirm whether positions that
  ;; are unassigned in ISO-8859-3 were dropped and the rows are
  ;; therefore shifted.  The data below is kept exactly as found.
  [
   255 nil nil 156 207 nil 245 249 152 158 166 nil 240 nil
   248 nil 253 252 239 nil nil nil nil 141 159 167 nil 171 nil
   183 181 182 142 nil nil 128 212 144 210 211 222 214 215 216
   165 227 224 226 nil 153 232 nil 235 233 234 154 nil nil 225
   133 160 131 132 nil nil 135 138 130 136 137 236 161 140 139
   164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 250]
  "Table for converting ISO-8859-3 characters into codepage 857 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp857-decode-table
	  '(charset latin-iso8859-3 language "Latin-3" offset 160))
(defvar cp860-decode-table
  ;; Nth element is the code of a cp860 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp860 glyph.
  [
   255 173 155 156 nil nil 179 nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   145 134 143 142 nil nil nil 128 146 144 137 nil 152 nil 139 nil
   nil 165 159 169 140 153 nil nil nil 157 150 nil 154 nil nil nil
   133 160 131 132 nil nil nil 135 138 130 136 nil 141 161 nil nil
   nil 164 149 162 147 148 nil 246 nil 151 163 nil 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 860 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp860-decode-table
	  '(charset latin-iso8859-1 language "Latin-1" offset 160))
(defvar cp861-decode-table
  ;; Nth element is the code of a cp861 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp861 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil nil 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil nil 175 172 171 nil 168
   nil 164 nil nil 142 143 146 128 nil 144 nil nil nil 165 nil nil
   139 nil 159 166 nil nil 153 nil 157 nil 167 nil 154 151 141 nil
   133 160 131 nil 132 134 145 135 138 130 136 137 nil 161 nil nil
   140 nil nil 162 147 nil 148 246 155 nil 163 150 129 152 149 nil]
  "Table for converting ISO-8859-1 characters into codepage 861 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp861-decode-table
	  '(charset latin-iso8859-1 language "Latin-1" offset 160))
(defvar cp862-decode-table
  ;; Nth element is the code of a cp862 glyph for the multibyte
  ;; character created by (make-char 'hebrew-iso8859-8 (+ N 160)).
  ;; The element nil means there's no corresponding cp862 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil nil 174 170 196 nil nil
   248 241 253 nil nil 230 nil 249 nil nil 246 175 172 171 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil 205
   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
   144 145 146 147 148 149 150 151 152 153 154 nil nil nil nil nil]
  "Table for converting ISO-8859-8 characters into codepage 862 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp862-decode-table
	  '(charset hebrew-iso8859-8 language "Hebrew" offset 160))
(defvar cp863-decode-table
  ;; Nth element is the code of a cp863 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp863 glyph.
  [
   255 nil 155 156 152 nil 160 143 164 nil nil 174 170 nil nil 167
   nil 241 253 166 161 nil 134 249 165 nil nil 175 172 171 173 nil
   142 nil 132 nil nil nil nil 128 145 144 146 148 nil nil 168 149
   nil nil nil nil 153 nil nil nil nil 157 nil 158 154 nil nil nil
   133 nil 131 nil nil nil nil 135 138 130 136 137 141 nil 140 139
   nil nil nil 162 147 nil nil 246 nil 151 163 150 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 863 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp863-decode-table
	  '(charset latin-iso8859-1 language "Latin-1" offset 160))
;; FIXME: Emacs doesn't seem to support the "Arabic" language
;; environment yet.  So this is only partially usable, for now.
(defvar cp864-decode-table
  ;; Nth element is the code of a cp864 glyph for the multibyte
  ;; character created by (make-char 'arabic-iso8859-6 (+ N 160)).
  ;; The element nil means there's no corresponding cp864 glyph.
  [
   255 nil nil nil 164 nil nil nil nil nil nil nil 172 161 nil nil
   nil nil nil nil nil nil nil nil nil nil nil 187 nil nil nil 191
   nil 193 194 195 196 nil 198 199 169 201 170 171 173 174 175 207
   208 209 210 188 189 190 235 215 216 223 238 nil nil nil nil nil
   224 247 248 252 251 239 242 243 232 233 253 nil nil nil nil nil
   nil 241 nil nil nil nil nil nil nil nil nil nil nil nil nil nil]
  "Table for converting ISO-8859-6 characters into codepage 864 glyphs.")
;; `language' is nil here: no language environment is associated with
;; this table (see the FIXME above).
(setplist 'cp864-decode-table
	  '(charset arabic-iso8859-6 language nil offset 160))
;; Arabic OEM codepage used by Windows
;; FIXME: Emacs doesn't seem to support the "Arabic" language
;; environment yet.  So this is only partially usable, for now.
(defvar cp720-decode-table
  ;; Nth element is the code of a cp720 glyph for the multibyte
  ;; character created by (make-char 'arabic-iso8859-6 (+ N 160)).
  ;; The element nil means there's no corresponding cp720 glyph.
  [
   255 nil nil nil 148 nil nil nil nil nil nil nil nil 196 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
   nil 152 153 154 155 157 158 159 160 161 162 163 164 165 166 167
   168 169 170 171 172 173 224 225 226 227 228 nil nil nil nil nil
   149 229 231 232 233 234 235 236 237 238 239 241 242 243 244 245
   246 145 146 nil nil nil nil nil nil nil nil nil nil nil nil nil]
  "Table for converting ISO-8859-6 characters into codepage 720 glyphs.")
;; `language' is nil here: no language environment is associated with
;; this table (see the FIXME above).
(setplist 'cp720-decode-table
	  '(charset arabic-iso8859-6 language nil offset 160))
;; Nordic (Norwegian/Danish)
(defvar cp865-decode-table
  ;; Nth element is the code of a cp865 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp865 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil 157 nil nil nil 154 nil nil nil
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 155 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 865 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp865-decode-table
	  '(charset latin-iso8859-1 language "Latin-1" offset 160))
;; Russian (Yes, another one!  This one's supposed to be used
;; on Windows as the Russian OEM code page.)
(defvar cp866-decode-table
  ;; Nth element is the code of a cp866 glyph for the multibyte
  ;; character created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  ;; The element nil means there's no corresponding cp866 glyph.
  [
   255 240 nil nil 242 nil nil 244 nil nil nil nil nil nil 246 nil
   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
   144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
   160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
   224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   252 241 nil nil 243 nil nil 245 nil nil nil nil nil nil 247 nil]
  "Table for converting ISO-8859-5 characters into codepage 866 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp866-decode-table
	  '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160))
;; Greek (yes, another one!)
(defvar cp869-decode-table
  ;; Nth element is the code of a cp869 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp869 glyph.
  [
   255 139 140 156 nil nil 138 245 249 151 nil 174 137 240 nil 142
   248 241 153 154 239 247 134 136 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 183 184 189 190
   198 199 nil 207 208 209 210 211 212 213 145 150 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 869 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp869-decode-table
	  '(charset greek-iso8859-7 language "Greek" offset 160))
;; Greek OEM codepage used by Windows
(defvar cp737-decode-table
  ;; Nth element is the code of a cp737 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp737 glyph.
  [
   255 nil nil nil nil nil 179 nil nil nil nil nil nil 196 nil nil
   248 241 253 nil nil nil 234 250 235 236 237 nil 238 nil 239 240
   nil 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
   143 144 nil 145 146 147 148 149 150 151 244 245 225 226 227 229
   nil 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
   167 168 170 169 171 172 173 174 175 224 228 232 230 231 233 nil]
  "Table for converting ISO-8859-7 characters into codepage 737 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp737-decode-table
	  '(charset greek-iso8859-7 language "Greek" offset 160))
;; Conversion from codepages 770-775 to Latin-4 for Baltic countries.
;; FIXME: Once we support Latin-7, these should be remapped into it.
(defvar cp770-decode-table
  ;; Nth element is the code of a cp770 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp770 glyph.
  [
   255 143 nil nil 155 nil 156 nil 157 159 137 168 nil 196 146 nil
   248 133 nil nil nil nil 134 nil nil 158 136 152 nil nil 145 nil
   160 nil nil nil 142 nil nil 173 128 nil 139 nil 144 nil nil 161
   nil nil nil 163 nil 149 153 nil nil 167 nil nil 154 nil 166 225
   131 nil nil nil 132 nil nil 141 135 nil 138 nil 130 nil nil 140
   nil nil nil 162 nil 147 148 247 nil 151 nil nil 129 nil 150 nil]
  "Table for converting ISO-8859-4 characters into codepage 770 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp770-decode-table
	  '(charset latin-iso8859-4 language "Latin-4" offset 160))
(defvar cp773-decode-table
  ;; Nth element is the code of a cp773 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp773 glyph.
  [
   255 220 nil 138 150 nil 234 190 166 246 237 149 173 196 252 nil
   208 nil nil 139 239 nil 235 nil nil 247 137 133 136 nil 253 nil
   160 nil nil nil 142 143 146 244 222 144 240 nil 242 nil nil 161
   nil 238 226 232 nil 229 153 158 157 248 nil nil 154 nil 250 225
   131 nil nil nil 132 134 145 245 223 130 241 nil 243 nil nil 140
   nil 236 147 233 nil 228 148 198 155 249 nil nil 129 nil 251 nil]
  "Table for converting ISO-8859-4 characters into codepage 773 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp773-decode-table
	  '(charset latin-iso8859-4 language "Latin-4" offset 160))
(defvar cp774-decode-table
  ;; Nth element is the code of a cp774 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp774 glyph.
  [
   255 181 nil nil 155 nil nil nil 245 190 nil nil nil 196 207 nil
   248 208 nil nil nil nil nil nil nil 213 nil nil nil nil 216 nil
   nil nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil nil
   nil nil nil nil nil nil 153 nil nil 198 nil nil 154 nil 199 225
   nil 160 nil nil 132 134 145 212 209 130 210 137 211 161 140 nil
   nil nil nil nil 147 nil 148 246 237 214 163 150 129 nil 215 248]
  "Table for converting ISO-8859-4 characters into codepage 774 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp774-decode-table
	  '(charset latin-iso8859-4 language "Latin-4" offset 160))
(defvar cp775-decode-table
  ;; Nth element is the code of a cp775 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp775 glyph.
  [
   255 181 nil 138 150 nil 234 245 166 190 237 149 173 240 207 nil
   248 208 nil 139 239 nil 235 nil nil 213 137 133 136 nil 216 nil
   160 nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil 161
   nil 238 226 232 nil 229 153 158 157 198 nil nil 154 nil 199 225
   131 nil nil nil 132 134 145 212 209 130 210 nil 211 nil nil 140
   nil 236 147 233 nil 228 148 247 155 214 nil nil 129 nil 215 nil]
  "Table for converting ISO-8859-4 characters into codepage 775 glyphs.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp775-decode-table
	  '(charset latin-iso8859-4 language "Latin-4" offset 160))
490 ;; Support for the Windows 12xx series of codepages that MS has
491 ;; butchered from the ISO-8859 specs. This does not add support for
492 ;; the extended characters that MS has added in the 128 - 159 coding
493 ;; range, only translates those characters that can be expressed in
494 ;; the corresponding iso-8859 charset.
498 ;; Windows-1250: ISO-8859-2 (Central Europe) - differs in some positions
499 ;; Windows-1251: ISO-8859-5 (Cyrillic) - differs wildly
500 ;; Windows-1252: ISO-8859-1 (West Europe) - exact match
501 ;; Windows-1253: ISO-8859-7 (Greek) - differs in some positions
502 ;; Windows-1254: ISO-8859-9 (Turkish) - exact match
503 ;; Windows-1255: ISO-8859-8 (Hebrew) - exact match
504 ;; Windows-1256: ISO-8859-6 (Arabic) - half match
505 ;; Windows-1257: ISO-8859-4 (Baltic) - differs, future Latin-7
506 ;; Windows-1258: VISCII (Vietnamese) - Completely different
(defvar cp1250-decode-table
  ;; Nth element is the Windows-1250 code for the multibyte character
  ;; created by (make-char 'latin-iso8859-2 (+ N 160)).
  [
   160 165 162 163 164 188 140 167 168 138 170 141 143 173 142 175
   176 185 178 179 180 190 156 161 184 154 186 157 159 189 158 191
   192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
   208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
   224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255]
  "ISO-8859-2 to Windows-1250 (Central Europe) codepage decoding table.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp1250-decode-table
	  '(charset latin-iso8859-2 language "Latin-2" offset 160))
(defvar cp1251-decode-table
  ;; Nth element is the Windows-1251 code for the multibyte character
  ;; created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  [
   160 168 128 129 170 189 178 175 163 138 140 142 141 173 161 143
   192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
   208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
   224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
   185 184 144 131 186 190 179 191 188 154 156 158 157 167 162 159]
  "ISO-8859-5 to Windows-1251 (Cyrillic) codepage decoding table.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp1251-decode-table
	  '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160))
;; cp1253 is missing nbsp so we cannot quite translate perfectly.  It
;; also has two micro/mu characters which would require more complex
;; processing to accommodate.
(defvar cp1253-decode-table
  ;; Nth element is the Windows-1253 code for the multibyte character
  ;; created by (make-char 'greek-iso8859-7 (+ N 160)); nil means the
  ;; character has no Windows-1253 code in this table.
  [
   nil 145 146 163 nil nil 166 167 168 169 nil 171 172 173 nil 151
   176 177 178 179 180 161 162 183 184 185 186 187 188 189 190 191
   192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
   208 209 nil 211 212 213 214 215 216 217 218 219 220 221 222 223
   224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 nil]
  "ISO-8859-7 to Windows-1253 (Greek) codepage decoding table.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp1253-decode-table
	  '(charset greek-iso8859-7 language "Greek" offset 160))
;; Since Latin-7 is not yet official, and Emacs does not support it,
;; provide translation between Windows-1257 and Latin-4 as best we can.
(defvar cp1257-decode-table
  ;; Nth element is the Windows-1257 code for the multibyte character
  ;; created by (make-char 'latin-iso8859-4 (+ N 160)); nil means the
  ;; character has no Windows-1257 code in this table.
  [
   160 192 nil 170 164 nil 207 167 nil 208 199 204 nil 173 222 nil
   176 224 nil 186 nil nil 239 nil nil 240 231 236 nil nil 254 nil
   194 nil nil nil 196 197 175 193 200 201 198 nil 203 nil nil 206
   nil 210 212 205 nil 213 214 215 168 216 nil nil 220 nil 219 223
   226 nil nil nil 228 229 191 225 232 233 230 nil 235 nil nil 238
   nil 242 244 237 nil 245 246 247 184 248 nil nil 252 nil 251 nil]
  "ISO-8859-4 to Windows-1257 (Baltic) codepage decoding table.")
;; Properties looked up with `get' by the cp-*-for-codepage accessors:
;; target charset, language environment name, and code of element 0.
(setplist 'cp1257-decode-table
	  '(charset latin-iso8859-4 language "Latin-4" offset 160))
;; Derive the translation tables for CODEPAGE from its
;; cpNNN-decode-table vector and hand them to
;; `cp-coding-system-for-codepage-1'.
;; NOTE(review): this listing is incomplete -- original lines 566-567,
;; 573, 575, 577, 579, 593-594, 598, 601 and 609-610 are absent, so
;; several binding names, the loop header over `charsets' and the end of
;; the function are not visible.  Restore them before changing logic.
563 (defun cp-make-coding-systems-for-codepage (codepage iso-name offset
)
564 "Create a coding system to convert IBM CODEPAGE into charset ISO-NAME
565 whose first character is at offset OFFSET from the beginning of 8-bit
568 The created coding system has the usual 3 subsidiary systems: for Unix-,
569 DOS- and Mac-style EOL conversion. However, unlike built-in coding
570 systems, the Mac-style EOL conversion is currently not supported by the
571 decoder and encoder created by this function."
;; Four symbols are interned from CODEPAGE's name; judging by their uses
;; below they are presumably bound to `decode-table', `nonascii-table',
;; `decode-translation' and `encode-translation' -- confirm against the
;; missing binding lines.
572 (let* ((decode-table (intern (format "%s-decode-table" codepage
)))
574 (intern (format "%s-nonascii-translation-table" codepage
)))
576 (intern (format "%s-decode-translation-table" codepage
)))
578 (intern (format "%s-encode-translation-table" codepage
))))
;; Build the byte -> character table from the inverted decode vector.
580 (make-translation-table-from-vector
581 (cp-decoding-vector-for-codepage
582 (symbol-value decode-table
) iso-name offset
)))
;; The encoding direction lives in extra slot 0 of that char-table.
583 (define-translation-table encode-translation
584 (char-table-extra-slot (symbol-value nonascii-table
) 0))
585 ;; For charsets other than ascii, eight-bit-* and ISO-NAME, set
586 ;; `?' for one-column charsets, and some Japanese character for
587 ;; wide-column charsets. CCL encoder convert that Japanese
588 ;; character to either dos-unsupported-char-glyph or "??".
589 (let ((tbl (char-table-extra-slot (symbol-value nonascii-table
) 0))
590 (undef (if (eq system-type
'ms-dos
)
591 (if dos-unsupported-char-glyph
592 (logand dos-unsupported-char-glyph
255)
;; Work on a copy of `charset-list' because `delq' is destructive.
595 (charsets (delq 'ascii
596 (delq 'eight-bit-control
597 (delq 'eight-bit-graphic
599 (copy-sequence charset-list
))))))
600 (wide-column-char (make-char 'japanese-jisx0208
32 32)))
;; Per remaining charset: map the charset's generic character to `undef'
;; when it is one column wide, otherwise to the wide placeholder.
602 (aset tbl
(make-char (car charsets
))
603 (if (= (charset-width (car charsets
)) 1) undef wide-column-char
))
604 (setq charsets
(cdr charsets
))))
605 (define-translation-table decode-translation
606 (symbol-value nonascii-table
))
;; ?D is the mode-line mnemonic passed for every codepage coding system.
607 (cp-coding-system-for-codepage-1
608 (intern codepage
) ?D iso-name decode-translation encode-translation
)
;; Map a codepage name (symbol or string) to the symbol of its
;; `NAME-decode-table' variable, without creating new symbols.
;; NOTE(review): original lines 615-616 and 618-619 are absent from this
;; listing; the conditional that wraps the `intern-soft' call (and its
;; fallback branch) is therefore not visible.
611 (defun cp-codepage-decoder (codepage)
612 "If CODEPAGE is the name of a supported codepage, return its decode table.
613 Otherwise return nil."
;; Normalize: a symbol argument is replaced by its print name.
614 (let ((cp (if (symbolp codepage
) (symbol-name codepage
) codepage
)))
;; `intern-soft' returns nil when no such symbol has been interned.
617 (intern-soft (format "%s-decode-table" cp
)))
(defun cp-charset-for-codepage (codepage)
  "Return the charset for which there is a translation table to DOS CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
Signal an error if no decode table is known for CODEPAGE."
  (let ((cp-decoder (cp-codepage-decoder codepage)))
    ;; Reject unknown codepages up front rather than reading a property
    ;; off nil.
    (unless cp-decoder
      (error "Unsupported codepage %s" codepage))
    ;; The charset is stored on the decode-table symbol's plist.
    (get cp-decoder 'charset)))
(defun cp-language-for-codepage (codepage)
  "Return the name of the MULE language environment for CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
Signal an error if no decode table is known for CODEPAGE."
  (let ((cp-decoder (cp-codepage-decoder codepage)))
    ;; Reject unknown codepages up front rather than reading a property
    ;; off nil.
    (unless cp-decoder
      (error "Unsupported codepage %s" codepage))
    ;; May be nil: some decode tables record no language environment.
    (get cp-decoder 'language)))
(defun cp-offset-for-codepage (codepage)
  "Return the offset to be used in setting up coding systems for CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
Signal an error if no decode table is known for CODEPAGE."
  (let ((cp-decoder (cp-codepage-decoder codepage)))
    ;; Reject unknown codepages up front rather than reading a property
    ;; off nil.
    (unless cp-decoder
      (error "Unsupported codepage %s" codepage))
    ;; The offset is the charset code that element 0 of the table maps.
    (get cp-decoder 'offset)))
;; Collect (NNN . CHARSET) pairs for every `cpNNN-decode-table' variable.
;; NOTE(review): original lines 657, 659-661, 667 and 669-670 are absent
;; from this listing; the iteration that binds `sym' (presumably
;; `mapatoms' -- confirm), the `setq' that receives the new alist and
;; the return form are not visible.
648 (defun cp-supported-codepages ()
649 "Return an alist of supported codepages.
651 Each association in the alist has the form (NNN . CHARSET), where NNN is the
652 codepage number, and CHARSET is the MULE charset which is the closest match
653 for the character set supported by that codepage.
655 A codepage NNN is supported if a variable called `cpNNN-decode-table' exists,
656 is a vector, and has a charset property."
658 (let (alist chset sname
)
;; A symbol qualifies only if it is bound, is named cpNNN-decode-table
;; with a 3- or 4-digit NNN not starting with 0, holds a vector, and
;; carries a non-nil `charset' property.  `sname' and `chset' are
;; captured as side effects of the test for use in the result below.
662 (if (and (boundp sym
)
663 (string-match "\\`cp\\([1-9][0-9][0-9][0-9]?\\)-decode-table\\'"
664 (setq sname
(symbol-name sym
)))
665 (vectorp (symbol-value sym
))
666 (setq chset
(get sym
'charset
)))
;; NNN is taken from the regexp's first group, so it stays a string.
668 (cons (cons (match-string 1 sname
) chset
) alist
))))))
(defun codepage-setup (codepage)
  "Create a coding system cpCODEPAGE to support the IBM codepage CODEPAGE.

These coding systems are meant for encoding and decoding 8-bit non-ASCII
characters used by the IBM codepages, typically in conjunction with files
read/written by MS-DOS software, or for display on the MS-DOS terminal."
  ;; When called interactively, prompt with completion over the
  ;; supported codepage numbers; empty input selects "437".
  (interactive
   (let ((completion-ignore-case t)
	 (candidates (cp-supported-codepages)))
     (list (completing-read "Setup DOS Codepage (default 437): " candidates
			    nil t nil nil "437"))))
  (let* ((cp (format "cp%s" codepage))
	 ;; `intern-soft' avoids creating the symbol as a side effect.
	 (cp-defined (intern-soft cp)))
    (or (and cp-defined			; avoid defining if already defined
	     (coding-system-p cp-defined))
	(cp-make-coding-systems-for-codepage
	 cp (cp-charset-for-codepage cp) (cp-offset-for-codepage cp)))))
690 ;; Add DOS codepages to `non-iso-charset-alist'.
;; The form is deferred until the "mule-diag" library has been loaded.
;; NOTE(review): original lines 693-694, 698, 703 and 705-706 are absent
;; from this listing; the declaration of `elt', the loop header over
;; `tail' and the backquote that the `,(...)' below belongs to are not
;; visible.
691 (eval-after-load "mule-diag"
692 '(let ((tail (cp-supported-codepages))
;; Pop the next (CODEPAGE . CHARSET) pair off `tail'.
695 (setq elt
(car tail
) tail
(cdr tail
))
696 ;; Now ELT is (CODEPAGE . CHARSET), where CODEPAGE is a string
697 ;; (e.g. "850"), CHARSET is a charset that characters in CODEPAGE
;; Register cpNNN only if the alist has no entry for it yet.
699 (unless (assq (intern (concat "cp" (car elt
))) non-iso-charset-alist
)
700 (setq non-iso-charset-alist
701 (cons (list (intern (concat "cp" (car elt
)))
702 (list 'ascii
(cdr elt
))
;; NOTE(review): `string-to-int' is an obsolete name for
;; `string-to-number'; verify it still exists in the targeted Emacs.
704 (decode-codepage-char ,(string-to-int (car elt
))
707 non-iso-charset-alist
))))))
711 ;;; arch-tag: 80328de8-b94e-4386-be26-5876105731f0
712 ;;; codepage.el ends here