Auto-commit of generated files.
[emacs.git] / src / charset.c
blobe7435c292e2ad8a6622afb235ef555d1bfddbc81
1 /* Basic character set support.
2 Copyright (C) 2001-2011 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
8 Copyright (C) 2003, 2004
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
12 This file is part of GNU Emacs.
14 GNU Emacs is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
19 GNU Emacs is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27 #include <config.h>
29 #include <stdio.h>
30 #include <unistd.h>
31 #include <ctype.h>
32 #include <limits.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* Hash table that contains attributes of each charset. Keys are
58 charset symbols, and values are vectors of charset attributes. */
59 Lisp_Object Vcharset_hash_table;
61 /* Table of struct charset. */
62 struct charset *charset_table;
64 static int charset_table_size;
65 static int charset_table_used;
67 Lisp_Object Qcharsetp;
69 /* Special charset symbols. */
70 Lisp_Object Qascii;
71 static Lisp_Object Qeight_bit;
72 static Lisp_Object Qiso_8859_1;
73 static Lisp_Object Qunicode;
74 static Lisp_Object Qemacs;
76 /* The corresponding charsets. */
77 int charset_ascii;
78 int charset_eight_bit;
79 static int charset_iso_8859_1;
80 int charset_unicode;
81 static int charset_emacs;
83 /* The other special charsets. */
84 int charset_jisx0201_roman;
85 int charset_jisx0208_1978;
86 int charset_jisx0208;
87 int charset_ksc5601;
89 /* Value of charset attribute `charset-iso-plane'. */
90 static Lisp_Object Qgl, Qgr;
92 /* Charset of unibyte characters. */
93 int charset_unibyte;
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list;
98 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
99 charsets. */
100 Lisp_Object Vcharset_non_preferred_head;
102 /* Incremented everytime we change Vcharset_ordered_list. This is
103 unsigned short so that it fits in Lisp_Int and never matches
104 -1. */
105 unsigned short charset_ordered_list_tick;
107 /* List of iso-2022 charsets. */
108 Lisp_Object Viso_2022_charset_list;
110 /* List of emacs-mule charsets. */
111 Lisp_Object Vemacs_mule_charset_list;
113 int emacs_mule_charset[256];
115 /* Mapping table from ISO2022's charset (specified by DIMENSION,
116 CHARS, and FINAL-CHAR) to Emacs' charset. */
117 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
119 #define CODE_POINT_TO_INDEX(charset, code) \
120 ((charset)->code_linear_p \
121 ? (code) - (charset)->min_code \
122 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
123 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
124 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
125 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
126 ? (((((code) >> 24) - (charset)->code_space[12]) \
127 * (charset)->code_space[11]) \
128 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
129 * (charset)->code_space[7]) \
130 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
131 * (charset)->code_space[3]) \
132 + (((code) & 0xFF) - (charset)->code_space[0]) \
133 - ((charset)->char_index_offset)) \
134 : -1)
137 /* Convert the character index IDX to code-point CODE for CHARSET.
138 It is assumed that IDX is in a valid range. */
140 #define INDEX_TO_CODE_POINT(charset, idx) \
141 ((charset)->code_linear_p \
142 ? (idx) + (charset)->min_code \
143 : (idx += (charset)->char_index_offset, \
144 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
145 | (((charset)->code_space[4] \
146 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
147 << 8) \
148 | (((charset)->code_space[8] \
149 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
150 << 16) \
151 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
152 << 24))))
154 /* Structure to hold mapping tables for a charset. Used by temacs
155 invoked for dumping. */
157 static struct
159 /* The current charset for which the following tables are setup. */
160 struct charset *current;
162 /* 1 iff the following table is used for encoder. */
163 short for_encoder;
165 /* When the following table is used for encoding, mininum and
166 maxinum character of the current charset. */
167 int min_char, max_char;
169 /* A Unicode character correspoinding to the code indice 0 (i.e. the
170 minimum code-point) of the current charset, or -1 if the code
171 indice 0 is not a Unicode character. This is checked when
172 table.encoder[CHAR] is zero. */
173 int zero_index_char;
175 union {
176 /* Table mapping code-indices (not code-points) of the current
177 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
178 doesn't belong to the current charset. */
179 int decoder[0x10000];
180 /* Table mapping Unicode characters to code-indices of the current
181 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
182 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
183 (0x20000..0x2FFFF). Note that there is no charset map that
184 uses both SMP and SIP. */
185 unsigned short encoder[0x20000];
186 } table;
187 } *temp_charset_work;
189 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
190 do { \
191 if ((CODE) == 0) \
192 temp_charset_work->zero_index_char = (C); \
193 else if ((C) < 0x20000) \
194 temp_charset_work->table.encoder[(C)] = (CODE); \
195 else \
196 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
197 } while (0)
199 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
200 ((C) == temp_charset_work->zero_index_char ? 0 \
201 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
202 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
203 : temp_charset_work->table.encoder[(C) - 0x10000] \
204 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
206 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
207 (temp_charset_work->table.decoder[(CODE)] = (C))
209 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
210 (temp_charset_work->table.decoder[(CODE)])
213 /* Set to 1 to warn that a charset map is loaded and thus a buffer
214 text and a string data may be relocated. */
215 int charset_map_loaded;
217 struct charset_map_entries
219 struct {
220 unsigned from, to;
221 int c;
222 } entry[0x10000];
223 struct charset_map_entries *next;
226 /* Load the mapping information of CHARSET from ENTRIES for
227 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
228 encoding (CONTROL_FLAG == 2).
230 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
231 and CHARSET->fast_map.
233 If CONTROL_FLAG is 1, setup the following tables according to
234 CHARSET->method and inhibit_load_charset_map.
236 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
237 ----------------------+--------------------+---------------------------
238 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
239 ----------------------+--------------------+---------------------------
240 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
242 If CONTROL_FLAG is 2, setup the following tables.
244 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
245 ----------------------+--------------------+---------------------------
246 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
247 ----------------------+--------------------+--------------------------
248 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
251 static void
252 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
254 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
255 unsigned max_code = CHARSET_MAX_CODE (charset);
256 int ascii_compatible_p = charset->ascii_compatible_p;
257 int min_char, max_char, nonascii_min_char;
258 int i;
259 unsigned char *fast_map = charset->fast_map;
261 if (n_entries <= 0)
262 return;
264 if (control_flag)
266 if (! inhibit_load_charset_map)
268 if (control_flag == 1)
270 if (charset->method == CHARSET_METHOD_MAP)
272 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
274 vec = CHARSET_DECODER (charset)
275 = Fmake_vector (make_number (n), make_number (-1));
277 else
279 char_table_set_range (Vchar_unify_table,
280 charset->min_char, charset->max_char,
281 Qnil);
284 else
286 table = Fmake_char_table (Qnil, Qnil);
287 if (charset->method == CHARSET_METHOD_MAP)
288 CHARSET_ENCODER (charset) = table;
289 else
290 CHARSET_DEUNIFIER (charset) = table;
293 else
295 if (! temp_charset_work)
296 temp_charset_work = xmalloc (sizeof (*temp_charset_work));
297 if (control_flag == 1)
299 memset (temp_charset_work->table.decoder, -1,
300 sizeof (int) * 0x10000);
302 else
304 memset (temp_charset_work->table.encoder, 0,
305 sizeof (unsigned short) * 0x20000);
306 temp_charset_work->zero_index_char = -1;
308 temp_charset_work->current = charset;
309 temp_charset_work->for_encoder = (control_flag == 2);
310 control_flag += 2;
312 charset_map_loaded = 1;
315 min_char = max_char = entries->entry[0].c;
316 nonascii_min_char = MAX_CHAR;
317 for (i = 0; i < n_entries; i++)
319 unsigned from, to;
320 int from_index, to_index, lim_index;
321 int from_c, to_c;
322 int idx = i % 0x10000;
324 if (i > 0 && idx == 0)
325 entries = entries->next;
326 from = entries->entry[idx].from;
327 to = entries->entry[idx].to;
328 from_c = entries->entry[idx].c;
329 from_index = CODE_POINT_TO_INDEX (charset, from);
330 if (from == to)
332 to_index = from_index;
333 to_c = from_c;
335 else
337 to_index = CODE_POINT_TO_INDEX (charset, to);
338 to_c = from_c + (to_index - from_index);
340 if (from_index < 0 || to_index < 0)
341 continue;
342 lim_index = to_index + 1;
344 if (to_c > max_char)
345 max_char = to_c;
346 else if (from_c < min_char)
347 min_char = from_c;
349 if (control_flag == 1)
351 if (charset->method == CHARSET_METHOD_MAP)
352 for (; from_index < lim_index; from_index++, from_c++)
353 ASET (vec, from_index, make_number (from_c));
354 else
355 for (; from_index < lim_index; from_index++, from_c++)
356 CHAR_TABLE_SET (Vchar_unify_table,
357 CHARSET_CODE_OFFSET (charset) + from_index,
358 make_number (from_c));
360 else if (control_flag == 2)
362 if (charset->method == CHARSET_METHOD_MAP
363 && CHARSET_COMPACT_CODES_P (charset))
364 for (; from_index < lim_index; from_index++, from_c++)
366 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
368 if (NILP (CHAR_TABLE_REF (table, from_c)))
369 CHAR_TABLE_SET (table, from_c, make_number (code));
371 else
372 for (; from_index < lim_index; from_index++, from_c++)
374 if (NILP (CHAR_TABLE_REF (table, from_c)))
375 CHAR_TABLE_SET (table, from_c, make_number (from_index));
378 else if (control_flag == 3)
379 for (; from_index < lim_index; from_index++, from_c++)
380 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
381 else if (control_flag == 4)
382 for (; from_index < lim_index; from_index++, from_c++)
383 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
384 else /* control_flag == 0 */
386 if (ascii_compatible_p)
388 if (! ASCII_BYTE_P (from_c))
390 if (from_c < nonascii_min_char)
391 nonascii_min_char = from_c;
393 else if (! ASCII_BYTE_P (to_c))
395 nonascii_min_char = 0x80;
399 for (; from_c <= to_c; from_c++)
400 CHARSET_FAST_MAP_SET (from_c, fast_map);
404 if (control_flag == 0)
406 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
407 ? nonascii_min_char : min_char);
408 CHARSET_MAX_CHAR (charset) = max_char;
410 else if (control_flag == 4)
412 temp_charset_work->min_char = min_char;
413 temp_charset_work->max_char = max_char;
418 /* Read a hexadecimal number (preceded by "0x") from the file FP while
419 paying attention to comment character '#'. */
421 static INLINE unsigned
422 read_hex (FILE *fp, int *eof)
424 int c;
425 unsigned n;
427 while ((c = getc (fp)) != EOF)
429 if (c == '#')
431 while ((c = getc (fp)) != EOF && c != '\n');
433 else if (c == '0')
435 if ((c = getc (fp)) == EOF || c == 'x')
436 break;
439 if (c == EOF)
441 *eof = 1;
442 return 0;
444 *eof = 0;
445 n = 0;
446 if (c == 'x')
447 while ((c = getc (fp)) != EOF && isxdigit (c))
448 n = ((n << 4)
449 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
450 else
451 while ((c = getc (fp)) != EOF && isdigit (c))
452 n = (n * 10) + c - '0';
453 if (c != EOF)
454 ungetc (c, fp);
455 return n;
458 /* Return a mapping vector for CHARSET loaded from MAPFILE.
459 Each line of MAPFILE has this form
460 0xAAAA 0xCCCC
461 where 0xAAAA is a code-point and 0xCCCC is the corresponding
462 character code, or this form
463 0xAAAA-0xBBBB 0xCCCC
464 where 0xAAAA and 0xBBBB are code-points specifying a range, and
465 0xCCCC is the first character code of the range.
467 The returned vector has this form:
468 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
469 where CODE1 is a code-point or a cons of code-points specifying a
470 range.
472 Note that this function uses `openp' to open MAPFILE but ignores
473 `file-name-handler-alist' to avoid running any Lisp code. */
475 static void
476 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
478 unsigned min_code = CHARSET_MIN_CODE (charset);
479 unsigned max_code = CHARSET_MAX_CODE (charset);
480 int fd;
481 FILE *fp;
482 int eof;
483 Lisp_Object suffixes;
484 struct charset_map_entries *head, *entries;
485 int n_entries, count;
486 USE_SAFE_ALLOCA;
488 suffixes = Fcons (build_string (".map"),
489 Fcons (build_string (".TXT"), Qnil));
491 count = SPECPDL_INDEX ();
492 specbind (Qfile_name_handler_alist, Qnil);
493 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
494 unbind_to (count, Qnil);
495 if (fd < 0
496 || ! (fp = fdopen (fd, "r")))
497 error ("Failure in loading charset map: %s", SDATA (mapfile));
499 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
500 large (larger than MAX_ALLOCA). */
501 SAFE_ALLOCA (head, struct charset_map_entries *,
502 sizeof (struct charset_map_entries));
503 entries = head;
504 memset (entries, 0, sizeof (struct charset_map_entries));
506 n_entries = 0;
507 eof = 0;
508 while (1)
510 unsigned from, to;
511 int c;
512 int idx;
514 from = read_hex (fp, &eof);
515 if (eof)
516 break;
517 if (getc (fp) == '-')
518 to = read_hex (fp, &eof);
519 else
520 to = from;
521 c = (int) read_hex (fp, &eof);
523 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
524 continue;
526 if (n_entries > 0 && (n_entries % 0x10000) == 0)
528 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
529 sizeof (struct charset_map_entries));
530 entries = entries->next;
531 memset (entries, 0, sizeof (struct charset_map_entries));
533 idx = n_entries % 0x10000;
534 entries->entry[idx].from = from;
535 entries->entry[idx].to = to;
536 entries->entry[idx].c = c;
537 n_entries++;
539 fclose (fp);
541 load_charset_map (charset, head, n_entries, control_flag);
542 SAFE_FREE ();
545 static void
546 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
548 unsigned min_code = CHARSET_MIN_CODE (charset);
549 unsigned max_code = CHARSET_MAX_CODE (charset);
550 struct charset_map_entries *head, *entries;
551 int n_entries;
552 int len = ASIZE (vec);
553 int i;
554 USE_SAFE_ALLOCA;
556 if (len % 2 == 1)
558 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
559 return;
562 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
563 large (larger than MAX_ALLOCA). */
564 SAFE_ALLOCA (head, struct charset_map_entries *,
565 sizeof (struct charset_map_entries));
566 entries = head;
567 memset (entries, 0, sizeof (struct charset_map_entries));
569 n_entries = 0;
570 for (i = 0; i < len; i += 2)
572 Lisp_Object val, val2;
573 unsigned from, to;
574 int c;
575 int idx;
577 val = AREF (vec, i);
578 if (CONSP (val))
580 val2 = XCDR (val);
581 val = XCAR (val);
582 CHECK_NATNUM (val);
583 CHECK_NATNUM (val2);
584 from = XFASTINT (val);
585 to = XFASTINT (val2);
587 else
589 CHECK_NATNUM (val);
590 from = to = XFASTINT (val);
592 val = AREF (vec, i + 1);
593 CHECK_NATNUM (val);
594 c = XFASTINT (val);
596 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
597 continue;
599 if (n_entries > 0 && (n_entries % 0x10000) == 0)
601 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
602 sizeof (struct charset_map_entries));
603 entries = entries->next;
604 memset (entries, 0, sizeof (struct charset_map_entries));
606 idx = n_entries % 0x10000;
607 entries->entry[idx].from = from;
608 entries->entry[idx].to = to;
609 entries->entry[idx].c = c;
610 n_entries++;
613 load_charset_map (charset, head, n_entries, control_flag);
614 SAFE_FREE ();
618 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
619 map it is (see the comment of load_charset_map for the detail). */
621 static void
622 load_charset (struct charset *charset, int control_flag)
624 Lisp_Object map;
626 if (inhibit_load_charset_map
627 && temp_charset_work
628 && charset == temp_charset_work->current
629 && ((control_flag == 2) == temp_charset_work->for_encoder))
630 return;
632 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
633 map = CHARSET_MAP (charset);
634 else
636 if (! CHARSET_UNIFIED_P (charset))
637 abort ();
638 map = CHARSET_UNIFY_MAP (charset);
640 if (STRINGP (map))
641 load_charset_map_from_file (charset, map, control_flag);
642 else
643 load_charset_map_from_vector (charset, map, control_flag);
647 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
648 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
649 (Lisp_Object object)
651 return (CHARSETP (object) ? Qt : Qnil);
655 static void
656 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
657 Lisp_Object function, Lisp_Object arg,
658 unsigned int from, unsigned int to)
660 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
661 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
662 Lisp_Object range;
663 int c, stop;
664 struct gcpro gcpro1;
666 range = Fcons (Qnil, Qnil);
667 GCPRO1 (range);
669 c = temp_charset_work->min_char;
670 stop = (temp_charset_work->max_char < 0x20000
671 ? temp_charset_work->max_char : 0xFFFF);
673 while (1)
675 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
677 if (idx >= from_idx && idx <= to_idx)
679 if (NILP (XCAR (range)))
680 XSETCAR (range, make_number (c));
682 else if (! NILP (XCAR (range)))
684 XSETCDR (range, make_number (c - 1));
685 if (c_function)
686 (*c_function) (arg, range);
687 else
688 call2 (function, range, arg);
689 XSETCAR (range, Qnil);
691 if (c == stop)
693 if (c == temp_charset_work->max_char)
695 if (! NILP (XCAR (range)))
697 XSETCDR (range, make_number (c));
698 if (c_function)
699 (*c_function) (arg, range);
700 else
701 call2 (function, range, arg);
703 break;
705 c = 0x1FFFF;
706 stop = temp_charset_work->max_char;
708 c++;
710 UNGCPRO;
713 void
714 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
715 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
717 Lisp_Object range;
718 int partial;
720 partial = (from > CHARSET_MIN_CODE (charset)
721 || to < CHARSET_MAX_CODE (charset));
723 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
725 int from_idx = CODE_POINT_TO_INDEX (charset, from);
726 int to_idx = CODE_POINT_TO_INDEX (charset, to);
727 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
728 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
730 if (CHARSET_UNIFIED_P (charset))
732 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
733 load_charset (charset, 2);
734 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
735 map_char_table_for_charset (c_function, function,
736 CHARSET_DEUNIFIER (charset), arg,
737 partial ? charset : NULL, from, to);
738 else
739 map_charset_for_dump (c_function, function, arg, from, to);
742 range = Fcons (make_number (from_c), make_number (to_c));
743 if (NILP (function))
744 (*c_function) (arg, range);
745 else
746 call2 (function, range, arg);
748 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
750 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
751 load_charset (charset, 2);
752 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
753 map_char_table_for_charset (c_function, function,
754 CHARSET_ENCODER (charset), arg,
755 partial ? charset : NULL, from, to);
756 else
757 map_charset_for_dump (c_function, function, arg, from, to);
759 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
761 Lisp_Object subset_info;
762 int offset;
764 subset_info = CHARSET_SUBSET (charset);
765 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
766 offset = XINT (AREF (subset_info, 3));
767 from -= offset;
768 if (from < XFASTINT (AREF (subset_info, 1)))
769 from = XFASTINT (AREF (subset_info, 1));
770 to -= offset;
771 if (to > XFASTINT (AREF (subset_info, 2)))
772 to = XFASTINT (AREF (subset_info, 2));
773 map_charset_chars (c_function, function, arg, charset, from, to);
775 else /* i.e. CHARSET_METHOD_SUPERSET */
777 Lisp_Object parents;
779 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
780 parents = XCDR (parents))
782 int offset;
783 unsigned this_from, this_to;
785 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
786 offset = XINT (XCDR (XCAR (parents)));
787 this_from = from > offset ? from - offset : 0;
788 this_to = to > offset ? to - offset : 0;
789 if (this_from < CHARSET_MIN_CODE (charset))
790 this_from = CHARSET_MIN_CODE (charset);
791 if (this_to > CHARSET_MAX_CODE (charset))
792 this_to = CHARSET_MAX_CODE (charset);
793 map_charset_chars (c_function, function, arg, charset,
794 this_from, this_to);
799 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
800 doc: /* Call FUNCTION for all characters in CHARSET.
801 FUNCTION is called with an argument RANGE and the optional 3rd
802 argument ARG.
804 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
805 characters contained in CHARSET.
807 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
808 range of code points (in CHARSET) of target characters. */)
809 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
811 struct charset *cs;
812 unsigned from, to;
814 CHECK_CHARSET_GET_CHARSET (charset, cs);
815 if (NILP (from_code))
816 from = CHARSET_MIN_CODE (cs);
817 else
819 CHECK_NATNUM (from_code);
820 from = XINT (from_code);
821 if (from < CHARSET_MIN_CODE (cs))
822 from = CHARSET_MIN_CODE (cs);
824 if (NILP (to_code))
825 to = CHARSET_MAX_CODE (cs);
826 else
828 CHECK_NATNUM (to_code);
829 to = XINT (to_code);
830 if (to > CHARSET_MAX_CODE (cs))
831 to = CHARSET_MAX_CODE (cs);
833 map_charset_chars (NULL, function, arg, cs, from, to);
834 return Qnil;
838 /* Define a charset according to the arguments. The Nth argument is
839 the Nth attribute of the charset (the last attribute `charset-id'
840 is not included). See the docstring of `define-charset' for the
841 detail. */
843 DEFUN ("define-charset-internal", Fdefine_charset_internal,
844 Sdefine_charset_internal, charset_arg_max, MANY, 0,
845 doc: /* For internal use only.
846 usage: (define-charset-internal ...) */)
847 (size_t nargs, Lisp_Object *args)
849 /* Charset attr vector. */
850 Lisp_Object attrs;
851 Lisp_Object val;
852 unsigned hash_code;
853 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
854 int i, j;
855 struct charset charset;
856 int id;
857 int dimension;
858 int new_definition_p;
859 int nchars;
861 if (nargs != charset_arg_max)
862 return Fsignal (Qwrong_number_of_arguments,
863 Fcons (intern ("define-charset-internal"),
864 make_number (nargs)));
866 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
868 CHECK_SYMBOL (args[charset_arg_name]);
869 ASET (attrs, charset_name, args[charset_arg_name]);
871 val = args[charset_arg_code_space];
872 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
874 int min_byte, max_byte;
876 min_byte = XINT (Faref (val, make_number (i * 2)));
877 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
878 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
879 error ("Invalid :code-space value");
880 charset.code_space[i * 4] = min_byte;
881 charset.code_space[i * 4 + 1] = max_byte;
882 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
883 nchars *= charset.code_space[i * 4 + 2];
884 charset.code_space[i * 4 + 3] = nchars;
885 if (max_byte > 0)
886 dimension = i + 1;
889 val = args[charset_arg_dimension];
890 if (NILP (val))
891 charset.dimension = dimension;
892 else
894 CHECK_NATNUM (val);
895 charset.dimension = XINT (val);
896 if (charset.dimension < 1 || charset.dimension > 4)
897 args_out_of_range_3 (val, make_number (1), make_number (4));
900 charset.code_linear_p
901 = (charset.dimension == 1
902 || (charset.code_space[2] == 256
903 && (charset.dimension == 2
904 || (charset.code_space[6] == 256
905 && (charset.dimension == 3
906 || charset.code_space[10] == 256)))));
908 if (! charset.code_linear_p)
910 charset.code_space_mask = (unsigned char *) xmalloc (256);
911 memset (charset.code_space_mask, 0, 256);
912 for (i = 0; i < 4; i++)
913 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
914 j++)
915 charset.code_space_mask[j] |= (1 << i);
918 charset.iso_chars_96 = charset.code_space[2] == 96;
920 charset.min_code = (charset.code_space[0]
921 | (charset.code_space[4] << 8)
922 | (charset.code_space[8] << 16)
923 | (charset.code_space[12] << 24));
924 charset.max_code = (charset.code_space[1]
925 | (charset.code_space[5] << 8)
926 | (charset.code_space[9] << 16)
927 | (charset.code_space[13] << 24));
928 charset.char_index_offset = 0;
930 val = args[charset_arg_min_code];
931 if (! NILP (val))
933 unsigned code;
935 if (INTEGERP (val))
936 code = XINT (val);
937 else
939 CHECK_CONS (val);
940 CHECK_NUMBER_CAR (val);
941 CHECK_NUMBER_CDR (val);
942 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
944 if (code < charset.min_code
945 || code > charset.max_code)
946 args_out_of_range_3 (make_number (charset.min_code),
947 make_number (charset.max_code), val);
948 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
949 charset.min_code = code;
952 val = args[charset_arg_max_code];
953 if (! NILP (val))
955 unsigned code;
957 if (INTEGERP (val))
958 code = XINT (val);
959 else
961 CHECK_CONS (val);
962 CHECK_NUMBER_CAR (val);
963 CHECK_NUMBER_CDR (val);
964 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
966 if (code < charset.min_code
967 || code > charset.max_code)
968 args_out_of_range_3 (make_number (charset.min_code),
969 make_number (charset.max_code), val);
970 charset.max_code = code;
973 charset.compact_codes_p = charset.max_code < 0x10000;
975 val = args[charset_arg_invalid_code];
976 if (NILP (val))
978 if (charset.min_code > 0)
979 charset.invalid_code = 0;
980 else
982 XSETINT (val, charset.max_code + 1);
983 if (XINT (val) == charset.max_code + 1)
984 charset.invalid_code = charset.max_code + 1;
985 else
986 error ("Attribute :invalid-code must be specified");
989 else
991 CHECK_NATNUM (val);
992 charset.invalid_code = XFASTINT (val);
995 val = args[charset_arg_iso_final];
996 if (NILP (val))
997 charset.iso_final = -1;
998 else
1000 CHECK_NUMBER (val);
1001 if (XINT (val) < '0' || XINT (val) > 127)
1002 error ("Invalid iso-final-char: %"pEd, XINT (val));
1003 charset.iso_final = XINT (val);
1006 val = args[charset_arg_iso_revision];
1007 if (NILP (val))
1008 charset.iso_revision = -1;
1009 else
1011 CHECK_NUMBER (val);
1012 if (XINT (val) > 63)
1013 args_out_of_range (make_number (63), val);
1014 charset.iso_revision = XINT (val);
1017 val = args[charset_arg_emacs_mule_id];
1018 if (NILP (val))
1019 charset.emacs_mule_id = -1;
1020 else
1022 CHECK_NATNUM (val);
1023 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1024 error ("Invalid emacs-mule-id: %"pEd, XINT (val));
1025 charset.emacs_mule_id = XINT (val);
1028 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1030 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1032 charset.unified_p = 0;
1034 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1036 if (! NILP (args[charset_arg_code_offset]))
1038 val = args[charset_arg_code_offset];
1039 CHECK_NUMBER (val);
1041 charset.method = CHARSET_METHOD_OFFSET;
1042 charset.code_offset = XINT (val);
1044 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1045 charset.min_char = i + charset.code_offset;
1046 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1047 charset.max_char = i + charset.code_offset;
1048 if (charset.max_char > MAX_CHAR)
1049 error ("Unsupported max char: %d", charset.max_char);
1051 i = (charset.min_char >> 7) << 7;
1052 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1053 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1054 i = (i >> 12) << 12;
1055 for (; i <= charset.max_char; i += 0x1000)
1056 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1057 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1058 charset.ascii_compatible_p = 1;
1060 else if (! NILP (args[charset_arg_map]))
1062 val = args[charset_arg_map];
1063 ASET (attrs, charset_map, val);
1064 charset.method = CHARSET_METHOD_MAP;
1066 else if (! NILP (args[charset_arg_subset]))
1068 Lisp_Object parent;
1069 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1070 struct charset *parent_charset;
1072 val = args[charset_arg_subset];
1073 parent = Fcar (val);
1074 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1075 parent_min_code = Fnth (make_number (1), val);
1076 CHECK_NATNUM (parent_min_code);
1077 parent_max_code = Fnth (make_number (2), val);
1078 CHECK_NATNUM (parent_max_code);
1079 parent_code_offset = Fnth (make_number (3), val);
1080 CHECK_NUMBER (parent_code_offset);
1081 val = Fmake_vector (make_number (4), Qnil);
1082 ASET (val, 0, make_number (parent_charset->id));
1083 ASET (val, 1, parent_min_code);
1084 ASET (val, 2, parent_max_code);
1085 ASET (val, 3, parent_code_offset);
1086 ASET (attrs, charset_subset, val);
1088 charset.method = CHARSET_METHOD_SUBSET;
1089 /* Here, we just copy the parent's fast_map. It's not accurate,
1090 but at least it works for quickly detecting which character
1091 DOESN'T belong to this charset. */
1092 for (i = 0; i < 190; i++)
1093 charset.fast_map[i] = parent_charset->fast_map[i];
1095 /* We also copy these for parents. */
1096 charset.min_char = parent_charset->min_char;
1097 charset.max_char = parent_charset->max_char;
1099 else if (! NILP (args[charset_arg_superset]))
1101 val = args[charset_arg_superset];
1102 charset.method = CHARSET_METHOD_SUPERSET;
1103 val = Fcopy_sequence (val);
1104 ASET (attrs, charset_superset, val);
1106 charset.min_char = MAX_CHAR;
1107 charset.max_char = 0;
1108 for (; ! NILP (val); val = Fcdr (val))
1110 Lisp_Object elt, car_part, cdr_part;
1111 int this_id, offset;
1112 struct charset *this_charset;
1114 elt = Fcar (val);
1115 if (CONSP (elt))
1117 car_part = XCAR (elt);
1118 cdr_part = XCDR (elt);
1119 CHECK_CHARSET_GET_ID (car_part, this_id);
1120 CHECK_NUMBER (cdr_part);
1121 offset = XINT (cdr_part);
1123 else
1125 CHECK_CHARSET_GET_ID (elt, this_id);
1126 offset = 0;
1128 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1130 this_charset = CHARSET_FROM_ID (this_id);
1131 if (charset.min_char > this_charset->min_char)
1132 charset.min_char = this_charset->min_char;
1133 if (charset.max_char < this_charset->max_char)
1134 charset.max_char = this_charset->max_char;
1135 for (i = 0; i < 190; i++)
1136 charset.fast_map[i] |= this_charset->fast_map[i];
1139 else
1140 error ("None of :code-offset, :map, :parents are specified");
1142 val = args[charset_arg_unify_map];
1143 if (! NILP (val) && !STRINGP (val))
1144 CHECK_VECTOR (val);
1145 ASET (attrs, charset_unify_map, val);
1147 CHECK_LIST (args[charset_arg_plist]);
1148 ASET (attrs, charset_plist, args[charset_arg_plist]);
1150 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1151 &hash_code);
1152 if (charset.hash_index >= 0)
1154 new_definition_p = 0;
1155 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1156 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1158 else
1160 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1161 hash_code);
1162 if (charset_table_used == charset_table_size)
1164 struct charset *new_table
1165 = (struct charset *) xmalloc (sizeof (struct charset)
1166 * (charset_table_size + 16));
1167 memcpy (new_table, charset_table,
1168 sizeof (struct charset) * charset_table_size);
1169 charset_table_size += 16;
1170 charset_table = new_table;
1172 id = charset_table_used++;
1173 new_definition_p = 1;
1176 ASET (attrs, charset_id, make_number (id));
1177 charset.id = id;
1178 charset_table[id] = charset;
1180 if (charset.method == CHARSET_METHOD_MAP)
1182 load_charset (&charset, 0);
1183 charset_table[id] = charset;
1186 if (charset.iso_final >= 0)
1188 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1189 charset.iso_final) = id;
1190 if (new_definition_p)
1191 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1192 Fcons (make_number (id), Qnil));
1193 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1194 charset_jisx0201_roman = id;
1195 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1196 charset_jisx0208_1978 = id;
1197 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1198 charset_jisx0208 = id;
1199 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1200 charset_ksc5601 = id;
1203 if (charset.emacs_mule_id >= 0)
1205 emacs_mule_charset[charset.emacs_mule_id] = id;
1206 if (charset.emacs_mule_id < 0xA0)
1207 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1208 else
1209 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1210 if (new_definition_p)
1211 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1212 Fcons (make_number (id), Qnil));
1215 if (new_definition_p)
1217 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1218 if (charset.supplementary_p)
1219 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1220 Fcons (make_number (id), Qnil));
1221 else
1223 Lisp_Object tail;
1225 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1227 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1229 if (cs->supplementary_p)
1230 break;
1232 if (EQ (tail, Vcharset_ordered_list))
1233 Vcharset_ordered_list = Fcons (make_number (id),
1234 Vcharset_ordered_list);
1235 else if (NILP (tail))
1236 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1237 Fcons (make_number (id), Qnil));
1238 else
1240 val = Fcons (XCAR (tail), XCDR (tail));
1241 XSETCDR (tail, val);
1242 XSETCAR (tail, make_number (id));
1245 charset_ordered_list_tick++;
1248 return Qnil;
1252 /* Same as Fdefine_charset_internal but arguments are more convenient
1253 to call from C (typically in syms_of_charset). This can define a
1254 charset of `offset' method only. Return the ID of the new
1255 charset. */
1257 static int
1258 define_charset_internal (Lisp_Object name,
1259 int dimension,
1260 const char *code_space_chars,
1261 unsigned min_code, unsigned max_code,
1262 int iso_final, int iso_revision, int emacs_mule_id,
1263 int ascii_compatible, int supplementary,
1264 int code_offset)
1266 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1267 Lisp_Object args[charset_arg_max];
1268 Lisp_Object plist[14];
1269 Lisp_Object val;
1270 int i;
1272 args[charset_arg_name] = name;
1273 args[charset_arg_dimension] = make_number (dimension);
1274 val = Fmake_vector (make_number (8), make_number (0));
1275 for (i = 0; i < 8; i++)
1276 ASET (val, i, make_number (code_space[i]));
1277 args[charset_arg_code_space] = val;
1278 args[charset_arg_min_code] = make_number (min_code);
1279 args[charset_arg_max_code] = make_number (max_code);
1280 args[charset_arg_iso_final]
1281 = (iso_final < 0 ? Qnil : make_number (iso_final));
1282 args[charset_arg_iso_revision] = make_number (iso_revision);
1283 args[charset_arg_emacs_mule_id]
1284 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1285 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1286 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1287 args[charset_arg_invalid_code] = Qnil;
1288 args[charset_arg_code_offset] = make_number (code_offset);
1289 args[charset_arg_map] = Qnil;
1290 args[charset_arg_subset] = Qnil;
1291 args[charset_arg_superset] = Qnil;
1292 args[charset_arg_unify_map] = Qnil;
1294 plist[0] = intern_c_string (":name");
1295 plist[1] = args[charset_arg_name];
1296 plist[2] = intern_c_string (":dimension");
1297 plist[3] = args[charset_arg_dimension];
1298 plist[4] = intern_c_string (":code-space");
1299 plist[5] = args[charset_arg_code_space];
1300 plist[6] = intern_c_string (":iso-final-char");
1301 plist[7] = args[charset_arg_iso_final];
1302 plist[8] = intern_c_string (":emacs-mule-id");
1303 plist[9] = args[charset_arg_emacs_mule_id];
1304 plist[10] = intern_c_string (":ascii-compatible-p");
1305 plist[11] = args[charset_arg_ascii_compatible_p];
1306 plist[12] = intern_c_string (":code-offset");
1307 plist[13] = args[charset_arg_code_offset];
1309 args[charset_arg_plist] = Flist (14, plist);
1310 Fdefine_charset_internal (charset_arg_max, args);
1312 return XINT (CHARSET_SYMBOL_ID (name));
1316 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1317 Sdefine_charset_alias, 2, 2, 0,
1318 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1319 (Lisp_Object alias, Lisp_Object charset)
1321 Lisp_Object attr;
1323 CHECK_CHARSET_GET_ATTR (charset, attr);
1324 Fputhash (alias, attr, Vcharset_hash_table);
1325 Vcharset_list = Fcons (alias, Vcharset_list);
1326 return Qnil;
1330 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1331 doc: /* Return the property list of CHARSET. */)
1332 (Lisp_Object charset)
1334 Lisp_Object attrs;
1336 CHECK_CHARSET_GET_ATTR (charset, attrs);
1337 return CHARSET_ATTR_PLIST (attrs);
1341 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1342 doc: /* Set CHARSET's property list to PLIST. */)
1343 (Lisp_Object charset, Lisp_Object plist)
1345 Lisp_Object attrs;
1347 CHECK_CHARSET_GET_ATTR (charset, attrs);
1348 CHARSET_ATTR_PLIST (attrs) = plist;
1349 return plist;
1353 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1354 doc: /* Unify characters of CHARSET with Unicode.
1355 This means reading the relevant file and installing the table defined
1356 by CHARSET's `:unify-map' property.
1358 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1359 the same meaning as the `:unify-map' attribute in the function
1360 `define-charset' (which see).
1362 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1363 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1365 int id;
1366 struct charset *cs;
1368 CHECK_CHARSET_GET_ID (charset, id);
1369 cs = CHARSET_FROM_ID (id);
1370 if (NILP (deunify)
1371 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1372 : ! CHARSET_UNIFIED_P (cs))
1373 return Qnil;
1375 CHARSET_UNIFIED_P (cs) = 0;
1376 if (NILP (deunify))
1378 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1379 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1380 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1381 if (NILP (unify_map))
1382 unify_map = CHARSET_UNIFY_MAP (cs);
1383 else
1385 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1386 signal_error ("Bad unify-map", unify_map);
1387 CHARSET_UNIFY_MAP (cs) = unify_map;
1389 if (NILP (Vchar_unify_table))
1390 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1391 char_table_set_range (Vchar_unify_table,
1392 cs->min_char, cs->max_char, charset);
1393 CHARSET_UNIFIED_P (cs) = 1;
1395 else if (CHAR_TABLE_P (Vchar_unify_table))
1397 int min_code = CHARSET_MIN_CODE (cs);
1398 int max_code = CHARSET_MAX_CODE (cs);
1399 int min_char = DECODE_CHAR (cs, min_code);
1400 int max_char = DECODE_CHAR (cs, max_code);
1402 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1405 return Qnil;
1408 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1409 Sget_unused_iso_final_char, 2, 2, 0,
1410 doc: /*
1411 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1412 DIMENSION is the number of bytes to represent a character: 1 or 2.
1413 CHARS is the number of characters in a dimension: 94 or 96.
1415 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1416 If there's no unused final char for the specified kind of charset,
1417 return nil. */)
1418 (Lisp_Object dimension, Lisp_Object chars)
1420 int final_char;
1422 CHECK_NUMBER (dimension);
1423 CHECK_NUMBER (chars);
1424 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1425 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1426 if (XINT (chars) != 94 && XINT (chars) != 96)
1427 args_out_of_range_3 (chars, make_number (94), make_number (96));
1428 for (final_char = '0'; final_char <= '?'; final_char++)
1429 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1430 break;
1431 return (final_char <= '?' ? make_number (final_char) : Qnil);
1434 static void
1435 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1437 CHECK_NATNUM (dimension);
1438 CHECK_NATNUM (chars);
1439 CHECK_CHARACTER (final_char);
1441 if (XINT (dimension) > 3)
1442 error ("Invalid DIMENSION %"pEd", it should be 1, 2, or 3",
1443 XINT (dimension));
1444 if (XINT (chars) != 94 && XINT (chars) != 96)
1445 error ("Invalid CHARS %"pEd", it should be 94 or 96", XINT (chars));
1446 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1447 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
1448 (int)XINT (final_char));
1452 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1453 4, 4, 0,
1454 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1456 On decoding by an ISO-2022 base coding system, when a charset
1457 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1458 if CHARSET is designated instead. */)
1459 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1461 int id;
1462 int chars_flag;
1464 CHECK_CHARSET_GET_ID (charset, id);
1465 check_iso_charset_parameter (dimension, chars, final_char);
1466 chars_flag = XINT (chars) == 96;
1467 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1468 return Qnil;
1472 /* Return information about charsets in the text at PTR of NBYTES
1473 bytes, which are NCHARS characters. The value is:
1475 0: Each character is represented by one byte. This is always
1476 true for a unibyte string. For a multibyte string, true if
1477 it contains only ASCII characters.
1479 1: No charsets other than ascii, control-1, and latin-1 are
1480 found.
1482 2: Otherwise.
1486 string_xstring_p (Lisp_Object string)
1488 const unsigned char *p = SDATA (string);
1489 const unsigned char *endp = p + SBYTES (string);
1491 if (SCHARS (string) == SBYTES (string))
1492 return 0;
1494 while (p < endp)
1496 int c = STRING_CHAR_ADVANCE (p);
1498 if (c >= 0x100)
1499 return 2;
1501 return 1;
1505 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1507 CHARSETS is a vector. If Nth element is non-nil, it means the
1508 charset whose id is N is already found.
1510 It may lookup a translation table TABLE if supplied. */
1512 static void
1513 find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
1515 const unsigned char *pend = ptr + nbytes;
1517 if (nchars == nbytes)
1519 if (multibyte)
1520 ASET (charsets, charset_ascii, Qt);
1521 else
1522 while (ptr < pend)
1524 int c = *ptr++;
1526 if (!NILP (table))
1527 c = translate_char (table, c);
1528 if (ASCII_BYTE_P (c))
1529 ASET (charsets, charset_ascii, Qt);
1530 else
1531 ASET (charsets, charset_eight_bit, Qt);
1534 else
1536 while (ptr < pend)
1538 int c = STRING_CHAR_ADVANCE (ptr);
1539 struct charset *charset;
1541 if (!NILP (table))
1542 c = translate_char (table, c);
1543 charset = CHAR_CHARSET (c);
1544 ASET (charsets, CHARSET_ID (charset), Qt);
1549 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1550 2, 3, 0,
1551 doc: /* Return a list of charsets in the region between BEG and END.
1552 BEG and END are buffer positions.
1553 Optional arg TABLE if non-nil is a translation table to look up.
1555 If the current buffer is unibyte, the returned list may contain
1556 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1557 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1559 Lisp_Object charsets;
1560 EMACS_INT from, from_byte, to, stop, stop_byte;
1561 int i;
1562 Lisp_Object val;
1563 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1565 validate_region (&beg, &end);
1566 from = XFASTINT (beg);
1567 stop = to = XFASTINT (end);
1569 if (from < GPT && GPT < to)
1571 stop = GPT;
1572 stop_byte = GPT_BYTE;
1574 else
1575 stop_byte = CHAR_TO_BYTE (stop);
1577 from_byte = CHAR_TO_BYTE (from);
1579 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1580 while (1)
1582 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1583 stop_byte - from_byte, charsets, table,
1584 multibyte);
1585 if (stop < to)
1587 from = stop, from_byte = stop_byte;
1588 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1590 else
1591 break;
1594 val = Qnil;
1595 for (i = charset_table_used - 1; i >= 0; i--)
1596 if (!NILP (AREF (charsets, i)))
1597 val = Fcons (CHARSET_NAME (charset_table + i), val);
1598 return val;
1601 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1602 1, 2, 0,
1603 doc: /* Return a list of charsets in STR.
1604 Optional arg TABLE if non-nil is a translation table to look up.
1606 If STR is unibyte, the returned list may contain
1607 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1608 (Lisp_Object str, Lisp_Object table)
1610 Lisp_Object charsets;
1611 int i;
1612 Lisp_Object val;
1614 CHECK_STRING (str);
1616 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1617 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1618 charsets, table,
1619 STRING_MULTIBYTE (str));
1620 val = Qnil;
1621 for (i = charset_table_used - 1; i >= 0; i--)
1622 if (!NILP (AREF (charsets, i)))
1623 val = Fcons (CHARSET_NAME (charset_table + i), val);
1624 return val;
1629 /* Return a unified character code for C (>= 0x110000). VAL is a
1630 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1631 charset symbol. */
1633 maybe_unify_char (int c, Lisp_Object val)
1635 struct charset *charset;
1637 if (INTEGERP (val))
1638 return XINT (val);
1639 if (NILP (val))
1640 return c;
1642 CHECK_CHARSET_GET_CHARSET (val, charset);
1643 load_charset (charset, 1);
1644 if (! inhibit_load_charset_map)
1646 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1647 if (! NILP (val))
1648 c = XINT (val);
1650 else
1652 int code_index = c - CHARSET_CODE_OFFSET (charset);
1653 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1655 if (unified > 0)
1656 c = unified;
1658 return c;
1662 /* Return a character correponding to the code-point CODE of
1663 CHARSET. */
1666 decode_char (struct charset *charset, unsigned int code)
1668 int c, char_index;
1669 enum charset_method method = CHARSET_METHOD (charset);
1671 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1672 return -1;
1674 if (method == CHARSET_METHOD_SUBSET)
1676 Lisp_Object subset_info;
1678 subset_info = CHARSET_SUBSET (charset);
1679 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1680 code -= XINT (AREF (subset_info, 3));
1681 if (code < XFASTINT (AREF (subset_info, 1))
1682 || code > XFASTINT (AREF (subset_info, 2)))
1683 c = -1;
1684 else
1685 c = DECODE_CHAR (charset, code);
1687 else if (method == CHARSET_METHOD_SUPERSET)
1689 Lisp_Object parents;
1691 parents = CHARSET_SUPERSET (charset);
1692 c = -1;
1693 for (; CONSP (parents); parents = XCDR (parents))
1695 int id = XINT (XCAR (XCAR (parents)));
1696 int code_offset = XINT (XCDR (XCAR (parents)));
1697 unsigned this_code = code - code_offset;
1699 charset = CHARSET_FROM_ID (id);
1700 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1701 break;
1704 else
1706 char_index = CODE_POINT_TO_INDEX (charset, code);
1707 if (char_index < 0)
1708 return -1;
1710 if (method == CHARSET_METHOD_MAP)
1712 Lisp_Object decoder;
1714 decoder = CHARSET_DECODER (charset);
1715 if (! VECTORP (decoder))
1717 load_charset (charset, 1);
1718 decoder = CHARSET_DECODER (charset);
1720 if (VECTORP (decoder))
1721 c = XINT (AREF (decoder, char_index));
1722 else
1723 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1725 else /* method == CHARSET_METHOD_OFFSET */
1727 c = char_index + CHARSET_CODE_OFFSET (charset);
1728 if (CHARSET_UNIFIED_P (charset)
1729 && c > MAX_UNICODE_CHAR)
1730 MAYBE_UNIFY_CHAR (c);
1734 return c;
1737 /* Variable used temporarily by the macro ENCODE_CHAR. */
1738 Lisp_Object charset_work;
1740 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1741 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1742 use CHARSET's strict_max_char instead of max_char. */
1744 unsigned
1745 encode_char (struct charset *charset, int c)
1747 unsigned code;
1748 enum charset_method method = CHARSET_METHOD (charset);
1750 if (CHARSET_UNIFIED_P (charset))
1752 Lisp_Object deunifier;
1753 int code_index = -1;
1755 deunifier = CHARSET_DEUNIFIER (charset);
1756 if (! CHAR_TABLE_P (deunifier))
1758 load_charset (charset, 2);
1759 deunifier = CHARSET_DEUNIFIER (charset);
1761 if (CHAR_TABLE_P (deunifier))
1763 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1765 if (INTEGERP (deunified))
1766 code_index = XINT (deunified);
1768 else
1770 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1772 if (code_index >= 0)
1773 c = CHARSET_CODE_OFFSET (charset) + code_index;
1776 if (method == CHARSET_METHOD_SUBSET)
1778 Lisp_Object subset_info;
1779 struct charset *this_charset;
1781 subset_info = CHARSET_SUBSET (charset);
1782 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1783 code = ENCODE_CHAR (this_charset, c);
1784 if (code == CHARSET_INVALID_CODE (this_charset)
1785 || code < XFASTINT (AREF (subset_info, 1))
1786 || code > XFASTINT (AREF (subset_info, 2)))
1787 return CHARSET_INVALID_CODE (charset);
1788 code += XINT (AREF (subset_info, 3));
1789 return code;
1792 if (method == CHARSET_METHOD_SUPERSET)
1794 Lisp_Object parents;
1796 parents = CHARSET_SUPERSET (charset);
1797 for (; CONSP (parents); parents = XCDR (parents))
1799 int id = XINT (XCAR (XCAR (parents)));
1800 int code_offset = XINT (XCDR (XCAR (parents)));
1801 struct charset *this_charset = CHARSET_FROM_ID (id);
1803 code = ENCODE_CHAR (this_charset, c);
1804 if (code != CHARSET_INVALID_CODE (this_charset))
1805 return code + code_offset;
1807 return CHARSET_INVALID_CODE (charset);
1810 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1811 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1812 return CHARSET_INVALID_CODE (charset);
1814 if (method == CHARSET_METHOD_MAP)
1816 Lisp_Object encoder;
1817 Lisp_Object val;
1819 encoder = CHARSET_ENCODER (charset);
1820 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1822 load_charset (charset, 2);
1823 encoder = CHARSET_ENCODER (charset);
1825 if (CHAR_TABLE_P (encoder))
1827 val = CHAR_TABLE_REF (encoder, c);
1828 if (NILP (val))
1829 return CHARSET_INVALID_CODE (charset);
1830 code = XINT (val);
1831 if (! CHARSET_COMPACT_CODES_P (charset))
1832 code = INDEX_TO_CODE_POINT (charset, code);
1834 else
1836 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1837 code = INDEX_TO_CODE_POINT (charset, code);
1840 else /* method == CHARSET_METHOD_OFFSET */
1842 int code_index = c - CHARSET_CODE_OFFSET (charset);
1844 code = INDEX_TO_CODE_POINT (charset, code_index);
1847 return code;
1851 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1852 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1853 Return nil if CODE-POINT is not valid in CHARSET.
1855 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1857 Optional argument RESTRICTION specifies a way to map the pair of CCS
1858 and CODE-POINT to a character. Currently not supported and just ignored. */)
1859 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1861 int c, id;
1862 unsigned code;
1863 struct charset *charsetp;
1865 CHECK_CHARSET_GET_ID (charset, id);
1866 if (CONSP (code_point))
1868 CHECK_NATNUM_CAR (code_point);
1869 CHECK_NATNUM_CDR (code_point);
1870 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1872 else
1874 CHECK_NATNUM (code_point);
1875 code = XINT (code_point);
1877 charsetp = CHARSET_FROM_ID (id);
1878 c = DECODE_CHAR (charsetp, code);
1879 return (c >= 0 ? make_number (c) : Qnil);
1883 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1884 doc: /* Encode the character CH into a code-point of CHARSET.
1885 Return nil if CHARSET doesn't include CH.
1887 Optional argument RESTRICTION specifies a way to map CH to a
1888 code-point in CCS. Currently not supported and just ignored. */)
1889 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1891 int id;
1892 unsigned code;
1893 struct charset *charsetp;
1895 CHECK_CHARSET_GET_ID (charset, id);
1896 CHECK_NATNUM (ch);
1897 charsetp = CHARSET_FROM_ID (id);
1898 code = ENCODE_CHAR (charsetp, XINT (ch));
1899 if (code == CHARSET_INVALID_CODE (charsetp))
1900 return Qnil;
1901 if (code > 0x7FFFFFF)
1902 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1903 return make_number (code);
1907 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1908 doc:
1909 /* Return a character of CHARSET whose position codes are CODEn.
1911 CODE1 through CODE4 are optional, but if you don't supply sufficient
1912 position codes, it is assumed that the minimum code in each dimension
1913 is specified. */)
1914 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1916 int id, dimension;
1917 struct charset *charsetp;
1918 unsigned code;
1919 int c;
1921 CHECK_CHARSET_GET_ID (charset, id);
1922 charsetp = CHARSET_FROM_ID (id);
1924 dimension = CHARSET_DIMENSION (charsetp);
1925 if (NILP (code1))
1926 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1927 ? 0 : CHARSET_MIN_CODE (charsetp));
1928 else
1930 CHECK_NATNUM (code1);
1931 if (XFASTINT (code1) >= 0x100)
1932 args_out_of_range (make_number (0xFF), code1);
1933 code = XFASTINT (code1);
1935 if (dimension > 1)
1937 code <<= 8;
1938 if (NILP (code2))
1939 code |= charsetp->code_space[(dimension - 2) * 4];
1940 else
1942 CHECK_NATNUM (code2);
1943 if (XFASTINT (code2) >= 0x100)
1944 args_out_of_range (make_number (0xFF), code2);
1945 code |= XFASTINT (code2);
1948 if (dimension > 2)
1950 code <<= 8;
1951 if (NILP (code3))
1952 code |= charsetp->code_space[(dimension - 3) * 4];
1953 else
1955 CHECK_NATNUM (code3);
1956 if (XFASTINT (code3) >= 0x100)
1957 args_out_of_range (make_number (0xFF), code3);
1958 code |= XFASTINT (code3);
1961 if (dimension > 3)
1963 code <<= 8;
1964 if (NILP (code4))
1965 code |= charsetp->code_space[0];
1966 else
1968 CHECK_NATNUM (code4);
1969 if (XFASTINT (code4) >= 0x100)
1970 args_out_of_range (make_number (0xFF), code4);
1971 code |= XFASTINT (code4);
1978 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1979 code &= 0x7F7F7F7F;
1980 c = DECODE_CHAR (charsetp, code);
1981 if (c < 0)
1982 error ("Invalid code(s)");
1983 return make_number (c);
1987 /* Return the first charset in CHARSET_LIST that contains C.
1988 CHARSET_LIST is a list of charset IDs. If it is nil, use
1989 Vcharset_ordered_list. */
1991 struct charset *
1992 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1994 int maybe_null = 0;
1996 if (NILP (charset_list))
1997 charset_list = Vcharset_ordered_list;
1998 else
1999 maybe_null = 1;
2001 while (CONSP (charset_list))
2003 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2004 unsigned code = ENCODE_CHAR (charset, c);
2006 if (code != CHARSET_INVALID_CODE (charset))
2008 if (code_return)
2009 *code_return = code;
2010 return charset;
2012 charset_list = XCDR (charset_list);
2013 if (! maybe_null
2014 && c <= MAX_UNICODE_CHAR
2015 && EQ (charset_list, Vcharset_non_preferred_head))
2016 return CHARSET_FROM_ID (charset_unicode);
2018 return (maybe_null ? NULL
2019 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2020 : CHARSET_FROM_ID (charset_eight_bit));
2024 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2025 doc:
2026 /*Return list of charset and one to four position-codes of CH.
2027 The charset is decided by the current priority order of charsets.
2028 A position-code is a byte value of each dimension of the code-point of
2029 CH in the charset. */)
2030 (Lisp_Object ch)
2032 struct charset *charset;
2033 int c, dimension;
2034 unsigned code;
2035 Lisp_Object val;
2037 CHECK_CHARACTER (ch);
2038 c = XFASTINT (ch);
2039 charset = CHAR_CHARSET (c);
2040 if (! charset)
2041 abort ();
2042 code = ENCODE_CHAR (charset, c);
2043 if (code == CHARSET_INVALID_CODE (charset))
2044 abort ();
2045 dimension = CHARSET_DIMENSION (charset);
2046 for (val = Qnil; dimension > 0; dimension--)
2048 val = Fcons (make_number (code & 0xFF), val);
2049 code >>= 8;
2051 return Fcons (CHARSET_NAME (charset), val);
2055 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2056 doc: /* Return the charset of highest priority that contains CH.
2057 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2058 from which to find the charset. It may also be a coding system. In
2059 that case, find the charset from what supported by that coding system. */)
2060 (Lisp_Object ch, Lisp_Object restriction)
2062 struct charset *charset;
2064 CHECK_CHARACTER (ch);
2065 if (NILP (restriction))
2066 charset = CHAR_CHARSET (XINT (ch));
2067 else
2069 if (CONSP (restriction))
2071 int c = XFASTINT (ch);
2073 for (; CONSP (restriction); restriction = XCDR (restriction))
2075 struct charset *rcharset;
2077 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2078 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2079 return XCAR (restriction);
2081 return Qnil;
2083 restriction = coding_system_charset_list (restriction);
2084 charset = char_charset (XINT (ch), restriction, NULL);
2085 if (! charset)
2086 return Qnil;
2088 return (CHARSET_NAME (charset));
2092 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2093 doc: /*
2094 Return charset of a character in the current buffer at position POS.
2095 If POS is nil, it defauls to the current point.
2096 If POS is out of range, the value is nil. */)
2097 (Lisp_Object pos)
2099 Lisp_Object ch;
2100 struct charset *charset;
2102 ch = Fchar_after (pos);
2103 if (! INTEGERP (ch))
2104 return ch;
2105 charset = CHAR_CHARSET (XINT (ch));
2106 return (CHARSET_NAME (charset));
2110 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2111 doc: /*
2112 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2114 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2115 by their DIMENSION, CHARS, and FINAL-CHAR,
2116 whereas Emacs distinguishes them by charset symbol.
2117 See the documentation of the function `charset-info' for the meanings of
2118 DIMENSION, CHARS, and FINAL-CHAR. */)
2119 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2121 int id;
2122 int chars_flag;
2124 check_iso_charset_parameter (dimension, chars, final_char);
2125 chars_flag = XFASTINT (chars) == 96;
2126 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2127 XFASTINT (final_char));
2128 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2132 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2133 0, 0, 0,
2134 doc: /*
2135 Internal use only.
2136 Clear temporary charset mapping tables.
2137 It should be called only from temacs invoked for dumping. */)
2138 (void)
2140 if (temp_charset_work)
2142 xfree (temp_charset_work);
2143 temp_charset_work = NULL;
2146 if (CHAR_TABLE_P (Vchar_unify_table))
2147 Foptimize_char_table (Vchar_unify_table, Qnil);
2149 return Qnil;
2152 DEFUN ("charset-priority-list", Fcharset_priority_list,
2153 Scharset_priority_list, 0, 1, 0,
2154 doc: /* Return the list of charsets ordered by priority.
2155 HIGHESTP non-nil means just return the highest priority one. */)
2156 (Lisp_Object highestp)
2158 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2160 if (!NILP (highestp))
2161 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2163 while (!NILP (list))
2165 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2166 list = XCDR (list);
2168 return Fnreverse (val);
2171 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2172 1, MANY, 0,
2173 doc: /* Assign higher priority to the charsets given as arguments.
2174 usage: (set-charset-priority &rest charsets) */)
2175 (size_t nargs, Lisp_Object *args)
2177 Lisp_Object new_head, old_list, arglist[2];
2178 Lisp_Object list_2022, list_emacs_mule;
2179 size_t i;
2180 int id;
2182 old_list = Fcopy_sequence (Vcharset_ordered_list);
2183 new_head = Qnil;
2184 for (i = 0; i < nargs; i++)
2186 CHECK_CHARSET_GET_ID (args[i], id);
2187 if (! NILP (Fmemq (make_number (id), old_list)))
2189 old_list = Fdelq (make_number (id), old_list);
2190 new_head = Fcons (make_number (id), new_head);
2193 arglist[0] = Fnreverse (new_head);
2194 arglist[1] = Vcharset_non_preferred_head = old_list;
2195 Vcharset_ordered_list = Fnconc (2, arglist);
2196 charset_ordered_list_tick++;
2198 charset_unibyte = -1;
2199 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2200 CONSP (old_list); old_list = XCDR (old_list))
2202 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2203 list_2022 = Fcons (XCAR (old_list), list_2022);
2204 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2205 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2206 if (charset_unibyte < 0)
2208 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2210 if (CHARSET_DIMENSION (charset) == 1
2211 && CHARSET_ASCII_COMPATIBLE_P (charset)
2212 && CHARSET_MAX_CHAR (charset) >= 0x80)
2213 charset_unibyte = CHARSET_ID (charset);
2216 Viso_2022_charset_list = Fnreverse (list_2022);
2217 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2218 if (charset_unibyte < 0)
2219 charset_unibyte = charset_iso_8859_1;
2221 return Qnil;
2224 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2225 0, 1, 0,
2226 doc: /* Internal use only.
2227 Return charset identification number of CHARSET. */)
2228 (Lisp_Object charset)
2230 int id;
2232 CHECK_CHARSET_GET_ID (charset, id);
2233 return make_number (id);
2236 struct charset_sort_data
2238 Lisp_Object charset;
2239 int id;
2240 int priority;
2243 static int
2244 charset_compare (const void *d1, const void *d2)
2246 const struct charset_sort_data *data1 = d1, *data2 = d2;
2247 return (data1->priority - data2->priority);
2250 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2251 doc: /* Sort charset list CHARSETS by a priority of each charset.
2252 Return the sorted list. CHARSETS is modified by side effects.
2253 See also `charset-priority-list' and `set-charset-priority'. */)
2254 (Lisp_Object charsets)
2256 Lisp_Object len = Flength (charsets);
2257 int n = XFASTINT (len), i, j, done;
2258 Lisp_Object tail, elt, attrs;
2259 struct charset_sort_data *sort_data;
2260 int id, min_id = INT_MAX, max_id = INT_MIN;
2261 USE_SAFE_ALLOCA;
2263 if (n == 0)
2264 return Qnil;
2265 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2266 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2268 elt = XCAR (tail);
2269 CHECK_CHARSET_GET_ATTR (elt, attrs);
2270 sort_data[i].charset = elt;
2271 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2272 if (id < min_id)
2273 min_id = id;
2274 if (id > max_id)
2275 max_id = id;
2277 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2278 done < n && CONSP (tail); tail = XCDR (tail), i++)
2280 elt = XCAR (tail);
2281 id = XFASTINT (elt);
2282 if (id >= min_id && id <= max_id)
2283 for (j = 0; j < n; j++)
2284 if (sort_data[j].id == id)
2286 sort_data[j].priority = i;
2287 done++;
2290 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2291 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2292 XSETCAR (tail, sort_data[i].charset);
2293 SAFE_FREE ();
2294 return charsets;
2298 void
2299 init_charset (void)
2301 Lisp_Object tempdir;
2302 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2303 if (access (SSDATA (tempdir), 0) < 0)
2305 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2306 Emacs will not function correctly without the character map files.\n\
2307 Please check your installation!\n",
2308 tempdir);
2309 /* TODO should this be a fatal error? (Bug#909) */
2312 Vcharset_map_path = Fcons (tempdir, Qnil);
2316 void
2317 init_charset_once (void)
2319 int i, j, k;
2321 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2322 for (j = 0; j < ISO_MAX_CHARS; j++)
2323 for (k = 0; k < ISO_MAX_FINAL; k++)
2324 iso_charset_table[i][j][k] = -1;
2326 for (i = 0; i < 256; i++)
2327 emacs_mule_charset[i] = -1;
2329 charset_jisx0201_roman = -1;
2330 charset_jisx0208_1978 = -1;
2331 charset_jisx0208 = -1;
2332 charset_ksc5601 = -1;
2335 #ifdef emacs
2337 void
2338 syms_of_charset (void)
2340 DEFSYM (Qcharsetp, "charsetp");
2342 DEFSYM (Qascii, "ascii");
2343 DEFSYM (Qunicode, "unicode");
2344 DEFSYM (Qemacs, "emacs");
2345 DEFSYM (Qeight_bit, "eight-bit");
2346 DEFSYM (Qiso_8859_1, "iso-8859-1");
2348 DEFSYM (Qgl, "gl");
2349 DEFSYM (Qgr, "gr");
2351 staticpro (&Vcharset_ordered_list);
2352 Vcharset_ordered_list = Qnil;
2354 staticpro (&Viso_2022_charset_list);
2355 Viso_2022_charset_list = Qnil;
2357 staticpro (&Vemacs_mule_charset_list);
2358 Vemacs_mule_charset_list = Qnil;
2360 /* Don't staticpro them here. It's done in syms_of_fns. */
2361 QCtest = intern_c_string (":test");
2362 Qeq = intern_c_string ("eq");
2364 staticpro (&Vcharset_hash_table);
2366 Lisp_Object args[2];
2367 args[0] = QCtest;
2368 args[1] = Qeq;
2369 Vcharset_hash_table = Fmake_hash_table (2, args);
2372 charset_table_size = 128;
2373 charset_table = ((struct charset *)
2374 xmalloc (sizeof (struct charset) * charset_table_size));
2375 charset_table_used = 0;
2377 defsubr (&Scharsetp);
2378 defsubr (&Smap_charset_chars);
2379 defsubr (&Sdefine_charset_internal);
2380 defsubr (&Sdefine_charset_alias);
2381 defsubr (&Scharset_plist);
2382 defsubr (&Sset_charset_plist);
2383 defsubr (&Sunify_charset);
2384 defsubr (&Sget_unused_iso_final_char);
2385 defsubr (&Sdeclare_equiv_charset);
2386 defsubr (&Sfind_charset_region);
2387 defsubr (&Sfind_charset_string);
2388 defsubr (&Sdecode_char);
2389 defsubr (&Sencode_char);
2390 defsubr (&Ssplit_char);
2391 defsubr (&Smake_char);
2392 defsubr (&Schar_charset);
2393 defsubr (&Scharset_after);
2394 defsubr (&Siso_charset);
2395 defsubr (&Sclear_charset_maps);
2396 defsubr (&Scharset_priority_list);
2397 defsubr (&Sset_charset_priority);
2398 defsubr (&Scharset_id_internal);
2399 defsubr (&Ssort_charsets);
2401 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2402 doc: /* *List of directories to search for charset map files. */);
2403 Vcharset_map_path = Qnil;
2405 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2406 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2407 inhibit_load_charset_map = 0;
2409 DEFVAR_LISP ("charset-list", Vcharset_list,
2410 doc: /* List of all charsets ever defined. */);
2411 Vcharset_list = Qnil;
2413 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2414 doc: /* ISO639 language mnemonic symbol for the current language environment.
2415 If the current language environment is for multiple languages (e.g. "Latin-1"),
2416 the value may be a list of mnemonics. */);
2417 Vcurrent_iso639_language = Qnil;
2419 charset_ascii
2420 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2421 0, 127, 'B', -1, 0, 1, 0, 0);
2422 charset_iso_8859_1
2423 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2424 0, 255, -1, -1, -1, 1, 0, 0);
2425 charset_unicode
2426 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2427 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2428 charset_emacs
2429 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2430 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2431 charset_eight_bit
2432 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2433 128, 255, -1, 0, -1, 0, 1,
2434 MAX_5_BYTE_CHAR + 1);
2435 charset_unibyte = charset_iso_8859_1;
2438 #endif /* emacs */