Remove arbitrary limit of 2**31 entries in hash tables.
[emacs.git] / src / charset.c
blob0af21b48ad2b9dbf4b2f6e96aa112e02dcf9afe4
1 /* Basic character set support.
2 Copyright (C) 2001-2011 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
8 Copyright (C) 2003, 2004
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
12 This file is part of GNU Emacs.
14 GNU Emacs is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
19 GNU Emacs is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27 #include <config.h>
29 #include <stdio.h>
30 #include <unistd.h>
31 #include <ctype.h>
32 #include <limits.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* Hash table that contains attributes of each charset. Keys are
58 charset symbols, and values are vectors of charset attributes. */
59 Lisp_Object Vcharset_hash_table;
61 /* Table of struct charset. */
62 struct charset *charset_table;
64 static int charset_table_size;
65 static int charset_table_used;
67 Lisp_Object Qcharsetp;
69 /* Special charset symbols. */
70 Lisp_Object Qascii;
71 static Lisp_Object Qeight_bit;
72 static Lisp_Object Qiso_8859_1;
73 static Lisp_Object Qunicode;
74 static Lisp_Object Qemacs;
76 /* The corresponding charsets. */
77 int charset_ascii;
78 int charset_eight_bit;
79 static int charset_iso_8859_1;
80 int charset_unicode;
81 static int charset_emacs;
83 /* The other special charsets. */
84 int charset_jisx0201_roman;
85 int charset_jisx0208_1978;
86 int charset_jisx0208;
87 int charset_ksc5601;
89 /* Value of charset attribute `charset-iso-plane'. */
90 static Lisp_Object Qgl, Qgr;
92 /* Charset of unibyte characters. */
93 int charset_unibyte;
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list;
98 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
99 charsets. */
100 Lisp_Object Vcharset_non_preferred_head;
102 /* Incremented everytime we change Vcharset_ordered_list. This is
103 unsigned short so that it fits in Lisp_Int and never matches
104 -1. */
105 unsigned short charset_ordered_list_tick;
107 /* List of iso-2022 charsets. */
108 Lisp_Object Viso_2022_charset_list;
110 /* List of emacs-mule charsets. */
111 Lisp_Object Vemacs_mule_charset_list;
113 int emacs_mule_charset[256];
115 /* Mapping table from ISO2022's charset (specified by DIMENSION,
116 CHARS, and FINAL-CHAR) to Emacs' charset. */
117 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
119 #define CODE_POINT_TO_INDEX(charset, code) \
120 ((charset)->code_linear_p \
121 ? (code) - (charset)->min_code \
122 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
123 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
124 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
125 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
126 ? (((((code) >> 24) - (charset)->code_space[12]) \
127 * (charset)->code_space[11]) \
128 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
129 * (charset)->code_space[7]) \
130 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
131 * (charset)->code_space[3]) \
132 + (((code) & 0xFF) - (charset)->code_space[0]) \
133 - ((charset)->char_index_offset)) \
134 : -1)
137 /* Convert the character index IDX to code-point CODE for CHARSET.
138 It is assumed that IDX is in a valid range. */
140 #define INDEX_TO_CODE_POINT(charset, idx) \
141 ((charset)->code_linear_p \
142 ? (idx) + (charset)->min_code \
143 : (idx += (charset)->char_index_offset, \
144 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
145 | (((charset)->code_space[4] \
146 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
147 << 8) \
148 | (((charset)->code_space[8] \
149 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
150 << 16) \
151 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
152 << 24))))
154 /* Structure to hold mapping tables for a charset. Used by temacs
155 invoked for dumping. */
157 static struct
159 /* The current charset for which the following tables are setup. */
160 struct charset *current;
162 /* 1 iff the following table is used for encoder. */
163 short for_encoder;
165 /* When the following table is used for encoding, mininum and
166 maxinum character of the current charset. */
167 int min_char, max_char;
169 /* A Unicode character correspoinding to the code indice 0 (i.e. the
170 minimum code-point) of the current charset, or -1 if the code
171 indice 0 is not a Unicode character. This is checked when
172 table.encoder[CHAR] is zero. */
173 int zero_index_char;
175 union {
176 /* Table mapping code-indices (not code-points) of the current
177 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
178 doesn't belong to the current charset. */
179 int decoder[0x10000];
180 /* Table mapping Unicode characters to code-indices of the current
181 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
182 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
183 (0x20000..0x2FFFF). Note that there is no charset map that
184 uses both SMP and SIP. */
185 unsigned short encoder[0x20000];
186 } table;
187 } *temp_charset_work;
189 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
190 do { \
191 if ((CODE) == 0) \
192 temp_charset_work->zero_index_char = (C); \
193 else if ((C) < 0x20000) \
194 temp_charset_work->table.encoder[(C)] = (CODE); \
195 else \
196 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
197 } while (0)
199 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
200 ((C) == temp_charset_work->zero_index_char ? 0 \
201 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
202 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
203 : temp_charset_work->table.encoder[(C) - 0x10000] \
204 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
206 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
207 (temp_charset_work->table.decoder[(CODE)] = (C))
209 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
210 (temp_charset_work->table.decoder[(CODE)])
213 /* Set to 1 to warn that a charset map is loaded and thus a buffer
214 text and a string data may be relocated. */
215 int charset_map_loaded;
217 struct charset_map_entries
219 struct {
220 unsigned from, to;
221 int c;
222 } entry[0x10000];
223 struct charset_map_entries *next;
226 /* Load the mapping information of CHARSET from ENTRIES for
227 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
228 encoding (CONTROL_FLAG == 2).
230 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
231 and CHARSET->fast_map.
233 If CONTROL_FLAG is 1, setup the following tables according to
234 CHARSET->method and inhibit_load_charset_map.
236 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
237 ----------------------+--------------------+---------------------------
238 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
239 ----------------------+--------------------+---------------------------
240 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
242 If CONTROL_FLAG is 2, setup the following tables.
244 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
245 ----------------------+--------------------+---------------------------
246 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
247 ----------------------+--------------------+--------------------------
248 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
251 static void
252 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
254 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
255 unsigned max_code = CHARSET_MAX_CODE (charset);
256 int ascii_compatible_p = charset->ascii_compatible_p;
257 int min_char, max_char, nonascii_min_char;
258 int i;
259 unsigned char *fast_map = charset->fast_map;
261 if (n_entries <= 0)
262 return;
264 if (control_flag)
266 if (! inhibit_load_charset_map)
268 if (control_flag == 1)
270 if (charset->method == CHARSET_METHOD_MAP)
272 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
274 vec = CHARSET_DECODER (charset)
275 = Fmake_vector (make_number (n), make_number (-1));
277 else
279 char_table_set_range (Vchar_unify_table,
280 charset->min_char, charset->max_char,
281 Qnil);
284 else
286 table = Fmake_char_table (Qnil, Qnil);
287 if (charset->method == CHARSET_METHOD_MAP)
288 CHARSET_ENCODER (charset) = table;
289 else
290 CHARSET_DEUNIFIER (charset) = table;
293 else
295 if (! temp_charset_work)
296 temp_charset_work = xmalloc (sizeof (*temp_charset_work));
297 if (control_flag == 1)
299 memset (temp_charset_work->table.decoder, -1,
300 sizeof (int) * 0x10000);
302 else
304 memset (temp_charset_work->table.encoder, 0,
305 sizeof (unsigned short) * 0x20000);
306 temp_charset_work->zero_index_char = -1;
308 temp_charset_work->current = charset;
309 temp_charset_work->for_encoder = (control_flag == 2);
310 control_flag += 2;
312 charset_map_loaded = 1;
315 min_char = max_char = entries->entry[0].c;
316 nonascii_min_char = MAX_CHAR;
317 for (i = 0; i < n_entries; i++)
319 unsigned from, to;
320 int from_index, to_index, lim_index;
321 int from_c, to_c;
322 int idx = i % 0x10000;
324 if (i > 0 && idx == 0)
325 entries = entries->next;
326 from = entries->entry[idx].from;
327 to = entries->entry[idx].to;
328 from_c = entries->entry[idx].c;
329 from_index = CODE_POINT_TO_INDEX (charset, from);
330 if (from == to)
332 to_index = from_index;
333 to_c = from_c;
335 else
337 to_index = CODE_POINT_TO_INDEX (charset, to);
338 to_c = from_c + (to_index - from_index);
340 if (from_index < 0 || to_index < 0)
341 continue;
342 lim_index = to_index + 1;
344 if (to_c > max_char)
345 max_char = to_c;
346 else if (from_c < min_char)
347 min_char = from_c;
349 if (control_flag == 1)
351 if (charset->method == CHARSET_METHOD_MAP)
352 for (; from_index < lim_index; from_index++, from_c++)
353 ASET (vec, from_index, make_number (from_c));
354 else
355 for (; from_index < lim_index; from_index++, from_c++)
356 CHAR_TABLE_SET (Vchar_unify_table,
357 CHARSET_CODE_OFFSET (charset) + from_index,
358 make_number (from_c));
360 else if (control_flag == 2)
362 if (charset->method == CHARSET_METHOD_MAP
363 && CHARSET_COMPACT_CODES_P (charset))
364 for (; from_index < lim_index; from_index++, from_c++)
366 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
368 if (NILP (CHAR_TABLE_REF (table, from_c)))
369 CHAR_TABLE_SET (table, from_c, make_number (code));
371 else
372 for (; from_index < lim_index; from_index++, from_c++)
374 if (NILP (CHAR_TABLE_REF (table, from_c)))
375 CHAR_TABLE_SET (table, from_c, make_number (from_index));
378 else if (control_flag == 3)
379 for (; from_index < lim_index; from_index++, from_c++)
380 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
381 else if (control_flag == 4)
382 for (; from_index < lim_index; from_index++, from_c++)
383 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
384 else /* control_flag == 0 */
386 if (ascii_compatible_p)
388 if (! ASCII_BYTE_P (from_c))
390 if (from_c < nonascii_min_char)
391 nonascii_min_char = from_c;
393 else if (! ASCII_BYTE_P (to_c))
395 nonascii_min_char = 0x80;
399 for (; from_c <= to_c; from_c++)
400 CHARSET_FAST_MAP_SET (from_c, fast_map);
404 if (control_flag == 0)
406 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
407 ? nonascii_min_char : min_char);
408 CHARSET_MAX_CHAR (charset) = max_char;
410 else if (control_flag == 4)
412 temp_charset_work->min_char = min_char;
413 temp_charset_work->max_char = max_char;
418 /* Read a hexadecimal number (preceded by "0x") from the file FP while
419 paying attention to comment character '#'. */
421 static inline unsigned
422 read_hex (FILE *fp, int *eof)
424 int c;
425 unsigned n;
427 while ((c = getc (fp)) != EOF)
429 if (c == '#')
431 while ((c = getc (fp)) != EOF && c != '\n');
433 else if (c == '0')
435 if ((c = getc (fp)) == EOF || c == 'x')
436 break;
439 if (c == EOF)
441 *eof = 1;
442 return 0;
444 *eof = 0;
445 n = 0;
446 if (c == 'x')
447 while ((c = getc (fp)) != EOF && isxdigit (c))
448 n = ((n << 4)
449 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
450 else
451 while ((c = getc (fp)) != EOF && isdigit (c))
452 n = (n * 10) + c - '0';
453 if (c != EOF)
454 ungetc (c, fp);
455 return n;
458 /* Return a mapping vector for CHARSET loaded from MAPFILE.
459 Each line of MAPFILE has this form
460 0xAAAA 0xCCCC
461 where 0xAAAA is a code-point and 0xCCCC is the corresponding
462 character code, or this form
463 0xAAAA-0xBBBB 0xCCCC
464 where 0xAAAA and 0xBBBB are code-points specifying a range, and
465 0xCCCC is the first character code of the range.
467 The returned vector has this form:
468 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
469 where CODE1 is a code-point or a cons of code-points specifying a
470 range.
472 Note that this function uses `openp' to open MAPFILE but ignores
473 `file-name-handler-alist' to avoid running any Lisp code. */
475 static void
476 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
478 unsigned min_code = CHARSET_MIN_CODE (charset);
479 unsigned max_code = CHARSET_MAX_CODE (charset);
480 int fd;
481 FILE *fp;
482 int eof;
483 Lisp_Object suffixes;
484 struct charset_map_entries *head, *entries;
485 int n_entries, count;
486 USE_SAFE_ALLOCA;
488 suffixes = Fcons (build_string (".map"),
489 Fcons (build_string (".TXT"), Qnil));
491 count = SPECPDL_INDEX ();
492 specbind (Qfile_name_handler_alist, Qnil);
493 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
494 unbind_to (count, Qnil);
495 if (fd < 0
496 || ! (fp = fdopen (fd, "r")))
497 error ("Failure in loading charset map: %s", SDATA (mapfile));
499 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
500 large (larger than MAX_ALLOCA). */
501 SAFE_ALLOCA (head, struct charset_map_entries *,
502 sizeof (struct charset_map_entries));
503 entries = head;
504 memset (entries, 0, sizeof (struct charset_map_entries));
506 n_entries = 0;
507 eof = 0;
508 while (1)
510 unsigned from, to;
511 int c;
512 int idx;
514 from = read_hex (fp, &eof);
515 if (eof)
516 break;
517 if (getc (fp) == '-')
518 to = read_hex (fp, &eof);
519 else
520 to = from;
521 c = (int) read_hex (fp, &eof);
523 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
524 continue;
526 if (n_entries > 0 && (n_entries % 0x10000) == 0)
528 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
529 sizeof (struct charset_map_entries));
530 entries = entries->next;
531 memset (entries, 0, sizeof (struct charset_map_entries));
533 idx = n_entries % 0x10000;
534 entries->entry[idx].from = from;
535 entries->entry[idx].to = to;
536 entries->entry[idx].c = c;
537 n_entries++;
539 fclose (fp);
541 load_charset_map (charset, head, n_entries, control_flag);
542 SAFE_FREE ();
545 static void
546 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
548 unsigned min_code = CHARSET_MIN_CODE (charset);
549 unsigned max_code = CHARSET_MAX_CODE (charset);
550 struct charset_map_entries *head, *entries;
551 int n_entries;
552 int len = ASIZE (vec);
553 int i;
554 USE_SAFE_ALLOCA;
556 if (len % 2 == 1)
558 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
559 return;
562 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
563 large (larger than MAX_ALLOCA). */
564 SAFE_ALLOCA (head, struct charset_map_entries *,
565 sizeof (struct charset_map_entries));
566 entries = head;
567 memset (entries, 0, sizeof (struct charset_map_entries));
569 n_entries = 0;
570 for (i = 0; i < len; i += 2)
572 Lisp_Object val, val2;
573 unsigned from, to;
574 int c;
575 int idx;
577 val = AREF (vec, i);
578 if (CONSP (val))
580 val2 = XCDR (val);
581 val = XCAR (val);
582 CHECK_NATNUM (val);
583 CHECK_NATNUM (val2);
584 from = XFASTINT (val);
585 to = XFASTINT (val2);
587 else
589 CHECK_NATNUM (val);
590 from = to = XFASTINT (val);
592 val = AREF (vec, i + 1);
593 CHECK_NATNUM (val);
594 c = XFASTINT (val);
596 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
597 continue;
599 if (n_entries > 0 && (n_entries % 0x10000) == 0)
601 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
602 sizeof (struct charset_map_entries));
603 entries = entries->next;
604 memset (entries, 0, sizeof (struct charset_map_entries));
606 idx = n_entries % 0x10000;
607 entries->entry[idx].from = from;
608 entries->entry[idx].to = to;
609 entries->entry[idx].c = c;
610 n_entries++;
613 load_charset_map (charset, head, n_entries, control_flag);
614 SAFE_FREE ();
618 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
619 map it is (see the comment of load_charset_map for the detail). */
621 static void
622 load_charset (struct charset *charset, int control_flag)
624 Lisp_Object map;
626 if (inhibit_load_charset_map
627 && temp_charset_work
628 && charset == temp_charset_work->current
629 && ((control_flag == 2) == temp_charset_work->for_encoder))
630 return;
632 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
633 map = CHARSET_MAP (charset);
634 else
636 if (! CHARSET_UNIFIED_P (charset))
637 abort ();
638 map = CHARSET_UNIFY_MAP (charset);
640 if (STRINGP (map))
641 load_charset_map_from_file (charset, map, control_flag);
642 else
643 load_charset_map_from_vector (charset, map, control_flag);
647 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
648 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
649 (Lisp_Object object)
651 return (CHARSETP (object) ? Qt : Qnil);
655 static void
656 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
657 Lisp_Object function, Lisp_Object arg,
658 unsigned int from, unsigned int to)
660 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
661 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
662 Lisp_Object range;
663 int c, stop;
664 struct gcpro gcpro1;
666 range = Fcons (Qnil, Qnil);
667 GCPRO1 (range);
669 c = temp_charset_work->min_char;
670 stop = (temp_charset_work->max_char < 0x20000
671 ? temp_charset_work->max_char : 0xFFFF);
673 while (1)
675 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
677 if (idx >= from_idx && idx <= to_idx)
679 if (NILP (XCAR (range)))
680 XSETCAR (range, make_number (c));
682 else if (! NILP (XCAR (range)))
684 XSETCDR (range, make_number (c - 1));
685 if (c_function)
686 (*c_function) (arg, range);
687 else
688 call2 (function, range, arg);
689 XSETCAR (range, Qnil);
691 if (c == stop)
693 if (c == temp_charset_work->max_char)
695 if (! NILP (XCAR (range)))
697 XSETCDR (range, make_number (c));
698 if (c_function)
699 (*c_function) (arg, range);
700 else
701 call2 (function, range, arg);
703 break;
705 c = 0x1FFFF;
706 stop = temp_charset_work->max_char;
708 c++;
710 UNGCPRO;
713 void
714 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
715 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
717 Lisp_Object range;
718 int partial;
720 partial = (from > CHARSET_MIN_CODE (charset)
721 || to < CHARSET_MAX_CODE (charset));
723 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
725 int from_idx = CODE_POINT_TO_INDEX (charset, from);
726 int to_idx = CODE_POINT_TO_INDEX (charset, to);
727 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
728 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
730 if (CHARSET_UNIFIED_P (charset))
732 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
733 load_charset (charset, 2);
734 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
735 map_char_table_for_charset (c_function, function,
736 CHARSET_DEUNIFIER (charset), arg,
737 partial ? charset : NULL, from, to);
738 else
739 map_charset_for_dump (c_function, function, arg, from, to);
742 range = Fcons (make_number (from_c), make_number (to_c));
743 if (NILP (function))
744 (*c_function) (arg, range);
745 else
746 call2 (function, range, arg);
748 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
750 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
751 load_charset (charset, 2);
752 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
753 map_char_table_for_charset (c_function, function,
754 CHARSET_ENCODER (charset), arg,
755 partial ? charset : NULL, from, to);
756 else
757 map_charset_for_dump (c_function, function, arg, from, to);
759 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
761 Lisp_Object subset_info;
762 int offset;
764 subset_info = CHARSET_SUBSET (charset);
765 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
766 offset = XINT (AREF (subset_info, 3));
767 from -= offset;
768 if (from < XFASTINT (AREF (subset_info, 1)))
769 from = XFASTINT (AREF (subset_info, 1));
770 to -= offset;
771 if (to > XFASTINT (AREF (subset_info, 2)))
772 to = XFASTINT (AREF (subset_info, 2));
773 map_charset_chars (c_function, function, arg, charset, from, to);
775 else /* i.e. CHARSET_METHOD_SUPERSET */
777 Lisp_Object parents;
779 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
780 parents = XCDR (parents))
782 int offset;
783 unsigned this_from, this_to;
785 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
786 offset = XINT (XCDR (XCAR (parents)));
787 this_from = from > offset ? from - offset : 0;
788 this_to = to > offset ? to - offset : 0;
789 if (this_from < CHARSET_MIN_CODE (charset))
790 this_from = CHARSET_MIN_CODE (charset);
791 if (this_to > CHARSET_MAX_CODE (charset))
792 this_to = CHARSET_MAX_CODE (charset);
793 map_charset_chars (c_function, function, arg, charset,
794 this_from, this_to);
799 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
800 doc: /* Call FUNCTION for all characters in CHARSET.
801 FUNCTION is called with an argument RANGE and the optional 3rd
802 argument ARG.
804 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
805 characters contained in CHARSET.
807 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
808 range of code points (in CHARSET) of target characters. */)
809 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
811 struct charset *cs;
812 unsigned from, to;
814 CHECK_CHARSET_GET_CHARSET (charset, cs);
815 if (NILP (from_code))
816 from = CHARSET_MIN_CODE (cs);
817 else
819 CHECK_NATNUM (from_code);
820 from = XINT (from_code);
821 if (from < CHARSET_MIN_CODE (cs))
822 from = CHARSET_MIN_CODE (cs);
824 if (NILP (to_code))
825 to = CHARSET_MAX_CODE (cs);
826 else
828 CHECK_NATNUM (to_code);
829 to = XINT (to_code);
830 if (to > CHARSET_MAX_CODE (cs))
831 to = CHARSET_MAX_CODE (cs);
833 map_charset_chars (NULL, function, arg, cs, from, to);
834 return Qnil;
838 /* Define a charset according to the arguments. The Nth argument is
839 the Nth attribute of the charset (the last attribute `charset-id'
840 is not included). See the docstring of `define-charset' for the
841 detail. */
843 DEFUN ("define-charset-internal", Fdefine_charset_internal,
844 Sdefine_charset_internal, charset_arg_max, MANY, 0,
845 doc: /* For internal use only.
846 usage: (define-charset-internal ...) */)
847 (size_t nargs, Lisp_Object *args)
849 /* Charset attr vector. */
850 Lisp_Object attrs;
851 Lisp_Object val;
852 EMACS_UINT hash_code;
853 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
854 int i, j;
855 struct charset charset;
856 int id;
857 int dimension;
858 int new_definition_p;
859 int nchars;
861 if (nargs != charset_arg_max)
862 return Fsignal (Qwrong_number_of_arguments,
863 Fcons (intern ("define-charset-internal"),
864 make_number (nargs)));
866 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
868 CHECK_SYMBOL (args[charset_arg_name]);
869 ASET (attrs, charset_name, args[charset_arg_name]);
871 val = args[charset_arg_code_space];
872 for (i = 0, dimension = 0, nchars = 1; ; i++)
874 int min_byte, max_byte;
876 min_byte = XINT (Faref (val, make_number (i * 2)));
877 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
878 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
879 error ("Invalid :code-space value");
880 charset.code_space[i * 4] = min_byte;
881 charset.code_space[i * 4 + 1] = max_byte;
882 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
883 if (max_byte > 0)
884 dimension = i + 1;
885 if (i == 3)
886 break;
887 nchars *= charset.code_space[i * 4 + 2];
888 charset.code_space[i * 4 + 3] = nchars;
891 val = args[charset_arg_dimension];
892 if (NILP (val))
893 charset.dimension = dimension;
894 else
896 CHECK_NATNUM (val);
897 charset.dimension = XINT (val);
898 if (charset.dimension < 1 || charset.dimension > 4)
899 args_out_of_range_3 (val, make_number (1), make_number (4));
902 charset.code_linear_p
903 = (charset.dimension == 1
904 || (charset.code_space[2] == 256
905 && (charset.dimension == 2
906 || (charset.code_space[6] == 256
907 && (charset.dimension == 3
908 || charset.code_space[10] == 256)))));
910 if (! charset.code_linear_p)
912 charset.code_space_mask = (unsigned char *) xmalloc (256);
913 memset (charset.code_space_mask, 0, 256);
914 for (i = 0; i < 4; i++)
915 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
916 j++)
917 charset.code_space_mask[j] |= (1 << i);
920 charset.iso_chars_96 = charset.code_space[2] == 96;
922 charset.min_code = (charset.code_space[0]
923 | (charset.code_space[4] << 8)
924 | (charset.code_space[8] << 16)
925 | (charset.code_space[12] << 24));
926 charset.max_code = (charset.code_space[1]
927 | (charset.code_space[5] << 8)
928 | (charset.code_space[9] << 16)
929 | (charset.code_space[13] << 24));
930 charset.char_index_offset = 0;
932 val = args[charset_arg_min_code];
933 if (! NILP (val))
935 unsigned code;
937 if (INTEGERP (val))
938 code = XINT (val);
939 else
941 CHECK_CONS (val);
942 CHECK_NUMBER_CAR (val);
943 CHECK_NUMBER_CDR (val);
944 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
946 if (code < charset.min_code
947 || code > charset.max_code)
948 args_out_of_range_3 (make_number (charset.min_code),
949 make_number (charset.max_code), val);
950 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
951 charset.min_code = code;
954 val = args[charset_arg_max_code];
955 if (! NILP (val))
957 unsigned code;
959 if (INTEGERP (val))
960 code = XINT (val);
961 else
963 CHECK_CONS (val);
964 CHECK_NUMBER_CAR (val);
965 CHECK_NUMBER_CDR (val);
966 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
968 if (code < charset.min_code
969 || code > charset.max_code)
970 args_out_of_range_3 (make_number (charset.min_code),
971 make_number (charset.max_code), val);
972 charset.max_code = code;
975 charset.compact_codes_p = charset.max_code < 0x10000;
977 val = args[charset_arg_invalid_code];
978 if (NILP (val))
980 if (charset.min_code > 0)
981 charset.invalid_code = 0;
982 else
984 XSETINT (val, charset.max_code + 1);
985 if (XINT (val) == charset.max_code + 1)
986 charset.invalid_code = charset.max_code + 1;
987 else
988 error ("Attribute :invalid-code must be specified");
991 else
993 CHECK_NATNUM (val);
994 charset.invalid_code = XFASTINT (val);
997 val = args[charset_arg_iso_final];
998 if (NILP (val))
999 charset.iso_final = -1;
1000 else
1002 CHECK_NUMBER (val);
1003 if (XINT (val) < '0' || XINT (val) > 127)
1004 error ("Invalid iso-final-char: %"pI"d", XINT (val));
1005 charset.iso_final = XINT (val);
1008 val = args[charset_arg_iso_revision];
1009 if (NILP (val))
1010 charset.iso_revision = -1;
1011 else
1013 CHECK_NUMBER (val);
1014 if (XINT (val) > 63)
1015 args_out_of_range (make_number (63), val);
1016 charset.iso_revision = XINT (val);
1019 val = args[charset_arg_emacs_mule_id];
1020 if (NILP (val))
1021 charset.emacs_mule_id = -1;
1022 else
1024 CHECK_NATNUM (val);
1025 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1026 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
1027 charset.emacs_mule_id = XINT (val);
1030 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1032 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1034 charset.unified_p = 0;
1036 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1038 if (! NILP (args[charset_arg_code_offset]))
1040 val = args[charset_arg_code_offset];
1041 CHECK_NUMBER (val);
1043 charset.method = CHARSET_METHOD_OFFSET;
1044 charset.code_offset = XINT (val);
1046 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1047 charset.min_char = i + charset.code_offset;
1048 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1049 charset.max_char = i + charset.code_offset;
1050 if (charset.max_char > MAX_CHAR)
1051 error ("Unsupported max char: %d", charset.max_char);
1053 i = (charset.min_char >> 7) << 7;
1054 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1055 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1056 i = (i >> 12) << 12;
1057 for (; i <= charset.max_char; i += 0x1000)
1058 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1059 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1060 charset.ascii_compatible_p = 1;
1062 else if (! NILP (args[charset_arg_map]))
1064 val = args[charset_arg_map];
1065 ASET (attrs, charset_map, val);
1066 charset.method = CHARSET_METHOD_MAP;
1068 else if (! NILP (args[charset_arg_subset]))
1070 Lisp_Object parent;
1071 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1072 struct charset *parent_charset;
1074 val = args[charset_arg_subset];
1075 parent = Fcar (val);
1076 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1077 parent_min_code = Fnth (make_number (1), val);
1078 CHECK_NATNUM (parent_min_code);
1079 parent_max_code = Fnth (make_number (2), val);
1080 CHECK_NATNUM (parent_max_code);
1081 parent_code_offset = Fnth (make_number (3), val);
1082 CHECK_NUMBER (parent_code_offset);
1083 val = Fmake_vector (make_number (4), Qnil);
1084 ASET (val, 0, make_number (parent_charset->id));
1085 ASET (val, 1, parent_min_code);
1086 ASET (val, 2, parent_max_code);
1087 ASET (val, 3, parent_code_offset);
1088 ASET (attrs, charset_subset, val);
1090 charset.method = CHARSET_METHOD_SUBSET;
1091 /* Here, we just copy the parent's fast_map. It's not accurate,
1092 but at least it works for quickly detecting which character
1093 DOESN'T belong to this charset. */
1094 for (i = 0; i < 190; i++)
1095 charset.fast_map[i] = parent_charset->fast_map[i];
1097 /* We also copy these for parents. */
1098 charset.min_char = parent_charset->min_char;
1099 charset.max_char = parent_charset->max_char;
1101 else if (! NILP (args[charset_arg_superset]))
1103 val = args[charset_arg_superset];
1104 charset.method = CHARSET_METHOD_SUPERSET;
1105 val = Fcopy_sequence (val);
1106 ASET (attrs, charset_superset, val);
1108 charset.min_char = MAX_CHAR;
1109 charset.max_char = 0;
1110 for (; ! NILP (val); val = Fcdr (val))
1112 Lisp_Object elt, car_part, cdr_part;
1113 int this_id, offset;
1114 struct charset *this_charset;
1116 elt = Fcar (val);
1117 if (CONSP (elt))
1119 car_part = XCAR (elt);
1120 cdr_part = XCDR (elt);
1121 CHECK_CHARSET_GET_ID (car_part, this_id);
1122 CHECK_NUMBER (cdr_part);
1123 offset = XINT (cdr_part);
1125 else
1127 CHECK_CHARSET_GET_ID (elt, this_id);
1128 offset = 0;
1130 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1132 this_charset = CHARSET_FROM_ID (this_id);
1133 if (charset.min_char > this_charset->min_char)
1134 charset.min_char = this_charset->min_char;
1135 if (charset.max_char < this_charset->max_char)
1136 charset.max_char = this_charset->max_char;
1137 for (i = 0; i < 190; i++)
1138 charset.fast_map[i] |= this_charset->fast_map[i];
1141 else
1142 error ("None of :code-offset, :map, :parents are specified");
1144 val = args[charset_arg_unify_map];
1145 if (! NILP (val) && !STRINGP (val))
1146 CHECK_VECTOR (val);
1147 ASET (attrs, charset_unify_map, val);
1149 CHECK_LIST (args[charset_arg_plist]);
1150 ASET (attrs, charset_plist, args[charset_arg_plist]);
1152 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1153 &hash_code);
1154 if (charset.hash_index >= 0)
1156 new_definition_p = 0;
1157 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1158 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1160 else
1162 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1163 hash_code);
1164 if (charset_table_used == charset_table_size)
1166 struct charset *new_table
1167 = (struct charset *) xmalloc (sizeof (struct charset)
1168 * (charset_table_size + 16));
1169 memcpy (new_table, charset_table,
1170 sizeof (struct charset) * charset_table_size);
1171 charset_table_size += 16;
1172 charset_table = new_table;
1174 id = charset_table_used++;
1175 new_definition_p = 1;
1178 ASET (attrs, charset_id, make_number (id));
1179 charset.id = id;
1180 charset_table[id] = charset;
1182 if (charset.method == CHARSET_METHOD_MAP)
1184 load_charset (&charset, 0);
1185 charset_table[id] = charset;
1188 if (charset.iso_final >= 0)
1190 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1191 charset.iso_final) = id;
1192 if (new_definition_p)
1193 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1194 Fcons (make_number (id), Qnil));
1195 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1196 charset_jisx0201_roman = id;
1197 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1198 charset_jisx0208_1978 = id;
1199 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1200 charset_jisx0208 = id;
1201 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1202 charset_ksc5601 = id;
1205 if (charset.emacs_mule_id >= 0)
1207 emacs_mule_charset[charset.emacs_mule_id] = id;
1208 if (charset.emacs_mule_id < 0xA0)
1209 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1210 else
1211 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1212 if (new_definition_p)
1213 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1214 Fcons (make_number (id), Qnil));
1217 if (new_definition_p)
1219 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1220 if (charset.supplementary_p)
1221 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1222 Fcons (make_number (id), Qnil));
1223 else
1225 Lisp_Object tail;
1227 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1229 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1231 if (cs->supplementary_p)
1232 break;
1234 if (EQ (tail, Vcharset_ordered_list))
1235 Vcharset_ordered_list = Fcons (make_number (id),
1236 Vcharset_ordered_list);
1237 else if (NILP (tail))
1238 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1239 Fcons (make_number (id), Qnil));
1240 else
1242 val = Fcons (XCAR (tail), XCDR (tail));
1243 XSETCDR (tail, val);
1244 XSETCAR (tail, make_number (id));
1247 charset_ordered_list_tick++;
1250 return Qnil;
1254 /* Same as Fdefine_charset_internal but arguments are more convenient
1255 to call from C (typically in syms_of_charset). This can define a
1256 charset of `offset' method only. Return the ID of the new
1257 charset. */
1259 static int
1260 define_charset_internal (Lisp_Object name,
1261 int dimension,
1262 const char *code_space_chars,
1263 unsigned min_code, unsigned max_code,
1264 int iso_final, int iso_revision, int emacs_mule_id,
1265 int ascii_compatible, int supplementary,
1266 int code_offset)
1268 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1269 Lisp_Object args[charset_arg_max];
1270 Lisp_Object plist[14];
1271 Lisp_Object val;
1272 int i;
1274 args[charset_arg_name] = name;
1275 args[charset_arg_dimension] = make_number (dimension);
1276 val = Fmake_vector (make_number (8), make_number (0));
1277 for (i = 0; i < 8; i++)
1278 ASET (val, i, make_number (code_space[i]));
1279 args[charset_arg_code_space] = val;
1280 args[charset_arg_min_code] = make_number (min_code);
1281 args[charset_arg_max_code] = make_number (max_code);
1282 args[charset_arg_iso_final]
1283 = (iso_final < 0 ? Qnil : make_number (iso_final));
1284 args[charset_arg_iso_revision] = make_number (iso_revision);
1285 args[charset_arg_emacs_mule_id]
1286 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1287 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1288 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1289 args[charset_arg_invalid_code] = Qnil;
1290 args[charset_arg_code_offset] = make_number (code_offset);
1291 args[charset_arg_map] = Qnil;
1292 args[charset_arg_subset] = Qnil;
1293 args[charset_arg_superset] = Qnil;
1294 args[charset_arg_unify_map] = Qnil;
1296 plist[0] = intern_c_string (":name");
1297 plist[1] = args[charset_arg_name];
1298 plist[2] = intern_c_string (":dimension");
1299 plist[3] = args[charset_arg_dimension];
1300 plist[4] = intern_c_string (":code-space");
1301 plist[5] = args[charset_arg_code_space];
1302 plist[6] = intern_c_string (":iso-final-char");
1303 plist[7] = args[charset_arg_iso_final];
1304 plist[8] = intern_c_string (":emacs-mule-id");
1305 plist[9] = args[charset_arg_emacs_mule_id];
1306 plist[10] = intern_c_string (":ascii-compatible-p");
1307 plist[11] = args[charset_arg_ascii_compatible_p];
1308 plist[12] = intern_c_string (":code-offset");
1309 plist[13] = args[charset_arg_code_offset];
1311 args[charset_arg_plist] = Flist (14, plist);
1312 Fdefine_charset_internal (charset_arg_max, args);
1314 return XINT (CHARSET_SYMBOL_ID (name));
1318 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1319 Sdefine_charset_alias, 2, 2, 0,
1320 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1321 (Lisp_Object alias, Lisp_Object charset)
1323 Lisp_Object attr;
1325 CHECK_CHARSET_GET_ATTR (charset, attr);
1326 Fputhash (alias, attr, Vcharset_hash_table);
1327 Vcharset_list = Fcons (alias, Vcharset_list);
1328 return Qnil;
1332 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1333 doc: /* Return the property list of CHARSET. */)
1334 (Lisp_Object charset)
1336 Lisp_Object attrs;
1338 CHECK_CHARSET_GET_ATTR (charset, attrs);
1339 return CHARSET_ATTR_PLIST (attrs);
1343 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1344 doc: /* Set CHARSET's property list to PLIST. */)
1345 (Lisp_Object charset, Lisp_Object plist)
1347 Lisp_Object attrs;
1349 CHECK_CHARSET_GET_ATTR (charset, attrs);
1350 CHARSET_ATTR_PLIST (attrs) = plist;
1351 return plist;
1355 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1356 doc: /* Unify characters of CHARSET with Unicode.
1357 This means reading the relevant file and installing the table defined
1358 by CHARSET's `:unify-map' property.
1360 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1361 the same meaning as the `:unify-map' attribute in the function
1362 `define-charset' (which see).
1364 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1365 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1367 int id;
1368 struct charset *cs;
1370 CHECK_CHARSET_GET_ID (charset, id);
1371 cs = CHARSET_FROM_ID (id);
1372 if (NILP (deunify)
1373 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1374 : ! CHARSET_UNIFIED_P (cs))
1375 return Qnil;
1377 CHARSET_UNIFIED_P (cs) = 0;
1378 if (NILP (deunify))
1380 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1381 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1382 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1383 if (NILP (unify_map))
1384 unify_map = CHARSET_UNIFY_MAP (cs);
1385 else
1387 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1388 signal_error ("Bad unify-map", unify_map);
1389 CHARSET_UNIFY_MAP (cs) = unify_map;
1391 if (NILP (Vchar_unify_table))
1392 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1393 char_table_set_range (Vchar_unify_table,
1394 cs->min_char, cs->max_char, charset);
1395 CHARSET_UNIFIED_P (cs) = 1;
1397 else if (CHAR_TABLE_P (Vchar_unify_table))
1399 int min_code = CHARSET_MIN_CODE (cs);
1400 int max_code = CHARSET_MAX_CODE (cs);
1401 int min_char = DECODE_CHAR (cs, min_code);
1402 int max_char = DECODE_CHAR (cs, max_code);
1404 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1407 return Qnil;
1410 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1411 Sget_unused_iso_final_char, 2, 2, 0,
1412 doc: /*
1413 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1414 DIMENSION is the number of bytes to represent a character: 1 or 2.
1415 CHARS is the number of characters in a dimension: 94 or 96.
1417 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1418 If there's no unused final char for the specified kind of charset,
1419 return nil. */)
1420 (Lisp_Object dimension, Lisp_Object chars)
1422 int final_char;
1424 CHECK_NUMBER (dimension);
1425 CHECK_NUMBER (chars);
1426 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1427 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1428 if (XINT (chars) != 94 && XINT (chars) != 96)
1429 args_out_of_range_3 (chars, make_number (94), make_number (96));
1430 for (final_char = '0'; final_char <= '?'; final_char++)
1431 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1432 break;
1433 return (final_char <= '?' ? make_number (final_char) : Qnil);
1436 static void
1437 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1439 CHECK_NATNUM (dimension);
1440 CHECK_NATNUM (chars);
1441 CHECK_CHARACTER (final_char);
1443 if (XINT (dimension) > 3)
1444 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
1445 XINT (dimension));
1446 if (XINT (chars) != 94 && XINT (chars) != 96)
1447 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
1448 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1449 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
1450 (int)XINT (final_char));
1454 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1455 4, 4, 0,
1456 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1458 On decoding by an ISO-2022 base coding system, when a charset
1459 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1460 if CHARSET is designated instead. */)
1461 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1463 int id;
1464 int chars_flag;
1466 CHECK_CHARSET_GET_ID (charset, id);
1467 check_iso_charset_parameter (dimension, chars, final_char);
1468 chars_flag = XINT (chars) == 96;
1469 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1470 return Qnil;
1474 /* Return information about charsets in the text at PTR of NBYTES
1475 bytes, which are NCHARS characters. The value is:
1477 0: Each character is represented by one byte. This is always
1478 true for a unibyte string. For a multibyte string, true if
1479 it contains only ASCII characters.
1481 1: No charsets other than ascii, control-1, and latin-1 are
1482 found.
1484 2: Otherwise.
1488 string_xstring_p (Lisp_Object string)
1490 const unsigned char *p = SDATA (string);
1491 const unsigned char *endp = p + SBYTES (string);
1493 if (SCHARS (string) == SBYTES (string))
1494 return 0;
1496 while (p < endp)
1498 int c = STRING_CHAR_ADVANCE (p);
1500 if (c >= 0x100)
1501 return 2;
1503 return 1;
1507 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1509 CHARSETS is a vector. If Nth element is non-nil, it means the
1510 charset whose id is N is already found.
1512 It may lookup a translation table TABLE if supplied. */
1514 static void
1515 find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
1517 const unsigned char *pend = ptr + nbytes;
1519 if (nchars == nbytes)
1521 if (multibyte)
1522 ASET (charsets, charset_ascii, Qt);
1523 else
1524 while (ptr < pend)
1526 int c = *ptr++;
1528 if (!NILP (table))
1529 c = translate_char (table, c);
1530 if (ASCII_BYTE_P (c))
1531 ASET (charsets, charset_ascii, Qt);
1532 else
1533 ASET (charsets, charset_eight_bit, Qt);
1536 else
1538 while (ptr < pend)
1540 int c = STRING_CHAR_ADVANCE (ptr);
1541 struct charset *charset;
1543 if (!NILP (table))
1544 c = translate_char (table, c);
1545 charset = CHAR_CHARSET (c);
1546 ASET (charsets, CHARSET_ID (charset), Qt);
1551 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1552 2, 3, 0,
1553 doc: /* Return a list of charsets in the region between BEG and END.
1554 BEG and END are buffer positions.
1555 Optional arg TABLE if non-nil is a translation table to look up.
1557 If the current buffer is unibyte, the returned list may contain
1558 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1559 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1561 Lisp_Object charsets;
1562 EMACS_INT from, from_byte, to, stop, stop_byte;
1563 int i;
1564 Lisp_Object val;
1565 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1567 validate_region (&beg, &end);
1568 from = XFASTINT (beg);
1569 stop = to = XFASTINT (end);
1571 if (from < GPT && GPT < to)
1573 stop = GPT;
1574 stop_byte = GPT_BYTE;
1576 else
1577 stop_byte = CHAR_TO_BYTE (stop);
1579 from_byte = CHAR_TO_BYTE (from);
1581 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1582 while (1)
1584 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1585 stop_byte - from_byte, charsets, table,
1586 multibyte);
1587 if (stop < to)
1589 from = stop, from_byte = stop_byte;
1590 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1592 else
1593 break;
1596 val = Qnil;
1597 for (i = charset_table_used - 1; i >= 0; i--)
1598 if (!NILP (AREF (charsets, i)))
1599 val = Fcons (CHARSET_NAME (charset_table + i), val);
1600 return val;
1603 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1604 1, 2, 0,
1605 doc: /* Return a list of charsets in STR.
1606 Optional arg TABLE if non-nil is a translation table to look up.
1608 If STR is unibyte, the returned list may contain
1609 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1610 (Lisp_Object str, Lisp_Object table)
1612 Lisp_Object charsets;
1613 int i;
1614 Lisp_Object val;
1616 CHECK_STRING (str);
1618 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1619 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1620 charsets, table,
1621 STRING_MULTIBYTE (str));
1622 val = Qnil;
1623 for (i = charset_table_used - 1; i >= 0; i--)
1624 if (!NILP (AREF (charsets, i)))
1625 val = Fcons (CHARSET_NAME (charset_table + i), val);
1626 return val;
1631 /* Return a unified character code for C (>= 0x110000). VAL is a
1632 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1633 charset symbol. */
1635 maybe_unify_char (int c, Lisp_Object val)
1637 struct charset *charset;
1639 if (INTEGERP (val))
1640 return XINT (val);
1641 if (NILP (val))
1642 return c;
1644 CHECK_CHARSET_GET_CHARSET (val, charset);
1645 load_charset (charset, 1);
1646 if (! inhibit_load_charset_map)
1648 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1649 if (! NILP (val))
1650 c = XINT (val);
1652 else
1654 int code_index = c - CHARSET_CODE_OFFSET (charset);
1655 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1657 if (unified > 0)
1658 c = unified;
1660 return c;
1664 /* Return a character correponding to the code-point CODE of
1665 CHARSET. */
1668 decode_char (struct charset *charset, unsigned int code)
1670 int c, char_index;
1671 enum charset_method method = CHARSET_METHOD (charset);
1673 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1674 return -1;
1676 if (method == CHARSET_METHOD_SUBSET)
1678 Lisp_Object subset_info;
1680 subset_info = CHARSET_SUBSET (charset);
1681 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1682 code -= XINT (AREF (subset_info, 3));
1683 if (code < XFASTINT (AREF (subset_info, 1))
1684 || code > XFASTINT (AREF (subset_info, 2)))
1685 c = -1;
1686 else
1687 c = DECODE_CHAR (charset, code);
1689 else if (method == CHARSET_METHOD_SUPERSET)
1691 Lisp_Object parents;
1693 parents = CHARSET_SUPERSET (charset);
1694 c = -1;
1695 for (; CONSP (parents); parents = XCDR (parents))
1697 int id = XINT (XCAR (XCAR (parents)));
1698 int code_offset = XINT (XCDR (XCAR (parents)));
1699 unsigned this_code = code - code_offset;
1701 charset = CHARSET_FROM_ID (id);
1702 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1703 break;
1706 else
1708 char_index = CODE_POINT_TO_INDEX (charset, code);
1709 if (char_index < 0)
1710 return -1;
1712 if (method == CHARSET_METHOD_MAP)
1714 Lisp_Object decoder;
1716 decoder = CHARSET_DECODER (charset);
1717 if (! VECTORP (decoder))
1719 load_charset (charset, 1);
1720 decoder = CHARSET_DECODER (charset);
1722 if (VECTORP (decoder))
1723 c = XINT (AREF (decoder, char_index));
1724 else
1725 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1727 else /* method == CHARSET_METHOD_OFFSET */
1729 c = char_index + CHARSET_CODE_OFFSET (charset);
1730 if (CHARSET_UNIFIED_P (charset)
1731 && c > MAX_UNICODE_CHAR)
1732 MAYBE_UNIFY_CHAR (c);
1736 return c;
1739 /* Variable used temporarily by the macro ENCODE_CHAR. */
1740 Lisp_Object charset_work;
1742 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1743 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1744 use CHARSET's strict_max_char instead of max_char. */
1746 unsigned
1747 encode_char (struct charset *charset, int c)
1749 unsigned code;
1750 enum charset_method method = CHARSET_METHOD (charset);
1752 if (CHARSET_UNIFIED_P (charset))
1754 Lisp_Object deunifier;
1755 int code_index = -1;
1757 deunifier = CHARSET_DEUNIFIER (charset);
1758 if (! CHAR_TABLE_P (deunifier))
1760 load_charset (charset, 2);
1761 deunifier = CHARSET_DEUNIFIER (charset);
1763 if (CHAR_TABLE_P (deunifier))
1765 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1767 if (INTEGERP (deunified))
1768 code_index = XINT (deunified);
1770 else
1772 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1774 if (code_index >= 0)
1775 c = CHARSET_CODE_OFFSET (charset) + code_index;
1778 if (method == CHARSET_METHOD_SUBSET)
1780 Lisp_Object subset_info;
1781 struct charset *this_charset;
1783 subset_info = CHARSET_SUBSET (charset);
1784 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1785 code = ENCODE_CHAR (this_charset, c);
1786 if (code == CHARSET_INVALID_CODE (this_charset)
1787 || code < XFASTINT (AREF (subset_info, 1))
1788 || code > XFASTINT (AREF (subset_info, 2)))
1789 return CHARSET_INVALID_CODE (charset);
1790 code += XINT (AREF (subset_info, 3));
1791 return code;
1794 if (method == CHARSET_METHOD_SUPERSET)
1796 Lisp_Object parents;
1798 parents = CHARSET_SUPERSET (charset);
1799 for (; CONSP (parents); parents = XCDR (parents))
1801 int id = XINT (XCAR (XCAR (parents)));
1802 int code_offset = XINT (XCDR (XCAR (parents)));
1803 struct charset *this_charset = CHARSET_FROM_ID (id);
1805 code = ENCODE_CHAR (this_charset, c);
1806 if (code != CHARSET_INVALID_CODE (this_charset))
1807 return code + code_offset;
1809 return CHARSET_INVALID_CODE (charset);
1812 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1813 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1814 return CHARSET_INVALID_CODE (charset);
1816 if (method == CHARSET_METHOD_MAP)
1818 Lisp_Object encoder;
1819 Lisp_Object val;
1821 encoder = CHARSET_ENCODER (charset);
1822 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1824 load_charset (charset, 2);
1825 encoder = CHARSET_ENCODER (charset);
1827 if (CHAR_TABLE_P (encoder))
1829 val = CHAR_TABLE_REF (encoder, c);
1830 if (NILP (val))
1831 return CHARSET_INVALID_CODE (charset);
1832 code = XINT (val);
1833 if (! CHARSET_COMPACT_CODES_P (charset))
1834 code = INDEX_TO_CODE_POINT (charset, code);
1836 else
1838 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1839 code = INDEX_TO_CODE_POINT (charset, code);
1842 else /* method == CHARSET_METHOD_OFFSET */
1844 int code_index = c - CHARSET_CODE_OFFSET (charset);
1846 code = INDEX_TO_CODE_POINT (charset, code_index);
1849 return code;
1853 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1854 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1855 Return nil if CODE-POINT is not valid in CHARSET.
1857 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1859 Optional argument RESTRICTION specifies a way to map the pair of CCS
1860 and CODE-POINT to a character. Currently not supported and just ignored. */)
1861 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1863 int c, id;
1864 unsigned code;
1865 struct charset *charsetp;
1867 CHECK_CHARSET_GET_ID (charset, id);
1868 if (CONSP (code_point))
1870 CHECK_NATNUM_CAR (code_point);
1871 CHECK_NATNUM_CDR (code_point);
1872 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1874 else
1876 CHECK_NATNUM (code_point);
1877 code = XINT (code_point);
1879 charsetp = CHARSET_FROM_ID (id);
1880 c = DECODE_CHAR (charsetp, code);
1881 return (c >= 0 ? make_number (c) : Qnil);
1885 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1886 doc: /* Encode the character CH into a code-point of CHARSET.
1887 Return nil if CHARSET doesn't include CH.
1889 Optional argument RESTRICTION specifies a way to map CH to a
1890 code-point in CCS. Currently not supported and just ignored. */)
1891 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1893 int id;
1894 unsigned code;
1895 struct charset *charsetp;
1897 CHECK_CHARSET_GET_ID (charset, id);
1898 CHECK_NATNUM (ch);
1899 charsetp = CHARSET_FROM_ID (id);
1900 code = ENCODE_CHAR (charsetp, XINT (ch));
1901 if (code == CHARSET_INVALID_CODE (charsetp))
1902 return Qnil;
1903 if (code > 0x7FFFFFF)
1904 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1905 return make_number (code);
1909 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1910 doc:
1911 /* Return a character of CHARSET whose position codes are CODEn.
1913 CODE1 through CODE4 are optional, but if you don't supply sufficient
1914 position codes, it is assumed that the minimum code in each dimension
1915 is specified. */)
1916 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1918 int id, dimension;
1919 struct charset *charsetp;
1920 unsigned code;
1921 int c;
1923 CHECK_CHARSET_GET_ID (charset, id);
1924 charsetp = CHARSET_FROM_ID (id);
1926 dimension = CHARSET_DIMENSION (charsetp);
1927 if (NILP (code1))
1928 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1929 ? 0 : CHARSET_MIN_CODE (charsetp));
1930 else
1932 CHECK_NATNUM (code1);
1933 if (XFASTINT (code1) >= 0x100)
1934 args_out_of_range (make_number (0xFF), code1);
1935 code = XFASTINT (code1);
1937 if (dimension > 1)
1939 code <<= 8;
1940 if (NILP (code2))
1941 code |= charsetp->code_space[(dimension - 2) * 4];
1942 else
1944 CHECK_NATNUM (code2);
1945 if (XFASTINT (code2) >= 0x100)
1946 args_out_of_range (make_number (0xFF), code2);
1947 code |= XFASTINT (code2);
1950 if (dimension > 2)
1952 code <<= 8;
1953 if (NILP (code3))
1954 code |= charsetp->code_space[(dimension - 3) * 4];
1955 else
1957 CHECK_NATNUM (code3);
1958 if (XFASTINT (code3) >= 0x100)
1959 args_out_of_range (make_number (0xFF), code3);
1960 code |= XFASTINT (code3);
1963 if (dimension > 3)
1965 code <<= 8;
1966 if (NILP (code4))
1967 code |= charsetp->code_space[0];
1968 else
1970 CHECK_NATNUM (code4);
1971 if (XFASTINT (code4) >= 0x100)
1972 args_out_of_range (make_number (0xFF), code4);
1973 code |= XFASTINT (code4);
1980 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1981 code &= 0x7F7F7F7F;
1982 c = DECODE_CHAR (charsetp, code);
1983 if (c < 0)
1984 error ("Invalid code(s)");
1985 return make_number (c);
1989 /* Return the first charset in CHARSET_LIST that contains C.
1990 CHARSET_LIST is a list of charset IDs. If it is nil, use
1991 Vcharset_ordered_list. */
1993 struct charset *
1994 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1996 int maybe_null = 0;
1998 if (NILP (charset_list))
1999 charset_list = Vcharset_ordered_list;
2000 else
2001 maybe_null = 1;
2003 while (CONSP (charset_list))
2005 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2006 unsigned code = ENCODE_CHAR (charset, c);
2008 if (code != CHARSET_INVALID_CODE (charset))
2010 if (code_return)
2011 *code_return = code;
2012 return charset;
2014 charset_list = XCDR (charset_list);
2015 if (! maybe_null
2016 && c <= MAX_UNICODE_CHAR
2017 && EQ (charset_list, Vcharset_non_preferred_head))
2018 return CHARSET_FROM_ID (charset_unicode);
2020 return (maybe_null ? NULL
2021 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2022 : CHARSET_FROM_ID (charset_eight_bit));
2026 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2027 doc:
2028 /*Return list of charset and one to four position-codes of CH.
2029 The charset is decided by the current priority order of charsets.
2030 A position-code is a byte value of each dimension of the code-point of
2031 CH in the charset. */)
2032 (Lisp_Object ch)
2034 struct charset *charset;
2035 int c, dimension;
2036 unsigned code;
2037 Lisp_Object val;
2039 CHECK_CHARACTER (ch);
2040 c = XFASTINT (ch);
2041 charset = CHAR_CHARSET (c);
2042 if (! charset)
2043 abort ();
2044 code = ENCODE_CHAR (charset, c);
2045 if (code == CHARSET_INVALID_CODE (charset))
2046 abort ();
2047 dimension = CHARSET_DIMENSION (charset);
2048 for (val = Qnil; dimension > 0; dimension--)
2050 val = Fcons (make_number (code & 0xFF), val);
2051 code >>= 8;
2053 return Fcons (CHARSET_NAME (charset), val);
2057 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2058 doc: /* Return the charset of highest priority that contains CH.
2059 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2060 from which to find the charset. It may also be a coding system. In
2061 that case, find the charset from what supported by that coding system. */)
2062 (Lisp_Object ch, Lisp_Object restriction)
2064 struct charset *charset;
2066 CHECK_CHARACTER (ch);
2067 if (NILP (restriction))
2068 charset = CHAR_CHARSET (XINT (ch));
2069 else
2071 if (CONSP (restriction))
2073 int c = XFASTINT (ch);
2075 for (; CONSP (restriction); restriction = XCDR (restriction))
2077 struct charset *rcharset;
2079 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2080 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2081 return XCAR (restriction);
2083 return Qnil;
2085 restriction = coding_system_charset_list (restriction);
2086 charset = char_charset (XINT (ch), restriction, NULL);
2087 if (! charset)
2088 return Qnil;
2090 return (CHARSET_NAME (charset));
2094 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2095 doc: /*
2096 Return charset of a character in the current buffer at position POS.
2097 If POS is nil, it defauls to the current point.
2098 If POS is out of range, the value is nil. */)
2099 (Lisp_Object pos)
2101 Lisp_Object ch;
2102 struct charset *charset;
2104 ch = Fchar_after (pos);
2105 if (! INTEGERP (ch))
2106 return ch;
2107 charset = CHAR_CHARSET (XINT (ch));
2108 return (CHARSET_NAME (charset));
2112 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2113 doc: /*
2114 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2116 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2117 by their DIMENSION, CHARS, and FINAL-CHAR,
2118 whereas Emacs distinguishes them by charset symbol.
2119 See the documentation of the function `charset-info' for the meanings of
2120 DIMENSION, CHARS, and FINAL-CHAR. */)
2121 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2123 int id;
2124 int chars_flag;
2126 check_iso_charset_parameter (dimension, chars, final_char);
2127 chars_flag = XFASTINT (chars) == 96;
2128 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2129 XFASTINT (final_char));
2130 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2134 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2135 0, 0, 0,
2136 doc: /*
2137 Internal use only.
2138 Clear temporary charset mapping tables.
2139 It should be called only from temacs invoked for dumping. */)
2140 (void)
2142 if (temp_charset_work)
2144 xfree (temp_charset_work);
2145 temp_charset_work = NULL;
2148 if (CHAR_TABLE_P (Vchar_unify_table))
2149 Foptimize_char_table (Vchar_unify_table, Qnil);
2151 return Qnil;
2154 DEFUN ("charset-priority-list", Fcharset_priority_list,
2155 Scharset_priority_list, 0, 1, 0,
2156 doc: /* Return the list of charsets ordered by priority.
2157 HIGHESTP non-nil means just return the highest priority one. */)
2158 (Lisp_Object highestp)
2160 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2162 if (!NILP (highestp))
2163 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2165 while (!NILP (list))
2167 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2168 list = XCDR (list);
2170 return Fnreverse (val);
2173 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2174 1, MANY, 0,
2175 doc: /* Assign higher priority to the charsets given as arguments.
2176 usage: (set-charset-priority &rest charsets) */)
2177 (size_t nargs, Lisp_Object *args)
2179 Lisp_Object new_head, old_list, arglist[2];
2180 Lisp_Object list_2022, list_emacs_mule;
2181 size_t i;
2182 int id;
2184 old_list = Fcopy_sequence (Vcharset_ordered_list);
2185 new_head = Qnil;
2186 for (i = 0; i < nargs; i++)
2188 CHECK_CHARSET_GET_ID (args[i], id);
2189 if (! NILP (Fmemq (make_number (id), old_list)))
2191 old_list = Fdelq (make_number (id), old_list);
2192 new_head = Fcons (make_number (id), new_head);
2195 arglist[0] = Fnreverse (new_head);
2196 arglist[1] = Vcharset_non_preferred_head = old_list;
2197 Vcharset_ordered_list = Fnconc (2, arglist);
2198 charset_ordered_list_tick++;
2200 charset_unibyte = -1;
2201 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2202 CONSP (old_list); old_list = XCDR (old_list))
2204 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2205 list_2022 = Fcons (XCAR (old_list), list_2022);
2206 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2207 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2208 if (charset_unibyte < 0)
2210 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2212 if (CHARSET_DIMENSION (charset) == 1
2213 && CHARSET_ASCII_COMPATIBLE_P (charset)
2214 && CHARSET_MAX_CHAR (charset) >= 0x80)
2215 charset_unibyte = CHARSET_ID (charset);
2218 Viso_2022_charset_list = Fnreverse (list_2022);
2219 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2220 if (charset_unibyte < 0)
2221 charset_unibyte = charset_iso_8859_1;
2223 return Qnil;
2226 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2227 0, 1, 0,
2228 doc: /* Internal use only.
2229 Return charset identification number of CHARSET. */)
2230 (Lisp_Object charset)
2232 int id;
2234 CHECK_CHARSET_GET_ID (charset, id);
2235 return make_number (id);
2238 struct charset_sort_data
2240 Lisp_Object charset;
2241 int id;
2242 int priority;
2245 static int
2246 charset_compare (const void *d1, const void *d2)
2248 const struct charset_sort_data *data1 = d1, *data2 = d2;
2249 return (data1->priority - data2->priority);
2252 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2253 doc: /* Sort charset list CHARSETS by a priority of each charset.
2254 Return the sorted list. CHARSETS is modified by side effects.
2255 See also `charset-priority-list' and `set-charset-priority'. */)
2256 (Lisp_Object charsets)
2258 Lisp_Object len = Flength (charsets);
2259 int n = XFASTINT (len), i, j, done;
2260 Lisp_Object tail, elt, attrs;
2261 struct charset_sort_data *sort_data;
2262 int id, min_id = INT_MAX, max_id = INT_MIN;
2263 USE_SAFE_ALLOCA;
2265 if (n == 0)
2266 return Qnil;
2267 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2268 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2270 elt = XCAR (tail);
2271 CHECK_CHARSET_GET_ATTR (elt, attrs);
2272 sort_data[i].charset = elt;
2273 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2274 if (id < min_id)
2275 min_id = id;
2276 if (id > max_id)
2277 max_id = id;
2279 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2280 done < n && CONSP (tail); tail = XCDR (tail), i++)
2282 elt = XCAR (tail);
2283 id = XFASTINT (elt);
2284 if (id >= min_id && id <= max_id)
2285 for (j = 0; j < n; j++)
2286 if (sort_data[j].id == id)
2288 sort_data[j].priority = i;
2289 done++;
2292 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2293 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2294 XSETCAR (tail, sort_data[i].charset);
2295 SAFE_FREE ();
2296 return charsets;
2300 void
2301 init_charset (void)
2303 Lisp_Object tempdir;
2304 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2305 if (access (SSDATA (tempdir), 0) < 0)
2307 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2308 Emacs will not function correctly without the character map files.\n\
2309 Please check your installation!\n",
2310 tempdir);
2311 /* TODO should this be a fatal error? (Bug#909) */
2314 Vcharset_map_path = Fcons (tempdir, Qnil);
2318 void
2319 init_charset_once (void)
2321 int i, j, k;
2323 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2324 for (j = 0; j < ISO_MAX_CHARS; j++)
2325 for (k = 0; k < ISO_MAX_FINAL; k++)
2326 iso_charset_table[i][j][k] = -1;
2328 for (i = 0; i < 256; i++)
2329 emacs_mule_charset[i] = -1;
2331 charset_jisx0201_roman = -1;
2332 charset_jisx0208_1978 = -1;
2333 charset_jisx0208 = -1;
2334 charset_ksc5601 = -1;
2337 #ifdef emacs
2339 void
2340 syms_of_charset (void)
2342 DEFSYM (Qcharsetp, "charsetp");
2344 DEFSYM (Qascii, "ascii");
2345 DEFSYM (Qunicode, "unicode");
2346 DEFSYM (Qemacs, "emacs");
2347 DEFSYM (Qeight_bit, "eight-bit");
2348 DEFSYM (Qiso_8859_1, "iso-8859-1");
2350 DEFSYM (Qgl, "gl");
2351 DEFSYM (Qgr, "gr");
2353 staticpro (&Vcharset_ordered_list);
2354 Vcharset_ordered_list = Qnil;
2356 staticpro (&Viso_2022_charset_list);
2357 Viso_2022_charset_list = Qnil;
2359 staticpro (&Vemacs_mule_charset_list);
2360 Vemacs_mule_charset_list = Qnil;
2362 /* Don't staticpro them here. It's done in syms_of_fns. */
2363 QCtest = intern_c_string (":test");
2364 Qeq = intern_c_string ("eq");
2366 staticpro (&Vcharset_hash_table);
2368 Lisp_Object args[2];
2369 args[0] = QCtest;
2370 args[1] = Qeq;
2371 Vcharset_hash_table = Fmake_hash_table (2, args);
2374 charset_table_size = 128;
2375 charset_table = ((struct charset *)
2376 xmalloc (sizeof (struct charset) * charset_table_size));
2377 charset_table_used = 0;
2379 defsubr (&Scharsetp);
2380 defsubr (&Smap_charset_chars);
2381 defsubr (&Sdefine_charset_internal);
2382 defsubr (&Sdefine_charset_alias);
2383 defsubr (&Scharset_plist);
2384 defsubr (&Sset_charset_plist);
2385 defsubr (&Sunify_charset);
2386 defsubr (&Sget_unused_iso_final_char);
2387 defsubr (&Sdeclare_equiv_charset);
2388 defsubr (&Sfind_charset_region);
2389 defsubr (&Sfind_charset_string);
2390 defsubr (&Sdecode_char);
2391 defsubr (&Sencode_char);
2392 defsubr (&Ssplit_char);
2393 defsubr (&Smake_char);
2394 defsubr (&Schar_charset);
2395 defsubr (&Scharset_after);
2396 defsubr (&Siso_charset);
2397 defsubr (&Sclear_charset_maps);
2398 defsubr (&Scharset_priority_list);
2399 defsubr (&Sset_charset_priority);
2400 defsubr (&Scharset_id_internal);
2401 defsubr (&Ssort_charsets);
2403 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2404 doc: /* *List of directories to search for charset map files. */);
2405 Vcharset_map_path = Qnil;
2407 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2408 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2409 inhibit_load_charset_map = 0;
2411 DEFVAR_LISP ("charset-list", Vcharset_list,
2412 doc: /* List of all charsets ever defined. */);
2413 Vcharset_list = Qnil;
2415 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2416 doc: /* ISO639 language mnemonic symbol for the current language environment.
2417 If the current language environment is for multiple languages (e.g. "Latin-1"),
2418 the value may be a list of mnemonics. */);
2419 Vcurrent_iso639_language = Qnil;
2421 charset_ascii
2422 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2423 0, 127, 'B', -1, 0, 1, 0, 0);
2424 charset_iso_8859_1
2425 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2426 0, 255, -1, -1, -1, 1, 0, 0);
2427 charset_unicode
2428 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2429 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2430 charset_emacs
2431 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2432 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2433 charset_eight_bit
2434 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2435 128, 255, -1, 0, -1, 0, 1,
2436 MAX_5_BYTE_CHAR + 1);
2437 charset_unibyte = charset_iso_8859_1;
2440 #endif /* emacs */