Merge changes made in Gnus trunk.
[emacs.git] / src / charset.c
blob39a376a947f36c4232cac1a4ec028b3bc659f762
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28 #include <config.h>
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* List of all charsets. This variable is used only from Emacs
58 Lisp. */
59 Lisp_Object Vcharset_list;
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table;
65 /* Table of struct charset. */
66 struct charset *charset_table;
68 static int charset_table_size;
69 static int charset_table_used;
71 Lisp_Object Qcharsetp;
73 /* Special charset symbols. */
74 Lisp_Object Qascii;
75 Lisp_Object Qeight_bit;
76 Lisp_Object Qiso_8859_1;
77 Lisp_Object Qunicode;
78 Lisp_Object Qemacs;
80 /* The corresponding charsets. */
81 int charset_ascii;
82 int charset_eight_bit;
83 int charset_iso_8859_1;
84 int charset_unicode;
85 int charset_emacs;
87 /* The other special charsets. */
88 int charset_jisx0201_roman;
89 int charset_jisx0208_1978;
90 int charset_jisx0208;
91 int charset_ksc5601;
93 /* Value of charset attribute `charset-iso-plane'. */
94 Lisp_Object Qgl, Qgr;
96 /* Charset of unibyte characters. */
97 int charset_unibyte;
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list;
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104 Lisp_Object Vcharset_non_preferred_head;
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
108 -1. */
109 unsigned short charset_ordered_list_tick;
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list;
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list;
117 int emacs_mule_charset[256];
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
123 Lisp_Object Vcharset_map_path;
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map;
128 Lisp_Object Vcurrent_iso639_language;
130 #define CODE_POINT_TO_INDEX(charset, code) \
131 ((charset)->code_linear_p \
132 ? (code) - (charset)->min_code \
133 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
134 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
135 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
136 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
137 ? (((((code) >> 24) - (charset)->code_space[12]) \
138 * (charset)->code_space[11]) \
139 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
140 * (charset)->code_space[7]) \
141 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
142 * (charset)->code_space[3]) \
143 + (((code) & 0xFF) - (charset)->code_space[0]) \
144 - ((charset)->char_index_offset)) \
145 : -1)
148 /* Convert the character index IDX to code-point CODE for CHARSET.
149 It is assumed that IDX is in a valid range. */
151 #define INDEX_TO_CODE_POINT(charset, idx) \
152 ((charset)->code_linear_p \
153 ? (idx) + (charset)->min_code \
154 : (idx += (charset)->char_index_offset, \
155 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
156 | (((charset)->code_space[4] \
157 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
158 << 8) \
159 | (((charset)->code_space[8] \
160 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
161 << 16) \
162 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
163 << 24))))
165 /* Structure to hold mapping tables for a charset. Used by temacs
166 invoked for dumping. */
168 static struct
170 /* The current charset for which the following tables are setup. */
171 struct charset *current;
173 /* 1 iff the following table is used for encoder. */
174 short for_encoder;
176 /* When the following table is used for encoding, mininum and
177 maxinum character of the current charset. */
178 int min_char, max_char;
180 /* A Unicode character correspoinding to the code indice 0 (i.e. the
181 minimum code-point) of the current charset, or -1 if the code
182 indice 0 is not a Unicode character. This is checked when
183 table.encoder[CHAR] is zero. */
184 int zero_index_char;
186 union {
187 /* Table mapping code-indices (not code-points) of the current
188 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
189 doesn't belong to the current charset. */
190 int decoder[0x10000];
191 /* Table mapping Unicode characters to code-indices of the current
192 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
193 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
194 (0x20000..0x2FFFF). Note that there is no charset map that
195 uses both SMP and SIP. */
196 unsigned short encoder[0x20000];
197 } table;
198 } *temp_charset_work;
200 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
201 do { \
202 if ((CODE) == 0) \
203 temp_charset_work->zero_index_char = (C); \
204 else if ((C) < 0x20000) \
205 temp_charset_work->table.encoder[(C)] = (CODE); \
206 else \
207 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
208 } while (0)
210 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
211 ((C) == temp_charset_work->zero_index_char ? 0 \
212 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
213 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
214 : temp_charset_work->table.encoder[(C) - 0x10000] \
215 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
217 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
218 (temp_charset_work->table.decoder[(CODE)] = (C))
220 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
221 (temp_charset_work->table.decoder[(CODE)])
224 /* Set to 1 to warn that a charset map is loaded and thus a buffer
225 text and a string data may be relocated. */
226 int charset_map_loaded;
228 struct charset_map_entries
230 struct {
231 unsigned from, to;
232 int c;
233 } entry[0x10000];
234 struct charset_map_entries *next;
237 /* Load the mapping information of CHARSET from ENTRIES for
238 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
239 encoding (CONTROL_FLAG == 2).
241 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
242 and CHARSET->fast_map.
244 If CONTROL_FLAG is 1, setup the following tables according to
245 CHARSET->method and inhibit_load_charset_map.
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
250 ----------------------+--------------------+---------------------------
251 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
253 If CONTROL_FLAG is 2, setup the following tables.
255 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
256 ----------------------+--------------------+---------------------------
257 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
258 ----------------------+--------------------+--------------------------
259 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
262 static void
263 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
265 Lisp_Object vec, table;
266 unsigned max_code = CHARSET_MAX_CODE (charset);
267 int ascii_compatible_p = charset->ascii_compatible_p;
268 int min_char, max_char, nonascii_min_char;
269 int i;
270 unsigned char *fast_map = charset->fast_map;
272 if (n_entries <= 0)
273 return;
275 if (control_flag)
277 if (! inhibit_load_charset_map)
279 if (control_flag == 1)
281 if (charset->method == CHARSET_METHOD_MAP)
283 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
285 vec = CHARSET_DECODER (charset)
286 = Fmake_vector (make_number (n), make_number (-1));
288 else
290 char_table_set_range (Vchar_unify_table,
291 charset->min_char, charset->max_char,
292 Qnil);
295 else
297 table = Fmake_char_table (Qnil, Qnil);
298 if (charset->method == CHARSET_METHOD_MAP)
299 CHARSET_ENCODER (charset) = table;
300 else
301 CHARSET_DEUNIFIER (charset) = table;
304 else
306 if (! temp_charset_work)
307 temp_charset_work = malloc (sizeof (*temp_charset_work));
308 if (control_flag == 1)
310 memset (temp_charset_work->table.decoder, -1,
311 sizeof (int) * 0x10000);
313 else
315 memset (temp_charset_work->table.encoder, 0,
316 sizeof (unsigned short) * 0x20000);
317 temp_charset_work->zero_index_char = -1;
319 temp_charset_work->current = charset;
320 temp_charset_work->for_encoder = (control_flag == 2);
321 control_flag += 2;
323 charset_map_loaded = 1;
326 min_char = max_char = entries->entry[0].c;
327 nonascii_min_char = MAX_CHAR;
328 for (i = 0; i < n_entries; i++)
330 unsigned from, to;
331 int from_index, to_index;
332 int from_c, to_c;
333 int idx = i % 0x10000;
335 if (i > 0 && idx == 0)
336 entries = entries->next;
337 from = entries->entry[idx].from;
338 to = entries->entry[idx].to;
339 from_c = entries->entry[idx].c;
340 from_index = CODE_POINT_TO_INDEX (charset, from);
341 if (from == to)
343 to_index = from_index;
344 to_c = from_c;
346 else
348 to_index = CODE_POINT_TO_INDEX (charset, to);
349 to_c = from_c + (to_index - from_index);
351 if (from_index < 0 || to_index < 0)
352 continue;
354 if (to_c > max_char)
355 max_char = to_c;
356 else if (from_c < min_char)
357 min_char = from_c;
359 if (control_flag == 1)
361 if (charset->method == CHARSET_METHOD_MAP)
362 for (; from_index <= to_index; from_index++, from_c++)
363 ASET (vec, from_index, make_number (from_c));
364 else
365 for (; from_index <= to_index; from_index++, from_c++)
366 CHAR_TABLE_SET (Vchar_unify_table,
367 CHARSET_CODE_OFFSET (charset) + from_index,
368 make_number (from_c));
370 else if (control_flag == 2)
372 if (charset->method == CHARSET_METHOD_MAP
373 && CHARSET_COMPACT_CODES_P (charset))
374 for (; from_index <= to_index; from_index++, from_c++)
376 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
378 if (NILP (CHAR_TABLE_REF (table, from_c)))
379 CHAR_TABLE_SET (table, from_c, make_number (code));
381 else
382 for (; from_index <= to_index; from_index++, from_c++)
384 if (NILP (CHAR_TABLE_REF (table, from_c)))
385 CHAR_TABLE_SET (table, from_c, make_number (from_index));
388 else if (control_flag == 3)
389 for (; from_index <= to_index; from_index++, from_c++)
390 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
391 else if (control_flag == 4)
392 for (; from_index <= to_index; from_index++, from_c++)
393 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
394 else /* control_flag == 0 */
396 if (ascii_compatible_p)
398 if (! ASCII_BYTE_P (from_c))
400 if (from_c < nonascii_min_char)
401 nonascii_min_char = from_c;
403 else if (! ASCII_BYTE_P (to_c))
405 nonascii_min_char = 0x80;
409 for (; from_c <= to_c; from_c++)
410 CHARSET_FAST_MAP_SET (from_c, fast_map);
414 if (control_flag == 0)
416 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
417 ? nonascii_min_char : min_char);
418 CHARSET_MAX_CHAR (charset) = max_char;
420 else if (control_flag == 4)
422 temp_charset_work->min_char = min_char;
423 temp_charset_work->max_char = max_char;
428 /* Read a hexadecimal number (preceded by "0x") from the file FP while
429 paying attention to comment character '#'. */
431 static INLINE unsigned
432 read_hex (FILE *fp, int *eof)
434 int c;
435 unsigned n;
437 while ((c = getc (fp)) != EOF)
439 if (c == '#')
441 while ((c = getc (fp)) != EOF && c != '\n');
443 else if (c == '0')
445 if ((c = getc (fp)) == EOF || c == 'x')
446 break;
449 if (c == EOF)
451 *eof = 1;
452 return 0;
454 *eof = 0;
455 n = 0;
456 if (c == 'x')
457 while ((c = getc (fp)) != EOF && isxdigit (c))
458 n = ((n << 4)
459 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
460 else
461 while ((c = getc (fp)) != EOF && isdigit (c))
462 n = (n * 10) + c - '0';
463 if (c != EOF)
464 ungetc (c, fp);
465 return n;
468 /* Return a mapping vector for CHARSET loaded from MAPFILE.
469 Each line of MAPFILE has this form
470 0xAAAA 0xCCCC
471 where 0xAAAA is a code-point and 0xCCCC is the corresponding
472 character code, or this form
473 0xAAAA-0xBBBB 0xCCCC
474 where 0xAAAA and 0xBBBB are code-points specifying a range, and
475 0xCCCC is the first character code of the range.
477 The returned vector has this form:
478 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
479 where CODE1 is a code-point or a cons of code-points specifying a
480 range.
482 Note that this function uses `openp' to open MAPFILE but ignores
483 `file-name-handler-alist' to avoid running any Lisp code. */
485 static void
486 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
488 unsigned min_code = CHARSET_MIN_CODE (charset);
489 unsigned max_code = CHARSET_MAX_CODE (charset);
490 int fd;
491 FILE *fp;
492 int eof;
493 Lisp_Object suffixes;
494 struct charset_map_entries *head, *entries;
495 int n_entries, count;
496 USE_SAFE_ALLOCA;
498 suffixes = Fcons (build_string (".map"),
499 Fcons (build_string (".TXT"), Qnil));
501 count = SPECPDL_INDEX ();
502 specbind (Qfile_name_handler_alist, Qnil);
503 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
504 unbind_to (count, Qnil);
505 if (fd < 0
506 || ! (fp = fdopen (fd, "r")))
507 error ("Failure in loading charset map: %S", SDATA (mapfile));
509 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
510 large (larger than MAX_ALLOCA). */
511 SAFE_ALLOCA (head, struct charset_map_entries *,
512 sizeof (struct charset_map_entries));
513 entries = head;
514 memset (entries, 0, sizeof (struct charset_map_entries));
516 n_entries = 0;
517 eof = 0;
518 while (1)
520 unsigned from, to;
521 int c;
522 int idx;
524 from = read_hex (fp, &eof);
525 if (eof)
526 break;
527 if (getc (fp) == '-')
528 to = read_hex (fp, &eof);
529 else
530 to = from;
531 c = (int) read_hex (fp, &eof);
533 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
534 continue;
536 if (n_entries > 0 && (n_entries % 0x10000) == 0)
538 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
539 sizeof (struct charset_map_entries));
540 entries = entries->next;
541 memset (entries, 0, sizeof (struct charset_map_entries));
543 idx = n_entries % 0x10000;
544 entries->entry[idx].from = from;
545 entries->entry[idx].to = to;
546 entries->entry[idx].c = c;
547 n_entries++;
549 fclose (fp);
551 load_charset_map (charset, head, n_entries, control_flag);
552 SAFE_FREE ();
555 static void
556 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
558 unsigned min_code = CHARSET_MIN_CODE (charset);
559 unsigned max_code = CHARSET_MAX_CODE (charset);
560 struct charset_map_entries *head, *entries;
561 int n_entries;
562 int len = ASIZE (vec);
563 int i;
564 USE_SAFE_ALLOCA;
566 if (len % 2 == 1)
568 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
569 return;
572 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
573 large (larger than MAX_ALLOCA). */
574 SAFE_ALLOCA (head, struct charset_map_entries *,
575 sizeof (struct charset_map_entries));
576 entries = head;
577 memset (entries, 0, sizeof (struct charset_map_entries));
579 n_entries = 0;
580 for (i = 0; i < len; i += 2)
582 Lisp_Object val, val2;
583 unsigned from, to;
584 int c;
585 int idx;
587 val = AREF (vec, i);
588 if (CONSP (val))
590 val2 = XCDR (val);
591 val = XCAR (val);
592 CHECK_NATNUM (val);
593 CHECK_NATNUM (val2);
594 from = XFASTINT (val);
595 to = XFASTINT (val2);
597 else
599 CHECK_NATNUM (val);
600 from = to = XFASTINT (val);
602 val = AREF (vec, i + 1);
603 CHECK_NATNUM (val);
604 c = XFASTINT (val);
606 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
607 continue;
609 if (n_entries > 0 && (n_entries % 0x10000) == 0)
611 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
612 sizeof (struct charset_map_entries));
613 entries = entries->next;
614 memset (entries, 0, sizeof (struct charset_map_entries));
616 idx = n_entries % 0x10000;
617 entries->entry[idx].from = from;
618 entries->entry[idx].to = to;
619 entries->entry[idx].c = c;
620 n_entries++;
623 load_charset_map (charset, head, n_entries, control_flag);
624 SAFE_FREE ();
628 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
629 map it is (see the comment of load_charset_map for the detail). */
631 static void
632 load_charset (struct charset *charset, int control_flag)
634 Lisp_Object map;
636 if (inhibit_load_charset_map
637 && temp_charset_work
638 && charset == temp_charset_work->current
639 && ((control_flag == 2) == temp_charset_work->for_encoder))
640 return;
642 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
643 map = CHARSET_MAP (charset);
644 else if (CHARSET_UNIFIED_P (charset))
645 map = CHARSET_UNIFY_MAP (charset);
646 if (STRINGP (map))
647 load_charset_map_from_file (charset, map, control_flag);
648 else
649 load_charset_map_from_vector (charset, map, control_flag);
653 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
654 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
655 (Lisp_Object object)
657 return (CHARSETP (object) ? Qt : Qnil);
661 void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
662 Lisp_Object function, Lisp_Object arg,
663 unsigned from, unsigned to);
665 void
666 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
668 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
669 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
670 Lisp_Object range;
671 int c, stop;
672 struct gcpro gcpro1;
674 range = Fcons (Qnil, Qnil);
675 GCPRO1 (range);
677 c = temp_charset_work->min_char;
678 stop = (temp_charset_work->max_char < 0x20000
679 ? temp_charset_work->max_char : 0xFFFF);
681 while (1)
683 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
685 if (index >= from_idx && index <= to_idx)
687 if (NILP (XCAR (range)))
688 XSETCAR (range, make_number (c));
690 else if (! NILP (XCAR (range)))
692 XSETCDR (range, make_number (c - 1));
693 if (c_function)
694 (*c_function) (arg, range);
695 else
696 call2 (function, range, arg);
697 XSETCAR (range, Qnil);
699 if (c == stop)
701 if (c == temp_charset_work->max_char)
703 if (! NILP (XCAR (range)))
705 XSETCDR (range, make_number (c));
706 if (c_function)
707 (*c_function) (arg, range);
708 else
709 call2 (function, range, arg);
711 break;
713 c = 0x1FFFF;
714 stop = temp_charset_work->max_char;
716 c++;
718 UNGCPRO;
721 void
722 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
723 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
725 Lisp_Object range;
726 int partial;
728 partial = (from > CHARSET_MIN_CODE (charset)
729 || to < CHARSET_MAX_CODE (charset));
731 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
733 int from_idx = CODE_POINT_TO_INDEX (charset, from);
734 int to_idx = CODE_POINT_TO_INDEX (charset, to);
735 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
736 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
738 if (CHARSET_UNIFIED_P (charset))
740 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
741 load_charset (charset, 2);
742 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
743 map_char_table_for_charset (c_function, function,
744 CHARSET_DEUNIFIER (charset), arg,
745 partial ? charset : NULL, from, to);
746 else
747 map_charset_for_dump (c_function, function, arg, from, to);
750 range = Fcons (make_number (from_c), make_number (to_c));
751 if (NILP (function))
752 (*c_function) (arg, range);
753 else
754 call2 (function, range, arg);
756 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
758 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
759 load_charset (charset, 2);
760 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
761 map_char_table_for_charset (c_function, function,
762 CHARSET_ENCODER (charset), arg,
763 partial ? charset : NULL, from, to);
764 else
765 map_charset_for_dump (c_function, function, arg, from, to);
767 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
769 Lisp_Object subset_info;
770 int offset;
772 subset_info = CHARSET_SUBSET (charset);
773 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
774 offset = XINT (AREF (subset_info, 3));
775 from -= offset;
776 if (from < XFASTINT (AREF (subset_info, 1)))
777 from = XFASTINT (AREF (subset_info, 1));
778 to -= offset;
779 if (to > XFASTINT (AREF (subset_info, 2)))
780 to = XFASTINT (AREF (subset_info, 2));
781 map_charset_chars (c_function, function, arg, charset, from, to);
783 else /* i.e. CHARSET_METHOD_SUPERSET */
785 Lisp_Object parents;
787 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
788 parents = XCDR (parents))
790 int offset;
791 unsigned this_from, this_to;
793 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
794 offset = XINT (XCDR (XCAR (parents)));
795 this_from = from > offset ? from - offset : 0;
796 this_to = to > offset ? to - offset : 0;
797 if (this_from < CHARSET_MIN_CODE (charset))
798 this_from = CHARSET_MIN_CODE (charset);
799 if (this_to > CHARSET_MAX_CODE (charset))
800 this_to = CHARSET_MAX_CODE (charset);
801 map_charset_chars (c_function, function, arg, charset,
802 this_from, this_to);
807 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
808 doc: /* Call FUNCTION for all characters in CHARSET.
809 FUNCTION is called with an argument RANGE and the optional 3rd
810 argument ARG.
812 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
813 characters contained in CHARSET.
815 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
816 range of code points (in CHARSET) of target characters. */)
817 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
819 struct charset *cs;
820 unsigned from, to;
822 CHECK_CHARSET_GET_CHARSET (charset, cs);
823 if (NILP (from_code))
824 from = CHARSET_MIN_CODE (cs);
825 else
827 CHECK_NATNUM (from_code);
828 from = XINT (from_code);
829 if (from < CHARSET_MIN_CODE (cs))
830 from = CHARSET_MIN_CODE (cs);
832 if (NILP (to_code))
833 to = CHARSET_MAX_CODE (cs);
834 else
836 CHECK_NATNUM (to_code);
837 to = XINT (to_code);
838 if (to > CHARSET_MAX_CODE (cs))
839 to = CHARSET_MAX_CODE (cs);
841 map_charset_chars (NULL, function, arg, cs, from, to);
842 return Qnil;
846 /* Define a charset according to the arguments. The Nth argument is
847 the Nth attribute of the charset (the last attribute `charset-id'
848 is not included). See the docstring of `define-charset' for the
849 detail. */
851 DEFUN ("define-charset-internal", Fdefine_charset_internal,
852 Sdefine_charset_internal, charset_arg_max, MANY, 0,
853 doc: /* For internal use only.
854 usage: (define-charset-internal ...) */)
855 (int nargs, Lisp_Object *args)
857 /* Charset attr vector. */
858 Lisp_Object attrs;
859 Lisp_Object val;
860 unsigned hash_code;
861 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
862 int i, j;
863 struct charset charset;
864 int id;
865 int dimension;
866 int new_definition_p;
867 int nchars;
869 if (nargs != charset_arg_max)
870 return Fsignal (Qwrong_number_of_arguments,
871 Fcons (intern ("define-charset-internal"),
872 make_number (nargs)));
874 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
876 CHECK_SYMBOL (args[charset_arg_name]);
877 ASET (attrs, charset_name, args[charset_arg_name]);
879 val = args[charset_arg_code_space];
880 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
882 int min_byte, max_byte;
884 min_byte = XINT (Faref (val, make_number (i * 2)));
885 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
886 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
887 error ("Invalid :code-space value");
888 charset.code_space[i * 4] = min_byte;
889 charset.code_space[i * 4 + 1] = max_byte;
890 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
891 nchars *= charset.code_space[i * 4 + 2];
892 charset.code_space[i * 4 + 3] = nchars;
893 if (max_byte > 0)
894 dimension = i + 1;
897 val = args[charset_arg_dimension];
898 if (NILP (val))
899 charset.dimension = dimension;
900 else
902 CHECK_NATNUM (val);
903 charset.dimension = XINT (val);
904 if (charset.dimension < 1 || charset.dimension > 4)
905 args_out_of_range_3 (val, make_number (1), make_number (4));
908 charset.code_linear_p
909 = (charset.dimension == 1
910 || (charset.code_space[2] == 256
911 && (charset.dimension == 2
912 || (charset.code_space[6] == 256
913 && (charset.dimension == 3
914 || charset.code_space[10] == 256)))));
916 if (! charset.code_linear_p)
918 charset.code_space_mask = (unsigned char *) xmalloc (256);
919 memset (charset.code_space_mask, 0, 256);
920 for (i = 0; i < 4; i++)
921 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
922 j++)
923 charset.code_space_mask[j] |= (1 << i);
926 charset.iso_chars_96 = charset.code_space[2] == 96;
928 charset.min_code = (charset.code_space[0]
929 | (charset.code_space[4] << 8)
930 | (charset.code_space[8] << 16)
931 | (charset.code_space[12] << 24));
932 charset.max_code = (charset.code_space[1]
933 | (charset.code_space[5] << 8)
934 | (charset.code_space[9] << 16)
935 | (charset.code_space[13] << 24));
936 charset.char_index_offset = 0;
938 val = args[charset_arg_min_code];
939 if (! NILP (val))
941 unsigned code;
943 if (INTEGERP (val))
944 code = XINT (val);
945 else
947 CHECK_CONS (val);
948 CHECK_NUMBER_CAR (val);
949 CHECK_NUMBER_CDR (val);
950 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
952 if (code < charset.min_code
953 || code > charset.max_code)
954 args_out_of_range_3 (make_number (charset.min_code),
955 make_number (charset.max_code), val);
956 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
957 charset.min_code = code;
960 val = args[charset_arg_max_code];
961 if (! NILP (val))
963 unsigned code;
965 if (INTEGERP (val))
966 code = XINT (val);
967 else
969 CHECK_CONS (val);
970 CHECK_NUMBER_CAR (val);
971 CHECK_NUMBER_CDR (val);
972 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
974 if (code < charset.min_code
975 || code > charset.max_code)
976 args_out_of_range_3 (make_number (charset.min_code),
977 make_number (charset.max_code), val);
978 charset.max_code = code;
981 charset.compact_codes_p = charset.max_code < 0x10000;
983 val = args[charset_arg_invalid_code];
984 if (NILP (val))
986 if (charset.min_code > 0)
987 charset.invalid_code = 0;
988 else
990 XSETINT (val, charset.max_code + 1);
991 if (XINT (val) == charset.max_code + 1)
992 charset.invalid_code = charset.max_code + 1;
993 else
994 error ("Attribute :invalid-code must be specified");
997 else
999 CHECK_NATNUM (val);
1000 charset.invalid_code = XFASTINT (val);
1003 val = args[charset_arg_iso_final];
1004 if (NILP (val))
1005 charset.iso_final = -1;
1006 else
1008 CHECK_NUMBER (val);
1009 if (XINT (val) < '0' || XINT (val) > 127)
1010 error ("Invalid iso-final-char: %d", XINT (val));
1011 charset.iso_final = XINT (val);
1014 val = args[charset_arg_iso_revision];
1015 if (NILP (val))
1016 charset.iso_revision = -1;
1017 else
1019 CHECK_NUMBER (val);
1020 if (XINT (val) > 63)
1021 args_out_of_range (make_number (63), val);
1022 charset.iso_revision = XINT (val);
1025 val = args[charset_arg_emacs_mule_id];
1026 if (NILP (val))
1027 charset.emacs_mule_id = -1;
1028 else
1030 CHECK_NATNUM (val);
1031 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1032 error ("Invalid emacs-mule-id: %d", XINT (val));
1033 charset.emacs_mule_id = XINT (val);
1036 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1038 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1040 charset.unified_p = 0;
1042 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1044 if (! NILP (args[charset_arg_code_offset]))
1046 val = args[charset_arg_code_offset];
1047 CHECK_NUMBER (val);
1049 charset.method = CHARSET_METHOD_OFFSET;
1050 charset.code_offset = XINT (val);
1052 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1053 charset.min_char = i + charset.code_offset;
1054 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1055 charset.max_char = i + charset.code_offset;
1056 if (charset.max_char > MAX_CHAR)
1057 error ("Unsupported max char: %d", charset.max_char);
1059 i = (charset.min_char >> 7) << 7;
1060 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1061 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1062 i = (i >> 12) << 12;
1063 for (; i <= charset.max_char; i += 0x1000)
1064 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1065 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1066 charset.ascii_compatible_p = 1;
1068 else if (! NILP (args[charset_arg_map]))
1070 val = args[charset_arg_map];
1071 ASET (attrs, charset_map, val);
1072 charset.method = CHARSET_METHOD_MAP;
1074 else if (! NILP (args[charset_arg_subset]))
1076 Lisp_Object parent;
1077 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1078 struct charset *parent_charset;
1080 val = args[charset_arg_subset];
1081 parent = Fcar (val);
1082 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1083 parent_min_code = Fnth (make_number (1), val);
1084 CHECK_NATNUM (parent_min_code);
1085 parent_max_code = Fnth (make_number (2), val);
1086 CHECK_NATNUM (parent_max_code);
1087 parent_code_offset = Fnth (make_number (3), val);
1088 CHECK_NUMBER (parent_code_offset);
1089 val = Fmake_vector (make_number (4), Qnil);
1090 ASET (val, 0, make_number (parent_charset->id));
1091 ASET (val, 1, parent_min_code);
1092 ASET (val, 2, parent_max_code);
1093 ASET (val, 3, parent_code_offset);
1094 ASET (attrs, charset_subset, val);
1096 charset.method = CHARSET_METHOD_SUBSET;
1097 /* Here, we just copy the parent's fast_map. It's not accurate,
1098 but at least it works for quickly detecting which character
1099 DOESN'T belong to this charset. */
1100 for (i = 0; i < 190; i++)
1101 charset.fast_map[i] = parent_charset->fast_map[i];
1103 /* We also copy these for parents. */
1104 charset.min_char = parent_charset->min_char;
1105 charset.max_char = parent_charset->max_char;
1107 else if (! NILP (args[charset_arg_superset]))
1109 val = args[charset_arg_superset];
1110 charset.method = CHARSET_METHOD_SUPERSET;
1111 val = Fcopy_sequence (val);
1112 ASET (attrs, charset_superset, val);
1114 charset.min_char = MAX_CHAR;
1115 charset.max_char = 0;
1116 for (; ! NILP (val); val = Fcdr (val))
1118 Lisp_Object elt, car_part, cdr_part;
1119 int this_id, offset;
1120 struct charset *this_charset;
1122 elt = Fcar (val);
1123 if (CONSP (elt))
1125 car_part = XCAR (elt);
1126 cdr_part = XCDR (elt);
1127 CHECK_CHARSET_GET_ID (car_part, this_id);
1128 CHECK_NUMBER (cdr_part);
1129 offset = XINT (cdr_part);
1131 else
1133 CHECK_CHARSET_GET_ID (elt, this_id);
1134 offset = 0;
1136 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1138 this_charset = CHARSET_FROM_ID (this_id);
1139 if (charset.min_char > this_charset->min_char)
1140 charset.min_char = this_charset->min_char;
1141 if (charset.max_char < this_charset->max_char)
1142 charset.max_char = this_charset->max_char;
1143 for (i = 0; i < 190; i++)
1144 charset.fast_map[i] |= this_charset->fast_map[i];
1147 else
1148 error ("None of :code-offset, :map, :parents are specified");
1150 val = args[charset_arg_unify_map];
1151 if (! NILP (val) && !STRINGP (val))
1152 CHECK_VECTOR (val);
1153 ASET (attrs, charset_unify_map, val);
1155 CHECK_LIST (args[charset_arg_plist]);
1156 ASET (attrs, charset_plist, args[charset_arg_plist]);
1158 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1159 &hash_code);
1160 if (charset.hash_index >= 0)
1162 new_definition_p = 0;
1163 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1164 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1166 else
1168 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1169 hash_code);
1170 if (charset_table_used == charset_table_size)
1172 struct charset *new_table
1173 = (struct charset *) xmalloc (sizeof (struct charset)
1174 * (charset_table_size + 16));
1175 memcpy (new_table, charset_table,
1176 sizeof (struct charset) * charset_table_size);
1177 charset_table_size += 16;
1178 charset_table = new_table;
1180 id = charset_table_used++;
1181 new_definition_p = 1;
1184 ASET (attrs, charset_id, make_number (id));
1185 charset.id = id;
1186 charset_table[id] = charset;
1188 if (charset.method == CHARSET_METHOD_MAP)
1190 load_charset (&charset, 0);
1191 charset_table[id] = charset;
1194 if (charset.iso_final >= 0)
1196 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1197 charset.iso_final) = id;
1198 if (new_definition_p)
1199 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1200 Fcons (make_number (id), Qnil));
1201 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1202 charset_jisx0201_roman = id;
1203 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1204 charset_jisx0208_1978 = id;
1205 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1206 charset_jisx0208 = id;
1207 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1208 charset_ksc5601 = id;
1211 if (charset.emacs_mule_id >= 0)
1213 emacs_mule_charset[charset.emacs_mule_id] = id;
1214 if (charset.emacs_mule_id < 0xA0)
1215 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1216 else
1217 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1218 if (new_definition_p)
1219 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1220 Fcons (make_number (id), Qnil));
1223 if (new_definition_p)
1225 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1226 if (charset.supplementary_p)
1227 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1228 Fcons (make_number (id), Qnil));
1229 else
1231 Lisp_Object tail;
1233 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1235 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1237 if (cs->supplementary_p)
1238 break;
1240 if (EQ (tail, Vcharset_ordered_list))
1241 Vcharset_ordered_list = Fcons (make_number (id),
1242 Vcharset_ordered_list);
1243 else if (NILP (tail))
1244 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1245 Fcons (make_number (id), Qnil));
1246 else
1248 val = Fcons (XCAR (tail), XCDR (tail));
1249 XSETCDR (tail, val);
1250 XSETCAR (tail, make_number (id));
1253 charset_ordered_list_tick++;
1256 return Qnil;
1260 /* Same as Fdefine_charset_internal but arguments are more convenient
1261 to call from C (typically in syms_of_charset). This can define a
1262 charset of `offset' method only. Return the ID of the new
1263 charset. */
1265 static int
1266 define_charset_internal (Lisp_Object name,
1267 int dimension,
1268 const unsigned char *code_space,
1269 unsigned min_code, unsigned max_code,
1270 int iso_final, int iso_revision, int emacs_mule_id,
1271 int ascii_compatible, int supplementary,
1272 int code_offset)
1274 Lisp_Object args[charset_arg_max];
1275 Lisp_Object plist[14];
1276 Lisp_Object val;
1277 int i;
1279 args[charset_arg_name] = name;
1280 args[charset_arg_dimension] = make_number (dimension);
1281 val = Fmake_vector (make_number (8), make_number (0));
1282 for (i = 0; i < 8; i++)
1283 ASET (val, i, make_number (code_space[i]));
1284 args[charset_arg_code_space] = val;
1285 args[charset_arg_min_code] = make_number (min_code);
1286 args[charset_arg_max_code] = make_number (max_code);
1287 args[charset_arg_iso_final]
1288 = (iso_final < 0 ? Qnil : make_number (iso_final));
1289 args[charset_arg_iso_revision] = make_number (iso_revision);
1290 args[charset_arg_emacs_mule_id]
1291 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1292 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1293 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1294 args[charset_arg_invalid_code] = Qnil;
1295 args[charset_arg_code_offset] = make_number (code_offset);
1296 args[charset_arg_map] = Qnil;
1297 args[charset_arg_subset] = Qnil;
1298 args[charset_arg_superset] = Qnil;
1299 args[charset_arg_unify_map] = Qnil;
1301 plist[0] = intern_c_string (":name");
1302 plist[1] = args[charset_arg_name];
1303 plist[2] = intern_c_string (":dimension");
1304 plist[3] = args[charset_arg_dimension];
1305 plist[4] = intern_c_string (":code-space");
1306 plist[5] = args[charset_arg_code_space];
1307 plist[6] = intern_c_string (":iso-final-char");
1308 plist[7] = args[charset_arg_iso_final];
1309 plist[8] = intern_c_string (":emacs-mule-id");
1310 plist[9] = args[charset_arg_emacs_mule_id];
1311 plist[10] = intern_c_string (":ascii-compatible-p");
1312 plist[11] = args[charset_arg_ascii_compatible_p];
1313 plist[12] = intern_c_string (":code-offset");
1314 plist[13] = args[charset_arg_code_offset];
1316 args[charset_arg_plist] = Flist (14, plist);
1317 Fdefine_charset_internal (charset_arg_max, args);
1319 return XINT (CHARSET_SYMBOL_ID (name));
1323 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1324 Sdefine_charset_alias, 2, 2, 0,
1325 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1326 (Lisp_Object alias, Lisp_Object charset)
1328 Lisp_Object attr;
1330 CHECK_CHARSET_GET_ATTR (charset, attr);
1331 Fputhash (alias, attr, Vcharset_hash_table);
1332 Vcharset_list = Fcons (alias, Vcharset_list);
1333 return Qnil;
1337 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1338 doc: /* Return the property list of CHARSET. */)
1339 (Lisp_Object charset)
1341 Lisp_Object attrs;
1343 CHECK_CHARSET_GET_ATTR (charset, attrs);
1344 return CHARSET_ATTR_PLIST (attrs);
1348 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1349 doc: /* Set CHARSET's property list to PLIST. */)
1350 (Lisp_Object charset, Lisp_Object plist)
1352 Lisp_Object attrs;
1354 CHECK_CHARSET_GET_ATTR (charset, attrs);
1355 CHARSET_ATTR_PLIST (attrs) = plist;
1356 return plist;
1360 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1361 doc: /* Unify characters of CHARSET with Unicode.
1362 This means reading the relevant file and installing the table defined
1363 by CHARSET's `:unify-map' property.
1365 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1366 the same meaning as the `:unify-map' attribute in the function
1367 `define-charset' (which see).
1369 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1370 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1372 int id;
1373 struct charset *cs;
1375 CHECK_CHARSET_GET_ID (charset, id);
1376 cs = CHARSET_FROM_ID (id);
1377 if (NILP (deunify)
1378 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1379 : ! CHARSET_UNIFIED_P (cs))
1380 return Qnil;
1382 CHARSET_UNIFIED_P (cs) = 0;
1383 if (NILP (deunify))
1385 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1386 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1387 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1388 if (NILP (unify_map))
1389 unify_map = CHARSET_UNIFY_MAP (cs);
1390 else
1392 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1393 signal_error ("Bad unify-map", unify_map);
1394 CHARSET_UNIFY_MAP (cs) = unify_map;
1396 if (NILP (Vchar_unify_table))
1397 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1398 char_table_set_range (Vchar_unify_table,
1399 cs->min_char, cs->max_char, charset);
1400 CHARSET_UNIFIED_P (cs) = 1;
1402 else if (CHAR_TABLE_P (Vchar_unify_table))
1404 int min_code = CHARSET_MIN_CODE (cs);
1405 int max_code = CHARSET_MAX_CODE (cs);
1406 int min_char = DECODE_CHAR (cs, min_code);
1407 int max_char = DECODE_CHAR (cs, max_code);
1409 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1412 return Qnil;
1415 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1416 Sget_unused_iso_final_char, 2, 2, 0,
1417 doc: /*
1418 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1419 DIMENSION is the number of bytes to represent a character: 1 or 2.
1420 CHARS is the number of characters in a dimension: 94 or 96.
1422 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1423 If there's no unused final char for the specified kind of charset,
1424 return nil. */)
1425 (Lisp_Object dimension, Lisp_Object chars)
1427 int final_char;
1429 CHECK_NUMBER (dimension);
1430 CHECK_NUMBER (chars);
1431 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1432 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1433 if (XINT (chars) != 94 && XINT (chars) != 96)
1434 args_out_of_range_3 (chars, make_number (94), make_number (96));
1435 for (final_char = '0'; final_char <= '?'; final_char++)
1436 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1437 break;
1438 return (final_char <= '?' ? make_number (final_char) : Qnil);
1441 static void
1442 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1444 CHECK_NATNUM (dimension);
1445 CHECK_NATNUM (chars);
1446 CHECK_NATNUM (final_char);
1448 if (XINT (dimension) > 3)
1449 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1450 if (XINT (chars) != 94 && XINT (chars) != 96)
1451 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1452 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1453 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1457 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1458 4, 4, 0,
1459 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1461 On decoding by an ISO-2022 base coding system, when a charset
1462 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1463 if CHARSET is designated instead. */)
1464 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1466 int id;
1467 int chars_flag;
1469 CHECK_CHARSET_GET_ID (charset, id);
1470 check_iso_charset_parameter (dimension, chars, final_char);
1471 chars_flag = XINT (chars) == 96;
1472 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1473 return Qnil;
1477 /* Return information about charsets in the text at PTR of NBYTES
1478 bytes, which are NCHARS characters. The value is:
1480 0: Each character is represented by one byte. This is always
1481 true for a unibyte string. For a multibyte string, true if
1482 it contains only ASCII characters.
1484 1: No charsets other than ascii, control-1, and latin-1 are
1485 found.
1487 2: Otherwise.
1491 string_xstring_p (Lisp_Object string)
1493 const unsigned char *p = SDATA (string);
1494 const unsigned char *endp = p + SBYTES (string);
1496 if (SCHARS (string) == SBYTES (string))
1497 return 0;
1499 while (p < endp)
1501 int c = STRING_CHAR_ADVANCE (p);
1503 if (c >= 0x100)
1504 return 2;
1506 return 1;
1510 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1512 CHARSETS is a vector. If Nth element is non-nil, it means the
1513 charset whose id is N is already found.
1515 It may lookup a translation table TABLE if supplied. */
1517 static void
1518 find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
1520 const unsigned char *pend = ptr + nbytes;
1522 if (nchars == nbytes)
1524 if (multibyte)
1525 ASET (charsets, charset_ascii, Qt);
1526 else
1527 while (ptr < pend)
1529 int c = *ptr++;
1531 if (!NILP (table))
1532 c = translate_char (table, c);
1533 if (ASCII_BYTE_P (c))
1534 ASET (charsets, charset_ascii, Qt);
1535 else
1536 ASET (charsets, charset_eight_bit, Qt);
1539 else
1541 while (ptr < pend)
1543 int c = STRING_CHAR_ADVANCE (ptr);
1544 struct charset *charset;
1546 if (!NILP (table))
1547 c = translate_char (table, c);
1548 charset = CHAR_CHARSET (c);
1549 ASET (charsets, CHARSET_ID (charset), Qt);
1554 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1555 2, 3, 0,
1556 doc: /* Return a list of charsets in the region between BEG and END.
1557 BEG and END are buffer positions.
1558 Optional arg TABLE if non-nil is a translation table to look up.
1560 If the current buffer is unibyte, the returned list may contain
1561 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1562 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1564 Lisp_Object charsets;
1565 EMACS_INT from, from_byte, to, stop, stop_byte;
1566 int i;
1567 Lisp_Object val;
1568 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1570 validate_region (&beg, &end);
1571 from = XFASTINT (beg);
1572 stop = to = XFASTINT (end);
1574 if (from < GPT && GPT < to)
1576 stop = GPT;
1577 stop_byte = GPT_BYTE;
1579 else
1580 stop_byte = CHAR_TO_BYTE (stop);
1582 from_byte = CHAR_TO_BYTE (from);
1584 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1585 while (1)
1587 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1588 stop_byte - from_byte, charsets, table,
1589 multibyte);
1590 if (stop < to)
1592 from = stop, from_byte = stop_byte;
1593 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1595 else
1596 break;
1599 val = Qnil;
1600 for (i = charset_table_used - 1; i >= 0; i--)
1601 if (!NILP (AREF (charsets, i)))
1602 val = Fcons (CHARSET_NAME (charset_table + i), val);
1603 return val;
1606 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1607 1, 2, 0,
1608 doc: /* Return a list of charsets in STR.
1609 Optional arg TABLE if non-nil is a translation table to look up.
1611 If STR is unibyte, the returned list may contain
1612 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1613 (Lisp_Object str, Lisp_Object table)
1615 Lisp_Object charsets;
1616 int i;
1617 Lisp_Object val;
1619 CHECK_STRING (str);
1621 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1622 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1623 charsets, table,
1624 STRING_MULTIBYTE (str));
1625 val = Qnil;
1626 for (i = charset_table_used - 1; i >= 0; i--)
1627 if (!NILP (AREF (charsets, i)))
1628 val = Fcons (CHARSET_NAME (charset_table + i), val);
1629 return val;
1634 /* Return a unified character code for C (>= 0x110000). VAL is a
1635 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1636 charset symbol. */
1638 maybe_unify_char (int c, Lisp_Object val)
1640 struct charset *charset;
1642 if (INTEGERP (val))
1643 return XINT (val);
1644 if (NILP (val))
1645 return c;
1647 CHECK_CHARSET_GET_CHARSET (val, charset);
1648 load_charset (charset, 1);
1649 if (! inhibit_load_charset_map)
1651 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1652 if (! NILP (val))
1653 c = XINT (val);
1655 else
1657 int code_index = c - CHARSET_CODE_OFFSET (charset);
1658 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1660 if (unified > 0)
1661 c = unified;
1663 return c;
1667 /* Return a character correponding to the code-point CODE of
1668 CHARSET. */
1671 decode_char (struct charset *charset, unsigned int code)
1673 int c, char_index;
1674 enum charset_method method = CHARSET_METHOD (charset);
1676 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1677 return -1;
1679 if (method == CHARSET_METHOD_SUBSET)
1681 Lisp_Object subset_info;
1683 subset_info = CHARSET_SUBSET (charset);
1684 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1685 code -= XINT (AREF (subset_info, 3));
1686 if (code < XFASTINT (AREF (subset_info, 1))
1687 || code > XFASTINT (AREF (subset_info, 2)))
1688 c = -1;
1689 else
1690 c = DECODE_CHAR (charset, code);
1692 else if (method == CHARSET_METHOD_SUPERSET)
1694 Lisp_Object parents;
1696 parents = CHARSET_SUPERSET (charset);
1697 c = -1;
1698 for (; CONSP (parents); parents = XCDR (parents))
1700 int id = XINT (XCAR (XCAR (parents)));
1701 int code_offset = XINT (XCDR (XCAR (parents)));
1702 unsigned this_code = code - code_offset;
1704 charset = CHARSET_FROM_ID (id);
1705 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1706 break;
1709 else
1711 char_index = CODE_POINT_TO_INDEX (charset, code);
1712 if (char_index < 0)
1713 return -1;
1715 if (method == CHARSET_METHOD_MAP)
1717 Lisp_Object decoder;
1719 decoder = CHARSET_DECODER (charset);
1720 if (! VECTORP (decoder))
1722 load_charset (charset, 1);
1723 decoder = CHARSET_DECODER (charset);
1725 if (VECTORP (decoder))
1726 c = XINT (AREF (decoder, char_index));
1727 else
1728 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1730 else /* method == CHARSET_METHOD_OFFSET */
1732 c = char_index + CHARSET_CODE_OFFSET (charset);
1733 if (CHARSET_UNIFIED_P (charset)
1734 && c > MAX_UNICODE_CHAR)
1735 MAYBE_UNIFY_CHAR (c);
1739 return c;
1742 /* Variable used temporarily by the macro ENCODE_CHAR. */
1743 Lisp_Object charset_work;
1745 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1746 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1747 use CHARSET's strict_max_char instead of max_char. */
1749 unsigned
1750 encode_char (struct charset *charset, int c)
1752 unsigned code;
1753 enum charset_method method = CHARSET_METHOD (charset);
1755 if (CHARSET_UNIFIED_P (charset))
1757 Lisp_Object deunifier;
1758 int code_index = -1;
1760 deunifier = CHARSET_DEUNIFIER (charset);
1761 if (! CHAR_TABLE_P (deunifier))
1763 load_charset (charset, 2);
1764 deunifier = CHARSET_DEUNIFIER (charset);
1766 if (CHAR_TABLE_P (deunifier))
1768 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1770 if (INTEGERP (deunified))
1771 code_index = XINT (deunified);
1773 else
1775 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1777 if (code_index >= 0)
1778 c = CHARSET_CODE_OFFSET (charset) + code_index;
1781 if (method == CHARSET_METHOD_SUBSET)
1783 Lisp_Object subset_info;
1784 struct charset *this_charset;
1786 subset_info = CHARSET_SUBSET (charset);
1787 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1788 code = ENCODE_CHAR (this_charset, c);
1789 if (code == CHARSET_INVALID_CODE (this_charset)
1790 || code < XFASTINT (AREF (subset_info, 1))
1791 || code > XFASTINT (AREF (subset_info, 2)))
1792 return CHARSET_INVALID_CODE (charset);
1793 code += XINT (AREF (subset_info, 3));
1794 return code;
1797 if (method == CHARSET_METHOD_SUPERSET)
1799 Lisp_Object parents;
1801 parents = CHARSET_SUPERSET (charset);
1802 for (; CONSP (parents); parents = XCDR (parents))
1804 int id = XINT (XCAR (XCAR (parents)));
1805 int code_offset = XINT (XCDR (XCAR (parents)));
1806 struct charset *this_charset = CHARSET_FROM_ID (id);
1808 code = ENCODE_CHAR (this_charset, c);
1809 if (code != CHARSET_INVALID_CODE (this_charset))
1810 return code + code_offset;
1812 return CHARSET_INVALID_CODE (charset);
1815 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1816 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1817 return CHARSET_INVALID_CODE (charset);
1819 if (method == CHARSET_METHOD_MAP)
1821 Lisp_Object encoder;
1822 Lisp_Object val;
1824 encoder = CHARSET_ENCODER (charset);
1825 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1827 load_charset (charset, 2);
1828 encoder = CHARSET_ENCODER (charset);
1830 if (CHAR_TABLE_P (encoder))
1832 val = CHAR_TABLE_REF (encoder, c);
1833 if (NILP (val))
1834 return CHARSET_INVALID_CODE (charset);
1835 code = XINT (val);
1836 if (! CHARSET_COMPACT_CODES_P (charset))
1837 code = INDEX_TO_CODE_POINT (charset, code);
1839 else
1841 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1842 code = INDEX_TO_CODE_POINT (charset, code);
1845 else /* method == CHARSET_METHOD_OFFSET */
1847 int code_index = c - CHARSET_CODE_OFFSET (charset);
1849 code = INDEX_TO_CODE_POINT (charset, code_index);
1852 return code;
1856 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1857 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1858 Return nil if CODE-POINT is not valid in CHARSET.
1860 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1862 Optional argument RESTRICTION specifies a way to map the pair of CCS
1863 and CODE-POINT to a character. Currently not supported and just ignored. */)
1864 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1866 int c, id;
1867 unsigned code;
1868 struct charset *charsetp;
1870 CHECK_CHARSET_GET_ID (charset, id);
1871 if (CONSP (code_point))
1873 CHECK_NATNUM_CAR (code_point);
1874 CHECK_NATNUM_CDR (code_point);
1875 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1877 else
1879 CHECK_NATNUM (code_point);
1880 code = XINT (code_point);
1882 charsetp = CHARSET_FROM_ID (id);
1883 c = DECODE_CHAR (charsetp, code);
1884 return (c >= 0 ? make_number (c) : Qnil);
1888 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1889 doc: /* Encode the character CH into a code-point of CHARSET.
1890 Return nil if CHARSET doesn't include CH.
1892 Optional argument RESTRICTION specifies a way to map CH to a
1893 code-point in CCS. Currently not supported and just ignored. */)
1894 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1896 int id;
1897 unsigned code;
1898 struct charset *charsetp;
1900 CHECK_CHARSET_GET_ID (charset, id);
1901 CHECK_NATNUM (ch);
1902 charsetp = CHARSET_FROM_ID (id);
1903 code = ENCODE_CHAR (charsetp, XINT (ch));
1904 if (code == CHARSET_INVALID_CODE (charsetp))
1905 return Qnil;
1906 if (code > 0x7FFFFFF)
1907 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1908 return make_number (code);
1912 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1913 doc:
1914 /* Return a character of CHARSET whose position codes are CODEn.
1916 CODE1 through CODE4 are optional, but if you don't supply sufficient
1917 position codes, it is assumed that the minimum code in each dimension
1918 is specified. */)
1919 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1921 int id, dimension;
1922 struct charset *charsetp;
1923 unsigned code;
1924 int c;
1926 CHECK_CHARSET_GET_ID (charset, id);
1927 charsetp = CHARSET_FROM_ID (id);
1929 dimension = CHARSET_DIMENSION (charsetp);
1930 if (NILP (code1))
1931 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1932 ? 0 : CHARSET_MIN_CODE (charsetp));
1933 else
1935 CHECK_NATNUM (code1);
1936 if (XFASTINT (code1) >= 0x100)
1937 args_out_of_range (make_number (0xFF), code1);
1938 code = XFASTINT (code1);
1940 if (dimension > 1)
1942 code <<= 8;
1943 if (NILP (code2))
1944 code |= charsetp->code_space[(dimension - 2) * 4];
1945 else
1947 CHECK_NATNUM (code2);
1948 if (XFASTINT (code2) >= 0x100)
1949 args_out_of_range (make_number (0xFF), code2);
1950 code |= XFASTINT (code2);
1953 if (dimension > 2)
1955 code <<= 8;
1956 if (NILP (code3))
1957 code |= charsetp->code_space[(dimension - 3) * 4];
1958 else
1960 CHECK_NATNUM (code3);
1961 if (XFASTINT (code3) >= 0x100)
1962 args_out_of_range (make_number (0xFF), code3);
1963 code |= XFASTINT (code3);
1966 if (dimension > 3)
1968 code <<= 8;
1969 if (NILP (code4))
1970 code |= charsetp->code_space[0];
1971 else
1973 CHECK_NATNUM (code4);
1974 if (XFASTINT (code4) >= 0x100)
1975 args_out_of_range (make_number (0xFF), code4);
1976 code |= XFASTINT (code4);
1983 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1984 code &= 0x7F7F7F7F;
1985 c = DECODE_CHAR (charsetp, code);
1986 if (c < 0)
1987 error ("Invalid code(s)");
1988 return make_number (c);
1992 /* Return the first charset in CHARSET_LIST that contains C.
1993 CHARSET_LIST is a list of charset IDs. If it is nil, use
1994 Vcharset_ordered_list. */
1996 struct charset *
1997 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1999 int maybe_null = 0;
2001 if (NILP (charset_list))
2002 charset_list = Vcharset_ordered_list;
2003 else
2004 maybe_null = 1;
2006 while (CONSP (charset_list))
2008 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2009 unsigned code = ENCODE_CHAR (charset, c);
2011 if (code != CHARSET_INVALID_CODE (charset))
2013 if (code_return)
2014 *code_return = code;
2015 return charset;
2017 charset_list = XCDR (charset_list);
2018 if (! maybe_null
2019 && c <= MAX_UNICODE_CHAR
2020 && EQ (charset_list, Vcharset_non_preferred_head))
2021 return CHARSET_FROM_ID (charset_unicode);
2023 return (maybe_null ? NULL
2024 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2025 : CHARSET_FROM_ID (charset_eight_bit));
2029 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2030 doc:
2031 /*Return list of charset and one to four position-codes of CH.
2032 The charset is decided by the current priority order of charsets.
2033 A position-code is a byte value of each dimension of the code-point of
2034 CH in the charset. */)
2035 (Lisp_Object ch)
2037 struct charset *charset;
2038 int c, dimension;
2039 unsigned code;
2040 Lisp_Object val;
2042 CHECK_CHARACTER (ch);
2043 c = XFASTINT (ch);
2044 charset = CHAR_CHARSET (c);
2045 if (! charset)
2046 abort ();
2047 code = ENCODE_CHAR (charset, c);
2048 if (code == CHARSET_INVALID_CODE (charset))
2049 abort ();
2050 dimension = CHARSET_DIMENSION (charset);
2051 for (val = Qnil; dimension > 0; dimension--)
2053 val = Fcons (make_number (code & 0xFF), val);
2054 code >>= 8;
2056 return Fcons (CHARSET_NAME (charset), val);
2060 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2061 doc: /* Return the charset of highest priority that contains CH.
2062 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2063 from which to find the charset. It may also be a coding system. In
2064 that case, find the charset from what supported by that coding system. */)
2065 (Lisp_Object ch, Lisp_Object restriction)
2067 struct charset *charset;
2069 CHECK_CHARACTER (ch);
2070 if (NILP (restriction))
2071 charset = CHAR_CHARSET (XINT (ch));
2072 else
2074 if (CONSP (restriction))
2076 int c = XFASTINT (ch);
2078 for (; CONSP (restriction); restriction = XCDR (restriction))
2080 struct charset *charset;
2082 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
2083 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
2084 return XCAR (restriction);
2086 return Qnil;
2088 restriction = coding_system_charset_list (restriction);
2089 charset = char_charset (XINT (ch), restriction, NULL);
2090 if (! charset)
2091 return Qnil;
2093 return (CHARSET_NAME (charset));
2097 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2098 doc: /*
2099 Return charset of a character in the current buffer at position POS.
2100 If POS is nil, it defauls to the current point.
2101 If POS is out of range, the value is nil. */)
2102 (Lisp_Object pos)
2104 Lisp_Object ch;
2105 struct charset *charset;
2107 ch = Fchar_after (pos);
2108 if (! INTEGERP (ch))
2109 return ch;
2110 charset = CHAR_CHARSET (XINT (ch));
2111 return (CHARSET_NAME (charset));
2115 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2116 doc: /*
2117 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2119 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2120 by their DIMENSION, CHARS, and FINAL-CHAR,
2121 whereas Emacs distinguishes them by charset symbol.
2122 See the documentation of the function `charset-info' for the meanings of
2123 DIMENSION, CHARS, and FINAL-CHAR. */)
2124 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2126 int id;
2127 int chars_flag;
2129 check_iso_charset_parameter (dimension, chars, final_char);
2130 chars_flag = XFASTINT (chars) == 96;
2131 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2132 XFASTINT (final_char));
2133 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2137 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2138 0, 0, 0,
2139 doc: /*
2140 Internal use only.
2141 Clear temporary charset mapping tables.
2142 It should be called only from temacs invoked for dumping. */)
2143 (void)
2145 if (temp_charset_work)
2147 free (temp_charset_work);
2148 temp_charset_work = NULL;
2151 if (CHAR_TABLE_P (Vchar_unify_table))
2152 Foptimize_char_table (Vchar_unify_table, Qnil);
2154 return Qnil;
2157 DEFUN ("charset-priority-list", Fcharset_priority_list,
2158 Scharset_priority_list, 0, 1, 0,
2159 doc: /* Return the list of charsets ordered by priority.
2160 HIGHESTP non-nil means just return the highest priority one. */)
2161 (Lisp_Object highestp)
2163 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2165 if (!NILP (highestp))
2166 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2168 while (!NILP (list))
2170 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2171 list = XCDR (list);
2173 return Fnreverse (val);
2176 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2177 1, MANY, 0,
2178 doc: /* Assign higher priority to the charsets given as arguments.
2179 usage: (set-charset-priority &rest charsets) */)
2180 (int nargs, Lisp_Object *args)
2182 Lisp_Object new_head, old_list, arglist[2];
2183 Lisp_Object list_2022, list_emacs_mule;
2184 int i, id;
2186 old_list = Fcopy_sequence (Vcharset_ordered_list);
2187 new_head = Qnil;
2188 for (i = 0; i < nargs; i++)
2190 CHECK_CHARSET_GET_ID (args[i], id);
2191 if (! NILP (Fmemq (make_number (id), old_list)))
2193 old_list = Fdelq (make_number (id), old_list);
2194 new_head = Fcons (make_number (id), new_head);
2197 arglist[0] = Fnreverse (new_head);
2198 arglist[1] = Vcharset_non_preferred_head = old_list;
2199 Vcharset_ordered_list = Fnconc (2, arglist);
2200 charset_ordered_list_tick++;
2202 charset_unibyte = -1;
2203 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2204 CONSP (old_list); old_list = XCDR (old_list))
2206 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2207 list_2022 = Fcons (XCAR (old_list), list_2022);
2208 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2209 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2210 if (charset_unibyte < 0)
2212 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2214 if (CHARSET_DIMENSION (charset) == 1
2215 && CHARSET_ASCII_COMPATIBLE_P (charset)
2216 && CHARSET_MAX_CHAR (charset) >= 0x80)
2217 charset_unibyte = CHARSET_ID (charset);
2220 Viso_2022_charset_list = Fnreverse (list_2022);
2221 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2222 if (charset_unibyte < 0)
2223 charset_unibyte = charset_iso_8859_1;
2225 return Qnil;
2228 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2229 0, 1, 0,
2230 doc: /* Internal use only.
2231 Return charset identification number of CHARSET. */)
2232 (Lisp_Object charset)
2234 int id;
2236 CHECK_CHARSET_GET_ID (charset, id);
2237 return make_number (id);
2240 struct charset_sort_data
2242 Lisp_Object charset;
2243 int id;
2244 int priority;
2247 static int
2248 charset_compare (const void *d1, const void *d2)
2250 const struct charset_sort_data *data1 = d1, *data2 = d2;
2251 return (data1->priority - data2->priority);
2254 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2255 doc: /* Sort charset list CHARSETS by a priority of each charset.
2256 Return the sorted list. CHARSETS is modified by side effects.
2257 See also `charset-priority-list' and `set-charset-priority'. */)
2258 (Lisp_Object charsets)
2260 Lisp_Object len = Flength (charsets);
2261 int n = XFASTINT (len), i, j, done;
2262 Lisp_Object tail, elt, attrs;
2263 struct charset_sort_data *sort_data;
2264 int id, min_id, max_id;
2265 USE_SAFE_ALLOCA;
2267 if (n == 0)
2268 return Qnil;
2269 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2270 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2272 elt = XCAR (tail);
2273 CHECK_CHARSET_GET_ATTR (elt, attrs);
2274 sort_data[i].charset = elt;
2275 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2276 if (i == 0)
2277 min_id = max_id = id;
2278 else if (id < min_id)
2279 min_id = id;
2280 else if (id > max_id)
2281 max_id = id;
2283 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2284 done < n && CONSP (tail); tail = XCDR (tail), i++)
2286 elt = XCAR (tail);
2287 id = XFASTINT (elt);
2288 if (id >= min_id && id <= max_id)
2289 for (j = 0; j < n; j++)
2290 if (sort_data[j].id == id)
2292 sort_data[j].priority = i;
2293 done++;
2296 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2297 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2298 XSETCAR (tail, sort_data[i].charset);
2299 SAFE_FREE ();
2300 return charsets;
2304 void
2305 init_charset (void)
2307 Lisp_Object tempdir;
2308 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2309 if (access ((char *) SDATA (tempdir), 0) < 0)
2311 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2312 Emacs will not function correctly without the character map files.\n\
2313 Please check your installation!\n",
2314 tempdir);
2315 /* TODO should this be a fatal error? (Bug#909) */
2318 Vcharset_map_path = Fcons (tempdir, Qnil);
2322 void
2323 init_charset_once (void)
2325 int i, j, k;
2327 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2328 for (j = 0; j < ISO_MAX_CHARS; j++)
2329 for (k = 0; k < ISO_MAX_FINAL; k++)
2330 iso_charset_table[i][j][k] = -1;
2332 for (i = 0; i < 256; i++)
2333 emacs_mule_charset[i] = -1;
2335 charset_jisx0201_roman = -1;
2336 charset_jisx0208_1978 = -1;
2337 charset_jisx0208 = -1;
2338 charset_ksc5601 = -1;
2341 #ifdef emacs
2343 void
2344 syms_of_charset (void)
2346 DEFSYM (Qcharsetp, "charsetp");
2348 DEFSYM (Qascii, "ascii");
2349 DEFSYM (Qunicode, "unicode");
2350 DEFSYM (Qemacs, "emacs");
2351 DEFSYM (Qeight_bit, "eight-bit");
2352 DEFSYM (Qiso_8859_1, "iso-8859-1");
2354 DEFSYM (Qgl, "gl");
2355 DEFSYM (Qgr, "gr");
2357 staticpro (&Vcharset_ordered_list);
2358 Vcharset_ordered_list = Qnil;
2360 staticpro (&Viso_2022_charset_list);
2361 Viso_2022_charset_list = Qnil;
2363 staticpro (&Vemacs_mule_charset_list);
2364 Vemacs_mule_charset_list = Qnil;
2366 /* Don't staticpro them here. It's done in syms_of_fns. */
2367 QCtest = intern_c_string (":test");
2368 Qeq = intern_c_string ("eq");
2370 staticpro (&Vcharset_hash_table);
2372 Lisp_Object args[2];
2373 args[0] = QCtest;
2374 args[1] = Qeq;
2375 Vcharset_hash_table = Fmake_hash_table (2, args);
2378 charset_table_size = 128;
2379 charset_table = ((struct charset *)
2380 xmalloc (sizeof (struct charset) * charset_table_size));
2381 charset_table_used = 0;
2383 defsubr (&Scharsetp);
2384 defsubr (&Smap_charset_chars);
2385 defsubr (&Sdefine_charset_internal);
2386 defsubr (&Sdefine_charset_alias);
2387 defsubr (&Scharset_plist);
2388 defsubr (&Sset_charset_plist);
2389 defsubr (&Sunify_charset);
2390 defsubr (&Sget_unused_iso_final_char);
2391 defsubr (&Sdeclare_equiv_charset);
2392 defsubr (&Sfind_charset_region);
2393 defsubr (&Sfind_charset_string);
2394 defsubr (&Sdecode_char);
2395 defsubr (&Sencode_char);
2396 defsubr (&Ssplit_char);
2397 defsubr (&Smake_char);
2398 defsubr (&Schar_charset);
2399 defsubr (&Scharset_after);
2400 defsubr (&Siso_charset);
2401 defsubr (&Sclear_charset_maps);
2402 defsubr (&Scharset_priority_list);
2403 defsubr (&Sset_charset_priority);
2404 defsubr (&Scharset_id_internal);
2405 defsubr (&Ssort_charsets);
2407 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2408 doc: /* *List of directories to search for charset map files. */);
2409 Vcharset_map_path = Qnil;
2411 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2412 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2413 inhibit_load_charset_map = 0;
2415 DEFVAR_LISP ("charset-list", &Vcharset_list,
2416 doc: /* List of all charsets ever defined. */);
2417 Vcharset_list = Qnil;
2419 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2420 doc: /* ISO639 language mnemonic symbol for the current language environment.
2421 If the current language environment is for multiple languages (e.g. "Latin-1"),
2422 the value may be a list of mnemonics. */);
2423 Vcurrent_iso639_language = Qnil;
2425 charset_ascii
2426 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2427 0, 127, 'B', -1, 0, 1, 0, 0);
2428 charset_iso_8859_1
2429 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2430 0, 255, -1, -1, -1, 1, 0, 0);
2431 charset_unicode
2432 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2433 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2434 charset_emacs
2435 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2436 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2437 charset_eight_bit
2438 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2439 128, 255, -1, 0, -1, 0, 1,
2440 MAX_5_BYTE_CHAR + 1);
2441 charset_unibyte = charset_iso_8859_1;
2444 #endif /* emacs */
2446 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2447 (do not change this comment) */