(custom-group-value-create): Insert some
[emacs.git] / src / charset.c
blob052f31868316035a9e2d571379f402c4c15f314d
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28 #include <config.h>
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
64 /* Table of struct charset. */
65 struct charset *charset_table;
67 static int charset_table_size;
68 static int charset_table_used;
70 Lisp_Object Qcharsetp;
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77 Lisp_Object Qemacs;
79 /* The corresponding charsets. */
80 int charset_ascii;
81 int charset_eight_bit;
82 int charset_iso_8859_1;
83 int charset_unicode;
84 int charset_emacs;
86 /* The other special charsets. */
87 int charset_jisx0201_roman;
88 int charset_jisx0208_1978;
89 int charset_jisx0208;
91 /* Value of charset attribute `charset-iso-plane'. */
92 Lisp_Object Qgl, Qgr;
94 /* Charset of unibyte characters. */
95 int charset_unibyte;
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list;
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102 Lisp_Object Vcharset_non_preferred_head;
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
106 -1. */
107 unsigned short charset_ordered_list_tick;
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list;
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list;
115 struct charset *emacs_mule_charset[256];
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
121 Lisp_Object Vcharset_map_path;
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map;
126 Lisp_Object Vcurrent_iso639_language;
128 /* Defined in chartab.c */
129 extern void
130 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
131 Lisp_Object function, Lisp_Object table,
132 Lisp_Object arg, struct charset *charset,
133 unsigned from, unsigned to));
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
150 : -1)
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
163 << 8) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
166 << 16) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
168 << 24))))
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
173 static struct
175 /* The current charset for which the following tables are setup. */
176 struct charset *current;
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char, max_char;
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203 } *temp_charset_work;
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
229 /* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
231 int charset_map_loaded;
233 struct charset_map_entries
235 struct {
236 unsigned from, to;
237 int c;
238 } entry[0x10000];
239 struct charset_map_entries *next;
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
258 If CONTROL_FLAG is 2, setup the following tables.
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267 static void
268 load_charset_map (charset, entries, n_entries, control_flag)
269 struct charset *charset;
270 struct charset_map_entries *entries;
271 int n_entries;
272 int control_flag;
274 Lisp_Object vec, table;
275 unsigned max_code = CHARSET_MAX_CODE (charset);
276 int ascii_compatible_p = charset->ascii_compatible_p;
277 int min_char, max_char, nonascii_min_char;
278 int i;
279 unsigned char *fast_map = charset->fast_map;
281 if (n_entries <= 0)
282 return;
284 if (control_flag)
286 if (! inhibit_load_charset_map)
288 if (control_flag == 1)
290 if (charset->method == CHARSET_METHOD_MAP)
292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
297 else
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
304 else
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
313 else
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
323 else
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
333 charset_map_loaded = 1;
336 min_char = max_char = entries->entry[0].c;
337 nonascii_min_char = MAX_CHAR;
338 for (i = 0; i < n_entries; i++)
340 unsigned from, to;
341 int from_index, to_index;
342 int from_c, to_c;
343 int idx = i % 0x10000;
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
353 to_index = from_index;
354 to_c = from_c;
356 else
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
361 if (from_index < 0 || to_index < 0)
362 continue;
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
369 if (control_flag == 1)
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
380 else if (control_flag == 2)
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
406 if (ascii_compatible_p)
408 if (! ASCII_BYTE_P (from_c))
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
413 else if (! ASCII_BYTE_P (to_c))
415 nonascii_min_char = 0x80;
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
424 if (control_flag == 0)
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
430 else if (control_flag == 4)
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
441 static INLINE unsigned
442 read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
446 int c;
447 unsigned n;
449 while ((c = getc (fp)) != EOF)
451 if (c == '#')
453 while ((c = getc (fp)) != EOF && c != '\n');
455 else if (c == '0')
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
461 if (c == EOF)
463 *eof = 1;
464 return 0;
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
475 if (c != EOF)
476 ungetc (c, fp);
477 return n;
481 /* Return a mapping vector for CHARSET loaded from MAPFILE.
482 Each line of MAPFILE has this form
483 0xAAAA 0xCCCC
484 where 0xAAAA is a code-point and 0xCCCC is the corresponding
485 character code, or this form
486 0xAAAA-0xBBBB 0xCCCC
487 where 0xAAAA and 0xBBBB are code-points specifying a range, and
488 0xCCCC is the first character code of the range.
490 The returned vector has this form:
491 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
492 where CODE1 is a code-point or a cons of code-points specifying a
493 range. */
495 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
497 static void
498 load_charset_map_from_file (charset, mapfile, control_flag)
499 struct charset *charset;
500 Lisp_Object mapfile;
501 int control_flag;
503 unsigned min_code = CHARSET_MIN_CODE (charset);
504 unsigned max_code = CHARSET_MAX_CODE (charset);
505 int fd;
506 FILE *fp;
507 int eof;
508 Lisp_Object suffixes;
509 struct charset_map_entries *head, *entries;
510 int n_entries;
512 suffixes = Fcons (build_string (".map"),
513 Fcons (build_string (".TXT"), Qnil));
515 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
519 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
520 return;
523 head = entries = ((struct charset_map_entries *)
524 alloca (sizeof (struct charset_map_entries)));
525 n_entries = 0;
526 eof = 0;
527 while (1)
529 unsigned from, to;
530 int c;
531 int idx;
533 from = read_hex (fp, &eof);
534 if (eof)
535 break;
536 if (getc (fp) == '-')
537 to = read_hex (fp, &eof);
538 else
539 to = from;
540 c = (int) read_hex (fp, &eof);
542 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
543 continue;
545 if (n_entries > 0 && (n_entries % 0x10000) == 0)
547 entries->next = ((struct charset_map_entries *)
548 alloca (sizeof (struct charset_map_entries)));
549 entries = entries->next;
551 idx = n_entries % 0x10000;
552 entries->entry[idx].from = from;
553 entries->entry[idx].to = to;
554 entries->entry[idx].c = c;
555 n_entries++;
557 fclose (fp);
558 close (fd);
560 load_charset_map (charset, head, n_entries, control_flag);
563 static void
564 load_charset_map_from_vector (charset, vec, control_flag)
565 struct charset *charset;
566 Lisp_Object vec;
567 int control_flag;
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
576 if (len % 2 == 1)
578 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
579 return;
582 head = entries = ((struct charset_map_entries *)
583 alloca (sizeof (struct charset_map_entries)));
584 n_entries = 0;
585 for (i = 0; i < len; i += 2)
587 Lisp_Object val, val2;
588 unsigned from, to;
589 int c;
590 int idx;
592 val = AREF (vec, i);
593 if (CONSP (val))
595 val2 = XCDR (val);
596 val = XCAR (val);
597 CHECK_NATNUM (val);
598 CHECK_NATNUM (val2);
599 from = XFASTINT (val);
600 to = XFASTINT (val2);
602 else
604 CHECK_NATNUM (val);
605 from = to = XFASTINT (val);
607 val = AREF (vec, i + 1);
608 CHECK_NATNUM (val);
609 c = XFASTINT (val);
611 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
612 continue;
614 if (n_entries > 0 && (n_entries % 0x10000) == 0)
616 entries->next = ((struct charset_map_entries *)
617 alloca (sizeof (struct charset_map_entries)));
618 entries = entries->next;
620 idx = n_entries % 0x10000;
621 entries->entry[idx].from = from;
622 entries->entry[idx].to = to;
623 entries->entry[idx].c = c;
624 n_entries++;
627 load_charset_map (charset, head, n_entries, control_flag);
631 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
632 map it is (see the comment of load_charset_map for the detail). */
634 static void
635 load_charset (charset, control_flag)
636 struct charset *charset;
637 int control_flag;
639 Lisp_Object map;
641 if (inhibit_load_charset_map
642 && temp_charset_work
643 && charset == temp_charset_work->current
644 && (control_flag == 2 == temp_charset_work->for_encoder))
645 return;
647 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
648 map = CHARSET_MAP (charset);
649 else if (CHARSET_UNIFIED_P (charset))
650 map = CHARSET_UNIFY_MAP (charset);
651 if (STRINGP (map))
652 load_charset_map_from_file (charset, map, control_flag);
653 else
654 load_charset_map_from_vector (charset, map, control_flag);
658 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
659 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
660 (object)
661 Lisp_Object object;
663 return (CHARSETP (object) ? Qt : Qnil);
667 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
668 Lisp_Object function, Lisp_Object arg,
669 unsigned from, unsigned to));
671 void
672 map_charset_for_dump (c_function, function, arg, from, to)
673 void (*c_function) (Lisp_Object, Lisp_Object);
674 Lisp_Object function, arg;
675 unsigned from, to;
677 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
678 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
679 Lisp_Object range;
680 int c, stop;
681 struct gcpro gcpro1;
683 range = Fcons (Qnil, Qnil);
684 GCPRO1 (range);
686 c = temp_charset_work->min_char;
687 stop = (temp_charset_work->max_char < 0x20000
688 ? temp_charset_work->max_char : 0xFFFF);
690 while (1)
692 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
694 if (index >= from_idx && index <= to_idx)
696 if (NILP (XCAR (range)))
697 XSETCAR (range, make_number (c));
699 else if (! NILP (XCAR (range)))
701 XSETCDR (range, make_number (c - 1));
702 if (c_function)
703 (*c_function) (arg, range);
704 else
705 call2 (function, range, arg);
706 XSETCAR (range, Qnil);
708 if (c == stop)
710 if (c == temp_charset_work->max_char)
712 if (! NILP (XCAR (range)))
714 XSETCDR (range, make_number (c));
715 if (c_function)
716 (*c_function) (arg, range);
717 else
718 call2 (function, range, arg);
720 break;
722 c = 0x1FFFF;
723 stop = temp_charset_work->max_char;
725 c++;
729 void
730 map_charset_chars (c_function, function, arg,
731 charset, from, to)
732 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
733 Lisp_Object function, arg;
734 struct charset *charset;
735 unsigned from, to;
737 Lisp_Object range;
738 int partial;
740 partial = (from > CHARSET_MIN_CODE (charset)
741 || to < CHARSET_MAX_CODE (charset));
743 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
745 int from_idx = CODE_POINT_TO_INDEX (charset, from);
746 int to_idx = CODE_POINT_TO_INDEX (charset, to);
747 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
748 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
750 if (CHARSET_UNIFIED_P (charset))
752 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
753 load_charset (charset, 2);
754 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
755 map_char_table_for_charset (c_function, function,
756 CHARSET_DEUNIFIER (charset), arg,
757 partial ? charset : NULL, from, to);
758 else
759 map_charset_for_dump (c_function, function, arg, from, to);
762 range = Fcons (make_number (from_c), make_number (to_c));
763 if (NILP (function))
764 (*c_function) (arg, range);
765 else
766 call2 (function, range, arg);
768 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
770 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
771 load_charset (charset, 2);
772 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
773 map_char_table_for_charset (c_function, function,
774 CHARSET_ENCODER (charset), arg,
775 partial ? charset : NULL, from, to);
776 else
777 map_charset_for_dump (c_function, function, arg, from, to);
779 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
781 Lisp_Object subset_info;
782 int offset;
784 subset_info = CHARSET_SUBSET (charset);
785 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
786 offset = XINT (AREF (subset_info, 3));
787 from -= offset;
788 if (from < XFASTINT (AREF (subset_info, 1)))
789 from = XFASTINT (AREF (subset_info, 1));
790 to -= offset;
791 if (to > XFASTINT (AREF (subset_info, 2)))
792 to = XFASTINT (AREF (subset_info, 2));
793 map_charset_chars (c_function, function, arg, charset, from, to);
795 else /* i.e. CHARSET_METHOD_SUPERSET */
797 Lisp_Object parents;
799 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
800 parents = XCDR (parents))
802 int offset;
803 unsigned this_from, this_to;
805 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
806 offset = XINT (XCDR (XCAR (parents)));
807 this_from = from - offset;
808 this_to = to - offset;
809 if (this_from < CHARSET_MIN_CODE (charset))
810 this_from = CHARSET_MIN_CODE (charset);
811 if (this_to > CHARSET_MAX_CODE (charset))
812 this_to = CHARSET_MAX_CODE (charset);
813 map_charset_chars (c_function, function, arg, charset,
814 this_from, this_to);
819 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
820 doc: /* Call FUNCTION for all characters in CHARSET.
821 FUNCTION is called with an argument RANGE and the optional 3rd
822 argument ARG.
824 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
825 characters contained in CHARSET.
827 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
828 range of code points of target characters. */)
829 (function, charset, arg, from_code, to_code)
830 Lisp_Object function, charset, arg, from_code, to_code;
832 struct charset *cs;
833 unsigned from, to;
835 CHECK_CHARSET_GET_CHARSET (charset, cs);
836 if (NILP (from_code))
837 from = CHARSET_MIN_CODE (cs);
838 else
840 CHECK_NATNUM (from_code);
841 from = XINT (from_code);
842 if (from < CHARSET_MIN_CODE (cs))
843 from = CHARSET_MIN_CODE (cs);
845 if (NILP (to_code))
846 to = CHARSET_MAX_CODE (cs);
847 else
849 CHECK_NATNUM (to_code);
850 to = XINT (to_code);
851 if (to > CHARSET_MAX_CODE (cs))
852 to = CHARSET_MAX_CODE (cs);
854 map_charset_chars (NULL, function, arg, cs, from, to);
855 return Qnil;
859 /* Define a charset according to the arguments. The Nth argument is
860 the Nth attribute of the charset (the last attribute `charset-id'
861 is not included). See the docstring of `define-charset' for the
862 detail. */
864 DEFUN ("define-charset-internal", Fdefine_charset_internal,
865 Sdefine_charset_internal, charset_arg_max, MANY, 0,
866 doc: /* For internal use only.
867 usage: (define-charset-internal ...) */)
868 (nargs, args)
869 int nargs;
870 Lisp_Object *args;
872 /* Charset attr vector. */
873 Lisp_Object attrs;
874 Lisp_Object val;
875 unsigned hash_code;
876 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
877 int i, j;
878 struct charset charset;
879 int id;
880 int dimension;
881 int new_definition_p;
882 int nchars;
884 if (nargs != charset_arg_max)
885 return Fsignal (Qwrong_number_of_arguments,
886 Fcons (intern ("define-charset-internal"),
887 make_number (nargs)));
889 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
891 CHECK_SYMBOL (args[charset_arg_name]);
892 ASET (attrs, charset_name, args[charset_arg_name]);
894 val = args[charset_arg_code_space];
895 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
897 int min_byte, max_byte;
899 min_byte = XINT (Faref (val, make_number (i * 2)));
900 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
901 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
902 error ("Invalid :code-space value");
903 charset.code_space[i * 4] = min_byte;
904 charset.code_space[i * 4 + 1] = max_byte;
905 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
906 nchars *= charset.code_space[i * 4 + 2];
907 charset.code_space[i * 4 + 3] = nchars;
908 if (max_byte > 0)
909 dimension = i + 1;
912 val = args[charset_arg_dimension];
913 if (NILP (val))
914 charset.dimension = dimension;
915 else
917 CHECK_NATNUM (val);
918 charset.dimension = XINT (val);
919 if (charset.dimension < 1 || charset.dimension > 4)
920 args_out_of_range_3 (val, make_number (1), make_number (4));
923 charset.code_linear_p
924 = (charset.dimension == 1
925 || (charset.code_space[2] == 256
926 && (charset.dimension == 2
927 || (charset.code_space[6] == 256
928 && (charset.dimension == 3
929 || charset.code_space[10] == 256)))));
931 if (! charset.code_linear_p)
933 charset.code_space_mask = (unsigned char *) xmalloc (256);
934 bzero (charset.code_space_mask, 256);
935 for (i = 0; i < 4; i++)
936 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
937 j++)
938 charset.code_space_mask[j] |= (1 << i);
941 charset.iso_chars_96 = charset.code_space[2] == 96;
943 charset.min_code = (charset.code_space[0]
944 | (charset.code_space[4] << 8)
945 | (charset.code_space[8] << 16)
946 | (charset.code_space[12] << 24));
947 charset.max_code = (charset.code_space[1]
948 | (charset.code_space[5] << 8)
949 | (charset.code_space[9] << 16)
950 | (charset.code_space[13] << 24));
951 charset.char_index_offset = 0;
953 val = args[charset_arg_min_code];
954 if (! NILP (val))
956 unsigned code;
958 if (INTEGERP (val))
959 code = XINT (val);
960 else
962 CHECK_CONS (val);
963 CHECK_NUMBER_CAR (val);
964 CHECK_NUMBER_CDR (val);
965 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
967 if (code < charset.min_code
968 || code > charset.max_code)
969 args_out_of_range_3 (make_number (charset.min_code),
970 make_number (charset.max_code), val);
971 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
972 charset.min_code = code;
975 val = args[charset_arg_max_code];
976 if (! NILP (val))
978 unsigned code;
980 if (INTEGERP (val))
981 code = XINT (val);
982 else
984 CHECK_CONS (val);
985 CHECK_NUMBER_CAR (val);
986 CHECK_NUMBER_CDR (val);
987 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
989 if (code < charset.min_code
990 || code > charset.max_code)
991 args_out_of_range_3 (make_number (charset.min_code),
992 make_number (charset.max_code), val);
993 charset.max_code = code;
996 charset.compact_codes_p = charset.max_code < 0x10000;
998 val = args[charset_arg_invalid_code];
999 if (NILP (val))
1001 if (charset.min_code > 0)
1002 charset.invalid_code = 0;
1003 else
1005 XSETINT (val, charset.max_code + 1);
1006 if (XINT (val) == charset.max_code + 1)
1007 charset.invalid_code = charset.max_code + 1;
1008 else
1009 error ("Attribute :invalid-code must be specified");
1012 else
1014 CHECK_NATNUM (val);
1015 charset.invalid_code = XFASTINT (val);
1018 val = args[charset_arg_iso_final];
1019 if (NILP (val))
1020 charset.iso_final = -1;
1021 else
1023 CHECK_NUMBER (val);
1024 if (XINT (val) < '0' || XINT (val) > 127)
1025 error ("Invalid iso-final-char: %d", XINT (val));
1026 charset.iso_final = XINT (val);
1029 val = args[charset_arg_iso_revision];
1030 if (NILP (val))
1031 charset.iso_revision = -1;
1032 else
1034 CHECK_NUMBER (val);
1035 if (XINT (val) > 63)
1036 args_out_of_range (make_number (63), val);
1037 charset.iso_revision = XINT (val);
1040 val = args[charset_arg_emacs_mule_id];
1041 if (NILP (val))
1042 charset.emacs_mule_id = -1;
1043 else
1045 CHECK_NATNUM (val);
1046 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1047 error ("Invalid emacs-mule-id: %d", XINT (val));
1048 charset.emacs_mule_id = XINT (val);
1051 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1053 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1055 charset.unified_p = 0;
1057 bzero (charset.fast_map, sizeof (charset.fast_map));
1059 if (! NILP (args[charset_arg_code_offset]))
1061 val = args[charset_arg_code_offset];
1062 CHECK_NUMBER (val);
1064 charset.method = CHARSET_METHOD_OFFSET;
1065 charset.code_offset = XINT (val);
1067 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1068 charset.min_char = i + charset.code_offset;
1069 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1070 charset.max_char = i + charset.code_offset;
1071 if (charset.max_char > MAX_CHAR)
1072 error ("Unsupported max char: %d", charset.max_char);
1074 i = (charset.min_char >> 7) << 7;
1075 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1076 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1077 i = (i >> 12) << 12;
1078 for (; i <= charset.max_char; i += 0x1000)
1079 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1081 else if (! NILP (args[charset_arg_map]))
1083 val = args[charset_arg_map];
1084 ASET (attrs, charset_map, val);
1085 charset.method = CHARSET_METHOD_MAP;
1087 else if (! NILP (args[charset_arg_subset]))
1089 Lisp_Object parent;
1090 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1091 struct charset *parent_charset;
1093 val = args[charset_arg_subset];
1094 parent = Fcar (val);
1095 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1096 parent_min_code = Fnth (make_number (1), val);
1097 CHECK_NATNUM (parent_min_code);
1098 parent_max_code = Fnth (make_number (2), val);
1099 CHECK_NATNUM (parent_max_code);
1100 parent_code_offset = Fnth (make_number (3), val);
1101 CHECK_NUMBER (parent_code_offset);
1102 val = Fmake_vector (make_number (4), Qnil);
1103 ASET (val, 0, make_number (parent_charset->id));
1104 ASET (val, 1, parent_min_code);
1105 ASET (val, 2, parent_max_code);
1106 ASET (val, 3, parent_code_offset);
1107 ASET (attrs, charset_subset, val);
1109 charset.method = CHARSET_METHOD_SUBSET;
1110 /* Here, we just copy the parent's fast_map. It's not accurate,
1111 but at least it works for quickly detecting which character
1112 DOESN'T belong to this charset. */
1113 for (i = 0; i < 190; i++)
1114 charset.fast_map[i] = parent_charset->fast_map[i];
1116 /* We also copy these for parents. */
1117 charset.min_char = parent_charset->min_char;
1118 charset.max_char = parent_charset->max_char;
1120 else if (! NILP (args[charset_arg_superset]))
1122 val = args[charset_arg_superset];
1123 charset.method = CHARSET_METHOD_SUPERSET;
1124 val = Fcopy_sequence (val);
1125 ASET (attrs, charset_superset, val);
1127 charset.min_char = MAX_CHAR;
1128 charset.max_char = 0;
1129 for (; ! NILP (val); val = Fcdr (val))
1131 Lisp_Object elt, car_part, cdr_part;
1132 int this_id, offset;
1133 struct charset *this_charset;
1135 elt = Fcar (val);
1136 if (CONSP (elt))
1138 car_part = XCAR (elt);
1139 cdr_part = XCDR (elt);
1140 CHECK_CHARSET_GET_ID (car_part, this_id);
1141 CHECK_NUMBER (cdr_part);
1142 offset = XINT (cdr_part);
1144 else
1146 CHECK_CHARSET_GET_ID (elt, this_id);
1147 offset = 0;
1149 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1151 this_charset = CHARSET_FROM_ID (this_id);
1152 if (charset.min_char > this_charset->min_char)
1153 charset.min_char = this_charset->min_char;
1154 if (charset.max_char < this_charset->max_char)
1155 charset.max_char = this_charset->max_char;
1156 for (i = 0; i < 190; i++)
1157 charset.fast_map[i] |= this_charset->fast_map[i];
1160 else
1161 error ("None of :code-offset, :map, :parents are specified");
1163 val = args[charset_arg_unify_map];
1164 if (! NILP (val) && !STRINGP (val))
1165 CHECK_VECTOR (val);
1166 ASET (attrs, charset_unify_map, val);
1168 CHECK_LIST (args[charset_arg_plist]);
1169 ASET (attrs, charset_plist, args[charset_arg_plist]);
1171 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1172 &hash_code);
1173 if (charset.hash_index >= 0)
1175 new_definition_p = 0;
1176 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1177 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1179 else
1181 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1182 hash_code);
1183 if (charset_table_used == charset_table_size)
1185 struct charset *new_table
1186 = (struct charset *) xmalloc (sizeof (struct charset)
1187 * (charset_table_size + 16));
1188 bcopy (charset_table, new_table,
1189 sizeof (struct charset) * charset_table_size);
1190 charset_table_size += 16;
1191 charset_table = new_table;
1193 id = charset_table_used++;
1194 new_definition_p = 1;
1197 ASET (attrs, charset_id, make_number (id));
1198 charset.id = id;
1199 charset_table[id] = charset;
1201 if (charset.method == CHARSET_METHOD_MAP)
1203 load_charset (&charset, 0);
1204 charset_table[id] = charset;
1207 if (charset.iso_final >= 0)
1209 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1210 charset.iso_final) = id;
1211 if (new_definition_p)
1212 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1213 Fcons (make_number (id), Qnil));
1214 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1215 charset_jisx0201_roman = id;
1216 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1217 charset_jisx0208_1978 = id;
1218 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1219 charset_jisx0208 = id;
1222 if (charset.emacs_mule_id >= 0)
1224 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1225 if (charset.emacs_mule_id < 0xA0)
1226 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1227 else
1228 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1229 if (new_definition_p)
1230 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1231 Fcons (make_number (id), Qnil));
1234 if (new_definition_p)
1236 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1237 if (charset.supplementary_p)
1238 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1239 Fcons (make_number (id), Qnil));
1240 else
1242 Lisp_Object tail;
1244 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1246 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1248 if (cs->supplementary_p)
1249 break;
1251 if (EQ (tail, Vcharset_ordered_list))
1252 Vcharset_ordered_list = Fcons (make_number (id),
1253 Vcharset_ordered_list);
1254 else if (NILP (tail))
1255 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1256 Fcons (make_number (id), Qnil));
1257 else
1259 val = Fcons (XCAR (tail), XCDR (tail));
1260 XSETCDR (tail, val);
1261 XSETCAR (tail, make_number (id));
1264 charset_ordered_list_tick++;
1267 return Qnil;
1271 /* Same as Fdefine_charset_internal but arguments are more convenient
1272 to call from C (typically in syms_of_charset). This can define a
1273 charset of `offset' method only. Return the ID of the new
1274 charset. */
1276 static int
1277 define_charset_internal (name, dimension, code_space, min_code, max_code,
1278 iso_final, iso_revision, emacs_mule_id,
1279 ascii_compatible, supplementary,
1280 code_offset)
1281 Lisp_Object name;
1282 int dimension;
1283 unsigned char *code_space;
1284 unsigned min_code, max_code;
1285 int iso_final, iso_revision, emacs_mule_id;
1286 int ascii_compatible, supplementary;
1287 int code_offset;
1289 Lisp_Object args[charset_arg_max];
1290 Lisp_Object plist[14];
1291 Lisp_Object val;
1292 int i;
1294 args[charset_arg_name] = name;
1295 args[charset_arg_dimension] = make_number (dimension);
1296 val = Fmake_vector (make_number (8), make_number (0));
1297 for (i = 0; i < 8; i++)
1298 ASET (val, i, make_number (code_space[i]));
1299 args[charset_arg_code_space] = val;
1300 args[charset_arg_min_code] = make_number (min_code);
1301 args[charset_arg_max_code] = make_number (max_code);
1302 args[charset_arg_iso_final]
1303 = (iso_final < 0 ? Qnil : make_number (iso_final));
1304 args[charset_arg_iso_revision] = make_number (iso_revision);
1305 args[charset_arg_emacs_mule_id]
1306 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1307 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1308 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1309 args[charset_arg_invalid_code] = Qnil;
1310 args[charset_arg_code_offset] = make_number (code_offset);
1311 args[charset_arg_map] = Qnil;
1312 args[charset_arg_subset] = Qnil;
1313 args[charset_arg_superset] = Qnil;
1314 args[charset_arg_unify_map] = Qnil;
1316 plist[0] = intern (":name");
1317 plist[1] = args[charset_arg_name];
1318 plist[2] = intern (":dimension");
1319 plist[3] = args[charset_arg_dimension];
1320 plist[4] = intern (":code-space");
1321 plist[5] = args[charset_arg_code_space];
1322 plist[6] = intern (":iso-final-char");
1323 plist[7] = args[charset_arg_iso_final];
1324 plist[8] = intern (":emacs-mule-id");
1325 plist[9] = args[charset_arg_emacs_mule_id];
1326 plist[10] = intern (":ascii-compatible-p");
1327 plist[11] = args[charset_arg_ascii_compatible_p];
1328 plist[12] = intern (":code-offset");
1329 plist[13] = args[charset_arg_code_offset];
1331 args[charset_arg_plist] = Flist (14, plist);
1332 Fdefine_charset_internal (charset_arg_max, args);
1334 return XINT (CHARSET_SYMBOL_ID (name));
1338 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1339 Sdefine_charset_alias, 2, 2, 0,
1340 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1341 (alias, charset)
1342 Lisp_Object alias, charset;
1344 Lisp_Object attr;
1346 CHECK_CHARSET_GET_ATTR (charset, attr);
1347 Fputhash (alias, attr, Vcharset_hash_table);
1348 Vcharset_list = Fcons (alias, Vcharset_list);
1349 return Qnil;
1353 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1354 doc: /* Return the property list of CHARSET. */)
1355 (charset)
1356 Lisp_Object charset;
1358 Lisp_Object attrs;
1360 CHECK_CHARSET_GET_ATTR (charset, attrs);
1361 return CHARSET_ATTR_PLIST (attrs);
1365 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1366 doc: /* Set CHARSET's property list to PLIST. */)
1367 (charset, plist)
1368 Lisp_Object charset, plist;
1370 Lisp_Object attrs;
1372 CHECK_CHARSET_GET_ATTR (charset, attrs);
1373 CHARSET_ATTR_PLIST (attrs) = plist;
1374 return plist;
1378 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1379 doc: /* Unify characters of CHARSET with Unicode.
1380 This means reading the relevant file and installing the table defined
1381 by CHARSET's `:unify-map' property.
1383 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1384 the same meaning as the `:unify-map' attribute in the function
1385 `define-charset' (which see).
1387 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1388 (charset, unify_map, deunify)
1389 Lisp_Object charset, unify_map, deunify;
1391 int id;
1392 struct charset *cs;
1394 CHECK_CHARSET_GET_ID (charset, id);
1395 cs = CHARSET_FROM_ID (id);
1396 if (NILP (deunify)
1397 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1398 : ! CHARSET_UNIFIED_P (cs))
1399 return Qnil;
1401 CHARSET_UNIFIED_P (cs) = 0;
1402 if (NILP (deunify))
1404 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1405 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1406 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1407 if (NILP (unify_map))
1408 unify_map = CHARSET_UNIFY_MAP (cs);
1409 else
1411 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1412 signal_error ("Bad unify-map", unify_map);
1413 CHARSET_UNIFY_MAP (cs) = unify_map;
1415 if (NILP (Vchar_unify_table))
1416 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1417 char_table_set_range (Vchar_unify_table,
1418 cs->min_char, cs->max_char, charset);
1419 CHARSET_UNIFIED_P (cs) = 1;
1421 else if (CHAR_TABLE_P (Vchar_unify_table))
1423 int min_code = CHARSET_MIN_CODE (cs);
1424 int max_code = CHARSET_MAX_CODE (cs);
1425 int min_char = DECODE_CHAR (cs, min_code);
1426 int max_char = DECODE_CHAR (cs, max_code);
1428 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1431 return Qnil;
1434 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1435 Sget_unused_iso_final_char, 2, 2, 0,
1436 doc: /*
1437 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1438 DIMENSION is the number of bytes to represent a character: 1 or 2.
1439 CHARS is the number of characters in a dimension: 94 or 96.
1441 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1442 If there's no unused final char for the specified kind of charset,
1443 return nil. */)
1444 (dimension, chars)
1445 Lisp_Object dimension, chars;
1447 int final_char;
1449 CHECK_NUMBER (dimension);
1450 CHECK_NUMBER (chars);
1451 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1452 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1453 if (XINT (chars) != 94 && XINT (chars) != 96)
1454 args_out_of_range_3 (chars, make_number (94), make_number (96));
1455 for (final_char = '0'; final_char <= '?'; final_char++)
1456 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1457 break;
1458 return (final_char <= '?' ? make_number (final_char) : Qnil);
1461 static void
1462 check_iso_charset_parameter (dimension, chars, final_char)
1463 Lisp_Object dimension, chars, final_char;
1465 CHECK_NATNUM (dimension);
1466 CHECK_NATNUM (chars);
1467 CHECK_NATNUM (final_char);
1469 if (XINT (dimension) > 3)
1470 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1471 if (XINT (chars) != 94 && XINT (chars) != 96)
1472 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1473 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1474 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1478 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1479 4, 4, 0,
1480 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1482 On decoding by an ISO-2022 base coding system, when a charset
1483 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1484 if CHARSET is designated instead. */)
1485 (dimension, chars, final_char, charset)
1486 Lisp_Object dimension, chars, final_char, charset;
1488 int id;
1489 int chars_flag;
1491 CHECK_CHARSET_GET_ID (charset, id);
1492 check_iso_charset_parameter (dimension, chars, final_char);
1493 chars_flag = XINT (chars) == 96;
1494 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1495 return Qnil;
1499 /* Return information about charsets in the text at PTR of NBYTES
1500 bytes, which are NCHARS characters. The value is:
1502 0: Each character is represented by one byte. This is always
1503 true for a unibyte string. For a multibyte string, true if
1504 it contains only ASCII characters.
1506 1: No charsets other than ascii, control-1, and latin-1 are
1507 found.
1509 2: Otherwise.
1513 string_xstring_p (string)
1514 Lisp_Object string;
1516 const unsigned char *p = SDATA (string);
1517 const unsigned char *endp = p + SBYTES (string);
1519 if (SCHARS (string) == SBYTES (string))
1520 return 0;
1522 while (p < endp)
1524 int c = STRING_CHAR_ADVANCE (p);
1526 if (c >= 0x100)
1527 return 2;
1529 return 1;
1533 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1535 CHARSETS is a vector. If Nth element is non-nil, it means the
1536 charset whose id is N is already found.
1538 It may lookup a translation table TABLE if supplied. */
1540 static void
1541 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1542 const unsigned char *ptr;
1543 EMACS_INT nchars, nbytes;
1544 Lisp_Object charsets, table;
1545 int multibyte;
1547 const unsigned char *pend = ptr + nbytes;
1549 if (nchars == nbytes)
1551 if (multibyte)
1552 ASET (charsets, charset_ascii, Qt);
1553 else
1554 while (ptr < pend)
1556 int c = *ptr++;
1558 if (!NILP (table))
1559 c = translate_char (table, c);
1560 if (ASCII_BYTE_P (c))
1561 ASET (charsets, charset_ascii, Qt);
1562 else
1563 ASET (charsets, charset_eight_bit, Qt);
1566 else
1568 while (ptr < pend)
1570 int c = STRING_CHAR_ADVANCE (ptr);
1571 struct charset *charset;
1573 if (!NILP (table))
1574 c = translate_char (table, c);
1575 charset = CHAR_CHARSET (c);
1576 ASET (charsets, CHARSET_ID (charset), Qt);
1581 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1582 2, 3, 0,
1583 doc: /* Return a list of charsets in the region between BEG and END.
1584 BEG and END are buffer positions.
1585 Optional arg TABLE if non-nil is a translation table to look up.
1587 If the current buffer is unibyte, the returned list may contain
1588 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1589 (beg, end, table)
1590 Lisp_Object beg, end, table;
1592 Lisp_Object charsets;
1593 EMACS_INT from, from_byte, to, stop, stop_byte;
1594 int i;
1595 Lisp_Object val;
1596 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1598 validate_region (&beg, &end);
1599 from = XFASTINT (beg);
1600 stop = to = XFASTINT (end);
1602 if (from < GPT && GPT < to)
1604 stop = GPT;
1605 stop_byte = GPT_BYTE;
1607 else
1608 stop_byte = CHAR_TO_BYTE (stop);
1610 from_byte = CHAR_TO_BYTE (from);
1612 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1613 while (1)
1615 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1616 stop_byte - from_byte, charsets, table,
1617 multibyte);
1618 if (stop < to)
1620 from = stop, from_byte = stop_byte;
1621 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1623 else
1624 break;
1627 val = Qnil;
1628 for (i = charset_table_used - 1; i >= 0; i--)
1629 if (!NILP (AREF (charsets, i)))
1630 val = Fcons (CHARSET_NAME (charset_table + i), val);
1631 return val;
1634 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1635 1, 2, 0,
1636 doc: /* Return a list of charsets in STR.
1637 Optional arg TABLE if non-nil is a translation table to look up.
1639 If STR is unibyte, the returned list may contain
1640 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1641 (str, table)
1642 Lisp_Object str, table;
1644 Lisp_Object charsets;
1645 int i;
1646 Lisp_Object val;
1648 CHECK_STRING (str);
1650 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1651 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1652 charsets, table,
1653 STRING_MULTIBYTE (str));
1654 val = Qnil;
1655 for (i = charset_table_used - 1; i >= 0; i--)
1656 if (!NILP (AREF (charsets, i)))
1657 val = Fcons (CHARSET_NAME (charset_table + i), val);
1658 return val;
1663 /* Return a unified character code for C (>= 0x110000). VAL is a
1664 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1665 charset symbol. */
1667 maybe_unify_char (c, val)
1668 int c;
1669 Lisp_Object val;
1671 struct charset *charset;
1673 if (INTEGERP (val))
1674 return XINT (val);
1675 if (NILP (val))
1676 return c;
1678 CHECK_CHARSET_GET_CHARSET (val, charset);
1679 load_charset (charset, 1);
1680 if (! inhibit_load_charset_map)
1682 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1683 if (! NILP (val))
1684 c = XINT (val);
1686 else
1688 int code_index = c - CHARSET_CODE_OFFSET (charset);
1689 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1691 if (unified > 0)
1692 c = unified;
1694 return c;
1698 /* Return a character correponding to the code-point CODE of
1699 CHARSET. */
1702 decode_char (charset, code)
1703 struct charset *charset;
1704 unsigned code;
1706 int c, char_index;
1707 enum charset_method method = CHARSET_METHOD (charset);
1709 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1710 return -1;
1712 if (method == CHARSET_METHOD_SUBSET)
1714 Lisp_Object subset_info;
1716 subset_info = CHARSET_SUBSET (charset);
1717 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1718 code -= XINT (AREF (subset_info, 3));
1719 if (code < XFASTINT (AREF (subset_info, 1))
1720 || code > XFASTINT (AREF (subset_info, 2)))
1721 c = -1;
1722 else
1723 c = DECODE_CHAR (charset, code);
1725 else if (method == CHARSET_METHOD_SUPERSET)
1727 Lisp_Object parents;
1729 parents = CHARSET_SUPERSET (charset);
1730 c = -1;
1731 for (; CONSP (parents); parents = XCDR (parents))
1733 int id = XINT (XCAR (XCAR (parents)));
1734 int code_offset = XINT (XCDR (XCAR (parents)));
1735 unsigned this_code = code - code_offset;
1737 charset = CHARSET_FROM_ID (id);
1738 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1739 break;
1742 else
1744 char_index = CODE_POINT_TO_INDEX (charset, code);
1745 if (char_index < 0)
1746 return -1;
1748 if (method == CHARSET_METHOD_MAP)
1750 Lisp_Object decoder;
1752 decoder = CHARSET_DECODER (charset);
1753 if (! VECTORP (decoder))
1755 load_charset (charset, 1);
1756 decoder = CHARSET_DECODER (charset);
1758 if (VECTORP (decoder))
1759 c = XINT (AREF (decoder, char_index));
1760 else
1761 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1763 else /* method == CHARSET_METHOD_OFFSET */
1765 c = char_index + CHARSET_CODE_OFFSET (charset);
1766 if (CHARSET_UNIFIED_P (charset)
1767 && c > MAX_UNICODE_CHAR)
1768 MAYBE_UNIFY_CHAR (c);
1772 return c;
1775 /* Variable used temporarily by the macro ENCODE_CHAR. */
1776 Lisp_Object charset_work;
1778 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1779 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1780 use CHARSET's strict_max_char instead of max_char. */
1782 unsigned
1783 encode_char (charset, c)
1784 struct charset *charset;
1785 int c;
1787 unsigned code;
1788 enum charset_method method = CHARSET_METHOD (charset);
1790 if (CHARSET_UNIFIED_P (charset))
1792 Lisp_Object deunifier, deunified;
1793 int code_index = -1;
1795 deunifier = CHARSET_DEUNIFIER (charset);
1796 if (! CHAR_TABLE_P (deunifier))
1798 load_charset (charset, 2);
1799 deunifier = CHARSET_DEUNIFIER (charset);
1801 if (CHAR_TABLE_P (deunifier))
1803 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1805 if (INTEGERP (deunified))
1806 code_index = XINT (deunified);
1808 else
1810 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1812 if (code_index >= 0)
1813 c = CHARSET_CODE_OFFSET (charset) + code_index;
1816 if (method == CHARSET_METHOD_SUBSET)
1818 Lisp_Object subset_info;
1819 struct charset *this_charset;
1821 subset_info = CHARSET_SUBSET (charset);
1822 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1823 code = ENCODE_CHAR (this_charset, c);
1824 if (code == CHARSET_INVALID_CODE (this_charset)
1825 || code < XFASTINT (AREF (subset_info, 1))
1826 || code > XFASTINT (AREF (subset_info, 2)))
1827 return CHARSET_INVALID_CODE (charset);
1828 code += XINT (AREF (subset_info, 3));
1829 return code;
1832 if (method == CHARSET_METHOD_SUPERSET)
1834 Lisp_Object parents;
1836 parents = CHARSET_SUPERSET (charset);
1837 for (; CONSP (parents); parents = XCDR (parents))
1839 int id = XINT (XCAR (XCAR (parents)));
1840 int code_offset = XINT (XCDR (XCAR (parents)));
1841 struct charset *this_charset = CHARSET_FROM_ID (id);
1843 code = ENCODE_CHAR (this_charset, c);
1844 if (code != CHARSET_INVALID_CODE (this_charset))
1845 return code + code_offset;
1847 return CHARSET_INVALID_CODE (charset);
1850 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1851 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1852 return CHARSET_INVALID_CODE (charset);
1854 if (method == CHARSET_METHOD_MAP)
1856 Lisp_Object encoder;
1857 Lisp_Object val;
1859 encoder = CHARSET_ENCODER (charset);
1860 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1862 load_charset (charset, 2);
1863 encoder = CHARSET_ENCODER (charset);
1865 if (CHAR_TABLE_P (encoder))
1867 val = CHAR_TABLE_REF (encoder, c);
1868 if (NILP (val))
1869 return CHARSET_INVALID_CODE (charset);
1870 code = XINT (val);
1871 if (! CHARSET_COMPACT_CODES_P (charset))
1872 code = INDEX_TO_CODE_POINT (charset, code);
1874 else
1876 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1877 code = INDEX_TO_CODE_POINT (charset, code);
1880 else /* method == CHARSET_METHOD_OFFSET */
1882 int code_index = c - CHARSET_CODE_OFFSET (charset);
1884 code = INDEX_TO_CODE_POINT (charset, code_index);
1887 return code;
1891 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1892 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1893 Return nil if CODE-POINT is not valid in CHARSET.
1895 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1897 Optional argument RESTRICTION specifies a way to map the pair of CCS
1898 and CODE-POINT to a character. Currently not supported and just ignored. */)
1899 (charset, code_point, restriction)
1900 Lisp_Object charset, code_point, restriction;
1902 int c, id;
1903 unsigned code;
1904 struct charset *charsetp;
1906 CHECK_CHARSET_GET_ID (charset, id);
1907 if (CONSP (code_point))
1909 CHECK_NATNUM_CAR (code_point);
1910 CHECK_NATNUM_CDR (code_point);
1911 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1913 else
1915 CHECK_NATNUM (code_point);
1916 code = XINT (code_point);
1918 charsetp = CHARSET_FROM_ID (id);
1919 c = DECODE_CHAR (charsetp, code);
1920 return (c >= 0 ? make_number (c) : Qnil);
1924 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1925 doc: /* Encode the character CH into a code-point of CHARSET.
1926 Return nil if CHARSET doesn't include CH.
1928 Optional argument RESTRICTION specifies a way to map CH to a
1929 code-point in CCS. Currently not supported and just ignored. */)
1930 (ch, charset, restriction)
1931 Lisp_Object ch, charset, restriction;
1933 int id;
1934 unsigned code;
1935 struct charset *charsetp;
1937 CHECK_CHARSET_GET_ID (charset, id);
1938 CHECK_NATNUM (ch);
1939 charsetp = CHARSET_FROM_ID (id);
1940 code = ENCODE_CHAR (charsetp, XINT (ch));
1941 if (code == CHARSET_INVALID_CODE (charsetp))
1942 return Qnil;
1943 if (code > 0x7FFFFFF)
1944 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1945 return make_number (code);
1949 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1950 doc:
1951 /* Return a character of CHARSET whose position codes are CODEn.
1953 CODE1 through CODE4 are optional, but if you don't supply sufficient
1954 position codes, it is assumed that the minimum code in each dimension
1955 is specified. */)
1956 (charset, code1, code2, code3, code4)
1957 Lisp_Object charset, code1, code2, code3, code4;
1959 int id, dimension;
1960 struct charset *charsetp;
1961 unsigned code;
1962 int c;
1964 CHECK_CHARSET_GET_ID (charset, id);
1965 charsetp = CHARSET_FROM_ID (id);
1967 dimension = CHARSET_DIMENSION (charsetp);
1968 if (NILP (code1))
1969 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1970 ? 0 : CHARSET_MIN_CODE (charsetp));
1971 else
1973 CHECK_NATNUM (code1);
1974 if (XFASTINT (code1) >= 0x100)
1975 args_out_of_range (make_number (0xFF), code1);
1976 code = XFASTINT (code1);
1978 if (dimension > 1)
1980 code <<= 8;
1981 if (NILP (code2))
1982 code |= charsetp->code_space[(dimension - 2) * 4];
1983 else
1985 CHECK_NATNUM (code2);
1986 if (XFASTINT (code2) >= 0x100)
1987 args_out_of_range (make_number (0xFF), code2);
1988 code |= XFASTINT (code2);
1991 if (dimension > 2)
1993 code <<= 8;
1994 if (NILP (code3))
1995 code |= charsetp->code_space[(dimension - 3) * 4];
1996 else
1998 CHECK_NATNUM (code3);
1999 if (XFASTINT (code3) >= 0x100)
2000 args_out_of_range (make_number (0xFF), code3);
2001 code |= XFASTINT (code3);
2004 if (dimension > 3)
2006 code <<= 8;
2007 if (NILP (code4))
2008 code |= charsetp->code_space[0];
2009 else
2011 CHECK_NATNUM (code4);
2012 if (XFASTINT (code4) >= 0x100)
2013 args_out_of_range (make_number (0xFF), code4);
2014 code |= XFASTINT (code4);
2021 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2022 code &= 0x7F7F7F7F;
2023 c = DECODE_CHAR (charsetp, code);
2024 if (c < 0)
2025 error ("Invalid code(s)");
2026 return make_number (c);
2030 /* Return the first charset in CHARSET_LIST that contains C.
2031 CHARSET_LIST is a list of charset IDs. If it is nil, use
2032 Vcharset_ordered_list. */
2034 struct charset *
2035 char_charset (c, charset_list, code_return)
2036 int c;
2037 Lisp_Object charset_list;
2038 unsigned *code_return;
2040 int maybe_null = 0;
2042 if (NILP (charset_list))
2043 charset_list = Vcharset_ordered_list;
2044 else
2045 maybe_null = 1;
2047 while (CONSP (charset_list))
2049 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2050 unsigned code = ENCODE_CHAR (charset, c);
2052 if (code != CHARSET_INVALID_CODE (charset))
2054 if (code_return)
2055 *code_return = code;
2056 return charset;
2058 charset_list = XCDR (charset_list);
2059 if (c <= MAX_UNICODE_CHAR
2060 && EQ (charset_list, Vcharset_non_preferred_head))
2061 return CHARSET_FROM_ID (charset_unicode);
2063 return (maybe_null ? NULL
2064 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2065 : CHARSET_FROM_ID (charset_eight_bit));
2069 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2070 doc:
2071 /*Return list of charset and one to four position-codes of CH.
2072 The charset is decided by the current priority order of charsets.
2073 A position-code is a byte value of each dimension of the code-point of
2074 CH in the charset. */)
2075 (ch)
2076 Lisp_Object ch;
2078 struct charset *charset;
2079 int c, dimension;
2080 unsigned code;
2081 Lisp_Object val;
2083 CHECK_CHARACTER (ch);
2084 c = XFASTINT (ch);
2085 charset = CHAR_CHARSET (c);
2086 if (! charset)
2087 abort ();
2088 code = ENCODE_CHAR (charset, c);
2089 if (code == CHARSET_INVALID_CODE (charset))
2090 abort ();
2091 dimension = CHARSET_DIMENSION (charset);
2092 for (val = Qnil; dimension > 0; dimension--)
2094 val = Fcons (make_number (code & 0xFF), val);
2095 code >>= 8;
2097 return Fcons (CHARSET_NAME (charset), val);
2101 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2102 doc: /* Return the charset of highest priority that contains CH.
2103 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2104 from which to find the charset. It may also be a coding system. In
2105 that case, find the charset from what supported by that coding system. */)
2106 (ch, restriction)
2107 Lisp_Object ch, restriction;
2109 struct charset *charset;
2111 CHECK_CHARACTER (ch);
2112 if (NILP (restriction))
2113 charset = CHAR_CHARSET (XINT (ch));
2114 else
2116 Lisp_Object charset_list;
2118 if (CONSP (restriction))
2120 for (charset_list = Qnil; CONSP (restriction);
2121 restriction = XCDR (restriction))
2123 int id;
2125 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2126 charset_list = Fcons (make_number (id), charset_list);
2128 charset_list = Fnreverse (charset_list);
2130 else
2131 charset_list = coding_system_charset_list (restriction);
2132 charset = char_charset (XINT (ch), charset_list, NULL);
2133 if (! charset)
2134 return Qnil;
2136 return (CHARSET_NAME (charset));
2140 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2141 doc: /*
2142 Return charset of a character in the current buffer at position POS.
2143 If POS is nil, it defauls to the current point.
2144 If POS is out of range, the value is nil. */)
2145 (pos)
2146 Lisp_Object pos;
2148 Lisp_Object ch;
2149 struct charset *charset;
2151 ch = Fchar_after (pos);
2152 if (! INTEGERP (ch))
2153 return ch;
2154 charset = CHAR_CHARSET (XINT (ch));
2155 return (CHARSET_NAME (charset));
2159 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2160 doc: /*
2161 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2163 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2164 by their DIMENSION, CHARS, and FINAL-CHAR,
2165 whereas Emacs distinguishes them by charset symbol.
2166 See the documentation of the function `charset-info' for the meanings of
2167 DIMENSION, CHARS, and FINAL-CHAR. */)
2168 (dimension, chars, final_char)
2169 Lisp_Object dimension, chars, final_char;
2171 int id;
2172 int chars_flag;
2174 check_iso_charset_parameter (dimension, chars, final_char);
2175 chars_flag = XFASTINT (chars) == 96;
2176 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2177 XFASTINT (final_char));
2178 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2182 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2183 0, 0, 0,
2184 doc: /*
2185 Internal use only.
2186 Clear temporary charset mapping tables.
2187 It should be called only from temacs invoked for dumping. */)
2190 int i;
2191 struct charset *charset;
2192 Lisp_Object attrs;
2194 if (temp_charset_work)
2196 free (temp_charset_work);
2197 temp_charset_work = NULL;
2200 if (CHAR_TABLE_P (Vchar_unify_table))
2201 Foptimize_char_table (Vchar_unify_table, Qnil);
2203 return Qnil;
2206 DEFUN ("charset-priority-list", Fcharset_priority_list,
2207 Scharset_priority_list, 0, 1, 0,
2208 doc: /* Return the list of charsets ordered by priority.
2209 HIGHESTP non-nil means just return the highest priority one. */)
2210 (highestp)
2211 Lisp_Object highestp;
2213 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2215 if (!NILP (highestp))
2216 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2218 while (!NILP (list))
2220 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2221 list = XCDR (list);
2223 return Fnreverse (val);
2226 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2227 1, MANY, 0,
2228 doc: /* Assign higher priority to the charsets given as arguments.
2229 usage: (set-charset-priority &rest charsets) */)
2230 (nargs, args)
2231 int nargs;
2232 Lisp_Object *args;
2234 Lisp_Object new_head, old_list, arglist[2];
2235 Lisp_Object list_2022, list_emacs_mule;
2236 int i, id;
2238 old_list = Fcopy_sequence (Vcharset_ordered_list);
2239 new_head = Qnil;
2240 for (i = 0; i < nargs; i++)
2242 CHECK_CHARSET_GET_ID (args[i], id);
2243 if (! NILP (Fmemq (make_number (id), old_list)))
2245 old_list = Fdelq (make_number (id), old_list);
2246 new_head = Fcons (make_number (id), new_head);
2249 arglist[0] = Fnreverse (new_head);
2250 arglist[1] = Vcharset_non_preferred_head = old_list;
2251 Vcharset_ordered_list = Fnconc (2, arglist);
2252 charset_ordered_list_tick++;
2254 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2255 CONSP (old_list); old_list = XCDR (old_list))
2257 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2258 list_2022 = Fcons (XCAR (old_list), list_2022);
2259 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2260 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2262 Viso_2022_charset_list = Fnreverse (list_2022);
2263 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2265 return Qnil;
2268 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2269 0, 1, 0,
2270 doc: /* Internal use only.
2271 Return charset identification number of CHARSET. */)
2272 (charset)
2273 Lisp_Object charset;
2275 int id;
2277 CHECK_CHARSET_GET_ID (charset, id);
2278 return make_number (id);
2282 void
2283 init_charset ()
2285 Lisp_Object tempdir;
2286 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2287 if (access (SDATA (tempdir), 0) < 0)
2289 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2290 Emacs will not function correctly without the character map files.\n\
2291 Please check your installation!\n",
2292 tempdir);
2293 /* TODO should this be a fatal error? (Bug#909) */
2296 Vcharset_map_path = Fcons (tempdir, Qnil);
2300 void
2301 init_charset_once ()
2303 int i, j, k;
2305 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2306 for (j = 0; j < ISO_MAX_CHARS; j++)
2307 for (k = 0; k < ISO_MAX_FINAL; k++)
2308 iso_charset_table[i][j][k] = -1;
2310 for (i = 0; i < 256; i++)
2311 emacs_mule_charset[i] = NULL;
2313 charset_jisx0201_roman = -1;
2314 charset_jisx0208_1978 = -1;
2315 charset_jisx0208 = -1;
2317 for (i = 0; i < 128; i++)
2318 unibyte_to_multibyte_table[i] = i;
2319 for (; i < 256; i++)
2320 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2323 #ifdef emacs
2325 void
2326 syms_of_charset ()
2328 DEFSYM (Qcharsetp, "charsetp");
2330 DEFSYM (Qascii, "ascii");
2331 DEFSYM (Qunicode, "unicode");
2332 DEFSYM (Qemacs, "emacs");
2333 DEFSYM (Qeight_bit, "eight-bit");
2334 DEFSYM (Qiso_8859_1, "iso-8859-1");
2336 DEFSYM (Qgl, "gl");
2337 DEFSYM (Qgr, "gr");
2339 staticpro (&Vcharset_ordered_list);
2340 Vcharset_ordered_list = Qnil;
2342 staticpro (&Viso_2022_charset_list);
2343 Viso_2022_charset_list = Qnil;
2345 staticpro (&Vemacs_mule_charset_list);
2346 Vemacs_mule_charset_list = Qnil;
2348 /* Don't staticpro them here. It's done in syms_of_fns. */
2349 QCtest = intern (":test");
2350 Qeq = intern ("eq");
2352 staticpro (&Vcharset_hash_table);
2354 Lisp_Object args[2];
2355 args[0] = QCtest;
2356 args[1] = Qeq;
2357 Vcharset_hash_table = Fmake_hash_table (2, args);
2360 charset_table_size = 128;
2361 charset_table = ((struct charset *)
2362 xmalloc (sizeof (struct charset) * charset_table_size));
2363 charset_table_used = 0;
2365 defsubr (&Scharsetp);
2366 defsubr (&Smap_charset_chars);
2367 defsubr (&Sdefine_charset_internal);
2368 defsubr (&Sdefine_charset_alias);
2369 defsubr (&Scharset_plist);
2370 defsubr (&Sset_charset_plist);
2371 defsubr (&Sunify_charset);
2372 defsubr (&Sget_unused_iso_final_char);
2373 defsubr (&Sdeclare_equiv_charset);
2374 defsubr (&Sfind_charset_region);
2375 defsubr (&Sfind_charset_string);
2376 defsubr (&Sdecode_char);
2377 defsubr (&Sencode_char);
2378 defsubr (&Ssplit_char);
2379 defsubr (&Smake_char);
2380 defsubr (&Schar_charset);
2381 defsubr (&Scharset_after);
2382 defsubr (&Siso_charset);
2383 defsubr (&Sclear_charset_maps);
2384 defsubr (&Scharset_priority_list);
2385 defsubr (&Sset_charset_priority);
2386 defsubr (&Scharset_id_internal);
2388 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2389 doc: /* *List of directories to search for charset map files. */);
2390 Vcharset_map_path = Qnil;
2392 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2393 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2394 inhibit_load_charset_map = 0;
2396 DEFVAR_LISP ("charset-list", &Vcharset_list,
2397 doc: /* List of all charsets ever defined. */);
2398 Vcharset_list = Qnil;
2400 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2401 doc: /* ISO639 language mnemonic symbol for the current language environment.
2402 If the current language environment is for multiple languages (e.g. "Latin-1"),
2403 the value may be a list of mnemonics. */);
2404 Vcurrent_iso639_language = Qnil;
2406 charset_ascii
2407 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2408 0, 127, 'B', -1, 0, 1, 0, 0);
2409 charset_iso_8859_1
2410 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2411 0, 255, -1, -1, -1, 1, 0, 0);
2412 charset_unicode
2413 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2414 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2415 charset_emacs
2416 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2417 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2418 charset_eight_bit
2419 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2420 128, 255, -1, 0, -1, 0, 1,
2421 MAX_5_BYTE_CHAR + 1);
2424 #endif /* emacs */
2426 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2427 (do not change this comment) */