* display.texi (Line Height): Emphasize that line-spacing only takes
[emacs.git] / src / charset.c
blobbfe3468662937f39b560dd69672063924587848b
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28 #include <config.h>
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
64 /* Table of struct charset. */
65 struct charset *charset_table;
67 static int charset_table_size;
68 static int charset_table_used;
70 Lisp_Object Qcharsetp;
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77 Lisp_Object Qemacs;
79 /* The corresponding charsets. */
80 int charset_ascii;
81 int charset_eight_bit;
82 int charset_iso_8859_1;
83 int charset_unicode;
84 int charset_emacs;
86 /* The other special charsets. */
87 int charset_jisx0201_roman;
88 int charset_jisx0208_1978;
89 int charset_jisx0208;
91 /* Value of charset attribute `charset-iso-plane'. */
92 Lisp_Object Qgl, Qgr;
94 /* Charset of unibyte characters. */
95 int charset_unibyte;
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list;
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102 Lisp_Object Vcharset_non_preferred_head;
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
106 -1. */
107 unsigned short charset_ordered_list_tick;
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list;
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list;
115 struct charset *emacs_mule_charset[256];
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
121 Lisp_Object Vcharset_map_path;
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map;
126 Lisp_Object Vcurrent_iso639_language;
128 /* Defined in chartab.c */
129 extern void
130 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
131 Lisp_Object function, Lisp_Object table,
132 Lisp_Object arg, struct charset *charset,
133 unsigned from, unsigned to));
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
150 : -1)
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
163 << 8) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
166 << 16) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
168 << 24))))
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
173 static struct
175 /* The current charset for which the following tables are setup. */
176 struct charset *current;
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char, max_char;
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203 } *temp_charset_work;
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
229 /* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
231 int charset_map_loaded;
233 struct charset_map_entries
235 struct {
236 unsigned from, to;
237 int c;
238 } entry[0x10000];
239 struct charset_map_entries *next;
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
258 If CONTROL_FLAG is 2, setup the following tables.
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267 static void
268 load_charset_map (charset, entries, n_entries, control_flag)
269 struct charset *charset;
270 struct charset_map_entries *entries;
271 int n_entries;
272 int control_flag;
274 Lisp_Object vec, table;
275 unsigned max_code = CHARSET_MAX_CODE (charset);
276 int ascii_compatible_p = charset->ascii_compatible_p;
277 int min_char, max_char, nonascii_min_char;
278 int i;
279 unsigned char *fast_map = charset->fast_map;
281 if (n_entries <= 0)
282 return;
284 if (control_flag)
286 if (! inhibit_load_charset_map)
288 if (control_flag == 1)
290 if (charset->method == CHARSET_METHOD_MAP)
292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
297 else
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
304 else
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
313 else
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
323 else
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
333 charset_map_loaded = 1;
336 min_char = max_char = entries->entry[0].c;
337 nonascii_min_char = MAX_CHAR;
338 for (i = 0; i < n_entries; i++)
340 unsigned from, to;
341 int from_index, to_index;
342 int from_c, to_c;
343 int idx = i % 0x10000;
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
353 to_index = from_index;
354 to_c = from_c;
356 else
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
361 if (from_index < 0 || to_index < 0)
362 continue;
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
369 if (control_flag == 1)
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
380 else if (control_flag == 2)
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
406 if (ascii_compatible_p)
408 if (! ASCII_BYTE_P (from_c))
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
413 else if (! ASCII_BYTE_P (to_c))
415 nonascii_min_char = 0x80;
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
424 if (control_flag == 0)
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
430 else if (control_flag == 4)
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
441 static INLINE unsigned
442 read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
446 int c;
447 unsigned n;
449 while ((c = getc (fp)) != EOF)
451 if (c == '#')
453 while ((c = getc (fp)) != EOF && c != '\n');
455 else if (c == '0')
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
461 if (c == EOF)
463 *eof = 1;
464 return 0;
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
475 if (c != EOF)
476 ungetc (c, fp);
477 return n;
480 extern Lisp_Object Qfile_name_handler_alist;
482 /* Return a mapping vector for CHARSET loaded from MAPFILE.
483 Each line of MAPFILE has this form
484 0xAAAA 0xCCCC
485 where 0xAAAA is a code-point and 0xCCCC is the corresponding
486 character code, or this form
487 0xAAAA-0xBBBB 0xCCCC
488 where 0xAAAA and 0xBBBB are code-points specifying a range, and
489 0xCCCC is the first character code of the range.
491 The returned vector has this form:
492 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
493 where CODE1 is a code-point or a cons of code-points specifying a
494 range.
496 Note that this function uses `openp' to open MAPFILE but ignores
497 `file-name-handler-alist' to avoid running any Lisp code. */
499 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
501 static void
502 load_charset_map_from_file (charset, mapfile, control_flag)
503 struct charset *charset;
504 Lisp_Object mapfile;
505 int control_flag;
507 unsigned min_code = CHARSET_MIN_CODE (charset);
508 unsigned max_code = CHARSET_MAX_CODE (charset);
509 int fd;
510 FILE *fp;
511 int eof;
512 Lisp_Object suffixes;
513 struct charset_map_entries *head, *entries;
514 int n_entries;
515 int count = SPECPDL_INDEX ();
517 suffixes = Fcons (build_string (".map"),
518 Fcons (build_string (".TXT"), Qnil));
520 specbind (Qfile_name_handler_alist, Qnil);
521 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
522 unbind_to (count, Qnil);
523 if (fd < 0
524 || ! (fp = fdopen (fd, "r")))
525 error ("Failure in loading charset map: %S", SDATA (mapfile));
527 head = entries = ((struct charset_map_entries *)
528 alloca (sizeof (struct charset_map_entries)));
529 n_entries = 0;
530 eof = 0;
531 while (1)
533 unsigned from, to;
534 int c;
535 int idx;
537 from = read_hex (fp, &eof);
538 if (eof)
539 break;
540 if (getc (fp) == '-')
541 to = read_hex (fp, &eof);
542 else
543 to = from;
544 c = (int) read_hex (fp, &eof);
546 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
547 continue;
549 if (n_entries > 0 && (n_entries % 0x10000) == 0)
551 entries->next = ((struct charset_map_entries *)
552 alloca (sizeof (struct charset_map_entries)));
553 entries = entries->next;
555 idx = n_entries % 0x10000;
556 entries->entry[idx].from = from;
557 entries->entry[idx].to = to;
558 entries->entry[idx].c = c;
559 n_entries++;
561 fclose (fp);
562 close (fd);
564 load_charset_map (charset, head, n_entries, control_flag);
567 static void
568 load_charset_map_from_vector (charset, vec, control_flag)
569 struct charset *charset;
570 Lisp_Object vec;
571 int control_flag;
573 unsigned min_code = CHARSET_MIN_CODE (charset);
574 unsigned max_code = CHARSET_MAX_CODE (charset);
575 struct charset_map_entries *head, *entries;
576 int n_entries;
577 int len = ASIZE (vec);
578 int i;
580 if (len % 2 == 1)
582 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
583 return;
586 head = entries = ((struct charset_map_entries *)
587 alloca (sizeof (struct charset_map_entries)));
588 n_entries = 0;
589 for (i = 0; i < len; i += 2)
591 Lisp_Object val, val2;
592 unsigned from, to;
593 int c;
594 int idx;
596 val = AREF (vec, i);
597 if (CONSP (val))
599 val2 = XCDR (val);
600 val = XCAR (val);
601 CHECK_NATNUM (val);
602 CHECK_NATNUM (val2);
603 from = XFASTINT (val);
604 to = XFASTINT (val2);
606 else
608 CHECK_NATNUM (val);
609 from = to = XFASTINT (val);
611 val = AREF (vec, i + 1);
612 CHECK_NATNUM (val);
613 c = XFASTINT (val);
615 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
616 continue;
618 if (n_entries > 0 && (n_entries % 0x10000) == 0)
620 entries->next = ((struct charset_map_entries *)
621 alloca (sizeof (struct charset_map_entries)));
622 entries = entries->next;
624 idx = n_entries % 0x10000;
625 entries->entry[idx].from = from;
626 entries->entry[idx].to = to;
627 entries->entry[idx].c = c;
628 n_entries++;
631 load_charset_map (charset, head, n_entries, control_flag);
635 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
636 map it is (see the comment of load_charset_map for the detail). */
638 static void
639 load_charset (charset, control_flag)
640 struct charset *charset;
641 int control_flag;
643 Lisp_Object map;
645 if (inhibit_load_charset_map
646 && temp_charset_work
647 && charset == temp_charset_work->current
648 && (control_flag == 2 == temp_charset_work->for_encoder))
649 return;
651 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
652 map = CHARSET_MAP (charset);
653 else if (CHARSET_UNIFIED_P (charset))
654 map = CHARSET_UNIFY_MAP (charset);
655 if (STRINGP (map))
656 load_charset_map_from_file (charset, map, control_flag);
657 else
658 load_charset_map_from_vector (charset, map, control_flag);
662 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
663 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
664 (object)
665 Lisp_Object object;
667 return (CHARSETP (object) ? Qt : Qnil);
671 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
672 Lisp_Object function, Lisp_Object arg,
673 unsigned from, unsigned to));
675 void
676 map_charset_for_dump (c_function, function, arg, from, to)
677 void (*c_function) (Lisp_Object, Lisp_Object);
678 Lisp_Object function, arg;
679 unsigned from, to;
681 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
682 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
683 Lisp_Object range;
684 int c, stop;
685 struct gcpro gcpro1;
687 range = Fcons (Qnil, Qnil);
688 GCPRO1 (range);
690 c = temp_charset_work->min_char;
691 stop = (temp_charset_work->max_char < 0x20000
692 ? temp_charset_work->max_char : 0xFFFF);
694 while (1)
696 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
698 if (index >= from_idx && index <= to_idx)
700 if (NILP (XCAR (range)))
701 XSETCAR (range, make_number (c));
703 else if (! NILP (XCAR (range)))
705 XSETCDR (range, make_number (c - 1));
706 if (c_function)
707 (*c_function) (arg, range);
708 else
709 call2 (function, range, arg);
710 XSETCAR (range, Qnil);
712 if (c == stop)
714 if (c == temp_charset_work->max_char)
716 if (! NILP (XCAR (range)))
718 XSETCDR (range, make_number (c));
719 if (c_function)
720 (*c_function) (arg, range);
721 else
722 call2 (function, range, arg);
724 break;
726 c = 0x1FFFF;
727 stop = temp_charset_work->max_char;
729 c++;
731 UNGCPRO;
734 void
735 map_charset_chars (c_function, function, arg,
736 charset, from, to)
737 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
738 Lisp_Object function, arg;
739 struct charset *charset;
740 unsigned from, to;
742 Lisp_Object range;
743 int partial;
745 partial = (from > CHARSET_MIN_CODE (charset)
746 || to < CHARSET_MAX_CODE (charset));
748 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
750 int from_idx = CODE_POINT_TO_INDEX (charset, from);
751 int to_idx = CODE_POINT_TO_INDEX (charset, to);
752 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
753 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
755 if (CHARSET_UNIFIED_P (charset))
757 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
758 load_charset (charset, 2);
759 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
760 map_char_table_for_charset (c_function, function,
761 CHARSET_DEUNIFIER (charset), arg,
762 partial ? charset : NULL, from, to);
763 else
764 map_charset_for_dump (c_function, function, arg, from, to);
767 range = Fcons (make_number (from_c), make_number (to_c));
768 if (NILP (function))
769 (*c_function) (arg, range);
770 else
771 call2 (function, range, arg);
773 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
775 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
776 load_charset (charset, 2);
777 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
778 map_char_table_for_charset (c_function, function,
779 CHARSET_ENCODER (charset), arg,
780 partial ? charset : NULL, from, to);
781 else
782 map_charset_for_dump (c_function, function, arg, from, to);
784 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
786 Lisp_Object subset_info;
787 int offset;
789 subset_info = CHARSET_SUBSET (charset);
790 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
791 offset = XINT (AREF (subset_info, 3));
792 from -= offset;
793 if (from < XFASTINT (AREF (subset_info, 1)))
794 from = XFASTINT (AREF (subset_info, 1));
795 to -= offset;
796 if (to > XFASTINT (AREF (subset_info, 2)))
797 to = XFASTINT (AREF (subset_info, 2));
798 map_charset_chars (c_function, function, arg, charset, from, to);
800 else /* i.e. CHARSET_METHOD_SUPERSET */
802 Lisp_Object parents;
804 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
805 parents = XCDR (parents))
807 int offset;
808 unsigned this_from, this_to;
810 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
811 offset = XINT (XCDR (XCAR (parents)));
812 this_from = from > offset ? from - offset : 0;
813 this_to = to > offset ? to - offset : 0;
814 if (this_from < CHARSET_MIN_CODE (charset))
815 this_from = CHARSET_MIN_CODE (charset);
816 if (this_to > CHARSET_MAX_CODE (charset))
817 this_to = CHARSET_MAX_CODE (charset);
818 map_charset_chars (c_function, function, arg, charset,
819 this_from, this_to);
824 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
825 doc: /* Call FUNCTION for all characters in CHARSET.
826 FUNCTION is called with an argument RANGE and the optional 3rd
827 argument ARG.
829 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
830 characters contained in CHARSET.
832 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
833 range of code points of target characters. */)
834 (function, charset, arg, from_code, to_code)
835 Lisp_Object function, charset, arg, from_code, to_code;
837 struct charset *cs;
838 unsigned from, to;
840 CHECK_CHARSET_GET_CHARSET (charset, cs);
841 if (NILP (from_code))
842 from = CHARSET_MIN_CODE (cs);
843 else
845 CHECK_NATNUM (from_code);
846 from = XINT (from_code);
847 if (from < CHARSET_MIN_CODE (cs))
848 from = CHARSET_MIN_CODE (cs);
850 if (NILP (to_code))
851 to = CHARSET_MAX_CODE (cs);
852 else
854 CHECK_NATNUM (to_code);
855 to = XINT (to_code);
856 if (to > CHARSET_MAX_CODE (cs))
857 to = CHARSET_MAX_CODE (cs);
859 map_charset_chars (NULL, function, arg, cs, from, to);
860 return Qnil;
864 /* Define a charset according to the arguments. The Nth argument is
865 the Nth attribute of the charset (the last attribute `charset-id'
866 is not included). See the docstring of `define-charset' for the
867 detail. */
869 DEFUN ("define-charset-internal", Fdefine_charset_internal,
870 Sdefine_charset_internal, charset_arg_max, MANY, 0,
871 doc: /* For internal use only.
872 usage: (define-charset-internal ...) */)
873 (nargs, args)
874 int nargs;
875 Lisp_Object *args;
877 /* Charset attr vector. */
878 Lisp_Object attrs;
879 Lisp_Object val;
880 unsigned hash_code;
881 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
882 int i, j;
883 struct charset charset;
884 int id;
885 int dimension;
886 int new_definition_p;
887 int nchars;
889 if (nargs != charset_arg_max)
890 return Fsignal (Qwrong_number_of_arguments,
891 Fcons (intern ("define-charset-internal"),
892 make_number (nargs)));
894 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
896 CHECK_SYMBOL (args[charset_arg_name]);
897 ASET (attrs, charset_name, args[charset_arg_name]);
899 val = args[charset_arg_code_space];
900 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
902 int min_byte, max_byte;
904 min_byte = XINT (Faref (val, make_number (i * 2)));
905 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
906 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
907 error ("Invalid :code-space value");
908 charset.code_space[i * 4] = min_byte;
909 charset.code_space[i * 4 + 1] = max_byte;
910 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
911 nchars *= charset.code_space[i * 4 + 2];
912 charset.code_space[i * 4 + 3] = nchars;
913 if (max_byte > 0)
914 dimension = i + 1;
917 val = args[charset_arg_dimension];
918 if (NILP (val))
919 charset.dimension = dimension;
920 else
922 CHECK_NATNUM (val);
923 charset.dimension = XINT (val);
924 if (charset.dimension < 1 || charset.dimension > 4)
925 args_out_of_range_3 (val, make_number (1), make_number (4));
928 charset.code_linear_p
929 = (charset.dimension == 1
930 || (charset.code_space[2] == 256
931 && (charset.dimension == 2
932 || (charset.code_space[6] == 256
933 && (charset.dimension == 3
934 || charset.code_space[10] == 256)))));
936 if (! charset.code_linear_p)
938 charset.code_space_mask = (unsigned char *) xmalloc (256);
939 bzero (charset.code_space_mask, 256);
940 for (i = 0; i < 4; i++)
941 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
942 j++)
943 charset.code_space_mask[j] |= (1 << i);
946 charset.iso_chars_96 = charset.code_space[2] == 96;
948 charset.min_code = (charset.code_space[0]
949 | (charset.code_space[4] << 8)
950 | (charset.code_space[8] << 16)
951 | (charset.code_space[12] << 24));
952 charset.max_code = (charset.code_space[1]
953 | (charset.code_space[5] << 8)
954 | (charset.code_space[9] << 16)
955 | (charset.code_space[13] << 24));
956 charset.char_index_offset = 0;
958 val = args[charset_arg_min_code];
959 if (! NILP (val))
961 unsigned code;
963 if (INTEGERP (val))
964 code = XINT (val);
965 else
967 CHECK_CONS (val);
968 CHECK_NUMBER_CAR (val);
969 CHECK_NUMBER_CDR (val);
970 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
972 if (code < charset.min_code
973 || code > charset.max_code)
974 args_out_of_range_3 (make_number (charset.min_code),
975 make_number (charset.max_code), val);
976 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
977 charset.min_code = code;
980 val = args[charset_arg_max_code];
981 if (! NILP (val))
983 unsigned code;
985 if (INTEGERP (val))
986 code = XINT (val);
987 else
989 CHECK_CONS (val);
990 CHECK_NUMBER_CAR (val);
991 CHECK_NUMBER_CDR (val);
992 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
994 if (code < charset.min_code
995 || code > charset.max_code)
996 args_out_of_range_3 (make_number (charset.min_code),
997 make_number (charset.max_code), val);
998 charset.max_code = code;
1001 charset.compact_codes_p = charset.max_code < 0x10000;
1003 val = args[charset_arg_invalid_code];
1004 if (NILP (val))
1006 if (charset.min_code > 0)
1007 charset.invalid_code = 0;
1008 else
1010 XSETINT (val, charset.max_code + 1);
1011 if (XINT (val) == charset.max_code + 1)
1012 charset.invalid_code = charset.max_code + 1;
1013 else
1014 error ("Attribute :invalid-code must be specified");
1017 else
1019 CHECK_NATNUM (val);
1020 charset.invalid_code = XFASTINT (val);
1023 val = args[charset_arg_iso_final];
1024 if (NILP (val))
1025 charset.iso_final = -1;
1026 else
1028 CHECK_NUMBER (val);
1029 if (XINT (val) < '0' || XINT (val) > 127)
1030 error ("Invalid iso-final-char: %d", XINT (val));
1031 charset.iso_final = XINT (val);
1034 val = args[charset_arg_iso_revision];
1035 if (NILP (val))
1036 charset.iso_revision = -1;
1037 else
1039 CHECK_NUMBER (val);
1040 if (XINT (val) > 63)
1041 args_out_of_range (make_number (63), val);
1042 charset.iso_revision = XINT (val);
1045 val = args[charset_arg_emacs_mule_id];
1046 if (NILP (val))
1047 charset.emacs_mule_id = -1;
1048 else
1050 CHECK_NATNUM (val);
1051 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1052 error ("Invalid emacs-mule-id: %d", XINT (val));
1053 charset.emacs_mule_id = XINT (val);
1056 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1058 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1060 charset.unified_p = 0;
1062 bzero (charset.fast_map, sizeof (charset.fast_map));
1064 if (! NILP (args[charset_arg_code_offset]))
1066 val = args[charset_arg_code_offset];
1067 CHECK_NUMBER (val);
1069 charset.method = CHARSET_METHOD_OFFSET;
1070 charset.code_offset = XINT (val);
1072 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1073 charset.min_char = i + charset.code_offset;
1074 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1075 charset.max_char = i + charset.code_offset;
1076 if (charset.max_char > MAX_CHAR)
1077 error ("Unsupported max char: %d", charset.max_char);
1079 i = (charset.min_char >> 7) << 7;
1080 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1081 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1082 i = (i >> 12) << 12;
1083 for (; i <= charset.max_char; i += 0x1000)
1084 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1085 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1086 charset.ascii_compatible_p = 1;
1088 else if (! NILP (args[charset_arg_map]))
1090 val = args[charset_arg_map];
1091 ASET (attrs, charset_map, val);
1092 charset.method = CHARSET_METHOD_MAP;
1094 else if (! NILP (args[charset_arg_subset]))
1096 Lisp_Object parent;
1097 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1098 struct charset *parent_charset;
1100 val = args[charset_arg_subset];
1101 parent = Fcar (val);
1102 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1103 parent_min_code = Fnth (make_number (1), val);
1104 CHECK_NATNUM (parent_min_code);
1105 parent_max_code = Fnth (make_number (2), val);
1106 CHECK_NATNUM (parent_max_code);
1107 parent_code_offset = Fnth (make_number (3), val);
1108 CHECK_NUMBER (parent_code_offset);
1109 val = Fmake_vector (make_number (4), Qnil);
1110 ASET (val, 0, make_number (parent_charset->id));
1111 ASET (val, 1, parent_min_code);
1112 ASET (val, 2, parent_max_code);
1113 ASET (val, 3, parent_code_offset);
1114 ASET (attrs, charset_subset, val);
1116 charset.method = CHARSET_METHOD_SUBSET;
1117 /* Here, we just copy the parent's fast_map. It's not accurate,
1118 but at least it works for quickly detecting which character
1119 DOESN'T belong to this charset. */
1120 for (i = 0; i < 190; i++)
1121 charset.fast_map[i] = parent_charset->fast_map[i];
1123 /* We also copy these for parents. */
1124 charset.min_char = parent_charset->min_char;
1125 charset.max_char = parent_charset->max_char;
1127 else if (! NILP (args[charset_arg_superset]))
1129 val = args[charset_arg_superset];
1130 charset.method = CHARSET_METHOD_SUPERSET;
1131 val = Fcopy_sequence (val);
1132 ASET (attrs, charset_superset, val);
1134 charset.min_char = MAX_CHAR;
1135 charset.max_char = 0;
1136 for (; ! NILP (val); val = Fcdr (val))
1138 Lisp_Object elt, car_part, cdr_part;
1139 int this_id, offset;
1140 struct charset *this_charset;
1142 elt = Fcar (val);
1143 if (CONSP (elt))
1145 car_part = XCAR (elt);
1146 cdr_part = XCDR (elt);
1147 CHECK_CHARSET_GET_ID (car_part, this_id);
1148 CHECK_NUMBER (cdr_part);
1149 offset = XINT (cdr_part);
1151 else
1153 CHECK_CHARSET_GET_ID (elt, this_id);
1154 offset = 0;
1156 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1158 this_charset = CHARSET_FROM_ID (this_id);
1159 if (charset.min_char > this_charset->min_char)
1160 charset.min_char = this_charset->min_char;
1161 if (charset.max_char < this_charset->max_char)
1162 charset.max_char = this_charset->max_char;
1163 for (i = 0; i < 190; i++)
1164 charset.fast_map[i] |= this_charset->fast_map[i];
1167 else
1168 error ("None of :code-offset, :map, :parents are specified");
1170 val = args[charset_arg_unify_map];
1171 if (! NILP (val) && !STRINGP (val))
1172 CHECK_VECTOR (val);
1173 ASET (attrs, charset_unify_map, val);
1175 CHECK_LIST (args[charset_arg_plist]);
1176 ASET (attrs, charset_plist, args[charset_arg_plist]);
1178 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1179 &hash_code);
1180 if (charset.hash_index >= 0)
1182 new_definition_p = 0;
1183 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1184 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1186 else
1188 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1189 hash_code);
1190 if (charset_table_used == charset_table_size)
1192 struct charset *new_table
1193 = (struct charset *) xmalloc (sizeof (struct charset)
1194 * (charset_table_size + 16));
1195 bcopy (charset_table, new_table,
1196 sizeof (struct charset) * charset_table_size);
1197 charset_table_size += 16;
1198 charset_table = new_table;
1200 id = charset_table_used++;
1201 new_definition_p = 1;
1204 ASET (attrs, charset_id, make_number (id));
1205 charset.id = id;
1206 charset_table[id] = charset;
1208 if (charset.method == CHARSET_METHOD_MAP)
1210 load_charset (&charset, 0);
1211 charset_table[id] = charset;
1214 if (charset.iso_final >= 0)
1216 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1217 charset.iso_final) = id;
1218 if (new_definition_p)
1219 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1220 Fcons (make_number (id), Qnil));
1221 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1222 charset_jisx0201_roman = id;
1223 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1224 charset_jisx0208_1978 = id;
1225 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1226 charset_jisx0208 = id;
1229 if (charset.emacs_mule_id >= 0)
1231 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1232 if (charset.emacs_mule_id < 0xA0)
1233 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1234 else
1235 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1236 if (new_definition_p)
1237 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1238 Fcons (make_number (id), Qnil));
1241 if (new_definition_p)
1243 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1244 if (charset.supplementary_p)
1245 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1246 Fcons (make_number (id), Qnil));
1247 else
1249 Lisp_Object tail;
1251 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1253 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1255 if (cs->supplementary_p)
1256 break;
1258 if (EQ (tail, Vcharset_ordered_list))
1259 Vcharset_ordered_list = Fcons (make_number (id),
1260 Vcharset_ordered_list);
1261 else if (NILP (tail))
1262 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1263 Fcons (make_number (id), Qnil));
1264 else
1266 val = Fcons (XCAR (tail), XCDR (tail));
1267 XSETCDR (tail, val);
1268 XSETCAR (tail, make_number (id));
1271 charset_ordered_list_tick++;
1274 return Qnil;
1278 /* Same as Fdefine_charset_internal but arguments are more convenient
1279 to call from C (typically in syms_of_charset). This can define a
1280 charset of `offset' method only. Return the ID of the new
1281 charset. */
1283 static int
1284 define_charset_internal (name, dimension, code_space, min_code, max_code,
1285 iso_final, iso_revision, emacs_mule_id,
1286 ascii_compatible, supplementary,
1287 code_offset)
1288 Lisp_Object name;
1289 int dimension;
1290 unsigned char *code_space;
1291 unsigned min_code, max_code;
1292 int iso_final, iso_revision, emacs_mule_id;
1293 int ascii_compatible, supplementary;
1294 int code_offset;
1296 Lisp_Object args[charset_arg_max];
1297 Lisp_Object plist[14];
1298 Lisp_Object val;
1299 int i;
1301 args[charset_arg_name] = name;
1302 args[charset_arg_dimension] = make_number (dimension);
1303 val = Fmake_vector (make_number (8), make_number (0));
1304 for (i = 0; i < 8; i++)
1305 ASET (val, i, make_number (code_space[i]));
1306 args[charset_arg_code_space] = val;
1307 args[charset_arg_min_code] = make_number (min_code);
1308 args[charset_arg_max_code] = make_number (max_code);
1309 args[charset_arg_iso_final]
1310 = (iso_final < 0 ? Qnil : make_number (iso_final));
1311 args[charset_arg_iso_revision] = make_number (iso_revision);
1312 args[charset_arg_emacs_mule_id]
1313 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1314 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1315 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1316 args[charset_arg_invalid_code] = Qnil;
1317 args[charset_arg_code_offset] = make_number (code_offset);
1318 args[charset_arg_map] = Qnil;
1319 args[charset_arg_subset] = Qnil;
1320 args[charset_arg_superset] = Qnil;
1321 args[charset_arg_unify_map] = Qnil;
1323 plist[0] = intern (":name");
1324 plist[1] = args[charset_arg_name];
1325 plist[2] = intern (":dimension");
1326 plist[3] = args[charset_arg_dimension];
1327 plist[4] = intern (":code-space");
1328 plist[5] = args[charset_arg_code_space];
1329 plist[6] = intern (":iso-final-char");
1330 plist[7] = args[charset_arg_iso_final];
1331 plist[8] = intern (":emacs-mule-id");
1332 plist[9] = args[charset_arg_emacs_mule_id];
1333 plist[10] = intern (":ascii-compatible-p");
1334 plist[11] = args[charset_arg_ascii_compatible_p];
1335 plist[12] = intern (":code-offset");
1336 plist[13] = args[charset_arg_code_offset];
1338 args[charset_arg_plist] = Flist (14, plist);
1339 Fdefine_charset_internal (charset_arg_max, args);
1341 return XINT (CHARSET_SYMBOL_ID (name));
1345 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1346 Sdefine_charset_alias, 2, 2, 0,
1347 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1348 (alias, charset)
1349 Lisp_Object alias, charset;
1351 Lisp_Object attr;
1353 CHECK_CHARSET_GET_ATTR (charset, attr);
1354 Fputhash (alias, attr, Vcharset_hash_table);
1355 Vcharset_list = Fcons (alias, Vcharset_list);
1356 return Qnil;
1360 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1361 doc: /* Return the property list of CHARSET. */)
1362 (charset)
1363 Lisp_Object charset;
1365 Lisp_Object attrs;
1367 CHECK_CHARSET_GET_ATTR (charset, attrs);
1368 return CHARSET_ATTR_PLIST (attrs);
1372 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1373 doc: /* Set CHARSET's property list to PLIST. */)
1374 (charset, plist)
1375 Lisp_Object charset, plist;
1377 Lisp_Object attrs;
1379 CHECK_CHARSET_GET_ATTR (charset, attrs);
1380 CHARSET_ATTR_PLIST (attrs) = plist;
1381 return plist;
1385 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1386 doc: /* Unify characters of CHARSET with Unicode.
1387 This means reading the relevant file and installing the table defined
1388 by CHARSET's `:unify-map' property.
1390 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1391 the same meaning as the `:unify-map' attribute in the function
1392 `define-charset' (which see).
1394 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1395 (charset, unify_map, deunify)
1396 Lisp_Object charset, unify_map, deunify;
1398 int id;
1399 struct charset *cs;
1401 CHECK_CHARSET_GET_ID (charset, id);
1402 cs = CHARSET_FROM_ID (id);
1403 if (NILP (deunify)
1404 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1405 : ! CHARSET_UNIFIED_P (cs))
1406 return Qnil;
1408 CHARSET_UNIFIED_P (cs) = 0;
1409 if (NILP (deunify))
1411 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1412 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1413 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1414 if (NILP (unify_map))
1415 unify_map = CHARSET_UNIFY_MAP (cs);
1416 else
1418 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1419 signal_error ("Bad unify-map", unify_map);
1420 CHARSET_UNIFY_MAP (cs) = unify_map;
1422 if (NILP (Vchar_unify_table))
1423 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1424 char_table_set_range (Vchar_unify_table,
1425 cs->min_char, cs->max_char, charset);
1426 CHARSET_UNIFIED_P (cs) = 1;
1428 else if (CHAR_TABLE_P (Vchar_unify_table))
1430 int min_code = CHARSET_MIN_CODE (cs);
1431 int max_code = CHARSET_MAX_CODE (cs);
1432 int min_char = DECODE_CHAR (cs, min_code);
1433 int max_char = DECODE_CHAR (cs, max_code);
1435 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1438 return Qnil;
1441 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1442 Sget_unused_iso_final_char, 2, 2, 0,
1443 doc: /*
1444 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1445 DIMENSION is the number of bytes to represent a character: 1 or 2.
1446 CHARS is the number of characters in a dimension: 94 or 96.
1448 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1449 If there's no unused final char for the specified kind of charset,
1450 return nil. */)
1451 (dimension, chars)
1452 Lisp_Object dimension, chars;
1454 int final_char;
1456 CHECK_NUMBER (dimension);
1457 CHECK_NUMBER (chars);
1458 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1459 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1460 if (XINT (chars) != 94 && XINT (chars) != 96)
1461 args_out_of_range_3 (chars, make_number (94), make_number (96));
1462 for (final_char = '0'; final_char <= '?'; final_char++)
1463 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1464 break;
1465 return (final_char <= '?' ? make_number (final_char) : Qnil);
1468 static void
1469 check_iso_charset_parameter (dimension, chars, final_char)
1470 Lisp_Object dimension, chars, final_char;
1472 CHECK_NATNUM (dimension);
1473 CHECK_NATNUM (chars);
1474 CHECK_NATNUM (final_char);
1476 if (XINT (dimension) > 3)
1477 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1478 if (XINT (chars) != 94 && XINT (chars) != 96)
1479 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1480 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1481 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1485 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1486 4, 4, 0,
1487 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1489 On decoding by an ISO-2022 base coding system, when a charset
1490 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1491 if CHARSET is designated instead. */)
1492 (dimension, chars, final_char, charset)
1493 Lisp_Object dimension, chars, final_char, charset;
1495 int id;
1496 int chars_flag;
1498 CHECK_CHARSET_GET_ID (charset, id);
1499 check_iso_charset_parameter (dimension, chars, final_char);
1500 chars_flag = XINT (chars) == 96;
1501 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1502 return Qnil;
1506 /* Return information about charsets in the text at PTR of NBYTES
1507 bytes, which are NCHARS characters. The value is:
1509 0: Each character is represented by one byte. This is always
1510 true for a unibyte string. For a multibyte string, true if
1511 it contains only ASCII characters.
1513 1: No charsets other than ascii, control-1, and latin-1 are
1514 found.
1516 2: Otherwise.
1520 string_xstring_p (string)
1521 Lisp_Object string;
1523 const unsigned char *p = SDATA (string);
1524 const unsigned char *endp = p + SBYTES (string);
1526 if (SCHARS (string) == SBYTES (string))
1527 return 0;
1529 while (p < endp)
1531 int c = STRING_CHAR_ADVANCE (p);
1533 if (c >= 0x100)
1534 return 2;
1536 return 1;
1540 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1542 CHARSETS is a vector. If Nth element is non-nil, it means the
1543 charset whose id is N is already found.
1545 It may lookup a translation table TABLE if supplied. */
1547 static void
1548 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1549 const unsigned char *ptr;
1550 EMACS_INT nchars, nbytes;
1551 Lisp_Object charsets, table;
1552 int multibyte;
1554 const unsigned char *pend = ptr + nbytes;
1556 if (nchars == nbytes)
1558 if (multibyte)
1559 ASET (charsets, charset_ascii, Qt);
1560 else
1561 while (ptr < pend)
1563 int c = *ptr++;
1565 if (!NILP (table))
1566 c = translate_char (table, c);
1567 if (ASCII_BYTE_P (c))
1568 ASET (charsets, charset_ascii, Qt);
1569 else
1570 ASET (charsets, charset_eight_bit, Qt);
1573 else
1575 while (ptr < pend)
1577 int c = STRING_CHAR_ADVANCE (ptr);
1578 struct charset *charset;
1580 if (!NILP (table))
1581 c = translate_char (table, c);
1582 charset = CHAR_CHARSET (c);
1583 ASET (charsets, CHARSET_ID (charset), Qt);
1588 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1589 2, 3, 0,
1590 doc: /* Return a list of charsets in the region between BEG and END.
1591 BEG and END are buffer positions.
1592 Optional arg TABLE if non-nil is a translation table to look up.
1594 If the current buffer is unibyte, the returned list may contain
1595 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1596 (beg, end, table)
1597 Lisp_Object beg, end, table;
1599 Lisp_Object charsets;
1600 EMACS_INT from, from_byte, to, stop, stop_byte;
1601 int i;
1602 Lisp_Object val;
1603 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1605 validate_region (&beg, &end);
1606 from = XFASTINT (beg);
1607 stop = to = XFASTINT (end);
1609 if (from < GPT && GPT < to)
1611 stop = GPT;
1612 stop_byte = GPT_BYTE;
1614 else
1615 stop_byte = CHAR_TO_BYTE (stop);
1617 from_byte = CHAR_TO_BYTE (from);
1619 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1620 while (1)
1622 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1623 stop_byte - from_byte, charsets, table,
1624 multibyte);
1625 if (stop < to)
1627 from = stop, from_byte = stop_byte;
1628 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1630 else
1631 break;
1634 val = Qnil;
1635 for (i = charset_table_used - 1; i >= 0; i--)
1636 if (!NILP (AREF (charsets, i)))
1637 val = Fcons (CHARSET_NAME (charset_table + i), val);
1638 return val;
1641 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1642 1, 2, 0,
1643 doc: /* Return a list of charsets in STR.
1644 Optional arg TABLE if non-nil is a translation table to look up.
1646 If STR is unibyte, the returned list may contain
1647 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1648 (str, table)
1649 Lisp_Object str, table;
1651 Lisp_Object charsets;
1652 int i;
1653 Lisp_Object val;
1655 CHECK_STRING (str);
1657 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1658 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1659 charsets, table,
1660 STRING_MULTIBYTE (str));
1661 val = Qnil;
1662 for (i = charset_table_used - 1; i >= 0; i--)
1663 if (!NILP (AREF (charsets, i)))
1664 val = Fcons (CHARSET_NAME (charset_table + i), val);
1665 return val;
1670 /* Return a unified character code for C (>= 0x110000). VAL is a
1671 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1672 charset symbol. */
1674 maybe_unify_char (c, val)
1675 int c;
1676 Lisp_Object val;
1678 struct charset *charset;
1680 if (INTEGERP (val))
1681 return XINT (val);
1682 if (NILP (val))
1683 return c;
1685 CHECK_CHARSET_GET_CHARSET (val, charset);
1686 load_charset (charset, 1);
1687 if (! inhibit_load_charset_map)
1689 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1690 if (! NILP (val))
1691 c = XINT (val);
1693 else
1695 int code_index = c - CHARSET_CODE_OFFSET (charset);
1696 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1698 if (unified > 0)
1699 c = unified;
1701 return c;
1705 /* Return a character correponding to the code-point CODE of
1706 CHARSET. */
1709 decode_char (charset, code)
1710 struct charset *charset;
1711 unsigned code;
1713 int c, char_index;
1714 enum charset_method method = CHARSET_METHOD (charset);
1716 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1717 return -1;
1719 if (method == CHARSET_METHOD_SUBSET)
1721 Lisp_Object subset_info;
1723 subset_info = CHARSET_SUBSET (charset);
1724 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1725 code -= XINT (AREF (subset_info, 3));
1726 if (code < XFASTINT (AREF (subset_info, 1))
1727 || code > XFASTINT (AREF (subset_info, 2)))
1728 c = -1;
1729 else
1730 c = DECODE_CHAR (charset, code);
1732 else if (method == CHARSET_METHOD_SUPERSET)
1734 Lisp_Object parents;
1736 parents = CHARSET_SUPERSET (charset);
1737 c = -1;
1738 for (; CONSP (parents); parents = XCDR (parents))
1740 int id = XINT (XCAR (XCAR (parents)));
1741 int code_offset = XINT (XCDR (XCAR (parents)));
1742 unsigned this_code = code - code_offset;
1744 charset = CHARSET_FROM_ID (id);
1745 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1746 break;
1749 else
1751 char_index = CODE_POINT_TO_INDEX (charset, code);
1752 if (char_index < 0)
1753 return -1;
1755 if (method == CHARSET_METHOD_MAP)
1757 Lisp_Object decoder;
1759 decoder = CHARSET_DECODER (charset);
1760 if (! VECTORP (decoder))
1762 load_charset (charset, 1);
1763 decoder = CHARSET_DECODER (charset);
1765 if (VECTORP (decoder))
1766 c = XINT (AREF (decoder, char_index));
1767 else
1768 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1770 else /* method == CHARSET_METHOD_OFFSET */
1772 c = char_index + CHARSET_CODE_OFFSET (charset);
1773 if (CHARSET_UNIFIED_P (charset)
1774 && c > MAX_UNICODE_CHAR)
1775 MAYBE_UNIFY_CHAR (c);
1779 return c;
1782 /* Variable used temporarily by the macro ENCODE_CHAR. */
1783 Lisp_Object charset_work;
1785 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1786 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1787 use CHARSET's strict_max_char instead of max_char. */
1789 unsigned
1790 encode_char (charset, c)
1791 struct charset *charset;
1792 int c;
1794 unsigned code;
1795 enum charset_method method = CHARSET_METHOD (charset);
1797 if (CHARSET_UNIFIED_P (charset))
1799 Lisp_Object deunifier, deunified;
1800 int code_index = -1;
1802 deunifier = CHARSET_DEUNIFIER (charset);
1803 if (! CHAR_TABLE_P (deunifier))
1805 load_charset (charset, 2);
1806 deunifier = CHARSET_DEUNIFIER (charset);
1808 if (CHAR_TABLE_P (deunifier))
1810 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1812 if (INTEGERP (deunified))
1813 code_index = XINT (deunified);
1815 else
1817 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1819 if (code_index >= 0)
1820 c = CHARSET_CODE_OFFSET (charset) + code_index;
1823 if (method == CHARSET_METHOD_SUBSET)
1825 Lisp_Object subset_info;
1826 struct charset *this_charset;
1828 subset_info = CHARSET_SUBSET (charset);
1829 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1830 code = ENCODE_CHAR (this_charset, c);
1831 if (code == CHARSET_INVALID_CODE (this_charset)
1832 || code < XFASTINT (AREF (subset_info, 1))
1833 || code > XFASTINT (AREF (subset_info, 2)))
1834 return CHARSET_INVALID_CODE (charset);
1835 code += XINT (AREF (subset_info, 3));
1836 return code;
1839 if (method == CHARSET_METHOD_SUPERSET)
1841 Lisp_Object parents;
1843 parents = CHARSET_SUPERSET (charset);
1844 for (; CONSP (parents); parents = XCDR (parents))
1846 int id = XINT (XCAR (XCAR (parents)));
1847 int code_offset = XINT (XCDR (XCAR (parents)));
1848 struct charset *this_charset = CHARSET_FROM_ID (id);
1850 code = ENCODE_CHAR (this_charset, c);
1851 if (code != CHARSET_INVALID_CODE (this_charset))
1852 return code + code_offset;
1854 return CHARSET_INVALID_CODE (charset);
1857 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1858 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1859 return CHARSET_INVALID_CODE (charset);
1861 if (method == CHARSET_METHOD_MAP)
1863 Lisp_Object encoder;
1864 Lisp_Object val;
1866 encoder = CHARSET_ENCODER (charset);
1867 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1869 load_charset (charset, 2);
1870 encoder = CHARSET_ENCODER (charset);
1872 if (CHAR_TABLE_P (encoder))
1874 val = CHAR_TABLE_REF (encoder, c);
1875 if (NILP (val))
1876 return CHARSET_INVALID_CODE (charset);
1877 code = XINT (val);
1878 if (! CHARSET_COMPACT_CODES_P (charset))
1879 code = INDEX_TO_CODE_POINT (charset, code);
1881 else
1883 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1884 code = INDEX_TO_CODE_POINT (charset, code);
1887 else /* method == CHARSET_METHOD_OFFSET */
1889 int code_index = c - CHARSET_CODE_OFFSET (charset);
1891 code = INDEX_TO_CODE_POINT (charset, code_index);
1894 return code;
1898 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1899 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1900 Return nil if CODE-POINT is not valid in CHARSET.
1902 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1904 Optional argument RESTRICTION specifies a way to map the pair of CCS
1905 and CODE-POINT to a character. Currently not supported and just ignored. */)
1906 (charset, code_point, restriction)
1907 Lisp_Object charset, code_point, restriction;
1909 int c, id;
1910 unsigned code;
1911 struct charset *charsetp;
1913 CHECK_CHARSET_GET_ID (charset, id);
1914 if (CONSP (code_point))
1916 CHECK_NATNUM_CAR (code_point);
1917 CHECK_NATNUM_CDR (code_point);
1918 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1920 else
1922 CHECK_NATNUM (code_point);
1923 code = XINT (code_point);
1925 charsetp = CHARSET_FROM_ID (id);
1926 c = DECODE_CHAR (charsetp, code);
1927 return (c >= 0 ? make_number (c) : Qnil);
1931 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1932 doc: /* Encode the character CH into a code-point of CHARSET.
1933 Return nil if CHARSET doesn't include CH.
1935 Optional argument RESTRICTION specifies a way to map CH to a
1936 code-point in CCS. Currently not supported and just ignored. */)
1937 (ch, charset, restriction)
1938 Lisp_Object ch, charset, restriction;
1940 int id;
1941 unsigned code;
1942 struct charset *charsetp;
1944 CHECK_CHARSET_GET_ID (charset, id);
1945 CHECK_NATNUM (ch);
1946 charsetp = CHARSET_FROM_ID (id);
1947 code = ENCODE_CHAR (charsetp, XINT (ch));
1948 if (code == CHARSET_INVALID_CODE (charsetp))
1949 return Qnil;
1950 if (code > 0x7FFFFFF)
1951 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1952 return make_number (code);
1956 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1957 doc:
1958 /* Return a character of CHARSET whose position codes are CODEn.
1960 CODE1 through CODE4 are optional, but if you don't supply sufficient
1961 position codes, it is assumed that the minimum code in each dimension
1962 is specified. */)
1963 (charset, code1, code2, code3, code4)
1964 Lisp_Object charset, code1, code2, code3, code4;
1966 int id, dimension;
1967 struct charset *charsetp;
1968 unsigned code;
1969 int c;
1971 CHECK_CHARSET_GET_ID (charset, id);
1972 charsetp = CHARSET_FROM_ID (id);
1974 dimension = CHARSET_DIMENSION (charsetp);
1975 if (NILP (code1))
1976 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1977 ? 0 : CHARSET_MIN_CODE (charsetp));
1978 else
1980 CHECK_NATNUM (code1);
1981 if (XFASTINT (code1) >= 0x100)
1982 args_out_of_range (make_number (0xFF), code1);
1983 code = XFASTINT (code1);
1985 if (dimension > 1)
1987 code <<= 8;
1988 if (NILP (code2))
1989 code |= charsetp->code_space[(dimension - 2) * 4];
1990 else
1992 CHECK_NATNUM (code2);
1993 if (XFASTINT (code2) >= 0x100)
1994 args_out_of_range (make_number (0xFF), code2);
1995 code |= XFASTINT (code2);
1998 if (dimension > 2)
2000 code <<= 8;
2001 if (NILP (code3))
2002 code |= charsetp->code_space[(dimension - 3) * 4];
2003 else
2005 CHECK_NATNUM (code3);
2006 if (XFASTINT (code3) >= 0x100)
2007 args_out_of_range (make_number (0xFF), code3);
2008 code |= XFASTINT (code3);
2011 if (dimension > 3)
2013 code <<= 8;
2014 if (NILP (code4))
2015 code |= charsetp->code_space[0];
2016 else
2018 CHECK_NATNUM (code4);
2019 if (XFASTINT (code4) >= 0x100)
2020 args_out_of_range (make_number (0xFF), code4);
2021 code |= XFASTINT (code4);
2028 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2029 code &= 0x7F7F7F7F;
2030 c = DECODE_CHAR (charsetp, code);
2031 if (c < 0)
2032 error ("Invalid code(s)");
2033 return make_number (c);
2037 /* Return the first charset in CHARSET_LIST that contains C.
2038 CHARSET_LIST is a list of charset IDs. If it is nil, use
2039 Vcharset_ordered_list. */
2041 struct charset *
2042 char_charset (c, charset_list, code_return)
2043 int c;
2044 Lisp_Object charset_list;
2045 unsigned *code_return;
2047 int maybe_null = 0;
2049 if (NILP (charset_list))
2050 charset_list = Vcharset_ordered_list;
2051 else
2052 maybe_null = 1;
2054 while (CONSP (charset_list))
2056 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2057 unsigned code = ENCODE_CHAR (charset, c);
2059 if (code != CHARSET_INVALID_CODE (charset))
2061 if (code_return)
2062 *code_return = code;
2063 return charset;
2065 charset_list = XCDR (charset_list);
2066 if (c <= MAX_UNICODE_CHAR
2067 && EQ (charset_list, Vcharset_non_preferred_head))
2068 return CHARSET_FROM_ID (charset_unicode);
2070 return (maybe_null ? NULL
2071 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2072 : CHARSET_FROM_ID (charset_eight_bit));
2076 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2077 doc:
2078 /*Return list of charset and one to four position-codes of CH.
2079 The charset is decided by the current priority order of charsets.
2080 A position-code is a byte value of each dimension of the code-point of
2081 CH in the charset. */)
2082 (ch)
2083 Lisp_Object ch;
2085 struct charset *charset;
2086 int c, dimension;
2087 unsigned code;
2088 Lisp_Object val;
2090 CHECK_CHARACTER (ch);
2091 c = XFASTINT (ch);
2092 charset = CHAR_CHARSET (c);
2093 if (! charset)
2094 abort ();
2095 code = ENCODE_CHAR (charset, c);
2096 if (code == CHARSET_INVALID_CODE (charset))
2097 abort ();
2098 dimension = CHARSET_DIMENSION (charset);
2099 for (val = Qnil; dimension > 0; dimension--)
2101 val = Fcons (make_number (code & 0xFF), val);
2102 code >>= 8;
2104 return Fcons (CHARSET_NAME (charset), val);
2108 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2109 doc: /* Return the charset of highest priority that contains CH.
2110 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2111 from which to find the charset. It may also be a coding system. In
2112 that case, find the charset from what supported by that coding system. */)
2113 (ch, restriction)
2114 Lisp_Object ch, restriction;
2116 struct charset *charset;
2118 CHECK_CHARACTER (ch);
2119 if (NILP (restriction))
2120 charset = CHAR_CHARSET (XINT (ch));
2121 else
2123 Lisp_Object charset_list;
2125 if (CONSP (restriction))
2127 for (charset_list = Qnil; CONSP (restriction);
2128 restriction = XCDR (restriction))
2130 int id;
2132 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2133 charset_list = Fcons (make_number (id), charset_list);
2135 charset_list = Fnreverse (charset_list);
2137 else
2138 charset_list = coding_system_charset_list (restriction);
2139 charset = char_charset (XINT (ch), charset_list, NULL);
2140 if (! charset)
2141 return Qnil;
2143 return (CHARSET_NAME (charset));
2147 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2148 doc: /*
2149 Return charset of a character in the current buffer at position POS.
2150 If POS is nil, it defauls to the current point.
2151 If POS is out of range, the value is nil. */)
2152 (pos)
2153 Lisp_Object pos;
2155 Lisp_Object ch;
2156 struct charset *charset;
2158 ch = Fchar_after (pos);
2159 if (! INTEGERP (ch))
2160 return ch;
2161 charset = CHAR_CHARSET (XINT (ch));
2162 return (CHARSET_NAME (charset));
2166 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2167 doc: /*
2168 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2170 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2171 by their DIMENSION, CHARS, and FINAL-CHAR,
2172 whereas Emacs distinguishes them by charset symbol.
2173 See the documentation of the function `charset-info' for the meanings of
2174 DIMENSION, CHARS, and FINAL-CHAR. */)
2175 (dimension, chars, final_char)
2176 Lisp_Object dimension, chars, final_char;
2178 int id;
2179 int chars_flag;
2181 check_iso_charset_parameter (dimension, chars, final_char);
2182 chars_flag = XFASTINT (chars) == 96;
2183 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2184 XFASTINT (final_char));
2185 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2189 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2190 0, 0, 0,
2191 doc: /*
2192 Internal use only.
2193 Clear temporary charset mapping tables.
2194 It should be called only from temacs invoked for dumping. */)
2197 int i;
2198 struct charset *charset;
2199 Lisp_Object attrs;
2201 if (temp_charset_work)
2203 free (temp_charset_work);
2204 temp_charset_work = NULL;
2207 if (CHAR_TABLE_P (Vchar_unify_table))
2208 Foptimize_char_table (Vchar_unify_table, Qnil);
2210 return Qnil;
2213 DEFUN ("charset-priority-list", Fcharset_priority_list,
2214 Scharset_priority_list, 0, 1, 0,
2215 doc: /* Return the list of charsets ordered by priority.
2216 HIGHESTP non-nil means just return the highest priority one. */)
2217 (highestp)
2218 Lisp_Object highestp;
2220 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2222 if (!NILP (highestp))
2223 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2225 while (!NILP (list))
2227 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2228 list = XCDR (list);
2230 return Fnreverse (val);
2233 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2234 1, MANY, 0,
2235 doc: /* Assign higher priority to the charsets given as arguments.
2236 usage: (set-charset-priority &rest charsets) */)
2237 (nargs, args)
2238 int nargs;
2239 Lisp_Object *args;
2241 Lisp_Object new_head, old_list, arglist[2];
2242 Lisp_Object list_2022, list_emacs_mule;
2243 int i, id;
2245 old_list = Fcopy_sequence (Vcharset_ordered_list);
2246 new_head = Qnil;
2247 for (i = 0; i < nargs; i++)
2249 CHECK_CHARSET_GET_ID (args[i], id);
2250 if (! NILP (Fmemq (make_number (id), old_list)))
2252 old_list = Fdelq (make_number (id), old_list);
2253 new_head = Fcons (make_number (id), new_head);
2256 arglist[0] = Fnreverse (new_head);
2257 arglist[1] = Vcharset_non_preferred_head = old_list;
2258 Vcharset_ordered_list = Fnconc (2, arglist);
2259 charset_ordered_list_tick++;
2261 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2262 CONSP (old_list); old_list = XCDR (old_list))
2264 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2265 list_2022 = Fcons (XCAR (old_list), list_2022);
2266 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2267 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2269 Viso_2022_charset_list = Fnreverse (list_2022);
2270 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2272 return Qnil;
2275 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2276 0, 1, 0,
2277 doc: /* Internal use only.
2278 Return charset identification number of CHARSET. */)
2279 (charset)
2280 Lisp_Object charset;
2282 int id;
2284 CHECK_CHARSET_GET_ID (charset, id);
2285 return make_number (id);
2289 void
2290 init_charset ()
2292 Lisp_Object tempdir;
2293 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2294 if (access (SDATA (tempdir), 0) < 0)
2296 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2297 Emacs will not function correctly without the character map files.\n\
2298 Please check your installation!\n",
2299 tempdir);
2300 /* TODO should this be a fatal error? (Bug#909) */
2303 Vcharset_map_path = Fcons (tempdir, Qnil);
2307 void
2308 init_charset_once ()
2310 int i, j, k;
2312 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2313 for (j = 0; j < ISO_MAX_CHARS; j++)
2314 for (k = 0; k < ISO_MAX_FINAL; k++)
2315 iso_charset_table[i][j][k] = -1;
2317 for (i = 0; i < 256; i++)
2318 emacs_mule_charset[i] = NULL;
2320 charset_jisx0201_roman = -1;
2321 charset_jisx0208_1978 = -1;
2322 charset_jisx0208 = -1;
2324 for (i = 0; i < 128; i++)
2325 unibyte_to_multibyte_table[i] = i;
2326 for (; i < 256; i++)
2327 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2330 #ifdef emacs
2332 void
2333 syms_of_charset ()
2335 DEFSYM (Qcharsetp, "charsetp");
2337 DEFSYM (Qascii, "ascii");
2338 DEFSYM (Qunicode, "unicode");
2339 DEFSYM (Qemacs, "emacs");
2340 DEFSYM (Qeight_bit, "eight-bit");
2341 DEFSYM (Qiso_8859_1, "iso-8859-1");
2343 DEFSYM (Qgl, "gl");
2344 DEFSYM (Qgr, "gr");
2346 staticpro (&Vcharset_ordered_list);
2347 Vcharset_ordered_list = Qnil;
2349 staticpro (&Viso_2022_charset_list);
2350 Viso_2022_charset_list = Qnil;
2352 staticpro (&Vemacs_mule_charset_list);
2353 Vemacs_mule_charset_list = Qnil;
2355 /* Don't staticpro them here. It's done in syms_of_fns. */
2356 QCtest = intern (":test");
2357 Qeq = intern ("eq");
2359 staticpro (&Vcharset_hash_table);
2361 Lisp_Object args[2];
2362 args[0] = QCtest;
2363 args[1] = Qeq;
2364 Vcharset_hash_table = Fmake_hash_table (2, args);
2367 charset_table_size = 128;
2368 charset_table = ((struct charset *)
2369 xmalloc (sizeof (struct charset) * charset_table_size));
2370 charset_table_used = 0;
2372 defsubr (&Scharsetp);
2373 defsubr (&Smap_charset_chars);
2374 defsubr (&Sdefine_charset_internal);
2375 defsubr (&Sdefine_charset_alias);
2376 defsubr (&Scharset_plist);
2377 defsubr (&Sset_charset_plist);
2378 defsubr (&Sunify_charset);
2379 defsubr (&Sget_unused_iso_final_char);
2380 defsubr (&Sdeclare_equiv_charset);
2381 defsubr (&Sfind_charset_region);
2382 defsubr (&Sfind_charset_string);
2383 defsubr (&Sdecode_char);
2384 defsubr (&Sencode_char);
2385 defsubr (&Ssplit_char);
2386 defsubr (&Smake_char);
2387 defsubr (&Schar_charset);
2388 defsubr (&Scharset_after);
2389 defsubr (&Siso_charset);
2390 defsubr (&Sclear_charset_maps);
2391 defsubr (&Scharset_priority_list);
2392 defsubr (&Sset_charset_priority);
2393 defsubr (&Scharset_id_internal);
2395 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2396 doc: /* *List of directories to search for charset map files. */);
2397 Vcharset_map_path = Qnil;
2399 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2400 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2401 inhibit_load_charset_map = 0;
2403 DEFVAR_LISP ("charset-list", &Vcharset_list,
2404 doc: /* List of all charsets ever defined. */);
2405 Vcharset_list = Qnil;
2407 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2408 doc: /* ISO639 language mnemonic symbol for the current language environment.
2409 If the current language environment is for multiple languages (e.g. "Latin-1"),
2410 the value may be a list of mnemonics. */);
2411 Vcurrent_iso639_language = Qnil;
2413 charset_ascii
2414 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2415 0, 127, 'B', -1, 0, 1, 0, 0);
2416 charset_iso_8859_1
2417 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2418 0, 255, -1, -1, -1, 1, 0, 0);
2419 charset_unicode
2420 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2421 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2422 charset_emacs
2423 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2424 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2425 charset_eight_bit
2426 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2427 128, 255, -1, 0, -1, 0, 1,
2428 MAX_5_BYTE_CHAR + 1);
2431 #endif /* emacs */
2433 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2434 (do not change this comment) */