lisp/avoid.el: Fix typos in docstrings.
[emacs.git] / src / charset.c
blob9ea1366d73a927468637487e74b61977d74ed000
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28 #include <config.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <ctype.h>
34 #include <sys/types.h>
35 #include <setjmp.h>
36 #include "lisp.h"
37 #include "character.h"
38 #include "charset.h"
39 #include "coding.h"
40 #include "disptab.h"
41 #include "buffer.h"
43 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
45 A coded character set ("charset" hereafter) is a meaningful
46 collection (i.e. language, culture, functionality, etc.) of
47 characters. Emacs handles multiple charsets at once. In Emacs Lisp
48 code, a charset is represented by a symbol. In C code, a charset is
49 represented by its ID number or by a pointer to a struct charset.
51 The actual information about each charset is stored in two places.
52 Lispy information is stored in the hash table Vcharset_hash_table as
53 a vector (charset attributes). The other information is stored in
54 charset_table as a struct charset.
58 /* List of all charsets. This variable is used only from Emacs
59 Lisp. */
60 Lisp_Object Vcharset_list;
62 /* Hash table that contains attributes of each charset. Keys are
63 charset symbols, and values are vectors of charset attributes. */
64 Lisp_Object Vcharset_hash_table;
66 /* Table of struct charset. */
67 struct charset *charset_table;
69 static int charset_table_size;
70 static int charset_table_used;
72 Lisp_Object Qcharsetp;
74 /* Special charset symbols. */
75 Lisp_Object Qascii;
76 Lisp_Object Qeight_bit;
77 Lisp_Object Qiso_8859_1;
78 Lisp_Object Qunicode;
79 Lisp_Object Qemacs;
81 /* The corresponding charsets. */
82 int charset_ascii;
83 int charset_eight_bit;
84 int charset_iso_8859_1;
85 int charset_unicode;
86 int charset_emacs;
88 /* The other special charsets. */
89 int charset_jisx0201_roman;
90 int charset_jisx0208_1978;
91 int charset_jisx0208;
92 int charset_ksc5601;
94 /* Value of charset attribute `charset-iso-plane'. */
95 Lisp_Object Qgl, Qgr;
97 /* Charset of unibyte characters. */
98 int charset_unibyte;
100 /* List of charsets ordered by the priority. */
101 Lisp_Object Vcharset_ordered_list;
103 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
104 charsets. */
105 Lisp_Object Vcharset_non_preferred_head;
107 /* Incremented everytime we change Vcharset_ordered_list. This is
108 unsigned short so that it fits in Lisp_Int and never matches
109 -1. */
110 unsigned short charset_ordered_list_tick;
112 /* List of iso-2022 charsets. */
113 Lisp_Object Viso_2022_charset_list;
115 /* List of emacs-mule charsets. */
116 Lisp_Object Vemacs_mule_charset_list;
118 int emacs_mule_charset[256];
120 /* Mapping table from ISO2022's charset (specified by DIMENSION,
121 CHARS, and FINAL-CHAR) to Emacs' charset. */
122 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
124 Lisp_Object Vcharset_map_path;
126 /* If nonzero, don't load charset maps. */
127 int inhibit_load_charset_map;
129 Lisp_Object Vcurrent_iso639_language;
131 /* Defined in chartab.c */
132 extern void
133 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
134 Lisp_Object function, Lisp_Object table,
135 Lisp_Object arg, struct charset *charset,
136 unsigned from, unsigned to));
138 #define CODE_POINT_TO_INDEX(charset, code) \
139 ((charset)->code_linear_p \
140 ? (code) - (charset)->min_code \
141 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
142 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
143 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
144 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
145 ? (((((code) >> 24) - (charset)->code_space[12]) \
146 * (charset)->code_space[11]) \
147 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
148 * (charset)->code_space[7]) \
149 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
150 * (charset)->code_space[3]) \
151 + (((code) & 0xFF) - (charset)->code_space[0]) \
152 - ((charset)->char_index_offset)) \
153 : -1)
156 /* Convert the character index IDX to code-point CODE for CHARSET.
157 It is assumed that IDX is in a valid range. */
159 #define INDEX_TO_CODE_POINT(charset, idx) \
160 ((charset)->code_linear_p \
161 ? (idx) + (charset)->min_code \
162 : (idx += (charset)->char_index_offset, \
163 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
164 | (((charset)->code_space[4] \
165 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
166 << 8) \
167 | (((charset)->code_space[8] \
168 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
169 << 16) \
170 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
171 << 24))))
173 /* Structure to hold mapping tables for a charset. Used by temacs
174 invoked for dumping. */
176 static struct
178 /* The current charset for which the following tables are setup. */
179 struct charset *current;
181 /* 1 iff the following table is used for encoder. */
182 short for_encoder;
184 /* When the following table is used for encoding, mininum and
185 maxinum character of the current charset. */
186 int min_char, max_char;
188 /* A Unicode character correspoinding to the code indice 0 (i.e. the
189 minimum code-point) of the current charset, or -1 if the code
190 indice 0 is not a Unicode character. This is checked when
191 table.encoder[CHAR] is zero. */
192 int zero_index_char;
194 union {
195 /* Table mapping code-indices (not code-points) of the current
196 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
197 doesn't belong to the current charset. */
198 int decoder[0x10000];
199 /* Table mapping Unicode characters to code-indices of the current
200 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
201 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
202 (0x20000..0x2FFFF). Note that there is no charset map that
203 uses both SMP and SIP. */
204 unsigned short encoder[0x20000];
205 } table;
206 } *temp_charset_work;
208 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
209 do { \
210 if ((CODE) == 0) \
211 temp_charset_work->zero_index_char = (C); \
212 else if ((C) < 0x20000) \
213 temp_charset_work->table.encoder[(C)] = (CODE); \
214 else \
215 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
216 } while (0)
218 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
219 ((C) == temp_charset_work->zero_index_char ? 0 \
220 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
221 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
222 : temp_charset_work->table.encoder[(C) - 0x10000] \
223 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
225 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
226 (temp_charset_work->table.decoder[(CODE)] = (C))
228 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
229 (temp_charset_work->table.decoder[(CODE)])
232 /* Set to 1 to warn that a charset map is loaded and thus a buffer
233 text and a string data may be relocated. */
234 int charset_map_loaded;
236 struct charset_map_entries
238 struct {
239 unsigned from, to;
240 int c;
241 } entry[0x10000];
242 struct charset_map_entries *next;
245 /* Load the mapping information of CHARSET from ENTRIES for
246 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
247 encoding (CONTROL_FLAG == 2).
249 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
250 and CHARSET->fast_map.
252 If CONTROL_FLAG is 1, setup the following tables according to
253 CHARSET->method and inhibit_load_charset_map.
255 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
256 ----------------------+--------------------+---------------------------
257 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
258 ----------------------+--------------------+---------------------------
259 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
261 If CONTROL_FLAG is 2, setup the following tables.
263 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
264 ----------------------+--------------------+---------------------------
265 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
266 ----------------------+--------------------+--------------------------
267 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
270 static void
271 load_charset_map (charset, entries, n_entries, control_flag)
272 struct charset *charset;
273 struct charset_map_entries *entries;
274 int n_entries;
275 int control_flag;
277 Lisp_Object vec, table;
278 unsigned max_code = CHARSET_MAX_CODE (charset);
279 int ascii_compatible_p = charset->ascii_compatible_p;
280 int min_char, max_char, nonascii_min_char;
281 int i;
282 unsigned char *fast_map = charset->fast_map;
284 if (n_entries <= 0)
285 return;
287 if (control_flag)
289 if (! inhibit_load_charset_map)
291 if (control_flag == 1)
293 if (charset->method == CHARSET_METHOD_MAP)
295 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
297 vec = CHARSET_DECODER (charset)
298 = Fmake_vector (make_number (n), make_number (-1));
300 else
302 char_table_set_range (Vchar_unify_table,
303 charset->min_char, charset->max_char,
304 Qnil);
307 else
309 table = Fmake_char_table (Qnil, Qnil);
310 if (charset->method == CHARSET_METHOD_MAP)
311 CHARSET_ENCODER (charset) = table;
312 else
313 CHARSET_DEUNIFIER (charset) = table;
316 else
318 if (! temp_charset_work)
319 temp_charset_work = malloc (sizeof (*temp_charset_work));
320 if (control_flag == 1)
322 memset (temp_charset_work->table.decoder, -1,
323 sizeof (int) * 0x10000);
325 else
327 memset (temp_charset_work->table.encoder, 0,
328 sizeof (unsigned short) * 0x20000);
329 temp_charset_work->zero_index_char = -1;
331 temp_charset_work->current = charset;
332 temp_charset_work->for_encoder = (control_flag == 2);
333 control_flag += 2;
335 charset_map_loaded = 1;
338 min_char = max_char = entries->entry[0].c;
339 nonascii_min_char = MAX_CHAR;
340 for (i = 0; i < n_entries; i++)
342 unsigned from, to;
343 int from_index, to_index;
344 int from_c, to_c;
345 int idx = i % 0x10000;
347 if (i > 0 && idx == 0)
348 entries = entries->next;
349 from = entries->entry[idx].from;
350 to = entries->entry[idx].to;
351 from_c = entries->entry[idx].c;
352 from_index = CODE_POINT_TO_INDEX (charset, from);
353 if (from == to)
355 to_index = from_index;
356 to_c = from_c;
358 else
360 to_index = CODE_POINT_TO_INDEX (charset, to);
361 to_c = from_c + (to_index - from_index);
363 if (from_index < 0 || to_index < 0)
364 continue;
366 if (to_c > max_char)
367 max_char = to_c;
368 else if (from_c < min_char)
369 min_char = from_c;
371 if (control_flag == 1)
373 if (charset->method == CHARSET_METHOD_MAP)
374 for (; from_index <= to_index; from_index++, from_c++)
375 ASET (vec, from_index, make_number (from_c));
376 else
377 for (; from_index <= to_index; from_index++, from_c++)
378 CHAR_TABLE_SET (Vchar_unify_table,
379 CHARSET_CODE_OFFSET (charset) + from_index,
380 make_number (from_c));
382 else if (control_flag == 2)
384 if (charset->method == CHARSET_METHOD_MAP
385 && CHARSET_COMPACT_CODES_P (charset))
386 for (; from_index <= to_index; from_index++, from_c++)
388 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
390 if (NILP (CHAR_TABLE_REF (table, from_c)))
391 CHAR_TABLE_SET (table, from_c, make_number (code));
393 else
394 for (; from_index <= to_index; from_index++, from_c++)
396 if (NILP (CHAR_TABLE_REF (table, from_c)))
397 CHAR_TABLE_SET (table, from_c, make_number (from_index));
400 else if (control_flag == 3)
401 for (; from_index <= to_index; from_index++, from_c++)
402 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
403 else if (control_flag == 4)
404 for (; from_index <= to_index; from_index++, from_c++)
405 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
406 else /* control_flag == 0 */
408 if (ascii_compatible_p)
410 if (! ASCII_BYTE_P (from_c))
412 if (from_c < nonascii_min_char)
413 nonascii_min_char = from_c;
415 else if (! ASCII_BYTE_P (to_c))
417 nonascii_min_char = 0x80;
421 for (; from_c <= to_c; from_c++)
422 CHARSET_FAST_MAP_SET (from_c, fast_map);
426 if (control_flag == 0)
428 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
429 ? nonascii_min_char : min_char);
430 CHARSET_MAX_CHAR (charset) = max_char;
432 else if (control_flag == 4)
434 temp_charset_work->min_char = min_char;
435 temp_charset_work->max_char = max_char;
440 /* Read a hexadecimal number (preceded by "0x") from the file FP while
441 paying attention to comment character '#'. */
443 static INLINE unsigned
444 read_hex (fp, eof)
445 FILE *fp;
446 int *eof;
448 int c;
449 unsigned n;
451 while ((c = getc (fp)) != EOF)
453 if (c == '#')
455 while ((c = getc (fp)) != EOF && c != '\n');
457 else if (c == '0')
459 if ((c = getc (fp)) == EOF || c == 'x')
460 break;
463 if (c == EOF)
465 *eof = 1;
466 return 0;
468 *eof = 0;
469 n = 0;
470 if (c == 'x')
471 while ((c = getc (fp)) != EOF && isxdigit (c))
472 n = ((n << 4)
473 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
474 else
475 while ((c = getc (fp)) != EOF && isdigit (c))
476 n = (n * 10) + c - '0';
477 if (c != EOF)
478 ungetc (c, fp);
479 return n;
482 extern Lisp_Object Qfile_name_handler_alist;
484 /* Return a mapping vector for CHARSET loaded from MAPFILE.
485 Each line of MAPFILE has this form
486 0xAAAA 0xCCCC
487 where 0xAAAA is a code-point and 0xCCCC is the corresponding
488 character code, or this form
489 0xAAAA-0xBBBB 0xCCCC
490 where 0xAAAA and 0xBBBB are code-points specifying a range, and
491 0xCCCC is the first character code of the range.
493 The returned vector has this form:
494 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
495 where CODE1 is a code-point or a cons of code-points specifying a
496 range.
498 Note that this function uses `openp' to open MAPFILE but ignores
499 `file-name-handler-alist' to avoid running any Lisp code. */
501 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
503 static void
504 load_charset_map_from_file (charset, mapfile, control_flag)
505 struct charset *charset;
506 Lisp_Object mapfile;
507 int control_flag;
509 unsigned min_code = CHARSET_MIN_CODE (charset);
510 unsigned max_code = CHARSET_MAX_CODE (charset);
511 int fd;
512 FILE *fp;
513 int eof;
514 Lisp_Object suffixes;
515 struct charset_map_entries *head, *entries;
516 int n_entries, count;
517 USE_SAFE_ALLOCA;
519 suffixes = Fcons (build_string (".map"),
520 Fcons (build_string (".TXT"), Qnil));
522 count = SPECPDL_INDEX ();
523 specbind (Qfile_name_handler_alist, Qnil);
524 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
525 unbind_to (count, Qnil);
526 if (fd < 0
527 || ! (fp = fdopen (fd, "r")))
528 error ("Failure in loading charset map: %S", SDATA (mapfile));
530 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
531 large (larger than MAX_ALLOCA). */
532 SAFE_ALLOCA (head, struct charset_map_entries *,
533 sizeof (struct charset_map_entries));
534 entries = head;
535 bzero (entries, sizeof (struct charset_map_entries));
537 n_entries = 0;
538 eof = 0;
539 while (1)
541 unsigned from, to;
542 int c;
543 int idx;
545 from = read_hex (fp, &eof);
546 if (eof)
547 break;
548 if (getc (fp) == '-')
549 to = read_hex (fp, &eof);
550 else
551 to = from;
552 c = (int) read_hex (fp, &eof);
554 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
555 continue;
557 if (n_entries > 0 && (n_entries % 0x10000) == 0)
559 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
560 sizeof (struct charset_map_entries));
561 entries = entries->next;
562 bzero (entries, sizeof (struct charset_map_entries));
564 idx = n_entries % 0x10000;
565 entries->entry[idx].from = from;
566 entries->entry[idx].to = to;
567 entries->entry[idx].c = c;
568 n_entries++;
570 fclose (fp);
571 close (fd);
573 load_charset_map (charset, head, n_entries, control_flag);
574 SAFE_FREE ();
577 static void
578 load_charset_map_from_vector (charset, vec, control_flag)
579 struct charset *charset;
580 Lisp_Object vec;
581 int control_flag;
583 unsigned min_code = CHARSET_MIN_CODE (charset);
584 unsigned max_code = CHARSET_MAX_CODE (charset);
585 struct charset_map_entries *head, *entries;
586 int n_entries;
587 int len = ASIZE (vec);
588 int i;
589 USE_SAFE_ALLOCA;
591 if (len % 2 == 1)
593 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
594 return;
597 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
598 large (larger than MAX_ALLOCA). */
599 SAFE_ALLOCA (head, struct charset_map_entries *,
600 sizeof (struct charset_map_entries));
601 entries = head;
602 bzero (entries, sizeof (struct charset_map_entries));
604 n_entries = 0;
605 for (i = 0; i < len; i += 2)
607 Lisp_Object val, val2;
608 unsigned from, to;
609 int c;
610 int idx;
612 val = AREF (vec, i);
613 if (CONSP (val))
615 val2 = XCDR (val);
616 val = XCAR (val);
617 CHECK_NATNUM (val);
618 CHECK_NATNUM (val2);
619 from = XFASTINT (val);
620 to = XFASTINT (val2);
622 else
624 CHECK_NATNUM (val);
625 from = to = XFASTINT (val);
627 val = AREF (vec, i + 1);
628 CHECK_NATNUM (val);
629 c = XFASTINT (val);
631 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
632 continue;
634 if (n_entries > 0 && (n_entries % 0x10000) == 0)
636 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
637 sizeof (struct charset_map_entries));
638 entries = entries->next;
639 bzero (entries, sizeof (struct charset_map_entries));
641 idx = n_entries % 0x10000;
642 entries->entry[idx].from = from;
643 entries->entry[idx].to = to;
644 entries->entry[idx].c = c;
645 n_entries++;
648 load_charset_map (charset, head, n_entries, control_flag);
649 SAFE_FREE ();
653 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
654 map it is (see the comment of load_charset_map for the detail). */
656 static void
657 load_charset (charset, control_flag)
658 struct charset *charset;
659 int control_flag;
661 Lisp_Object map;
663 if (inhibit_load_charset_map
664 && temp_charset_work
665 && charset == temp_charset_work->current
666 && ((control_flag == 2) == temp_charset_work->for_encoder))
667 return;
669 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
670 map = CHARSET_MAP (charset);
671 else if (CHARSET_UNIFIED_P (charset))
672 map = CHARSET_UNIFY_MAP (charset);
673 if (STRINGP (map))
674 load_charset_map_from_file (charset, map, control_flag);
675 else
676 load_charset_map_from_vector (charset, map, control_flag);
680 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
681 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
682 (object)
683 Lisp_Object object;
685 return (CHARSETP (object) ? Qt : Qnil);
689 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
690 Lisp_Object function, Lisp_Object arg,
691 unsigned from, unsigned to));
693 void
694 map_charset_for_dump (c_function, function, arg, from, to)
695 void (*c_function) (Lisp_Object, Lisp_Object);
696 Lisp_Object function, arg;
697 unsigned from, to;
699 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
700 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
701 Lisp_Object range;
702 int c, stop;
703 struct gcpro gcpro1;
705 range = Fcons (Qnil, Qnil);
706 GCPRO1 (range);
708 c = temp_charset_work->min_char;
709 stop = (temp_charset_work->max_char < 0x20000
710 ? temp_charset_work->max_char : 0xFFFF);
712 while (1)
714 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
716 if (index >= from_idx && index <= to_idx)
718 if (NILP (XCAR (range)))
719 XSETCAR (range, make_number (c));
721 else if (! NILP (XCAR (range)))
723 XSETCDR (range, make_number (c - 1));
724 if (c_function)
725 (*c_function) (arg, range);
726 else
727 call2 (function, range, arg);
728 XSETCAR (range, Qnil);
730 if (c == stop)
732 if (c == temp_charset_work->max_char)
734 if (! NILP (XCAR (range)))
736 XSETCDR (range, make_number (c));
737 if (c_function)
738 (*c_function) (arg, range);
739 else
740 call2 (function, range, arg);
742 break;
744 c = 0x1FFFF;
745 stop = temp_charset_work->max_char;
747 c++;
749 UNGCPRO;
752 void
753 map_charset_chars (c_function, function, arg,
754 charset, from, to)
755 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
756 Lisp_Object function, arg;
757 struct charset *charset;
758 unsigned from, to;
760 Lisp_Object range;
761 int partial;
763 partial = (from > CHARSET_MIN_CODE (charset)
764 || to < CHARSET_MAX_CODE (charset));
766 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
768 int from_idx = CODE_POINT_TO_INDEX (charset, from);
769 int to_idx = CODE_POINT_TO_INDEX (charset, to);
770 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
771 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
773 if (CHARSET_UNIFIED_P (charset))
775 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
776 load_charset (charset, 2);
777 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
778 map_char_table_for_charset (c_function, function,
779 CHARSET_DEUNIFIER (charset), arg,
780 partial ? charset : NULL, from, to);
781 else
782 map_charset_for_dump (c_function, function, arg, from, to);
785 range = Fcons (make_number (from_c), make_number (to_c));
786 if (NILP (function))
787 (*c_function) (arg, range);
788 else
789 call2 (function, range, arg);
791 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
793 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
794 load_charset (charset, 2);
795 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
796 map_char_table_for_charset (c_function, function,
797 CHARSET_ENCODER (charset), arg,
798 partial ? charset : NULL, from, to);
799 else
800 map_charset_for_dump (c_function, function, arg, from, to);
802 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
804 Lisp_Object subset_info;
805 int offset;
807 subset_info = CHARSET_SUBSET (charset);
808 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
809 offset = XINT (AREF (subset_info, 3));
810 from -= offset;
811 if (from < XFASTINT (AREF (subset_info, 1)))
812 from = XFASTINT (AREF (subset_info, 1));
813 to -= offset;
814 if (to > XFASTINT (AREF (subset_info, 2)))
815 to = XFASTINT (AREF (subset_info, 2));
816 map_charset_chars (c_function, function, arg, charset, from, to);
818 else /* i.e. CHARSET_METHOD_SUPERSET */
820 Lisp_Object parents;
822 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
823 parents = XCDR (parents))
825 int offset;
826 unsigned this_from, this_to;
828 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
829 offset = XINT (XCDR (XCAR (parents)));
830 this_from = from > offset ? from - offset : 0;
831 this_to = to > offset ? to - offset : 0;
832 if (this_from < CHARSET_MIN_CODE (charset))
833 this_from = CHARSET_MIN_CODE (charset);
834 if (this_to > CHARSET_MAX_CODE (charset))
835 this_to = CHARSET_MAX_CODE (charset);
836 map_charset_chars (c_function, function, arg, charset,
837 this_from, this_to);
842 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
843 doc: /* Call FUNCTION for all characters in CHARSET.
844 FUNCTION is called with an argument RANGE and the optional 3rd
845 argument ARG.
847 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
848 characters contained in CHARSET.
850 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
851 range of code points (in CHARSET) of target characters. */)
852 (function, charset, arg, from_code, to_code)
853 Lisp_Object function, charset, arg, from_code, to_code;
855 struct charset *cs;
856 unsigned from, to;
858 CHECK_CHARSET_GET_CHARSET (charset, cs);
859 if (NILP (from_code))
860 from = CHARSET_MIN_CODE (cs);
861 else
863 CHECK_NATNUM (from_code);
864 from = XINT (from_code);
865 if (from < CHARSET_MIN_CODE (cs))
866 from = CHARSET_MIN_CODE (cs);
868 if (NILP (to_code))
869 to = CHARSET_MAX_CODE (cs);
870 else
872 CHECK_NATNUM (to_code);
873 to = XINT (to_code);
874 if (to > CHARSET_MAX_CODE (cs))
875 to = CHARSET_MAX_CODE (cs);
877 map_charset_chars (NULL, function, arg, cs, from, to);
878 return Qnil;
882 /* Define a charset according to the arguments. The Nth argument is
883 the Nth attribute of the charset (the last attribute `charset-id'
884 is not included). See the docstring of `define-charset' for the
885 detail. */
887 DEFUN ("define-charset-internal", Fdefine_charset_internal,
888 Sdefine_charset_internal, charset_arg_max, MANY, 0,
889 doc: /* For internal use only.
890 usage: (define-charset-internal ...) */)
891 (nargs, args)
892 int nargs;
893 Lisp_Object *args;
895 /* Charset attr vector. */
896 Lisp_Object attrs;
897 Lisp_Object val;
898 unsigned hash_code;
899 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
900 int i, j;
901 struct charset charset;
902 int id;
903 int dimension;
904 int new_definition_p;
905 int nchars;
907 if (nargs != charset_arg_max)
908 return Fsignal (Qwrong_number_of_arguments,
909 Fcons (intern ("define-charset-internal"),
910 make_number (nargs)));
912 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
914 CHECK_SYMBOL (args[charset_arg_name]);
915 ASET (attrs, charset_name, args[charset_arg_name]);
917 val = args[charset_arg_code_space];
918 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
920 int min_byte, max_byte;
922 min_byte = XINT (Faref (val, make_number (i * 2)));
923 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
924 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
925 error ("Invalid :code-space value");
926 charset.code_space[i * 4] = min_byte;
927 charset.code_space[i * 4 + 1] = max_byte;
928 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
929 nchars *= charset.code_space[i * 4 + 2];
930 charset.code_space[i * 4 + 3] = nchars;
931 if (max_byte > 0)
932 dimension = i + 1;
935 val = args[charset_arg_dimension];
936 if (NILP (val))
937 charset.dimension = dimension;
938 else
940 CHECK_NATNUM (val);
941 charset.dimension = XINT (val);
942 if (charset.dimension < 1 || charset.dimension > 4)
943 args_out_of_range_3 (val, make_number (1), make_number (4));
946 charset.code_linear_p
947 = (charset.dimension == 1
948 || (charset.code_space[2] == 256
949 && (charset.dimension == 2
950 || (charset.code_space[6] == 256
951 && (charset.dimension == 3
952 || charset.code_space[10] == 256)))));
954 if (! charset.code_linear_p)
956 charset.code_space_mask = (unsigned char *) xmalloc (256);
957 bzero (charset.code_space_mask, 256);
958 for (i = 0; i < 4; i++)
959 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
960 j++)
961 charset.code_space_mask[j] |= (1 << i);
964 charset.iso_chars_96 = charset.code_space[2] == 96;
966 charset.min_code = (charset.code_space[0]
967 | (charset.code_space[4] << 8)
968 | (charset.code_space[8] << 16)
969 | (charset.code_space[12] << 24));
970 charset.max_code = (charset.code_space[1]
971 | (charset.code_space[5] << 8)
972 | (charset.code_space[9] << 16)
973 | (charset.code_space[13] << 24));
974 charset.char_index_offset = 0;
976 val = args[charset_arg_min_code];
977 if (! NILP (val))
979 unsigned code;
981 if (INTEGERP (val))
982 code = XINT (val);
983 else
985 CHECK_CONS (val);
986 CHECK_NUMBER_CAR (val);
987 CHECK_NUMBER_CDR (val);
988 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
990 if (code < charset.min_code
991 || code > charset.max_code)
992 args_out_of_range_3 (make_number (charset.min_code),
993 make_number (charset.max_code), val);
994 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
995 charset.min_code = code;
998 val = args[charset_arg_max_code];
999 if (! NILP (val))
1001 unsigned code;
1003 if (INTEGERP (val))
1004 code = XINT (val);
1005 else
1007 CHECK_CONS (val);
1008 CHECK_NUMBER_CAR (val);
1009 CHECK_NUMBER_CDR (val);
1010 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
1012 if (code < charset.min_code
1013 || code > charset.max_code)
1014 args_out_of_range_3 (make_number (charset.min_code),
1015 make_number (charset.max_code), val);
1016 charset.max_code = code;
1019 charset.compact_codes_p = charset.max_code < 0x10000;
1021 val = args[charset_arg_invalid_code];
1022 if (NILP (val))
1024 if (charset.min_code > 0)
1025 charset.invalid_code = 0;
1026 else
1028 XSETINT (val, charset.max_code + 1);
1029 if (XINT (val) == charset.max_code + 1)
1030 charset.invalid_code = charset.max_code + 1;
1031 else
1032 error ("Attribute :invalid-code must be specified");
1035 else
1037 CHECK_NATNUM (val);
1038 charset.invalid_code = XFASTINT (val);
1041 val = args[charset_arg_iso_final];
1042 if (NILP (val))
1043 charset.iso_final = -1;
1044 else
1046 CHECK_NUMBER (val);
1047 if (XINT (val) < '0' || XINT (val) > 127)
1048 error ("Invalid iso-final-char: %d", XINT (val));
1049 charset.iso_final = XINT (val);
1052 val = args[charset_arg_iso_revision];
1053 if (NILP (val))
1054 charset.iso_revision = -1;
1055 else
1057 CHECK_NUMBER (val);
1058 if (XINT (val) > 63)
1059 args_out_of_range (make_number (63), val);
1060 charset.iso_revision = XINT (val);
1063 val = args[charset_arg_emacs_mule_id];
1064 if (NILP (val))
1065 charset.emacs_mule_id = -1;
1066 else
1068 CHECK_NATNUM (val);
1069 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1070 error ("Invalid emacs-mule-id: %d", XINT (val));
1071 charset.emacs_mule_id = XINT (val);
1074 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1076 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1078 charset.unified_p = 0;
1080 bzero (charset.fast_map, sizeof (charset.fast_map));
1082 if (! NILP (args[charset_arg_code_offset]))
1084 val = args[charset_arg_code_offset];
1085 CHECK_NUMBER (val);
1087 charset.method = CHARSET_METHOD_OFFSET;
1088 charset.code_offset = XINT (val);
1090 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1091 charset.min_char = i + charset.code_offset;
1092 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1093 charset.max_char = i + charset.code_offset;
1094 if (charset.max_char > MAX_CHAR)
1095 error ("Unsupported max char: %d", charset.max_char);
1097 i = (charset.min_char >> 7) << 7;
1098 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1099 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1100 i = (i >> 12) << 12;
1101 for (; i <= charset.max_char; i += 0x1000)
1102 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1103 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1104 charset.ascii_compatible_p = 1;
1106 else if (! NILP (args[charset_arg_map]))
1108 val = args[charset_arg_map];
1109 ASET (attrs, charset_map, val);
1110 charset.method = CHARSET_METHOD_MAP;
1112 else if (! NILP (args[charset_arg_subset]))
1114 Lisp_Object parent;
1115 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1116 struct charset *parent_charset;
1118 val = args[charset_arg_subset];
1119 parent = Fcar (val);
1120 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1121 parent_min_code = Fnth (make_number (1), val);
1122 CHECK_NATNUM (parent_min_code);
1123 parent_max_code = Fnth (make_number (2), val);
1124 CHECK_NATNUM (parent_max_code);
1125 parent_code_offset = Fnth (make_number (3), val);
1126 CHECK_NUMBER (parent_code_offset);
1127 val = Fmake_vector (make_number (4), Qnil);
1128 ASET (val, 0, make_number (parent_charset->id));
1129 ASET (val, 1, parent_min_code);
1130 ASET (val, 2, parent_max_code);
1131 ASET (val, 3, parent_code_offset);
1132 ASET (attrs, charset_subset, val);
1134 charset.method = CHARSET_METHOD_SUBSET;
1135 /* Here, we just copy the parent's fast_map. It's not accurate,
1136 but at least it works for quickly detecting which character
1137 DOESN'T belong to this charset. */
1138 for (i = 0; i < 190; i++)
1139 charset.fast_map[i] = parent_charset->fast_map[i];
1141 /* We also copy these for parents. */
1142 charset.min_char = parent_charset->min_char;
1143 charset.max_char = parent_charset->max_char;
1145 else if (! NILP (args[charset_arg_superset]))
1147 val = args[charset_arg_superset];
1148 charset.method = CHARSET_METHOD_SUPERSET;
1149 val = Fcopy_sequence (val);
1150 ASET (attrs, charset_superset, val);
1152 charset.min_char = MAX_CHAR;
1153 charset.max_char = 0;
1154 for (; ! NILP (val); val = Fcdr (val))
1156 Lisp_Object elt, car_part, cdr_part;
1157 int this_id, offset;
1158 struct charset *this_charset;
1160 elt = Fcar (val);
1161 if (CONSP (elt))
1163 car_part = XCAR (elt);
1164 cdr_part = XCDR (elt);
1165 CHECK_CHARSET_GET_ID (car_part, this_id);
1166 CHECK_NUMBER (cdr_part);
1167 offset = XINT (cdr_part);
1169 else
1171 CHECK_CHARSET_GET_ID (elt, this_id);
1172 offset = 0;
1174 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1176 this_charset = CHARSET_FROM_ID (this_id);
1177 if (charset.min_char > this_charset->min_char)
1178 charset.min_char = this_charset->min_char;
1179 if (charset.max_char < this_charset->max_char)
1180 charset.max_char = this_charset->max_char;
1181 for (i = 0; i < 190; i++)
1182 charset.fast_map[i] |= this_charset->fast_map[i];
1185 else
1186 error ("None of :code-offset, :map, :parents are specified");
1188 val = args[charset_arg_unify_map];
1189 if (! NILP (val) && !STRINGP (val))
1190 CHECK_VECTOR (val);
1191 ASET (attrs, charset_unify_map, val);
1193 CHECK_LIST (args[charset_arg_plist]);
1194 ASET (attrs, charset_plist, args[charset_arg_plist]);
1196 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1197 &hash_code);
1198 if (charset.hash_index >= 0)
1200 new_definition_p = 0;
1201 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1202 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1204 else
1206 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1207 hash_code);
1208 if (charset_table_used == charset_table_size)
1210 struct charset *new_table
1211 = (struct charset *) xmalloc (sizeof (struct charset)
1212 * (charset_table_size + 16));
1213 bcopy (charset_table, new_table,
1214 sizeof (struct charset) * charset_table_size);
1215 charset_table_size += 16;
1216 charset_table = new_table;
1218 id = charset_table_used++;
1219 new_definition_p = 1;
1222 ASET (attrs, charset_id, make_number (id));
1223 charset.id = id;
1224 charset_table[id] = charset;
1226 if (charset.method == CHARSET_METHOD_MAP)
1228 load_charset (&charset, 0);
1229 charset_table[id] = charset;
1232 if (charset.iso_final >= 0)
1234 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1235 charset.iso_final) = id;
1236 if (new_definition_p)
1237 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1238 Fcons (make_number (id), Qnil));
1239 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1240 charset_jisx0201_roman = id;
1241 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1242 charset_jisx0208_1978 = id;
1243 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1244 charset_jisx0208 = id;
1245 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1246 charset_ksc5601 = id;
1249 if (charset.emacs_mule_id >= 0)
1251 emacs_mule_charset[charset.emacs_mule_id] = id;
1252 if (charset.emacs_mule_id < 0xA0)
1253 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1254 else
1255 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1256 if (new_definition_p)
1257 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1258 Fcons (make_number (id), Qnil));
1261 if (new_definition_p)
1263 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1264 if (charset.supplementary_p)
1265 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1266 Fcons (make_number (id), Qnil));
1267 else
1269 Lisp_Object tail;
1271 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1273 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1275 if (cs->supplementary_p)
1276 break;
1278 if (EQ (tail, Vcharset_ordered_list))
1279 Vcharset_ordered_list = Fcons (make_number (id),
1280 Vcharset_ordered_list);
1281 else if (NILP (tail))
1282 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1283 Fcons (make_number (id), Qnil));
1284 else
1286 val = Fcons (XCAR (tail), XCDR (tail));
1287 XSETCDR (tail, val);
1288 XSETCAR (tail, make_number (id));
1291 charset_ordered_list_tick++;
1294 return Qnil;
1298 /* Same as Fdefine_charset_internal but arguments are more convenient
1299 to call from C (typically in syms_of_charset). This can define a
1300 charset of `offset' method only. Return the ID of the new
1301 charset. */
1303 static int
1304 define_charset_internal (name, dimension, code_space, min_code, max_code,
1305 iso_final, iso_revision, emacs_mule_id,
1306 ascii_compatible, supplementary,
1307 code_offset)
1308 Lisp_Object name;
1309 int dimension;
1310 unsigned char *code_space;
1311 unsigned min_code, max_code;
1312 int iso_final, iso_revision, emacs_mule_id;
1313 int ascii_compatible, supplementary;
1314 int code_offset;
1316 Lisp_Object args[charset_arg_max];
1317 Lisp_Object plist[14];
1318 Lisp_Object val;
1319 int i;
1321 args[charset_arg_name] = name;
1322 args[charset_arg_dimension] = make_number (dimension);
1323 val = Fmake_vector (make_number (8), make_number (0));
1324 for (i = 0; i < 8; i++)
1325 ASET (val, i, make_number (code_space[i]));
1326 args[charset_arg_code_space] = val;
1327 args[charset_arg_min_code] = make_number (min_code);
1328 args[charset_arg_max_code] = make_number (max_code);
1329 args[charset_arg_iso_final]
1330 = (iso_final < 0 ? Qnil : make_number (iso_final));
1331 args[charset_arg_iso_revision] = make_number (iso_revision);
1332 args[charset_arg_emacs_mule_id]
1333 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1334 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1335 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1336 args[charset_arg_invalid_code] = Qnil;
1337 args[charset_arg_code_offset] = make_number (code_offset);
1338 args[charset_arg_map] = Qnil;
1339 args[charset_arg_subset] = Qnil;
1340 args[charset_arg_superset] = Qnil;
1341 args[charset_arg_unify_map] = Qnil;
1343 plist[0] = intern_c_string (":name");
1344 plist[1] = args[charset_arg_name];
1345 plist[2] = intern_c_string (":dimension");
1346 plist[3] = args[charset_arg_dimension];
1347 plist[4] = intern_c_string (":code-space");
1348 plist[5] = args[charset_arg_code_space];
1349 plist[6] = intern_c_string (":iso-final-char");
1350 plist[7] = args[charset_arg_iso_final];
1351 plist[8] = intern_c_string (":emacs-mule-id");
1352 plist[9] = args[charset_arg_emacs_mule_id];
1353 plist[10] = intern_c_string (":ascii-compatible-p");
1354 plist[11] = args[charset_arg_ascii_compatible_p];
1355 plist[12] = intern_c_string (":code-offset");
1356 plist[13] = args[charset_arg_code_offset];
1358 args[charset_arg_plist] = Flist (14, plist);
1359 Fdefine_charset_internal (charset_arg_max, args);
1361 return XINT (CHARSET_SYMBOL_ID (name));
1365 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1366 Sdefine_charset_alias, 2, 2, 0,
1367 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1368 (alias, charset)
1369 Lisp_Object alias, charset;
1371 Lisp_Object attr;
1373 CHECK_CHARSET_GET_ATTR (charset, attr);
1374 Fputhash (alias, attr, Vcharset_hash_table);
1375 Vcharset_list = Fcons (alias, Vcharset_list);
1376 return Qnil;
1380 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1381 doc: /* Return the property list of CHARSET. */)
1382 (charset)
1383 Lisp_Object charset;
1385 Lisp_Object attrs;
1387 CHECK_CHARSET_GET_ATTR (charset, attrs);
1388 return CHARSET_ATTR_PLIST (attrs);
1392 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1393 doc: /* Set CHARSET's property list to PLIST. */)
1394 (charset, plist)
1395 Lisp_Object charset, plist;
1397 Lisp_Object attrs;
1399 CHECK_CHARSET_GET_ATTR (charset, attrs);
1400 CHARSET_ATTR_PLIST (attrs) = plist;
1401 return plist;
1405 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1406 doc: /* Unify characters of CHARSET with Unicode.
1407 This means reading the relevant file and installing the table defined
1408 by CHARSET's `:unify-map' property.
1410 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1411 the same meaning as the `:unify-map' attribute in the function
1412 `define-charset' (which see).
1414 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1415 (charset, unify_map, deunify)
1416 Lisp_Object charset, unify_map, deunify;
1418 int id;
1419 struct charset *cs;
1421 CHECK_CHARSET_GET_ID (charset, id);
1422 cs = CHARSET_FROM_ID (id);
1423 if (NILP (deunify)
1424 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1425 : ! CHARSET_UNIFIED_P (cs))
1426 return Qnil;
1428 CHARSET_UNIFIED_P (cs) = 0;
1429 if (NILP (deunify))
1431 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1432 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1433 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1434 if (NILP (unify_map))
1435 unify_map = CHARSET_UNIFY_MAP (cs);
1436 else
1438 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1439 signal_error ("Bad unify-map", unify_map);
1440 CHARSET_UNIFY_MAP (cs) = unify_map;
1442 if (NILP (Vchar_unify_table))
1443 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1444 char_table_set_range (Vchar_unify_table,
1445 cs->min_char, cs->max_char, charset);
1446 CHARSET_UNIFIED_P (cs) = 1;
1448 else if (CHAR_TABLE_P (Vchar_unify_table))
1450 int min_code = CHARSET_MIN_CODE (cs);
1451 int max_code = CHARSET_MAX_CODE (cs);
1452 int min_char = DECODE_CHAR (cs, min_code);
1453 int max_char = DECODE_CHAR (cs, max_code);
1455 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1458 return Qnil;
1461 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1462 Sget_unused_iso_final_char, 2, 2, 0,
1463 doc: /*
1464 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1465 DIMENSION is the number of bytes to represent a character: 1 or 2.
1466 CHARS is the number of characters in a dimension: 94 or 96.
1468 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1469 If there's no unused final char for the specified kind of charset,
1470 return nil. */)
1471 (dimension, chars)
1472 Lisp_Object dimension, chars;
1474 int final_char;
1476 CHECK_NUMBER (dimension);
1477 CHECK_NUMBER (chars);
1478 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1479 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1480 if (XINT (chars) != 94 && XINT (chars) != 96)
1481 args_out_of_range_3 (chars, make_number (94), make_number (96));
1482 for (final_char = '0'; final_char <= '?'; final_char++)
1483 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1484 break;
1485 return (final_char <= '?' ? make_number (final_char) : Qnil);
1488 static void
1489 check_iso_charset_parameter (dimension, chars, final_char)
1490 Lisp_Object dimension, chars, final_char;
1492 CHECK_NATNUM (dimension);
1493 CHECK_NATNUM (chars);
1494 CHECK_NATNUM (final_char);
1496 if (XINT (dimension) > 3)
1497 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1498 if (XINT (chars) != 94 && XINT (chars) != 96)
1499 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1500 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1501 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1505 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1506 4, 4, 0,
1507 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1509 On decoding by an ISO-2022 base coding system, when a charset
1510 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1511 if CHARSET is designated instead. */)
1512 (dimension, chars, final_char, charset)
1513 Lisp_Object dimension, chars, final_char, charset;
1515 int id;
1516 int chars_flag;
1518 CHECK_CHARSET_GET_ID (charset, id);
1519 check_iso_charset_parameter (dimension, chars, final_char);
1520 chars_flag = XINT (chars) == 96;
1521 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1522 return Qnil;
1526 /* Return information about charsets in the text at PTR of NBYTES
1527 bytes, which are NCHARS characters. The value is:
1529 0: Each character is represented by one byte. This is always
1530 true for a unibyte string. For a multibyte string, true if
1531 it contains only ASCII characters.
1533 1: No charsets other than ascii, control-1, and latin-1 are
1534 found.
1536 2: Otherwise.
1540 string_xstring_p (string)
1541 Lisp_Object string;
1543 const unsigned char *p = SDATA (string);
1544 const unsigned char *endp = p + SBYTES (string);
1546 if (SCHARS (string) == SBYTES (string))
1547 return 0;
1549 while (p < endp)
1551 int c = STRING_CHAR_ADVANCE (p);
1553 if (c >= 0x100)
1554 return 2;
1556 return 1;
1560 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1562 CHARSETS is a vector. If Nth element is non-nil, it means the
1563 charset whose id is N is already found.
1565 It may lookup a translation table TABLE if supplied. */
1567 static void
1568 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1569 const unsigned char *ptr;
1570 EMACS_INT nchars, nbytes;
1571 Lisp_Object charsets, table;
1572 int multibyte;
1574 const unsigned char *pend = ptr + nbytes;
1576 if (nchars == nbytes)
1578 if (multibyte)
1579 ASET (charsets, charset_ascii, Qt);
1580 else
1581 while (ptr < pend)
1583 int c = *ptr++;
1585 if (!NILP (table))
1586 c = translate_char (table, c);
1587 if (ASCII_BYTE_P (c))
1588 ASET (charsets, charset_ascii, Qt);
1589 else
1590 ASET (charsets, charset_eight_bit, Qt);
1593 else
1595 while (ptr < pend)
1597 int c = STRING_CHAR_ADVANCE (ptr);
1598 struct charset *charset;
1600 if (!NILP (table))
1601 c = translate_char (table, c);
1602 charset = CHAR_CHARSET (c);
1603 ASET (charsets, CHARSET_ID (charset), Qt);
1608 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1609 2, 3, 0,
1610 doc: /* Return a list of charsets in the region between BEG and END.
1611 BEG and END are buffer positions.
1612 Optional arg TABLE if non-nil is a translation table to look up.
1614 If the current buffer is unibyte, the returned list may contain
1615 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1616 (beg, end, table)
1617 Lisp_Object beg, end, table;
1619 Lisp_Object charsets;
1620 EMACS_INT from, from_byte, to, stop, stop_byte;
1621 int i;
1622 Lisp_Object val;
1623 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1625 validate_region (&beg, &end);
1626 from = XFASTINT (beg);
1627 stop = to = XFASTINT (end);
1629 if (from < GPT && GPT < to)
1631 stop = GPT;
1632 stop_byte = GPT_BYTE;
1634 else
1635 stop_byte = CHAR_TO_BYTE (stop);
1637 from_byte = CHAR_TO_BYTE (from);
1639 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1640 while (1)
1642 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1643 stop_byte - from_byte, charsets, table,
1644 multibyte);
1645 if (stop < to)
1647 from = stop, from_byte = stop_byte;
1648 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1650 else
1651 break;
1654 val = Qnil;
1655 for (i = charset_table_used - 1; i >= 0; i--)
1656 if (!NILP (AREF (charsets, i)))
1657 val = Fcons (CHARSET_NAME (charset_table + i), val);
1658 return val;
1661 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1662 1, 2, 0,
1663 doc: /* Return a list of charsets in STR.
1664 Optional arg TABLE if non-nil is a translation table to look up.
1666 If STR is unibyte, the returned list may contain
1667 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1668 (str, table)
1669 Lisp_Object str, table;
1671 Lisp_Object charsets;
1672 int i;
1673 Lisp_Object val;
1675 CHECK_STRING (str);
1677 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1678 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1679 charsets, table,
1680 STRING_MULTIBYTE (str));
1681 val = Qnil;
1682 for (i = charset_table_used - 1; i >= 0; i--)
1683 if (!NILP (AREF (charsets, i)))
1684 val = Fcons (CHARSET_NAME (charset_table + i), val);
1685 return val;
1690 /* Return a unified character code for C (>= 0x110000). VAL is a
1691 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1692 charset symbol. */
1694 maybe_unify_char (c, val)
1695 int c;
1696 Lisp_Object val;
1698 struct charset *charset;
1700 if (INTEGERP (val))
1701 return XINT (val);
1702 if (NILP (val))
1703 return c;
1705 CHECK_CHARSET_GET_CHARSET (val, charset);
1706 load_charset (charset, 1);
1707 if (! inhibit_load_charset_map)
1709 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1710 if (! NILP (val))
1711 c = XINT (val);
1713 else
1715 int code_index = c - CHARSET_CODE_OFFSET (charset);
1716 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1718 if (unified > 0)
1719 c = unified;
1721 return c;
1725 /* Return a character correponding to the code-point CODE of
1726 CHARSET. */
1729 decode_char (charset, code)
1730 struct charset *charset;
1731 unsigned code;
1733 int c, char_index;
1734 enum charset_method method = CHARSET_METHOD (charset);
1736 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1737 return -1;
1739 if (method == CHARSET_METHOD_SUBSET)
1741 Lisp_Object subset_info;
1743 subset_info = CHARSET_SUBSET (charset);
1744 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1745 code -= XINT (AREF (subset_info, 3));
1746 if (code < XFASTINT (AREF (subset_info, 1))
1747 || code > XFASTINT (AREF (subset_info, 2)))
1748 c = -1;
1749 else
1750 c = DECODE_CHAR (charset, code);
1752 else if (method == CHARSET_METHOD_SUPERSET)
1754 Lisp_Object parents;
1756 parents = CHARSET_SUPERSET (charset);
1757 c = -1;
1758 for (; CONSP (parents); parents = XCDR (parents))
1760 int id = XINT (XCAR (XCAR (parents)));
1761 int code_offset = XINT (XCDR (XCAR (parents)));
1762 unsigned this_code = code - code_offset;
1764 charset = CHARSET_FROM_ID (id);
1765 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1766 break;
1769 else
1771 char_index = CODE_POINT_TO_INDEX (charset, code);
1772 if (char_index < 0)
1773 return -1;
1775 if (method == CHARSET_METHOD_MAP)
1777 Lisp_Object decoder;
1779 decoder = CHARSET_DECODER (charset);
1780 if (! VECTORP (decoder))
1782 load_charset (charset, 1);
1783 decoder = CHARSET_DECODER (charset);
1785 if (VECTORP (decoder))
1786 c = XINT (AREF (decoder, char_index));
1787 else
1788 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1790 else /* method == CHARSET_METHOD_OFFSET */
1792 c = char_index + CHARSET_CODE_OFFSET (charset);
1793 if (CHARSET_UNIFIED_P (charset)
1794 && c > MAX_UNICODE_CHAR)
1795 MAYBE_UNIFY_CHAR (c);
1799 return c;
1802 /* Variable used temporarily by the macro ENCODE_CHAR. */
1803 Lisp_Object charset_work;
1805 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1806 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1807 use CHARSET's strict_max_char instead of max_char. */
1809 unsigned
1810 encode_char (charset, c)
1811 struct charset *charset;
1812 int c;
1814 unsigned code;
1815 enum charset_method method = CHARSET_METHOD (charset);
1817 if (CHARSET_UNIFIED_P (charset))
1819 Lisp_Object deunifier;
1820 int code_index = -1;
1822 deunifier = CHARSET_DEUNIFIER (charset);
1823 if (! CHAR_TABLE_P (deunifier))
1825 load_charset (charset, 2);
1826 deunifier = CHARSET_DEUNIFIER (charset);
1828 if (CHAR_TABLE_P (deunifier))
1830 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1832 if (INTEGERP (deunified))
1833 code_index = XINT (deunified);
1835 else
1837 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1839 if (code_index >= 0)
1840 c = CHARSET_CODE_OFFSET (charset) + code_index;
1843 if (method == CHARSET_METHOD_SUBSET)
1845 Lisp_Object subset_info;
1846 struct charset *this_charset;
1848 subset_info = CHARSET_SUBSET (charset);
1849 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1850 code = ENCODE_CHAR (this_charset, c);
1851 if (code == CHARSET_INVALID_CODE (this_charset)
1852 || code < XFASTINT (AREF (subset_info, 1))
1853 || code > XFASTINT (AREF (subset_info, 2)))
1854 return CHARSET_INVALID_CODE (charset);
1855 code += XINT (AREF (subset_info, 3));
1856 return code;
1859 if (method == CHARSET_METHOD_SUPERSET)
1861 Lisp_Object parents;
1863 parents = CHARSET_SUPERSET (charset);
1864 for (; CONSP (parents); parents = XCDR (parents))
1866 int id = XINT (XCAR (XCAR (parents)));
1867 int code_offset = XINT (XCDR (XCAR (parents)));
1868 struct charset *this_charset = CHARSET_FROM_ID (id);
1870 code = ENCODE_CHAR (this_charset, c);
1871 if (code != CHARSET_INVALID_CODE (this_charset))
1872 return code + code_offset;
1874 return CHARSET_INVALID_CODE (charset);
1877 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1878 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1879 return CHARSET_INVALID_CODE (charset);
1881 if (method == CHARSET_METHOD_MAP)
1883 Lisp_Object encoder;
1884 Lisp_Object val;
1886 encoder = CHARSET_ENCODER (charset);
1887 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1889 load_charset (charset, 2);
1890 encoder = CHARSET_ENCODER (charset);
1892 if (CHAR_TABLE_P (encoder))
1894 val = CHAR_TABLE_REF (encoder, c);
1895 if (NILP (val))
1896 return CHARSET_INVALID_CODE (charset);
1897 code = XINT (val);
1898 if (! CHARSET_COMPACT_CODES_P (charset))
1899 code = INDEX_TO_CODE_POINT (charset, code);
1901 else
1903 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1904 code = INDEX_TO_CODE_POINT (charset, code);
1907 else /* method == CHARSET_METHOD_OFFSET */
1909 int code_index = c - CHARSET_CODE_OFFSET (charset);
1911 code = INDEX_TO_CODE_POINT (charset, code_index);
1914 return code;
1918 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1919 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1920 Return nil if CODE-POINT is not valid in CHARSET.
1922 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1924 Optional argument RESTRICTION specifies a way to map the pair of CCS
1925 and CODE-POINT to a character. Currently not supported and just ignored. */)
1926 (charset, code_point, restriction)
1927 Lisp_Object charset, code_point, restriction;
1929 int c, id;
1930 unsigned code;
1931 struct charset *charsetp;
1933 CHECK_CHARSET_GET_ID (charset, id);
1934 if (CONSP (code_point))
1936 CHECK_NATNUM_CAR (code_point);
1937 CHECK_NATNUM_CDR (code_point);
1938 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1940 else
1942 CHECK_NATNUM (code_point);
1943 code = XINT (code_point);
1945 charsetp = CHARSET_FROM_ID (id);
1946 c = DECODE_CHAR (charsetp, code);
1947 return (c >= 0 ? make_number (c) : Qnil);
1951 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1952 doc: /* Encode the character CH into a code-point of CHARSET.
1953 Return nil if CHARSET doesn't include CH.
1955 Optional argument RESTRICTION specifies a way to map CH to a
1956 code-point in CCS. Currently not supported and just ignored. */)
1957 (ch, charset, restriction)
1958 Lisp_Object ch, charset, restriction;
1960 int id;
1961 unsigned code;
1962 struct charset *charsetp;
1964 CHECK_CHARSET_GET_ID (charset, id);
1965 CHECK_NATNUM (ch);
1966 charsetp = CHARSET_FROM_ID (id);
1967 code = ENCODE_CHAR (charsetp, XINT (ch));
1968 if (code == CHARSET_INVALID_CODE (charsetp))
1969 return Qnil;
1970 if (code > 0x7FFFFFF)
1971 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1972 return make_number (code);
1976 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1977 doc:
1978 /* Return a character of CHARSET whose position codes are CODEn.
1980 CODE1 through CODE4 are optional, but if you don't supply sufficient
1981 position codes, it is assumed that the minimum code in each dimension
1982 is specified. */)
1983 (charset, code1, code2, code3, code4)
1984 Lisp_Object charset, code1, code2, code3, code4;
1986 int id, dimension;
1987 struct charset *charsetp;
1988 unsigned code;
1989 int c;
1991 CHECK_CHARSET_GET_ID (charset, id);
1992 charsetp = CHARSET_FROM_ID (id);
1994 dimension = CHARSET_DIMENSION (charsetp);
1995 if (NILP (code1))
1996 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1997 ? 0 : CHARSET_MIN_CODE (charsetp));
1998 else
2000 CHECK_NATNUM (code1);
2001 if (XFASTINT (code1) >= 0x100)
2002 args_out_of_range (make_number (0xFF), code1);
2003 code = XFASTINT (code1);
2005 if (dimension > 1)
2007 code <<= 8;
2008 if (NILP (code2))
2009 code |= charsetp->code_space[(dimension - 2) * 4];
2010 else
2012 CHECK_NATNUM (code2);
2013 if (XFASTINT (code2) >= 0x100)
2014 args_out_of_range (make_number (0xFF), code2);
2015 code |= XFASTINT (code2);
2018 if (dimension > 2)
2020 code <<= 8;
2021 if (NILP (code3))
2022 code |= charsetp->code_space[(dimension - 3) * 4];
2023 else
2025 CHECK_NATNUM (code3);
2026 if (XFASTINT (code3) >= 0x100)
2027 args_out_of_range (make_number (0xFF), code3);
2028 code |= XFASTINT (code3);
2031 if (dimension > 3)
2033 code <<= 8;
2034 if (NILP (code4))
2035 code |= charsetp->code_space[0];
2036 else
2038 CHECK_NATNUM (code4);
2039 if (XFASTINT (code4) >= 0x100)
2040 args_out_of_range (make_number (0xFF), code4);
2041 code |= XFASTINT (code4);
2048 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2049 code &= 0x7F7F7F7F;
2050 c = DECODE_CHAR (charsetp, code);
2051 if (c < 0)
2052 error ("Invalid code(s)");
2053 return make_number (c);
2057 /* Return the first charset in CHARSET_LIST that contains C.
2058 CHARSET_LIST is a list of charset IDs. If it is nil, use
2059 Vcharset_ordered_list. */
2061 struct charset *
2062 char_charset (c, charset_list, code_return)
2063 int c;
2064 Lisp_Object charset_list;
2065 unsigned *code_return;
2067 int maybe_null = 0;
2069 if (NILP (charset_list))
2070 charset_list = Vcharset_ordered_list;
2071 else
2072 maybe_null = 1;
2074 while (CONSP (charset_list))
2076 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2077 unsigned code = ENCODE_CHAR (charset, c);
2079 if (code != CHARSET_INVALID_CODE (charset))
2081 if (code_return)
2082 *code_return = code;
2083 return charset;
2085 charset_list = XCDR (charset_list);
2086 if (c <= MAX_UNICODE_CHAR
2087 && EQ (charset_list, Vcharset_non_preferred_head))
2088 return CHARSET_FROM_ID (charset_unicode);
2090 return (maybe_null ? NULL
2091 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2092 : CHARSET_FROM_ID (charset_eight_bit));
2096 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2097 doc:
2098 /*Return list of charset and one to four position-codes of CH.
2099 The charset is decided by the current priority order of charsets.
2100 A position-code is a byte value of each dimension of the code-point of
2101 CH in the charset. */)
2102 (ch)
2103 Lisp_Object ch;
2105 struct charset *charset;
2106 int c, dimension;
2107 unsigned code;
2108 Lisp_Object val;
2110 CHECK_CHARACTER (ch);
2111 c = XFASTINT (ch);
2112 charset = CHAR_CHARSET (c);
2113 if (! charset)
2114 abort ();
2115 code = ENCODE_CHAR (charset, c);
2116 if (code == CHARSET_INVALID_CODE (charset))
2117 abort ();
2118 dimension = CHARSET_DIMENSION (charset);
2119 for (val = Qnil; dimension > 0; dimension--)
2121 val = Fcons (make_number (code & 0xFF), val);
2122 code >>= 8;
2124 return Fcons (CHARSET_NAME (charset), val);
2128 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2129 doc: /* Return the charset of highest priority that contains CH.
2130 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2131 from which to find the charset. It may also be a coding system. In
2132 that case, find the charset from what supported by that coding system. */)
2133 (ch, restriction)
2134 Lisp_Object ch, restriction;
2136 struct charset *charset;
2138 CHECK_CHARACTER (ch);
2139 if (NILP (restriction))
2140 charset = CHAR_CHARSET (XINT (ch));
2141 else
2143 if (CONSP (restriction))
2145 int c = XFASTINT (ch);
2147 for (; CONSP (restriction); restriction = XCDR (restriction))
2149 struct charset *charset;
2151 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
2152 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
2153 return XCAR (restriction);
2155 return Qnil;
2157 restriction = coding_system_charset_list (restriction);
2158 charset = char_charset (XINT (ch), restriction, NULL);
2159 if (! charset)
2160 return Qnil;
2162 return (CHARSET_NAME (charset));
2166 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2167 doc: /*
2168 Return charset of a character in the current buffer at position POS.
2169 If POS is nil, it defauls to the current point.
2170 If POS is out of range, the value is nil. */)
2171 (pos)
2172 Lisp_Object pos;
2174 Lisp_Object ch;
2175 struct charset *charset;
2177 ch = Fchar_after (pos);
2178 if (! INTEGERP (ch))
2179 return ch;
2180 charset = CHAR_CHARSET (XINT (ch));
2181 return (CHARSET_NAME (charset));
2185 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2186 doc: /*
2187 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2189 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2190 by their DIMENSION, CHARS, and FINAL-CHAR,
2191 whereas Emacs distinguishes them by charset symbol.
2192 See the documentation of the function `charset-info' for the meanings of
2193 DIMENSION, CHARS, and FINAL-CHAR. */)
2194 (dimension, chars, final_char)
2195 Lisp_Object dimension, chars, final_char;
2197 int id;
2198 int chars_flag;
2200 check_iso_charset_parameter (dimension, chars, final_char);
2201 chars_flag = XFASTINT (chars) == 96;
2202 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2203 XFASTINT (final_char));
2204 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2208 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2209 0, 0, 0,
2210 doc: /*
2211 Internal use only.
2212 Clear temporary charset mapping tables.
2213 It should be called only from temacs invoked for dumping. */)
2216 if (temp_charset_work)
2218 free (temp_charset_work);
2219 temp_charset_work = NULL;
2222 if (CHAR_TABLE_P (Vchar_unify_table))
2223 Foptimize_char_table (Vchar_unify_table, Qnil);
2225 return Qnil;
2228 DEFUN ("charset-priority-list", Fcharset_priority_list,
2229 Scharset_priority_list, 0, 1, 0,
2230 doc: /* Return the list of charsets ordered by priority.
2231 HIGHESTP non-nil means just return the highest priority one. */)
2232 (highestp)
2233 Lisp_Object highestp;
2235 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2237 if (!NILP (highestp))
2238 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2240 while (!NILP (list))
2242 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2243 list = XCDR (list);
2245 return Fnreverse (val);
2248 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2249 1, MANY, 0,
2250 doc: /* Assign higher priority to the charsets given as arguments.
2251 usage: (set-charset-priority &rest charsets) */)
2252 (nargs, args)
2253 int nargs;
2254 Lisp_Object *args;
2256 Lisp_Object new_head, old_list, arglist[2];
2257 Lisp_Object list_2022, list_emacs_mule;
2258 int i, id;
2260 old_list = Fcopy_sequence (Vcharset_ordered_list);
2261 new_head = Qnil;
2262 for (i = 0; i < nargs; i++)
2264 CHECK_CHARSET_GET_ID (args[i], id);
2265 if (! NILP (Fmemq (make_number (id), old_list)))
2267 old_list = Fdelq (make_number (id), old_list);
2268 new_head = Fcons (make_number (id), new_head);
2271 arglist[0] = Fnreverse (new_head);
2272 arglist[1] = Vcharset_non_preferred_head = old_list;
2273 Vcharset_ordered_list = Fnconc (2, arglist);
2274 charset_ordered_list_tick++;
2276 charset_unibyte = -1;
2277 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2278 CONSP (old_list); old_list = XCDR (old_list))
2280 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2281 list_2022 = Fcons (XCAR (old_list), list_2022);
2282 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2283 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2284 if (charset_unibyte < 0)
2286 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2288 if (CHARSET_DIMENSION (charset) == 1
2289 && CHARSET_ASCII_COMPATIBLE_P (charset)
2290 && CHARSET_MAX_CHAR (charset) >= 0x80)
2291 charset_unibyte = CHARSET_ID (charset);
2294 Viso_2022_charset_list = Fnreverse (list_2022);
2295 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2296 if (charset_unibyte < 0)
2297 charset_unibyte = charset_iso_8859_1;
2299 return Qnil;
2302 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2303 0, 1, 0,
2304 doc: /* Internal use only.
2305 Return charset identification number of CHARSET. */)
2306 (charset)
2307 Lisp_Object charset;
2309 int id;
2311 CHECK_CHARSET_GET_ID (charset, id);
2312 return make_number (id);
2315 struct charset_sort_data
2317 Lisp_Object charset;
2318 int id;
2319 int priority;
2322 static int
2323 charset_compare (const void *d1, const void *d2)
2325 const struct charset_sort_data *data1 = d1, *data2 = d2;
2326 return (data1->priority - data2->priority);
2329 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2330 doc: /* Sort charset list CHARSETS by a priority of each charset.
2331 Return the sorted list. CHARSETS is modified by side effects.
2332 See also `charset-priority-list' and `set-charset-priority'. */)
2333 (Lisp_Object charsets)
2335 Lisp_Object len = Flength (charsets);
2336 int n = XFASTINT (len), i, j, done;
2337 Lisp_Object tail, elt, attrs;
2338 struct charset_sort_data *sort_data;
2339 int id, min_id, max_id;
2340 USE_SAFE_ALLOCA;
2342 if (n == 0)
2343 return Qnil;
2344 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2345 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2347 elt = XCAR (tail);
2348 CHECK_CHARSET_GET_ATTR (elt, attrs);
2349 sort_data[i].charset = elt;
2350 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2351 if (i == 0)
2352 min_id = max_id = id;
2353 else if (id < min_id)
2354 min_id = id;
2355 else if (id > max_id)
2356 max_id = id;
2358 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2359 done < n && CONSP (tail); tail = XCDR (tail), i++)
2361 elt = XCAR (tail);
2362 id = XFASTINT (elt);
2363 if (id >= min_id && id <= max_id)
2364 for (j = 0; j < n; j++)
2365 if (sort_data[j].id == id)
2367 sort_data[j].priority = i;
2368 done++;
2371 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2372 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2373 XSETCAR (tail, sort_data[i].charset);
2374 SAFE_FREE ();
2375 return charsets;
2379 void
2380 init_charset ()
2382 Lisp_Object tempdir;
2383 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2384 if (access ((char *) SDATA (tempdir), 0) < 0)
2386 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2387 Emacs will not function correctly without the character map files.\n\
2388 Please check your installation!\n",
2389 tempdir);
2390 /* TODO should this be a fatal error? (Bug#909) */
2393 Vcharset_map_path = Fcons (tempdir, Qnil);
2397 void
2398 init_charset_once ()
2400 int i, j, k;
2402 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2403 for (j = 0; j < ISO_MAX_CHARS; j++)
2404 for (k = 0; k < ISO_MAX_FINAL; k++)
2405 iso_charset_table[i][j][k] = -1;
2407 for (i = 0; i < 256; i++)
2408 emacs_mule_charset[i] = -1;
2410 charset_jisx0201_roman = -1;
2411 charset_jisx0208_1978 = -1;
2412 charset_jisx0208 = -1;
2413 charset_ksc5601 = -1;
2416 #ifdef emacs
2418 void
2419 syms_of_charset ()
2421 DEFSYM (Qcharsetp, "charsetp");
2423 DEFSYM (Qascii, "ascii");
2424 DEFSYM (Qunicode, "unicode");
2425 DEFSYM (Qemacs, "emacs");
2426 DEFSYM (Qeight_bit, "eight-bit");
2427 DEFSYM (Qiso_8859_1, "iso-8859-1");
2429 DEFSYM (Qgl, "gl");
2430 DEFSYM (Qgr, "gr");
2432 staticpro (&Vcharset_ordered_list);
2433 Vcharset_ordered_list = Qnil;
2435 staticpro (&Viso_2022_charset_list);
2436 Viso_2022_charset_list = Qnil;
2438 staticpro (&Vemacs_mule_charset_list);
2439 Vemacs_mule_charset_list = Qnil;
2441 /* Don't staticpro them here. It's done in syms_of_fns. */
2442 QCtest = intern (":test");
2443 Qeq = intern ("eq");
2445 staticpro (&Vcharset_hash_table);
2447 Lisp_Object args[2];
2448 args[0] = QCtest;
2449 args[1] = Qeq;
2450 Vcharset_hash_table = Fmake_hash_table (2, args);
2453 charset_table_size = 128;
2454 charset_table = ((struct charset *)
2455 xmalloc (sizeof (struct charset) * charset_table_size));
2456 charset_table_used = 0;
2458 defsubr (&Scharsetp);
2459 defsubr (&Smap_charset_chars);
2460 defsubr (&Sdefine_charset_internal);
2461 defsubr (&Sdefine_charset_alias);
2462 defsubr (&Scharset_plist);
2463 defsubr (&Sset_charset_plist);
2464 defsubr (&Sunify_charset);
2465 defsubr (&Sget_unused_iso_final_char);
2466 defsubr (&Sdeclare_equiv_charset);
2467 defsubr (&Sfind_charset_region);
2468 defsubr (&Sfind_charset_string);
2469 defsubr (&Sdecode_char);
2470 defsubr (&Sencode_char);
2471 defsubr (&Ssplit_char);
2472 defsubr (&Smake_char);
2473 defsubr (&Schar_charset);
2474 defsubr (&Scharset_after);
2475 defsubr (&Siso_charset);
2476 defsubr (&Sclear_charset_maps);
2477 defsubr (&Scharset_priority_list);
2478 defsubr (&Sset_charset_priority);
2479 defsubr (&Scharset_id_internal);
2480 defsubr (&Ssort_charsets);
2482 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2483 doc: /* *List of directories to search for charset map files. */);
2484 Vcharset_map_path = Qnil;
2486 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2487 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2488 inhibit_load_charset_map = 0;
2490 DEFVAR_LISP ("charset-list", &Vcharset_list,
2491 doc: /* List of all charsets ever defined. */);
2492 Vcharset_list = Qnil;
2494 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2495 doc: /* ISO639 language mnemonic symbol for the current language environment.
2496 If the current language environment is for multiple languages (e.g. "Latin-1"),
2497 the value may be a list of mnemonics. */);
2498 Vcurrent_iso639_language = Qnil;
2500 charset_ascii
2501 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2502 0, 127, 'B', -1, 0, 1, 0, 0);
2503 charset_iso_8859_1
2504 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2505 0, 255, -1, -1, -1, 1, 0, 0);
2506 charset_unicode
2507 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2508 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2509 charset_emacs
2510 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2511 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2512 charset_eight_bit
2513 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2514 128, 255, -1, 0, -1, 0, 1,
2515 MAX_5_BYTE_CHAR + 1);
2516 charset_unibyte = charset_iso_8859_1;
2519 #endif /* emacs */
2521 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2522 (do not change this comment) */