Fix permissions handling (CVE-2010-0825).
[emacs.git] / src / charset.c
blob125c91316871c569c135729744bb126641073d80
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28 #include <config.h>
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* List of all charsets. This variable is used only from Emacs
58 Lisp. */
59 Lisp_Object Vcharset_list;
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table;
65 /* Table of struct charset. */
66 struct charset *charset_table;
68 static int charset_table_size;
69 static int charset_table_used;
71 Lisp_Object Qcharsetp;
73 /* Special charset symbols. */
74 Lisp_Object Qascii;
75 Lisp_Object Qeight_bit;
76 Lisp_Object Qiso_8859_1;
77 Lisp_Object Qunicode;
78 Lisp_Object Qemacs;
80 /* The corresponding charsets. */
81 int charset_ascii;
82 int charset_eight_bit;
83 int charset_iso_8859_1;
84 int charset_unicode;
85 int charset_emacs;
87 /* The other special charsets. */
88 int charset_jisx0201_roman;
89 int charset_jisx0208_1978;
90 int charset_jisx0208;
91 int charset_ksc5601;
93 /* Value of charset attribute `charset-iso-plane'. */
94 Lisp_Object Qgl, Qgr;
96 /* Charset of unibyte characters. */
97 int charset_unibyte;
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list;
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104 Lisp_Object Vcharset_non_preferred_head;
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
108 -1. */
109 unsigned short charset_ordered_list_tick;
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list;
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list;
117 struct charset *emacs_mule_charset[256];
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
123 Lisp_Object Vcharset_map_path;
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map;
128 Lisp_Object Vcurrent_iso639_language;
130 /* Defined in chartab.c */
131 extern void
132 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
133 Lisp_Object function, Lisp_Object table,
134 Lisp_Object arg, struct charset *charset,
135 unsigned from, unsigned to));
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
152 : -1)
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
165 << 8) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
168 << 16) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 << 24))))
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
175 static struct
177 /* The current charset for which the following tables are setup. */
178 struct charset *current;
180 /* 1 iff the following table is used for encoder. */
181 short for_encoder;
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char, max_char;
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
191 int zero_index_char;
193 union {
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder[0x20000];
204 } table;
205 } *temp_charset_work;
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 do { \
209 if ((CODE) == 0) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
213 else \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 } while (0)
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded;
235 struct charset_map_entries
237 struct {
238 unsigned from, to;
239 int c;
240 } entry[0x10000];
241 struct charset_map_entries *next;
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
260 If CONTROL_FLAG is 2, setup the following tables.
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
269 static void
270 load_charset_map (charset, entries, n_entries, control_flag)
271 struct charset *charset;
272 struct charset_map_entries *entries;
273 int n_entries;
274 int control_flag;
276 Lisp_Object vec, table;
277 unsigned max_code = CHARSET_MAX_CODE (charset);
278 int ascii_compatible_p = charset->ascii_compatible_p;
279 int min_char, max_char, nonascii_min_char;
280 int i;
281 unsigned char *fast_map = charset->fast_map;
283 if (n_entries <= 0)
284 return;
286 if (control_flag)
288 if (! inhibit_load_charset_map)
290 if (control_flag == 1)
292 if (charset->method == CHARSET_METHOD_MAP)
294 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
296 vec = CHARSET_DECODER (charset)
297 = Fmake_vector (make_number (n), make_number (-1));
299 else
301 char_table_set_range (Vchar_unify_table,
302 charset->min_char, charset->max_char,
303 Qnil);
306 else
308 table = Fmake_char_table (Qnil, Qnil);
309 if (charset->method == CHARSET_METHOD_MAP)
310 CHARSET_ENCODER (charset) = table;
311 else
312 CHARSET_DEUNIFIER (charset) = table;
315 else
317 if (! temp_charset_work)
318 temp_charset_work = malloc (sizeof (*temp_charset_work));
319 if (control_flag == 1)
321 memset (temp_charset_work->table.decoder, -1,
322 sizeof (int) * 0x10000);
324 else
326 memset (temp_charset_work->table.encoder, 0,
327 sizeof (unsigned short) * 0x20000);
328 temp_charset_work->zero_index_char = -1;
330 temp_charset_work->current = charset;
331 temp_charset_work->for_encoder = (control_flag == 2);
332 control_flag += 2;
334 charset_map_loaded = 1;
337 min_char = max_char = entries->entry[0].c;
338 nonascii_min_char = MAX_CHAR;
339 for (i = 0; i < n_entries; i++)
341 unsigned from, to;
342 int from_index, to_index;
343 int from_c, to_c;
344 int idx = i % 0x10000;
346 if (i > 0 && idx == 0)
347 entries = entries->next;
348 from = entries->entry[idx].from;
349 to = entries->entry[idx].to;
350 from_c = entries->entry[idx].c;
351 from_index = CODE_POINT_TO_INDEX (charset, from);
352 if (from == to)
354 to_index = from_index;
355 to_c = from_c;
357 else
359 to_index = CODE_POINT_TO_INDEX (charset, to);
360 to_c = from_c + (to_index - from_index);
362 if (from_index < 0 || to_index < 0)
363 continue;
365 if (to_c > max_char)
366 max_char = to_c;
367 else if (from_c < min_char)
368 min_char = from_c;
370 if (control_flag == 1)
372 if (charset->method == CHARSET_METHOD_MAP)
373 for (; from_index <= to_index; from_index++, from_c++)
374 ASET (vec, from_index, make_number (from_c));
375 else
376 for (; from_index <= to_index; from_index++, from_c++)
377 CHAR_TABLE_SET (Vchar_unify_table,
378 CHARSET_CODE_OFFSET (charset) + from_index,
379 make_number (from_c));
381 else if (control_flag == 2)
383 if (charset->method == CHARSET_METHOD_MAP
384 && CHARSET_COMPACT_CODES_P (charset))
385 for (; from_index <= to_index; from_index++, from_c++)
387 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
389 if (NILP (CHAR_TABLE_REF (table, from_c)))
390 CHAR_TABLE_SET (table, from_c, make_number (code));
392 else
393 for (; from_index <= to_index; from_index++, from_c++)
395 if (NILP (CHAR_TABLE_REF (table, from_c)))
396 CHAR_TABLE_SET (table, from_c, make_number (from_index));
399 else if (control_flag == 3)
400 for (; from_index <= to_index; from_index++, from_c++)
401 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
402 else if (control_flag == 4)
403 for (; from_index <= to_index; from_index++, from_c++)
404 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
405 else /* control_flag == 0 */
407 if (ascii_compatible_p)
409 if (! ASCII_BYTE_P (from_c))
411 if (from_c < nonascii_min_char)
412 nonascii_min_char = from_c;
414 else if (! ASCII_BYTE_P (to_c))
416 nonascii_min_char = 0x80;
420 for (; from_c <= to_c; from_c++)
421 CHARSET_FAST_MAP_SET (from_c, fast_map);
425 if (control_flag == 0)
427 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
428 ? nonascii_min_char : min_char);
429 CHARSET_MAX_CHAR (charset) = max_char;
431 else if (control_flag == 4)
433 temp_charset_work->min_char = min_char;
434 temp_charset_work->max_char = max_char;
439 /* Read a hexadecimal number (preceded by "0x") from the file FP while
440 paying attention to comment charcter '#'. */
442 static INLINE unsigned
443 read_hex (fp, eof)
444 FILE *fp;
445 int *eof;
447 int c;
448 unsigned n;
450 while ((c = getc (fp)) != EOF)
452 if (c == '#')
454 while ((c = getc (fp)) != EOF && c != '\n');
456 else if (c == '0')
458 if ((c = getc (fp)) == EOF || c == 'x')
459 break;
462 if (c == EOF)
464 *eof = 1;
465 return 0;
467 *eof = 0;
468 n = 0;
469 if (c == 'x')
470 while ((c = getc (fp)) != EOF && isxdigit (c))
471 n = ((n << 4)
472 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
473 else
474 while ((c = getc (fp)) != EOF && isdigit (c))
475 n = (n * 10) + c - '0';
476 if (c != EOF)
477 ungetc (c, fp);
478 return n;
481 extern Lisp_Object Qfile_name_handler_alist;
483 /* Return a mapping vector for CHARSET loaded from MAPFILE.
484 Each line of MAPFILE has this form
485 0xAAAA 0xCCCC
486 where 0xAAAA is a code-point and 0xCCCC is the corresponding
487 character code, or this form
488 0xAAAA-0xBBBB 0xCCCC
489 where 0xAAAA and 0xBBBB are code-points specifying a range, and
490 0xCCCC is the first character code of the range.
492 The returned vector has this form:
493 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
494 where CODE1 is a code-point or a cons of code-points specifying a
495 range.
497 Note that this function uses `openp' to open MAPFILE but ignores
498 `file-name-handler-alist' to avoid running any Lisp code. */
500 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
502 static void
503 load_charset_map_from_file (charset, mapfile, control_flag)
504 struct charset *charset;
505 Lisp_Object mapfile;
506 int control_flag;
508 unsigned min_code = CHARSET_MIN_CODE (charset);
509 unsigned max_code = CHARSET_MAX_CODE (charset);
510 int fd;
511 FILE *fp;
512 int eof;
513 Lisp_Object suffixes;
514 struct charset_map_entries *head, *entries;
515 int n_entries, count;
516 USE_SAFE_ALLOCA;
518 suffixes = Fcons (build_string (".map"),
519 Fcons (build_string (".TXT"), Qnil));
521 count = SPECPDL_INDEX ();
522 specbind (Qfile_name_handler_alist, Qnil);
523 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
524 unbind_to (count, Qnil);
525 if (fd < 0
526 || ! (fp = fdopen (fd, "r")))
527 error ("Failure in loading charset map: %S", SDATA (mapfile));
529 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
530 large (larger than MAX_ALLOCA). */
531 SAFE_ALLOCA (head, struct charset_map_entries *,
532 sizeof (struct charset_map_entries));
533 entries = head;
534 bzero (entries, sizeof (struct charset_map_entries));
536 n_entries = 0;
537 eof = 0;
538 while (1)
540 unsigned from, to;
541 int c;
542 int idx;
544 from = read_hex (fp, &eof);
545 if (eof)
546 break;
547 if (getc (fp) == '-')
548 to = read_hex (fp, &eof);
549 else
550 to = from;
551 c = (int) read_hex (fp, &eof);
553 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
554 continue;
556 if (n_entries > 0 && (n_entries % 0x10000) == 0)
558 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
559 sizeof (struct charset_map_entries));
560 entries = entries->next;
561 bzero (entries, sizeof (struct charset_map_entries));
563 idx = n_entries % 0x10000;
564 entries->entry[idx].from = from;
565 entries->entry[idx].to = to;
566 entries->entry[idx].c = c;
567 n_entries++;
569 fclose (fp);
570 close (fd);
572 load_charset_map (charset, head, n_entries, control_flag);
573 SAFE_FREE ();
576 static void
577 load_charset_map_from_vector (charset, vec, control_flag)
578 struct charset *charset;
579 Lisp_Object vec;
580 int control_flag;
582 unsigned min_code = CHARSET_MIN_CODE (charset);
583 unsigned max_code = CHARSET_MAX_CODE (charset);
584 struct charset_map_entries *head, *entries;
585 int n_entries;
586 int len = ASIZE (vec);
587 int i;
588 USE_SAFE_ALLOCA;
590 if (len % 2 == 1)
592 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
593 return;
596 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
597 large (larger than MAX_ALLOCA). */
598 SAFE_ALLOCA (head, struct charset_map_entries *,
599 sizeof (struct charset_map_entries));
600 entries = head;
601 bzero (entries, sizeof (struct charset_map_entries));
603 n_entries = 0;
604 for (i = 0; i < len; i += 2)
606 Lisp_Object val, val2;
607 unsigned from, to;
608 int c;
609 int idx;
611 val = AREF (vec, i);
612 if (CONSP (val))
614 val2 = XCDR (val);
615 val = XCAR (val);
616 CHECK_NATNUM (val);
617 CHECK_NATNUM (val2);
618 from = XFASTINT (val);
619 to = XFASTINT (val2);
621 else
623 CHECK_NATNUM (val);
624 from = to = XFASTINT (val);
626 val = AREF (vec, i + 1);
627 CHECK_NATNUM (val);
628 c = XFASTINT (val);
630 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
631 continue;
633 if (n_entries > 0 && (n_entries % 0x10000) == 0)
635 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
636 sizeof (struct charset_map_entries));
637 entries = entries->next;
638 bzero (entries, sizeof (struct charset_map_entries));
640 idx = n_entries % 0x10000;
641 entries->entry[idx].from = from;
642 entries->entry[idx].to = to;
643 entries->entry[idx].c = c;
644 n_entries++;
647 load_charset_map (charset, head, n_entries, control_flag);
648 SAFE_FREE ();
652 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
653 map it is (see the comment of load_charset_map for the detail). */
655 static void
656 load_charset (charset, control_flag)
657 struct charset *charset;
658 int control_flag;
660 Lisp_Object map;
662 if (inhibit_load_charset_map
663 && temp_charset_work
664 && charset == temp_charset_work->current
665 && ((control_flag == 2) == temp_charset_work->for_encoder))
666 return;
668 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
669 map = CHARSET_MAP (charset);
670 else if (CHARSET_UNIFIED_P (charset))
671 map = CHARSET_UNIFY_MAP (charset);
672 if (STRINGP (map))
673 load_charset_map_from_file (charset, map, control_flag);
674 else
675 load_charset_map_from_vector (charset, map, control_flag);
679 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
680 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
681 (object)
682 Lisp_Object object;
684 return (CHARSETP (object) ? Qt : Qnil);
688 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
689 Lisp_Object function, Lisp_Object arg,
690 unsigned from, unsigned to));
692 void
693 map_charset_for_dump (c_function, function, arg, from, to)
694 void (*c_function) (Lisp_Object, Lisp_Object);
695 Lisp_Object function, arg;
696 unsigned from, to;
698 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
699 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
700 Lisp_Object range;
701 int c, stop;
702 struct gcpro gcpro1;
704 range = Fcons (Qnil, Qnil);
705 GCPRO1 (range);
707 c = temp_charset_work->min_char;
708 stop = (temp_charset_work->max_char < 0x20000
709 ? temp_charset_work->max_char : 0xFFFF);
711 while (1)
713 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
715 if (index >= from_idx && index <= to_idx)
717 if (NILP (XCAR (range)))
718 XSETCAR (range, make_number (c));
720 else if (! NILP (XCAR (range)))
722 XSETCDR (range, make_number (c - 1));
723 if (c_function)
724 (*c_function) (arg, range);
725 else
726 call2 (function, range, arg);
727 XSETCAR (range, Qnil);
729 if (c == stop)
731 if (c == temp_charset_work->max_char)
733 if (! NILP (XCAR (range)))
735 XSETCDR (range, make_number (c));
736 if (c_function)
737 (*c_function) (arg, range);
738 else
739 call2 (function, range, arg);
741 break;
743 c = 0x1FFFF;
744 stop = temp_charset_work->max_char;
746 c++;
748 UNGCPRO;
751 void
752 map_charset_chars (c_function, function, arg,
753 charset, from, to)
754 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
755 Lisp_Object function, arg;
756 struct charset *charset;
757 unsigned from, to;
759 Lisp_Object range;
760 int partial;
762 partial = (from > CHARSET_MIN_CODE (charset)
763 || to < CHARSET_MAX_CODE (charset));
765 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
767 int from_idx = CODE_POINT_TO_INDEX (charset, from);
768 int to_idx = CODE_POINT_TO_INDEX (charset, to);
769 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
770 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
772 if (CHARSET_UNIFIED_P (charset))
774 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
775 load_charset (charset, 2);
776 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
777 map_char_table_for_charset (c_function, function,
778 CHARSET_DEUNIFIER (charset), arg,
779 partial ? charset : NULL, from, to);
780 else
781 map_charset_for_dump (c_function, function, arg, from, to);
784 range = Fcons (make_number (from_c), make_number (to_c));
785 if (NILP (function))
786 (*c_function) (arg, range);
787 else
788 call2 (function, range, arg);
790 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
792 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
793 load_charset (charset, 2);
794 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
795 map_char_table_for_charset (c_function, function,
796 CHARSET_ENCODER (charset), arg,
797 partial ? charset : NULL, from, to);
798 else
799 map_charset_for_dump (c_function, function, arg, from, to);
801 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
803 Lisp_Object subset_info;
804 int offset;
806 subset_info = CHARSET_SUBSET (charset);
807 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
808 offset = XINT (AREF (subset_info, 3));
809 from -= offset;
810 if (from < XFASTINT (AREF (subset_info, 1)))
811 from = XFASTINT (AREF (subset_info, 1));
812 to -= offset;
813 if (to > XFASTINT (AREF (subset_info, 2)))
814 to = XFASTINT (AREF (subset_info, 2));
815 map_charset_chars (c_function, function, arg, charset, from, to);
817 else /* i.e. CHARSET_METHOD_SUPERSET */
819 Lisp_Object parents;
821 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
822 parents = XCDR (parents))
824 int offset;
825 unsigned this_from, this_to;
827 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
828 offset = XINT (XCDR (XCAR (parents)));
829 this_from = from > offset ? from - offset : 0;
830 this_to = to > offset ? to - offset : 0;
831 if (this_from < CHARSET_MIN_CODE (charset))
832 this_from = CHARSET_MIN_CODE (charset);
833 if (this_to > CHARSET_MAX_CODE (charset))
834 this_to = CHARSET_MAX_CODE (charset);
835 map_charset_chars (c_function, function, arg, charset,
836 this_from, this_to);
841 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
842 doc: /* Call FUNCTION for all characters in CHARSET.
843 FUNCTION is called with an argument RANGE and the optional 3rd
844 argument ARG.
846 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
847 characters contained in CHARSET.
849 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
850 range of code points (in CHARSET) of target characters. */)
851 (function, charset, arg, from_code, to_code)
852 Lisp_Object function, charset, arg, from_code, to_code;
854 struct charset *cs;
855 unsigned from, to;
857 CHECK_CHARSET_GET_CHARSET (charset, cs);
858 if (NILP (from_code))
859 from = CHARSET_MIN_CODE (cs);
860 else
862 CHECK_NATNUM (from_code);
863 from = XINT (from_code);
864 if (from < CHARSET_MIN_CODE (cs))
865 from = CHARSET_MIN_CODE (cs);
867 if (NILP (to_code))
868 to = CHARSET_MAX_CODE (cs);
869 else
871 CHECK_NATNUM (to_code);
872 to = XINT (to_code);
873 if (to > CHARSET_MAX_CODE (cs))
874 to = CHARSET_MAX_CODE (cs);
876 map_charset_chars (NULL, function, arg, cs, from, to);
877 return Qnil;
881 /* Define a charset according to the arguments. The Nth argument is
882 the Nth attribute of the charset (the last attribute `charset-id'
883 is not included). See the docstring of `define-charset' for the
884 detail. */
886 DEFUN ("define-charset-internal", Fdefine_charset_internal,
887 Sdefine_charset_internal, charset_arg_max, MANY, 0,
888 doc: /* For internal use only.
889 usage: (define-charset-internal ...) */)
890 (nargs, args)
891 int nargs;
892 Lisp_Object *args;
894 /* Charset attr vector. */
895 Lisp_Object attrs;
896 Lisp_Object val;
897 unsigned hash_code;
898 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
899 int i, j;
900 struct charset charset;
901 int id;
902 int dimension;
903 int new_definition_p;
904 int nchars;
906 if (nargs != charset_arg_max)
907 return Fsignal (Qwrong_number_of_arguments,
908 Fcons (intern ("define-charset-internal"),
909 make_number (nargs)));
911 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
913 CHECK_SYMBOL (args[charset_arg_name]);
914 ASET (attrs, charset_name, args[charset_arg_name]);
916 val = args[charset_arg_code_space];
917 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
919 int min_byte, max_byte;
921 min_byte = XINT (Faref (val, make_number (i * 2)));
922 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
923 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
924 error ("Invalid :code-space value");
925 charset.code_space[i * 4] = min_byte;
926 charset.code_space[i * 4 + 1] = max_byte;
927 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
928 nchars *= charset.code_space[i * 4 + 2];
929 charset.code_space[i * 4 + 3] = nchars;
930 if (max_byte > 0)
931 dimension = i + 1;
934 val = args[charset_arg_dimension];
935 if (NILP (val))
936 charset.dimension = dimension;
937 else
939 CHECK_NATNUM (val);
940 charset.dimension = XINT (val);
941 if (charset.dimension < 1 || charset.dimension > 4)
942 args_out_of_range_3 (val, make_number (1), make_number (4));
945 charset.code_linear_p
946 = (charset.dimension == 1
947 || (charset.code_space[2] == 256
948 && (charset.dimension == 2
949 || (charset.code_space[6] == 256
950 && (charset.dimension == 3
951 || charset.code_space[10] == 256)))));
953 if (! charset.code_linear_p)
955 charset.code_space_mask = (unsigned char *) xmalloc (256);
956 bzero (charset.code_space_mask, 256);
957 for (i = 0; i < 4; i++)
958 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
959 j++)
960 charset.code_space_mask[j] |= (1 << i);
963 charset.iso_chars_96 = charset.code_space[2] == 96;
965 charset.min_code = (charset.code_space[0]
966 | (charset.code_space[4] << 8)
967 | (charset.code_space[8] << 16)
968 | (charset.code_space[12] << 24));
969 charset.max_code = (charset.code_space[1]
970 | (charset.code_space[5] << 8)
971 | (charset.code_space[9] << 16)
972 | (charset.code_space[13] << 24));
973 charset.char_index_offset = 0;
975 val = args[charset_arg_min_code];
976 if (! NILP (val))
978 unsigned code;
980 if (INTEGERP (val))
981 code = XINT (val);
982 else
984 CHECK_CONS (val);
985 CHECK_NUMBER_CAR (val);
986 CHECK_NUMBER_CDR (val);
987 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
989 if (code < charset.min_code
990 || code > charset.max_code)
991 args_out_of_range_3 (make_number (charset.min_code),
992 make_number (charset.max_code), val);
993 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
994 charset.min_code = code;
997 val = args[charset_arg_max_code];
998 if (! NILP (val))
1000 unsigned code;
1002 if (INTEGERP (val))
1003 code = XINT (val);
1004 else
1006 CHECK_CONS (val);
1007 CHECK_NUMBER_CAR (val);
1008 CHECK_NUMBER_CDR (val);
1009 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
1011 if (code < charset.min_code
1012 || code > charset.max_code)
1013 args_out_of_range_3 (make_number (charset.min_code),
1014 make_number (charset.max_code), val);
1015 charset.max_code = code;
1018 charset.compact_codes_p = charset.max_code < 0x10000;
1020 val = args[charset_arg_invalid_code];
1021 if (NILP (val))
1023 if (charset.min_code > 0)
1024 charset.invalid_code = 0;
1025 else
1027 XSETINT (val, charset.max_code + 1);
1028 if (XINT (val) == charset.max_code + 1)
1029 charset.invalid_code = charset.max_code + 1;
1030 else
1031 error ("Attribute :invalid-code must be specified");
1034 else
1036 CHECK_NATNUM (val);
1037 charset.invalid_code = XFASTINT (val);
1040 val = args[charset_arg_iso_final];
1041 if (NILP (val))
1042 charset.iso_final = -1;
1043 else
1045 CHECK_NUMBER (val);
1046 if (XINT (val) < '0' || XINT (val) > 127)
1047 error ("Invalid iso-final-char: %d", XINT (val));
1048 charset.iso_final = XINT (val);
1051 val = args[charset_arg_iso_revision];
1052 if (NILP (val))
1053 charset.iso_revision = -1;
1054 else
1056 CHECK_NUMBER (val);
1057 if (XINT (val) > 63)
1058 args_out_of_range (make_number (63), val);
1059 charset.iso_revision = XINT (val);
1062 val = args[charset_arg_emacs_mule_id];
1063 if (NILP (val))
1064 charset.emacs_mule_id = -1;
1065 else
1067 CHECK_NATNUM (val);
1068 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1069 error ("Invalid emacs-mule-id: %d", XINT (val));
1070 charset.emacs_mule_id = XINT (val);
1073 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1075 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1077 charset.unified_p = 0;
1079 bzero (charset.fast_map, sizeof (charset.fast_map));
1081 if (! NILP (args[charset_arg_code_offset]))
1083 val = args[charset_arg_code_offset];
1084 CHECK_NUMBER (val);
1086 charset.method = CHARSET_METHOD_OFFSET;
1087 charset.code_offset = XINT (val);
1089 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1090 charset.min_char = i + charset.code_offset;
1091 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1092 charset.max_char = i + charset.code_offset;
1093 if (charset.max_char > MAX_CHAR)
1094 error ("Unsupported max char: %d", charset.max_char);
1096 i = (charset.min_char >> 7) << 7;
1097 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1098 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1099 i = (i >> 12) << 12;
1100 for (; i <= charset.max_char; i += 0x1000)
1101 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1102 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1103 charset.ascii_compatible_p = 1;
1105 else if (! NILP (args[charset_arg_map]))
1107 val = args[charset_arg_map];
1108 ASET (attrs, charset_map, val);
1109 charset.method = CHARSET_METHOD_MAP;
1111 else if (! NILP (args[charset_arg_subset]))
1113 Lisp_Object parent;
1114 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1115 struct charset *parent_charset;
1117 val = args[charset_arg_subset];
1118 parent = Fcar (val);
1119 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1120 parent_min_code = Fnth (make_number (1), val);
1121 CHECK_NATNUM (parent_min_code);
1122 parent_max_code = Fnth (make_number (2), val);
1123 CHECK_NATNUM (parent_max_code);
1124 parent_code_offset = Fnth (make_number (3), val);
1125 CHECK_NUMBER (parent_code_offset);
1126 val = Fmake_vector (make_number (4), Qnil);
1127 ASET (val, 0, make_number (parent_charset->id));
1128 ASET (val, 1, parent_min_code);
1129 ASET (val, 2, parent_max_code);
1130 ASET (val, 3, parent_code_offset);
1131 ASET (attrs, charset_subset, val);
1133 charset.method = CHARSET_METHOD_SUBSET;
1134 /* Here, we just copy the parent's fast_map. It's not accurate,
1135 but at least it works for quickly detecting which character
1136 DOESN'T belong to this charset. */
1137 for (i = 0; i < 190; i++)
1138 charset.fast_map[i] = parent_charset->fast_map[i];
1140 /* We also copy these for parents. */
1141 charset.min_char = parent_charset->min_char;
1142 charset.max_char = parent_charset->max_char;
1144 else if (! NILP (args[charset_arg_superset]))
1146 val = args[charset_arg_superset];
1147 charset.method = CHARSET_METHOD_SUPERSET;
1148 val = Fcopy_sequence (val);
1149 ASET (attrs, charset_superset, val);
1151 charset.min_char = MAX_CHAR;
1152 charset.max_char = 0;
1153 for (; ! NILP (val); val = Fcdr (val))
1155 Lisp_Object elt, car_part, cdr_part;
1156 int this_id, offset;
1157 struct charset *this_charset;
1159 elt = Fcar (val);
1160 if (CONSP (elt))
1162 car_part = XCAR (elt);
1163 cdr_part = XCDR (elt);
1164 CHECK_CHARSET_GET_ID (car_part, this_id);
1165 CHECK_NUMBER (cdr_part);
1166 offset = XINT (cdr_part);
1168 else
1170 CHECK_CHARSET_GET_ID (elt, this_id);
1171 offset = 0;
1173 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1175 this_charset = CHARSET_FROM_ID (this_id);
1176 if (charset.min_char > this_charset->min_char)
1177 charset.min_char = this_charset->min_char;
1178 if (charset.max_char < this_charset->max_char)
1179 charset.max_char = this_charset->max_char;
1180 for (i = 0; i < 190; i++)
1181 charset.fast_map[i] |= this_charset->fast_map[i];
1184 else
1185 error ("None of :code-offset, :map, :parents are specified");
1187 val = args[charset_arg_unify_map];
1188 if (! NILP (val) && !STRINGP (val))
1189 CHECK_VECTOR (val);
1190 ASET (attrs, charset_unify_map, val);
1192 CHECK_LIST (args[charset_arg_plist]);
1193 ASET (attrs, charset_plist, args[charset_arg_plist]);
1195 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1196 &hash_code);
1197 if (charset.hash_index >= 0)
1199 new_definition_p = 0;
1200 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1201 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1203 else
1205 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1206 hash_code);
1207 if (charset_table_used == charset_table_size)
1209 struct charset *new_table
1210 = (struct charset *) xmalloc (sizeof (struct charset)
1211 * (charset_table_size + 16));
1212 bcopy (charset_table, new_table,
1213 sizeof (struct charset) * charset_table_size);
1214 charset_table_size += 16;
1215 charset_table = new_table;
1217 id = charset_table_used++;
1218 new_definition_p = 1;
1221 ASET (attrs, charset_id, make_number (id));
1222 charset.id = id;
1223 charset_table[id] = charset;
1225 if (charset.method == CHARSET_METHOD_MAP)
1227 load_charset (&charset, 0);
1228 charset_table[id] = charset;
1231 if (charset.iso_final >= 0)
1233 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1234 charset.iso_final) = id;
1235 if (new_definition_p)
1236 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1237 Fcons (make_number (id), Qnil));
1238 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1239 charset_jisx0201_roman = id;
1240 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1241 charset_jisx0208_1978 = id;
1242 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1243 charset_jisx0208 = id;
1244 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1245 charset_ksc5601 = id;
1248 if (charset.emacs_mule_id >= 0)
1250 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1251 if (charset.emacs_mule_id < 0xA0)
1252 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1253 else
1254 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1255 if (new_definition_p)
1256 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1257 Fcons (make_number (id), Qnil));
1260 if (new_definition_p)
1262 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1263 if (charset.supplementary_p)
1264 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1265 Fcons (make_number (id), Qnil));
1266 else
1268 Lisp_Object tail;
1270 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1272 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1274 if (cs->supplementary_p)
1275 break;
1277 if (EQ (tail, Vcharset_ordered_list))
1278 Vcharset_ordered_list = Fcons (make_number (id),
1279 Vcharset_ordered_list);
1280 else if (NILP (tail))
1281 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1282 Fcons (make_number (id), Qnil));
1283 else
1285 val = Fcons (XCAR (tail), XCDR (tail));
1286 XSETCDR (tail, val);
1287 XSETCAR (tail, make_number (id));
1290 charset_ordered_list_tick++;
1293 return Qnil;
1297 /* Same as Fdefine_charset_internal but arguments are more convenient
1298 to call from C (typically in syms_of_charset). This can define a
1299 charset of `offset' method only. Return the ID of the new
1300 charset. */
1302 static int
1303 define_charset_internal (name, dimension, code_space, min_code, max_code,
1304 iso_final, iso_revision, emacs_mule_id,
1305 ascii_compatible, supplementary,
1306 code_offset)
1307 Lisp_Object name;
1308 int dimension;
1309 unsigned char *code_space;
1310 unsigned min_code, max_code;
1311 int iso_final, iso_revision, emacs_mule_id;
1312 int ascii_compatible, supplementary;
1313 int code_offset;
1315 Lisp_Object args[charset_arg_max];
1316 Lisp_Object plist[14];
1317 Lisp_Object val;
1318 int i;
1320 args[charset_arg_name] = name;
1321 args[charset_arg_dimension] = make_number (dimension);
1322 val = Fmake_vector (make_number (8), make_number (0));
1323 for (i = 0; i < 8; i++)
1324 ASET (val, i, make_number (code_space[i]));
1325 args[charset_arg_code_space] = val;
1326 args[charset_arg_min_code] = make_number (min_code);
1327 args[charset_arg_max_code] = make_number (max_code);
1328 args[charset_arg_iso_final]
1329 = (iso_final < 0 ? Qnil : make_number (iso_final));
1330 args[charset_arg_iso_revision] = make_number (iso_revision);
1331 args[charset_arg_emacs_mule_id]
1332 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1333 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1334 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1335 args[charset_arg_invalid_code] = Qnil;
1336 args[charset_arg_code_offset] = make_number (code_offset);
1337 args[charset_arg_map] = Qnil;
1338 args[charset_arg_subset] = Qnil;
1339 args[charset_arg_superset] = Qnil;
1340 args[charset_arg_unify_map] = Qnil;
1342 plist[0] = intern_c_string (":name");
1343 plist[1] = args[charset_arg_name];
1344 plist[2] = intern_c_string (":dimension");
1345 plist[3] = args[charset_arg_dimension];
1346 plist[4] = intern_c_string (":code-space");
1347 plist[5] = args[charset_arg_code_space];
1348 plist[6] = intern_c_string (":iso-final-char");
1349 plist[7] = args[charset_arg_iso_final];
1350 plist[8] = intern_c_string (":emacs-mule-id");
1351 plist[9] = args[charset_arg_emacs_mule_id];
1352 plist[10] = intern_c_string (":ascii-compatible-p");
1353 plist[11] = args[charset_arg_ascii_compatible_p];
1354 plist[12] = intern_c_string (":code-offset");
1355 plist[13] = args[charset_arg_code_offset];
1357 args[charset_arg_plist] = Flist (14, plist);
1358 Fdefine_charset_internal (charset_arg_max, args);
1360 return XINT (CHARSET_SYMBOL_ID (name));
1364 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1365 Sdefine_charset_alias, 2, 2, 0,
1366 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1367 (alias, charset)
1368 Lisp_Object alias, charset;
1370 Lisp_Object attr;
1372 CHECK_CHARSET_GET_ATTR (charset, attr);
1373 Fputhash (alias, attr, Vcharset_hash_table);
1374 Vcharset_list = Fcons (alias, Vcharset_list);
1375 return Qnil;
1379 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1380 doc: /* Return the property list of CHARSET. */)
1381 (charset)
1382 Lisp_Object charset;
1384 Lisp_Object attrs;
1386 CHECK_CHARSET_GET_ATTR (charset, attrs);
1387 return CHARSET_ATTR_PLIST (attrs);
1391 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1392 doc: /* Set CHARSET's property list to PLIST. */)
1393 (charset, plist)
1394 Lisp_Object charset, plist;
1396 Lisp_Object attrs;
1398 CHECK_CHARSET_GET_ATTR (charset, attrs);
1399 CHARSET_ATTR_PLIST (attrs) = plist;
1400 return plist;
1404 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1405 doc: /* Unify characters of CHARSET with Unicode.
1406 This means reading the relevant file and installing the table defined
1407 by CHARSET's `:unify-map' property.
1409 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1410 the same meaning as the `:unify-map' attribute in the function
1411 `define-charset' (which see).
1413 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1414 (charset, unify_map, deunify)
1415 Lisp_Object charset, unify_map, deunify;
1417 int id;
1418 struct charset *cs;
1420 CHECK_CHARSET_GET_ID (charset, id);
1421 cs = CHARSET_FROM_ID (id);
1422 if (NILP (deunify)
1423 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1424 : ! CHARSET_UNIFIED_P (cs))
1425 return Qnil;
1427 CHARSET_UNIFIED_P (cs) = 0;
1428 if (NILP (deunify))
1430 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1431 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1432 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1433 if (NILP (unify_map))
1434 unify_map = CHARSET_UNIFY_MAP (cs);
1435 else
1437 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1438 signal_error ("Bad unify-map", unify_map);
1439 CHARSET_UNIFY_MAP (cs) = unify_map;
1441 if (NILP (Vchar_unify_table))
1442 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1443 char_table_set_range (Vchar_unify_table,
1444 cs->min_char, cs->max_char, charset);
1445 CHARSET_UNIFIED_P (cs) = 1;
1447 else if (CHAR_TABLE_P (Vchar_unify_table))
1449 int min_code = CHARSET_MIN_CODE (cs);
1450 int max_code = CHARSET_MAX_CODE (cs);
1451 int min_char = DECODE_CHAR (cs, min_code);
1452 int max_char = DECODE_CHAR (cs, max_code);
1454 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1457 return Qnil;
1460 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1461 Sget_unused_iso_final_char, 2, 2, 0,
1462 doc: /*
1463 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1464 DIMENSION is the number of bytes to represent a character: 1 or 2.
1465 CHARS is the number of characters in a dimension: 94 or 96.
1467 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1468 If there's no unused final char for the specified kind of charset,
1469 return nil. */)
1470 (dimension, chars)
1471 Lisp_Object dimension, chars;
1473 int final_char;
1475 CHECK_NUMBER (dimension);
1476 CHECK_NUMBER (chars);
1477 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1478 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1479 if (XINT (chars) != 94 && XINT (chars) != 96)
1480 args_out_of_range_3 (chars, make_number (94), make_number (96));
1481 for (final_char = '0'; final_char <= '?'; final_char++)
1482 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1483 break;
1484 return (final_char <= '?' ? make_number (final_char) : Qnil);
1487 static void
1488 check_iso_charset_parameter (dimension, chars, final_char)
1489 Lisp_Object dimension, chars, final_char;
1491 CHECK_NATNUM (dimension);
1492 CHECK_NATNUM (chars);
1493 CHECK_NATNUM (final_char);
1495 if (XINT (dimension) > 3)
1496 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1497 if (XINT (chars) != 94 && XINT (chars) != 96)
1498 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1499 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1500 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1504 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1505 4, 4, 0,
1506 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1508 On decoding by an ISO-2022 base coding system, when a charset
1509 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1510 if CHARSET is designated instead. */)
1511 (dimension, chars, final_char, charset)
1512 Lisp_Object dimension, chars, final_char, charset;
1514 int id;
1515 int chars_flag;
1517 CHECK_CHARSET_GET_ID (charset, id);
1518 check_iso_charset_parameter (dimension, chars, final_char);
1519 chars_flag = XINT (chars) == 96;
1520 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1521 return Qnil;
1525 /* Return information about charsets in the text at PTR of NBYTES
1526 bytes, which are NCHARS characters. The value is:
1528 0: Each character is represented by one byte. This is always
1529 true for a unibyte string. For a multibyte string, true if
1530 it contains only ASCII characters.
1532 1: No charsets other than ascii, control-1, and latin-1 are
1533 found.
1535 2: Otherwise.
1539 string_xstring_p (string)
1540 Lisp_Object string;
1542 const unsigned char *p = SDATA (string);
1543 const unsigned char *endp = p + SBYTES (string);
1545 if (SCHARS (string) == SBYTES (string))
1546 return 0;
1548 while (p < endp)
1550 int c = STRING_CHAR_ADVANCE (p);
1552 if (c >= 0x100)
1553 return 2;
1555 return 1;
1559 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1561 CHARSETS is a vector. If Nth element is non-nil, it means the
1562 charset whose id is N is already found.
1564 It may lookup a translation table TABLE if supplied. */
1566 static void
1567 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1568 const unsigned char *ptr;
1569 EMACS_INT nchars, nbytes;
1570 Lisp_Object charsets, table;
1571 int multibyte;
1573 const unsigned char *pend = ptr + nbytes;
1575 if (nchars == nbytes)
1577 if (multibyte)
1578 ASET (charsets, charset_ascii, Qt);
1579 else
1580 while (ptr < pend)
1582 int c = *ptr++;
1584 if (!NILP (table))
1585 c = translate_char (table, c);
1586 if (ASCII_BYTE_P (c))
1587 ASET (charsets, charset_ascii, Qt);
1588 else
1589 ASET (charsets, charset_eight_bit, Qt);
1592 else
1594 while (ptr < pend)
1596 int c = STRING_CHAR_ADVANCE (ptr);
1597 struct charset *charset;
1599 if (!NILP (table))
1600 c = translate_char (table, c);
1601 charset = CHAR_CHARSET (c);
1602 ASET (charsets, CHARSET_ID (charset), Qt);
1607 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1608 2, 3, 0,
1609 doc: /* Return a list of charsets in the region between BEG and END.
1610 BEG and END are buffer positions.
1611 Optional arg TABLE if non-nil is a translation table to look up.
1613 If the current buffer is unibyte, the returned list may contain
1614 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1615 (beg, end, table)
1616 Lisp_Object beg, end, table;
1618 Lisp_Object charsets;
1619 EMACS_INT from, from_byte, to, stop, stop_byte;
1620 int i;
1621 Lisp_Object val;
1622 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1624 validate_region (&beg, &end);
1625 from = XFASTINT (beg);
1626 stop = to = XFASTINT (end);
1628 if (from < GPT && GPT < to)
1630 stop = GPT;
1631 stop_byte = GPT_BYTE;
1633 else
1634 stop_byte = CHAR_TO_BYTE (stop);
1636 from_byte = CHAR_TO_BYTE (from);
1638 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1639 while (1)
1641 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1642 stop_byte - from_byte, charsets, table,
1643 multibyte);
1644 if (stop < to)
1646 from = stop, from_byte = stop_byte;
1647 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1649 else
1650 break;
1653 val = Qnil;
1654 for (i = charset_table_used - 1; i >= 0; i--)
1655 if (!NILP (AREF (charsets, i)))
1656 val = Fcons (CHARSET_NAME (charset_table + i), val);
1657 return val;
1660 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1661 1, 2, 0,
1662 doc: /* Return a list of charsets in STR.
1663 Optional arg TABLE if non-nil is a translation table to look up.
1665 If STR is unibyte, the returned list may contain
1666 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1667 (str, table)
1668 Lisp_Object str, table;
1670 Lisp_Object charsets;
1671 int i;
1672 Lisp_Object val;
1674 CHECK_STRING (str);
1676 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1677 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1678 charsets, table,
1679 STRING_MULTIBYTE (str));
1680 val = Qnil;
1681 for (i = charset_table_used - 1; i >= 0; i--)
1682 if (!NILP (AREF (charsets, i)))
1683 val = Fcons (CHARSET_NAME (charset_table + i), val);
1684 return val;
1689 /* Return a unified character code for C (>= 0x110000). VAL is a
1690 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1691 charset symbol. */
1693 maybe_unify_char (c, val)
1694 int c;
1695 Lisp_Object val;
1697 struct charset *charset;
1699 if (INTEGERP (val))
1700 return XINT (val);
1701 if (NILP (val))
1702 return c;
1704 CHECK_CHARSET_GET_CHARSET (val, charset);
1705 load_charset (charset, 1);
1706 if (! inhibit_load_charset_map)
1708 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1709 if (! NILP (val))
1710 c = XINT (val);
1712 else
1714 int code_index = c - CHARSET_CODE_OFFSET (charset);
1715 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1717 if (unified > 0)
1718 c = unified;
1720 return c;
1724 /* Return a character correponding to the code-point CODE of
1725 CHARSET. */
1728 decode_char (charset, code)
1729 struct charset *charset;
1730 unsigned code;
1732 int c, char_index;
1733 enum charset_method method = CHARSET_METHOD (charset);
1735 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1736 return -1;
1738 if (method == CHARSET_METHOD_SUBSET)
1740 Lisp_Object subset_info;
1742 subset_info = CHARSET_SUBSET (charset);
1743 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1744 code -= XINT (AREF (subset_info, 3));
1745 if (code < XFASTINT (AREF (subset_info, 1))
1746 || code > XFASTINT (AREF (subset_info, 2)))
1747 c = -1;
1748 else
1749 c = DECODE_CHAR (charset, code);
1751 else if (method == CHARSET_METHOD_SUPERSET)
1753 Lisp_Object parents;
1755 parents = CHARSET_SUPERSET (charset);
1756 c = -1;
1757 for (; CONSP (parents); parents = XCDR (parents))
1759 int id = XINT (XCAR (XCAR (parents)));
1760 int code_offset = XINT (XCDR (XCAR (parents)));
1761 unsigned this_code = code - code_offset;
1763 charset = CHARSET_FROM_ID (id);
1764 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1765 break;
1768 else
1770 char_index = CODE_POINT_TO_INDEX (charset, code);
1771 if (char_index < 0)
1772 return -1;
1774 if (method == CHARSET_METHOD_MAP)
1776 Lisp_Object decoder;
1778 decoder = CHARSET_DECODER (charset);
1779 if (! VECTORP (decoder))
1781 load_charset (charset, 1);
1782 decoder = CHARSET_DECODER (charset);
1784 if (VECTORP (decoder))
1785 c = XINT (AREF (decoder, char_index));
1786 else
1787 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1789 else /* method == CHARSET_METHOD_OFFSET */
1791 c = char_index + CHARSET_CODE_OFFSET (charset);
1792 if (CHARSET_UNIFIED_P (charset)
1793 && c > MAX_UNICODE_CHAR)
1794 MAYBE_UNIFY_CHAR (c);
1798 return c;
1801 /* Variable used temporarily by the macro ENCODE_CHAR. */
1802 Lisp_Object charset_work;
1804 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1805 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1806 use CHARSET's strict_max_char instead of max_char. */
1808 unsigned
1809 encode_char (charset, c)
1810 struct charset *charset;
1811 int c;
1813 unsigned code;
1814 enum charset_method method = CHARSET_METHOD (charset);
1816 if (CHARSET_UNIFIED_P (charset))
1818 Lisp_Object deunifier;
1819 int code_index = -1;
1821 deunifier = CHARSET_DEUNIFIER (charset);
1822 if (! CHAR_TABLE_P (deunifier))
1824 load_charset (charset, 2);
1825 deunifier = CHARSET_DEUNIFIER (charset);
1827 if (CHAR_TABLE_P (deunifier))
1829 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1831 if (INTEGERP (deunified))
1832 code_index = XINT (deunified);
1834 else
1836 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1838 if (code_index >= 0)
1839 c = CHARSET_CODE_OFFSET (charset) + code_index;
1842 if (method == CHARSET_METHOD_SUBSET)
1844 Lisp_Object subset_info;
1845 struct charset *this_charset;
1847 subset_info = CHARSET_SUBSET (charset);
1848 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1849 code = ENCODE_CHAR (this_charset, c);
1850 if (code == CHARSET_INVALID_CODE (this_charset)
1851 || code < XFASTINT (AREF (subset_info, 1))
1852 || code > XFASTINT (AREF (subset_info, 2)))
1853 return CHARSET_INVALID_CODE (charset);
1854 code += XINT (AREF (subset_info, 3));
1855 return code;
1858 if (method == CHARSET_METHOD_SUPERSET)
1860 Lisp_Object parents;
1862 parents = CHARSET_SUPERSET (charset);
1863 for (; CONSP (parents); parents = XCDR (parents))
1865 int id = XINT (XCAR (XCAR (parents)));
1866 int code_offset = XINT (XCDR (XCAR (parents)));
1867 struct charset *this_charset = CHARSET_FROM_ID (id);
1869 code = ENCODE_CHAR (this_charset, c);
1870 if (code != CHARSET_INVALID_CODE (this_charset))
1871 return code + code_offset;
1873 return CHARSET_INVALID_CODE (charset);
1876 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1877 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1878 return CHARSET_INVALID_CODE (charset);
1880 if (method == CHARSET_METHOD_MAP)
1882 Lisp_Object encoder;
1883 Lisp_Object val;
1885 encoder = CHARSET_ENCODER (charset);
1886 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1888 load_charset (charset, 2);
1889 encoder = CHARSET_ENCODER (charset);
1891 if (CHAR_TABLE_P (encoder))
1893 val = CHAR_TABLE_REF (encoder, c);
1894 if (NILP (val))
1895 return CHARSET_INVALID_CODE (charset);
1896 code = XINT (val);
1897 if (! CHARSET_COMPACT_CODES_P (charset))
1898 code = INDEX_TO_CODE_POINT (charset, code);
1900 else
1902 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1903 code = INDEX_TO_CODE_POINT (charset, code);
1906 else /* method == CHARSET_METHOD_OFFSET */
1908 int code_index = c - CHARSET_CODE_OFFSET (charset);
1910 code = INDEX_TO_CODE_POINT (charset, code_index);
1913 return code;
1917 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1918 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1919 Return nil if CODE-POINT is not valid in CHARSET.
1921 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1923 Optional argument RESTRICTION specifies a way to map the pair of CCS
1924 and CODE-POINT to a character. Currently not supported and just ignored. */)
1925 (charset, code_point, restriction)
1926 Lisp_Object charset, code_point, restriction;
1928 int c, id;
1929 unsigned code;
1930 struct charset *charsetp;
1932 CHECK_CHARSET_GET_ID (charset, id);
1933 if (CONSP (code_point))
1935 CHECK_NATNUM_CAR (code_point);
1936 CHECK_NATNUM_CDR (code_point);
1937 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1939 else
1941 CHECK_NATNUM (code_point);
1942 code = XINT (code_point);
1944 charsetp = CHARSET_FROM_ID (id);
1945 c = DECODE_CHAR (charsetp, code);
1946 return (c >= 0 ? make_number (c) : Qnil);
1950 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1951 doc: /* Encode the character CH into a code-point of CHARSET.
1952 Return nil if CHARSET doesn't include CH.
1954 Optional argument RESTRICTION specifies a way to map CH to a
1955 code-point in CCS. Currently not supported and just ignored. */)
1956 (ch, charset, restriction)
1957 Lisp_Object ch, charset, restriction;
1959 int id;
1960 unsigned code;
1961 struct charset *charsetp;
1963 CHECK_CHARSET_GET_ID (charset, id);
1964 CHECK_NATNUM (ch);
1965 charsetp = CHARSET_FROM_ID (id);
1966 code = ENCODE_CHAR (charsetp, XINT (ch));
1967 if (code == CHARSET_INVALID_CODE (charsetp))
1968 return Qnil;
1969 if (code > 0x7FFFFFF)
1970 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1971 return make_number (code);
1975 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1976 doc:
1977 /* Return a character of CHARSET whose position codes are CODEn.
1979 CODE1 through CODE4 are optional, but if you don't supply sufficient
1980 position codes, it is assumed that the minimum code in each dimension
1981 is specified. */)
1982 (charset, code1, code2, code3, code4)
1983 Lisp_Object charset, code1, code2, code3, code4;
1985 int id, dimension;
1986 struct charset *charsetp;
1987 unsigned code;
1988 int c;
1990 CHECK_CHARSET_GET_ID (charset, id);
1991 charsetp = CHARSET_FROM_ID (id);
1993 dimension = CHARSET_DIMENSION (charsetp);
1994 if (NILP (code1))
1995 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1996 ? 0 : CHARSET_MIN_CODE (charsetp));
1997 else
1999 CHECK_NATNUM (code1);
2000 if (XFASTINT (code1) >= 0x100)
2001 args_out_of_range (make_number (0xFF), code1);
2002 code = XFASTINT (code1);
2004 if (dimension > 1)
2006 code <<= 8;
2007 if (NILP (code2))
2008 code |= charsetp->code_space[(dimension - 2) * 4];
2009 else
2011 CHECK_NATNUM (code2);
2012 if (XFASTINT (code2) >= 0x100)
2013 args_out_of_range (make_number (0xFF), code2);
2014 code |= XFASTINT (code2);
2017 if (dimension > 2)
2019 code <<= 8;
2020 if (NILP (code3))
2021 code |= charsetp->code_space[(dimension - 3) * 4];
2022 else
2024 CHECK_NATNUM (code3);
2025 if (XFASTINT (code3) >= 0x100)
2026 args_out_of_range (make_number (0xFF), code3);
2027 code |= XFASTINT (code3);
2030 if (dimension > 3)
2032 code <<= 8;
2033 if (NILP (code4))
2034 code |= charsetp->code_space[0];
2035 else
2037 CHECK_NATNUM (code4);
2038 if (XFASTINT (code4) >= 0x100)
2039 args_out_of_range (make_number (0xFF), code4);
2040 code |= XFASTINT (code4);
2047 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2048 code &= 0x7F7F7F7F;
2049 c = DECODE_CHAR (charsetp, code);
2050 if (c < 0)
2051 error ("Invalid code(s)");
2052 return make_number (c);
2056 /* Return the first charset in CHARSET_LIST that contains C.
2057 CHARSET_LIST is a list of charset IDs. If it is nil, use
2058 Vcharset_ordered_list. */
2060 struct charset *
2061 char_charset (c, charset_list, code_return)
2062 int c;
2063 Lisp_Object charset_list;
2064 unsigned *code_return;
2066 int maybe_null = 0;
2068 if (NILP (charset_list))
2069 charset_list = Vcharset_ordered_list;
2070 else
2071 maybe_null = 1;
2073 while (CONSP (charset_list))
2075 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2076 unsigned code = ENCODE_CHAR (charset, c);
2078 if (code != CHARSET_INVALID_CODE (charset))
2080 if (code_return)
2081 *code_return = code;
2082 return charset;
2084 charset_list = XCDR (charset_list);
2085 if (c <= MAX_UNICODE_CHAR
2086 && EQ (charset_list, Vcharset_non_preferred_head))
2087 return CHARSET_FROM_ID (charset_unicode);
2089 return (maybe_null ? NULL
2090 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2091 : CHARSET_FROM_ID (charset_eight_bit));
2095 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2096 doc:
2097 /*Return list of charset and one to four position-codes of CH.
2098 The charset is decided by the current priority order of charsets.
2099 A position-code is a byte value of each dimension of the code-point of
2100 CH in the charset. */)
2101 (ch)
2102 Lisp_Object ch;
2104 struct charset *charset;
2105 int c, dimension;
2106 unsigned code;
2107 Lisp_Object val;
2109 CHECK_CHARACTER (ch);
2110 c = XFASTINT (ch);
2111 charset = CHAR_CHARSET (c);
2112 if (! charset)
2113 abort ();
2114 code = ENCODE_CHAR (charset, c);
2115 if (code == CHARSET_INVALID_CODE (charset))
2116 abort ();
2117 dimension = CHARSET_DIMENSION (charset);
2118 for (val = Qnil; dimension > 0; dimension--)
2120 val = Fcons (make_number (code & 0xFF), val);
2121 code >>= 8;
2123 return Fcons (CHARSET_NAME (charset), val);
2127 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2128 doc: /* Return the charset of highest priority that contains CH.
2129 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2130 from which to find the charset. It may also be a coding system. In
2131 that case, find the charset from what supported by that coding system. */)
2132 (ch, restriction)
2133 Lisp_Object ch, restriction;
2135 struct charset *charset;
2137 CHECK_CHARACTER (ch);
2138 if (NILP (restriction))
2139 charset = CHAR_CHARSET (XINT (ch));
2140 else
2142 Lisp_Object charset_list;
2144 if (CONSP (restriction))
2146 for (charset_list = Qnil; CONSP (restriction);
2147 restriction = XCDR (restriction))
2149 int id;
2151 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2152 charset_list = Fcons (make_number (id), charset_list);
2154 charset_list = Fnreverse (charset_list);
2156 else
2157 charset_list = coding_system_charset_list (restriction);
2158 charset = char_charset (XINT (ch), charset_list, NULL);
2159 if (! charset)
2160 return Qnil;
2162 return (CHARSET_NAME (charset));
2166 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2167 doc: /*
2168 Return charset of a character in the current buffer at position POS.
2169 If POS is nil, it defauls to the current point.
2170 If POS is out of range, the value is nil. */)
2171 (pos)
2172 Lisp_Object pos;
2174 Lisp_Object ch;
2175 struct charset *charset;
2177 ch = Fchar_after (pos);
2178 if (! INTEGERP (ch))
2179 return ch;
2180 charset = CHAR_CHARSET (XINT (ch));
2181 return (CHARSET_NAME (charset));
2185 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2186 doc: /*
2187 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2189 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2190 by their DIMENSION, CHARS, and FINAL-CHAR,
2191 whereas Emacs distinguishes them by charset symbol.
2192 See the documentation of the function `charset-info' for the meanings of
2193 DIMENSION, CHARS, and FINAL-CHAR. */)
2194 (dimension, chars, final_char)
2195 Lisp_Object dimension, chars, final_char;
2197 int id;
2198 int chars_flag;
2200 check_iso_charset_parameter (dimension, chars, final_char);
2201 chars_flag = XFASTINT (chars) == 96;
2202 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2203 XFASTINT (final_char));
2204 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2208 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2209 0, 0, 0,
2210 doc: /*
2211 Internal use only.
2212 Clear temporary charset mapping tables.
2213 It should be called only from temacs invoked for dumping. */)
2216 if (temp_charset_work)
2218 free (temp_charset_work);
2219 temp_charset_work = NULL;
2222 if (CHAR_TABLE_P (Vchar_unify_table))
2223 Foptimize_char_table (Vchar_unify_table, Qnil);
2225 return Qnil;
2228 DEFUN ("charset-priority-list", Fcharset_priority_list,
2229 Scharset_priority_list, 0, 1, 0,
2230 doc: /* Return the list of charsets ordered by priority.
2231 HIGHESTP non-nil means just return the highest priority one. */)
2232 (highestp)
2233 Lisp_Object highestp;
2235 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2237 if (!NILP (highestp))
2238 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2240 while (!NILP (list))
2242 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2243 list = XCDR (list);
2245 return Fnreverse (val);
2248 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2249 1, MANY, 0,
2250 doc: /* Assign higher priority to the charsets given as arguments.
2251 usage: (set-charset-priority &rest charsets) */)
2252 (nargs, args)
2253 int nargs;
2254 Lisp_Object *args;
2256 Lisp_Object new_head, old_list, arglist[2];
2257 Lisp_Object list_2022, list_emacs_mule;
2258 int i, id;
2260 old_list = Fcopy_sequence (Vcharset_ordered_list);
2261 new_head = Qnil;
2262 for (i = 0; i < nargs; i++)
2264 CHECK_CHARSET_GET_ID (args[i], id);
2265 if (! NILP (Fmemq (make_number (id), old_list)))
2267 old_list = Fdelq (make_number (id), old_list);
2268 new_head = Fcons (make_number (id), new_head);
2271 arglist[0] = Fnreverse (new_head);
2272 arglist[1] = Vcharset_non_preferred_head = old_list;
2273 Vcharset_ordered_list = Fnconc (2, arglist);
2274 charset_ordered_list_tick++;
2276 charset_unibyte = -1;
2277 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2278 CONSP (old_list); old_list = XCDR (old_list))
2280 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2281 list_2022 = Fcons (XCAR (old_list), list_2022);
2282 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2283 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2284 if (charset_unibyte < 0)
2286 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2288 if (CHARSET_DIMENSION (charset) == 1
2289 && CHARSET_ASCII_COMPATIBLE_P (charset)
2290 && CHARSET_MAX_CHAR (charset) >= 0x80)
2291 charset_unibyte = CHARSET_ID (charset);
2294 Viso_2022_charset_list = Fnreverse (list_2022);
2295 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2296 if (charset_unibyte < 0)
2297 charset_unibyte = charset_iso_8859_1;
2299 return Qnil;
2302 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2303 0, 1, 0,
2304 doc: /* Internal use only.
2305 Return charset identification number of CHARSET. */)
2306 (charset)
2307 Lisp_Object charset;
2309 int id;
2311 CHECK_CHARSET_GET_ID (charset, id);
2312 return make_number (id);
2316 void
2317 init_charset ()
2319 Lisp_Object tempdir;
2320 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2321 if (access ((char *) SDATA (tempdir), 0) < 0)
2323 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2324 Emacs will not function correctly without the character map files.\n\
2325 Please check your installation!\n",
2326 tempdir);
2327 /* TODO should this be a fatal error? (Bug#909) */
2330 Vcharset_map_path = Fcons (tempdir, Qnil);
2334 void
2335 init_charset_once ()
2337 int i, j, k;
2339 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2340 for (j = 0; j < ISO_MAX_CHARS; j++)
2341 for (k = 0; k < ISO_MAX_FINAL; k++)
2342 iso_charset_table[i][j][k] = -1;
2344 for (i = 0; i < 256; i++)
2345 emacs_mule_charset[i] = NULL;
2347 charset_jisx0201_roman = -1;
2348 charset_jisx0208_1978 = -1;
2349 charset_jisx0208 = -1;
2350 charset_ksc5601 = -1;
2353 #ifdef emacs
2355 void
2356 syms_of_charset ()
2358 DEFSYM (Qcharsetp, "charsetp");
2360 DEFSYM (Qascii, "ascii");
2361 DEFSYM (Qunicode, "unicode");
2362 DEFSYM (Qemacs, "emacs");
2363 DEFSYM (Qeight_bit, "eight-bit");
2364 DEFSYM (Qiso_8859_1, "iso-8859-1");
2366 DEFSYM (Qgl, "gl");
2367 DEFSYM (Qgr, "gr");
2369 staticpro (&Vcharset_ordered_list);
2370 Vcharset_ordered_list = Qnil;
2372 staticpro (&Viso_2022_charset_list);
2373 Viso_2022_charset_list = Qnil;
2375 staticpro (&Vemacs_mule_charset_list);
2376 Vemacs_mule_charset_list = Qnil;
2378 /* Don't staticpro them here. It's done in syms_of_fns. */
2379 QCtest = intern (":test");
2380 Qeq = intern ("eq");
2382 staticpro (&Vcharset_hash_table);
2384 Lisp_Object args[2];
2385 args[0] = QCtest;
2386 args[1] = Qeq;
2387 Vcharset_hash_table = Fmake_hash_table (2, args);
2390 charset_table_size = 128;
2391 charset_table = ((struct charset *)
2392 xmalloc (sizeof (struct charset) * charset_table_size));
2393 charset_table_used = 0;
2395 defsubr (&Scharsetp);
2396 defsubr (&Smap_charset_chars);
2397 defsubr (&Sdefine_charset_internal);
2398 defsubr (&Sdefine_charset_alias);
2399 defsubr (&Scharset_plist);
2400 defsubr (&Sset_charset_plist);
2401 defsubr (&Sunify_charset);
2402 defsubr (&Sget_unused_iso_final_char);
2403 defsubr (&Sdeclare_equiv_charset);
2404 defsubr (&Sfind_charset_region);
2405 defsubr (&Sfind_charset_string);
2406 defsubr (&Sdecode_char);
2407 defsubr (&Sencode_char);
2408 defsubr (&Ssplit_char);
2409 defsubr (&Smake_char);
2410 defsubr (&Schar_charset);
2411 defsubr (&Scharset_after);
2412 defsubr (&Siso_charset);
2413 defsubr (&Sclear_charset_maps);
2414 defsubr (&Scharset_priority_list);
2415 defsubr (&Sset_charset_priority);
2416 defsubr (&Scharset_id_internal);
2418 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2419 doc: /* *List of directories to search for charset map files. */);
2420 Vcharset_map_path = Qnil;
2422 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2423 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2424 inhibit_load_charset_map = 0;
2426 DEFVAR_LISP ("charset-list", &Vcharset_list,
2427 doc: /* List of all charsets ever defined. */);
2428 Vcharset_list = Qnil;
2430 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2431 doc: /* ISO639 language mnemonic symbol for the current language environment.
2432 If the current language environment is for multiple languages (e.g. "Latin-1"),
2433 the value may be a list of mnemonics. */);
2434 Vcurrent_iso639_language = Qnil;
2436 charset_ascii
2437 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2438 0, 127, 'B', -1, 0, 1, 0, 0);
2439 charset_iso_8859_1
2440 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2441 0, 255, -1, -1, -1, 1, 0, 0);
2442 charset_unicode
2443 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2444 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2445 charset_emacs
2446 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2447 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2448 charset_eight_bit
2449 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2450 128, 255, -1, 0, -1, 0, 1,
2451 MAX_5_BYTE_CHAR + 1);
2452 charset_unibyte = charset_iso_8859_1;
2455 #endif /* emacs */
2457 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2458 (do not change this comment) */