(post-read-decode-hz)
[emacs.git] / src / charset.c
blobe2ef9b086510a65e4d63876742076b2a283b1a52
1 /* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
14 any later version.
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 #ifdef emacs
27 #include <config.h>
28 #endif
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
34 #ifdef emacs
36 #include <sys/types.h>
37 #include "lisp.h"
38 #include "character.h"
39 #include "charset.h"
40 #include "coding.h"
41 #include "disptab.h"
42 #include "buffer.h"
44 #else /* not emacs */
46 #include "mulelib.h"
48 #endif /* emacs */
51 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
53 A coded character set ("charset" hereafter) is a meaningful
54 collection (i.e. language, culture, functionality, etc.) of
55 characters. Emacs handles multiple charsets at once. In Emacs Lisp
56 code, a charset is represented by a symbol. In C code, a charset is
57 represented by its ID number or by a pointer to a struct charset.
59 The actual information about each charset is stored in two places.
60 Lispy information is stored in the hash table Vcharset_hash_table as
61 a vector (charset attributes). The other information is stored in
62 charset_table as a struct charset.
66 /* List of all charsets. This variable is used only from Emacs
67 Lisp. */
68 Lisp_Object Vcharset_list;
70 /* Hash table that contains attributes of each charset. Keys are
71 charset symbols, and values are vectors of charset attributes. */
72 Lisp_Object Vcharset_hash_table;
74 /* Table of struct charset. */
75 struct charset *charset_table;
77 static int charset_table_size;
78 int charset_table_used;
80 Lisp_Object Qcharsetp;
82 /* Special charset symbols. */
83 Lisp_Object Qascii;
84 Lisp_Object Qeight_bit_control;
85 Lisp_Object Qeight_bit_graphic;
86 Lisp_Object Qiso_8859_1;
87 Lisp_Object Qunicode;
89 /* The corresponding charsets. */
90 int charset_ascii;
91 int charset_8_bit_control;
92 int charset_8_bit_graphic;
93 int charset_iso_8859_1;
94 int charset_unicode;
96 /* The other special charsets. */
97 int charset_jisx0201_roman;
98 int charset_jisx0208_1978;
99 int charset_jisx0208;
101 /* Value of charset attribute `charset-iso-plane'. */
102 Lisp_Object Qgl, Qgr;
104 /* The primary charset. It is a charset of unibyte characters. */
105 int charset_primary;
107 /* List of charsets ordered by the priority. */
108 Lisp_Object Vcharset_ordered_list;
110 /* List of iso-2022 charsets. */
111 Lisp_Object Viso_2022_charset_list;
113 /* List of emacs-mule charsets. */
114 Lisp_Object Vemacs_mule_charset_list;
116 struct charset *emacs_mule_charset[256];
118 /* Mapping table from ISO2022's charset (specified by DIMENSION,
119 CHARS, and FINAL-CHAR) to Emacs' charset. */
120 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122 Lisp_Object Vcharset_map_directory;
124 Lisp_Object Vchar_unified_charset_table;
126 #define CODE_POINT_TO_INDEX(charset, code) \
127 ((charset)->code_linear_p \
128 ? (code) - (charset)->min_code \
129 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
130 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
131 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
132 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
133 ? (((((code) >> 24) - (charset)->code_space[12]) \
134 * (charset)->code_space[11]) \
135 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
136 * (charset)->code_space[7]) \
137 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
138 * (charset)->code_space[3]) \
139 + (((code) & 0xFF) - (charset)->code_space[0]) \
140 - ((charset)->char_index_offset)) \
141 : -1)
144 /* Convert the character index IDX to code-point CODE for CHARSET.
145 It is assumed that IDX is in a valid range. */
147 #define INDEX_TO_CODE_POINT(charset, idx) \
148 ((charset)->code_linear_p \
149 ? (idx) + (charset)->min_code \
150 : (idx += (charset)->char_index_offset, \
151 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
152 | (((charset)->code_space[4] \
153 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
154 << 8) \
155 | (((charset)->code_space[8] \
156 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
157 << 16) \
158 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
159 << 24))))
164 /* Set to 1 to warn that a charset map is loaded and thus a buffer
165 text and a string data may be relocated. */
166 int charset_map_loaded;
168 struct charset_map_entries
170 struct {
171 unsigned from, to;
172 int c;
173 } entry[0x10000];
174 struct charset_map_entries *next;
177 /* Load the mapping information for CHARSET from ENTRIES.
179 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
181 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
182 CHARSET->decoder, and CHARSET->encoder.
184 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
185 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
186 setup it too. */
188 static void
189 load_charset_map (charset, entries, n_entries, control_flag)
190 struct charset *charset;
191 struct charset_map_entries *entries;
192 int n_entries;
193 int control_flag;
195 Lisp_Object vec, table;
196 unsigned max_code = CHARSET_MAX_CODE (charset);
197 int ascii_compatible_p = charset->ascii_compatible_p;
198 int min_char, max_char, nonascii_min_char;
199 int i;
200 unsigned char *fast_map = charset->fast_map;
202 if (n_entries <= 0)
203 return;
205 if (control_flag > 0)
207 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
208 unsigned invalid_code = CHARSET_INVALID_CODE (charset);
210 table = Fmake_char_table (Qnil, make_number (invalid_code));
211 if (control_flag == 1)
212 vec = Fmake_vector (make_number (n), make_number (-1));
213 else if (! CHAR_TABLE_P (Vchar_unify_table))
214 Vchar_unify_table = Fmake_char_table (Qnil, make_number (-1));
216 charset_map_loaded = 1;
219 min_char = max_char = entries->entry[0].c;
220 nonascii_min_char = MAX_CHAR;
221 for (i = 0; i < n_entries; i++)
223 unsigned from, to;
224 int from_index, to_index;
225 int from_c, to_c;
226 int idx = i % 0x10000;
228 if (i > 0 && idx == 0)
229 entries = entries->next;
230 from = entries->entry[idx].from;
231 to = entries->entry[idx].to;
232 from_c = entries->entry[idx].c;
233 from_index = CODE_POINT_TO_INDEX (charset, from);
234 if (from == to)
236 to_index = from_index;
237 to_c = from_c;
239 else
241 to_index = CODE_POINT_TO_INDEX (charset, to);
242 to_c = from_c + (to_index - from_index);
244 if (from_index < 0 || to_index < 0)
245 continue;
247 if (control_flag < 2)
249 int c;
251 if (to_c > max_char)
252 max_char = to_c;
253 else if (from_c < min_char)
254 min_char = from_c;
255 if (ascii_compatible_p)
257 if (! ASCII_BYTE_P (from_c))
259 if (from_c < nonascii_min_char)
260 nonascii_min_char = from_c;
262 else if (! ASCII_BYTE_P (to_c))
264 nonascii_min_char = 0x80;
268 for (c = from_c; c <= to_c; c++)
269 CHARSET_FAST_MAP_SET (c, fast_map);
271 if (control_flag == 1)
273 unsigned code = from;
275 if (CHARSET_COMPACT_CODES_P (charset))
276 while (1)
278 ASET (vec, from_index, make_number (from_c));
279 CHAR_TABLE_SET (table, from_c, make_number (code));
280 if (from_index == to_index)
281 break;
282 from_index++, from_c++;
283 code = INDEX_TO_CODE_POINT (charset, from_index);
285 else
286 for (; from_index <= to_index; from_index++, from_c++)
288 ASET (vec, from_index, make_number (from_c));
289 CHAR_TABLE_SET (table, from_c, make_number (from_index));
293 else
295 unsigned code = from;
297 while (1)
299 int c1 = DECODE_CHAR (charset, code);
301 if (c1 >= 0)
303 CHAR_TABLE_SET (table, from_c, make_number (c1));
304 CHAR_TABLE_SET (Vchar_unify_table, c1, from_c);
305 if (CHAR_TABLE_P (Vchar_unified_charset_table))
306 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
307 CHARSET_NAME (charset));
309 if (from_index == to_index)
310 break;
311 from_index++, from_c++;
312 code = INDEX_TO_CODE_POINT (charset, from_index);
317 if (control_flag < 2)
319 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
320 ? nonascii_min_char : min_char);
321 CHARSET_MAX_CHAR (charset) = max_char;
322 if (control_flag == 1)
324 CHARSET_DECODER (charset) = vec;
325 CHARSET_ENCODER (charset) = table;
328 else
329 CHARSET_DEUNIFIER (charset) = table;
333 /* Read a hexadecimal number (preceded by "0x") from the file FP while
334 paying attention to comment charcter '#'. */
336 static INLINE unsigned
337 read_hex (fp, eof)
338 FILE *fp;
339 int *eof;
341 int c;
342 unsigned n;
344 while ((c = getc (fp)) != EOF)
346 if (c == '#')
348 while ((c = getc (fp)) != EOF && c != '\n');
350 else if (c == '0')
352 if ((c = getc (fp)) == EOF || c == 'x')
353 break;
356 if (c == EOF)
358 *eof = 1;
359 return 0;
361 *eof = 0;
362 n = 0;
363 if (c == 'x')
364 while ((c = getc (fp)) != EOF && isxdigit (c))
365 n = ((n << 4)
366 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
367 else
368 while ((c = getc (fp)) != EOF && isdigit (c))
369 n = (n * 10) + c - '0';
370 if (c != EOF)
371 ungetc (c, fp);
372 return n;
376 /* Return a mapping vector for CHARSET loaded from MAPFILE.
377 Each line of MAPFILE has this form
378 0xAAAA 0xCCCC
379 where 0xAAAA is a code-point and 0xCCCC is the corresponding
380 character code, or this form
381 0xAAAA-0xBBBB 0xCCCC
382 where 0xAAAA and 0xBBBB are code-points specifying a range, and
383 0xCCCC is the first character code of the range.
385 The returned vector has this form:
386 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
387 where CODE1 is a code-point or a cons of code-points specifying a
388 range. */
390 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
392 static void
393 load_charset_map_from_file (charset, mapfile, control_flag)
394 struct charset *charset;
395 Lisp_Object mapfile;
396 int control_flag;
398 unsigned min_code = CHARSET_MIN_CODE (charset);
399 unsigned max_code = CHARSET_MAX_CODE (charset);
400 int fd;
401 FILE *fp;
402 int eof;
403 Lisp_Object suffixes;
404 struct charset_map_entries *head, *entries;
405 int n_entries;
407 suffixes = Fcons (build_string (".map"),
408 Fcons (build_string (".TXT"), Qnil));
410 fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
411 NULL, 0);
412 if (fd < 0
413 || ! (fp = fdopen (fd, "r")))
415 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
416 return;
419 head = entries = ((struct charset_map_entries *)
420 alloca (sizeof (struct charset_map_entries)));
421 n_entries = 0;
422 eof = 0;
423 while (1)
425 unsigned from, to;
426 int c;
427 int idx;
429 from = read_hex (fp, &eof);
430 if (eof)
431 break;
432 if (getc (fp) == '-')
433 to = read_hex (fp, &eof);
434 else
435 to = from;
436 c = (int) read_hex (fp, &eof);
438 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
439 continue;
441 if (n_entries > 0 && (n_entries % 0x10000) == 0)
443 entries->next = ((struct charset_map_entries *)
444 alloca (sizeof (struct charset_map_entries)));
445 entries = entries->next;
447 idx = n_entries % 0x10000;
448 entries->entry[idx].from = from;
449 entries->entry[idx].to = to;
450 entries->entry[idx].c = c;
451 n_entries++;
453 fclose (fp);
454 close (fd);
456 load_charset_map (charset, head, n_entries, control_flag);
459 static void
460 load_charset_map_from_vector (charset, vec, control_flag)
461 struct charset *charset;
462 Lisp_Object vec;
463 int control_flag;
465 unsigned min_code = CHARSET_MIN_CODE (charset);
466 unsigned max_code = CHARSET_MAX_CODE (charset);
467 struct charset_map_entries *head, *entries;
468 int n_entries;
469 int len = ASIZE (vec);
470 int i;
472 if (len % 2 == 1)
474 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
475 return;
478 head = entries = ((struct charset_map_entries *)
479 alloca (sizeof (struct charset_map_entries)));
480 n_entries = 0;
481 for (i = 0; i < len; i += 2)
483 Lisp_Object val, val2;
484 unsigned from, to;
485 int c;
486 int idx;
488 val = AREF (vec, i);
489 if (CONSP (val))
491 val2 = XCDR (val);
492 val = XCAR (val);
493 CHECK_NATNUM (val);
494 CHECK_NATNUM (val2);
495 from = XFASTINT (val);
496 to = XFASTINT (val2);
498 else
500 CHECK_NATNUM (val);
501 from = to = XFASTINT (val);
503 val = AREF (vec, i + 1);
504 CHECK_NATNUM (val);
505 c = XFASTINT (val);
507 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
508 continue;
510 if ((n_entries % 0x10000) == 0)
512 entries->next = ((struct charset_map_entries *)
513 alloca (sizeof (struct charset_map_entries)));
514 entries = entries->next;
516 idx = n_entries % 0x10000;
517 entries->entry[idx].from = from;
518 entries->entry[idx].to = to;
519 entries->entry[idx].c = c;
520 n_entries++;
523 load_charset_map (charset, head, n_entries, control_flag);
526 static void
527 load_charset (charset)
528 struct charset *charset;
530 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
532 Lisp_Object map;
534 map = CHARSET_MAP (charset);
535 if (STRINGP (map))
536 load_charset_map_from_file (charset, map, 1);
537 else
538 load_charset_map_from_vector (charset, map, 1);
539 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
544 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
545 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
546 (object)
547 Lisp_Object object;
549 return (CHARSETP (object) ? Qt : Qnil);
553 void
554 map_charset_chars (c_function, function, charset_symbol, arg)
555 void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
556 Lisp_Object function, charset_symbol, arg;
558 int id;
559 struct charset *charset;
560 Lisp_Object range;
562 CHECK_CHARSET_GET_ID (charset_symbol, id);
563 charset = CHARSET_FROM_ID (id);
565 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
566 load_charset (charset);
568 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
570 range = Fcons (make_number (CHARSET_MIN_CHAR (charset)),
571 make_number (CHARSET_MAX_CHAR (charset)));
572 if (NILP (function))
573 (*c_function) (arg, range, Qnil);
574 else
575 call2 (function, range, arg);
577 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
579 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
580 return;
581 if (CHARSET_ASCII_COMPATIBLE_P (charset))
583 range = Fcons (make_number (0), make_number (127));
584 if (NILP (function))
585 (*c_function) (arg, range, Qnil);
586 else
587 call2 (function, range, arg);
589 map_char_table (c_function, function, CHARSET_ENCODER (charset), arg,
590 0, NULL);
592 else /* i.e. CHARSET_METHOD_PARENT */
594 int from, to, c;
595 unsigned code;
596 int i, j, k, l;
597 int *code_space = CHARSET_CODE_SPACE (charset);
598 Lisp_Object val;
600 range = Fcons (Qnil, Qnil);
601 from = to = -2;
602 for (i = code_space[12]; i <= code_space[13]; i++)
603 for (j = code_space[8]; j <= code_space[9]; j++)
604 for (k = code_space[4]; k <= code_space[5]; k++)
605 for (l = code_space[0]; l <= code_space[1]; l++)
607 code = (i << 24) | (j << 16) | (k << 8) | l;
608 c = DECODE_CHAR (charset, code);
609 if (c == to + 1)
611 to++;
612 continue;
614 if (from >= 0)
616 if (from < to)
618 XSETCAR (range, make_number (from));
619 XSETCDR (range, make_number (to));
620 val = range;
622 else
623 val = make_number (from);
624 if (NILP (function))
625 (*c_function) (arg, val, Qnil);
626 else
627 call2 (function, val, arg);
629 from = to = (c < 0 ? -2 : c);
631 if (from >= 0)
633 if (from < to)
635 XSETCAR (range, make_number (from));
636 XSETCDR (range, make_number (to));
637 val = range;
639 else
640 val = make_number (from);
641 if (NILP (function))
642 (*c_function) (arg, val, Qnil);
643 else
644 call2 (function, val, arg);
649 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 3, 0,
650 doc: /* Call FUNCTION for all characters in CHARSET.
651 FUNCTION is called with an argument RANGE and optional 2nd
652 argument ARG.
654 RANGE is either a cons (FROM . TO), where FROM and TO indicate a range of
655 characters contained in CHARSET or a single character in the case that
656 FROM and TO would be equal. (The charset mapping may have gaps.)*/)
657 (function, charset, arg)
658 Lisp_Object function, charset, arg;
660 map_charset_chars (NULL, function, charset, arg);
661 return Qnil;
665 /* Define a charset according to the arguments. The Nth argument is
666 the Nth attribute of the charset (the last attribute `charset-id'
667 is not included). See the docstring of `define-charset' for the
668 detail. */
670 DEFUN ("define-charset-internal", Fdefine_charset_internal,
671 Sdefine_charset_internal, charset_arg_max, MANY, 0,
672 doc: /* For internal use only.
673 usage: (define-charset-internal ...) */)
674 (nargs, args)
675 int nargs;
676 Lisp_Object *args;
678 /* Charset attr vector. */
679 Lisp_Object attrs;
680 Lisp_Object val;
681 unsigned hash_code;
682 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
683 int i, j;
684 struct charset charset;
685 int id;
686 int dimension;
687 int new_definition_p;
688 int nchars;
690 if (nargs != charset_arg_max)
691 return Fsignal (Qwrong_number_of_arguments,
692 Fcons (intern ("define-charset-internal"),
693 make_number (nargs)));
695 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
697 CHECK_SYMBOL (args[charset_arg_name]);
698 ASET (attrs, charset_name, args[charset_arg_name]);
700 val = args[charset_arg_code_space];
701 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
703 int min_byte, max_byte;
705 min_byte = XINT (Faref (val, make_number (i * 2)));
706 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
707 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
708 error ("Invalid :code-space value");
709 charset.code_space[i * 4] = min_byte;
710 charset.code_space[i * 4 + 1] = max_byte;
711 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
712 nchars *= charset.code_space[i * 4 + 2];
713 charset.code_space[i * 4 + 3] = nchars;
714 if (max_byte > 0)
715 dimension = i + 1;
718 val = args[charset_arg_dimension];
719 if (NILP (val))
720 charset.dimension = dimension;
721 else
723 CHECK_NATNUM (val);
724 charset.dimension = XINT (val);
725 if (charset.dimension < 1 || charset.dimension > 4)
726 args_out_of_range_3 (val, make_number (1), make_number (4));
729 charset.code_linear_p
730 = (charset.dimension == 1
731 || (charset.code_space[2] == 256
732 && (charset.dimension == 2
733 || (charset.code_space[6] == 256
734 && (charset.dimension == 3
735 || charset.code_space[10] == 256)))));
737 if (! charset.code_linear_p)
739 charset.code_space_mask = (unsigned char *) xmalloc (256);
740 bzero (charset.code_space_mask, 256);
741 for (i = 0; i < 4; i++)
742 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
743 j++)
744 charset.code_space_mask[j] |= (1 << i);
747 charset.iso_chars_96 = charset.code_space[2] == 96;
749 charset.min_code = (charset.code_space[0]
750 | (charset.code_space[4] << 8)
751 | (charset.code_space[8] << 16)
752 | (charset.code_space[12] << 24));
753 charset.max_code = (charset.code_space[1]
754 | (charset.code_space[5] << 8)
755 | (charset.code_space[9] << 16)
756 | (charset.code_space[13] << 24));
757 charset.char_index_offset = 0;
759 val = args[charset_arg_min_code];
760 if (! NILP (val))
762 unsigned code;
764 if (INTEGERP (val))
765 code = XINT (val);
766 else
768 CHECK_CONS (val);
769 CHECK_NUMBER (XCAR (val));
770 CHECK_NUMBER (XCDR (val));
771 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
773 if (code < charset.min_code
774 || code > charset.max_code)
775 args_out_of_range_3 (make_number (charset.min_code),
776 make_number (charset.max_code), val);
777 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
778 charset.min_code = code;
781 val = args[charset_arg_max_code];
782 if (! NILP (val))
784 unsigned code;
786 if (INTEGERP (val))
787 code = XINT (val);
788 else
790 CHECK_CONS (val);
791 CHECK_NUMBER (XCAR (val));
792 CHECK_NUMBER (XCDR (val));
793 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
795 if (code < charset.min_code
796 || code > charset.max_code)
797 args_out_of_range_3 (make_number (charset.min_code),
798 make_number (charset.max_code), val);
799 charset.max_code = code;
802 charset.compact_codes_p = charset.max_code < 0x1000000;
804 val = args[charset_arg_invalid_code];
805 if (NILP (val))
807 if (charset.min_code > 0)
808 charset.invalid_code = 0;
809 else
811 XSETINT (val, charset.max_code + 1);
812 if (XINT (val) == charset.max_code + 1)
813 charset.invalid_code = charset.max_code + 1;
814 else
815 error ("Attribute :invalid-code must be specified");
818 else
820 CHECK_NATNUM (val);
821 charset.invalid_code = XFASTINT (val);
824 val = args[charset_arg_iso_final];
825 if (NILP (val))
826 charset.iso_final = -1;
827 else
829 CHECK_NUMBER (val);
830 if (XINT (val) < '0' || XINT (val) > 127)
831 error ("Invalid iso-final-char: %d", XINT (val));
832 charset.iso_final = XINT (val);
835 val = args[charset_arg_iso_revision];
836 if (NILP (val))
837 charset.iso_revision = -1;
838 else
840 CHECK_NUMBER (val);
841 if (XINT (val) > 63)
842 args_out_of_range (make_number (63), val);
843 charset.iso_revision = XINT (val);
846 val = args[charset_arg_emacs_mule_id];
847 if (NILP (val))
848 charset.emacs_mule_id = -1;
849 else
851 CHECK_NATNUM (val);
852 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
853 error ("Invalid emacs-mule-id: %d", XINT (val));
854 charset.emacs_mule_id = XINT (val);
857 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
859 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
861 charset.unified_p = 0;
863 bzero (charset.fast_map, sizeof (charset.fast_map));
865 if (! NILP (args[charset_arg_code_offset]))
867 val = args[charset_arg_code_offset];
868 CHECK_NUMBER (val);
870 charset.method = CHARSET_METHOD_OFFSET;
871 charset.code_offset = XINT (val);
873 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
874 charset.min_char = i + charset.code_offset;
875 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
876 charset.max_char = i + charset.code_offset;
877 if (charset.max_char > MAX_CHAR)
878 error ("Unsupported max char: %d", charset.max_char);
880 for (i = charset.min_char; i < 0x10000 && i <= charset.max_char;
881 i += 128)
882 CHARSET_FAST_MAP_SET (i, charset.fast_map);
883 for (; i <= charset.max_char; i += 0x1000)
884 CHARSET_FAST_MAP_SET (i, charset.fast_map);
886 else if (! NILP (args[charset_arg_map]))
888 val = args[charset_arg_map];
889 ASET (attrs, charset_map, val);
890 if (STRINGP (val))
891 load_charset_map_from_file (&charset, val, 0);
892 else
893 load_charset_map_from_vector (&charset, val, 0);
894 charset.method = CHARSET_METHOD_MAP_DEFERRED;
896 else if (! NILP (args[charset_arg_parents]))
898 val = args[charset_arg_parents];
899 CHECK_LIST (val);
900 charset.method = CHARSET_METHOD_INHERIT;
901 val = Fcopy_sequence (val);
902 ASET (attrs, charset_parents, val);
904 charset.min_char = MAX_CHAR;
905 charset.max_char = 0;
906 for (; ! NILP (val); val = Fcdr (val))
908 Lisp_Object elt, car_part, cdr_part;
909 int this_id, offset;
910 struct charset *this_charset;
912 elt = Fcar (val);
913 if (CONSP (elt))
915 car_part = XCAR (elt);
916 cdr_part = XCDR (elt);
917 CHECK_CHARSET_GET_ID (car_part, this_id);
918 CHECK_NUMBER (cdr_part);
919 offset = XINT (cdr_part);
921 else
923 CHECK_CHARSET_GET_ID (elt, this_id);
924 offset = 0;
926 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
928 this_charset = CHARSET_FROM_ID (this_id);
929 if (charset.min_char > this_charset->min_char)
930 charset.min_char = this_charset->min_char;
931 if (charset.max_char < this_charset->max_char)
932 charset.max_char = this_charset->max_char;
933 for (i = 0; i < 190; i++)
934 charset.fast_map[i] |= this_charset->fast_map[i];
937 else
938 error ("None of :code-offset, :map, :parents are specified");
940 val = args[charset_arg_unify_map];
941 if (! NILP (val) && !STRINGP (val))
942 CHECK_VECTOR (val);
943 ASET (attrs, charset_unify_map, val);
945 CHECK_LIST (args[charset_arg_plist]);
946 ASET (attrs, charset_plist, args[charset_arg_plist]);
948 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
949 &hash_code);
950 if (charset.hash_index >= 0)
952 new_definition_p = 0;
953 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
954 HASH_VALUE (hash_table, charset.hash_index) = attrs;
956 else
958 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
959 hash_code);
960 if (charset_table_used == charset_table_size)
962 charset_table_size += 256;
963 charset_table
964 = ((struct charset *)
965 xrealloc (charset_table,
966 sizeof (struct charset) * charset_table_size));
968 id = charset_table_used++;
969 new_definition_p = 1;
972 ASET (attrs, charset_id, make_number (id));
973 charset.id = id;
974 charset_table[id] = charset;
976 if (charset.iso_final >= 0)
978 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
979 charset.iso_final) = id;
980 if (new_definition_p)
981 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
982 Fcons (make_number (id), Qnil));
983 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
984 charset_jisx0201_roman = id;
985 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
986 charset_jisx0208_1978 = id;
987 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
988 charset_jisx0208 = id;
991 if (charset.emacs_mule_id >= 0)
993 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
994 if (charset.emacs_mule_id < 0xA0)
995 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
996 if (new_definition_p)
997 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
998 Fcons (make_number (id), Qnil));
1001 if (new_definition_p)
1003 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1004 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1005 Fcons (make_number (id), Qnil));
1008 return Qnil;
1011 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1012 Sdefine_charset_alias, 2, 2, 0,
1013 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1014 (alias, charset)
1015 Lisp_Object alias, charset;
1017 Lisp_Object attr;
1019 CHECK_CHARSET_GET_ATTR (charset, attr);
1020 Fputhash (alias, attr, Vcharset_hash_table);
1021 Vcharset_list = Fcons (alias, Vcharset_list);
1022 return Qnil;
1026 DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0,
1027 doc: /* Return the primary charset. */)
1030 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary));
1034 DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset,
1035 1, 1, 0,
1036 doc: /* Set the primary charset to CHARSET. */)
1037 (charset)
1038 Lisp_Object charset;
1040 int id;
1042 CHECK_CHARSET_GET_ID (charset, id);
1043 charset_primary = id;
1044 return Qnil;
1048 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1049 doc: /* Return a property list of CHARSET. */)
1050 (charset)
1051 Lisp_Object charset;
1053 Lisp_Object attrs;
1055 CHECK_CHARSET_GET_ATTR (charset, attrs);
1056 return CHARSET_ATTR_PLIST (attrs);
1060 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1061 doc: /* Set CHARSET's property list to PLIST. */)
1062 (charset, plist)
1063 Lisp_Object charset, plist;
1065 Lisp_Object attrs;
1067 CHECK_CHARSET_GET_ATTR (charset, attrs);
1068 CHARSET_ATTR_PLIST (attrs) = plist;
1069 return plist;
1073 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 2, 0,
1074 doc: /* Unify characters of CHARSET with Unicode. */)
1075 (charset, unify_map)
1076 Lisp_Object charset, unify_map;
1078 int id;
1079 struct charset *cs;
1081 CHECK_CHARSET_GET_ID (charset, id);
1082 cs = CHARSET_FROM_ID (id);
1083 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1084 load_charset (cs);
1085 if (CHARSET_UNIFIED_P (cs)
1086 && CHAR_TABLE_P (CHARSET_DEUNIFIER (cs)))
1087 return Qnil;
1088 CHARSET_UNIFIED_P (cs) = 0;
1089 if (NILP (unify_map))
1090 unify_map = CHARSET_UNIFY_MAP (cs);
1091 if (STRINGP (unify_map))
1092 load_charset_map_from_file (cs, unify_map, 2);
1093 else
1094 load_charset_map_from_vector (cs, unify_map, 2);
1095 CHARSET_UNIFIED_P (cs) = 1;
1096 return Qnil;
1099 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1100 Sget_unused_iso_final_char, 2, 2, 0,
1101 doc: /*
1102 Return an unsed ISO final char for a charset of DIMENISION and CHARS.
1103 DIMENSION is the number of bytes to represent a character: 1 or 2.
1104 CHARS is the number of characters in a dimension: 94 or 96.
1106 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1107 If there's no unused final char for the specified kind of charset,
1108 return nil. */)
1109 (dimension, chars)
1110 Lisp_Object dimension, chars;
1112 int final_char;
1114 CHECK_NUMBER (dimension);
1115 CHECK_NUMBER (chars);
1116 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1117 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1118 if (XINT (chars) != 94 && XINT (chars) != 96)
1119 args_out_of_range_3 (chars, make_number (94), make_number (96));
1120 for (final_char = '0'; final_char <= '?'; final_char++)
1121 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1122 break;
1123 return (final_char <= '?' ? make_number (final_char) : Qnil);
1126 static void
1127 check_iso_charset_parameter (dimension, chars, final_char)
1128 Lisp_Object dimension, chars, final_char;
1130 CHECK_NATNUM (dimension);
1131 CHECK_NATNUM (chars);
1132 CHECK_NATNUM (final_char);
1134 if (XINT (dimension) > 3)
1135 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1136 if (XINT (chars) != 94 && XINT (chars) != 96)
1137 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1138 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1139 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1143 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1144 4, 4, 0,
1145 doc: /*
1146 Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
1147 CHARSET should be defined by `defined-charset' in advance. */)
1148 (dimension, chars, final_char, charset)
1149 Lisp_Object dimension, chars, final_char, charset;
1151 int id;
1153 CHECK_CHARSET_GET_ID (charset, id);
1154 check_iso_charset_parameter (dimension, chars, final_char);
1156 ISO_CHARSET_TABLE (dimension, chars, final_char) = id;
1157 return Qnil;
1161 /* Return information about charsets in the text at PTR of NBYTES
1162 bytes, which are NCHARS characters. The value is:
1164 0: Each character is represented by one byte. This is always
1165 true for a unibyte string. For a multibyte string, true if
1166 it contains only ASCII characters.
1168 1: No charsets other than ascii, eight-bit-control, and
1169 latin-1 are found.
1171 2: Otherwise.
1175 string_xstring_p (string)
1176 Lisp_Object string;
1178 unsigned char *p = XSTRING (string)->data;
1179 unsigned char *endp = p + STRING_BYTES (XSTRING (string));
1180 struct charset *charset;
1182 if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
1183 return 0;
1185 charset = CHARSET_FROM_ID (charset_iso_8859_1);
1186 while (p < endp)
1188 int c = STRING_CHAR_ADVANCE (p);
1190 if (ENCODE_CHAR (charset, c) < 0)
1191 return 2;
1193 return 1;
1197 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1199 CHARSETS is a vector. Each element is a cons of CHARSET and
1200 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1201 FOUND-FLAG t (or nil) means that the corresponding charset is
1202 already found (or not yet found).
1204 It may lookup a translation table TABLE if supplied. */
1206 static void
1207 find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
1208 unsigned char *ptr;
1209 int nchars, nbytes;
1210 Lisp_Object charsets, table;
1212 unsigned char *pend = ptr + nbytes;
1213 int ncharsets = ASIZE (charsets);
1215 if (nchars == nbytes)
1216 return;
1218 while (ptr < pend)
1220 int c = STRING_CHAR_ADVANCE (ptr);
1221 int i;
1222 int all_found = 1;
1223 Lisp_Object elt;
1225 if (!NILP (table))
1226 c = translate_char (table, c);
1227 for (i = 0; i < ncharsets; i++)
1229 elt = AREF (charsets, i);
1230 if (NILP (XCDR (elt)))
1232 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1234 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
1235 XCDR (elt) = Qt;
1236 else
1237 all_found = 0;
1240 if (all_found)
1241 break;
1246 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1247 2, 3, 0,
1248 doc: /* Return a list of charsets in the region between BEG and END.
1249 BEG and END are buffer positions.
1250 Optional arg TABLE if non-nil is a translation table to look up.
1252 If the region contains invalid multibyte characters,
1253 `unknown' is included in the returned list.
1255 If the current buffer is unibyte, the returned list may contain
1256 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1257 (beg, end, table)
1258 Lisp_Object beg, end, table;
1260 Lisp_Object charsets;
1261 int from, from_byte, to, stop, stop_byte, i;
1262 Lisp_Object val;
1264 validate_region (&beg, &end);
1265 from = XFASTINT (beg);
1266 stop = to = XFASTINT (end);
1268 if (from < GPT && GPT < to)
1270 stop = GPT;
1271 stop_byte = GPT_BYTE;
1273 else
1274 stop_byte = CHAR_TO_BYTE (stop);
1276 from_byte = CHAR_TO_BYTE (from);
1278 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1279 for (i = 0; i < charset_table_used; i++)
1280 ASET (charsets, i, Fcons (make_number (i), Qnil));
1282 while (1)
1284 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1285 stop_byte - from_byte, charsets, table);
1286 if (stop < to)
1288 from = stop, from_byte = stop_byte;
1289 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1291 else
1292 break;
1295 val = Qnil;
1296 for (i = charset_table_used - 1; i >= 0; i--)
1297 if (!NILP (XCDR (AREF (charsets, i))))
1298 val = Fcons (CHARSET_NAME (charset_table + i), val);
1299 return val;
1302 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1303 1, 2, 0,
1304 doc: /* Return a list of charsets in STR.
1305 Optional arg TABLE if non-nil is a translation table to look up.
1307 If the string contains invalid multibyte characters,
1308 `unknown' is included in the returned list.
1310 If STR is unibyte, the returned list may contain
1311 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1312 (str, table)
1313 Lisp_Object str, table;
1315 Lisp_Object charsets;
1316 int i;
1317 Lisp_Object val;
1319 CHECK_STRING (str);
1321 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1322 find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
1323 STRING_BYTES (XSTRING (str)), charsets, table);
1325 val = Qnil;
1326 for (i = charset_table_used - 1; i >= 0; i--)
1327 if (!NILP (XCDR (AREF (charsets, i))))
1328 val = Fcons (CHARSET_NAME (charset_table + i), val);
1329 return val;
1334 /* Return a character correponding to the code-point CODE of
1335 CHARSET. */
1338 decode_char (charset, code)
1339 struct charset *charset;
1340 unsigned code;
1342 int c, char_index;
1343 enum charset_method method = CHARSET_METHOD (charset);
1345 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1346 return -1;
1348 if (method == CHARSET_METHOD_MAP_DEFERRED)
1350 load_charset (charset);
1351 method = CHARSET_METHOD (charset);
1354 if (method == CHARSET_METHOD_INHERIT)
1356 Lisp_Object parents;
1358 parents = CHARSET_PARENTS (charset);
1359 c = -1;
1360 for (; CONSP (parents); parents = XCDR (parents))
1362 int id = XINT (XCAR (XCAR (parents)));
1363 int code_offset = XINT (XCDR (XCAR (parents)));
1364 unsigned this_code = code + code_offset;
1366 charset = CHARSET_FROM_ID (id);
1367 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1368 break;
1371 else
1373 char_index = CODE_POINT_TO_INDEX (charset, code);
1374 if (char_index < 0)
1375 return -1;
1377 if (method == CHARSET_METHOD_MAP)
1379 Lisp_Object decoder;
1381 decoder = CHARSET_DECODER (charset);
1382 if (! VECTORP (decoder))
1383 return -1;
1384 c = XINT (AREF (decoder, char_index));
1386 else
1388 c = char_index + CHARSET_CODE_OFFSET (charset);
1392 if (CHARSET_UNIFIED_P (charset)
1393 && c >= 0)
1395 MAYBE_UNIFY_CHAR (c);
1398 return c;
1402 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1403 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
1405 unsigned
1406 encode_char (charset, c)
1407 struct charset *charset;
1408 int c;
1410 unsigned code;
1411 enum charset_method method = CHARSET_METHOD (charset);
1413 if (CHARSET_UNIFIED_P (charset))
1415 Lisp_Object deunifier;
1416 int deunified;
1418 deunifier = CHARSET_DEUNIFIER (charset);
1419 if (! CHAR_TABLE_P (deunifier))
1421 Funify_charset (CHARSET_NAME (charset), Qnil);
1422 deunifier = CHARSET_DEUNIFIER (charset);
1424 deunified = XINT (CHAR_TABLE_REF (deunifier, c));
1425 if (deunified > 0)
1426 c = deunified;
1429 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1430 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1431 return CHARSET_INVALID_CODE (charset);
1433 if (method == CHARSET_METHOD_INHERIT)
1435 Lisp_Object parents;
1437 parents = CHARSET_PARENTS (charset);
1438 for (; CONSP (parents); parents = XCDR (parents))
1440 int id = XINT (XCAR (XCAR (parents)));
1441 int code_offset = XINT (XCDR (XCAR (parents)));
1442 struct charset *this_charset = CHARSET_FROM_ID (id);
1444 code = ENCODE_CHAR (this_charset, c);
1445 if (code != CHARSET_INVALID_CODE (this_charset)
1446 && (code_offset < 0 || code >= code_offset))
1448 code -= code_offset;
1449 if (code >= charset->min_code && code <= charset->max_code
1450 && CODE_POINT_TO_INDEX (charset, code) >= 0)
1451 return code;
1454 return CHARSET_INVALID_CODE (charset);
1457 if (method == CHARSET_METHOD_MAP_DEFERRED)
1459 load_charset (charset);
1460 method = CHARSET_METHOD (charset);
1463 if (method == CHARSET_METHOD_MAP)
1465 Lisp_Object encoder;
1466 Lisp_Object val;
1468 encoder = CHARSET_ENCODER (charset);
1469 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1470 return CHARSET_INVALID_CODE (charset);
1471 val = CHAR_TABLE_REF (encoder, c);
1472 code = XINT (val);
1473 if (! CHARSET_COMPACT_CODES_P (charset))
1474 code = INDEX_TO_CODE_POINT (charset, code);
1476 else /* method == CHARSET_METHOD_OFFSET */
1478 code = c - CHARSET_CODE_OFFSET (charset);
1479 code = INDEX_TO_CODE_POINT (charset, code);
1482 return code;
1486 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1487 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1488 Return nil if CODE-POINT is not valid in CHARSET.
1490 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1492 Optional argument RESTRICTION specifies a way to map the pair of CCS
1493 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1494 (charset, code_point, restriction)
1495 Lisp_Object charset, code_point, restriction;
1497 int c, id;
1498 unsigned code;
1499 struct charset *charsetp;
1501 CHECK_CHARSET_GET_ID (charset, id);
1502 if (CONSP (code_point))
1504 CHECK_NATNUM (XCAR (code_point));
1505 CHECK_NATNUM (XCDR (code_point));
1506 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1508 else
1510 CHECK_NATNUM (code_point);
1511 code = XINT (code_point);
1513 charsetp = CHARSET_FROM_ID (id);
1514 c = DECODE_CHAR (charsetp, code);
1515 return (c >= 0 ? make_number (c) : Qnil);
1519 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1520 doc: /* Encode the character CH into a code-point of CHARSET.
1521 Return nil if CHARSET doesn't include CH.
1523 Optional argument RESTRICTION specifies a way to map CHAR to a
1524 code-point in CCS. Currently not supported and just ignored. */)
1525 (ch, charset, restriction)
1526 Lisp_Object ch, charset, restriction;
1528 int c, id;
1529 unsigned code;
1530 struct charset *charsetp;
1532 CHECK_CHARSET_GET_ID (charset, id);
1533 CHECK_NATNUM (ch);
1534 c = XINT (ch);
1535 charsetp = CHARSET_FROM_ID (id);
1536 code = ENCODE_CHAR (charsetp, ch);
1537 if (code == CHARSET_INVALID_CODE (charsetp))
1538 return Qnil;
1539 if (code > 0x7FFFFFF)
1540 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1541 return make_number (code);
1545 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1546 doc:
1547 /* Return a character of CHARSET whose position codes are CODEn.
1549 CODE1 through CODE4 are optional, but if you don't supply sufficient
1550 position codes, it is assumed that the minimum code in each dimension
1551 is specified. */)
1552 (charset, code1, code2, code3, code4)
1553 Lisp_Object charset, code1, code2, code3, code4;
1555 int id, dimension;
1556 struct charset *charsetp;
1557 unsigned code;
1558 int c;
1560 CHECK_CHARSET_GET_ID (charset, id);
1561 charsetp = CHARSET_FROM_ID (id);
1563 dimension = CHARSET_DIMENSION (charsetp);
1564 if (NILP (code1))
1565 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1566 ? 0 : CHARSET_MIN_CODE (charsetp));
1567 else
1569 CHECK_NATNUM (code1);
1570 if (XFASTINT (code1) >= 0x100)
1571 args_out_of_range (make_number (0xFF), code1);
1572 code = XFASTINT (code1);
1574 if (dimension > 1)
1576 code <<= 8;
1577 if (NILP (code2))
1578 code |= charsetp->code_space[(dimension - 2) * 4];
1579 else
1581 CHECK_NATNUM (code2);
1582 if (XFASTINT (code2) >= 0x100)
1583 args_out_of_range (make_number (0xFF), code2);
1584 code |= XFASTINT (code2);
1587 if (dimension > 2)
1589 code <<= 8;
1590 if (NILP (code3))
1591 code |= charsetp->code_space[(dimension - 3) * 4];
1592 else
1594 CHECK_NATNUM (code3);
1595 if (XFASTINT (code3) >= 0x100)
1596 args_out_of_range (make_number (0xFF), code3);
1597 code |= XFASTINT (code3);
1600 if (dimension > 3)
1602 code <<= 8;
1603 if (NILP (code4))
1604 code |= charsetp->code_space[0];
1605 else
1607 CHECK_NATNUM (code4);
1608 if (XFASTINT (code4) >= 0x100)
1609 args_out_of_range (make_number (0xFF), code4);
1610 code |= XFASTINT (code4);
1617 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1618 code &= 0x7F7F7F7F;
1619 c = DECODE_CHAR (charsetp, code);
1620 if (c < 0)
1621 error ("Invalid code(s)");
1622 return make_number (c);
1626 /* Return the first charset in CHARSET_LIST that contains C.
1627 CHARSET_LIST is a list of charset IDs. If it is nil, use
1628 Vcharset_ordered_list. */
1630 struct charset *
1631 char_charset (c, charset_list, code_return)
1632 int c;
1633 Lisp_Object charset_list;
1634 unsigned *code_return;
1636 if (NILP (charset_list))
1637 charset_list = Vcharset_ordered_list;
1639 while (CONSP (charset_list))
1641 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1642 unsigned code = ENCODE_CHAR (charset, c);
1644 if (code != CHARSET_INVALID_CODE (charset))
1646 if (code_return)
1647 *code_return = code;
1648 return charset;
1650 charset_list = XCDR (charset_list);
1652 return NULL;
1656 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1657 doc: /*Return list of charset and one to three position-codes of CHAR.
1658 If CHAR is invalid as a character code,
1659 return a list of symbol `unknown' and CHAR. */)
1660 (ch)
1661 Lisp_Object ch;
1663 struct charset *charset;
1664 int c, dimension;
1665 unsigned code;
1666 Lisp_Object val;
1668 CHECK_CHARACTER (ch);
1669 c = XFASTINT (ch);
1670 charset = CHAR_CHARSET (c);
1671 if (! charset)
1672 return Fcons (intern ("unknown"), Fcons (ch, Qnil));
1674 code = ENCODE_CHAR (charset, c);
1675 if (code == CHARSET_INVALID_CODE (charset))
1676 abort ();
1677 dimension = CHARSET_DIMENSION (charset);
1678 val = (dimension == 1 ? Fcons (make_number (code), Qnil)
1679 : dimension == 2 ? Fcons (make_number (code >> 8),
1680 Fcons (make_number (code & 0xFF), Qnil))
1681 : Fcons (make_number (code >> 16),
1682 Fcons (make_number ((code >> 8) & 0xFF),
1683 Fcons (make_number (code & 0xFF), Qnil))));
1684 return Fcons (CHARSET_NAME (charset), val);
1688 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1689 doc: /* Return the charset of highest priority that contains CHAR. */)
1690 (ch)
1691 Lisp_Object ch;
1693 struct charset *charset;
1695 CHECK_CHARACTER (ch);
1696 charset = CHAR_CHARSET (XINT (ch));
1697 return (CHARSET_NAME (charset));
1701 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1702 doc: /*
1703 Return charset of a character in the current buffer at position POS.
1704 If POS is nil, it defauls to the current point.
1705 If POS is out of range, the value is nil. */)
1706 (pos)
1707 Lisp_Object pos;
1709 Lisp_Object ch;
1710 struct charset *charset;
1712 ch = Fchar_after (pos);
1713 if (! INTEGERP (ch))
1714 return ch;
1715 charset = CHAR_CHARSET (XINT (ch));
1716 return (CHARSET_NAME (charset));
1720 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1721 doc: /*
1722 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1724 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1725 by their DIMENSION, CHARS, and FINAL-CHAR,
1726 where as Emacs distinguishes them by charset symbol.
1727 See the documentation of the function `charset-info' for the meanings of
1728 DIMENSION, CHARS, and FINAL-CHAR. */)
1729 (dimension, chars, final_char)
1730 Lisp_Object dimension, chars, final_char;
1732 int id;
1734 check_iso_charset_parameter (dimension, chars, final_char);
1735 id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
1736 XFASTINT (final_char));
1737 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
1741 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1742 0, 0, 0,
1743 doc: /*
1744 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1747 int i;
1748 struct charset *charset;
1749 Lisp_Object attrs;
1751 for (i = 0; i < charset_table_used; i++)
1753 charset = CHARSET_FROM_ID (i);
1754 attrs = CHARSET_ATTRIBUTES (charset);
1756 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1758 CHARSET_ATTR_DECODER (attrs) = Qnil;
1759 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1760 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1763 if (CHARSET_UNIFIED_P (charset))
1764 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
1767 if (CHAR_TABLE_P (Vchar_unified_charset_table))
1769 Foptimize_char_table (Vchar_unified_charset_table);
1770 Vchar_unify_table = Vchar_unified_charset_table;
1771 Vchar_unified_charset_table = Qnil;
1774 return Qnil;
1777 DEFUN ("charset-priority-list", Fcharset_priority_list,
1778 Scharset_priority_list, 0, 1, 0,
1779 doc: /* Return the list of charsets ordered by priority.
1780 HIGHESTP non-nil means just return the highest priority one. */)
1781 (highestp)
1782 Lisp_Object highestp;
1784 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1786 if (!NILP (highestp))
1787 return CHARSET_NAME (CHARSET_FROM_ID (Fcar (list)));
1789 while (!NILP (list))
1791 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XCAR (list))), val);
1792 list = XCDR (list);
1794 return Fnreverse (val);
1797 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1798 1, MANY, 0,
1799 doc: /* Assign higher priority to the charsets given as arguments.
1800 usage: (set-charset-priority &rest charsets) */)
1801 (nargs, args)
1802 int nargs;
1803 Lisp_Object *args;
1805 Lisp_Object new_head = Qnil, old_list, id, arglist[2];
1806 int i;
1808 old_list = Fcopy_sequence (Vcharset_ordered_list);
1809 for (i = 0; i < nargs; i++)
1811 CHECK_CHARSET_GET_ID (args[i], id);
1812 old_list = Fdelq (id, old_list);
1813 new_head = Fcons (id, new_head);
1815 arglist[0] = Fnreverse (new_head);
1816 arglist[1] = old_list;
1817 Vcharset_ordered_list = Fnconc (2, arglist);
1818 return Qnil;
1821 void
1822 init_charset ()
1828 void
1829 init_charset_once ()
1831 int i, j, k;
1833 for (i = 0; i < ISO_MAX_DIMENSION; i++)
1834 for (j = 0; j < ISO_MAX_CHARS; j++)
1835 for (k = 0; k < ISO_MAX_FINAL; k++)
1836 iso_charset_table[i][j][k] = -1;
1838 for (i = 0; i < 255; i++)
1839 emacs_mule_charset[i] = NULL;
1841 charset_jisx0201_roman = -1;
1842 charset_jisx0208_1978 = -1;
1843 charset_jisx0208 = -1;
1845 #if 0
1846 Vchar_charset_set = Fmake_char_table (Qnil, Qnil);
1847 CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil);
1849 DEFSYM (Qcharset_encode_table, "charset-encode-table");
1851 /* Intern this now in case it isn't already done.
1852 Setting this variable twice is harmless.
1853 But don't staticpro it here--that is done in alloc.c. */
1854 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1856 /* Now we are ready to set up this property, so we can create syntax
1857 tables. */
1858 Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0));
1859 #endif
1862 #ifdef emacs
1864 void
1865 syms_of_charset ()
1867 char *p;
1869 DEFSYM (Qcharsetp, "charsetp");
1871 DEFSYM (Qascii, "ascii");
1872 DEFSYM (Qunicode, "unicode");
1873 DEFSYM (Qeight_bit_control, "eight-bit-control");
1874 DEFSYM (Qeight_bit_graphic, "eight-bit-graphic");
1875 DEFSYM (Qiso_8859_1, "iso-8859-1");
1877 DEFSYM (Qgl, "gl");
1878 DEFSYM (Qgr, "gr");
1880 p = (char *) xmalloc (30000);
1882 staticpro (&Vcharset_ordered_list);
1883 Vcharset_ordered_list = Qnil;
1885 staticpro (&Viso_2022_charset_list);
1886 Viso_2022_charset_list = Qnil;
1888 staticpro (&Vemacs_mule_charset_list);
1889 Vemacs_mule_charset_list = Qnil;
1891 staticpro (&Vcharset_hash_table);
1892 Vcharset_hash_table = Fmakehash (Qeq);
1894 charset_table_size = 128;
1895 charset_table = ((struct charset *)
1896 xmalloc (sizeof (struct charset) * charset_table_size));
1897 charset_table_used = 0;
1899 staticpro (&Vchar_unified_charset_table);
1900 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
1902 defsubr (&Scharsetp);
1903 defsubr (&Smap_charset_chars);
1904 defsubr (&Sdefine_charset_internal);
1905 defsubr (&Sdefine_charset_alias);
1906 defsubr (&Sprimary_charset);
1907 defsubr (&Sset_primary_charset);
1908 defsubr (&Scharset_plist);
1909 defsubr (&Sset_charset_plist);
1910 defsubr (&Sunify_charset);
1911 defsubr (&Sget_unused_iso_final_char);
1912 defsubr (&Sdeclare_equiv_charset);
1913 defsubr (&Sfind_charset_region);
1914 defsubr (&Sfind_charset_string);
1915 defsubr (&Sdecode_char);
1916 defsubr (&Sencode_char);
1917 defsubr (&Ssplit_char);
1918 defsubr (&Smake_char);
1919 defsubr (&Schar_charset);
1920 defsubr (&Scharset_after);
1921 defsubr (&Siso_charset);
1922 defsubr (&Sclear_charset_maps);
1923 defsubr (&Scharset_priority_list);
1924 defsubr (&Sset_charset_priority);
1926 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
1927 doc: /* Directory of charset map files that come with GNU Emacs.
1928 The default value is sub-directory "charsets" of `data-directory'. */);
1929 Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
1930 Vdata_directory);
1932 DEFVAR_LISP ("charset-list", &Vcharset_list,
1933 doc: /* List of all charsets ever defined. */);
1934 Vcharset_list = Qnil;
1936 /* Make the prerequisite charset `ascii' and `unicode'. */
1938 Lisp_Object args[charset_arg_max];
1939 Lisp_Object plist[14];
1940 Lisp_Object val;
1942 plist[0] = intern (":name");
1943 plist[2] = intern (":dimension");
1944 plist[4] = intern (":code-space");
1945 plist[6] = intern (":iso-final-char");
1946 plist[8] = intern (":emacs-mule-id");
1947 plist[10] = intern (":ascii-compatible-p");
1948 plist[12] = intern (":code-offset");
1950 args[charset_arg_name] = Qascii;
1951 args[charset_arg_dimension] = make_number (1);
1952 val = Fmake_vector (make_number (8), make_number (0));
1953 ASET (val, 1, make_number (127));
1954 args[charset_arg_code_space] = val;
1955 args[charset_arg_min_code] = Qnil;
1956 args[charset_arg_max_code] = Qnil;
1957 args[charset_arg_iso_final] = make_number ('B');
1958 args[charset_arg_iso_revision] = Qnil;
1959 args[charset_arg_emacs_mule_id] = make_number (0);
1960 args[charset_arg_ascii_compatible_p] = Qt;
1961 args[charset_arg_supplementary_p] = Qnil;
1962 args[charset_arg_invalid_code] = Qnil;
1963 args[charset_arg_code_offset] = make_number (0);
1964 args[charset_arg_map] = Qnil;
1965 args[charset_arg_parents] = Qnil;
1966 args[charset_arg_unify_map] = Qnil;
1967 /* The actual plist is set by mule-conf.el. */
1968 plist[1] = args[charset_arg_name];
1969 plist[3] = args[charset_arg_dimension];
1970 plist[5] = args[charset_arg_code_space];
1971 plist[7] = args[charset_arg_iso_final];
1972 plist[9] = args[charset_arg_emacs_mule_id];
1973 plist[11] = args[charset_arg_ascii_compatible_p];
1974 plist[13] = args[charset_arg_code_offset];
1975 args[charset_arg_plist] = Flist (14, plist);
1976 Fdefine_charset_internal (charset_arg_max, args);
1977 charset_ascii = CHARSET_SYMBOL_ID (Qascii);
1979 args[charset_arg_name] = Qunicode;
1980 args[charset_arg_dimension] = make_number (3);
1981 val = Fmake_vector (make_number (8), make_number (0));
1982 ASET (val, 1, make_number (255));
1983 ASET (val, 3, make_number (255));
1984 ASET (val, 5, make_number (16));
1985 args[charset_arg_code_space] = val;
1986 args[charset_arg_min_code] = Qnil;
1987 args[charset_arg_max_code] = Qnil;
1988 args[charset_arg_iso_final] = Qnil;
1989 args[charset_arg_iso_revision] = Qnil;
1990 args[charset_arg_emacs_mule_id] = Qnil;
1991 args[charset_arg_ascii_compatible_p] = Qt;
1992 args[charset_arg_supplementary_p] = Qnil;
1993 args[charset_arg_invalid_code] = Qnil;
1994 args[charset_arg_code_offset] = make_number (0);
1995 args[charset_arg_map] = Qnil;
1996 args[charset_arg_parents] = Qnil;
1997 args[charset_arg_unify_map] = Qnil;
1998 /* The actual plist is set by mule-conf.el. */
1999 plist[1] = args[charset_arg_name];
2000 plist[3] = args[charset_arg_dimension];
2001 plist[5] = args[charset_arg_code_space];
2002 plist[7] = args[charset_arg_iso_final];
2003 plist[9] = args[charset_arg_emacs_mule_id];
2004 plist[11] = args[charset_arg_ascii_compatible_p];
2005 plist[13] = args[charset_arg_code_offset];
2006 args[charset_arg_plist] = Flist (14, plist);
2007 Fdefine_charset_internal (charset_arg_max, args);
2008 charset_unicode = CHARSET_SYMBOL_ID (Qunicode);
2012 #endif /* emacs */