1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
35 #include "character.h"
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
56 /* List of all charsets. This variable is used only from Emacs
58 Lisp_Object Vcharset_list
;
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table
;
64 /* Table of struct charset. */
65 struct charset
*charset_table
;
67 static int charset_table_size
;
68 static int charset_table_used
;
70 Lisp_Object Qcharsetp
;
72 /* Special charset symbols. */
74 Lisp_Object Qeight_bit
;
75 Lisp_Object Qiso_8859_1
;
78 /* The corresponding charsets. */
80 int charset_eight_bit
;
81 int charset_iso_8859_1
;
84 /* The other special charsets. */
85 int charset_jisx0201_roman
;
86 int charset_jisx0208_1978
;
89 /* Value of charset attribute `charset-iso-plane'. */
92 /* Charset of unibyte characters. */
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list
;
98 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
100 Lisp_Object Vcharset_non_preferred_head
;
102 /* Incremented everytime we change Vcharset_ordered_list. This is
103 unsigned short so that it fits in Lisp_Int and never matches
105 unsigned short charset_ordered_list_tick
;
107 /* List of iso-2022 charsets. */
108 Lisp_Object Viso_2022_charset_list
;
110 /* List of emacs-mule charsets. */
111 Lisp_Object Vemacs_mule_charset_list
;
113 struct charset
*emacs_mule_charset
[256];
115 /* Mapping table from ISO2022's charset (specified by DIMENSION,
116 CHARS, and FINAL-CHAR) to Emacs' charset. */
117 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
119 Lisp_Object Vcharset_map_path
;
121 Lisp_Object Vchar_unified_charset_table
;
123 Lisp_Object Vcurrent_iso639_language
;
125 /* Defined in chartab.c */
127 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
128 Lisp_Object function
, Lisp_Object table
,
129 Lisp_Object arg
, struct charset
*charset
,
130 unsigned from
, unsigned to
));
132 #define CODE_POINT_TO_INDEX(charset, code) \
133 ((charset)->code_linear_p \
134 ? (code) - (charset)->min_code \
135 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
136 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
137 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
138 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
139 ? (((((code) >> 24) - (charset)->code_space[12]) \
140 * (charset)->code_space[11]) \
141 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
142 * (charset)->code_space[7]) \
143 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
144 * (charset)->code_space[3]) \
145 + (((code) & 0xFF) - (charset)->code_space[0]) \
146 - ((charset)->char_index_offset)) \
150 /* Convert the character index IDX to code-point CODE for CHARSET.
151 It is assumed that IDX is in a valid range. */
153 #define INDEX_TO_CODE_POINT(charset, idx) \
154 ((charset)->code_linear_p \
155 ? (idx) + (charset)->min_code \
156 : (idx += (charset)->char_index_offset, \
157 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
158 | (((charset)->code_space[4] \
159 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
161 | (((charset)->code_space[8] \
162 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
164 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 /* Set to 1 to warn that a charset map is loaded and thus a buffer
171 text and a string data may be relocated. */
172 int charset_map_loaded
;
174 struct charset_map_entries
180 struct charset_map_entries
*next
;
183 /* Load the mapping information for CHARSET from ENTRIES.
185 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
187 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
188 CHARSET->decoder, and CHARSET->encoder.
190 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
191 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
195 load_charset_map (charset
, entries
, n_entries
, control_flag
)
196 struct charset
*charset
;
197 struct charset_map_entries
*entries
;
201 Lisp_Object vec
, table
;
202 unsigned max_code
= CHARSET_MAX_CODE (charset
);
203 int ascii_compatible_p
= charset
->ascii_compatible_p
;
204 int min_char
, max_char
, nonascii_min_char
;
206 unsigned char *fast_map
= charset
->fast_map
;
211 if (control_flag
> 0)
213 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
215 table
= Fmake_char_table (Qnil
, Qnil
);
216 if (control_flag
== 1)
217 vec
= Fmake_vector (make_number (n
), make_number (-1));
218 else if (! CHAR_TABLE_P (Vchar_unify_table
))
219 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
221 charset_map_loaded
= 1;
224 min_char
= max_char
= entries
->entry
[0].c
;
225 nonascii_min_char
= MAX_CHAR
;
226 for (i
= 0; i
< n_entries
; i
++)
229 int from_index
, to_index
;
231 int idx
= i
% 0x10000;
233 if (i
> 0 && idx
== 0)
234 entries
= entries
->next
;
235 from
= entries
->entry
[idx
].from
;
236 to
= entries
->entry
[idx
].to
;
237 from_c
= entries
->entry
[idx
].c
;
238 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
241 to_index
= from_index
;
246 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
247 to_c
= from_c
+ (to_index
- from_index
);
249 if (from_index
< 0 || to_index
< 0)
252 if (control_flag
< 2)
258 else if (from_c
< min_char
)
260 if (ascii_compatible_p
)
262 if (! ASCII_BYTE_P (from_c
))
264 if (from_c
< nonascii_min_char
)
265 nonascii_min_char
= from_c
;
267 else if (! ASCII_BYTE_P (to_c
))
269 nonascii_min_char
= 0x80;
273 for (c
= from_c
; c
<= to_c
; c
++)
274 CHARSET_FAST_MAP_SET (c
, fast_map
);
276 if (control_flag
== 1)
278 unsigned code
= from
;
280 if (CHARSET_COMPACT_CODES_P (charset
))
283 ASET (vec
, from_index
, make_number (from_c
));
284 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
285 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
286 if (from_index
== to_index
)
288 from_index
++, from_c
++;
289 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
292 for (; from_index
<= to_index
; from_index
++, from_c
++)
294 ASET (vec
, from_index
, make_number (from_c
));
295 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
296 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
302 unsigned code
= from
;
306 int c1
= DECODE_CHAR (charset
, code
);
310 CHAR_TABLE_SET (table
, from_c
, make_number (c1
));
311 CHAR_TABLE_SET (Vchar_unify_table
, c1
, make_number (from_c
));
312 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
313 CHAR_TABLE_SET (Vchar_unified_charset_table
, c1
,
314 CHARSET_NAME (charset
));
316 if (from_index
== to_index
)
318 from_index
++, from_c
++;
319 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
324 if (control_flag
< 2)
326 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
327 ? nonascii_min_char
: min_char
);
328 CHARSET_MAX_CHAR (charset
) = max_char
;
329 if (control_flag
== 1)
331 CHARSET_DECODER (charset
) = vec
;
332 CHARSET_ENCODER (charset
) = table
;
336 CHARSET_DEUNIFIER (charset
) = table
;
340 /* Read a hexadecimal number (preceded by "0x") from the file FP while
341 paying attention to comment charcter '#'. */
343 static INLINE
unsigned
351 while ((c
= getc (fp
)) != EOF
)
355 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
359 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
371 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
373 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
375 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
376 n
= (n
* 10) + c
- '0';
383 /* Return a mapping vector for CHARSET loaded from MAPFILE.
384 Each line of MAPFILE has this form
386 where 0xAAAA is a code-point and 0xCCCC is the corresponding
387 character code, or this form
389 where 0xAAAA and 0xBBBB are code-points specifying a range, and
390 0xCCCC is the first character code of the range.
392 The returned vector has this form:
393 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
394 where CODE1 is a code-point or a cons of code-points specifying a
397 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
400 load_charset_map_from_file (charset
, mapfile
, control_flag
)
401 struct charset
*charset
;
405 unsigned min_code
= CHARSET_MIN_CODE (charset
);
406 unsigned max_code
= CHARSET_MAX_CODE (charset
);
410 Lisp_Object suffixes
;
411 struct charset_map_entries
*head
, *entries
;
414 suffixes
= Fcons (build_string (".map"),
415 Fcons (build_string (".TXT"), Qnil
));
417 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
419 || ! (fp
= fdopen (fd
, "r")))
421 add_to_log ("Failure in loading charset map: %S", mapfile
, Qnil
);
425 head
= entries
= ((struct charset_map_entries
*)
426 alloca (sizeof (struct charset_map_entries
)));
435 from
= read_hex (fp
, &eof
);
438 if (getc (fp
) == '-')
439 to
= read_hex (fp
, &eof
);
442 c
= (int) read_hex (fp
, &eof
);
444 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
447 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
449 entries
->next
= ((struct charset_map_entries
*)
450 alloca (sizeof (struct charset_map_entries
)));
451 entries
= entries
->next
;
453 idx
= n_entries
% 0x10000;
454 entries
->entry
[idx
].from
= from
;
455 entries
->entry
[idx
].to
= to
;
456 entries
->entry
[idx
].c
= c
;
462 load_charset_map (charset
, head
, n_entries
, control_flag
);
466 load_charset_map_from_vector (charset
, vec
, control_flag
)
467 struct charset
*charset
;
471 unsigned min_code
= CHARSET_MIN_CODE (charset
);
472 unsigned max_code
= CHARSET_MAX_CODE (charset
);
473 struct charset_map_entries
*head
, *entries
;
475 int len
= ASIZE (vec
);
480 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
484 head
= entries
= ((struct charset_map_entries
*)
485 alloca (sizeof (struct charset_map_entries
)));
487 for (i
= 0; i
< len
; i
+= 2)
489 Lisp_Object val
, val2
;
501 from
= XFASTINT (val
);
502 to
= XFASTINT (val2
);
507 from
= to
= XFASTINT (val
);
509 val
= AREF (vec
, i
+ 1);
513 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
516 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
518 entries
->next
= ((struct charset_map_entries
*)
519 alloca (sizeof (struct charset_map_entries
)));
520 entries
= entries
->next
;
522 idx
= n_entries
% 0x10000;
523 entries
->entry
[idx
].from
= from
;
524 entries
->entry
[idx
].to
= to
;
525 entries
->entry
[idx
].c
= c
;
529 load_charset_map (charset
, head
, n_entries
, control_flag
);
533 load_charset (charset
)
534 struct charset
*charset
;
536 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
540 map
= CHARSET_MAP (charset
);
542 load_charset_map_from_file (charset
, map
, 1);
544 load_charset_map_from_vector (charset
, map
, 1);
545 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP
;
550 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
551 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
555 return (CHARSETP (object
) ? Qt
: Qnil
);
560 map_charset_chars (c_function
, function
, arg
,
562 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
563 Lisp_Object function
, arg
;
564 struct charset
*charset
;
570 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
571 load_charset (charset
);
573 partial
= (from
> CHARSET_MIN_CODE (charset
)
574 || to
< CHARSET_MAX_CODE (charset
));
576 if (CHARSET_UNIFIED_P (charset
)
577 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
579 map_char_table_for_charset (c_function
, function
,
580 CHARSET_DEUNIFIER (charset
), arg
,
581 partial
? charset
: NULL
, from
, to
);
584 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
586 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
587 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
588 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
589 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
591 range
= Fcons (make_number (from_c
), make_number (to_c
));
593 (*c_function
) (arg
, range
);
595 call2 (function
, range
, arg
);
597 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
599 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
601 map_char_table_for_charset (c_function
, function
,
602 CHARSET_ENCODER (charset
), arg
,
603 partial
? charset
: NULL
, from
, to
);
605 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
607 Lisp_Object subset_info
;
610 subset_info
= CHARSET_SUBSET (charset
);
611 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
612 offset
= XINT (AREF (subset_info
, 3));
614 if (from
< XFASTINT (AREF (subset_info
, 1)))
615 from
= XFASTINT (AREF (subset_info
, 1));
617 if (to
> XFASTINT (AREF (subset_info
, 2)))
618 to
= XFASTINT (AREF (subset_info
, 2));
619 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
621 else /* i.e. CHARSET_METHOD_SUPERSET */
625 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
626 parents
= XCDR (parents
))
629 unsigned this_from
, this_to
;
631 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
632 offset
= XINT (XCDR (XCAR (parents
)));
633 this_from
= from
- offset
;
634 this_to
= to
- offset
;
635 if (this_from
< CHARSET_MIN_CODE (charset
))
636 this_from
= CHARSET_MIN_CODE (charset
);
637 if (this_to
> CHARSET_MAX_CODE (charset
))
638 this_to
= CHARSET_MAX_CODE (charset
);
639 map_charset_chars (c_function
, function
, arg
, charset
,
645 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
646 doc
: /* Call FUNCTION for all characters in CHARSET.
647 FUNCTION is called with an argument RANGE and the optional 3rd
650 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
651 characters contained in CHARSET.
653 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
654 range of code points of target characters. */)
655 (function
, charset
, arg
, from_code
, to_code
)
656 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
661 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
662 if (NILP (from_code
))
663 from
= CHARSET_MIN_CODE (cs
);
666 CHECK_NATNUM (from_code
);
667 from
= XINT (from_code
);
668 if (from
< CHARSET_MIN_CODE (cs
))
669 from
= CHARSET_MIN_CODE (cs
);
672 to
= CHARSET_MAX_CODE (cs
);
675 CHECK_NATNUM (to_code
);
677 if (to
> CHARSET_MAX_CODE (cs
))
678 to
= CHARSET_MAX_CODE (cs
);
680 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
685 /* Define a charset according to the arguments. The Nth argument is
686 the Nth attribute of the charset (the last attribute `charset-id'
687 is not included). See the docstring of `define-charset' for the
690 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
691 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
692 doc
: /* For internal use only.
693 usage: (define-charset-internal ...) */)
698 /* Charset attr vector. */
702 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
704 struct charset charset
;
707 int new_definition_p
;
710 if (nargs
!= charset_arg_max
)
711 return Fsignal (Qwrong_number_of_arguments
,
712 Fcons (intern ("define-charset-internal"),
713 make_number (nargs
)));
715 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
717 CHECK_SYMBOL (args
[charset_arg_name
]);
718 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
720 val
= args
[charset_arg_code_space
];
721 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
723 int min_byte
, max_byte
;
725 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
726 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
727 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
728 error ("Invalid :code-space value");
729 charset
.code_space
[i
* 4] = min_byte
;
730 charset
.code_space
[i
* 4 + 1] = max_byte
;
731 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
732 nchars
*= charset
.code_space
[i
* 4 + 2];
733 charset
.code_space
[i
* 4 + 3] = nchars
;
738 val
= args
[charset_arg_dimension
];
740 charset
.dimension
= dimension
;
744 charset
.dimension
= XINT (val
);
745 if (charset
.dimension
< 1 || charset
.dimension
> 4)
746 args_out_of_range_3 (val
, make_number (1), make_number (4));
749 charset
.code_linear_p
750 = (charset
.dimension
== 1
751 || (charset
.code_space
[2] == 256
752 && (charset
.dimension
== 2
753 || (charset
.code_space
[6] == 256
754 && (charset
.dimension
== 3
755 || charset
.code_space
[10] == 256)))));
757 if (! charset
.code_linear_p
)
759 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
760 bzero (charset
.code_space_mask
, 256);
761 for (i
= 0; i
< 4; i
++)
762 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
764 charset
.code_space_mask
[j
] |= (1 << i
);
767 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
769 charset
.min_code
= (charset
.code_space
[0]
770 | (charset
.code_space
[4] << 8)
771 | (charset
.code_space
[8] << 16)
772 | (charset
.code_space
[12] << 24));
773 charset
.max_code
= (charset
.code_space
[1]
774 | (charset
.code_space
[5] << 8)
775 | (charset
.code_space
[9] << 16)
776 | (charset
.code_space
[13] << 24));
777 charset
.char_index_offset
= 0;
779 val
= args
[charset_arg_min_code
];
789 CHECK_NUMBER_CAR (val
);
790 CHECK_NUMBER_CDR (val
);
791 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
793 if (code
< charset
.min_code
794 || code
> charset
.max_code
)
795 args_out_of_range_3 (make_number (charset
.min_code
),
796 make_number (charset
.max_code
), val
);
797 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
798 charset
.min_code
= code
;
801 val
= args
[charset_arg_max_code
];
811 CHECK_NUMBER_CAR (val
);
812 CHECK_NUMBER_CDR (val
);
813 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
815 if (code
< charset
.min_code
816 || code
> charset
.max_code
)
817 args_out_of_range_3 (make_number (charset
.min_code
),
818 make_number (charset
.max_code
), val
);
819 charset
.max_code
= code
;
822 charset
.compact_codes_p
= charset
.max_code
< 0x1000000;
824 val
= args
[charset_arg_invalid_code
];
827 if (charset
.min_code
> 0)
828 charset
.invalid_code
= 0;
831 XSETINT (val
, charset
.max_code
+ 1);
832 if (XINT (val
) == charset
.max_code
+ 1)
833 charset
.invalid_code
= charset
.max_code
+ 1;
835 error ("Attribute :invalid-code must be specified");
841 charset
.invalid_code
= XFASTINT (val
);
844 val
= args
[charset_arg_iso_final
];
846 charset
.iso_final
= -1;
850 if (XINT (val
) < '0' || XINT (val
) > 127)
851 error ("Invalid iso-final-char: %d", XINT (val
));
852 charset
.iso_final
= XINT (val
);
855 val
= args
[charset_arg_iso_revision
];
857 charset
.iso_revision
= -1;
862 args_out_of_range (make_number (63), val
);
863 charset
.iso_revision
= XINT (val
);
866 val
= args
[charset_arg_emacs_mule_id
];
868 charset
.emacs_mule_id
= -1;
872 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
873 error ("Invalid emacs-mule-id: %d", XINT (val
));
874 charset
.emacs_mule_id
= XINT (val
);
877 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
879 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
881 charset
.unified_p
= 0;
883 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
885 if (! NILP (args
[charset_arg_code_offset
]))
887 val
= args
[charset_arg_code_offset
];
890 charset
.method
= CHARSET_METHOD_OFFSET
;
891 charset
.code_offset
= XINT (val
);
893 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
894 charset
.min_char
= i
+ charset
.code_offset
;
895 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
896 charset
.max_char
= i
+ charset
.code_offset
;
897 if (charset
.max_char
> MAX_CHAR
)
898 error ("Unsupported max char: %d", charset
.max_char
);
900 i
= (charset
.min_char
>> 7) << 7;
901 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
902 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
904 for (; i
<= charset
.max_char
; i
+= 0x1000)
905 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
907 else if (! NILP (args
[charset_arg_map
]))
909 val
= args
[charset_arg_map
];
910 ASET (attrs
, charset_map
, val
);
912 load_charset_map_from_file (&charset
, val
, 0);
914 load_charset_map_from_vector (&charset
, val
, 0);
915 charset
.method
= CHARSET_METHOD_MAP_DEFERRED
;
917 else if (! NILP (args
[charset_arg_subset
]))
920 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
921 struct charset
*parent_charset
;
923 val
= args
[charset_arg_subset
];
925 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
926 parent_min_code
= Fnth (make_number (1), val
);
927 CHECK_NATNUM (parent_min_code
);
928 parent_max_code
= Fnth (make_number (2), val
);
929 CHECK_NATNUM (parent_max_code
);
930 parent_code_offset
= Fnth (make_number (3), val
);
931 CHECK_NUMBER (parent_code_offset
);
932 val
= Fmake_vector (make_number (4), Qnil
);
933 ASET (val
, 0, make_number (parent_charset
->id
));
934 ASET (val
, 1, parent_min_code
);
935 ASET (val
, 2, parent_max_code
);
936 ASET (val
, 3, parent_code_offset
);
937 ASET (attrs
, charset_subset
, val
);
939 charset
.method
= CHARSET_METHOD_SUBSET
;
940 /* Here, we just copy the parent's fast_map. It's not accurate,
941 but at least it works for quickly detecting which character
942 DOESN'T belong to this charset. */
943 for (i
= 0; i
< 190; i
++)
944 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
946 /* We also copy these for parents. */
947 charset
.min_char
= parent_charset
->min_char
;
948 charset
.max_char
= parent_charset
->max_char
;
950 else if (! NILP (args
[charset_arg_superset
]))
952 val
= args
[charset_arg_superset
];
953 charset
.method
= CHARSET_METHOD_SUPERSET
;
954 val
= Fcopy_sequence (val
);
955 ASET (attrs
, charset_superset
, val
);
957 charset
.min_char
= MAX_CHAR
;
958 charset
.max_char
= 0;
959 for (; ! NILP (val
); val
= Fcdr (val
))
961 Lisp_Object elt
, car_part
, cdr_part
;
963 struct charset
*this_charset
;
968 car_part
= XCAR (elt
);
969 cdr_part
= XCDR (elt
);
970 CHECK_CHARSET_GET_ID (car_part
, this_id
);
971 CHECK_NUMBER (cdr_part
);
972 offset
= XINT (cdr_part
);
976 CHECK_CHARSET_GET_ID (elt
, this_id
);
979 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
981 this_charset
= CHARSET_FROM_ID (this_id
);
982 if (charset
.min_char
> this_charset
->min_char
)
983 charset
.min_char
= this_charset
->min_char
;
984 if (charset
.max_char
< this_charset
->max_char
)
985 charset
.max_char
= this_charset
->max_char
;
986 for (i
= 0; i
< 190; i
++)
987 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
991 error ("None of :code-offset, :map, :parents are specified");
993 val
= args
[charset_arg_unify_map
];
994 if (! NILP (val
) && !STRINGP (val
))
996 ASET (attrs
, charset_unify_map
, val
);
998 CHECK_LIST (args
[charset_arg_plist
]);
999 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1001 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1003 if (charset
.hash_index
>= 0)
1005 new_definition_p
= 0;
1006 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1007 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1011 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1013 if (charset_table_used
== charset_table_size
)
1015 struct charset
*new_table
1016 = (struct charset
*) xmalloc (sizeof (struct charset
)
1017 * (charset_table_size
+ 16));
1018 bcopy (charset_table
, new_table
,
1019 sizeof (struct charset
) * charset_table_size
);
1020 charset_table_size
+= 16;
1021 charset_table
= new_table
;
1023 id
= charset_table_used
++;
1024 new_definition_p
= 1;
1027 ASET (attrs
, charset_id
, make_number (id
));
1029 charset_table
[id
] = charset
;
1031 if (charset
.iso_final
>= 0)
1033 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1034 charset
.iso_final
) = id
;
1035 if (new_definition_p
)
1036 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1037 Fcons (make_number (id
), Qnil
));
1038 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1039 charset_jisx0201_roman
= id
;
1040 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1041 charset_jisx0208_1978
= id
;
1042 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1043 charset_jisx0208
= id
;
1046 if (charset
.emacs_mule_id
>= 0)
1048 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1049 if (charset
.emacs_mule_id
< 0xA0)
1050 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1052 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1053 if (new_definition_p
)
1054 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1055 Fcons (make_number (id
), Qnil
));
1058 if (new_definition_p
)
1060 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1061 if (charset
.supplementary_p
)
1062 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1063 Fcons (make_number (id
), Qnil
));
1068 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1070 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1072 if (cs
->supplementary_p
)
1075 if (EQ (tail
, Vcharset_ordered_list
))
1076 Vcharset_ordered_list
= Fcons (make_number (id
),
1077 Vcharset_ordered_list
);
1078 else if (NILP (tail
))
1079 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1080 Fcons (make_number (id
), Qnil
));
1083 val
= Fcons (XCAR (tail
), XCDR (tail
));
1084 XSETCDR (tail
, val
);
1085 XSETCAR (tail
, make_number (id
));
1088 charset_ordered_list_tick
++;
1095 /* Same as Fdefine_charset_internal but arguments are more convenient
1096 to call from C (typically in syms_of_charset). This can define a
1097 charset of `offset' method only. Return the ID of the new
1101 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1102 iso_final
, iso_revision
, emacs_mule_id
,
1103 ascii_compatible
, supplementary
,
1107 unsigned char *code_space
;
1108 unsigned min_code
, max_code
;
1109 int iso_final
, iso_revision
, emacs_mule_id
;
1110 int ascii_compatible
, supplementary
;
1113 Lisp_Object args
[charset_arg_max
];
1114 Lisp_Object plist
[14];
1118 args
[charset_arg_name
] = name
;
1119 args
[charset_arg_dimension
] = make_number (dimension
);
1120 val
= Fmake_vector (make_number (8), make_number (0));
1121 for (i
= 0; i
< 8; i
++)
1122 ASET (val
, i
, make_number (code_space
[i
]));
1123 args
[charset_arg_code_space
] = val
;
1124 args
[charset_arg_min_code
] = make_number (min_code
);
1125 args
[charset_arg_max_code
] = make_number (max_code
);
1126 args
[charset_arg_iso_final
]
1127 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1128 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1129 args
[charset_arg_emacs_mule_id
]
1130 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1131 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1132 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1133 args
[charset_arg_invalid_code
] = Qnil
;
1134 args
[charset_arg_code_offset
] = make_number (code_offset
);
1135 args
[charset_arg_map
] = Qnil
;
1136 args
[charset_arg_subset
] = Qnil
;
1137 args
[charset_arg_superset
] = Qnil
;
1138 args
[charset_arg_unify_map
] = Qnil
;
1140 plist
[0] = intern (":name");
1141 plist
[1] = args
[charset_arg_name
];
1142 plist
[2] = intern (":dimension");
1143 plist
[3] = args
[charset_arg_dimension
];
1144 plist
[4] = intern (":code-space");
1145 plist
[5] = args
[charset_arg_code_space
];
1146 plist
[6] = intern (":iso-final-char");
1147 plist
[7] = args
[charset_arg_iso_final
];
1148 plist
[8] = intern (":emacs-mule-id");
1149 plist
[9] = args
[charset_arg_emacs_mule_id
];
1150 plist
[10] = intern (":ascii-compatible-p");
1151 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1152 plist
[12] = intern (":code-offset");
1153 plist
[13] = args
[charset_arg_code_offset
];
1155 args
[charset_arg_plist
] = Flist (14, plist
);
1156 Fdefine_charset_internal (charset_arg_max
, args
);
1158 return XINT (CHARSET_SYMBOL_ID (name
));
1162 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1163 Sdefine_charset_alias
, 2, 2, 0,
1164 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1166 Lisp_Object alias
, charset
;
1170 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1171 Fputhash (alias
, attr
, Vcharset_hash_table
);
1172 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1177 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1178 doc
: /* Return the property list of CHARSET. */)
1180 Lisp_Object charset
;
1184 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1185 return CHARSET_ATTR_PLIST (attrs
);
1189 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1190 doc
: /* Set CHARSET's property list to PLIST. */)
1192 Lisp_Object charset
, plist
;
1196 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1197 CHARSET_ATTR_PLIST (attrs
) = plist
;
1202 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1203 doc
: /* Unify characters of CHARSET with Unicode.
1204 This means reading the relevant file and installing the table defined
1205 by CHARSET's `:unify-map' property.
1207 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1208 the same meaning as the `:unify-map' attribute in the function
1209 `define-charset' (which see).
1211 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1212 (charset
, unify_map
, deunify
)
1213 Lisp_Object charset
, unify_map
, deunify
;
1218 CHECK_CHARSET_GET_ID (charset
, id
);
1219 cs
= CHARSET_FROM_ID (id
);
1220 if (CHARSET_METHOD (cs
) == CHARSET_METHOD_MAP_DEFERRED
)
1223 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1224 : ! CHARSET_UNIFIED_P (cs
))
1227 CHARSET_UNIFIED_P (cs
) = 0;
1230 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
)
1231 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1232 if (NILP (unify_map
))
1233 unify_map
= CHARSET_UNIFY_MAP (cs
);
1234 if (STRINGP (unify_map
))
1235 load_charset_map_from_file (cs
, unify_map
, 2);
1236 else if (VECTORP (unify_map
))
1237 load_charset_map_from_vector (cs
, unify_map
, 2);
1238 else if (NILP (unify_map
))
1239 error ("No unify-map for charset");
1241 error ("Bad unify-map arg");
1242 CHARSET_UNIFIED_P (cs
) = 1;
1244 else if (CHAR_TABLE_P (Vchar_unify_table
))
1246 int min_code
= CHARSET_MIN_CODE (cs
);
1247 int max_code
= CHARSET_MAX_CODE (cs
);
1248 int min_char
= DECODE_CHAR (cs
, min_code
);
1249 int max_char
= DECODE_CHAR (cs
, max_code
);
1251 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1257 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1258 Sget_unused_iso_final_char
, 2, 2, 0,
1260 Return an unused ISO final char for a charset of DIMENISION and CHARS.
1261 DIMENSION is the number of bytes to represent a character: 1 or 2.
1262 CHARS is the number of characters in a dimension: 94 or 96.
1264 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1265 If there's no unused final char for the specified kind of charset,
1268 Lisp_Object dimension
, chars
;
1272 CHECK_NUMBER (dimension
);
1273 CHECK_NUMBER (chars
);
1274 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1275 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1276 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1277 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1278 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1279 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1281 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1285 check_iso_charset_parameter (dimension
, chars
, final_char
)
1286 Lisp_Object dimension
, chars
, final_char
;
1288 CHECK_NATNUM (dimension
);
1289 CHECK_NATNUM (chars
);
1290 CHECK_NATNUM (final_char
);
1292 if (XINT (dimension
) > 3)
1293 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1294 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1295 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1296 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1297 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1301 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1303 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1305 On decoding by an ISO-2022 base coding system, when a charset
1306 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1307 if CHARSET is designated instead. */)
1308 (dimension
, chars
, final_char
, charset
)
1309 Lisp_Object dimension
, chars
, final_char
, charset
;
1314 CHECK_CHARSET_GET_ID (charset
, id
);
1315 check_iso_charset_parameter (dimension
, chars
, final_char
);
1316 chars_flag
= XINT (chars
) == 96;
1317 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1322 /* Return information about charsets in the text at PTR of NBYTES
1323 bytes, which are NCHARS characters. The value is:
1325 0: Each character is represented by one byte. This is always
1326 true for a unibyte string. For a multibyte string, true if
1327 it contains only ASCII characters.
1329 1: No charsets other than ascii, control-1, and latin-1 are
1336 string_xstring_p (string
)
1339 const unsigned char *p
= SDATA (string
);
1340 const unsigned char *endp
= p
+ SBYTES (string
);
1342 if (SCHARS (string
) == SBYTES (string
))
1347 int c
= STRING_CHAR_ADVANCE (p
);
1356 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1358 CHARSETS is a vector. If Nth element is non-nil, it means the
1359 charset whose id is N is already found.
1361 It may lookup a translation table TABLE if supplied. */
1364 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1365 const unsigned char *ptr
;
1366 EMACS_INT nchars
, nbytes
;
1367 Lisp_Object charsets
, table
;
1370 const unsigned char *pend
= ptr
+ nbytes
;
1372 if (nchars
== nbytes
)
1375 ASET (charsets
, charset_ascii
, Qt
);
1382 c
= translate_char (table
, c
);
1383 if (ASCII_BYTE_P (c
))
1384 ASET (charsets
, charset_ascii
, Qt
);
1386 ASET (charsets
, charset_eight_bit
, Qt
);
1393 int c
= STRING_CHAR_ADVANCE (ptr
);
1394 struct charset
*charset
;
1397 c
= translate_char (table
, c
);
1398 charset
= CHAR_CHARSET (c
);
1399 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1404 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1406 doc
: /* Return a list of charsets in the region between BEG and END.
1407 BEG and END are buffer positions.
1408 Optional arg TABLE if non-nil is a translation table to look up.
1410 If the current buffer is unibyte, the returned list may contain
1411 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1413 Lisp_Object beg
, end
, table
;
1415 Lisp_Object charsets
;
1416 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1419 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1421 validate_region (&beg
, &end
);
1422 from
= XFASTINT (beg
);
1423 stop
= to
= XFASTINT (end
);
1425 if (from
< GPT
&& GPT
< to
)
1428 stop_byte
= GPT_BYTE
;
1431 stop_byte
= CHAR_TO_BYTE (stop
);
1433 from_byte
= CHAR_TO_BYTE (from
);
1435 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1438 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1439 stop_byte
- from_byte
, charsets
, table
,
1443 from
= stop
, from_byte
= stop_byte
;
1444 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1451 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1452 if (!NILP (AREF (charsets
, i
)))
1453 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1457 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1459 doc
: /* Return a list of charsets in STR.
1460 Optional arg TABLE if non-nil is a translation table to look up.
1462 If STR is unibyte, the returned list may contain
1463 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1465 Lisp_Object str
, table
;
1467 Lisp_Object charsets
;
1473 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1474 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1476 STRING_MULTIBYTE (str
));
1478 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1479 if (!NILP (AREF (charsets
, i
)))
1480 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1486 /* Return a character correponding to the code-point CODE of
1490 decode_char (charset
, code
)
1491 struct charset
*charset
;
1495 enum charset_method method
= CHARSET_METHOD (charset
);
1497 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1500 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1502 load_charset (charset
);
1503 method
= CHARSET_METHOD (charset
);
1506 if (method
== CHARSET_METHOD_SUBSET
)
1508 Lisp_Object subset_info
;
1510 subset_info
= CHARSET_SUBSET (charset
);
1511 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1512 code
-= XINT (AREF (subset_info
, 3));
1513 if (code
< XFASTINT (AREF (subset_info
, 1))
1514 || code
> XFASTINT (AREF (subset_info
, 2)))
1517 c
= DECODE_CHAR (charset
, code
);
1519 else if (method
== CHARSET_METHOD_SUPERSET
)
1521 Lisp_Object parents
;
1523 parents
= CHARSET_SUPERSET (charset
);
1525 for (; CONSP (parents
); parents
= XCDR (parents
))
1527 int id
= XINT (XCAR (XCAR (parents
)));
1528 int code_offset
= XINT (XCDR (XCAR (parents
)));
1529 unsigned this_code
= code
- code_offset
;
1531 charset
= CHARSET_FROM_ID (id
);
1532 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1538 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1542 if (method
== CHARSET_METHOD_MAP
)
1544 Lisp_Object decoder
;
1546 decoder
= CHARSET_DECODER (charset
);
1547 if (! VECTORP (decoder
))
1549 c
= XINT (AREF (decoder
, char_index
));
1553 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1557 if (CHARSET_UNIFIED_P (charset
)
1560 MAYBE_UNIFY_CHAR (c
);
1566 /* Variable used temporarily by the macro ENCODE_CHAR. */
1567 Lisp_Object charset_work
;
1569 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1570 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1571 use CHARSET's strict_max_char instead of max_char. */
1574 encode_char (charset
, c
)
1575 struct charset
*charset
;
1579 enum charset_method method
= CHARSET_METHOD (charset
);
1581 if (CHARSET_UNIFIED_P (charset
))
1583 Lisp_Object deunifier
, deunified
;
1585 deunifier
= CHARSET_DEUNIFIER (charset
);
1586 if (! CHAR_TABLE_P (deunifier
))
1588 Funify_charset (CHARSET_NAME (charset
), Qnil
, Qnil
);
1589 deunifier
= CHARSET_DEUNIFIER (charset
);
1591 deunified
= CHAR_TABLE_REF (deunifier
, c
);
1592 if (! NILP (deunified
))
1593 c
= XINT (deunified
);
1596 if (method
== CHARSET_METHOD_SUBSET
)
1598 Lisp_Object subset_info
;
1599 struct charset
*this_charset
;
1601 subset_info
= CHARSET_SUBSET (charset
);
1602 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1603 code
= ENCODE_CHAR (this_charset
, c
);
1604 if (code
== CHARSET_INVALID_CODE (this_charset
)
1605 || code
< XFASTINT (AREF (subset_info
, 1))
1606 || code
> XFASTINT (AREF (subset_info
, 2)))
1607 return CHARSET_INVALID_CODE (charset
);
1608 code
+= XINT (AREF (subset_info
, 3));
1612 if (method
== CHARSET_METHOD_SUPERSET
)
1614 Lisp_Object parents
;
1616 parents
= CHARSET_SUPERSET (charset
);
1617 for (; CONSP (parents
); parents
= XCDR (parents
))
1619 int id
= XINT (XCAR (XCAR (parents
)));
1620 int code_offset
= XINT (XCDR (XCAR (parents
)));
1621 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1623 code
= ENCODE_CHAR (this_charset
, c
);
1624 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1625 return code
+ code_offset
;
1627 return CHARSET_INVALID_CODE (charset
);
1630 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1631 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1632 return CHARSET_INVALID_CODE (charset
);
1634 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1636 load_charset (charset
);
1637 method
= CHARSET_METHOD (charset
);
1640 if (method
== CHARSET_METHOD_MAP
)
1642 Lisp_Object encoder
;
1645 encoder
= CHARSET_ENCODER (charset
);
1646 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1647 return CHARSET_INVALID_CODE (charset
);
1648 val
= CHAR_TABLE_REF (encoder
, c
);
1650 return CHARSET_INVALID_CODE (charset
);
1652 if (! CHARSET_COMPACT_CODES_P (charset
))
1653 code
= INDEX_TO_CODE_POINT (charset
, code
);
1655 else /* method == CHARSET_METHOD_OFFSET */
1657 code
= c
- CHARSET_CODE_OFFSET (charset
);
1658 code
= INDEX_TO_CODE_POINT (charset
, code
);
1665 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1666 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1667 Return nil if CODE-POINT is not valid in CHARSET.
1669 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1671 Optional argument RESTRICTION specifies a way to map the pair of CCS
1672 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1673 (charset
, code_point
, restriction
)
1674 Lisp_Object charset
, code_point
, restriction
;
1678 struct charset
*charsetp
;
1680 CHECK_CHARSET_GET_ID (charset
, id
);
1681 if (CONSP (code_point
))
1683 CHECK_NATNUM_CAR (code_point
);
1684 CHECK_NATNUM_CDR (code_point
);
1685 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1689 CHECK_NATNUM (code_point
);
1690 code
= XINT (code_point
);
1692 charsetp
= CHARSET_FROM_ID (id
);
1693 c
= DECODE_CHAR (charsetp
, code
);
1694 return (c
>= 0 ? make_number (c
) : Qnil
);
1698 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1699 doc
: /* Encode the character CH into a code-point of CHARSET.
1700 Return nil if CHARSET doesn't include CH.
1702 Optional argument RESTRICTION specifies a way to map CHAR to a
1703 code-point in CCS. Currently not supported and just ignored. */)
1704 (ch
, charset
, restriction
)
1705 Lisp_Object ch
, charset
, restriction
;
1709 struct charset
*charsetp
;
1711 CHECK_CHARSET_GET_ID (charset
, id
);
1713 charsetp
= CHARSET_FROM_ID (id
);
1714 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1715 if (code
== CHARSET_INVALID_CODE (charsetp
))
1717 if (code
> 0x7FFFFFF)
1718 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1719 return make_number (code
);
1723 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1725 /* Return a character of CHARSET whose position codes are CODEn.
1727 CODE1 through CODE4 are optional, but if you don't supply sufficient
1728 position codes, it is assumed that the minimum code in each dimension
1730 (charset
, code1
, code2
, code3
, code4
)
1731 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1734 struct charset
*charsetp
;
1738 CHECK_CHARSET_GET_ID (charset
, id
);
1739 charsetp
= CHARSET_FROM_ID (id
);
1741 dimension
= CHARSET_DIMENSION (charsetp
);
1743 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1744 ? 0 : CHARSET_MIN_CODE (charsetp
));
1747 CHECK_NATNUM (code1
);
1748 if (XFASTINT (code1
) >= 0x100)
1749 args_out_of_range (make_number (0xFF), code1
);
1750 code
= XFASTINT (code1
);
1756 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1759 CHECK_NATNUM (code2
);
1760 if (XFASTINT (code2
) >= 0x100)
1761 args_out_of_range (make_number (0xFF), code2
);
1762 code
|= XFASTINT (code2
);
1769 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1772 CHECK_NATNUM (code3
);
1773 if (XFASTINT (code3
) >= 0x100)
1774 args_out_of_range (make_number (0xFF), code3
);
1775 code
|= XFASTINT (code3
);
1782 code
|= charsetp
->code_space
[0];
1785 CHECK_NATNUM (code4
);
1786 if (XFASTINT (code4
) >= 0x100)
1787 args_out_of_range (make_number (0xFF), code4
);
1788 code
|= XFASTINT (code4
);
1795 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1797 c
= DECODE_CHAR (charsetp
, code
);
1799 error ("Invalid code(s)");
1800 return make_number (c
);
1804 /* Return the first charset in CHARSET_LIST that contains C.
1805 CHARSET_LIST is a list of charset IDs. If it is nil, use
1806 Vcharset_ordered_list. */
1809 char_charset (c
, charset_list
, code_return
)
1811 Lisp_Object charset_list
;
1812 unsigned *code_return
;
1814 if (NILP (charset_list
))
1815 charset_list
= Vcharset_ordered_list
;
1817 while (CONSP (charset_list
)
1818 && ! EQ (charset_list
, Vcharset_non_preferred_head
))
1820 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1821 unsigned code
= ENCODE_CHAR (charset
, c
);
1823 if (code
!= CHARSET_INVALID_CODE (charset
))
1826 *code_return
= code
;
1829 charset_list
= XCDR (charset_list
);
1831 return (c
<= MAX_UNICODE_CHAR
? CHARSET_FROM_ID (charset_unicode
)
1832 : CHARSET_FROM_ID (charset_eight_bit
));
1836 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
1838 /*Return list of charset and one to four position-codes of CHAR.
1839 The charset is decided by the current priority order of charsets.
1840 A position-code is a byte value of each dimension of the code-point of
1841 CHAR in the charset. */)
1845 struct charset
*charset
;
1850 CHECK_CHARACTER (ch
);
1852 charset
= CHAR_CHARSET (c
);
1855 code
= ENCODE_CHAR (charset
, c
);
1856 if (code
== CHARSET_INVALID_CODE (charset
))
1858 dimension
= CHARSET_DIMENSION (charset
);
1859 for (val
= Qnil
; dimension
> 0; dimension
--)
1861 val
= Fcons (make_number (code
& 0xFF), val
);
1864 return Fcons (CHARSET_NAME (charset
), val
);
1868 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
1869 doc
: /* Return the charset of highest priority that contains CH. */)
1873 struct charset
*charset
;
1875 CHECK_CHARACTER (ch
);
1876 charset
= CHAR_CHARSET (XINT (ch
));
1877 return (CHARSET_NAME (charset
));
1881 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
1883 Return charset of a character in the current buffer at position POS.
1884 If POS is nil, it defauls to the current point.
1885 If POS is out of range, the value is nil. */)
1890 struct charset
*charset
;
1892 ch
= Fchar_after (pos
);
1893 if (! INTEGERP (ch
))
1895 charset
= CHAR_CHARSET (XINT (ch
));
1896 return (CHARSET_NAME (charset
));
1900 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
1902 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1904 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1905 by their DIMENSION, CHARS, and FINAL-CHAR,
1906 where as Emacs distinguishes them by charset symbol.
1907 See the documentation of the function `charset-info' for the meanings of
1908 DIMENSION, CHARS, and FINAL-CHAR. */)
1909 (dimension
, chars
, final_char
)
1910 Lisp_Object dimension
, chars
, final_char
;
1915 check_iso_charset_parameter (dimension
, chars
, final_char
);
1916 chars_flag
= XFASTINT (chars
) == 96;
1917 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
1918 XFASTINT (final_char
));
1919 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
1923 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
1926 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1930 struct charset
*charset
;
1933 for (i
= 0; i
< charset_table_used
; i
++)
1935 charset
= CHARSET_FROM_ID (i
);
1936 attrs
= CHARSET_ATTRIBUTES (charset
);
1938 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
1940 CHARSET_ATTR_DECODER (attrs
) = Qnil
;
1941 CHARSET_ATTR_ENCODER (attrs
) = Qnil
;
1942 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP_DEFERRED
;
1945 if (CHARSET_UNIFIED_P (charset
))
1946 CHARSET_ATTR_DEUNIFIER (attrs
) = Qnil
;
1949 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
1951 Foptimize_char_table (Vchar_unified_charset_table
);
1952 Vchar_unify_table
= Vchar_unified_charset_table
;
1953 Vchar_unified_charset_table
= Qnil
;
1959 DEFUN ("charset-priority-list", Fcharset_priority_list
,
1960 Scharset_priority_list
, 0, 1, 0,
1961 doc
: /* Return the list of charsets ordered by priority.
1962 HIGHESTP non-nil means just return the highest priority one. */)
1964 Lisp_Object highestp
;
1966 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
1968 if (!NILP (highestp
))
1969 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
1971 while (!NILP (list
))
1973 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
1976 return Fnreverse (val
);
1979 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
1981 doc
: /* Assign higher priority to the charsets given as arguments.
1982 usage: (set-charset-priority &rest charsets) */)
1987 Lisp_Object new_head
, old_list
, arglist
[2];
1988 Lisp_Object list_2022
, list_emacs_mule
;
1991 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
1993 for (i
= 0; i
< nargs
; i
++)
1995 CHECK_CHARSET_GET_ID (args
[i
], id
);
1996 if (! NILP (Fmemq (make_number (id
), old_list
)))
1998 old_list
= Fdelq (make_number (id
), old_list
);
1999 new_head
= Fcons (make_number (id
), new_head
);
2002 arglist
[0] = Fnreverse (new_head
);
2003 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2004 Vcharset_ordered_list
= Fnconc (2, arglist
);
2005 charset_ordered_list_tick
++;
2007 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2008 CONSP (old_list
); old_list
= XCDR (old_list
))
2010 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2011 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2012 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2013 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2015 Viso_2022_charset_list
= Fnreverse (list_2022
);
2016 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2021 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2023 doc
: /* Internal use only.
2024 Return charset identification number of CHARSET. */)
2026 Lisp_Object charset
;
2030 CHECK_CHARSET_GET_ID (charset
, id
);
2031 return make_number (id
);
2039 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory
),
2045 init_charset_once ()
2049 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2050 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2051 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2052 iso_charset_table
[i
][j
][k
] = -1;
2054 for (i
= 0; i
< 256; i
++)
2055 emacs_mule_charset
[i
] = NULL
;
2057 charset_jisx0201_roman
= -1;
2058 charset_jisx0208_1978
= -1;
2059 charset_jisx0208
= -1;
2061 for (i
= 0; i
< 128; i
++)
2062 unibyte_to_multibyte_table
[i
] = i
;
2063 for (; i
< 256; i
++)
2064 unibyte_to_multibyte_table
[i
] = BYTE8_TO_CHAR (i
);
2072 DEFSYM (Qcharsetp
, "charsetp");
2074 DEFSYM (Qascii
, "ascii");
2075 DEFSYM (Qunicode
, "unicode");
2076 DEFSYM (Qeight_bit
, "eight-bit");
2077 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2082 staticpro (&Vcharset_ordered_list
);
2083 Vcharset_ordered_list
= Qnil
;
2085 staticpro (&Viso_2022_charset_list
);
2086 Viso_2022_charset_list
= Qnil
;
2088 staticpro (&Vemacs_mule_charset_list
);
2089 Vemacs_mule_charset_list
= Qnil
;
2091 /* Don't staticpro them here. It's done in syms_of_fns. */
2092 QCtest
= intern (":test");
2093 Qeq
= intern ("eq");
2095 staticpro (&Vcharset_hash_table
);
2097 Lisp_Object args
[2];
2100 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2103 charset_table_size
= 128;
2104 charset_table
= ((struct charset
*)
2105 xmalloc (sizeof (struct charset
) * charset_table_size
));
2106 charset_table_used
= 0;
2108 staticpro (&Vchar_unified_charset_table
);
2109 Vchar_unified_charset_table
= Fmake_char_table (Qnil
, make_number (-1));
2111 defsubr (&Scharsetp
);
2112 defsubr (&Smap_charset_chars
);
2113 defsubr (&Sdefine_charset_internal
);
2114 defsubr (&Sdefine_charset_alias
);
2115 defsubr (&Scharset_plist
);
2116 defsubr (&Sset_charset_plist
);
2117 defsubr (&Sunify_charset
);
2118 defsubr (&Sget_unused_iso_final_char
);
2119 defsubr (&Sdeclare_equiv_charset
);
2120 defsubr (&Sfind_charset_region
);
2121 defsubr (&Sfind_charset_string
);
2122 defsubr (&Sdecode_char
);
2123 defsubr (&Sencode_char
);
2124 defsubr (&Ssplit_char
);
2125 defsubr (&Smake_char
);
2126 defsubr (&Schar_charset
);
2127 defsubr (&Scharset_after
);
2128 defsubr (&Siso_charset
);
2129 defsubr (&Sclear_charset_maps
);
2130 defsubr (&Scharset_priority_list
);
2131 defsubr (&Sset_charset_priority
);
2132 defsubr (&Scharset_id_internal
);
2134 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2135 doc
: /* *Lisp of directories to search for charset map files. */);
2136 Vcharset_map_path
= Qnil
;
2138 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2139 doc
: /* List of all charsets ever defined. */);
2140 Vcharset_list
= Qnil
;
2142 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2143 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2144 If the current language environment is for multiple languages (e.g. "Latin-1"),
2145 the value may be a list of mnemonics. */);
2146 Vcurrent_iso639_language
= Qnil
;
2149 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2150 0, 127, 'B', -1, 0, 1, 0, 0);
2152 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2153 0, 255, -1, -1, -1, 1, 0, 0);
2155 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2156 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2158 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2159 128, 255, -1, 0, -1, 0, 1,
2160 MAX_5_BYTE_CHAR
+ 1);
2165 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2166 (do not change this comment) */