1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
34 #include <sys/types.h>
37 #include "character.h"
43 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
45 A coded character set ("charset" hereafter) is a meaningful
46 collection (i.e. language, culture, functionality, etc.) of
47 characters. Emacs handles multiple charsets at once. In Emacs Lisp
48 code, a charset is represented by a symbol. In C code, a charset is
49 represented by its ID number or by a pointer to a struct charset.
51 The actual information about each charset is stored in two places.
52 Lispy information is stored in the hash table Vcharset_hash_table as
53 a vector (charset attributes). The other information is stored in
54 charset_table as a struct charset.
58 /* List of all charsets. This variable is used only from Emacs
60 Lisp_Object Vcharset_list
;
62 /* Hash table that contains attributes of each charset. Keys are
63 charset symbols, and values are vectors of charset attributes. */
64 Lisp_Object Vcharset_hash_table
;
66 /* Table of struct charset. */
67 struct charset
*charset_table
;
69 static int charset_table_size
;
70 static int charset_table_used
;
72 Lisp_Object Qcharsetp
;
74 /* Special charset symbols. */
76 Lisp_Object Qeight_bit
;
77 Lisp_Object Qiso_8859_1
;
81 /* The corresponding charsets. */
83 int charset_eight_bit
;
84 int charset_iso_8859_1
;
88 /* The other special charsets. */
89 int charset_jisx0201_roman
;
90 int charset_jisx0208_1978
;
94 /* Value of charset attribute `charset-iso-plane'. */
97 /* Charset of unibyte characters. */
100 /* List of charsets ordered by the priority. */
101 Lisp_Object Vcharset_ordered_list
;
103 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
105 Lisp_Object Vcharset_non_preferred_head
;
107 /* Incremented everytime we change Vcharset_ordered_list. This is
108 unsigned short so that it fits in Lisp_Int and never matches
110 unsigned short charset_ordered_list_tick
;
112 /* List of iso-2022 charsets. */
113 Lisp_Object Viso_2022_charset_list
;
115 /* List of emacs-mule charsets. */
116 Lisp_Object Vemacs_mule_charset_list
;
118 int emacs_mule_charset
[256];
120 /* Mapping table from ISO2022's charset (specified by DIMENSION,
121 CHARS, and FINAL-CHAR) to Emacs' charset. */
122 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
124 Lisp_Object Vcharset_map_path
;
126 /* If nonzero, don't load charset maps. */
127 int inhibit_load_charset_map
;
129 Lisp_Object Vcurrent_iso639_language
;
131 /* Defined in chartab.c */
133 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
134 Lisp_Object function
, Lisp_Object table
,
135 Lisp_Object arg
, struct charset
*charset
,
136 unsigned from
, unsigned to
));
138 #define CODE_POINT_TO_INDEX(charset, code) \
139 ((charset)->code_linear_p \
140 ? (code) - (charset)->min_code \
141 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
142 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
143 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
144 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
145 ? (((((code) >> 24) - (charset)->code_space[12]) \
146 * (charset)->code_space[11]) \
147 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
148 * (charset)->code_space[7]) \
149 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
150 * (charset)->code_space[3]) \
151 + (((code) & 0xFF) - (charset)->code_space[0]) \
152 - ((charset)->char_index_offset)) \
156 /* Convert the character index IDX to code-point CODE for CHARSET.
157 It is assumed that IDX is in a valid range. */
159 #define INDEX_TO_CODE_POINT(charset, idx) \
160 ((charset)->code_linear_p \
161 ? (idx) + (charset)->min_code \
162 : (idx += (charset)->char_index_offset, \
163 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
164 | (((charset)->code_space[4] \
165 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
167 | (((charset)->code_space[8] \
168 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
170 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
173 /* Structure to hold mapping tables for a charset. Used by temacs
174 invoked for dumping. */
178 /* The current charset for which the following tables are setup. */
179 struct charset
*current
;
181 /* 1 iff the following table is used for encoder. */
184 /* When the following table is used for encoding, mininum and
185 maxinum character of the current charset. */
186 int min_char
, max_char
;
188 /* A Unicode character correspoinding to the code indice 0 (i.e. the
189 minimum code-point) of the current charset, or -1 if the code
190 indice 0 is not a Unicode character. This is checked when
191 table.encoder[CHAR] is zero. */
195 /* Table mapping code-indices (not code-points) of the current
196 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
197 doesn't belong to the current charset. */
198 int decoder
[0x10000];
199 /* Table mapping Unicode characters to code-indices of the current
200 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
201 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
202 (0x20000..0x2FFFF). Note that there is no charset map that
203 uses both SMP and SIP. */
204 unsigned short encoder
[0x20000];
206 } *temp_charset_work
;
208 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
211 temp_charset_work->zero_index_char = (C); \
212 else if ((C) < 0x20000) \
213 temp_charset_work->table.encoder[(C)] = (CODE); \
215 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
218 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
219 ((C) == temp_charset_work->zero_index_char ? 0 \
220 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
221 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
222 : temp_charset_work->table.encoder[(C) - 0x10000] \
223 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
225 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
226 (temp_charset_work->table.decoder[(CODE)] = (C))
228 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
229 (temp_charset_work->table.decoder[(CODE)])
232 /* Set to 1 to warn that a charset map is loaded and thus a buffer
233 text and a string data may be relocated. */
234 int charset_map_loaded
;
236 struct charset_map_entries
242 struct charset_map_entries
*next
;
245 /* Load the mapping information of CHARSET from ENTRIES for
246 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
247 encoding (CONTROL_FLAG == 2).
249 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
250 and CHARSET->fast_map.
252 If CONTROL_FLAG is 1, setup the following tables according to
253 CHARSET->method and inhibit_load_charset_map.
255 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
256 ----------------------+--------------------+---------------------------
257 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
258 ----------------------+--------------------+---------------------------
259 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
261 If CONTROL_FLAG is 2, setup the following tables.
263 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
264 ----------------------+--------------------+---------------------------
265 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
266 ----------------------+--------------------+--------------------------
267 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
271 load_charset_map (charset
, entries
, n_entries
, control_flag
)
272 struct charset
*charset
;
273 struct charset_map_entries
*entries
;
277 Lisp_Object vec
, table
;
278 unsigned max_code
= CHARSET_MAX_CODE (charset
);
279 int ascii_compatible_p
= charset
->ascii_compatible_p
;
280 int min_char
, max_char
, nonascii_min_char
;
282 unsigned char *fast_map
= charset
->fast_map
;
289 if (! inhibit_load_charset_map
)
291 if (control_flag
== 1)
293 if (charset
->method
== CHARSET_METHOD_MAP
)
295 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
297 vec
= CHARSET_DECODER (charset
)
298 = Fmake_vector (make_number (n
), make_number (-1));
302 char_table_set_range (Vchar_unify_table
,
303 charset
->min_char
, charset
->max_char
,
309 table
= Fmake_char_table (Qnil
, Qnil
);
310 if (charset
->method
== CHARSET_METHOD_MAP
)
311 CHARSET_ENCODER (charset
) = table
;
313 CHARSET_DEUNIFIER (charset
) = table
;
318 if (! temp_charset_work
)
319 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
320 if (control_flag
== 1)
322 memset (temp_charset_work
->table
.decoder
, -1,
323 sizeof (int) * 0x10000);
327 memset (temp_charset_work
->table
.encoder
, 0,
328 sizeof (unsigned short) * 0x20000);
329 temp_charset_work
->zero_index_char
= -1;
331 temp_charset_work
->current
= charset
;
332 temp_charset_work
->for_encoder
= (control_flag
== 2);
335 charset_map_loaded
= 1;
338 min_char
= max_char
= entries
->entry
[0].c
;
339 nonascii_min_char
= MAX_CHAR
;
340 for (i
= 0; i
< n_entries
; i
++)
343 int from_index
, to_index
;
345 int idx
= i
% 0x10000;
347 if (i
> 0 && idx
== 0)
348 entries
= entries
->next
;
349 from
= entries
->entry
[idx
].from
;
350 to
= entries
->entry
[idx
].to
;
351 from_c
= entries
->entry
[idx
].c
;
352 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
355 to_index
= from_index
;
360 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
361 to_c
= from_c
+ (to_index
- from_index
);
363 if (from_index
< 0 || to_index
< 0)
368 else if (from_c
< min_char
)
371 if (control_flag
== 1)
373 if (charset
->method
== CHARSET_METHOD_MAP
)
374 for (; from_index
<= to_index
; from_index
++, from_c
++)
375 ASET (vec
, from_index
, make_number (from_c
));
377 for (; from_index
<= to_index
; from_index
++, from_c
++)
378 CHAR_TABLE_SET (Vchar_unify_table
,
379 CHARSET_CODE_OFFSET (charset
) + from_index
,
380 make_number (from_c
));
382 else if (control_flag
== 2)
384 if (charset
->method
== CHARSET_METHOD_MAP
385 && CHARSET_COMPACT_CODES_P (charset
))
386 for (; from_index
<= to_index
; from_index
++, from_c
++)
388 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
390 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
391 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
394 for (; from_index
<= to_index
; from_index
++, from_c
++)
396 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
397 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
400 else if (control_flag
== 3)
401 for (; from_index
<= to_index
; from_index
++, from_c
++)
402 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
403 else if (control_flag
== 4)
404 for (; from_index
<= to_index
; from_index
++, from_c
++)
405 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
406 else /* control_flag == 0 */
408 if (ascii_compatible_p
)
410 if (! ASCII_BYTE_P (from_c
))
412 if (from_c
< nonascii_min_char
)
413 nonascii_min_char
= from_c
;
415 else if (! ASCII_BYTE_P (to_c
))
417 nonascii_min_char
= 0x80;
421 for (; from_c
<= to_c
; from_c
++)
422 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
426 if (control_flag
== 0)
428 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
429 ? nonascii_min_char
: min_char
);
430 CHARSET_MAX_CHAR (charset
) = max_char
;
432 else if (control_flag
== 4)
434 temp_charset_work
->min_char
= min_char
;
435 temp_charset_work
->max_char
= max_char
;
440 /* Read a hexadecimal number (preceded by "0x") from the file FP while
441 paying attention to comment charcter '#'. */
443 static INLINE
unsigned
451 while ((c
= getc (fp
)) != EOF
)
455 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
459 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
471 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
473 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
475 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
476 n
= (n
* 10) + c
- '0';
482 extern Lisp_Object Qfile_name_handler_alist
;
484 /* Return a mapping vector for CHARSET loaded from MAPFILE.
485 Each line of MAPFILE has this form
487 where 0xAAAA is a code-point and 0xCCCC is the corresponding
488 character code, or this form
490 where 0xAAAA and 0xBBBB are code-points specifying a range, and
491 0xCCCC is the first character code of the range.
493 The returned vector has this form:
494 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
495 where CODE1 is a code-point or a cons of code-points specifying a
498 Note that this function uses `openp' to open MAPFILE but ignores
499 `file-name-handler-alist' to avoid running any Lisp code. */
501 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
504 load_charset_map_from_file (charset
, mapfile
, control_flag
)
505 struct charset
*charset
;
509 unsigned min_code
= CHARSET_MIN_CODE (charset
);
510 unsigned max_code
= CHARSET_MAX_CODE (charset
);
514 Lisp_Object suffixes
;
515 struct charset_map_entries
*head
, *entries
;
516 int n_entries
, count
;
519 suffixes
= Fcons (build_string (".map"),
520 Fcons (build_string (".TXT"), Qnil
));
522 count
= SPECPDL_INDEX ();
523 specbind (Qfile_name_handler_alist
, Qnil
);
524 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
525 unbind_to (count
, Qnil
);
527 || ! (fp
= fdopen (fd
, "r")))
528 error ("Failure in loading charset map: %S", SDATA (mapfile
));
530 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
531 large (larger than MAX_ALLOCA). */
532 SAFE_ALLOCA (head
, struct charset_map_entries
*,
533 sizeof (struct charset_map_entries
));
535 bzero (entries
, sizeof (struct charset_map_entries
));
545 from
= read_hex (fp
, &eof
);
548 if (getc (fp
) == '-')
549 to
= read_hex (fp
, &eof
);
552 c
= (int) read_hex (fp
, &eof
);
554 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
557 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
559 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
560 sizeof (struct charset_map_entries
));
561 entries
= entries
->next
;
562 bzero (entries
, sizeof (struct charset_map_entries
));
564 idx
= n_entries
% 0x10000;
565 entries
->entry
[idx
].from
= from
;
566 entries
->entry
[idx
].to
= to
;
567 entries
->entry
[idx
].c
= c
;
573 load_charset_map (charset
, head
, n_entries
, control_flag
);
578 load_charset_map_from_vector (charset
, vec
, control_flag
)
579 struct charset
*charset
;
583 unsigned min_code
= CHARSET_MIN_CODE (charset
);
584 unsigned max_code
= CHARSET_MAX_CODE (charset
);
585 struct charset_map_entries
*head
, *entries
;
587 int len
= ASIZE (vec
);
593 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
597 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
598 large (larger than MAX_ALLOCA). */
599 SAFE_ALLOCA (head
, struct charset_map_entries
*,
600 sizeof (struct charset_map_entries
));
602 bzero (entries
, sizeof (struct charset_map_entries
));
605 for (i
= 0; i
< len
; i
+= 2)
607 Lisp_Object val
, val2
;
619 from
= XFASTINT (val
);
620 to
= XFASTINT (val2
);
625 from
= to
= XFASTINT (val
);
627 val
= AREF (vec
, i
+ 1);
631 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
634 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
636 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
637 sizeof (struct charset_map_entries
));
638 entries
= entries
->next
;
639 bzero (entries
, sizeof (struct charset_map_entries
));
641 idx
= n_entries
% 0x10000;
642 entries
->entry
[idx
].from
= from
;
643 entries
->entry
[idx
].to
= to
;
644 entries
->entry
[idx
].c
= c
;
648 load_charset_map (charset
, head
, n_entries
, control_flag
);
653 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
654 map it is (see the comment of load_charset_map for the detail). */
657 load_charset (charset
, control_flag
)
658 struct charset
*charset
;
663 if (inhibit_load_charset_map
665 && charset
== temp_charset_work
->current
666 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
669 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
670 map
= CHARSET_MAP (charset
);
671 else if (CHARSET_UNIFIED_P (charset
))
672 map
= CHARSET_UNIFY_MAP (charset
);
674 load_charset_map_from_file (charset
, map
, control_flag
);
676 load_charset_map_from_vector (charset
, map
, control_flag
);
680 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
681 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
685 return (CHARSETP (object
) ? Qt
: Qnil
);
689 void map_charset_for_dump
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
690 Lisp_Object function
, Lisp_Object arg
,
691 unsigned from
, unsigned to
));
694 map_charset_for_dump (c_function
, function
, arg
, from
, to
)
695 void (*c_function
) (Lisp_Object
, Lisp_Object
);
696 Lisp_Object function
, arg
;
699 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
700 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
705 range
= Fcons (Qnil
, Qnil
);
708 c
= temp_charset_work
->min_char
;
709 stop
= (temp_charset_work
->max_char
< 0x20000
710 ? temp_charset_work
->max_char
: 0xFFFF);
714 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
716 if (index
>= from_idx
&& index
<= to_idx
)
718 if (NILP (XCAR (range
)))
719 XSETCAR (range
, make_number (c
));
721 else if (! NILP (XCAR (range
)))
723 XSETCDR (range
, make_number (c
- 1));
725 (*c_function
) (arg
, range
);
727 call2 (function
, range
, arg
);
728 XSETCAR (range
, Qnil
);
732 if (c
== temp_charset_work
->max_char
)
734 if (! NILP (XCAR (range
)))
736 XSETCDR (range
, make_number (c
));
738 (*c_function
) (arg
, range
);
740 call2 (function
, range
, arg
);
745 stop
= temp_charset_work
->max_char
;
753 map_charset_chars (c_function
, function
, arg
,
755 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
756 Lisp_Object function
, arg
;
757 struct charset
*charset
;
763 partial
= (from
> CHARSET_MIN_CODE (charset
)
764 || to
< CHARSET_MAX_CODE (charset
));
766 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
768 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
769 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
770 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
771 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
773 if (CHARSET_UNIFIED_P (charset
))
775 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
776 load_charset (charset
, 2);
777 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
778 map_char_table_for_charset (c_function
, function
,
779 CHARSET_DEUNIFIER (charset
), arg
,
780 partial
? charset
: NULL
, from
, to
);
782 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
785 range
= Fcons (make_number (from_c
), make_number (to_c
));
787 (*c_function
) (arg
, range
);
789 call2 (function
, range
, arg
);
791 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
793 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
794 load_charset (charset
, 2);
795 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
796 map_char_table_for_charset (c_function
, function
,
797 CHARSET_ENCODER (charset
), arg
,
798 partial
? charset
: NULL
, from
, to
);
800 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
802 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
804 Lisp_Object subset_info
;
807 subset_info
= CHARSET_SUBSET (charset
);
808 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
809 offset
= XINT (AREF (subset_info
, 3));
811 if (from
< XFASTINT (AREF (subset_info
, 1)))
812 from
= XFASTINT (AREF (subset_info
, 1));
814 if (to
> XFASTINT (AREF (subset_info
, 2)))
815 to
= XFASTINT (AREF (subset_info
, 2));
816 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
818 else /* i.e. CHARSET_METHOD_SUPERSET */
822 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
823 parents
= XCDR (parents
))
826 unsigned this_from
, this_to
;
828 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
829 offset
= XINT (XCDR (XCAR (parents
)));
830 this_from
= from
> offset
? from
- offset
: 0;
831 this_to
= to
> offset
? to
- offset
: 0;
832 if (this_from
< CHARSET_MIN_CODE (charset
))
833 this_from
= CHARSET_MIN_CODE (charset
);
834 if (this_to
> CHARSET_MAX_CODE (charset
))
835 this_to
= CHARSET_MAX_CODE (charset
);
836 map_charset_chars (c_function
, function
, arg
, charset
,
842 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
843 doc
: /* Call FUNCTION for all characters in CHARSET.
844 FUNCTION is called with an argument RANGE and the optional 3rd
847 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
848 characters contained in CHARSET.
850 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
851 range of code points (in CHARSET) of target characters. */)
852 (function
, charset
, arg
, from_code
, to_code
)
853 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
858 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
859 if (NILP (from_code
))
860 from
= CHARSET_MIN_CODE (cs
);
863 CHECK_NATNUM (from_code
);
864 from
= XINT (from_code
);
865 if (from
< CHARSET_MIN_CODE (cs
))
866 from
= CHARSET_MIN_CODE (cs
);
869 to
= CHARSET_MAX_CODE (cs
);
872 CHECK_NATNUM (to_code
);
874 if (to
> CHARSET_MAX_CODE (cs
))
875 to
= CHARSET_MAX_CODE (cs
);
877 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
882 /* Define a charset according to the arguments. The Nth argument is
883 the Nth attribute of the charset (the last attribute `charset-id'
884 is not included). See the docstring of `define-charset' for the
887 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
888 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
889 doc
: /* For internal use only.
890 usage: (define-charset-internal ...) */)
895 /* Charset attr vector. */
899 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
901 struct charset charset
;
904 int new_definition_p
;
907 if (nargs
!= charset_arg_max
)
908 return Fsignal (Qwrong_number_of_arguments
,
909 Fcons (intern ("define-charset-internal"),
910 make_number (nargs
)));
912 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
914 CHECK_SYMBOL (args
[charset_arg_name
]);
915 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
917 val
= args
[charset_arg_code_space
];
918 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
920 int min_byte
, max_byte
;
922 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
923 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
924 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
925 error ("Invalid :code-space value");
926 charset
.code_space
[i
* 4] = min_byte
;
927 charset
.code_space
[i
* 4 + 1] = max_byte
;
928 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
929 nchars
*= charset
.code_space
[i
* 4 + 2];
930 charset
.code_space
[i
* 4 + 3] = nchars
;
935 val
= args
[charset_arg_dimension
];
937 charset
.dimension
= dimension
;
941 charset
.dimension
= XINT (val
);
942 if (charset
.dimension
< 1 || charset
.dimension
> 4)
943 args_out_of_range_3 (val
, make_number (1), make_number (4));
946 charset
.code_linear_p
947 = (charset
.dimension
== 1
948 || (charset
.code_space
[2] == 256
949 && (charset
.dimension
== 2
950 || (charset
.code_space
[6] == 256
951 && (charset
.dimension
== 3
952 || charset
.code_space
[10] == 256)))));
954 if (! charset
.code_linear_p
)
956 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
957 bzero (charset
.code_space_mask
, 256);
958 for (i
= 0; i
< 4; i
++)
959 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
961 charset
.code_space_mask
[j
] |= (1 << i
);
964 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
966 charset
.min_code
= (charset
.code_space
[0]
967 | (charset
.code_space
[4] << 8)
968 | (charset
.code_space
[8] << 16)
969 | (charset
.code_space
[12] << 24));
970 charset
.max_code
= (charset
.code_space
[1]
971 | (charset
.code_space
[5] << 8)
972 | (charset
.code_space
[9] << 16)
973 | (charset
.code_space
[13] << 24));
974 charset
.char_index_offset
= 0;
976 val
= args
[charset_arg_min_code
];
986 CHECK_NUMBER_CAR (val
);
987 CHECK_NUMBER_CDR (val
);
988 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
990 if (code
< charset
.min_code
991 || code
> charset
.max_code
)
992 args_out_of_range_3 (make_number (charset
.min_code
),
993 make_number (charset
.max_code
), val
);
994 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
995 charset
.min_code
= code
;
998 val
= args
[charset_arg_max_code
];
1008 CHECK_NUMBER_CAR (val
);
1009 CHECK_NUMBER_CDR (val
);
1010 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
1012 if (code
< charset
.min_code
1013 || code
> charset
.max_code
)
1014 args_out_of_range_3 (make_number (charset
.min_code
),
1015 make_number (charset
.max_code
), val
);
1016 charset
.max_code
= code
;
1019 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
1021 val
= args
[charset_arg_invalid_code
];
1024 if (charset
.min_code
> 0)
1025 charset
.invalid_code
= 0;
1028 XSETINT (val
, charset
.max_code
+ 1);
1029 if (XINT (val
) == charset
.max_code
+ 1)
1030 charset
.invalid_code
= charset
.max_code
+ 1;
1032 error ("Attribute :invalid-code must be specified");
1038 charset
.invalid_code
= XFASTINT (val
);
1041 val
= args
[charset_arg_iso_final
];
1043 charset
.iso_final
= -1;
1047 if (XINT (val
) < '0' || XINT (val
) > 127)
1048 error ("Invalid iso-final-char: %d", XINT (val
));
1049 charset
.iso_final
= XINT (val
);
1052 val
= args
[charset_arg_iso_revision
];
1054 charset
.iso_revision
= -1;
1058 if (XINT (val
) > 63)
1059 args_out_of_range (make_number (63), val
);
1060 charset
.iso_revision
= XINT (val
);
1063 val
= args
[charset_arg_emacs_mule_id
];
1065 charset
.emacs_mule_id
= -1;
1069 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1070 error ("Invalid emacs-mule-id: %d", XINT (val
));
1071 charset
.emacs_mule_id
= XINT (val
);
1074 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1076 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1078 charset
.unified_p
= 0;
1080 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
1082 if (! NILP (args
[charset_arg_code_offset
]))
1084 val
= args
[charset_arg_code_offset
];
1087 charset
.method
= CHARSET_METHOD_OFFSET
;
1088 charset
.code_offset
= XINT (val
);
1090 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1091 charset
.min_char
= i
+ charset
.code_offset
;
1092 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1093 charset
.max_char
= i
+ charset
.code_offset
;
1094 if (charset
.max_char
> MAX_CHAR
)
1095 error ("Unsupported max char: %d", charset
.max_char
);
1097 i
= (charset
.min_char
>> 7) << 7;
1098 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1099 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1100 i
= (i
>> 12) << 12;
1101 for (; i
<= charset
.max_char
; i
+= 0x1000)
1102 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1103 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1104 charset
.ascii_compatible_p
= 1;
1106 else if (! NILP (args
[charset_arg_map
]))
1108 val
= args
[charset_arg_map
];
1109 ASET (attrs
, charset_map
, val
);
1110 charset
.method
= CHARSET_METHOD_MAP
;
1112 else if (! NILP (args
[charset_arg_subset
]))
1115 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1116 struct charset
*parent_charset
;
1118 val
= args
[charset_arg_subset
];
1119 parent
= Fcar (val
);
1120 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1121 parent_min_code
= Fnth (make_number (1), val
);
1122 CHECK_NATNUM (parent_min_code
);
1123 parent_max_code
= Fnth (make_number (2), val
);
1124 CHECK_NATNUM (parent_max_code
);
1125 parent_code_offset
= Fnth (make_number (3), val
);
1126 CHECK_NUMBER (parent_code_offset
);
1127 val
= Fmake_vector (make_number (4), Qnil
);
1128 ASET (val
, 0, make_number (parent_charset
->id
));
1129 ASET (val
, 1, parent_min_code
);
1130 ASET (val
, 2, parent_max_code
);
1131 ASET (val
, 3, parent_code_offset
);
1132 ASET (attrs
, charset_subset
, val
);
1134 charset
.method
= CHARSET_METHOD_SUBSET
;
1135 /* Here, we just copy the parent's fast_map. It's not accurate,
1136 but at least it works for quickly detecting which character
1137 DOESN'T belong to this charset. */
1138 for (i
= 0; i
< 190; i
++)
1139 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1141 /* We also copy these for parents. */
1142 charset
.min_char
= parent_charset
->min_char
;
1143 charset
.max_char
= parent_charset
->max_char
;
1145 else if (! NILP (args
[charset_arg_superset
]))
1147 val
= args
[charset_arg_superset
];
1148 charset
.method
= CHARSET_METHOD_SUPERSET
;
1149 val
= Fcopy_sequence (val
);
1150 ASET (attrs
, charset_superset
, val
);
1152 charset
.min_char
= MAX_CHAR
;
1153 charset
.max_char
= 0;
1154 for (; ! NILP (val
); val
= Fcdr (val
))
1156 Lisp_Object elt
, car_part
, cdr_part
;
1157 int this_id
, offset
;
1158 struct charset
*this_charset
;
1163 car_part
= XCAR (elt
);
1164 cdr_part
= XCDR (elt
);
1165 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1166 CHECK_NUMBER (cdr_part
);
1167 offset
= XINT (cdr_part
);
1171 CHECK_CHARSET_GET_ID (elt
, this_id
);
1174 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1176 this_charset
= CHARSET_FROM_ID (this_id
);
1177 if (charset
.min_char
> this_charset
->min_char
)
1178 charset
.min_char
= this_charset
->min_char
;
1179 if (charset
.max_char
< this_charset
->max_char
)
1180 charset
.max_char
= this_charset
->max_char
;
1181 for (i
= 0; i
< 190; i
++)
1182 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1186 error ("None of :code-offset, :map, :parents are specified");
1188 val
= args
[charset_arg_unify_map
];
1189 if (! NILP (val
) && !STRINGP (val
))
1191 ASET (attrs
, charset_unify_map
, val
);
1193 CHECK_LIST (args
[charset_arg_plist
]);
1194 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1196 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1198 if (charset
.hash_index
>= 0)
1200 new_definition_p
= 0;
1201 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1202 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1206 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1208 if (charset_table_used
== charset_table_size
)
1210 struct charset
*new_table
1211 = (struct charset
*) xmalloc (sizeof (struct charset
)
1212 * (charset_table_size
+ 16));
1213 bcopy (charset_table
, new_table
,
1214 sizeof (struct charset
) * charset_table_size
);
1215 charset_table_size
+= 16;
1216 charset_table
= new_table
;
1218 id
= charset_table_used
++;
1219 new_definition_p
= 1;
1222 ASET (attrs
, charset_id
, make_number (id
));
1224 charset_table
[id
] = charset
;
1226 if (charset
.method
== CHARSET_METHOD_MAP
)
1228 load_charset (&charset
, 0);
1229 charset_table
[id
] = charset
;
1232 if (charset
.iso_final
>= 0)
1234 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1235 charset
.iso_final
) = id
;
1236 if (new_definition_p
)
1237 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1238 Fcons (make_number (id
), Qnil
));
1239 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1240 charset_jisx0201_roman
= id
;
1241 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1242 charset_jisx0208_1978
= id
;
1243 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1244 charset_jisx0208
= id
;
1245 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1246 charset_ksc5601
= id
;
1249 if (charset
.emacs_mule_id
>= 0)
1251 emacs_mule_charset
[charset
.emacs_mule_id
] = id
;
1252 if (charset
.emacs_mule_id
< 0xA0)
1253 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1255 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1256 if (new_definition_p
)
1257 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1258 Fcons (make_number (id
), Qnil
));
1261 if (new_definition_p
)
1263 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1264 if (charset
.supplementary_p
)
1265 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1266 Fcons (make_number (id
), Qnil
));
1271 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1273 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1275 if (cs
->supplementary_p
)
1278 if (EQ (tail
, Vcharset_ordered_list
))
1279 Vcharset_ordered_list
= Fcons (make_number (id
),
1280 Vcharset_ordered_list
);
1281 else if (NILP (tail
))
1282 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1283 Fcons (make_number (id
), Qnil
));
1286 val
= Fcons (XCAR (tail
), XCDR (tail
));
1287 XSETCDR (tail
, val
);
1288 XSETCAR (tail
, make_number (id
));
1291 charset_ordered_list_tick
++;
1298 /* Same as Fdefine_charset_internal but arguments are more convenient
1299 to call from C (typically in syms_of_charset). This can define a
1300 charset of `offset' method only. Return the ID of the new
1304 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1305 iso_final
, iso_revision
, emacs_mule_id
,
1306 ascii_compatible
, supplementary
,
1310 unsigned char *code_space
;
1311 unsigned min_code
, max_code
;
1312 int iso_final
, iso_revision
, emacs_mule_id
;
1313 int ascii_compatible
, supplementary
;
1316 Lisp_Object args
[charset_arg_max
];
1317 Lisp_Object plist
[14];
1321 args
[charset_arg_name
] = name
;
1322 args
[charset_arg_dimension
] = make_number (dimension
);
1323 val
= Fmake_vector (make_number (8), make_number (0));
1324 for (i
= 0; i
< 8; i
++)
1325 ASET (val
, i
, make_number (code_space
[i
]));
1326 args
[charset_arg_code_space
] = val
;
1327 args
[charset_arg_min_code
] = make_number (min_code
);
1328 args
[charset_arg_max_code
] = make_number (max_code
);
1329 args
[charset_arg_iso_final
]
1330 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1331 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1332 args
[charset_arg_emacs_mule_id
]
1333 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1334 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1335 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1336 args
[charset_arg_invalid_code
] = Qnil
;
1337 args
[charset_arg_code_offset
] = make_number (code_offset
);
1338 args
[charset_arg_map
] = Qnil
;
1339 args
[charset_arg_subset
] = Qnil
;
1340 args
[charset_arg_superset
] = Qnil
;
1341 args
[charset_arg_unify_map
] = Qnil
;
1343 plist
[0] = intern_c_string (":name");
1344 plist
[1] = args
[charset_arg_name
];
1345 plist
[2] = intern_c_string (":dimension");
1346 plist
[3] = args
[charset_arg_dimension
];
1347 plist
[4] = intern_c_string (":code-space");
1348 plist
[5] = args
[charset_arg_code_space
];
1349 plist
[6] = intern_c_string (":iso-final-char");
1350 plist
[7] = args
[charset_arg_iso_final
];
1351 plist
[8] = intern_c_string (":emacs-mule-id");
1352 plist
[9] = args
[charset_arg_emacs_mule_id
];
1353 plist
[10] = intern_c_string (":ascii-compatible-p");
1354 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1355 plist
[12] = intern_c_string (":code-offset");
1356 plist
[13] = args
[charset_arg_code_offset
];
1358 args
[charset_arg_plist
] = Flist (14, plist
);
1359 Fdefine_charset_internal (charset_arg_max
, args
);
1361 return XINT (CHARSET_SYMBOL_ID (name
));
1365 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1366 Sdefine_charset_alias
, 2, 2, 0,
1367 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1369 Lisp_Object alias
, charset
;
1373 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1374 Fputhash (alias
, attr
, Vcharset_hash_table
);
1375 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1380 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1381 doc
: /* Return the property list of CHARSET. */)
1383 Lisp_Object charset
;
1387 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1388 return CHARSET_ATTR_PLIST (attrs
);
1392 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1393 doc
: /* Set CHARSET's property list to PLIST. */)
1395 Lisp_Object charset
, plist
;
1399 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1400 CHARSET_ATTR_PLIST (attrs
) = plist
;
1405 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1406 doc
: /* Unify characters of CHARSET with Unicode.
1407 This means reading the relevant file and installing the table defined
1408 by CHARSET's `:unify-map' property.
1410 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1411 the same meaning as the `:unify-map' attribute in the function
1412 `define-charset' (which see).
1414 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1415 (charset
, unify_map
, deunify
)
1416 Lisp_Object charset
, unify_map
, deunify
;
1421 CHECK_CHARSET_GET_ID (charset
, id
);
1422 cs
= CHARSET_FROM_ID (id
);
1424 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1425 : ! CHARSET_UNIFIED_P (cs
))
1428 CHARSET_UNIFIED_P (cs
) = 0;
1431 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1432 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1433 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1434 if (NILP (unify_map
))
1435 unify_map
= CHARSET_UNIFY_MAP (cs
);
1438 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1439 signal_error ("Bad unify-map", unify_map
);
1440 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1442 if (NILP (Vchar_unify_table
))
1443 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1444 char_table_set_range (Vchar_unify_table
,
1445 cs
->min_char
, cs
->max_char
, charset
);
1446 CHARSET_UNIFIED_P (cs
) = 1;
1448 else if (CHAR_TABLE_P (Vchar_unify_table
))
1450 int min_code
= CHARSET_MIN_CODE (cs
);
1451 int max_code
= CHARSET_MAX_CODE (cs
);
1452 int min_char
= DECODE_CHAR (cs
, min_code
);
1453 int max_char
= DECODE_CHAR (cs
, max_code
);
1455 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1461 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1462 Sget_unused_iso_final_char
, 2, 2, 0,
1464 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1465 DIMENSION is the number of bytes to represent a character: 1 or 2.
1466 CHARS is the number of characters in a dimension: 94 or 96.
1468 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1469 If there's no unused final char for the specified kind of charset,
1472 Lisp_Object dimension
, chars
;
1476 CHECK_NUMBER (dimension
);
1477 CHECK_NUMBER (chars
);
1478 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1479 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1480 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1481 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1482 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1483 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1485 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1489 check_iso_charset_parameter (dimension
, chars
, final_char
)
1490 Lisp_Object dimension
, chars
, final_char
;
1492 CHECK_NATNUM (dimension
);
1493 CHECK_NATNUM (chars
);
1494 CHECK_NATNUM (final_char
);
1496 if (XINT (dimension
) > 3)
1497 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1498 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1499 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1500 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1501 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1505 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1507 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1509 On decoding by an ISO-2022 base coding system, when a charset
1510 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1511 if CHARSET is designated instead. */)
1512 (dimension
, chars
, final_char
, charset
)
1513 Lisp_Object dimension
, chars
, final_char
, charset
;
1518 CHECK_CHARSET_GET_ID (charset
, id
);
1519 check_iso_charset_parameter (dimension
, chars
, final_char
);
1520 chars_flag
= XINT (chars
) == 96;
1521 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1526 /* Return information about charsets in the text at PTR of NBYTES
1527 bytes, which are NCHARS characters. The value is:
1529 0: Each character is represented by one byte. This is always
1530 true for a unibyte string. For a multibyte string, true if
1531 it contains only ASCII characters.
1533 1: No charsets other than ascii, control-1, and latin-1 are
1540 string_xstring_p (string
)
1543 const unsigned char *p
= SDATA (string
);
1544 const unsigned char *endp
= p
+ SBYTES (string
);
1546 if (SCHARS (string
) == SBYTES (string
))
1551 int c
= STRING_CHAR_ADVANCE (p
);
1560 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1562 CHARSETS is a vector. If Nth element is non-nil, it means the
1563 charset whose id is N is already found.
1565 It may lookup a translation table TABLE if supplied. */
1568 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1569 const unsigned char *ptr
;
1570 EMACS_INT nchars
, nbytes
;
1571 Lisp_Object charsets
, table
;
1574 const unsigned char *pend
= ptr
+ nbytes
;
1576 if (nchars
== nbytes
)
1579 ASET (charsets
, charset_ascii
, Qt
);
1586 c
= translate_char (table
, c
);
1587 if (ASCII_BYTE_P (c
))
1588 ASET (charsets
, charset_ascii
, Qt
);
1590 ASET (charsets
, charset_eight_bit
, Qt
);
1597 int c
= STRING_CHAR_ADVANCE (ptr
);
1598 struct charset
*charset
;
1601 c
= translate_char (table
, c
);
1602 charset
= CHAR_CHARSET (c
);
1603 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1608 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1610 doc
: /* Return a list of charsets in the region between BEG and END.
1611 BEG and END are buffer positions.
1612 Optional arg TABLE if non-nil is a translation table to look up.
1614 If the current buffer is unibyte, the returned list may contain
1615 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1617 Lisp_Object beg
, end
, table
;
1619 Lisp_Object charsets
;
1620 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1623 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1625 validate_region (&beg
, &end
);
1626 from
= XFASTINT (beg
);
1627 stop
= to
= XFASTINT (end
);
1629 if (from
< GPT
&& GPT
< to
)
1632 stop_byte
= GPT_BYTE
;
1635 stop_byte
= CHAR_TO_BYTE (stop
);
1637 from_byte
= CHAR_TO_BYTE (from
);
1639 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1642 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1643 stop_byte
- from_byte
, charsets
, table
,
1647 from
= stop
, from_byte
= stop_byte
;
1648 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1655 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1656 if (!NILP (AREF (charsets
, i
)))
1657 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1661 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1663 doc
: /* Return a list of charsets in STR.
1664 Optional arg TABLE if non-nil is a translation table to look up.
1666 If STR is unibyte, the returned list may contain
1667 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1669 Lisp_Object str
, table
;
1671 Lisp_Object charsets
;
1677 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1678 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1680 STRING_MULTIBYTE (str
));
1682 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1683 if (!NILP (AREF (charsets
, i
)))
1684 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1690 /* Return a unified character code for C (>= 0x110000). VAL is a
1691 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1694 maybe_unify_char (c
, val
)
1698 struct charset
*charset
;
1705 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1706 load_charset (charset
, 1);
1707 if (! inhibit_load_charset_map
)
1709 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1715 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1716 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1725 /* Return a character correponding to the code-point CODE of
1729 decode_char (charset
, code
)
1730 struct charset
*charset
;
1734 enum charset_method method
= CHARSET_METHOD (charset
);
1736 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1739 if (method
== CHARSET_METHOD_SUBSET
)
1741 Lisp_Object subset_info
;
1743 subset_info
= CHARSET_SUBSET (charset
);
1744 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1745 code
-= XINT (AREF (subset_info
, 3));
1746 if (code
< XFASTINT (AREF (subset_info
, 1))
1747 || code
> XFASTINT (AREF (subset_info
, 2)))
1750 c
= DECODE_CHAR (charset
, code
);
1752 else if (method
== CHARSET_METHOD_SUPERSET
)
1754 Lisp_Object parents
;
1756 parents
= CHARSET_SUPERSET (charset
);
1758 for (; CONSP (parents
); parents
= XCDR (parents
))
1760 int id
= XINT (XCAR (XCAR (parents
)));
1761 int code_offset
= XINT (XCDR (XCAR (parents
)));
1762 unsigned this_code
= code
- code_offset
;
1764 charset
= CHARSET_FROM_ID (id
);
1765 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1771 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1775 if (method
== CHARSET_METHOD_MAP
)
1777 Lisp_Object decoder
;
1779 decoder
= CHARSET_DECODER (charset
);
1780 if (! VECTORP (decoder
))
1782 load_charset (charset
, 1);
1783 decoder
= CHARSET_DECODER (charset
);
1785 if (VECTORP (decoder
))
1786 c
= XINT (AREF (decoder
, char_index
));
1788 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1790 else /* method == CHARSET_METHOD_OFFSET */
1792 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1793 if (CHARSET_UNIFIED_P (charset
)
1794 && c
> MAX_UNICODE_CHAR
)
1795 MAYBE_UNIFY_CHAR (c
);
1802 /* Variable used temporarily by the macro ENCODE_CHAR. */
1803 Lisp_Object charset_work
;
1805 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1806 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1807 use CHARSET's strict_max_char instead of max_char. */
1810 encode_char (charset
, c
)
1811 struct charset
*charset
;
1815 enum charset_method method
= CHARSET_METHOD (charset
);
1817 if (CHARSET_UNIFIED_P (charset
))
1819 Lisp_Object deunifier
;
1820 int code_index
= -1;
1822 deunifier
= CHARSET_DEUNIFIER (charset
);
1823 if (! CHAR_TABLE_P (deunifier
))
1825 load_charset (charset
, 2);
1826 deunifier
= CHARSET_DEUNIFIER (charset
);
1828 if (CHAR_TABLE_P (deunifier
))
1830 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1832 if (INTEGERP (deunified
))
1833 code_index
= XINT (deunified
);
1837 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1839 if (code_index
>= 0)
1840 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1843 if (method
== CHARSET_METHOD_SUBSET
)
1845 Lisp_Object subset_info
;
1846 struct charset
*this_charset
;
1848 subset_info
= CHARSET_SUBSET (charset
);
1849 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1850 code
= ENCODE_CHAR (this_charset
, c
);
1851 if (code
== CHARSET_INVALID_CODE (this_charset
)
1852 || code
< XFASTINT (AREF (subset_info
, 1))
1853 || code
> XFASTINT (AREF (subset_info
, 2)))
1854 return CHARSET_INVALID_CODE (charset
);
1855 code
+= XINT (AREF (subset_info
, 3));
1859 if (method
== CHARSET_METHOD_SUPERSET
)
1861 Lisp_Object parents
;
1863 parents
= CHARSET_SUPERSET (charset
);
1864 for (; CONSP (parents
); parents
= XCDR (parents
))
1866 int id
= XINT (XCAR (XCAR (parents
)));
1867 int code_offset
= XINT (XCDR (XCAR (parents
)));
1868 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1870 code
= ENCODE_CHAR (this_charset
, c
);
1871 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1872 return code
+ code_offset
;
1874 return CHARSET_INVALID_CODE (charset
);
1877 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1878 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1879 return CHARSET_INVALID_CODE (charset
);
1881 if (method
== CHARSET_METHOD_MAP
)
1883 Lisp_Object encoder
;
1886 encoder
= CHARSET_ENCODER (charset
);
1887 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1889 load_charset (charset
, 2);
1890 encoder
= CHARSET_ENCODER (charset
);
1892 if (CHAR_TABLE_P (encoder
))
1894 val
= CHAR_TABLE_REF (encoder
, c
);
1896 return CHARSET_INVALID_CODE (charset
);
1898 if (! CHARSET_COMPACT_CODES_P (charset
))
1899 code
= INDEX_TO_CODE_POINT (charset
, code
);
1903 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1904 code
= INDEX_TO_CODE_POINT (charset
, code
);
1907 else /* method == CHARSET_METHOD_OFFSET */
1909 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1911 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1918 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1919 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1920 Return nil if CODE-POINT is not valid in CHARSET.
1922 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1924 Optional argument RESTRICTION specifies a way to map the pair of CCS
1925 and CODE-POINT to a character. Currently not supported and just ignored. */)
1926 (charset
, code_point
, restriction
)
1927 Lisp_Object charset
, code_point
, restriction
;
1931 struct charset
*charsetp
;
1933 CHECK_CHARSET_GET_ID (charset
, id
);
1934 if (CONSP (code_point
))
1936 CHECK_NATNUM_CAR (code_point
);
1937 CHECK_NATNUM_CDR (code_point
);
1938 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1942 CHECK_NATNUM (code_point
);
1943 code
= XINT (code_point
);
1945 charsetp
= CHARSET_FROM_ID (id
);
1946 c
= DECODE_CHAR (charsetp
, code
);
1947 return (c
>= 0 ? make_number (c
) : Qnil
);
1951 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1952 doc
: /* Encode the character CH into a code-point of CHARSET.
1953 Return nil if CHARSET doesn't include CH.
1955 Optional argument RESTRICTION specifies a way to map CH to a
1956 code-point in CCS. Currently not supported and just ignored. */)
1957 (ch
, charset
, restriction
)
1958 Lisp_Object ch
, charset
, restriction
;
1962 struct charset
*charsetp
;
1964 CHECK_CHARSET_GET_ID (charset
, id
);
1966 charsetp
= CHARSET_FROM_ID (id
);
1967 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1968 if (code
== CHARSET_INVALID_CODE (charsetp
))
1970 if (code
> 0x7FFFFFF)
1971 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1972 return make_number (code
);
1976 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1978 /* Return a character of CHARSET whose position codes are CODEn.
1980 CODE1 through CODE4 are optional, but if you don't supply sufficient
1981 position codes, it is assumed that the minimum code in each dimension
1983 (charset
, code1
, code2
, code3
, code4
)
1984 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1987 struct charset
*charsetp
;
1991 CHECK_CHARSET_GET_ID (charset
, id
);
1992 charsetp
= CHARSET_FROM_ID (id
);
1994 dimension
= CHARSET_DIMENSION (charsetp
);
1996 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1997 ? 0 : CHARSET_MIN_CODE (charsetp
));
2000 CHECK_NATNUM (code1
);
2001 if (XFASTINT (code1
) >= 0x100)
2002 args_out_of_range (make_number (0xFF), code1
);
2003 code
= XFASTINT (code1
);
2009 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
2012 CHECK_NATNUM (code2
);
2013 if (XFASTINT (code2
) >= 0x100)
2014 args_out_of_range (make_number (0xFF), code2
);
2015 code
|= XFASTINT (code2
);
2022 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
2025 CHECK_NATNUM (code3
);
2026 if (XFASTINT (code3
) >= 0x100)
2027 args_out_of_range (make_number (0xFF), code3
);
2028 code
|= XFASTINT (code3
);
2035 code
|= charsetp
->code_space
[0];
2038 CHECK_NATNUM (code4
);
2039 if (XFASTINT (code4
) >= 0x100)
2040 args_out_of_range (make_number (0xFF), code4
);
2041 code
|= XFASTINT (code4
);
2048 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
2050 c
= DECODE_CHAR (charsetp
, code
);
2052 error ("Invalid code(s)");
2053 return make_number (c
);
2057 /* Return the first charset in CHARSET_LIST that contains C.
2058 CHARSET_LIST is a list of charset IDs. If it is nil, use
2059 Vcharset_ordered_list. */
2062 char_charset (c
, charset_list
, code_return
)
2064 Lisp_Object charset_list
;
2065 unsigned *code_return
;
2069 if (NILP (charset_list
))
2070 charset_list
= Vcharset_ordered_list
;
2074 while (CONSP (charset_list
))
2076 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2077 unsigned code
= ENCODE_CHAR (charset
, c
);
2079 if (code
!= CHARSET_INVALID_CODE (charset
))
2082 *code_return
= code
;
2085 charset_list
= XCDR (charset_list
);
2086 if (c
<= MAX_UNICODE_CHAR
2087 && EQ (charset_list
, Vcharset_non_preferred_head
))
2088 return CHARSET_FROM_ID (charset_unicode
);
2090 return (maybe_null
? NULL
2091 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2092 : CHARSET_FROM_ID (charset_eight_bit
));
2096 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2098 /*Return list of charset and one to four position-codes of CH.
2099 The charset is decided by the current priority order of charsets.
2100 A position-code is a byte value of each dimension of the code-point of
2101 CH in the charset. */)
2105 struct charset
*charset
;
2110 CHECK_CHARACTER (ch
);
2112 charset
= CHAR_CHARSET (c
);
2115 code
= ENCODE_CHAR (charset
, c
);
2116 if (code
== CHARSET_INVALID_CODE (charset
))
2118 dimension
= CHARSET_DIMENSION (charset
);
2119 for (val
= Qnil
; dimension
> 0; dimension
--)
2121 val
= Fcons (make_number (code
& 0xFF), val
);
2124 return Fcons (CHARSET_NAME (charset
), val
);
2128 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2129 doc
: /* Return the charset of highest priority that contains CH.
2130 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2131 from which to find the charset. It may also be a coding system. In
2132 that case, find the charset from what supported by that coding system. */)
2134 Lisp_Object ch
, restriction
;
2136 struct charset
*charset
;
2138 CHECK_CHARACTER (ch
);
2139 if (NILP (restriction
))
2140 charset
= CHAR_CHARSET (XINT (ch
));
2143 if (CONSP (restriction
))
2145 int c
= XFASTINT (ch
);
2147 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2149 struct charset
*charset
;
2151 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), charset
);
2152 if (ENCODE_CHAR (charset
, c
) != CHARSET_INVALID_CODE (charset
))
2153 return XCAR (restriction
);
2157 restriction
= coding_system_charset_list (restriction
);
2158 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2162 return (CHARSET_NAME (charset
));
2166 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2168 Return charset of a character in the current buffer at position POS.
2169 If POS is nil, it defauls to the current point.
2170 If POS is out of range, the value is nil. */)
2175 struct charset
*charset
;
2177 ch
= Fchar_after (pos
);
2178 if (! INTEGERP (ch
))
2180 charset
= CHAR_CHARSET (XINT (ch
));
2181 return (CHARSET_NAME (charset
));
2185 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2187 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2189 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2190 by their DIMENSION, CHARS, and FINAL-CHAR,
2191 whereas Emacs distinguishes them by charset symbol.
2192 See the documentation of the function `charset-info' for the meanings of
2193 DIMENSION, CHARS, and FINAL-CHAR. */)
2194 (dimension
, chars
, final_char
)
2195 Lisp_Object dimension
, chars
, final_char
;
2200 check_iso_charset_parameter (dimension
, chars
, final_char
);
2201 chars_flag
= XFASTINT (chars
) == 96;
2202 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2203 XFASTINT (final_char
));
2204 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2208 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2212 Clear temporary charset mapping tables.
2213 It should be called only from temacs invoked for dumping. */)
2216 if (temp_charset_work
)
2218 free (temp_charset_work
);
2219 temp_charset_work
= NULL
;
2222 if (CHAR_TABLE_P (Vchar_unify_table
))
2223 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2228 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2229 Scharset_priority_list
, 0, 1, 0,
2230 doc
: /* Return the list of charsets ordered by priority.
2231 HIGHESTP non-nil means just return the highest priority one. */)
2233 Lisp_Object highestp
;
2235 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2237 if (!NILP (highestp
))
2238 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2240 while (!NILP (list
))
2242 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2245 return Fnreverse (val
);
2248 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2250 doc
: /* Assign higher priority to the charsets given as arguments.
2251 usage: (set-charset-priority &rest charsets) */)
2256 Lisp_Object new_head
, old_list
, arglist
[2];
2257 Lisp_Object list_2022
, list_emacs_mule
;
2260 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2262 for (i
= 0; i
< nargs
; i
++)
2264 CHECK_CHARSET_GET_ID (args
[i
], id
);
2265 if (! NILP (Fmemq (make_number (id
), old_list
)))
2267 old_list
= Fdelq (make_number (id
), old_list
);
2268 new_head
= Fcons (make_number (id
), new_head
);
2271 arglist
[0] = Fnreverse (new_head
);
2272 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2273 Vcharset_ordered_list
= Fnconc (2, arglist
);
2274 charset_ordered_list_tick
++;
2276 charset_unibyte
= -1;
2277 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2278 CONSP (old_list
); old_list
= XCDR (old_list
))
2280 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2281 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2282 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2283 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2284 if (charset_unibyte
< 0)
2286 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2288 if (CHARSET_DIMENSION (charset
) == 1
2289 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2290 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2291 charset_unibyte
= CHARSET_ID (charset
);
2294 Viso_2022_charset_list
= Fnreverse (list_2022
);
2295 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2296 if (charset_unibyte
< 0)
2297 charset_unibyte
= charset_iso_8859_1
;
2302 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2304 doc
: /* Internal use only.
2305 Return charset identification number of CHARSET. */)
2307 Lisp_Object charset
;
2311 CHECK_CHARSET_GET_ID (charset
, id
);
2312 return make_number (id
);
2315 struct charset_sort_data
2317 Lisp_Object charset
;
2323 charset_compare (const void *d1
, const void *d2
)
2325 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2326 return (data1
->priority
- data2
->priority
);
2329 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2330 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2331 Return the sorted list. CHARSETS is modified by side effects.
2332 See also `charset-priority-list' and `set-charset-priority'. */)
2333 (Lisp_Object charsets
)
2335 Lisp_Object len
= Flength (charsets
);
2336 int n
= XFASTINT (len
), i
, j
, done
;
2337 Lisp_Object tail
, elt
, attrs
;
2338 struct charset_sort_data
*sort_data
;
2339 int id
, min_id
, max_id
;
2344 SAFE_ALLOCA (sort_data
, struct charset_sort_data
*, sizeof (*sort_data
) * n
);
2345 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2348 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2349 sort_data
[i
].charset
= elt
;
2350 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2352 min_id
= max_id
= id
;
2353 else if (id
< min_id
)
2355 else if (id
> max_id
)
2358 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2359 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2362 id
= XFASTINT (elt
);
2363 if (id
>= min_id
&& id
<= max_id
)
2364 for (j
= 0; j
< n
; j
++)
2365 if (sort_data
[j
].id
== id
)
2367 sort_data
[j
].priority
= i
;
2371 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2372 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2373 XSETCAR (tail
, sort_data
[i
].charset
);
2382 Lisp_Object tempdir
;
2383 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2384 if (access ((char *) SDATA (tempdir
), 0) < 0)
2386 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2387 Emacs will not function correctly without the character map files.\n\
2388 Please check your installation!\n",
2390 /* TODO should this be a fatal error? (Bug#909) */
2393 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2398 init_charset_once ()
2402 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2403 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2404 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2405 iso_charset_table
[i
][j
][k
] = -1;
2407 for (i
= 0; i
< 256; i
++)
2408 emacs_mule_charset
[i
] = -1;
2410 charset_jisx0201_roman
= -1;
2411 charset_jisx0208_1978
= -1;
2412 charset_jisx0208
= -1;
2413 charset_ksc5601
= -1;
2421 DEFSYM (Qcharsetp
, "charsetp");
2423 DEFSYM (Qascii
, "ascii");
2424 DEFSYM (Qunicode
, "unicode");
2425 DEFSYM (Qemacs
, "emacs");
2426 DEFSYM (Qeight_bit
, "eight-bit");
2427 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2432 staticpro (&Vcharset_ordered_list
);
2433 Vcharset_ordered_list
= Qnil
;
2435 staticpro (&Viso_2022_charset_list
);
2436 Viso_2022_charset_list
= Qnil
;
2438 staticpro (&Vemacs_mule_charset_list
);
2439 Vemacs_mule_charset_list
= Qnil
;
2441 /* Don't staticpro them here. It's done in syms_of_fns. */
2442 QCtest
= intern (":test");
2443 Qeq
= intern ("eq");
2445 staticpro (&Vcharset_hash_table
);
2447 Lisp_Object args
[2];
2450 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2453 charset_table_size
= 128;
2454 charset_table
= ((struct charset
*)
2455 xmalloc (sizeof (struct charset
) * charset_table_size
));
2456 charset_table_used
= 0;
2458 defsubr (&Scharsetp
);
2459 defsubr (&Smap_charset_chars
);
2460 defsubr (&Sdefine_charset_internal
);
2461 defsubr (&Sdefine_charset_alias
);
2462 defsubr (&Scharset_plist
);
2463 defsubr (&Sset_charset_plist
);
2464 defsubr (&Sunify_charset
);
2465 defsubr (&Sget_unused_iso_final_char
);
2466 defsubr (&Sdeclare_equiv_charset
);
2467 defsubr (&Sfind_charset_region
);
2468 defsubr (&Sfind_charset_string
);
2469 defsubr (&Sdecode_char
);
2470 defsubr (&Sencode_char
);
2471 defsubr (&Ssplit_char
);
2472 defsubr (&Smake_char
);
2473 defsubr (&Schar_charset
);
2474 defsubr (&Scharset_after
);
2475 defsubr (&Siso_charset
);
2476 defsubr (&Sclear_charset_maps
);
2477 defsubr (&Scharset_priority_list
);
2478 defsubr (&Sset_charset_priority
);
2479 defsubr (&Scharset_id_internal
);
2480 defsubr (&Ssort_charsets
);
2482 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2483 doc
: /* *List of directories to search for charset map files. */);
2484 Vcharset_map_path
= Qnil
;
2486 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2487 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2488 inhibit_load_charset_map
= 0;
2490 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2491 doc
: /* List of all charsets ever defined. */);
2492 Vcharset_list
= Qnil
;
2494 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2495 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2496 If the current language environment is for multiple languages (e.g. "Latin-1"),
2497 the value may be a list of mnemonics. */);
2498 Vcurrent_iso639_language
= Qnil
;
2501 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2502 0, 127, 'B', -1, 0, 1, 0, 0);
2504 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2505 0, 255, -1, -1, -1, 1, 0, 0);
2507 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2508 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2510 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2511 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2513 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2514 128, 255, -1, 0, -1, 0, 1,
2515 MAX_5_BYTE_CHAR
+ 1);
2516 charset_unibyte
= charset_iso_8859_1
;
2521 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2522 (do not change this comment) */