1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
36 #include "character.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* List of all charsets. This variable is used only from Emacs
59 Lisp_Object Vcharset_list
;
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table
;
65 /* Table of struct charset. */
66 struct charset
*charset_table
;
68 static int charset_table_size
;
69 static int charset_table_used
;
71 Lisp_Object Qcharsetp
;
73 /* Special charset symbols. */
75 Lisp_Object Qeight_bit
;
76 Lisp_Object Qiso_8859_1
;
80 /* The corresponding charsets. */
82 int charset_eight_bit
;
83 int charset_iso_8859_1
;
87 /* The other special charsets. */
88 int charset_jisx0201_roman
;
89 int charset_jisx0208_1978
;
93 /* Value of charset attribute `charset-iso-plane'. */
96 /* Charset of unibyte characters. */
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list
;
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
104 Lisp_Object Vcharset_non_preferred_head
;
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
109 unsigned short charset_ordered_list_tick
;
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list
;
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list
;
117 struct charset
*emacs_mule_charset
[256];
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
123 Lisp_Object Vcharset_map_path
;
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map
;
128 Lisp_Object Vcurrent_iso639_language
;
130 /* Defined in chartab.c */
132 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
133 Lisp_Object function
, Lisp_Object table
,
134 Lisp_Object arg
, struct charset
*charset
,
135 unsigned from
, unsigned to
));
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
177 /* The current charset for which the following tables are setup. */
178 struct charset
*current
;
180 /* 1 iff the following table is used for encoder. */
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char
, max_char
;
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder
[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder
[0x20000];
205 } *temp_charset_work
;
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded
;
235 struct charset_map_entries
241 struct charset_map_entries
*next
;
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
260 If CONTROL_FLAG is 2, setup the following tables.
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
270 load_charset_map (charset
, entries
, n_entries
, control_flag
)
271 struct charset
*charset
;
272 struct charset_map_entries
*entries
;
276 Lisp_Object vec
, table
;
277 unsigned max_code
= CHARSET_MAX_CODE (charset
);
278 int ascii_compatible_p
= charset
->ascii_compatible_p
;
279 int min_char
, max_char
, nonascii_min_char
;
281 unsigned char *fast_map
= charset
->fast_map
;
288 if (! inhibit_load_charset_map
)
290 if (control_flag
== 1)
292 if (charset
->method
== CHARSET_METHOD_MAP
)
294 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
296 vec
= CHARSET_DECODER (charset
)
297 = Fmake_vector (make_number (n
), make_number (-1));
301 char_table_set_range (Vchar_unify_table
,
302 charset
->min_char
, charset
->max_char
,
308 table
= Fmake_char_table (Qnil
, Qnil
);
309 if (charset
->method
== CHARSET_METHOD_MAP
)
310 CHARSET_ENCODER (charset
) = table
;
312 CHARSET_DEUNIFIER (charset
) = table
;
317 if (! temp_charset_work
)
318 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
319 if (control_flag
== 1)
321 memset (temp_charset_work
->table
.decoder
, -1,
322 sizeof (int) * 0x10000);
326 memset (temp_charset_work
->table
.encoder
, 0,
327 sizeof (unsigned short) * 0x20000);
328 temp_charset_work
->zero_index_char
= -1;
330 temp_charset_work
->current
= charset
;
331 temp_charset_work
->for_encoder
= (control_flag
== 2);
334 charset_map_loaded
= 1;
337 min_char
= max_char
= entries
->entry
[0].c
;
338 nonascii_min_char
= MAX_CHAR
;
339 for (i
= 0; i
< n_entries
; i
++)
342 int from_index
, to_index
;
344 int idx
= i
% 0x10000;
346 if (i
> 0 && idx
== 0)
347 entries
= entries
->next
;
348 from
= entries
->entry
[idx
].from
;
349 to
= entries
->entry
[idx
].to
;
350 from_c
= entries
->entry
[idx
].c
;
351 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
354 to_index
= from_index
;
359 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
360 to_c
= from_c
+ (to_index
- from_index
);
362 if (from_index
< 0 || to_index
< 0)
367 else if (from_c
< min_char
)
370 if (control_flag
== 1)
372 if (charset
->method
== CHARSET_METHOD_MAP
)
373 for (; from_index
<= to_index
; from_index
++, from_c
++)
374 ASET (vec
, from_index
, make_number (from_c
));
376 for (; from_index
<= to_index
; from_index
++, from_c
++)
377 CHAR_TABLE_SET (Vchar_unify_table
,
378 CHARSET_CODE_OFFSET (charset
) + from_index
,
379 make_number (from_c
));
381 else if (control_flag
== 2)
383 if (charset
->method
== CHARSET_METHOD_MAP
384 && CHARSET_COMPACT_CODES_P (charset
))
385 for (; from_index
<= to_index
; from_index
++, from_c
++)
387 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
389 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
390 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
393 for (; from_index
<= to_index
; from_index
++, from_c
++)
395 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
396 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
399 else if (control_flag
== 3)
400 for (; from_index
<= to_index
; from_index
++, from_c
++)
401 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
402 else if (control_flag
== 4)
403 for (; from_index
<= to_index
; from_index
++, from_c
++)
404 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
405 else /* control_flag == 0 */
407 if (ascii_compatible_p
)
409 if (! ASCII_BYTE_P (from_c
))
411 if (from_c
< nonascii_min_char
)
412 nonascii_min_char
= from_c
;
414 else if (! ASCII_BYTE_P (to_c
))
416 nonascii_min_char
= 0x80;
420 for (; from_c
<= to_c
; from_c
++)
421 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
425 if (control_flag
== 0)
427 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
428 ? nonascii_min_char
: min_char
);
429 CHARSET_MAX_CHAR (charset
) = max_char
;
431 else if (control_flag
== 4)
433 temp_charset_work
->min_char
= min_char
;
434 temp_charset_work
->max_char
= max_char
;
439 /* Read a hexadecimal number (preceded by "0x") from the file FP while
440 paying attention to comment charcter '#'. */
442 static INLINE
unsigned
450 while ((c
= getc (fp
)) != EOF
)
454 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
458 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
470 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
472 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
474 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
475 n
= (n
* 10) + c
- '0';
481 extern Lisp_Object Qfile_name_handler_alist
;
483 /* Return a mapping vector for CHARSET loaded from MAPFILE.
484 Each line of MAPFILE has this form
486 where 0xAAAA is a code-point and 0xCCCC is the corresponding
487 character code, or this form
489 where 0xAAAA and 0xBBBB are code-points specifying a range, and
490 0xCCCC is the first character code of the range.
492 The returned vector has this form:
493 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
494 where CODE1 is a code-point or a cons of code-points specifying a
497 Note that this function uses `openp' to open MAPFILE but ignores
498 `file-name-handler-alist' to avoid running any Lisp code. */
500 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
503 load_charset_map_from_file (charset
, mapfile
, control_flag
)
504 struct charset
*charset
;
508 unsigned min_code
= CHARSET_MIN_CODE (charset
);
509 unsigned max_code
= CHARSET_MAX_CODE (charset
);
513 Lisp_Object suffixes
;
514 struct charset_map_entries
*head
, *entries
;
515 int n_entries
, count
;
518 suffixes
= Fcons (build_string (".map"),
519 Fcons (build_string (".TXT"), Qnil
));
521 count
= SPECPDL_INDEX ();
522 specbind (Qfile_name_handler_alist
, Qnil
);
523 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
524 unbind_to (count
, Qnil
);
526 || ! (fp
= fdopen (fd
, "r")))
527 error ("Failure in loading charset map: %S", SDATA (mapfile
));
529 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
530 large (larger than MAX_ALLOCA). */
531 SAFE_ALLOCA (head
, struct charset_map_entries
*,
532 sizeof (struct charset_map_entries
));
534 bzero (entries
, sizeof (struct charset_map_entries
));
544 from
= read_hex (fp
, &eof
);
547 if (getc (fp
) == '-')
548 to
= read_hex (fp
, &eof
);
551 c
= (int) read_hex (fp
, &eof
);
553 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
556 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
558 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
559 sizeof (struct charset_map_entries
));
560 entries
= entries
->next
;
561 bzero (entries
, sizeof (struct charset_map_entries
));
563 idx
= n_entries
% 0x10000;
564 entries
->entry
[idx
].from
= from
;
565 entries
->entry
[idx
].to
= to
;
566 entries
->entry
[idx
].c
= c
;
571 load_charset_map (charset
, head
, n_entries
, control_flag
);
576 load_charset_map_from_vector (charset
, vec
, control_flag
)
577 struct charset
*charset
;
581 unsigned min_code
= CHARSET_MIN_CODE (charset
);
582 unsigned max_code
= CHARSET_MAX_CODE (charset
);
583 struct charset_map_entries
*head
, *entries
;
585 int len
= ASIZE (vec
);
591 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
595 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
596 large (larger than MAX_ALLOCA). */
597 SAFE_ALLOCA (head
, struct charset_map_entries
*,
598 sizeof (struct charset_map_entries
));
600 bzero (entries
, sizeof (struct charset_map_entries
));
603 for (i
= 0; i
< len
; i
+= 2)
605 Lisp_Object val
, val2
;
617 from
= XFASTINT (val
);
618 to
= XFASTINT (val2
);
623 from
= to
= XFASTINT (val
);
625 val
= AREF (vec
, i
+ 1);
629 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
632 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
634 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
635 sizeof (struct charset_map_entries
));
636 entries
= entries
->next
;
637 bzero (entries
, sizeof (struct charset_map_entries
));
639 idx
= n_entries
% 0x10000;
640 entries
->entry
[idx
].from
= from
;
641 entries
->entry
[idx
].to
= to
;
642 entries
->entry
[idx
].c
= c
;
646 load_charset_map (charset
, head
, n_entries
, control_flag
);
651 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
652 map it is (see the comment of load_charset_map for the detail). */
655 load_charset (charset
, control_flag
)
656 struct charset
*charset
;
661 if (inhibit_load_charset_map
663 && charset
== temp_charset_work
->current
664 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
667 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
668 map
= CHARSET_MAP (charset
);
669 else if (CHARSET_UNIFIED_P (charset
))
670 map
= CHARSET_UNIFY_MAP (charset
);
672 load_charset_map_from_file (charset
, map
, control_flag
);
674 load_charset_map_from_vector (charset
, map
, control_flag
);
678 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
679 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
683 return (CHARSETP (object
) ? Qt
: Qnil
);
687 void map_charset_for_dump
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
688 Lisp_Object function
, Lisp_Object arg
,
689 unsigned from
, unsigned to
));
692 map_charset_for_dump (c_function
, function
, arg
, from
, to
)
693 void (*c_function
) (Lisp_Object
, Lisp_Object
);
694 Lisp_Object function
, arg
;
697 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
698 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
703 range
= Fcons (Qnil
, Qnil
);
706 c
= temp_charset_work
->min_char
;
707 stop
= (temp_charset_work
->max_char
< 0x20000
708 ? temp_charset_work
->max_char
: 0xFFFF);
712 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
714 if (index
>= from_idx
&& index
<= to_idx
)
716 if (NILP (XCAR (range
)))
717 XSETCAR (range
, make_number (c
));
719 else if (! NILP (XCAR (range
)))
721 XSETCDR (range
, make_number (c
- 1));
723 (*c_function
) (arg
, range
);
725 call2 (function
, range
, arg
);
726 XSETCAR (range
, Qnil
);
730 if (c
== temp_charset_work
->max_char
)
732 if (! NILP (XCAR (range
)))
734 XSETCDR (range
, make_number (c
));
736 (*c_function
) (arg
, range
);
738 call2 (function
, range
, arg
);
743 stop
= temp_charset_work
->max_char
;
751 map_charset_chars (c_function
, function
, arg
,
753 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
754 Lisp_Object function
, arg
;
755 struct charset
*charset
;
761 partial
= (from
> CHARSET_MIN_CODE (charset
)
762 || to
< CHARSET_MAX_CODE (charset
));
764 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
766 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
767 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
768 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
769 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
771 if (CHARSET_UNIFIED_P (charset
))
773 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
774 load_charset (charset
, 2);
775 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
776 map_char_table_for_charset (c_function
, function
,
777 CHARSET_DEUNIFIER (charset
), arg
,
778 partial
? charset
: NULL
, from
, to
);
780 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
783 range
= Fcons (make_number (from_c
), make_number (to_c
));
785 (*c_function
) (arg
, range
);
787 call2 (function
, range
, arg
);
789 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
791 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
792 load_charset (charset
, 2);
793 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
794 map_char_table_for_charset (c_function
, function
,
795 CHARSET_ENCODER (charset
), arg
,
796 partial
? charset
: NULL
, from
, to
);
798 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
800 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
802 Lisp_Object subset_info
;
805 subset_info
= CHARSET_SUBSET (charset
);
806 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
807 offset
= XINT (AREF (subset_info
, 3));
809 if (from
< XFASTINT (AREF (subset_info
, 1)))
810 from
= XFASTINT (AREF (subset_info
, 1));
812 if (to
> XFASTINT (AREF (subset_info
, 2)))
813 to
= XFASTINT (AREF (subset_info
, 2));
814 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
816 else /* i.e. CHARSET_METHOD_SUPERSET */
820 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
821 parents
= XCDR (parents
))
824 unsigned this_from
, this_to
;
826 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
827 offset
= XINT (XCDR (XCAR (parents
)));
828 this_from
= from
> offset
? from
- offset
: 0;
829 this_to
= to
> offset
? to
- offset
: 0;
830 if (this_from
< CHARSET_MIN_CODE (charset
))
831 this_from
= CHARSET_MIN_CODE (charset
);
832 if (this_to
> CHARSET_MAX_CODE (charset
))
833 this_to
= CHARSET_MAX_CODE (charset
);
834 map_charset_chars (c_function
, function
, arg
, charset
,
840 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
841 doc
: /* Call FUNCTION for all characters in CHARSET.
842 FUNCTION is called with an argument RANGE and the optional 3rd
845 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
846 characters contained in CHARSET.
848 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
849 range of code points (in CHARSET) of target characters. */)
850 (function
, charset
, arg
, from_code
, to_code
)
851 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
856 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
857 if (NILP (from_code
))
858 from
= CHARSET_MIN_CODE (cs
);
861 CHECK_NATNUM (from_code
);
862 from
= XINT (from_code
);
863 if (from
< CHARSET_MIN_CODE (cs
))
864 from
= CHARSET_MIN_CODE (cs
);
867 to
= CHARSET_MAX_CODE (cs
);
870 CHECK_NATNUM (to_code
);
872 if (to
> CHARSET_MAX_CODE (cs
))
873 to
= CHARSET_MAX_CODE (cs
);
875 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
880 /* Define a charset according to the arguments. The Nth argument is
881 the Nth attribute of the charset (the last attribute `charset-id'
882 is not included). See the docstring of `define-charset' for the
885 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
886 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
887 doc
: /* For internal use only.
888 usage: (define-charset-internal ...) */)
893 /* Charset attr vector. */
897 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
899 struct charset charset
;
902 int new_definition_p
;
905 if (nargs
!= charset_arg_max
)
906 return Fsignal (Qwrong_number_of_arguments
,
907 Fcons (intern ("define-charset-internal"),
908 make_number (nargs
)));
910 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
912 CHECK_SYMBOL (args
[charset_arg_name
]);
913 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
915 val
= args
[charset_arg_code_space
];
916 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
918 int min_byte
, max_byte
;
920 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
921 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
922 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
923 error ("Invalid :code-space value");
924 charset
.code_space
[i
* 4] = min_byte
;
925 charset
.code_space
[i
* 4 + 1] = max_byte
;
926 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
927 nchars
*= charset
.code_space
[i
* 4 + 2];
928 charset
.code_space
[i
* 4 + 3] = nchars
;
933 val
= args
[charset_arg_dimension
];
935 charset
.dimension
= dimension
;
939 charset
.dimension
= XINT (val
);
940 if (charset
.dimension
< 1 || charset
.dimension
> 4)
941 args_out_of_range_3 (val
, make_number (1), make_number (4));
944 charset
.code_linear_p
945 = (charset
.dimension
== 1
946 || (charset
.code_space
[2] == 256
947 && (charset
.dimension
== 2
948 || (charset
.code_space
[6] == 256
949 && (charset
.dimension
== 3
950 || charset
.code_space
[10] == 256)))));
952 if (! charset
.code_linear_p
)
954 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
955 bzero (charset
.code_space_mask
, 256);
956 for (i
= 0; i
< 4; i
++)
957 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
959 charset
.code_space_mask
[j
] |= (1 << i
);
962 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
964 charset
.min_code
= (charset
.code_space
[0]
965 | (charset
.code_space
[4] << 8)
966 | (charset
.code_space
[8] << 16)
967 | (charset
.code_space
[12] << 24));
968 charset
.max_code
= (charset
.code_space
[1]
969 | (charset
.code_space
[5] << 8)
970 | (charset
.code_space
[9] << 16)
971 | (charset
.code_space
[13] << 24));
972 charset
.char_index_offset
= 0;
974 val
= args
[charset_arg_min_code
];
984 CHECK_NUMBER_CAR (val
);
985 CHECK_NUMBER_CDR (val
);
986 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
988 if (code
< charset
.min_code
989 || code
> charset
.max_code
)
990 args_out_of_range_3 (make_number (charset
.min_code
),
991 make_number (charset
.max_code
), val
);
992 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
993 charset
.min_code
= code
;
996 val
= args
[charset_arg_max_code
];
1006 CHECK_NUMBER_CAR (val
);
1007 CHECK_NUMBER_CDR (val
);
1008 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
1010 if (code
< charset
.min_code
1011 || code
> charset
.max_code
)
1012 args_out_of_range_3 (make_number (charset
.min_code
),
1013 make_number (charset
.max_code
), val
);
1014 charset
.max_code
= code
;
1017 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
1019 val
= args
[charset_arg_invalid_code
];
1022 if (charset
.min_code
> 0)
1023 charset
.invalid_code
= 0;
1026 XSETINT (val
, charset
.max_code
+ 1);
1027 if (XINT (val
) == charset
.max_code
+ 1)
1028 charset
.invalid_code
= charset
.max_code
+ 1;
1030 error ("Attribute :invalid-code must be specified");
1036 charset
.invalid_code
= XFASTINT (val
);
1039 val
= args
[charset_arg_iso_final
];
1041 charset
.iso_final
= -1;
1045 if (XINT (val
) < '0' || XINT (val
) > 127)
1046 error ("Invalid iso-final-char: %d", XINT (val
));
1047 charset
.iso_final
= XINT (val
);
1050 val
= args
[charset_arg_iso_revision
];
1052 charset
.iso_revision
= -1;
1056 if (XINT (val
) > 63)
1057 args_out_of_range (make_number (63), val
);
1058 charset
.iso_revision
= XINT (val
);
1061 val
= args
[charset_arg_emacs_mule_id
];
1063 charset
.emacs_mule_id
= -1;
1067 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1068 error ("Invalid emacs-mule-id: %d", XINT (val
));
1069 charset
.emacs_mule_id
= XINT (val
);
1072 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1074 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1076 charset
.unified_p
= 0;
1078 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
1080 if (! NILP (args
[charset_arg_code_offset
]))
1082 val
= args
[charset_arg_code_offset
];
1085 charset
.method
= CHARSET_METHOD_OFFSET
;
1086 charset
.code_offset
= XINT (val
);
1088 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1089 charset
.min_char
= i
+ charset
.code_offset
;
1090 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1091 charset
.max_char
= i
+ charset
.code_offset
;
1092 if (charset
.max_char
> MAX_CHAR
)
1093 error ("Unsupported max char: %d", charset
.max_char
);
1095 i
= (charset
.min_char
>> 7) << 7;
1096 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1097 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1098 i
= (i
>> 12) << 12;
1099 for (; i
<= charset
.max_char
; i
+= 0x1000)
1100 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1101 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1102 charset
.ascii_compatible_p
= 1;
1104 else if (! NILP (args
[charset_arg_map
]))
1106 val
= args
[charset_arg_map
];
1107 ASET (attrs
, charset_map
, val
);
1108 charset
.method
= CHARSET_METHOD_MAP
;
1110 else if (! NILP (args
[charset_arg_subset
]))
1113 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1114 struct charset
*parent_charset
;
1116 val
= args
[charset_arg_subset
];
1117 parent
= Fcar (val
);
1118 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1119 parent_min_code
= Fnth (make_number (1), val
);
1120 CHECK_NATNUM (parent_min_code
);
1121 parent_max_code
= Fnth (make_number (2), val
);
1122 CHECK_NATNUM (parent_max_code
);
1123 parent_code_offset
= Fnth (make_number (3), val
);
1124 CHECK_NUMBER (parent_code_offset
);
1125 val
= Fmake_vector (make_number (4), Qnil
);
1126 ASET (val
, 0, make_number (parent_charset
->id
));
1127 ASET (val
, 1, parent_min_code
);
1128 ASET (val
, 2, parent_max_code
);
1129 ASET (val
, 3, parent_code_offset
);
1130 ASET (attrs
, charset_subset
, val
);
1132 charset
.method
= CHARSET_METHOD_SUBSET
;
1133 /* Here, we just copy the parent's fast_map. It's not accurate,
1134 but at least it works for quickly detecting which character
1135 DOESN'T belong to this charset. */
1136 for (i
= 0; i
< 190; i
++)
1137 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1139 /* We also copy these for parents. */
1140 charset
.min_char
= parent_charset
->min_char
;
1141 charset
.max_char
= parent_charset
->max_char
;
1143 else if (! NILP (args
[charset_arg_superset
]))
1145 val
= args
[charset_arg_superset
];
1146 charset
.method
= CHARSET_METHOD_SUPERSET
;
1147 val
= Fcopy_sequence (val
);
1148 ASET (attrs
, charset_superset
, val
);
1150 charset
.min_char
= MAX_CHAR
;
1151 charset
.max_char
= 0;
1152 for (; ! NILP (val
); val
= Fcdr (val
))
1154 Lisp_Object elt
, car_part
, cdr_part
;
1155 int this_id
, offset
;
1156 struct charset
*this_charset
;
1161 car_part
= XCAR (elt
);
1162 cdr_part
= XCDR (elt
);
1163 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1164 CHECK_NUMBER (cdr_part
);
1165 offset
= XINT (cdr_part
);
1169 CHECK_CHARSET_GET_ID (elt
, this_id
);
1172 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1174 this_charset
= CHARSET_FROM_ID (this_id
);
1175 if (charset
.min_char
> this_charset
->min_char
)
1176 charset
.min_char
= this_charset
->min_char
;
1177 if (charset
.max_char
< this_charset
->max_char
)
1178 charset
.max_char
= this_charset
->max_char
;
1179 for (i
= 0; i
< 190; i
++)
1180 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1184 error ("None of :code-offset, :map, :parents are specified");
1186 val
= args
[charset_arg_unify_map
];
1187 if (! NILP (val
) && !STRINGP (val
))
1189 ASET (attrs
, charset_unify_map
, val
);
1191 CHECK_LIST (args
[charset_arg_plist
]);
1192 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1194 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1196 if (charset
.hash_index
>= 0)
1198 new_definition_p
= 0;
1199 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1200 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1204 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1206 if (charset_table_used
== charset_table_size
)
1208 struct charset
*new_table
1209 = (struct charset
*) xmalloc (sizeof (struct charset
)
1210 * (charset_table_size
+ 16));
1211 bcopy (charset_table
, new_table
,
1212 sizeof (struct charset
) * charset_table_size
);
1213 charset_table_size
+= 16;
1214 charset_table
= new_table
;
1216 id
= charset_table_used
++;
1217 new_definition_p
= 1;
1220 ASET (attrs
, charset_id
, make_number (id
));
1222 charset_table
[id
] = charset
;
1224 if (charset
.method
== CHARSET_METHOD_MAP
)
1226 load_charset (&charset
, 0);
1227 charset_table
[id
] = charset
;
1230 if (charset
.iso_final
>= 0)
1232 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1233 charset
.iso_final
) = id
;
1234 if (new_definition_p
)
1235 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1236 Fcons (make_number (id
), Qnil
));
1237 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1238 charset_jisx0201_roman
= id
;
1239 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1240 charset_jisx0208_1978
= id
;
1241 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1242 charset_jisx0208
= id
;
1243 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1244 charset_ksc5601
= id
;
1247 if (charset
.emacs_mule_id
>= 0)
1249 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1250 if (charset
.emacs_mule_id
< 0xA0)
1251 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1253 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1254 if (new_definition_p
)
1255 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1256 Fcons (make_number (id
), Qnil
));
1259 if (new_definition_p
)
1261 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1262 if (charset
.supplementary_p
)
1263 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1264 Fcons (make_number (id
), Qnil
));
1269 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1271 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1273 if (cs
->supplementary_p
)
1276 if (EQ (tail
, Vcharset_ordered_list
))
1277 Vcharset_ordered_list
= Fcons (make_number (id
),
1278 Vcharset_ordered_list
);
1279 else if (NILP (tail
))
1280 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1281 Fcons (make_number (id
), Qnil
));
1284 val
= Fcons (XCAR (tail
), XCDR (tail
));
1285 XSETCDR (tail
, val
);
1286 XSETCAR (tail
, make_number (id
));
1289 charset_ordered_list_tick
++;
1296 /* Same as Fdefine_charset_internal but arguments are more convenient
1297 to call from C (typically in syms_of_charset). This can define a
1298 charset of `offset' method only. Return the ID of the new
1302 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1303 iso_final
, iso_revision
, emacs_mule_id
,
1304 ascii_compatible
, supplementary
,
1308 unsigned char *code_space
;
1309 unsigned min_code
, max_code
;
1310 int iso_final
, iso_revision
, emacs_mule_id
;
1311 int ascii_compatible
, supplementary
;
1314 Lisp_Object args
[charset_arg_max
];
1315 Lisp_Object plist
[14];
1319 args
[charset_arg_name
] = name
;
1320 args
[charset_arg_dimension
] = make_number (dimension
);
1321 val
= Fmake_vector (make_number (8), make_number (0));
1322 for (i
= 0; i
< 8; i
++)
1323 ASET (val
, i
, make_number (code_space
[i
]));
1324 args
[charset_arg_code_space
] = val
;
1325 args
[charset_arg_min_code
] = make_number (min_code
);
1326 args
[charset_arg_max_code
] = make_number (max_code
);
1327 args
[charset_arg_iso_final
]
1328 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1329 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1330 args
[charset_arg_emacs_mule_id
]
1331 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1332 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1333 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1334 args
[charset_arg_invalid_code
] = Qnil
;
1335 args
[charset_arg_code_offset
] = make_number (code_offset
);
1336 args
[charset_arg_map
] = Qnil
;
1337 args
[charset_arg_subset
] = Qnil
;
1338 args
[charset_arg_superset
] = Qnil
;
1339 args
[charset_arg_unify_map
] = Qnil
;
1341 plist
[0] = intern_c_string (":name");
1342 plist
[1] = args
[charset_arg_name
];
1343 plist
[2] = intern_c_string (":dimension");
1344 plist
[3] = args
[charset_arg_dimension
];
1345 plist
[4] = intern_c_string (":code-space");
1346 plist
[5] = args
[charset_arg_code_space
];
1347 plist
[6] = intern_c_string (":iso-final-char");
1348 plist
[7] = args
[charset_arg_iso_final
];
1349 plist
[8] = intern_c_string (":emacs-mule-id");
1350 plist
[9] = args
[charset_arg_emacs_mule_id
];
1351 plist
[10] = intern_c_string (":ascii-compatible-p");
1352 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1353 plist
[12] = intern_c_string (":code-offset");
1354 plist
[13] = args
[charset_arg_code_offset
];
1356 args
[charset_arg_plist
] = Flist (14, plist
);
1357 Fdefine_charset_internal (charset_arg_max
, args
);
1359 return XINT (CHARSET_SYMBOL_ID (name
));
1363 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1364 Sdefine_charset_alias
, 2, 2, 0,
1365 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1367 Lisp_Object alias
, charset
;
1371 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1372 Fputhash (alias
, attr
, Vcharset_hash_table
);
1373 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1378 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1379 doc
: /* Return the property list of CHARSET. */)
1381 Lisp_Object charset
;
1385 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1386 return CHARSET_ATTR_PLIST (attrs
);
1390 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1391 doc
: /* Set CHARSET's property list to PLIST. */)
1393 Lisp_Object charset
, plist
;
1397 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1398 CHARSET_ATTR_PLIST (attrs
) = plist
;
1403 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1404 doc
: /* Unify characters of CHARSET with Unicode.
1405 This means reading the relevant file and installing the table defined
1406 by CHARSET's `:unify-map' property.
1408 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1409 the same meaning as the `:unify-map' attribute in the function
1410 `define-charset' (which see).
1412 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1413 (charset
, unify_map
, deunify
)
1414 Lisp_Object charset
, unify_map
, deunify
;
1419 CHECK_CHARSET_GET_ID (charset
, id
);
1420 cs
= CHARSET_FROM_ID (id
);
1422 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1423 : ! CHARSET_UNIFIED_P (cs
))
1426 CHARSET_UNIFIED_P (cs
) = 0;
1429 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1430 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1431 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1432 if (NILP (unify_map
))
1433 unify_map
= CHARSET_UNIFY_MAP (cs
);
1436 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1437 signal_error ("Bad unify-map", unify_map
);
1438 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1440 if (NILP (Vchar_unify_table
))
1441 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1442 char_table_set_range (Vchar_unify_table
,
1443 cs
->min_char
, cs
->max_char
, charset
);
1444 CHARSET_UNIFIED_P (cs
) = 1;
1446 else if (CHAR_TABLE_P (Vchar_unify_table
))
1448 int min_code
= CHARSET_MIN_CODE (cs
);
1449 int max_code
= CHARSET_MAX_CODE (cs
);
1450 int min_char
= DECODE_CHAR (cs
, min_code
);
1451 int max_char
= DECODE_CHAR (cs
, max_code
);
1453 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1459 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1460 Sget_unused_iso_final_char
, 2, 2, 0,
1462 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1463 DIMENSION is the number of bytes to represent a character: 1 or 2.
1464 CHARS is the number of characters in a dimension: 94 or 96.
1466 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1467 If there's no unused final char for the specified kind of charset,
1470 Lisp_Object dimension
, chars
;
1474 CHECK_NUMBER (dimension
);
1475 CHECK_NUMBER (chars
);
1476 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1477 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1478 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1479 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1480 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1481 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1483 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1487 check_iso_charset_parameter (dimension
, chars
, final_char
)
1488 Lisp_Object dimension
, chars
, final_char
;
1490 CHECK_NATNUM (dimension
);
1491 CHECK_NATNUM (chars
);
1492 CHECK_NATNUM (final_char
);
1494 if (XINT (dimension
) > 3)
1495 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1496 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1497 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1498 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1499 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1503 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1505 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1507 On decoding by an ISO-2022 base coding system, when a charset
1508 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1509 if CHARSET is designated instead. */)
1510 (dimension
, chars
, final_char
, charset
)
1511 Lisp_Object dimension
, chars
, final_char
, charset
;
1516 CHECK_CHARSET_GET_ID (charset
, id
);
1517 check_iso_charset_parameter (dimension
, chars
, final_char
);
1518 chars_flag
= XINT (chars
) == 96;
1519 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1524 /* Return information about charsets in the text at PTR of NBYTES
1525 bytes, which are NCHARS characters. The value is:
1527 0: Each character is represented by one byte. This is always
1528 true for a unibyte string. For a multibyte string, true if
1529 it contains only ASCII characters.
1531 1: No charsets other than ascii, control-1, and latin-1 are
1538 string_xstring_p (string
)
1541 const unsigned char *p
= SDATA (string
);
1542 const unsigned char *endp
= p
+ SBYTES (string
);
1544 if (SCHARS (string
) == SBYTES (string
))
1549 int c
= STRING_CHAR_ADVANCE (p
);
1558 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1560 CHARSETS is a vector. If Nth element is non-nil, it means the
1561 charset whose id is N is already found.
1563 It may lookup a translation table TABLE if supplied. */
1566 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1567 const unsigned char *ptr
;
1568 EMACS_INT nchars
, nbytes
;
1569 Lisp_Object charsets
, table
;
1572 const unsigned char *pend
= ptr
+ nbytes
;
1574 if (nchars
== nbytes
)
1577 ASET (charsets
, charset_ascii
, Qt
);
1584 c
= translate_char (table
, c
);
1585 if (ASCII_BYTE_P (c
))
1586 ASET (charsets
, charset_ascii
, Qt
);
1588 ASET (charsets
, charset_eight_bit
, Qt
);
1595 int c
= STRING_CHAR_ADVANCE (ptr
);
1596 struct charset
*charset
;
1599 c
= translate_char (table
, c
);
1600 charset
= CHAR_CHARSET (c
);
1601 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1606 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1608 doc
: /* Return a list of charsets in the region between BEG and END.
1609 BEG and END are buffer positions.
1610 Optional arg TABLE if non-nil is a translation table to look up.
1612 If the current buffer is unibyte, the returned list may contain
1613 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1615 Lisp_Object beg
, end
, table
;
1617 Lisp_Object charsets
;
1618 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1621 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1623 validate_region (&beg
, &end
);
1624 from
= XFASTINT (beg
);
1625 stop
= to
= XFASTINT (end
);
1627 if (from
< GPT
&& GPT
< to
)
1630 stop_byte
= GPT_BYTE
;
1633 stop_byte
= CHAR_TO_BYTE (stop
);
1635 from_byte
= CHAR_TO_BYTE (from
);
1637 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1640 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1641 stop_byte
- from_byte
, charsets
, table
,
1645 from
= stop
, from_byte
= stop_byte
;
1646 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1653 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1654 if (!NILP (AREF (charsets
, i
)))
1655 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1659 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1661 doc
: /* Return a list of charsets in STR.
1662 Optional arg TABLE if non-nil is a translation table to look up.
1664 If STR is unibyte, the returned list may contain
1665 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1667 Lisp_Object str
, table
;
1669 Lisp_Object charsets
;
1675 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1676 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1678 STRING_MULTIBYTE (str
));
1680 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1681 if (!NILP (AREF (charsets
, i
)))
1682 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1688 /* Return a unified character code for C (>= 0x110000). VAL is a
1689 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1692 maybe_unify_char (c
, val
)
1696 struct charset
*charset
;
1703 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1704 load_charset (charset
, 1);
1705 if (! inhibit_load_charset_map
)
1707 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1713 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1714 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1723 /* Return a character correponding to the code-point CODE of
1727 decode_char (charset
, code
)
1728 struct charset
*charset
;
1732 enum charset_method method
= CHARSET_METHOD (charset
);
1734 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1737 if (method
== CHARSET_METHOD_SUBSET
)
1739 Lisp_Object subset_info
;
1741 subset_info
= CHARSET_SUBSET (charset
);
1742 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1743 code
-= XINT (AREF (subset_info
, 3));
1744 if (code
< XFASTINT (AREF (subset_info
, 1))
1745 || code
> XFASTINT (AREF (subset_info
, 2)))
1748 c
= DECODE_CHAR (charset
, code
);
1750 else if (method
== CHARSET_METHOD_SUPERSET
)
1752 Lisp_Object parents
;
1754 parents
= CHARSET_SUPERSET (charset
);
1756 for (; CONSP (parents
); parents
= XCDR (parents
))
1758 int id
= XINT (XCAR (XCAR (parents
)));
1759 int code_offset
= XINT (XCDR (XCAR (parents
)));
1760 unsigned this_code
= code
- code_offset
;
1762 charset
= CHARSET_FROM_ID (id
);
1763 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1769 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1773 if (method
== CHARSET_METHOD_MAP
)
1775 Lisp_Object decoder
;
1777 decoder
= CHARSET_DECODER (charset
);
1778 if (! VECTORP (decoder
))
1780 load_charset (charset
, 1);
1781 decoder
= CHARSET_DECODER (charset
);
1783 if (VECTORP (decoder
))
1784 c
= XINT (AREF (decoder
, char_index
));
1786 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1788 else /* method == CHARSET_METHOD_OFFSET */
1790 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1791 if (CHARSET_UNIFIED_P (charset
)
1792 && c
> MAX_UNICODE_CHAR
)
1793 MAYBE_UNIFY_CHAR (c
);
1800 /* Variable used temporarily by the macro ENCODE_CHAR. */
1801 Lisp_Object charset_work
;
1803 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1804 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1805 use CHARSET's strict_max_char instead of max_char. */
1808 encode_char (charset
, c
)
1809 struct charset
*charset
;
1813 enum charset_method method
= CHARSET_METHOD (charset
);
1815 if (CHARSET_UNIFIED_P (charset
))
1817 Lisp_Object deunifier
;
1818 int code_index
= -1;
1820 deunifier
= CHARSET_DEUNIFIER (charset
);
1821 if (! CHAR_TABLE_P (deunifier
))
1823 load_charset (charset
, 2);
1824 deunifier
= CHARSET_DEUNIFIER (charset
);
1826 if (CHAR_TABLE_P (deunifier
))
1828 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1830 if (INTEGERP (deunified
))
1831 code_index
= XINT (deunified
);
1835 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1837 if (code_index
>= 0)
1838 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1841 if (method
== CHARSET_METHOD_SUBSET
)
1843 Lisp_Object subset_info
;
1844 struct charset
*this_charset
;
1846 subset_info
= CHARSET_SUBSET (charset
);
1847 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1848 code
= ENCODE_CHAR (this_charset
, c
);
1849 if (code
== CHARSET_INVALID_CODE (this_charset
)
1850 || code
< XFASTINT (AREF (subset_info
, 1))
1851 || code
> XFASTINT (AREF (subset_info
, 2)))
1852 return CHARSET_INVALID_CODE (charset
);
1853 code
+= XINT (AREF (subset_info
, 3));
1857 if (method
== CHARSET_METHOD_SUPERSET
)
1859 Lisp_Object parents
;
1861 parents
= CHARSET_SUPERSET (charset
);
1862 for (; CONSP (parents
); parents
= XCDR (parents
))
1864 int id
= XINT (XCAR (XCAR (parents
)));
1865 int code_offset
= XINT (XCDR (XCAR (parents
)));
1866 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1868 code
= ENCODE_CHAR (this_charset
, c
);
1869 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1870 return code
+ code_offset
;
1872 return CHARSET_INVALID_CODE (charset
);
1875 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1876 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1877 return CHARSET_INVALID_CODE (charset
);
1879 if (method
== CHARSET_METHOD_MAP
)
1881 Lisp_Object encoder
;
1884 encoder
= CHARSET_ENCODER (charset
);
1885 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1887 load_charset (charset
, 2);
1888 encoder
= CHARSET_ENCODER (charset
);
1890 if (CHAR_TABLE_P (encoder
))
1892 val
= CHAR_TABLE_REF (encoder
, c
);
1894 return CHARSET_INVALID_CODE (charset
);
1896 if (! CHARSET_COMPACT_CODES_P (charset
))
1897 code
= INDEX_TO_CODE_POINT (charset
, code
);
1901 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1902 code
= INDEX_TO_CODE_POINT (charset
, code
);
1905 else /* method == CHARSET_METHOD_OFFSET */
1907 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1909 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1916 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1917 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1918 Return nil if CODE-POINT is not valid in CHARSET.
1920 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1922 Optional argument RESTRICTION specifies a way to map the pair of CCS
1923 and CODE-POINT to a character. Currently not supported and just ignored. */)
1924 (charset
, code_point
, restriction
)
1925 Lisp_Object charset
, code_point
, restriction
;
1929 struct charset
*charsetp
;
1931 CHECK_CHARSET_GET_ID (charset
, id
);
1932 if (CONSP (code_point
))
1934 CHECK_NATNUM_CAR (code_point
);
1935 CHECK_NATNUM_CDR (code_point
);
1936 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1940 CHECK_NATNUM (code_point
);
1941 code
= XINT (code_point
);
1943 charsetp
= CHARSET_FROM_ID (id
);
1944 c
= DECODE_CHAR (charsetp
, code
);
1945 return (c
>= 0 ? make_number (c
) : Qnil
);
1949 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1950 doc
: /* Encode the character CH into a code-point of CHARSET.
1951 Return nil if CHARSET doesn't include CH.
1953 Optional argument RESTRICTION specifies a way to map CH to a
1954 code-point in CCS. Currently not supported and just ignored. */)
1955 (ch
, charset
, restriction
)
1956 Lisp_Object ch
, charset
, restriction
;
1960 struct charset
*charsetp
;
1962 CHECK_CHARSET_GET_ID (charset
, id
);
1964 charsetp
= CHARSET_FROM_ID (id
);
1965 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1966 if (code
== CHARSET_INVALID_CODE (charsetp
))
1968 if (code
> 0x7FFFFFF)
1969 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1970 return make_number (code
);
1974 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1976 /* Return a character of CHARSET whose position codes are CODEn.
1978 CODE1 through CODE4 are optional, but if you don't supply sufficient
1979 position codes, it is assumed that the minimum code in each dimension
1981 (charset
, code1
, code2
, code3
, code4
)
1982 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1985 struct charset
*charsetp
;
1989 CHECK_CHARSET_GET_ID (charset
, id
);
1990 charsetp
= CHARSET_FROM_ID (id
);
1992 dimension
= CHARSET_DIMENSION (charsetp
);
1994 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1995 ? 0 : CHARSET_MIN_CODE (charsetp
));
1998 CHECK_NATNUM (code1
);
1999 if (XFASTINT (code1
) >= 0x100)
2000 args_out_of_range (make_number (0xFF), code1
);
2001 code
= XFASTINT (code1
);
2007 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
2010 CHECK_NATNUM (code2
);
2011 if (XFASTINT (code2
) >= 0x100)
2012 args_out_of_range (make_number (0xFF), code2
);
2013 code
|= XFASTINT (code2
);
2020 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
2023 CHECK_NATNUM (code3
);
2024 if (XFASTINT (code3
) >= 0x100)
2025 args_out_of_range (make_number (0xFF), code3
);
2026 code
|= XFASTINT (code3
);
2033 code
|= charsetp
->code_space
[0];
2036 CHECK_NATNUM (code4
);
2037 if (XFASTINT (code4
) >= 0x100)
2038 args_out_of_range (make_number (0xFF), code4
);
2039 code
|= XFASTINT (code4
);
2046 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
2048 c
= DECODE_CHAR (charsetp
, code
);
2050 error ("Invalid code(s)");
2051 return make_number (c
);
2055 /* Return the first charset in CHARSET_LIST that contains C.
2056 CHARSET_LIST is a list of charset IDs. If it is nil, use
2057 Vcharset_ordered_list. */
2060 char_charset (c
, charset_list
, code_return
)
2062 Lisp_Object charset_list
;
2063 unsigned *code_return
;
2067 if (NILP (charset_list
))
2068 charset_list
= Vcharset_ordered_list
;
2072 while (CONSP (charset_list
))
2074 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2075 unsigned code
= ENCODE_CHAR (charset
, c
);
2077 if (code
!= CHARSET_INVALID_CODE (charset
))
2080 *code_return
= code
;
2083 charset_list
= XCDR (charset_list
);
2085 && c
<= MAX_UNICODE_CHAR
2086 && EQ (charset_list
, Vcharset_non_preferred_head
))
2087 return CHARSET_FROM_ID (charset_unicode
);
2089 return (maybe_null
? NULL
2090 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2091 : CHARSET_FROM_ID (charset_eight_bit
));
2095 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2097 /*Return list of charset and one to four position-codes of CH.
2098 The charset is decided by the current priority order of charsets.
2099 A position-code is a byte value of each dimension of the code-point of
2100 CH in the charset. */)
2104 struct charset
*charset
;
2109 CHECK_CHARACTER (ch
);
2111 charset
= CHAR_CHARSET (c
);
2114 code
= ENCODE_CHAR (charset
, c
);
2115 if (code
== CHARSET_INVALID_CODE (charset
))
2117 dimension
= CHARSET_DIMENSION (charset
);
2118 for (val
= Qnil
; dimension
> 0; dimension
--)
2120 val
= Fcons (make_number (code
& 0xFF), val
);
2123 return Fcons (CHARSET_NAME (charset
), val
);
2127 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2128 doc
: /* Return the charset of highest priority that contains CH.
2129 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2130 from which to find the charset. It may also be a coding system. In
2131 that case, find the charset from what supported by that coding system. */)
2133 Lisp_Object ch
, restriction
;
2135 struct charset
*charset
;
2137 CHECK_CHARACTER (ch
);
2138 if (NILP (restriction
))
2139 charset
= CHAR_CHARSET (XINT (ch
));
2142 Lisp_Object charset_list
;
2144 if (CONSP (restriction
))
2146 for (charset_list
= Qnil
; CONSP (restriction
);
2147 restriction
= XCDR (restriction
))
2151 CHECK_CHARSET_GET_ID (XCAR (restriction
), id
);
2152 charset_list
= Fcons (make_number (id
), charset_list
);
2154 charset_list
= Fnreverse (charset_list
);
2157 charset_list
= coding_system_charset_list (restriction
);
2158 charset
= char_charset (XINT (ch
), charset_list
, NULL
);
2162 return (CHARSET_NAME (charset
));
2166 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2168 Return charset of a character in the current buffer at position POS.
2169 If POS is nil, it defauls to the current point.
2170 If POS is out of range, the value is nil. */)
2175 struct charset
*charset
;
2177 ch
= Fchar_after (pos
);
2178 if (! INTEGERP (ch
))
2180 charset
= CHAR_CHARSET (XINT (ch
));
2181 return (CHARSET_NAME (charset
));
2185 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2187 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2189 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2190 by their DIMENSION, CHARS, and FINAL-CHAR,
2191 whereas Emacs distinguishes them by charset symbol.
2192 See the documentation of the function `charset-info' for the meanings of
2193 DIMENSION, CHARS, and FINAL-CHAR. */)
2194 (dimension
, chars
, final_char
)
2195 Lisp_Object dimension
, chars
, final_char
;
2200 check_iso_charset_parameter (dimension
, chars
, final_char
);
2201 chars_flag
= XFASTINT (chars
) == 96;
2202 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2203 XFASTINT (final_char
));
2204 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2208 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2212 Clear temporary charset mapping tables.
2213 It should be called only from temacs invoked for dumping. */)
2216 if (temp_charset_work
)
2218 free (temp_charset_work
);
2219 temp_charset_work
= NULL
;
2222 if (CHAR_TABLE_P (Vchar_unify_table
))
2223 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2228 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2229 Scharset_priority_list
, 0, 1, 0,
2230 doc
: /* Return the list of charsets ordered by priority.
2231 HIGHESTP non-nil means just return the highest priority one. */)
2233 Lisp_Object highestp
;
2235 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2237 if (!NILP (highestp
))
2238 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2240 while (!NILP (list
))
2242 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2245 return Fnreverse (val
);
2248 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2250 doc
: /* Assign higher priority to the charsets given as arguments.
2251 usage: (set-charset-priority &rest charsets) */)
2256 Lisp_Object new_head
, old_list
, arglist
[2];
2257 Lisp_Object list_2022
, list_emacs_mule
;
2260 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2262 for (i
= 0; i
< nargs
; i
++)
2264 CHECK_CHARSET_GET_ID (args
[i
], id
);
2265 if (! NILP (Fmemq (make_number (id
), old_list
)))
2267 old_list
= Fdelq (make_number (id
), old_list
);
2268 new_head
= Fcons (make_number (id
), new_head
);
2271 arglist
[0] = Fnreverse (new_head
);
2272 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2273 Vcharset_ordered_list
= Fnconc (2, arglist
);
2274 charset_ordered_list_tick
++;
2276 charset_unibyte
= -1;
2277 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2278 CONSP (old_list
); old_list
= XCDR (old_list
))
2280 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2281 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2282 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2283 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2284 if (charset_unibyte
< 0)
2286 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2288 if (CHARSET_DIMENSION (charset
) == 1
2289 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2290 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2291 charset_unibyte
= CHARSET_ID (charset
);
2294 Viso_2022_charset_list
= Fnreverse (list_2022
);
2295 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2296 if (charset_unibyte
< 0)
2297 charset_unibyte
= charset_iso_8859_1
;
2302 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2304 doc
: /* Internal use only.
2305 Return charset identification number of CHARSET. */)
2307 Lisp_Object charset
;
2311 CHECK_CHARSET_GET_ID (charset
, id
);
2312 return make_number (id
);
2319 Lisp_Object tempdir
;
2320 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2321 if (access ((char *) SDATA (tempdir
), 0) < 0)
2323 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2324 Emacs will not function correctly without the character map files.\n\
2325 Please check your installation!\n",
2327 /* TODO should this be a fatal error? (Bug#909) */
2330 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2335 init_charset_once ()
2339 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2340 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2341 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2342 iso_charset_table
[i
][j
][k
] = -1;
2344 for (i
= 0; i
< 256; i
++)
2345 emacs_mule_charset
[i
] = NULL
;
2347 charset_jisx0201_roman
= -1;
2348 charset_jisx0208_1978
= -1;
2349 charset_jisx0208
= -1;
2350 charset_ksc5601
= -1;
2358 DEFSYM (Qcharsetp
, "charsetp");
2360 DEFSYM (Qascii
, "ascii");
2361 DEFSYM (Qunicode
, "unicode");
2362 DEFSYM (Qemacs
, "emacs");
2363 DEFSYM (Qeight_bit
, "eight-bit");
2364 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2369 staticpro (&Vcharset_ordered_list
);
2370 Vcharset_ordered_list
= Qnil
;
2372 staticpro (&Viso_2022_charset_list
);
2373 Viso_2022_charset_list
= Qnil
;
2375 staticpro (&Vemacs_mule_charset_list
);
2376 Vemacs_mule_charset_list
= Qnil
;
2378 /* Don't staticpro them here. It's done in syms_of_fns. */
2379 QCtest
= intern (":test");
2380 Qeq
= intern ("eq");
2382 staticpro (&Vcharset_hash_table
);
2384 Lisp_Object args
[2];
2387 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2390 charset_table_size
= 128;
2391 charset_table
= ((struct charset
*)
2392 xmalloc (sizeof (struct charset
) * charset_table_size
));
2393 charset_table_used
= 0;
2395 defsubr (&Scharsetp
);
2396 defsubr (&Smap_charset_chars
);
2397 defsubr (&Sdefine_charset_internal
);
2398 defsubr (&Sdefine_charset_alias
);
2399 defsubr (&Scharset_plist
);
2400 defsubr (&Sset_charset_plist
);
2401 defsubr (&Sunify_charset
);
2402 defsubr (&Sget_unused_iso_final_char
);
2403 defsubr (&Sdeclare_equiv_charset
);
2404 defsubr (&Sfind_charset_region
);
2405 defsubr (&Sfind_charset_string
);
2406 defsubr (&Sdecode_char
);
2407 defsubr (&Sencode_char
);
2408 defsubr (&Ssplit_char
);
2409 defsubr (&Smake_char
);
2410 defsubr (&Schar_charset
);
2411 defsubr (&Scharset_after
);
2412 defsubr (&Siso_charset
);
2413 defsubr (&Sclear_charset_maps
);
2414 defsubr (&Scharset_priority_list
);
2415 defsubr (&Sset_charset_priority
);
2416 defsubr (&Scharset_id_internal
);
2418 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2419 doc
: /* *List of directories to search for charset map files. */);
2420 Vcharset_map_path
= Qnil
;
2422 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2423 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2424 inhibit_load_charset_map
= 0;
2426 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2427 doc
: /* List of all charsets ever defined. */);
2428 Vcharset_list
= Qnil
;
2430 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2431 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2432 If the current language environment is for multiple languages (e.g. "Latin-1"),
2433 the value may be a list of mnemonics. */);
2434 Vcurrent_iso639_language
= Qnil
;
2437 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2438 0, 127, 'B', -1, 0, 1, 0, 0);
2440 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2441 0, 255, -1, -1, -1, 1, 0, 0);
2443 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2444 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2446 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2447 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2449 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2450 128, 255, -1, 0, -1, 0, 1,
2451 MAX_5_BYTE_CHAR
+ 1);
2452 charset_unibyte
= charset_iso_8859_1
;
2457 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2458 (do not change this comment) */