1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
35 #include "character.h"
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
56 /* List of all charsets. This variable is used only from Emacs
58 Lisp_Object Vcharset_list
;
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table
;
64 /* Table of struct charset. */
65 struct charset
*charset_table
;
/* Number of struct charset slots allocated in charset_table, and the
   number of those slots already in use.  When the two are equal,
   Fdefine_charset_internal copies the table into one that is 16 slots
   larger; the used count is also the ID given to the next newly
   defined charset.  */
67 static int charset_table_size
;
68 static int charset_table_used
;
70 Lisp_Object Qcharsetp
;
72 /* Special charset symbols. */
74 Lisp_Object Qeight_bit
;
75 Lisp_Object Qiso_8859_1
;
79 /* The corresponding charsets. */
81 int charset_eight_bit
;
82 int charset_iso_8859_1
;
86 /* The other special charsets. */
87 int charset_jisx0201_roman
;
88 int charset_jisx0208_1978
;
91 /* Value of charset attribute `charset-iso-plane'. */
94 /* Charset of unibyte characters. */
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list
;
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
102 Lisp_Object Vcharset_non_preferred_head
;
104 /* Incremented every time we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
107 unsigned short charset_ordered_list_tick
;
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list
;
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list
;
115 struct charset
*emacs_mule_charset
[256];
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
121 Lisp_Object Vcharset_map_path
;
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map
;
126 Lisp_Object Vcurrent_iso639_language
;
128 /* Defined in chartab.c */
130 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
131 Lisp_Object function
, Lisp_Object table
,
132 Lisp_Object arg
, struct charset
*charset
,
133 unsigned from
, unsigned to
));
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
175 /* The current charset for which the following tables are setup. */
176 struct charset
*current
;
178 /* 1 iff the following table is used for encoder. */
181 /* When the following table is used for encoding, the minimum and
182 maximum characters of the current charset. */
183 int min_char
, max_char
;
185 /* A Unicode character corresponding to the code index 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 index 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder
[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder
[0x20000];
203 } *temp_charset_work
;
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
/* Look up character C in temp_charset_work's encoder table.
   Evaluates to 0 when C equals temp_charset_work->zero_index_char;
   otherwise to the stored table entry, or to -1 when that entry is 0
   (a zero entry means nothing was recorded for C).  A C below 0x20000
   indexes the table directly; any larger C is looked up at
   C - 0x10000.  C is evaluated more than once, so avoid arguments
   with side effects.  */
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
/* Store character C as the entry for index CODE in
   temp_charset_work's decoder table.  CODE is used directly as the
   array subscript; no range check is made here.  */
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
/* Evaluate to the entry stored for index CODE in temp_charset_work's
   decoder table.  CODE is used directly as the array subscript; no
   range check is made here.  */
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
229 /* Set to 1 to warn that a charset map has been loaded and thus that
230 buffer text and string data may be relocated. */
231 int charset_map_loaded
;
233 struct charset_map_entries
239 struct charset_map_entries
*next
;
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
258 If CONTROL_FLAG is 2, setup the following tables.
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
268 load_charset_map (charset
, entries
, n_entries
, control_flag
)
269 struct charset
*charset
;
270 struct charset_map_entries
*entries
;
274 Lisp_Object vec
, table
;
275 unsigned max_code
= CHARSET_MAX_CODE (charset
);
276 int ascii_compatible_p
= charset
->ascii_compatible_p
;
277 int min_char
, max_char
, nonascii_min_char
;
279 unsigned char *fast_map
= charset
->fast_map
;
286 if (! inhibit_load_charset_map
)
288 if (control_flag
== 1)
290 if (charset
->method
== CHARSET_METHOD_MAP
)
292 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
294 vec
= CHARSET_DECODER (charset
)
295 = Fmake_vector (make_number (n
), make_number (-1));
299 char_table_set_range (Vchar_unify_table
,
300 charset
->min_char
, charset
->max_char
,
306 table
= Fmake_char_table (Qnil
, Qnil
);
307 if (charset
->method
== CHARSET_METHOD_MAP
)
308 CHARSET_ENCODER (charset
) = table
;
310 CHARSET_DEUNIFIER (charset
) = table
;
315 if (! temp_charset_work
)
316 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
317 if (control_flag
== 1)
319 memset (temp_charset_work
->table
.decoder
, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work
->for_encoder
= 0;
325 memset (temp_charset_work
->table
.encoder
, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work
->zero_index_char
= -1;
329 temp_charset_work
->current
= charset
;
330 temp_charset_work
->for_encoder
= (control_flag
== 2);
333 charset_map_loaded
= 1;
336 min_char
= max_char
= entries
->entry
[0].c
;
337 nonascii_min_char
= MAX_CHAR
;
338 for (i
= 0; i
< n_entries
; i
++)
341 int from_index
, to_index
;
343 int idx
= i
% 0x10000;
345 if (i
> 0 && idx
== 0)
346 entries
= entries
->next
;
347 from
= entries
->entry
[idx
].from
;
348 to
= entries
->entry
[idx
].to
;
349 from_c
= entries
->entry
[idx
].c
;
350 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
353 to_index
= from_index
;
358 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
359 to_c
= from_c
+ (to_index
- from_index
);
361 if (from_index
< 0 || to_index
< 0)
366 else if (from_c
< min_char
)
369 if (control_flag
== 1)
371 if (charset
->method
== CHARSET_METHOD_MAP
)
372 for (; from_index
<= to_index
; from_index
++, from_c
++)
373 ASET (vec
, from_index
, make_number (from_c
));
375 for (; from_index
<= to_index
; from_index
++, from_c
++)
376 CHAR_TABLE_SET (Vchar_unify_table
,
377 CHARSET_CODE_OFFSET (charset
) + from_index
,
378 make_number (from_c
));
380 else if (control_flag
== 2)
382 if (charset
->method
== CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset
))
384 for (; from_index
<= to_index
; from_index
++, from_c
++)
386 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
388 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
389 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
392 for (; from_index
<= to_index
; from_index
++, from_c
++)
394 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
395 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
398 else if (control_flag
== 3)
399 for (; from_index
<= to_index
; from_index
++, from_c
++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
401 else if (control_flag
== 4)
402 for (; from_index
<= to_index
; from_index
++, from_c
++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
404 else /* control_flag == 0 */
406 if (ascii_compatible_p
)
408 if (! ASCII_BYTE_P (from_c
))
410 if (from_c
< nonascii_min_char
)
411 nonascii_min_char
= from_c
;
413 else if (! ASCII_BYTE_P (to_c
))
415 nonascii_min_char
= 0x80;
419 for (; from_c
<= to_c
; from_c
++)
420 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
424 if (control_flag
== 0)
426 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
427 ? nonascii_min_char
: min_char
);
428 CHARSET_MAX_CHAR (charset
) = max_char
;
430 else if (control_flag
== 4)
432 temp_charset_work
->min_char
= min_char
;
433 temp_charset_work
->max_char
= max_char
;
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to the comment character '#'. */
441 static INLINE
unsigned
449 while ((c
= getc (fp
)) != EOF
)
453 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
457 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
469 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
471 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
473 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
474 n
= (n
* 10) + c
- '0';
481 /* Return a mapping vector for CHARSET loaded from MAPFILE.
482 Each line of MAPFILE has this form
484 where 0xAAAA is a code-point and 0xCCCC is the corresponding
485 character code, or this form
487 where 0xAAAA and 0xBBBB are code-points specifying a range, and
488 0xCCCC is the first character code of the range.
490 The returned vector has this form:
491 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
492 where CODE1 is a code-point or a cons of code-points specifying a
495 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
498 load_charset_map_from_file (charset
, mapfile
, control_flag
)
499 struct charset
*charset
;
503 unsigned min_code
= CHARSET_MIN_CODE (charset
);
504 unsigned max_code
= CHARSET_MAX_CODE (charset
);
508 Lisp_Object suffixes
;
509 struct charset_map_entries
*head
, *entries
;
512 suffixes
= Fcons (build_string (".map"),
513 Fcons (build_string (".TXT"), Qnil
));
515 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
517 || ! (fp
= fdopen (fd
, "r")))
519 add_to_log ("Failure in loading charset map: %S", mapfile
, Qnil
);
523 head
= entries
= ((struct charset_map_entries
*)
524 alloca (sizeof (struct charset_map_entries
)));
533 from
= read_hex (fp
, &eof
);
536 if (getc (fp
) == '-')
537 to
= read_hex (fp
, &eof
);
540 c
= (int) read_hex (fp
, &eof
);
542 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
545 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
547 entries
->next
= ((struct charset_map_entries
*)
548 alloca (sizeof (struct charset_map_entries
)));
549 entries
= entries
->next
;
551 idx
= n_entries
% 0x10000;
552 entries
->entry
[idx
].from
= from
;
553 entries
->entry
[idx
].to
= to
;
554 entries
->entry
[idx
].c
= c
;
560 load_charset_map (charset
, head
, n_entries
, control_flag
);
564 load_charset_map_from_vector (charset
, vec
, control_flag
)
565 struct charset
*charset
;
569 unsigned min_code
= CHARSET_MIN_CODE (charset
);
570 unsigned max_code
= CHARSET_MAX_CODE (charset
);
571 struct charset_map_entries
*head
, *entries
;
573 int len
= ASIZE (vec
);
578 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
582 head
= entries
= ((struct charset_map_entries
*)
583 alloca (sizeof (struct charset_map_entries
)));
585 for (i
= 0; i
< len
; i
+= 2)
587 Lisp_Object val
, val2
;
599 from
= XFASTINT (val
);
600 to
= XFASTINT (val2
);
605 from
= to
= XFASTINT (val
);
607 val
= AREF (vec
, i
+ 1);
611 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
614 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
616 entries
->next
= ((struct charset_map_entries
*)
617 alloca (sizeof (struct charset_map_entries
)));
618 entries
= entries
->next
;
620 idx
= n_entries
% 0x10000;
621 entries
->entry
[idx
].from
= from
;
622 entries
->entry
[idx
].to
= to
;
623 entries
->entry
[idx
].c
= c
;
627 load_charset_map (charset
, head
, n_entries
, control_flag
);
631 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
632 map it is (see the comment of load_charset_map for the detail). */
635 load_charset (charset
, control_flag
)
636 struct charset
*charset
;
641 if (inhibit_load_charset_map
643 && charset
== temp_charset_work
->current
644 && (control_flag
== 2 == temp_charset_work
->for_encoder
))
647 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
648 map
= CHARSET_MAP (charset
);
649 else if (CHARSET_UNIFIED_P (charset
))
650 map
= CHARSET_UNIFY_MAP (charset
);
652 load_charset_map_from_file (charset
, map
, control_flag
);
654 load_charset_map_from_vector (charset
, map
, control_flag
);
658 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
659 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
663 return (CHARSETP (object
) ? Qt
: Qnil
);
/* Forward declaration of map_charset_for_dump, which is defined right
   after this prototype.  FROM and TO are code-points; the definition
   converts them to indices with CODE_POINT_TO_INDEX against
   temp_charset_work->current.  Each range found is passed as a cons
   either to C_FUNCTION (called with ARG and the range) or to the Lisp
   FUNCTION (called through call2 with the range and ARG).  */
667 void map_charset_for_dump
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
668 Lisp_Object function
, Lisp_Object arg
,
669 unsigned from
, unsigned to
));
672 map_charset_for_dump (c_function
, function
, arg
, from
, to
)
673 void (*c_function
) (Lisp_Object
, Lisp_Object
);
674 Lisp_Object function
, arg
;
677 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
678 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
683 range
= Fcons (Qnil
, Qnil
);
686 c
= temp_charset_work
->min_char
;
687 stop
= (temp_charset_work
->max_char
< 0x20000
688 ? temp_charset_work
->max_char
: 0xFFFF);
692 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
694 if (index
>= from_idx
&& index
<= to_idx
)
696 if (NILP (XCAR (range
)))
697 XSETCAR (range
, make_number (c
));
699 else if (! NILP (XCAR (range
)))
701 XSETCDR (range
, make_number (c
- 1));
703 (*c_function
) (arg
, range
);
705 call2 (function
, range
, arg
);
706 XSETCAR (range
, Qnil
);
710 if (c
== temp_charset_work
->max_char
)
712 if (! NILP (XCAR (range
)))
714 XSETCDR (range
, make_number (c
));
716 (*c_function
) (arg
, range
);
718 call2 (function
, range
, arg
);
723 stop
= temp_charset_work
->max_char
;
730 map_charset_chars (c_function
, function
, arg
,
732 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
733 Lisp_Object function
, arg
;
734 struct charset
*charset
;
740 partial
= (from
> CHARSET_MIN_CODE (charset
)
741 || to
< CHARSET_MAX_CODE (charset
));
743 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
745 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
746 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
747 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
748 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
750 if (CHARSET_UNIFIED_P (charset
))
752 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
753 load_charset (charset
, 2);
754 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
755 map_char_table_for_charset (c_function
, function
,
756 CHARSET_DEUNIFIER (charset
), arg
,
757 partial
? charset
: NULL
, from
, to
);
759 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
762 range
= Fcons (make_number (from_c
), make_number (to_c
));
764 (*c_function
) (arg
, range
);
766 call2 (function
, range
, arg
);
768 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
770 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
771 load_charset (charset
, 2);
772 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
773 map_char_table_for_charset (c_function
, function
,
774 CHARSET_ENCODER (charset
), arg
,
775 partial
? charset
: NULL
, from
, to
);
777 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
779 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
781 Lisp_Object subset_info
;
784 subset_info
= CHARSET_SUBSET (charset
);
785 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
786 offset
= XINT (AREF (subset_info
, 3));
788 if (from
< XFASTINT (AREF (subset_info
, 1)))
789 from
= XFASTINT (AREF (subset_info
, 1));
791 if (to
> XFASTINT (AREF (subset_info
, 2)))
792 to
= XFASTINT (AREF (subset_info
, 2));
793 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
795 else /* i.e. CHARSET_METHOD_SUPERSET */
799 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
800 parents
= XCDR (parents
))
803 unsigned this_from
, this_to
;
805 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
806 offset
= XINT (XCDR (XCAR (parents
)));
807 this_from
= from
- offset
;
808 this_to
= to
- offset
;
809 if (this_from
< CHARSET_MIN_CODE (charset
))
810 this_from
= CHARSET_MIN_CODE (charset
);
811 if (this_to
> CHARSET_MAX_CODE (charset
))
812 this_to
= CHARSET_MAX_CODE (charset
);
813 map_charset_chars (c_function
, function
, arg
, charset
,
819 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
820 doc
: /* Call FUNCTION for all characters in CHARSET.
821 FUNCTION is called with an argument RANGE and the optional 3rd
824 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
825 characters contained in CHARSET.
827 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
828 range of code points of target characters. */)
829 (function
, charset
, arg
, from_code
, to_code
)
830 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
835 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
836 if (NILP (from_code
))
837 from
= CHARSET_MIN_CODE (cs
);
840 CHECK_NATNUM (from_code
);
841 from
= XINT (from_code
);
842 if (from
< CHARSET_MIN_CODE (cs
))
843 from
= CHARSET_MIN_CODE (cs
);
846 to
= CHARSET_MAX_CODE (cs
);
849 CHECK_NATNUM (to_code
);
851 if (to
> CHARSET_MAX_CODE (cs
))
852 to
= CHARSET_MAX_CODE (cs
);
854 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
859 /* Define a charset according to the arguments. The Nth argument is
860 the Nth attribute of the charset (the last attribute `charset-id'
861 is not included). See the docstring of `define-charset' for the
864 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
865 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
866 doc
: /* For internal use only.
867 usage: (define-charset-internal ...) */)
872 /* Charset attr vector. */
876 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
878 struct charset charset
;
881 int new_definition_p
;
884 if (nargs
!= charset_arg_max
)
885 return Fsignal (Qwrong_number_of_arguments
,
886 Fcons (intern ("define-charset-internal"),
887 make_number (nargs
)));
889 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
891 CHECK_SYMBOL (args
[charset_arg_name
]);
892 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
894 val
= args
[charset_arg_code_space
];
895 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
897 int min_byte
, max_byte
;
899 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
900 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
901 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
902 error ("Invalid :code-space value");
903 charset
.code_space
[i
* 4] = min_byte
;
904 charset
.code_space
[i
* 4 + 1] = max_byte
;
905 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
906 nchars
*= charset
.code_space
[i
* 4 + 2];
907 charset
.code_space
[i
* 4 + 3] = nchars
;
912 val
= args
[charset_arg_dimension
];
914 charset
.dimension
= dimension
;
918 charset
.dimension
= XINT (val
);
919 if (charset
.dimension
< 1 || charset
.dimension
> 4)
920 args_out_of_range_3 (val
, make_number (1), make_number (4));
923 charset
.code_linear_p
924 = (charset
.dimension
== 1
925 || (charset
.code_space
[2] == 256
926 && (charset
.dimension
== 2
927 || (charset
.code_space
[6] == 256
928 && (charset
.dimension
== 3
929 || charset
.code_space
[10] == 256)))));
931 if (! charset
.code_linear_p
)
933 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
934 bzero (charset
.code_space_mask
, 256);
935 for (i
= 0; i
< 4; i
++)
936 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
938 charset
.code_space_mask
[j
] |= (1 << i
);
941 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
943 charset
.min_code
= (charset
.code_space
[0]
944 | (charset
.code_space
[4] << 8)
945 | (charset
.code_space
[8] << 16)
946 | (charset
.code_space
[12] << 24));
947 charset
.max_code
= (charset
.code_space
[1]
948 | (charset
.code_space
[5] << 8)
949 | (charset
.code_space
[9] << 16)
950 | (charset
.code_space
[13] << 24));
951 charset
.char_index_offset
= 0;
953 val
= args
[charset_arg_min_code
];
963 CHECK_NUMBER_CAR (val
);
964 CHECK_NUMBER_CDR (val
);
965 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
967 if (code
< charset
.min_code
968 || code
> charset
.max_code
)
969 args_out_of_range_3 (make_number (charset
.min_code
),
970 make_number (charset
.max_code
), val
);
971 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
972 charset
.min_code
= code
;
975 val
= args
[charset_arg_max_code
];
985 CHECK_NUMBER_CAR (val
);
986 CHECK_NUMBER_CDR (val
);
987 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
989 if (code
< charset
.min_code
990 || code
> charset
.max_code
)
991 args_out_of_range_3 (make_number (charset
.min_code
),
992 make_number (charset
.max_code
), val
);
993 charset
.max_code
= code
;
996 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
998 val
= args
[charset_arg_invalid_code
];
1001 if (charset
.min_code
> 0)
1002 charset
.invalid_code
= 0;
1005 XSETINT (val
, charset
.max_code
+ 1);
1006 if (XINT (val
) == charset
.max_code
+ 1)
1007 charset
.invalid_code
= charset
.max_code
+ 1;
1009 error ("Attribute :invalid-code must be specified");
1015 charset
.invalid_code
= XFASTINT (val
);
1018 val
= args
[charset_arg_iso_final
];
1020 charset
.iso_final
= -1;
1024 if (XINT (val
) < '0' || XINT (val
) > 127)
1025 error ("Invalid iso-final-char: %d", XINT (val
));
1026 charset
.iso_final
= XINT (val
);
1029 val
= args
[charset_arg_iso_revision
];
1031 charset
.iso_revision
= -1;
1035 if (XINT (val
) > 63)
1036 args_out_of_range (make_number (63), val
);
1037 charset
.iso_revision
= XINT (val
);
1040 val
= args
[charset_arg_emacs_mule_id
];
1042 charset
.emacs_mule_id
= -1;
1046 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1047 error ("Invalid emacs-mule-id: %d", XINT (val
));
1048 charset
.emacs_mule_id
= XINT (val
);
1051 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1053 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1055 charset
.unified_p
= 0;
1057 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
1059 if (! NILP (args
[charset_arg_code_offset
]))
1061 val
= args
[charset_arg_code_offset
];
1064 charset
.method
= CHARSET_METHOD_OFFSET
;
1065 charset
.code_offset
= XINT (val
);
1067 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1068 charset
.min_char
= i
+ charset
.code_offset
;
1069 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1070 charset
.max_char
= i
+ charset
.code_offset
;
1071 if (charset
.max_char
> MAX_CHAR
)
1072 error ("Unsupported max char: %d", charset
.max_char
);
1074 i
= (charset
.min_char
>> 7) << 7;
1075 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1076 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1077 i
= (i
>> 12) << 12;
1078 for (; i
<= charset
.max_char
; i
+= 0x1000)
1079 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1081 else if (! NILP (args
[charset_arg_map
]))
1083 val
= args
[charset_arg_map
];
1084 ASET (attrs
, charset_map
, val
);
1085 charset
.method
= CHARSET_METHOD_MAP
;
1087 else if (! NILP (args
[charset_arg_subset
]))
1090 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1091 struct charset
*parent_charset
;
1093 val
= args
[charset_arg_subset
];
1094 parent
= Fcar (val
);
1095 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1096 parent_min_code
= Fnth (make_number (1), val
);
1097 CHECK_NATNUM (parent_min_code
);
1098 parent_max_code
= Fnth (make_number (2), val
);
1099 CHECK_NATNUM (parent_max_code
);
1100 parent_code_offset
= Fnth (make_number (3), val
);
1101 CHECK_NUMBER (parent_code_offset
);
1102 val
= Fmake_vector (make_number (4), Qnil
);
1103 ASET (val
, 0, make_number (parent_charset
->id
));
1104 ASET (val
, 1, parent_min_code
);
1105 ASET (val
, 2, parent_max_code
);
1106 ASET (val
, 3, parent_code_offset
);
1107 ASET (attrs
, charset_subset
, val
);
1109 charset
.method
= CHARSET_METHOD_SUBSET
;
1110 /* Here, we just copy the parent's fast_map. It's not accurate,
1111 but at least it works for quickly detecting which character
1112 DOESN'T belong to this charset. */
1113 for (i
= 0; i
< 190; i
++)
1114 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1116 /* We also copy these for parents. */
1117 charset
.min_char
= parent_charset
->min_char
;
1118 charset
.max_char
= parent_charset
->max_char
;
1120 else if (! NILP (args
[charset_arg_superset
]))
1122 val
= args
[charset_arg_superset
];
1123 charset
.method
= CHARSET_METHOD_SUPERSET
;
1124 val
= Fcopy_sequence (val
);
1125 ASET (attrs
, charset_superset
, val
);
1127 charset
.min_char
= MAX_CHAR
;
1128 charset
.max_char
= 0;
1129 for (; ! NILP (val
); val
= Fcdr (val
))
1131 Lisp_Object elt
, car_part
, cdr_part
;
1132 int this_id
, offset
;
1133 struct charset
*this_charset
;
1138 car_part
= XCAR (elt
);
1139 cdr_part
= XCDR (elt
);
1140 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1141 CHECK_NUMBER (cdr_part
);
1142 offset
= XINT (cdr_part
);
1146 CHECK_CHARSET_GET_ID (elt
, this_id
);
1149 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1151 this_charset
= CHARSET_FROM_ID (this_id
);
1152 if (charset
.min_char
> this_charset
->min_char
)
1153 charset
.min_char
= this_charset
->min_char
;
1154 if (charset
.max_char
< this_charset
->max_char
)
1155 charset
.max_char
= this_charset
->max_char
;
1156 for (i
= 0; i
< 190; i
++)
1157 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1161 error ("None of :code-offset, :map, :parents are specified");
1163 val
= args
[charset_arg_unify_map
];
1164 if (! NILP (val
) && !STRINGP (val
))
1166 ASET (attrs
, charset_unify_map
, val
);
1168 CHECK_LIST (args
[charset_arg_plist
]);
1169 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1171 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1173 if (charset
.hash_index
>= 0)
1175 new_definition_p
= 0;
1176 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1177 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1181 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1183 if (charset_table_used
== charset_table_size
)
1185 struct charset
*new_table
1186 = (struct charset
*) xmalloc (sizeof (struct charset
)
1187 * (charset_table_size
+ 16));
1188 bcopy (charset_table
, new_table
,
1189 sizeof (struct charset
) * charset_table_size
);
1190 charset_table_size
+= 16;
1191 charset_table
= new_table
;
1193 id
= charset_table_used
++;
1194 new_definition_p
= 1;
1197 ASET (attrs
, charset_id
, make_number (id
));
1199 charset_table
[id
] = charset
;
1201 if (charset
.method
== CHARSET_METHOD_MAP
)
1203 load_charset (&charset
, 0);
1204 charset_table
[id
] = charset
;
1207 if (charset
.iso_final
>= 0)
1209 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1210 charset
.iso_final
) = id
;
1211 if (new_definition_p
)
1212 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1213 Fcons (make_number (id
), Qnil
));
1214 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1215 charset_jisx0201_roman
= id
;
1216 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1217 charset_jisx0208_1978
= id
;
1218 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1219 charset_jisx0208
= id
;
1222 if (charset
.emacs_mule_id
>= 0)
1224 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1225 if (charset
.emacs_mule_id
< 0xA0)
1226 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1228 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1229 if (new_definition_p
)
1230 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1231 Fcons (make_number (id
), Qnil
));
1234 if (new_definition_p
)
1236 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1237 if (charset
.supplementary_p
)
1238 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1239 Fcons (make_number (id
), Qnil
));
1244 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1246 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1248 if (cs
->supplementary_p
)
1251 if (EQ (tail
, Vcharset_ordered_list
))
1252 Vcharset_ordered_list
= Fcons (make_number (id
),
1253 Vcharset_ordered_list
);
1254 else if (NILP (tail
))
1255 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1256 Fcons (make_number (id
), Qnil
));
1259 val
= Fcons (XCAR (tail
), XCDR (tail
));
1260 XSETCDR (tail
, val
);
1261 XSETCAR (tail
, make_number (id
));
1264 charset_ordered_list_tick
++;
1271 /* Same as Fdefine_charset_internal but arguments are more convenient
1272 to call from C (typically in syms_of_charset). This can define a
1273 charset of `offset' method only. Return the ID of the new
1277 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1278 iso_final
, iso_revision
, emacs_mule_id
,
1279 ascii_compatible
, supplementary
,
1283 unsigned char *code_space
;
1284 unsigned min_code
, max_code
;
1285 int iso_final
, iso_revision
, emacs_mule_id
;
1286 int ascii_compatible
, supplementary
;
1289 Lisp_Object args
[charset_arg_max
];
1290 Lisp_Object plist
[14];
1294 args
[charset_arg_name
] = name
;
1295 args
[charset_arg_dimension
] = make_number (dimension
);
1296 val
= Fmake_vector (make_number (8), make_number (0));
1297 for (i
= 0; i
< 8; i
++)
1298 ASET (val
, i
, make_number (code_space
[i
]));
1299 args
[charset_arg_code_space
] = val
;
1300 args
[charset_arg_min_code
] = make_number (min_code
);
1301 args
[charset_arg_max_code
] = make_number (max_code
);
1302 args
[charset_arg_iso_final
]
1303 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1304 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1305 args
[charset_arg_emacs_mule_id
]
1306 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1307 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1308 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1309 args
[charset_arg_invalid_code
] = Qnil
;
1310 args
[charset_arg_code_offset
] = make_number (code_offset
);
1311 args
[charset_arg_map
] = Qnil
;
1312 args
[charset_arg_subset
] = Qnil
;
1313 args
[charset_arg_superset
] = Qnil
;
1314 args
[charset_arg_unify_map
] = Qnil
;
1316 plist
[0] = intern (":name");
1317 plist
[1] = args
[charset_arg_name
];
1318 plist
[2] = intern (":dimension");
1319 plist
[3] = args
[charset_arg_dimension
];
1320 plist
[4] = intern (":code-space");
1321 plist
[5] = args
[charset_arg_code_space
];
1322 plist
[6] = intern (":iso-final-char");
1323 plist
[7] = args
[charset_arg_iso_final
];
1324 plist
[8] = intern (":emacs-mule-id");
1325 plist
[9] = args
[charset_arg_emacs_mule_id
];
1326 plist
[10] = intern (":ascii-compatible-p");
1327 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1328 plist
[12] = intern (":code-offset");
1329 plist
[13] = args
[charset_arg_code_offset
];
1331 args
[charset_arg_plist
] = Flist (14, plist
);
1332 Fdefine_charset_internal (charset_arg_max
, args
);
1334 return XINT (CHARSET_SYMBOL_ID (name
));
1338 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1339 Sdefine_charset_alias
, 2, 2, 0,
1340 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1342 Lisp_Object alias
, charset
;
1346 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1347 Fputhash (alias
, attr
, Vcharset_hash_table
);
1348 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1353 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1354 doc
: /* Return the property list of CHARSET. */)
1356 Lisp_Object charset
;
1360 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1361 return CHARSET_ATTR_PLIST (attrs
);
1365 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1366 doc
: /* Set CHARSET's property list to PLIST. */)
1368 Lisp_Object charset
, plist
;
1372 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1373 CHARSET_ATTR_PLIST (attrs
) = plist
;
1378 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1379 doc
: /* Unify characters of CHARSET with Unicode.
1380 This means reading the relevant file and installing the table defined
1381 by CHARSET's `:unify-map' property.
1383 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1384 the same meaning as the `:unify-map' attribute in the function
1385 `define-charset' (which see).
1387 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1388 (charset
, unify_map
, deunify
)
1389 Lisp_Object charset
, unify_map
, deunify
;
1394 CHECK_CHARSET_GET_ID (charset
, id
);
1395 cs
= CHARSET_FROM_ID (id
);
1397 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1398 : ! CHARSET_UNIFIED_P (cs
))
1401 CHARSET_UNIFIED_P (cs
) = 0;
1404 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1405 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1406 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1407 if (NILP (unify_map
))
1408 unify_map
= CHARSET_UNIFY_MAP (cs
);
1411 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1412 signal_error ("Bad unify-map", unify_map
);
1413 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1415 if (NILP (Vchar_unify_table
))
1416 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1417 char_table_set_range (Vchar_unify_table
,
1418 cs
->min_char
, cs
->max_char
, charset
);
1419 CHARSET_UNIFIED_P (cs
) = 1;
1421 else if (CHAR_TABLE_P (Vchar_unify_table
))
1423 int min_code
= CHARSET_MIN_CODE (cs
);
1424 int max_code
= CHARSET_MAX_CODE (cs
);
1425 int min_char
= DECODE_CHAR (cs
, min_code
);
1426 int max_char
= DECODE_CHAR (cs
, max_code
);
1428 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1434 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1435 Sget_unused_iso_final_char
, 2, 2, 0,
1437 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1438 DIMENSION is the number of bytes to represent a character: 1 or 2.
1439 CHARS is the number of characters in a dimension: 94 or 96.
1441 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1442 If there's no unused final char for the specified kind of charset,
1445 Lisp_Object dimension
, chars
;
1449 CHECK_NUMBER (dimension
);
1450 CHECK_NUMBER (chars
);
1451 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1452 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1453 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1454 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1455 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1456 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1458 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1462 check_iso_charset_parameter (dimension
, chars
, final_char
)
1463 Lisp_Object dimension
, chars
, final_char
;
1465 CHECK_NATNUM (dimension
);
1466 CHECK_NATNUM (chars
);
1467 CHECK_NATNUM (final_char
);
1469 if (XINT (dimension
) > 3)
1470 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1471 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1472 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1473 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1474 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1478 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1480 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1482 On decoding by an ISO-2022 base coding system, when a charset
1483 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1484 if CHARSET is designated instead. */)
1485 (dimension
, chars
, final_char
, charset
)
1486 Lisp_Object dimension
, chars
, final_char
, charset
;
1491 CHECK_CHARSET_GET_ID (charset
, id
);
1492 check_iso_charset_parameter (dimension
, chars
, final_char
);
1493 chars_flag
= XINT (chars
) == 96;
1494 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1499 /* Return information about charsets in the text at PTR of NBYTES
1500 bytes, which are NCHARS characters. The value is:
1502 0: Each character is represented by one byte. This is always
1503 true for a unibyte string. For a multibyte string, true if
1504 it contains only ASCII characters.
1506 1: No charsets other than ascii, control-1, and latin-1 are
1513 string_xstring_p (string
)
1516 const unsigned char *p
= SDATA (string
);
1517 const unsigned char *endp
= p
+ SBYTES (string
);
1519 if (SCHARS (string
) == SBYTES (string
))
1524 int c
= STRING_CHAR_ADVANCE (p
);
1533 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1535 CHARSETS is a vector. If Nth element is non-nil, it means the
1536 charset whose id is N is already found.
1538 It may lookup a translation table TABLE if supplied. */
1541 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1542 const unsigned char *ptr
;
1543 EMACS_INT nchars
, nbytes
;
1544 Lisp_Object charsets
, table
;
1547 const unsigned char *pend
= ptr
+ nbytes
;
1549 if (nchars
== nbytes
)
1552 ASET (charsets
, charset_ascii
, Qt
);
1559 c
= translate_char (table
, c
);
1560 if (ASCII_BYTE_P (c
))
1561 ASET (charsets
, charset_ascii
, Qt
);
1563 ASET (charsets
, charset_eight_bit
, Qt
);
1570 int c
= STRING_CHAR_ADVANCE (ptr
);
1571 struct charset
*charset
;
1574 c
= translate_char (table
, c
);
1575 charset
= CHAR_CHARSET (c
);
1576 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1581 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1583 doc
: /* Return a list of charsets in the region between BEG and END.
1584 BEG and END are buffer positions.
1585 Optional arg TABLE if non-nil is a translation table to look up.
1587 If the current buffer is unibyte, the returned list may contain
1588 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1590 Lisp_Object beg
, end
, table
;
1592 Lisp_Object charsets
;
1593 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1596 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1598 validate_region (&beg
, &end
);
1599 from
= XFASTINT (beg
);
1600 stop
= to
= XFASTINT (end
);
1602 if (from
< GPT
&& GPT
< to
)
1605 stop_byte
= GPT_BYTE
;
1608 stop_byte
= CHAR_TO_BYTE (stop
);
1610 from_byte
= CHAR_TO_BYTE (from
);
1612 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1615 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1616 stop_byte
- from_byte
, charsets
, table
,
1620 from
= stop
, from_byte
= stop_byte
;
1621 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1628 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1629 if (!NILP (AREF (charsets
, i
)))
1630 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1634 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1636 doc
: /* Return a list of charsets in STR.
1637 Optional arg TABLE if non-nil is a translation table to look up.
1639 If STR is unibyte, the returned list may contain
1640 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1642 Lisp_Object str
, table
;
1644 Lisp_Object charsets
;
1650 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1651 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1653 STRING_MULTIBYTE (str
));
1655 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1656 if (!NILP (AREF (charsets
, i
)))
1657 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1663 /* Return a unified character code for C (>= 0x110000). VAL is a
1664 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1667 maybe_unify_char (c
, val
)
1671 struct charset
*charset
;
1678 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1679 load_charset (charset
, 1);
1680 if (! inhibit_load_charset_map
)
1682 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1688 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1689 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1698 /* Return a character corresponding to the code-point CODE of
1702 decode_char (charset
, code
)
1703 struct charset
*charset
;
1707 enum charset_method method
= CHARSET_METHOD (charset
);
1709 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1712 if (method
== CHARSET_METHOD_SUBSET
)
1714 Lisp_Object subset_info
;
1716 subset_info
= CHARSET_SUBSET (charset
);
1717 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1718 code
-= XINT (AREF (subset_info
, 3));
1719 if (code
< XFASTINT (AREF (subset_info
, 1))
1720 || code
> XFASTINT (AREF (subset_info
, 2)))
1723 c
= DECODE_CHAR (charset
, code
);
1725 else if (method
== CHARSET_METHOD_SUPERSET
)
1727 Lisp_Object parents
;
1729 parents
= CHARSET_SUPERSET (charset
);
1731 for (; CONSP (parents
); parents
= XCDR (parents
))
1733 int id
= XINT (XCAR (XCAR (parents
)));
1734 int code_offset
= XINT (XCDR (XCAR (parents
)));
1735 unsigned this_code
= code
- code_offset
;
1737 charset
= CHARSET_FROM_ID (id
);
1738 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1744 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1748 if (method
== CHARSET_METHOD_MAP
)
1750 Lisp_Object decoder
;
1752 decoder
= CHARSET_DECODER (charset
);
1753 if (! VECTORP (decoder
))
1755 load_charset (charset
, 1);
1756 decoder
= CHARSET_DECODER (charset
);
1758 if (VECTORP (decoder
))
1759 c
= XINT (AREF (decoder
, char_index
));
1761 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1763 else /* method == CHARSET_METHOD_OFFSET */
1765 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1766 if (CHARSET_UNIFIED_P (charset
)
1767 && c
> MAX_UNICODE_CHAR
)
1768 MAYBE_UNIFY_CHAR (c
);
1775 /* Variable used temporarily by the macro ENCODE_CHAR. */
1776 Lisp_Object charset_work
;
1778 /* Return a code-point of C in CHARSET. If C doesn't belong to
1779 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
1783 encode_char (charset
, c
)
1784 struct charset
*charset
;
1788 enum charset_method method
= CHARSET_METHOD (charset
);
1790 if (CHARSET_UNIFIED_P (charset
))
1792 Lisp_Object deunifier
, deunified
;
1793 int code_index
= -1;
1795 deunifier
= CHARSET_DEUNIFIER (charset
);
1796 if (! CHAR_TABLE_P (deunifier
))
1798 load_charset (charset
, 2);
1799 deunifier
= CHARSET_DEUNIFIER (charset
);
1801 if (CHAR_TABLE_P (deunifier
))
1803 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1805 if (INTEGERP (deunified
))
1806 code_index
= XINT (deunified
);
1810 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1812 if (code_index
>= 0)
1813 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1816 if (method
== CHARSET_METHOD_SUBSET
)
1818 Lisp_Object subset_info
;
1819 struct charset
*this_charset
;
1821 subset_info
= CHARSET_SUBSET (charset
);
1822 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1823 code
= ENCODE_CHAR (this_charset
, c
);
1824 if (code
== CHARSET_INVALID_CODE (this_charset
)
1825 || code
< XFASTINT (AREF (subset_info
, 1))
1826 || code
> XFASTINT (AREF (subset_info
, 2)))
1827 return CHARSET_INVALID_CODE (charset
);
1828 code
+= XINT (AREF (subset_info
, 3));
1832 if (method
== CHARSET_METHOD_SUPERSET
)
1834 Lisp_Object parents
;
1836 parents
= CHARSET_SUPERSET (charset
);
1837 for (; CONSP (parents
); parents
= XCDR (parents
))
1839 int id
= XINT (XCAR (XCAR (parents
)));
1840 int code_offset
= XINT (XCDR (XCAR (parents
)));
1841 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1843 code
= ENCODE_CHAR (this_charset
, c
);
1844 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1845 return code
+ code_offset
;
1847 return CHARSET_INVALID_CODE (charset
);
1850 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1851 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1852 return CHARSET_INVALID_CODE (charset
);
1854 if (method
== CHARSET_METHOD_MAP
)
1856 Lisp_Object encoder
;
1859 encoder
= CHARSET_ENCODER (charset
);
1860 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1862 load_charset (charset
, 2);
1863 encoder
= CHARSET_ENCODER (charset
);
1865 if (CHAR_TABLE_P (encoder
))
1867 val
= CHAR_TABLE_REF (encoder
, c
);
1869 return CHARSET_INVALID_CODE (charset
);
1871 if (! CHARSET_COMPACT_CODES_P (charset
))
1872 code
= INDEX_TO_CODE_POINT (charset
, code
);
1876 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1877 code
= INDEX_TO_CODE_POINT (charset
, code
);
1880 else /* method == CHARSET_METHOD_OFFSET */
1882 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1884 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1891 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1892 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1893 Return nil if CODE-POINT is not valid in CHARSET.
1895 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1897 Optional argument RESTRICTION specifies a way to map the pair of CCS
1898 and CODE-POINT to a character. Currently not supported and just ignored. */)
1899 (charset
, code_point
, restriction
)
1900 Lisp_Object charset
, code_point
, restriction
;
1904 struct charset
*charsetp
;
1906 CHECK_CHARSET_GET_ID (charset
, id
);
1907 if (CONSP (code_point
))
1909 CHECK_NATNUM_CAR (code_point
);
1910 CHECK_NATNUM_CDR (code_point
);
1911 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1915 CHECK_NATNUM (code_point
);
1916 code
= XINT (code_point
);
1918 charsetp
= CHARSET_FROM_ID (id
);
1919 c
= DECODE_CHAR (charsetp
, code
);
1920 return (c
>= 0 ? make_number (c
) : Qnil
);
1924 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1925 doc
: /* Encode the character CH into a code-point of CHARSET.
1926 Return nil if CHARSET doesn't include CH.
1928 Optional argument RESTRICTION specifies a way to map CH to a
1929 code-point in CCS. Currently not supported and just ignored. */)
1930 (ch
, charset
, restriction
)
1931 Lisp_Object ch
, charset
, restriction
;
1935 struct charset
*charsetp
;
1937 CHECK_CHARSET_GET_ID (charset
, id
);
1939 charsetp
= CHARSET_FROM_ID (id
);
1940 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1941 if (code
== CHARSET_INVALID_CODE (charsetp
))
1943 if (code
> 0x7FFFFFF)
1944 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1945 return make_number (code
);
1949 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1951 /* Return a character of CHARSET whose position codes are CODEn.
1953 CODE1 through CODE4 are optional, but if you don't supply sufficient
1954 position codes, it is assumed that the minimum code in each dimension
1956 (charset
, code1
, code2
, code3
, code4
)
1957 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1960 struct charset
*charsetp
;
1964 CHECK_CHARSET_GET_ID (charset
, id
);
1965 charsetp
= CHARSET_FROM_ID (id
);
1967 dimension
= CHARSET_DIMENSION (charsetp
);
1969 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1970 ? 0 : CHARSET_MIN_CODE (charsetp
));
1973 CHECK_NATNUM (code1
);
1974 if (XFASTINT (code1
) >= 0x100)
1975 args_out_of_range (make_number (0xFF), code1
);
1976 code
= XFASTINT (code1
);
1982 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1985 CHECK_NATNUM (code2
);
1986 if (XFASTINT (code2
) >= 0x100)
1987 args_out_of_range (make_number (0xFF), code2
);
1988 code
|= XFASTINT (code2
);
1995 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1998 CHECK_NATNUM (code3
);
1999 if (XFASTINT (code3
) >= 0x100)
2000 args_out_of_range (make_number (0xFF), code3
);
2001 code
|= XFASTINT (code3
);
2008 code
|= charsetp
->code_space
[0];
2011 CHECK_NATNUM (code4
);
2012 if (XFASTINT (code4
) >= 0x100)
2013 args_out_of_range (make_number (0xFF), code4
);
2014 code
|= XFASTINT (code4
);
2021 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
2023 c
= DECODE_CHAR (charsetp
, code
);
2025 error ("Invalid code(s)");
2026 return make_number (c
);
2030 /* Return the first charset in CHARSET_LIST that contains C.
2031 CHARSET_LIST is a list of charset IDs. If it is nil, use
2032 Vcharset_ordered_list. */
2035 char_charset (c
, charset_list
, code_return
)
2037 Lisp_Object charset_list
;
2038 unsigned *code_return
;
2042 if (NILP (charset_list
))
2043 charset_list
= Vcharset_ordered_list
;
2047 while (CONSP (charset_list
))
2049 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2050 unsigned code
= ENCODE_CHAR (charset
, c
);
2052 if (code
!= CHARSET_INVALID_CODE (charset
))
2055 *code_return
= code
;
2058 charset_list
= XCDR (charset_list
);
2059 if (c
<= MAX_UNICODE_CHAR
2060 && EQ (charset_list
, Vcharset_non_preferred_head
))
2061 return CHARSET_FROM_ID (charset_unicode
);
2063 return (maybe_null
? NULL
2064 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2065 : CHARSET_FROM_ID (charset_eight_bit
));
2069 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2071 /*Return list of charset and one to four position-codes of CH.
2072 The charset is decided by the current priority order of charsets.
2073 A position-code is a byte value of each dimension of the code-point of
2074 CH in the charset. */)
2078 struct charset
*charset
;
2083 CHECK_CHARACTER (ch
);
2085 charset
= CHAR_CHARSET (c
);
2088 code
= ENCODE_CHAR (charset
, c
);
2089 if (code
== CHARSET_INVALID_CODE (charset
))
2091 dimension
= CHARSET_DIMENSION (charset
);
2092 for (val
= Qnil
; dimension
> 0; dimension
--)
2094 val
= Fcons (make_number (code
& 0xFF), val
);
2097 return Fcons (CHARSET_NAME (charset
), val
);
2101 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
2102 doc
: /* Return the charset of highest priority that contains CH. */)
2106 struct charset
*charset
;
2108 CHECK_CHARACTER (ch
);
2109 charset
= CHAR_CHARSET (XINT (ch
));
2110 return (CHARSET_NAME (charset
));
2114 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2116 Return charset of a character in the current buffer at position POS.
2117 If POS is nil, it defauls to the current point.
2118 If POS is out of range, the value is nil. */)
2123 struct charset
*charset
;
2125 ch
= Fchar_after (pos
);
2126 if (! INTEGERP (ch
))
2128 charset
= CHAR_CHARSET (XINT (ch
));
2129 return (CHARSET_NAME (charset
));
2133 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2135 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2137 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2138 by their DIMENSION, CHARS, and FINAL-CHAR,
2139 whereas Emacs distinguishes them by charset symbol.
2140 See the documentation of the function `charset-info' for the meanings of
2141 DIMENSION, CHARS, and FINAL-CHAR. */)
2142 (dimension
, chars
, final_char
)
2143 Lisp_Object dimension
, chars
, final_char
;
2148 check_iso_charset_parameter (dimension
, chars
, final_char
);
2149 chars_flag
= XFASTINT (chars
) == 96;
2150 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2151 XFASTINT (final_char
));
2152 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2156 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2160 Clear temporary charset mapping tables.
2161 It should be called only from temacs invoked for dumping. */)
2165 struct charset
*charset
;
2168 if (temp_charset_work
)
2170 free (temp_charset_work
);
2171 temp_charset_work
= NULL
;
2174 if (CHAR_TABLE_P (Vchar_unify_table
))
2175 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2180 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2181 Scharset_priority_list
, 0, 1, 0,
2182 doc
: /* Return the list of charsets ordered by priority.
2183 HIGHESTP non-nil means just return the highest priority one. */)
2185 Lisp_Object highestp
;
2187 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2189 if (!NILP (highestp
))
2190 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2192 while (!NILP (list
))
2194 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2197 return Fnreverse (val
);
2200 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2202 doc
: /* Assign higher priority to the charsets given as arguments.
2203 usage: (set-charset-priority &rest charsets) */)
2208 Lisp_Object new_head
, old_list
, arglist
[2];
2209 Lisp_Object list_2022
, list_emacs_mule
;
2212 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2214 for (i
= 0; i
< nargs
; i
++)
2216 CHECK_CHARSET_GET_ID (args
[i
], id
);
2217 if (! NILP (Fmemq (make_number (id
), old_list
)))
2219 old_list
= Fdelq (make_number (id
), old_list
);
2220 new_head
= Fcons (make_number (id
), new_head
);
2223 arglist
[0] = Fnreverse (new_head
);
2224 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2225 Vcharset_ordered_list
= Fnconc (2, arglist
);
2226 charset_ordered_list_tick
++;
2228 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2229 CONSP (old_list
); old_list
= XCDR (old_list
))
2231 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2232 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2233 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2234 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2236 Viso_2022_charset_list
= Fnreverse (list_2022
);
2237 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2242 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2244 doc
: /* Internal use only.
2245 Return charset identification number of CHARSET. */)
2247 Lisp_Object charset
;
2251 CHECK_CHARSET_GET_ID (charset
, id
);
2252 return make_number (id
);
2259 Lisp_Object tempdir
;
2260 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2261 if (access (SDATA (tempdir
), 0) < 0)
2263 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2264 Emacs will not function correctly without the character map files.\n\
2265 Please check your installation!\n",
2267 /* TODO should this be a fatal error? (Bug#909) */
2270 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2275 init_charset_once ()
2279 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2280 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2281 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2282 iso_charset_table
[i
][j
][k
] = -1;
2284 for (i
= 0; i
< 256; i
++)
2285 emacs_mule_charset
[i
] = NULL
;
2287 charset_jisx0201_roman
= -1;
2288 charset_jisx0208_1978
= -1;
2289 charset_jisx0208
= -1;
2291 for (i
= 0; i
< 128; i
++)
2292 unibyte_to_multibyte_table
[i
] = i
;
2293 for (; i
< 256; i
++)
2294 unibyte_to_multibyte_table
[i
] = BYTE8_TO_CHAR (i
);
2302 DEFSYM (Qcharsetp
, "charsetp");
2304 DEFSYM (Qascii
, "ascii");
2305 DEFSYM (Qunicode
, "unicode");
2306 DEFSYM (Qemacs
, "emacs");
2307 DEFSYM (Qeight_bit
, "eight-bit");
2308 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2313 staticpro (&Vcharset_ordered_list
);
2314 Vcharset_ordered_list
= Qnil
;
2316 staticpro (&Viso_2022_charset_list
);
2317 Viso_2022_charset_list
= Qnil
;
2319 staticpro (&Vemacs_mule_charset_list
);
2320 Vemacs_mule_charset_list
= Qnil
;
2322 /* Don't staticpro them here. It's done in syms_of_fns. */
2323 QCtest
= intern (":test");
2324 Qeq
= intern ("eq");
2326 staticpro (&Vcharset_hash_table
);
2328 Lisp_Object args
[2];
2331 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2334 charset_table_size
= 128;
2335 charset_table
= ((struct charset
*)
2336 xmalloc (sizeof (struct charset
) * charset_table_size
));
2337 charset_table_used
= 0;
2339 defsubr (&Scharsetp
);
2340 defsubr (&Smap_charset_chars
);
2341 defsubr (&Sdefine_charset_internal
);
2342 defsubr (&Sdefine_charset_alias
);
2343 defsubr (&Scharset_plist
);
2344 defsubr (&Sset_charset_plist
);
2345 defsubr (&Sunify_charset
);
2346 defsubr (&Sget_unused_iso_final_char
);
2347 defsubr (&Sdeclare_equiv_charset
);
2348 defsubr (&Sfind_charset_region
);
2349 defsubr (&Sfind_charset_string
);
2350 defsubr (&Sdecode_char
);
2351 defsubr (&Sencode_char
);
2352 defsubr (&Ssplit_char
);
2353 defsubr (&Smake_char
);
2354 defsubr (&Schar_charset
);
2355 defsubr (&Scharset_after
);
2356 defsubr (&Siso_charset
);
2357 defsubr (&Sclear_charset_maps
);
2358 defsubr (&Scharset_priority_list
);
2359 defsubr (&Sset_charset_priority
);
2360 defsubr (&Scharset_id_internal
);
2362 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2363 doc
: /* *List of directories to search for charset map files. */);
2364 Vcharset_map_path
= Qnil
;
2366 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2367 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2368 inhibit_load_charset_map
= 0;
2370 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2371 doc
: /* List of all charsets ever defined. */);
2372 Vcharset_list
= Qnil
;
2374 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2375 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2376 If the current language environment is for multiple languages (e.g. "Latin-1"),
2377 the value may be a list of mnemonics. */);
2378 Vcurrent_iso639_language
= Qnil
;
2381 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2382 0, 127, 'B', -1, 0, 1, 0, 0);
2384 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2385 0, 255, -1, -1, -1, 1, 0, 0);
2387 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2388 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2390 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2391 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2393 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2394 128, 255, -1, 0, -1, 0, 1,
2395 MAX_5_BYTE_CHAR
+ 1);
2400 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2401 (do not change this comment) */