1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
36 #include "character.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* List of all charsets. This variable is used only from Emacs
59 Lisp_Object Vcharset_list
;
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table
;
65 /* Table of struct charset. */
66 struct charset
*charset_table
;
68 static int charset_table_size
;
69 static int charset_table_used
;
71 Lisp_Object Qcharsetp
;
73 /* Special charset symbols. */
75 Lisp_Object Qeight_bit
;
76 Lisp_Object Qiso_8859_1
;
80 /* The corresponding charsets. */
82 int charset_eight_bit
;
83 int charset_iso_8859_1
;
87 /* The other special charsets. */
88 int charset_jisx0201_roman
;
89 int charset_jisx0208_1978
;
93 /* Value of charset attribute `charset-iso-plane'. */
96 /* Charset of unibyte characters. */
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list
;
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
104 Lisp_Object Vcharset_non_preferred_head
;
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
109 unsigned short charset_ordered_list_tick
;
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list
;
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list
;
117 int emacs_mule_charset
[256];
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
123 Lisp_Object Vcharset_map_path
;
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map
;
128 Lisp_Object Vcurrent_iso639_language
;
130 #define CODE_POINT_TO_INDEX(charset, code) \
131 ((charset)->code_linear_p \
132 ? (code) - (charset)->min_code \
133 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
134 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
135 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
136 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
137 ? (((((code) >> 24) - (charset)->code_space[12]) \
138 * (charset)->code_space[11]) \
139 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
140 * (charset)->code_space[7]) \
141 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
142 * (charset)->code_space[3]) \
143 + (((code) & 0xFF) - (charset)->code_space[0]) \
144 - ((charset)->char_index_offset)) \
148 /* Convert the character index IDX to code-point CODE for CHARSET.
149 It is assumed that IDX is in a valid range. */
151 #define INDEX_TO_CODE_POINT(charset, idx) \
152 ((charset)->code_linear_p \
153 ? (idx) + (charset)->min_code \
154 : (idx += (charset)->char_index_offset, \
155 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
156 | (((charset)->code_space[4] \
157 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
159 | (((charset)->code_space[8] \
160 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
162 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
165 /* Structure to hold mapping tables for a charset. Used by temacs
166 invoked for dumping. */
170 /* The current charset for which the following tables are setup. */
171 struct charset
*current
;
173 /* 1 iff the following table is used for encoder. */
176 /* When the following table is used for encoding, mininum and
177 maxinum character of the current charset. */
178 int min_char
, max_char
;
180 /* A Unicode character correspoinding to the code indice 0 (i.e. the
181 minimum code-point) of the current charset, or -1 if the code
182 indice 0 is not a Unicode character. This is checked when
183 table.encoder[CHAR] is zero. */
187 /* Table mapping code-indices (not code-points) of the current
188 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
189 doesn't belong to the current charset. */
190 int decoder
[0x10000];
191 /* Table mapping Unicode characters to code-indices of the current
192 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
193 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
194 (0x20000..0x2FFFF). Note that there is no charset map that
195 uses both SMP and SIP. */
196 unsigned short encoder
[0x20000];
198 } *temp_charset_work
;
200 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
203 temp_charset_work->zero_index_char = (C); \
204 else if ((C) < 0x20000) \
205 temp_charset_work->table.encoder[(C)] = (CODE); \
207 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
210 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
211 ((C) == temp_charset_work->zero_index_char ? 0 \
212 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
213 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
214 : temp_charset_work->table.encoder[(C) - 0x10000] \
215 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
217 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
218 (temp_charset_work->table.decoder[(CODE)] = (C))
220 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
221 (temp_charset_work->table.decoder[(CODE)])
224 /* Set to 1 to warn that a charset map is loaded and thus a buffer
225 text and a string data may be relocated. */
226 int charset_map_loaded
;
228 struct charset_map_entries
234 struct charset_map_entries
*next
;
237 /* Load the mapping information of CHARSET from ENTRIES for
238 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
239 encoding (CONTROL_FLAG == 2).
241 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
242 and CHARSET->fast_map.
244 If CONTROL_FLAG is 1, setup the following tables according to
245 CHARSET->method and inhibit_load_charset_map.
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
250 ----------------------+--------------------+---------------------------
251 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
253 If CONTROL_FLAG is 2, setup the following tables.
255 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
256 ----------------------+--------------------+---------------------------
257 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
258 ----------------------+--------------------+--------------------------
259 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
263 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
265 Lisp_Object vec
, table
;
266 unsigned max_code
= CHARSET_MAX_CODE (charset
);
267 int ascii_compatible_p
= charset
->ascii_compatible_p
;
268 int min_char
, max_char
, nonascii_min_char
;
270 unsigned char *fast_map
= charset
->fast_map
;
277 if (! inhibit_load_charset_map
)
279 if (control_flag
== 1)
281 if (charset
->method
== CHARSET_METHOD_MAP
)
283 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
285 vec
= CHARSET_DECODER (charset
)
286 = Fmake_vector (make_number (n
), make_number (-1));
290 char_table_set_range (Vchar_unify_table
,
291 charset
->min_char
, charset
->max_char
,
297 table
= Fmake_char_table (Qnil
, Qnil
);
298 if (charset
->method
== CHARSET_METHOD_MAP
)
299 CHARSET_ENCODER (charset
) = table
;
301 CHARSET_DEUNIFIER (charset
) = table
;
306 if (! temp_charset_work
)
307 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
308 if (control_flag
== 1)
310 memset (temp_charset_work
->table
.decoder
, -1,
311 sizeof (int) * 0x10000);
315 memset (temp_charset_work
->table
.encoder
, 0,
316 sizeof (unsigned short) * 0x20000);
317 temp_charset_work
->zero_index_char
= -1;
319 temp_charset_work
->current
= charset
;
320 temp_charset_work
->for_encoder
= (control_flag
== 2);
323 charset_map_loaded
= 1;
326 min_char
= max_char
= entries
->entry
[0].c
;
327 nonascii_min_char
= MAX_CHAR
;
328 for (i
= 0; i
< n_entries
; i
++)
331 int from_index
, to_index
;
333 int idx
= i
% 0x10000;
335 if (i
> 0 && idx
== 0)
336 entries
= entries
->next
;
337 from
= entries
->entry
[idx
].from
;
338 to
= entries
->entry
[idx
].to
;
339 from_c
= entries
->entry
[idx
].c
;
340 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
343 to_index
= from_index
;
348 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
349 to_c
= from_c
+ (to_index
- from_index
);
351 if (from_index
< 0 || to_index
< 0)
356 else if (from_c
< min_char
)
359 if (control_flag
== 1)
361 if (charset
->method
== CHARSET_METHOD_MAP
)
362 for (; from_index
<= to_index
; from_index
++, from_c
++)
363 ASET (vec
, from_index
, make_number (from_c
));
365 for (; from_index
<= to_index
; from_index
++, from_c
++)
366 CHAR_TABLE_SET (Vchar_unify_table
,
367 CHARSET_CODE_OFFSET (charset
) + from_index
,
368 make_number (from_c
));
370 else if (control_flag
== 2)
372 if (charset
->method
== CHARSET_METHOD_MAP
373 && CHARSET_COMPACT_CODES_P (charset
))
374 for (; from_index
<= to_index
; from_index
++, from_c
++)
376 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
378 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
379 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
382 for (; from_index
<= to_index
; from_index
++, from_c
++)
384 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
385 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
388 else if (control_flag
== 3)
389 for (; from_index
<= to_index
; from_index
++, from_c
++)
390 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
391 else if (control_flag
== 4)
392 for (; from_index
<= to_index
; from_index
++, from_c
++)
393 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
394 else /* control_flag == 0 */
396 if (ascii_compatible_p
)
398 if (! ASCII_BYTE_P (from_c
))
400 if (from_c
< nonascii_min_char
)
401 nonascii_min_char
= from_c
;
403 else if (! ASCII_BYTE_P (to_c
))
405 nonascii_min_char
= 0x80;
409 for (; from_c
<= to_c
; from_c
++)
410 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
414 if (control_flag
== 0)
416 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
417 ? nonascii_min_char
: min_char
);
418 CHARSET_MAX_CHAR (charset
) = max_char
;
420 else if (control_flag
== 4)
422 temp_charset_work
->min_char
= min_char
;
423 temp_charset_work
->max_char
= max_char
;
428 /* Read a hexadecimal number (preceded by "0x") from the file FP while
429 paying attention to comment character '#'. */
431 static INLINE
unsigned
432 read_hex (FILE *fp
, int *eof
)
437 while ((c
= getc (fp
)) != EOF
)
441 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
445 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
457 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
459 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
461 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
462 n
= (n
* 10) + c
- '0';
468 /* Return a mapping vector for CHARSET loaded from MAPFILE.
469 Each line of MAPFILE has this form
471 where 0xAAAA is a code-point and 0xCCCC is the corresponding
472 character code, or this form
474 where 0xAAAA and 0xBBBB are code-points specifying a range, and
475 0xCCCC is the first character code of the range.
477 The returned vector has this form:
478 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
479 where CODE1 is a code-point or a cons of code-points specifying a
482 Note that this function uses `openp' to open MAPFILE but ignores
483 `file-name-handler-alist' to avoid running any Lisp code. */
486 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
, int control_flag
)
488 unsigned min_code
= CHARSET_MIN_CODE (charset
);
489 unsigned max_code
= CHARSET_MAX_CODE (charset
);
493 Lisp_Object suffixes
;
494 struct charset_map_entries
*head
, *entries
;
495 int n_entries
, count
;
498 suffixes
= Fcons (build_string (".map"),
499 Fcons (build_string (".TXT"), Qnil
));
501 count
= SPECPDL_INDEX ();
502 specbind (Qfile_name_handler_alist
, Qnil
);
503 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
504 unbind_to (count
, Qnil
);
506 || ! (fp
= fdopen (fd
, "r")))
507 error ("Failure in loading charset map: %S", SDATA (mapfile
));
509 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
510 large (larger than MAX_ALLOCA). */
511 SAFE_ALLOCA (head
, struct charset_map_entries
*,
512 sizeof (struct charset_map_entries
));
514 memset (entries
, 0, sizeof (struct charset_map_entries
));
524 from
= read_hex (fp
, &eof
);
527 if (getc (fp
) == '-')
528 to
= read_hex (fp
, &eof
);
531 c
= (int) read_hex (fp
, &eof
);
533 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
536 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
538 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
539 sizeof (struct charset_map_entries
));
540 entries
= entries
->next
;
541 memset (entries
, 0, sizeof (struct charset_map_entries
));
543 idx
= n_entries
% 0x10000;
544 entries
->entry
[idx
].from
= from
;
545 entries
->entry
[idx
].to
= to
;
546 entries
->entry
[idx
].c
= c
;
551 load_charset_map (charset
, head
, n_entries
, control_flag
);
556 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
558 unsigned min_code
= CHARSET_MIN_CODE (charset
);
559 unsigned max_code
= CHARSET_MAX_CODE (charset
);
560 struct charset_map_entries
*head
, *entries
;
562 int len
= ASIZE (vec
);
568 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
572 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
573 large (larger than MAX_ALLOCA). */
574 SAFE_ALLOCA (head
, struct charset_map_entries
*,
575 sizeof (struct charset_map_entries
));
577 memset (entries
, 0, sizeof (struct charset_map_entries
));
580 for (i
= 0; i
< len
; i
+= 2)
582 Lisp_Object val
, val2
;
594 from
= XFASTINT (val
);
595 to
= XFASTINT (val2
);
600 from
= to
= XFASTINT (val
);
602 val
= AREF (vec
, i
+ 1);
606 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
609 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
611 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
612 sizeof (struct charset_map_entries
));
613 entries
= entries
->next
;
614 memset (entries
, 0, sizeof (struct charset_map_entries
));
616 idx
= n_entries
% 0x10000;
617 entries
->entry
[idx
].from
= from
;
618 entries
->entry
[idx
].to
= to
;
619 entries
->entry
[idx
].c
= c
;
623 load_charset_map (charset
, head
, n_entries
, control_flag
);
628 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
629 map it is (see the comment of load_charset_map for the detail). */
632 load_charset (struct charset
*charset
, int control_flag
)
636 if (inhibit_load_charset_map
638 && charset
== temp_charset_work
->current
639 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
642 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
643 map
= CHARSET_MAP (charset
);
644 else if (CHARSET_UNIFIED_P (charset
))
645 map
= CHARSET_UNIFY_MAP (charset
);
647 load_charset_map_from_file (charset
, map
, control_flag
);
649 load_charset_map_from_vector (charset
, map
, control_flag
);
653 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
654 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
657 return (CHARSETP (object
) ? Qt
: Qnil
);
661 void map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
662 Lisp_Object function
, Lisp_Object arg
,
663 unsigned from
, unsigned to
);
666 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
), Lisp_Object function
, Lisp_Object arg
, unsigned int from
, unsigned int to
)
668 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
669 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
674 range
= Fcons (Qnil
, Qnil
);
677 c
= temp_charset_work
->min_char
;
678 stop
= (temp_charset_work
->max_char
< 0x20000
679 ? temp_charset_work
->max_char
: 0xFFFF);
683 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
685 if (index
>= from_idx
&& index
<= to_idx
)
687 if (NILP (XCAR (range
)))
688 XSETCAR (range
, make_number (c
));
690 else if (! NILP (XCAR (range
)))
692 XSETCDR (range
, make_number (c
- 1));
694 (*c_function
) (arg
, range
);
696 call2 (function
, range
, arg
);
697 XSETCAR (range
, Qnil
);
701 if (c
== temp_charset_work
->max_char
)
703 if (! NILP (XCAR (range
)))
705 XSETCDR (range
, make_number (c
));
707 (*c_function
) (arg
, range
);
709 call2 (function
, range
, arg
);
714 stop
= temp_charset_work
->max_char
;
722 map_charset_chars (void (*c_function
)(Lisp_Object
, Lisp_Object
), Lisp_Object function
,
723 Lisp_Object arg
, struct charset
*charset
, unsigned from
, unsigned to
)
728 partial
= (from
> CHARSET_MIN_CODE (charset
)
729 || to
< CHARSET_MAX_CODE (charset
));
731 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
733 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
734 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
735 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
736 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
738 if (CHARSET_UNIFIED_P (charset
))
740 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
741 load_charset (charset
, 2);
742 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
743 map_char_table_for_charset (c_function
, function
,
744 CHARSET_DEUNIFIER (charset
), arg
,
745 partial
? charset
: NULL
, from
, to
);
747 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
750 range
= Fcons (make_number (from_c
), make_number (to_c
));
752 (*c_function
) (arg
, range
);
754 call2 (function
, range
, arg
);
756 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
758 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
759 load_charset (charset
, 2);
760 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
761 map_char_table_for_charset (c_function
, function
,
762 CHARSET_ENCODER (charset
), arg
,
763 partial
? charset
: NULL
, from
, to
);
765 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
767 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
769 Lisp_Object subset_info
;
772 subset_info
= CHARSET_SUBSET (charset
);
773 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
774 offset
= XINT (AREF (subset_info
, 3));
776 if (from
< XFASTINT (AREF (subset_info
, 1)))
777 from
= XFASTINT (AREF (subset_info
, 1));
779 if (to
> XFASTINT (AREF (subset_info
, 2)))
780 to
= XFASTINT (AREF (subset_info
, 2));
781 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
783 else /* i.e. CHARSET_METHOD_SUPERSET */
787 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
788 parents
= XCDR (parents
))
791 unsigned this_from
, this_to
;
793 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
794 offset
= XINT (XCDR (XCAR (parents
)));
795 this_from
= from
> offset
? from
- offset
: 0;
796 this_to
= to
> offset
? to
- offset
: 0;
797 if (this_from
< CHARSET_MIN_CODE (charset
))
798 this_from
= CHARSET_MIN_CODE (charset
);
799 if (this_to
> CHARSET_MAX_CODE (charset
))
800 this_to
= CHARSET_MAX_CODE (charset
);
801 map_charset_chars (c_function
, function
, arg
, charset
,
807 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
808 doc
: /* Call FUNCTION for all characters in CHARSET.
809 FUNCTION is called with an argument RANGE and the optional 3rd
812 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
813 characters contained in CHARSET.
815 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
816 range of code points (in CHARSET) of target characters. */)
817 (Lisp_Object function
, Lisp_Object charset
, Lisp_Object arg
, Lisp_Object from_code
, Lisp_Object to_code
)
822 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
823 if (NILP (from_code
))
824 from
= CHARSET_MIN_CODE (cs
);
827 CHECK_NATNUM (from_code
);
828 from
= XINT (from_code
);
829 if (from
< CHARSET_MIN_CODE (cs
))
830 from
= CHARSET_MIN_CODE (cs
);
833 to
= CHARSET_MAX_CODE (cs
);
836 CHECK_NATNUM (to_code
);
838 if (to
> CHARSET_MAX_CODE (cs
))
839 to
= CHARSET_MAX_CODE (cs
);
841 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
846 /* Define a charset according to the arguments. The Nth argument is
847 the Nth attribute of the charset (the last attribute `charset-id'
848 is not included). See the docstring of `define-charset' for the
851 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
852 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
853 doc
: /* For internal use only.
854 usage: (define-charset-internal ...) */)
855 (int nargs
, Lisp_Object
*args
)
857 /* Charset attr vector. */
861 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
863 struct charset charset
;
866 int new_definition_p
;
869 if (nargs
!= charset_arg_max
)
870 return Fsignal (Qwrong_number_of_arguments
,
871 Fcons (intern ("define-charset-internal"),
872 make_number (nargs
)));
874 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
876 CHECK_SYMBOL (args
[charset_arg_name
]);
877 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
879 val
= args
[charset_arg_code_space
];
880 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
882 int min_byte
, max_byte
;
884 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
885 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
886 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
887 error ("Invalid :code-space value");
888 charset
.code_space
[i
* 4] = min_byte
;
889 charset
.code_space
[i
* 4 + 1] = max_byte
;
890 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
891 nchars
*= charset
.code_space
[i
* 4 + 2];
892 charset
.code_space
[i
* 4 + 3] = nchars
;
897 val
= args
[charset_arg_dimension
];
899 charset
.dimension
= dimension
;
903 charset
.dimension
= XINT (val
);
904 if (charset
.dimension
< 1 || charset
.dimension
> 4)
905 args_out_of_range_3 (val
, make_number (1), make_number (4));
908 charset
.code_linear_p
909 = (charset
.dimension
== 1
910 || (charset
.code_space
[2] == 256
911 && (charset
.dimension
== 2
912 || (charset
.code_space
[6] == 256
913 && (charset
.dimension
== 3
914 || charset
.code_space
[10] == 256)))));
916 if (! charset
.code_linear_p
)
918 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
919 memset (charset
.code_space_mask
, 0, 256);
920 for (i
= 0; i
< 4; i
++)
921 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
923 charset
.code_space_mask
[j
] |= (1 << i
);
926 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
928 charset
.min_code
= (charset
.code_space
[0]
929 | (charset
.code_space
[4] << 8)
930 | (charset
.code_space
[8] << 16)
931 | (charset
.code_space
[12] << 24));
932 charset
.max_code
= (charset
.code_space
[1]
933 | (charset
.code_space
[5] << 8)
934 | (charset
.code_space
[9] << 16)
935 | (charset
.code_space
[13] << 24));
936 charset
.char_index_offset
= 0;
938 val
= args
[charset_arg_min_code
];
948 CHECK_NUMBER_CAR (val
);
949 CHECK_NUMBER_CDR (val
);
950 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
952 if (code
< charset
.min_code
953 || code
> charset
.max_code
)
954 args_out_of_range_3 (make_number (charset
.min_code
),
955 make_number (charset
.max_code
), val
);
956 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
957 charset
.min_code
= code
;
960 val
= args
[charset_arg_max_code
];
970 CHECK_NUMBER_CAR (val
);
971 CHECK_NUMBER_CDR (val
);
972 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
974 if (code
< charset
.min_code
975 || code
> charset
.max_code
)
976 args_out_of_range_3 (make_number (charset
.min_code
),
977 make_number (charset
.max_code
), val
);
978 charset
.max_code
= code
;
981 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
983 val
= args
[charset_arg_invalid_code
];
986 if (charset
.min_code
> 0)
987 charset
.invalid_code
= 0;
990 XSETINT (val
, charset
.max_code
+ 1);
991 if (XINT (val
) == charset
.max_code
+ 1)
992 charset
.invalid_code
= charset
.max_code
+ 1;
994 error ("Attribute :invalid-code must be specified");
1000 charset
.invalid_code
= XFASTINT (val
);
1003 val
= args
[charset_arg_iso_final
];
1005 charset
.iso_final
= -1;
1009 if (XINT (val
) < '0' || XINT (val
) > 127)
1010 error ("Invalid iso-final-char: %d", XINT (val
));
1011 charset
.iso_final
= XINT (val
);
1014 val
= args
[charset_arg_iso_revision
];
1016 charset
.iso_revision
= -1;
1020 if (XINT (val
) > 63)
1021 args_out_of_range (make_number (63), val
);
1022 charset
.iso_revision
= XINT (val
);
1025 val
= args
[charset_arg_emacs_mule_id
];
1027 charset
.emacs_mule_id
= -1;
1031 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1032 error ("Invalid emacs-mule-id: %d", XINT (val
));
1033 charset
.emacs_mule_id
= XINT (val
);
1036 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1038 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1040 charset
.unified_p
= 0;
1042 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
1044 if (! NILP (args
[charset_arg_code_offset
]))
1046 val
= args
[charset_arg_code_offset
];
1049 charset
.method
= CHARSET_METHOD_OFFSET
;
1050 charset
.code_offset
= XINT (val
);
1052 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1053 charset
.min_char
= i
+ charset
.code_offset
;
1054 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1055 charset
.max_char
= i
+ charset
.code_offset
;
1056 if (charset
.max_char
> MAX_CHAR
)
1057 error ("Unsupported max char: %d", charset
.max_char
);
1059 i
= (charset
.min_char
>> 7) << 7;
1060 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1061 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1062 i
= (i
>> 12) << 12;
1063 for (; i
<= charset
.max_char
; i
+= 0x1000)
1064 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1065 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1066 charset
.ascii_compatible_p
= 1;
1068 else if (! NILP (args
[charset_arg_map
]))
1070 val
= args
[charset_arg_map
];
1071 ASET (attrs
, charset_map
, val
);
1072 charset
.method
= CHARSET_METHOD_MAP
;
1074 else if (! NILP (args
[charset_arg_subset
]))
1077 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1078 struct charset
*parent_charset
;
1080 val
= args
[charset_arg_subset
];
1081 parent
= Fcar (val
);
1082 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1083 parent_min_code
= Fnth (make_number (1), val
);
1084 CHECK_NATNUM (parent_min_code
);
1085 parent_max_code
= Fnth (make_number (2), val
);
1086 CHECK_NATNUM (parent_max_code
);
1087 parent_code_offset
= Fnth (make_number (3), val
);
1088 CHECK_NUMBER (parent_code_offset
);
1089 val
= Fmake_vector (make_number (4), Qnil
);
1090 ASET (val
, 0, make_number (parent_charset
->id
));
1091 ASET (val
, 1, parent_min_code
);
1092 ASET (val
, 2, parent_max_code
);
1093 ASET (val
, 3, parent_code_offset
);
1094 ASET (attrs
, charset_subset
, val
);
1096 charset
.method
= CHARSET_METHOD_SUBSET
;
1097 /* Here, we just copy the parent's fast_map. It's not accurate,
1098 but at least it works for quickly detecting which character
1099 DOESN'T belong to this charset. */
1100 for (i
= 0; i
< 190; i
++)
1101 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1103 /* We also copy these for parents. */
1104 charset
.min_char
= parent_charset
->min_char
;
1105 charset
.max_char
= parent_charset
->max_char
;
1107 else if (! NILP (args
[charset_arg_superset
]))
1109 val
= args
[charset_arg_superset
];
1110 charset
.method
= CHARSET_METHOD_SUPERSET
;
1111 val
= Fcopy_sequence (val
);
1112 ASET (attrs
, charset_superset
, val
);
1114 charset
.min_char
= MAX_CHAR
;
1115 charset
.max_char
= 0;
1116 for (; ! NILP (val
); val
= Fcdr (val
))
1118 Lisp_Object elt
, car_part
, cdr_part
;
1119 int this_id
, offset
;
1120 struct charset
*this_charset
;
1125 car_part
= XCAR (elt
);
1126 cdr_part
= XCDR (elt
);
1127 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1128 CHECK_NUMBER (cdr_part
);
1129 offset
= XINT (cdr_part
);
1133 CHECK_CHARSET_GET_ID (elt
, this_id
);
1136 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1138 this_charset
= CHARSET_FROM_ID (this_id
);
1139 if (charset
.min_char
> this_charset
->min_char
)
1140 charset
.min_char
= this_charset
->min_char
;
1141 if (charset
.max_char
< this_charset
->max_char
)
1142 charset
.max_char
= this_charset
->max_char
;
1143 for (i
= 0; i
< 190; i
++)
1144 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1148 error ("None of :code-offset, :map, :parents are specified");
1150 val
= args
[charset_arg_unify_map
];
1151 if (! NILP (val
) && !STRINGP (val
))
1153 ASET (attrs
, charset_unify_map
, val
);
1155 CHECK_LIST (args
[charset_arg_plist
]);
1156 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1158 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1160 if (charset
.hash_index
>= 0)
1162 new_definition_p
= 0;
1163 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1164 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1168 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1170 if (charset_table_used
== charset_table_size
)
1172 struct charset
*new_table
1173 = (struct charset
*) xmalloc (sizeof (struct charset
)
1174 * (charset_table_size
+ 16));
1175 memcpy (new_table
, charset_table
,
1176 sizeof (struct charset
) * charset_table_size
);
1177 charset_table_size
+= 16;
1178 charset_table
= new_table
;
1180 id
= charset_table_used
++;
1181 new_definition_p
= 1;
1184 ASET (attrs
, charset_id
, make_number (id
));
1186 charset_table
[id
] = charset
;
1188 if (charset
.method
== CHARSET_METHOD_MAP
)
1190 load_charset (&charset
, 0);
1191 charset_table
[id
] = charset
;
1194 if (charset
.iso_final
>= 0)
1196 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1197 charset
.iso_final
) = id
;
1198 if (new_definition_p
)
1199 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1200 Fcons (make_number (id
), Qnil
));
1201 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1202 charset_jisx0201_roman
= id
;
1203 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1204 charset_jisx0208_1978
= id
;
1205 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1206 charset_jisx0208
= id
;
1207 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1208 charset_ksc5601
= id
;
1211 if (charset
.emacs_mule_id
>= 0)
1213 emacs_mule_charset
[charset
.emacs_mule_id
] = id
;
1214 if (charset
.emacs_mule_id
< 0xA0)
1215 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1217 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1218 if (new_definition_p
)
1219 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1220 Fcons (make_number (id
), Qnil
));
1223 if (new_definition_p
)
1225 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1226 if (charset
.supplementary_p
)
1227 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1228 Fcons (make_number (id
), Qnil
));
1233 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1235 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1237 if (cs
->supplementary_p
)
1240 if (EQ (tail
, Vcharset_ordered_list
))
1241 Vcharset_ordered_list
= Fcons (make_number (id
),
1242 Vcharset_ordered_list
);
1243 else if (NILP (tail
))
1244 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1245 Fcons (make_number (id
), Qnil
));
1248 val
= Fcons (XCAR (tail
), XCDR (tail
));
1249 XSETCDR (tail
, val
);
1250 XSETCAR (tail
, make_number (id
));
1253 charset_ordered_list_tick
++;
1260 /* Same as Fdefine_charset_internal but arguments are more convenient
1261 to call from C (typically in syms_of_charset). This can define a
1262 charset of `offset' method only. Return the ID of the new
1266 define_charset_internal (Lisp_Object name
,
1268 const unsigned char *code_space
,
1269 unsigned min_code
, unsigned max_code
,
1270 int iso_final
, int iso_revision
, int emacs_mule_id
,
1271 int ascii_compatible
, int supplementary
,
1274 Lisp_Object args
[charset_arg_max
];
1275 Lisp_Object plist
[14];
1279 args
[charset_arg_name
] = name
;
1280 args
[charset_arg_dimension
] = make_number (dimension
);
1281 val
= Fmake_vector (make_number (8), make_number (0));
1282 for (i
= 0; i
< 8; i
++)
1283 ASET (val
, i
, make_number (code_space
[i
]));
1284 args
[charset_arg_code_space
] = val
;
1285 args
[charset_arg_min_code
] = make_number (min_code
);
1286 args
[charset_arg_max_code
] = make_number (max_code
);
1287 args
[charset_arg_iso_final
]
1288 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1289 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1290 args
[charset_arg_emacs_mule_id
]
1291 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1292 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1293 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1294 args
[charset_arg_invalid_code
] = Qnil
;
1295 args
[charset_arg_code_offset
] = make_number (code_offset
);
1296 args
[charset_arg_map
] = Qnil
;
1297 args
[charset_arg_subset
] = Qnil
;
1298 args
[charset_arg_superset
] = Qnil
;
1299 args
[charset_arg_unify_map
] = Qnil
;
1301 plist
[0] = intern_c_string (":name");
1302 plist
[1] = args
[charset_arg_name
];
1303 plist
[2] = intern_c_string (":dimension");
1304 plist
[3] = args
[charset_arg_dimension
];
1305 plist
[4] = intern_c_string (":code-space");
1306 plist
[5] = args
[charset_arg_code_space
];
1307 plist
[6] = intern_c_string (":iso-final-char");
1308 plist
[7] = args
[charset_arg_iso_final
];
1309 plist
[8] = intern_c_string (":emacs-mule-id");
1310 plist
[9] = args
[charset_arg_emacs_mule_id
];
1311 plist
[10] = intern_c_string (":ascii-compatible-p");
1312 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1313 plist
[12] = intern_c_string (":code-offset");
1314 plist
[13] = args
[charset_arg_code_offset
];
1316 args
[charset_arg_plist
] = Flist (14, plist
);
1317 Fdefine_charset_internal (charset_arg_max
, args
);
1319 return XINT (CHARSET_SYMBOL_ID (name
));
1323 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1324 Sdefine_charset_alias
, 2, 2, 0,
1325 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1326 (Lisp_Object alias
, Lisp_Object charset
)
1330 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1331 Fputhash (alias
, attr
, Vcharset_hash_table
);
1332 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1337 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1338 doc
: /* Return the property list of CHARSET. */)
1339 (Lisp_Object charset
)
1343 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1344 return CHARSET_ATTR_PLIST (attrs
);
1348 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1349 doc
: /* Set CHARSET's property list to PLIST. */)
1350 (Lisp_Object charset
, Lisp_Object plist
)
1354 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1355 CHARSET_ATTR_PLIST (attrs
) = plist
;
1360 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1361 doc
: /* Unify characters of CHARSET with Unicode.
1362 This means reading the relevant file and installing the table defined
1363 by CHARSET's `:unify-map' property.
1365 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1366 the same meaning as the `:unify-map' attribute in the function
1367 `define-charset' (which see).
1369 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1370 (Lisp_Object charset
, Lisp_Object unify_map
, Lisp_Object deunify
)
1375 CHECK_CHARSET_GET_ID (charset
, id
);
1376 cs
= CHARSET_FROM_ID (id
);
1378 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1379 : ! CHARSET_UNIFIED_P (cs
))
1382 CHARSET_UNIFIED_P (cs
) = 0;
1385 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1386 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1387 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1388 if (NILP (unify_map
))
1389 unify_map
= CHARSET_UNIFY_MAP (cs
);
1392 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1393 signal_error ("Bad unify-map", unify_map
);
1394 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1396 if (NILP (Vchar_unify_table
))
1397 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1398 char_table_set_range (Vchar_unify_table
,
1399 cs
->min_char
, cs
->max_char
, charset
);
1400 CHARSET_UNIFIED_P (cs
) = 1;
1402 else if (CHAR_TABLE_P (Vchar_unify_table
))
1404 int min_code
= CHARSET_MIN_CODE (cs
);
1405 int max_code
= CHARSET_MAX_CODE (cs
);
1406 int min_char
= DECODE_CHAR (cs
, min_code
);
1407 int max_char
= DECODE_CHAR (cs
, max_code
);
1409 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1415 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1416 Sget_unused_iso_final_char
, 2, 2, 0,
1418 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1419 DIMENSION is the number of bytes to represent a character: 1 or 2.
1420 CHARS is the number of characters in a dimension: 94 or 96.
1422 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1423 If there's no unused final char for the specified kind of charset,
1425 (Lisp_Object dimension
, Lisp_Object chars
)
1429 CHECK_NUMBER (dimension
);
1430 CHECK_NUMBER (chars
);
1431 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1432 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1433 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1434 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1435 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1436 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1438 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1442 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
1444 CHECK_NATNUM (dimension
);
1445 CHECK_NATNUM (chars
);
1446 CHECK_NATNUM (final_char
);
1448 if (XINT (dimension
) > 3)
1449 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1450 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1451 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1452 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1453 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1457 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1459 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1461 On decoding by an ISO-2022 base coding system, when a charset
1462 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1463 if CHARSET is designated instead. */)
1464 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
, Lisp_Object charset
)
1469 CHECK_CHARSET_GET_ID (charset
, id
);
1470 check_iso_charset_parameter (dimension
, chars
, final_char
);
1471 chars_flag
= XINT (chars
) == 96;
1472 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1477 /* Return information about charsets in the text at PTR of NBYTES
1478 bytes, which are NCHARS characters. The value is:
1480 0: Each character is represented by one byte. This is always
1481 true for a unibyte string. For a multibyte string, true if
1482 it contains only ASCII characters.
1484 1: No charsets other than ascii, control-1, and latin-1 are
1491 string_xstring_p (Lisp_Object string
)
1493 const unsigned char *p
= SDATA (string
);
1494 const unsigned char *endp
= p
+ SBYTES (string
);
1496 if (SCHARS (string
) == SBYTES (string
))
1501 int c
= STRING_CHAR_ADVANCE (p
);
1510 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1512 CHARSETS is a vector. If Nth element is non-nil, it means the
1513 charset whose id is N is already found.
1515 It may lookup a translation table TABLE if supplied. */
1518 find_charsets_in_text (const unsigned char *ptr
, EMACS_INT nchars
, EMACS_INT nbytes
, Lisp_Object charsets
, Lisp_Object table
, int multibyte
)
1520 const unsigned char *pend
= ptr
+ nbytes
;
1522 if (nchars
== nbytes
)
1525 ASET (charsets
, charset_ascii
, Qt
);
1532 c
= translate_char (table
, c
);
1533 if (ASCII_BYTE_P (c
))
1534 ASET (charsets
, charset_ascii
, Qt
);
1536 ASET (charsets
, charset_eight_bit
, Qt
);
1543 int c
= STRING_CHAR_ADVANCE (ptr
);
1544 struct charset
*charset
;
1547 c
= translate_char (table
, c
);
1548 charset
= CHAR_CHARSET (c
);
1549 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1554 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1556 doc
: /* Return a list of charsets in the region between BEG and END.
1557 BEG and END are buffer positions.
1558 Optional arg TABLE if non-nil is a translation table to look up.
1560 If the current buffer is unibyte, the returned list may contain
1561 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1562 (Lisp_Object beg
, Lisp_Object end
, Lisp_Object table
)
1564 Lisp_Object charsets
;
1565 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1568 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1570 validate_region (&beg
, &end
);
1571 from
= XFASTINT (beg
);
1572 stop
= to
= XFASTINT (end
);
1574 if (from
< GPT
&& GPT
< to
)
1577 stop_byte
= GPT_BYTE
;
1580 stop_byte
= CHAR_TO_BYTE (stop
);
1582 from_byte
= CHAR_TO_BYTE (from
);
1584 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1587 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1588 stop_byte
- from_byte
, charsets
, table
,
1592 from
= stop
, from_byte
= stop_byte
;
1593 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1600 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1601 if (!NILP (AREF (charsets
, i
)))
1602 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1606 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1608 doc
: /* Return a list of charsets in STR.
1609 Optional arg TABLE if non-nil is a translation table to look up.
1611 If STR is unibyte, the returned list may contain
1612 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1613 (Lisp_Object str
, Lisp_Object table
)
1615 Lisp_Object charsets
;
1621 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1622 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1624 STRING_MULTIBYTE (str
));
1626 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1627 if (!NILP (AREF (charsets
, i
)))
1628 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1634 /* Return a unified character code for C (>= 0x110000). VAL is a
1635 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1638 maybe_unify_char (int c
, Lisp_Object val
)
1640 struct charset
*charset
;
1647 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1648 load_charset (charset
, 1);
1649 if (! inhibit_load_charset_map
)
1651 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1657 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1658 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1667 /* Return a character correponding to the code-point CODE of
1671 decode_char (struct charset
*charset
, unsigned int code
)
1674 enum charset_method method
= CHARSET_METHOD (charset
);
1676 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1679 if (method
== CHARSET_METHOD_SUBSET
)
1681 Lisp_Object subset_info
;
1683 subset_info
= CHARSET_SUBSET (charset
);
1684 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1685 code
-= XINT (AREF (subset_info
, 3));
1686 if (code
< XFASTINT (AREF (subset_info
, 1))
1687 || code
> XFASTINT (AREF (subset_info
, 2)))
1690 c
= DECODE_CHAR (charset
, code
);
1692 else if (method
== CHARSET_METHOD_SUPERSET
)
1694 Lisp_Object parents
;
1696 parents
= CHARSET_SUPERSET (charset
);
1698 for (; CONSP (parents
); parents
= XCDR (parents
))
1700 int id
= XINT (XCAR (XCAR (parents
)));
1701 int code_offset
= XINT (XCDR (XCAR (parents
)));
1702 unsigned this_code
= code
- code_offset
;
1704 charset
= CHARSET_FROM_ID (id
);
1705 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1711 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1715 if (method
== CHARSET_METHOD_MAP
)
1717 Lisp_Object decoder
;
1719 decoder
= CHARSET_DECODER (charset
);
1720 if (! VECTORP (decoder
))
1722 load_charset (charset
, 1);
1723 decoder
= CHARSET_DECODER (charset
);
1725 if (VECTORP (decoder
))
1726 c
= XINT (AREF (decoder
, char_index
));
1728 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1730 else /* method == CHARSET_METHOD_OFFSET */
1732 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1733 if (CHARSET_UNIFIED_P (charset
)
1734 && c
> MAX_UNICODE_CHAR
)
1735 MAYBE_UNIFY_CHAR (c
);
1742 /* Variable used temporarily by the macro ENCODE_CHAR. */
1743 Lisp_Object charset_work
;
1745 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1746 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1747 use CHARSET's strict_max_char instead of max_char. */
1750 encode_char (struct charset
*charset
, int c
)
1753 enum charset_method method
= CHARSET_METHOD (charset
);
1755 if (CHARSET_UNIFIED_P (charset
))
1757 Lisp_Object deunifier
;
1758 int code_index
= -1;
1760 deunifier
= CHARSET_DEUNIFIER (charset
);
1761 if (! CHAR_TABLE_P (deunifier
))
1763 load_charset (charset
, 2);
1764 deunifier
= CHARSET_DEUNIFIER (charset
);
1766 if (CHAR_TABLE_P (deunifier
))
1768 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1770 if (INTEGERP (deunified
))
1771 code_index
= XINT (deunified
);
1775 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1777 if (code_index
>= 0)
1778 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1781 if (method
== CHARSET_METHOD_SUBSET
)
1783 Lisp_Object subset_info
;
1784 struct charset
*this_charset
;
1786 subset_info
= CHARSET_SUBSET (charset
);
1787 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1788 code
= ENCODE_CHAR (this_charset
, c
);
1789 if (code
== CHARSET_INVALID_CODE (this_charset
)
1790 || code
< XFASTINT (AREF (subset_info
, 1))
1791 || code
> XFASTINT (AREF (subset_info
, 2)))
1792 return CHARSET_INVALID_CODE (charset
);
1793 code
+= XINT (AREF (subset_info
, 3));
1797 if (method
== CHARSET_METHOD_SUPERSET
)
1799 Lisp_Object parents
;
1801 parents
= CHARSET_SUPERSET (charset
);
1802 for (; CONSP (parents
); parents
= XCDR (parents
))
1804 int id
= XINT (XCAR (XCAR (parents
)));
1805 int code_offset
= XINT (XCDR (XCAR (parents
)));
1806 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1808 code
= ENCODE_CHAR (this_charset
, c
);
1809 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1810 return code
+ code_offset
;
1812 return CHARSET_INVALID_CODE (charset
);
1815 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1816 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1817 return CHARSET_INVALID_CODE (charset
);
1819 if (method
== CHARSET_METHOD_MAP
)
1821 Lisp_Object encoder
;
1824 encoder
= CHARSET_ENCODER (charset
);
1825 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1827 load_charset (charset
, 2);
1828 encoder
= CHARSET_ENCODER (charset
);
1830 if (CHAR_TABLE_P (encoder
))
1832 val
= CHAR_TABLE_REF (encoder
, c
);
1834 return CHARSET_INVALID_CODE (charset
);
1836 if (! CHARSET_COMPACT_CODES_P (charset
))
1837 code
= INDEX_TO_CODE_POINT (charset
, code
);
1841 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1842 code
= INDEX_TO_CODE_POINT (charset
, code
);
1845 else /* method == CHARSET_METHOD_OFFSET */
1847 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1849 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1856 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1857 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1858 Return nil if CODE-POINT is not valid in CHARSET.
1860 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1862 Optional argument RESTRICTION specifies a way to map the pair of CCS
1863 and CODE-POINT to a character. Currently not supported and just ignored. */)
1864 (Lisp_Object charset
, Lisp_Object code_point
, Lisp_Object restriction
)
1868 struct charset
*charsetp
;
1870 CHECK_CHARSET_GET_ID (charset
, id
);
1871 if (CONSP (code_point
))
1873 CHECK_NATNUM_CAR (code_point
);
1874 CHECK_NATNUM_CDR (code_point
);
1875 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1879 CHECK_NATNUM (code_point
);
1880 code
= XINT (code_point
);
1882 charsetp
= CHARSET_FROM_ID (id
);
1883 c
= DECODE_CHAR (charsetp
, code
);
1884 return (c
>= 0 ? make_number (c
) : Qnil
);
1888 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1889 doc
: /* Encode the character CH into a code-point of CHARSET.
1890 Return nil if CHARSET doesn't include CH.
1892 Optional argument RESTRICTION specifies a way to map CH to a
1893 code-point in CCS. Currently not supported and just ignored. */)
1894 (Lisp_Object ch
, Lisp_Object charset
, Lisp_Object restriction
)
1898 struct charset
*charsetp
;
1900 CHECK_CHARSET_GET_ID (charset
, id
);
1902 charsetp
= CHARSET_FROM_ID (id
);
1903 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1904 if (code
== CHARSET_INVALID_CODE (charsetp
))
1906 if (code
> 0x7FFFFFF)
1907 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1908 return make_number (code
);
1912 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1914 /* Return a character of CHARSET whose position codes are CODEn.
1916 CODE1 through CODE4 are optional, but if you don't supply sufficient
1917 position codes, it is assumed that the minimum code in each dimension
1919 (Lisp_Object charset
, Lisp_Object code1
, Lisp_Object code2
, Lisp_Object code3
, Lisp_Object code4
)
1922 struct charset
*charsetp
;
1926 CHECK_CHARSET_GET_ID (charset
, id
);
1927 charsetp
= CHARSET_FROM_ID (id
);
1929 dimension
= CHARSET_DIMENSION (charsetp
);
1931 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1932 ? 0 : CHARSET_MIN_CODE (charsetp
));
1935 CHECK_NATNUM (code1
);
1936 if (XFASTINT (code1
) >= 0x100)
1937 args_out_of_range (make_number (0xFF), code1
);
1938 code
= XFASTINT (code1
);
1944 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1947 CHECK_NATNUM (code2
);
1948 if (XFASTINT (code2
) >= 0x100)
1949 args_out_of_range (make_number (0xFF), code2
);
1950 code
|= XFASTINT (code2
);
1957 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1960 CHECK_NATNUM (code3
);
1961 if (XFASTINT (code3
) >= 0x100)
1962 args_out_of_range (make_number (0xFF), code3
);
1963 code
|= XFASTINT (code3
);
1970 code
|= charsetp
->code_space
[0];
1973 CHECK_NATNUM (code4
);
1974 if (XFASTINT (code4
) >= 0x100)
1975 args_out_of_range (make_number (0xFF), code4
);
1976 code
|= XFASTINT (code4
);
1983 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1985 c
= DECODE_CHAR (charsetp
, code
);
1987 error ("Invalid code(s)");
1988 return make_number (c
);
1992 /* Return the first charset in CHARSET_LIST that contains C.
1993 CHARSET_LIST is a list of charset IDs. If it is nil, use
1994 Vcharset_ordered_list. */
1997 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
2001 if (NILP (charset_list
))
2002 charset_list
= Vcharset_ordered_list
;
2006 while (CONSP (charset_list
))
2008 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2009 unsigned code
= ENCODE_CHAR (charset
, c
);
2011 if (code
!= CHARSET_INVALID_CODE (charset
))
2014 *code_return
= code
;
2017 charset_list
= XCDR (charset_list
);
2019 && c
<= MAX_UNICODE_CHAR
2020 && EQ (charset_list
, Vcharset_non_preferred_head
))
2021 return CHARSET_FROM_ID (charset_unicode
);
2023 return (maybe_null
? NULL
2024 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2025 : CHARSET_FROM_ID (charset_eight_bit
));
2029 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2031 /*Return list of charset and one to four position-codes of CH.
2032 The charset is decided by the current priority order of charsets.
2033 A position-code is a byte value of each dimension of the code-point of
2034 CH in the charset. */)
2037 struct charset
*charset
;
2042 CHECK_CHARACTER (ch
);
2044 charset
= CHAR_CHARSET (c
);
2047 code
= ENCODE_CHAR (charset
, c
);
2048 if (code
== CHARSET_INVALID_CODE (charset
))
2050 dimension
= CHARSET_DIMENSION (charset
);
2051 for (val
= Qnil
; dimension
> 0; dimension
--)
2053 val
= Fcons (make_number (code
& 0xFF), val
);
2056 return Fcons (CHARSET_NAME (charset
), val
);
2060 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2061 doc
: /* Return the charset of highest priority that contains CH.
2062 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2063 from which to find the charset. It may also be a coding system. In
2064 that case, find the charset from what supported by that coding system. */)
2065 (Lisp_Object ch
, Lisp_Object restriction
)
2067 struct charset
*charset
;
2069 CHECK_CHARACTER (ch
);
2070 if (NILP (restriction
))
2071 charset
= CHAR_CHARSET (XINT (ch
));
2074 if (CONSP (restriction
))
2076 int c
= XFASTINT (ch
);
2078 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2080 struct charset
*charset
;
2082 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), charset
);
2083 if (ENCODE_CHAR (charset
, c
) != CHARSET_INVALID_CODE (charset
))
2084 return XCAR (restriction
);
2088 restriction
= coding_system_charset_list (restriction
);
2089 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2093 return (CHARSET_NAME (charset
));
2097 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2099 Return charset of a character in the current buffer at position POS.
2100 If POS is nil, it defauls to the current point.
2101 If POS is out of range, the value is nil. */)
2105 struct charset
*charset
;
2107 ch
= Fchar_after (pos
);
2108 if (! INTEGERP (ch
))
2110 charset
= CHAR_CHARSET (XINT (ch
));
2111 return (CHARSET_NAME (charset
));
2115 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2117 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2119 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2120 by their DIMENSION, CHARS, and FINAL-CHAR,
2121 whereas Emacs distinguishes them by charset symbol.
2122 See the documentation of the function `charset-info' for the meanings of
2123 DIMENSION, CHARS, and FINAL-CHAR. */)
2124 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
2129 check_iso_charset_parameter (dimension
, chars
, final_char
);
2130 chars_flag
= XFASTINT (chars
) == 96;
2131 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2132 XFASTINT (final_char
));
2133 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2137 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2141 Clear temporary charset mapping tables.
2142 It should be called only from temacs invoked for dumping. */)
2145 if (temp_charset_work
)
2147 free (temp_charset_work
);
2148 temp_charset_work
= NULL
;
2151 if (CHAR_TABLE_P (Vchar_unify_table
))
2152 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2157 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2158 Scharset_priority_list
, 0, 1, 0,
2159 doc
: /* Return the list of charsets ordered by priority.
2160 HIGHESTP non-nil means just return the highest priority one. */)
2161 (Lisp_Object highestp
)
2163 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2165 if (!NILP (highestp
))
2166 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2168 while (!NILP (list
))
2170 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2173 return Fnreverse (val
);
2176 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2178 doc
: /* Assign higher priority to the charsets given as arguments.
2179 usage: (set-charset-priority &rest charsets) */)
2180 (int nargs
, Lisp_Object
*args
)
2182 Lisp_Object new_head
, old_list
, arglist
[2];
2183 Lisp_Object list_2022
, list_emacs_mule
;
2186 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2188 for (i
= 0; i
< nargs
; i
++)
2190 CHECK_CHARSET_GET_ID (args
[i
], id
);
2191 if (! NILP (Fmemq (make_number (id
), old_list
)))
2193 old_list
= Fdelq (make_number (id
), old_list
);
2194 new_head
= Fcons (make_number (id
), new_head
);
2197 arglist
[0] = Fnreverse (new_head
);
2198 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2199 Vcharset_ordered_list
= Fnconc (2, arglist
);
2200 charset_ordered_list_tick
++;
2202 charset_unibyte
= -1;
2203 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2204 CONSP (old_list
); old_list
= XCDR (old_list
))
2206 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2207 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2208 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2209 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2210 if (charset_unibyte
< 0)
2212 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2214 if (CHARSET_DIMENSION (charset
) == 1
2215 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2216 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2217 charset_unibyte
= CHARSET_ID (charset
);
2220 Viso_2022_charset_list
= Fnreverse (list_2022
);
2221 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2222 if (charset_unibyte
< 0)
2223 charset_unibyte
= charset_iso_8859_1
;
2228 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2230 doc
: /* Internal use only.
2231 Return charset identification number of CHARSET. */)
2232 (Lisp_Object charset
)
2236 CHECK_CHARSET_GET_ID (charset
, id
);
2237 return make_number (id
);
2240 struct charset_sort_data
2242 Lisp_Object charset
;
2248 charset_compare (const void *d1
, const void *d2
)
2250 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2251 return (data1
->priority
- data2
->priority
);
2254 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2255 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2256 Return the sorted list. CHARSETS is modified by side effects.
2257 See also `charset-priority-list' and `set-charset-priority'. */)
2258 (Lisp_Object charsets
)
2260 Lisp_Object len
= Flength (charsets
);
2261 int n
= XFASTINT (len
), i
, j
, done
;
2262 Lisp_Object tail
, elt
, attrs
;
2263 struct charset_sort_data
*sort_data
;
2264 int id
, min_id
, max_id
;
2269 SAFE_ALLOCA (sort_data
, struct charset_sort_data
*, sizeof (*sort_data
) * n
);
2270 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2273 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2274 sort_data
[i
].charset
= elt
;
2275 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2277 min_id
= max_id
= id
;
2278 else if (id
< min_id
)
2280 else if (id
> max_id
)
2283 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2284 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2287 id
= XFASTINT (elt
);
2288 if (id
>= min_id
&& id
<= max_id
)
2289 for (j
= 0; j
< n
; j
++)
2290 if (sort_data
[j
].id
== id
)
2292 sort_data
[j
].priority
= i
;
2296 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2297 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2298 XSETCAR (tail
, sort_data
[i
].charset
);
2307 Lisp_Object tempdir
;
2308 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2309 if (access ((char *) SDATA (tempdir
), 0) < 0)
2311 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2312 Emacs will not function correctly without the character map files.\n\
2313 Please check your installation!\n",
2315 /* TODO should this be a fatal error? (Bug#909) */
2318 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2323 init_charset_once (void)
2327 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2328 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2329 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2330 iso_charset_table
[i
][j
][k
] = -1;
2332 for (i
= 0; i
< 256; i
++)
2333 emacs_mule_charset
[i
] = -1;
2335 charset_jisx0201_roman
= -1;
2336 charset_jisx0208_1978
= -1;
2337 charset_jisx0208
= -1;
2338 charset_ksc5601
= -1;
2344 syms_of_charset (void)
2346 DEFSYM (Qcharsetp
, "charsetp");
2348 DEFSYM (Qascii
, "ascii");
2349 DEFSYM (Qunicode
, "unicode");
2350 DEFSYM (Qemacs
, "emacs");
2351 DEFSYM (Qeight_bit
, "eight-bit");
2352 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2357 staticpro (&Vcharset_ordered_list
);
2358 Vcharset_ordered_list
= Qnil
;
2360 staticpro (&Viso_2022_charset_list
);
2361 Viso_2022_charset_list
= Qnil
;
2363 staticpro (&Vemacs_mule_charset_list
);
2364 Vemacs_mule_charset_list
= Qnil
;
2366 /* Don't staticpro them here. It's done in syms_of_fns. */
2367 QCtest
= intern_c_string (":test");
2368 Qeq
= intern_c_string ("eq");
2370 staticpro (&Vcharset_hash_table
);
2372 Lisp_Object args
[2];
2375 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2378 charset_table_size
= 128;
2379 charset_table
= ((struct charset
*)
2380 xmalloc (sizeof (struct charset
) * charset_table_size
));
2381 charset_table_used
= 0;
2383 defsubr (&Scharsetp
);
2384 defsubr (&Smap_charset_chars
);
2385 defsubr (&Sdefine_charset_internal
);
2386 defsubr (&Sdefine_charset_alias
);
2387 defsubr (&Scharset_plist
);
2388 defsubr (&Sset_charset_plist
);
2389 defsubr (&Sunify_charset
);
2390 defsubr (&Sget_unused_iso_final_char
);
2391 defsubr (&Sdeclare_equiv_charset
);
2392 defsubr (&Sfind_charset_region
);
2393 defsubr (&Sfind_charset_string
);
2394 defsubr (&Sdecode_char
);
2395 defsubr (&Sencode_char
);
2396 defsubr (&Ssplit_char
);
2397 defsubr (&Smake_char
);
2398 defsubr (&Schar_charset
);
2399 defsubr (&Scharset_after
);
2400 defsubr (&Siso_charset
);
2401 defsubr (&Sclear_charset_maps
);
2402 defsubr (&Scharset_priority_list
);
2403 defsubr (&Sset_charset_priority
);
2404 defsubr (&Scharset_id_internal
);
2405 defsubr (&Ssort_charsets
);
2407 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2408 doc
: /* *List of directories to search for charset map files. */);
2409 Vcharset_map_path
= Qnil
;
2411 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2412 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2413 inhibit_load_charset_map
= 0;
2415 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2416 doc
: /* List of all charsets ever defined. */);
2417 Vcharset_list
= Qnil
;
2419 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2420 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2421 If the current language environment is for multiple languages (e.g. "Latin-1"),
2422 the value may be a list of mnemonics. */);
2423 Vcurrent_iso639_language
= Qnil
;
2426 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2427 0, 127, 'B', -1, 0, 1, 0, 0);
2429 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2430 0, 255, -1, -1, -1, 1, 0, 0);
2432 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2433 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2435 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2436 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2438 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2439 128, 255, -1, 0, -1, 0, 1,
2440 MAX_5_BYTE_CHAR
+ 1);
2441 charset_unibyte
= charset_iso_8859_1
;
2446 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2447 (do not change this comment) */