1 /* Basic character set support.
3 Copyright (C) 2001-2014 Free Software Foundation, Inc.
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
14 This file is part of GNU Emacs.
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or
19 (at your option) any later version.
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
35 #include <sys/types.h>
38 #include "character.h"
44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
46 A coded character set ("charset" hereafter) is a meaningful
47 collection (i.e. language, culture, functionality, etc.) of
48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
55 charset_table as a struct charset.
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table
;
63 /* Table of struct charset. */
64 struct charset
*charset_table
;
66 static ptrdiff_t charset_table_size
;
67 static int charset_table_used
;
69 Lisp_Object Qcharsetp
;
71 /* Special charset symbols. */
73 static Lisp_Object Qeight_bit
;
74 static Lisp_Object Qiso_8859_1
;
75 static Lisp_Object Qunicode
;
76 static Lisp_Object Qemacs
;
78 /* The corresponding charsets. */
80 int charset_eight_bit
;
81 static int charset_iso_8859_1
;
83 static int charset_emacs
;
85 /* The other special charsets. */
86 int charset_jisx0201_roman
;
87 int charset_jisx0208_1978
;
91 /* Value of charset attribute `charset-iso-plane'. */
92 static Lisp_Object Qgl
, Qgr
;
94 /* Charset of unibyte characters. */
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list
;
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
102 Lisp_Object Vcharset_non_preferred_head
;
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
107 unsigned short charset_ordered_list_tick
;
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list
;
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list
;
115 int emacs_mule_charset
[256];
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
121 #define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
123 ? (int) ((code) - (charset)->min_code) \
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
128 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0]) \
135 - ((charset)->char_index_offset)) \
139 /* Return the code-point for the character index IDX in CHARSET.
140 IDX should be an unsigned int variable in a valid range (which is
141 always in nonnegative int range too). IDX contains garbage afterwards. */
143 #define INDEX_TO_CODE_POINT(charset, idx) \
144 ((charset)->code_linear_p \
145 ? (idx) + (charset)->min_code \
146 : (idx += (charset)->char_index_offset, \
147 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
148 | (((charset)->code_space[4] \
149 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
151 | (((charset)->code_space[8] \
152 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
154 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
157 /* Structure to hold mapping tables for a charset. Used by temacs
158 invoked for dumping. */
162 /* The current charset for which the following tables are setup. */
163 struct charset
*current
;
165 /* 1 iff the following table is used for encoder. */
168 /* When the following table is used for encoding, minimum and
169 maximum character of the current charset. */
170 int min_char
, max_char
;
172 /* A Unicode character corresponding to the code index 0 (i.e. the
173 minimum code-point) of the current charset, or -1 if the code
174 index 0 is not a Unicode character. This is checked when
175 table.encoder[CHAR] is zero. */
179 /* Table mapping code-indices (not code-points) of the current
180 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
181 doesn't belong to the current charset. */
182 int decoder
[0x10000];
183 /* Table mapping Unicode characters to code-indices of the current
184 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
185 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
186 (0x20000..0x2FFFF). Note that there is no charset map that
187 uses both SMP and SIP. */
188 unsigned short encoder
[0x20000];
190 } *temp_charset_work
;
192 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
195 temp_charset_work->zero_index_char = (C); \
196 else if ((C) < 0x20000) \
197 temp_charset_work->table.encoder[(C)] = (CODE); \
199 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
202 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
203 ((C) == temp_charset_work->zero_index_char ? 0 \
204 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
205 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
206 : temp_charset_work->table.encoder[(C) - 0x10000] \
207 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
209 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
210 (temp_charset_work->table.decoder[(CODE)] = (C))
212 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
213 (temp_charset_work->table.decoder[(CODE)])
216 /* Set to 1 to warn that a charset map is loaded and thus a buffer
217 text and a string data may be relocated. */
218 bool charset_map_loaded
;
220 struct charset_map_entries
226 struct charset_map_entries
*next
;
229 /* Load the mapping information of CHARSET from ENTRIES for
230 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
231 encoding (CONTROL_FLAG == 2).
233 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
234 and CHARSET->fast_map.
236 If CONTROL_FLAG is 1, setup the following tables according to
237 CHARSET->method and inhibit_load_charset_map.
239 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
240 ----------------------+--------------------+---------------------------
241 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
242 ----------------------+--------------------+---------------------------
243 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
245 If CONTROL_FLAG is 2, setup the following tables.
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
250 ----------------------+--------------------+--------------------------
251 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
255 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
257 Lisp_Object vec
IF_LINT (= Qnil
), table
IF_LINT (= Qnil
);
258 unsigned max_code
= CHARSET_MAX_CODE (charset
);
259 bool ascii_compatible_p
= charset
->ascii_compatible_p
;
260 int min_char
, max_char
, nonascii_min_char
;
262 unsigned char *fast_map
= charset
->fast_map
;
269 if (! inhibit_load_charset_map
)
271 if (control_flag
== 1)
273 if (charset
->method
== CHARSET_METHOD_MAP
)
275 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
277 vec
= Fmake_vector (make_number (n
), make_number (-1));
278 set_charset_attr (charset
, charset_decoder
, vec
);
282 char_table_set_range (Vchar_unify_table
,
283 charset
->min_char
, charset
->max_char
,
289 table
= Fmake_char_table (Qnil
, Qnil
);
290 set_charset_attr (charset
,
291 (charset
->method
== CHARSET_METHOD_MAP
292 ? charset_encoder
: charset_deunifier
),
298 if (! temp_charset_work
)
299 temp_charset_work
= xmalloc (sizeof *temp_charset_work
);
300 if (control_flag
== 1)
302 memset (temp_charset_work
->table
.decoder
, -1,
303 sizeof (int) * 0x10000);
307 memset (temp_charset_work
->table
.encoder
, 0,
308 sizeof (unsigned short) * 0x20000);
309 temp_charset_work
->zero_index_char
= -1;
311 temp_charset_work
->current
= charset
;
312 temp_charset_work
->for_encoder
= (control_flag
== 2);
315 charset_map_loaded
= 1;
318 min_char
= max_char
= entries
->entry
[0].c
;
319 nonascii_min_char
= MAX_CHAR
;
320 for (i
= 0; i
< n_entries
; i
++)
323 int from_index
, to_index
, lim_index
;
325 int idx
= i
% 0x10000;
327 if (i
> 0 && idx
== 0)
328 entries
= entries
->next
;
329 from
= entries
->entry
[idx
].from
;
330 to
= entries
->entry
[idx
].to
;
331 from_c
= entries
->entry
[idx
].c
;
332 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
335 to_index
= from_index
;
340 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
341 to_c
= from_c
+ (to_index
- from_index
);
343 if (from_index
< 0 || to_index
< 0)
345 lim_index
= to_index
+ 1;
349 else if (from_c
< min_char
)
352 if (control_flag
== 1)
354 if (charset
->method
== CHARSET_METHOD_MAP
)
355 for (; from_index
< lim_index
; from_index
++, from_c
++)
356 ASET (vec
, from_index
, make_number (from_c
));
358 for (; from_index
< lim_index
; from_index
++, from_c
++)
359 CHAR_TABLE_SET (Vchar_unify_table
,
360 CHARSET_CODE_OFFSET (charset
) + from_index
,
361 make_number (from_c
));
363 else if (control_flag
== 2)
365 if (charset
->method
== CHARSET_METHOD_MAP
366 && CHARSET_COMPACT_CODES_P (charset
))
367 for (; from_index
< lim_index
; from_index
++, from_c
++)
369 unsigned code
= from_index
;
370 code
= INDEX_TO_CODE_POINT (charset
, code
);
372 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
373 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
376 for (; from_index
< lim_index
; from_index
++, from_c
++)
378 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
379 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
382 else if (control_flag
== 3)
383 for (; from_index
< lim_index
; from_index
++, from_c
++)
384 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
385 else if (control_flag
== 4)
386 for (; from_index
< lim_index
; from_index
++, from_c
++)
387 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
388 else /* control_flag == 0 */
390 if (ascii_compatible_p
)
392 if (! ASCII_CHAR_P (from_c
))
394 if (from_c
< nonascii_min_char
)
395 nonascii_min_char
= from_c
;
397 else if (! ASCII_CHAR_P (to_c
))
399 nonascii_min_char
= 0x80;
403 for (; from_c
<= to_c
; from_c
++)
404 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
408 if (control_flag
== 0)
410 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
411 ? nonascii_min_char
: min_char
);
412 CHARSET_MAX_CHAR (charset
) = max_char
;
414 else if (control_flag
== 4)
416 temp_charset_work
->min_char
= min_char
;
417 temp_charset_work
->max_char
= max_char
;
422 /* Read a hexadecimal number (preceded by "0x") from the file FP while
423 paying attention to comment character '#'. */
426 read_hex (FILE *fp
, bool *eof
, bool *overflow
)
431 while ((c
= getc (fp
)) != EOF
)
435 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
439 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
449 while (c_isxdigit (c
= getc (fp
)))
451 if (UINT_MAX
>> 4 < n
)
454 | (c
- ('0' <= c
&& c
<= '9' ? '0'
455 : 'A' <= c
&& c
<= 'F' ? 'A' - 10
463 /* Return a mapping vector for CHARSET loaded from MAPFILE.
464 Each line of MAPFILE has this form
466 where 0xAAAA is a code-point and 0xCCCC is the corresponding
467 character code, or this form
469 where 0xAAAA and 0xBBBB are code-points specifying a range, and
470 0xCCCC is the first character code of the range.
472 The returned vector has this form:
473 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
474 where CODE1 is a code-point or a cons of code-points specifying a
477 Note that this function uses `openp' to open MAPFILE but ignores
478 `file-name-handler-alist' to avoid running any Lisp code. */
481 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
,
484 unsigned min_code
= CHARSET_MIN_CODE (charset
);
485 unsigned max_code
= CHARSET_MAX_CODE (charset
);
488 struct charset_map_entries
*head
, *entries
;
490 AUTO_STRING (map
, ".map");
491 AUTO_STRING (txt
, ".txt");
492 AUTO_LIST2 (suffixes
, map
, txt
);
493 ptrdiff_t count
= SPECPDL_INDEX ();
494 record_unwind_protect_nothing ();
495 specbind (Qfile_name_handler_alist
, Qnil
);
496 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
, false);
497 fp
= fd
< 0 ? 0 : fdopen (fd
, "r");
500 int open_errno
= errno
;
502 report_file_errno ("Loading charset map", mapfile
, open_errno
);
504 set_unwind_protect_ptr (count
, fclose_unwind
, fp
);
505 unbind_to (count
+ 1, Qnil
);
507 /* Use record_xmalloc, as `charset_map_entries' is
508 large (larger than MAX_ALLOCA). */
509 head
= record_xmalloc (sizeof *head
);
511 memset (entries
, 0, sizeof (struct charset_map_entries
));
516 unsigned from
, to
, c
;
518 bool eof
= 0, overflow
= 0;
520 from
= read_hex (fp
, &eof
, &overflow
);
523 if (getc (fp
) == '-')
524 to
= read_hex (fp
, &eof
, &overflow
);
529 c
= read_hex (fp
, &eof
, &overflow
);
535 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
538 if (n_entries
== 0x10000)
540 entries
->next
= record_xmalloc (sizeof *entries
->next
);
541 entries
= entries
->next
;
542 memset (entries
, 0, sizeof (struct charset_map_entries
));
546 entries
->entry
[idx
].from
= from
;
547 entries
->entry
[idx
].to
= to
;
548 entries
->entry
[idx
].c
= c
;
552 clear_unwind_protect (count
);
554 load_charset_map (charset
, head
, n_entries
, control_flag
);
555 unbind_to (count
, Qnil
);
559 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
561 unsigned min_code
= CHARSET_MIN_CODE (charset
);
562 unsigned max_code
= CHARSET_MAX_CODE (charset
);
563 struct charset_map_entries
*head
, *entries
;
565 int len
= ASIZE (vec
);
571 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
575 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
576 large (larger than MAX_ALLOCA). */
577 head
= SAFE_ALLOCA (sizeof *head
);
579 memset (entries
, 0, sizeof (struct charset_map_entries
));
582 for (i
= 0; i
< len
; i
+= 2)
584 Lisp_Object val
, val2
;
594 from
= XFASTINT (val
);
595 to
= XFASTINT (val2
);
598 from
= to
= XFASTINT (val
);
599 val
= AREF (vec
, i
+ 1);
603 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
606 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
608 entries
->next
= SAFE_ALLOCA (sizeof *entries
->next
);
609 entries
= entries
->next
;
610 memset (entries
, 0, sizeof (struct charset_map_entries
));
612 idx
= n_entries
% 0x10000;
613 entries
->entry
[idx
].from
= from
;
614 entries
->entry
[idx
].to
= to
;
615 entries
->entry
[idx
].c
= c
;
619 load_charset_map (charset
, head
, n_entries
, control_flag
);
624 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
625 map it is (see the comment of load_charset_map for the detail). */
628 load_charset (struct charset
*charset
, int control_flag
)
632 if (inhibit_load_charset_map
634 && charset
== temp_charset_work
->current
635 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
638 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
639 map
= CHARSET_MAP (charset
);
642 if (! CHARSET_UNIFIED_P (charset
))
644 map
= CHARSET_UNIFY_MAP (charset
);
647 load_charset_map_from_file (charset
, map
, control_flag
);
649 load_charset_map_from_vector (charset
, map
, control_flag
);
653 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
654 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
657 return (CHARSETP (object
) ? Qt
: Qnil
);
662 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
663 Lisp_Object function
, Lisp_Object arg
,
664 unsigned int from
, unsigned int to
)
666 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
667 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
668 Lisp_Object range
= Fcons (Qnil
, Qnil
);
671 c
= temp_charset_work
->min_char
;
672 stop
= (temp_charset_work
->max_char
< 0x20000
673 ? temp_charset_work
->max_char
: 0xFFFF);
677 int idx
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
679 if (idx
>= from_idx
&& idx
<= to_idx
)
681 if (NILP (XCAR (range
)))
682 XSETCAR (range
, make_number (c
));
684 else if (! NILP (XCAR (range
)))
686 XSETCDR (range
, make_number (c
- 1));
688 (*c_function
) (arg
, range
);
690 call2 (function
, range
, arg
);
691 XSETCAR (range
, Qnil
);
695 if (c
== temp_charset_work
->max_char
)
697 if (! NILP (XCAR (range
)))
699 XSETCDR (range
, make_number (c
));
701 (*c_function
) (arg
, range
);
703 call2 (function
, range
, arg
);
708 stop
= temp_charset_work
->max_char
;
715 map_charset_chars (void (*c_function
)(Lisp_Object
, Lisp_Object
), Lisp_Object function
,
716 Lisp_Object arg
, struct charset
*charset
, unsigned from
, unsigned to
)
719 bool partial
= (from
> CHARSET_MIN_CODE (charset
)
720 || to
< CHARSET_MAX_CODE (charset
));
722 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
724 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
725 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
726 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
727 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
729 if (CHARSET_UNIFIED_P (charset
))
731 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
732 load_charset (charset
, 2);
733 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
734 map_char_table_for_charset (c_function
, function
,
735 CHARSET_DEUNIFIER (charset
), arg
,
736 partial
? charset
: NULL
, from
, to
);
738 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
741 range
= Fcons (make_number (from_c
), make_number (to_c
));
743 (*c_function
) (arg
, range
);
745 call2 (function
, range
, arg
);
747 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
749 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
750 load_charset (charset
, 2);
751 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
752 map_char_table_for_charset (c_function
, function
,
753 CHARSET_ENCODER (charset
), arg
,
754 partial
? charset
: NULL
, from
, to
);
756 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
758 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
760 Lisp_Object subset_info
;
763 subset_info
= CHARSET_SUBSET (charset
);
764 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
765 offset
= XINT (AREF (subset_info
, 3));
767 if (from
< XFASTINT (AREF (subset_info
, 1)))
768 from
= XFASTINT (AREF (subset_info
, 1));
770 if (to
> XFASTINT (AREF (subset_info
, 2)))
771 to
= XFASTINT (AREF (subset_info
, 2));
772 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
774 else /* i.e. CHARSET_METHOD_SUPERSET */
778 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
779 parents
= XCDR (parents
))
782 unsigned this_from
, this_to
;
784 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
785 offset
= XINT (XCDR (XCAR (parents
)));
786 this_from
= from
> offset
? from
- offset
: 0;
787 this_to
= to
> offset
? to
- offset
: 0;
788 if (this_from
< CHARSET_MIN_CODE (charset
))
789 this_from
= CHARSET_MIN_CODE (charset
);
790 if (this_to
> CHARSET_MAX_CODE (charset
))
791 this_to
= CHARSET_MAX_CODE (charset
);
792 map_charset_chars (c_function
, function
, arg
, charset
,
798 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
799 doc
: /* Call FUNCTION for all characters in CHARSET.
800 FUNCTION is called with an argument RANGE and the optional 3rd
803 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
804 characters contained in CHARSET.
806 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
807 range of code points (in CHARSET) of target characters. */)
808 (Lisp_Object function
, Lisp_Object charset
, Lisp_Object arg
, Lisp_Object from_code
, Lisp_Object to_code
)
813 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
814 if (NILP (from_code
))
815 from
= CHARSET_MIN_CODE (cs
);
818 from
= XINT (from_code
);
819 if (from
< CHARSET_MIN_CODE (cs
))
820 from
= CHARSET_MIN_CODE (cs
);
823 to
= CHARSET_MAX_CODE (cs
);
827 if (to
> CHARSET_MAX_CODE (cs
))
828 to
= CHARSET_MAX_CODE (cs
);
830 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
835 /* Define a charset according to the arguments. The Nth argument is
836 the Nth attribute of the charset (the last attribute `charset-id'
837 is not included). See the docstring of `define-charset' for the
840 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
841 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
842 doc
: /* For internal use only.
843 usage: (define-charset-internal ...) */)
844 (ptrdiff_t nargs
, Lisp_Object
*args
)
846 /* Charset attr vector. */
849 EMACS_UINT hash_code
;
850 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
852 struct charset charset
;
855 bool new_definition_p
;
858 if (nargs
!= charset_arg_max
)
859 return Fsignal (Qwrong_number_of_arguments
,
860 Fcons (intern ("define-charset-internal"),
861 make_number (nargs
)));
863 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
865 CHECK_SYMBOL (args
[charset_arg_name
]);
866 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
868 val
= args
[charset_arg_code_space
];
869 for (i
= 0, dimension
= 0, nchars
= 1; ; i
++)
871 Lisp_Object min_byte_obj
, max_byte_obj
;
872 int min_byte
, max_byte
;
874 min_byte_obj
= Faref (val
, make_number (i
* 2));
875 max_byte_obj
= Faref (val
, make_number (i
* 2 + 1));
876 CHECK_RANGED_INTEGER (min_byte_obj
, 0, 255);
877 min_byte
= XINT (min_byte_obj
);
878 CHECK_RANGED_INTEGER (max_byte_obj
, min_byte
, 255);
879 max_byte
= XINT (max_byte_obj
);
880 charset
.code_space
[i
* 4] = min_byte
;
881 charset
.code_space
[i
* 4 + 1] = max_byte
;
882 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
887 nchars
*= charset
.code_space
[i
* 4 + 2];
888 charset
.code_space
[i
* 4 + 3] = nchars
;
891 val
= args
[charset_arg_dimension
];
893 charset
.dimension
= dimension
;
896 CHECK_RANGED_INTEGER (val
, 1, 4);
897 charset
.dimension
= XINT (val
);
900 charset
.code_linear_p
901 = (charset
.dimension
== 1
902 || (charset
.code_space
[2] == 256
903 && (charset
.dimension
== 2
904 || (charset
.code_space
[6] == 256
905 && (charset
.dimension
== 3
906 || charset
.code_space
[10] == 256)))));
908 if (! charset
.code_linear_p
)
910 charset
.code_space_mask
= xzalloc (256);
911 for (i
= 0; i
< 4; i
++)
912 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
914 charset
.code_space_mask
[j
] |= (1 << i
);
917 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
919 charset
.min_code
= (charset
.code_space
[0]
920 | (charset
.code_space
[4] << 8)
921 | (charset
.code_space
[8] << 16)
922 | ((unsigned) charset
.code_space
[12] << 24));
923 charset
.max_code
= (charset
.code_space
[1]
924 | (charset
.code_space
[5] << 8)
925 | (charset
.code_space
[9] << 16)
926 | ((unsigned) charset
.code_space
[13] << 24));
927 charset
.char_index_offset
= 0;
929 val
= args
[charset_arg_min_code
];
932 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
934 if (code
< charset
.min_code
935 || code
> charset
.max_code
)
936 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
937 make_fixnum_or_float (charset
.max_code
), val
);
938 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
939 charset
.min_code
= code
;
942 val
= args
[charset_arg_max_code
];
945 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
947 if (code
< charset
.min_code
948 || code
> charset
.max_code
)
949 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
950 make_fixnum_or_float (charset
.max_code
), val
);
951 charset
.max_code
= code
;
954 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
956 val
= args
[charset_arg_invalid_code
];
959 if (charset
.min_code
> 0)
960 charset
.invalid_code
= 0;
963 if (charset
.max_code
< UINT_MAX
)
964 charset
.invalid_code
= charset
.max_code
+ 1;
966 error ("Attribute :invalid-code must be specified");
970 charset
.invalid_code
= cons_to_unsigned (val
, UINT_MAX
);
972 val
= args
[charset_arg_iso_final
];
974 charset
.iso_final
= -1;
978 if (XINT (val
) < '0' || XINT (val
) > 127)
979 error ("Invalid iso-final-char: %"pI
"d", XINT (val
));
980 charset
.iso_final
= XINT (val
);
983 val
= args
[charset_arg_iso_revision
];
985 charset
.iso_revision
= -1;
988 CHECK_RANGED_INTEGER (val
, -1, 63);
989 charset
.iso_revision
= XINT (val
);
992 val
= args
[charset_arg_emacs_mule_id
];
994 charset
.emacs_mule_id
= -1;
998 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
999 error ("Invalid emacs-mule-id: %"pI
"d", XINT (val
));
1000 charset
.emacs_mule_id
= XINT (val
);
1003 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1005 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1007 charset
.unified_p
= 0;
1009 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
1011 if (! NILP (args
[charset_arg_code_offset
]))
1013 val
= args
[charset_arg_code_offset
];
1014 CHECK_CHARACTER (val
);
1016 charset
.method
= CHARSET_METHOD_OFFSET
;
1017 charset
.code_offset
= XINT (val
);
1019 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1020 if (MAX_CHAR
- charset
.code_offset
< i
)
1021 error ("Unsupported max char: %d", charset
.max_char
);
1022 charset
.max_char
= i
+ charset
.code_offset
;
1023 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1024 charset
.min_char
= i
+ charset
.code_offset
;
1026 i
= (charset
.min_char
>> 7) << 7;
1027 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1028 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1029 i
= (i
>> 12) << 12;
1030 for (; i
<= charset
.max_char
; i
+= 0x1000)
1031 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1032 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1033 charset
.ascii_compatible_p
= 1;
1035 else if (! NILP (args
[charset_arg_map
]))
1037 val
= args
[charset_arg_map
];
1038 ASET (attrs
, charset_map
, val
);
1039 charset
.method
= CHARSET_METHOD_MAP
;
1041 else if (! NILP (args
[charset_arg_subset
]))
1044 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1045 struct charset
*parent_charset
;
1047 val
= args
[charset_arg_subset
];
1048 parent
= Fcar (val
);
1049 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1050 parent_min_code
= Fnth (make_number (1), val
);
1051 CHECK_NATNUM (parent_min_code
);
1052 parent_max_code
= Fnth (make_number (2), val
);
1053 CHECK_NATNUM (parent_max_code
);
1054 parent_code_offset
= Fnth (make_number (3), val
);
1055 CHECK_NUMBER (parent_code_offset
);
1056 val
= make_uninit_vector (4);
1057 ASET (val
, 0, make_number (parent_charset
->id
));
1058 ASET (val
, 1, parent_min_code
);
1059 ASET (val
, 2, parent_max_code
);
1060 ASET (val
, 3, parent_code_offset
);
1061 ASET (attrs
, charset_subset
, val
);
1063 charset
.method
= CHARSET_METHOD_SUBSET
;
1064 /* Here, we just copy the parent's fast_map. It's not accurate,
1065 but at least it works for quickly detecting which character
1066 DOESN'T belong to this charset. */
1067 for (i
= 0; i
< 190; i
++)
1068 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1070 /* We also copy these for parents. */
1071 charset
.min_char
= parent_charset
->min_char
;
1072 charset
.max_char
= parent_charset
->max_char
;
1074 else if (! NILP (args
[charset_arg_superset
]))
1076 val
= args
[charset_arg_superset
];
1077 charset
.method
= CHARSET_METHOD_SUPERSET
;
1078 val
= Fcopy_sequence (val
);
1079 ASET (attrs
, charset_superset
, val
);
1081 charset
.min_char
= MAX_CHAR
;
1082 charset
.max_char
= 0;
1083 for (; ! NILP (val
); val
= Fcdr (val
))
1085 Lisp_Object elt
, car_part
, cdr_part
;
1086 int this_id
, offset
;
1087 struct charset
*this_charset
;
1092 car_part
= XCAR (elt
);
1093 cdr_part
= XCDR (elt
);
1094 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1095 CHECK_TYPE_RANGED_INTEGER (int, cdr_part
);
1096 offset
= XINT (cdr_part
);
1100 CHECK_CHARSET_GET_ID (elt
, this_id
);
1103 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1105 this_charset
= CHARSET_FROM_ID (this_id
);
1106 if (charset
.min_char
> this_charset
->min_char
)
1107 charset
.min_char
= this_charset
->min_char
;
1108 if (charset
.max_char
< this_charset
->max_char
)
1109 charset
.max_char
= this_charset
->max_char
;
1110 for (i
= 0; i
< 190; i
++)
1111 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1115 error ("None of :code-offset, :map, :parents are specified");
1117 val
= args
[charset_arg_unify_map
];
1118 if (! NILP (val
) && !STRINGP (val
))
1120 ASET (attrs
, charset_unify_map
, val
);
1122 CHECK_LIST (args
[charset_arg_plist
]);
1123 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1125 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1127 if (charset
.hash_index
>= 0)
1129 new_definition_p
= 0;
1130 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1131 set_hash_value_slot (hash_table
, charset
.hash_index
, attrs
);
1135 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1137 if (charset_table_used
== charset_table_size
)
1139 /* Ensure that charset IDs fit into 'int' as well as into the
1140 restriction imposed by fixnums. Although the 'int' restriction
1141 could be removed, too much other code would need altering; for
1142 example, the IDs are stuffed into struct
1143 coding_system.charbuf[i] entries, which are 'int'. */
1144 int old_size
= charset_table_size
;
1145 ptrdiff_t new_size
= old_size
;
1146 struct charset
*new_table
=
1147 xpalloc (0, &new_size
, 1,
1148 min (INT_MAX
, MOST_POSITIVE_FIXNUM
),
1149 sizeof *charset_table
);
1150 memcpy (new_table
, charset_table
, old_size
* sizeof *new_table
);
1151 charset_table
= new_table
;
1152 charset_table_size
= new_size
;
1153 /* FIXME: This leaks memory, as the old charset_table becomes
1154 unreachable. If the old charset table is charset_table_init
1155 then this leak is intentional; otherwise, it's unclear.
1156 If the latter memory leak is intentional, a
1157 comment should be added to explain this. If not, the old
1158 charset_table should be freed, by passing it as the 1st argument
1159 to xpalloc and removing the memcpy. */
1161 id
= charset_table_used
++;
1162 new_definition_p
= 1;
1165 ASET (attrs
, charset_id
, make_number (id
));
1167 charset_table
[id
] = charset
;
1169 if (charset
.method
== CHARSET_METHOD_MAP
)
1171 load_charset (&charset
, 0);
1172 charset_table
[id
] = charset
;
1175 if (charset
.iso_final
>= 0)
1177 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1178 charset
.iso_final
) = id
;
1179 if (new_definition_p
)
1180 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1181 list1 (make_number (id
)));
1182 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1183 charset_jisx0201_roman
= id
;
1184 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1185 charset_jisx0208_1978
= id
;
1186 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1187 charset_jisx0208
= id
;
1188 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1189 charset_ksc5601
= id
;
1192 if (charset
.emacs_mule_id
>= 0)
1194 emacs_mule_charset
[charset
.emacs_mule_id
] = id
;
1195 if (charset
.emacs_mule_id
< 0xA0)
1196 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1198 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1199 if (new_definition_p
)
1200 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1201 list1 (make_number (id
)));
1204 if (new_definition_p
)
1206 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1207 if (charset
.supplementary_p
)
1208 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1209 list1 (make_number (id
)));
1214 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1216 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1218 if (cs
->supplementary_p
)
1221 if (EQ (tail
, Vcharset_ordered_list
))
1222 Vcharset_ordered_list
= Fcons (make_number (id
),
1223 Vcharset_ordered_list
);
1224 else if (NILP (tail
))
1225 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1226 list1 (make_number (id
)));
1229 val
= Fcons (XCAR (tail
), XCDR (tail
));
1230 XSETCDR (tail
, val
);
1231 XSETCAR (tail
, make_number (id
));
1234 charset_ordered_list_tick
++;
1241 /* Same as Fdefine_charset_internal but arguments are more convenient
1242 to call from C (typically in syms_of_charset). This can define a
1243 charset of `offset' method only. Return the ID of the new
1247 define_charset_internal (Lisp_Object name
,
1249 const char *code_space_chars
,
1250 unsigned min_code
, unsigned max_code
,
1251 int iso_final
, int iso_revision
, int emacs_mule_id
,
1252 bool ascii_compatible
, bool supplementary
,
1255 const unsigned char *code_space
= (const unsigned char *) code_space_chars
;
1256 Lisp_Object args
[charset_arg_max
];
1260 args
[charset_arg_name
] = name
;
1261 args
[charset_arg_dimension
] = make_number (dimension
);
1262 val
= make_uninit_vector (8);
1263 for (i
= 0; i
< 8; i
++)
1264 ASET (val
, i
, make_number (code_space
[i
]));
1265 args
[charset_arg_code_space
] = val
;
1266 args
[charset_arg_min_code
] = make_number (min_code
);
1267 args
[charset_arg_max_code
] = make_number (max_code
);
1268 args
[charset_arg_iso_final
]
1269 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1270 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1271 args
[charset_arg_emacs_mule_id
]
1272 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1273 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1274 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1275 args
[charset_arg_invalid_code
] = Qnil
;
1276 args
[charset_arg_code_offset
] = make_number (code_offset
);
1277 args
[charset_arg_map
] = Qnil
;
1278 args
[charset_arg_subset
] = Qnil
;
1279 args
[charset_arg_superset
] = Qnil
;
1280 args
[charset_arg_unify_map
] = Qnil
;
1282 args
[charset_arg_plist
] =
1283 listn (CONSTYPE_HEAP
, 14,
1284 intern_c_string (":name"),
1285 args
[charset_arg_name
],
1286 intern_c_string (":dimension"),
1287 args
[charset_arg_dimension
],
1288 intern_c_string (":code-space"),
1289 args
[charset_arg_code_space
],
1290 intern_c_string (":iso-final-char"),
1291 args
[charset_arg_iso_final
],
1292 intern_c_string (":emacs-mule-id"),
1293 args
[charset_arg_emacs_mule_id
],
1294 intern_c_string (":ascii-compatible-p"),
1295 args
[charset_arg_ascii_compatible_p
],
1296 intern_c_string (":code-offset"),
1297 args
[charset_arg_code_offset
]);
1298 Fdefine_charset_internal (charset_arg_max
, args
);
1300 return XINT (CHARSET_SYMBOL_ID (name
));
1304 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1305 Sdefine_charset_alias
, 2, 2, 0,
1306 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1307 (Lisp_Object alias
, Lisp_Object charset
)
1311 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1312 Fputhash (alias
, attr
, Vcharset_hash_table
);
1313 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1318 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1319 doc
: /* Return the property list of CHARSET. */)
1320 (Lisp_Object charset
)
1324 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1325 return CHARSET_ATTR_PLIST (attrs
);
1329 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1330 doc
: /* Set CHARSET's property list to PLIST. */)
1331 (Lisp_Object charset
, Lisp_Object plist
)
1335 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1336 ASET (attrs
, charset_plist
, plist
);
1341 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1342 doc
: /* Unify characters of CHARSET with Unicode.
1343 This means reading the relevant file and installing the table defined
1344 by CHARSET's `:unify-map' property.
1346 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1347 the same meaning as the `:unify-map' attribute in the function
1348 `define-charset' (which see).
1350 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1351 (Lisp_Object charset
, Lisp_Object unify_map
, Lisp_Object deunify
)
1356 CHECK_CHARSET_GET_ID (charset
, id
);
1357 cs
= CHARSET_FROM_ID (id
);
1359 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1360 : ! CHARSET_UNIFIED_P (cs
))
1363 CHARSET_UNIFIED_P (cs
) = 0;
1366 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1367 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1368 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1369 if (NILP (unify_map
))
1370 unify_map
= CHARSET_UNIFY_MAP (cs
);
1373 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1374 signal_error ("Bad unify-map", unify_map
);
1375 set_charset_attr (cs
, charset_unify_map
, unify_map
);
1377 if (NILP (Vchar_unify_table
))
1378 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1379 char_table_set_range (Vchar_unify_table
,
1380 cs
->min_char
, cs
->max_char
, charset
);
1381 CHARSET_UNIFIED_P (cs
) = 1;
1383 else if (CHAR_TABLE_P (Vchar_unify_table
))
1385 unsigned min_code
= CHARSET_MIN_CODE (cs
);
1386 unsigned max_code
= CHARSET_MAX_CODE (cs
);
1387 int min_char
= DECODE_CHAR (cs
, min_code
);
1388 int max_char
= DECODE_CHAR (cs
, max_code
);
1390 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1396 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
1397 Return true if it's a 96-character set, false if 94. */
1400 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
,
1401 Lisp_Object final_char
)
1403 CHECK_NUMBER (dimension
);
1404 CHECK_NUMBER (chars
);
1405 CHECK_CHARACTER (final_char
);
1407 if (! (1 <= XINT (dimension
) && XINT (dimension
) <= 3))
1408 error ("Invalid DIMENSION %"pI
"d, it should be 1, 2, or 3",
1411 bool chars_flag
= XINT (chars
) == 96;
1412 if (! (chars_flag
|| XINT (chars
) == 94))
1413 error ("Invalid CHARS %"pI
"d, it should be 94 or 96", XINT (chars
));
1415 int final_ch
= XFASTINT (final_char
);
1416 if (! ('0' <= final_ch
&& final_ch
<= '~'))
1417 error ("Invalid FINAL-CHAR '%c', it should be '0'..'~'", final_ch
);
1422 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1423 Sget_unused_iso_final_char
, 2, 2, 0,
1425 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1426 DIMENSION is the number of bytes to represent a character: 1 or 2.
1427 CHARS is the number of characters in a dimension: 94 or 96.
1429 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1430 If there's no unused final char for the specified kind of charset,
1432 (Lisp_Object dimension
, Lisp_Object chars
)
1434 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
,
1436 for (int final_char
= '0'; final_char
<= '?'; final_char
++)
1437 if (ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, final_char
) < 0)
1438 return make_number (final_char
);
1443 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1445 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1447 On decoding by an ISO-2022 base coding system, when a charset
1448 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1449 if CHARSET is designated instead. */)
1450 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
, Lisp_Object charset
)
1454 CHECK_CHARSET_GET_ID (charset
, id
);
1455 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
1456 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XFASTINT (final_char
)) = id
;
1461 /* Return information about charsets in the text at PTR of NBYTES
1462 bytes, which are NCHARS characters. The value is:
1464 0: Each character is represented by one byte. This is always
1465 true for a unibyte string. For a multibyte string, true if
1466 it contains only ASCII characters.
1468 1: No charsets other than ascii, control-1, and latin-1 are
1475 string_xstring_p (Lisp_Object string
)
1477 const unsigned char *p
= SDATA (string
);
1478 const unsigned char *endp
= p
+ SBYTES (string
);
1480 if (SCHARS (string
) == SBYTES (string
))
1485 int c
= STRING_CHAR_ADVANCE (p
);
1494 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1496 CHARSETS is a vector. If Nth element is non-nil, it means the
1497 charset whose id is N is already found.
1499 It may lookup a translation table TABLE if supplied. */
1502 find_charsets_in_text (const unsigned char *ptr
, ptrdiff_t nchars
,
1503 ptrdiff_t nbytes
, Lisp_Object charsets
,
1504 Lisp_Object table
, bool multibyte
)
1506 const unsigned char *pend
= ptr
+ nbytes
;
1508 if (nchars
== nbytes
)
1511 ASET (charsets
, charset_ascii
, Qt
);
1518 c
= translate_char (table
, c
);
1519 if (ASCII_CHAR_P (c
))
1520 ASET (charsets
, charset_ascii
, Qt
);
1522 ASET (charsets
, charset_eight_bit
, Qt
);
1529 int c
= STRING_CHAR_ADVANCE (ptr
);
1530 struct charset
*charset
;
1533 c
= translate_char (table
, c
);
1534 charset
= CHAR_CHARSET (c
);
1535 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1540 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1542 doc
: /* Return a list of charsets in the region between BEG and END.
1543 BEG and END are buffer positions.
1544 Optional arg TABLE if non-nil is a translation table to look up.
1546 If the current buffer is unibyte, the returned list may contain
1547 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1548 (Lisp_Object beg
, Lisp_Object end
, Lisp_Object table
)
1550 Lisp_Object charsets
;
1551 ptrdiff_t from
, from_byte
, to
, stop
, stop_byte
;
1554 bool multibyte
= ! NILP (BVAR (current_buffer
, enable_multibyte_characters
));
1556 validate_region (&beg
, &end
);
1557 from
= XFASTINT (beg
);
1558 stop
= to
= XFASTINT (end
);
1560 if (from
< GPT
&& GPT
< to
)
1563 stop_byte
= GPT_BYTE
;
1566 stop_byte
= CHAR_TO_BYTE (stop
);
1568 from_byte
= CHAR_TO_BYTE (from
);
1570 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1573 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1574 stop_byte
- from_byte
, charsets
, table
,
1578 from
= stop
, from_byte
= stop_byte
;
1579 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1586 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1587 if (!NILP (AREF (charsets
, i
)))
1588 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1592 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1594 doc
: /* Return a list of charsets in STR.
1595 Optional arg TABLE if non-nil is a translation table to look up.
1597 If STR is unibyte, the returned list may contain
1598 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1599 (Lisp_Object str
, Lisp_Object table
)
1601 Lisp_Object charsets
;
1607 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1608 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1610 STRING_MULTIBYTE (str
));
1612 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1613 if (!NILP (AREF (charsets
, i
)))
1614 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1620 /* Return a unified character code for C (>= 0x110000). VAL is a
1621 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1624 maybe_unify_char (int c
, Lisp_Object val
)
1626 struct charset
*charset
;
1629 return XFASTINT (val
);
1633 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1635 /* The call to load_charset below can allocate memory, which screws
1636 callers of this function through STRING_CHAR_* macros that hold C
1637 pointers to buffer text, if REL_ALLOC is used. */
1638 r_alloc_inhibit_buffer_relocation (1);
1640 load_charset (charset
, 1);
1641 if (! inhibit_load_charset_map
)
1643 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1649 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1650 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1656 r_alloc_inhibit_buffer_relocation (0);
1662 /* Return a character corresponding to the code-point CODE of
1666 decode_char (struct charset
*charset
, unsigned int code
)
1669 enum charset_method method
= CHARSET_METHOD (charset
);
1671 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1674 if (method
== CHARSET_METHOD_SUBSET
)
1676 Lisp_Object subset_info
;
1678 subset_info
= CHARSET_SUBSET (charset
);
1679 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1680 code
-= XINT (AREF (subset_info
, 3));
1681 if (code
< XFASTINT (AREF (subset_info
, 1))
1682 || code
> XFASTINT (AREF (subset_info
, 2)))
1685 c
= DECODE_CHAR (charset
, code
);
1687 else if (method
== CHARSET_METHOD_SUPERSET
)
1689 Lisp_Object parents
;
1691 parents
= CHARSET_SUPERSET (charset
);
1693 for (; CONSP (parents
); parents
= XCDR (parents
))
1695 int id
= XINT (XCAR (XCAR (parents
)));
1696 int code_offset
= XINT (XCDR (XCAR (parents
)));
1697 unsigned this_code
= code
- code_offset
;
1699 charset
= CHARSET_FROM_ID (id
);
1700 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1706 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1710 if (method
== CHARSET_METHOD_MAP
)
1712 Lisp_Object decoder
;
1714 decoder
= CHARSET_DECODER (charset
);
1715 if (! VECTORP (decoder
))
1717 load_charset (charset
, 1);
1718 decoder
= CHARSET_DECODER (charset
);
1720 if (VECTORP (decoder
))
1721 c
= XINT (AREF (decoder
, char_index
));
1723 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1725 else /* method == CHARSET_METHOD_OFFSET */
1727 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1728 if (CHARSET_UNIFIED_P (charset
)
1729 && MAX_UNICODE_CHAR
< c
&& c
<= MAX_5_BYTE_CHAR
)
1731 /* Unify C with a Unicode character if possible. */
1732 Lisp_Object val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1733 c
= maybe_unify_char (c
, val
);
1741 /* Variable used temporarily by the macro ENCODE_CHAR. */
1742 Lisp_Object charset_work
;
1744 /* Return a code-point of C in CHARSET. If C doesn't belong to
1745 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1746 use CHARSET's strict_max_char instead of max_char. */
1749 encode_char (struct charset
*charset
, int c
)
1752 enum charset_method method
= CHARSET_METHOD (charset
);
1754 if (CHARSET_UNIFIED_P (charset
))
1756 Lisp_Object deunifier
;
1757 int code_index
= -1;
1759 deunifier
= CHARSET_DEUNIFIER (charset
);
1760 if (! CHAR_TABLE_P (deunifier
))
1762 load_charset (charset
, 2);
1763 deunifier
= CHARSET_DEUNIFIER (charset
);
1765 if (CHAR_TABLE_P (deunifier
))
1767 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1769 if (INTEGERP (deunified
))
1770 code_index
= XINT (deunified
);
1774 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1776 if (code_index
>= 0)
1777 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1780 if (method
== CHARSET_METHOD_SUBSET
)
1782 Lisp_Object subset_info
;
1783 struct charset
*this_charset
;
1785 subset_info
= CHARSET_SUBSET (charset
);
1786 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1787 code
= ENCODE_CHAR (this_charset
, c
);
1788 if (code
== CHARSET_INVALID_CODE (this_charset
)
1789 || code
< XFASTINT (AREF (subset_info
, 1))
1790 || code
> XFASTINT (AREF (subset_info
, 2)))
1791 return CHARSET_INVALID_CODE (charset
);
1792 code
+= XINT (AREF (subset_info
, 3));
1796 if (method
== CHARSET_METHOD_SUPERSET
)
1798 Lisp_Object parents
;
1800 parents
= CHARSET_SUPERSET (charset
);
1801 for (; CONSP (parents
); parents
= XCDR (parents
))
1803 int id
= XINT (XCAR (XCAR (parents
)));
1804 int code_offset
= XINT (XCDR (XCAR (parents
)));
1805 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1807 code
= ENCODE_CHAR (this_charset
, c
);
1808 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1809 return code
+ code_offset
;
1811 return CHARSET_INVALID_CODE (charset
);
1814 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1815 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1816 return CHARSET_INVALID_CODE (charset
);
1818 if (method
== CHARSET_METHOD_MAP
)
1820 Lisp_Object encoder
;
1823 encoder
= CHARSET_ENCODER (charset
);
1824 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1826 load_charset (charset
, 2);
1827 encoder
= CHARSET_ENCODER (charset
);
1829 if (CHAR_TABLE_P (encoder
))
1831 val
= CHAR_TABLE_REF (encoder
, c
);
1833 return CHARSET_INVALID_CODE (charset
);
1835 if (! CHARSET_COMPACT_CODES_P (charset
))
1836 code
= INDEX_TO_CODE_POINT (charset
, code
);
1840 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1841 code
= INDEX_TO_CODE_POINT (charset
, code
);
1844 else /* method == CHARSET_METHOD_OFFSET */
1846 unsigned code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1848 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1855 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1856 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1857 Return nil if CODE-POINT is not valid in CHARSET.
1859 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1860 (Lisp_Object charset
, Lisp_Object code_point
, Lisp_Object restriction
)
1864 struct charset
*charsetp
;
1866 CHECK_CHARSET_GET_ID (charset
, id
);
1867 code
= cons_to_unsigned (code_point
, UINT_MAX
);
1868 charsetp
= CHARSET_FROM_ID (id
);
1869 c
= DECODE_CHAR (charsetp
, code
);
1870 return (c
>= 0 ? make_number (c
) : Qnil
);
1874 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1875 doc
: /* Encode the character CH into a code-point of CHARSET.
1876 Return nil if CHARSET doesn't include CH. */)
1877 (Lisp_Object ch
, Lisp_Object charset
, Lisp_Object restriction
)
1881 struct charset
*charsetp
;
1883 CHECK_CHARSET_GET_ID (charset
, id
);
1884 CHECK_CHARACTER (ch
);
1886 charsetp
= CHARSET_FROM_ID (id
);
1887 code
= ENCODE_CHAR (charsetp
, c
);
1888 if (code
== CHARSET_INVALID_CODE (charsetp
))
1890 return INTEGER_TO_CONS (code
);
1894 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1896 /* Return a character of CHARSET whose position codes are CODEn.
1898 CODE1 through CODE4 are optional, but if you don't supply sufficient
1899 position codes, it is assumed that the minimum code in each dimension
1901 (Lisp_Object charset
, Lisp_Object code1
, Lisp_Object code2
, Lisp_Object code3
, Lisp_Object code4
)
1904 struct charset
*charsetp
;
1908 CHECK_CHARSET_GET_ID (charset
, id
);
1909 charsetp
= CHARSET_FROM_ID (id
);
1911 dimension
= CHARSET_DIMENSION (charsetp
);
1913 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1914 ? 0 : CHARSET_MIN_CODE (charsetp
));
1917 CHECK_NATNUM (code1
);
1918 if (XFASTINT (code1
) >= 0x100)
1919 args_out_of_range (make_number (0xFF), code1
);
1920 code
= XFASTINT (code1
);
1926 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1929 CHECK_NATNUM (code2
);
1930 if (XFASTINT (code2
) >= 0x100)
1931 args_out_of_range (make_number (0xFF), code2
);
1932 code
|= XFASTINT (code2
);
1939 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1942 CHECK_NATNUM (code3
);
1943 if (XFASTINT (code3
) >= 0x100)
1944 args_out_of_range (make_number (0xFF), code3
);
1945 code
|= XFASTINT (code3
);
1952 code
|= charsetp
->code_space
[0];
1955 CHECK_NATNUM (code4
);
1956 if (XFASTINT (code4
) >= 0x100)
1957 args_out_of_range (make_number (0xFF), code4
);
1958 code
|= XFASTINT (code4
);
1965 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1967 c
= DECODE_CHAR (charsetp
, code
);
1969 error ("Invalid code(s)");
1970 return make_number (c
);
1974 /* Return the first charset in CHARSET_LIST that contains C.
1975 CHARSET_LIST is a list of charset IDs. If it is nil, use
1976 Vcharset_ordered_list. */
1979 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
1981 bool maybe_null
= 0;
1983 if (NILP (charset_list
))
1984 charset_list
= Vcharset_ordered_list
;
1988 while (CONSP (charset_list
))
1990 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1991 unsigned code
= ENCODE_CHAR (charset
, c
);
1993 if (code
!= CHARSET_INVALID_CODE (charset
))
1996 *code_return
= code
;
1999 charset_list
= XCDR (charset_list
);
2001 && c
<= MAX_UNICODE_CHAR
2002 && EQ (charset_list
, Vcharset_non_preferred_head
))
2003 return CHARSET_FROM_ID (charset_unicode
);
2005 return (maybe_null
? NULL
2006 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2007 : CHARSET_FROM_ID (charset_eight_bit
));
2011 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2013 /*Return list of charset and one to four position-codes of CH.
2014 The charset is decided by the current priority order of charsets.
2015 A position-code is a byte value of each dimension of the code-point of
2016 CH in the charset. */)
2019 struct charset
*charset
;
2024 CHECK_CHARACTER (ch
);
2026 charset
= CHAR_CHARSET (c
);
2029 code
= ENCODE_CHAR (charset
, c
);
2030 if (code
== CHARSET_INVALID_CODE (charset
))
2032 dimension
= CHARSET_DIMENSION (charset
);
2033 for (val
= Qnil
; dimension
> 0; dimension
--)
2035 val
= Fcons (make_number (code
& 0xFF), val
);
2038 return Fcons (CHARSET_NAME (charset
), val
);
2042 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2043 doc
: /* Return the charset of highest priority that contains CH.
2044 ASCII characters are an exception: for them, this function always
2046 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2047 from which to find the charset. It may also be a coding system. In
2048 that case, find the charset from what supported by that coding system. */)
2049 (Lisp_Object ch
, Lisp_Object restriction
)
2051 struct charset
*charset
;
2053 CHECK_CHARACTER (ch
);
2054 if (NILP (restriction
))
2055 charset
= CHAR_CHARSET (XINT (ch
));
2058 if (CONSP (restriction
))
2060 int c
= XFASTINT (ch
);
2062 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2064 struct charset
*rcharset
;
2066 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), rcharset
);
2067 if (ENCODE_CHAR (rcharset
, c
) != CHARSET_INVALID_CODE (rcharset
))
2068 return XCAR (restriction
);
2072 restriction
= coding_system_charset_list (restriction
);
2073 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2077 return (CHARSET_NAME (charset
));
2081 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2083 Return charset of a character in the current buffer at position POS.
2084 If POS is nil, it defaults to the current point.
2085 If POS is out of range, the value is nil. */)
2089 struct charset
*charset
;
2091 ch
= Fchar_after (pos
);
2092 if (! INTEGERP (ch
))
2094 charset
= CHAR_CHARSET (XINT (ch
));
2095 return (CHARSET_NAME (charset
));
2099 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2101 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2103 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2104 by their DIMENSION, CHARS, and FINAL-CHAR,
2105 whereas Emacs distinguishes them by charset symbol.
2106 See the documentation of the function `charset-info' for the meanings of
2107 DIMENSION, CHARS, and FINAL-CHAR. */)
2108 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
2110 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
2111 int id
= ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
,
2112 XFASTINT (final_char
));
2113 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2117 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2121 Clear temporary charset mapping tables.
2122 It should be called only from temacs invoked for dumping. */)
2125 if (temp_charset_work
)
2127 xfree (temp_charset_work
);
2128 temp_charset_work
= NULL
;
2131 if (CHAR_TABLE_P (Vchar_unify_table
))
2132 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2137 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2138 Scharset_priority_list
, 0, 1, 0,
2139 doc
: /* Return the list of charsets ordered by priority.
2140 HIGHESTP non-nil means just return the highest priority one. */)
2141 (Lisp_Object highestp
)
2143 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2145 if (!NILP (highestp
))
2146 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2148 while (!NILP (list
))
2150 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2153 return Fnreverse (val
);
2156 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2158 doc
: /* Assign higher priority to the charsets given as arguments.
2159 usage: (set-charset-priority &rest charsets) */)
2160 (ptrdiff_t nargs
, Lisp_Object
*args
)
2162 Lisp_Object new_head
, old_list
, arglist
[2];
2163 Lisp_Object list_2022
, list_emacs_mule
;
2167 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2169 for (i
= 0; i
< nargs
; i
++)
2171 CHECK_CHARSET_GET_ID (args
[i
], id
);
2172 if (! NILP (Fmemq (make_number (id
), old_list
)))
2174 old_list
= Fdelq (make_number (id
), old_list
);
2175 new_head
= Fcons (make_number (id
), new_head
);
2178 arglist
[0] = Fnreverse (new_head
);
2179 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2180 Vcharset_ordered_list
= Fnconc (2, arglist
);
2181 charset_ordered_list_tick
++;
2183 charset_unibyte
= -1;
2184 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2185 CONSP (old_list
); old_list
= XCDR (old_list
))
2187 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2188 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2189 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2190 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2191 if (charset_unibyte
< 0)
2193 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2195 if (CHARSET_DIMENSION (charset
) == 1
2196 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2197 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2198 charset_unibyte
= CHARSET_ID (charset
);
2201 Viso_2022_charset_list
= Fnreverse (list_2022
);
2202 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2203 if (charset_unibyte
< 0)
2204 charset_unibyte
= charset_iso_8859_1
;
2209 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2211 doc
: /* Internal use only.
2212 Return charset identification number of CHARSET. */)
2213 (Lisp_Object charset
)
2217 CHECK_CHARSET_GET_ID (charset
, id
);
2218 return make_number (id
);
2221 struct charset_sort_data
2223 Lisp_Object charset
;
2229 charset_compare (const void *d1
, const void *d2
)
2231 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2232 if (data1
->priority
!= data2
->priority
)
2233 return data1
->priority
< data2
->priority
? -1 : 1;
2237 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2238 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2239 Return the sorted list. CHARSETS is modified by side effects.
2240 See also `charset-priority-list' and `set-charset-priority'. */)
2241 (Lisp_Object charsets
)
2243 Lisp_Object len
= Flength (charsets
);
2244 ptrdiff_t n
= XFASTINT (len
), i
, j
;
2246 Lisp_Object tail
, elt
, attrs
;
2247 struct charset_sort_data
*sort_data
;
2248 int id
, min_id
= INT_MAX
, max_id
= INT_MIN
;
2253 SAFE_NALLOCA (sort_data
, 1, n
);
2254 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2257 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2258 sort_data
[i
].charset
= elt
;
2259 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2265 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2266 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2269 id
= XFASTINT (elt
);
2270 if (id
>= min_id
&& id
<= max_id
)
2271 for (j
= 0; j
< n
; j
++)
2272 if (sort_data
[j
].id
== id
)
2274 sort_data
[j
].priority
= i
;
2278 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2279 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2280 XSETCAR (tail
, sort_data
[i
].charset
);
2289 Lisp_Object tempdir
;
2290 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2291 if (! file_accessible_directory_p (tempdir
))
2293 /* This used to be non-fatal (dir_warning), but it should not
2294 happen, and if it does sooner or later it will cause some
2295 obscure problem (eg bug#6401), so better abort. */
2296 fprintf (stderr
, "Error: charsets directory not found:\n\
2298 Emacs will not function correctly without the character map files.\n%s\
2299 Please check your installation!\n",
2301 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2302 variable is set, maybe it has the wrong value?\n" : "");
2306 Vcharset_map_path
= list1 (tempdir
);
2311 init_charset_once (void)
2315 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2316 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2317 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2318 iso_charset_table
[i
][j
][k
] = -1;
2320 for (i
= 0; i
< 256; i
++)
2321 emacs_mule_charset
[i
] = -1;
2323 charset_jisx0201_roman
= -1;
2324 charset_jisx0208_1978
= -1;
2325 charset_jisx0208
= -1;
2326 charset_ksc5601
= -1;
2331 /* Allocate an initial charset table that is large enough to handle
2332 Emacs while it is bootstrapping. As of September 2011, the size
2333 needs to be at least 166; make it a bit bigger to allow for future
2336 Don't make the value so small that the table is reallocated during
2337 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2338 during an initial bootstrap wreak havoc after dumping; see the
2339 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2340 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2341 static struct charset charset_table_init
[180];
2344 syms_of_charset (void)
2346 DEFSYM (Qcharsetp
, "charsetp");
2348 DEFSYM (Qascii
, "ascii");
2349 DEFSYM (Qunicode
, "unicode");
2350 DEFSYM (Qemacs
, "emacs");
2351 DEFSYM (Qeight_bit
, "eight-bit");
2352 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2357 staticpro (&Vcharset_ordered_list
);
2358 Vcharset_ordered_list
= Qnil
;
2360 staticpro (&Viso_2022_charset_list
);
2361 Viso_2022_charset_list
= Qnil
;
2363 staticpro (&Vemacs_mule_charset_list
);
2364 Vemacs_mule_charset_list
= Qnil
;
2366 /* Don't staticpro them here. It's done in syms_of_fns. */
2367 QCtest
= intern_c_string (":test");
2368 Qeq
= intern_c_string ("eq");
2370 staticpro (&Vcharset_hash_table
);
2372 Lisp_Object args
[2];
2375 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2378 charset_table
= charset_table_init
;
2379 charset_table_size
= ARRAYELTS (charset_table_init
);
2380 charset_table_used
= 0;
2382 defsubr (&Scharsetp
);
2383 defsubr (&Smap_charset_chars
);
2384 defsubr (&Sdefine_charset_internal
);
2385 defsubr (&Sdefine_charset_alias
);
2386 defsubr (&Scharset_plist
);
2387 defsubr (&Sset_charset_plist
);
2388 defsubr (&Sunify_charset
);
2389 defsubr (&Sget_unused_iso_final_char
);
2390 defsubr (&Sdeclare_equiv_charset
);
2391 defsubr (&Sfind_charset_region
);
2392 defsubr (&Sfind_charset_string
);
2393 defsubr (&Sdecode_char
);
2394 defsubr (&Sencode_char
);
2395 defsubr (&Ssplit_char
);
2396 defsubr (&Smake_char
);
2397 defsubr (&Schar_charset
);
2398 defsubr (&Scharset_after
);
2399 defsubr (&Siso_charset
);
2400 defsubr (&Sclear_charset_maps
);
2401 defsubr (&Scharset_priority_list
);
2402 defsubr (&Sset_charset_priority
);
2403 defsubr (&Scharset_id_internal
);
2404 defsubr (&Ssort_charsets
);
2406 DEFVAR_LISP ("charset-map-path", Vcharset_map_path
,
2407 doc
: /* List of directories to search for charset map files. */);
2408 Vcharset_map_path
= Qnil
;
2410 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map
,
2411 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2412 inhibit_load_charset_map
= 0;
2414 DEFVAR_LISP ("charset-list", Vcharset_list
,
2415 doc
: /* List of all charsets ever defined. */);
2416 Vcharset_list
= Qnil
;
2418 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language
,
2419 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2420 If the current language environment is for multiple languages (e.g. "Latin-1"),
2421 the value may be a list of mnemonics. */);
2422 Vcurrent_iso639_language
= Qnil
;
2425 = define_charset_internal (Qascii
, 1, "\x00\x7F\0\0\0\0\0",
2426 0, 127, 'B', -1, 0, 1, 0, 0);
2428 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\0\0\0\0\0",
2429 0, 255, -1, -1, -1, 1, 0, 0);
2431 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2432 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2434 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2435 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2437 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\0\0\0\0\0",
2438 128, 255, -1, 0, -1, 0, 1,
2439 MAX_5_BYTE_CHAR
+ 1);
2440 charset_unibyte
= charset_iso_8859_1
;