1 /* Basic character set support.
3 Copyright (C) 2001-2017 Free Software Foundation, Inc.
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
14 This file is part of GNU Emacs.
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or (at
19 your option) any later version.
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
36 #include <sys/types.h>
39 #include "character.h"
44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
46 A coded character set ("charset" hereafter) is a meaningful
47 collection (i.e. language, culture, functionality, etc.) of
48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
55 charset_table as a struct charset.
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table
;
63 /* Table of struct charset. */
64 struct charset
*charset_table
;
66 static ptrdiff_t charset_table_size
;
67 static int charset_table_used
;
69 /* Special charsets corresponding to symbols. */
71 int charset_eight_bit
;
72 static int charset_iso_8859_1
;
74 static int charset_emacs
;
76 /* The other special charsets. */
77 int charset_jisx0201_roman
;
78 int charset_jisx0208_1978
;
82 /* Charset of unibyte characters. */
85 /* List of charsets ordered by the priority. */
86 Lisp_Object Vcharset_ordered_list
;
88 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
90 Lisp_Object Vcharset_non_preferred_head
;
92 /* Incremented every time we change the priority of charsets.
94 EMACS_UINT charset_ordered_list_tick
;
96 /* List of iso-2022 charsets. */
97 Lisp_Object Viso_2022_charset_list
;
99 /* List of emacs-mule charsets. */
100 Lisp_Object Vemacs_mule_charset_list
;
102 int emacs_mule_charset
[256];
104 /* Mapping table from ISO2022's charset (specified by DIMENSION,
105 CHARS, and FINAL-CHAR) to Emacs' charset. */
106 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
108 #define CODE_POINT_TO_INDEX(charset, code) \
109 ((charset)->code_linear_p \
110 ? (int) ((code) - (charset)->min_code) \
111 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
112 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
113 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
114 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
115 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
116 * (charset)->code_space[11]) \
117 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
118 * (charset)->code_space[7]) \
119 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
120 * (charset)->code_space[3]) \
121 + (((code) & 0xFF) - (charset)->code_space[0]) \
122 - ((charset)->char_index_offset)) \
126 /* Return the code-point for the character index IDX in CHARSET.
127 IDX should be an unsigned int variable in a valid range (which is
128 always in nonnegative int range too). IDX contains garbage afterwards. */
130 #define INDEX_TO_CODE_POINT(charset, idx) \
131 ((charset)->code_linear_p \
132 ? (idx) + (charset)->min_code \
133 : (idx += (charset)->char_index_offset, \
134 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
135 | (((charset)->code_space[4] \
136 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
138 | (((charset)->code_space[8] \
139 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
141 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
144 /* Structure to hold mapping tables for a charset. Used by temacs
145 invoked for dumping. */
149 /* The current charset for which the following tables are setup. */
150 struct charset
*current
;
152 /* 1 iff the following table is used for encoder. */
155 /* When the following table is used for encoding, minimum and
156 maximum character of the current charset. */
157 int min_char
, max_char
;
159 /* A Unicode character corresponding to the code index 0 (i.e. the
160 minimum code-point) of the current charset, or -1 if the code
161 index 0 is not a Unicode character. This is checked when
162 table.encoder[CHAR] is zero. */
166 /* Table mapping code-indices (not code-points) of the current
167 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
168 doesn't belong to the current charset. */
169 int decoder
[0x10000];
170 /* Table mapping Unicode characters to code-indices of the current
171 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
172 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
173 (0x20000..0x2FFFF). Note that there is no charset map that
174 uses both SMP and SIP. */
175 unsigned short encoder
[0x20000];
177 } *temp_charset_work
;
179 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
182 temp_charset_work->zero_index_char = (C); \
183 else if ((C) < 0x20000) \
184 temp_charset_work->table.encoder[(C)] = (CODE); \
186 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
189 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
190 ((C) == temp_charset_work->zero_index_char ? 0 \
191 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
192 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
193 : temp_charset_work->table.encoder[(C) - 0x10000] \
194 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
196 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
197 (temp_charset_work->table.decoder[(CODE)] = (C))
199 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
200 (temp_charset_work->table.decoder[(CODE)])
203 /* Set to 1 to warn that a charset map is loaded and thus a buffer
204 text and a string data may be relocated. */
205 bool charset_map_loaded
;
207 struct charset_map_entries
213 struct charset_map_entries
*next
;
216 /* Load the mapping information of CHARSET from ENTRIES for
217 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
218 encoding (CONTROL_FLAG == 2).
220 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
221 and CHARSET->fast_map.
223 If CONTROL_FLAG is 1, setup the following tables according to
224 CHARSET->method and inhibit_load_charset_map.
226 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
227 ----------------------+--------------------+---------------------------
228 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
229 ----------------------+--------------------+---------------------------
230 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
232 If CONTROL_FLAG is 2, setup the following tables.
234 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
235 ----------------------+--------------------+---------------------------
236 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
237 ----------------------+--------------------+--------------------------
238 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
242 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
244 Lisp_Object vec UNINIT
;
245 Lisp_Object table UNINIT
;
246 unsigned max_code
= CHARSET_MAX_CODE (charset
);
247 bool ascii_compatible_p
= charset
->ascii_compatible_p
;
248 int min_char
, max_char
, nonascii_min_char
;
250 unsigned char *fast_map
= charset
->fast_map
;
257 if (! inhibit_load_charset_map
)
259 if (control_flag
== 1)
261 if (charset
->method
== CHARSET_METHOD_MAP
)
263 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
265 vec
= Fmake_vector (make_number (n
), make_number (-1));
266 set_charset_attr (charset
, charset_decoder
, vec
);
270 char_table_set_range (Vchar_unify_table
,
271 charset
->min_char
, charset
->max_char
,
277 table
= Fmake_char_table (Qnil
, Qnil
);
278 set_charset_attr (charset
,
279 (charset
->method
== CHARSET_METHOD_MAP
280 ? charset_encoder
: charset_deunifier
),
286 if (! temp_charset_work
)
287 temp_charset_work
= xmalloc (sizeof *temp_charset_work
);
288 if (control_flag
== 1)
290 memset (temp_charset_work
->table
.decoder
, -1,
291 sizeof (int) * 0x10000);
295 memset (temp_charset_work
->table
.encoder
, 0,
296 sizeof (unsigned short) * 0x20000);
297 temp_charset_work
->zero_index_char
= -1;
299 temp_charset_work
->current
= charset
;
300 temp_charset_work
->for_encoder
= (control_flag
== 2);
303 charset_map_loaded
= 1;
306 min_char
= max_char
= entries
->entry
[0].c
;
307 nonascii_min_char
= MAX_CHAR
;
308 for (i
= 0; i
< n_entries
; i
++)
311 int from_index
, to_index
, lim_index
;
313 int idx
= i
% 0x10000;
315 if (i
> 0 && idx
== 0)
316 entries
= entries
->next
;
317 from
= entries
->entry
[idx
].from
;
318 to
= entries
->entry
[idx
].to
;
319 from_c
= entries
->entry
[idx
].c
;
320 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
323 to_index
= from_index
;
328 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
329 to_c
= from_c
+ (to_index
- from_index
);
331 if (from_index
< 0 || to_index
< 0)
333 lim_index
= to_index
+ 1;
337 else if (from_c
< min_char
)
340 if (control_flag
== 1)
342 if (charset
->method
== CHARSET_METHOD_MAP
)
343 for (; from_index
< lim_index
; from_index
++, from_c
++)
344 ASET (vec
, from_index
, make_number (from_c
));
346 for (; from_index
< lim_index
; from_index
++, from_c
++)
347 CHAR_TABLE_SET (Vchar_unify_table
,
348 CHARSET_CODE_OFFSET (charset
) + from_index
,
349 make_number (from_c
));
351 else if (control_flag
== 2)
353 if (charset
->method
== CHARSET_METHOD_MAP
354 && CHARSET_COMPACT_CODES_P (charset
))
355 for (; from_index
< lim_index
; from_index
++, from_c
++)
357 unsigned code
= from_index
;
358 code
= INDEX_TO_CODE_POINT (charset
, code
);
360 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
361 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
364 for (; from_index
< lim_index
; from_index
++, from_c
++)
366 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
367 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
370 else if (control_flag
== 3)
371 for (; from_index
< lim_index
; from_index
++, from_c
++)
372 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
373 else if (control_flag
== 4)
374 for (; from_index
< lim_index
; from_index
++, from_c
++)
375 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
376 else /* control_flag == 0 */
378 if (ascii_compatible_p
)
380 if (! ASCII_CHAR_P (from_c
))
382 if (from_c
< nonascii_min_char
)
383 nonascii_min_char
= from_c
;
385 else if (! ASCII_CHAR_P (to_c
))
387 nonascii_min_char
= 0x80;
391 for (; from_c
<= to_c
; from_c
++)
392 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
396 if (control_flag
== 0)
398 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
399 ? nonascii_min_char
: min_char
);
400 CHARSET_MAX_CHAR (charset
) = max_char
;
402 else if (control_flag
== 4)
404 temp_charset_work
->min_char
= min_char
;
405 temp_charset_work
->max_char
= max_char
;
410 /* Read a hexadecimal number (preceded by "0x") from the file FP while
411 paying attention to comment character '#'. */
414 read_hex (FILE *fp
, bool *eof
, bool *overflow
)
419 while ((c
= getc (fp
)) != EOF
)
423 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
427 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
437 while (c_isxdigit (c
= getc (fp
)))
439 if (INT_LEFT_SHIFT_OVERFLOW (n
, 4))
442 | (c
- ('0' <= c
&& c
<= '9' ? '0'
443 : 'A' <= c
&& c
<= 'F' ? 'A' - 10
451 /* Return a mapping vector for CHARSET loaded from MAPFILE.
452 Each line of MAPFILE has this form
454 where 0xAAAA is a code-point and 0xCCCC is the corresponding
455 character code, or this form
457 where 0xAAAA and 0xBBBB are code-points specifying a range, and
458 0xCCCC is the first character code of the range.
460 The returned vector has this form:
461 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
462 where CODE1 is a code-point or a cons of code-points specifying a
465 Note that this function uses `openp' to open MAPFILE but ignores
466 `file-name-handler-alist' to avoid running any Lisp code. */
469 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
,
472 unsigned min_code
= CHARSET_MIN_CODE (charset
);
473 unsigned max_code
= CHARSET_MAX_CODE (charset
);
476 struct charset_map_entries
*head
, *entries
;
478 AUTO_STRING (map
, ".map");
479 AUTO_STRING (txt
, ".txt");
480 AUTO_LIST2 (suffixes
, map
, txt
);
481 ptrdiff_t count
= SPECPDL_INDEX ();
482 record_unwind_protect_nothing ();
483 specbind (Qfile_name_handler_alist
, Qnil
);
484 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
, false);
485 fp
= fd
< 0 ? 0 : fdopen (fd
, "r");
488 int open_errno
= errno
;
490 report_file_errno ("Loading charset map", mapfile
, open_errno
);
492 set_unwind_protect_ptr (count
, fclose_unwind
, fp
);
493 unbind_to (count
+ 1, Qnil
);
495 /* Use record_xmalloc, as `charset_map_entries' is
496 large (larger than MAX_ALLOCA). */
497 head
= record_xmalloc (sizeof *head
);
499 memset (entries
, 0, sizeof (struct charset_map_entries
));
504 unsigned from
, to
, c
;
506 bool eof
= 0, overflow
= 0;
508 from
= read_hex (fp
, &eof
, &overflow
);
511 if (getc (fp
) == '-')
512 to
= read_hex (fp
, &eof
, &overflow
);
517 c
= read_hex (fp
, &eof
, &overflow
);
523 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
526 if (n_entries
== 0x10000)
528 entries
->next
= record_xmalloc (sizeof *entries
->next
);
529 entries
= entries
->next
;
530 memset (entries
, 0, sizeof (struct charset_map_entries
));
534 entries
->entry
[idx
].from
= from
;
535 entries
->entry
[idx
].to
= to
;
536 entries
->entry
[idx
].c
= c
;
540 clear_unwind_protect (count
);
542 load_charset_map (charset
, head
, n_entries
, control_flag
);
543 unbind_to (count
, Qnil
);
547 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
549 unsigned min_code
= CHARSET_MIN_CODE (charset
);
550 unsigned max_code
= CHARSET_MAX_CODE (charset
);
551 struct charset_map_entries
*head
, *entries
;
553 int len
= ASIZE (vec
);
559 add_to_log ("Failure in loading charset map: %V", vec
);
563 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
564 large (larger than MAX_ALLOCA). */
565 head
= SAFE_ALLOCA (sizeof *head
);
567 memset (entries
, 0, sizeof (struct charset_map_entries
));
570 for (i
= 0; i
< len
; i
+= 2)
572 Lisp_Object val
, val2
;
582 from
= XFASTINT (val
);
583 to
= XFASTINT (val2
);
586 from
= to
= XFASTINT (val
);
587 val
= AREF (vec
, i
+ 1);
591 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
594 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
596 entries
->next
= SAFE_ALLOCA (sizeof *entries
->next
);
597 entries
= entries
->next
;
598 memset (entries
, 0, sizeof (struct charset_map_entries
));
600 idx
= n_entries
% 0x10000;
601 entries
->entry
[idx
].from
= from
;
602 entries
->entry
[idx
].to
= to
;
603 entries
->entry
[idx
].c
= c
;
607 load_charset_map (charset
, head
, n_entries
, control_flag
);
612 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
613 map it is (see the comment of load_charset_map for the detail). */
616 load_charset (struct charset
*charset
, int control_flag
)
620 if (inhibit_load_charset_map
622 && charset
== temp_charset_work
->current
623 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
626 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
627 map
= CHARSET_MAP (charset
);
630 if (! CHARSET_UNIFIED_P (charset
))
632 map
= CHARSET_UNIFY_MAP (charset
);
635 load_charset_map_from_file (charset
, map
, control_flag
);
637 load_charset_map_from_vector (charset
, map
, control_flag
);
641 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
642 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
645 return (CHARSETP (object
) ? Qt
: Qnil
);
650 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
651 Lisp_Object function
, Lisp_Object arg
,
652 unsigned int from
, unsigned int to
)
654 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
655 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
656 Lisp_Object range
= Fcons (Qnil
, Qnil
);
659 c
= temp_charset_work
->min_char
;
660 stop
= (temp_charset_work
->max_char
< 0x20000
661 ? temp_charset_work
->max_char
: 0xFFFF);
665 int idx
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
667 if (idx
>= from_idx
&& idx
<= to_idx
)
669 if (NILP (XCAR (range
)))
670 XSETCAR (range
, make_number (c
));
672 else if (! NILP (XCAR (range
)))
674 XSETCDR (range
, make_number (c
- 1));
676 (*c_function
) (arg
, range
);
678 call2 (function
, range
, arg
);
679 XSETCAR (range
, Qnil
);
683 if (c
== temp_charset_work
->max_char
)
685 if (! NILP (XCAR (range
)))
687 XSETCDR (range
, make_number (c
));
689 (*c_function
) (arg
, range
);
691 call2 (function
, range
, arg
);
696 stop
= temp_charset_work
->max_char
;
703 map_charset_chars (void (*c_function
)(Lisp_Object
, Lisp_Object
), Lisp_Object function
,
704 Lisp_Object arg
, struct charset
*charset
, unsigned from
, unsigned to
)
707 bool partial
= (from
> CHARSET_MIN_CODE (charset
)
708 || to
< CHARSET_MAX_CODE (charset
));
710 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
712 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
713 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
714 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
715 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
717 if (CHARSET_UNIFIED_P (charset
))
719 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
720 load_charset (charset
, 2);
721 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
722 map_char_table_for_charset (c_function
, function
,
723 CHARSET_DEUNIFIER (charset
), arg
,
724 partial
? charset
: NULL
, from
, to
);
726 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
729 range
= Fcons (make_number (from_c
), make_number (to_c
));
731 (*c_function
) (arg
, range
);
733 call2 (function
, range
, arg
);
735 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
737 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
738 load_charset (charset
, 2);
739 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
740 map_char_table_for_charset (c_function
, function
,
741 CHARSET_ENCODER (charset
), arg
,
742 partial
? charset
: NULL
, from
, to
);
744 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
746 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
748 Lisp_Object subset_info
;
751 subset_info
= CHARSET_SUBSET (charset
);
752 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
753 offset
= XINT (AREF (subset_info
, 3));
755 if (from
< XFASTINT (AREF (subset_info
, 1)))
756 from
= XFASTINT (AREF (subset_info
, 1));
758 if (to
> XFASTINT (AREF (subset_info
, 2)))
759 to
= XFASTINT (AREF (subset_info
, 2));
760 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
762 else /* i.e. CHARSET_METHOD_SUPERSET */
766 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
767 parents
= XCDR (parents
))
770 unsigned this_from
, this_to
;
772 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
773 offset
= XINT (XCDR (XCAR (parents
)));
774 this_from
= from
> offset
? from
- offset
: 0;
775 this_to
= to
> offset
? to
- offset
: 0;
776 if (this_from
< CHARSET_MIN_CODE (charset
))
777 this_from
= CHARSET_MIN_CODE (charset
);
778 if (this_to
> CHARSET_MAX_CODE (charset
))
779 this_to
= CHARSET_MAX_CODE (charset
);
780 map_charset_chars (c_function
, function
, arg
, charset
,
786 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
787 doc
: /* Call FUNCTION for all characters in CHARSET.
788 FUNCTION is called with an argument RANGE and the optional 3rd
791 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
792 characters contained in CHARSET.
794 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
795 range of code points (in CHARSET) of target characters. */)
796 (Lisp_Object function
, Lisp_Object charset
, Lisp_Object arg
, Lisp_Object from_code
, Lisp_Object to_code
)
801 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
802 if (NILP (from_code
))
803 from
= CHARSET_MIN_CODE (cs
);
806 from
= XINT (from_code
);
807 if (from
< CHARSET_MIN_CODE (cs
))
808 from
= CHARSET_MIN_CODE (cs
);
811 to
= CHARSET_MAX_CODE (cs
);
815 if (to
> CHARSET_MAX_CODE (cs
))
816 to
= CHARSET_MAX_CODE (cs
);
818 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
823 /* Define a charset according to the arguments. The Nth argument is
824 the Nth attribute of the charset (the last attribute `charset-id'
825 is not included). See the docstring of `define-charset' for the
828 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
829 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
830 doc
: /* For internal use only.
831 usage: (define-charset-internal ...) */)
832 (ptrdiff_t nargs
, Lisp_Object
*args
)
834 /* Charset attr vector. */
837 EMACS_UINT hash_code
;
838 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
840 struct charset charset
;
843 bool new_definition_p
;
846 if (nargs
!= charset_arg_max
)
847 Fsignal (Qwrong_number_of_arguments
,
848 Fcons (intern ("define-charset-internal"),
849 make_number (nargs
)));
851 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
853 CHECK_SYMBOL (args
[charset_arg_name
]);
854 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
856 val
= args
[charset_arg_code_space
];
857 for (i
= 0, dimension
= 0, nchars
= 1; ; i
++)
859 Lisp_Object min_byte_obj
, max_byte_obj
;
860 int min_byte
, max_byte
;
862 min_byte_obj
= Faref (val
, make_number (i
* 2));
863 max_byte_obj
= Faref (val
, make_number (i
* 2 + 1));
864 CHECK_RANGED_INTEGER (min_byte_obj
, 0, 255);
865 min_byte
= XINT (min_byte_obj
);
866 CHECK_RANGED_INTEGER (max_byte_obj
, min_byte
, 255);
867 max_byte
= XINT (max_byte_obj
);
868 charset
.code_space
[i
* 4] = min_byte
;
869 charset
.code_space
[i
* 4 + 1] = max_byte
;
870 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
875 nchars
*= charset
.code_space
[i
* 4 + 2];
876 charset
.code_space
[i
* 4 + 3] = nchars
;
879 val
= args
[charset_arg_dimension
];
881 charset
.dimension
= dimension
;
884 CHECK_RANGED_INTEGER (val
, 1, 4);
885 charset
.dimension
= XINT (val
);
888 charset
.code_linear_p
889 = (charset
.dimension
== 1
890 || (charset
.code_space
[2] == 256
891 && (charset
.dimension
== 2
892 || (charset
.code_space
[6] == 256
893 && (charset
.dimension
== 3
894 || charset
.code_space
[10] == 256)))));
896 if (! charset
.code_linear_p
)
898 charset
.code_space_mask
= xzalloc (256);
899 for (i
= 0; i
< 4; i
++)
900 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
902 charset
.code_space_mask
[j
] |= (1 << i
);
905 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
907 charset
.min_code
= (charset
.code_space
[0]
908 | (charset
.code_space
[4] << 8)
909 | (charset
.code_space
[8] << 16)
910 | ((unsigned) charset
.code_space
[12] << 24));
911 charset
.max_code
= (charset
.code_space
[1]
912 | (charset
.code_space
[5] << 8)
913 | (charset
.code_space
[9] << 16)
914 | ((unsigned) charset
.code_space
[13] << 24));
915 charset
.char_index_offset
= 0;
917 val
= args
[charset_arg_min_code
];
920 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
922 if (code
< charset
.min_code
923 || code
> charset
.max_code
)
924 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
925 make_fixnum_or_float (charset
.max_code
), val
);
926 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
927 charset
.min_code
= code
;
930 val
= args
[charset_arg_max_code
];
933 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
935 if (code
< charset
.min_code
936 || code
> charset
.max_code
)
937 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
938 make_fixnum_or_float (charset
.max_code
), val
);
939 charset
.max_code
= code
;
942 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
944 val
= args
[charset_arg_invalid_code
];
947 if (charset
.min_code
> 0)
948 charset
.invalid_code
= 0;
951 if (charset
.max_code
< UINT_MAX
)
952 charset
.invalid_code
= charset
.max_code
+ 1;
954 error ("Attribute :invalid-code must be specified");
958 charset
.invalid_code
= cons_to_unsigned (val
, UINT_MAX
);
960 val
= args
[charset_arg_iso_final
];
962 charset
.iso_final
= -1;
966 if (XINT (val
) < '0' || XINT (val
) > 127)
967 error ("Invalid iso-final-char: %"pI
"d", XINT (val
));
968 charset
.iso_final
= XINT (val
);
971 val
= args
[charset_arg_iso_revision
];
973 charset
.iso_revision
= -1;
976 CHECK_RANGED_INTEGER (val
, -1, 63);
977 charset
.iso_revision
= XINT (val
);
980 val
= args
[charset_arg_emacs_mule_id
];
982 charset
.emacs_mule_id
= -1;
986 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
987 error ("Invalid emacs-mule-id: %"pI
"d", XINT (val
));
988 charset
.emacs_mule_id
= XINT (val
);
991 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
993 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
995 charset
.unified_p
= 0;
997 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
999 if (! NILP (args
[charset_arg_code_offset
]))
1001 val
= args
[charset_arg_code_offset
];
1002 CHECK_CHARACTER (val
);
1004 charset
.method
= CHARSET_METHOD_OFFSET
;
1005 charset
.code_offset
= XINT (val
);
1007 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1008 if (MAX_CHAR
- charset
.code_offset
< i
)
1009 error ("Unsupported max char: %d", charset
.max_char
);
1010 charset
.max_char
= i
+ charset
.code_offset
;
1011 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1012 charset
.min_char
= i
+ charset
.code_offset
;
1014 i
= (charset
.min_char
>> 7) << 7;
1015 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1016 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1017 i
= (i
>> 12) << 12;
1018 for (; i
<= charset
.max_char
; i
+= 0x1000)
1019 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1020 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1021 charset
.ascii_compatible_p
= 1;
1023 else if (! NILP (args
[charset_arg_map
]))
1025 val
= args
[charset_arg_map
];
1026 ASET (attrs
, charset_map
, val
);
1027 charset
.method
= CHARSET_METHOD_MAP
;
1029 else if (! NILP (args
[charset_arg_subset
]))
1032 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1033 struct charset
*parent_charset
;
1035 val
= args
[charset_arg_subset
];
1036 parent
= Fcar (val
);
1037 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1038 parent_min_code
= Fnth (make_number (1), val
);
1039 CHECK_NATNUM (parent_min_code
);
1040 parent_max_code
= Fnth (make_number (2), val
);
1041 CHECK_NATNUM (parent_max_code
);
1042 parent_code_offset
= Fnth (make_number (3), val
);
1043 CHECK_NUMBER (parent_code_offset
);
1044 val
= make_uninit_vector (4);
1045 ASET (val
, 0, make_number (parent_charset
->id
));
1046 ASET (val
, 1, parent_min_code
);
1047 ASET (val
, 2, parent_max_code
);
1048 ASET (val
, 3, parent_code_offset
);
1049 ASET (attrs
, charset_subset
, val
);
1051 charset
.method
= CHARSET_METHOD_SUBSET
;
1052 /* Here, we just copy the parent's fast_map. It's not accurate,
1053 but at least it works for quickly detecting which character
1054 DOESN'T belong to this charset. */
1055 memcpy (charset
.fast_map
, parent_charset
->fast_map
,
1056 sizeof charset
.fast_map
);
1058 /* We also copy these for parents. */
1059 charset
.min_char
= parent_charset
->min_char
;
1060 charset
.max_char
= parent_charset
->max_char
;
1062 else if (! NILP (args
[charset_arg_superset
]))
1064 val
= args
[charset_arg_superset
];
1065 charset
.method
= CHARSET_METHOD_SUPERSET
;
1066 val
= Fcopy_sequence (val
);
1067 ASET (attrs
, charset_superset
, val
);
1069 charset
.min_char
= MAX_CHAR
;
1070 charset
.max_char
= 0;
1071 for (; ! NILP (val
); val
= Fcdr (val
))
1073 Lisp_Object elt
, car_part
, cdr_part
;
1074 int this_id
, offset
;
1075 struct charset
*this_charset
;
1080 car_part
= XCAR (elt
);
1081 cdr_part
= XCDR (elt
);
1082 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1083 CHECK_TYPE_RANGED_INTEGER (int, cdr_part
);
1084 offset
= XINT (cdr_part
);
1088 CHECK_CHARSET_GET_ID (elt
, this_id
);
1091 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1093 this_charset
= CHARSET_FROM_ID (this_id
);
1094 if (charset
.min_char
> this_charset
->min_char
)
1095 charset
.min_char
= this_charset
->min_char
;
1096 if (charset
.max_char
< this_charset
->max_char
)
1097 charset
.max_char
= this_charset
->max_char
;
1098 for (i
= 0; i
< 190; i
++)
1099 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1103 error ("None of :code-offset, :map, :parents are specified");
1105 val
= args
[charset_arg_unify_map
];
1106 if (! NILP (val
) && !STRINGP (val
))
1108 ASET (attrs
, charset_unify_map
, val
);
1110 CHECK_LIST (args
[charset_arg_plist
]);
1111 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1113 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1115 if (charset
.hash_index
>= 0)
1117 new_definition_p
= 0;
1118 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1119 set_hash_value_slot (hash_table
, charset
.hash_index
, attrs
);
1123 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1125 if (charset_table_used
== charset_table_size
)
1127 /* Ensure that charset IDs fit into 'int' as well as into the
1128 restriction imposed by fixnums. Although the 'int' restriction
1129 could be removed, too much other code would need altering; for
1130 example, the IDs are stuffed into struct
1131 coding_system.charbuf[i] entries, which are 'int'. */
1132 int old_size
= charset_table_size
;
1133 ptrdiff_t new_size
= old_size
;
1134 struct charset
*new_table
=
1135 xpalloc (0, &new_size
, 1,
1136 min (INT_MAX
, MOST_POSITIVE_FIXNUM
),
1137 sizeof *charset_table
);
1138 memcpy (new_table
, charset_table
, old_size
* sizeof *new_table
);
1139 charset_table
= new_table
;
1140 charset_table_size
= new_size
;
1141 /* FIXME: This leaks memory, as the old charset_table becomes
1142 unreachable. If the old charset table is charset_table_init
1143 then this leak is intentional; otherwise, it's unclear.
1144 If the latter memory leak is intentional, a
1145 comment should be added to explain this. If not, the old
1146 charset_table should be freed, by passing it as the 1st argument
1147 to xpalloc and removing the memcpy. */
1149 id
= charset_table_used
++;
1150 new_definition_p
= 1;
1153 ASET (attrs
, charset_id
, make_number (id
));
1155 charset_table
[id
] = charset
;
1157 if (charset
.method
== CHARSET_METHOD_MAP
)
1159 load_charset (&charset
, 0);
1160 charset_table
[id
] = charset
;
1163 if (charset
.iso_final
>= 0)
1165 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1166 charset
.iso_final
) = id
;
1167 if (new_definition_p
)
1168 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1169 list1 (make_number (id
)));
1170 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1171 charset_jisx0201_roman
= id
;
1172 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1173 charset_jisx0208_1978
= id
;
1174 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1175 charset_jisx0208
= id
;
1176 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1177 charset_ksc5601
= id
;
1180 if (charset
.emacs_mule_id
>= 0)
1182 emacs_mule_charset
[charset
.emacs_mule_id
] = id
;
1183 if (charset
.emacs_mule_id
< 0xA0)
1184 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1186 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1187 if (new_definition_p
)
1188 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1189 list1 (make_number (id
)));
1192 if (new_definition_p
)
1194 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1195 if (charset
.supplementary_p
)
1196 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1197 list1 (make_number (id
)));
1202 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1204 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1206 if (cs
->supplementary_p
)
1209 if (EQ (tail
, Vcharset_ordered_list
))
1210 Vcharset_ordered_list
= Fcons (make_number (id
),
1211 Vcharset_ordered_list
);
1212 else if (NILP (tail
))
1213 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1214 list1 (make_number (id
)));
1217 val
= Fcons (XCAR (tail
), XCDR (tail
));
1218 XSETCDR (tail
, val
);
1219 XSETCAR (tail
, make_number (id
));
1222 charset_ordered_list_tick
++;
1229 /* Same as Fdefine_charset_internal but arguments are more convenient
1230 to call from C (typically in syms_of_charset). This can define a
1231 charset of `offset' method only. Return the ID of the new
1235 define_charset_internal (Lisp_Object name
,
1237 const char *code_space_chars
,
1238 unsigned min_code
, unsigned max_code
,
1239 int iso_final
, int iso_revision
, int emacs_mule_id
,
1240 bool ascii_compatible
, bool supplementary
,
1243 const unsigned char *code_space
= (const unsigned char *) code_space_chars
;
1244 Lisp_Object args
[charset_arg_max
];
1248 args
[charset_arg_name
] = name
;
1249 args
[charset_arg_dimension
] = make_number (dimension
);
1250 val
= make_uninit_vector (8);
1251 for (i
= 0; i
< 8; i
++)
1252 ASET (val
, i
, make_number (code_space
[i
]));
1253 args
[charset_arg_code_space
] = val
;
1254 args
[charset_arg_min_code
] = make_number (min_code
);
1255 args
[charset_arg_max_code
] = make_number (max_code
);
1256 args
[charset_arg_iso_final
]
1257 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1258 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1259 args
[charset_arg_emacs_mule_id
]
1260 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1261 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1262 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1263 args
[charset_arg_invalid_code
] = Qnil
;
1264 args
[charset_arg_code_offset
] = make_number (code_offset
);
1265 args
[charset_arg_map
] = Qnil
;
1266 args
[charset_arg_subset
] = Qnil
;
1267 args
[charset_arg_superset
] = Qnil
;
1268 args
[charset_arg_unify_map
] = Qnil
;
1270 args
[charset_arg_plist
] =
1271 listn (CONSTYPE_HEAP
, 14,
1273 args
[charset_arg_name
],
1274 intern_c_string (":dimension"),
1275 args
[charset_arg_dimension
],
1276 intern_c_string (":code-space"),
1277 args
[charset_arg_code_space
],
1278 intern_c_string (":iso-final-char"),
1279 args
[charset_arg_iso_final
],
1280 intern_c_string (":emacs-mule-id"),
1281 args
[charset_arg_emacs_mule_id
],
1282 QCascii_compatible_p
,
1283 args
[charset_arg_ascii_compatible_p
],
1284 intern_c_string (":code-offset"),
1285 args
[charset_arg_code_offset
]);
1286 Fdefine_charset_internal (charset_arg_max
, args
);
1288 return XINT (CHARSET_SYMBOL_ID (name
));
1292 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1293 Sdefine_charset_alias
, 2, 2, 0,
1294 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1295 (Lisp_Object alias
, Lisp_Object charset
)
1299 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1300 Fputhash (alias
, attr
, Vcharset_hash_table
);
1301 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1306 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1307 doc
: /* Return the property list of CHARSET. */)
1308 (Lisp_Object charset
)
1312 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1313 return CHARSET_ATTR_PLIST (attrs
);
1317 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1318 doc
: /* Set CHARSET's property list to PLIST. */)
1319 (Lisp_Object charset
, Lisp_Object plist
)
1323 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1324 ASET (attrs
, charset_plist
, plist
);
1329 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1330 doc
: /* Unify characters of CHARSET with Unicode.
1331 This means reading the relevant file and installing the table defined
1332 by CHARSET's `:unify-map' property.
1334 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1335 the same meaning as the `:unify-map' attribute in the function
1336 `define-charset' (which see).
1338 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1339 (Lisp_Object charset
, Lisp_Object unify_map
, Lisp_Object deunify
)
1344 CHECK_CHARSET_GET_ID (charset
, id
);
1345 cs
= CHARSET_FROM_ID (id
);
1347 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1348 : ! CHARSET_UNIFIED_P (cs
))
1351 CHARSET_UNIFIED_P (cs
) = 0;
1354 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1355 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1356 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1357 if (NILP (unify_map
))
1358 unify_map
= CHARSET_UNIFY_MAP (cs
);
1361 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1362 signal_error ("Bad unify-map", unify_map
);
1363 set_charset_attr (cs
, charset_unify_map
, unify_map
);
1365 if (NILP (Vchar_unify_table
))
1366 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1367 char_table_set_range (Vchar_unify_table
,
1368 cs
->min_char
, cs
->max_char
, charset
);
1369 CHARSET_UNIFIED_P (cs
) = 1;
1371 else if (CHAR_TABLE_P (Vchar_unify_table
))
1373 unsigned min_code
= CHARSET_MIN_CODE (cs
);
1374 unsigned max_code
= CHARSET_MAX_CODE (cs
);
1375 int min_char
= DECODE_CHAR (cs
, min_code
);
1376 int max_char
= DECODE_CHAR (cs
, max_code
);
1378 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1384 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
1385 Return true if it's a 96-character set, false if 94. */
1388 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
,
1389 Lisp_Object final_char
)
1391 CHECK_NUMBER (dimension
);
1392 CHECK_NUMBER (chars
);
1393 CHECK_CHARACTER (final_char
);
1395 if (! (1 <= XINT (dimension
) && XINT (dimension
) <= 3))
1396 error ("Invalid DIMENSION %"pI
"d, it should be 1, 2, or 3",
1399 bool chars_flag
= XINT (chars
) == 96;
1400 if (! (chars_flag
|| XINT (chars
) == 94))
1401 error ("Invalid CHARS %"pI
"d, it should be 94 or 96", XINT (chars
));
1403 int final_ch
= XFASTINT (final_char
);
1404 if (! ('0' <= final_ch
&& final_ch
<= '~'))
1405 error ("Invalid FINAL-CHAR `%c', it should be `0'..`~'", final_ch
);
1410 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1411 Sget_unused_iso_final_char
, 2, 2, 0,
1413 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1414 DIMENSION is the number of bytes to represent a character: 1 or 2.
1415 CHARS is the number of characters in a dimension: 94 or 96.
1417 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1418 If there's no unused final char for the specified kind of charset,
1420 (Lisp_Object dimension
, Lisp_Object chars
)
1422 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
,
1424 for (int final_char
= '0'; final_char
<= '?'; final_char
++)
1425 if (ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, final_char
) < 0)
1426 return make_number (final_char
);
1431 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1433 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1435 On decoding by an ISO-2022 base coding system, when a charset
1436 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1437 if CHARSET is designated instead. */)
1438 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
, Lisp_Object charset
)
1442 CHECK_CHARSET_GET_ID (charset
, id
);
1443 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
1444 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XFASTINT (final_char
)) = id
;
1449 /* Return information about charsets in the text at PTR of NBYTES
1450 bytes, which are NCHARS characters. The value is:
1452 0: Each character is represented by one byte. This is always
1453 true for a unibyte string. For a multibyte string, true if
1454 it contains only ASCII characters.
1456 1: No charsets other than ascii, control-1, and latin-1 are
1463 string_xstring_p (Lisp_Object string
)
1465 const unsigned char *p
= SDATA (string
);
1466 const unsigned char *endp
= p
+ SBYTES (string
);
1468 if (SCHARS (string
) == SBYTES (string
))
1473 int c
= STRING_CHAR_ADVANCE (p
);
1482 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1484 CHARSETS is a vector. If Nth element is non-nil, it means the
1485 charset whose id is N is already found.
1487 It may lookup a translation table TABLE if supplied. */
1490 find_charsets_in_text (const unsigned char *ptr
, ptrdiff_t nchars
,
1491 ptrdiff_t nbytes
, Lisp_Object charsets
,
1492 Lisp_Object table
, bool multibyte
)
1494 const unsigned char *pend
= ptr
+ nbytes
;
1496 if (nchars
== nbytes
)
1499 ASET (charsets
, charset_ascii
, Qt
);
1506 c
= translate_char (table
, c
);
1507 if (ASCII_CHAR_P (c
))
1508 ASET (charsets
, charset_ascii
, Qt
);
1510 ASET (charsets
, charset_eight_bit
, Qt
);
1517 int c
= STRING_CHAR_ADVANCE (ptr
);
1518 struct charset
*charset
;
1521 c
= translate_char (table
, c
);
1522 charset
= CHAR_CHARSET (c
);
1523 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1528 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1530 doc
: /* Return a list of charsets in the region between BEG and END.
1531 BEG and END are buffer positions.
1532 Optional arg TABLE if non-nil is a translation table to look up.
1534 If the current buffer is unibyte, the returned list may contain
1535 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1536 (Lisp_Object beg
, Lisp_Object end
, Lisp_Object table
)
1538 Lisp_Object charsets
;
1539 ptrdiff_t from
, from_byte
, to
, stop
, stop_byte
;
1542 bool multibyte
= ! NILP (BVAR (current_buffer
, enable_multibyte_characters
));
1544 validate_region (&beg
, &end
);
1545 from
= XFASTINT (beg
);
1546 stop
= to
= XFASTINT (end
);
1548 if (from
< GPT
&& GPT
< to
)
1551 stop_byte
= GPT_BYTE
;
1554 stop_byte
= CHAR_TO_BYTE (stop
);
1556 from_byte
= CHAR_TO_BYTE (from
);
1558 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1561 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1562 stop_byte
- from_byte
, charsets
, table
,
1566 from
= stop
, from_byte
= stop_byte
;
1567 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1574 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1575 if (!NILP (AREF (charsets
, i
)))
1576 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1580 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1582 doc
: /* Return a list of charsets in STR.
1583 Optional arg TABLE if non-nil is a translation table to look up.
1585 If STR is unibyte, the returned list may contain
1586 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1587 (Lisp_Object str
, Lisp_Object table
)
1589 Lisp_Object charsets
;
1595 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1596 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1598 STRING_MULTIBYTE (str
));
1600 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1601 if (!NILP (AREF (charsets
, i
)))
1602 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1608 /* Return a unified character code for C (>= 0x110000). VAL is a
1609 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1612 maybe_unify_char (int c
, Lisp_Object val
)
1614 struct charset
*charset
;
1617 return XFASTINT (val
);
1621 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1623 /* The call to load_charset below can allocate memory, which screws
1624 callers of this function through STRING_CHAR_* macros that hold C
1625 pointers to buffer text, if REL_ALLOC is used. */
1626 r_alloc_inhibit_buffer_relocation (1);
1628 load_charset (charset
, 1);
1629 if (! inhibit_load_charset_map
)
1631 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1637 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1638 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1644 r_alloc_inhibit_buffer_relocation (0);
1650 /* Return a character corresponding to the code-point CODE of
1654 decode_char (struct charset
*charset
, unsigned int code
)
1657 enum charset_method method
= CHARSET_METHOD (charset
);
1659 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1662 if (method
== CHARSET_METHOD_SUBSET
)
1664 Lisp_Object subset_info
;
1666 subset_info
= CHARSET_SUBSET (charset
);
1667 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1668 code
-= XINT (AREF (subset_info
, 3));
1669 if (code
< XFASTINT (AREF (subset_info
, 1))
1670 || code
> XFASTINT (AREF (subset_info
, 2)))
1673 c
= DECODE_CHAR (charset
, code
);
1675 else if (method
== CHARSET_METHOD_SUPERSET
)
1677 Lisp_Object parents
;
1679 parents
= CHARSET_SUPERSET (charset
);
1681 for (; CONSP (parents
); parents
= XCDR (parents
))
1683 int id
= XINT (XCAR (XCAR (parents
)));
1684 int code_offset
= XINT (XCDR (XCAR (parents
)));
1685 unsigned this_code
= code
- code_offset
;
1687 charset
= CHARSET_FROM_ID (id
);
1688 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1694 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1698 if (method
== CHARSET_METHOD_MAP
)
1700 Lisp_Object decoder
;
1702 decoder
= CHARSET_DECODER (charset
);
1703 if (! VECTORP (decoder
))
1705 load_charset (charset
, 1);
1706 decoder
= CHARSET_DECODER (charset
);
1708 if (VECTORP (decoder
))
1709 c
= XINT (AREF (decoder
, char_index
));
1711 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1713 else /* method == CHARSET_METHOD_OFFSET */
1715 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1716 if (CHARSET_UNIFIED_P (charset
)
1717 && MAX_UNICODE_CHAR
< c
&& c
<= MAX_5_BYTE_CHAR
)
1719 /* Unify C with a Unicode character if possible. */
1720 Lisp_Object val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1721 c
= maybe_unify_char (c
, val
);
1729 /* Variable used temporarily by the macro ENCODE_CHAR. */
1730 Lisp_Object charset_work
;
1732 /* Return a code-point of C in CHARSET. If C doesn't belong to
1733 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1734 use CHARSET's strict_max_char instead of max_char. */
1737 encode_char (struct charset
*charset
, int c
)
1740 enum charset_method method
= CHARSET_METHOD (charset
);
1742 if (CHARSET_UNIFIED_P (charset
))
1744 Lisp_Object deunifier
;
1745 int code_index
= -1;
1747 deunifier
= CHARSET_DEUNIFIER (charset
);
1748 if (! CHAR_TABLE_P (deunifier
))
1750 load_charset (charset
, 2);
1751 deunifier
= CHARSET_DEUNIFIER (charset
);
1753 if (CHAR_TABLE_P (deunifier
))
1755 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1757 if (INTEGERP (deunified
))
1758 code_index
= XINT (deunified
);
1762 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1764 if (code_index
>= 0)
1765 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1768 if (method
== CHARSET_METHOD_SUBSET
)
1770 Lisp_Object subset_info
;
1771 struct charset
*this_charset
;
1773 subset_info
= CHARSET_SUBSET (charset
);
1774 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1775 code
= ENCODE_CHAR (this_charset
, c
);
1776 if (code
== CHARSET_INVALID_CODE (this_charset
)
1777 || code
< XFASTINT (AREF (subset_info
, 1))
1778 || code
> XFASTINT (AREF (subset_info
, 2)))
1779 return CHARSET_INVALID_CODE (charset
);
1780 code
+= XINT (AREF (subset_info
, 3));
1784 if (method
== CHARSET_METHOD_SUPERSET
)
1786 Lisp_Object parents
;
1788 parents
= CHARSET_SUPERSET (charset
);
1789 for (; CONSP (parents
); parents
= XCDR (parents
))
1791 int id
= XINT (XCAR (XCAR (parents
)));
1792 int code_offset
= XINT (XCDR (XCAR (parents
)));
1793 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1795 code
= ENCODE_CHAR (this_charset
, c
);
1796 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1797 return code
+ code_offset
;
1799 return CHARSET_INVALID_CODE (charset
);
1802 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1803 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1804 return CHARSET_INVALID_CODE (charset
);
1806 if (method
== CHARSET_METHOD_MAP
)
1808 Lisp_Object encoder
;
1811 encoder
= CHARSET_ENCODER (charset
);
1812 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1814 load_charset (charset
, 2);
1815 encoder
= CHARSET_ENCODER (charset
);
1817 if (CHAR_TABLE_P (encoder
))
1819 val
= CHAR_TABLE_REF (encoder
, c
);
1821 return CHARSET_INVALID_CODE (charset
);
1823 if (! CHARSET_COMPACT_CODES_P (charset
))
1824 code
= INDEX_TO_CODE_POINT (charset
, code
);
1828 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1829 code
= INDEX_TO_CODE_POINT (charset
, code
);
1832 else /* method == CHARSET_METHOD_OFFSET */
1834 unsigned code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1836 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1843 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 2, 0,
1844 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1845 Return nil if CODE-POINT is not valid in CHARSET.
1847 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1848 (Lisp_Object charset
, Lisp_Object code_point
)
1852 struct charset
*charsetp
;
1854 CHECK_CHARSET_GET_ID (charset
, id
);
1855 code
= cons_to_unsigned (code_point
, UINT_MAX
);
1856 charsetp
= CHARSET_FROM_ID (id
);
1857 c
= DECODE_CHAR (charsetp
, code
);
1858 return (c
>= 0 ? make_number (c
) : Qnil
);
1862 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 2, 0,
1863 doc
: /* Encode the character CH into a code-point of CHARSET.
1864 Return nil if CHARSET doesn't include CH. */)
1865 (Lisp_Object ch
, Lisp_Object charset
)
1869 struct charset
*charsetp
;
1871 CHECK_CHARSET_GET_ID (charset
, id
);
1872 CHECK_CHARACTER (ch
);
1874 charsetp
= CHARSET_FROM_ID (id
);
1875 code
= ENCODE_CHAR (charsetp
, c
);
1876 if (code
== CHARSET_INVALID_CODE (charsetp
))
1878 return INTEGER_TO_CONS (code
);
1882 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1884 /* Return a character of CHARSET whose position codes are CODEn.
1886 CODE1 through CODE4 are optional, but if you don't supply sufficient
1887 position codes, it is assumed that the minimum code in each dimension
1889 (Lisp_Object charset
, Lisp_Object code1
, Lisp_Object code2
, Lisp_Object code3
, Lisp_Object code4
)
1892 struct charset
*charsetp
;
1896 CHECK_CHARSET_GET_ID (charset
, id
);
1897 charsetp
= CHARSET_FROM_ID (id
);
1899 dimension
= CHARSET_DIMENSION (charsetp
);
1901 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1902 ? 0 : CHARSET_MIN_CODE (charsetp
));
1905 CHECK_NATNUM (code1
);
1906 if (XFASTINT (code1
) >= 0x100)
1907 args_out_of_range (make_number (0xFF), code1
);
1908 code
= XFASTINT (code1
);
1914 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1917 CHECK_NATNUM (code2
);
1918 if (XFASTINT (code2
) >= 0x100)
1919 args_out_of_range (make_number (0xFF), code2
);
1920 code
|= XFASTINT (code2
);
1927 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1930 CHECK_NATNUM (code3
);
1931 if (XFASTINT (code3
) >= 0x100)
1932 args_out_of_range (make_number (0xFF), code3
);
1933 code
|= XFASTINT (code3
);
1940 code
|= charsetp
->code_space
[0];
1943 CHECK_NATNUM (code4
);
1944 if (XFASTINT (code4
) >= 0x100)
1945 args_out_of_range (make_number (0xFF), code4
);
1946 code
|= XFASTINT (code4
);
1953 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1955 c
= DECODE_CHAR (charsetp
, code
);
1957 error ("Invalid code(s)");
1958 return make_number (c
);
1962 /* Return the first charset in CHARSET_LIST that contains C.
1963 CHARSET_LIST is a list of charset IDs. If it is nil, use
1964 Vcharset_ordered_list. */
1967 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
1969 bool maybe_null
= 0;
1971 if (NILP (charset_list
))
1972 charset_list
= Vcharset_ordered_list
;
1976 while (CONSP (charset_list
))
1978 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1979 unsigned code
= ENCODE_CHAR (charset
, c
);
1981 if (code
!= CHARSET_INVALID_CODE (charset
))
1984 *code_return
= code
;
1987 charset_list
= XCDR (charset_list
);
1989 && c
<= MAX_UNICODE_CHAR
1990 && EQ (charset_list
, Vcharset_non_preferred_head
))
1991 return CHARSET_FROM_ID (charset_unicode
);
1993 return (maybe_null
? NULL
1994 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
1995 : CHARSET_FROM_ID (charset_eight_bit
));
1999 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2001 /*Return list of charset and one to four position-codes of CH.
2002 The charset is decided by the current priority order of charsets.
2003 A position-code is a byte value of each dimension of the code-point of
2004 CH in the charset. */)
2007 struct charset
*charset
;
2012 CHECK_CHARACTER (ch
);
2014 charset
= CHAR_CHARSET (c
);
2017 code
= ENCODE_CHAR (charset
, c
);
2018 if (code
== CHARSET_INVALID_CODE (charset
))
2020 dimension
= CHARSET_DIMENSION (charset
);
2021 for (val
= Qnil
; dimension
> 0; dimension
--)
2023 val
= Fcons (make_number (code
& 0xFF), val
);
2026 return Fcons (CHARSET_NAME (charset
), val
);
2030 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2031 doc
: /* Return the charset of highest priority that contains CH.
2032 ASCII characters are an exception: for them, this function always
2034 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2035 from which to find the charset. It may also be a coding system. In
2036 that case, find the charset from what supported by that coding system. */)
2037 (Lisp_Object ch
, Lisp_Object restriction
)
2039 struct charset
*charset
;
2041 CHECK_CHARACTER (ch
);
2042 if (NILP (restriction
))
2043 charset
= CHAR_CHARSET (XINT (ch
));
2046 if (CONSP (restriction
))
2048 int c
= XFASTINT (ch
);
2050 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2052 struct charset
*rcharset
;
2054 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), rcharset
);
2055 if (ENCODE_CHAR (rcharset
, c
) != CHARSET_INVALID_CODE (rcharset
))
2056 return XCAR (restriction
);
2060 restriction
= coding_system_charset_list (restriction
);
2061 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2065 return (CHARSET_NAME (charset
));
2069 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2071 Return charset of a character in the current buffer at position POS.
2072 If POS is nil, it defaults to the current point.
2073 If POS is out of range, the value is nil. */)
2077 struct charset
*charset
;
2079 ch
= Fchar_after (pos
);
2080 if (! INTEGERP (ch
))
2082 charset
= CHAR_CHARSET (XINT (ch
));
2083 return (CHARSET_NAME (charset
));
2087 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2089 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2091 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2092 by their DIMENSION, CHARS, and FINAL-CHAR,
2093 whereas Emacs distinguishes them by charset symbol.
2094 See the documentation of the function `charset-info' for the meanings of
2095 DIMENSION, CHARS, and FINAL-CHAR. */)
2096 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
2098 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
2099 int id
= ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
,
2100 XFASTINT (final_char
));
2101 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2105 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2109 Clear temporary charset mapping tables.
2110 It should be called only from temacs invoked for dumping. */)
2113 if (temp_charset_work
)
2115 xfree (temp_charset_work
);
2116 temp_charset_work
= NULL
;
2119 if (CHAR_TABLE_P (Vchar_unify_table
))
2120 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2125 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2126 Scharset_priority_list
, 0, 1, 0,
2127 doc
: /* Return the list of charsets ordered by priority.
2128 HIGHESTP non-nil means just return the highest priority one. */)
2129 (Lisp_Object highestp
)
2131 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2133 if (!NILP (highestp
))
2134 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2136 while (!NILP (list
))
2138 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2141 return Fnreverse (val
);
2144 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2146 doc
: /* Assign higher priority to the charsets given as arguments.
2147 usage: (set-charset-priority &rest charsets) */)
2148 (ptrdiff_t nargs
, Lisp_Object
*args
)
2150 Lisp_Object new_head
, old_list
;
2151 Lisp_Object list_2022
, list_emacs_mule
;
2155 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2157 for (i
= 0; i
< nargs
; i
++)
2159 CHECK_CHARSET_GET_ID (args
[i
], id
);
2160 if (! NILP (Fmemq (make_number (id
), old_list
)))
2162 old_list
= Fdelq (make_number (id
), old_list
);
2163 new_head
= Fcons (make_number (id
), new_head
);
2166 Vcharset_non_preferred_head
= old_list
;
2167 Vcharset_ordered_list
= CALLN (Fnconc
, Fnreverse (new_head
), old_list
);
2169 charset_ordered_list_tick
++;
2171 charset_unibyte
= -1;
2172 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2173 CONSP (old_list
); old_list
= XCDR (old_list
))
2175 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2176 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2177 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2178 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2179 if (charset_unibyte
< 0)
2181 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2183 if (CHARSET_DIMENSION (charset
) == 1
2184 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2185 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2186 charset_unibyte
= CHARSET_ID (charset
);
2189 Viso_2022_charset_list
= Fnreverse (list_2022
);
2190 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2191 if (charset_unibyte
< 0)
2192 charset_unibyte
= charset_iso_8859_1
;
2197 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2199 doc
: /* Internal use only.
2200 Return charset identification number of CHARSET. */)
2201 (Lisp_Object charset
)
2205 CHECK_CHARSET_GET_ID (charset
, id
);
2206 return make_number (id
);
2209 struct charset_sort_data
2211 Lisp_Object charset
;
2217 charset_compare (const void *d1
, const void *d2
)
2219 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2220 if (data1
->priority
!= data2
->priority
)
2221 return data1
->priority
< data2
->priority
? -1 : 1;
2225 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2226 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2227 Return the sorted list. CHARSETS is modified by side effects.
2228 See also `charset-priority-list' and `set-charset-priority'. */)
2229 (Lisp_Object charsets
)
2231 Lisp_Object len
= Flength (charsets
);
2232 ptrdiff_t n
= XFASTINT (len
), i
, j
;
2234 Lisp_Object tail
, elt
, attrs
;
2235 struct charset_sort_data
*sort_data
;
2236 int id
, min_id
= INT_MAX
, max_id
= INT_MIN
;
2241 SAFE_NALLOCA (sort_data
, 1, n
);
2242 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2245 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2246 sort_data
[i
].charset
= elt
;
2247 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2253 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2254 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2257 id
= XFASTINT (elt
);
2258 if (id
>= min_id
&& id
<= max_id
)
2259 for (j
= 0; j
< n
; j
++)
2260 if (sort_data
[j
].id
== id
)
2262 sort_data
[j
].priority
= i
;
2266 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2267 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2268 XSETCAR (tail
, sort_data
[i
].charset
);
2277 Lisp_Object tempdir
;
2278 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2279 if (! file_accessible_directory_p (tempdir
))
2281 /* This used to be non-fatal (dir_warning), but it should not
2282 happen, and if it does sooner or later it will cause some
2283 obscure problem (eg bug#6401), so better abort. */
2284 fprintf (stderr
, "Error: charsets directory not found:\n\
2286 Emacs will not function correctly without the character map files.\n%s\
2287 Please check your installation!\n",
2289 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2290 variable is set, maybe it has the wrong value?\n" : "");
2294 Vcharset_map_path
= list1 (tempdir
);
2299 init_charset_once (void)
2303 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2304 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2305 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2306 iso_charset_table
[i
][j
][k
] = -1;
2308 for (i
= 0; i
< 256; i
++)
2309 emacs_mule_charset
[i
] = -1;
2311 charset_jisx0201_roman
= -1;
2312 charset_jisx0208_1978
= -1;
2313 charset_jisx0208
= -1;
2314 charset_ksc5601
= -1;
2319 /* Allocate an initial charset table that is large enough to handle
2320 Emacs while it is bootstrapping. As of September 2011, the size
2321 needs to be at least 166; make it a bit bigger to allow for future
2324 Don't make the value so small that the table is reallocated during
2325 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2326 during an initial bootstrap wreak havoc after dumping; see the
2327 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2328 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2329 static struct charset charset_table_init
[180];
2332 syms_of_charset (void)
2334 DEFSYM (Qcharsetp
, "charsetp");
2336 /* Special charset symbols. */
2337 DEFSYM (Qascii
, "ascii");
2338 DEFSYM (Qunicode
, "unicode");
2339 DEFSYM (Qemacs
, "emacs");
2340 DEFSYM (Qeight_bit
, "eight-bit");
2341 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2343 staticpro (&Vcharset_ordered_list
);
2344 Vcharset_ordered_list
= Qnil
;
2346 staticpro (&Viso_2022_charset_list
);
2347 Viso_2022_charset_list
= Qnil
;
2349 staticpro (&Vemacs_mule_charset_list
);
2350 Vemacs_mule_charset_list
= Qnil
;
2352 staticpro (&Vcharset_hash_table
);
2353 Vcharset_hash_table
= CALLN (Fmake_hash_table
, QCtest
, Qeq
);
2355 charset_table
= charset_table_init
;
2356 charset_table_size
= ARRAYELTS (charset_table_init
);
2357 charset_table_used
= 0;
2359 defsubr (&Scharsetp
);
2360 defsubr (&Smap_charset_chars
);
2361 defsubr (&Sdefine_charset_internal
);
2362 defsubr (&Sdefine_charset_alias
);
2363 defsubr (&Scharset_plist
);
2364 defsubr (&Sset_charset_plist
);
2365 defsubr (&Sunify_charset
);
2366 defsubr (&Sget_unused_iso_final_char
);
2367 defsubr (&Sdeclare_equiv_charset
);
2368 defsubr (&Sfind_charset_region
);
2369 defsubr (&Sfind_charset_string
);
2370 defsubr (&Sdecode_char
);
2371 defsubr (&Sencode_char
);
2372 defsubr (&Ssplit_char
);
2373 defsubr (&Smake_char
);
2374 defsubr (&Schar_charset
);
2375 defsubr (&Scharset_after
);
2376 defsubr (&Siso_charset
);
2377 defsubr (&Sclear_charset_maps
);
2378 defsubr (&Scharset_priority_list
);
2379 defsubr (&Sset_charset_priority
);
2380 defsubr (&Scharset_id_internal
);
2381 defsubr (&Ssort_charsets
);
2383 DEFVAR_LISP ("charset-map-path", Vcharset_map_path
,
2384 doc
: /* List of directories to search for charset map files. */);
2385 Vcharset_map_path
= Qnil
;
2387 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map
,
2388 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2389 inhibit_load_charset_map
= 0;
2391 DEFVAR_LISP ("charset-list", Vcharset_list
,
2392 doc
: /* List of all charsets ever defined. */);
2393 Vcharset_list
= Qnil
;
2395 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language
,
2396 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2397 If the current language environment is for multiple languages (e.g. "Latin-1"),
2398 the value may be a list of mnemonics. */);
2399 Vcurrent_iso639_language
= Qnil
;
2402 = define_charset_internal (Qascii
, 1, "\x00\x7F\0\0\0\0\0",
2403 0, 127, 'B', -1, 0, 1, 0, 0);
2405 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\0\0\0\0\0",
2406 0, 255, -1, -1, -1, 1, 0, 0);
2408 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2409 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2411 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2412 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2414 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\0\0\0\0\0",
2415 128, 255, -1, 0, -1, 0, 1,
2416 MAX_5_BYTE_CHAR
+ 1);
2417 charset_unibyte
= charset_iso_8859_1
;