1 /* Basic character set support.
3 Copyright (C) 2001-2015 Free Software Foundation, Inc.
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
14 This file is part of GNU Emacs.
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or
19 (at your option) any later version.
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
35 #include <sys/types.h>
38 #include "character.h"
44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
46 A coded character set ("charset" hereafter) is a meaningful
47 collection (i.e. language, culture, functionality, etc.) of
48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
55 charset_table as a struct charset.
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table
;
63 /* Table of struct charset. */
64 struct charset
*charset_table
;
66 static ptrdiff_t charset_table_size
;
67 static int charset_table_used
;
69 /* Special charsets corresponding to symbols. */
71 int charset_eight_bit
;
72 static int charset_iso_8859_1
;
74 static int charset_emacs
;
76 /* The other special charsets. */
77 int charset_jisx0201_roman
;
78 int charset_jisx0208_1978
;
82 /* Charset of unibyte characters. */
85 /* List of charsets ordered by the priority. */
86 Lisp_Object Vcharset_ordered_list
;
88 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
90 Lisp_Object Vcharset_non_preferred_head
;
92 /* Incremented every time we change the priority of charsets.
94 EMACS_UINT charset_ordered_list_tick
;
96 /* List of iso-2022 charsets. */
97 Lisp_Object Viso_2022_charset_list
;
99 /* List of emacs-mule charsets. */
100 Lisp_Object Vemacs_mule_charset_list
;
102 int emacs_mule_charset
[256];
104 /* Mapping table from ISO2022's charset (specified by DIMENSION,
105 CHARS, and FINAL-CHAR) to Emacs' charset. */
106 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
108 #define CODE_POINT_TO_INDEX(charset, code) \
109 ((charset)->code_linear_p \
110 ? (int) ((code) - (charset)->min_code) \
111 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
112 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
113 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
114 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
115 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
116 * (charset)->code_space[11]) \
117 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
118 * (charset)->code_space[7]) \
119 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
120 * (charset)->code_space[3]) \
121 + (((code) & 0xFF) - (charset)->code_space[0]) \
122 - ((charset)->char_index_offset)) \
126 /* Return the code-point for the character index IDX in CHARSET.
127 IDX should be an unsigned int variable in a valid range (which is
128 always in nonnegative int range too). IDX contains garbage afterwards. */
130 #define INDEX_TO_CODE_POINT(charset, idx) \
131 ((charset)->code_linear_p \
132 ? (idx) + (charset)->min_code \
133 : (idx += (charset)->char_index_offset, \
134 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
135 | (((charset)->code_space[4] \
136 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
138 | (((charset)->code_space[8] \
139 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
141 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
144 /* Structure to hold mapping tables for a charset. Used by temacs
145 invoked for dumping. */
149 /* The current charset for which the following tables are setup. */
150 struct charset
*current
;
152 /* 1 iff the following table is used for encoder. */
155 /* When the following table is used for encoding, minimum and
156 maximum character of the current charset. */
157 int min_char
, max_char
;
159 /* A Unicode character corresponding to the code index 0 (i.e. the
160 minimum code-point) of the current charset, or -1 if the code
161 index 0 is not a Unicode character. This is checked when
162 table.encoder[CHAR] is zero. */
166 /* Table mapping code-indices (not code-points) of the current
167 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
168 doesn't belong to the current charset. */
169 int decoder
[0x10000];
170 /* Table mapping Unicode characters to code-indices of the current
171 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
172 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
173 (0x20000..0x2FFFF). Note that there is no charset map that
174 uses both SMP and SIP. */
175 unsigned short encoder
[0x20000];
177 } *temp_charset_work
;
179 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
182 temp_charset_work->zero_index_char = (C); \
183 else if ((C) < 0x20000) \
184 temp_charset_work->table.encoder[(C)] = (CODE); \
186 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
189 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
190 ((C) == temp_charset_work->zero_index_char ? 0 \
191 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
192 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
193 : temp_charset_work->table.encoder[(C) - 0x10000] \
194 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
196 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
197 (temp_charset_work->table.decoder[(CODE)] = (C))
199 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
200 (temp_charset_work->table.decoder[(CODE)])
203 /* Set to 1 to warn that a charset map is loaded and thus a buffer
204 text and a string data may be relocated. */
205 bool charset_map_loaded
;
207 struct charset_map_entries
213 struct charset_map_entries
*next
;
216 /* Load the mapping information of CHARSET from ENTRIES for
217 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
218 encoding (CONTROL_FLAG == 2).
220 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
221 and CHARSET->fast_map.
223 If CONTROL_FLAG is 1, setup the following tables according to
224 CHARSET->method and inhibit_load_charset_map.
226 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
227 ----------------------+--------------------+---------------------------
228 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
229 ----------------------+--------------------+---------------------------
230 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
232 If CONTROL_FLAG is 2, setup the following tables.
234 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
235 ----------------------+--------------------+---------------------------
236 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
237 ----------------------+--------------------+--------------------------
238 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
242 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
244 Lisp_Object vec
IF_LINT (= Qnil
), table
IF_LINT (= Qnil
);
245 unsigned max_code
= CHARSET_MAX_CODE (charset
);
246 bool ascii_compatible_p
= charset
->ascii_compatible_p
;
247 int min_char
, max_char
, nonascii_min_char
;
249 unsigned char *fast_map
= charset
->fast_map
;
256 if (! inhibit_load_charset_map
)
258 if (control_flag
== 1)
260 if (charset
->method
== CHARSET_METHOD_MAP
)
262 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
264 vec
= Fmake_vector (make_number (n
), make_number (-1));
265 set_charset_attr (charset
, charset_decoder
, vec
);
269 char_table_set_range (Vchar_unify_table
,
270 charset
->min_char
, charset
->max_char
,
276 table
= Fmake_char_table (Qnil
, Qnil
);
277 set_charset_attr (charset
,
278 (charset
->method
== CHARSET_METHOD_MAP
279 ? charset_encoder
: charset_deunifier
),
285 if (! temp_charset_work
)
286 temp_charset_work
= xmalloc (sizeof *temp_charset_work
);
287 if (control_flag
== 1)
289 memset (temp_charset_work
->table
.decoder
, -1,
290 sizeof (int) * 0x10000);
294 memset (temp_charset_work
->table
.encoder
, 0,
295 sizeof (unsigned short) * 0x20000);
296 temp_charset_work
->zero_index_char
= -1;
298 temp_charset_work
->current
= charset
;
299 temp_charset_work
->for_encoder
= (control_flag
== 2);
302 charset_map_loaded
= 1;
305 min_char
= max_char
= entries
->entry
[0].c
;
306 nonascii_min_char
= MAX_CHAR
;
307 for (i
= 0; i
< n_entries
; i
++)
310 int from_index
, to_index
, lim_index
;
312 int idx
= i
% 0x10000;
314 if (i
> 0 && idx
== 0)
315 entries
= entries
->next
;
316 from
= entries
->entry
[idx
].from
;
317 to
= entries
->entry
[idx
].to
;
318 from_c
= entries
->entry
[idx
].c
;
319 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
322 to_index
= from_index
;
327 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
328 to_c
= from_c
+ (to_index
- from_index
);
330 if (from_index
< 0 || to_index
< 0)
332 lim_index
= to_index
+ 1;
336 else if (from_c
< min_char
)
339 if (control_flag
== 1)
341 if (charset
->method
== CHARSET_METHOD_MAP
)
342 for (; from_index
< lim_index
; from_index
++, from_c
++)
343 ASET (vec
, from_index
, make_number (from_c
));
345 for (; from_index
< lim_index
; from_index
++, from_c
++)
346 CHAR_TABLE_SET (Vchar_unify_table
,
347 CHARSET_CODE_OFFSET (charset
) + from_index
,
348 make_number (from_c
));
350 else if (control_flag
== 2)
352 if (charset
->method
== CHARSET_METHOD_MAP
353 && CHARSET_COMPACT_CODES_P (charset
))
354 for (; from_index
< lim_index
; from_index
++, from_c
++)
356 unsigned code
= from_index
;
357 code
= INDEX_TO_CODE_POINT (charset
, code
);
359 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
360 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
363 for (; from_index
< lim_index
; from_index
++, from_c
++)
365 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
366 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
369 else if (control_flag
== 3)
370 for (; from_index
< lim_index
; from_index
++, from_c
++)
371 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
372 else if (control_flag
== 4)
373 for (; from_index
< lim_index
; from_index
++, from_c
++)
374 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
375 else /* control_flag == 0 */
377 if (ascii_compatible_p
)
379 if (! ASCII_CHAR_P (from_c
))
381 if (from_c
< nonascii_min_char
)
382 nonascii_min_char
= from_c
;
384 else if (! ASCII_CHAR_P (to_c
))
386 nonascii_min_char
= 0x80;
390 for (; from_c
<= to_c
; from_c
++)
391 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
395 if (control_flag
== 0)
397 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
398 ? nonascii_min_char
: min_char
);
399 CHARSET_MAX_CHAR (charset
) = max_char
;
401 else if (control_flag
== 4)
403 temp_charset_work
->min_char
= min_char
;
404 temp_charset_work
->max_char
= max_char
;
409 /* Read a hexadecimal number (preceded by "0x") from the file FP while
410 paying attention to comment character '#'. */
413 read_hex (FILE *fp
, bool *eof
, bool *overflow
)
418 while ((c
= getc (fp
)) != EOF
)
422 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
426 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
436 while (c_isxdigit (c
= getc (fp
)))
438 if (UINT_MAX
>> 4 < n
)
441 | (c
- ('0' <= c
&& c
<= '9' ? '0'
442 : 'A' <= c
&& c
<= 'F' ? 'A' - 10
450 /* Return a mapping vector for CHARSET loaded from MAPFILE.
451 Each line of MAPFILE has this form
453 where 0xAAAA is a code-point and 0xCCCC is the corresponding
454 character code, or this form
456 where 0xAAAA and 0xBBBB are code-points specifying a range, and
457 0xCCCC is the first character code of the range.
459 The returned vector has this form:
460 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
461 where CODE1 is a code-point or a cons of code-points specifying a
464 Note that this function uses `openp' to open MAPFILE but ignores
465 `file-name-handler-alist' to avoid running any Lisp code. */
468 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
,
471 unsigned min_code
= CHARSET_MIN_CODE (charset
);
472 unsigned max_code
= CHARSET_MAX_CODE (charset
);
475 struct charset_map_entries
*head
, *entries
;
477 AUTO_STRING (map
, ".map");
478 AUTO_STRING (txt
, ".txt");
479 AUTO_LIST2 (suffixes
, map
, txt
);
480 ptrdiff_t count
= SPECPDL_INDEX ();
481 record_unwind_protect_nothing ();
482 specbind (Qfile_name_handler_alist
, Qnil
);
483 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
, false);
484 fp
= fd
< 0 ? 0 : fdopen (fd
, "r");
487 int open_errno
= errno
;
489 report_file_errno ("Loading charset map", mapfile
, open_errno
);
491 set_unwind_protect_ptr (count
, fclose_unwind
, fp
);
492 unbind_to (count
+ 1, Qnil
);
494 /* Use record_xmalloc, as `charset_map_entries' is
495 large (larger than MAX_ALLOCA). */
496 head
= record_xmalloc (sizeof *head
);
498 memset (entries
, 0, sizeof (struct charset_map_entries
));
503 unsigned from
, to
, c
;
505 bool eof
= 0, overflow
= 0;
507 from
= read_hex (fp
, &eof
, &overflow
);
510 if (getc (fp
) == '-')
511 to
= read_hex (fp
, &eof
, &overflow
);
516 c
= read_hex (fp
, &eof
, &overflow
);
522 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
525 if (n_entries
== 0x10000)
527 entries
->next
= record_xmalloc (sizeof *entries
->next
);
528 entries
= entries
->next
;
529 memset (entries
, 0, sizeof (struct charset_map_entries
));
533 entries
->entry
[idx
].from
= from
;
534 entries
->entry
[idx
].to
= to
;
535 entries
->entry
[idx
].c
= c
;
539 clear_unwind_protect (count
);
541 load_charset_map (charset
, head
, n_entries
, control_flag
);
542 unbind_to (count
, Qnil
);
546 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
548 unsigned min_code
= CHARSET_MIN_CODE (charset
);
549 unsigned max_code
= CHARSET_MAX_CODE (charset
);
550 struct charset_map_entries
*head
, *entries
;
552 int len
= ASIZE (vec
);
558 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
562 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
563 large (larger than MAX_ALLOCA). */
564 head
= SAFE_ALLOCA (sizeof *head
);
566 memset (entries
, 0, sizeof (struct charset_map_entries
));
569 for (i
= 0; i
< len
; i
+= 2)
571 Lisp_Object val
, val2
;
581 from
= XFASTINT (val
);
582 to
= XFASTINT (val2
);
585 from
= to
= XFASTINT (val
);
586 val
= AREF (vec
, i
+ 1);
590 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
593 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
595 entries
->next
= SAFE_ALLOCA (sizeof *entries
->next
);
596 entries
= entries
->next
;
597 memset (entries
, 0, sizeof (struct charset_map_entries
));
599 idx
= n_entries
% 0x10000;
600 entries
->entry
[idx
].from
= from
;
601 entries
->entry
[idx
].to
= to
;
602 entries
->entry
[idx
].c
= c
;
606 load_charset_map (charset
, head
, n_entries
, control_flag
);
611 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
612 map it is (see the comment of load_charset_map for the detail). */
615 load_charset (struct charset
*charset
, int control_flag
)
619 if (inhibit_load_charset_map
621 && charset
== temp_charset_work
->current
622 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
625 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
626 map
= CHARSET_MAP (charset
);
629 if (! CHARSET_UNIFIED_P (charset
))
631 map
= CHARSET_UNIFY_MAP (charset
);
634 load_charset_map_from_file (charset
, map
, control_flag
);
636 load_charset_map_from_vector (charset
, map
, control_flag
);
640 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
641 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
644 return (CHARSETP (object
) ? Qt
: Qnil
);
649 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
650 Lisp_Object function
, Lisp_Object arg
,
651 unsigned int from
, unsigned int to
)
653 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
654 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
655 Lisp_Object range
= Fcons (Qnil
, Qnil
);
658 c
= temp_charset_work
->min_char
;
659 stop
= (temp_charset_work
->max_char
< 0x20000
660 ? temp_charset_work
->max_char
: 0xFFFF);
664 int idx
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
666 if (idx
>= from_idx
&& idx
<= to_idx
)
668 if (NILP (XCAR (range
)))
669 XSETCAR (range
, make_number (c
));
671 else if (! NILP (XCAR (range
)))
673 XSETCDR (range
, make_number (c
- 1));
675 (*c_function
) (arg
, range
);
677 call2 (function
, range
, arg
);
678 XSETCAR (range
, Qnil
);
682 if (c
== temp_charset_work
->max_char
)
684 if (! NILP (XCAR (range
)))
686 XSETCDR (range
, make_number (c
));
688 (*c_function
) (arg
, range
);
690 call2 (function
, range
, arg
);
695 stop
= temp_charset_work
->max_char
;
702 map_charset_chars (void (*c_function
)(Lisp_Object
, Lisp_Object
), Lisp_Object function
,
703 Lisp_Object arg
, struct charset
*charset
, unsigned from
, unsigned to
)
706 bool partial
= (from
> CHARSET_MIN_CODE (charset
)
707 || to
< CHARSET_MAX_CODE (charset
));
709 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
711 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
712 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
713 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
714 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
716 if (CHARSET_UNIFIED_P (charset
))
718 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
719 load_charset (charset
, 2);
720 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
721 map_char_table_for_charset (c_function
, function
,
722 CHARSET_DEUNIFIER (charset
), arg
,
723 partial
? charset
: NULL
, from
, to
);
725 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
728 range
= Fcons (make_number (from_c
), make_number (to_c
));
730 (*c_function
) (arg
, range
);
732 call2 (function
, range
, arg
);
734 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
736 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
737 load_charset (charset
, 2);
738 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
739 map_char_table_for_charset (c_function
, function
,
740 CHARSET_ENCODER (charset
), arg
,
741 partial
? charset
: NULL
, from
, to
);
743 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
745 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
747 Lisp_Object subset_info
;
750 subset_info
= CHARSET_SUBSET (charset
);
751 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
752 offset
= XINT (AREF (subset_info
, 3));
754 if (from
< XFASTINT (AREF (subset_info
, 1)))
755 from
= XFASTINT (AREF (subset_info
, 1));
757 if (to
> XFASTINT (AREF (subset_info
, 2)))
758 to
= XFASTINT (AREF (subset_info
, 2));
759 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
761 else /* i.e. CHARSET_METHOD_SUPERSET */
765 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
766 parents
= XCDR (parents
))
769 unsigned this_from
, this_to
;
771 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
772 offset
= XINT (XCDR (XCAR (parents
)));
773 this_from
= from
> offset
? from
- offset
: 0;
774 this_to
= to
> offset
? to
- offset
: 0;
775 if (this_from
< CHARSET_MIN_CODE (charset
))
776 this_from
= CHARSET_MIN_CODE (charset
);
777 if (this_to
> CHARSET_MAX_CODE (charset
))
778 this_to
= CHARSET_MAX_CODE (charset
);
779 map_charset_chars (c_function
, function
, arg
, charset
,
785 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
786 doc
: /* Call FUNCTION for all characters in CHARSET.
787 FUNCTION is called with an argument RANGE and the optional 3rd
790 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
791 characters contained in CHARSET.
793 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
794 range of code points (in CHARSET) of target characters. */)
795 (Lisp_Object function
, Lisp_Object charset
, Lisp_Object arg
, Lisp_Object from_code
, Lisp_Object to_code
)
800 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
801 if (NILP (from_code
))
802 from
= CHARSET_MIN_CODE (cs
);
805 from
= XINT (from_code
);
806 if (from
< CHARSET_MIN_CODE (cs
))
807 from
= CHARSET_MIN_CODE (cs
);
810 to
= CHARSET_MAX_CODE (cs
);
814 if (to
> CHARSET_MAX_CODE (cs
))
815 to
= CHARSET_MAX_CODE (cs
);
817 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
822 /* Define a charset according to the arguments. The Nth argument is
823 the Nth attribute of the charset (the last attribute `charset-id'
824 is not included). See the docstring of `define-charset' for the
827 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
828 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
829 doc
: /* For internal use only.
830 usage: (define-charset-internal ...) */)
831 (ptrdiff_t nargs
, Lisp_Object
*args
)
833 /* Charset attr vector. */
836 EMACS_UINT hash_code
;
837 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
839 struct charset charset
;
842 bool new_definition_p
;
845 if (nargs
!= charset_arg_max
)
846 return Fsignal (Qwrong_number_of_arguments
,
847 Fcons (intern ("define-charset-internal"),
848 make_number (nargs
)));
850 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
852 CHECK_SYMBOL (args
[charset_arg_name
]);
853 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
855 val
= args
[charset_arg_code_space
];
856 for (i
= 0, dimension
= 0, nchars
= 1; ; i
++)
858 Lisp_Object min_byte_obj
, max_byte_obj
;
859 int min_byte
, max_byte
;
861 min_byte_obj
= Faref (val
, make_number (i
* 2));
862 max_byte_obj
= Faref (val
, make_number (i
* 2 + 1));
863 CHECK_RANGED_INTEGER (min_byte_obj
, 0, 255);
864 min_byte
= XINT (min_byte_obj
);
865 CHECK_RANGED_INTEGER (max_byte_obj
, min_byte
, 255);
866 max_byte
= XINT (max_byte_obj
);
867 charset
.code_space
[i
* 4] = min_byte
;
868 charset
.code_space
[i
* 4 + 1] = max_byte
;
869 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
874 nchars
*= charset
.code_space
[i
* 4 + 2];
875 charset
.code_space
[i
* 4 + 3] = nchars
;
878 val
= args
[charset_arg_dimension
];
880 charset
.dimension
= dimension
;
883 CHECK_RANGED_INTEGER (val
, 1, 4);
884 charset
.dimension
= XINT (val
);
887 charset
.code_linear_p
888 = (charset
.dimension
== 1
889 || (charset
.code_space
[2] == 256
890 && (charset
.dimension
== 2
891 || (charset
.code_space
[6] == 256
892 && (charset
.dimension
== 3
893 || charset
.code_space
[10] == 256)))));
895 if (! charset
.code_linear_p
)
897 charset
.code_space_mask
= xzalloc (256);
898 for (i
= 0; i
< 4; i
++)
899 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
901 charset
.code_space_mask
[j
] |= (1 << i
);
904 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
906 charset
.min_code
= (charset
.code_space
[0]
907 | (charset
.code_space
[4] << 8)
908 | (charset
.code_space
[8] << 16)
909 | ((unsigned) charset
.code_space
[12] << 24));
910 charset
.max_code
= (charset
.code_space
[1]
911 | (charset
.code_space
[5] << 8)
912 | (charset
.code_space
[9] << 16)
913 | ((unsigned) charset
.code_space
[13] << 24));
914 charset
.char_index_offset
= 0;
916 val
= args
[charset_arg_min_code
];
919 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
921 if (code
< charset
.min_code
922 || code
> charset
.max_code
)
923 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
924 make_fixnum_or_float (charset
.max_code
), val
);
925 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
926 charset
.min_code
= code
;
929 val
= args
[charset_arg_max_code
];
932 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
934 if (code
< charset
.min_code
935 || code
> charset
.max_code
)
936 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
937 make_fixnum_or_float (charset
.max_code
), val
);
938 charset
.max_code
= code
;
941 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
943 val
= args
[charset_arg_invalid_code
];
946 if (charset
.min_code
> 0)
947 charset
.invalid_code
= 0;
950 if (charset
.max_code
< UINT_MAX
)
951 charset
.invalid_code
= charset
.max_code
+ 1;
953 error ("Attribute :invalid-code must be specified");
957 charset
.invalid_code
= cons_to_unsigned (val
, UINT_MAX
);
959 val
= args
[charset_arg_iso_final
];
961 charset
.iso_final
= -1;
965 if (XINT (val
) < '0' || XINT (val
) > 127)
966 error ("Invalid iso-final-char: %"pI
"d", XINT (val
));
967 charset
.iso_final
= XINT (val
);
970 val
= args
[charset_arg_iso_revision
];
972 charset
.iso_revision
= -1;
975 CHECK_RANGED_INTEGER (val
, -1, 63);
976 charset
.iso_revision
= XINT (val
);
979 val
= args
[charset_arg_emacs_mule_id
];
981 charset
.emacs_mule_id
= -1;
985 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
986 error ("Invalid emacs-mule-id: %"pI
"d", XINT (val
));
987 charset
.emacs_mule_id
= XINT (val
);
990 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
992 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
994 charset
.unified_p
= 0;
996 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
998 if (! NILP (args
[charset_arg_code_offset
]))
1000 val
= args
[charset_arg_code_offset
];
1001 CHECK_CHARACTER (val
);
1003 charset
.method
= CHARSET_METHOD_OFFSET
;
1004 charset
.code_offset
= XINT (val
);
1006 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1007 if (MAX_CHAR
- charset
.code_offset
< i
)
1008 error ("Unsupported max char: %d", charset
.max_char
);
1009 charset
.max_char
= i
+ charset
.code_offset
;
1010 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1011 charset
.min_char
= i
+ charset
.code_offset
;
1013 i
= (charset
.min_char
>> 7) << 7;
1014 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1015 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1016 i
= (i
>> 12) << 12;
1017 for (; i
<= charset
.max_char
; i
+= 0x1000)
1018 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1019 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1020 charset
.ascii_compatible_p
= 1;
1022 else if (! NILP (args
[charset_arg_map
]))
1024 val
= args
[charset_arg_map
];
1025 ASET (attrs
, charset_map
, val
);
1026 charset
.method
= CHARSET_METHOD_MAP
;
1028 else if (! NILP (args
[charset_arg_subset
]))
1031 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1032 struct charset
*parent_charset
;
1034 val
= args
[charset_arg_subset
];
1035 parent
= Fcar (val
);
1036 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1037 parent_min_code
= Fnth (make_number (1), val
);
1038 CHECK_NATNUM (parent_min_code
);
1039 parent_max_code
= Fnth (make_number (2), val
);
1040 CHECK_NATNUM (parent_max_code
);
1041 parent_code_offset
= Fnth (make_number (3), val
);
1042 CHECK_NUMBER (parent_code_offset
);
1043 val
= make_uninit_vector (4);
1044 ASET (val
, 0, make_number (parent_charset
->id
));
1045 ASET (val
, 1, parent_min_code
);
1046 ASET (val
, 2, parent_max_code
);
1047 ASET (val
, 3, parent_code_offset
);
1048 ASET (attrs
, charset_subset
, val
);
1050 charset
.method
= CHARSET_METHOD_SUBSET
;
1051 /* Here, we just copy the parent's fast_map. It's not accurate,
1052 but at least it works for quickly detecting which character
1053 DOESN'T belong to this charset. */
1054 for (i
= 0; i
< 190; i
++)
1055 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1057 /* We also copy these for parents. */
1058 charset
.min_char
= parent_charset
->min_char
;
1059 charset
.max_char
= parent_charset
->max_char
;
1061 else if (! NILP (args
[charset_arg_superset
]))
1063 val
= args
[charset_arg_superset
];
1064 charset
.method
= CHARSET_METHOD_SUPERSET
;
1065 val
= Fcopy_sequence (val
);
1066 ASET (attrs
, charset_superset
, val
);
1068 charset
.min_char
= MAX_CHAR
;
1069 charset
.max_char
= 0;
1070 for (; ! NILP (val
); val
= Fcdr (val
))
1072 Lisp_Object elt
, car_part
, cdr_part
;
1073 int this_id
, offset
;
1074 struct charset
*this_charset
;
1079 car_part
= XCAR (elt
);
1080 cdr_part
= XCDR (elt
);
1081 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1082 CHECK_TYPE_RANGED_INTEGER (int, cdr_part
);
1083 offset
= XINT (cdr_part
);
1087 CHECK_CHARSET_GET_ID (elt
, this_id
);
1090 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1092 this_charset
= CHARSET_FROM_ID (this_id
);
1093 if (charset
.min_char
> this_charset
->min_char
)
1094 charset
.min_char
= this_charset
->min_char
;
1095 if (charset
.max_char
< this_charset
->max_char
)
1096 charset
.max_char
= this_charset
->max_char
;
1097 for (i
= 0; i
< 190; i
++)
1098 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1102 error ("None of :code-offset, :map, :parents are specified");
1104 val
= args
[charset_arg_unify_map
];
1105 if (! NILP (val
) && !STRINGP (val
))
1107 ASET (attrs
, charset_unify_map
, val
);
1109 CHECK_LIST (args
[charset_arg_plist
]);
1110 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1112 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1114 if (charset
.hash_index
>= 0)
1116 new_definition_p
= 0;
1117 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1118 set_hash_value_slot (hash_table
, charset
.hash_index
, attrs
);
1122 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1124 if (charset_table_used
== charset_table_size
)
1126 /* Ensure that charset IDs fit into 'int' as well as into the
1127 restriction imposed by fixnums. Although the 'int' restriction
1128 could be removed, too much other code would need altering; for
1129 example, the IDs are stuffed into struct
1130 coding_system.charbuf[i] entries, which are 'int'. */
1131 int old_size
= charset_table_size
;
1132 ptrdiff_t new_size
= old_size
;
1133 struct charset
*new_table
=
1134 xpalloc (0, &new_size
, 1,
1135 min (INT_MAX
, MOST_POSITIVE_FIXNUM
),
1136 sizeof *charset_table
);
1137 memcpy (new_table
, charset_table
, old_size
* sizeof *new_table
);
1138 charset_table
= new_table
;
1139 charset_table_size
= new_size
;
1140 /* FIXME: This leaks memory, as the old charset_table becomes
1141 unreachable. If the old charset table is charset_table_init
1142 then this leak is intentional; otherwise, it's unclear.
1143 If the latter memory leak is intentional, a
1144 comment should be added to explain this. If not, the old
1145 charset_table should be freed, by passing it as the 1st argument
1146 to xpalloc and removing the memcpy. */
1148 id
= charset_table_used
++;
1149 new_definition_p
= 1;
1152 ASET (attrs
, charset_id
, make_number (id
));
1154 charset_table
[id
] = charset
;
1156 if (charset
.method
== CHARSET_METHOD_MAP
)
1158 load_charset (&charset
, 0);
1159 charset_table
[id
] = charset
;
1162 if (charset
.iso_final
>= 0)
1164 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1165 charset
.iso_final
) = id
;
1166 if (new_definition_p
)
1167 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1168 list1 (make_number (id
)));
1169 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1170 charset_jisx0201_roman
= id
;
1171 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1172 charset_jisx0208_1978
= id
;
1173 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1174 charset_jisx0208
= id
;
1175 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1176 charset_ksc5601
= id
;
1179 if (charset
.emacs_mule_id
>= 0)
1181 emacs_mule_charset
[charset
.emacs_mule_id
] = id
;
1182 if (charset
.emacs_mule_id
< 0xA0)
1183 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1185 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1186 if (new_definition_p
)
1187 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1188 list1 (make_number (id
)));
1191 if (new_definition_p
)
1193 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1194 if (charset
.supplementary_p
)
1195 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1196 list1 (make_number (id
)));
1201 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1203 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1205 if (cs
->supplementary_p
)
1208 if (EQ (tail
, Vcharset_ordered_list
))
1209 Vcharset_ordered_list
= Fcons (make_number (id
),
1210 Vcharset_ordered_list
);
1211 else if (NILP (tail
))
1212 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1213 list1 (make_number (id
)));
1216 val
= Fcons (XCAR (tail
), XCDR (tail
));
1217 XSETCDR (tail
, val
);
1218 XSETCAR (tail
, make_number (id
));
1221 charset_ordered_list_tick
++;
1228 /* Same as Fdefine_charset_internal but arguments are more convenient
1229 to call from C (typically in syms_of_charset). This can define a
1230 charset of `offset' method only. Return the ID of the new
1234 define_charset_internal (Lisp_Object name
,
1236 const char *code_space_chars
,
1237 unsigned min_code
, unsigned max_code
,
1238 int iso_final
, int iso_revision
, int emacs_mule_id
,
1239 bool ascii_compatible
, bool supplementary
,
1242 const unsigned char *code_space
= (const unsigned char *) code_space_chars
;
1243 Lisp_Object args
[charset_arg_max
];
1247 args
[charset_arg_name
] = name
;
1248 args
[charset_arg_dimension
] = make_number (dimension
);
1249 val
= make_uninit_vector (8);
1250 for (i
= 0; i
< 8; i
++)
1251 ASET (val
, i
, make_number (code_space
[i
]));
1252 args
[charset_arg_code_space
] = val
;
1253 args
[charset_arg_min_code
] = make_number (min_code
);
1254 args
[charset_arg_max_code
] = make_number (max_code
);
1255 args
[charset_arg_iso_final
]
1256 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1257 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1258 args
[charset_arg_emacs_mule_id
]
1259 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1260 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1261 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1262 args
[charset_arg_invalid_code
] = Qnil
;
1263 args
[charset_arg_code_offset
] = make_number (code_offset
);
1264 args
[charset_arg_map
] = Qnil
;
1265 args
[charset_arg_subset
] = Qnil
;
1266 args
[charset_arg_superset
] = Qnil
;
1267 args
[charset_arg_unify_map
] = Qnil
;
1269 args
[charset_arg_plist
] =
1270 listn (CONSTYPE_HEAP
, 14,
1271 intern_c_string (":name"),
1272 args
[charset_arg_name
],
1273 intern_c_string (":dimension"),
1274 args
[charset_arg_dimension
],
1275 intern_c_string (":code-space"),
1276 args
[charset_arg_code_space
],
1277 intern_c_string (":iso-final-char"),
1278 args
[charset_arg_iso_final
],
1279 intern_c_string (":emacs-mule-id"),
1280 args
[charset_arg_emacs_mule_id
],
1281 intern_c_string (":ascii-compatible-p"),
1282 args
[charset_arg_ascii_compatible_p
],
1283 intern_c_string (":code-offset"),
1284 args
[charset_arg_code_offset
]);
1285 Fdefine_charset_internal (charset_arg_max
, args
);
1287 return XINT (CHARSET_SYMBOL_ID (name
));
1291 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1292 Sdefine_charset_alias
, 2, 2, 0,
1293 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1294 (Lisp_Object alias
, Lisp_Object charset
)
1298 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1299 Fputhash (alias
, attr
, Vcharset_hash_table
);
1300 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1305 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1306 doc
: /* Return the property list of CHARSET. */)
1307 (Lisp_Object charset
)
1311 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1312 return CHARSET_ATTR_PLIST (attrs
);
1316 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1317 doc
: /* Set CHARSET's property list to PLIST. */)
1318 (Lisp_Object charset
, Lisp_Object plist
)
1322 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1323 ASET (attrs
, charset_plist
, plist
);
1328 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1329 doc
: /* Unify characters of CHARSET with Unicode.
1330 This means reading the relevant file and installing the table defined
1331 by CHARSET's `:unify-map' property.
1333 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1334 the same meaning as the `:unify-map' attribute in the function
1335 `define-charset' (which see).
1337 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1338 (Lisp_Object charset
, Lisp_Object unify_map
, Lisp_Object deunify
)
1343 CHECK_CHARSET_GET_ID (charset
, id
);
1344 cs
= CHARSET_FROM_ID (id
);
1346 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1347 : ! CHARSET_UNIFIED_P (cs
))
1350 CHARSET_UNIFIED_P (cs
) = 0;
1353 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1354 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1355 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1356 if (NILP (unify_map
))
1357 unify_map
= CHARSET_UNIFY_MAP (cs
);
1360 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1361 signal_error ("Bad unify-map", unify_map
);
1362 set_charset_attr (cs
, charset_unify_map
, unify_map
);
1364 if (NILP (Vchar_unify_table
))
1365 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1366 char_table_set_range (Vchar_unify_table
,
1367 cs
->min_char
, cs
->max_char
, charset
);
1368 CHARSET_UNIFIED_P (cs
) = 1;
1370 else if (CHAR_TABLE_P (Vchar_unify_table
))
1372 unsigned min_code
= CHARSET_MIN_CODE (cs
);
1373 unsigned max_code
= CHARSET_MAX_CODE (cs
);
1374 int min_char
= DECODE_CHAR (cs
, min_code
);
1375 int max_char
= DECODE_CHAR (cs
, max_code
);
1377 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1383 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
1384 Return true if it's a 96-character set, false if 94. */
1387 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
,
1388 Lisp_Object final_char
)
1390 CHECK_NUMBER (dimension
);
1391 CHECK_NUMBER (chars
);
1392 CHECK_CHARACTER (final_char
);
1394 if (! (1 <= XINT (dimension
) && XINT (dimension
) <= 3))
1395 error ("Invalid DIMENSION %"pI
"d, it should be 1, 2, or 3",
1398 bool chars_flag
= XINT (chars
) == 96;
1399 if (! (chars_flag
|| XINT (chars
) == 94))
1400 error ("Invalid CHARS %"pI
"d, it should be 94 or 96", XINT (chars
));
1402 int final_ch
= XFASTINT (final_char
);
1403 if (! ('0' <= final_ch
&& final_ch
<= '~'))
1404 error ("Invalid FINAL-CHAR '%c', it should be '0'..'~'", final_ch
);
1409 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1410 Sget_unused_iso_final_char
, 2, 2, 0,
1412 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1413 DIMENSION is the number of bytes to represent a character: 1 or 2.
1414 CHARS is the number of characters in a dimension: 94 or 96.
1416 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1417 If there's no unused final char for the specified kind of charset,
1419 (Lisp_Object dimension
, Lisp_Object chars
)
1421 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
,
1423 for (int final_char
= '0'; final_char
<= '?'; final_char
++)
1424 if (ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, final_char
) < 0)
1425 return make_number (final_char
);
1430 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1432 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1434 On decoding by an ISO-2022 base coding system, when a charset
1435 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1436 if CHARSET is designated instead. */)
1437 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
, Lisp_Object charset
)
1441 CHECK_CHARSET_GET_ID (charset
, id
);
1442 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
1443 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XFASTINT (final_char
)) = id
;
1448 /* Return information about charsets in the text at PTR of NBYTES
1449 bytes, which are NCHARS characters. The value is:
1451 0: Each character is represented by one byte. This is always
1452 true for a unibyte string. For a multibyte string, true if
1453 it contains only ASCII characters.
1455 1: No charsets other than ascii, control-1, and latin-1 are
1462 string_xstring_p (Lisp_Object string
)
1464 const unsigned char *p
= SDATA (string
);
1465 const unsigned char *endp
= p
+ SBYTES (string
);
1467 if (SCHARS (string
) == SBYTES (string
))
1472 int c
= STRING_CHAR_ADVANCE (p
);
1481 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1483 CHARSETS is a vector. If Nth element is non-nil, it means the
1484 charset whose id is N is already found.
1486 It may lookup a translation table TABLE if supplied. */
1489 find_charsets_in_text (const unsigned char *ptr
, ptrdiff_t nchars
,
1490 ptrdiff_t nbytes
, Lisp_Object charsets
,
1491 Lisp_Object table
, bool multibyte
)
1493 const unsigned char *pend
= ptr
+ nbytes
;
1495 if (nchars
== nbytes
)
1498 ASET (charsets
, charset_ascii
, Qt
);
1505 c
= translate_char (table
, c
);
1506 if (ASCII_CHAR_P (c
))
1507 ASET (charsets
, charset_ascii
, Qt
);
1509 ASET (charsets
, charset_eight_bit
, Qt
);
1516 int c
= STRING_CHAR_ADVANCE (ptr
);
1517 struct charset
*charset
;
1520 c
= translate_char (table
, c
);
1521 charset
= CHAR_CHARSET (c
);
1522 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1527 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1529 doc
: /* Return a list of charsets in the region between BEG and END.
1530 BEG and END are buffer positions.
1531 Optional arg TABLE if non-nil is a translation table to look up.
1533 If the current buffer is unibyte, the returned list may contain
1534 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1535 (Lisp_Object beg
, Lisp_Object end
, Lisp_Object table
)
1537 Lisp_Object charsets
;
1538 ptrdiff_t from
, from_byte
, to
, stop
, stop_byte
;
1541 bool multibyte
= ! NILP (BVAR (current_buffer
, enable_multibyte_characters
));
1543 validate_region (&beg
, &end
);
1544 from
= XFASTINT (beg
);
1545 stop
= to
= XFASTINT (end
);
1547 if (from
< GPT
&& GPT
< to
)
1550 stop_byte
= GPT_BYTE
;
1553 stop_byte
= CHAR_TO_BYTE (stop
);
1555 from_byte
= CHAR_TO_BYTE (from
);
1557 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1560 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1561 stop_byte
- from_byte
, charsets
, table
,
1565 from
= stop
, from_byte
= stop_byte
;
1566 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1573 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1574 if (!NILP (AREF (charsets
, i
)))
1575 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1579 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1581 doc
: /* Return a list of charsets in STR.
1582 Optional arg TABLE if non-nil is a translation table to look up.
1584 If STR is unibyte, the returned list may contain
1585 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1586 (Lisp_Object str
, Lisp_Object table
)
1588 Lisp_Object charsets
;
1594 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1595 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1597 STRING_MULTIBYTE (str
));
1599 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1600 if (!NILP (AREF (charsets
, i
)))
1601 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1607 /* Return a unified character code for C (>= 0x110000). VAL is a
1608 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1611 maybe_unify_char (int c
, Lisp_Object val
)
1613 struct charset
*charset
;
1616 return XFASTINT (val
);
1620 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1622 /* The call to load_charset below can allocate memory, which screws
1623 callers of this function through STRING_CHAR_* macros that hold C
1624 pointers to buffer text, if REL_ALLOC is used. */
1625 r_alloc_inhibit_buffer_relocation (1);
1627 load_charset (charset
, 1);
1628 if (! inhibit_load_charset_map
)
1630 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1636 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1637 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1643 r_alloc_inhibit_buffer_relocation (0);
1649 /* Return a character corresponding to the code-point CODE of
1653 decode_char (struct charset
*charset
, unsigned int code
)
1656 enum charset_method method
= CHARSET_METHOD (charset
);
1658 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1661 if (method
== CHARSET_METHOD_SUBSET
)
1663 Lisp_Object subset_info
;
1665 subset_info
= CHARSET_SUBSET (charset
);
1666 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1667 code
-= XINT (AREF (subset_info
, 3));
1668 if (code
< XFASTINT (AREF (subset_info
, 1))
1669 || code
> XFASTINT (AREF (subset_info
, 2)))
1672 c
= DECODE_CHAR (charset
, code
);
1674 else if (method
== CHARSET_METHOD_SUPERSET
)
1676 Lisp_Object parents
;
1678 parents
= CHARSET_SUPERSET (charset
);
1680 for (; CONSP (parents
); parents
= XCDR (parents
))
1682 int id
= XINT (XCAR (XCAR (parents
)));
1683 int code_offset
= XINT (XCDR (XCAR (parents
)));
1684 unsigned this_code
= code
- code_offset
;
1686 charset
= CHARSET_FROM_ID (id
);
1687 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1693 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1697 if (method
== CHARSET_METHOD_MAP
)
1699 Lisp_Object decoder
;
1701 decoder
= CHARSET_DECODER (charset
);
1702 if (! VECTORP (decoder
))
1704 load_charset (charset
, 1);
1705 decoder
= CHARSET_DECODER (charset
);
1707 if (VECTORP (decoder
))
1708 c
= XINT (AREF (decoder
, char_index
));
1710 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1712 else /* method == CHARSET_METHOD_OFFSET */
1714 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1715 if (CHARSET_UNIFIED_P (charset
)
1716 && MAX_UNICODE_CHAR
< c
&& c
<= MAX_5_BYTE_CHAR
)
1718 /* Unify C with a Unicode character if possible. */
1719 Lisp_Object val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1720 c
= maybe_unify_char (c
, val
);
1728 /* Variable used temporarily by the macro ENCODE_CHAR. */
1729 Lisp_Object charset_work
;
1731 /* Return a code-point of C in CHARSET. If C doesn't belong to
1732 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1733 use CHARSET's strict_max_char instead of max_char. */
1736 encode_char (struct charset
*charset
, int c
)
1739 enum charset_method method
= CHARSET_METHOD (charset
);
1741 if (CHARSET_UNIFIED_P (charset
))
1743 Lisp_Object deunifier
;
1744 int code_index
= -1;
1746 deunifier
= CHARSET_DEUNIFIER (charset
);
1747 if (! CHAR_TABLE_P (deunifier
))
1749 load_charset (charset
, 2);
1750 deunifier
= CHARSET_DEUNIFIER (charset
);
1752 if (CHAR_TABLE_P (deunifier
))
1754 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1756 if (INTEGERP (deunified
))
1757 code_index
= XINT (deunified
);
1761 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1763 if (code_index
>= 0)
1764 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1767 if (method
== CHARSET_METHOD_SUBSET
)
1769 Lisp_Object subset_info
;
1770 struct charset
*this_charset
;
1772 subset_info
= CHARSET_SUBSET (charset
);
1773 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1774 code
= ENCODE_CHAR (this_charset
, c
);
1775 if (code
== CHARSET_INVALID_CODE (this_charset
)
1776 || code
< XFASTINT (AREF (subset_info
, 1))
1777 || code
> XFASTINT (AREF (subset_info
, 2)))
1778 return CHARSET_INVALID_CODE (charset
);
1779 code
+= XINT (AREF (subset_info
, 3));
1783 if (method
== CHARSET_METHOD_SUPERSET
)
1785 Lisp_Object parents
;
1787 parents
= CHARSET_SUPERSET (charset
);
1788 for (; CONSP (parents
); parents
= XCDR (parents
))
1790 int id
= XINT (XCAR (XCAR (parents
)));
1791 int code_offset
= XINT (XCDR (XCAR (parents
)));
1792 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1794 code
= ENCODE_CHAR (this_charset
, c
);
1795 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1796 return code
+ code_offset
;
1798 return CHARSET_INVALID_CODE (charset
);
1801 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1802 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1803 return CHARSET_INVALID_CODE (charset
);
1805 if (method
== CHARSET_METHOD_MAP
)
1807 Lisp_Object encoder
;
1810 encoder
= CHARSET_ENCODER (charset
);
1811 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1813 load_charset (charset
, 2);
1814 encoder
= CHARSET_ENCODER (charset
);
1816 if (CHAR_TABLE_P (encoder
))
1818 val
= CHAR_TABLE_REF (encoder
, c
);
1820 return CHARSET_INVALID_CODE (charset
);
1822 if (! CHARSET_COMPACT_CODES_P (charset
))
1823 code
= INDEX_TO_CODE_POINT (charset
, code
);
1827 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1828 code
= INDEX_TO_CODE_POINT (charset
, code
);
1831 else /* method == CHARSET_METHOD_OFFSET */
1833 unsigned code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1835 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1842 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1843 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1844 Return nil if CODE-POINT is not valid in CHARSET.
1846 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1847 (Lisp_Object charset
, Lisp_Object code_point
, Lisp_Object restriction
)
1851 struct charset
*charsetp
;
1853 CHECK_CHARSET_GET_ID (charset
, id
);
1854 code
= cons_to_unsigned (code_point
, UINT_MAX
);
1855 charsetp
= CHARSET_FROM_ID (id
);
1856 c
= DECODE_CHAR (charsetp
, code
);
1857 return (c
>= 0 ? make_number (c
) : Qnil
);
1861 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1862 doc
: /* Encode the character CH into a code-point of CHARSET.
1863 Return nil if CHARSET doesn't include CH. */)
1864 (Lisp_Object ch
, Lisp_Object charset
, Lisp_Object restriction
)
1868 struct charset
*charsetp
;
1870 CHECK_CHARSET_GET_ID (charset
, id
);
1871 CHECK_CHARACTER (ch
);
1873 charsetp
= CHARSET_FROM_ID (id
);
1874 code
= ENCODE_CHAR (charsetp
, c
);
1875 if (code
== CHARSET_INVALID_CODE (charsetp
))
1877 return INTEGER_TO_CONS (code
);
1881 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1883 /* Return a character of CHARSET whose position codes are CODEn.
1885 CODE1 through CODE4 are optional, but if you don't supply sufficient
1886 position codes, it is assumed that the minimum code in each dimension
1888 (Lisp_Object charset
, Lisp_Object code1
, Lisp_Object code2
, Lisp_Object code3
, Lisp_Object code4
)
1891 struct charset
*charsetp
;
1895 CHECK_CHARSET_GET_ID (charset
, id
);
1896 charsetp
= CHARSET_FROM_ID (id
);
1898 dimension
= CHARSET_DIMENSION (charsetp
);
1900 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1901 ? 0 : CHARSET_MIN_CODE (charsetp
));
1904 CHECK_NATNUM (code1
);
1905 if (XFASTINT (code1
) >= 0x100)
1906 args_out_of_range (make_number (0xFF), code1
);
1907 code
= XFASTINT (code1
);
1913 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1916 CHECK_NATNUM (code2
);
1917 if (XFASTINT (code2
) >= 0x100)
1918 args_out_of_range (make_number (0xFF), code2
);
1919 code
|= XFASTINT (code2
);
1926 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1929 CHECK_NATNUM (code3
);
1930 if (XFASTINT (code3
) >= 0x100)
1931 args_out_of_range (make_number (0xFF), code3
);
1932 code
|= XFASTINT (code3
);
1939 code
|= charsetp
->code_space
[0];
1942 CHECK_NATNUM (code4
);
1943 if (XFASTINT (code4
) >= 0x100)
1944 args_out_of_range (make_number (0xFF), code4
);
1945 code
|= XFASTINT (code4
);
1952 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1954 c
= DECODE_CHAR (charsetp
, code
);
1956 error ("Invalid code(s)");
1957 return make_number (c
);
1961 /* Return the first charset in CHARSET_LIST that contains C.
1962 CHARSET_LIST is a list of charset IDs. If it is nil, use
1963 Vcharset_ordered_list. */
1966 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
1968 bool maybe_null
= 0;
1970 if (NILP (charset_list
))
1971 charset_list
= Vcharset_ordered_list
;
1975 while (CONSP (charset_list
))
1977 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1978 unsigned code
= ENCODE_CHAR (charset
, c
);
1980 if (code
!= CHARSET_INVALID_CODE (charset
))
1983 *code_return
= code
;
1986 charset_list
= XCDR (charset_list
);
1988 && c
<= MAX_UNICODE_CHAR
1989 && EQ (charset_list
, Vcharset_non_preferred_head
))
1990 return CHARSET_FROM_ID (charset_unicode
);
1992 return (maybe_null
? NULL
1993 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
1994 : CHARSET_FROM_ID (charset_eight_bit
));
1998 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2000 /*Return list of charset and one to four position-codes of CH.
2001 The charset is decided by the current priority order of charsets.
2002 A position-code is a byte value of each dimension of the code-point of
2003 CH in the charset. */)
2006 struct charset
*charset
;
2011 CHECK_CHARACTER (ch
);
2013 charset
= CHAR_CHARSET (c
);
2016 code
= ENCODE_CHAR (charset
, c
);
2017 if (code
== CHARSET_INVALID_CODE (charset
))
2019 dimension
= CHARSET_DIMENSION (charset
);
2020 for (val
= Qnil
; dimension
> 0; dimension
--)
2022 val
= Fcons (make_number (code
& 0xFF), val
);
2025 return Fcons (CHARSET_NAME (charset
), val
);
2029 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2030 doc
: /* Return the charset of highest priority that contains CH.
2031 ASCII characters are an exception: for them, this function always
2033 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2034 from which to find the charset. It may also be a coding system. In
2035 that case, find the charset from what supported by that coding system. */)
2036 (Lisp_Object ch
, Lisp_Object restriction
)
2038 struct charset
*charset
;
2040 CHECK_CHARACTER (ch
);
2041 if (NILP (restriction
))
2042 charset
= CHAR_CHARSET (XINT (ch
));
2045 if (CONSP (restriction
))
2047 int c
= XFASTINT (ch
);
2049 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2051 struct charset
*rcharset
;
2053 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), rcharset
);
2054 if (ENCODE_CHAR (rcharset
, c
) != CHARSET_INVALID_CODE (rcharset
))
2055 return XCAR (restriction
);
2059 restriction
= coding_system_charset_list (restriction
);
2060 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2064 return (CHARSET_NAME (charset
));
2068 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2070 Return charset of a character in the current buffer at position POS.
2071 If POS is nil, it defaults to the current point.
2072 If POS is out of range, the value is nil. */)
2076 struct charset
*charset
;
2078 ch
= Fchar_after (pos
);
2079 if (! INTEGERP (ch
))
2081 charset
= CHAR_CHARSET (XINT (ch
));
2082 return (CHARSET_NAME (charset
));
2086 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2088 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2090 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2091 by their DIMENSION, CHARS, and FINAL-CHAR,
2092 whereas Emacs distinguishes them by charset symbol.
2093 See the documentation of the function `charset-info' for the meanings of
2094 DIMENSION, CHARS, and FINAL-CHAR. */)
2095 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
2097 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
2098 int id
= ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
,
2099 XFASTINT (final_char
));
2100 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2104 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2108 Clear temporary charset mapping tables.
2109 It should be called only from temacs invoked for dumping. */)
2112 if (temp_charset_work
)
2114 xfree (temp_charset_work
);
2115 temp_charset_work
= NULL
;
2118 if (CHAR_TABLE_P (Vchar_unify_table
))
2119 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2124 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2125 Scharset_priority_list
, 0, 1, 0,
2126 doc
: /* Return the list of charsets ordered by priority.
2127 HIGHESTP non-nil means just return the highest priority one. */)
2128 (Lisp_Object highestp
)
2130 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2132 if (!NILP (highestp
))
2133 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2135 while (!NILP (list
))
2137 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2140 return Fnreverse (val
);
2143 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2145 doc
: /* Assign higher priority to the charsets given as arguments.
2146 usage: (set-charset-priority &rest charsets) */)
2147 (ptrdiff_t nargs
, Lisp_Object
*args
)
2149 Lisp_Object new_head
, old_list
;
2150 Lisp_Object list_2022
, list_emacs_mule
;
2154 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2156 for (i
= 0; i
< nargs
; i
++)
2158 CHECK_CHARSET_GET_ID (args
[i
], id
);
2159 if (! NILP (Fmemq (make_number (id
), old_list
)))
2161 old_list
= Fdelq (make_number (id
), old_list
);
2162 new_head
= Fcons (make_number (id
), new_head
);
2165 Vcharset_non_preferred_head
= old_list
;
2166 Vcharset_ordered_list
= CALLN (Fnconc
, Fnreverse (new_head
), old_list
);
2168 charset_ordered_list_tick
++;
2170 charset_unibyte
= -1;
2171 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2172 CONSP (old_list
); old_list
= XCDR (old_list
))
2174 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2175 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2176 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2177 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2178 if (charset_unibyte
< 0)
2180 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2182 if (CHARSET_DIMENSION (charset
) == 1
2183 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2184 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2185 charset_unibyte
= CHARSET_ID (charset
);
2188 Viso_2022_charset_list
= Fnreverse (list_2022
);
2189 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2190 if (charset_unibyte
< 0)
2191 charset_unibyte
= charset_iso_8859_1
;
2196 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2198 doc
: /* Internal use only.
2199 Return charset identification number of CHARSET. */)
2200 (Lisp_Object charset
)
2204 CHECK_CHARSET_GET_ID (charset
, id
);
2205 return make_number (id
);
2208 struct charset_sort_data
2210 Lisp_Object charset
;
2216 charset_compare (const void *d1
, const void *d2
)
2218 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2219 if (data1
->priority
!= data2
->priority
)
2220 return data1
->priority
< data2
->priority
? -1 : 1;
2224 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2225 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2226 Return the sorted list. CHARSETS is modified by side effects.
2227 See also `charset-priority-list' and `set-charset-priority'. */)
2228 (Lisp_Object charsets
)
2230 Lisp_Object len
= Flength (charsets
);
2231 ptrdiff_t n
= XFASTINT (len
), i
, j
;
2233 Lisp_Object tail
, elt
, attrs
;
2234 struct charset_sort_data
*sort_data
;
2235 int id
, min_id
= INT_MAX
, max_id
= INT_MIN
;
2240 SAFE_NALLOCA (sort_data
, 1, n
);
2241 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2244 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2245 sort_data
[i
].charset
= elt
;
2246 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2252 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2253 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2256 id
= XFASTINT (elt
);
2257 if (id
>= min_id
&& id
<= max_id
)
2258 for (j
= 0; j
< n
; j
++)
2259 if (sort_data
[j
].id
== id
)
2261 sort_data
[j
].priority
= i
;
2265 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2266 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2267 XSETCAR (tail
, sort_data
[i
].charset
);
2276 Lisp_Object tempdir
;
2277 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2278 if (! file_accessible_directory_p (tempdir
))
2280 /* This used to be non-fatal (dir_warning), but it should not
2281 happen, and if it does sooner or later it will cause some
2282 obscure problem (eg bug#6401), so better abort. */
2283 fprintf (stderr
, "Error: charsets directory not found:\n\
2285 Emacs will not function correctly without the character map files.\n%s\
2286 Please check your installation!\n",
2288 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2289 variable is set, maybe it has the wrong value?\n" : "");
2293 Vcharset_map_path
= list1 (tempdir
);
2298 init_charset_once (void)
2302 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2303 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2304 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2305 iso_charset_table
[i
][j
][k
] = -1;
2307 for (i
= 0; i
< 256; i
++)
2308 emacs_mule_charset
[i
] = -1;
2310 charset_jisx0201_roman
= -1;
2311 charset_jisx0208_1978
= -1;
2312 charset_jisx0208
= -1;
2313 charset_ksc5601
= -1;
2318 /* Allocate an initial charset table that is large enough to handle
2319 Emacs while it is bootstrapping. As of September 2011, the size
2320 needs to be at least 166; make it a bit bigger to allow for future
2323 Don't make the value so small that the table is reallocated during
2324 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2325 during an initial bootstrap wreak havoc after dumping; see the
2326 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2327 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2328 static struct charset charset_table_init
[180];
2331 syms_of_charset (void)
2333 DEFSYM (Qcharsetp
, "charsetp");
2335 /* Special charset symbols. */
2336 DEFSYM (Qascii
, "ascii");
2337 DEFSYM (Qunicode
, "unicode");
2338 DEFSYM (Qemacs
, "emacs");
2339 DEFSYM (Qeight_bit
, "eight-bit");
2340 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2342 /* Value of charset attribute `charset-iso-plane'. */
2346 staticpro (&Vcharset_ordered_list
);
2347 Vcharset_ordered_list
= Qnil
;
2349 staticpro (&Viso_2022_charset_list
);
2350 Viso_2022_charset_list
= Qnil
;
2352 staticpro (&Vemacs_mule_charset_list
);
2353 Vemacs_mule_charset_list
= Qnil
;
2355 staticpro (&Vcharset_hash_table
);
2356 Vcharset_hash_table
= CALLN (Fmake_hash_table
, QCtest
, Qeq
);
2358 charset_table
= charset_table_init
;
2359 charset_table_size
= ARRAYELTS (charset_table_init
);
2360 charset_table_used
= 0;
2362 defsubr (&Scharsetp
);
2363 defsubr (&Smap_charset_chars
);
2364 defsubr (&Sdefine_charset_internal
);
2365 defsubr (&Sdefine_charset_alias
);
2366 defsubr (&Scharset_plist
);
2367 defsubr (&Sset_charset_plist
);
2368 defsubr (&Sunify_charset
);
2369 defsubr (&Sget_unused_iso_final_char
);
2370 defsubr (&Sdeclare_equiv_charset
);
2371 defsubr (&Sfind_charset_region
);
2372 defsubr (&Sfind_charset_string
);
2373 defsubr (&Sdecode_char
);
2374 defsubr (&Sencode_char
);
2375 defsubr (&Ssplit_char
);
2376 defsubr (&Smake_char
);
2377 defsubr (&Schar_charset
);
2378 defsubr (&Scharset_after
);
2379 defsubr (&Siso_charset
);
2380 defsubr (&Sclear_charset_maps
);
2381 defsubr (&Scharset_priority_list
);
2382 defsubr (&Sset_charset_priority
);
2383 defsubr (&Scharset_id_internal
);
2384 defsubr (&Ssort_charsets
);
2386 DEFVAR_LISP ("charset-map-path", Vcharset_map_path
,
2387 doc
: /* List of directories to search for charset map files. */);
2388 Vcharset_map_path
= Qnil
;
2390 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map
,
2391 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2392 inhibit_load_charset_map
= 0;
2394 DEFVAR_LISP ("charset-list", Vcharset_list
,
2395 doc
: /* List of all charsets ever defined. */);
2396 Vcharset_list
= Qnil
;
2398 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language
,
2399 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2400 If the current language environment is for multiple languages (e.g. "Latin-1"),
2401 the value may be a list of mnemonics. */);
2402 Vcurrent_iso639_language
= Qnil
;
2405 = define_charset_internal (Qascii
, 1, "\x00\x7F\0\0\0\0\0",
2406 0, 127, 'B', -1, 0, 1, 0, 0);
2408 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\0\0\0\0\0",
2409 0, 255, -1, -1, -1, 1, 0, 0);
2411 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2412 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2414 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2415 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2417 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\0\0\0\0\0",
2418 128, 255, -1, 0, -1, 0, 1,
2419 MAX_5_BYTE_CHAR
+ 1);
2420 charset_unibyte
= charset_iso_8859_1
;