1 /* Basic character set support.
3 Copyright (C) 2001-2018 Free Software Foundation, Inc.
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
14 This file is part of GNU Emacs.
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or (at
19 your option) any later version.
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
35 #include <sys/types.h>
37 #include "character.h"
43 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
45 A coded character set ("charset" hereafter) is a meaningful
46 collection (i.e. language, culture, functionality, etc.) of
47 characters. Emacs handles multiple charsets at once. In Emacs Lisp
48 code, a charset is represented by a symbol. In C code, a charset is
49 represented by its ID number or by a pointer to a struct charset.
51 The actual information about each charset is stored in two places.
52 Lispy information is stored in the hash table Vcharset_hash_table as
53 a vector (charset attributes). The other information is stored in
54 charset_table as a struct charset.
58 /* Hash table that contains attributes of each charset. Keys are
59 charset symbols, and values are vectors of charset attributes. */
60 Lisp_Object Vcharset_hash_table
;
62 /* Table of struct charset. */
63 struct charset
*charset_table
;
65 static ptrdiff_t charset_table_size
;
66 static int charset_table_used
;
68 /* Special charsets corresponding to symbols. */
70 int charset_eight_bit
;
71 static int charset_iso_8859_1
;
73 static int charset_emacs
;
75 /* The other special charsets. */
76 int charset_jisx0201_roman
;
77 int charset_jisx0208_1978
;
81 /* Charset of unibyte characters. */
84 /* List of charsets ordered by the priority. */
85 Lisp_Object Vcharset_ordered_list
;
87 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
89 Lisp_Object Vcharset_non_preferred_head
;
91 /* Incremented every time we change the priority of charsets.
93 EMACS_UINT charset_ordered_list_tick
;
95 /* List of iso-2022 charsets. */
96 Lisp_Object Viso_2022_charset_list
;
98 /* List of emacs-mule charsets. */
99 Lisp_Object Vemacs_mule_charset_list
;
101 int emacs_mule_charset
[256];
103 /* Mapping table from ISO2022's charset (specified by DIMENSION,
104 CHARS, and FINAL-CHAR) to Emacs' charset. */
105 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
107 #define CODE_POINT_TO_INDEX(charset, code) \
108 ((charset)->code_linear_p \
109 ? (int) ((code) - (charset)->min_code) \
110 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
111 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
112 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
113 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
114 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
115 * (charset)->code_space[11]) \
116 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
117 * (charset)->code_space[7]) \
118 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
119 * (charset)->code_space[3]) \
120 + (((code) & 0xFF) - (charset)->code_space[0]) \
121 - ((charset)->char_index_offset)) \
125 /* Return the code-point for the character index IDX in CHARSET.
126 IDX should be an unsigned int variable in a valid range (which is
127 always in nonnegative int range too). IDX contains garbage afterwards. */
129 #define INDEX_TO_CODE_POINT(charset, idx) \
130 ((charset)->code_linear_p \
131 ? (idx) + (charset)->min_code \
132 : (idx += (charset)->char_index_offset, \
133 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
134 | (((charset)->code_space[4] \
135 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
137 | (((charset)->code_space[8] \
138 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
140 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
143 /* Structure to hold mapping tables for a charset. Used by temacs
144 invoked for dumping. */
148 /* The current charset for which the following tables are setup. */
149 struct charset
*current
;
151 /* 1 iff the following table is used for encoder. */
154 /* When the following table is used for encoding, minimum and
155 maximum character of the current charset. */
156 int min_char
, max_char
;
158 /* A Unicode character corresponding to the code index 0 (i.e. the
159 minimum code-point) of the current charset, or -1 if the code
160 index 0 is not a Unicode character. This is checked when
161 table.encoder[CHAR] is zero. */
165 /* Table mapping code-indices (not code-points) of the current
166 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
167 doesn't belong to the current charset. */
168 int decoder
[0x10000];
169 /* Table mapping Unicode characters to code-indices of the current
170 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
171 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
172 (0x20000..0x2FFFF). Note that there is no charset map that
173 uses both SMP and SIP. */
174 unsigned short encoder
[0x20000];
176 } *temp_charset_work
;
178 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
181 temp_charset_work->zero_index_char = (C); \
182 else if ((C) < 0x20000) \
183 temp_charset_work->table.encoder[(C)] = (CODE); \
185 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
188 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
189 ((C) == temp_charset_work->zero_index_char ? 0 \
190 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
191 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
192 : temp_charset_work->table.encoder[(C) - 0x10000] \
193 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
195 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
196 (temp_charset_work->table.decoder[(CODE)] = (C))
198 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
199 (temp_charset_work->table.decoder[(CODE)])
202 /* Set to 1 to warn that a charset map is loaded and thus a buffer
203 text and a string data may be relocated. */
204 bool charset_map_loaded
;
206 struct charset_map_entries
212 struct charset_map_entries
*next
;
215 /* Load the mapping information of CHARSET from ENTRIES for
216 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
217 encoding (CONTROL_FLAG == 2).
219 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
220 and CHARSET->fast_map.
222 If CONTROL_FLAG is 1, setup the following tables according to
223 CHARSET->method and inhibit_load_charset_map.
225 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
226 ----------------------+--------------------+---------------------------
227 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
228 ----------------------+--------------------+---------------------------
229 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
231 If CONTROL_FLAG is 2, setup the following tables.
233 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
234 ----------------------+--------------------+---------------------------
235 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
236 ----------------------+--------------------+--------------------------
237 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
241 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
243 Lisp_Object vec UNINIT
;
244 Lisp_Object table UNINIT
;
245 unsigned max_code
= CHARSET_MAX_CODE (charset
);
246 bool ascii_compatible_p
= charset
->ascii_compatible_p
;
247 int min_char
, max_char
, nonascii_min_char
;
249 unsigned char *fast_map
= charset
->fast_map
;
256 if (! inhibit_load_charset_map
)
258 if (control_flag
== 1)
260 if (charset
->method
== CHARSET_METHOD_MAP
)
262 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
264 vec
= Fmake_vector (make_number (n
), make_number (-1));
265 set_charset_attr (charset
, charset_decoder
, vec
);
269 char_table_set_range (Vchar_unify_table
,
270 charset
->min_char
, charset
->max_char
,
276 table
= Fmake_char_table (Qnil
, Qnil
);
277 set_charset_attr (charset
,
278 (charset
->method
== CHARSET_METHOD_MAP
279 ? charset_encoder
: charset_deunifier
),
285 if (! temp_charset_work
)
286 temp_charset_work
= xmalloc (sizeof *temp_charset_work
);
287 if (control_flag
== 1)
289 memset (temp_charset_work
->table
.decoder
, -1,
290 sizeof (int) * 0x10000);
294 memset (temp_charset_work
->table
.encoder
, 0,
295 sizeof (unsigned short) * 0x20000);
296 temp_charset_work
->zero_index_char
= -1;
298 temp_charset_work
->current
= charset
;
299 temp_charset_work
->for_encoder
= (control_flag
== 2);
302 charset_map_loaded
= 1;
305 min_char
= max_char
= entries
->entry
[0].c
;
306 nonascii_min_char
= MAX_CHAR
;
307 for (i
= 0; i
< n_entries
; i
++)
310 int from_index
, to_index
, lim_index
;
312 int idx
= i
% 0x10000;
314 if (i
> 0 && idx
== 0)
315 entries
= entries
->next
;
316 from
= entries
->entry
[idx
].from
;
317 to
= entries
->entry
[idx
].to
;
318 from_c
= entries
->entry
[idx
].c
;
319 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
322 to_index
= from_index
;
327 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
328 to_c
= from_c
+ (to_index
- from_index
);
330 if (from_index
< 0 || to_index
< 0)
332 lim_index
= to_index
+ 1;
336 else if (from_c
< min_char
)
339 if (control_flag
== 1)
341 if (charset
->method
== CHARSET_METHOD_MAP
)
342 for (; from_index
< lim_index
; from_index
++, from_c
++)
343 ASET (vec
, from_index
, make_number (from_c
));
345 for (; from_index
< lim_index
; from_index
++, from_c
++)
346 CHAR_TABLE_SET (Vchar_unify_table
,
347 CHARSET_CODE_OFFSET (charset
) + from_index
,
348 make_number (from_c
));
350 else if (control_flag
== 2)
352 if (charset
->method
== CHARSET_METHOD_MAP
353 && CHARSET_COMPACT_CODES_P (charset
))
354 for (; from_index
< lim_index
; from_index
++, from_c
++)
356 unsigned code
= from_index
;
357 code
= INDEX_TO_CODE_POINT (charset
, code
);
359 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
360 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
363 for (; from_index
< lim_index
; from_index
++, from_c
++)
365 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
366 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
369 else if (control_flag
== 3)
370 for (; from_index
< lim_index
; from_index
++, from_c
++)
371 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
372 else if (control_flag
== 4)
373 for (; from_index
< lim_index
; from_index
++, from_c
++)
374 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
375 else /* control_flag == 0 */
377 if (ascii_compatible_p
)
379 if (! ASCII_CHAR_P (from_c
))
381 if (from_c
< nonascii_min_char
)
382 nonascii_min_char
= from_c
;
384 else if (! ASCII_CHAR_P (to_c
))
386 nonascii_min_char
= 0x80;
390 for (; from_c
<= to_c
; from_c
++)
391 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
395 if (control_flag
== 0)
397 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
398 ? nonascii_min_char
: min_char
);
399 CHARSET_MAX_CHAR (charset
) = max_char
;
401 else if (control_flag
== 4)
403 temp_charset_work
->min_char
= min_char
;
404 temp_charset_work
->max_char
= max_char
;
409 /* Read a hexadecimal number (preceded by "0x") from the file FP while
410 paying attention to comment character '#'. LOOKAHEAD is the
411 lookahead byte if it is nonnegative. Store into *TERMINATOR the
412 input byte after the number, or EOF if an end-of-file or input
413 error occurred. Set *OVERFLOW if the number overflows. */
416 read_hex (FILE *fp
, int lookahead
, int *terminator
, bool *overflow
)
418 int c
= lookahead
< 0 ? getc_unlocked (fp
) : lookahead
;
424 c
= getc_unlocked (fp
);
425 while (0 <= c
&& c
!= '\n');
428 c
= getc_unlocked (fp
);
429 if (c
< 0 || c
== 'x')
434 c
= getc_unlocked (fp
);
443 c
= getc_unlocked (fp
);
444 int digit
= char_hexdigit (c
);
447 v
|= INT_LEFT_SHIFT_OVERFLOW (n
, 4);
448 n
= (n
<< 4) + digit
;
456 /* Return a mapping vector for CHARSET loaded from MAPFILE.
457 Each line of MAPFILE has this form
459 where 0xAAAA is a code-point and 0xCCCC is the corresponding
460 character code, or this form
462 where 0xAAAA and 0xBBBB are code-points specifying a range, and
463 0xCCCC is the first character code of the range.
465 The returned vector has this form:
466 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
467 where CODE1 is a code-point or a cons of code-points specifying a
470 Note that this function uses `openp' to open MAPFILE but ignores
471 `file-name-handler-alist' to avoid running any Lisp code. */
474 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
,
477 unsigned min_code
= CHARSET_MIN_CODE (charset
);
478 unsigned max_code
= CHARSET_MAX_CODE (charset
);
481 struct charset_map_entries
*head
, *entries
;
483 AUTO_STRING (map
, ".map");
484 AUTO_STRING (txt
, ".txt");
485 AUTO_LIST2 (suffixes
, map
, txt
);
486 ptrdiff_t count
= SPECPDL_INDEX ();
487 record_unwind_protect_nothing ();
488 specbind (Qfile_name_handler_alist
, Qnil
);
489 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
, false);
490 fp
= fd
< 0 ? 0 : fdopen (fd
, "r");
493 int open_errno
= errno
;
495 report_file_errno ("Loading charset map", mapfile
, open_errno
);
497 set_unwind_protect_ptr (count
, fclose_unwind
, fp
);
498 unbind_to (count
+ 1, Qnil
);
500 /* Use record_xmalloc, as `charset_map_entries' is
501 large (larger than MAX_ALLOCA). */
502 head
= record_xmalloc (sizeof *head
);
504 memset (entries
, 0, sizeof (struct charset_map_entries
));
510 bool overflow
= false;
511 unsigned from
= read_hex (fp
, ch
, &ch
, &overflow
), to
;
516 to
= read_hex (fp
, -1, &ch
, &overflow
);
525 unsigned c
= read_hex (fp
, ch
, &ch
, &overflow
);
531 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
534 if (n_entries
== 0x10000)
536 entries
->next
= record_xmalloc (sizeof *entries
->next
);
537 entries
= entries
->next
;
538 memset (entries
, 0, sizeof (struct charset_map_entries
));
542 entries
->entry
[idx
].from
= from
;
543 entries
->entry
[idx
].to
= to
;
544 entries
->entry
[idx
].c
= c
;
548 clear_unwind_protect (count
);
550 load_charset_map (charset
, head
, n_entries
, control_flag
);
551 unbind_to (count
, Qnil
);
555 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
557 unsigned min_code
= CHARSET_MIN_CODE (charset
);
558 unsigned max_code
= CHARSET_MAX_CODE (charset
);
559 struct charset_map_entries
*head
, *entries
;
561 int len
= ASIZE (vec
);
567 add_to_log ("Failure in loading charset map: %V", vec
);
571 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
572 large (larger than MAX_ALLOCA). */
573 head
= SAFE_ALLOCA (sizeof *head
);
575 memset (entries
, 0, sizeof (struct charset_map_entries
));
578 for (i
= 0; i
< len
; i
+= 2)
580 Lisp_Object val
, val2
;
590 from
= XFASTINT (val
);
591 to
= XFASTINT (val2
);
594 from
= to
= XFASTINT (val
);
595 val
= AREF (vec
, i
+ 1);
599 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
602 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
604 entries
->next
= SAFE_ALLOCA (sizeof *entries
->next
);
605 entries
= entries
->next
;
606 memset (entries
, 0, sizeof (struct charset_map_entries
));
608 idx
= n_entries
% 0x10000;
609 entries
->entry
[idx
].from
= from
;
610 entries
->entry
[idx
].to
= to
;
611 entries
->entry
[idx
].c
= c
;
615 load_charset_map (charset
, head
, n_entries
, control_flag
);
620 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
621 map it is (see the comment of load_charset_map for the detail). */
624 load_charset (struct charset
*charset
, int control_flag
)
628 if (inhibit_load_charset_map
630 && charset
== temp_charset_work
->current
631 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
634 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
635 map
= CHARSET_MAP (charset
);
638 if (! CHARSET_UNIFIED_P (charset
))
640 map
= CHARSET_UNIFY_MAP (charset
);
643 load_charset_map_from_file (charset
, map
, control_flag
);
645 load_charset_map_from_vector (charset
, map
, control_flag
);
649 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
650 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
653 return (CHARSETP (object
) ? Qt
: Qnil
);
658 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
659 Lisp_Object function
, Lisp_Object arg
,
660 unsigned int from
, unsigned int to
)
662 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
663 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
664 Lisp_Object range
= Fcons (Qnil
, Qnil
);
667 c
= temp_charset_work
->min_char
;
668 stop
= (temp_charset_work
->max_char
< 0x20000
669 ? temp_charset_work
->max_char
: 0xFFFF);
673 int idx
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
675 if (idx
>= from_idx
&& idx
<= to_idx
)
677 if (NILP (XCAR (range
)))
678 XSETCAR (range
, make_number (c
));
680 else if (! NILP (XCAR (range
)))
682 XSETCDR (range
, make_number (c
- 1));
684 (*c_function
) (arg
, range
);
686 call2 (function
, range
, arg
);
687 XSETCAR (range
, Qnil
);
691 if (c
== temp_charset_work
->max_char
)
693 if (! NILP (XCAR (range
)))
695 XSETCDR (range
, make_number (c
));
697 (*c_function
) (arg
, range
);
699 call2 (function
, range
, arg
);
704 stop
= temp_charset_work
->max_char
;
711 map_charset_chars (void (*c_function
)(Lisp_Object
, Lisp_Object
), Lisp_Object function
,
712 Lisp_Object arg
, struct charset
*charset
, unsigned from
, unsigned to
)
715 bool partial
= (from
> CHARSET_MIN_CODE (charset
)
716 || to
< CHARSET_MAX_CODE (charset
));
718 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
720 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
721 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
722 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
723 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
725 if (CHARSET_UNIFIED_P (charset
))
727 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
728 load_charset (charset
, 2);
729 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
730 map_char_table_for_charset (c_function
, function
,
731 CHARSET_DEUNIFIER (charset
), arg
,
732 partial
? charset
: NULL
, from
, to
);
734 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
737 range
= Fcons (make_number (from_c
), make_number (to_c
));
739 (*c_function
) (arg
, range
);
741 call2 (function
, range
, arg
);
743 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
745 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
746 load_charset (charset
, 2);
747 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
748 map_char_table_for_charset (c_function
, function
,
749 CHARSET_ENCODER (charset
), arg
,
750 partial
? charset
: NULL
, from
, to
);
752 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
754 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
756 Lisp_Object subset_info
;
759 subset_info
= CHARSET_SUBSET (charset
);
760 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
761 offset
= XINT (AREF (subset_info
, 3));
763 if (from
< XFASTINT (AREF (subset_info
, 1)))
764 from
= XFASTINT (AREF (subset_info
, 1));
766 if (to
> XFASTINT (AREF (subset_info
, 2)))
767 to
= XFASTINT (AREF (subset_info
, 2));
768 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
770 else /* i.e. CHARSET_METHOD_SUPERSET */
774 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
775 parents
= XCDR (parents
))
778 unsigned this_from
, this_to
;
780 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
781 offset
= XINT (XCDR (XCAR (parents
)));
782 this_from
= from
> offset
? from
- offset
: 0;
783 this_to
= to
> offset
? to
- offset
: 0;
784 if (this_from
< CHARSET_MIN_CODE (charset
))
785 this_from
= CHARSET_MIN_CODE (charset
);
786 if (this_to
> CHARSET_MAX_CODE (charset
))
787 this_to
= CHARSET_MAX_CODE (charset
);
788 map_charset_chars (c_function
, function
, arg
, charset
,
794 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
795 doc
: /* Call FUNCTION for all characters in CHARSET.
796 FUNCTION is called with an argument RANGE and the optional 3rd
799 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
800 characters contained in CHARSET.
802 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
803 range of code points (in CHARSET) of target characters. */)
804 (Lisp_Object function
, Lisp_Object charset
, Lisp_Object arg
, Lisp_Object from_code
, Lisp_Object to_code
)
809 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
810 if (NILP (from_code
))
811 from
= CHARSET_MIN_CODE (cs
);
814 from
= XINT (from_code
);
815 if (from
< CHARSET_MIN_CODE (cs
))
816 from
= CHARSET_MIN_CODE (cs
);
819 to
= CHARSET_MAX_CODE (cs
);
823 if (to
> CHARSET_MAX_CODE (cs
))
824 to
= CHARSET_MAX_CODE (cs
);
826 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
831 /* Define a charset according to the arguments. The Nth argument is
832 the Nth attribute of the charset (the last attribute `charset-id'
833 is not included). See the docstring of `define-charset' for the
836 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
837 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
838 doc
: /* For internal use only.
839 usage: (define-charset-internal ...) */)
840 (ptrdiff_t nargs
, Lisp_Object
*args
)
842 /* Charset attr vector. */
845 EMACS_UINT hash_code
;
846 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
848 struct charset charset
;
851 bool new_definition_p
;
854 if (nargs
!= charset_arg_max
)
855 Fsignal (Qwrong_number_of_arguments
,
856 Fcons (intern ("define-charset-internal"),
857 make_number (nargs
)));
859 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
861 CHECK_SYMBOL (args
[charset_arg_name
]);
862 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
864 val
= args
[charset_arg_code_space
];
865 for (i
= 0, dimension
= 0, nchars
= 1; ; i
++)
867 Lisp_Object min_byte_obj
, max_byte_obj
;
868 int min_byte
, max_byte
;
870 min_byte_obj
= Faref (val
, make_number (i
* 2));
871 max_byte_obj
= Faref (val
, make_number (i
* 2 + 1));
872 CHECK_RANGED_INTEGER (min_byte_obj
, 0, 255);
873 min_byte
= XINT (min_byte_obj
);
874 CHECK_RANGED_INTEGER (max_byte_obj
, min_byte
, 255);
875 max_byte
= XINT (max_byte_obj
);
876 charset
.code_space
[i
* 4] = min_byte
;
877 charset
.code_space
[i
* 4 + 1] = max_byte
;
878 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
883 nchars
*= charset
.code_space
[i
* 4 + 2];
884 charset
.code_space
[i
* 4 + 3] = nchars
;
887 val
= args
[charset_arg_dimension
];
889 charset
.dimension
= dimension
;
892 CHECK_RANGED_INTEGER (val
, 1, 4);
893 charset
.dimension
= XINT (val
);
896 charset
.code_linear_p
897 = (charset
.dimension
== 1
898 || (charset
.code_space
[2] == 256
899 && (charset
.dimension
== 2
900 || (charset
.code_space
[6] == 256
901 && (charset
.dimension
== 3
902 || charset
.code_space
[10] == 256)))));
904 if (! charset
.code_linear_p
)
906 charset
.code_space_mask
= xzalloc (256);
907 for (i
= 0; i
< 4; i
++)
908 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
910 charset
.code_space_mask
[j
] |= (1 << i
);
913 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
915 charset
.min_code
= (charset
.code_space
[0]
916 | (charset
.code_space
[4] << 8)
917 | (charset
.code_space
[8] << 16)
918 | ((unsigned) charset
.code_space
[12] << 24));
919 charset
.max_code
= (charset
.code_space
[1]
920 | (charset
.code_space
[5] << 8)
921 | (charset
.code_space
[9] << 16)
922 | ((unsigned) charset
.code_space
[13] << 24));
923 charset
.char_index_offset
= 0;
925 val
= args
[charset_arg_min_code
];
928 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
930 if (code
< charset
.min_code
931 || code
> charset
.max_code
)
932 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
933 make_fixnum_or_float (charset
.max_code
), val
);
934 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
935 charset
.min_code
= code
;
938 val
= args
[charset_arg_max_code
];
941 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
943 if (code
< charset
.min_code
944 || code
> charset
.max_code
)
945 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
946 make_fixnum_or_float (charset
.max_code
), val
);
947 charset
.max_code
= code
;
950 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
952 val
= args
[charset_arg_invalid_code
];
955 if (charset
.min_code
> 0)
956 charset
.invalid_code
= 0;
959 if (charset
.max_code
< UINT_MAX
)
960 charset
.invalid_code
= charset
.max_code
+ 1;
962 error ("Attribute :invalid-code must be specified");
966 charset
.invalid_code
= cons_to_unsigned (val
, UINT_MAX
);
968 val
= args
[charset_arg_iso_final
];
970 charset
.iso_final
= -1;
974 if (XINT (val
) < '0' || XINT (val
) > 127)
975 error ("Invalid iso-final-char: %"pI
"d", XINT (val
));
976 charset
.iso_final
= XINT (val
);
979 val
= args
[charset_arg_iso_revision
];
981 charset
.iso_revision
= -1;
984 CHECK_RANGED_INTEGER (val
, -1, 63);
985 charset
.iso_revision
= XINT (val
);
988 val
= args
[charset_arg_emacs_mule_id
];
990 charset
.emacs_mule_id
= -1;
994 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
995 error ("Invalid emacs-mule-id: %"pI
"d", XINT (val
));
996 charset
.emacs_mule_id
= XINT (val
);
999 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1001 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1003 charset
.unified_p
= 0;
1005 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
1007 if (! NILP (args
[charset_arg_code_offset
]))
1009 val
= args
[charset_arg_code_offset
];
1010 CHECK_CHARACTER (val
);
1012 charset
.method
= CHARSET_METHOD_OFFSET
;
1013 charset
.code_offset
= XINT (val
);
1015 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1016 if (MAX_CHAR
- charset
.code_offset
< i
)
1017 error ("Unsupported max char: %d", charset
.max_char
);
1018 charset
.max_char
= i
+ charset
.code_offset
;
1019 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1020 charset
.min_char
= i
+ charset
.code_offset
;
1022 i
= (charset
.min_char
>> 7) << 7;
1023 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1024 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1025 i
= (i
>> 12) << 12;
1026 for (; i
<= charset
.max_char
; i
+= 0x1000)
1027 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1028 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1029 charset
.ascii_compatible_p
= 1;
1031 else if (! NILP (args
[charset_arg_map
]))
1033 val
= args
[charset_arg_map
];
1034 ASET (attrs
, charset_map
, val
);
1035 charset
.method
= CHARSET_METHOD_MAP
;
1037 else if (! NILP (args
[charset_arg_subset
]))
1040 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1041 struct charset
*parent_charset
;
1043 val
= args
[charset_arg_subset
];
1044 parent
= Fcar (val
);
1045 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1046 parent_min_code
= Fnth (make_number (1), val
);
1047 CHECK_NATNUM (parent_min_code
);
1048 parent_max_code
= Fnth (make_number (2), val
);
1049 CHECK_NATNUM (parent_max_code
);
1050 parent_code_offset
= Fnth (make_number (3), val
);
1051 CHECK_NUMBER (parent_code_offset
);
1052 val
= make_uninit_vector (4);
1053 ASET (val
, 0, make_number (parent_charset
->id
));
1054 ASET (val
, 1, parent_min_code
);
1055 ASET (val
, 2, parent_max_code
);
1056 ASET (val
, 3, parent_code_offset
);
1057 ASET (attrs
, charset_subset
, val
);
1059 charset
.method
= CHARSET_METHOD_SUBSET
;
1060 /* Here, we just copy the parent's fast_map. It's not accurate,
1061 but at least it works for quickly detecting which character
1062 DOESN'T belong to this charset. */
1063 memcpy (charset
.fast_map
, parent_charset
->fast_map
,
1064 sizeof charset
.fast_map
);
1066 /* We also copy these for parents. */
1067 charset
.min_char
= parent_charset
->min_char
;
1068 charset
.max_char
= parent_charset
->max_char
;
1070 else if (! NILP (args
[charset_arg_superset
]))
1072 val
= args
[charset_arg_superset
];
1073 charset
.method
= CHARSET_METHOD_SUPERSET
;
1074 val
= Fcopy_sequence (val
);
1075 ASET (attrs
, charset_superset
, val
);
1077 charset
.min_char
= MAX_CHAR
;
1078 charset
.max_char
= 0;
1079 for (; ! NILP (val
); val
= Fcdr (val
))
1081 Lisp_Object elt
, car_part
, cdr_part
;
1082 int this_id
, offset
;
1083 struct charset
*this_charset
;
1088 car_part
= XCAR (elt
);
1089 cdr_part
= XCDR (elt
);
1090 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1091 CHECK_TYPE_RANGED_INTEGER (int, cdr_part
);
1092 offset
= XINT (cdr_part
);
1096 CHECK_CHARSET_GET_ID (elt
, this_id
);
1099 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1101 this_charset
= CHARSET_FROM_ID (this_id
);
1102 if (charset
.min_char
> this_charset
->min_char
)
1103 charset
.min_char
= this_charset
->min_char
;
1104 if (charset
.max_char
< this_charset
->max_char
)
1105 charset
.max_char
= this_charset
->max_char
;
1106 for (i
= 0; i
< 190; i
++)
1107 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1111 error ("None of :code-offset, :map, :parents are specified");
1113 val
= args
[charset_arg_unify_map
];
1114 if (! NILP (val
) && !STRINGP (val
))
1116 ASET (attrs
, charset_unify_map
, val
);
1118 CHECK_LIST (args
[charset_arg_plist
]);
1119 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1121 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1123 if (charset
.hash_index
>= 0)
1125 new_definition_p
= 0;
1126 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1127 set_hash_value_slot (hash_table
, charset
.hash_index
, attrs
);
1131 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1133 if (charset_table_used
== charset_table_size
)
1135 /* Ensure that charset IDs fit into 'int' as well as into the
1136 restriction imposed by fixnums. Although the 'int' restriction
1137 could be removed, too much other code would need altering; for
1138 example, the IDs are stuffed into struct
1139 coding_system.charbuf[i] entries, which are 'int'. */
1140 int old_size
= charset_table_size
;
1141 ptrdiff_t new_size
= old_size
;
1142 struct charset
*new_table
=
1143 xpalloc (0, &new_size
, 1,
1144 min (INT_MAX
, MOST_POSITIVE_FIXNUM
),
1145 sizeof *charset_table
);
1146 memcpy (new_table
, charset_table
, old_size
* sizeof *new_table
);
1147 charset_table
= new_table
;
1148 charset_table_size
= new_size
;
1149 /* FIXME: This leaks memory, as the old charset_table becomes
1150 unreachable. If the old charset table is charset_table_init
1151 then this leak is intentional; otherwise, it's unclear.
1152 If the latter memory leak is intentional, a
1153 comment should be added to explain this. If not, the old
1154 charset_table should be freed, by passing it as the 1st argument
1155 to xpalloc and removing the memcpy. */
1157 id
= charset_table_used
++;
1158 new_definition_p
= 1;
1161 ASET (attrs
, charset_id
, make_number (id
));
1163 charset_table
[id
] = charset
;
1165 if (charset
.method
== CHARSET_METHOD_MAP
)
1167 load_charset (&charset
, 0);
1168 charset_table
[id
] = charset
;
1171 if (charset
.iso_final
>= 0)
1173 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1174 charset
.iso_final
) = id
;
1175 if (new_definition_p
)
1176 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1177 list1 (make_number (id
)));
1178 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1179 charset_jisx0201_roman
= id
;
1180 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1181 charset_jisx0208_1978
= id
;
1182 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1183 charset_jisx0208
= id
;
1184 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1185 charset_ksc5601
= id
;
1188 if (charset
.emacs_mule_id
>= 0)
1190 emacs_mule_charset
[charset
.emacs_mule_id
] = id
;
1191 if (charset
.emacs_mule_id
< 0xA0)
1192 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1194 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1195 if (new_definition_p
)
1196 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1197 list1 (make_number (id
)));
1200 if (new_definition_p
)
1202 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1203 if (charset
.supplementary_p
)
1204 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1205 list1 (make_number (id
)));
1210 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1212 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1214 if (cs
->supplementary_p
)
1217 if (EQ (tail
, Vcharset_ordered_list
))
1218 Vcharset_ordered_list
= Fcons (make_number (id
),
1219 Vcharset_ordered_list
);
1220 else if (NILP (tail
))
1221 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1222 list1 (make_number (id
)));
1225 val
= Fcons (XCAR (tail
), XCDR (tail
));
1226 XSETCDR (tail
, val
);
1227 XSETCAR (tail
, make_number (id
));
1230 charset_ordered_list_tick
++;
1237 /* Same as Fdefine_charset_internal but arguments are more convenient
1238 to call from C (typically in syms_of_charset). This can define a
1239 charset of `offset' method only. Return the ID of the new
1243 define_charset_internal (Lisp_Object name
,
1245 const char *code_space_chars
,
1246 unsigned min_code
, unsigned max_code
,
1247 int iso_final
, int iso_revision
, int emacs_mule_id
,
1248 bool ascii_compatible
, bool supplementary
,
1251 const unsigned char *code_space
= (const unsigned char *) code_space_chars
;
1252 Lisp_Object args
[charset_arg_max
];
1256 args
[charset_arg_name
] = name
;
1257 args
[charset_arg_dimension
] = make_number (dimension
);
1258 val
= make_uninit_vector (8);
1259 for (i
= 0; i
< 8; i
++)
1260 ASET (val
, i
, make_number (code_space
[i
]));
1261 args
[charset_arg_code_space
] = val
;
1262 args
[charset_arg_min_code
] = make_number (min_code
);
1263 args
[charset_arg_max_code
] = make_number (max_code
);
1264 args
[charset_arg_iso_final
]
1265 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1266 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1267 args
[charset_arg_emacs_mule_id
]
1268 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1269 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1270 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1271 args
[charset_arg_invalid_code
] = Qnil
;
1272 args
[charset_arg_code_offset
] = make_number (code_offset
);
1273 args
[charset_arg_map
] = Qnil
;
1274 args
[charset_arg_subset
] = Qnil
;
1275 args
[charset_arg_superset
] = Qnil
;
1276 args
[charset_arg_unify_map
] = Qnil
;
1278 args
[charset_arg_plist
] =
1279 listn (CONSTYPE_HEAP
, 14,
1281 args
[charset_arg_name
],
1282 intern_c_string (":dimension"),
1283 args
[charset_arg_dimension
],
1284 intern_c_string (":code-space"),
1285 args
[charset_arg_code_space
],
1286 intern_c_string (":iso-final-char"),
1287 args
[charset_arg_iso_final
],
1288 intern_c_string (":emacs-mule-id"),
1289 args
[charset_arg_emacs_mule_id
],
1290 QCascii_compatible_p
,
1291 args
[charset_arg_ascii_compatible_p
],
1292 intern_c_string (":code-offset"),
1293 args
[charset_arg_code_offset
]);
1294 Fdefine_charset_internal (charset_arg_max
, args
);
1296 return XINT (CHARSET_SYMBOL_ID (name
));
1300 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1301 Sdefine_charset_alias
, 2, 2, 0,
1302 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1303 (Lisp_Object alias
, Lisp_Object charset
)
1307 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1308 Fputhash (alias
, attr
, Vcharset_hash_table
);
1309 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1314 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1315 doc
: /* Return the property list of CHARSET. */)
1316 (Lisp_Object charset
)
1320 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1321 return CHARSET_ATTR_PLIST (attrs
);
1325 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1326 doc
: /* Set CHARSET's property list to PLIST. */)
1327 (Lisp_Object charset
, Lisp_Object plist
)
1331 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1332 ASET (attrs
, charset_plist
, plist
);
1337 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1338 doc
: /* Unify characters of CHARSET with Unicode.
1339 This means reading the relevant file and installing the table defined
1340 by CHARSET's `:unify-map' property.
1342 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1343 the same meaning as the `:unify-map' attribute in the function
1344 `define-charset' (which see).
1346 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1347 (Lisp_Object charset
, Lisp_Object unify_map
, Lisp_Object deunify
)
1352 CHECK_CHARSET_GET_ID (charset
, id
);
1353 cs
= CHARSET_FROM_ID (id
);
1355 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1356 : ! CHARSET_UNIFIED_P (cs
))
1359 CHARSET_UNIFIED_P (cs
) = 0;
1362 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1363 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1364 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1365 if (NILP (unify_map
))
1366 unify_map
= CHARSET_UNIFY_MAP (cs
);
1369 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1370 signal_error ("Bad unify-map", unify_map
);
1371 set_charset_attr (cs
, charset_unify_map
, unify_map
);
1373 if (NILP (Vchar_unify_table
))
1374 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1375 char_table_set_range (Vchar_unify_table
,
1376 cs
->min_char
, cs
->max_char
, charset
);
1377 CHARSET_UNIFIED_P (cs
) = 1;
1379 else if (CHAR_TABLE_P (Vchar_unify_table
))
1381 unsigned min_code
= CHARSET_MIN_CODE (cs
);
1382 unsigned max_code
= CHARSET_MAX_CODE (cs
);
1383 int min_char
= DECODE_CHAR (cs
, min_code
);
1384 int max_char
= DECODE_CHAR (cs
, max_code
);
1386 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1392 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
1393 Return true if it's a 96-character set, false if 94. */
1396 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
,
1397 Lisp_Object final_char
)
1399 CHECK_NUMBER (dimension
);
1400 CHECK_NUMBER (chars
);
1401 CHECK_CHARACTER (final_char
);
1403 if (! (1 <= XINT (dimension
) && XINT (dimension
) <= 3))
1404 error ("Invalid DIMENSION %"pI
"d, it should be 1, 2, or 3",
1407 bool chars_flag
= XINT (chars
) == 96;
1408 if (! (chars_flag
|| XINT (chars
) == 94))
1409 error ("Invalid CHARS %"pI
"d, it should be 94 or 96", XINT (chars
));
1411 int final_ch
= XFASTINT (final_char
);
1412 if (! ('0' <= final_ch
&& final_ch
<= '~'))
1413 error ("Invalid FINAL-CHAR `%c', it should be `0'..`~'", final_ch
);
1418 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1419 Sget_unused_iso_final_char
, 2, 2, 0,
1421 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1422 DIMENSION is the number of bytes to represent a character: 1 or 2.
1423 CHARS is the number of characters in a dimension: 94 or 96.
1425 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1426 If there's no unused final char for the specified kind of charset,
1428 (Lisp_Object dimension
, Lisp_Object chars
)
1430 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
,
1432 for (int final_char
= '0'; final_char
<= '?'; final_char
++)
1433 if (ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, final_char
) < 0)
1434 return make_number (final_char
);
1439 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1441 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1443 On decoding by an ISO-2022 base coding system, when a charset
1444 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1445 if CHARSET is designated instead. */)
1446 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
, Lisp_Object charset
)
1450 CHECK_CHARSET_GET_ID (charset
, id
);
1451 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
1452 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XFASTINT (final_char
)) = id
;
1457 /* Return information about charsets in the text at PTR of NBYTES
1458 bytes, which are NCHARS characters. The value is:
1460 0: Each character is represented by one byte. This is always
1461 true for a unibyte string. For a multibyte string, true if
1462 it contains only ASCII characters.
1464 1: No charsets other than ascii, control-1, and latin-1 are
1471 string_xstring_p (Lisp_Object string
)
1473 const unsigned char *p
= SDATA (string
);
1474 const unsigned char *endp
= p
+ SBYTES (string
);
1476 if (SCHARS (string
) == SBYTES (string
))
1481 int c
= STRING_CHAR_ADVANCE (p
);
1490 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1492 CHARSETS is a vector. If Nth element is non-nil, it means the
1493 charset whose id is N is already found.
1495 It may lookup a translation table TABLE if supplied. */
1498 find_charsets_in_text (const unsigned char *ptr
, ptrdiff_t nchars
,
1499 ptrdiff_t nbytes
, Lisp_Object charsets
,
1500 Lisp_Object table
, bool multibyte
)
1502 const unsigned char *pend
= ptr
+ nbytes
;
1504 if (nchars
== nbytes
)
1507 ASET (charsets
, charset_ascii
, Qt
);
1514 c
= translate_char (table
, c
);
1515 if (ASCII_CHAR_P (c
))
1516 ASET (charsets
, charset_ascii
, Qt
);
1518 ASET (charsets
, charset_eight_bit
, Qt
);
1525 int c
= STRING_CHAR_ADVANCE (ptr
);
1526 struct charset
*charset
;
1529 c
= translate_char (table
, c
);
1530 charset
= CHAR_CHARSET (c
);
1531 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1536 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1538 doc
: /* Return a list of charsets in the region between BEG and END.
1539 BEG and END are buffer positions.
1540 Optional arg TABLE if non-nil is a translation table to look up.
1542 If the current buffer is unibyte, the returned list may contain
1543 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1544 (Lisp_Object beg
, Lisp_Object end
, Lisp_Object table
)
1546 Lisp_Object charsets
;
1547 ptrdiff_t from
, from_byte
, to
, stop
, stop_byte
;
1550 bool multibyte
= ! NILP (BVAR (current_buffer
, enable_multibyte_characters
));
1552 validate_region (&beg
, &end
);
1553 from
= XFASTINT (beg
);
1554 stop
= to
= XFASTINT (end
);
1556 if (from
< GPT
&& GPT
< to
)
1559 stop_byte
= GPT_BYTE
;
1562 stop_byte
= CHAR_TO_BYTE (stop
);
1564 from_byte
= CHAR_TO_BYTE (from
);
1566 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1569 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1570 stop_byte
- from_byte
, charsets
, table
,
1574 from
= stop
, from_byte
= stop_byte
;
1575 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1582 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1583 if (!NILP (AREF (charsets
, i
)))
1584 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1588 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1590 doc
: /* Return a list of charsets in STR.
1591 Optional arg TABLE if non-nil is a translation table to look up.
1593 If STR is unibyte, the returned list may contain
1594 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1595 (Lisp_Object str
, Lisp_Object table
)
1597 Lisp_Object charsets
;
1603 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1604 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1606 STRING_MULTIBYTE (str
));
1608 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1609 if (!NILP (AREF (charsets
, i
)))
1610 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1616 /* Return a unified character code for C (>= 0x110000). VAL is a
1617 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1620 maybe_unify_char (int c
, Lisp_Object val
)
1622 struct charset
*charset
;
1625 return XFASTINT (val
);
1629 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1631 /* The call to load_charset below can allocate memory, which screws
1632 callers of this function through STRING_CHAR_* macros that hold C
1633 pointers to buffer text, if REL_ALLOC is used. */
1634 r_alloc_inhibit_buffer_relocation (1);
1636 load_charset (charset
, 1);
1637 if (! inhibit_load_charset_map
)
1639 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1645 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1646 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1652 r_alloc_inhibit_buffer_relocation (0);
1658 /* Return a character corresponding to the code-point CODE of
1662 decode_char (struct charset
*charset
, unsigned int code
)
1665 enum charset_method method
= CHARSET_METHOD (charset
);
1667 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1670 if (method
== CHARSET_METHOD_SUBSET
)
1672 Lisp_Object subset_info
;
1674 subset_info
= CHARSET_SUBSET (charset
);
1675 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1676 code
-= XINT (AREF (subset_info
, 3));
1677 if (code
< XFASTINT (AREF (subset_info
, 1))
1678 || code
> XFASTINT (AREF (subset_info
, 2)))
1681 c
= DECODE_CHAR (charset
, code
);
1683 else if (method
== CHARSET_METHOD_SUPERSET
)
1685 Lisp_Object parents
;
1687 parents
= CHARSET_SUPERSET (charset
);
1689 for (; CONSP (parents
); parents
= XCDR (parents
))
1691 int id
= XINT (XCAR (XCAR (parents
)));
1692 int code_offset
= XINT (XCDR (XCAR (parents
)));
1693 unsigned this_code
= code
- code_offset
;
1695 charset
= CHARSET_FROM_ID (id
);
1696 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1702 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1706 if (method
== CHARSET_METHOD_MAP
)
1708 Lisp_Object decoder
;
1710 decoder
= CHARSET_DECODER (charset
);
1711 if (! VECTORP (decoder
))
1713 load_charset (charset
, 1);
1714 decoder
= CHARSET_DECODER (charset
);
1716 if (VECTORP (decoder
))
1717 c
= XINT (AREF (decoder
, char_index
));
1719 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1721 else /* method == CHARSET_METHOD_OFFSET */
1723 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1724 if (CHARSET_UNIFIED_P (charset
)
1725 && MAX_UNICODE_CHAR
< c
&& c
<= MAX_5_BYTE_CHAR
)
1727 /* Unify C with a Unicode character if possible. */
1728 Lisp_Object val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1729 c
= maybe_unify_char (c
, val
);
1737 /* Variable used temporarily by the macro ENCODE_CHAR. */
1738 Lisp_Object charset_work
;
1740 /* Return a code-point of C in CHARSET. If C doesn't belong to
1741 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1742 use CHARSET's strict_max_char instead of max_char. */
1745 encode_char (struct charset
*charset
, int c
)
1748 enum charset_method method
= CHARSET_METHOD (charset
);
1750 if (CHARSET_UNIFIED_P (charset
))
1752 Lisp_Object deunifier
;
1753 int code_index
= -1;
1755 deunifier
= CHARSET_DEUNIFIER (charset
);
1756 if (! CHAR_TABLE_P (deunifier
))
1758 load_charset (charset
, 2);
1759 deunifier
= CHARSET_DEUNIFIER (charset
);
1761 if (CHAR_TABLE_P (deunifier
))
1763 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1765 if (INTEGERP (deunified
))
1766 code_index
= XINT (deunified
);
1770 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1772 if (code_index
>= 0)
1773 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1776 if (method
== CHARSET_METHOD_SUBSET
)
1778 Lisp_Object subset_info
;
1779 struct charset
*this_charset
;
1781 subset_info
= CHARSET_SUBSET (charset
);
1782 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1783 code
= ENCODE_CHAR (this_charset
, c
);
1784 if (code
== CHARSET_INVALID_CODE (this_charset
)
1785 || code
< XFASTINT (AREF (subset_info
, 1))
1786 || code
> XFASTINT (AREF (subset_info
, 2)))
1787 return CHARSET_INVALID_CODE (charset
);
1788 code
+= XINT (AREF (subset_info
, 3));
1792 if (method
== CHARSET_METHOD_SUPERSET
)
1794 Lisp_Object parents
;
1796 parents
= CHARSET_SUPERSET (charset
);
1797 for (; CONSP (parents
); parents
= XCDR (parents
))
1799 int id
= XINT (XCAR (XCAR (parents
)));
1800 int code_offset
= XINT (XCDR (XCAR (parents
)));
1801 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1803 code
= ENCODE_CHAR (this_charset
, c
);
1804 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1805 return code
+ code_offset
;
1807 return CHARSET_INVALID_CODE (charset
);
1810 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1811 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1812 return CHARSET_INVALID_CODE (charset
);
1814 if (method
== CHARSET_METHOD_MAP
)
1816 Lisp_Object encoder
;
1819 encoder
= CHARSET_ENCODER (charset
);
1820 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1822 load_charset (charset
, 2);
1823 encoder
= CHARSET_ENCODER (charset
);
1825 if (CHAR_TABLE_P (encoder
))
1827 val
= CHAR_TABLE_REF (encoder
, c
);
1829 return CHARSET_INVALID_CODE (charset
);
1831 if (! CHARSET_COMPACT_CODES_P (charset
))
1832 code
= INDEX_TO_CODE_POINT (charset
, code
);
1836 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1837 code
= INDEX_TO_CODE_POINT (charset
, code
);
1840 else /* method == CHARSET_METHOD_OFFSET */
1842 unsigned code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1844 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1851 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 2, 0,
1852 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1853 Return nil if CODE-POINT is not valid in CHARSET.
1855 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1856 (Lisp_Object charset
, Lisp_Object code_point
)
1860 struct charset
*charsetp
;
1862 CHECK_CHARSET_GET_ID (charset
, id
);
1863 code
= cons_to_unsigned (code_point
, UINT_MAX
);
1864 charsetp
= CHARSET_FROM_ID (id
);
1865 c
= DECODE_CHAR (charsetp
, code
);
1866 return (c
>= 0 ? make_number (c
) : Qnil
);
1870 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 2, 0,
1871 doc
: /* Encode the character CH into a code-point of CHARSET.
1872 Return nil if CHARSET doesn't include CH. */)
1873 (Lisp_Object ch
, Lisp_Object charset
)
1877 struct charset
*charsetp
;
1879 CHECK_CHARSET_GET_ID (charset
, id
);
1880 CHECK_CHARACTER (ch
);
1882 charsetp
= CHARSET_FROM_ID (id
);
1883 code
= ENCODE_CHAR (charsetp
, c
);
1884 if (code
== CHARSET_INVALID_CODE (charsetp
))
1886 return INTEGER_TO_CONS (code
);
1890 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1892 /* Return a character of CHARSET whose position codes are CODEn.
1894 CODE1 through CODE4 are optional, but if you don't supply sufficient
1895 position codes, it is assumed that the minimum code in each dimension
1897 (Lisp_Object charset
, Lisp_Object code1
, Lisp_Object code2
, Lisp_Object code3
, Lisp_Object code4
)
1900 struct charset
*charsetp
;
1904 CHECK_CHARSET_GET_ID (charset
, id
);
1905 charsetp
= CHARSET_FROM_ID (id
);
1907 dimension
= CHARSET_DIMENSION (charsetp
);
1909 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1910 ? 0 : CHARSET_MIN_CODE (charsetp
));
1913 CHECK_NATNUM (code1
);
1914 if (XFASTINT (code1
) >= 0x100)
1915 args_out_of_range (make_number (0xFF), code1
);
1916 code
= XFASTINT (code1
);
1922 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1925 CHECK_NATNUM (code2
);
1926 if (XFASTINT (code2
) >= 0x100)
1927 args_out_of_range (make_number (0xFF), code2
);
1928 code
|= XFASTINT (code2
);
1935 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1938 CHECK_NATNUM (code3
);
1939 if (XFASTINT (code3
) >= 0x100)
1940 args_out_of_range (make_number (0xFF), code3
);
1941 code
|= XFASTINT (code3
);
1948 code
|= charsetp
->code_space
[0];
1951 CHECK_NATNUM (code4
);
1952 if (XFASTINT (code4
) >= 0x100)
1953 args_out_of_range (make_number (0xFF), code4
);
1954 code
|= XFASTINT (code4
);
1961 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1963 c
= DECODE_CHAR (charsetp
, code
);
1965 error ("Invalid code(s)");
1966 return make_number (c
);
1970 /* Return the first charset in CHARSET_LIST that contains C.
1971 CHARSET_LIST is a list of charset IDs. If it is nil, use
1972 Vcharset_ordered_list. */
1975 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
1977 bool maybe_null
= 0;
1979 if (NILP (charset_list
))
1980 charset_list
= Vcharset_ordered_list
;
1984 while (CONSP (charset_list
))
1986 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1987 unsigned code
= ENCODE_CHAR (charset
, c
);
1989 if (code
!= CHARSET_INVALID_CODE (charset
))
1992 *code_return
= code
;
1995 charset_list
= XCDR (charset_list
);
1997 && c
<= MAX_UNICODE_CHAR
1998 && EQ (charset_list
, Vcharset_non_preferred_head
))
1999 return CHARSET_FROM_ID (charset_unicode
);
2001 return (maybe_null
? NULL
2002 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2003 : CHARSET_FROM_ID (charset_eight_bit
));
2007 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2009 /*Return list of charset and one to four position-codes of CH.
2010 The charset is decided by the current priority order of charsets.
2011 A position-code is a byte value of each dimension of the code-point of
2012 CH in the charset. */)
2015 struct charset
*charset
;
2020 CHECK_CHARACTER (ch
);
2022 charset
= CHAR_CHARSET (c
);
2025 code
= ENCODE_CHAR (charset
, c
);
2026 if (code
== CHARSET_INVALID_CODE (charset
))
2028 dimension
= CHARSET_DIMENSION (charset
);
2029 for (val
= Qnil
; dimension
> 0; dimension
--)
2031 val
= Fcons (make_number (code
& 0xFF), val
);
2034 return Fcons (CHARSET_NAME (charset
), val
);
2038 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2039 doc
: /* Return the charset of highest priority that contains CH.
2040 ASCII characters are an exception: for them, this function always
2042 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2043 from which to find the charset. It may also be a coding system. In
2044 that case, find the charset from what supported by that coding system. */)
2045 (Lisp_Object ch
, Lisp_Object restriction
)
2047 struct charset
*charset
;
2049 CHECK_CHARACTER (ch
);
2050 if (NILP (restriction
))
2051 charset
= CHAR_CHARSET (XINT (ch
));
2054 if (CONSP (restriction
))
2056 int c
= XFASTINT (ch
);
2058 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2060 struct charset
*rcharset
;
2062 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), rcharset
);
2063 if (ENCODE_CHAR (rcharset
, c
) != CHARSET_INVALID_CODE (rcharset
))
2064 return XCAR (restriction
);
2068 restriction
= coding_system_charset_list (restriction
);
2069 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2073 return (CHARSET_NAME (charset
));
2077 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2079 Return charset of a character in the current buffer at position POS.
2080 If POS is nil, it defaults to the current point.
2081 If POS is out of range, the value is nil. */)
2085 struct charset
*charset
;
2087 ch
= Fchar_after (pos
);
2088 if (! INTEGERP (ch
))
2090 charset
= CHAR_CHARSET (XINT (ch
));
2091 return (CHARSET_NAME (charset
));
2095 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2097 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2099 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2100 by their DIMENSION, CHARS, and FINAL-CHAR,
2101 whereas Emacs distinguishes them by charset symbol.
2102 See the documentation of the function `charset-info' for the meanings of
2103 DIMENSION, CHARS, and FINAL-CHAR. */)
2104 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
2106 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
2107 int id
= ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
,
2108 XFASTINT (final_char
));
2109 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2113 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2117 Clear temporary charset mapping tables.
2118 It should be called only from temacs invoked for dumping. */)
2121 if (temp_charset_work
)
2123 xfree (temp_charset_work
);
2124 temp_charset_work
= NULL
;
2127 if (CHAR_TABLE_P (Vchar_unify_table
))
2128 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2133 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2134 Scharset_priority_list
, 0, 1, 0,
2135 doc
: /* Return the list of charsets ordered by priority.
2136 HIGHESTP non-nil means just return the highest priority one. */)
2137 (Lisp_Object highestp
)
2139 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2141 if (!NILP (highestp
))
2142 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2144 while (!NILP (list
))
2146 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2149 return Fnreverse (val
);
2152 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2154 doc
: /* Assign higher priority to the charsets given as arguments.
2155 usage: (set-charset-priority &rest charsets) */)
2156 (ptrdiff_t nargs
, Lisp_Object
*args
)
2158 Lisp_Object new_head
, old_list
;
2159 Lisp_Object list_2022
, list_emacs_mule
;
2163 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2165 for (i
= 0; i
< nargs
; i
++)
2167 CHECK_CHARSET_GET_ID (args
[i
], id
);
2168 if (! NILP (Fmemq (make_number (id
), old_list
)))
2170 old_list
= Fdelq (make_number (id
), old_list
);
2171 new_head
= Fcons (make_number (id
), new_head
);
2174 Vcharset_non_preferred_head
= old_list
;
2175 Vcharset_ordered_list
= CALLN (Fnconc
, Fnreverse (new_head
), old_list
);
2177 charset_ordered_list_tick
++;
2179 charset_unibyte
= -1;
2180 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2181 CONSP (old_list
); old_list
= XCDR (old_list
))
2183 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2184 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2185 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2186 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2187 if (charset_unibyte
< 0)
2189 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2191 if (CHARSET_DIMENSION (charset
) == 1
2192 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2193 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2194 charset_unibyte
= CHARSET_ID (charset
);
2197 Viso_2022_charset_list
= Fnreverse (list_2022
);
2198 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2199 if (charset_unibyte
< 0)
2200 charset_unibyte
= charset_iso_8859_1
;
2205 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2207 doc
: /* Internal use only.
2208 Return charset identification number of CHARSET. */)
2209 (Lisp_Object charset
)
2213 CHECK_CHARSET_GET_ID (charset
, id
);
2214 return make_number (id
);
2217 struct charset_sort_data
2219 Lisp_Object charset
;
2225 charset_compare (const void *d1
, const void *d2
)
2227 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2228 if (data1
->priority
!= data2
->priority
)
2229 return data1
->priority
< data2
->priority
? -1 : 1;
2233 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2234 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2235 Return the sorted list. CHARSETS is modified by side effects.
2236 See also `charset-priority-list' and `set-charset-priority'. */)
2237 (Lisp_Object charsets
)
2239 Lisp_Object len
= Flength (charsets
);
2240 ptrdiff_t n
= XFASTINT (len
), i
, j
;
2242 Lisp_Object tail
, elt
, attrs
;
2243 struct charset_sort_data
*sort_data
;
2244 int id
, min_id
= INT_MAX
, max_id
= INT_MIN
;
2249 SAFE_NALLOCA (sort_data
, 1, n
);
2250 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2253 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2254 sort_data
[i
].charset
= elt
;
2255 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2261 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2262 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2265 id
= XFASTINT (elt
);
2266 if (id
>= min_id
&& id
<= max_id
)
2267 for (j
= 0; j
< n
; j
++)
2268 if (sort_data
[j
].id
== id
)
2270 sort_data
[j
].priority
= i
;
2274 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2275 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2276 XSETCAR (tail
, sort_data
[i
].charset
);
2285 Lisp_Object tempdir
;
2286 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2287 if (! file_accessible_directory_p (tempdir
))
2289 /* This used to be non-fatal (dir_warning), but it should not
2290 happen, and if it does sooner or later it will cause some
2291 obscure problem (eg bug#6401), so better abort. */
2292 fprintf (stderr
, "Error: charsets directory not found:\n\
2294 Emacs will not function correctly without the character map files.\n%s\
2295 Please check your installation!\n",
2297 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2298 variable is set, maybe it has the wrong value?\n" : "");
2302 Vcharset_map_path
= list1 (tempdir
);
2307 init_charset_once (void)
2311 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2312 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2313 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2314 iso_charset_table
[i
][j
][k
] = -1;
2316 for (i
= 0; i
< 256; i
++)
2317 emacs_mule_charset
[i
] = -1;
2319 charset_jisx0201_roman
= -1;
2320 charset_jisx0208_1978
= -1;
2321 charset_jisx0208
= -1;
2322 charset_ksc5601
= -1;
2327 /* Allocate an initial charset table that is large enough to handle
2328 Emacs while it is bootstrapping. As of September 2011, the size
2329 needs to be at least 166; make it a bit bigger to allow for future
2332 Don't make the value so small that the table is reallocated during
2333 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2334 during an initial bootstrap wreak havoc after dumping; see the
2335 M_MMAP_THRESHOLD value in alloc.c, plus there is an extra overhead
2336 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2337 static struct charset charset_table_init
[180];
2340 syms_of_charset (void)
2342 DEFSYM (Qcharsetp
, "charsetp");
2344 /* Special charset symbols. */
2345 DEFSYM (Qascii
, "ascii");
2346 DEFSYM (Qunicode
, "unicode");
2347 DEFSYM (Qemacs
, "emacs");
2348 DEFSYM (Qeight_bit
, "eight-bit");
2349 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2351 staticpro (&Vcharset_ordered_list
);
2352 Vcharset_ordered_list
= Qnil
;
2354 staticpro (&Viso_2022_charset_list
);
2355 Viso_2022_charset_list
= Qnil
;
2357 staticpro (&Vemacs_mule_charset_list
);
2358 Vemacs_mule_charset_list
= Qnil
;
2360 staticpro (&Vcharset_hash_table
);
2361 Vcharset_hash_table
= CALLN (Fmake_hash_table
, QCtest
, Qeq
);
2363 charset_table
= charset_table_init
;
2364 charset_table_size
= ARRAYELTS (charset_table_init
);
2365 charset_table_used
= 0;
2367 defsubr (&Scharsetp
);
2368 defsubr (&Smap_charset_chars
);
2369 defsubr (&Sdefine_charset_internal
);
2370 defsubr (&Sdefine_charset_alias
);
2371 defsubr (&Scharset_plist
);
2372 defsubr (&Sset_charset_plist
);
2373 defsubr (&Sunify_charset
);
2374 defsubr (&Sget_unused_iso_final_char
);
2375 defsubr (&Sdeclare_equiv_charset
);
2376 defsubr (&Sfind_charset_region
);
2377 defsubr (&Sfind_charset_string
);
2378 defsubr (&Sdecode_char
);
2379 defsubr (&Sencode_char
);
2380 defsubr (&Ssplit_char
);
2381 defsubr (&Smake_char
);
2382 defsubr (&Schar_charset
);
2383 defsubr (&Scharset_after
);
2384 defsubr (&Siso_charset
);
2385 defsubr (&Sclear_charset_maps
);
2386 defsubr (&Scharset_priority_list
);
2387 defsubr (&Sset_charset_priority
);
2388 defsubr (&Scharset_id_internal
);
2389 defsubr (&Ssort_charsets
);
2391 DEFVAR_LISP ("charset-map-path", Vcharset_map_path
,
2392 doc
: /* List of directories to search for charset map files. */);
2393 Vcharset_map_path
= Qnil
;
2395 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map
,
2396 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2397 inhibit_load_charset_map
= 0;
2399 DEFVAR_LISP ("charset-list", Vcharset_list
,
2400 doc
: /* List of all charsets ever defined. */);
2401 Vcharset_list
= Qnil
;
2403 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language
,
2404 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2405 If the current language environment is for multiple languages (e.g. "Latin-1"),
2406 the value may be a list of mnemonics. */);
2407 Vcurrent_iso639_language
= Qnil
;
2410 = define_charset_internal (Qascii
, 1, "\x00\x7F\0\0\0\0\0",
2411 0, 127, 'B', -1, 0, 1, 0, 0);
2413 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\0\0\0\0\0",
2414 0, 255, -1, -1, -1, 1, 0, 0);
2416 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2417 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2419 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2420 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2422 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\0\0\0\0\0",
2423 128, 255, -1, 0, -1, 0, 1,
2424 MAX_5_BYTE_CHAR
+ 1);
2425 charset_unibyte
= charset_iso_8859_1
;