1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
34 #include <sys/types.h>
37 #include "character.h"
43 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
45 A coded character set ("charset" hereafter) is a meaningful
46 collection (i.e. language, culture, functionality, etc.) of
47 characters. Emacs handles multiple charsets at once. In Emacs Lisp
48 code, a charset is represented by a symbol. In C code, a charset is
49 represented by its ID number or by a pointer to a struct charset.
51 The actual information about each charset is stored in two places.
52 Lispy information is stored in the hash table Vcharset_hash_table as
53 a vector (charset attributes). The other information is stored in
54 charset_table as a struct charset.
58 /* List of all charsets. This variable is used only from Emacs
60 Lisp_Object Vcharset_list
;
62 /* Hash table that contains attributes of each charset. Keys are
63 charset symbols, and values are vectors of charset attributes. */
64 Lisp_Object Vcharset_hash_table
;
66 /* Table of struct charset. */
67 struct charset
*charset_table
;
69 static int charset_table_size
;
70 static int charset_table_used
;
72 Lisp_Object Qcharsetp
;
74 /* Special charset symbols. */
76 Lisp_Object Qeight_bit
;
77 Lisp_Object Qiso_8859_1
;
81 /* The corresponding charsets. */
83 int charset_eight_bit
;
84 int charset_iso_8859_1
;
88 /* The other special charsets. */
89 int charset_jisx0201_roman
;
90 int charset_jisx0208_1978
;
94 /* Value of charset attribute `charset-iso-plane'. */
97 /* Charset of unibyte characters. */
100 /* List of charsets ordered by the priority. */
101 Lisp_Object Vcharset_ordered_list
;
103 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
105 Lisp_Object Vcharset_non_preferred_head
;
107 /* Incremented everytime we change Vcharset_ordered_list. This is
108 unsigned short so that it fits in Lisp_Int and never matches
110 unsigned short charset_ordered_list_tick
;
112 /* List of iso-2022 charsets. */
113 Lisp_Object Viso_2022_charset_list
;
115 /* List of emacs-mule charsets. */
116 Lisp_Object Vemacs_mule_charset_list
;
118 struct charset
*emacs_mule_charset
[256];
120 /* Mapping table from ISO2022's charset (specified by DIMENSION,
121 CHARS, and FINAL-CHAR) to Emacs' charset. */
122 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
124 Lisp_Object Vcharset_map_path
;
126 /* If nonzero, don't load charset maps. */
127 int inhibit_load_charset_map
;
129 Lisp_Object Vcurrent_iso639_language
;
131 #define CODE_POINT_TO_INDEX(charset, code) \
132 ((charset)->code_linear_p \
133 ? (code) - (charset)->min_code \
134 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
135 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
136 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
137 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
138 ? (((((code) >> 24) - (charset)->code_space[12]) \
139 * (charset)->code_space[11]) \
140 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
141 * (charset)->code_space[7]) \
142 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
143 * (charset)->code_space[3]) \
144 + (((code) & 0xFF) - (charset)->code_space[0]) \
145 - ((charset)->char_index_offset)) \
149 /* Convert the character index IDX to code-point CODE for CHARSET.
150 It is assumed that IDX is in a valid range. */
152 #define INDEX_TO_CODE_POINT(charset, idx) \
153 ((charset)->code_linear_p \
154 ? (idx) + (charset)->min_code \
155 : (idx += (charset)->char_index_offset, \
156 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
157 | (((charset)->code_space[4] \
158 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
160 | (((charset)->code_space[8] \
161 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
163 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
166 /* Structure to hold mapping tables for a charset. Used by temacs
167 invoked for dumping. */
171 /* The current charset for which the following tables are setup. */
172 struct charset
*current
;
174 /* 1 iff the following table is used for encoder. */
177 /* When the following table is used for encoding, mininum and
178 maxinum character of the current charset. */
179 int min_char
, max_char
;
181 /* A Unicode character correspoinding to the code indice 0 (i.e. the
182 minimum code-point) of the current charset, or -1 if the code
183 indice 0 is not a Unicode character. This is checked when
184 table.encoder[CHAR] is zero. */
188 /* Table mapping code-indices (not code-points) of the current
189 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
190 doesn't belong to the current charset. */
191 int decoder
[0x10000];
192 /* Table mapping Unicode characters to code-indices of the current
193 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
194 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
195 (0x20000..0x2FFFF). Note that there is no charset map that
196 uses both SMP and SIP. */
197 unsigned short encoder
[0x20000];
199 } *temp_charset_work
;
201 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
204 temp_charset_work->zero_index_char = (C); \
205 else if ((C) < 0x20000) \
206 temp_charset_work->table.encoder[(C)] = (CODE); \
208 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
211 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
212 ((C) == temp_charset_work->zero_index_char ? 0 \
213 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
214 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
215 : temp_charset_work->table.encoder[(C) - 0x10000] \
216 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
218 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
219 (temp_charset_work->table.decoder[(CODE)] = (C))
221 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
222 (temp_charset_work->table.decoder[(CODE)])
225 /* Set to 1 to warn that a charset map is loaded and thus a buffer
226 text and a string data may be relocated. */
227 int charset_map_loaded
;
229 struct charset_map_entries
235 struct charset_map_entries
*next
;
238 /* Load the mapping information of CHARSET from ENTRIES for
239 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
240 encoding (CONTROL_FLAG == 2).
242 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
243 and CHARSET->fast_map.
245 If CONTROL_FLAG is 1, setup the following tables according to
246 CHARSET->method and inhibit_load_charset_map.
248 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
249 ----------------------+--------------------+---------------------------
250 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
251 ----------------------+--------------------+---------------------------
252 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
254 If CONTROL_FLAG is 2, setup the following tables.
256 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
259 ----------------------+--------------------+--------------------------
260 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
264 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
266 Lisp_Object vec
, table
;
267 unsigned max_code
= CHARSET_MAX_CODE (charset
);
268 int ascii_compatible_p
= charset
->ascii_compatible_p
;
269 int min_char
, max_char
, nonascii_min_char
;
271 unsigned char *fast_map
= charset
->fast_map
;
278 if (! inhibit_load_charset_map
)
280 if (control_flag
== 1)
282 if (charset
->method
== CHARSET_METHOD_MAP
)
284 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
286 vec
= CHARSET_DECODER (charset
)
287 = Fmake_vector (make_number (n
), make_number (-1));
291 char_table_set_range (Vchar_unify_table
,
292 charset
->min_char
, charset
->max_char
,
298 table
= Fmake_char_table (Qnil
, Qnil
);
299 if (charset
->method
== CHARSET_METHOD_MAP
)
300 CHARSET_ENCODER (charset
) = table
;
302 CHARSET_DEUNIFIER (charset
) = table
;
307 if (! temp_charset_work
)
308 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
309 if (control_flag
== 1)
311 memset (temp_charset_work
->table
.decoder
, -1,
312 sizeof (int) * 0x10000);
316 memset (temp_charset_work
->table
.encoder
, 0,
317 sizeof (unsigned short) * 0x20000);
318 temp_charset_work
->zero_index_char
= -1;
320 temp_charset_work
->current
= charset
;
321 temp_charset_work
->for_encoder
= (control_flag
== 2);
324 charset_map_loaded
= 1;
327 min_char
= max_char
= entries
->entry
[0].c
;
328 nonascii_min_char
= MAX_CHAR
;
329 for (i
= 0; i
< n_entries
; i
++)
332 int from_index
, to_index
;
334 int idx
= i
% 0x10000;
336 if (i
> 0 && idx
== 0)
337 entries
= entries
->next
;
338 from
= entries
->entry
[idx
].from
;
339 to
= entries
->entry
[idx
].to
;
340 from_c
= entries
->entry
[idx
].c
;
341 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
344 to_index
= from_index
;
349 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
350 to_c
= from_c
+ (to_index
- from_index
);
352 if (from_index
< 0 || to_index
< 0)
357 else if (from_c
< min_char
)
360 if (control_flag
== 1)
362 if (charset
->method
== CHARSET_METHOD_MAP
)
363 for (; from_index
<= to_index
; from_index
++, from_c
++)
364 ASET (vec
, from_index
, make_number (from_c
));
366 for (; from_index
<= to_index
; from_index
++, from_c
++)
367 CHAR_TABLE_SET (Vchar_unify_table
,
368 CHARSET_CODE_OFFSET (charset
) + from_index
,
369 make_number (from_c
));
371 else if (control_flag
== 2)
373 if (charset
->method
== CHARSET_METHOD_MAP
374 && CHARSET_COMPACT_CODES_P (charset
))
375 for (; from_index
<= to_index
; from_index
++, from_c
++)
377 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
379 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
380 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
383 for (; from_index
<= to_index
; from_index
++, from_c
++)
385 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
386 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
389 else if (control_flag
== 3)
390 for (; from_index
<= to_index
; from_index
++, from_c
++)
391 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
392 else if (control_flag
== 4)
393 for (; from_index
<= to_index
; from_index
++, from_c
++)
394 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
395 else /* control_flag == 0 */
397 if (ascii_compatible_p
)
399 if (! ASCII_BYTE_P (from_c
))
401 if (from_c
< nonascii_min_char
)
402 nonascii_min_char
= from_c
;
404 else if (! ASCII_BYTE_P (to_c
))
406 nonascii_min_char
= 0x80;
410 for (; from_c
<= to_c
; from_c
++)
411 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
415 if (control_flag
== 0)
417 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
418 ? nonascii_min_char
: min_char
);
419 CHARSET_MAX_CHAR (charset
) = max_char
;
421 else if (control_flag
== 4)
423 temp_charset_work
->min_char
= min_char
;
424 temp_charset_work
->max_char
= max_char
;
429 /* Read a hexadecimal number (preceded by "0x") from the file FP while
430 paying attention to comment charcter '#'. */
432 static INLINE
unsigned
433 read_hex (FILE *fp
, int *eof
)
438 while ((c
= getc (fp
)) != EOF
)
442 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
446 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
458 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
460 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
462 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
463 n
= (n
* 10) + c
- '0';
469 /* Return a mapping vector for CHARSET loaded from MAPFILE.
470 Each line of MAPFILE has this form
472 where 0xAAAA is a code-point and 0xCCCC is the corresponding
473 character code, or this form
475 where 0xAAAA and 0xBBBB are code-points specifying a range, and
476 0xCCCC is the first character code of the range.
478 The returned vector has this form:
479 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
480 where CODE1 is a code-point or a cons of code-points specifying a
483 Note that this function uses `openp' to open MAPFILE but ignores
484 `file-name-handler-alist' to avoid running any Lisp code. */
487 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
, int control_flag
)
489 unsigned min_code
= CHARSET_MIN_CODE (charset
);
490 unsigned max_code
= CHARSET_MAX_CODE (charset
);
494 Lisp_Object suffixes
;
495 struct charset_map_entries
*head
, *entries
;
496 int n_entries
, count
;
499 suffixes
= Fcons (build_string (".map"),
500 Fcons (build_string (".TXT"), Qnil
));
502 count
= SPECPDL_INDEX ();
503 specbind (Qfile_name_handler_alist
, Qnil
);
504 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
505 unbind_to (count
, Qnil
);
507 || ! (fp
= fdopen (fd
, "r")))
508 error ("Failure in loading charset map: %S", SDATA (mapfile
));
510 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
511 large (larger than MAX_ALLOCA). */
512 SAFE_ALLOCA (head
, struct charset_map_entries
*,
513 sizeof (struct charset_map_entries
));
515 memset (entries
, 0, sizeof (struct charset_map_entries
));
525 from
= read_hex (fp
, &eof
);
528 if (getc (fp
) == '-')
529 to
= read_hex (fp
, &eof
);
532 c
= (int) read_hex (fp
, &eof
);
534 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
537 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
539 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
540 sizeof (struct charset_map_entries
));
541 entries
= entries
->next
;
542 memset (entries
, 0, sizeof (struct charset_map_entries
));
544 idx
= n_entries
% 0x10000;
545 entries
->entry
[idx
].from
= from
;
546 entries
->entry
[idx
].to
= to
;
547 entries
->entry
[idx
].c
= c
;
552 load_charset_map (charset
, head
, n_entries
, control_flag
);
557 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
559 unsigned min_code
= CHARSET_MIN_CODE (charset
);
560 unsigned max_code
= CHARSET_MAX_CODE (charset
);
561 struct charset_map_entries
*head
, *entries
;
563 int len
= ASIZE (vec
);
569 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
573 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
574 large (larger than MAX_ALLOCA). */
575 SAFE_ALLOCA (head
, struct charset_map_entries
*,
576 sizeof (struct charset_map_entries
));
578 memset (entries
, 0, sizeof (struct charset_map_entries
));
581 for (i
= 0; i
< len
; i
+= 2)
583 Lisp_Object val
, val2
;
595 from
= XFASTINT (val
);
596 to
= XFASTINT (val2
);
601 from
= to
= XFASTINT (val
);
603 val
= AREF (vec
, i
+ 1);
607 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
610 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
612 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
613 sizeof (struct charset_map_entries
));
614 entries
= entries
->next
;
615 memset (entries
, 0, sizeof (struct charset_map_entries
));
617 idx
= n_entries
% 0x10000;
618 entries
->entry
[idx
].from
= from
;
619 entries
->entry
[idx
].to
= to
;
620 entries
->entry
[idx
].c
= c
;
624 load_charset_map (charset
, head
, n_entries
, control_flag
);
629 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
630 map it is (see the comment of load_charset_map for the detail). */
633 load_charset (struct charset
*charset
, int control_flag
)
637 if (inhibit_load_charset_map
639 && charset
== temp_charset_work
->current
640 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
643 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
644 map
= CHARSET_MAP (charset
);
645 else if (CHARSET_UNIFIED_P (charset
))
646 map
= CHARSET_UNIFY_MAP (charset
);
648 load_charset_map_from_file (charset
, map
, control_flag
);
650 load_charset_map_from_vector (charset
, map
, control_flag
);
654 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
655 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
658 return (CHARSETP (object
) ? Qt
: Qnil
);
662 void map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
663 Lisp_Object function
, Lisp_Object arg
,
664 unsigned from
, unsigned to
);
667 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
), Lisp_Object function
, Lisp_Object arg
, unsigned int from
, unsigned int to
)
669 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
670 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
675 range
= Fcons (Qnil
, Qnil
);
678 c
= temp_charset_work
->min_char
;
679 stop
= (temp_charset_work
->max_char
< 0x20000
680 ? temp_charset_work
->max_char
: 0xFFFF);
684 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
686 if (index
>= from_idx
&& index
<= to_idx
)
688 if (NILP (XCAR (range
)))
689 XSETCAR (range
, make_number (c
));
691 else if (! NILP (XCAR (range
)))
693 XSETCDR (range
, make_number (c
- 1));
695 (*c_function
) (arg
, range
);
697 call2 (function
, range
, arg
);
698 XSETCAR (range
, Qnil
);
702 if (c
== temp_charset_work
->max_char
)
704 if (! NILP (XCAR (range
)))
706 XSETCDR (range
, make_number (c
));
708 (*c_function
) (arg
, range
);
710 call2 (function
, range
, arg
);
715 stop
= temp_charset_work
->max_char
;
723 map_charset_chars (void (*c_function
)(Lisp_Object
, Lisp_Object
), Lisp_Object function
,
724 Lisp_Object arg
, struct charset
*charset
, unsigned from
, unsigned to
)
729 partial
= (from
> CHARSET_MIN_CODE (charset
)
730 || to
< CHARSET_MAX_CODE (charset
));
732 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
734 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
735 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
736 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
737 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
739 if (CHARSET_UNIFIED_P (charset
))
741 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
742 load_charset (charset
, 2);
743 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
744 map_char_table_for_charset (c_function
, function
,
745 CHARSET_DEUNIFIER (charset
), arg
,
746 partial
? charset
: NULL
, from
, to
);
748 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
751 range
= Fcons (make_number (from_c
), make_number (to_c
));
753 (*c_function
) (arg
, range
);
755 call2 (function
, range
, arg
);
757 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
759 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
760 load_charset (charset
, 2);
761 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
762 map_char_table_for_charset (c_function
, function
,
763 CHARSET_ENCODER (charset
), arg
,
764 partial
? charset
: NULL
, from
, to
);
766 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
768 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
770 Lisp_Object subset_info
;
773 subset_info
= CHARSET_SUBSET (charset
);
774 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
775 offset
= XINT (AREF (subset_info
, 3));
777 if (from
< XFASTINT (AREF (subset_info
, 1)))
778 from
= XFASTINT (AREF (subset_info
, 1));
780 if (to
> XFASTINT (AREF (subset_info
, 2)))
781 to
= XFASTINT (AREF (subset_info
, 2));
782 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
784 else /* i.e. CHARSET_METHOD_SUPERSET */
788 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
789 parents
= XCDR (parents
))
792 unsigned this_from
, this_to
;
794 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
795 offset
= XINT (XCDR (XCAR (parents
)));
796 this_from
= from
> offset
? from
- offset
: 0;
797 this_to
= to
> offset
? to
- offset
: 0;
798 if (this_from
< CHARSET_MIN_CODE (charset
))
799 this_from
= CHARSET_MIN_CODE (charset
);
800 if (this_to
> CHARSET_MAX_CODE (charset
))
801 this_to
= CHARSET_MAX_CODE (charset
);
802 map_charset_chars (c_function
, function
, arg
, charset
,
808 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
809 doc
: /* Call FUNCTION for all characters in CHARSET.
810 FUNCTION is called with an argument RANGE and the optional 3rd
813 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
814 characters contained in CHARSET.
816 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
817 range of code points (in CHARSET) of target characters. */)
818 (Lisp_Object function
, Lisp_Object charset
, Lisp_Object arg
, Lisp_Object from_code
, Lisp_Object to_code
)
823 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
824 if (NILP (from_code
))
825 from
= CHARSET_MIN_CODE (cs
);
828 CHECK_NATNUM (from_code
);
829 from
= XINT (from_code
);
830 if (from
< CHARSET_MIN_CODE (cs
))
831 from
= CHARSET_MIN_CODE (cs
);
834 to
= CHARSET_MAX_CODE (cs
);
837 CHECK_NATNUM (to_code
);
839 if (to
> CHARSET_MAX_CODE (cs
))
840 to
= CHARSET_MAX_CODE (cs
);
842 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
847 /* Define a charset according to the arguments. The Nth argument is
848 the Nth attribute of the charset (the last attribute `charset-id'
849 is not included). See the docstring of `define-charset' for the
852 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
853 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
854 doc
: /* For internal use only.
855 usage: (define-charset-internal ...) */)
856 (int nargs
, Lisp_Object
*args
)
858 /* Charset attr vector. */
862 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
864 struct charset charset
;
867 int new_definition_p
;
870 if (nargs
!= charset_arg_max
)
871 return Fsignal (Qwrong_number_of_arguments
,
872 Fcons (intern ("define-charset-internal"),
873 make_number (nargs
)));
875 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
877 CHECK_SYMBOL (args
[charset_arg_name
]);
878 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
880 val
= args
[charset_arg_code_space
];
881 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
883 int min_byte
, max_byte
;
885 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
886 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
887 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
888 error ("Invalid :code-space value");
889 charset
.code_space
[i
* 4] = min_byte
;
890 charset
.code_space
[i
* 4 + 1] = max_byte
;
891 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
892 nchars
*= charset
.code_space
[i
* 4 + 2];
893 charset
.code_space
[i
* 4 + 3] = nchars
;
898 val
= args
[charset_arg_dimension
];
900 charset
.dimension
= dimension
;
904 charset
.dimension
= XINT (val
);
905 if (charset
.dimension
< 1 || charset
.dimension
> 4)
906 args_out_of_range_3 (val
, make_number (1), make_number (4));
909 charset
.code_linear_p
910 = (charset
.dimension
== 1
911 || (charset
.code_space
[2] == 256
912 && (charset
.dimension
== 2
913 || (charset
.code_space
[6] == 256
914 && (charset
.dimension
== 3
915 || charset
.code_space
[10] == 256)))));
917 if (! charset
.code_linear_p
)
919 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
920 memset (charset
.code_space_mask
, 0, 256);
921 for (i
= 0; i
< 4; i
++)
922 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
924 charset
.code_space_mask
[j
] |= (1 << i
);
927 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
929 charset
.min_code
= (charset
.code_space
[0]
930 | (charset
.code_space
[4] << 8)
931 | (charset
.code_space
[8] << 16)
932 | (charset
.code_space
[12] << 24));
933 charset
.max_code
= (charset
.code_space
[1]
934 | (charset
.code_space
[5] << 8)
935 | (charset
.code_space
[9] << 16)
936 | (charset
.code_space
[13] << 24));
937 charset
.char_index_offset
= 0;
939 val
= args
[charset_arg_min_code
];
949 CHECK_NUMBER_CAR (val
);
950 CHECK_NUMBER_CDR (val
);
951 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
953 if (code
< charset
.min_code
954 || code
> charset
.max_code
)
955 args_out_of_range_3 (make_number (charset
.min_code
),
956 make_number (charset
.max_code
), val
);
957 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
958 charset
.min_code
= code
;
961 val
= args
[charset_arg_max_code
];
971 CHECK_NUMBER_CAR (val
);
972 CHECK_NUMBER_CDR (val
);
973 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
975 if (code
< charset
.min_code
976 || code
> charset
.max_code
)
977 args_out_of_range_3 (make_number (charset
.min_code
),
978 make_number (charset
.max_code
), val
);
979 charset
.max_code
= code
;
982 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
984 val
= args
[charset_arg_invalid_code
];
987 if (charset
.min_code
> 0)
988 charset
.invalid_code
= 0;
991 XSETINT (val
, charset
.max_code
+ 1);
992 if (XINT (val
) == charset
.max_code
+ 1)
993 charset
.invalid_code
= charset
.max_code
+ 1;
995 error ("Attribute :invalid-code must be specified");
1001 charset
.invalid_code
= XFASTINT (val
);
1004 val
= args
[charset_arg_iso_final
];
1006 charset
.iso_final
= -1;
1010 if (XINT (val
) < '0' || XINT (val
) > 127)
1011 error ("Invalid iso-final-char: %d", XINT (val
));
1012 charset
.iso_final
= XINT (val
);
1015 val
= args
[charset_arg_iso_revision
];
1017 charset
.iso_revision
= -1;
1021 if (XINT (val
) > 63)
1022 args_out_of_range (make_number (63), val
);
1023 charset
.iso_revision
= XINT (val
);
1026 val
= args
[charset_arg_emacs_mule_id
];
1028 charset
.emacs_mule_id
= -1;
1032 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1033 error ("Invalid emacs-mule-id: %d", XINT (val
));
1034 charset
.emacs_mule_id
= XINT (val
);
1037 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1039 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1041 charset
.unified_p
= 0;
1043 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
1045 if (! NILP (args
[charset_arg_code_offset
]))
1047 val
= args
[charset_arg_code_offset
];
1050 charset
.method
= CHARSET_METHOD_OFFSET
;
1051 charset
.code_offset
= XINT (val
);
1053 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1054 charset
.min_char
= i
+ charset
.code_offset
;
1055 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1056 charset
.max_char
= i
+ charset
.code_offset
;
1057 if (charset
.max_char
> MAX_CHAR
)
1058 error ("Unsupported max char: %d", charset
.max_char
);
1060 i
= (charset
.min_char
>> 7) << 7;
1061 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1062 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1063 i
= (i
>> 12) << 12;
1064 for (; i
<= charset
.max_char
; i
+= 0x1000)
1065 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1066 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1067 charset
.ascii_compatible_p
= 1;
1069 else if (! NILP (args
[charset_arg_map
]))
1071 val
= args
[charset_arg_map
];
1072 ASET (attrs
, charset_map
, val
);
1073 charset
.method
= CHARSET_METHOD_MAP
;
1075 else if (! NILP (args
[charset_arg_subset
]))
1078 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1079 struct charset
*parent_charset
;
1081 val
= args
[charset_arg_subset
];
1082 parent
= Fcar (val
);
1083 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1084 parent_min_code
= Fnth (make_number (1), val
);
1085 CHECK_NATNUM (parent_min_code
);
1086 parent_max_code
= Fnth (make_number (2), val
);
1087 CHECK_NATNUM (parent_max_code
);
1088 parent_code_offset
= Fnth (make_number (3), val
);
1089 CHECK_NUMBER (parent_code_offset
);
1090 val
= Fmake_vector (make_number (4), Qnil
);
1091 ASET (val
, 0, make_number (parent_charset
->id
));
1092 ASET (val
, 1, parent_min_code
);
1093 ASET (val
, 2, parent_max_code
);
1094 ASET (val
, 3, parent_code_offset
);
1095 ASET (attrs
, charset_subset
, val
);
1097 charset
.method
= CHARSET_METHOD_SUBSET
;
1098 /* Here, we just copy the parent's fast_map. It's not accurate,
1099 but at least it works for quickly detecting which character
1100 DOESN'T belong to this charset. */
1101 for (i
= 0; i
< 190; i
++)
1102 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1104 /* We also copy these for parents. */
1105 charset
.min_char
= parent_charset
->min_char
;
1106 charset
.max_char
= parent_charset
->max_char
;
1108 else if (! NILP (args
[charset_arg_superset
]))
1110 val
= args
[charset_arg_superset
];
1111 charset
.method
= CHARSET_METHOD_SUPERSET
;
1112 val
= Fcopy_sequence (val
);
1113 ASET (attrs
, charset_superset
, val
);
1115 charset
.min_char
= MAX_CHAR
;
1116 charset
.max_char
= 0;
1117 for (; ! NILP (val
); val
= Fcdr (val
))
1119 Lisp_Object elt
, car_part
, cdr_part
;
1120 int this_id
, offset
;
1121 struct charset
*this_charset
;
1126 car_part
= XCAR (elt
);
1127 cdr_part
= XCDR (elt
);
1128 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1129 CHECK_NUMBER (cdr_part
);
1130 offset
= XINT (cdr_part
);
1134 CHECK_CHARSET_GET_ID (elt
, this_id
);
1137 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1139 this_charset
= CHARSET_FROM_ID (this_id
);
1140 if (charset
.min_char
> this_charset
->min_char
)
1141 charset
.min_char
= this_charset
->min_char
;
1142 if (charset
.max_char
< this_charset
->max_char
)
1143 charset
.max_char
= this_charset
->max_char
;
1144 for (i
= 0; i
< 190; i
++)
1145 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1149 error ("None of :code-offset, :map, :parents are specified");
1151 val
= args
[charset_arg_unify_map
];
1152 if (! NILP (val
) && !STRINGP (val
))
1154 ASET (attrs
, charset_unify_map
, val
);
1156 CHECK_LIST (args
[charset_arg_plist
]);
1157 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1159 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1161 if (charset
.hash_index
>= 0)
1163 new_definition_p
= 0;
1164 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1165 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1169 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1171 if (charset_table_used
== charset_table_size
)
1173 struct charset
*new_table
1174 = (struct charset
*) xmalloc (sizeof (struct charset
)
1175 * (charset_table_size
+ 16));
1176 memcpy (new_table
, charset_table
,
1177 sizeof (struct charset
) * charset_table_size
);
1178 charset_table_size
+= 16;
1179 charset_table
= new_table
;
1181 id
= charset_table_used
++;
1182 new_definition_p
= 1;
1185 ASET (attrs
, charset_id
, make_number (id
));
1187 charset_table
[id
] = charset
;
1189 if (charset
.method
== CHARSET_METHOD_MAP
)
1191 load_charset (&charset
, 0);
1192 charset_table
[id
] = charset
;
1195 if (charset
.iso_final
>= 0)
1197 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1198 charset
.iso_final
) = id
;
1199 if (new_definition_p
)
1200 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1201 Fcons (make_number (id
), Qnil
));
1202 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1203 charset_jisx0201_roman
= id
;
1204 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1205 charset_jisx0208_1978
= id
;
1206 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1207 charset_jisx0208
= id
;
1208 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1209 charset_ksc5601
= id
;
1212 if (charset
.emacs_mule_id
>= 0)
1214 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1215 if (charset
.emacs_mule_id
< 0xA0)
1216 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1218 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1219 if (new_definition_p
)
1220 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1221 Fcons (make_number (id
), Qnil
));
1224 if (new_definition_p
)
1226 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1227 if (charset
.supplementary_p
)
1228 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1229 Fcons (make_number (id
), Qnil
));
1234 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1236 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1238 if (cs
->supplementary_p
)
1241 if (EQ (tail
, Vcharset_ordered_list
))
1242 Vcharset_ordered_list
= Fcons (make_number (id
),
1243 Vcharset_ordered_list
);
1244 else if (NILP (tail
))
1245 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1246 Fcons (make_number (id
), Qnil
));
1249 val
= Fcons (XCAR (tail
), XCDR (tail
));
1250 XSETCDR (tail
, val
);
1251 XSETCAR (tail
, make_number (id
));
1254 charset_ordered_list_tick
++;
1261 /* Same as Fdefine_charset_internal but arguments are more convenient
1262 to call from C (typically in syms_of_charset). This can define a
1263 charset of `offset' method only. Return the ID of the new
1267 define_charset_internal (Lisp_Object name
,
1269 const unsigned char *code_space
,
1270 unsigned min_code
, unsigned max_code
,
1271 int iso_final
, int iso_revision
, int emacs_mule_id
,
1272 int ascii_compatible
, int supplementary
,
1275 Lisp_Object args
[charset_arg_max
];
1276 Lisp_Object plist
[14];
1280 args
[charset_arg_name
] = name
;
1281 args
[charset_arg_dimension
] = make_number (dimension
);
1282 val
= Fmake_vector (make_number (8), make_number (0));
1283 for (i
= 0; i
< 8; i
++)
1284 ASET (val
, i
, make_number (code_space
[i
]));
1285 args
[charset_arg_code_space
] = val
;
1286 args
[charset_arg_min_code
] = make_number (min_code
);
1287 args
[charset_arg_max_code
] = make_number (max_code
);
1288 args
[charset_arg_iso_final
]
1289 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1290 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1291 args
[charset_arg_emacs_mule_id
]
1292 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1293 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1294 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1295 args
[charset_arg_invalid_code
] = Qnil
;
1296 args
[charset_arg_code_offset
] = make_number (code_offset
);
1297 args
[charset_arg_map
] = Qnil
;
1298 args
[charset_arg_subset
] = Qnil
;
1299 args
[charset_arg_superset
] = Qnil
;
1300 args
[charset_arg_unify_map
] = Qnil
;
1302 plist
[0] = intern_c_string (":name");
1303 plist
[1] = args
[charset_arg_name
];
1304 plist
[2] = intern_c_string (":dimension");
1305 plist
[3] = args
[charset_arg_dimension
];
1306 plist
[4] = intern_c_string (":code-space");
1307 plist
[5] = args
[charset_arg_code_space
];
1308 plist
[6] = intern_c_string (":iso-final-char");
1309 plist
[7] = args
[charset_arg_iso_final
];
1310 plist
[8] = intern_c_string (":emacs-mule-id");
1311 plist
[9] = args
[charset_arg_emacs_mule_id
];
1312 plist
[10] = intern_c_string (":ascii-compatible-p");
1313 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1314 plist
[12] = intern_c_string (":code-offset");
1315 plist
[13] = args
[charset_arg_code_offset
];
1317 args
[charset_arg_plist
] = Flist (14, plist
);
1318 Fdefine_charset_internal (charset_arg_max
, args
);
1320 return XINT (CHARSET_SYMBOL_ID (name
));
1324 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1325 Sdefine_charset_alias
, 2, 2, 0,
1326 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1327 (Lisp_Object alias
, Lisp_Object charset
)
1331 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1332 Fputhash (alias
, attr
, Vcharset_hash_table
);
1333 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1338 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1339 doc
: /* Return the property list of CHARSET. */)
1340 (Lisp_Object charset
)
1344 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1345 return CHARSET_ATTR_PLIST (attrs
);
1349 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1350 doc
: /* Set CHARSET's property list to PLIST. */)
1351 (Lisp_Object charset
, Lisp_Object plist
)
1355 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1356 CHARSET_ATTR_PLIST (attrs
) = plist
;
1361 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1362 doc
: /* Unify characters of CHARSET with Unicode.
1363 This means reading the relevant file and installing the table defined
1364 by CHARSET's `:unify-map' property.
1366 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1367 the same meaning as the `:unify-map' attribute in the function
1368 `define-charset' (which see).
1370 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1371 (Lisp_Object charset
, Lisp_Object unify_map
, Lisp_Object deunify
)
1376 CHECK_CHARSET_GET_ID (charset
, id
);
1377 cs
= CHARSET_FROM_ID (id
);
1379 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1380 : ! CHARSET_UNIFIED_P (cs
))
1383 CHARSET_UNIFIED_P (cs
) = 0;
1386 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1387 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1388 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1389 if (NILP (unify_map
))
1390 unify_map
= CHARSET_UNIFY_MAP (cs
);
1393 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1394 signal_error ("Bad unify-map", unify_map
);
1395 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1397 if (NILP (Vchar_unify_table
))
1398 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1399 char_table_set_range (Vchar_unify_table
,
1400 cs
->min_char
, cs
->max_char
, charset
);
1401 CHARSET_UNIFIED_P (cs
) = 1;
1403 else if (CHAR_TABLE_P (Vchar_unify_table
))
1405 int min_code
= CHARSET_MIN_CODE (cs
);
1406 int max_code
= CHARSET_MAX_CODE (cs
);
1407 int min_char
= DECODE_CHAR (cs
, min_code
);
1408 int max_char
= DECODE_CHAR (cs
, max_code
);
1410 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1416 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1417 Sget_unused_iso_final_char
, 2, 2, 0,
1419 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1420 DIMENSION is the number of bytes to represent a character: 1 or 2.
1421 CHARS is the number of characters in a dimension: 94 or 96.
1423 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1424 If there's no unused final char for the specified kind of charset,
1426 (Lisp_Object dimension
, Lisp_Object chars
)
1430 CHECK_NUMBER (dimension
);
1431 CHECK_NUMBER (chars
);
1432 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1433 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1434 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1435 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1436 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1437 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1439 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1443 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
1445 CHECK_NATNUM (dimension
);
1446 CHECK_NATNUM (chars
);
1447 CHECK_NATNUM (final_char
);
1449 if (XINT (dimension
) > 3)
1450 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1451 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1452 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1453 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1454 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1458 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1460 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1462 On decoding by an ISO-2022 base coding system, when a charset
1463 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1464 if CHARSET is designated instead. */)
1465 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
, Lisp_Object charset
)
1470 CHECK_CHARSET_GET_ID (charset
, id
);
1471 check_iso_charset_parameter (dimension
, chars
, final_char
);
1472 chars_flag
= XINT (chars
) == 96;
1473 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1478 /* Return information about charsets in the text at PTR of NBYTES
1479 bytes, which are NCHARS characters. The value is:
1481 0: Each character is represented by one byte. This is always
1482 true for a unibyte string. For a multibyte string, true if
1483 it contains only ASCII characters.
1485 1: No charsets other than ascii, control-1, and latin-1 are
1492 string_xstring_p (Lisp_Object string
)
1494 const unsigned char *p
= SDATA (string
);
1495 const unsigned char *endp
= p
+ SBYTES (string
);
1497 if (SCHARS (string
) == SBYTES (string
))
1502 int c
= STRING_CHAR_ADVANCE (p
);
1511 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1513 CHARSETS is a vector. If Nth element is non-nil, it means the
1514 charset whose id is N is already found.
1516 It may lookup a translation table TABLE if supplied. */
1519 find_charsets_in_text (const unsigned char *ptr
, EMACS_INT nchars
, EMACS_INT nbytes
, Lisp_Object charsets
, Lisp_Object table
, int multibyte
)
1521 const unsigned char *pend
= ptr
+ nbytes
;
1523 if (nchars
== nbytes
)
1526 ASET (charsets
, charset_ascii
, Qt
);
1533 c
= translate_char (table
, c
);
1534 if (ASCII_BYTE_P (c
))
1535 ASET (charsets
, charset_ascii
, Qt
);
1537 ASET (charsets
, charset_eight_bit
, Qt
);
1544 int c
= STRING_CHAR_ADVANCE (ptr
);
1545 struct charset
*charset
;
1548 c
= translate_char (table
, c
);
1549 charset
= CHAR_CHARSET (c
);
1550 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1555 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1557 doc
: /* Return a list of charsets in the region between BEG and END.
1558 BEG and END are buffer positions.
1559 Optional arg TABLE if non-nil is a translation table to look up.
1561 If the current buffer is unibyte, the returned list may contain
1562 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1563 (Lisp_Object beg
, Lisp_Object end
, Lisp_Object table
)
1565 Lisp_Object charsets
;
1566 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1569 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1571 validate_region (&beg
, &end
);
1572 from
= XFASTINT (beg
);
1573 stop
= to
= XFASTINT (end
);
1575 if (from
< GPT
&& GPT
< to
)
1578 stop_byte
= GPT_BYTE
;
1581 stop_byte
= CHAR_TO_BYTE (stop
);
1583 from_byte
= CHAR_TO_BYTE (from
);
1585 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1588 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1589 stop_byte
- from_byte
, charsets
, table
,
1593 from
= stop
, from_byte
= stop_byte
;
1594 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1601 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1602 if (!NILP (AREF (charsets
, i
)))
1603 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1607 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1609 doc
: /* Return a list of charsets in STR.
1610 Optional arg TABLE if non-nil is a translation table to look up.
1612 If STR is unibyte, the returned list may contain
1613 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1614 (Lisp_Object str
, Lisp_Object table
)
1616 Lisp_Object charsets
;
1622 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1623 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1625 STRING_MULTIBYTE (str
));
1627 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1628 if (!NILP (AREF (charsets
, i
)))
1629 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1635 /* Return a unified character code for C (>= 0x110000). VAL is a
1636 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1639 maybe_unify_char (int c
, Lisp_Object val
)
1641 struct charset
*charset
;
1648 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1649 load_charset (charset
, 1);
1650 if (! inhibit_load_charset_map
)
1652 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1658 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1659 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1668 /* Return a character correponding to the code-point CODE of
1672 decode_char (struct charset
*charset
, unsigned int code
)
1675 enum charset_method method
= CHARSET_METHOD (charset
);
1677 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1680 if (method
== CHARSET_METHOD_SUBSET
)
1682 Lisp_Object subset_info
;
1684 subset_info
= CHARSET_SUBSET (charset
);
1685 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1686 code
-= XINT (AREF (subset_info
, 3));
1687 if (code
< XFASTINT (AREF (subset_info
, 1))
1688 || code
> XFASTINT (AREF (subset_info
, 2)))
1691 c
= DECODE_CHAR (charset
, code
);
1693 else if (method
== CHARSET_METHOD_SUPERSET
)
1695 Lisp_Object parents
;
1697 parents
= CHARSET_SUPERSET (charset
);
1699 for (; CONSP (parents
); parents
= XCDR (parents
))
1701 int id
= XINT (XCAR (XCAR (parents
)));
1702 int code_offset
= XINT (XCDR (XCAR (parents
)));
1703 unsigned this_code
= code
- code_offset
;
1705 charset
= CHARSET_FROM_ID (id
);
1706 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1712 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1716 if (method
== CHARSET_METHOD_MAP
)
1718 Lisp_Object decoder
;
1720 decoder
= CHARSET_DECODER (charset
);
1721 if (! VECTORP (decoder
))
1723 load_charset (charset
, 1);
1724 decoder
= CHARSET_DECODER (charset
);
1726 if (VECTORP (decoder
))
1727 c
= XINT (AREF (decoder
, char_index
));
1729 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1731 else /* method == CHARSET_METHOD_OFFSET */
1733 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1734 if (CHARSET_UNIFIED_P (charset
)
1735 && c
> MAX_UNICODE_CHAR
)
1736 MAYBE_UNIFY_CHAR (c
);
1743 /* Variable used temporarily by the macro ENCODE_CHAR. */
1744 Lisp_Object charset_work
;
1746 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1747 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1748 use CHARSET's strict_max_char instead of max_char. */
1751 encode_char (struct charset
*charset
, int c
)
1754 enum charset_method method
= CHARSET_METHOD (charset
);
1756 if (CHARSET_UNIFIED_P (charset
))
1758 Lisp_Object deunifier
;
1759 int code_index
= -1;
1761 deunifier
= CHARSET_DEUNIFIER (charset
);
1762 if (! CHAR_TABLE_P (deunifier
))
1764 load_charset (charset
, 2);
1765 deunifier
= CHARSET_DEUNIFIER (charset
);
1767 if (CHAR_TABLE_P (deunifier
))
1769 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1771 if (INTEGERP (deunified
))
1772 code_index
= XINT (deunified
);
1776 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1778 if (code_index
>= 0)
1779 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1782 if (method
== CHARSET_METHOD_SUBSET
)
1784 Lisp_Object subset_info
;
1785 struct charset
*this_charset
;
1787 subset_info
= CHARSET_SUBSET (charset
);
1788 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1789 code
= ENCODE_CHAR (this_charset
, c
);
1790 if (code
== CHARSET_INVALID_CODE (this_charset
)
1791 || code
< XFASTINT (AREF (subset_info
, 1))
1792 || code
> XFASTINT (AREF (subset_info
, 2)))
1793 return CHARSET_INVALID_CODE (charset
);
1794 code
+= XINT (AREF (subset_info
, 3));
1798 if (method
== CHARSET_METHOD_SUPERSET
)
1800 Lisp_Object parents
;
1802 parents
= CHARSET_SUPERSET (charset
);
1803 for (; CONSP (parents
); parents
= XCDR (parents
))
1805 int id
= XINT (XCAR (XCAR (parents
)));
1806 int code_offset
= XINT (XCDR (XCAR (parents
)));
1807 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1809 code
= ENCODE_CHAR (this_charset
, c
);
1810 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1811 return code
+ code_offset
;
1813 return CHARSET_INVALID_CODE (charset
);
1816 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1817 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1818 return CHARSET_INVALID_CODE (charset
);
1820 if (method
== CHARSET_METHOD_MAP
)
1822 Lisp_Object encoder
;
1825 encoder
= CHARSET_ENCODER (charset
);
1826 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1828 load_charset (charset
, 2);
1829 encoder
= CHARSET_ENCODER (charset
);
1831 if (CHAR_TABLE_P (encoder
))
1833 val
= CHAR_TABLE_REF (encoder
, c
);
1835 return CHARSET_INVALID_CODE (charset
);
1837 if (! CHARSET_COMPACT_CODES_P (charset
))
1838 code
= INDEX_TO_CODE_POINT (charset
, code
);
1842 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1843 code
= INDEX_TO_CODE_POINT (charset
, code
);
1846 else /* method == CHARSET_METHOD_OFFSET */
1848 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1850 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1857 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1858 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1859 Return nil if CODE-POINT is not valid in CHARSET.
1861 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1863 Optional argument RESTRICTION specifies a way to map the pair of CCS
1864 and CODE-POINT to a character. Currently not supported and just ignored. */)
1865 (Lisp_Object charset
, Lisp_Object code_point
, Lisp_Object restriction
)
1869 struct charset
*charsetp
;
1871 CHECK_CHARSET_GET_ID (charset
, id
);
1872 if (CONSP (code_point
))
1874 CHECK_NATNUM_CAR (code_point
);
1875 CHECK_NATNUM_CDR (code_point
);
1876 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1880 CHECK_NATNUM (code_point
);
1881 code
= XINT (code_point
);
1883 charsetp
= CHARSET_FROM_ID (id
);
1884 c
= DECODE_CHAR (charsetp
, code
);
1885 return (c
>= 0 ? make_number (c
) : Qnil
);
1889 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1890 doc
: /* Encode the character CH into a code-point of CHARSET.
1891 Return nil if CHARSET doesn't include CH.
1893 Optional argument RESTRICTION specifies a way to map CH to a
1894 code-point in CCS. Currently not supported and just ignored. */)
1895 (Lisp_Object ch
, Lisp_Object charset
, Lisp_Object restriction
)
1899 struct charset
*charsetp
;
1901 CHECK_CHARSET_GET_ID (charset
, id
);
1903 charsetp
= CHARSET_FROM_ID (id
);
1904 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1905 if (code
== CHARSET_INVALID_CODE (charsetp
))
1907 if (code
> 0x7FFFFFF)
1908 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1909 return make_number (code
);
1913 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1915 /* Return a character of CHARSET whose position codes are CODEn.
1917 CODE1 through CODE4 are optional, but if you don't supply sufficient
1918 position codes, it is assumed that the minimum code in each dimension
1920 (Lisp_Object charset
, Lisp_Object code1
, Lisp_Object code2
, Lisp_Object code3
, Lisp_Object code4
)
1923 struct charset
*charsetp
;
1927 CHECK_CHARSET_GET_ID (charset
, id
);
1928 charsetp
= CHARSET_FROM_ID (id
);
1930 dimension
= CHARSET_DIMENSION (charsetp
);
1932 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1933 ? 0 : CHARSET_MIN_CODE (charsetp
));
1936 CHECK_NATNUM (code1
);
1937 if (XFASTINT (code1
) >= 0x100)
1938 args_out_of_range (make_number (0xFF), code1
);
1939 code
= XFASTINT (code1
);
1945 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1948 CHECK_NATNUM (code2
);
1949 if (XFASTINT (code2
) >= 0x100)
1950 args_out_of_range (make_number (0xFF), code2
);
1951 code
|= XFASTINT (code2
);
1958 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1961 CHECK_NATNUM (code3
);
1962 if (XFASTINT (code3
) >= 0x100)
1963 args_out_of_range (make_number (0xFF), code3
);
1964 code
|= XFASTINT (code3
);
1971 code
|= charsetp
->code_space
[0];
1974 CHECK_NATNUM (code4
);
1975 if (XFASTINT (code4
) >= 0x100)
1976 args_out_of_range (make_number (0xFF), code4
);
1977 code
|= XFASTINT (code4
);
1984 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1986 c
= DECODE_CHAR (charsetp
, code
);
1988 error ("Invalid code(s)");
1989 return make_number (c
);
1993 /* Return the first charset in CHARSET_LIST that contains C.
1994 CHARSET_LIST is a list of charset IDs. If it is nil, use
1995 Vcharset_ordered_list. */
1998 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
2002 if (NILP (charset_list
))
2003 charset_list
= Vcharset_ordered_list
;
2007 while (CONSP (charset_list
))
2009 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2010 unsigned code
= ENCODE_CHAR (charset
, c
);
2012 if (code
!= CHARSET_INVALID_CODE (charset
))
2015 *code_return
= code
;
2018 charset_list
= XCDR (charset_list
);
2020 && c
<= MAX_UNICODE_CHAR
2021 && EQ (charset_list
, Vcharset_non_preferred_head
))
2022 return CHARSET_FROM_ID (charset_unicode
);
2024 return (maybe_null
? NULL
2025 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2026 : CHARSET_FROM_ID (charset_eight_bit
));
2030 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2032 /*Return list of charset and one to four position-codes of CH.
2033 The charset is decided by the current priority order of charsets.
2034 A position-code is a byte value of each dimension of the code-point of
2035 CH in the charset. */)
2038 struct charset
*charset
;
2043 CHECK_CHARACTER (ch
);
2045 charset
= CHAR_CHARSET (c
);
2048 code
= ENCODE_CHAR (charset
, c
);
2049 if (code
== CHARSET_INVALID_CODE (charset
))
2051 dimension
= CHARSET_DIMENSION (charset
);
2052 for (val
= Qnil
; dimension
> 0; dimension
--)
2054 val
= Fcons (make_number (code
& 0xFF), val
);
2057 return Fcons (CHARSET_NAME (charset
), val
);
2061 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2062 doc
: /* Return the charset of highest priority that contains CH.
2063 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2064 from which to find the charset. It may also be a coding system. In
2065 that case, find the charset from what supported by that coding system. */)
2066 (Lisp_Object ch
, Lisp_Object restriction
)
2068 struct charset
*charset
;
2070 CHECK_CHARACTER (ch
);
2071 if (NILP (restriction
))
2072 charset
= CHAR_CHARSET (XINT (ch
));
2075 if (CONSP (restriction
))
2077 int c
= XFASTINT (ch
);
2079 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2081 struct charset
*charset
;
2083 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), charset
);
2084 if (ENCODE_CHAR (charset
, c
) != CHARSET_INVALID_CODE (charset
))
2085 return XCAR (restriction
);
2089 restriction
= coding_system_charset_list (restriction
);
2090 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2094 return (CHARSET_NAME (charset
));
2098 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2100 Return charset of a character in the current buffer at position POS.
2101 If POS is nil, it defauls to the current point.
2102 If POS is out of range, the value is nil. */)
2106 struct charset
*charset
;
2108 ch
= Fchar_after (pos
);
2109 if (! INTEGERP (ch
))
2111 charset
= CHAR_CHARSET (XINT (ch
));
2112 return (CHARSET_NAME (charset
));
2116 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2118 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2120 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2121 by their DIMENSION, CHARS, and FINAL-CHAR,
2122 whereas Emacs distinguishes them by charset symbol.
2123 See the documentation of the function `charset-info' for the meanings of
2124 DIMENSION, CHARS, and FINAL-CHAR. */)
2125 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
2130 check_iso_charset_parameter (dimension
, chars
, final_char
);
2131 chars_flag
= XFASTINT (chars
) == 96;
2132 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2133 XFASTINT (final_char
));
2134 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2138 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2142 Clear temporary charset mapping tables.
2143 It should be called only from temacs invoked for dumping. */)
2146 if (temp_charset_work
)
2148 free (temp_charset_work
);
2149 temp_charset_work
= NULL
;
2152 if (CHAR_TABLE_P (Vchar_unify_table
))
2153 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2158 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2159 Scharset_priority_list
, 0, 1, 0,
2160 doc
: /* Return the list of charsets ordered by priority.
2161 HIGHESTP non-nil means just return the highest priority one. */)
2162 (Lisp_Object highestp
)
2164 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2166 if (!NILP (highestp
))
2167 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2169 while (!NILP (list
))
2171 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2174 return Fnreverse (val
);
2177 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2179 doc
: /* Assign higher priority to the charsets given as arguments.
2180 usage: (set-charset-priority &rest charsets) */)
2181 (int nargs
, Lisp_Object
*args
)
2183 Lisp_Object new_head
, old_list
, arglist
[2];
2184 Lisp_Object list_2022
, list_emacs_mule
;
2187 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2189 for (i
= 0; i
< nargs
; i
++)
2191 CHECK_CHARSET_GET_ID (args
[i
], id
);
2192 if (! NILP (Fmemq (make_number (id
), old_list
)))
2194 old_list
= Fdelq (make_number (id
), old_list
);
2195 new_head
= Fcons (make_number (id
), new_head
);
2198 arglist
[0] = Fnreverse (new_head
);
2199 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2200 Vcharset_ordered_list
= Fnconc (2, arglist
);
2201 charset_ordered_list_tick
++;
2203 charset_unibyte
= -1;
2204 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2205 CONSP (old_list
); old_list
= XCDR (old_list
))
2207 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2208 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2209 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2210 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2211 if (charset_unibyte
< 0)
2213 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2215 if (CHARSET_DIMENSION (charset
) == 1
2216 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2217 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2218 charset_unibyte
= CHARSET_ID (charset
);
2221 Viso_2022_charset_list
= Fnreverse (list_2022
);
2222 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2223 if (charset_unibyte
< 0)
2224 charset_unibyte
= charset_iso_8859_1
;
2229 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2231 doc
: /* Internal use only.
2232 Return charset identification number of CHARSET. */)
2233 (Lisp_Object charset
)
2237 CHECK_CHARSET_GET_ID (charset
, id
);
2238 return make_number (id
);
2241 struct charset_sort_data
2243 Lisp_Object charset
;
2249 charset_compare (const void *d1
, const void *d2
)
2251 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2252 return (data1
->priority
- data2
->priority
);
2255 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2256 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2257 Return the sorted list. CHARSETS is modified by side effects.
2258 See also `charset-priority-list' and `set-charset-priority'. */)
2259 (Lisp_Object charsets
)
2261 Lisp_Object len
= Flength (charsets
);
2262 int n
= XFASTINT (len
), i
, j
, done
;
2263 Lisp_Object tail
, elt
, attrs
;
2264 struct charset_sort_data
*sort_data
;
2265 int id
, min_id
, max_id
;
2270 SAFE_ALLOCA (sort_data
, struct charset_sort_data
*, sizeof (*sort_data
) * n
);
2271 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2274 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2275 sort_data
[i
].charset
= elt
;
2276 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2278 min_id
= max_id
= id
;
2279 else if (id
< min_id
)
2281 else if (id
> max_id
)
2284 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2285 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2288 id
= XFASTINT (elt
);
2289 if (id
>= min_id
&& id
<= max_id
)
2290 for (j
= 0; j
< n
; j
++)
2291 if (sort_data
[j
].id
== id
)
2293 sort_data
[j
].priority
= i
;
2297 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2298 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2299 XSETCAR (tail
, sort_data
[i
].charset
);
2308 Lisp_Object tempdir
;
2309 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2310 if (access ((char *) SDATA (tempdir
), 0) < 0)
2312 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2313 Emacs will not function correctly without the character map files.\n\
2314 Please check your installation!\n",
2316 /* TODO should this be a fatal error? (Bug#909) */
2319 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2324 init_charset_once (void)
2328 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2329 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2330 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2331 iso_charset_table
[i
][j
][k
] = -1;
2333 for (i
= 0; i
< 256; i
++)
2334 emacs_mule_charset
[i
] = NULL
;
2336 charset_jisx0201_roman
= -1;
2337 charset_jisx0208_1978
= -1;
2338 charset_jisx0208
= -1;
2339 charset_ksc5601
= -1;
2345 syms_of_charset (void)
2347 DEFSYM (Qcharsetp
, "charsetp");
2349 DEFSYM (Qascii
, "ascii");
2350 DEFSYM (Qunicode
, "unicode");
2351 DEFSYM (Qemacs
, "emacs");
2352 DEFSYM (Qeight_bit
, "eight-bit");
2353 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2358 staticpro (&Vcharset_ordered_list
);
2359 Vcharset_ordered_list
= Qnil
;
2361 staticpro (&Viso_2022_charset_list
);
2362 Viso_2022_charset_list
= Qnil
;
2364 staticpro (&Vemacs_mule_charset_list
);
2365 Vemacs_mule_charset_list
= Qnil
;
2367 /* Don't staticpro them here. It's done in syms_of_fns. */
2368 QCtest
= intern (":test");
2369 Qeq
= intern ("eq");
2371 staticpro (&Vcharset_hash_table
);
2373 Lisp_Object args
[2];
2376 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2379 charset_table_size
= 128;
2380 charset_table
= ((struct charset
*)
2381 xmalloc (sizeof (struct charset
) * charset_table_size
));
2382 charset_table_used
= 0;
2384 defsubr (&Scharsetp
);
2385 defsubr (&Smap_charset_chars
);
2386 defsubr (&Sdefine_charset_internal
);
2387 defsubr (&Sdefine_charset_alias
);
2388 defsubr (&Scharset_plist
);
2389 defsubr (&Sset_charset_plist
);
2390 defsubr (&Sunify_charset
);
2391 defsubr (&Sget_unused_iso_final_char
);
2392 defsubr (&Sdeclare_equiv_charset
);
2393 defsubr (&Sfind_charset_region
);
2394 defsubr (&Sfind_charset_string
);
2395 defsubr (&Sdecode_char
);
2396 defsubr (&Sencode_char
);
2397 defsubr (&Ssplit_char
);
2398 defsubr (&Smake_char
);
2399 defsubr (&Schar_charset
);
2400 defsubr (&Scharset_after
);
2401 defsubr (&Siso_charset
);
2402 defsubr (&Sclear_charset_maps
);
2403 defsubr (&Scharset_priority_list
);
2404 defsubr (&Sset_charset_priority
);
2405 defsubr (&Scharset_id_internal
);
2406 defsubr (&Ssort_charsets
);
2408 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2409 doc
: /* *List of directories to search for charset map files. */);
2410 Vcharset_map_path
= Qnil
;
2412 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2413 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2414 inhibit_load_charset_map
= 0;
2416 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2417 doc
: /* List of all charsets ever defined. */);
2418 Vcharset_list
= Qnil
;
2420 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2421 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2422 If the current language environment is for multiple languages (e.g. "Latin-1"),
2423 the value may be a list of mnemonics. */);
2424 Vcurrent_iso639_language
= Qnil
;
2427 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2428 0, 127, 'B', -1, 0, 1, 0, 0);
2430 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2431 0, 255, -1, -1, -1, 1, 0, 0);
2433 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2434 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2436 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2437 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2439 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2440 128, 255, -1, 0, -1, 0, 1,
2441 MAX_5_BYTE_CHAR
+ 1);
2442 charset_unibyte
= charset_iso_8859_1
;
2447 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2448 (do not change this comment) */