1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
35 #include "character.h"
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
56 /* List of all charsets. This variable is used only from Emacs
58 Lisp_Object Vcharset_list
;
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table
;
64 /* Table of struct charset. */
65 struct charset
*charset_table
;
67 static int charset_table_size
;
68 static int charset_table_used
;
70 Lisp_Object Qcharsetp
;
72 /* Special charset symbols. */
74 Lisp_Object Qeight_bit
;
75 Lisp_Object Qiso_8859_1
;
79 /* The corresponding charsets. */
81 int charset_eight_bit
;
82 int charset_iso_8859_1
;
86 /* The other special charsets. */
87 int charset_jisx0201_roman
;
88 int charset_jisx0208_1978
;
91 /* Value of charset attribute `charset-iso-plane'. */
94 /* Charset of unibyte characters. */
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list
;
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
102 Lisp_Object Vcharset_non_preferred_head
;
104 /* Incremented every time we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
107 unsigned short charset_ordered_list_tick
;
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list
;
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list
;
115 struct charset
*emacs_mule_charset
[256];
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
121 Lisp_Object Vcharset_map_path
;
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map
;
126 Lisp_Object Vcurrent_iso639_language
;
128 /* Defined in chartab.c */
130 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
131 Lisp_Object function
, Lisp_Object table
,
132 Lisp_Object arg
, struct charset
*charset
,
133 unsigned from
, unsigned to
));
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
175 /* The current charset for which the following tables are setup. */
176 struct charset
*current
;
178 /* 1 iff the following table is used for encoder. */
181 /* When the following table is used for encoding, minimum and
182 maximum character of the current charset. */
183 int min_char
, max_char
;
185 /* A Unicode character corresponding to the code index 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 index 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder
[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder
[0x20000];
203 } *temp_charset_work
;
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
/* Look up the code-index recorded for character C in the encoder
   table of temp_charset_work.

   The expansion evaluates to 0 when C equals zero_index_char.
   Otherwise it reads the encoder table -- indexed by C itself when
   C < 0x20000, and by C - 0x10000 for larger values -- and yields the
   stored entry, or -1 when that entry is zero.

   C appears several times in the expansion, so the argument must not
   have side effects. */
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
/* Store character C as the decoder table entry for code-index CODE
   in temp_charset_work. The expansion is the assignment expression
   itself, so its value is C. */
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
/* Fetch the decoder table entry for code-index CODE from
   temp_charset_work. An entry of -1 means CODE does not belong to
   the current charset. No bounds check is made on CODE. */
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
229 /* Set to 1 to warn that a charset map is loaded and thus buffer
230 text and string data may be relocated. */
231 int charset_map_loaded
;
233 struct charset_map_entries
239 struct charset_map_entries
*next
;
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
258 If CONTROL_FLAG is 2, setup the following tables.
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
268 load_charset_map (charset
, entries
, n_entries
, control_flag
)
269 struct charset
*charset
;
270 struct charset_map_entries
*entries
;
274 Lisp_Object vec
, table
;
275 unsigned max_code
= CHARSET_MAX_CODE (charset
);
276 int ascii_compatible_p
= charset
->ascii_compatible_p
;
277 int min_char
, max_char
, nonascii_min_char
;
279 unsigned char *fast_map
= charset
->fast_map
;
286 if (! inhibit_load_charset_map
)
288 if (control_flag
== 1)
290 if (charset
->method
== CHARSET_METHOD_MAP
)
292 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
294 vec
= CHARSET_DECODER (charset
)
295 = Fmake_vector (make_number (n
), make_number (-1));
299 char_table_set_range (Vchar_unify_table
,
300 charset
->min_char
, charset
->max_char
,
306 table
= Fmake_char_table (Qnil
, Qnil
);
307 if (charset
->method
== CHARSET_METHOD_MAP
)
308 CHARSET_ENCODER (charset
) = table
;
310 CHARSET_DEUNIFIER (charset
) = table
;
315 if (! temp_charset_work
)
316 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
317 if (control_flag
== 1)
319 memset (temp_charset_work
->table
.decoder
, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work
->for_encoder
= 0;
325 memset (temp_charset_work
->table
.encoder
, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work
->zero_index_char
= -1;
329 temp_charset_work
->current
= charset
;
330 temp_charset_work
->for_encoder
= (control_flag
== 2);
333 charset_map_loaded
= 1;
336 min_char
= max_char
= entries
->entry
[0].c
;
337 nonascii_min_char
= MAX_CHAR
;
338 for (i
= 0; i
< n_entries
; i
++)
341 int from_index
, to_index
;
343 int idx
= i
% 0x10000;
345 if (i
> 0 && idx
== 0)
346 entries
= entries
->next
;
347 from
= entries
->entry
[idx
].from
;
348 to
= entries
->entry
[idx
].to
;
349 from_c
= entries
->entry
[idx
].c
;
350 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
353 to_index
= from_index
;
358 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
359 to_c
= from_c
+ (to_index
- from_index
);
361 if (from_index
< 0 || to_index
< 0)
366 else if (from_c
< min_char
)
369 if (control_flag
== 1)
371 if (charset
->method
== CHARSET_METHOD_MAP
)
372 for (; from_index
<= to_index
; from_index
++, from_c
++)
373 ASET (vec
, from_index
, make_number (from_c
));
375 for (; from_index
<= to_index
; from_index
++, from_c
++)
376 CHAR_TABLE_SET (Vchar_unify_table
,
377 CHARSET_CODE_OFFSET (charset
) + from_index
,
378 make_number (from_c
));
380 else if (control_flag
== 2)
382 if (charset
->method
== CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset
))
384 for (; from_index
<= to_index
; from_index
++, from_c
++)
386 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
388 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
389 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
392 for (; from_index
<= to_index
; from_index
++, from_c
++)
394 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
395 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
398 else if (control_flag
== 3)
399 for (; from_index
<= to_index
; from_index
++, from_c
++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
401 else if (control_flag
== 4)
402 for (; from_index
<= to_index
; from_index
++, from_c
++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
404 else /* control_flag == 0 */
406 if (ascii_compatible_p
)
408 if (! ASCII_BYTE_P (from_c
))
410 if (from_c
< nonascii_min_char
)
411 nonascii_min_char
= from_c
;
413 else if (! ASCII_BYTE_P (to_c
))
415 nonascii_min_char
= 0x80;
419 for (; from_c
<= to_c
; from_c
++)
420 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
424 if (control_flag
== 0)
426 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
427 ? nonascii_min_char
: min_char
);
428 CHARSET_MAX_CHAR (charset
) = max_char
;
430 else if (control_flag
== 4)
432 temp_charset_work
->min_char
= min_char
;
433 temp_charset_work
->max_char
= max_char
;
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment character '#'. */
441 static INLINE
unsigned
449 while ((c
= getc (fp
)) != EOF
)
453 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
457 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
469 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
471 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
473 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
474 n
= (n
* 10) + c
- '0';
480 extern Lisp_Object Qfile_name_handler_alist
;
482 /* Return a mapping vector for CHARSET loaded from MAPFILE.
483 Each line of MAPFILE has this form
485 where 0xAAAA is a code-point and 0xCCCC is the corresponding
486 character code, or this form
488 where 0xAAAA and 0xBBBB are code-points specifying a range, and
489 0xCCCC is the first character code of the range.
491 The returned vector has this form:
492 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
493 where CODE1 is a code-point or a cons of code-points specifying a
496 Note that this function uses `openp' to open MAPFILE but ignores
497 `file-name-handler-alist' to avoid running any Lisp code. */
499 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
502 load_charset_map_from_file (charset
, mapfile
, control_flag
)
503 struct charset
*charset
;
507 unsigned min_code
= CHARSET_MIN_CODE (charset
);
508 unsigned max_code
= CHARSET_MAX_CODE (charset
);
512 Lisp_Object suffixes
;
513 struct charset_map_entries
*head
, *entries
;
515 int count
= SPECPDL_INDEX ();
517 suffixes
= Fcons (build_string (".map"),
518 Fcons (build_string (".TXT"), Qnil
));
520 specbind (Qfile_name_handler_alist
, Qnil
);
521 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
522 unbind_to (count
, Qnil
);
524 || ! (fp
= fdopen (fd
, "r")))
525 error ("Failure in loading charset map: %S", SDATA (mapfile
));
527 head
= entries
= ((struct charset_map_entries
*)
528 alloca (sizeof (struct charset_map_entries
)));
537 from
= read_hex (fp
, &eof
);
540 if (getc (fp
) == '-')
541 to
= read_hex (fp
, &eof
);
544 c
= (int) read_hex (fp
, &eof
);
546 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
549 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
551 entries
->next
= ((struct charset_map_entries
*)
552 alloca (sizeof (struct charset_map_entries
)));
553 entries
= entries
->next
;
555 idx
= n_entries
% 0x10000;
556 entries
->entry
[idx
].from
= from
;
557 entries
->entry
[idx
].to
= to
;
558 entries
->entry
[idx
].c
= c
;
564 load_charset_map (charset
, head
, n_entries
, control_flag
);
568 load_charset_map_from_vector (charset
, vec
, control_flag
)
569 struct charset
*charset
;
573 unsigned min_code
= CHARSET_MIN_CODE (charset
);
574 unsigned max_code
= CHARSET_MAX_CODE (charset
);
575 struct charset_map_entries
*head
, *entries
;
577 int len
= ASIZE (vec
);
582 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
586 head
= entries
= ((struct charset_map_entries
*)
587 alloca (sizeof (struct charset_map_entries
)));
589 for (i
= 0; i
< len
; i
+= 2)
591 Lisp_Object val
, val2
;
603 from
= XFASTINT (val
);
604 to
= XFASTINT (val2
);
609 from
= to
= XFASTINT (val
);
611 val
= AREF (vec
, i
+ 1);
615 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
618 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
620 entries
->next
= ((struct charset_map_entries
*)
621 alloca (sizeof (struct charset_map_entries
)));
622 entries
= entries
->next
;
624 idx
= n_entries
% 0x10000;
625 entries
->entry
[idx
].from
= from
;
626 entries
->entry
[idx
].to
= to
;
627 entries
->entry
[idx
].c
= c
;
631 load_charset_map (charset
, head
, n_entries
, control_flag
);
635 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
636 map it is (see the comment of load_charset_map for the detail). */
639 load_charset (charset
, control_flag
)
640 struct charset
*charset
;
645 if (inhibit_load_charset_map
647 && charset
== temp_charset_work
->current
648 && (control_flag
== 2 == temp_charset_work
->for_encoder
))
651 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
652 map
= CHARSET_MAP (charset
);
653 else if (CHARSET_UNIFIED_P (charset
))
654 map
= CHARSET_UNIFY_MAP (charset
);
656 load_charset_map_from_file (charset
, map
, control_flag
);
658 load_charset_map_from_vector (charset
, map
, control_flag
);
662 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
663 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
667 return (CHARSETP (object
) ? Qt
: Qnil
);
671 void map_charset_for_dump
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
672 Lisp_Object function
, Lisp_Object arg
,
673 unsigned from
, unsigned to
));
676 map_charset_for_dump (c_function
, function
, arg
, from
, to
)
677 void (*c_function
) (Lisp_Object
, Lisp_Object
);
678 Lisp_Object function
, arg
;
681 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
682 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
687 range
= Fcons (Qnil
, Qnil
);
690 c
= temp_charset_work
->min_char
;
691 stop
= (temp_charset_work
->max_char
< 0x20000
692 ? temp_charset_work
->max_char
: 0xFFFF);
696 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
698 if (index
>= from_idx
&& index
<= to_idx
)
700 if (NILP (XCAR (range
)))
701 XSETCAR (range
, make_number (c
));
703 else if (! NILP (XCAR (range
)))
705 XSETCDR (range
, make_number (c
- 1));
707 (*c_function
) (arg
, range
);
709 call2 (function
, range
, arg
);
710 XSETCAR (range
, Qnil
);
714 if (c
== temp_charset_work
->max_char
)
716 if (! NILP (XCAR (range
)))
718 XSETCDR (range
, make_number (c
));
720 (*c_function
) (arg
, range
);
722 call2 (function
, range
, arg
);
727 stop
= temp_charset_work
->max_char
;
735 map_charset_chars (c_function
, function
, arg
,
737 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
738 Lisp_Object function
, arg
;
739 struct charset
*charset
;
745 partial
= (from
> CHARSET_MIN_CODE (charset
)
746 || to
< CHARSET_MAX_CODE (charset
));
748 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
750 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
751 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
752 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
753 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
755 if (CHARSET_UNIFIED_P (charset
))
757 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
758 load_charset (charset
, 2);
759 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
760 map_char_table_for_charset (c_function
, function
,
761 CHARSET_DEUNIFIER (charset
), arg
,
762 partial
? charset
: NULL
, from
, to
);
764 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
767 range
= Fcons (make_number (from_c
), make_number (to_c
));
769 (*c_function
) (arg
, range
);
771 call2 (function
, range
, arg
);
773 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
775 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
776 load_charset (charset
, 2);
777 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
778 map_char_table_for_charset (c_function
, function
,
779 CHARSET_ENCODER (charset
), arg
,
780 partial
? charset
: NULL
, from
, to
);
782 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
784 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
786 Lisp_Object subset_info
;
789 subset_info
= CHARSET_SUBSET (charset
);
790 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
791 offset
= XINT (AREF (subset_info
, 3));
793 if (from
< XFASTINT (AREF (subset_info
, 1)))
794 from
= XFASTINT (AREF (subset_info
, 1));
796 if (to
> XFASTINT (AREF (subset_info
, 2)))
797 to
= XFASTINT (AREF (subset_info
, 2));
798 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
800 else /* i.e. CHARSET_METHOD_SUPERSET */
804 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
805 parents
= XCDR (parents
))
808 unsigned this_from
, this_to
;
810 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
811 offset
= XINT (XCDR (XCAR (parents
)));
812 this_from
= from
> offset
? from
- offset
: 0;
813 this_to
= to
> offset
? to
- offset
: 0;
814 if (this_from
< CHARSET_MIN_CODE (charset
))
815 this_from
= CHARSET_MIN_CODE (charset
);
816 if (this_to
> CHARSET_MAX_CODE (charset
))
817 this_to
= CHARSET_MAX_CODE (charset
);
818 map_charset_chars (c_function
, function
, arg
, charset
,
824 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
825 doc
: /* Call FUNCTION for all characters in CHARSET.
826 FUNCTION is called with an argument RANGE and the optional 3rd
829 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
830 characters contained in CHARSET.
832 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
833 range of code points of target characters. */)
834 (function
, charset
, arg
, from_code
, to_code
)
835 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
840 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
841 if (NILP (from_code
))
842 from
= CHARSET_MIN_CODE (cs
);
845 CHECK_NATNUM (from_code
);
846 from
= XINT (from_code
);
847 if (from
< CHARSET_MIN_CODE (cs
))
848 from
= CHARSET_MIN_CODE (cs
);
851 to
= CHARSET_MAX_CODE (cs
);
854 CHECK_NATNUM (to_code
);
856 if (to
> CHARSET_MAX_CODE (cs
))
857 to
= CHARSET_MAX_CODE (cs
);
859 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
864 /* Define a charset according to the arguments. The Nth argument is
865 the Nth attribute of the charset (the last attribute `charset-id'
866 is not included). See the docstring of `define-charset' for the
869 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
870 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
871 doc
: /* For internal use only.
872 usage: (define-charset-internal ...) */)
877 /* Charset attr vector. */
881 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
883 struct charset charset
;
886 int new_definition_p
;
889 if (nargs
!= charset_arg_max
)
890 return Fsignal (Qwrong_number_of_arguments
,
891 Fcons (intern ("define-charset-internal"),
892 make_number (nargs
)));
894 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
896 CHECK_SYMBOL (args
[charset_arg_name
]);
897 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
899 val
= args
[charset_arg_code_space
];
900 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
902 int min_byte
, max_byte
;
904 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
905 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
906 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
907 error ("Invalid :code-space value");
908 charset
.code_space
[i
* 4] = min_byte
;
909 charset
.code_space
[i
* 4 + 1] = max_byte
;
910 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
911 nchars
*= charset
.code_space
[i
* 4 + 2];
912 charset
.code_space
[i
* 4 + 3] = nchars
;
917 val
= args
[charset_arg_dimension
];
919 charset
.dimension
= dimension
;
923 charset
.dimension
= XINT (val
);
924 if (charset
.dimension
< 1 || charset
.dimension
> 4)
925 args_out_of_range_3 (val
, make_number (1), make_number (4));
928 charset
.code_linear_p
929 = (charset
.dimension
== 1
930 || (charset
.code_space
[2] == 256
931 && (charset
.dimension
== 2
932 || (charset
.code_space
[6] == 256
933 && (charset
.dimension
== 3
934 || charset
.code_space
[10] == 256)))));
936 if (! charset
.code_linear_p
)
938 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
939 bzero (charset
.code_space_mask
, 256);
940 for (i
= 0; i
< 4; i
++)
941 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
943 charset
.code_space_mask
[j
] |= (1 << i
);
946 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
948 charset
.min_code
= (charset
.code_space
[0]
949 | (charset
.code_space
[4] << 8)
950 | (charset
.code_space
[8] << 16)
951 | (charset
.code_space
[12] << 24));
952 charset
.max_code
= (charset
.code_space
[1]
953 | (charset
.code_space
[5] << 8)
954 | (charset
.code_space
[9] << 16)
955 | (charset
.code_space
[13] << 24));
956 charset
.char_index_offset
= 0;
958 val
= args
[charset_arg_min_code
];
968 CHECK_NUMBER_CAR (val
);
969 CHECK_NUMBER_CDR (val
);
970 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
972 if (code
< charset
.min_code
973 || code
> charset
.max_code
)
974 args_out_of_range_3 (make_number (charset
.min_code
),
975 make_number (charset
.max_code
), val
);
976 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
977 charset
.min_code
= code
;
980 val
= args
[charset_arg_max_code
];
990 CHECK_NUMBER_CAR (val
);
991 CHECK_NUMBER_CDR (val
);
992 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
994 if (code
< charset
.min_code
995 || code
> charset
.max_code
)
996 args_out_of_range_3 (make_number (charset
.min_code
),
997 make_number (charset
.max_code
), val
);
998 charset
.max_code
= code
;
1001 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
1003 val
= args
[charset_arg_invalid_code
];
1006 if (charset
.min_code
> 0)
1007 charset
.invalid_code
= 0;
1010 XSETINT (val
, charset
.max_code
+ 1);
1011 if (XINT (val
) == charset
.max_code
+ 1)
1012 charset
.invalid_code
= charset
.max_code
+ 1;
1014 error ("Attribute :invalid-code must be specified");
1020 charset
.invalid_code
= XFASTINT (val
);
1023 val
= args
[charset_arg_iso_final
];
1025 charset
.iso_final
= -1;
1029 if (XINT (val
) < '0' || XINT (val
) > 127)
1030 error ("Invalid iso-final-char: %d", XINT (val
));
1031 charset
.iso_final
= XINT (val
);
1034 val
= args
[charset_arg_iso_revision
];
1036 charset
.iso_revision
= -1;
1040 if (XINT (val
) > 63)
1041 args_out_of_range (make_number (63), val
);
1042 charset
.iso_revision
= XINT (val
);
1045 val
= args
[charset_arg_emacs_mule_id
];
1047 charset
.emacs_mule_id
= -1;
1051 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1052 error ("Invalid emacs-mule-id: %d", XINT (val
));
1053 charset
.emacs_mule_id
= XINT (val
);
1056 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1058 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1060 charset
.unified_p
= 0;
1062 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
1064 if (! NILP (args
[charset_arg_code_offset
]))
1066 val
= args
[charset_arg_code_offset
];
1069 charset
.method
= CHARSET_METHOD_OFFSET
;
1070 charset
.code_offset
= XINT (val
);
1072 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1073 charset
.min_char
= i
+ charset
.code_offset
;
1074 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1075 charset
.max_char
= i
+ charset
.code_offset
;
1076 if (charset
.max_char
> MAX_CHAR
)
1077 error ("Unsupported max char: %d", charset
.max_char
);
1079 i
= (charset
.min_char
>> 7) << 7;
1080 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1081 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1082 i
= (i
>> 12) << 12;
1083 for (; i
<= charset
.max_char
; i
+= 0x1000)
1084 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1086 else if (! NILP (args
[charset_arg_map
]))
1088 val
= args
[charset_arg_map
];
1089 ASET (attrs
, charset_map
, val
);
1090 charset
.method
= CHARSET_METHOD_MAP
;
1092 else if (! NILP (args
[charset_arg_subset
]))
1095 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1096 struct charset
*parent_charset
;
1098 val
= args
[charset_arg_subset
];
1099 parent
= Fcar (val
);
1100 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1101 parent_min_code
= Fnth (make_number (1), val
);
1102 CHECK_NATNUM (parent_min_code
);
1103 parent_max_code
= Fnth (make_number (2), val
);
1104 CHECK_NATNUM (parent_max_code
);
1105 parent_code_offset
= Fnth (make_number (3), val
);
1106 CHECK_NUMBER (parent_code_offset
);
1107 val
= Fmake_vector (make_number (4), Qnil
);
1108 ASET (val
, 0, make_number (parent_charset
->id
));
1109 ASET (val
, 1, parent_min_code
);
1110 ASET (val
, 2, parent_max_code
);
1111 ASET (val
, 3, parent_code_offset
);
1112 ASET (attrs
, charset_subset
, val
);
1114 charset
.method
= CHARSET_METHOD_SUBSET
;
1115 /* Here, we just copy the parent's fast_map. It's not accurate,
1116 but at least it works for quickly detecting which character
1117 DOESN'T belong to this charset. */
1118 for (i
= 0; i
< 190; i
++)
1119 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1121 /* We also copy these for parents. */
1122 charset
.min_char
= parent_charset
->min_char
;
1123 charset
.max_char
= parent_charset
->max_char
;
1125 else if (! NILP (args
[charset_arg_superset
]))
1127 val
= args
[charset_arg_superset
];
1128 charset
.method
= CHARSET_METHOD_SUPERSET
;
1129 val
= Fcopy_sequence (val
);
1130 ASET (attrs
, charset_superset
, val
);
1132 charset
.min_char
= MAX_CHAR
;
1133 charset
.max_char
= 0;
1134 for (; ! NILP (val
); val
= Fcdr (val
))
1136 Lisp_Object elt
, car_part
, cdr_part
;
1137 int this_id
, offset
;
1138 struct charset
*this_charset
;
1143 car_part
= XCAR (elt
);
1144 cdr_part
= XCDR (elt
);
1145 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1146 CHECK_NUMBER (cdr_part
);
1147 offset
= XINT (cdr_part
);
1151 CHECK_CHARSET_GET_ID (elt
, this_id
);
1154 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1156 this_charset
= CHARSET_FROM_ID (this_id
);
1157 if (charset
.min_char
> this_charset
->min_char
)
1158 charset
.min_char
= this_charset
->min_char
;
1159 if (charset
.max_char
< this_charset
->max_char
)
1160 charset
.max_char
= this_charset
->max_char
;
1161 for (i
= 0; i
< 190; i
++)
1162 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1166 error ("None of :code-offset, :map, :parents are specified");
1168 val
= args
[charset_arg_unify_map
];
1169 if (! NILP (val
) && !STRINGP (val
))
1171 ASET (attrs
, charset_unify_map
, val
);
1173 CHECK_LIST (args
[charset_arg_plist
]);
1174 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1176 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1178 if (charset
.hash_index
>= 0)
1180 new_definition_p
= 0;
1181 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1182 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1186 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1188 if (charset_table_used
== charset_table_size
)
1190 struct charset
*new_table
1191 = (struct charset
*) xmalloc (sizeof (struct charset
)
1192 * (charset_table_size
+ 16));
1193 bcopy (charset_table
, new_table
,
1194 sizeof (struct charset
) * charset_table_size
);
1195 charset_table_size
+= 16;
1196 charset_table
= new_table
;
1198 id
= charset_table_used
++;
1199 new_definition_p
= 1;
1202 ASET (attrs
, charset_id
, make_number (id
));
1204 charset_table
[id
] = charset
;
1206 if (charset
.method
== CHARSET_METHOD_MAP
)
1208 load_charset (&charset
, 0);
1209 charset_table
[id
] = charset
;
1212 if (charset
.iso_final
>= 0)
1214 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1215 charset
.iso_final
) = id
;
1216 if (new_definition_p
)
1217 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1218 Fcons (make_number (id
), Qnil
));
1219 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1220 charset_jisx0201_roman
= id
;
1221 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1222 charset_jisx0208_1978
= id
;
1223 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1224 charset_jisx0208
= id
;
1227 if (charset
.emacs_mule_id
>= 0)
1229 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1230 if (charset
.emacs_mule_id
< 0xA0)
1231 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1233 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1234 if (new_definition_p
)
1235 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1236 Fcons (make_number (id
), Qnil
));
1239 if (new_definition_p
)
1241 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1242 if (charset
.supplementary_p
)
1243 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1244 Fcons (make_number (id
), Qnil
));
1249 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1251 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1253 if (cs
->supplementary_p
)
1256 if (EQ (tail
, Vcharset_ordered_list
))
1257 Vcharset_ordered_list
= Fcons (make_number (id
),
1258 Vcharset_ordered_list
);
1259 else if (NILP (tail
))
1260 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1261 Fcons (make_number (id
), Qnil
));
1264 val
= Fcons (XCAR (tail
), XCDR (tail
));
1265 XSETCDR (tail
, val
);
1266 XSETCAR (tail
, make_number (id
));
1269 charset_ordered_list_tick
++;
1276 /* Same as Fdefine_charset_internal but arguments are more convenient
1277 to call from C (typically in syms_of_charset). This can define a
1278 charset of `offset' method only. Return the ID of the new
1282 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1283 iso_final
, iso_revision
, emacs_mule_id
,
1284 ascii_compatible
, supplementary
,
1288 unsigned char *code_space
;
1289 unsigned min_code
, max_code
;
1290 int iso_final
, iso_revision
, emacs_mule_id
;
1291 int ascii_compatible
, supplementary
;
1294 Lisp_Object args
[charset_arg_max
];
1295 Lisp_Object plist
[14];
1299 args
[charset_arg_name
] = name
;
1300 args
[charset_arg_dimension
] = make_number (dimension
);
1301 val
= Fmake_vector (make_number (8), make_number (0));
1302 for (i
= 0; i
< 8; i
++)
1303 ASET (val
, i
, make_number (code_space
[i
]));
1304 args
[charset_arg_code_space
] = val
;
1305 args
[charset_arg_min_code
] = make_number (min_code
);
1306 args
[charset_arg_max_code
] = make_number (max_code
);
1307 args
[charset_arg_iso_final
]
1308 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1309 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1310 args
[charset_arg_emacs_mule_id
]
1311 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1312 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1313 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1314 args
[charset_arg_invalid_code
] = Qnil
;
1315 args
[charset_arg_code_offset
] = make_number (code_offset
);
1316 args
[charset_arg_map
] = Qnil
;
1317 args
[charset_arg_subset
] = Qnil
;
1318 args
[charset_arg_superset
] = Qnil
;
1319 args
[charset_arg_unify_map
] = Qnil
;
1321 plist
[0] = intern (":name");
1322 plist
[1] = args
[charset_arg_name
];
1323 plist
[2] = intern (":dimension");
1324 plist
[3] = args
[charset_arg_dimension
];
1325 plist
[4] = intern (":code-space");
1326 plist
[5] = args
[charset_arg_code_space
];
1327 plist
[6] = intern (":iso-final-char");
1328 plist
[7] = args
[charset_arg_iso_final
];
1329 plist
[8] = intern (":emacs-mule-id");
1330 plist
[9] = args
[charset_arg_emacs_mule_id
];
1331 plist
[10] = intern (":ascii-compatible-p");
1332 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1333 plist
[12] = intern (":code-offset");
1334 plist
[13] = args
[charset_arg_code_offset
];
1336 args
[charset_arg_plist
] = Flist (14, plist
);
1337 Fdefine_charset_internal (charset_arg_max
, args
);
1339 return XINT (CHARSET_SYMBOL_ID (name
));
1343 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1344 Sdefine_charset_alias
, 2, 2, 0,
1345 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1347 Lisp_Object alias
, charset
;
1351 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1352 Fputhash (alias
, attr
, Vcharset_hash_table
);
1353 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1358 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1359 doc
: /* Return the property list of CHARSET. */)
1361 Lisp_Object charset
;
1365 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1366 return CHARSET_ATTR_PLIST (attrs
);
1370 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1371 doc
: /* Set CHARSET's property list to PLIST. */)
1373 Lisp_Object charset
, plist
;
1377 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1378 CHARSET_ATTR_PLIST (attrs
) = plist
;
1383 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1384 doc
: /* Unify characters of CHARSET with Unicode.
1385 This means reading the relevant file and installing the table defined
1386 by CHARSET's `:unify-map' property.
1388 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1389 the same meaning as the `:unify-map' attribute in the function
1390 `define-charset' (which see).
1392 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1393 (charset
, unify_map
, deunify
)
1394 Lisp_Object charset
, unify_map
, deunify
;
1399 CHECK_CHARSET_GET_ID (charset
, id
);
1400 cs
= CHARSET_FROM_ID (id
);
1402 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1403 : ! CHARSET_UNIFIED_P (cs
))
1406 CHARSET_UNIFIED_P (cs
) = 0;
1409 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1410 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1411 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1412 if (NILP (unify_map
))
1413 unify_map
= CHARSET_UNIFY_MAP (cs
);
1416 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1417 signal_error ("Bad unify-map", unify_map
);
1418 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1420 if (NILP (Vchar_unify_table
))
1421 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1422 char_table_set_range (Vchar_unify_table
,
1423 cs
->min_char
, cs
->max_char
, charset
);
1424 CHARSET_UNIFIED_P (cs
) = 1;
1426 else if (CHAR_TABLE_P (Vchar_unify_table
))
1428 int min_code
= CHARSET_MIN_CODE (cs
);
1429 int max_code
= CHARSET_MAX_CODE (cs
);
1430 int min_char
= DECODE_CHAR (cs
, min_code
);
1431 int max_char
= DECODE_CHAR (cs
, max_code
);
1433 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1439 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1440 Sget_unused_iso_final_char
, 2, 2, 0,
1442 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1443 DIMENSION is the number of bytes to represent a character: 1 or 2.
1444 CHARS is the number of characters in a dimension: 94 or 96.
1446 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1447 If there's no unused final char for the specified kind of charset,
1450 Lisp_Object dimension
, chars
;
1454 CHECK_NUMBER (dimension
);
1455 CHECK_NUMBER (chars
);
1456 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1457 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1458 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1459 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1460 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1461 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1463 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1467 check_iso_charset_parameter (dimension
, chars
, final_char
)
1468 Lisp_Object dimension
, chars
, final_char
;
1470 CHECK_NATNUM (dimension
);
1471 CHECK_NATNUM (chars
);
1472 CHECK_NATNUM (final_char
);
1474 if (XINT (dimension
) > 3)
1475 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1476 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1477 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1478 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1479 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1483 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1485 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1487 On decoding by an ISO-2022 base coding system, when a charset
1488 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1489 if CHARSET is designated instead. */)
1490 (dimension
, chars
, final_char
, charset
)
1491 Lisp_Object dimension
, chars
, final_char
, charset
;
1496 CHECK_CHARSET_GET_ID (charset
, id
);
1497 check_iso_charset_parameter (dimension
, chars
, final_char
);
1498 chars_flag
= XINT (chars
) == 96;
1499 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1504 /* Return information about charsets in the text at PTR of NBYTES
1505 bytes, which are NCHARS characters. The value is:
1507 0: Each character is represented by one byte. This is always
1508 true for a unibyte string. For a multibyte string, true if
1509 it contains only ASCII characters.
1511 1: No charsets other than ascii, control-1, and latin-1 are
1518 string_xstring_p (string
)
1521 const unsigned char *p
= SDATA (string
);
1522 const unsigned char *endp
= p
+ SBYTES (string
);
1524 if (SCHARS (string
) == SBYTES (string
))
1529 int c
= STRING_CHAR_ADVANCE (p
);
1538 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1540 CHARSETS is a vector. If Nth element is non-nil, it means the
1541 charset whose id is N is already found.
1543 It may lookup a translation table TABLE if supplied. */
1546 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1547 const unsigned char *ptr
;
1548 EMACS_INT nchars
, nbytes
;
1549 Lisp_Object charsets
, table
;
1552 const unsigned char *pend
= ptr
+ nbytes
;
1554 if (nchars
== nbytes
)
1557 ASET (charsets
, charset_ascii
, Qt
);
1564 c
= translate_char (table
, c
);
1565 if (ASCII_BYTE_P (c
))
1566 ASET (charsets
, charset_ascii
, Qt
);
1568 ASET (charsets
, charset_eight_bit
, Qt
);
1575 int c
= STRING_CHAR_ADVANCE (ptr
);
1576 struct charset
*charset
;
1579 c
= translate_char (table
, c
);
1580 charset
= CHAR_CHARSET (c
);
1581 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1586 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1588 doc
: /* Return a list of charsets in the region between BEG and END.
1589 BEG and END are buffer positions.
1590 Optional arg TABLE if non-nil is a translation table to look up.
1592 If the current buffer is unibyte, the returned list may contain
1593 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1595 Lisp_Object beg
, end
, table
;
1597 Lisp_Object charsets
;
1598 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1601 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1603 validate_region (&beg
, &end
);
1604 from
= XFASTINT (beg
);
1605 stop
= to
= XFASTINT (end
);
1607 if (from
< GPT
&& GPT
< to
)
1610 stop_byte
= GPT_BYTE
;
1613 stop_byte
= CHAR_TO_BYTE (stop
);
1615 from_byte
= CHAR_TO_BYTE (from
);
1617 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1620 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1621 stop_byte
- from_byte
, charsets
, table
,
1625 from
= stop
, from_byte
= stop_byte
;
1626 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1633 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1634 if (!NILP (AREF (charsets
, i
)))
1635 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1639 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1641 doc
: /* Return a list of charsets in STR.
1642 Optional arg TABLE if non-nil is a translation table to look up.
1644 If STR is unibyte, the returned list may contain
1645 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1647 Lisp_Object str
, table
;
1649 Lisp_Object charsets
;
1655 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1656 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1658 STRING_MULTIBYTE (str
));
1660 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1661 if (!NILP (AREF (charsets
, i
)))
1662 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1668 /* Return a unified character code for C (>= 0x110000). VAL is a
1669 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1672 maybe_unify_char (c
, val
)
1676 struct charset
*charset
;
1683 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1684 load_charset (charset
, 1);
1685 if (! inhibit_load_charset_map
)
1687 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1693 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1694 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1703 /* Return a character corresponding to the code-point CODE of
1707 decode_char (charset
, code
)
1708 struct charset
*charset
;
1712 enum charset_method method
= CHARSET_METHOD (charset
);
1714 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1717 if (method
== CHARSET_METHOD_SUBSET
)
1719 Lisp_Object subset_info
;
1721 subset_info
= CHARSET_SUBSET (charset
);
1722 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1723 code
-= XINT (AREF (subset_info
, 3));
1724 if (code
< XFASTINT (AREF (subset_info
, 1))
1725 || code
> XFASTINT (AREF (subset_info
, 2)))
1728 c
= DECODE_CHAR (charset
, code
);
1730 else if (method
== CHARSET_METHOD_SUPERSET
)
1732 Lisp_Object parents
;
1734 parents
= CHARSET_SUPERSET (charset
);
1736 for (; CONSP (parents
); parents
= XCDR (parents
))
1738 int id
= XINT (XCAR (XCAR (parents
)));
1739 int code_offset
= XINT (XCDR (XCAR (parents
)));
1740 unsigned this_code
= code
- code_offset
;
1742 charset
= CHARSET_FROM_ID (id
);
1743 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1749 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1753 if (method
== CHARSET_METHOD_MAP
)
1755 Lisp_Object decoder
;
1757 decoder
= CHARSET_DECODER (charset
);
1758 if (! VECTORP (decoder
))
1760 load_charset (charset
, 1);
1761 decoder
= CHARSET_DECODER (charset
);
1763 if (VECTORP (decoder
))
1764 c
= XINT (AREF (decoder
, char_index
));
1766 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1768 else /* method == CHARSET_METHOD_OFFSET */
1770 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1771 if (CHARSET_UNIFIED_P (charset
)
1772 && c
> MAX_UNICODE_CHAR
)
1773 MAYBE_UNIFY_CHAR (c
);
1780 /* Variable used temporarily by the macro ENCODE_CHAR. */
1781 Lisp_Object charset_work
;
1783 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1784 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1785 use CHARSET's strict_max_char instead of max_char. */
1788 encode_char (charset
, c
)
1789 struct charset
*charset
;
1793 enum charset_method method
= CHARSET_METHOD (charset
);
1795 if (CHARSET_UNIFIED_P (charset
))
1797 Lisp_Object deunifier
, deunified
;
1798 int code_index
= -1;
1800 deunifier
= CHARSET_DEUNIFIER (charset
);
1801 if (! CHAR_TABLE_P (deunifier
))
1803 load_charset (charset
, 2);
1804 deunifier
= CHARSET_DEUNIFIER (charset
);
1806 if (CHAR_TABLE_P (deunifier
))
1808 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1810 if (INTEGERP (deunified
))
1811 code_index
= XINT (deunified
);
1815 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1817 if (code_index
>= 0)
1818 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1821 if (method
== CHARSET_METHOD_SUBSET
)
1823 Lisp_Object subset_info
;
1824 struct charset
*this_charset
;
1826 subset_info
= CHARSET_SUBSET (charset
);
1827 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1828 code
= ENCODE_CHAR (this_charset
, c
);
1829 if (code
== CHARSET_INVALID_CODE (this_charset
)
1830 || code
< XFASTINT (AREF (subset_info
, 1))
1831 || code
> XFASTINT (AREF (subset_info
, 2)))
1832 return CHARSET_INVALID_CODE (charset
);
1833 code
+= XINT (AREF (subset_info
, 3));
1837 if (method
== CHARSET_METHOD_SUPERSET
)
1839 Lisp_Object parents
;
1841 parents
= CHARSET_SUPERSET (charset
);
1842 for (; CONSP (parents
); parents
= XCDR (parents
))
1844 int id
= XINT (XCAR (XCAR (parents
)));
1845 int code_offset
= XINT (XCDR (XCAR (parents
)));
1846 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1848 code
= ENCODE_CHAR (this_charset
, c
);
1849 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1850 return code
+ code_offset
;
1852 return CHARSET_INVALID_CODE (charset
);
1855 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1856 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1857 return CHARSET_INVALID_CODE (charset
);
1859 if (method
== CHARSET_METHOD_MAP
)
1861 Lisp_Object encoder
;
1864 encoder
= CHARSET_ENCODER (charset
);
1865 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1867 load_charset (charset
, 2);
1868 encoder
= CHARSET_ENCODER (charset
);
1870 if (CHAR_TABLE_P (encoder
))
1872 val
= CHAR_TABLE_REF (encoder
, c
);
1874 return CHARSET_INVALID_CODE (charset
);
1876 if (! CHARSET_COMPACT_CODES_P (charset
))
1877 code
= INDEX_TO_CODE_POINT (charset
, code
);
1881 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1882 code
= INDEX_TO_CODE_POINT (charset
, code
);
1885 else /* method == CHARSET_METHOD_OFFSET */
1887 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1889 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1896 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1897 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1898 Return nil if CODE-POINT is not valid in CHARSET.
1900 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1902 Optional argument RESTRICTION specifies a way to map the pair of CCS
1903 and CODE-POINT to a character. Currently not supported and just ignored. */)
1904 (charset
, code_point
, restriction
)
1905 Lisp_Object charset
, code_point
, restriction
;
1909 struct charset
*charsetp
;
1911 CHECK_CHARSET_GET_ID (charset
, id
);
1912 if (CONSP (code_point
))
1914 CHECK_NATNUM_CAR (code_point
);
1915 CHECK_NATNUM_CDR (code_point
);
1916 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1920 CHECK_NATNUM (code_point
);
1921 code
= XINT (code_point
);
1923 charsetp
= CHARSET_FROM_ID (id
);
1924 c
= DECODE_CHAR (charsetp
, code
);
1925 return (c
>= 0 ? make_number (c
) : Qnil
);
1929 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1930 doc
: /* Encode the character CH into a code-point of CHARSET.
1931 Return nil if CHARSET doesn't include CH.
1933 Optional argument RESTRICTION specifies a way to map CH to a
1934 code-point in CCS. Currently not supported and just ignored. */)
1935 (ch
, charset
, restriction
)
1936 Lisp_Object ch
, charset
, restriction
;
1940 struct charset
*charsetp
;
1942 CHECK_CHARSET_GET_ID (charset
, id
);
1944 charsetp
= CHARSET_FROM_ID (id
);
1945 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1946 if (code
== CHARSET_INVALID_CODE (charsetp
))
1948 if (code
> 0x7FFFFFF)
1949 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1950 return make_number (code
);
1954 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1956 /* Return a character of CHARSET whose position codes are CODEn.
1958 CODE1 through CODE4 are optional, but if you don't supply sufficient
1959 position codes, it is assumed that the minimum code in each dimension
1961 (charset
, code1
, code2
, code3
, code4
)
1962 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1965 struct charset
*charsetp
;
1969 CHECK_CHARSET_GET_ID (charset
, id
);
1970 charsetp
= CHARSET_FROM_ID (id
);
1972 dimension
= CHARSET_DIMENSION (charsetp
);
1974 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1975 ? 0 : CHARSET_MIN_CODE (charsetp
));
1978 CHECK_NATNUM (code1
);
1979 if (XFASTINT (code1
) >= 0x100)
1980 args_out_of_range (make_number (0xFF), code1
);
1981 code
= XFASTINT (code1
);
1987 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1990 CHECK_NATNUM (code2
);
1991 if (XFASTINT (code2
) >= 0x100)
1992 args_out_of_range (make_number (0xFF), code2
);
1993 code
|= XFASTINT (code2
);
2000 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
2003 CHECK_NATNUM (code3
);
2004 if (XFASTINT (code3
) >= 0x100)
2005 args_out_of_range (make_number (0xFF), code3
);
2006 code
|= XFASTINT (code3
);
2013 code
|= charsetp
->code_space
[0];
2016 CHECK_NATNUM (code4
);
2017 if (XFASTINT (code4
) >= 0x100)
2018 args_out_of_range (make_number (0xFF), code4
);
2019 code
|= XFASTINT (code4
);
2026 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
2028 c
= DECODE_CHAR (charsetp
, code
);
2030 error ("Invalid code(s)");
2031 return make_number (c
);
2035 /* Return the first charset in CHARSET_LIST that contains C.
2036 CHARSET_LIST is a list of charset IDs. If it is nil, use
2037 Vcharset_ordered_list. */
2040 char_charset (c
, charset_list
, code_return
)
2042 Lisp_Object charset_list
;
2043 unsigned *code_return
;
2047 if (NILP (charset_list
))
2048 charset_list
= Vcharset_ordered_list
;
2052 while (CONSP (charset_list
))
2054 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2055 unsigned code
= ENCODE_CHAR (charset
, c
);
2057 if (code
!= CHARSET_INVALID_CODE (charset
))
2060 *code_return
= code
;
2063 charset_list
= XCDR (charset_list
);
2064 if (c
<= MAX_UNICODE_CHAR
2065 && EQ (charset_list
, Vcharset_non_preferred_head
))
2066 return CHARSET_FROM_ID (charset_unicode
);
2068 return (maybe_null
? NULL
2069 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2070 : CHARSET_FROM_ID (charset_eight_bit
));
2074 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2076 /*Return list of charset and one to four position-codes of CH.
2077 The charset is decided by the current priority order of charsets.
2078 A position-code is a byte value of each dimension of the code-point of
2079 CH in the charset. */)
2083 struct charset
*charset
;
2088 CHECK_CHARACTER (ch
);
2090 charset
= CHAR_CHARSET (c
);
2093 code
= ENCODE_CHAR (charset
, c
);
2094 if (code
== CHARSET_INVALID_CODE (charset
))
2096 dimension
= CHARSET_DIMENSION (charset
);
2097 for (val
= Qnil
; dimension
> 0; dimension
--)
2099 val
= Fcons (make_number (code
& 0xFF), val
);
2102 return Fcons (CHARSET_NAME (charset
), val
);
2106 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2107 doc
: /* Return the charset of highest priority that contains CH.
2108 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2109 from which to find the charset. It may also be a coding system. In
2110 that case, find the charset from those supported by that coding system. */)
2112 Lisp_Object ch
, restriction
;
2114 struct charset
*charset
;
2116 CHECK_CHARACTER (ch
);
2117 if (NILP (restriction
))
2118 charset
= CHAR_CHARSET (XINT (ch
));
2121 Lisp_Object charset_list
;
2123 if (CONSP (restriction
))
2125 for (charset_list
= Qnil
; CONSP (restriction
);
2126 restriction
= XCDR (restriction
))
2130 CHECK_CHARSET_GET_ID (XCAR (restriction
), id
);
2131 charset_list
= Fcons (make_number (id
), charset_list
);
2133 charset_list
= Fnreverse (charset_list
);
2136 charset_list
= coding_system_charset_list (restriction
);
2137 charset
= char_charset (XINT (ch
), charset_list
, NULL
);
2141 return (CHARSET_NAME (charset
));
2145 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2147 Return charset of a character in the current buffer at position POS.
2148 If POS is nil, it defaults to the current point.
2149 If POS is out of range, the value is nil. */)
2154 struct charset
*charset
;
2156 ch
= Fchar_after (pos
);
2157 if (! INTEGERP (ch
))
2159 charset
= CHAR_CHARSET (XINT (ch
));
2160 return (CHARSET_NAME (charset
));
2164 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2166 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2168 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2169 by their DIMENSION, CHARS, and FINAL-CHAR,
2170 whereas Emacs distinguishes them by charset symbol.
2171 See the documentation of the function `charset-info' for the meanings of
2172 DIMENSION, CHARS, and FINAL-CHAR. */)
2173 (dimension
, chars
, final_char
)
2174 Lisp_Object dimension
, chars
, final_char
;
2179 check_iso_charset_parameter (dimension
, chars
, final_char
);
2180 chars_flag
= XFASTINT (chars
) == 96;
2181 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2182 XFASTINT (final_char
));
2183 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2187 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2191 Clear temporary charset mapping tables.
2192 It should be called only from temacs invoked for dumping. */)
2196 struct charset
*charset
;
2199 if (temp_charset_work
)
2201 free (temp_charset_work
);
2202 temp_charset_work
= NULL
;
2205 if (CHAR_TABLE_P (Vchar_unify_table
))
2206 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2211 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2212 Scharset_priority_list
, 0, 1, 0,
2213 doc
: /* Return the list of charsets ordered by priority.
2214 HIGHESTP non-nil means just return the highest priority one. */)
2216 Lisp_Object highestp
;
2218 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2220 if (!NILP (highestp
))
2221 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2223 while (!NILP (list
))
2225 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2228 return Fnreverse (val
);
2231 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2233 doc
: /* Assign higher priority to the charsets given as arguments.
2234 usage: (set-charset-priority &rest charsets) */)
2239 Lisp_Object new_head
, old_list
, arglist
[2];
2240 Lisp_Object list_2022
, list_emacs_mule
;
2243 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2245 for (i
= 0; i
< nargs
; i
++)
2247 CHECK_CHARSET_GET_ID (args
[i
], id
);
2248 if (! NILP (Fmemq (make_number (id
), old_list
)))
2250 old_list
= Fdelq (make_number (id
), old_list
);
2251 new_head
= Fcons (make_number (id
), new_head
);
2254 arglist
[0] = Fnreverse (new_head
);
2255 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2256 Vcharset_ordered_list
= Fnconc (2, arglist
);
2257 charset_ordered_list_tick
++;
2259 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2260 CONSP (old_list
); old_list
= XCDR (old_list
))
2262 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2263 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2264 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2265 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2267 Viso_2022_charset_list
= Fnreverse (list_2022
);
2268 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2273 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2275 doc
: /* Internal use only.
2276 Return charset identification number of CHARSET. */)
2278 Lisp_Object charset
;
2282 CHECK_CHARSET_GET_ID (charset
, id
);
2283 return make_number (id
);
2290 Lisp_Object tempdir
;
2291 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2292 if (access (SDATA (tempdir
), 0) < 0)
2294 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2295 Emacs will not function correctly without the character map files.\n\
2296 Please check your installation!\n",
2298 /* TODO should this be a fatal error? (Bug#909) */
2301 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2306 init_charset_once ()
2310 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2311 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2312 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2313 iso_charset_table
[i
][j
][k
] = -1;
2315 for (i
= 0; i
< 256; i
++)
2316 emacs_mule_charset
[i
] = NULL
;
2318 charset_jisx0201_roman
= -1;
2319 charset_jisx0208_1978
= -1;
2320 charset_jisx0208
= -1;
2322 for (i
= 0; i
< 128; i
++)
2323 unibyte_to_multibyte_table
[i
] = i
;
2324 for (; i
< 256; i
++)
2325 unibyte_to_multibyte_table
[i
] = BYTE8_TO_CHAR (i
);
2333 DEFSYM (Qcharsetp
, "charsetp");
2335 DEFSYM (Qascii
, "ascii");
2336 DEFSYM (Qunicode
, "unicode");
2337 DEFSYM (Qemacs
, "emacs");
2338 DEFSYM (Qeight_bit
, "eight-bit");
2339 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2344 staticpro (&Vcharset_ordered_list
);
2345 Vcharset_ordered_list
= Qnil
;
2347 staticpro (&Viso_2022_charset_list
);
2348 Viso_2022_charset_list
= Qnil
;
2350 staticpro (&Vemacs_mule_charset_list
);
2351 Vemacs_mule_charset_list
= Qnil
;
2353 /* Don't staticpro them here. It's done in syms_of_fns. */
2354 QCtest
= intern (":test");
2355 Qeq
= intern ("eq");
2357 staticpro (&Vcharset_hash_table
);
2359 Lisp_Object args
[2];
2362 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2365 charset_table_size
= 128;
2366 charset_table
= ((struct charset
*)
2367 xmalloc (sizeof (struct charset
) * charset_table_size
));
2368 charset_table_used
= 0;
2370 defsubr (&Scharsetp
);
2371 defsubr (&Smap_charset_chars
);
2372 defsubr (&Sdefine_charset_internal
);
2373 defsubr (&Sdefine_charset_alias
);
2374 defsubr (&Scharset_plist
);
2375 defsubr (&Sset_charset_plist
);
2376 defsubr (&Sunify_charset
);
2377 defsubr (&Sget_unused_iso_final_char
);
2378 defsubr (&Sdeclare_equiv_charset
);
2379 defsubr (&Sfind_charset_region
);
2380 defsubr (&Sfind_charset_string
);
2381 defsubr (&Sdecode_char
);
2382 defsubr (&Sencode_char
);
2383 defsubr (&Ssplit_char
);
2384 defsubr (&Smake_char
);
2385 defsubr (&Schar_charset
);
2386 defsubr (&Scharset_after
);
2387 defsubr (&Siso_charset
);
2388 defsubr (&Sclear_charset_maps
);
2389 defsubr (&Scharset_priority_list
);
2390 defsubr (&Sset_charset_priority
);
2391 defsubr (&Scharset_id_internal
);
2393 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2394 doc
: /* *List of directories to search for charset map files. */);
2395 Vcharset_map_path
= Qnil
;
2397 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2398 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2399 inhibit_load_charset_map
= 0;
2401 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2402 doc
: /* List of all charsets ever defined. */);
2403 Vcharset_list
= Qnil
;
2405 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2406 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2407 If the current language environment is for multiple languages (e.g. "Latin-1"),
2408 the value may be a list of mnemonics. */);
2409 Vcurrent_iso639_language
= Qnil
;
2412 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2413 0, 127, 'B', -1, 0, 1, 0, 0);
2415 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2416 0, 255, -1, -1, -1, 1, 0, 0);
2418 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2419 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2421 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2422 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2424 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2425 128, 255, -1, 0, -1, 0, 1,
2426 MAX_5_BYTE_CHAR
+ 1);
2431 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2432 (do not change this comment) */