1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
35 #include "character.h"
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
56 /* List of all charsets. This variable is used only from Emacs
58 Lisp_Object Vcharset_list
;
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table
;
64 /* Table of struct charset. */
65 struct charset
*charset_table
;
67 static int charset_table_size
;
68 static int charset_table_used
;
70 Lisp_Object Qcharsetp
;
72 /* Special charset symbols. */
74 Lisp_Object Qeight_bit
;
75 Lisp_Object Qiso_8859_1
;
79 /* The corresponding charsets. */
81 int charset_eight_bit
;
82 int charset_iso_8859_1
;
86 /* The other special charsets. */
87 int charset_jisx0201_roman
;
88 int charset_jisx0208_1978
;
91 /* Value of charset attribute `charset-iso-plane'. */
94 /* Charset of unibyte characters. */
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list
;
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
102 Lisp_Object Vcharset_non_preferred_head
;
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
107 unsigned short charset_ordered_list_tick
;
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list
;
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list
;
115 struct charset
*emacs_mule_charset
[256];
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
121 Lisp_Object Vcharset_map_path
;
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map
;
126 Lisp_Object Vcurrent_iso639_language
;
128 /* Defined in chartab.c */
130 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
131 Lisp_Object function
, Lisp_Object table
,
132 Lisp_Object arg
, struct charset
*charset
,
133 unsigned from
, unsigned to
));
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
175 /* The current charset for which the following tables are setup. */
176 struct charset
*current
;
178 /* 1 iff the following table is used for encoder. */
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char
, max_char
;
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder
[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder
[0x20000];
203 } *temp_charset_work
;
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
229 /* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
231 int charset_map_loaded
;
233 struct charset_map_entries
239 struct charset_map_entries
*next
;
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
258 If CONTROL_FLAG is 2, setup the following tables.
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
268 load_charset_map (charset
, entries
, n_entries
, control_flag
)
269 struct charset
*charset
;
270 struct charset_map_entries
*entries
;
274 Lisp_Object vec
, table
;
275 unsigned max_code
= CHARSET_MAX_CODE (charset
);
276 int ascii_compatible_p
= charset
->ascii_compatible_p
;
277 int min_char
, max_char
, nonascii_min_char
;
279 unsigned char *fast_map
= charset
->fast_map
;
286 if (! inhibit_load_charset_map
)
288 if (control_flag
== 1)
290 if (charset
->method
== CHARSET_METHOD_MAP
)
292 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
294 vec
= CHARSET_DECODER (charset
)
295 = Fmake_vector (make_number (n
), make_number (-1));
299 char_table_set_range (Vchar_unify_table
,
300 charset
->min_char
, charset
->max_char
,
306 table
= Fmake_char_table (Qnil
, Qnil
);
307 if (charset
->method
== CHARSET_METHOD_MAP
)
308 CHARSET_ENCODER (charset
) = table
;
310 CHARSET_DEUNIFIER (charset
) = table
;
315 if (! temp_charset_work
)
316 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
317 if (control_flag
== 1)
319 memset (temp_charset_work
->table
.decoder
, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work
->for_encoder
= 0;
325 memset (temp_charset_work
->table
.encoder
, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work
->zero_index_char
= -1;
329 temp_charset_work
->current
= charset
;
330 temp_charset_work
->for_encoder
= (control_flag
== 2);
333 charset_map_loaded
= 1;
336 min_char
= max_char
= entries
->entry
[0].c
;
337 nonascii_min_char
= MAX_CHAR
;
338 for (i
= 0; i
< n_entries
; i
++)
341 int from_index
, to_index
;
343 int idx
= i
% 0x10000;
345 if (i
> 0 && idx
== 0)
346 entries
= entries
->next
;
347 from
= entries
->entry
[idx
].from
;
348 to
= entries
->entry
[idx
].to
;
349 from_c
= entries
->entry
[idx
].c
;
350 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
353 to_index
= from_index
;
358 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
359 to_c
= from_c
+ (to_index
- from_index
);
361 if (from_index
< 0 || to_index
< 0)
366 else if (from_c
< min_char
)
369 if (control_flag
== 1)
371 if (charset
->method
== CHARSET_METHOD_MAP
)
372 for (; from_index
<= to_index
; from_index
++, from_c
++)
373 ASET (vec
, from_index
, make_number (from_c
));
375 for (; from_index
<= to_index
; from_index
++, from_c
++)
376 CHAR_TABLE_SET (Vchar_unify_table
,
377 CHARSET_CODE_OFFSET (charset
) + from_index
,
378 make_number (from_c
));
380 else if (control_flag
== 2)
382 if (charset
->method
== CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset
))
384 for (; from_index
<= to_index
; from_index
++, from_c
++)
386 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
388 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
389 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
392 for (; from_index
<= to_index
; from_index
++, from_c
++)
394 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
395 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
398 else if (control_flag
== 3)
399 for (; from_index
<= to_index
; from_index
++, from_c
++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
401 else if (control_flag
== 4)
402 for (; from_index
<= to_index
; from_index
++, from_c
++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
404 else /* control_flag == 0 */
406 if (ascii_compatible_p
)
408 if (! ASCII_BYTE_P (from_c
))
410 if (from_c
< nonascii_min_char
)
411 nonascii_min_char
= from_c
;
413 else if (! ASCII_BYTE_P (to_c
))
415 nonascii_min_char
= 0x80;
419 for (; from_c
<= to_c
; from_c
++)
420 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
424 if (control_flag
== 0)
426 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
427 ? nonascii_min_char
: min_char
);
428 CHARSET_MAX_CHAR (charset
) = max_char
;
430 else if (control_flag
== 4)
432 temp_charset_work
->min_char
= min_char
;
433 temp_charset_work
->max_char
= max_char
;
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
441 static INLINE
unsigned
449 while ((c
= getc (fp
)) != EOF
)
453 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
457 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
469 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
471 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
473 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
474 n
= (n
* 10) + c
- '0';
480 extern Lisp_Object Qfile_name_handler_alist
;
482 /* Return a mapping vector for CHARSET loaded from MAPFILE.
483 Each line of MAPFILE has this form
485 where 0xAAAA is a code-point and 0xCCCC is the corresponding
486 character code, or this form
488 where 0xAAAA and 0xBBBB are code-points specifying a range, and
489 0xCCCC is the first character code of the range.
491 The returned vector has this form:
492 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
493 where CODE1 is a code-point or a cons of code-points specifying a
496 Note that this function uses `openp' to open MAPFILE but ignores
497 `file-name-handler-alist' to avoid running any Lisp code. */
499 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
502 load_charset_map_from_file (charset
, mapfile
, control_flag
)
503 struct charset
*charset
;
507 unsigned min_code
= CHARSET_MIN_CODE (charset
);
508 unsigned max_code
= CHARSET_MAX_CODE (charset
);
512 Lisp_Object suffixes
;
513 struct charset_map_entries
*head
, *entries
;
515 int count
= SPECPDL_INDEX ();
517 suffixes
= Fcons (build_string (".map"),
518 Fcons (build_string (".TXT"), Qnil
));
520 specbind (Qfile_name_handler_alist
, Qnil
);
521 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
522 unbind_to (count
, Qnil
);
524 || ! (fp
= fdopen (fd
, "r")))
526 add_to_log ("Failure in loading charset map: %S", mapfile
, Qnil
);
530 head
= entries
= ((struct charset_map_entries
*)
531 alloca (sizeof (struct charset_map_entries
)));
540 from
= read_hex (fp
, &eof
);
543 if (getc (fp
) == '-')
544 to
= read_hex (fp
, &eof
);
547 c
= (int) read_hex (fp
, &eof
);
549 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
552 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
554 entries
->next
= ((struct charset_map_entries
*)
555 alloca (sizeof (struct charset_map_entries
)));
556 entries
= entries
->next
;
558 idx
= n_entries
% 0x10000;
559 entries
->entry
[idx
].from
= from
;
560 entries
->entry
[idx
].to
= to
;
561 entries
->entry
[idx
].c
= c
;
567 load_charset_map (charset
, head
, n_entries
, control_flag
);
571 load_charset_map_from_vector (charset
, vec
, control_flag
)
572 struct charset
*charset
;
576 unsigned min_code
= CHARSET_MIN_CODE (charset
);
577 unsigned max_code
= CHARSET_MAX_CODE (charset
);
578 struct charset_map_entries
*head
, *entries
;
580 int len
= ASIZE (vec
);
585 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
589 head
= entries
= ((struct charset_map_entries
*)
590 alloca (sizeof (struct charset_map_entries
)));
592 for (i
= 0; i
< len
; i
+= 2)
594 Lisp_Object val
, val2
;
606 from
= XFASTINT (val
);
607 to
= XFASTINT (val2
);
612 from
= to
= XFASTINT (val
);
614 val
= AREF (vec
, i
+ 1);
618 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
621 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
623 entries
->next
= ((struct charset_map_entries
*)
624 alloca (sizeof (struct charset_map_entries
)));
625 entries
= entries
->next
;
627 idx
= n_entries
% 0x10000;
628 entries
->entry
[idx
].from
= from
;
629 entries
->entry
[idx
].to
= to
;
630 entries
->entry
[idx
].c
= c
;
634 load_charset_map (charset
, head
, n_entries
, control_flag
);
638 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
639 map it is (see the comment of load_charset_map for the detail). */
642 load_charset (charset
, control_flag
)
643 struct charset
*charset
;
648 if (inhibit_load_charset_map
650 && charset
== temp_charset_work
->current
651 && (control_flag
== 2 == temp_charset_work
->for_encoder
))
654 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
655 map
= CHARSET_MAP (charset
);
656 else if (CHARSET_UNIFIED_P (charset
))
657 map
= CHARSET_UNIFY_MAP (charset
);
659 load_charset_map_from_file (charset
, map
, control_flag
);
661 load_charset_map_from_vector (charset
, map
, control_flag
);
665 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
666 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
670 return (CHARSETP (object
) ? Qt
: Qnil
);
674 void map_charset_for_dump
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
675 Lisp_Object function
, Lisp_Object arg
,
676 unsigned from
, unsigned to
));
679 map_charset_for_dump (c_function
, function
, arg
, from
, to
)
680 void (*c_function
) (Lisp_Object
, Lisp_Object
);
681 Lisp_Object function
, arg
;
684 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
685 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
690 range
= Fcons (Qnil
, Qnil
);
693 c
= temp_charset_work
->min_char
;
694 stop
= (temp_charset_work
->max_char
< 0x20000
695 ? temp_charset_work
->max_char
: 0xFFFF);
699 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
701 if (index
>= from_idx
&& index
<= to_idx
)
703 if (NILP (XCAR (range
)))
704 XSETCAR (range
, make_number (c
));
706 else if (! NILP (XCAR (range
)))
708 XSETCDR (range
, make_number (c
- 1));
710 (*c_function
) (arg
, range
);
712 call2 (function
, range
, arg
);
713 XSETCAR (range
, Qnil
);
717 if (c
== temp_charset_work
->max_char
)
719 if (! NILP (XCAR (range
)))
721 XSETCDR (range
, make_number (c
));
723 (*c_function
) (arg
, range
);
725 call2 (function
, range
, arg
);
730 stop
= temp_charset_work
->max_char
;
737 map_charset_chars (c_function
, function
, arg
,
739 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
740 Lisp_Object function
, arg
;
741 struct charset
*charset
;
747 partial
= (from
> CHARSET_MIN_CODE (charset
)
748 || to
< CHARSET_MAX_CODE (charset
));
750 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
752 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
753 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
754 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
755 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
757 if (CHARSET_UNIFIED_P (charset
))
759 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
760 load_charset (charset
, 2);
761 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
762 map_char_table_for_charset (c_function
, function
,
763 CHARSET_DEUNIFIER (charset
), arg
,
764 partial
? charset
: NULL
, from
, to
);
766 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
769 range
= Fcons (make_number (from_c
), make_number (to_c
));
771 (*c_function
) (arg
, range
);
773 call2 (function
, range
, arg
);
775 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
777 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
778 load_charset (charset
, 2);
779 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
780 map_char_table_for_charset (c_function
, function
,
781 CHARSET_ENCODER (charset
), arg
,
782 partial
? charset
: NULL
, from
, to
);
784 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
786 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
788 Lisp_Object subset_info
;
791 subset_info
= CHARSET_SUBSET (charset
);
792 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
793 offset
= XINT (AREF (subset_info
, 3));
795 if (from
< XFASTINT (AREF (subset_info
, 1)))
796 from
= XFASTINT (AREF (subset_info
, 1));
798 if (to
> XFASTINT (AREF (subset_info
, 2)))
799 to
= XFASTINT (AREF (subset_info
, 2));
800 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
802 else /* i.e. CHARSET_METHOD_SUPERSET */
806 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
807 parents
= XCDR (parents
))
810 unsigned this_from
, this_to
;
812 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
813 offset
= XINT (XCDR (XCAR (parents
)));
814 this_from
= from
- offset
;
815 this_to
= to
- offset
;
816 if (this_from
< CHARSET_MIN_CODE (charset
))
817 this_from
= CHARSET_MIN_CODE (charset
);
818 if (this_to
> CHARSET_MAX_CODE (charset
))
819 this_to
= CHARSET_MAX_CODE (charset
);
820 map_charset_chars (c_function
, function
, arg
, charset
,
826 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
827 doc
: /* Call FUNCTION for all characters in CHARSET.
828 FUNCTION is called with an argument RANGE and the optional 3rd
831 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
832 characters contained in CHARSET.
834 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
835 range of code points of target characters. */)
836 (function
, charset
, arg
, from_code
, to_code
)
837 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
842 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
843 if (NILP (from_code
))
844 from
= CHARSET_MIN_CODE (cs
);
847 CHECK_NATNUM (from_code
);
848 from
= XINT (from_code
);
849 if (from
< CHARSET_MIN_CODE (cs
))
850 from
= CHARSET_MIN_CODE (cs
);
853 to
= CHARSET_MAX_CODE (cs
);
856 CHECK_NATNUM (to_code
);
858 if (to
> CHARSET_MAX_CODE (cs
))
859 to
= CHARSET_MAX_CODE (cs
);
861 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
866 /* Define a charset according to the arguments. The Nth argument is
867 the Nth attribute of the charset (the last attribute `charset-id'
868 is not included). See the docstring of `define-charset' for the
871 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
872 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
873 doc
: /* For internal use only.
874 usage: (define-charset-internal ...) */)
879 /* Charset attr vector. */
883 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
885 struct charset charset
;
888 int new_definition_p
;
891 if (nargs
!= charset_arg_max
)
892 return Fsignal (Qwrong_number_of_arguments
,
893 Fcons (intern ("define-charset-internal"),
894 make_number (nargs
)));
896 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
898 CHECK_SYMBOL (args
[charset_arg_name
]);
899 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
901 val
= args
[charset_arg_code_space
];
902 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
904 int min_byte
, max_byte
;
906 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
907 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
908 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
909 error ("Invalid :code-space value");
910 charset
.code_space
[i
* 4] = min_byte
;
911 charset
.code_space
[i
* 4 + 1] = max_byte
;
912 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
913 nchars
*= charset
.code_space
[i
* 4 + 2];
914 charset
.code_space
[i
* 4 + 3] = nchars
;
919 val
= args
[charset_arg_dimension
];
921 charset
.dimension
= dimension
;
925 charset
.dimension
= XINT (val
);
926 if (charset
.dimension
< 1 || charset
.dimension
> 4)
927 args_out_of_range_3 (val
, make_number (1), make_number (4));
930 charset
.code_linear_p
931 = (charset
.dimension
== 1
932 || (charset
.code_space
[2] == 256
933 && (charset
.dimension
== 2
934 || (charset
.code_space
[6] == 256
935 && (charset
.dimension
== 3
936 || charset
.code_space
[10] == 256)))));
938 if (! charset
.code_linear_p
)
940 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
941 bzero (charset
.code_space_mask
, 256);
942 for (i
= 0; i
< 4; i
++)
943 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
945 charset
.code_space_mask
[j
] |= (1 << i
);
948 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
950 charset
.min_code
= (charset
.code_space
[0]
951 | (charset
.code_space
[4] << 8)
952 | (charset
.code_space
[8] << 16)
953 | (charset
.code_space
[12] << 24));
954 charset
.max_code
= (charset
.code_space
[1]
955 | (charset
.code_space
[5] << 8)
956 | (charset
.code_space
[9] << 16)
957 | (charset
.code_space
[13] << 24));
958 charset
.char_index_offset
= 0;
960 val
= args
[charset_arg_min_code
];
970 CHECK_NUMBER_CAR (val
);
971 CHECK_NUMBER_CDR (val
);
972 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
974 if (code
< charset
.min_code
975 || code
> charset
.max_code
)
976 args_out_of_range_3 (make_number (charset
.min_code
),
977 make_number (charset
.max_code
), val
);
978 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
979 charset
.min_code
= code
;
982 val
= args
[charset_arg_max_code
];
992 CHECK_NUMBER_CAR (val
);
993 CHECK_NUMBER_CDR (val
);
994 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
996 if (code
< charset
.min_code
997 || code
> charset
.max_code
)
998 args_out_of_range_3 (make_number (charset
.min_code
),
999 make_number (charset
.max_code
), val
);
1000 charset
.max_code
= code
;
1003 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
1005 val
= args
[charset_arg_invalid_code
];
1008 if (charset
.min_code
> 0)
1009 charset
.invalid_code
= 0;
1012 XSETINT (val
, charset
.max_code
+ 1);
1013 if (XINT (val
) == charset
.max_code
+ 1)
1014 charset
.invalid_code
= charset
.max_code
+ 1;
1016 error ("Attribute :invalid-code must be specified");
1022 charset
.invalid_code
= XFASTINT (val
);
1025 val
= args
[charset_arg_iso_final
];
1027 charset
.iso_final
= -1;
1031 if (XINT (val
) < '0' || XINT (val
) > 127)
1032 error ("Invalid iso-final-char: %d", XINT (val
));
1033 charset
.iso_final
= XINT (val
);
1036 val
= args
[charset_arg_iso_revision
];
1038 charset
.iso_revision
= -1;
1042 if (XINT (val
) > 63)
1043 args_out_of_range (make_number (63), val
);
1044 charset
.iso_revision
= XINT (val
);
1047 val
= args
[charset_arg_emacs_mule_id
];
1049 charset
.emacs_mule_id
= -1;
1053 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1054 error ("Invalid emacs-mule-id: %d", XINT (val
));
1055 charset
.emacs_mule_id
= XINT (val
);
1058 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1060 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1062 charset
.unified_p
= 0;
1064 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
1066 if (! NILP (args
[charset_arg_code_offset
]))
1068 val
= args
[charset_arg_code_offset
];
1071 charset
.method
= CHARSET_METHOD_OFFSET
;
1072 charset
.code_offset
= XINT (val
);
1074 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1075 charset
.min_char
= i
+ charset
.code_offset
;
1076 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1077 charset
.max_char
= i
+ charset
.code_offset
;
1078 if (charset
.max_char
> MAX_CHAR
)
1079 error ("Unsupported max char: %d", charset
.max_char
);
1081 i
= (charset
.min_char
>> 7) << 7;
1082 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1083 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1084 i
= (i
>> 12) << 12;
1085 for (; i
<= charset
.max_char
; i
+= 0x1000)
1086 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1088 else if (! NILP (args
[charset_arg_map
]))
1090 val
= args
[charset_arg_map
];
1091 ASET (attrs
, charset_map
, val
);
1092 charset
.method
= CHARSET_METHOD_MAP
;
1094 else if (! NILP (args
[charset_arg_subset
]))
1097 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1098 struct charset
*parent_charset
;
1100 val
= args
[charset_arg_subset
];
1101 parent
= Fcar (val
);
1102 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1103 parent_min_code
= Fnth (make_number (1), val
);
1104 CHECK_NATNUM (parent_min_code
);
1105 parent_max_code
= Fnth (make_number (2), val
);
1106 CHECK_NATNUM (parent_max_code
);
1107 parent_code_offset
= Fnth (make_number (3), val
);
1108 CHECK_NUMBER (parent_code_offset
);
1109 val
= Fmake_vector (make_number (4), Qnil
);
1110 ASET (val
, 0, make_number (parent_charset
->id
));
1111 ASET (val
, 1, parent_min_code
);
1112 ASET (val
, 2, parent_max_code
);
1113 ASET (val
, 3, parent_code_offset
);
1114 ASET (attrs
, charset_subset
, val
);
1116 charset
.method
= CHARSET_METHOD_SUBSET
;
1117 /* Here, we just copy the parent's fast_map. It's not accurate,
1118 but at least it works for quickly detecting which character
1119 DOESN'T belong to this charset. */
1120 for (i
= 0; i
< 190; i
++)
1121 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1123 /* We also copy these for parents. */
1124 charset
.min_char
= parent_charset
->min_char
;
1125 charset
.max_char
= parent_charset
->max_char
;
1127 else if (! NILP (args
[charset_arg_superset
]))
1129 val
= args
[charset_arg_superset
];
1130 charset
.method
= CHARSET_METHOD_SUPERSET
;
1131 val
= Fcopy_sequence (val
);
1132 ASET (attrs
, charset_superset
, val
);
1134 charset
.min_char
= MAX_CHAR
;
1135 charset
.max_char
= 0;
1136 for (; ! NILP (val
); val
= Fcdr (val
))
1138 Lisp_Object elt
, car_part
, cdr_part
;
1139 int this_id
, offset
;
1140 struct charset
*this_charset
;
1145 car_part
= XCAR (elt
);
1146 cdr_part
= XCDR (elt
);
1147 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1148 CHECK_NUMBER (cdr_part
);
1149 offset
= XINT (cdr_part
);
1153 CHECK_CHARSET_GET_ID (elt
, this_id
);
1156 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1158 this_charset
= CHARSET_FROM_ID (this_id
);
1159 if (charset
.min_char
> this_charset
->min_char
)
1160 charset
.min_char
= this_charset
->min_char
;
1161 if (charset
.max_char
< this_charset
->max_char
)
1162 charset
.max_char
= this_charset
->max_char
;
1163 for (i
= 0; i
< 190; i
++)
1164 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1168 error ("None of :code-offset, :map, :parents are specified");
1170 val
= args
[charset_arg_unify_map
];
1171 if (! NILP (val
) && !STRINGP (val
))
1173 ASET (attrs
, charset_unify_map
, val
);
1175 CHECK_LIST (args
[charset_arg_plist
]);
1176 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1178 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1180 if (charset
.hash_index
>= 0)
1182 new_definition_p
= 0;
1183 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1184 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1188 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1190 if (charset_table_used
== charset_table_size
)
1192 struct charset
*new_table
1193 = (struct charset
*) xmalloc (sizeof (struct charset
)
1194 * (charset_table_size
+ 16));
1195 bcopy (charset_table
, new_table
,
1196 sizeof (struct charset
) * charset_table_size
);
1197 charset_table_size
+= 16;
1198 charset_table
= new_table
;
1200 id
= charset_table_used
++;
1201 new_definition_p
= 1;
1204 ASET (attrs
, charset_id
, make_number (id
));
1206 charset_table
[id
] = charset
;
1208 if (charset
.method
== CHARSET_METHOD_MAP
)
1210 load_charset (&charset
, 0);
1211 charset_table
[id
] = charset
;
1214 if (charset
.iso_final
>= 0)
1216 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1217 charset
.iso_final
) = id
;
1218 if (new_definition_p
)
1219 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1220 Fcons (make_number (id
), Qnil
));
1221 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1222 charset_jisx0201_roman
= id
;
1223 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1224 charset_jisx0208_1978
= id
;
1225 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1226 charset_jisx0208
= id
;
1229 if (charset
.emacs_mule_id
>= 0)
1231 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1232 if (charset
.emacs_mule_id
< 0xA0)
1233 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1235 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1236 if (new_definition_p
)
1237 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1238 Fcons (make_number (id
), Qnil
));
1241 if (new_definition_p
)
1243 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1244 if (charset
.supplementary_p
)
1245 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1246 Fcons (make_number (id
), Qnil
));
1251 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1253 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1255 if (cs
->supplementary_p
)
1258 if (EQ (tail
, Vcharset_ordered_list
))
1259 Vcharset_ordered_list
= Fcons (make_number (id
),
1260 Vcharset_ordered_list
);
1261 else if (NILP (tail
))
1262 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1263 Fcons (make_number (id
), Qnil
));
1266 val
= Fcons (XCAR (tail
), XCDR (tail
));
1267 XSETCDR (tail
, val
);
1268 XSETCAR (tail
, make_number (id
));
1271 charset_ordered_list_tick
++;
1278 /* Same as Fdefine_charset_internal but arguments are more convenient
1279 to call from C (typically in syms_of_charset). This can define a
1280 charset of `offset' method only. Return the ID of the new
1284 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1285 iso_final
, iso_revision
, emacs_mule_id
,
1286 ascii_compatible
, supplementary
,
1290 unsigned char *code_space
;
1291 unsigned min_code
, max_code
;
1292 int iso_final
, iso_revision
, emacs_mule_id
;
1293 int ascii_compatible
, supplementary
;
1296 Lisp_Object args
[charset_arg_max
];
1297 Lisp_Object plist
[14];
1301 args
[charset_arg_name
] = name
;
1302 args
[charset_arg_dimension
] = make_number (dimension
);
1303 val
= Fmake_vector (make_number (8), make_number (0));
1304 for (i
= 0; i
< 8; i
++)
1305 ASET (val
, i
, make_number (code_space
[i
]));
1306 args
[charset_arg_code_space
] = val
;
1307 args
[charset_arg_min_code
] = make_number (min_code
);
1308 args
[charset_arg_max_code
] = make_number (max_code
);
1309 args
[charset_arg_iso_final
]
1310 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1311 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1312 args
[charset_arg_emacs_mule_id
]
1313 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1314 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1315 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1316 args
[charset_arg_invalid_code
] = Qnil
;
1317 args
[charset_arg_code_offset
] = make_number (code_offset
);
1318 args
[charset_arg_map
] = Qnil
;
1319 args
[charset_arg_subset
] = Qnil
;
1320 args
[charset_arg_superset
] = Qnil
;
1321 args
[charset_arg_unify_map
] = Qnil
;
1323 plist
[0] = intern (":name");
1324 plist
[1] = args
[charset_arg_name
];
1325 plist
[2] = intern (":dimension");
1326 plist
[3] = args
[charset_arg_dimension
];
1327 plist
[4] = intern (":code-space");
1328 plist
[5] = args
[charset_arg_code_space
];
1329 plist
[6] = intern (":iso-final-char");
1330 plist
[7] = args
[charset_arg_iso_final
];
1331 plist
[8] = intern (":emacs-mule-id");
1332 plist
[9] = args
[charset_arg_emacs_mule_id
];
1333 plist
[10] = intern (":ascii-compatible-p");
1334 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1335 plist
[12] = intern (":code-offset");
1336 plist
[13] = args
[charset_arg_code_offset
];
1338 args
[charset_arg_plist
] = Flist (14, plist
);
1339 Fdefine_charset_internal (charset_arg_max
, args
);
1341 return XINT (CHARSET_SYMBOL_ID (name
));
1345 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1346 Sdefine_charset_alias
, 2, 2, 0,
1347 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1349 Lisp_Object alias
, charset
;
1353 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1354 Fputhash (alias
, attr
, Vcharset_hash_table
);
1355 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1360 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1361 doc
: /* Return the property list of CHARSET. */)
1363 Lisp_Object charset
;
1367 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1368 return CHARSET_ATTR_PLIST (attrs
);
1372 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1373 doc
: /* Set CHARSET's property list to PLIST. */)
1375 Lisp_Object charset
, plist
;
1379 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1380 CHARSET_ATTR_PLIST (attrs
) = plist
;
1385 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1386 doc
: /* Unify characters of CHARSET with Unicode.
1387 This means reading the relevant file and installing the table defined
1388 by CHARSET's `:unify-map' property.
1390 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1391 the same meaning as the `:unify-map' attribute in the function
1392 `define-charset' (which see).
1394 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1395 (charset
, unify_map
, deunify
)
1396 Lisp_Object charset
, unify_map
, deunify
;
1401 CHECK_CHARSET_GET_ID (charset
, id
);
1402 cs
= CHARSET_FROM_ID (id
);
1404 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1405 : ! CHARSET_UNIFIED_P (cs
))
1408 CHARSET_UNIFIED_P (cs
) = 0;
1411 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1412 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1413 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1414 if (NILP (unify_map
))
1415 unify_map
= CHARSET_UNIFY_MAP (cs
);
1418 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1419 signal_error ("Bad unify-map", unify_map
);
1420 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1422 if (NILP (Vchar_unify_table
))
1423 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1424 char_table_set_range (Vchar_unify_table
,
1425 cs
->min_char
, cs
->max_char
, charset
);
1426 CHARSET_UNIFIED_P (cs
) = 1;
1428 else if (CHAR_TABLE_P (Vchar_unify_table
))
1430 int min_code
= CHARSET_MIN_CODE (cs
);
1431 int max_code
= CHARSET_MAX_CODE (cs
);
1432 int min_char
= DECODE_CHAR (cs
, min_code
);
1433 int max_char
= DECODE_CHAR (cs
, max_code
);
1435 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1441 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1442 Sget_unused_iso_final_char
, 2, 2, 0,
1444 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1445 DIMENSION is the number of bytes to represent a character: 1 or 2.
1446 CHARS is the number of characters in a dimension: 94 or 96.
1448 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1449 If there's no unused final char for the specified kind of charset,
1452 Lisp_Object dimension
, chars
;
1456 CHECK_NUMBER (dimension
);
1457 CHECK_NUMBER (chars
);
1458 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1459 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1460 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1461 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1462 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1463 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1465 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1469 check_iso_charset_parameter (dimension
, chars
, final_char
)
1470 Lisp_Object dimension
, chars
, final_char
;
1472 CHECK_NATNUM (dimension
);
1473 CHECK_NATNUM (chars
);
1474 CHECK_NATNUM (final_char
);
1476 if (XINT (dimension
) > 3)
1477 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1478 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1479 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1480 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1481 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1485 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1487 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1489 On decoding by an ISO-2022 base coding system, when a charset
1490 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1491 if CHARSET is designated instead. */)
1492 (dimension
, chars
, final_char
, charset
)
1493 Lisp_Object dimension
, chars
, final_char
, charset
;
1498 CHECK_CHARSET_GET_ID (charset
, id
);
1499 check_iso_charset_parameter (dimension
, chars
, final_char
);
1500 chars_flag
= XINT (chars
) == 96;
1501 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1506 /* Return information about charsets in the text at PTR of NBYTES
1507 bytes, which are NCHARS characters. The value is:
1509 0: Each character is represented by one byte. This is always
1510 true for a unibyte string. For a multibyte string, true if
1511 it contains only ASCII characters.
1513 1: No charsets other than ascii, control-1, and latin-1 are
1520 string_xstring_p (string
)
1523 const unsigned char *p
= SDATA (string
);
1524 const unsigned char *endp
= p
+ SBYTES (string
);
1526 if (SCHARS (string
) == SBYTES (string
))
1531 int c
= STRING_CHAR_ADVANCE (p
);
1540 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1542 CHARSETS is a vector. If Nth element is non-nil, it means the
1543 charset whose id is N is already found.
1545 It may lookup a translation table TABLE if supplied. */
1548 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1549 const unsigned char *ptr
;
1550 EMACS_INT nchars
, nbytes
;
1551 Lisp_Object charsets
, table
;
1554 const unsigned char *pend
= ptr
+ nbytes
;
1556 if (nchars
== nbytes
)
1559 ASET (charsets
, charset_ascii
, Qt
);
1566 c
= translate_char (table
, c
);
1567 if (ASCII_BYTE_P (c
))
1568 ASET (charsets
, charset_ascii
, Qt
);
1570 ASET (charsets
, charset_eight_bit
, Qt
);
1577 int c
= STRING_CHAR_ADVANCE (ptr
);
1578 struct charset
*charset
;
1581 c
= translate_char (table
, c
);
1582 charset
= CHAR_CHARSET (c
);
1583 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1588 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1590 doc
: /* Return a list of charsets in the region between BEG and END.
1591 BEG and END are buffer positions.
1592 Optional arg TABLE if non-nil is a translation table to look up.
1594 If the current buffer is unibyte, the returned list may contain
1595 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1597 Lisp_Object beg
, end
, table
;
1599 Lisp_Object charsets
;
1600 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1603 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1605 validate_region (&beg
, &end
);
1606 from
= XFASTINT (beg
);
1607 stop
= to
= XFASTINT (end
);
1609 if (from
< GPT
&& GPT
< to
)
1612 stop_byte
= GPT_BYTE
;
1615 stop_byte
= CHAR_TO_BYTE (stop
);
1617 from_byte
= CHAR_TO_BYTE (from
);
1619 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1622 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1623 stop_byte
- from_byte
, charsets
, table
,
1627 from
= stop
, from_byte
= stop_byte
;
1628 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1635 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1636 if (!NILP (AREF (charsets
, i
)))
1637 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1641 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1643 doc
: /* Return a list of charsets in STR.
1644 Optional arg TABLE if non-nil is a translation table to look up.
1646 If STR is unibyte, the returned list may contain
1647 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1649 Lisp_Object str
, table
;
1651 Lisp_Object charsets
;
1657 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1658 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1660 STRING_MULTIBYTE (str
));
1662 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1663 if (!NILP (AREF (charsets
, i
)))
1664 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1670 /* Return a unified character code for C (>= 0x110000). VAL is a
1671 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1674 maybe_unify_char (c
, val
)
1678 struct charset
*charset
;
1685 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1686 load_charset (charset
, 1);
1687 if (! inhibit_load_charset_map
)
1689 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1695 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1696 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1705 /* Return a character correponding to the code-point CODE of
1709 decode_char (charset
, code
)
1710 struct charset
*charset
;
1714 enum charset_method method
= CHARSET_METHOD (charset
);
1716 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1719 if (method
== CHARSET_METHOD_SUBSET
)
1721 Lisp_Object subset_info
;
1723 subset_info
= CHARSET_SUBSET (charset
);
1724 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1725 code
-= XINT (AREF (subset_info
, 3));
1726 if (code
< XFASTINT (AREF (subset_info
, 1))
1727 || code
> XFASTINT (AREF (subset_info
, 2)))
1730 c
= DECODE_CHAR (charset
, code
);
1732 else if (method
== CHARSET_METHOD_SUPERSET
)
1734 Lisp_Object parents
;
1736 parents
= CHARSET_SUPERSET (charset
);
1738 for (; CONSP (parents
); parents
= XCDR (parents
))
1740 int id
= XINT (XCAR (XCAR (parents
)));
1741 int code_offset
= XINT (XCDR (XCAR (parents
)));
1742 unsigned this_code
= code
- code_offset
;
1744 charset
= CHARSET_FROM_ID (id
);
1745 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1751 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1755 if (method
== CHARSET_METHOD_MAP
)
1757 Lisp_Object decoder
;
1759 decoder
= CHARSET_DECODER (charset
);
1760 if (! VECTORP (decoder
))
1762 load_charset (charset
, 1);
1763 decoder
= CHARSET_DECODER (charset
);
1765 if (VECTORP (decoder
))
1766 c
= XINT (AREF (decoder
, char_index
));
1768 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1770 else /* method == CHARSET_METHOD_OFFSET */
1772 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1773 if (CHARSET_UNIFIED_P (charset
)
1774 && c
> MAX_UNICODE_CHAR
)
1775 MAYBE_UNIFY_CHAR (c
);
1782 /* Variable used temporarily by the macro ENCODE_CHAR. */
1783 Lisp_Object charset_work
;
1785 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1786 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1787 use CHARSET's strict_max_char instead of max_char. */
1790 encode_char (charset
, c
)
1791 struct charset
*charset
;
1795 enum charset_method method
= CHARSET_METHOD (charset
);
1797 if (CHARSET_UNIFIED_P (charset
))
1799 Lisp_Object deunifier
, deunified
;
1800 int code_index
= -1;
1802 deunifier
= CHARSET_DEUNIFIER (charset
);
1803 if (! CHAR_TABLE_P (deunifier
))
1805 load_charset (charset
, 2);
1806 deunifier
= CHARSET_DEUNIFIER (charset
);
1808 if (CHAR_TABLE_P (deunifier
))
1810 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1812 if (INTEGERP (deunified
))
1813 code_index
= XINT (deunified
);
1817 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1819 if (code_index
>= 0)
1820 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1823 if (method
== CHARSET_METHOD_SUBSET
)
1825 Lisp_Object subset_info
;
1826 struct charset
*this_charset
;
1828 subset_info
= CHARSET_SUBSET (charset
);
1829 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1830 code
= ENCODE_CHAR (this_charset
, c
);
1831 if (code
== CHARSET_INVALID_CODE (this_charset
)
1832 || code
< XFASTINT (AREF (subset_info
, 1))
1833 || code
> XFASTINT (AREF (subset_info
, 2)))
1834 return CHARSET_INVALID_CODE (charset
);
1835 code
+= XINT (AREF (subset_info
, 3));
1839 if (method
== CHARSET_METHOD_SUPERSET
)
1841 Lisp_Object parents
;
1843 parents
= CHARSET_SUPERSET (charset
);
1844 for (; CONSP (parents
); parents
= XCDR (parents
))
1846 int id
= XINT (XCAR (XCAR (parents
)));
1847 int code_offset
= XINT (XCDR (XCAR (parents
)));
1848 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1850 code
= ENCODE_CHAR (this_charset
, c
);
1851 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1852 return code
+ code_offset
;
1854 return CHARSET_INVALID_CODE (charset
);
1857 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1858 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1859 return CHARSET_INVALID_CODE (charset
);
1861 if (method
== CHARSET_METHOD_MAP
)
1863 Lisp_Object encoder
;
1866 encoder
= CHARSET_ENCODER (charset
);
1867 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1869 load_charset (charset
, 2);
1870 encoder
= CHARSET_ENCODER (charset
);
1872 if (CHAR_TABLE_P (encoder
))
1874 val
= CHAR_TABLE_REF (encoder
, c
);
1876 return CHARSET_INVALID_CODE (charset
);
1878 if (! CHARSET_COMPACT_CODES_P (charset
))
1879 code
= INDEX_TO_CODE_POINT (charset
, code
);
1883 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1884 code
= INDEX_TO_CODE_POINT (charset
, code
);
1887 else /* method == CHARSET_METHOD_OFFSET */
1889 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1891 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1898 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1899 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1900 Return nil if CODE-POINT is not valid in CHARSET.
1902 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1904 Optional argument RESTRICTION specifies a way to map the pair of CCS
1905 and CODE-POINT to a character. Currently not supported and just ignored. */)
1906 (charset
, code_point
, restriction
)
1907 Lisp_Object charset
, code_point
, restriction
;
1911 struct charset
*charsetp
;
1913 CHECK_CHARSET_GET_ID (charset
, id
);
1914 if (CONSP (code_point
))
1916 CHECK_NATNUM_CAR (code_point
);
1917 CHECK_NATNUM_CDR (code_point
);
1918 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1922 CHECK_NATNUM (code_point
);
1923 code
= XINT (code_point
);
1925 charsetp
= CHARSET_FROM_ID (id
);
1926 c
= DECODE_CHAR (charsetp
, code
);
1927 return (c
>= 0 ? make_number (c
) : Qnil
);
1931 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1932 doc
: /* Encode the character CH into a code-point of CHARSET.
1933 Return nil if CHARSET doesn't include CH.
1935 Optional argument RESTRICTION specifies a way to map CH to a
1936 code-point in CCS. Currently not supported and just ignored. */)
1937 (ch
, charset
, restriction
)
1938 Lisp_Object ch
, charset
, restriction
;
1942 struct charset
*charsetp
;
1944 CHECK_CHARSET_GET_ID (charset
, id
);
1946 charsetp
= CHARSET_FROM_ID (id
);
1947 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1948 if (code
== CHARSET_INVALID_CODE (charsetp
))
1950 if (code
> 0x7FFFFFF)
1951 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1952 return make_number (code
);
1956 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1958 /* Return a character of CHARSET whose position codes are CODEn.
1960 CODE1 through CODE4 are optional, but if you don't supply sufficient
1961 position codes, it is assumed that the minimum code in each dimension
1963 (charset
, code1
, code2
, code3
, code4
)
1964 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1967 struct charset
*charsetp
;
1971 CHECK_CHARSET_GET_ID (charset
, id
);
1972 charsetp
= CHARSET_FROM_ID (id
);
1974 dimension
= CHARSET_DIMENSION (charsetp
);
1976 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1977 ? 0 : CHARSET_MIN_CODE (charsetp
));
1980 CHECK_NATNUM (code1
);
1981 if (XFASTINT (code1
) >= 0x100)
1982 args_out_of_range (make_number (0xFF), code1
);
1983 code
= XFASTINT (code1
);
1989 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1992 CHECK_NATNUM (code2
);
1993 if (XFASTINT (code2
) >= 0x100)
1994 args_out_of_range (make_number (0xFF), code2
);
1995 code
|= XFASTINT (code2
);
2002 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
2005 CHECK_NATNUM (code3
);
2006 if (XFASTINT (code3
) >= 0x100)
2007 args_out_of_range (make_number (0xFF), code3
);
2008 code
|= XFASTINT (code3
);
2015 code
|= charsetp
->code_space
[0];
2018 CHECK_NATNUM (code4
);
2019 if (XFASTINT (code4
) >= 0x100)
2020 args_out_of_range (make_number (0xFF), code4
);
2021 code
|= XFASTINT (code4
);
2028 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
2030 c
= DECODE_CHAR (charsetp
, code
);
2032 error ("Invalid code(s)");
2033 return make_number (c
);
2037 /* Return the first charset in CHARSET_LIST that contains C.
2038 CHARSET_LIST is a list of charset IDs. If it is nil, use
2039 Vcharset_ordered_list. */
2042 char_charset (c
, charset_list
, code_return
)
2044 Lisp_Object charset_list
;
2045 unsigned *code_return
;
2049 if (NILP (charset_list
))
2050 charset_list
= Vcharset_ordered_list
;
2054 while (CONSP (charset_list
))
2056 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2057 unsigned code
= ENCODE_CHAR (charset
, c
);
2059 if (code
!= CHARSET_INVALID_CODE (charset
))
2062 *code_return
= code
;
2065 charset_list
= XCDR (charset_list
);
2066 if (c
<= MAX_UNICODE_CHAR
2067 && EQ (charset_list
, Vcharset_non_preferred_head
))
2068 return CHARSET_FROM_ID (charset_unicode
);
2070 return (maybe_null
? NULL
2071 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2072 : CHARSET_FROM_ID (charset_eight_bit
));
2076 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2078 /*Return list of charset and one to four position-codes of CH.
2079 The charset is decided by the current priority order of charsets.
2080 A position-code is a byte value of each dimension of the code-point of
2081 CH in the charset. */)
2085 struct charset
*charset
;
2090 CHECK_CHARACTER (ch
);
2092 charset
= CHAR_CHARSET (c
);
2095 code
= ENCODE_CHAR (charset
, c
);
2096 if (code
== CHARSET_INVALID_CODE (charset
))
2098 dimension
= CHARSET_DIMENSION (charset
);
2099 for (val
= Qnil
; dimension
> 0; dimension
--)
2101 val
= Fcons (make_number (code
& 0xFF), val
);
2104 return Fcons (CHARSET_NAME (charset
), val
);
2108 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2109 doc
: /* Return the charset of highest priority that contains CH.
2110 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2111 from which to find the charset. It may also be a coding system. In
2112 that case, find the charset from what supported by that coding system. */)
2114 Lisp_Object ch
, restriction
;
2116 struct charset
*charset
;
2118 CHECK_CHARACTER (ch
);
2119 if (NILP (restriction
))
2120 charset
= CHAR_CHARSET (XINT (ch
));
2123 Lisp_Object charset_list
;
2125 if (CONSP (restriction
))
2127 for (charset_list
= Qnil
; CONSP (restriction
);
2128 restriction
= XCDR (restriction
))
2132 CHECK_CHARSET_GET_ID (XCAR (restriction
), id
);
2133 charset_list
= Fcons (make_number (id
), charset_list
);
2135 charset_list
= Fnreverse (charset_list
);
2138 charset_list
= coding_system_charset_list (restriction
);
2139 charset
= char_charset (XINT (ch
), charset_list
, NULL
);
2143 return (CHARSET_NAME (charset
));
2147 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2149 Return charset of a character in the current buffer at position POS.
2150 If POS is nil, it defauls to the current point.
2151 If POS is out of range, the value is nil. */)
2156 struct charset
*charset
;
2158 ch
= Fchar_after (pos
);
2159 if (! INTEGERP (ch
))
2161 charset
= CHAR_CHARSET (XINT (ch
));
2162 return (CHARSET_NAME (charset
));
2166 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2168 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2170 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2171 by their DIMENSION, CHARS, and FINAL-CHAR,
2172 whereas Emacs distinguishes them by charset symbol.
2173 See the documentation of the function `charset-info' for the meanings of
2174 DIMENSION, CHARS, and FINAL-CHAR. */)
2175 (dimension
, chars
, final_char
)
2176 Lisp_Object dimension
, chars
, final_char
;
2181 check_iso_charset_parameter (dimension
, chars
, final_char
);
2182 chars_flag
= XFASTINT (chars
) == 96;
2183 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2184 XFASTINT (final_char
));
2185 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2189 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2193 Clear temporary charset mapping tables.
2194 It should be called only from temacs invoked for dumping. */)
2198 struct charset
*charset
;
2201 if (temp_charset_work
)
2203 free (temp_charset_work
);
2204 temp_charset_work
= NULL
;
2207 if (CHAR_TABLE_P (Vchar_unify_table
))
2208 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2213 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2214 Scharset_priority_list
, 0, 1, 0,
2215 doc
: /* Return the list of charsets ordered by priority.
2216 HIGHESTP non-nil means just return the highest priority one. */)
2218 Lisp_Object highestp
;
2220 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2222 if (!NILP (highestp
))
2223 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2225 while (!NILP (list
))
2227 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2230 return Fnreverse (val
);
2233 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2235 doc
: /* Assign higher priority to the charsets given as arguments.
2236 usage: (set-charset-priority &rest charsets) */)
2241 Lisp_Object new_head
, old_list
, arglist
[2];
2242 Lisp_Object list_2022
, list_emacs_mule
;
2245 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2247 for (i
= 0; i
< nargs
; i
++)
2249 CHECK_CHARSET_GET_ID (args
[i
], id
);
2250 if (! NILP (Fmemq (make_number (id
), old_list
)))
2252 old_list
= Fdelq (make_number (id
), old_list
);
2253 new_head
= Fcons (make_number (id
), new_head
);
2256 arglist
[0] = Fnreverse (new_head
);
2257 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2258 Vcharset_ordered_list
= Fnconc (2, arglist
);
2259 charset_ordered_list_tick
++;
2261 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2262 CONSP (old_list
); old_list
= XCDR (old_list
))
2264 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2265 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2266 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2267 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2269 Viso_2022_charset_list
= Fnreverse (list_2022
);
2270 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2275 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2277 doc
: /* Internal use only.
2278 Return charset identification number of CHARSET. */)
2280 Lisp_Object charset
;
2284 CHECK_CHARSET_GET_ID (charset
, id
);
2285 return make_number (id
);
2292 Lisp_Object tempdir
;
2293 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2294 if (access (SDATA (tempdir
), 0) < 0)
2296 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2297 Emacs will not function correctly without the character map files.\n\
2298 Please check your installation!\n",
2300 /* TODO should this be a fatal error? (Bug#909) */
2303 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2308 init_charset_once ()
2312 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2313 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2314 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2315 iso_charset_table
[i
][j
][k
] = -1;
2317 for (i
= 0; i
< 256; i
++)
2318 emacs_mule_charset
[i
] = NULL
;
2320 charset_jisx0201_roman
= -1;
2321 charset_jisx0208_1978
= -1;
2322 charset_jisx0208
= -1;
2324 for (i
= 0; i
< 128; i
++)
2325 unibyte_to_multibyte_table
[i
] = i
;
2326 for (; i
< 256; i
++)
2327 unibyte_to_multibyte_table
[i
] = BYTE8_TO_CHAR (i
);
2335 DEFSYM (Qcharsetp
, "charsetp");
2337 DEFSYM (Qascii
, "ascii");
2338 DEFSYM (Qunicode
, "unicode");
2339 DEFSYM (Qemacs
, "emacs");
2340 DEFSYM (Qeight_bit
, "eight-bit");
2341 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2346 staticpro (&Vcharset_ordered_list
);
2347 Vcharset_ordered_list
= Qnil
;
2349 staticpro (&Viso_2022_charset_list
);
2350 Viso_2022_charset_list
= Qnil
;
2352 staticpro (&Vemacs_mule_charset_list
);
2353 Vemacs_mule_charset_list
= Qnil
;
2355 /* Don't staticpro them here. It's done in syms_of_fns. */
2356 QCtest
= intern (":test");
2357 Qeq
= intern ("eq");
2359 staticpro (&Vcharset_hash_table
);
2361 Lisp_Object args
[2];
2364 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2367 charset_table_size
= 128;
2368 charset_table
= ((struct charset
*)
2369 xmalloc (sizeof (struct charset
) * charset_table_size
));
2370 charset_table_used
= 0;
2372 defsubr (&Scharsetp
);
2373 defsubr (&Smap_charset_chars
);
2374 defsubr (&Sdefine_charset_internal
);
2375 defsubr (&Sdefine_charset_alias
);
2376 defsubr (&Scharset_plist
);
2377 defsubr (&Sset_charset_plist
);
2378 defsubr (&Sunify_charset
);
2379 defsubr (&Sget_unused_iso_final_char
);
2380 defsubr (&Sdeclare_equiv_charset
);
2381 defsubr (&Sfind_charset_region
);
2382 defsubr (&Sfind_charset_string
);
2383 defsubr (&Sdecode_char
);
2384 defsubr (&Sencode_char
);
2385 defsubr (&Ssplit_char
);
2386 defsubr (&Smake_char
);
2387 defsubr (&Schar_charset
);
2388 defsubr (&Scharset_after
);
2389 defsubr (&Siso_charset
);
2390 defsubr (&Sclear_charset_maps
);
2391 defsubr (&Scharset_priority_list
);
2392 defsubr (&Sset_charset_priority
);
2393 defsubr (&Scharset_id_internal
);
2395 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2396 doc
: /* *List of directories to search for charset map files. */);
2397 Vcharset_map_path
= Qnil
;
2399 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2400 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2401 inhibit_load_charset_map
= 0;
2403 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2404 doc
: /* List of all charsets ever defined. */);
2405 Vcharset_list
= Qnil
;
2407 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2408 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2409 If the current language environment is for multiple languages (e.g. "Latin-1"),
2410 the value may be a list of mnemonics. */);
2411 Vcurrent_iso639_language
= Qnil
;
2414 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2415 0, 127, 'B', -1, 0, 1, 0, 0);
2417 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2418 0, 255, -1, -1, -1, 1, 0, 0);
2420 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2421 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2423 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2424 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2426 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2427 128, 255, -1, 0, -1, 0, 1,
2428 MAX_5_BYTE_CHAR
+ 1);
2433 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2434 (do not change this comment) */