1 /* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
36 #include <sys/types.h>
38 #include "character.h"
51 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
53 A coded character set ("charset" hereafter) is a meaningful
54 collection (i.e. language, culture, functionality, etc.) of
55 characters. Emacs handles multiple charsets at once. In Emacs Lisp
56 code, a charset is represented by a symbol. In C code, a charset is
57 represented by its ID number or by a pointer to a struct charset.
59 The actual information about each charset is stored in two places.
60 Lispy information is stored in the hash table Vcharset_hash_table as
61 a vector (charset attributes). The other information is stored in
62 charset_table as a struct charset.
66 /* List of all charsets. This variable is used only from Emacs
68 Lisp_Object Vcharset_list
;
70 /* Hash table that contains attributes of each charset. Keys are
71 charset symbols, and values are vectors of charset attributes. */
72 Lisp_Object Vcharset_hash_table
;
74 /* Table of struct charset. */
75 struct charset
*charset_table
;
77 static int charset_table_size
;
78 int charset_table_used
;
80 Lisp_Object Qcharsetp
;
82 /* Special charset symbols. */
84 Lisp_Object Qeight_bit_control
;
85 Lisp_Object Qeight_bit_graphic
;
86 Lisp_Object Qiso_8859_1
;
89 /* The corresponding charsets. */
91 int charset_8_bit_control
;
92 int charset_8_bit_graphic
;
93 int charset_iso_8859_1
;
96 /* The other special charsets. */
97 int charset_jisx0201_roman
;
98 int charset_jisx0208_1978
;
101 /* Value of charset attribute `charset-iso-plane'. */
102 Lisp_Object Qgl
, Qgr
;
104 /* The primary charset. It is a charset of unibyte characters. */
107 /* List of charsets ordered by the priority. */
108 Lisp_Object Vcharset_ordered_list
;
110 /* List of iso-2022 charsets. */
111 Lisp_Object Viso_2022_charset_list
;
113 /* List of emacs-mule charsets. */
114 Lisp_Object Vemacs_mule_charset_list
;
116 struct charset
*emacs_mule_charset
[256];
118 /* Mapping table from ISO2022's charset (specified by DIMENSION,
119 CHARS, and FINAL-CHAR) to Emacs' charset. */
120 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
122 Lisp_Object Vcharset_map_directory
;
124 Lisp_Object Vchar_unified_charset_table
;
126 #define CODE_POINT_TO_INDEX(charset, code) \
127 ((charset)->code_linear_p \
128 ? (code) - (charset)->min_code \
129 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
130 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
131 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
132 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
133 ? (((((code) >> 24) - (charset)->code_space[12]) \
134 * (charset)->code_space[11]) \
135 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
136 * (charset)->code_space[7]) \
137 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
138 * (charset)->code_space[3]) \
139 + (((code) & 0xFF) - (charset)->code_space[0]) \
140 - ((charset)->char_index_offset)) \
144 /* Convert the character index IDX to code-point CODE for CHARSET.
145 It is assumed that IDX is in a valid range. */
147 #define INDEX_TO_CODE_POINT(charset, idx) \
148 ((charset)->code_linear_p \
149 ? (idx) + (charset)->min_code \
150 : (idx += (charset)->char_index_offset, \
151 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
152 | (((charset)->code_space[4] \
153 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
155 | (((charset)->code_space[8] \
156 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
158 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
164 /* Set to 1 to warn that a charset map is loaded and thus a buffer
165 text and a string data may be relocated. */
166 int charset_map_loaded
;
168 struct charset_map_entries
174 struct charset_map_entries
*next
;
177 /* Load the mapping information for CHARSET from ENTRIES.
179 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
181 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
182 CHARSET->decoder, and CHARSET->encoder.
184 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
185 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
189 load_charset_map (charset
, entries
, n_entries
, control_flag
)
190 struct charset
*charset
;
191 struct charset_map_entries
*entries
;
195 Lisp_Object vec
, table
;
196 unsigned max_code
= CHARSET_MAX_CODE (charset
);
197 int ascii_compatible_p
= charset
->ascii_compatible_p
;
198 int min_char
, max_char
, nonascii_min_char
;
200 unsigned char *fast_map
= charset
->fast_map
;
205 if (control_flag
> 0)
207 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
208 unsigned invalid_code
= CHARSET_INVALID_CODE (charset
);
210 table
= Fmake_char_table (Qnil
, make_number (invalid_code
));
211 if (control_flag
== 1)
212 vec
= Fmake_vector (make_number (n
), make_number (-1));
213 else if (! CHAR_TABLE_P (Vchar_unify_table
))
214 Vchar_unify_table
= Fmake_char_table (Qnil
, make_number (-1));
216 charset_map_loaded
= 1;
219 min_char
= max_char
= entries
->entry
[0].c
;
220 nonascii_min_char
= MAX_CHAR
;
221 for (i
= 0; i
< n_entries
; i
++)
224 int from_index
, to_index
;
226 int idx
= i
% 0x10000;
228 if (i
> 0 && idx
== 0)
229 entries
= entries
->next
;
230 from
= entries
->entry
[idx
].from
;
231 to
= entries
->entry
[idx
].to
;
232 from_c
= entries
->entry
[idx
].c
;
233 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
236 to_index
= from_index
;
241 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
242 to_c
= from_c
+ (to_index
- from_index
);
244 if (from_index
< 0 || to_index
< 0)
247 if (control_flag
< 2)
253 else if (from_c
< min_char
)
255 if (ascii_compatible_p
)
257 if (! ASCII_BYTE_P (from_c
))
259 if (from_c
< nonascii_min_char
)
260 nonascii_min_char
= from_c
;
262 else if (! ASCII_BYTE_P (to_c
))
264 nonascii_min_char
= 0x80;
268 for (c
= from_c
; c
<= to_c
; c
++)
269 CHARSET_FAST_MAP_SET (c
, fast_map
);
271 if (control_flag
== 1)
273 unsigned code
= from
;
275 if (CHARSET_COMPACT_CODES_P (charset
))
278 ASET (vec
, from_index
, make_number (from_c
));
279 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
280 if (from_index
== to_index
)
282 from_index
++, from_c
++;
283 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
286 for (; from_index
<= to_index
; from_index
++, from_c
++)
288 ASET (vec
, from_index
, make_number (from_c
));
289 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
295 unsigned code
= from
;
299 int c1
= DECODE_CHAR (charset
, code
);
303 CHAR_TABLE_SET (table
, from_c
, make_number (c1
));
304 CHAR_TABLE_SET (Vchar_unify_table
, c1
, from_c
);
305 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
306 CHAR_TABLE_SET (Vchar_unified_charset_table
, c1
,
307 CHARSET_NAME (charset
));
309 if (from_index
== to_index
)
311 from_index
++, from_c
++;
312 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
317 if (control_flag
< 2)
319 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
320 ? nonascii_min_char
: min_char
);
321 CHARSET_MAX_CHAR (charset
) = max_char
;
322 if (control_flag
== 1)
324 CHARSET_DECODER (charset
) = vec
;
325 CHARSET_ENCODER (charset
) = table
;
329 CHARSET_DEUNIFIER (charset
) = table
;
333 /* Read a hexadecimal number (preceded by "0x") from the file FP while
334 paying attention to comment charcter '#'. */
336 static INLINE
unsigned
344 while ((c
= getc (fp
)) != EOF
)
348 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
352 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
364 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
366 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
368 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
369 n
= (n
* 10) + c
- '0';
376 /* Return a mapping vector for CHARSET loaded from MAPFILE.
377 Each line of MAPFILE has this form
379 where 0xAAAA is a code-point and 0xCCCC is the corresponding
380 character code, or this form
382 where 0xAAAA and 0xBBBB are code-points specifying a range, and
383 0xCCCC is the first character code of the range.
385 The returned vector has this form:
386 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
387 where CODE1 is a code-point or a cons of code-points specifying a
390 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
393 load_charset_map_from_file (charset
, mapfile
, control_flag
)
394 struct charset
*charset
;
398 unsigned min_code
= CHARSET_MIN_CODE (charset
);
399 unsigned max_code
= CHARSET_MAX_CODE (charset
);
403 Lisp_Object suffixes
;
404 struct charset_map_entries
*head
, *entries
;
407 suffixes
= Fcons (build_string (".map"),
408 Fcons (build_string (".TXT"), Qnil
));
410 fd
= openp (Fcons (Vcharset_map_directory
, Qnil
), mapfile
, suffixes
,
413 || ! (fp
= fdopen (fd
, "r")))
415 add_to_log ("Failure in loading charset map: %S", mapfile
, Qnil
);
419 head
= entries
= ((struct charset_map_entries
*)
420 alloca (sizeof (struct charset_map_entries
)));
429 from
= read_hex (fp
, &eof
);
432 if (getc (fp
) == '-')
433 to
= read_hex (fp
, &eof
);
436 c
= (int) read_hex (fp
, &eof
);
438 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
441 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
443 entries
->next
= ((struct charset_map_entries
*)
444 alloca (sizeof (struct charset_map_entries
)));
445 entries
= entries
->next
;
447 idx
= n_entries
% 0x10000;
448 entries
->entry
[idx
].from
= from
;
449 entries
->entry
[idx
].to
= to
;
450 entries
->entry
[idx
].c
= c
;
456 load_charset_map (charset
, head
, n_entries
, control_flag
);
460 load_charset_map_from_vector (charset
, vec
, control_flag
)
461 struct charset
*charset
;
465 unsigned min_code
= CHARSET_MIN_CODE (charset
);
466 unsigned max_code
= CHARSET_MAX_CODE (charset
);
467 struct charset_map_entries
*head
, *entries
;
469 int len
= ASIZE (vec
);
474 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
478 head
= entries
= ((struct charset_map_entries
*)
479 alloca (sizeof (struct charset_map_entries
)));
481 for (i
= 0; i
< len
; i
+= 2)
483 Lisp_Object val
, val2
;
495 from
= XFASTINT (val
);
496 to
= XFASTINT (val2
);
501 from
= to
= XFASTINT (val
);
503 val
= AREF (vec
, i
+ 1);
507 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
510 if ((n_entries
% 0x10000) == 0)
512 entries
->next
= ((struct charset_map_entries
*)
513 alloca (sizeof (struct charset_map_entries
)));
514 entries
= entries
->next
;
516 idx
= n_entries
% 0x10000;
517 entries
->entry
[idx
].from
= from
;
518 entries
->entry
[idx
].to
= to
;
519 entries
->entry
[idx
].c
= c
;
523 load_charset_map (charset
, head
, n_entries
, control_flag
);
527 load_charset (charset
)
528 struct charset
*charset
;
530 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
534 map
= CHARSET_MAP (charset
);
536 load_charset_map_from_file (charset
, map
, 1);
538 load_charset_map_from_vector (charset
, map
, 1);
539 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP
;
544 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
545 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
549 return (CHARSETP (object
) ? Qt
: Qnil
);
554 map_charset_chars (c_function
, function
, charset_symbol
, arg
)
555 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
, Lisp_Object
));
556 Lisp_Object function
, charset_symbol
, arg
;
559 struct charset
*charset
;
562 CHECK_CHARSET_GET_ID (charset_symbol
, id
);
563 charset
= CHARSET_FROM_ID (id
);
565 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
566 load_charset (charset
);
568 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
570 range
= Fcons (make_number (CHARSET_MIN_CHAR (charset
)),
571 make_number (CHARSET_MAX_CHAR (charset
)));
573 (*c_function
) (arg
, range
, Qnil
);
575 call2 (function
, range
, arg
);
577 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
579 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
581 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
583 range
= Fcons (make_number (0), make_number (127));
585 (*c_function
) (arg
, range
, Qnil
);
587 call2 (function
, range
, arg
);
589 map_char_table (c_function
, function
, CHARSET_ENCODER (charset
), arg
,
592 else /* i.e. CHARSET_METHOD_PARENT */
597 int *code_space
= CHARSET_CODE_SPACE (charset
);
600 range
= Fcons (Qnil
, Qnil
);
602 for (i
= code_space
[12]; i
<= code_space
[13]; i
++)
603 for (j
= code_space
[8]; j
<= code_space
[9]; j
++)
604 for (k
= code_space
[4]; k
<= code_space
[5]; k
++)
605 for (l
= code_space
[0]; l
<= code_space
[1]; l
++)
607 code
= (i
<< 24) | (j
<< 16) | (k
<< 8) | l
;
608 c
= DECODE_CHAR (charset
, code
);
618 XSETCAR (range
, make_number (from
));
619 XSETCDR (range
, make_number (to
));
623 val
= make_number (from
);
625 (*c_function
) (arg
, val
, Qnil
);
627 call2 (function
, val
, arg
);
629 from
= to
= (c
< 0 ? -2 : c
);
635 XSETCAR (range
, make_number (from
));
636 XSETCDR (range
, make_number (to
));
640 val
= make_number (from
);
642 (*c_function
) (arg
, val
, Qnil
);
644 call2 (function
, val
, arg
);
649 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 3, 0,
650 doc
: /* Call FUNCTION for all characters in CHARSET.
651 FUNCTION is called with an argument RANGE and optional 2nd
654 RANGE is either a cons (FROM . TO), where FROM and TO indicate a range of
655 characters contained in CHARSET or a single character in the case that
656 FROM and TO would be equal. (The charset mapping may have gaps.)*/)
657 (function
, charset
, arg
)
658 Lisp_Object function
, charset
, arg
;
660 map_charset_chars (NULL
, function
, charset
, arg
);
665 /* Define a charset according to the arguments. The Nth argument is
666 the Nth attribute of the charset (the last attribute `charset-id'
667 is not included). See the docstring of `define-charset' for the
670 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
671 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
672 doc
: /* For internal use only.
673 usage: (define-charset-internal ...) */)
678 /* Charset attr vector. */
682 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
684 struct charset charset
;
687 int new_definition_p
;
690 if (nargs
!= charset_arg_max
)
691 return Fsignal (Qwrong_number_of_arguments
,
692 Fcons (intern ("define-charset-internal"),
693 make_number (nargs
)));
695 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
697 CHECK_SYMBOL (args
[charset_arg_name
]);
698 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
700 val
= args
[charset_arg_code_space
];
701 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
703 int min_byte
, max_byte
;
705 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
706 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
707 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
708 error ("Invalid :code-space value");
709 charset
.code_space
[i
* 4] = min_byte
;
710 charset
.code_space
[i
* 4 + 1] = max_byte
;
711 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
712 nchars
*= charset
.code_space
[i
* 4 + 2];
713 charset
.code_space
[i
* 4 + 3] = nchars
;
718 val
= args
[charset_arg_dimension
];
720 charset
.dimension
= dimension
;
724 charset
.dimension
= XINT (val
);
725 if (charset
.dimension
< 1 || charset
.dimension
> 4)
726 args_out_of_range_3 (val
, make_number (1), make_number (4));
729 charset
.code_linear_p
730 = (charset
.dimension
== 1
731 || (charset
.code_space
[2] == 256
732 && (charset
.dimension
== 2
733 || (charset
.code_space
[6] == 256
734 && (charset
.dimension
== 3
735 || charset
.code_space
[10] == 256)))));
737 if (! charset
.code_linear_p
)
739 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
740 bzero (charset
.code_space_mask
, 256);
741 for (i
= 0; i
< 4; i
++)
742 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
744 charset
.code_space_mask
[j
] |= (1 << i
);
747 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
749 charset
.min_code
= (charset
.code_space
[0]
750 | (charset
.code_space
[4] << 8)
751 | (charset
.code_space
[8] << 16)
752 | (charset
.code_space
[12] << 24));
753 charset
.max_code
= (charset
.code_space
[1]
754 | (charset
.code_space
[5] << 8)
755 | (charset
.code_space
[9] << 16)
756 | (charset
.code_space
[13] << 24));
757 charset
.char_index_offset
= 0;
759 val
= args
[charset_arg_min_code
];
769 CHECK_NUMBER (XCAR (val
));
770 CHECK_NUMBER (XCDR (val
));
771 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
773 if (code
< charset
.min_code
774 || code
> charset
.max_code
)
775 args_out_of_range_3 (make_number (charset
.min_code
),
776 make_number (charset
.max_code
), val
);
777 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
778 charset
.min_code
= code
;
781 val
= args
[charset_arg_max_code
];
791 CHECK_NUMBER (XCAR (val
));
792 CHECK_NUMBER (XCDR (val
));
793 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
795 if (code
< charset
.min_code
796 || code
> charset
.max_code
)
797 args_out_of_range_3 (make_number (charset
.min_code
),
798 make_number (charset
.max_code
), val
);
799 charset
.max_code
= code
;
802 charset
.compact_codes_p
= charset
.max_code
< 0x1000000;
804 val
= args
[charset_arg_invalid_code
];
807 if (charset
.min_code
> 0)
808 charset
.invalid_code
= 0;
811 XSETINT (val
, charset
.max_code
+ 1);
812 if (XINT (val
) == charset
.max_code
+ 1)
813 charset
.invalid_code
= charset
.max_code
+ 1;
815 error ("Attribute :invalid-code must be specified");
821 charset
.invalid_code
= XFASTINT (val
);
824 val
= args
[charset_arg_iso_final
];
826 charset
.iso_final
= -1;
830 if (XINT (val
) < '0' || XINT (val
) > 127)
831 error ("Invalid iso-final-char: %d", XINT (val
));
832 charset
.iso_final
= XINT (val
);
835 val
= args
[charset_arg_iso_revision
];
837 charset
.iso_revision
= -1;
842 args_out_of_range (make_number (63), val
);
843 charset
.iso_revision
= XINT (val
);
846 val
= args
[charset_arg_emacs_mule_id
];
848 charset
.emacs_mule_id
= -1;
852 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
853 error ("Invalid emacs-mule-id: %d", XINT (val
));
854 charset
.emacs_mule_id
= XINT (val
);
857 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
859 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
861 charset
.unified_p
= 0;
863 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
865 if (! NILP (args
[charset_arg_code_offset
]))
867 val
= args
[charset_arg_code_offset
];
870 charset
.method
= CHARSET_METHOD_OFFSET
;
871 charset
.code_offset
= XINT (val
);
873 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
874 charset
.min_char
= i
+ charset
.code_offset
;
875 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
876 charset
.max_char
= i
+ charset
.code_offset
;
877 if (charset
.max_char
> MAX_CHAR
)
878 error ("Unsupported max char: %d", charset
.max_char
);
880 for (i
= charset
.min_char
; i
< 0x10000 && i
<= charset
.max_char
;
882 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
883 for (; i
<= charset
.max_char
; i
+= 0x1000)
884 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
886 else if (! NILP (args
[charset_arg_map
]))
888 val
= args
[charset_arg_map
];
889 ASET (attrs
, charset_map
, val
);
891 load_charset_map_from_file (&charset
, val
, 0);
893 load_charset_map_from_vector (&charset
, val
, 0);
894 charset
.method
= CHARSET_METHOD_MAP_DEFERRED
;
896 else if (! NILP (args
[charset_arg_parents
]))
898 val
= args
[charset_arg_parents
];
900 charset
.method
= CHARSET_METHOD_INHERIT
;
901 val
= Fcopy_sequence (val
);
902 ASET (attrs
, charset_parents
, val
);
904 charset
.min_char
= MAX_CHAR
;
905 charset
.max_char
= 0;
906 for (; ! NILP (val
); val
= Fcdr (val
))
908 Lisp_Object elt
, car_part
, cdr_part
;
910 struct charset
*this_charset
;
915 car_part
= XCAR (elt
);
916 cdr_part
= XCDR (elt
);
917 CHECK_CHARSET_GET_ID (car_part
, this_id
);
918 CHECK_NUMBER (cdr_part
);
919 offset
= XINT (cdr_part
);
923 CHECK_CHARSET_GET_ID (elt
, this_id
);
926 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
928 this_charset
= CHARSET_FROM_ID (this_id
);
929 if (charset
.min_char
> this_charset
->min_char
)
930 charset
.min_char
= this_charset
->min_char
;
931 if (charset
.max_char
< this_charset
->max_char
)
932 charset
.max_char
= this_charset
->max_char
;
933 for (i
= 0; i
< 190; i
++)
934 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
938 error ("None of :code-offset, :map, :parents are specified");
940 val
= args
[charset_arg_unify_map
];
941 if (! NILP (val
) && !STRINGP (val
))
943 ASET (attrs
, charset_unify_map
, val
);
945 CHECK_LIST (args
[charset_arg_plist
]);
946 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
948 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
950 if (charset
.hash_index
>= 0)
952 new_definition_p
= 0;
953 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
954 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
958 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
960 if (charset_table_used
== charset_table_size
)
962 charset_table_size
+= 256;
964 = ((struct charset
*)
965 xrealloc (charset_table
,
966 sizeof (struct charset
) * charset_table_size
));
968 id
= charset_table_used
++;
969 new_definition_p
= 1;
972 ASET (attrs
, charset_id
, make_number (id
));
974 charset_table
[id
] = charset
;
976 if (charset
.iso_final
>= 0)
978 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
979 charset
.iso_final
) = id
;
980 if (new_definition_p
)
981 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
982 Fcons (make_number (id
), Qnil
));
983 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
984 charset_jisx0201_roman
= id
;
985 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
986 charset_jisx0208_1978
= id
;
987 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
988 charset_jisx0208
= id
;
991 if (charset
.emacs_mule_id
>= 0)
993 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
994 if (charset
.emacs_mule_id
< 0xA0)
995 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
996 if (new_definition_p
)
997 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
998 Fcons (make_number (id
), Qnil
));
1001 if (new_definition_p
)
1003 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1004 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1005 Fcons (make_number (id
), Qnil
));
1011 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1012 Sdefine_charset_alias
, 2, 2, 0,
1013 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1015 Lisp_Object alias
, charset
;
1019 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1020 Fputhash (alias
, attr
, Vcharset_hash_table
);
1021 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1026 DEFUN ("primary-charset", Fprimary_charset
, Sprimary_charset
, 0, 0, 0,
1027 doc
: /* Return the primary charset. */)
1030 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary
));
1034 DEFUN ("set-primary-charset", Fset_primary_charset
, Sset_primary_charset
,
1036 doc
: /* Set the primary charset to CHARSET. */)
1038 Lisp_Object charset
;
1042 CHECK_CHARSET_GET_ID (charset
, id
);
1043 charset_primary
= id
;
1048 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1049 doc
: /* Return a property list of CHARSET. */)
1051 Lisp_Object charset
;
1055 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1056 return CHARSET_ATTR_PLIST (attrs
);
1060 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1061 doc
: /* Set CHARSET's property list to PLIST. */)
1063 Lisp_Object charset
, plist
;
1067 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1068 CHARSET_ATTR_PLIST (attrs
) = plist
;
1073 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 2, 0,
1074 doc
: /* Unify characters of CHARSET with Unicode. */)
1075 (charset
, unify_map
)
1076 Lisp_Object charset
, unify_map
;
1081 CHECK_CHARSET_GET_ID (charset
, id
);
1082 cs
= CHARSET_FROM_ID (id
);
1083 if (CHARSET_METHOD (cs
) == CHARSET_METHOD_MAP_DEFERRED
)
1085 if (CHARSET_UNIFIED_P (cs
)
1086 && CHAR_TABLE_P (CHARSET_DEUNIFIER (cs
)))
1088 CHARSET_UNIFIED_P (cs
) = 0;
1089 if (NILP (unify_map
))
1090 unify_map
= CHARSET_UNIFY_MAP (cs
);
1091 if (STRINGP (unify_map
))
1092 load_charset_map_from_file (cs
, unify_map
, 2);
1094 load_charset_map_from_vector (cs
, unify_map
, 2);
1095 CHARSET_UNIFIED_P (cs
) = 1;
1099 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1100 Sget_unused_iso_final_char
, 2, 2, 0,
1102 Return an unsed ISO final char for a charset of DIMENISION and CHARS.
1103 DIMENSION is the number of bytes to represent a character: 1 or 2.
1104 CHARS is the number of characters in a dimension: 94 or 96.
1106 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1107 If there's no unused final char for the specified kind of charset,
1110 Lisp_Object dimension
, chars
;
1114 CHECK_NUMBER (dimension
);
1115 CHECK_NUMBER (chars
);
1116 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1117 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1118 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1119 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1120 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1121 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1123 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1127 check_iso_charset_parameter (dimension
, chars
, final_char
)
1128 Lisp_Object dimension
, chars
, final_char
;
1130 CHECK_NATNUM (dimension
);
1131 CHECK_NATNUM (chars
);
1132 CHECK_NATNUM (final_char
);
1134 if (XINT (dimension
) > 3)
1135 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1136 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1137 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1138 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1139 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1143 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1146 Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
1147 CHARSET should be defined by `defined-charset' in advance. */)
1148 (dimension
, chars
, final_char
, charset
)
1149 Lisp_Object dimension
, chars
, final_char
, charset
;
1153 CHECK_CHARSET_GET_ID (charset
, id
);
1154 check_iso_charset_parameter (dimension
, chars
, final_char
);
1156 ISO_CHARSET_TABLE (dimension
, chars
, final_char
) = id
;
1161 /* Return information about charsets in the text at PTR of NBYTES
1162 bytes, which are NCHARS characters. The value is:
1164 0: Each character is represented by one byte. This is always
1165 true for a unibyte string. For a multibyte string, true if
1166 it contains only ASCII characters.
1168 1: No charsets other than ascii, eight-bit-control, and
1175 string_xstring_p (string
)
1178 unsigned char *p
= XSTRING (string
)->data
;
1179 unsigned char *endp
= p
+ STRING_BYTES (XSTRING (string
));
1180 struct charset
*charset
;
1182 if (XSTRING (string
)->size
== STRING_BYTES (XSTRING (string
)))
1185 charset
= CHARSET_FROM_ID (charset_iso_8859_1
);
1188 int c
= STRING_CHAR_ADVANCE (p
);
1190 if (ENCODE_CHAR (charset
, c
) < 0)
1197 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1199 CHARSETS is a vector. Each element is a cons of CHARSET and
1200 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1201 FOUND-FLAG t (or nil) means that the corresponding charset is
1202 already found (or not yet found).
1204 It may lookup a translation table TABLE if supplied. */
1207 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
)
1210 Lisp_Object charsets
, table
;
1212 unsigned char *pend
= ptr
+ nbytes
;
1213 int ncharsets
= ASIZE (charsets
);
1215 if (nchars
== nbytes
)
1220 int c
= STRING_CHAR_ADVANCE (ptr
);
1226 c
= translate_char (table
, c
);
1227 for (i
= 0; i
< ncharsets
; i
++)
1229 elt
= AREF (charsets
, i
);
1230 if (NILP (XCDR (elt
)))
1232 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (elt
)));
1234 if (ENCODE_CHAR (charset
, c
) != CHARSET_INVALID_CODE (charset
))
1246 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1248 doc
: /* Return a list of charsets in the region between BEG and END.
1249 BEG and END are buffer positions.
1250 Optional arg TABLE if non-nil is a translation table to look up.
1252 If the region contains invalid multibyte characters,
1253 `unknown' is included in the returned list.
1255 If the current buffer is unibyte, the returned list may contain
1256 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1258 Lisp_Object beg
, end
, table
;
1260 Lisp_Object charsets
;
1261 int from
, from_byte
, to
, stop
, stop_byte
, i
;
1264 validate_region (&beg
, &end
);
1265 from
= XFASTINT (beg
);
1266 stop
= to
= XFASTINT (end
);
1268 if (from
< GPT
&& GPT
< to
)
1271 stop_byte
= GPT_BYTE
;
1274 stop_byte
= CHAR_TO_BYTE (stop
);
1276 from_byte
= CHAR_TO_BYTE (from
);
1278 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1279 for (i
= 0; i
< charset_table_used
; i
++)
1280 ASET (charsets
, i
, Fcons (make_number (i
), Qnil
));
1284 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1285 stop_byte
- from_byte
, charsets
, table
);
1288 from
= stop
, from_byte
= stop_byte
;
1289 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1296 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1297 if (!NILP (XCDR (AREF (charsets
, i
))))
1298 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1302 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1304 doc
: /* Return a list of charsets in STR.
1305 Optional arg TABLE if non-nil is a translation table to look up.
1307 If the string contains invalid multibyte characters,
1308 `unknown' is included in the returned list.
1310 If STR is unibyte, the returned list may contain
1311 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1313 Lisp_Object str
, table
;
1315 Lisp_Object charsets
;
1321 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1322 find_charsets_in_text (XSTRING (str
)->data
, XSTRING (str
)->size
,
1323 STRING_BYTES (XSTRING (str
)), charsets
, table
);
1326 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1327 if (!NILP (XCDR (AREF (charsets
, i
))))
1328 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1334 /* Return a character correponding to the code-point CODE of
1338 decode_char (charset
, code
)
1339 struct charset
*charset
;
1343 enum charset_method method
= CHARSET_METHOD (charset
);
1345 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1348 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1350 load_charset (charset
);
1351 method
= CHARSET_METHOD (charset
);
1354 if (method
== CHARSET_METHOD_INHERIT
)
1356 Lisp_Object parents
;
1358 parents
= CHARSET_PARENTS (charset
);
1360 for (; CONSP (parents
); parents
= XCDR (parents
))
1362 int id
= XINT (XCAR (XCAR (parents
)));
1363 int code_offset
= XINT (XCDR (XCAR (parents
)));
1364 unsigned this_code
= code
+ code_offset
;
1366 charset
= CHARSET_FROM_ID (id
);
1367 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1373 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1377 if (method
== CHARSET_METHOD_MAP
)
1379 Lisp_Object decoder
;
1381 decoder
= CHARSET_DECODER (charset
);
1382 if (! VECTORP (decoder
))
1384 c
= XINT (AREF (decoder
, char_index
));
1388 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1392 if (CHARSET_UNIFIED_P (charset
)
1395 MAYBE_UNIFY_CHAR (c
);
1402 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1403 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
1406 encode_char (charset
, c
)
1407 struct charset
*charset
;
1411 enum charset_method method
= CHARSET_METHOD (charset
);
1413 if (CHARSET_UNIFIED_P (charset
))
1415 Lisp_Object deunifier
;
1418 deunifier
= CHARSET_DEUNIFIER (charset
);
1419 if (! CHAR_TABLE_P (deunifier
))
1421 Funify_charset (CHARSET_NAME (charset
), Qnil
);
1422 deunifier
= CHARSET_DEUNIFIER (charset
);
1424 deunified
= XINT (CHAR_TABLE_REF (deunifier
, c
));
1429 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1430 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1431 return CHARSET_INVALID_CODE (charset
);
1433 if (method
== CHARSET_METHOD_INHERIT
)
1435 Lisp_Object parents
;
1437 parents
= CHARSET_PARENTS (charset
);
1438 for (; CONSP (parents
); parents
= XCDR (parents
))
1440 int id
= XINT (XCAR (XCAR (parents
)));
1441 int code_offset
= XINT (XCDR (XCAR (parents
)));
1442 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1444 code
= ENCODE_CHAR (this_charset
, c
);
1445 if (code
!= CHARSET_INVALID_CODE (this_charset
)
1446 && (code_offset
< 0 || code
>= code_offset
))
1448 code
-= code_offset
;
1449 if (code
>= charset
->min_code
&& code
<= charset
->max_code
1450 && CODE_POINT_TO_INDEX (charset
, code
) >= 0)
1454 return CHARSET_INVALID_CODE (charset
);
1457 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1459 load_charset (charset
);
1460 method
= CHARSET_METHOD (charset
);
1463 if (method
== CHARSET_METHOD_MAP
)
1465 Lisp_Object encoder
;
1468 encoder
= CHARSET_ENCODER (charset
);
1469 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1470 return CHARSET_INVALID_CODE (charset
);
1471 val
= CHAR_TABLE_REF (encoder
, c
);
1473 if (! CHARSET_COMPACT_CODES_P (charset
))
1474 code
= INDEX_TO_CODE_POINT (charset
, code
);
1476 else /* method == CHARSET_METHOD_OFFSET */
1478 code
= c
- CHARSET_CODE_OFFSET (charset
);
1479 code
= INDEX_TO_CODE_POINT (charset
, code
);
1486 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1487 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1488 Return nil if CODE-POINT is not valid in CHARSET.
1490 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1492 Optional argument RESTRICTION specifies a way to map the pair of CCS
1493 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1494 (charset
, code_point
, restriction
)
1495 Lisp_Object charset
, code_point
, restriction
;
1499 struct charset
*charsetp
;
1501 CHECK_CHARSET_GET_ID (charset
, id
);
1502 if (CONSP (code_point
))
1504 CHECK_NATNUM (XCAR (code_point
));
1505 CHECK_NATNUM (XCDR (code_point
));
1506 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1510 CHECK_NATNUM (code_point
);
1511 code
= XINT (code_point
);
1513 charsetp
= CHARSET_FROM_ID (id
);
1514 c
= DECODE_CHAR (charsetp
, code
);
1515 return (c
>= 0 ? make_number (c
) : Qnil
);
1519 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1520 doc
: /* Encode the character CH into a code-point of CHARSET.
1521 Return nil if CHARSET doesn't include CH.
1523 Optional argument RESTRICTION specifies a way to map CHAR to a
1524 code-point in CCS. Currently not supported and just ignored. */)
1525 (ch
, charset
, restriction
)
1526 Lisp_Object ch
, charset
, restriction
;
1530 struct charset
*charsetp
;
1532 CHECK_CHARSET_GET_ID (charset
, id
);
1535 charsetp
= CHARSET_FROM_ID (id
);
1536 code
= ENCODE_CHAR (charsetp
, ch
);
1537 if (code
== CHARSET_INVALID_CODE (charsetp
))
1539 if (code
> 0x7FFFFFF)
1540 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1541 return make_number (code
);
1545 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1547 /* Return a character of CHARSET whose position codes are CODEn.
1549 CODE1 through CODE4 are optional, but if you don't supply sufficient
1550 position codes, it is assumed that the minimum code in each dimension
1552 (charset
, code1
, code2
, code3
, code4
)
1553 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1556 struct charset
*charsetp
;
1560 CHECK_CHARSET_GET_ID (charset
, id
);
1561 charsetp
= CHARSET_FROM_ID (id
);
1563 dimension
= CHARSET_DIMENSION (charsetp
);
1565 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1566 ? 0 : CHARSET_MIN_CODE (charsetp
));
1569 CHECK_NATNUM (code1
);
1570 if (XFASTINT (code1
) >= 0x100)
1571 args_out_of_range (make_number (0xFF), code1
);
1572 code
= XFASTINT (code1
);
1578 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1581 CHECK_NATNUM (code2
);
1582 if (XFASTINT (code2
) >= 0x100)
1583 args_out_of_range (make_number (0xFF), code2
);
1584 code
|= XFASTINT (code2
);
1591 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1594 CHECK_NATNUM (code3
);
1595 if (XFASTINT (code3
) >= 0x100)
1596 args_out_of_range (make_number (0xFF), code3
);
1597 code
|= XFASTINT (code3
);
1604 code
|= charsetp
->code_space
[0];
1607 CHECK_NATNUM (code4
);
1608 if (XFASTINT (code4
) >= 0x100)
1609 args_out_of_range (make_number (0xFF), code4
);
1610 code
|= XFASTINT (code4
);
1617 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1619 c
= DECODE_CHAR (charsetp
, code
);
1621 error ("Invalid code(s)");
1622 return make_number (c
);
1626 /* Return the first charset in CHARSET_LIST that contains C.
1627 CHARSET_LIST is a list of charset IDs. If it is nil, use
1628 Vcharset_ordered_list. */
1631 char_charset (c
, charset_list
, code_return
)
1633 Lisp_Object charset_list
;
1634 unsigned *code_return
;
1636 if (NILP (charset_list
))
1637 charset_list
= Vcharset_ordered_list
;
1639 while (CONSP (charset_list
))
1641 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1642 unsigned code
= ENCODE_CHAR (charset
, c
);
1644 if (code
!= CHARSET_INVALID_CODE (charset
))
1647 *code_return
= code
;
1650 charset_list
= XCDR (charset_list
);
1656 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
1657 doc
: /*Return list of charset and one to three position-codes of CHAR.
1658 If CHAR is invalid as a character code,
1659 return a list of symbol `unknown' and CHAR. */)
1663 struct charset
*charset
;
1668 CHECK_CHARACTER (ch
);
1670 charset
= CHAR_CHARSET (c
);
1672 return Fcons (intern ("unknown"), Fcons (ch
, Qnil
));
1674 code
= ENCODE_CHAR (charset
, c
);
1675 if (code
== CHARSET_INVALID_CODE (charset
))
1677 dimension
= CHARSET_DIMENSION (charset
);
1678 val
= (dimension
== 1 ? Fcons (make_number (code
), Qnil
)
1679 : dimension
== 2 ? Fcons (make_number (code
>> 8),
1680 Fcons (make_number (code
& 0xFF), Qnil
))
1681 : Fcons (make_number (code
>> 16),
1682 Fcons (make_number ((code
>> 8) & 0xFF),
1683 Fcons (make_number (code
& 0xFF), Qnil
))));
1684 return Fcons (CHARSET_NAME (charset
), val
);
1688 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
1689 doc
: /* Return the charset of highest priority that contains CHAR. */)
1693 struct charset
*charset
;
1695 CHECK_CHARACTER (ch
);
1696 charset
= CHAR_CHARSET (XINT (ch
));
1697 return (CHARSET_NAME (charset
));
1701 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
1703 Return charset of a character in the current buffer at position POS.
1704 If POS is nil, it defauls to the current point.
1705 If POS is out of range, the value is nil. */)
1710 struct charset
*charset
;
1712 ch
= Fchar_after (pos
);
1713 if (! INTEGERP (ch
))
1715 charset
= CHAR_CHARSET (XINT (ch
));
1716 return (CHARSET_NAME (charset
));
1720 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
1722 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1724 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1725 by their DIMENSION, CHARS, and FINAL-CHAR,
1726 where as Emacs distinguishes them by charset symbol.
1727 See the documentation of the function `charset-info' for the meanings of
1728 DIMENSION, CHARS, and FINAL-CHAR. */)
1729 (dimension
, chars
, final_char
)
1730 Lisp_Object dimension
, chars
, final_char
;
1734 check_iso_charset_parameter (dimension
, chars
, final_char
);
1735 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), XFASTINT (chars
),
1736 XFASTINT (final_char
));
1737 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
1741 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
1744 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1748 struct charset
*charset
;
1751 for (i
= 0; i
< charset_table_used
; i
++)
1753 charset
= CHARSET_FROM_ID (i
);
1754 attrs
= CHARSET_ATTRIBUTES (charset
);
1756 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
1758 CHARSET_ATTR_DECODER (attrs
) = Qnil
;
1759 CHARSET_ATTR_ENCODER (attrs
) = Qnil
;
1760 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP_DEFERRED
;
1763 if (CHARSET_UNIFIED_P (charset
))
1764 CHARSET_ATTR_DEUNIFIER (attrs
) = Qnil
;
1767 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
1769 Foptimize_char_table (Vchar_unified_charset_table
);
1770 Vchar_unify_table
= Vchar_unified_charset_table
;
1771 Vchar_unified_charset_table
= Qnil
;
1777 DEFUN ("charset-priority-list", Fcharset_priority_list
,
1778 Scharset_priority_list
, 0, 1, 0,
1779 doc
: /* Return the list of charsets ordered by priority.
1780 HIGHESTP non-nil means just return the highest priority one. */)
1782 Lisp_Object highestp
;
1784 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
1786 if (!NILP (highestp
))
1787 return CHARSET_NAME (CHARSET_FROM_ID (Fcar (list
)));
1789 while (!NILP (list
))
1791 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XCAR (list
))), val
);
1794 return Fnreverse (val
);
1797 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
1799 doc
: /* Assign higher priority to the charsets given as arguments.
1800 usage: (set-charset-priority &rest charsets) */)
1805 Lisp_Object new_head
= Qnil
, old_list
, id
, arglist
[2];
1808 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
1809 for (i
= 0; i
< nargs
; i
++)
1811 CHECK_CHARSET_GET_ID (args
[i
], id
);
1812 old_list
= Fdelq (id
, old_list
);
1813 new_head
= Fcons (id
, new_head
);
1815 arglist
[0] = Fnreverse (new_head
);
1816 arglist
[1] = old_list
;
1817 Vcharset_ordered_list
= Fnconc (2, arglist
);
1829 init_charset_once ()
1833 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
1834 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
1835 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
1836 iso_charset_table
[i
][j
][k
] = -1;
1838 for (i
= 0; i
< 255; i
++)
1839 emacs_mule_charset
[i
] = NULL
;
1841 charset_jisx0201_roman
= -1;
1842 charset_jisx0208_1978
= -1;
1843 charset_jisx0208
= -1;
1846 Vchar_charset_set
= Fmake_char_table (Qnil
, Qnil
);
1847 CHAR_TABLE_SET (Vchar_charset_set
, make_number (97), Qnil
);
1849 DEFSYM (Qcharset_encode_table
, "charset-encode-table");
1851 /* Intern this now in case it isn't already done.
1852 Setting this variable twice is harmless.
1853 But don't staticpro it here--that is done in alloc.c. */
1854 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
1856 /* Now we are ready to set up this property, so we can create syntax
1858 Fput (Qcharset_encode_table
, Qchar_table_extra_slots
, make_number (0));
1869 DEFSYM (Qcharsetp
, "charsetp");
1871 DEFSYM (Qascii
, "ascii");
1872 DEFSYM (Qunicode
, "unicode");
1873 DEFSYM (Qeight_bit_control
, "eight-bit-control");
1874 DEFSYM (Qeight_bit_graphic
, "eight-bit-graphic");
1875 DEFSYM (Qiso_8859_1
, "iso-8859-1");
1880 p
= (char *) xmalloc (30000);
1882 staticpro (&Vcharset_ordered_list
);
1883 Vcharset_ordered_list
= Qnil
;
1885 staticpro (&Viso_2022_charset_list
);
1886 Viso_2022_charset_list
= Qnil
;
1888 staticpro (&Vemacs_mule_charset_list
);
1889 Vemacs_mule_charset_list
= Qnil
;
1891 staticpro (&Vcharset_hash_table
);
1892 Vcharset_hash_table
= Fmakehash (Qeq
);
1894 charset_table_size
= 128;
1895 charset_table
= ((struct charset
*)
1896 xmalloc (sizeof (struct charset
) * charset_table_size
));
1897 charset_table_used
= 0;
1899 staticpro (&Vchar_unified_charset_table
);
1900 Vchar_unified_charset_table
= Fmake_char_table (Qnil
, make_number (-1));
1902 defsubr (&Scharsetp
);
1903 defsubr (&Smap_charset_chars
);
1904 defsubr (&Sdefine_charset_internal
);
1905 defsubr (&Sdefine_charset_alias
);
1906 defsubr (&Sprimary_charset
);
1907 defsubr (&Sset_primary_charset
);
1908 defsubr (&Scharset_plist
);
1909 defsubr (&Sset_charset_plist
);
1910 defsubr (&Sunify_charset
);
1911 defsubr (&Sget_unused_iso_final_char
);
1912 defsubr (&Sdeclare_equiv_charset
);
1913 defsubr (&Sfind_charset_region
);
1914 defsubr (&Sfind_charset_string
);
1915 defsubr (&Sdecode_char
);
1916 defsubr (&Sencode_char
);
1917 defsubr (&Ssplit_char
);
1918 defsubr (&Smake_char
);
1919 defsubr (&Schar_charset
);
1920 defsubr (&Scharset_after
);
1921 defsubr (&Siso_charset
);
1922 defsubr (&Sclear_charset_maps
);
1923 defsubr (&Scharset_priority_list
);
1924 defsubr (&Sset_charset_priority
);
1926 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory
,
1927 doc
: /* Directory of charset map files that come with GNU Emacs.
1928 The default value is sub-directory "charsets" of `data-directory'. */);
1929 Vcharset_map_directory
= Fexpand_file_name (build_string ("charsets"),
1932 DEFVAR_LISP ("charset-list", &Vcharset_list
,
1933 doc
: /* List of all charsets ever defined. */);
1934 Vcharset_list
= Qnil
;
1936 /* Make the prerequisite charset `ascii' and `unicode'. */
1938 Lisp_Object args
[charset_arg_max
];
1939 Lisp_Object plist
[14];
1942 plist
[0] = intern (":name");
1943 plist
[2] = intern (":dimension");
1944 plist
[4] = intern (":code-space");
1945 plist
[6] = intern (":iso-final-char");
1946 plist
[8] = intern (":emacs-mule-id");
1947 plist
[10] = intern (":ascii-compatible-p");
1948 plist
[12] = intern (":code-offset");
1950 args
[charset_arg_name
] = Qascii
;
1951 args
[charset_arg_dimension
] = make_number (1);
1952 val
= Fmake_vector (make_number (8), make_number (0));
1953 ASET (val
, 1, make_number (127));
1954 args
[charset_arg_code_space
] = val
;
1955 args
[charset_arg_min_code
] = Qnil
;
1956 args
[charset_arg_max_code
] = Qnil
;
1957 args
[charset_arg_iso_final
] = make_number ('B');
1958 args
[charset_arg_iso_revision
] = Qnil
;
1959 args
[charset_arg_emacs_mule_id
] = make_number (0);
1960 args
[charset_arg_ascii_compatible_p
] = Qt
;
1961 args
[charset_arg_supplementary_p
] = Qnil
;
1962 args
[charset_arg_invalid_code
] = Qnil
;
1963 args
[charset_arg_code_offset
] = make_number (0);
1964 args
[charset_arg_map
] = Qnil
;
1965 args
[charset_arg_parents
] = Qnil
;
1966 args
[charset_arg_unify_map
] = Qnil
;
1967 /* The actual plist is set by mule-conf.el. */
1968 plist
[1] = args
[charset_arg_name
];
1969 plist
[3] = args
[charset_arg_dimension
];
1970 plist
[5] = args
[charset_arg_code_space
];
1971 plist
[7] = args
[charset_arg_iso_final
];
1972 plist
[9] = args
[charset_arg_emacs_mule_id
];
1973 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1974 plist
[13] = args
[charset_arg_code_offset
];
1975 args
[charset_arg_plist
] = Flist (14, plist
);
1976 Fdefine_charset_internal (charset_arg_max
, args
);
1977 charset_ascii
= CHARSET_SYMBOL_ID (Qascii
);
1979 args
[charset_arg_name
] = Qunicode
;
1980 args
[charset_arg_dimension
] = make_number (3);
1981 val
= Fmake_vector (make_number (8), make_number (0));
1982 ASET (val
, 1, make_number (255));
1983 ASET (val
, 3, make_number (255));
1984 ASET (val
, 5, make_number (16));
1985 args
[charset_arg_code_space
] = val
;
1986 args
[charset_arg_min_code
] = Qnil
;
1987 args
[charset_arg_max_code
] = Qnil
;
1988 args
[charset_arg_iso_final
] = Qnil
;
1989 args
[charset_arg_iso_revision
] = Qnil
;
1990 args
[charset_arg_emacs_mule_id
] = Qnil
;
1991 args
[charset_arg_ascii_compatible_p
] = Qt
;
1992 args
[charset_arg_supplementary_p
] = Qnil
;
1993 args
[charset_arg_invalid_code
] = Qnil
;
1994 args
[charset_arg_code_offset
] = make_number (0);
1995 args
[charset_arg_map
] = Qnil
;
1996 args
[charset_arg_parents
] = Qnil
;
1997 args
[charset_arg_unify_map
] = Qnil
;
1998 /* The actual plist is set by mule-conf.el. */
1999 plist
[1] = args
[charset_arg_name
];
2000 plist
[3] = args
[charset_arg_dimension
];
2001 plist
[5] = args
[charset_arg_code_space
];
2002 plist
[7] = args
[charset_arg_iso_final
];
2003 plist
[9] = args
[charset_arg_emacs_mule_id
];
2004 plist
[11] = args
[charset_arg_ascii_compatible_p
];
2005 plist
[13] = args
[charset_arg_code_offset
];
2006 args
[charset_arg_plist
] = Flist (14, plist
);
2007 Fdefine_charset_internal (charset_arg_max
, args
);
2008 charset_unicode
= CHARSET_SYMBOL_ID (Qunicode
);