1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* At first, see the document in `charset.h' to understand the code in
33 #include <sys/types.h>
46 Lisp_Object Qcharset
, Qascii
;
49 /* Declaration of special leading-codes. */
50 int leading_code_private_11
; /* for private DIMENSION1 of 1-column */
51 int leading_code_private_12
; /* for private DIMENSION1 of 2-column */
52 int leading_code_private_21
; /* for private DIMENSION2 of 1-column */
53 int leading_code_private_22
; /* for private DIMENSION2 of 2-column */
55 /* Declaration of special charsets. */
56 int charset_ascii
; /* ASCII */
57 int charset_latin_iso8859_1
; /* ISO8859-1 (Latin-1) */
58 int charset_jisx0208_1978
; /* JISX0208.1978 (Japanese Kanji old set) */
59 int charset_jisx0208
; /* JISX0208.1983 (Japanese Kanji) */
60 int charset_katakana_jisx0201
; /* JISX0201.Kana (Japanese Katakana) */
61 int charset_latin_jisx0201
; /* JISX0201.Roman (Japanese Roman) */
62 int charset_big5_1
; /* Big5 Level 1 (Chinese Traditional) */
63 int charset_big5_2
; /* Big5 Level 2 (Chinese Traditional) */
65 Lisp_Object Qcharset_table
;
67 /* A char-table containing information of each character set. */
68 Lisp_Object Vcharset_table
;
70 /* A vector of charset symbols indexed by charset-id. This is used
71 only for returning charset symbol from C functions. */
72 Lisp_Object Vcharset_symbol_table
;
74 /* A list of charset symbols ever defined. */
75 Lisp_Object Vcharset_list
;
77 /* Vector of translation tables ever defined.
78 ID of a translation table is used to index this vector. */
79 Lisp_Object Vtranslation_table_vector
;
81 /* A char-table for characters which may invoke auto-filling. */
82 Lisp_Object Vauto_fill_chars
;
84 Lisp_Object Qauto_fill_chars
;
86 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
87 int bytes_by_char_head
[256];
88 int width_by_char_head
[256];
90 /* Mapping table from ISO2022's charset (specified by DIMENSION,
91 CHARS, and FINAL-CHAR) to Emacs' charset. */
92 int iso_charset_table
[2][2][128];
94 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
95 unsigned char *_fetch_multibyte_char_p
;
96 int _fetch_multibyte_char_len
;
98 /* Offset to add to a non-ASCII value when inserting it. */
99 int nonascii_insert_offset
;
101 /* Translation table for converting non-ASCII unibyte characters
102 to multibyte codes, or nil. */
103 Lisp_Object Vnonascii_translation_table
;
105 /* List of all possible generic characters. */
106 Lisp_Object Vgeneric_character_list
;
/* Yield the smaller (min) or the larger (max) of two operands.  These
   are function-like macros: each argument can be evaluated twice, so
   do not pass expressions that have side effects.  */
#define min(A, B) ((B) > (A) ? (A) : (B))
#define max(A, B) ((B) < (A) ? (A) : (B))
112 invalid_character (c
)
115 error ("Invalid character: 0%o, %d, 0x%x", c
, c
, c
);
118 /* Parse a multibyte character string STR of length LENGTH (>= 2), and set
119 BYTES to the length of actual multibyte sequence, CHARSET, C1, and
120 C2 to such values that MAKE_CHAR can make the multibyte character
123 It is assumed that *STR is one of base leading codes and the
124 following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P.
126 This macro should be called only from SPLIT_MULTIBYTE_SEQ. */
128 #define SPLIT_CHARACTER_SEQ(str, length, bytes, charset, c1, c2) \
131 (charset) = (str)[0]; \
132 if ((charset) >= LEADING_CODE_PRIVATE_11 \
133 && (charset) <= LEADING_CODE_PRIVATE_22) \
134 (charset) = (str)[(bytes)++]; \
135 if ((bytes) < (length)) \
137 (c1) = (str)[(bytes)++] & 0x7F; \
138 if ((bytes) < (length)) \
139 (c2) = (str)[(bytes)++] & 0x7F; \
147 /* Parse string STR of length LENGTH and check if a multibyte
148 character is at STR. Set BYTES to the actual length, CHARSET, C1,
149 C2 to proper values for that character. */
151 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
154 if (ASCII_BYTE_P ((str)[0])) \
157 for (i = 1; i < (length) && ! CHAR_HEAD_P ((str)[i]); i++); \
159 (bytes) = 1, (charset) = CHARSET_ASCII, (c1) = (str)[0] ; \
162 if (i > BYTES_BY_CHAR_HEAD ((str)[0])) \
163 i = BYTES_BY_CHAR_HEAD ((str)[0]); \
164 SPLIT_CHARACTER_SEQ (str, i, bytes, charset, c1, c2); \
/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.

   For CHARSET_ASCII only C1 is examined; it must lie in 0..0x7F.  For
   any other charset C1 must lie in 0x20..0x7F, and unless
   CHARSET_DIMENSION (CHARSET) is 1, C2 must lie in 0x20..0x7F too.

   Every argument is parenthesized in the expansion so that an
   expression argument (e.g. a conditional expression passed as
   CHARSET) keeps its meaning.  Arguments may be evaluated more than
   once, so they must be free of side effects.  */
#define CHAR_COMPONENTS_VALID_P(charset, c1, c2)	\
  ((charset) == CHARSET_ASCII				\
   ? ((c1) >= 0 && (c1) <= 0x7F)			\
   : (CHARSET_DIMENSION (charset) == 1			\
      ? ((c1) >= 0x20 && (c1) <= 0x7F)			\
      : ((c1) >= 0x20 && (c1) <= 0x7F			\
	 && (c2) >= 0x20 && (c2) <= 0x7F)))
176 /* Store multi-byte form of the character C in STR. The caller should
177 allocate at least 4-byte area at STR in advance. Returns the
178 length of the multi-byte form. If C is an invalid character code,
181 Use macro `CHAR_STRING (C, STR)' instead of calling this function
182 directly if C can be an ASCII character. */
185 char_to_string (c
, str
)
189 unsigned char *p
= str
;
191 if (c
& CHAR_MODIFIER_MASK
) /* This includes the case C is negative. */
193 /* Multibyte character can't have a modifier bit. */
194 if (! SINGLE_BYTE_CHAR_P ((c
& ~CHAR_MODIFIER_MASK
)))
195 invalid_character (c
);
197 /* For Meta, Shift, and Control modifiers, we need special care. */
200 /* Move the meta bit to the right place for a string. */
201 c
= (c
& ~CHAR_META
) | 0x80;
205 /* Shift modifier is valid only with [A-Za-z]. */
206 if ((c
& 0377) >= 'A' && (c
& 0377) <= 'Z')
208 else if ((c
& 0377) >= 'a' && (c
& 0377) <= 'z')
209 c
= (c
& ~CHAR_SHIFT
) - ('a' - 'A');
213 /* Simulate the code in lread.c. */
214 /* Allow `\C- ' and `\C-?'. */
215 if (c
== (CHAR_CTL
| ' '))
217 else if (c
== (CHAR_CTL
| '?'))
219 /* ASCII control chars are made from letters (both cases),
220 as well as the non-letters within 0100...0137. */
221 else if ((c
& 0137) >= 0101 && (c
& 0137) <= 0132)
222 c
&= (037 | (~0177 & ~CHAR_CTL
));
223 else if ((c
& 0177) >= 0100 && (c
& 0177) <= 0137)
224 c
&= (037 | (~0177 & ~CHAR_CTL
));
227 /* If C still has any modifier bits, it is an invalid character. */
228 if (c
& CHAR_MODIFIER_MASK
)
229 invalid_character (c
);
233 else if (c
< MAX_CHAR
)
237 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
239 if (charset
>= LEADING_CODE_EXT_11
)
240 *p
++ = (charset
< LEADING_CODE_EXT_12
241 ? LEADING_CODE_PRIVATE_11
242 : (charset
< LEADING_CODE_EXT_21
243 ? LEADING_CODE_PRIVATE_12
244 : (charset
< LEADING_CODE_EXT_22
245 ? LEADING_CODE_PRIVATE_21
246 : LEADING_CODE_PRIVATE_22
)));
248 if (c1
> 0 && c1
< 32 || c2
> 0 && c2
< 32)
249 invalid_character (c
);
261 /* Return the non-ASCII character corresponding to multi-byte form at
262 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
263 length of the multibyte form in *ACTUAL_LEN.
265 Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
266 this function directly if you want to handle ASCII characters as
270 string_to_char (str
, len
, actual_len
)
271 const unsigned char *str
;
272 int len
, *actual_len
;
274 int c
, bytes
, charset
, c1
, c2
;
276 SPLIT_MULTIBYTE_SEQ (str
, len
, bytes
, charset
, c1
, c2
);
277 c
= MAKE_CHAR (charset
, c1
, c2
);
283 /* Return the length of the multi-byte form at string STR of length LEN.
284 Use the macro MULTIBYTE_FORM_LENGTH instead. */
286 multibyte_form_length (str
, len
)
287 const unsigned char *str
;
292 PARSE_MULTIBYTE_SEQ (str
, len
, bytes
);
296 /* Check multibyte form at string STR of length LEN and set variables
297 pointed by CHARSET, C1, and C2 to charset and position codes of the
298 character at STR, and return 0. If there's no multibyte character,
299 return -1. This should be used only in the macro SPLIT_STRING
300 which checks range of STR in advance. */
303 split_string (str
, len
, charset
, c1
, c2
)
304 const unsigned char *str
;
305 unsigned char *c1
, *c2
;
308 register int bytes
, cs
, code1
, code2
= -1;
310 SPLIT_MULTIBYTE_SEQ (str
, len
, bytes
, cs
, code1
, code2
);
311 if (cs
== CHARSET_ASCII
)
319 /* Return 1 iff character C has a valid printable glyph.
320 Use the macro CHAR_PRINTABLE_P instead. */
325 int charset
, c1
, c2
, chars
;
327 if (SINGLE_BYTE_CHAR_P (c
))
332 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
333 if (! CHARSET_DEFINED_P (charset
))
335 if (CHARSET_CHARS (charset
) == 94
336 ? c1
<= 32 || c1
>= 127
339 if (CHARSET_DIMENSION (charset
) == 2
340 && (CHARSET_CHARS (charset
) == 94
341 ? c2
<= 32 || c2
>= 127
347 /* Translate character C by translation table TABLE. If C
348 is negative, translate a character specified by CHARSET, C1, and C2
349 (C1 and C2 are code points of the character). If no translation is
350 found in TABLE, return C. */
352 translate_char (table
, c
, charset
, c1
, c2
)
354 int c
, charset
, c1
, c2
;
357 int alt_charset
, alt_c1
, alt_c2
, dimension
;
359 if (c
< 0) c
= MAKE_CHAR (charset
, (c1
& 0x7F) , (c2
& 0x7F));
360 if (!CHAR_TABLE_P (table
)
361 || (ch
= Faref (table
, make_number (c
)), !NATNUMP (ch
)))
364 SPLIT_CHAR (XFASTINT (ch
), alt_charset
, alt_c1
, alt_c2
);
365 dimension
= CHARSET_DIMENSION (alt_charset
);
366 if (dimension
== 1 && alt_c1
> 0 || dimension
== 2 && alt_c2
> 0)
367 /* CH is not a generic character, just return it. */
368 return XFASTINT (ch
);
370 /* Since CH is a generic character, we must return a specific
371 character which has the same position codes as C from CH. */
373 SPLIT_CHAR (c
, charset
, c1
, c2
);
374 if (dimension
!= CHARSET_DIMENSION (charset
))
375 /* We can't make such a character because of dimension mismatch. */
377 return MAKE_CHAR (alt_charset
, c1
, c2
);
380 /* Convert the unibyte character C to multibyte based on
381 Vnonascii_translation_table or nonascii_insert_offset. If they can't
382 convert C to a valid multibyte character, convert it based on
383 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
386 unibyte_char_to_multibyte (c
)
389 if (c
< 0400 && c
>= 0200)
393 if (! NILP (Vnonascii_translation_table
))
395 c
= XINT (Faref (Vnonascii_translation_table
, make_number (c
)));
396 if (c
>= 0400 && ! char_valid_p (c
, 0))
397 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
399 else if (c
>= 0240 && nonascii_insert_offset
> 0)
401 c
+= nonascii_insert_offset
;
402 if (c
< 0400 || ! char_valid_p (c
, 0))
403 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
406 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
412 /* Convert the multibyte character C to unibyte 8-bit character based
413 on Vnonascii_translation_table or nonascii_insert_offset. If
414 REV_TBL is non-nil, it should be a reverse table of
415 Vnonascii_translation_table, i.e. what given by:
416 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
419 multibyte_char_to_unibyte (c
, rev_tbl
)
423 if (!SINGLE_BYTE_CHAR_P (c
))
427 if (! CHAR_TABLE_P (rev_tbl
)
428 && CHAR_TABLE_P (Vnonascii_translation_table
))
429 rev_tbl
= Fchar_table_extra_slot (Vnonascii_translation_table
,
431 if (CHAR_TABLE_P (rev_tbl
))
434 temp
= Faref (rev_tbl
, make_number (c
));
438 c
= (c_save
& 0177) + 0200;
442 if (nonascii_insert_offset
> 0)
443 c
-= nonascii_insert_offset
;
444 if (c
< 128 || c
>= 256)
445 c
= (c_save
& 0177) + 0200;
453 /* Update the table Vcharset_table with the given arguments (see the
454 document of `define-charset' for the meaning of each argument).
455 Several other table contents are also updated. The caller should
456 check the validity of CHARSET-ID and the remaining arguments in
460 update_charset_table (charset_id
, dimension
, chars
, width
, direction
,
461 iso_final_char
, iso_graphic_plane
,
462 short_name
, long_name
, description
)
463 Lisp_Object charset_id
, dimension
, chars
, width
, direction
;
464 Lisp_Object iso_final_char
, iso_graphic_plane
;
465 Lisp_Object short_name
, long_name
, description
;
467 int charset
= XINT (charset_id
);
469 unsigned char leading_code_base
, leading_code_ext
;
471 if (NILP (CHARSET_TABLE_ENTRY (charset
)))
472 CHARSET_TABLE_ENTRY (charset
)
473 = Fmake_vector (make_number (CHARSET_MAX_IDX
), Qnil
);
475 /* Get byte length of multibyte form, base leading-code, and
476 extended leading-code of the charset. See the comment under the
477 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
478 bytes
= XINT (dimension
);
479 if (charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
481 /* Official charset, it doesn't have an extended leading-code. */
482 if (charset
!= CHARSET_ASCII
)
483 bytes
+= 1; /* For a base leading-code. */
484 leading_code_base
= charset
;
485 leading_code_ext
= 0;
489 /* Private charset. */
490 bytes
+= 2; /* For base and extended leading-codes. */
492 = (charset
< LEADING_CODE_EXT_12
493 ? LEADING_CODE_PRIVATE_11
494 : (charset
< LEADING_CODE_EXT_21
495 ? LEADING_CODE_PRIVATE_12
496 : (charset
< LEADING_CODE_EXT_22
497 ? LEADING_CODE_PRIVATE_21
498 : LEADING_CODE_PRIVATE_22
)));
499 leading_code_ext
= charset
;
502 if (BYTES_BY_CHAR_HEAD (leading_code_base
) != bytes
)
503 error ("Invalid dimension for the charset-ID %d", charset
);
505 CHARSET_TABLE_INFO (charset
, CHARSET_ID_IDX
) = charset_id
;
506 CHARSET_TABLE_INFO (charset
, CHARSET_BYTES_IDX
) = make_number (bytes
);
507 CHARSET_TABLE_INFO (charset
, CHARSET_DIMENSION_IDX
) = dimension
;
508 CHARSET_TABLE_INFO (charset
, CHARSET_CHARS_IDX
) = chars
;
509 CHARSET_TABLE_INFO (charset
, CHARSET_WIDTH_IDX
) = width
;
510 CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
) = direction
;
511 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_BASE_IDX
)
512 = make_number (leading_code_base
);
513 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_EXT_IDX
)
514 = make_number (leading_code_ext
);
515 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_FINAL_CHAR_IDX
) = iso_final_char
;
516 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_GRAPHIC_PLANE_IDX
)
518 CHARSET_TABLE_INFO (charset
, CHARSET_SHORT_NAME_IDX
) = short_name
;
519 CHARSET_TABLE_INFO (charset
, CHARSET_LONG_NAME_IDX
) = long_name
;
520 CHARSET_TABLE_INFO (charset
, CHARSET_DESCRIPTION_IDX
) = description
;
521 CHARSET_TABLE_INFO (charset
, CHARSET_PLIST_IDX
) = Qnil
;
524 /* If we have already defined a charset which has the same
525 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
526 DIRECTION, we must update the entry REVERSE-CHARSET of both
527 charsets. If there's no such charset, the value of the entry
531 for (i
= 0; i
<= MAX_CHARSET
; i
++)
532 if (!NILP (CHARSET_TABLE_ENTRY (i
)))
534 if (CHARSET_DIMENSION (i
) == XINT (dimension
)
535 && CHARSET_CHARS (i
) == XINT (chars
)
536 && CHARSET_ISO_FINAL_CHAR (i
) == XINT (iso_final_char
)
537 && CHARSET_DIRECTION (i
) != XINT (direction
))
539 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
541 CHARSET_TABLE_INFO (i
, CHARSET_REVERSE_CHARSET_IDX
) = charset_id
;
546 /* No such a charset. */
547 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
551 if (charset
!= CHARSET_ASCII
552 && charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
554 width_by_char_head
[leading_code_base
] = XINT (width
);
556 /* Update table emacs_code_class. */
557 emacs_code_class
[charset
] = (bytes
== 2
558 ? EMACS_leading_code_2
560 ? EMACS_leading_code_3
561 : EMACS_leading_code_4
));
564 /* Update table iso_charset_table. */
565 if (ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) < 0)
566 ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) = charset
;
571 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
574 get_charset_id (charset_symbol
)
575 Lisp_Object charset_symbol
;
580 return ((SYMBOLP (charset_symbol
)
581 && (val
= Fget (charset_symbol
, Qcharset
), VECTORP (val
))
582 && (charset
= XINT (XVECTOR (val
)->contents
[CHARSET_ID_IDX
]),
583 CHARSET_VALID_P (charset
)))
587 /* Return an identification number for a new private charset of
588 DIMENSION and WIDTH. If there's no more room for the new charset,
591 get_new_private_charset_id (dimension
, width
)
592 int dimension
, width
;
594 int charset
, from
, to
;
599 from
= LEADING_CODE_EXT_11
, to
= LEADING_CODE_EXT_12
;
601 from
= LEADING_CODE_EXT_12
, to
= LEADING_CODE_EXT_21
;
606 from
= LEADING_CODE_EXT_21
, to
= LEADING_CODE_EXT_22
;
608 from
= LEADING_CODE_EXT_22
, to
= LEADING_CODE_EXT_MAX
+ 1;
611 for (charset
= from
; charset
< to
; charset
++)
612 if (!CHARSET_DEFINED_P (charset
)) break;
614 return make_number (charset
< to
? charset
: 0);
617 DEFUN ("define-charset", Fdefine_charset
, Sdefine_charset
, 3, 3, 0,
618 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
619 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
620 treated as a private charset.\n\
621 INFO-VECTOR is a vector of the format:\n\
622 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
623 SHORT-NAME LONG-NAME DESCRIPTION]\n\
624 The meanings of each elements is as follows:\n\
625 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
626 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
627 WIDTH (integer) is the number of columns a character in the charset\n\
628 occupies on the screen: one of 0, 1, and 2.\n\
630 DIRECTION (integer) is the rendering direction of characters in the\n\
631 charset when rendering. If 0, render from left to right, else\n\
632 render from right to left.\n\
634 ISO-FINAL-CHAR (character) is the final character of the\n\
635 corresponding ISO 2022 charset.\n\
637 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
638 while encoding to variants of ISO 2022 coding system, one of the\n\
639 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
641 SHORT-NAME (string) is the short name to refer to the charset.\n\
643 LONG-NAME (string) is the long name to refer to the charset.\n\
645 DESCRIPTION (string) is the description string of the charset.")
646 (charset_id
, charset_symbol
, info_vector
)
647 Lisp_Object charset_id
, charset_symbol
, info_vector
;
651 if (!NILP (charset_id
))
652 CHECK_NUMBER (charset_id
, 0);
653 CHECK_SYMBOL (charset_symbol
, 1);
654 CHECK_VECTOR (info_vector
, 2);
656 if (! NILP (charset_id
))
658 if (! CHARSET_VALID_P (XINT (charset_id
)))
659 error ("Invalid CHARSET: %d", XINT (charset_id
));
660 else if (CHARSET_DEFINED_P (XINT (charset_id
)))
661 error ("Already defined charset: %d", XINT (charset_id
));
664 vec
= XVECTOR (info_vector
)->contents
;
665 if (XVECTOR (info_vector
)->size
!= 9
666 || !INTEGERP (vec
[0]) || !(XINT (vec
[0]) == 1 || XINT (vec
[0]) == 2)
667 || !INTEGERP (vec
[1]) || !(XINT (vec
[1]) == 94 || XINT (vec
[1]) == 96)
668 || !INTEGERP (vec
[2]) || !(XINT (vec
[2]) == 1 || XINT (vec
[2]) == 2)
669 || !INTEGERP (vec
[3]) || !(XINT (vec
[3]) == 0 || XINT (vec
[3]) == 1)
670 || !INTEGERP (vec
[4]) || !(XINT (vec
[4]) >= '0' && XINT (vec
[4]) <= '~')
671 || !INTEGERP (vec
[5]) || !(XINT (vec
[5]) == 0 || XINT (vec
[5]) == 1)
674 || !STRINGP (vec
[8]))
675 error ("Invalid info-vector argument for defining charset %s",
676 XSYMBOL (charset_symbol
)->name
->data
);
678 if (NILP (charset_id
))
680 charset_id
= get_new_private_charset_id (XINT (vec
[0]), XINT (vec
[2]));
681 if (XINT (charset_id
) == 0)
682 error ("There's no room for a new private charset %s",
683 XSYMBOL (charset_symbol
)->name
->data
);
686 update_charset_table (charset_id
, vec
[0], vec
[1], vec
[2], vec
[3],
687 vec
[4], vec
[5], vec
[6], vec
[7], vec
[8]);
688 Fput (charset_symbol
, Qcharset
, CHARSET_TABLE_ENTRY (XINT (charset_id
)));
689 CHARSET_SYMBOL (XINT (charset_id
)) = charset_symbol
;
690 Vcharset_list
= Fcons (charset_symbol
, Vcharset_list
);
694 DEFUN ("generic-character-list", Fgeneric_character_list
,
695 Sgeneric_character_list
, 0, 0, 0,
696 "Return a list of all possible generic characters.\n\
697 It includes a generic character for a charset not yet defined.")
700 return Vgeneric_character_list
;
703 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
704 Sget_unused_iso_final_char
, 2, 2, 0,
705 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
706 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
707 CHARS is the number of characters in a dimension: 94 or 96.\n\
709 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
710 If there's no unused final char for the specified kind of charset,\n\
713 Lisp_Object dimension
, chars
;
717 CHECK_NUMBER (dimension
, 0);
718 CHECK_NUMBER (chars
, 1);
719 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
720 error ("Invalid charset dimension %d, it should be 1 or 2",
722 if (XINT (chars
) != 94 && XINT (chars
) != 96)
723 error ("Invalid charset chars %d, it should be 94 or 96",
725 for (final_char
= '0'; final_char
<= '?'; final_char
++)
727 if (ISO_CHARSET_TABLE (dimension
, chars
, make_number (final_char
)) < 0)
730 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
733 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
735 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
736 CHARSET should be defined by `defined-charset' in advance.")
737 (dimension
, chars
, final_char
, charset_symbol
)
738 Lisp_Object dimension
, chars
, final_char
, charset_symbol
;
742 CHECK_NUMBER (dimension
, 0);
743 CHECK_NUMBER (chars
, 1);
744 CHECK_NUMBER (final_char
, 2);
745 CHECK_SYMBOL (charset_symbol
, 3);
747 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
748 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension
));
749 if (XINT (chars
) != 94 && XINT (chars
) != 96)
750 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
751 if (XINT (final_char
) < '0' || XFASTINT (final_char
) > '~')
752 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
753 if ((charset
= get_charset_id (charset_symbol
)) < 0)
754 error ("Invalid charset %s", XSYMBOL (charset_symbol
)->name
->data
);
756 ISO_CHARSET_TABLE (dimension
, chars
, final_char
) = charset
;
760 /* Return number of different charsets in STR of length LEN. In
761 addition, for each found charset N, CHARSETS[N] is set 1. The
762 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
763 It may lookup a translation table TABLE if supplied.
765 If MULTIBYTE is zero, do not check multibyte characters, i.e. if
766 any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any
767 8-bit codes are found CHARSET[1] is set to 1. */
770 find_charset_in_str (str
, len
, charsets
, table
, multibyte
)
776 register int num
= 0;
780 unsigned char *endp
= str
+ len
;
783 while (str
< endp
&& maskbits
!= 3)
784 maskbits
|= (*str
++ < 0x80 ? 1 : 2);
798 if (! CHAR_TABLE_P (table
))
803 int bytes
, charset
, c1
, c2
;
805 SPLIT_MULTIBYTE_SEQ (str
, len
, bytes
, charset
, c1
, c2
);
809 int c1
= translate_char (table
, -1, charset
, c1
, c2
);
811 charset
= CHAR_CHARSET (c1
);
814 if (!charsets
[charset
])
816 charsets
[charset
] = 1;
825 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
827 "Return a list of charsets in the region between BEG and END.\n\
828 BEG and END are buffer positions.\n\
829 Optional arg TABLE if non-nil is a translation table to look up.\n\
831 If the region contains invalid multiybte characters,\n\
832 `unknown' is included in the returned list.\n\
834 If the current buffer is unibyte, the returned list contains\n\
835 `ascii' if any 7-bit characters are found,\n\
836 and `unknown' if any 8-bit characters are found.")
838 Lisp_Object beg
, end
, table
;
840 int charsets
[MAX_CHARSET
+ 1];
841 int from
, from_byte
, to
, stop
, stop_byte
, i
;
844 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
846 validate_region (&beg
, &end
);
847 from
= XFASTINT (beg
);
848 stop
= to
= XFASTINT (end
);
850 if (from
< GPT
&& GPT
< to
)
853 stop_byte
= GPT_BYTE
;
856 stop_byte
= CHAR_TO_BYTE (stop
);
858 from_byte
= CHAR_TO_BYTE (from
);
860 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
863 find_charset_in_str (BYTE_POS_ADDR (from_byte
), stop_byte
- from_byte
,
864 charsets
, table
, multibyte
);
867 from
= stop
, from_byte
= stop_byte
;
868 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
876 for (i
= (multibyte
? MAX_CHARSET
: 1); i
>= 0; i
--)
879 if (CHARSET_DEFINED_P (i
))
880 val
= Fcons (CHARSET_SYMBOL (i
), val
);
885 val
= Fcons (Qunknown
, val
);
889 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
891 "Return a list of charsets in STR.\n\
892 Optional arg TABLE if non-nil is a translation table to look up.\n\
894 If the region contains invalid multiybte characters,\n\
895 `unknown' is included in the returned list.\n\
897 If STR is unibyte, the returned list contains\n\
898 `ascii' if any 7-bit characters are found,\n\
899 and `unknown' if any 8-bit characters are found.")
901 Lisp_Object str
, table
;
903 int charsets
[MAX_CHARSET
+ 1];
909 CHECK_STRING (str
, 0);
910 multibyte
= STRING_MULTIBYTE (str
);
912 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
913 find_charset_in_str (XSTRING (str
)->data
, STRING_BYTES (XSTRING (str
)),
914 charsets
, table
, multibyte
);
917 for (i
= (multibyte
? MAX_CHARSET
: 1); i
>= 0; i
--)
920 if (CHARSET_DEFINED_P (i
))
921 val
= Fcons (CHARSET_SYMBOL (i
), val
);
926 val
= Fcons (Qunknown
, val
);
930 DEFUN ("make-char-internal", Fmake_char_internal
, Smake_char_internal
, 1, 3, 0,
932 (charset
, code1
, code2
)
933 Lisp_Object charset
, code1
, code2
;
935 int charset_id
, c1
, c2
;
937 CHECK_NUMBER (charset
, 0);
938 charset_id
= XINT (charset
);
939 if (!CHARSET_DEFINED_P (charset_id
))
940 error ("Invalid charset ID: %d", XINT (charset
));
946 CHECK_NUMBER (code1
, 1);
953 CHECK_NUMBER (code2
, 2);
957 if (c1
< 0 || c1
> 0xFF || c2
< 0 || c2
> 0xFF)
958 error ("Invalid code points for charset ID %d: %d %d", charset_id
, c1
, c2
);
964 ? !CHAR_COMPONENTS_VALID_P (charset
, c1
, 0x20)
965 : !CHAR_COMPONENTS_VALID_P (charset
, c1
, c2
)))
966 error ("Invalid code points for charset ID %d: %d %d", charset_id
, c1
, c2
);
968 return make_number (MAKE_CHAR (charset_id
, c1
, c2
));
971 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
972 "Return list of charset and one or two position-codes of CHAR.\n\
973 If CHAR is invalid as a character code,\n\
974 return a list of symbol `unknown' and CHAR.")
979 int c
, charset
, c1
, c2
;
981 CHECK_NUMBER (ch
, 0);
983 if (!CHAR_VALID_P (c
, 1))
984 return Fcons (Qunknown
, Fcons (ch
, Qnil
));
985 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
987 ? Fcons (CHARSET_SYMBOL (charset
),
988 Fcons (make_number (c1
), Fcons (make_number (c2
), Qnil
)))
989 : Fcons (CHARSET_SYMBOL (charset
), Fcons (make_number (c1
), Qnil
)));
992 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
993 "Return charset of CHAR.")
997 CHECK_NUMBER (ch
, 0);
999 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch
)));
1002 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
1003 "Return charset of a character in the current buffer at position POS.\n\
1004 If POS is nil, it defauls to the current point.\n\
1005 If POS is out of range, the value is nil.")
1009 register int pos_byte
, bytes
, charset
, c1
, c2
;
1010 register unsigned char *p
;
1014 else if (MARKERP (pos
))
1016 pos_byte
= marker_byte_position (pos
);
1017 if (pos_byte
< BEGV_BYTE
|| pos_byte
>= ZV_BYTE
)
1022 CHECK_NUMBER (pos
, 0);
1023 if (XINT (pos
) < BEGV
|| XINT (pos
) >= ZV
)
1025 pos_byte
= CHAR_TO_BYTE (XINT (pos
));
1027 p
= BYTE_POS_ADDR (pos_byte
);
1028 if (BASE_LEADING_CODE_P (*p
))
1030 SPLIT_MULTIBYTE_SEQ (p
, Z_BYTE
- pos_byte
, bytes
, charset
, c1
, c2
);
1035 charset
= CHARSET_ASCII
;
1037 return CHARSET_SYMBOL (charset
);
1040 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
1041 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1043 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1044 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1045 where as Emacs distinguishes them by charset symbol.\n\
1046 See the documentation of the function `charset-info' for the meanings of\n\
1047 DIMENSION, CHARS, and FINAL-CHAR.")
1048 (dimension
, chars
, final_char
)
1049 Lisp_Object dimension
, chars
, final_char
;
1053 CHECK_NUMBER (dimension
, 0);
1054 CHECK_NUMBER (chars
, 1);
1055 CHECK_NUMBER (final_char
, 2);
1057 if ((charset
= ISO_CHARSET_TABLE (dimension
, chars
, final_char
)) < 0)
1059 return CHARSET_SYMBOL (charset
);
1062 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1063 generic character. If GENERICP is zero, return nonzero iff C is a
1064 valid normal character. Do not call this function directly,
1065 instead use macro CHAR_VALID_P. */
1067 char_valid_p (c
, genericp
)
1070 int charset
, c1
, c2
;
1074 if (SINGLE_BYTE_CHAR_P (c
))
1076 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
1081 if (c2
<= 0) c2
= 0x20;
1085 if (c2
<= 0) c1
= c2
= 0x20;
1088 return (CHARSET_DEFINED_P (charset
)
1089 && CHAR_COMPONENTS_VALID_P (charset
, c1
, c2
));
1092 DEFUN ("char-valid-p", Fchar_valid_p
, Schar_valid_p
, 1, 2, 0,
1093 "Return t if OBJECT is a valid normal character.\n\
1094 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1095 a valid generic character.")
1097 Lisp_Object object
, genericp
;
1099 if (! NATNUMP (object
))
1101 return (CHAR_VALID_P (XFASTINT (object
), !NILP (genericp
)) ? Qt
: Qnil
);
1104 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte
,
1105 Sunibyte_char_to_multibyte
, 1, 1, 0,
1106 "Convert the unibyte character CH to multibyte character.\n\
1107 The conversion is done based on `nonascii-translation-table' (which see)\n\
1108 or `nonascii-insert-offset' (which see).")
1114 CHECK_NUMBER (ch
, 0);
1116 if (c
< 0 || c
>= 0400)
1117 error ("Invalid unibyte character: %d", c
);
1118 c
= unibyte_char_to_multibyte (c
);
1120 error ("Can't convert to multibyte character: %d", XINT (ch
));
1121 return make_number (c
);
1124 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte
,
1125 Smultibyte_char_to_unibyte
, 1, 1, 0,
1126 "Convert the multibyte character CH to unibyte character.\n\
1127 The conversion is done based on `nonascii-translation-table' (which see)\n\
1128 or `nonascii-insert-offset' (which see).")
1134 CHECK_NUMBER (ch
, 0);
1136 if (! CHAR_VALID_P (c
, 0))
1137 error ("Invalid multibyte character: %d", c
);
1138 c
= multibyte_char_to_unibyte (c
, Qnil
);
1140 error ("Can't convert to unibyte character: %d", XINT (ch
));
1141 return make_number (c
);
1144 DEFUN ("char-bytes", Fchar_bytes
, Schar_bytes
, 1, 1, 0,
1145 "Return 1 regardless of the argument CHAR.\n\
1146 This is now an obsolete function. We keep it just for backward compatibility.")
1152 CHECK_NUMBER (ch
, 0);
1153 return make_number (1);
1156 /* Return how many bytes C will occupy in a multibyte buffer.
1157 Don't call this function directly, instead use macro CHAR_BYTES. */
1164 if (SINGLE_BYTE_CHAR_P (c
) || (c
& ~((1 << CHARACTERBITS
) - 1)))
1167 charset
= CHAR_CHARSET (c
);
1168 return (CHARSET_DEFINED_P (charset
) ? CHARSET_BYTES (charset
) : 1);
1171 /* Return the width of character of which multi-byte form starts with
1172 C. The width is measured by how many columns occupied on the
1173 screen when displayed in the current buffer. */
/* Results that can be read off the visible lines of the macro:
     - the tab_width of the current buffer for one case whose test is
       not visible here (presumably a tab character -- TODO confirm);
     - 0 for a newline;
     - 4 or 2 columns, depending on whether ctl_arrow is nil, for the
       remaining cases of that first group and for a second case whose
       test is not visible here;
     - WIDTH_BY_CHAR_HEAD (c) when multibyte characters are enabled in
       the current buffer and C is a base leading code.
   The argument C is evaluated more than once, so it must not have
   side effects.
   NOTE(review): several lines of this macro, including its final
   fallback value, are not visible in this excerpt.  */
1175 #define ONE_BYTE_CHAR_WIDTH(c) \
1178 ? XFASTINT (current_buffer->tab_width) \
1179 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1183 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1184 : ((! NILP (current_buffer->enable_multibyte_characters) \
1185 && BASE_LEADING_CODE_P (c)) \
1186 ? WIDTH_BY_CHAR_HEAD (c) \
/* Lisp primitive char-width, declared with minimum and maximum argument
   counts of 1.  The width is taken from one of three sources, tried in
   this order: the display-table entry for the character, the
   ONE_BYTE_CHAR_WIDTH macro for single-byte characters, or the
   CHARSET_WIDTH of the charset the character belongs to.
   NOTE(review): the parameter declaration, the assignment of C, the
   test on DISP and the final return are not visible in this excerpt.  */
1189 DEFUN ("char-width", Fchar_width
, Schar_width
, 1, 1, 0,
1190 "Return width of CHAR when displayed in the current buffer.\n\
1191 The width is measured by how many columns it occupies on the screen.")
1195 Lisp_Object val
, disp
;
/* DP is tested against null below, so buffer_display_table may return
   a null pointer.  */
1197 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1199 CHECK_NUMBER (ch
, 0);
1203 /* Get the way the display table would display it. */
1204 disp
= dp
? DISP_CHAR_VECTOR (dp
, c
) : Qnil
;
/* Display-table case: the width is the number of elements in the
   display vector.  */
1207 XSETINT (val
, XVECTOR (disp
)->size
);
/* Single-byte case: defer to the buffer-dependent macro.  */
1208 else if (SINGLE_BYTE_CHAR_P (c
))
1209 XSETINT (val
, ONE_BYTE_CHAR_WIDTH (c
));
/* Remaining case: use the column width recorded for the charset.  */
1212 int charset
= CHAR_CHARSET (c
);
1214 XSETFASTINT (val
, CHARSET_WIDTH (charset
));
1219 /* Return width of string STR of length LEN when displayed in the
1220 current buffer. The width is measured by how many columns it
1221 occupies on the screen. */
/* NOTE(review): the function header, the loop construct, the
   declarations of WIDTH, DISP and THISLEN, the branch conditions and
   the final return are not visible in this excerpt.  */
/* ENDP points one past the last byte of the string.  */
1228 unsigned char *endp
= str
+ len
;
1230 struct Lisp_Char_Table
*dp
= buffer_display_table ();
/* Decode the character at STR; ENDP - STR is the number of bytes
   still available, and THISLEN is passed to receive a length
   (presumably the byte length of this character -- TODO confirm).  */
1236 int c
= STRING_CHAR_AND_LENGTH (str
, endp
- str
, thislen
);
1238 /* Get the way the display table would display it. */
1240 disp
= DISP_CHAR_VECTOR (dp
, c
);
/* Display-table case: add the number of elements in the display
   vector.  */
1245 width
+= XVECTOR (disp
)->size
;
/* Other case: add the width derived from the first byte at STR.  */
1247 width
+= ONE_BYTE_CHAR_WIDTH (*str
);
/* Lisp primitive string-width, declared with minimum and maximum
   argument counts of 1.  It applies CHECK_STRING to the argument and
   stores in VAL the result of strwidth called on the string data and
   its length in bytes (STRING_BYTES).
   NOTE(review): the parameter declaration, the declaration of VAL and
   the return statement are not visible in this excerpt.  */
1254 DEFUN ("string-width", Fstring_width
, Sstring_width
, 1, 1, 0,
1255 "Return width of STRING when displayed in the current buffer.\n\
1256 Width is measured by how many columns it occupies on the screen.\n\
1257 When calculating width of a multibyte character in STRING,\n\
1258 only the base leading-code is considered; the validity of\n\
1259 the following bytes is not checked.")
1265 CHECK_STRING (str
, 0);
1266 XSETFASTINT (val
, strwidth (XSTRING (str
)->data
,
1267 STRING_BYTES (XSTRING (str
))));
/* Lisp primitive char-direction, declared with minimum and maximum
   argument counts of 1.  It applies CHECK_NUMBER to CH, finds the
   charset of CH, calls invalid_character when that charset is not
   defined, and otherwise returns the CHARSET_DIRECTION_IDX entry of
   the charset table information for that charset.
   NOTE(review): the parameter declaration and the declaration of
   CHARSET are not visible in this excerpt.  */
1271 DEFUN ("char-direction", Fchar_direction
, Schar_direction
, 1, 1, 0,
1272 "Return the direction of CHAR.\n\
1273 The returned value is 0 for left-to-right and 1 for right-to-left.")
1279 CHECK_NUMBER (ch
, 0);
1280 charset
= CHAR_CHARSET (XFASTINT (ch
));
/* An undefined charset means CH is not a usable character.  */
1281 if (!CHARSET_DEFINED_P (charset
))
1282 invalid_character (XINT (ch
));
1283 return CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
);
/* Lisp primitive chars-in-region, declared with minimum and maximum
   argument counts of 2.  BEG and END may each be a number or a marker
   (CHECK_NUMBER_COERCE_MARKER is applied to both).  The result is the
   difference between the larger and the smaller of the two positions,
   so the order in which they are given does not matter.
   NOTE(review): the declarations of FROM and TO are not visible in
   this excerpt.  */
1286 DEFUN ("chars-in-region", Fchars_in_region
, Schars_in_region
, 2, 2, 0,
1287 "Return number of characters between BEG and END.")
1289 Lisp_Object beg
, end
;
1293 CHECK_NUMBER_COERCE_MARKER (beg
, 0);
1294 CHECK_NUMBER_COERCE_MARKER (end
, 1);
/* Normalize so that FROM is never greater than TO.  */
1296 from
= min (XFASTINT (beg
), XFASTINT (end
));
1297 to
= max (XFASTINT (beg
), XFASTINT (end
));
1299 return make_number (to
- from
);
1302 /* Return the number of characters in the NBYTES bytes at PTR.
1303 This works by looking at the contents and checking for multibyte sequences.
1304 However, if the current buffer has enable-multibyte-characters = nil,
1305 we treat each byte as a character. */
/* NOTE(review): the return type, the parameter declarations and the
   statement executed when the condition below holds are not visible
   in this excerpt.  */
1308 chars_in_text (ptr
, nbytes
)
1312 /* current_buffer is null at early stages of Emacs initialization. */
1313 if (current_buffer
== 0
1314 || NILP (current_buffer
->enable_multibyte_characters
))
/* Otherwise count by scanning for multibyte sequences.  */
1317 return multibyte_chars_in_text (ptr
, nbytes
);
1320 /* Return the number of characters in the NBYTES bytes at PTR.
1321 This works by looking at the contents and checking for multibyte sequences.
1322 It ignores enable-multibyte-characters. */
/* NOTE(review): the return type, the parameter declarations, the loop
   construct, the counter and the return statement are not visible in
   this excerpt.  */
1325 multibyte_chars_in_text (ptr
, nbytes
)
1329 unsigned char *endp
;
/* ENDP points one past the last byte to examine.  */
1332 endp
= ptr
+ nbytes
;
/* A base leading code starts a multibyte sequence; PARSE_MULTIBYTE_SEQ
   is given BYTES to receive a result (presumably the length of the
   sequence -- TODO confirm).  */
1337 if (BASE_LEADING_CODE_P (*ptr
))
1339 PARSE_MULTIBYTE_SEQ (ptr
, nbytes
, bytes
);
/* Lisp primitive string, declared with a minimum of 1 argument and a
   maximum of MANY.  Each argument is checked with CHECK_NUMBER and
   written into a temporary buffer with CHAR_STRING; the buffer is then
   turned into either a multibyte or a unibyte Lisp string.
   NOTE(review): the parameter declarations (N, ARGS), the declarations
   of I and VAL, the assignment of C, the body of the test on
   SINGLE_BYTE_CHAR_P, the condition selecting between the two string
   constructors and the return statement are not visible in this
   excerpt.  */
1354 DEFUN ("string", Fstring
, Sstring
, 1, MANY
, 0,
1355 "Concatenate all the argument characters and make the result a string.")
/* The buffer reserves MAX_MULTIBYTE_LENGTH bytes per argument.
   NOTE(review): it is obtained with alloca, so its size grows with the
   argument count N on the stack -- confirm that N is bounded.  */
1361 unsigned char *buf
= (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH
* n
);
/* P is the write cursor into BUF.  */
1362 unsigned char *p
= buf
;
1364 int c
, multibyte_p
= 0;
1366 for (i
= 0; i
< n
; i
++)
1368 CHECK_NUMBER (args
[i
], 0);
/* Advance the cursor by the value CHAR_STRING yields (presumably the
   number of bytes it stored at P -- TODO confirm).  */
1370 p
+= CHAR_STRING (c
, p
);
1372 if (!SINGLE_BYTE_CHAR_P (c
))
1376 /* Here, we can't use make_string_from_bytes because of the byte
1377 combining problem. Make a multibyte string if there is any
1378 multibyte character in ARGS to make sure that `(string 2276)'
1379 returns a multibyte string if running --unibyte. */
/* Multibyte result: N characters occupying P - BUF bytes.  */
1381 val
= make_multibyte_string (buf
, n
, p
- buf
);
/* Unibyte result: P - BUF bytes.  */
1383 val
= make_unibyte_string (buf
, p
- buf
);
/* Look up the charset named CHARSET_NAME: intern the name, fetch the
   value of its Qcharset property, and return element 0 of that vector
   as a C integer.  An error "Charset ... is not defined" is signalled
   on failure.
   NOTE(review): the return type, the parameter declaration, the
   declaration of VAL and the condition guarding the error call are not
   visible in this excerpt.  */
1390 charset_id_internal (charset_name
)
1395 val
= Fget (intern (charset_name
), Qcharset
);
1397 error ("Charset %s is not defined", charset_name
);
1399 return (XINT (XVECTOR (val
)->contents
[0]));
/* Lisp primitive setup-special-charsets, taking no arguments.  It fills
   seven of the special-charset global variables with the identifiers
   that charset_id_internal returns for the corresponding charset
   names.  charset_id_internal signals an error for a name that is not
   defined, so all seven charsets must already exist when this runs.
   NOTE(review): the return statement is not visible in this excerpt.  */
1402 DEFUN ("setup-special-charsets", Fsetup_special_charsets
,
1403 Ssetup_special_charsets
, 0, 0, 0, "Internal use only.")
1406 charset_latin_iso8859_1
= charset_id_internal ("latin-iso8859-1");
1407 charset_jisx0208_1978
= charset_id_internal ("japanese-jisx0208-1978");
1408 charset_jisx0208
= charset_id_internal ("japanese-jisx0208");
1409 charset_katakana_jisx0201
= charset_id_internal ("katakana-jisx0201");
1410 charset_latin_jisx0201
= charset_id_internal ("latin-jisx0201");
1411 charset_big5_1
= charset_id_internal ("chinese-big5-1");
1412 charset_big5_2
= charset_id_internal ("chinese-big5-2");
/* One-time initialization of the charset tables.  In order, the visible
   code: protects the three table variables with staticpro, creates the
   charset char-table and the charset symbol vector, marks every slot of
   iso_charset_table as unused (-1), fills the per-leading-byte tables
   BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD, builds
   Vgeneric_character_list, and resets the two nonascii conversion
   variables.
   NOTE(review): the return type, the declarations of I, J, K and VAL,
   the initial value of VAL and the last argument of the Fmake_vector
   call are not visible in this excerpt.  */
1417 init_charset_once ()
1421 staticpro (&Vcharset_table
);
1422 staticpro (&Vcharset_symbol_table
);
1423 staticpro (&Vgeneric_character_list
);
1425 /* This has to be done here, before we call Fmake_char_table. */
1426 Qcharset_table
= intern ("charset-table");
1427 staticpro (&Qcharset_table
);
1429 /* Intern this now in case it isn't already done.
1430 Setting this variable twice is harmless.
1431 But don't staticpro it here--that is done in alloc.c. */
1432 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
1434 /* Now we are ready to set up this property, so we can
1435 create the charset table. */
1436 Fput (Qcharset_table
, Qchar_table_extra_slots
, make_number (0));
1437 Vcharset_table
= Fmake_char_table (Qcharset_table
, Qnil
);
1439 Qunknown
= intern ("unknown");
1440 staticpro (&Qunknown
);
/* The symbol vector has MAX_CHARSET + 1 elements.  */
1441 Vcharset_symbol_table
= Fmake_vector (make_number (MAX_CHARSET
+ 1),
/* Mark all 2 x 2 x 128 slots of iso_charset_table with -1.  */
1445 for (i
= 0; i
< 2; i
++)
1446 for (j
= 0; j
< 2; j
++)
1447 for (k
= 0; k
< 128; k
++)
1448 iso_charset_table
[i
][j
][k
] = -1;
/* Byte counts by leading byte: default 1 for all 256 values, then 2
   for the official dimension-1 range, 3 for the official dimension-2
   range, 3 for the two private dimension-1 leading codes and 4 for the
   two private dimension-2 leading codes.  */
1450 for (i
= 0; i
< 256; i
++)
1451 BYTES_BY_CHAR_HEAD (i
) = 1;
1452 for (i
= MIN_CHARSET_OFFICIAL_DIMENSION1
;
1453 i
<= MAX_CHARSET_OFFICIAL_DIMENSION1
; i
++)
1454 BYTES_BY_CHAR_HEAD (i
) = 2;
1455 for (i
= MIN_CHARSET_OFFICIAL_DIMENSION2
;
1456 i
<= MAX_CHARSET_OFFICIAL_DIMENSION2
; i
++)
1457 BYTES_BY_CHAR_HEAD (i
) = 3;
1458 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11
) = 3;
1459 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12
) = 3;
1460 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21
) = 4;
1461 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22
) = 4;
1462 /* The following entries do not reflect the actual byte counts; they
   only mark these bytes as the start of a multibyte character. */
1464 BYTES_BY_CHAR_HEAD (0x80) = 2;
1465 BYTES_BY_CHAR_HEAD (0x9E) = 2;
1466 BYTES_BY_CHAR_HEAD (0x9F) = 2;
/* Column widths by leading byte: 1 for 0..127, 4 for 128..255, then
   the private leading codes are overridden with 1 or 2 columns.  The
   second loop continues from the value of I left by the first.  */
1468 for (i
= 0; i
< 128; i
++)
1469 WIDTH_BY_CHAR_HEAD (i
) = 1;
1470 for (; i
< 256; i
++)
1471 WIDTH_BY_CHAR_HEAD (i
) = 4;
1472 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11
) = 1;
1473 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12
) = 2;
1474 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21
) = 1;
1475 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22
) = 2;
/* Build the generic character list: one number is pushed for each I in
   0x81..0x99 and in 0xA0..0xFE.  Values of I in 0x81..0x8F and
   0xA0..0xEF use a 7-bit shift; 0x90..0x99 and 0xF0..0xFE use a 14-bit
   shift.  Each loop without an initializer continues from the value of
   I left by the loop before it.  The list is reversed at the end so
   that the elements appear in the order they were pushed.  */
1481 for (i
= 0x81; i
< 0x90; i
++)
1482 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
1483 for (; i
< 0x9A; i
++)
1484 val
= Fcons (make_number ((i
- 0x8F) << 14), val
);
1485 for (i
= 0xA0; i
< 0xF0; i
++)
1486 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
1487 for (; i
< 0xFF; i
++)
1488 val
= Fcons (make_number ((i
- 0xE0) << 14), val
);
1489 Vgeneric_character_list
= Fnreverse (val
);
/* Start with no unibyte-to-multibyte conversion configured.  */
1492 nonascii_insert_offset
= 0;
1493 Vnonascii_translation_table
= Qnil
;
/* NOTE(review): the header of the enclosing function is not visible in
   this excerpt; these statements appear to be the start of the routine
   that registers the symbols and primitives of this file -- confirm
   against the full source.
   This part interns and protects the symbols ascii, charset and
   auto-fill-chars, defines the ASCII charset, and registers the
   primitives with defsubr.  */
1501 Qascii
= intern ("ascii");
1502 staticpro (&Qascii
);
1504 Qcharset
= intern ("charset");
1505 staticpro (&Qcharset
);
1507 /* Define ASCII charset now. */
1508 update_charset_table (make_number (CHARSET_ASCII
),
1509 make_number (1), make_number (94),
1514 build_string ("ASCII"),
1515 build_string ("ASCII"),
1516 build_string ("ASCII (ISO646 IRV)"));
/* Record the symbol for the ASCII charset and attach the charset table
   entry to that symbol under the charset property.  */
1517 CHARSET_SYMBOL (CHARSET_ASCII
) = Qascii
;
1518 Fput (Qascii
, Qcharset
, CHARSET_TABLE_ENTRY (CHARSET_ASCII
));
/* The auto-fill-chars char-table type is given 0 extra slots.  */
1520 Qauto_fill_chars
= intern ("auto-fill-chars");
1521 staticpro (&Qauto_fill_chars
);
1522 Fput (Qauto_fill_chars
, Qchar_table_extra_slots
, make_number (0));
/* Register the primitives with defsubr.  */
1524 defsubr (&Sdefine_charset
);
1525 defsubr (&Sgeneric_character_list
);
1526 defsubr (&Sget_unused_iso_final_char
);
1527 defsubr (&Sdeclare_equiv_charset
);
1528 defsubr (&Sfind_charset_region
);
1529 defsubr (&Sfind_charset_string
);
1530 defsubr (&Smake_char_internal
);
1531 defsubr (&Ssplit_char
);
1532 defsubr (&Schar_charset
);
1533 defsubr (&Scharset_after
);
1534 defsubr (&Siso_charset
);
1535 defsubr (&Schar_valid_p
);
1536 defsubr (&Sunibyte_char_to_multibyte
);
1537 defsubr (&Smultibyte_char_to_unibyte
);
1538 defsubr (&Schar_bytes
);
1539 defsubr (&Schar_width
);
1540 defsubr (&Sstring_width
);
1541 defsubr (&Schar_direction
);
1542 defsubr (&Schars_in_region
);
1544 defsubr (&Ssetup_special_charsets
);
/* Declarations of the Lisp-visible variables of this file, each
   followed by the assignment of its initial value.
   NOTE(review): the header and the end of the enclosing function are
   not visible in this excerpt.  */
/* The charset list starts out containing only the ascii symbol.  */
1546 DEFVAR_LISP ("charset-list", &Vcharset_list
,
1547 "List of charsets ever defined.");
1548 Vcharset_list
= Fcons (Qascii
, Qnil
);
/* The translation table vector starts with 16 elements, all nil.  */
1550 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector
,
1551 "Vector of cons cell of a symbol and translation table ever defined.\n\
1552 An ID of a translation table is an index of this vector.");
1553 Vtranslation_table_vector
= Fmake_vector (make_number (16), Qnil
);
/* The four private leading codes are exposed to Lisp as integer
   variables initialized from the corresponding C constants.  */
1555 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11
,
1556 "Leading-code of private TYPE9N charset of column-width 1.");
1557 leading_code_private_11
= LEADING_CODE_PRIVATE_11
;
1559 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12
,
1560 "Leading-code of private TYPE9N charset of column-width 2.");
1561 leading_code_private_12
= LEADING_CODE_PRIVATE_12
;
1563 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21
,
1564 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1565 leading_code_private_21
= LEADING_CODE_PRIVATE_21
;
1567 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22
,
1568 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1569 leading_code_private_22
= LEADING_CODE_PRIVATE_22
;
/* The two unibyte-to-multibyte conversion controls start out as 0 and
   nil respectively.  */
1571 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset
,
1572 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1573 This is used for converting unibyte text to multibyte,\n\
1574 and for inserting character codes specified by number.\n\n\
1575 This serves to convert a Latin-1 or similar 8-bit character code\n\
1576 to the corresponding Emacs multibyte character code.\n\
1577 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1578 for your choice of character set.\n\
1579 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1580 nonascii_insert_offset
= 0;
1582 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table
,
1583 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1584 This is used for converting unibyte text to multibyte,\n\
1585 and for inserting character codes specified by number.\n\n\
1586 Conversion is performed only when multibyte characters are enabled,\n\
1587 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1588 to the corresponding Emacs character code.\n\n\
1589 If this is nil, `nonascii-insert-offset' is used instead.\n\
1590 See also the docstring of `make-translation-table'.");
1591 Vnonascii_translation_table
= Qnil
;
/* The auto-fill char-table is created with a default of nil; space and
   newline are then set to t.  */
1593 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars
,
1594 "A char-table for characters which invoke auto-filling.\n\
1595 Such characters has value t in this table.");
1596 Vauto_fill_chars
= Fmake_char_table (Qauto_fill_chars
, Qnil
);
1597 CHAR_TABLE_SET (Vauto_fill_chars
, make_number (' '), Qt
);
1598 CHAR_TABLE_SET (Vauto_fill_chars
, make_number ('\n'), Qt
);