Use defcustom for user variables.
[emacs.git] / src / charset.c
blobfbed6283daaf7da469529ae6bbd0514c4fddeeb0
1 /* Multilingual characters handler.
2 Ver.1.0
3 Copyright (C) 1995 Free Software Foundation, Inc.
4 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6 This file is part of GNU Emacs.
8 GNU Emacs is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* At first, see the document in `charset.h' to understand the code in
24 this file. */
26 #include <stdio.h>
28 #ifdef emacs
30 #include <sys/types.h>
31 #include <config.h>
32 #include "lisp.h"
33 #include "buffer.h"
34 #include "charset.h"
35 #include "coding.h"
37 #else /* not emacs */
39 #include "mulelib.h"
41 #endif /* emacs */
43 Lisp_Object Qcharset, Qascii, Qcomposition;
45 /* Declaration of special leading-codes. */
46 int leading_code_composition; /* for composite characters */
47 int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48 int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49 int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50 int leading_code_private_22; /* for private DIMENSION2 of 2-column */
52 /* Declaration of special charsets. */
53 int charset_ascii; /* ASCII */
54 int charset_composition; /* for a composite character */
55 int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56 int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57 int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58 int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59 int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60 int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61 int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
63 Lisp_Object Qcharset_table;
65 /* A char-table containing information of each character set. */
66 Lisp_Object Vcharset_table;
68 /* A vector of charset symbol indexed by charset-id. This is used
69 only for returning charset symbol from C functions. */
70 Lisp_Object Vcharset_symbol_table;
72 /* A list of charset symbols ever defined. */
73 Lisp_Object Vcharset_list;
75 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
76 int bytes_by_char_head[256];
77 int width_by_char_head[256];
79 /* Mapping table from ISO2022's charset (specified by DIMENSION,
80 CHARS, and FINAL-CHAR) to Emacs' charset. */
81 int iso_charset_table[2][2][128];
83 /* Table of pointers to the structure `cmpchar_info' indexed by
84 CMPCHAR-ID. */
85 struct cmpchar_info **cmpchar_table;
86 /* The current size of `cmpchar_table'. */
87 static int cmpchar_table_size;
88 /* Number of the current composite characters. */
89 int n_cmpchars;
91 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
92 unsigned char *_fetch_multibyte_char_p;
93 int _fetch_multibyte_char_len;
95 /* Set STR a pointer to the multi-byte form of the character C. If C
96 is not a composite character, the multi-byte form is set in WORKBUF
97 and STR points WORKBUF. The caller should allocate at least 4-byte
98 area at WORKBUF in advance. Returns the length of the multi-byte
99 form.
101 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
102 function directly if C can be an ASCII character. */
105 non_ascii_char_to_string (c, workbuf, str)
106 int c;
107 unsigned char *workbuf, **str;
109 int charset, c1, c2;
111 if (COMPOSITE_CHAR_P (c))
113 int cmpchar_id = COMPOSITE_CHAR_ID (c);
115 if (cmpchar_id < n_cmpchars)
117 *str = cmpchar_table[cmpchar_id]->data;
118 return cmpchar_table[cmpchar_id]->len;
120 else
122 *str = workbuf;
123 return 0;
127 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
129 *str = workbuf;
130 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
131 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
132 workbuf++;
133 *workbuf++ = c1 | 0x80;
134 if (c2 >= 0)
135 *workbuf++ = c2 | 0x80;
137 return (workbuf - *str);
140 /* Return a non-ASCII character of which multi-byte form is at STR of
141 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
142 character is set to the address ACTUAL_LEN.
144 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
145 directly if STR can hold an ASCII character. */
147 string_to_non_ascii_char (str, len, actual_len)
148 unsigned char *str;
149 int len, *actual_len;
151 int charset;
152 unsigned char c1, c2;
153 register int c;
155 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
157 if (actual_len)
158 *actual_len = 1;
159 return (int) *str;
162 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
164 if (actual_len)
165 *actual_len = (charset == CHARSET_COMPOSITION
166 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
167 : BYTES_BY_CHAR_HEAD (*str));
168 return c;
171 /* Return the length of the multi-byte form at string STR of length LEN. */
173 multibyte_form_length (str, len)
174 unsigned char *str;
175 int len;
177 int charset;
178 unsigned char c1, c2;
179 register int c;
181 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
182 return 1;
184 return (charset == CHARSET_COMPOSITION
185 ? cmpchar_table[(c1 << 7) | c2]->len
186 : BYTES_BY_CHAR_HEAD (*str));
189 /* Check if string STR of length LEN contains valid multi-byte form of
190 a character. If valid, charset and position codes of the character
191 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
192 return -1. This should be used only in the macro SPLIT_STRING
193 which checks range of STR in advance. */
195 split_non_ascii_string (str, len, charset, c1, c2)
196 register unsigned char *str, *c1, *c2;
197 register int len, *charset;
199 register unsigned int cs = *str++;
201 if (cs == LEADING_CODE_COMPOSITION)
203 int cmpchar_id = str_cmpchar_id (str - 1, len);
205 if (cmpchar_id < 0)
206 return -1;
207 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
209 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
210 && CHARSET_DEFINED_P (cs))
212 *charset = cs;
213 if (*str < 0xA0)
214 return -1;
215 *c1 = (*str++) & 0x7F;
216 if (CHARSET_DIMENSION (cs) == 2)
218 if (*str < 0xA0)
219 return -1;
220 *c2 = (*str++) & 0x7F;
223 else
224 return -1;
225 return 0;
228 /* Update the table Vcharset_table with the given arguments (see the
229 document of `define-charset' for the meaning of each argument).
230 Several other table contents are also updated. The caller should
231 check the validity of CHARSET-ID and the remaining arguments in
232 advance. */
234 void
235 update_charset_table (charset_id, dimension, chars, width, direction,
236 iso_final_char, iso_graphic_plane,
237 short_name, long_name, description)
238 Lisp_Object charset_id, dimension, chars, width, direction;
239 Lisp_Object iso_final_char, iso_graphic_plane;
240 Lisp_Object short_name, long_name, description;
242 int charset = XINT (charset_id);
243 int bytes;
244 unsigned char leading_code_base, leading_code_ext;
246 if (NILP (CHARSET_TABLE_ENTRY (charset)))
247 CHARSET_TABLE_ENTRY (charset)
248 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
250 /* Get byte length of multibyte form, base leading-code, and
251 extended leading-code of the charset. See the comment under the
252 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
253 bytes = XINT (dimension);
254 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
256 /* Official charset, it doesn't have an extended leading-code. */
257 if (charset != CHARSET_ASCII)
258 bytes += 1; /* For a base leading-code. */
259 leading_code_base = charset;
260 leading_code_ext = 0;
262 else
264 /* Private charset. */
265 bytes += 2; /* For base and extended leading-codes. */
266 leading_code_base
267 = (charset < LEADING_CODE_EXT_12
268 ? LEADING_CODE_PRIVATE_11
269 : (charset < LEADING_CODE_EXT_21
270 ? LEADING_CODE_PRIVATE_12
271 : (charset < LEADING_CODE_EXT_22
272 ? LEADING_CODE_PRIVATE_21
273 : LEADING_CODE_PRIVATE_22)));
274 leading_code_ext = charset;
277 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
278 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
279 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
280 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
281 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
282 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
283 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
284 = make_number (leading_code_base);
285 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
286 = make_number (leading_code_ext);
287 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
288 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
289 = iso_graphic_plane;
290 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
291 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
292 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
293 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
296 /* If we have already defined a charset which has the same
297 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
298 DIRECTION, we must update the entry REVERSE-CHARSET of both
299 charsets. If there's no such charset, the value of the entry
300 is set to nil. */
301 int i;
303 for (i = 0; i <= MAX_CHARSET; i++)
304 if (!NILP (CHARSET_TABLE_ENTRY (i)))
306 if (CHARSET_DIMENSION (i) == XINT (dimension)
307 && CHARSET_CHARS (i) == XINT (chars)
308 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
309 && CHARSET_DIRECTION (i) != XINT (direction))
311 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
312 = make_number (i);
313 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
314 break;
317 if (i > MAX_CHARSET)
318 /* No such a charset. */
319 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
320 = make_number (-1);
323 if (charset != CHARSET_ASCII
324 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
326 /* Update tables bytes_by_char_head and width_by_char_head. */
327 bytes_by_char_head[leading_code_base] = bytes;
328 width_by_char_head[leading_code_base] = XINT (width);
330 /* Update table emacs_code_class. */
331 emacs_code_class[charset] = (bytes == 2
332 ? EMACS_leading_code_2
333 : (bytes == 3
334 ? EMACS_leading_code_3
335 : EMACS_leading_code_4));
338 /* Update table iso_charset_table. */
339 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
340 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
343 #ifdef emacs
345 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
346 is invalid. */
348 get_charset_id (charset_symbol)
349 Lisp_Object charset_symbol;
351 Lisp_Object val;
352 int charset;
354 return ((SYMBOLP (charset_symbol)
355 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
356 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
357 CHARSET_VALID_P (charset)))
358 ? charset : -1);
361 /* Return an identification number for a new private charset of
362 DIMENSION and WIDTH. If there's no more room for the new charset,
363 return 0. */
364 Lisp_Object
365 get_new_private_charset_id (dimension, width)
366 int dimension, width;
368 int charset, from, to;
370 if (dimension == 1)
372 if (width == 1)
373 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
374 else
375 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
377 else
379 if (width == 1)
380 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
381 else
382 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
385 for (charset = from; charset < to; charset++)
386 if (!CHARSET_DEFINED_P (charset)) break;
388 return make_number (charset < to ? charset : 0);
391 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
392 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
393 If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\
394 treated as a private charset.\n\
395 INFO-VECTOR is a vector of the format:\n\
396 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
397 SHORT-NAME LONG-NAME DESCRIPTION]\n\
398 The meanings of each elements is as follows:\n\
399 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
400 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
401 WIDTH (integer) is the number of columns a character in the charset\n\
402 occupies on the screen: one of 0, 1, and 2.\n\
404 DIRECTION (integer) is the rendering direction of characters in the\n\
405 charset when rendering. If 0, render from right to left, else\n\
406 render from left to right.\n\
408 ISO-FINAL-CHAR (character) is the final character of the\n\
409 corresponding ISO 2022 charset.\n\
411 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
412 while encoding to variants of ISO 2022 coding system, one of the\n\
413 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
415 SHORT-NAME (string) is the short name to refer to the charset.\n\
417 LONG-NAME (string) is the long name to refer to the charset.\n\
419 DESCRIPTION (string) is the description string of the charset.")
420 (charset_id, charset_symbol, info_vector)
421 Lisp_Object charset_id, charset_symbol, info_vector;
423 Lisp_Object *vec;
425 if (!NILP (charset_id))
426 CHECK_NUMBER (charset_id, 0);
427 CHECK_SYMBOL (charset_symbol, 1);
428 CHECK_VECTOR (info_vector, 2);
430 if (! NILP (charset_id))
432 if (! CHARSET_VALID_P (XINT (charset_id)))
433 error ("Invalid CHARSET: %d", XINT (charset_id));
434 else if (CHARSET_DEFINED_P (XINT (charset_id)))
435 error ("Already defined charset: %d", XINT (charset_id));
438 vec = XVECTOR (info_vector)->contents;
439 if (XVECTOR (info_vector)->size != 9
440 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
441 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
442 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
443 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
444 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
445 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
446 || !STRINGP (vec[6])
447 || !STRINGP (vec[7])
448 || !STRINGP (vec[8]))
449 error ("Invalid info-vector argument for defining charset %s",
450 XSYMBOL (charset_symbol)->name->data);
452 if (NILP (charset_id))
454 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
455 if (XINT (charset_id) == 0)
456 error ("There's no room for a new private charset %s",
457 XSYMBOL (charset_symbol)->name->data);
460 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
461 vec[4], vec[5], vec[6], vec[7], vec[8]);
462 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
463 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
464 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
465 return Qnil;
468 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
469 4, 4, 0,
470 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
471 CHARSET should be defined by `defined-charset' in advance.")
472 (dimension, chars, final_char, charset_symbol)
473 Lisp_Object dimension, chars, final_char, charset_symbol;
475 int charset;
477 CHECK_NUMBER (dimension, 0);
478 CHECK_NUMBER (chars, 1);
479 CHECK_NUMBER (final_char, 2);
480 CHECK_SYMBOL (charset_symbol, 3);
482 if (XINT (dimension) != 1 && XINT (dimension) != 2)
483 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
484 if (XINT (chars) != 94 && XINT (chars) != 96)
485 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
486 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
487 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
488 if ((charset = get_charset_id (charset_symbol)) < 0)
489 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
491 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
492 return Qnil;
495 /* Return number of different charsets in STR of length LEN. In
496 addition, for each found charset N, CHARSETS[N] is set 1. The
497 caller should allocate CHARSETS (MAX_CHARSET + 1 bytes) in advance. */
500 find_charset_in_str (str, len, charsets)
501 unsigned char *str, *charsets;
502 int len;
504 int num = 0;
506 while (len > 0)
508 int bytes = BYTES_BY_CHAR_HEAD (*str);
509 int charset = CHARSET_AT (str);
511 if (!charsets[charset])
513 charsets[charset] = 1;
514 num += 1;
516 str += bytes;
517 len -= bytes;
519 return num;
522 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
523 2, 2, 0,
524 "Return a list of charsets in the region between BEG and END.\n\
525 BEG and END are buffer positions.")
526 (beg, end)
527 Lisp_Object beg, end;
529 char charsets[MAX_CHARSET + 1];
530 int from, to, stop, i;
531 Lisp_Object val;
533 validate_region (&beg, &end);
534 from = XFASTINT (beg);
535 stop = to = XFASTINT (end);
536 if (from < GPT && GPT < to)
537 stop = GPT;
538 bzero (charsets, MAX_CHARSET + 1);
539 while (1)
541 find_charset_in_str (POS_ADDR (from), stop - from, charsets);
542 if (stop < to)
543 from = stop, stop = to;
544 else
545 break;
547 val = Qnil;
548 for (i = MAX_CHARSET; i >= 0; i--)
549 if (charsets[i])
550 val = Fcons (CHARSET_SYMBOL (i), val);
551 return val;
554 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
555 1, 1, 0,
556 "Return a list of charsets in STR.")
557 (str)
558 Lisp_Object str;
560 char charsets[MAX_CHARSET + 1];
561 int i;
562 Lisp_Object val;
564 CHECK_STRING (str, 0);
565 bzero (charsets, MAX_CHARSET + 1);
566 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets);
567 val = Qnil;
568 for (i = MAX_CHARSET; i >= 0; i--)
569 if (charsets[i])
570 val = Fcons (CHARSET_SYMBOL (i), val);
571 return val;
574 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
576 (charset, code1, code2)
577 Lisp_Object charset, code1, code2;
579 CHECK_NUMBER (charset, 0);
581 if (NILP (code1))
582 XSETFASTINT (code1, 0);
583 else
584 CHECK_NUMBER (code1, 1);
585 if (NILP (code2))
586 XSETFASTINT (code2, 0);
587 else
588 CHECK_NUMBER (code2, 2);
590 if (!CHARSET_DEFINED_P (XINT (charset)))
591 error ("Invalid charset: %d", XINT (charset));
593 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
596 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
597 "Return list of charset and one or two position-codes of CHAR.")
598 (ch)
599 Lisp_Object ch;
601 Lisp_Object val;
602 int charset, c1, c2;
604 CHECK_NUMBER (ch, 0);
605 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
606 return (c2 >= 0
607 ? Fcons (CHARSET_SYMBOL (charset),
608 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
609 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
612 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
613 "Return charset of CHAR.")
614 (ch)
615 Lisp_Object ch;
617 CHECK_NUMBER (ch, 0);
619 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
622 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
623 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
624 (dimension, chars, final_char)
625 Lisp_Object dimension, chars, final_char;
627 int charset;
629 CHECK_NUMBER (dimension, 0);
630 CHECK_NUMBER (chars, 1);
631 CHECK_NUMBER (final_char, 2);
633 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
634 return Qnil;
635 return CHARSET_SYMBOL (charset);
638 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
639 "Return byte length of multi-byte form of CHAR.")
640 (ch)
641 Lisp_Object ch;
643 Lisp_Object val;
644 int bytes;
646 CHECK_NUMBER (ch, 0);
647 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
649 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
651 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
653 else
655 int charset = CHAR_CHARSET (XFASTINT (ch));
657 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
660 XSETFASTINT (val, bytes);
661 return val;
664 /* Return the width of character of which multi-byte form starts with
665 C. The width is measured by how many columns occupied on the
666 screen when displayed in the current buffer. */
668 #define ONE_BYTE_CHAR_WIDTH(c) \
669 (c < 0x20 \
670 ? (c == '\t' \
671 ? XFASTINT (current_buffer->tab_width) \
672 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
673 : (c < 0x7f \
674 ? 1 \
675 : (c == 0x7F \
676 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
677 : ((! NILP (current_buffer->enable_multibyte_characters) \
678 && BASE_LEADING_CODE_P (c)) \
679 ? WIDTH_BY_CHAR_HEAD (c) \
680 : 4)))) \
683 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
684 "Return width of CHAR when displayed in the current buffer.\n\
685 The width is measured by how many columns it occupies on the screen.")
686 (ch)
687 Lisp_Object ch;
689 Lisp_Object val;
690 int c;
692 CHECK_NUMBER (ch, 0);
694 c = XFASTINT (ch);
695 if (SINGLE_BYTE_CHAR_P (c))
696 XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c));
697 else if (COMPOSITE_CHAR_P (c))
699 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
700 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
702 else
704 int charset = CHAR_CHARSET (c);
706 XSETFASTINT (val, CHARSET_WIDTH (charset));
708 return val;
711 /* Return width of string STR of length LEN when displayed in the
712 current buffer. The width is measured by how many columns it
713 occupies on the screen. */
715 strwidth (str, len)
716 unsigned char *str;
717 int len;
719 unsigned char *endp = str + len;
720 int width = 0;
722 while (str < endp) {
723 if (*str == LEADING_CODE_COMPOSITION)
725 int id = str_cmpchar_id (str, endp - str);
727 if (id < 0)
729 width += 4;
730 str++;
732 else
734 width += cmpchar_table[id]->width;
735 str += cmpchar_table[id]->len;
738 else
740 width += ONE_BYTE_CHAR_WIDTH (*str);
741 str += BYTES_BY_CHAR_HEAD (*str);
744 return width;
747 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
748 "Return width of STRING when displayed in the current buffer.\n\
749 Width is measured by how many columns it occupies on the screen.\n\
750 When calculating width of a multi-byte character in STRING,\n\
751 only the base leading-code is considered and the validity of\n\
752 the following bytes are not checked.")
753 (str)
754 Lisp_Object str;
756 Lisp_Object val;
758 CHECK_STRING (str, 0);
759 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
760 return val;
763 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
764 "Return the direction of CHAR.\n\
765 The returned value is 0 for left-to-right and 1 for right-to-left.")
766 (ch)
767 Lisp_Object ch;
769 int charset;
771 CHECK_NUMBER (ch, 0);
772 charset = CHAR_CHARSET (XFASTINT (ch));
773 if (!CHARSET_DEFINED_P (charset))
774 error ("Invalid character: %d", XINT (ch));
775 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
778 DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
779 "Return number of characters in STRING.")
780 (str)
781 Lisp_Object str;
783 Lisp_Object val;
784 unsigned char *p, *endp;
785 int chars;
787 CHECK_STRING (str, 0);
789 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
790 chars = 0;
791 while (p < endp)
793 if (*p == LEADING_CODE_COMPOSITION)
795 p++;
796 while (p < endp && ! CHAR_HEAD_P (p)) p++;
798 else
799 p += BYTES_BY_CHAR_HEAD (*p);
800 chars++;
803 XSETFASTINT (val, chars);
804 return val;
807 DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
808 "Return non-nil value if POS is at character boundary of multibyte form.\n\
809 The return value is:\n\
810 0 if POS is at an ASCII character or at the end of range,\n\
811 1 if POS is at a head of 2-byte length multi-byte form,\n\
812 2 if POS is at a head of 3-byte length multi-byte form,\n\
813 3 if POS is at a head of 4-byte length multi-byte form,\n\
814 4 if POS is at a head of multi-byte form of a composite character.\n\
815 If POS is out of range or not at character boundary, return NIL.")
816 (pos)
817 Lisp_Object pos;
819 Lisp_Object val;
820 int n;
822 CHECK_NUMBER_COERCE_MARKER (pos, 0);
824 n = XINT (pos);
825 if (n < BEGV || n > ZV)
826 return Qnil;
828 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
829 XSETFASTINT (val, 0);
830 else
832 unsigned char *p = POS_ADDR (n);
834 if (SINGLE_BYTE_CHAR_P (*p))
835 XSETFASTINT (val, 0);
836 else if (*p == LEADING_CODE_COMPOSITION)
837 XSETFASTINT (val, 4);
838 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
839 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
840 else
841 val = Qnil;
843 return val;
846 DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
847 "Concatenate all the argument characters and make the result a string.")
848 (n, args)
849 int n;
850 Lisp_Object *args;
852 int i;
853 unsigned char *buf
854 = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
855 unsigned char *p = buf;
856 Lisp_Object val;
858 for (i = 0; i < n; i++)
860 int c, len;
861 unsigned char *str;
863 if (!INTEGERP (args[i]))
865 free (buf);
866 CHECK_NUMBER (args[i], 0);
868 c = XINT (args[i]);
869 len = CHAR_STRING (c, p, str);
870 if (p != str)
871 /* C is a composite character. */
872 bcopy (str, p, len);
873 p += len;
876 val = make_string (buf, p - buf);
877 free (buf);
878 return val;
881 #endif /* emacs */
883 /*** Composite characters staffs ***/
885 /* Each composite character is identified by CMPCHAR-ID which is
886 assigned when Emacs needs the character code of the composite
887 character (e.g. when displaying it on the screen). See the
888 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
889 composite character is represented in Emacs. */
891 /* If `static' is defined, it means that it is defined to null string. */
892 #ifndef static
893 /* The following function is copied from lread.c. */
894 static int
895 hash_string (ptr, len)
896 unsigned char *ptr;
897 int len;
899 register unsigned char *p = ptr;
900 register unsigned char *end = p + len;
901 register unsigned char c;
902 register int hash = 0;
904 while (p != end)
906 c = *p++;
907 if (c >= 0140) c -= 40;
908 hash = ((hash<<3) + (hash>>28) + c);
910 return hash & 07777777777;
912 #endif
914 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
916 static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
918 /* Each element of `cmpchar_hash_table' is a pointer to an array of
919 integer, where the 1st element is the size of the array, the 2nd
920 element is how many elements are actually used in the array, and
921 the remaining elements are CMPCHAR-IDs of composite characters of
922 the same hash value. */
923 #define CMPCHAR_HASH_SIZE(table) table[0]
924 #define CMPCHAR_HASH_USED(table) table[1]
925 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
927 /* Return CMPCHAR-ID of the composite character in STR of the length
928 LEN. If the composite character has not yet been registered,
929 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
930 is the sole function for assigning CMPCHAR-ID. */
932 str_cmpchar_id (str, len)
933 unsigned char *str;
934 int len;
936 int hash_idx, *hashp;
937 unsigned char *buf;
938 int embedded_rule; /* 1 if composition rule is embedded. */
939 int chars; /* number of components. */
940 int i;
941 struct cmpchar_info *cmpcharp;
943 if (len < 5)
944 /* Any composite char have at least 3-byte length. */
945 return -1;
947 /* The second byte 0xFF means compostion rule is embedded. */
948 embedded_rule = (str[1] == 0xFF);
950 /* At first, get the actual length of the composite character. */
952 unsigned char *p, *endp = str + 1, *lastp = str + len;
953 int bytes;
955 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
956 chars = 0;
957 p = str + 1 + embedded_rule;
958 while (p < endp)
960 /* No need of checking if *P is 0xA0 because
961 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
962 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
963 chars++;
965 len = (p -= embedded_rule) - str;
966 if (p > endp)
967 len -= - bytes, chars--;
969 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
970 /* Invalid number of components. */
971 return -1;
973 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
974 hashp = cmpchar_hash_table[hash_idx];
976 /* Then, look into the hash table. */
977 if (hashp != NULL)
978 /* Find the correct one among composite characters of the same
979 hash value. */
980 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
982 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
983 if (len == cmpcharp->len
984 && ! bcmp (str, cmpcharp->data, len))
985 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
988 /* We have to register the composite character in cmpchar_table. */
989 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
990 /* No, we have no more room for a new composite character. */
991 return -1;
993 /* Make the entry in hash table. */
994 if (hashp == NULL)
996 /* Make a table for 8 composite characters initially. */
997 hashp = (cmpchar_hash_table[hash_idx]
998 = (int *) xmalloc (sizeof (int) * (2 + 8)));
999 CMPCHAR_HASH_SIZE (hashp) = 10;
1000 CMPCHAR_HASH_USED (hashp) = 2;
1002 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1004 CMPCHAR_HASH_SIZE (hashp) += 8;
1005 hashp = (cmpchar_hash_table[hash_idx]
1006 = (int *) xrealloc (hashp,
1007 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1009 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1010 CMPCHAR_HASH_USED (hashp)++;
1012 /* Set information of the composite character in cmpchar_table. */
1013 if (cmpchar_table_size == 0)
1015 /* This is the first composite character to be registered. */
1016 cmpchar_table_size = 256;
1017 cmpchar_table
1018 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1019 * cmpchar_table_size);
1021 else if (cmpchar_table_size <= n_cmpchars)
1023 cmpchar_table_size += 256;
1024 cmpchar_table
1025 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1026 sizeof (cmpchar_table[0])
1027 * cmpchar_table_size);
1030 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1032 cmpcharp->len = len;
1033 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1034 bcopy (str, cmpcharp->data, len);
1035 cmpcharp->data[len] = 0;
1036 cmpcharp->glyph_len = chars;
1037 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1038 if (embedded_rule)
1040 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1041 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1043 else
1045 cmpcharp->cmp_rule = NULL;
1046 cmpcharp->col_offset = NULL;
1049 /* Setup GLYPH data and composition rules (if any) so as not to make
1050 them every time on displaying. */
1052 unsigned char *bufp;
1053 int width;
1054 float leftmost = 0.0, rightmost = 1.0;
1056 if (embedded_rule)
1057 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1058 cmpcharp->col_offset[0] = 0;
1060 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1062 if (embedded_rule)
1063 cmpcharp->cmp_rule[i] = *bufp++;
1065 if (*bufp == 0xA0) /* This is an ASCII character. */
1067 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1068 width = 1;
1069 bufp++;
1071 else /* Multibyte character. */
1073 /* Make `bufp' point normal multi-byte form temporally. */
1074 *bufp -= 0x20;
1075 cmpcharp->glyph[i]
1076 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1077 width = WIDTH_BY_CHAR_HEAD (*bufp);
1078 *bufp += 0x20;
1079 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1082 if (embedded_rule && i > 0)
1084 /* Reference points (global_ref and new_ref) are
1085 encoded as below:
1087 0--1--2 -- ascent
1090 | 4 -+--- center
1091 -- 3 5 -- baseline
1093 6--7--8 -- descent
1095 Now, we calculate the column offset of the new glyph
1096 from the left edge of the first glyph. This can avoid
1097 the same calculation everytime displaying this
1098 composite character. */
1100 /* Reference points of global glyph and new glyph. */
1101 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1102 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1103 /* Column offset relative to the first glyph. */
1104 float left = (leftmost
1105 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1106 - (new_ref % 3) * width / 2.0);
1108 cmpcharp->col_offset[i] = left;
1109 if (left < leftmost)
1110 leftmost = left;
1111 if (left + width > rightmost)
1112 rightmost = left + width;
1114 else
1116 if (width > rightmost)
1117 rightmost = width;
1120 if (embedded_rule)
1122 /* Now col_offset[N] are relative to the left edge of the
1123 first component. Make them relative to the left edge of
1124 overall glyph. */
1125 for (i = 0; i < chars; i++)
1126 cmpcharp->col_offset[i] -= leftmost;
1127 /* Make rightmost holds width of overall glyph. */
1128 rightmost -= leftmost;
1131 cmpcharp->width = rightmost;
1132 if (cmpcharp->width < rightmost)
1133 /* To get a ceiling integer value. */
1134 cmpcharp->width++;
1137 cmpchar_table[n_cmpchars] = cmpcharp;
1139 return n_cmpchars++;
1142 /* Return the Nth element of the composite character C. */
1144 cmpchar_component (c, n)
1145 unsigned int c, n;
1147 int id = COMPOSITE_CHAR_ID (c);
1149 if (id >= n_cmpchars /* C is not a valid composite character. */
1150 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1151 return -1;
1152 /* No face data is stored in glyph code. */
1153 return ((int) (cmpchar_table[id]->glyph[n]));
1156 DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1157 "T if CHAR is a composite character.")
1158 (ch)
1159 Lisp_Object ch;
1161 CHECK_NUMBER (ch, 0);
1162 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1165 DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1166 2, 2, 0,
1167 "Return the IDXth component character of composite character CHARACTER.")
1168 (character, idx)
1169 Lisp_Object character, idx;
1171 int c;
1173 CHECK_NUMBER (character, 0);
1174 CHECK_NUMBER (idx, 1);
1176 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1177 args_out_of_range (character, idx);
1179 return make_number (c);
1182 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1183 2, 2, 0,
1184 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1185 The returned rule is for composing the Nth component\n\
1186 on the (N-1)th component. If N is 0, the returned value is always 255.")
1187 (character, n)
1188 Lisp_Object character, n;
1190 int id, i;
1192 CHECK_NUMBER (character, 0);
1193 CHECK_NUMBER (n, 1);
1195 id = COMPOSITE_CHAR_ID (XINT (character));
1196 if (id < 0 || id >= n_cmpchars)
1197 error ("Invalid composite character: %d", XINT (character));
1198 i = XINT (n);
1199 if (i > cmpchar_table[id]->glyph_len)
1200 args_out_of_range (character, n);
1202 return make_number (cmpchar_table[id]->cmp_rule[i]);
1205 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1206 Scmpchar_cmp_rule_p, 1, 1, 0,
1207 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1208 (character)
1209 Lisp_Object character;
1211 int id;
1213 CHECK_NUMBER (character, 0);
1214 id = COMPOSITE_CHAR_ID (XINT (character));
1215 if (id < 0 || id >= n_cmpchars)
1216 error ("Invalid composite character: %d", XINT (character));
1218 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1221 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1222 Scmpchar_cmp_count, 1, 1, 0,
1223 "Return number of compoents of composite character CHARACTER.")
1224 (character)
1225 Lisp_Object character;
1227 int id;
1229 CHECK_NUMBER (character, 0);
1230 id = COMPOSITE_CHAR_ID (XINT (character));
1231 if (id < 0 || id >= n_cmpchars)
1232 error ("Invalid composite character: %d", XINT (character));
1234 return (make_number (cmpchar_table[id]->glyph_len));
1237 DEFUN ("compose-string", Fcompose_string, Scompose_string,
1238 1, 1, 0,
1239 "Return one char string composed from all characters in STRING.")
1240 (str)
1241 Lisp_Object str;
1243 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1244 int len, i;
1246 CHECK_STRING (str, 0);
1248 buf[0] = LEADING_CODE_COMPOSITION;
1249 p = XSTRING (str)->data;
1250 pend = p + XSTRING (str)->size;
1251 i = 1;
1252 while (p < pend)
1254 if (*p < 0x20 || *p == 127) /* control code */
1255 error ("Invalid component character: %d", *p);
1256 else if (*p < 0x80) /* ASCII */
1258 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1259 error ("Too long string to be composed: %s", XSTRING (str)->data);
1260 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1261 code itself. */
1262 buf[i++] = 0xA0;
1263 buf[i++] = *p++ + 0x80;
1265 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1267 /* Already composed. Eliminate the heading
1268 LEADING_CODE_COMPOSITION, keep the remaining bytes
1269 unchanged. */
1270 p++;
1271 ptemp = p;
1272 while (! CHAR_HEAD_P (p)) p++;
1273 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1274 error ("Too long string to be composed: %s", XSTRING (str)->data);
1275 bcopy (ptemp, buf + i, p - ptemp);
1276 i += p - ptemp;
1278 else /* multibyte char */
1280 /* Add 0x20 to the base leading-code, keep the remaining
1281 bytes unchanged. */
1282 len = BYTES_BY_CHAR_HEAD (*p);
1283 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1284 error ("Too long string to be composed: %s", XSTRING (str)->data);
1285 bcopy (p, buf + i, len);
1286 buf[i] += 0x20;
1287 p += len, i += len;
1291 if (i < 5)
1292 /* STR contains only one character, which can't be composed. */
1293 error ("Too short string to be composed: %s", XSTRING (str)->data);
1295 return make_string (buf, i);
1299 charset_id_internal (charset_name)
1300 char *charset_name;
1302 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1304 if (!VECTORP (val))
1305 error ("Charset %s is not defined", charset_name);
1307 return (XINT (XVECTOR (val)->contents[0]));
1310 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1311 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1314 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1315 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1316 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1317 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1318 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1319 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1320 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1321 return Qnil;
1324 init_charset_once ()
1326 int i, j, k;
1328 staticpro (&Vcharset_table);
1329 staticpro (&Vcharset_symbol_table);
1331 /* This has to be done here, before we call Fmake_char_table. */
1332 Qcharset_table = intern ("charset-table");
1333 staticpro (&Qcharset_table);
1335 /* Intern this now in case it isn't already done.
1336 Setting this variable twice is harmless.
1337 But don't staticpro it here--that is done in alloc.c. */
1338 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1340 /* Now we are ready to set up this property, so we can
1341 create the charset table. */
1342 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1343 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1345 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
1347 /* Setup tables. */
1348 for (i = 0; i < 2; i++)
1349 for (j = 0; j < 2; j++)
1350 for (k = 0; k < 128; k++)
1351 iso_charset_table [i][j][k] = -1;
1353 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1354 cmpchar_table_size = n_cmpchars = 0;
1356 for (i = 0; i < 256; i++)
1357 BYTES_BY_CHAR_HEAD (i) = 1;
1358 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1359 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1360 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1361 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1362 /* The following doesn't reflect the actual bytes, but just to tell
1363 that it is a start of a multibyte character. */
1364 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1366 for (i = 0; i < 128; i++)
1367 WIDTH_BY_CHAR_HEAD (i) = 1;
1368 for (; i < 256; i++)
1369 WIDTH_BY_CHAR_HEAD (i) = 4;
1370 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1371 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1372 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1373 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1376 #ifdef emacs
1378 syms_of_charset ()
1380 Qascii = intern ("ascii");
1381 staticpro (&Qascii);
1383 Qcharset = intern ("charset");
1384 staticpro (&Qcharset);
1386 /* Define ASCII charset now. */
1387 update_charset_table (make_number (CHARSET_ASCII),
1388 make_number (1), make_number (94),
1389 make_number (1),
1390 make_number (0),
1391 make_number ('B'),
1392 make_number (0),
1393 build_string ("ASCII"),
1394 build_string ("ASCII"),
1395 build_string ("ASCII (ISO646 IRV)"));
1396 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1397 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1399 Qcomposition = intern ("composition");
1400 staticpro (&Qcomposition);
1401 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1403 defsubr (&Sdefine_charset);
1404 defsubr (&Sdeclare_equiv_charset);
1405 defsubr (&Sfind_charset_region);
1406 defsubr (&Sfind_charset_string);
1407 defsubr (&Smake_char_internal);
1408 defsubr (&Ssplit_char);
1409 defsubr (&Schar_charset);
1410 defsubr (&Siso_charset);
1411 defsubr (&Schar_bytes);
1412 defsubr (&Schar_width);
1413 defsubr (&Sstring_width);
1414 defsubr (&Schar_direction);
1415 defsubr (&Schars_in_string);
1416 defsubr (&Schar_boundary_p);
1417 defsubr (&Sconcat_chars);
1418 defsubr (&Scmpcharp);
1419 defsubr (&Scmpchar_component);
1420 defsubr (&Scmpchar_cmp_rule);
1421 defsubr (&Scmpchar_cmp_rule_p);
1422 defsubr (&Scmpchar_cmp_count);
1423 defsubr (&Scompose_string);
1424 defsubr (&Ssetup_special_charsets);
1426 DEFVAR_LISP ("charset-list", &Vcharset_list,
1427 "List of charsets ever defined.");
1428 Vcharset_list = Fcons (Qascii, Qnil);
1430 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1431 "Leading-code of composite characters.");
1432 leading_code_composition = LEADING_CODE_COMPOSITION;
1434 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1435 "Leading-code of private TYPE9N charset of column-width 1.");
1436 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1438 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1439 "Leading-code of private TYPE9N charset of column-width 2.");
1440 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1442 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1443 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1444 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1446 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1447 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1448 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1451 #endif /* emacs */