(Vpreloaded_file_list): New variable.
[emacs.git] / src / charset.c
blob2e30fcc67a68cb3b3c6f5c273c3e35699a73f263
1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* At first, see the document in `charset.h' to understand the code in
23 this file. */
25 #include <stdio.h>
27 #ifdef emacs
29 #include <sys/types.h>
30 #include <config.h>
31 #include "lisp.h"
32 #include "buffer.h"
33 #include "charset.h"
34 #include "coding.h"
35 #include "disptab.h"
37 #else /* not emacs */
39 #include "mulelib.h"
41 #endif /* emacs */
43 Lisp_Object Qcharset, Qascii, Qcomposition;
45 /* Declaration of special leading-codes. */
46 int leading_code_composition; /* for composite characters */
47 int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48 int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49 int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50 int leading_code_private_22; /* for private DIMENSION2 of 2-column */
52 /* Declaration of special charsets. */
53 int charset_ascii; /* ASCII */
54 int charset_composition; /* for a composite character */
55 int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56 int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57 int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58 int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59 int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60 int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61 int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
63 Lisp_Object Qcharset_table;
65 /* A char-table containing information of each character set. */
66 Lisp_Object Vcharset_table;
68 /* A vector of charset symbol indexed by charset-id. This is used
69 only for returning charset symbol from C functions. */
70 Lisp_Object Vcharset_symbol_table;
72 /* A list of charset symbols ever defined. */
73 Lisp_Object Vcharset_list;
75 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
76 int bytes_by_char_head[256];
77 int width_by_char_head[256];
79 /* Mapping table from ISO2022's charset (specified by DIMENSION,
80 CHARS, and FINAL-CHAR) to Emacs' charset. */
81 int iso_charset_table[2][2][128];
83 /* Table of pointers to the structure `cmpchar_info' indexed by
84 CMPCHAR-ID. */
85 struct cmpchar_info **cmpchar_table;
86 /* The current size of `cmpchar_table'. */
87 static int cmpchar_table_size;
88 /* Number of the current composite characters. */
89 int n_cmpchars;
91 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
92 unsigned char *_fetch_multibyte_char_p;
93 int _fetch_multibyte_char_len;
95 /* Set STR a pointer to the multi-byte form of the character C. If C
96 is not a composite character, the multi-byte form is set in WORKBUF
97 and STR points WORKBUF. The caller should allocate at least 4-byte
98 area at WORKBUF in advance. Returns the length of the multi-byte
99 form. If C is an invalid character to have a multi-byte form,
100 signal an error.
102 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
103 function directly if C can be an ASCII character. */
106 non_ascii_char_to_string (c, workbuf, str)
107 int c;
108 unsigned char *workbuf, **str;
110 int charset, c1, c2;
112 if (COMPOSITE_CHAR_P (c))
114 int cmpchar_id = COMPOSITE_CHAR_ID (c);
116 if (cmpchar_id < n_cmpchars)
118 *str = cmpchar_table[cmpchar_id]->data;
119 return cmpchar_table[cmpchar_id]->len;
121 else
123 error ("Invalid characer: %d", c);
127 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
128 if (!charset
129 || ! CHARSET_DEFINED_P (charset)
130 || c1 >= 0 && c1 < 32
131 || c2 >= 0 && c2 < 32)
132 error ("Invalid characer: %d", c);
134 *str = workbuf;
135 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
136 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
137 workbuf++;
138 *workbuf++ = c1 | 0x80;
139 if (c2 >= 0)
140 *workbuf++ = c2 | 0x80;
142 return (workbuf - *str);
145 /* Return a non-ASCII character of which multi-byte form is at STR of
146 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
147 character is set to the address ACTUAL_LEN.
149 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
150 directly if STR can hold an ASCII character. */
152 string_to_non_ascii_char (str, len, actual_len)
153 unsigned char *str;
154 int len, *actual_len;
156 int charset;
157 unsigned char c1, c2;
158 register int c;
160 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
162 if (actual_len)
163 *actual_len = 1;
164 return (int) *str;
167 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
169 if (actual_len)
170 *actual_len = (charset == CHARSET_COMPOSITION
171 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
172 : BYTES_BY_CHAR_HEAD (*str));
173 return c;
176 /* Return the length of the multi-byte form at string STR of length LEN. */
178 multibyte_form_length (str, len)
179 unsigned char *str;
180 int len;
182 int charset;
183 unsigned char c1, c2;
184 register int c;
186 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
187 return 1;
189 return (charset == CHARSET_COMPOSITION
190 ? cmpchar_table[(c1 << 7) | c2]->len
191 : BYTES_BY_CHAR_HEAD (*str));
194 /* Check if string STR of length LEN contains valid multi-byte form of
195 a character. If valid, charset and position codes of the character
196 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
197 return -1. This should be used only in the macro SPLIT_STRING
198 which checks range of STR in advance. */
200 split_non_ascii_string (str, len, charset, c1, c2)
201 register unsigned char *str, *c1, *c2;
202 register int len, *charset;
204 register unsigned int cs = *str++;
206 if (cs == LEADING_CODE_COMPOSITION)
208 int cmpchar_id = str_cmpchar_id (str - 1, len);
210 if (cmpchar_id < 0)
211 return -1;
212 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
214 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
215 && CHARSET_DEFINED_P (cs))
217 *charset = cs;
218 if (*str < 0xA0)
219 return -1;
220 *c1 = (*str++) & 0x7F;
221 if (CHARSET_DIMENSION (cs) == 2)
223 if (*str < 0xA0)
224 return -1;
225 *c2 = (*str++) & 0x7F;
228 else
229 return -1;
230 return 0;
233 /* Return a character unified with C (or a character made of CHARSET,
234 C1, and C2) in unification table TABLE. If no unification is found
235 in TABLE, return C. */
236 unify_char (table, c, charset, c1, c2)
237 Lisp_Object table;
238 int c, charset, c1, c2;
240 Lisp_Object ch;
241 int alt_charset, alt_c1, alt_c2, dimension;
243 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
244 if (!CHAR_TABLE_P (table)
245 || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
246 || XINT (ch) < 0)
247 return c;
249 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
250 dimension = CHARSET_DIMENSION (alt_charset);
251 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
252 /* CH is not a generic character, just return it. */
253 return XFASTINT (ch);
255 /* Since CH is a generic character, we must return a specific
256 charater which has the same position codes as C from CH. */
257 if (charset < 0)
258 SPLIT_CHAR (c, charset, c1, c2);
259 if (dimension != CHARSET_DIMENSION (charset))
260 /* We can't make such a character because of dimension mismatch. */
261 return c;
262 if (!alt_c1) alt_c1 = c1;
263 if (!alt_c2) alt_c2 = c2;
264 return MAKE_CHAR (alt_charset, c1, c2);
267 /* Update the table Vcharset_table with the given arguments (see the
268 document of `define-charset' for the meaning of each argument).
269 Several other table contents are also updated. The caller should
270 check the validity of CHARSET-ID and the remaining arguments in
271 advance. */
273 void
274 update_charset_table (charset_id, dimension, chars, width, direction,
275 iso_final_char, iso_graphic_plane,
276 short_name, long_name, description)
277 Lisp_Object charset_id, dimension, chars, width, direction;
278 Lisp_Object iso_final_char, iso_graphic_plane;
279 Lisp_Object short_name, long_name, description;
281 int charset = XINT (charset_id);
282 int bytes;
283 unsigned char leading_code_base, leading_code_ext;
285 if (NILP (CHARSET_TABLE_ENTRY (charset)))
286 CHARSET_TABLE_ENTRY (charset)
287 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
289 /* Get byte length of multibyte form, base leading-code, and
290 extended leading-code of the charset. See the comment under the
291 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
292 bytes = XINT (dimension);
293 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
295 /* Official charset, it doesn't have an extended leading-code. */
296 if (charset != CHARSET_ASCII)
297 bytes += 1; /* For a base leading-code. */
298 leading_code_base = charset;
299 leading_code_ext = 0;
301 else
303 /* Private charset. */
304 bytes += 2; /* For base and extended leading-codes. */
305 leading_code_base
306 = (charset < LEADING_CODE_EXT_12
307 ? LEADING_CODE_PRIVATE_11
308 : (charset < LEADING_CODE_EXT_21
309 ? LEADING_CODE_PRIVATE_12
310 : (charset < LEADING_CODE_EXT_22
311 ? LEADING_CODE_PRIVATE_21
312 : LEADING_CODE_PRIVATE_22)));
313 leading_code_ext = charset;
316 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
317 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
318 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
319 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
320 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
321 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
322 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
323 = make_number (leading_code_base);
324 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
325 = make_number (leading_code_ext);
326 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
327 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
328 = iso_graphic_plane;
329 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
330 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
331 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
332 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
335 /* If we have already defined a charset which has the same
336 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
337 DIRECTION, we must update the entry REVERSE-CHARSET of both
338 charsets. If there's no such charset, the value of the entry
339 is set to nil. */
340 int i;
342 for (i = 0; i <= MAX_CHARSET; i++)
343 if (!NILP (CHARSET_TABLE_ENTRY (i)))
345 if (CHARSET_DIMENSION (i) == XINT (dimension)
346 && CHARSET_CHARS (i) == XINT (chars)
347 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
348 && CHARSET_DIRECTION (i) != XINT (direction))
350 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
351 = make_number (i);
352 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
353 break;
356 if (i > MAX_CHARSET)
357 /* No such a charset. */
358 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
359 = make_number (-1);
362 if (charset != CHARSET_ASCII
363 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
365 /* Update tables bytes_by_char_head and width_by_char_head. */
366 bytes_by_char_head[leading_code_base] = bytes;
367 width_by_char_head[leading_code_base] = XINT (width);
369 /* Update table emacs_code_class. */
370 emacs_code_class[charset] = (bytes == 2
371 ? EMACS_leading_code_2
372 : (bytes == 3
373 ? EMACS_leading_code_3
374 : EMACS_leading_code_4));
377 /* Update table iso_charset_table. */
378 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
379 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
382 #ifdef emacs
384 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
385 is invalid. */
387 get_charset_id (charset_symbol)
388 Lisp_Object charset_symbol;
390 Lisp_Object val;
391 int charset;
393 return ((SYMBOLP (charset_symbol)
394 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
395 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
396 CHARSET_VALID_P (charset)))
397 ? charset : -1);
400 /* Return an identification number for a new private charset of
401 DIMENSION and WIDTH. If there's no more room for the new charset,
402 return 0. */
403 Lisp_Object
404 get_new_private_charset_id (dimension, width)
405 int dimension, width;
407 int charset, from, to;
409 if (dimension == 1)
411 if (width == 1)
412 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
413 else
414 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
416 else
418 if (width == 1)
419 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
420 else
421 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
424 for (charset = from; charset < to; charset++)
425 if (!CHARSET_DEFINED_P (charset)) break;
427 return make_number (charset < to ? charset : 0);
430 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
431 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
432 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
433 treated as a private charset.\n\
434 INFO-VECTOR is a vector of the format:\n\
435 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
436 SHORT-NAME LONG-NAME DESCRIPTION]\n\
437 The meanings of each elements is as follows:\n\
438 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
439 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
440 WIDTH (integer) is the number of columns a character in the charset\n\
441 occupies on the screen: one of 0, 1, and 2.\n\
443 DIRECTION (integer) is the rendering direction of characters in the\n\
444 charset when rendering. If 0, render from right to left, else\n\
445 render from left to right.\n\
447 ISO-FINAL-CHAR (character) is the final character of the\n\
448 corresponding ISO 2022 charset.\n\
450 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
451 while encoding to variants of ISO 2022 coding system, one of the\n\
452 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
454 SHORT-NAME (string) is the short name to refer to the charset.\n\
456 LONG-NAME (string) is the long name to refer to the charset.\n\
458 DESCRIPTION (string) is the description string of the charset.")
459 (charset_id, charset_symbol, info_vector)
460 Lisp_Object charset_id, charset_symbol, info_vector;
462 Lisp_Object *vec;
464 if (!NILP (charset_id))
465 CHECK_NUMBER (charset_id, 0);
466 CHECK_SYMBOL (charset_symbol, 1);
467 CHECK_VECTOR (info_vector, 2);
469 if (! NILP (charset_id))
471 if (! CHARSET_VALID_P (XINT (charset_id)))
472 error ("Invalid CHARSET: %d", XINT (charset_id));
473 else if (CHARSET_DEFINED_P (XINT (charset_id)))
474 error ("Already defined charset: %d", XINT (charset_id));
477 vec = XVECTOR (info_vector)->contents;
478 if (XVECTOR (info_vector)->size != 9
479 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
480 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
481 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
482 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
483 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
484 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
485 || !STRINGP (vec[6])
486 || !STRINGP (vec[7])
487 || !STRINGP (vec[8]))
488 error ("Invalid info-vector argument for defining charset %s",
489 XSYMBOL (charset_symbol)->name->data);
491 if (NILP (charset_id))
493 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
494 if (XINT (charset_id) == 0)
495 error ("There's no room for a new private charset %s",
496 XSYMBOL (charset_symbol)->name->data);
499 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
500 vec[4], vec[5], vec[6], vec[7], vec[8]);
501 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
502 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
503 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
504 return Qnil;
507 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
508 4, 4, 0,
509 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
510 CHARSET should be defined by `defined-charset' in advance.")
511 (dimension, chars, final_char, charset_symbol)
512 Lisp_Object dimension, chars, final_char, charset_symbol;
514 int charset;
516 CHECK_NUMBER (dimension, 0);
517 CHECK_NUMBER (chars, 1);
518 CHECK_NUMBER (final_char, 2);
519 CHECK_SYMBOL (charset_symbol, 3);
521 if (XINT (dimension) != 1 && XINT (dimension) != 2)
522 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
523 if (XINT (chars) != 94 && XINT (chars) != 96)
524 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
525 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
526 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
527 if ((charset = get_charset_id (charset_symbol)) < 0)
528 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
530 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
531 return Qnil;
534 /* Return number of different charsets in STR of length LEN. In
535 addition, for each found charset N, CHARSETS[N] is set 1. The
536 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
537 It may lookup a unification table TABLE if supplied. */
540 find_charset_in_str (str, len, charsets, table)
541 unsigned char *str;
542 int len, *charsets;
543 Lisp_Object table;
545 int num = 0;
547 if (! CHAR_TABLE_P (table))
548 table = Qnil;
550 while (len > 0)
552 int bytes = BYTES_BY_CHAR_HEAD (*str);
553 int charset;
555 if (NILP (table))
556 charset = CHARSET_AT (str);
557 else
559 int c, charset;
560 unsigned char c1, c2;
562 SPLIT_STRING(str, bytes, charset, c1, c2);
563 if ((c = unify_char (table, -1, charset, c1, c2)) >= 0)
564 charset = CHAR_CHARSET (c);
567 if (!charsets[charset])
569 charsets[charset] = 1;
570 num += 1;
572 str += bytes;
573 len -= bytes;
575 return num;
578 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
579 2, 3, 0,
580 "Return a list of charsets in the region between BEG and END.\n\
581 BEG and END are buffer positions.\n\
582 Optional arg TABLE if non-nil is a unification table to look up.")
583 (beg, end, table)
584 Lisp_Object beg, end, table;
586 int charsets[MAX_CHARSET + 1];
587 int from, to, stop, i;
588 Lisp_Object val;
590 validate_region (&beg, &end);
591 from = XFASTINT (beg);
592 stop = to = XFASTINT (end);
593 if (from < GPT && GPT < to)
594 stop = GPT;
595 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
596 while (1)
598 find_charset_in_str (POS_ADDR (from), stop - from, charsets, table);
599 if (stop < to)
600 from = stop, stop = to;
601 else
602 break;
604 val = Qnil;
605 for (i = MAX_CHARSET; i >= 0; i--)
606 if (charsets[i])
607 val = Fcons (CHARSET_SYMBOL (i), val);
608 return val;
611 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
612 1, 2, 0,
613 "Return a list of charsets in STR.\n\
614 Optional arg TABLE if non-nil is a unification table to look up.")
615 (str, table)
616 Lisp_Object str, table;
618 int charsets[MAX_CHARSET + 1];
619 int i;
620 Lisp_Object val;
622 CHECK_STRING (str, 0);
623 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
624 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size,
625 charsets, table);
626 val = Qnil;
627 for (i = MAX_CHARSET; i >= 0; i--)
628 if (charsets[i])
629 val = Fcons (CHARSET_SYMBOL (i), val);
630 return val;
633 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
635 (charset, code1, code2)
636 Lisp_Object charset, code1, code2;
638 CHECK_NUMBER (charset, 0);
640 if (NILP (code1))
641 XSETFASTINT (code1, 0);
642 else
643 CHECK_NUMBER (code1, 1);
644 if (NILP (code2))
645 XSETFASTINT (code2, 0);
646 else
647 CHECK_NUMBER (code2, 2);
649 if (!CHARSET_DEFINED_P (XINT (charset)))
650 error ("Invalid charset: %d", XINT (charset));
652 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
655 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
656 "Return list of charset and one or two position-codes of CHAR.")
657 (ch)
658 Lisp_Object ch;
660 Lisp_Object val;
661 int charset, c1, c2;
663 CHECK_NUMBER (ch, 0);
664 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
665 return (c2 >= 0
666 ? Fcons (CHARSET_SYMBOL (charset),
667 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
668 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
671 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
672 "Return charset of CHAR.")
673 (ch)
674 Lisp_Object ch;
676 CHECK_NUMBER (ch, 0);
678 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
681 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
682 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
683 (dimension, chars, final_char)
684 Lisp_Object dimension, chars, final_char;
686 int charset;
688 CHECK_NUMBER (dimension, 0);
689 CHECK_NUMBER (chars, 1);
690 CHECK_NUMBER (final_char, 2);
692 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
693 return Qnil;
694 return CHARSET_SYMBOL (charset);
697 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
698 "Return byte length of multi-byte form of CHAR.")
699 (ch)
700 Lisp_Object ch;
702 Lisp_Object val;
703 int bytes;
705 CHECK_NUMBER (ch, 0);
706 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
708 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
710 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
712 else
714 int charset = CHAR_CHARSET (XFASTINT (ch));
716 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
719 XSETFASTINT (val, bytes);
720 return val;
723 /* Return the width of character of which multi-byte form starts with
724 C. The width is measured by how many columns occupied on the
725 screen when displayed in the current buffer. */
727 #define ONE_BYTE_CHAR_WIDTH(c) \
728 (c < 0x20 \
729 ? (c == '\t' \
730 ? XFASTINT (current_buffer->tab_width) \
731 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
732 : (c < 0x7f \
733 ? 1 \
734 : (c == 0x7F \
735 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
736 : ((! NILP (current_buffer->enable_multibyte_characters) \
737 && BASE_LEADING_CODE_P (c)) \
738 ? WIDTH_BY_CHAR_HEAD (c) \
739 : 4)))) \
742 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
743 "Return width of CHAR when displayed in the current buffer.\n\
744 The width is measured by how many columns it occupies on the screen.")
745 (ch)
746 Lisp_Object ch;
748 Lisp_Object val, disp;
749 int c;
750 struct Lisp_Char_Table *dp = buffer_display_table ();
752 CHECK_NUMBER (ch, 0);
754 c = XINT (ch);
756 /* Get the way the display table would display it. */
757 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
759 if (VECTORP (disp))
760 XSETINT (val, XVECTOR (disp)->size);
761 else if (SINGLE_BYTE_CHAR_P (c))
762 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
763 else if (COMPOSITE_CHAR_P (c))
765 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
766 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
768 else
770 int charset = CHAR_CHARSET (c);
772 XSETFASTINT (val, CHARSET_WIDTH (charset));
774 return val;
777 /* Return width of string STR of length LEN when displayed in the
778 current buffer. The width is measured by how many columns it
779 occupies on the screen. */
782 strwidth (str, len)
783 unsigned char *str;
784 int len;
786 unsigned char *endp = str + len;
787 int width = 0;
788 struct Lisp_Char_Table *dp = buffer_display_table (current_buffer);
790 while (str < endp)
792 if (*str == LEADING_CODE_COMPOSITION)
794 int id = str_cmpchar_id (str, endp - str);
796 if (id < 0)
798 width += 4;
799 str++;
801 else
803 width += cmpchar_table[id]->width;
804 str += cmpchar_table[id]->len;
807 else
809 Lisp_Object disp;
810 int thiswidth;
811 int c = STRING_CHAR (str, endp - str);
813 /* Get the way the display table would display it. */
814 if (dp)
815 disp = DISP_CHAR_VECTOR (dp, c);
816 else
817 disp = Qnil;
819 if (VECTORP (disp))
820 thiswidth = XVECTOR (disp)->size;
821 else
822 thiswidth = ONE_BYTE_CHAR_WIDTH (*str);
824 width += thiswidth;
825 str += BYTES_BY_CHAR_HEAD (*str);
828 return width;
831 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
832 "Return width of STRING when displayed in the current buffer.\n\
833 Width is measured by how many columns it occupies on the screen.\n\
834 When calculating width of a multi-byte character in STRING,\n\
835 only the base leading-code is considered and the validity of\n\
836 the following bytes are not checked.")
837 (str)
838 Lisp_Object str;
840 Lisp_Object val;
842 CHECK_STRING (str, 0);
843 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
844 return val;
847 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
848 "Return the direction of CHAR.\n\
849 The returned value is 0 for left-to-right and 1 for right-to-left.")
850 (ch)
851 Lisp_Object ch;
853 int charset;
855 CHECK_NUMBER (ch, 0);
856 charset = CHAR_CHARSET (XFASTINT (ch));
857 if (!CHARSET_DEFINED_P (charset))
858 error ("Invalid character: %d", XINT (ch));
859 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
862 DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
863 "Return number of characters in STRING.")
864 (str)
865 Lisp_Object str;
867 Lisp_Object val;
868 unsigned char *p, *endp;
869 int chars;
871 CHECK_STRING (str, 0);
873 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
874 chars = 0;
875 while (p < endp)
877 if (*p == LEADING_CODE_COMPOSITION)
879 p++;
880 while (p < endp && ! CHAR_HEAD_P (p)) p++;
882 else
883 p += BYTES_BY_CHAR_HEAD (*p);
884 chars++;
887 XSETFASTINT (val, chars);
888 return val;
891 DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
892 "Return non-nil value if POS is at character boundary of multibyte form.\n\
893 The return value is:\n\
894 0 if POS is at an ASCII character or at the end of range,\n\
895 1 if POS is at a head of 2-byte length multi-byte form,\n\
896 2 if POS is at a head of 3-byte length multi-byte form,\n\
897 3 if POS is at a head of 4-byte length multi-byte form,\n\
898 4 if POS is at a head of multi-byte form of a composite character.\n\
899 If POS is out of range or not at character boundary, return NIL.")
900 (pos)
901 Lisp_Object pos;
903 Lisp_Object val;
904 int n;
906 CHECK_NUMBER_COERCE_MARKER (pos, 0);
908 n = XINT (pos);
909 if (n < BEGV || n > ZV)
910 return Qnil;
912 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
913 XSETFASTINT (val, 0);
914 else
916 unsigned char *p = POS_ADDR (n);
918 if (SINGLE_BYTE_CHAR_P (*p))
919 XSETFASTINT (val, 0);
920 else if (*p == LEADING_CODE_COMPOSITION)
921 XSETFASTINT (val, 4);
922 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
923 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
924 else
925 val = Qnil;
927 return val;
930 DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
931 "Concatenate all the argument characters and make the result a string.")
932 (n, args)
933 int n;
934 Lisp_Object *args;
936 int i;
937 unsigned char *buf
938 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
939 unsigned char *p = buf;
940 Lisp_Object val;
942 for (i = 0; i < n; i++)
944 int c, len;
945 unsigned char *str;
947 if (!INTEGERP (args[i]))
949 free (buf);
950 CHECK_NUMBER (args[i], 0);
952 c = XINT (args[i]);
953 len = CHAR_STRING (c, p, str);
954 if (p != str)
955 /* C is a composite character. */
956 bcopy (str, p, len);
957 p += len;
960 val = make_string (buf, p - buf);
961 return val;
964 #endif /* emacs */
966 /*** Composite characters staffs ***/
968 /* Each composite character is identified by CMPCHAR-ID which is
969 assigned when Emacs needs the character code of the composite
970 character (e.g. when displaying it on the screen). See the
971 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
972 composite character is represented in Emacs. */
974 /* If `static' is defined, it means that it is defined to null string. */
975 #ifndef static
976 /* The following function is copied from lread.c. */
977 static int
978 hash_string (ptr, len)
979 unsigned char *ptr;
980 int len;
982 register unsigned char *p = ptr;
983 register unsigned char *end = p + len;
984 register unsigned char c;
985 register int hash = 0;
987 while (p != end)
989 c = *p++;
990 if (c >= 0140) c -= 40;
991 hash = ((hash<<3) + (hash>>28) + c);
993 return hash & 07777777777;
995 #endif
997 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
999 static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
1001 /* Each element of `cmpchar_hash_table' is a pointer to an array of
1002 integer, where the 1st element is the size of the array, the 2nd
1003 element is how many elements are actually used in the array, and
1004 the remaining elements are CMPCHAR-IDs of composite characters of
1005 the same hash value. */
1006 #define CMPCHAR_HASH_SIZE(table) table[0]
1007 #define CMPCHAR_HASH_USED(table) table[1]
1008 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1010 /* Return CMPCHAR-ID of the composite character in STR of the length
1011 LEN. If the composite character has not yet been registered,
1012 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1013 is the sole function for assigning CMPCHAR-ID. */
1015 str_cmpchar_id (str, len)
1016 unsigned char *str;
1017 int len;
1019 int hash_idx, *hashp;
1020 unsigned char *buf;
1021 int embedded_rule; /* 1 if composition rule is embedded. */
1022 int chars; /* number of components. */
1023 int i;
1024 struct cmpchar_info *cmpcharp;
1026 if (len < 5)
1027 /* Any composite char have at least 3-byte length. */
1028 return -1;
1030 /* The second byte 0xFF means compostion rule is embedded. */
1031 embedded_rule = (str[1] == 0xFF);
1033 /* At first, get the actual length of the composite character. */
1035 unsigned char *p, *endp = str + 1, *lastp = str + len;
1036 int bytes;
1038 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
1039 chars = 0;
1040 p = str + 1 + embedded_rule;
1041 while (p < endp)
1043 /* No need of checking if *P is 0xA0 because
1044 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1045 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
1046 chars++;
1048 len = (p -= embedded_rule) - str;
1049 if (p > endp)
1050 len -= - bytes, chars--;
1052 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
1053 /* Invalid number of components. */
1054 return -1;
1056 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1057 hashp = cmpchar_hash_table[hash_idx];
1059 /* Then, look into the hash table. */
1060 if (hashp != NULL)
1061 /* Find the correct one among composite characters of the same
1062 hash value. */
1063 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1065 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1066 if (len == cmpcharp->len
1067 && ! bcmp (str, cmpcharp->data, len))
1068 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1071 /* We have to register the composite character in cmpchar_table. */
1072 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1073 /* No, we have no more room for a new composite character. */
1074 return -1;
1076 /* Make the entry in hash table. */
1077 if (hashp == NULL)
1079 /* Make a table for 8 composite characters initially. */
1080 hashp = (cmpchar_hash_table[hash_idx]
1081 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1082 CMPCHAR_HASH_SIZE (hashp) = 10;
1083 CMPCHAR_HASH_USED (hashp) = 2;
1085 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1087 CMPCHAR_HASH_SIZE (hashp) += 8;
1088 hashp = (cmpchar_hash_table[hash_idx]
1089 = (int *) xrealloc (hashp,
1090 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1092 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1093 CMPCHAR_HASH_USED (hashp)++;
1095 /* Set information of the composite character in cmpchar_table. */
1096 if (cmpchar_table_size == 0)
1098 /* This is the first composite character to be registered. */
1099 cmpchar_table_size = 256;
1100 cmpchar_table
1101 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1102 * cmpchar_table_size);
1104 else if (cmpchar_table_size <= n_cmpchars)
1106 cmpchar_table_size += 256;
1107 cmpchar_table
1108 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1109 sizeof (cmpchar_table[0])
1110 * cmpchar_table_size);
1113 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1115 cmpcharp->len = len;
1116 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1117 bcopy (str, cmpcharp->data, len);
1118 cmpcharp->data[len] = 0;
1119 cmpcharp->glyph_len = chars;
1120 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1121 if (embedded_rule)
1123 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1124 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1126 else
1128 cmpcharp->cmp_rule = NULL;
1129 cmpcharp->col_offset = NULL;
1132 /* Setup GLYPH data and composition rules (if any) so as not to make
1133 them every time on displaying. */
1135 unsigned char *bufp;
1136 int width;
1137 float leftmost = 0.0, rightmost = 1.0;
1139 if (embedded_rule)
1140 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1141 cmpcharp->col_offset[0] = 0;
1143 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1145 if (embedded_rule)
1146 cmpcharp->cmp_rule[i] = *bufp++;
1148 if (*bufp == 0xA0) /* This is an ASCII character. */
1150 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1151 width = 1;
1152 bufp++;
1154 else /* Multibyte character. */
1156 /* Make `bufp' point normal multi-byte form temporally. */
1157 *bufp -= 0x20;
1158 cmpcharp->glyph[i]
1159 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1160 width = WIDTH_BY_CHAR_HEAD (*bufp);
1161 *bufp += 0x20;
1162 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1165 if (embedded_rule && i > 0)
1167 /* Reference points (global_ref and new_ref) are
1168 encoded as below:
1170 0--1--2 -- ascent
1173 | 4 -+--- center
1174 -- 3 5 -- baseline
1176 6--7--8 -- descent
1178 Now, we calculate the column offset of the new glyph
1179 from the left edge of the first glyph. This can avoid
1180 the same calculation everytime displaying this
1181 composite character. */
1183 /* Reference points of global glyph and new glyph. */
1184 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1185 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1186 /* Column offset relative to the first glyph. */
1187 float left = (leftmost
1188 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1189 - (new_ref % 3) * width / 2.0);
1191 cmpcharp->col_offset[i] = left;
1192 if (left < leftmost)
1193 leftmost = left;
1194 if (left + width > rightmost)
1195 rightmost = left + width;
1197 else
1199 if (width > rightmost)
1200 rightmost = width;
1203 if (embedded_rule)
1205 /* Now col_offset[N] are relative to the left edge of the
1206 first component. Make them relative to the left edge of
1207 overall glyph. */
1208 for (i = 0; i < chars; i++)
1209 cmpcharp->col_offset[i] -= leftmost;
1210 /* Make rightmost holds width of overall glyph. */
1211 rightmost -= leftmost;
1214 cmpcharp->width = rightmost;
1215 if (cmpcharp->width < rightmost)
1216 /* To get a ceiling integer value. */
1217 cmpcharp->width++;
1220 cmpchar_table[n_cmpchars] = cmpcharp;
1222 return n_cmpchars++;
1225 /* Return the Nth element of the composite character C. */
1227 cmpchar_component (c, n)
1228 unsigned int c, n;
1230 int id = COMPOSITE_CHAR_ID (c);
1232 if (id >= n_cmpchars /* C is not a valid composite character. */
1233 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1234 return -1;
1235 /* No face data is stored in glyph code. */
1236 return ((int) (cmpchar_table[id]->glyph[n]));
1239 DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1240 "T if CHAR is a composite character.")
1241 (ch)
1242 Lisp_Object ch;
1244 CHECK_NUMBER (ch, 0);
1245 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1248 DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1249 2, 2, 0,
1250 "Return the IDXth component character of composite character CHARACTER.")
1251 (character, idx)
1252 Lisp_Object character, idx;
1254 int c;
1256 CHECK_NUMBER (character, 0);
1257 CHECK_NUMBER (idx, 1);
1259 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1260 args_out_of_range (character, idx);
1262 return make_number (c);
1265 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1266 2, 2, 0,
1267 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1268 The returned rule is for composing the Nth component\n\
1269 on the (N-1)th component. If N is 0, the returned value is always 255.")
1270 (character, n)
1271 Lisp_Object character, n;
1273 int id, i;
1275 CHECK_NUMBER (character, 0);
1276 CHECK_NUMBER (n, 1);
1278 id = COMPOSITE_CHAR_ID (XINT (character));
1279 if (id < 0 || id >= n_cmpchars)
1280 error ("Invalid composite character: %d", XINT (character));
1281 i = XINT (n);
1282 if (i > cmpchar_table[id]->glyph_len)
1283 args_out_of_range (character, n);
1285 return make_number (cmpchar_table[id]->cmp_rule[i]);
1288 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1289 Scmpchar_cmp_rule_p, 1, 1, 0,
1290 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1291 (character)
1292 Lisp_Object character;
1294 int id;
1296 CHECK_NUMBER (character, 0);
1297 id = COMPOSITE_CHAR_ID (XINT (character));
1298 if (id < 0 || id >= n_cmpchars)
1299 error ("Invalid composite character: %d", XINT (character));
1301 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1304 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1305 Scmpchar_cmp_count, 1, 1, 0,
1306 "Return number of compoents of composite character CHARACTER.")
1307 (character)
1308 Lisp_Object character;
1310 int id;
1312 CHECK_NUMBER (character, 0);
1313 id = COMPOSITE_CHAR_ID (XINT (character));
1314 if (id < 0 || id >= n_cmpchars)
1315 error ("Invalid composite character: %d", XINT (character));
1317 return (make_number (cmpchar_table[id]->glyph_len));
1320 DEFUN ("compose-string", Fcompose_string, Scompose_string,
1321 1, 1, 0,
1322 "Return one char string composed from all characters in STRING.")
1323 (str)
1324 Lisp_Object str;
1326 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1327 int len, i;
1329 CHECK_STRING (str, 0);
1331 buf[0] = LEADING_CODE_COMPOSITION;
1332 p = XSTRING (str)->data;
1333 pend = p + XSTRING (str)->size;
1334 i = 1;
1335 while (p < pend)
1337 if (*p < 0x20 || *p == 127) /* control code */
1338 error ("Invalid component character: %d", *p);
1339 else if (*p < 0x80) /* ASCII */
1341 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1342 error ("Too long string to be composed: %s", XSTRING (str)->data);
1343 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1344 code itself. */
1345 buf[i++] = 0xA0;
1346 buf[i++] = *p++ + 0x80;
1348 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1350 /* Already composed. Eliminate the heading
1351 LEADING_CODE_COMPOSITION, keep the remaining bytes
1352 unchanged. */
1353 p++;
1354 ptemp = p;
1355 while (! CHAR_HEAD_P (p)) p++;
1356 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1357 error ("Too long string to be composed: %s", XSTRING (str)->data);
1358 bcopy (ptemp, buf + i, p - ptemp);
1359 i += p - ptemp;
1361 else /* multibyte char */
1363 /* Add 0x20 to the base leading-code, keep the remaining
1364 bytes unchanged. */
1365 len = BYTES_BY_CHAR_HEAD (*p);
1366 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1367 error ("Too long string to be composed: %s", XSTRING (str)->data);
1368 bcopy (p, buf + i, len);
1369 buf[i] += 0x20;
1370 p += len, i += len;
1374 if (i < 5)
1375 /* STR contains only one character, which can't be composed. */
1376 error ("Too short string to be composed: %s", XSTRING (str)->data);
1378 return make_string (buf, i);
1382 charset_id_internal (charset_name)
1383 char *charset_name;
1385 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1387 if (!VECTORP (val))
1388 error ("Charset %s is not defined", charset_name);
1390 return (XINT (XVECTOR (val)->contents[0]));
1393 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1394 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1397 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1398 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1399 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1400 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1401 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1402 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1403 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1404 return Qnil;
1407 init_charset_once ()
1409 int i, j, k;
1411 staticpro (&Vcharset_table);
1412 staticpro (&Vcharset_symbol_table);
1414 /* This has to be done here, before we call Fmake_char_table. */
1415 Qcharset_table = intern ("charset-table");
1416 staticpro (&Qcharset_table);
1418 /* Intern this now in case it isn't already done.
1419 Setting this variable twice is harmless.
1420 But don't staticpro it here--that is done in alloc.c. */
1421 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1423 /* Now we are ready to set up this property, so we can
1424 create the charset table. */
1425 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1426 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1428 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
1430 /* Setup tables. */
1431 for (i = 0; i < 2; i++)
1432 for (j = 0; j < 2; j++)
1433 for (k = 0; k < 128; k++)
1434 iso_charset_table [i][j][k] = -1;
1436 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1437 cmpchar_table_size = n_cmpchars = 0;
1439 for (i = 0; i < 256; i++)
1440 BYTES_BY_CHAR_HEAD (i) = 1;
1441 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1442 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1443 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1444 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1445 /* The following doesn't reflect the actual bytes, but just to tell
1446 that it is a start of a multibyte character. */
1447 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1449 for (i = 0; i < 128; i++)
1450 WIDTH_BY_CHAR_HEAD (i) = 1;
1451 for (; i < 256; i++)
1452 WIDTH_BY_CHAR_HEAD (i) = 4;
1453 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1454 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1455 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1456 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1459 #ifdef emacs
1461 syms_of_charset ()
1463 Qascii = intern ("ascii");
1464 staticpro (&Qascii);
1466 Qcharset = intern ("charset");
1467 staticpro (&Qcharset);
1469 /* Define ASCII charset now. */
1470 update_charset_table (make_number (CHARSET_ASCII),
1471 make_number (1), make_number (94),
1472 make_number (1),
1473 make_number (0),
1474 make_number ('B'),
1475 make_number (0),
1476 build_string ("ASCII"),
1477 build_string ("ASCII"),
1478 build_string ("ASCII (ISO646 IRV)"));
1479 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1480 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1482 Qcomposition = intern ("composition");
1483 staticpro (&Qcomposition);
1484 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1486 defsubr (&Sdefine_charset);
1487 defsubr (&Sdeclare_equiv_charset);
1488 defsubr (&Sfind_charset_region);
1489 defsubr (&Sfind_charset_string);
1490 defsubr (&Smake_char_internal);
1491 defsubr (&Ssplit_char);
1492 defsubr (&Schar_charset);
1493 defsubr (&Siso_charset);
1494 defsubr (&Schar_bytes);
1495 defsubr (&Schar_width);
1496 defsubr (&Sstring_width);
1497 defsubr (&Schar_direction);
1498 defsubr (&Schars_in_string);
1499 defsubr (&Schar_boundary_p);
1500 defsubr (&Sconcat_chars);
1501 defsubr (&Scmpcharp);
1502 defsubr (&Scmpchar_component);
1503 defsubr (&Scmpchar_cmp_rule);
1504 defsubr (&Scmpchar_cmp_rule_p);
1505 defsubr (&Scmpchar_cmp_count);
1506 defsubr (&Scompose_string);
1507 defsubr (&Ssetup_special_charsets);
1509 DEFVAR_LISP ("charset-list", &Vcharset_list,
1510 "List of charsets ever defined.");
1511 Vcharset_list = Fcons (Qascii, Qnil);
1513 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1514 "Leading-code of composite characters.");
1515 leading_code_composition = LEADING_CODE_COMPOSITION;
1517 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1518 "Leading-code of private TYPE9N charset of column-width 1.");
1519 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1521 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1522 "Leading-code of private TYPE9N charset of column-width 2.");
1523 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1525 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1526 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1527 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1529 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1530 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1531 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1534 #endif /* emacs */