(Fmake_temp_name): forgot the \n\ in the docstring
[emacs.git] / src / charset.c
blob61fdcf66ef56f39aa82293bf9409055ef59e36d6
1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* At first, see the document in `charset.h' to understand the code in
23 this file. */
25 #include <stdio.h>
27 #ifdef emacs
29 #include <sys/types.h>
30 #include <config.h>
31 #include "lisp.h"
32 #include "buffer.h"
33 #include "charset.h"
34 #include "coding.h"
35 #include "disptab.h"
37 #else /* not emacs */
39 #include "mulelib.h"
41 #endif /* emacs */
43 Lisp_Object Qcharset, Qascii, Qcomposition;
44 Lisp_Object Qunknown;
46 /* Declaration of special leading-codes. */
47 int leading_code_composition; /* for composite characters */
48 int leading_code_private_11; /* for private DIMENSION1 of 1-column */
49 int leading_code_private_12; /* for private DIMENSION1 of 2-column */
50 int leading_code_private_21; /* for private DIMENSION2 of 1-column */
51 int leading_code_private_22; /* for private DIMENSION2 of 2-column */
53 /* Declaration of special charsets. */
54 int charset_ascii; /* ASCII */
55 int charset_composition; /* for a composite character */
56 int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
57 int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
58 int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
59 int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
60 int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
61 int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
62 int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
64 int min_composite_char;
66 Lisp_Object Qcharset_table;
68 /* A char-table containing information of each character set. */
69 Lisp_Object Vcharset_table;
71 /* A vector of charset symbol indexed by charset-id. This is used
72 only for returning charset symbol from C functions. */
73 Lisp_Object Vcharset_symbol_table;
75 /* A list of charset symbols ever defined. */
76 Lisp_Object Vcharset_list;
78 /* Vector of translation table ever defined.
79 ID of a translation table is used to index this vector. */
80 Lisp_Object Vtranslation_table_vector;
82 /* A char-table for characters which may invoke auto-filling. */
83 Lisp_Object Vauto_fill_chars;
85 Lisp_Object Qauto_fill_chars;
87 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
88 int bytes_by_char_head[256];
89 int width_by_char_head[256];
91 /* Mapping table from ISO2022's charset (specified by DIMENSION,
92 CHARS, and FINAL-CHAR) to Emacs' charset. */
93 int iso_charset_table[2][2][128];
95 /* Table of pointers to the structure `cmpchar_info' indexed by
96 CMPCHAR-ID. */
97 struct cmpchar_info **cmpchar_table;
98 /* The current size of `cmpchar_table'. */
99 static int cmpchar_table_size;
100 /* Number of the current composite characters. */
101 int n_cmpchars;
103 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
104 unsigned char *_fetch_multibyte_char_p;
105 int _fetch_multibyte_char_len;
107 /* Offset to add to a non-ASCII value when inserting it. */
108 int nonascii_insert_offset;
110 /* Translation table for converting non-ASCII unibyte characters
111 to multibyte codes, or nil. */
112 Lisp_Object Vnonascii_translation_table;
114 /* List of all possible generic characters. */
115 Lisp_Object Vgeneric_character_list;
117 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
118 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
120 void
121 invalid_character (c)
122 int c;
124 error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
127 /* Parse composite character string STR of length LENGTH (>= 2) and
128 set BYTES, CHARSET, C1, and C2 as below.
130 It is assumed that *STR is LEADING_CODE_COMPOSITION and the
131 following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P.
133 If there is a valid composite character, set CHARSET, C1, and C2 to
134 such values that MAKE_CHAR can make the composite character from
135 them. Otherwise, set CHARSET to CHARSET_COMPOSITION, set C1 to the
136 second byte of the sequence, C2 to -1 so that MAKE_CHAR can make
137 the invalid multibyte character whose string representation is two
138 bytes of STR[0] and STR[1]. In any case, set BYTES to LENGTH.
140 This macro should be called only from SPLIT_MULTIBYTE_SEQ. */
142 #define SPLIT_COMPOSITE_SEQ(str, length, bytes, charset, c1, c2) \
143 do { \
144 int cmpchar_id = str_cmpchar_id ((str), (length)); \
146 (charset) = CHARSET_COMPOSITION; \
147 (bytes) = (length); \
148 if (cmpchar_id >= 0) \
150 (c1) = CHAR_FIELD2 (cmpchar_id); \
151 (c2) = CHAR_FIELD3 (cmpchar_id); \
153 else \
155 (c1) = (str)[1] & 0x7F; \
156 (c2) = -1; \
158 } while (0)
160 /* Parse non-composite multibyte character string STR of length LENGTH
161 (>= 2) and set BYTES to the length of actual multibyte sequence,
162 CHARSET, C1, and C2 to such values that MAKE_CHAR can make the
163 multibyte character from them.
165 It is assumed that *STR is one of base leading codes (excluding
166 LEADING_CODE_COMPOSITION) and the following (LENGTH - 1) bytes
167 satisfy !CHAR_HEAD_P.
169 This macro should be called only from SPLIT_MULTIBYTE_SEQ. */
171 #define SPLIT_CHARACTER_SEQ(str, length, bytes, charset, c1, c2) \
172 do { \
173 (bytes) = 1; \
174 (charset) = (str)[0]; \
175 if ((charset) >= LEADING_CODE_PRIVATE_11 \
176 && (charset) <= LEADING_CODE_PRIVATE_22) \
177 (charset) = (str)[(bytes)++]; \
178 if ((bytes) < (length)) \
180 (c1) = (str)[(bytes)++] & 0x7F; \
181 if ((bytes) < (length)) \
182 (c2) = (str)[(bytes)++] & 0x7F; \
183 else \
184 (c2) = -1; \
186 else \
187 (c1) = (c2) = -1; \
188 } while (0)
190 /* Parse string STR of length LENGTH and check if a multibyte
191 characters is at STR. set BYTES to the actual length, CHARSET, C1,
192 C2 to proper values for that character. */
194 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
195 do { \
196 int i; \
197 if (ASCII_BYTE_P ((str)[0])) \
198 i = 1; \
199 else \
200 for (i = 1; i < (length) && ! CHAR_HEAD_P ((str)[i]); i++); \
201 if (i == 1) \
202 (bytes) = 1, (charset) = CHARSET_ASCII, (c1) = (str)[0] ; \
203 else if ((str)[0] == LEADING_CODE_COMPOSITION) \
204 SPLIT_COMPOSITE_SEQ (str, i, bytes, charset, c1, c2); \
205 else \
207 if (i > BYTES_BY_CHAR_HEAD ((str)[0])) \
208 i = BYTES_BY_CHAR_HEAD ((str)[0]); \
209 SPLIT_CHARACTER_SEQ (str, i, bytes, charset, c1, c2); \
211 } while (0)
213 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0. */
214 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
215 (charset == CHARSET_ASCII \
216 ? ((c1) >= 0 && (c1) <= 0x7F) \
217 : (CHARSET_DIMENSION (charset) == 1 \
218 ? ((c1) >= 0x20 && (c1) <= 0x7F) \
219 : ((c1) >= 0x20 && (c1) <= 0x7F && (c2) >= 0x20 && (c2) <= 0x7F)))
221 /* Set STR a pointer to the multi-byte form of the character C. If C
222 is not a composite character, the multi-byte form is set in WORKBUF
223 and STR points WORKBUF. The caller should allocate at least 4-byte
224 area at WORKBUF in advance. Returns the length of the multi-byte
225 form. If C is an invalid character, store (C & 0xFF) in WORKBUF[0]
226 and return 1.
228 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
229 function directly if C can be an ASCII character. */
232 non_ascii_char_to_string (c, workbuf, str)
233 int c;
234 unsigned char *workbuf, **str;
236 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
238 /* Multibyte character can't have a modifier bit. */
239 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
240 invalid_character (c);
242 /* For Meta, Shift, and Control modifiers, we need special care. */
243 if (c & CHAR_META)
245 /* Move the meta bit to the right place for a string. */
246 c = (c & ~CHAR_META) | 0x80;
248 if (c & CHAR_SHIFT)
250 /* Shift modifier is valid only with [A-Za-z]. */
251 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
252 c &= ~CHAR_SHIFT;
253 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
254 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
256 if (c & CHAR_CTL)
258 /* Simulate the code in lread.c. */
259 /* Allow `\C- ' and `\C-?'. */
260 if (c == (CHAR_CTL | ' '))
261 c = 0;
262 else if (c == (CHAR_CTL | '?'))
263 c = 127;
264 /* ASCII control chars are made from letters (both cases),
265 as well as the non-letters within 0100...0137. */
266 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
267 c &= (037 | (~0177 & ~CHAR_CTL));
268 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
269 c &= (037 | (~0177 & ~CHAR_CTL));
272 /* If C still has any modifier bits, it is an invalid character. */
273 if (c & CHAR_MODIFIER_MASK)
274 invalid_character (c);
276 *str = workbuf;
277 *workbuf++ = c;
279 else
281 int charset, c1, c2;
283 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
284 if (charset == CHARSET_COMPOSITION)
286 if (c >= MAX_CHAR)
287 invalid_character (c);
288 if (c >= MIN_CHAR_COMPOSITION)
290 /* Valid composite character. */
291 *str = cmpchar_table[COMPOSITE_CHAR_ID (c)]->data;
292 workbuf = *str + cmpchar_table[COMPOSITE_CHAR_ID (c)]->len;
294 else
296 /* Invalid but can have multibyte form. */
297 *str = workbuf;
298 *workbuf++ = LEADING_CODE_COMPOSITION;
299 *workbuf++ = c1 | 0x80;
302 else if (charset > CHARSET_COMPOSITION)
304 *str = workbuf;
305 if (charset >= LEADING_CODE_EXT_11)
306 *workbuf++ = (charset < LEADING_CODE_EXT_12
307 ? LEADING_CODE_PRIVATE_11
308 : (charset < LEADING_CODE_EXT_21
309 ? LEADING_CODE_PRIVATE_12
310 : (charset < LEADING_CODE_EXT_22
311 ? LEADING_CODE_PRIVATE_21
312 : LEADING_CODE_PRIVATE_22)));
313 *workbuf++ = charset;
314 if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
315 invalid_character (c);
316 if (c1)
318 *workbuf++ = c1 | 0x80;
319 if (c2 > 0)
320 *workbuf++ = c2 | 0x80;
323 else if (charset == CHARSET_ASCII)
324 *workbuf++= c & 0x7F;
325 else
326 invalid_character (c);
329 return (workbuf - *str);
332 /* Return the non-ASCII character corresponding to multi-byte form at
333 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
334 length of the multibyte form in *ACTUAL_LEN.
336 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
337 directly if you want ot handle ASCII characters as well. */
340 string_to_non_ascii_char (str, len, actual_len)
341 const unsigned char *str;
342 int len, *actual_len;
344 int c, bytes, charset, c1, c2;
346 SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
347 c = MAKE_CHAR (charset, c1, c2);
348 if (actual_len)
349 *actual_len = bytes;
350 return c;
353 /* Return the length of the multi-byte form at string STR of length LEN.
354 Use the macro MULTIBYTE_FORM_LENGTH instead. */
356 multibyte_form_length (str, len)
357 const unsigned char *str;
358 int len;
360 int bytes;
362 PARSE_MULTIBYTE_SEQ (str, len, bytes);
363 return bytes;
366 /* Check multibyte form at string STR of length LEN and set variables
367 pointed by CHARSET, C1, and C2 to charset and position codes of the
368 character at STR, and return 0. If there's no multibyte character,
369 return -1. This should be used only in the macro SPLIT_STRING
370 which checks range of STR in advance. */
373 split_non_ascii_string (str, len, charset, c1, c2)
374 const unsigned char *str;
375 unsigned char *c1, *c2;
376 int len, *charset;
378 register int bytes, cs, code1, code2 = -1;
380 SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
381 if (cs == CHARSET_ASCII)
382 return -1;
383 *charset = cs;
384 *c1 = code1;
385 *c2 = code2;
388 /* Return 1 iff character C has valid printable glyph.
389 Use the macro CHAR_PRINTABLE_P instead. */
391 char_printable_p (c)
392 int c;
394 int charset, c1, c2, chars;
396 if (SINGLE_BYTE_CHAR_P (c))
397 return 1;
398 if (c >= MIN_CHAR_COMPOSITION)
399 return (c < MAX_CHAR);
401 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
402 if (! CHARSET_DEFINED_P (charset))
403 return 0;
404 if (CHARSET_CHARS (charset) == 94
405 ? c1 <= 32 || c1 >= 127
406 : c1 < 32)
407 return 0;
408 if (CHARSET_DIMENSION (charset) == 2
409 && (CHARSET_CHARS (charset) == 94
410 ? c2 <= 32 || c2 >= 127
411 : c2 < 32))
412 return 0;
413 return 1;
416 /* Translate character C by translation table TABLE. If C
417 is negative, translate a character specified by CHARSET, C1, and C2
418 (C1 and C2 are code points of the character). If no translation is
419 found in TABLE, return C. */
421 translate_char (table, c, charset, c1, c2)
422 Lisp_Object table;
423 int c, charset, c1, c2;
425 Lisp_Object ch;
426 int alt_charset, alt_c1, alt_c2, dimension;
428 if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
429 if (!CHAR_TABLE_P (table)
430 || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
431 return c;
433 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
434 dimension = CHARSET_DIMENSION (alt_charset);
435 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
436 /* CH is not a generic character, just return it. */
437 return XFASTINT (ch);
439 /* Since CH is a generic character, we must return a specific
440 charater which has the same position codes as C from CH. */
441 if (charset < 0)
442 SPLIT_CHAR (c, charset, c1, c2);
443 if (dimension != CHARSET_DIMENSION (charset))
444 /* We can't make such a character because of dimension mismatch. */
445 return c;
446 return MAKE_CHAR (alt_charset, c1, c2);
449 /* Convert the unibyte character C to multibyte based on
450 Vnonascii_translation_table or nonascii_insert_offset. If they can't
451 convert C to a valid multibyte character, convert it based on
452 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
455 unibyte_char_to_multibyte (c)
456 int c;
458 if (c < 0400 && c >= 0200)
460 int c_save = c;
462 if (! NILP (Vnonascii_translation_table))
464 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
465 if (c >= 0400 && ! char_valid_p (c, 0))
466 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
468 else if (c >= 0240 && nonascii_insert_offset > 0)
470 c += nonascii_insert_offset;
471 if (c < 0400 || ! char_valid_p (c, 0))
472 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
474 else if (c >= 0240)
475 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
477 return c;
481 /* Convert the multibyte character C to unibyte 8-bit character based
482 on Vnonascii_translation_table or nonascii_insert_offset. If
483 REV_TBL is non-nil, it should be a reverse table of
484 Vnonascii_translation_table, i.e. what given by:
485 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
488 multibyte_char_to_unibyte (c, rev_tbl)
489 int c;
490 Lisp_Object rev_tbl;
492 if (!SINGLE_BYTE_CHAR_P (c))
494 int c_save = c;
496 if (! CHAR_TABLE_P (rev_tbl)
497 && CHAR_TABLE_P (Vnonascii_translation_table))
498 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
499 make_number (0));
500 if (CHAR_TABLE_P (rev_tbl))
502 Lisp_Object temp;
503 temp = Faref (rev_tbl, make_number (c));
504 if (INTEGERP (temp))
505 c = XINT (temp);
506 if (c >= 256)
507 c = (c_save & 0177) + 0200;
509 else
511 if (nonascii_insert_offset > 0)
512 c -= nonascii_insert_offset;
513 if (c < 128 || c >= 256)
514 c = (c_save & 0177) + 0200;
518 return c;
522 /* Update the table Vcharset_table with the given arguments (see the
523 document of `define-charset' for the meaning of each argument).
524 Several other table contents are also updated. The caller should
525 check the validity of CHARSET-ID and the remaining arguments in
526 advance. */
528 void
529 update_charset_table (charset_id, dimension, chars, width, direction,
530 iso_final_char, iso_graphic_plane,
531 short_name, long_name, description)
532 Lisp_Object charset_id, dimension, chars, width, direction;
533 Lisp_Object iso_final_char, iso_graphic_plane;
534 Lisp_Object short_name, long_name, description;
536 int charset = XINT (charset_id);
537 int bytes;
538 unsigned char leading_code_base, leading_code_ext;
540 if (NILP (CHARSET_TABLE_ENTRY (charset)))
541 CHARSET_TABLE_ENTRY (charset)
542 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
544 /* Get byte length of multibyte form, base leading-code, and
545 extended leading-code of the charset. See the comment under the
546 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
547 bytes = XINT (dimension);
548 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
550 /* Official charset, it doesn't have an extended leading-code. */
551 if (charset != CHARSET_ASCII)
552 bytes += 1; /* For a base leading-code. */
553 leading_code_base = charset;
554 leading_code_ext = 0;
556 else
558 /* Private charset. */
559 bytes += 2; /* For base and extended leading-codes. */
560 leading_code_base
561 = (charset < LEADING_CODE_EXT_12
562 ? LEADING_CODE_PRIVATE_11
563 : (charset < LEADING_CODE_EXT_21
564 ? LEADING_CODE_PRIVATE_12
565 : (charset < LEADING_CODE_EXT_22
566 ? LEADING_CODE_PRIVATE_21
567 : LEADING_CODE_PRIVATE_22)));
568 leading_code_ext = charset;
571 if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
572 error ("Invalid dimension for the charset-ID %d", charset);
574 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
575 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
576 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
577 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
578 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
579 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
580 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
581 = make_number (leading_code_base);
582 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
583 = make_number (leading_code_ext);
584 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
585 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
586 = iso_graphic_plane;
587 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
588 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
589 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
590 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
593 /* If we have already defined a charset which has the same
594 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
595 DIRECTION, we must update the entry REVERSE-CHARSET of both
596 charsets. If there's no such charset, the value of the entry
597 is set to nil. */
598 int i;
600 for (i = 0; i <= MAX_CHARSET; i++)
601 if (!NILP (CHARSET_TABLE_ENTRY (i)))
603 if (CHARSET_DIMENSION (i) == XINT (dimension)
604 && CHARSET_CHARS (i) == XINT (chars)
605 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
606 && CHARSET_DIRECTION (i) != XINT (direction))
608 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
609 = make_number (i);
610 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
611 break;
614 if (i > MAX_CHARSET)
615 /* No such a charset. */
616 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
617 = make_number (-1);
620 if (charset != CHARSET_ASCII
621 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
623 width_by_char_head[leading_code_base] = XINT (width);
625 /* Update table emacs_code_class. */
626 emacs_code_class[charset] = (bytes == 2
627 ? EMACS_leading_code_2
628 : (bytes == 3
629 ? EMACS_leading_code_3
630 : EMACS_leading_code_4));
633 /* Update table iso_charset_table. */
634 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
635 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
638 #ifdef emacs
640 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
641 is invalid. */
643 get_charset_id (charset_symbol)
644 Lisp_Object charset_symbol;
646 Lisp_Object val;
647 int charset;
649 return ((SYMBOLP (charset_symbol)
650 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
651 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
652 CHARSET_VALID_P (charset)))
653 ? charset : -1);
656 /* Return an identification number for a new private charset of
657 DIMENSION and WIDTH. If there's no more room for the new charset,
658 return 0. */
659 Lisp_Object
660 get_new_private_charset_id (dimension, width)
661 int dimension, width;
663 int charset, from, to;
665 if (dimension == 1)
667 if (width == 1)
668 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
669 else
670 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
672 else
674 if (width == 1)
675 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
676 else
677 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX + 1;
680 for (charset = from; charset < to; charset++)
681 if (!CHARSET_DEFINED_P (charset)) break;
683 return make_number (charset < to ? charset : 0);
686 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
687 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
688 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
689 treated as a private charset.\n\
690 INFO-VECTOR is a vector of the format:\n\
691 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
692 SHORT-NAME LONG-NAME DESCRIPTION]\n\
693 The meanings of each elements is as follows:\n\
694 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
695 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
696 WIDTH (integer) is the number of columns a character in the charset\n\
697 occupies on the screen: one of 0, 1, and 2.\n\
699 DIRECTION (integer) is the rendering direction of characters in the\n\
700 charset when rendering. If 0, render from left to right, else\n\
701 render from right to left.\n\
703 ISO-FINAL-CHAR (character) is the final character of the\n\
704 corresponding ISO 2022 charset.\n\
706 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
707 while encoding to variants of ISO 2022 coding system, one of the\n\
708 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
710 SHORT-NAME (string) is the short name to refer to the charset.\n\
712 LONG-NAME (string) is the long name to refer to the charset.\n\
714 DESCRIPTION (string) is the description string of the charset.")
715 (charset_id, charset_symbol, info_vector)
716 Lisp_Object charset_id, charset_symbol, info_vector;
718 Lisp_Object *vec;
720 if (!NILP (charset_id))
721 CHECK_NUMBER (charset_id, 0);
722 CHECK_SYMBOL (charset_symbol, 1);
723 CHECK_VECTOR (info_vector, 2);
725 if (! NILP (charset_id))
727 if (! CHARSET_VALID_P (XINT (charset_id)))
728 error ("Invalid CHARSET: %d", XINT (charset_id));
729 else if (CHARSET_DEFINED_P (XINT (charset_id)))
730 error ("Already defined charset: %d", XINT (charset_id));
733 vec = XVECTOR (info_vector)->contents;
734 if (XVECTOR (info_vector)->size != 9
735 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
736 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
737 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
738 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
739 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
740 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
741 || !STRINGP (vec[6])
742 || !STRINGP (vec[7])
743 || !STRINGP (vec[8]))
744 error ("Invalid info-vector argument for defining charset %s",
745 XSYMBOL (charset_symbol)->name->data);
747 if (NILP (charset_id))
749 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
750 if (XINT (charset_id) == 0)
751 error ("There's no room for a new private charset %s",
752 XSYMBOL (charset_symbol)->name->data);
755 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
756 vec[4], vec[5], vec[6], vec[7], vec[8]);
757 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
758 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
759 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
760 return Qnil;
763 DEFUN ("generic-character-list", Fgeneric_character_list,
764 Sgeneric_character_list, 0, 0, 0,
765 "Return a list of all possible generic characters.\n\
766 It includes a generic character for a charset not yet defined.")
769 return Vgeneric_character_list;
772 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
773 Sget_unused_iso_final_char, 2, 2, 0,
774 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
775 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
776 CHARS is the number of characters in a dimension: 94 or 96.\n\
778 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
779 If there's no unused final char for the specified kind of charset,\n\
780 return nil.")
781 (dimension, chars)
782 Lisp_Object dimension, chars;
784 int final_char;
786 CHECK_NUMBER (dimension, 0);
787 CHECK_NUMBER (chars, 1);
788 if (XINT (dimension) != 1 && XINT (dimension) != 2)
789 error ("Invalid charset dimension %d, it should be 1 or 2",
790 XINT (dimension));
791 if (XINT (chars) != 94 && XINT (chars) != 96)
792 error ("Invalid charset chars %d, it should be 94 or 96",
793 XINT (chars));
794 for (final_char = '0'; final_char <= '?'; final_char++)
796 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
797 break;
799 return (final_char <= '?' ? make_number (final_char) : Qnil);
802 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
803 4, 4, 0,
804 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
805 CHARSET should be defined by `defined-charset' in advance.")
806 (dimension, chars, final_char, charset_symbol)
807 Lisp_Object dimension, chars, final_char, charset_symbol;
809 int charset;
811 CHECK_NUMBER (dimension, 0);
812 CHECK_NUMBER (chars, 1);
813 CHECK_NUMBER (final_char, 2);
814 CHECK_SYMBOL (charset_symbol, 3);
816 if (XINT (dimension) != 1 && XINT (dimension) != 2)
817 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
818 if (XINT (chars) != 94 && XINT (chars) != 96)
819 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
820 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
821 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
822 if ((charset = get_charset_id (charset_symbol)) < 0)
823 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
825 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
826 return Qnil;
829 /* Return number of different charsets in STR of length LEN. In
830 addition, for each found charset N, CHARSETS[N] is set 1. The
831 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
832 It may lookup a translation table TABLE if supplied.
834 If CMPCHARP is nonzero and some composite character is found,
835 CHARSETS[128] is also set 1 and the returned number is incremented
836 by 1.
838 If MULTIBYTE is zero, do not check multibyte characters, i.e. if
839 any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any
840 8-bit codes are found CHARSET[1] is set to 1. */
843 find_charset_in_str (str, len, charsets, table, cmpcharp, multibyte)
844 unsigned char *str;
845 int len, *charsets;
846 Lisp_Object table;
847 int cmpcharp;
848 int multibyte;
850 register int num = 0, c;
852 if (! multibyte)
854 unsigned char *endp = str + len;
855 int maskbits = 0;
857 while (str < endp && maskbits != 3)
858 maskbits |= (*str++ < 0x80 ? 1 : 2);
859 if (maskbits & 1)
861 charsets[0] = 1;
862 num++;
864 if (maskbits & 2)
866 charsets[1] = 1;
867 num++;
869 return num;
872 if (! CHAR_TABLE_P (table))
873 table = Qnil;
875 while (len > 0)
877 int bytes, charset;
878 c = *str;
880 if (c == LEADING_CODE_COMPOSITION)
882 int cmpchar_id = str_cmpchar_id (str, len);
883 GLYPH *glyph;
885 if (cmpchar_id >= 0)
887 struct cmpchar_info *cmp_p = cmpchar_table[cmpchar_id];
888 int i;
890 for (i = 0; i < cmp_p->glyph_len; i++)
892 c = cmp_p->glyph[i];
893 if (!NILP (table))
895 if ((c = translate_char (table, c, 0, 0, 0)) < 0)
896 c = cmp_p->glyph[i];
898 if ((charset = CHAR_CHARSET (c)) < 0)
899 charset = CHARSET_ASCII;
900 if (!charsets[charset])
902 charsets[charset] = 1;
903 num += 1;
906 str += cmp_p->len;
907 len -= cmp_p->len;
908 if (cmpcharp && !charsets[CHARSET_COMPOSITION])
910 charsets[CHARSET_COMPOSITION] = 1;
911 num += 1;
913 continue;
916 charset = 1; /* This leads to `unknown' charset. */
917 bytes = 1;
919 else
921 c = STRING_CHAR_AND_LENGTH (str, len, bytes);
922 if (! NILP (table))
924 int c1 = translate_char (table, c, 0, 0, 0);
925 if (c1 >= 0)
926 c = c1;
928 charset = CHAR_CHARSET (c);
931 if (!charsets[charset])
933 charsets[charset] = 1;
934 num += 1;
936 str += bytes;
937 len -= bytes;
939 return num;
942 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
943 2, 3, 0,
944 "Return a list of charsets in the region between BEG and END.\n\
945 BEG and END are buffer positions.\n\
946 If the region contains any composite character,\n\
947 `composition' is included in the returned list.\n\
948 Optional arg TABLE if non-nil is a translation table to look up.\n\
950 If the region contains invalid multiybte characters,\n\
951 `unknown' is included in the returned list.\n\
953 If the current buffer is unibyte, the returned list contains\n\
954 `ascii' if any 7-bit characters are found,\n\
955 and `unknown' if any 8-bit characters are found.")
956 (beg, end, table)
957 Lisp_Object beg, end, table;
959 int charsets[MAX_CHARSET + 1];
960 int from, from_byte, to, stop, stop_byte, i;
961 Lisp_Object val;
962 int undefined;
963 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
965 validate_region (&beg, &end);
966 from = XFASTINT (beg);
967 stop = to = XFASTINT (end);
969 if (from < GPT && GPT < to)
971 stop = GPT;
972 stop_byte = GPT_BYTE;
974 else
975 stop_byte = CHAR_TO_BYTE (stop);
977 from_byte = CHAR_TO_BYTE (from);
979 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
980 while (1)
982 find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte,
983 charsets, table, 1, multibyte);
984 if (stop < to)
986 from = stop, from_byte = stop_byte;
987 stop = to, stop_byte = CHAR_TO_BYTE (stop);
989 else
990 break;
993 val = Qnil;
994 undefined = 0;
995 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--)
996 if (charsets[i])
998 if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION)
999 val = Fcons (CHARSET_SYMBOL (i), val);
1000 else
1001 undefined = 1;
1003 if (undefined)
1004 val = Fcons (Qunknown, val);
1005 return val;
1008 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1009 1, 2, 0,
1010 "Return a list of charsets in STR.\n\
1011 If the string contains any composite characters,\n\
1012 `composition' is included in the returned list.\n\
1013 Optional arg TABLE if non-nil is a translation table to look up.\n\
1015 If the region contains invalid multiybte characters,\n\
1016 `unknown' is included in the returned list.\n\
1018 If STR is unibyte, the returned list contains\n\
1019 `ascii' if any 7-bit characters are found,\n\
1020 and `unknown' if any 8-bit characters are found.")
1021 (str, table)
1022 Lisp_Object str, table;
1024 int charsets[MAX_CHARSET + 1];
1025 int i;
1026 Lisp_Object val;
1027 int undefined;
1028 int multibyte;
1030 CHECK_STRING (str, 0);
1031 multibyte = STRING_MULTIBYTE (str);
1033 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
1034 find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)),
1035 charsets, table, 1, multibyte);
1036 val = Qnil;
1037 undefined = 0;
1038 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--)
1039 if (charsets[i])
1041 if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION)
1042 val = Fcons (CHARSET_SYMBOL (i), val);
1043 else
1044 undefined = 1;
1046 if (undefined)
1047 val = Fcons (Qunknown, val);
1048 return val;
1051 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
1053 (charset, code1, code2)
1054 Lisp_Object charset, code1, code2;
1056 int charset_id, c1, c2;
1058 CHECK_NUMBER (charset, 0);
1059 charset_id = XINT (charset);
1060 if (!CHARSET_DEFINED_P (charset_id))
1061 error ("Invalid charset ID: %d", XINT (charset));
1063 if (NILP (code1))
1064 c1 = 0;
1065 else
1067 CHECK_NUMBER (code1, 1);
1068 c1 = XINT (code1);
1070 if (NILP (code2))
1071 c2 = 0;
1072 else
1074 CHECK_NUMBER (code2, 2);
1075 c2 = XINT (code2);
1078 if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1079 error ("Invalid code points: %d %d", c1, c2);
1080 c1 &= 0x7F;
1081 c2 &= 0x7F;
1082 if (c1 == 0
1083 ? c2 != 0
1084 : (c2 == 0
1085 ? !CHAR_COMPONENTS_VALID_P (charset, c1, 0x20)
1086 : !CHAR_COMPONENTS_VALID_P (charset, c1, c2)))
1087 error ("Invalid code points: %d %d", c1, c2);
1089 return make_number (MAKE_CHAR (charset_id, c1, c2));
1092 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1093 "Return list of charset and one or two position-codes of CHAR.\n\
1094 If CHAR is invalid as a character code,\n\
1095 return a list of symbol `unknown' and CHAR.")
1096 (ch)
1097 Lisp_Object ch;
1099 Lisp_Object val;
1100 int c, charset, c1, c2;
1102 CHECK_NUMBER (ch, 0);
1103 c = XFASTINT (ch);
1104 if (!CHAR_VALID_P (c, 1))
1105 return Fcons (Qunknown, Fcons (ch, Qnil));
1106 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1107 return (c2 >= 0
1108 ? Fcons (CHARSET_SYMBOL (charset),
1109 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1110 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1113 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1114 "Return charset of CHAR.")
1115 (ch)
1116 Lisp_Object ch;
1118 CHECK_NUMBER (ch, 0);
1120 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1123 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1124 "Return charset of a character in the current buffer at position POS.\n\
1125 If POS is nil, it defauls to the current point.\n\
1126 If POS is out of range, the value is nil.")
1127 (pos)
1128 Lisp_Object pos;
1130 register int pos_byte, bytes, charset, c1, c2;
1131 register unsigned char *p;
1133 if (NILP (pos))
1134 pos_byte = PT_BYTE;
1135 else if (MARKERP (pos))
1137 pos_byte = marker_byte_position (pos);
1138 if (pos_byte < BEGV_BYTE || pos_byte >= ZV_BYTE)
1139 return Qnil;
1141 else
1143 CHECK_NUMBER (pos, 0);
1144 if (XINT (pos) < BEGV || XINT (pos) >= ZV)
1145 return Qnil;
1146 pos_byte = CHAR_TO_BYTE (XINT (pos));
1148 p = BYTE_POS_ADDR (pos_byte);
1149 if (BASE_LEADING_CODE_P (*p))
1151 SPLIT_MULTIBYTE_SEQ (p, Z_BYTE - pos_byte, bytes, charset, c1, c2);
1152 if (charset < 0)
1153 charset = 1;
1155 else
1156 charset = CHARSET_ASCII;
1158 return CHARSET_SYMBOL (charset);
1161 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1162 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1164 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1165 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1166 where as Emacs distinguishes them by charset symbol.\n\
1167 See the documentation of the function `charset-info' for the meanings of\n\
1168 DIMENSION, CHARS, and FINAL-CHAR.")
1169 (dimension, chars, final_char)
1170 Lisp_Object dimension, chars, final_char;
1172 int charset;
1174 CHECK_NUMBER (dimension, 0);
1175 CHECK_NUMBER (chars, 1);
1176 CHECK_NUMBER (final_char, 2);
1178 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1179 return Qnil;
1180 return CHARSET_SYMBOL (charset);
1183 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1184 generic character. If GENERICP is zero, return nonzero iff C is a
1185 valid normal character. Do not call this function directly,
1186 instead use macro CHAR_VALID_P. */
1188 char_valid_p (c, genericp)
1189 int c, genericp;
1191 int charset, c1, c2;
1193 if (c < 0)
1194 return 0;
1195 if (SINGLE_BYTE_CHAR_P (c))
1196 return 1;
1197 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
1198 if (charset == CHARSET_COMPOSITION)
1199 return ((c >= MIN_CHAR_COMPOSITION
1200 && c < MIN_CHAR_COMPOSITION + n_cmpchars)
1201 || (genericp && c == GENERIC_COMPOSITION_CHAR));
1202 if (genericp)
1204 if (c1)
1206 if (c2 <= 0) c2 = 0x20;
1208 else
1210 if (c2 <= 0) c1 = c2 = 0x20;
1213 return (CHARSET_DEFINED_P (charset)
1214 && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1217 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1218 "Return t if OBJECT is a valid normal character.\n\
1219 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1220 a valid generic character.")
1221 (object, genericp)
1222 Lisp_Object object, genericp;
1224 if (! NATNUMP (object))
1225 return Qnil;
1226 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1229 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1230 Sunibyte_char_to_multibyte, 1, 1, 0,
1231 "Convert the unibyte character CH to multibyte character.\n\
1232 The conversion is done based on `nonascii-translation-table' (which see)\n\
1233 or `nonascii-insert-offset' (which see).")
1234 (ch)
1235 Lisp_Object ch;
1237 int c;
1239 CHECK_NUMBER (ch, 0);
1240 c = XINT (ch);
1241 if (c < 0 || c >= 0400)
1242 error ("Invalid unibyte character: %d", c);
1243 c = unibyte_char_to_multibyte (c);
1244 if (c < 0)
1245 error ("Can't convert to multibyte character: %d", XINT (ch));
1246 return make_number (c);
1249 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1250 Smultibyte_char_to_unibyte, 1, 1, 0,
1251 "Convert the multibyte character CH to unibyte character.\n\
1252 The conversion is done based on `nonascii-translation-table' (which see)\n\
1253 or `nonascii-insert-offset' (which see).")
1254 (ch)
1255 Lisp_Object ch;
1257 int c;
1259 CHECK_NUMBER (ch, 0);
1260 c = XINT (ch);
1261 if (! CHAR_VALID_P (c, 0))
1262 error ("Invalid multibyte character: %d", c);
1263 c = multibyte_char_to_unibyte (c, Qnil);
1264 if (c < 0)
1265 error ("Can't convert to unibyte character: %d", XINT (ch));
1266 return make_number (c);
1269 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1270 "Return 1 regardless of the argument CHAR.\n\
1271 This is now an obsolete function. We keep it just for backward compatibility.")
1272 (ch)
1273 Lisp_Object ch;
1275 Lisp_Object val;
1277 CHECK_NUMBER (ch, 0);
1278 return make_number (1);
1281 /* Return how many bytes C will occupy in a multibyte buffer.
1282 Don't call this function directly, instead use macro CHAR_BYTES. */
1284 char_bytes (c)
1285 int c;
1287 int bytes;
1289 if (SINGLE_BYTE_CHAR_P (c) || (c & ~GLYPH_MASK_CHAR))
1290 return 1;
1292 if (COMPOSITE_CHAR_P (c))
1294 unsigned int id = COMPOSITE_CHAR_ID (c);
1296 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
1298 else
1300 int charset = CHAR_CHARSET (c);
1302 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
1305 return bytes;
1308 /* Return the width of character of which multi-byte form starts with
1309 C. The width is measured by how many columns occupied on the
1310 screen when displayed in the current buffer. */
1312 #define ONE_BYTE_CHAR_WIDTH(c) \
1313 (c < 0x20 \
1314 ? (c == '\t' \
1315 ? XFASTINT (current_buffer->tab_width) \
1316 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1317 : (c < 0x7f \
1318 ? 1 \
1319 : (c == 0x7F \
1320 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1321 : ((! NILP (current_buffer->enable_multibyte_characters) \
1322 && BASE_LEADING_CODE_P (c)) \
1323 ? WIDTH_BY_CHAR_HEAD (c) \
1324 : 4))))
1326 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1327 "Return width of CHAR when displayed in the current buffer.\n\
1328 The width is measured by how many columns it occupies on the screen.")
1329 (ch)
1330 Lisp_Object ch;
1332 Lisp_Object val, disp;
1333 int c;
1334 struct Lisp_Char_Table *dp = buffer_display_table ();
1336 CHECK_NUMBER (ch, 0);
1338 c = XINT (ch);
1340 /* Get the way the display table would display it. */
1341 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1343 if (VECTORP (disp))
1344 XSETINT (val, XVECTOR (disp)->size);
1345 else if (SINGLE_BYTE_CHAR_P (c))
1346 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1347 else if (COMPOSITE_CHAR_P (c))
1349 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
1350 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 1));
1352 else
1354 int charset = CHAR_CHARSET (c);
1356 XSETFASTINT (val, CHARSET_WIDTH (charset));
1358 return val;
1361 /* Return width of string STR of length LEN when displayed in the
1362 current buffer. The width is measured by how many columns it
1363 occupies on the screen. */
1366 strwidth (str, len)
1367 unsigned char *str;
1368 int len;
1370 unsigned char *endp = str + len;
1371 int width = 0;
1372 struct Lisp_Char_Table *dp = buffer_display_table ();
1374 while (str < endp)
1376 if (*str == LEADING_CODE_COMPOSITION)
1378 int id = str_cmpchar_id (str, endp - str);
1380 if (id < 0)
1382 width += 4;
1383 str++;
1385 else
1387 width += cmpchar_table[id]->width;
1388 str += cmpchar_table[id]->len;
1391 else
1393 Lisp_Object disp;
1394 int thislen;
1395 int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen);
1397 /* Get the way the display table would display it. */
1398 if (dp)
1399 disp = DISP_CHAR_VECTOR (dp, c);
1400 else
1401 disp = Qnil;
1403 if (VECTORP (disp))
1404 width += XVECTOR (disp)->size;
1405 else
1406 width += ONE_BYTE_CHAR_WIDTH (*str);
1408 str += thislen;
1411 return width;
1414 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1415 "Return width of STRING when displayed in the current buffer.\n\
1416 Width is measured by how many columns it occupies on the screen.\n\
1417 When calculating width of a multibyte character in STRING,\n\
1418 only the base leading-code is considered; the validity of\n\
1419 the following bytes is not checked.")
1420 (str)
1421 Lisp_Object str;
1423 Lisp_Object val;
1425 CHECK_STRING (str, 0);
1426 XSETFASTINT (val, strwidth (XSTRING (str)->data,
1427 STRING_BYTES (XSTRING (str))));
1428 return val;
1431 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1432 "Return the direction of CHAR.\n\
1433 The returned value is 0 for left-to-right and 1 for right-to-left.")
1434 (ch)
1435 Lisp_Object ch;
1437 int charset;
1439 CHECK_NUMBER (ch, 0);
1440 charset = CHAR_CHARSET (XFASTINT (ch));
1441 if (!CHARSET_DEFINED_P (charset))
1442 invalid_character (XINT (ch));
1443 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1446 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1447 "Return number of characters between BEG and END.")
1448 (beg, end)
1449 Lisp_Object beg, end;
1451 int from, to;
1453 CHECK_NUMBER_COERCE_MARKER (beg, 0);
1454 CHECK_NUMBER_COERCE_MARKER (end, 1);
1456 from = min (XFASTINT (beg), XFASTINT (end));
1457 to = max (XFASTINT (beg), XFASTINT (end));
1459 return make_number (to - from);
1462 /* Return the number of characters in the NBYTES bytes at PTR.
1463 This works by looking at the contents and checking for multibyte sequences.
1464 However, if the current buffer has enable-multibyte-characters = nil,
1465 we treat each byte as a character. */
1468 chars_in_text (ptr, nbytes)
1469 unsigned char *ptr;
1470 int nbytes;
1472 /* current_buffer is null at early stages of Emacs initialization. */
1473 if (current_buffer == 0
1474 || NILP (current_buffer->enable_multibyte_characters))
1475 return nbytes;
1477 return multibyte_chars_in_text (ptr, nbytes);
1480 /* Return the number of characters in the NBYTES bytes at PTR.
1481 This works by looking at the contents and checking for multibyte sequences.
1482 It ignores enable-multibyte-characters. */
1485 multibyte_chars_in_text (ptr, nbytes)
1486 unsigned char *ptr;
1487 int nbytes;
1489 unsigned char *endp;
1490 int chars, bytes;
1492 endp = ptr + nbytes;
1493 chars = 0;
1495 while (ptr < endp)
1497 if (BASE_LEADING_CODE_P (*ptr))
1499 PARSE_MULTIBYTE_SEQ (ptr, nbytes, bytes);
1500 ptr += bytes;
1501 nbytes -= bytes;
1503 else
1505 ptr++;
1506 nbytes--;
1508 chars++;
1511 return chars;
1514 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1515 "Concatenate all the argument characters and make the result a string.")
1516 (n, args)
1517 int n;
1518 Lisp_Object *args;
1520 int i;
1521 unsigned char *buf
1522 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
1523 unsigned char *p = buf;
1524 Lisp_Object val;
1526 for (i = 0; i < n; i++)
1528 int c, len;
1529 unsigned char *str;
1531 if (!INTEGERP (args[i]))
1532 CHECK_NUMBER (args[i], 0);
1533 c = XINT (args[i]);
1534 len = CHAR_STRING (c, p, str);
1535 if (p != str)
1536 /* C is a composite character. */
1537 bcopy (str, p, len);
1538 p += len;
1541 /* Here, we can't use make_string_from_bytes because of byte
1542 combining problem. */
1543 val = make_string (buf, p - buf);
1544 return val;
1547 #endif /* emacs */
1549 /*** Composite characters staffs ***/
1551 /* Each composite character is identified by CMPCHAR-ID which is
1552 assigned when Emacs needs the character code of the composite
1553 character (e.g. when displaying it on the screen). See the
1554 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1555 composite character is represented in Emacs. */
1557 /* If `static' is defined, it means that it is defined to null string. */
1558 #ifndef static
1559 /* The following function is copied from lread.c. */
1560 static int
1561 hash_string (ptr, len)
1562 unsigned char *ptr;
1563 int len;
1565 register unsigned char *p = ptr;
1566 register unsigned char *end = p + len;
1567 register unsigned char c;
1568 register int hash = 0;
1570 while (p != end)
1572 c = *p++;
1573 if (c >= 0140) c -= 40;
1574 hash = ((hash<<3) + (hash>>28) + c);
1576 return hash & 07777777777;
1578 #endif
1580 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
1582 static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
1584 /* Each element of `cmpchar_hash_table' is a pointer to an array of
1585 integer, where the 1st element is the size of the array, the 2nd
1586 element is how many elements are actually used in the array, and
1587 the remaining elements are CMPCHAR-IDs of composite characters of
1588 the same hash value. */
1589 #define CMPCHAR_HASH_SIZE(table) table[0]
1590 #define CMPCHAR_HASH_USED(table) table[1]
1591 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1593 /* Return CMPCHAR-ID of the composite character in STR of the length
1594 LEN. If the composite character has not yet been registered,
1595 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1596 is the sole function for assigning CMPCHAR-ID. */
1598 str_cmpchar_id (str, len)
1599 const unsigned char *str;
1600 int len;
1602 int hash_idx, *hashp;
1603 unsigned char *buf;
1604 int embedded_rule; /* 1 if composition rule is embedded. */
1605 int chars; /* number of components. */
1606 int i;
1607 struct cmpchar_info *cmpcharp;
1609 /* The second byte 0xFF means COMPOSITION rule is embedded. */
1610 embedded_rule = (str[1] == 0xFF);
1612 /* At first, get the actual length of the composite character. */
1614 const unsigned char *p, *endp = str + 1, *lastp = str + len;
1615 int bytes;
1617 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++;
1618 if (endp - str < 5)
1619 /* Any composite char have at least 5-byte length. */
1620 return -1;
1622 chars = 0;
1623 p = str + 1;
1624 while (p < endp)
1626 if (embedded_rule)
1628 p++;
1629 if (p >= endp)
1630 return -1;
1632 /* No need of checking if *P is 0xA0 because
1633 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1634 p += BYTES_BY_CHAR_HEAD (*p - 0x20);
1635 chars++;
1637 if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT)
1638 /* Invalid components. */
1639 return -1;
1640 len = p - str;
1642 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1643 hashp = cmpchar_hash_table[hash_idx];
1645 /* Then, look into the hash table. */
1646 if (hashp != NULL)
1647 /* Find the correct one among composite characters of the same
1648 hash value. */
1649 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1651 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1652 if (len == cmpcharp->len
1653 && ! bcmp (str, cmpcharp->data, len))
1654 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1657 /* We have to register the composite character in cmpchar_table. */
1658 if (n_cmpchars >= (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1659 /* No, we have no more room for a new composite character. */
1660 return -1;
1662 /* Make the entry in hash table. */
1663 if (hashp == NULL)
1665 /* Make a table for 8 composite characters initially. */
1666 hashp = (cmpchar_hash_table[hash_idx]
1667 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1668 CMPCHAR_HASH_SIZE (hashp) = 10;
1669 CMPCHAR_HASH_USED (hashp) = 2;
1671 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1673 CMPCHAR_HASH_SIZE (hashp) += 8;
1674 hashp = (cmpchar_hash_table[hash_idx]
1675 = (int *) xrealloc (hashp,
1676 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1678 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1679 CMPCHAR_HASH_USED (hashp)++;
1681 /* Set information of the composite character in cmpchar_table. */
1682 if (cmpchar_table_size == 0)
1684 /* This is the first composite character to be registered. */
1685 cmpchar_table_size = 256;
1686 cmpchar_table
1687 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1688 * cmpchar_table_size);
1690 else if (cmpchar_table_size <= n_cmpchars)
1692 cmpchar_table_size += 256;
1693 cmpchar_table
1694 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1695 sizeof (cmpchar_table[0])
1696 * cmpchar_table_size);
1699 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1701 cmpcharp->len = len;
1702 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1703 bcopy (str, cmpcharp->data, len);
1704 cmpcharp->data[len] = 0;
1705 cmpcharp->glyph_len = chars;
1706 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1707 if (embedded_rule)
1709 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1710 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1712 else
1714 cmpcharp->cmp_rule = NULL;
1715 cmpcharp->col_offset = NULL;
1718 /* Setup GLYPH data and composition rules (if any) so as not to make
1719 them every time on displaying. */
1721 unsigned char *bufp;
1722 int width;
1723 float leftmost = 0.0, rightmost = 1.0;
1725 if (embedded_rule)
1726 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1727 cmpcharp->col_offset[0] = 0;
1729 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1731 if (embedded_rule)
1732 cmpcharp->cmp_rule[i] = *bufp++;
1734 if (*bufp == 0xA0) /* This is an ASCII character. */
1736 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1737 width = 1;
1738 bufp++;
1740 else /* Multibyte character. */
1742 /* Make `bufp' point normal multi-byte form temporally. */
1743 *bufp -= 0x20;
1744 cmpcharp->glyph[i]
1745 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1746 width = WIDTH_BY_CHAR_HEAD (*bufp);
1747 *bufp += 0x20;
1748 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1751 if (embedded_rule && i > 0)
1753 /* Reference points (global_ref and new_ref) are
1754 encoded as below:
1756 0--1--2 -- ascent
1759 | 4 -+--- center
1760 -- 3 5 -- baseline
1762 6--7--8 -- descent
1764 Now, we calculate the column offset of the new glyph
1765 from the left edge of the first glyph. This can avoid
1766 the same calculation everytime displaying this
1767 composite character. */
1769 /* Reference points of global glyph and new glyph. */
1770 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1771 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1772 /* Column offset relative to the first glyph. */
1773 float left = (leftmost
1774 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1775 - (new_ref % 3) * width / 2.0);
1777 cmpcharp->col_offset[i] = left;
1778 if (left < leftmost)
1779 leftmost = left;
1780 if (left + width > rightmost)
1781 rightmost = left + width;
1783 else
1785 if (width > rightmost)
1786 rightmost = width;
1789 if (embedded_rule)
1791 /* Now col_offset[N] are relative to the left edge of the
1792 first component. Make them relative to the left edge of
1793 overall glyph. */
1794 for (i = 0; i < chars; i++)
1795 cmpcharp->col_offset[i] -= leftmost;
1796 /* Make rightmost holds width of overall glyph. */
1797 rightmost -= leftmost;
1800 cmpcharp->width = rightmost;
1801 if (cmpcharp->width < rightmost)
1802 /* To get a ceiling integer value. */
1803 cmpcharp->width++;
1806 cmpchar_table[n_cmpchars] = cmpcharp;
1808 return n_cmpchars++;
1811 /* Return the Nth element of the composite character C. If NOERROR is
1812 nonzero, return 0 on error condition (C is an invalid composite
1813 charcter, or N is out of range). */
1815 cmpchar_component (c, n, noerror)
1816 int c, n, noerror;
1818 int id = COMPOSITE_CHAR_ID (c);
1820 if (id < 0 || id >= n_cmpchars)
1822 /* C is not a valid composite character. */
1823 if (noerror) return 0;
1824 error ("Invalid composite character: %d", c) ;
1826 if (n >= cmpchar_table[id]->glyph_len)
1828 /* No such component. */
1829 if (noerror) return 0;
1830 args_out_of_range (make_number (c), make_number (n));
1832 /* No face data is stored in glyph code. */
1833 return ((int) (cmpchar_table[id]->glyph[n]));
1836 DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1837 "T if CHAR is a composite character.")
1838 (ch)
1839 Lisp_Object ch;
1841 CHECK_NUMBER (ch, 0);
1842 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1845 DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1846 2, 2, 0,
1847 "Return the Nth component character of composite character CHARACTER.")
1848 (character, n)
1849 Lisp_Object character, n;
1851 int id;
1853 CHECK_NUMBER (character, 0);
1854 CHECK_NUMBER (n, 1);
1856 return (make_number (cmpchar_component (XINT (character), XINT (n), 0)));
1859 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1860 2, 2, 0,
1861 "Return the Nth composition rule of composite character CHARACTER.\n\
1862 The returned rule is for composing the Nth component\n\
1863 on the (N-1)th component.\n\
1864 If CHARACTER should be composed relatively or N is 0, return 255.")
1865 (character, n)
1866 Lisp_Object character, n;
1868 int id;
1870 CHECK_NUMBER (character, 0);
1871 CHECK_NUMBER (n, 1);
1873 id = COMPOSITE_CHAR_ID (XINT (character));
1874 if (id < 0 || id >= n_cmpchars)
1875 error ("Invalid composite character: %d", XINT (character));
1876 if (XINT (n) < 0 || XINT (n) >= cmpchar_table[id]->glyph_len)
1877 args_out_of_range (character, n);
1879 return make_number (cmpchar_table[id]->cmp_rule
1880 ? cmpchar_table[id]->cmp_rule[XINT (n)]
1881 : 255);
1884 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1885 Scmpchar_cmp_rule_p, 1, 1, 0,
1886 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1887 (character)
1888 Lisp_Object character;
1890 int id;
1892 CHECK_NUMBER (character, 0);
1893 id = COMPOSITE_CHAR_ID (XINT (character));
1894 if (id < 0 || id >= n_cmpchars)
1895 error ("Invalid composite character: %d", XINT (character));
1897 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1900 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1901 Scmpchar_cmp_count, 1, 1, 0,
1902 "Return number of compoents of composite character CHARACTER.")
1903 (character)
1904 Lisp_Object character;
1906 int id;
1908 CHECK_NUMBER (character, 0);
1909 id = COMPOSITE_CHAR_ID (XINT (character));
1910 if (id < 0 || id >= n_cmpchars)
1911 error ("Invalid composite character: %d", XINT (character));
1913 return (make_number (cmpchar_table[id]->glyph_len));
1916 DEFUN ("compose-string", Fcompose_string, Scompose_string,
1917 1, 1, 0,
1918 "Return one char string composed from all characters in STRING.")
1919 (str)
1920 Lisp_Object str;
1922 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1923 int len, i;
1925 CHECK_STRING (str, 0);
1927 buf[0] = LEADING_CODE_COMPOSITION;
1928 p = XSTRING (str)->data;
1929 pend = p + STRING_BYTES (XSTRING (str));
1930 i = 1;
1931 while (p < pend)
1933 if (*p < 0x20) /* control code */
1934 error ("Invalid component character: %d", *p);
1935 else if (*p < 0x80) /* ASCII */
1937 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1938 error ("Too long string to be composed: %s", XSTRING (str)->data);
1939 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1940 code itself. */
1941 buf[i++] = 0xA0;
1942 buf[i++] = *p++ + 0x80;
1944 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1946 /* Already composed. Eliminate the heading
1947 LEADING_CODE_COMPOSITION, keep the remaining bytes
1948 unchanged. */
1949 p++;
1950 if (*p == 255)
1951 error ("Can't compose a rule-based composition character");
1952 ptemp = p;
1953 while (! CHAR_HEAD_P (*p)) p++;
1954 if (str_cmpchar_id (ptemp - 1, p - ptemp + 1) < 0)
1955 error ("Can't compose an invalid composition character");
1956 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1957 error ("Too long string to be composed: %s", XSTRING (str)->data);
1958 bcopy (ptemp, buf + i, p - ptemp);
1959 i += p - ptemp;
1961 else /* multibyte char */
1963 /* Add 0x20 to the base leading-code, keep the remaining
1964 bytes unchanged. */
1965 int c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
1967 if (len <= 1 || ! CHAR_VALID_P (c, 0))
1968 error ("Can't compose an invalid character");
1969 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1970 error ("Too long string to be composed: %s", XSTRING (str)->data);
1971 bcopy (p, buf + i, len);
1972 buf[i] += 0x20;
1973 p += len, i += len;
1977 if (i < 5)
1978 /* STR contains only one character, which can't be composed. */
1979 error ("Too short string to be composed: %s", XSTRING (str)->data);
1981 return make_string_from_bytes (buf, 1, i);
1986 charset_id_internal (charset_name)
1987 char *charset_name;
1989 Lisp_Object val;
1991 val= Fget (intern (charset_name), Qcharset);
1992 if (!VECTORP (val))
1993 error ("Charset %s is not defined", charset_name);
1995 return (XINT (XVECTOR (val)->contents[0]));
1998 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1999 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
2002 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
2003 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
2004 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
2005 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
2006 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
2007 charset_big5_1 = charset_id_internal ("chinese-big5-1");
2008 charset_big5_2 = charset_id_internal ("chinese-big5-2");
2009 return Qnil;
2012 void
2013 init_charset_once ()
2015 int i, j, k;
2017 staticpro (&Vcharset_table);
2018 staticpro (&Vcharset_symbol_table);
2019 staticpro (&Vgeneric_character_list);
2021 /* This has to be done here, before we call Fmake_char_table. */
2022 Qcharset_table = intern ("charset-table");
2023 staticpro (&Qcharset_table);
2025 /* Intern this now in case it isn't already done.
2026 Setting this variable twice is harmless.
2027 But don't staticpro it here--that is done in alloc.c. */
2028 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2030 /* Now we are ready to set up this property, so we can
2031 create the charset table. */
2032 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
2033 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
2035 Qunknown = intern ("unknown");
2036 staticpro (&Qunknown);
2037 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
2038 Qunknown);
2040 /* Setup tables. */
2041 for (i = 0; i < 2; i++)
2042 for (j = 0; j < 2; j++)
2043 for (k = 0; k < 128; k++)
2044 iso_charset_table [i][j][k] = -1;
2046 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
2047 cmpchar_table_size = n_cmpchars = 0;
2049 for (i = 0; i < 256; i++)
2050 BYTES_BY_CHAR_HEAD (i) = 1;
2051 for (i = MIN_CHARSET_OFFICIAL_DIMENSION1;
2052 i <= MAX_CHARSET_OFFICIAL_DIMENSION1; i++)
2053 BYTES_BY_CHAR_HEAD (i) = 2;
2054 for (i = MIN_CHARSET_OFFICIAL_DIMENSION2;
2055 i <= MAX_CHARSET_OFFICIAL_DIMENSION2; i++)
2056 BYTES_BY_CHAR_HEAD (i) = 3;
2057 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
2058 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
2059 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
2060 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
2061 /* The followings don't reflect the actual bytes, but just to tell
2062 that it is a start of a multibyte character. */
2063 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
2064 BYTES_BY_CHAR_HEAD (0x9E) = 2;
2065 BYTES_BY_CHAR_HEAD (0x9F) = 2;
2067 for (i = 0; i < 128; i++)
2068 WIDTH_BY_CHAR_HEAD (i) = 1;
2069 for (; i < 256; i++)
2070 WIDTH_BY_CHAR_HEAD (i) = 4;
2071 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
2072 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
2073 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
2074 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
2077 Lisp_Object val;
2079 val = Qnil;
2080 for (i = 0x81; i < 0x90; i++)
2081 val = Fcons (make_number ((i - 0x70) << 7), val);
2082 for (; i < 0x9A; i++)
2083 val = Fcons (make_number ((i - 0x8F) << 14), val);
2084 for (i = 0xA0; i < 0xF0; i++)
2085 val = Fcons (make_number ((i - 0x70) << 7), val);
2086 for (; i < 0xFF; i++)
2087 val = Fcons (make_number ((i - 0xE0) << 14), val);
2088 val = Fcons (make_number (GENERIC_COMPOSITION_CHAR), val);
2089 Vgeneric_character_list = Fnreverse (val);
2092 nonascii_insert_offset = 0;
2093 Vnonascii_translation_table = Qnil;
2096 #ifdef emacs
2098 void
2099 syms_of_charset ()
2101 Qascii = intern ("ascii");
2102 staticpro (&Qascii);
2104 Qcharset = intern ("charset");
2105 staticpro (&Qcharset);
2107 /* Define ASCII charset now. */
2108 update_charset_table (make_number (CHARSET_ASCII),
2109 make_number (1), make_number (94),
2110 make_number (1),
2111 make_number (0),
2112 make_number ('B'),
2113 make_number (0),
2114 build_string ("ASCII"),
2115 build_string ("ASCII"),
2116 build_string ("ASCII (ISO646 IRV)"));
2117 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
2118 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
2120 Qcomposition = intern ("composition");
2121 staticpro (&Qcomposition);
2122 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
2124 Qauto_fill_chars = intern ("auto-fill-chars");
2125 staticpro (&Qauto_fill_chars);
2126 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
2128 defsubr (&Sdefine_charset);
2129 defsubr (&Sgeneric_character_list);
2130 defsubr (&Sget_unused_iso_final_char);
2131 defsubr (&Sdeclare_equiv_charset);
2132 defsubr (&Sfind_charset_region);
2133 defsubr (&Sfind_charset_string);
2134 defsubr (&Smake_char_internal);
2135 defsubr (&Ssplit_char);
2136 defsubr (&Schar_charset);
2137 defsubr (&Scharset_after);
2138 defsubr (&Siso_charset);
2139 defsubr (&Schar_valid_p);
2140 defsubr (&Sunibyte_char_to_multibyte);
2141 defsubr (&Smultibyte_char_to_unibyte);
2142 defsubr (&Schar_bytes);
2143 defsubr (&Schar_width);
2144 defsubr (&Sstring_width);
2145 defsubr (&Schar_direction);
2146 defsubr (&Schars_in_region);
2147 defsubr (&Sstring);
2148 defsubr (&Scmpcharp);
2149 defsubr (&Scmpchar_component);
2150 defsubr (&Scmpchar_cmp_rule);
2151 defsubr (&Scmpchar_cmp_rule_p);
2152 defsubr (&Scmpchar_cmp_count);
2153 defsubr (&Scompose_string);
2154 defsubr (&Ssetup_special_charsets);
2156 DEFVAR_LISP ("charset-list", &Vcharset_list,
2157 "List of charsets ever defined.");
2158 Vcharset_list = Fcons (Qascii, Qnil);
2160 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
2161 "Vector of cons cell of a symbol and translation table ever defined.\n\
2162 An ID of a translation table is an index of this vector.");
2163 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
2165 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
2166 "Leading-code of composite characters.");
2167 leading_code_composition = LEADING_CODE_COMPOSITION;
2169 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
2170 "Leading-code of private TYPE9N charset of column-width 1.");
2171 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
2173 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
2174 "Leading-code of private TYPE9N charset of column-width 2.");
2175 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
2177 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
2178 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
2179 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
2181 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
2182 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
2183 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
2185 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
2186 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
2187 This is used for converting unibyte text to multibyte,\n\
2188 and for inserting character codes specified by number.\n\n\
2189 This serves to convert a Latin-1 or similar 8-bit character code\n\
2190 to the corresponding Emacs multibyte character code.\n\
2191 Typically the value should be (- (make-char CHARSET 0) 128),\n\
2192 for your choice of character set.\n\
2193 If `nonascii-translation-table' is non-nil, it overrides this variable.");
2194 nonascii_insert_offset = 0;
2196 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
2197 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
2198 This is used for converting unibyte text to multibyte,\n\
2199 and for inserting character codes specified by number.\n\n\
2200 Conversion is performed only when multibyte characters are enabled,\n\
2201 and it serves to convert a Latin-1 or similar 8-bit character code\n\
2202 to the corresponding Emacs character code.\n\n\
2203 If this is nil, `nonascii-insert-offset' is used instead.\n\
2204 See also the docstring of `make-translation-table'.");
2205 Vnonascii_translation_table = Qnil;
2207 DEFVAR_INT ("min-composite-char", &min_composite_char,
2208 "Minimum character code of a composite character.");
2209 min_composite_char = MIN_CHAR_COMPOSITION;
2211 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
2212 "A char-table for characters which invoke auto-filling.\n\
2213 Such characters has value t in this table.");
2214 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
2215 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
2216 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
2219 #endif /* emacs */