Document reserved keys
[emacs.git] / src / charset.c
blob05290e86b4ea2bd9776162eb8f9826c54ae77fa5
1 /* Basic character set support.
3 Copyright (C) 2001-2018 Free Software Foundation, Inc.
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
14 This file is part of GNU Emacs.
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or (at
19 your option) any later version.
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
29 #include <config.h>
31 #include <errno.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <sys/types.h>
36 #include "lisp.h"
37 #include "character.h"
38 #include "charset.h"
39 #include "coding.h"
40 #include "buffer.h"
41 #include "sysstdio.h"
43 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
45 A coded character set ("charset" hereafter) is a meaningful
46 collection (i.e. language, culture, functionality, etc.) of
47 characters. Emacs handles multiple charsets at once. In Emacs Lisp
48 code, a charset is represented by a symbol. In C code, a charset is
49 represented by its ID number or by a pointer to a struct charset.
51 The actual information about each charset is stored in two places.
52 Lispy information is stored in the hash table Vcharset_hash_table as
53 a vector (charset attributes). The other information is stored in
54 charset_table as a struct charset.
58 /* Hash table that contains attributes of each charset. Keys are
59 charset symbols, and values are vectors of charset attributes. */
60 Lisp_Object Vcharset_hash_table;
62 /* Table of struct charset. */
63 struct charset *charset_table;
65 static ptrdiff_t charset_table_size;
66 static int charset_table_used;
68 /* Special charsets corresponding to symbols. */
69 int charset_ascii;
70 int charset_eight_bit;
71 static int charset_iso_8859_1;
72 int charset_unicode;
73 static int charset_emacs;
75 /* The other special charsets. */
76 int charset_jisx0201_roman;
77 int charset_jisx0208_1978;
78 int charset_jisx0208;
79 int charset_ksc5601;
81 /* Charset of unibyte characters. */
82 int charset_unibyte;
84 /* List of charsets ordered by the priority. */
85 Lisp_Object Vcharset_ordered_list;
87 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
88 charsets. */
89 Lisp_Object Vcharset_non_preferred_head;
91 /* Incremented every time we change the priority of charsets.
92 Wraps around. */
93 EMACS_UINT charset_ordered_list_tick;
95 /* List of iso-2022 charsets. */
96 Lisp_Object Viso_2022_charset_list;
98 /* List of emacs-mule charsets. */
99 Lisp_Object Vemacs_mule_charset_list;
101 int emacs_mule_charset[256];
103 /* Mapping table from ISO2022's charset (specified by DIMENSION,
104 CHARS, and FINAL-CHAR) to Emacs' charset. */
105 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
107 #define CODE_POINT_TO_INDEX(charset, code) \
108 ((charset)->code_linear_p \
109 ? (int) ((code) - (charset)->min_code) \
110 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
111 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
112 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
113 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
114 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
115 * (charset)->code_space[11]) \
116 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
117 * (charset)->code_space[7]) \
118 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
119 * (charset)->code_space[3]) \
120 + (((code) & 0xFF) - (charset)->code_space[0]) \
121 - ((charset)->char_index_offset)) \
122 : -1)
125 /* Return the code-point for the character index IDX in CHARSET.
126 IDX should be an unsigned int variable in a valid range (which is
127 always in nonnegative int range too). IDX contains garbage afterwards. */
129 #define INDEX_TO_CODE_POINT(charset, idx) \
130 ((charset)->code_linear_p \
131 ? (idx) + (charset)->min_code \
132 : (idx += (charset)->char_index_offset, \
133 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
134 | (((charset)->code_space[4] \
135 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
136 << 8) \
137 | (((charset)->code_space[8] \
138 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
139 << 16) \
140 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
141 << 24))))
143 /* Structure to hold mapping tables for a charset. Used by temacs
144 invoked for dumping. */
146 static struct
148 /* The current charset for which the following tables are setup. */
149 struct charset *current;
151 /* 1 iff the following table is used for encoder. */
152 short for_encoder;
154 /* When the following table is used for encoding, minimum and
155 maximum character of the current charset. */
156 int min_char, max_char;
158 /* A Unicode character corresponding to the code index 0 (i.e. the
159 minimum code-point) of the current charset, or -1 if the code
160 index 0 is not a Unicode character. This is checked when
161 table.encoder[CHAR] is zero. */
162 int zero_index_char;
164 union {
165 /* Table mapping code-indices (not code-points) of the current
166 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
167 doesn't belong to the current charset. */
168 int decoder[0x10000];
169 /* Table mapping Unicode characters to code-indices of the current
170 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
171 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
172 (0x20000..0x2FFFF). Note that there is no charset map that
173 uses both SMP and SIP. */
174 unsigned short encoder[0x20000];
175 } table;
176 } *temp_charset_work;
178 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
179 do { \
180 if ((CODE) == 0) \
181 temp_charset_work->zero_index_char = (C); \
182 else if ((C) < 0x20000) \
183 temp_charset_work->table.encoder[(C)] = (CODE); \
184 else \
185 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
186 } while (0)
188 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
189 ((C) == temp_charset_work->zero_index_char ? 0 \
190 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
191 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
192 : temp_charset_work->table.encoder[(C) - 0x10000] \
193 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
195 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
196 (temp_charset_work->table.decoder[(CODE)] = (C))
198 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
199 (temp_charset_work->table.decoder[(CODE)])
202 /* Set to 1 to warn that a charset map is loaded and thus a buffer
203 text and a string data may be relocated. */
204 bool charset_map_loaded;
206 struct charset_map_entries
208 struct {
209 unsigned from, to;
210 int c;
211 } entry[0x10000];
212 struct charset_map_entries *next;
215 /* Load the mapping information of CHARSET from ENTRIES for
216 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
217 encoding (CONTROL_FLAG == 2).
219 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
220 and CHARSET->fast_map.
222 If CONTROL_FLAG is 1, setup the following tables according to
223 CHARSET->method and inhibit_load_charset_map.
225 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
226 ----------------------+--------------------+---------------------------
227 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
228 ----------------------+--------------------+---------------------------
229 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
231 If CONTROL_FLAG is 2, setup the following tables.
233 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
234 ----------------------+--------------------+---------------------------
235 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
236 ----------------------+--------------------+--------------------------
237 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
240 static void
241 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
243 Lisp_Object vec UNINIT;
244 Lisp_Object table UNINIT;
245 unsigned max_code = CHARSET_MAX_CODE (charset);
246 bool ascii_compatible_p = charset->ascii_compatible_p;
247 int min_char, max_char, nonascii_min_char;
248 int i;
249 unsigned char *fast_map = charset->fast_map;
251 if (n_entries <= 0)
252 return;
254 if (control_flag)
256 if (! inhibit_load_charset_map)
258 if (control_flag == 1)
260 if (charset->method == CHARSET_METHOD_MAP)
262 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
264 vec = Fmake_vector (make_number (n), make_number (-1));
265 set_charset_attr (charset, charset_decoder, vec);
267 else
269 char_table_set_range (Vchar_unify_table,
270 charset->min_char, charset->max_char,
271 Qnil);
274 else
276 table = Fmake_char_table (Qnil, Qnil);
277 set_charset_attr (charset,
278 (charset->method == CHARSET_METHOD_MAP
279 ? charset_encoder : charset_deunifier),
280 table);
283 else
285 if (! temp_charset_work)
286 temp_charset_work = xmalloc (sizeof *temp_charset_work);
287 if (control_flag == 1)
289 memset (temp_charset_work->table.decoder, -1,
290 sizeof (int) * 0x10000);
292 else
294 memset (temp_charset_work->table.encoder, 0,
295 sizeof (unsigned short) * 0x20000);
296 temp_charset_work->zero_index_char = -1;
298 temp_charset_work->current = charset;
299 temp_charset_work->for_encoder = (control_flag == 2);
300 control_flag += 2;
302 charset_map_loaded = 1;
305 min_char = max_char = entries->entry[0].c;
306 nonascii_min_char = MAX_CHAR;
307 for (i = 0; i < n_entries; i++)
309 unsigned from, to;
310 int from_index, to_index, lim_index;
311 int from_c, to_c;
312 int idx = i % 0x10000;
314 if (i > 0 && idx == 0)
315 entries = entries->next;
316 from = entries->entry[idx].from;
317 to = entries->entry[idx].to;
318 from_c = entries->entry[idx].c;
319 from_index = CODE_POINT_TO_INDEX (charset, from);
320 if (from == to)
322 to_index = from_index;
323 to_c = from_c;
325 else
327 to_index = CODE_POINT_TO_INDEX (charset, to);
328 to_c = from_c + (to_index - from_index);
330 if (from_index < 0 || to_index < 0)
331 continue;
332 lim_index = to_index + 1;
334 if (to_c > max_char)
335 max_char = to_c;
336 else if (from_c < min_char)
337 min_char = from_c;
339 if (control_flag == 1)
341 if (charset->method == CHARSET_METHOD_MAP)
342 for (; from_index < lim_index; from_index++, from_c++)
343 ASET (vec, from_index, make_number (from_c));
344 else
345 for (; from_index < lim_index; from_index++, from_c++)
346 CHAR_TABLE_SET (Vchar_unify_table,
347 CHARSET_CODE_OFFSET (charset) + from_index,
348 make_number (from_c));
350 else if (control_flag == 2)
352 if (charset->method == CHARSET_METHOD_MAP
353 && CHARSET_COMPACT_CODES_P (charset))
354 for (; from_index < lim_index; from_index++, from_c++)
356 unsigned code = from_index;
357 code = INDEX_TO_CODE_POINT (charset, code);
359 if (NILP (CHAR_TABLE_REF (table, from_c)))
360 CHAR_TABLE_SET (table, from_c, make_number (code));
362 else
363 for (; from_index < lim_index; from_index++, from_c++)
365 if (NILP (CHAR_TABLE_REF (table, from_c)))
366 CHAR_TABLE_SET (table, from_c, make_number (from_index));
369 else if (control_flag == 3)
370 for (; from_index < lim_index; from_index++, from_c++)
371 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
372 else if (control_flag == 4)
373 for (; from_index < lim_index; from_index++, from_c++)
374 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
375 else /* control_flag == 0 */
377 if (ascii_compatible_p)
379 if (! ASCII_CHAR_P (from_c))
381 if (from_c < nonascii_min_char)
382 nonascii_min_char = from_c;
384 else if (! ASCII_CHAR_P (to_c))
386 nonascii_min_char = 0x80;
390 for (; from_c <= to_c; from_c++)
391 CHARSET_FAST_MAP_SET (from_c, fast_map);
395 if (control_flag == 0)
397 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
398 ? nonascii_min_char : min_char);
399 CHARSET_MAX_CHAR (charset) = max_char;
401 else if (control_flag == 4)
403 temp_charset_work->min_char = min_char;
404 temp_charset_work->max_char = max_char;
409 /* Read a hexadecimal number (preceded by "0x") from the file FP while
410 paying attention to comment character '#'. LOOKAHEAD is the
411 lookahead byte if it is nonnegative. Store into *TERMINATOR the
412 input byte after the number, or EOF if an end-of-file or input
413 error occurred. Set *OVERFLOW if the number overflows. */
415 static unsigned
416 read_hex (FILE *fp, int lookahead, int *terminator, bool *overflow)
418 int c = lookahead < 0 ? getc_unlocked (fp) : lookahead;
420 while (true)
422 if (c == '#')
424 c = getc_unlocked (fp);
425 while (0 <= c && c != '\n');
426 else if (c == '0')
428 c = getc_unlocked (fp);
429 if (c < 0 || c == 'x')
430 break;
432 if (c < 0)
433 break;
434 c = getc_unlocked (fp);
437 unsigned n = 0;
438 bool v = false;
440 if (0 <= c)
441 while (true)
443 c = getc_unlocked (fp);
444 int digit = char_hexdigit (c);
445 if (digit < 0)
446 break;
447 v |= INT_LEFT_SHIFT_OVERFLOW (n, 4);
448 n = (n << 4) + digit;
451 *terminator = c;
452 *overflow |= v;
453 return n;
456 /* Return a mapping vector for CHARSET loaded from MAPFILE.
457 Each line of MAPFILE has this form
458 0xAAAA 0xCCCC
459 where 0xAAAA is a code-point and 0xCCCC is the corresponding
460 character code, or this form
461 0xAAAA-0xBBBB 0xCCCC
462 where 0xAAAA and 0xBBBB are code-points specifying a range, and
463 0xCCCC is the first character code of the range.
465 The returned vector has this form:
466 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
467 where CODE1 is a code-point or a cons of code-points specifying a
468 range.
470 Note that this function uses `openp' to open MAPFILE but ignores
471 `file-name-handler-alist' to avoid running any Lisp code. */
473 static void
474 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
475 int control_flag)
477 unsigned min_code = CHARSET_MIN_CODE (charset);
478 unsigned max_code = CHARSET_MAX_CODE (charset);
479 int fd;
480 FILE *fp;
481 struct charset_map_entries *head, *entries;
482 int n_entries;
483 AUTO_STRING (map, ".map");
484 AUTO_STRING (txt, ".txt");
485 AUTO_LIST2 (suffixes, map, txt);
486 ptrdiff_t count = SPECPDL_INDEX ();
487 record_unwind_protect_nothing ();
488 specbind (Qfile_name_handler_alist, Qnil);
489 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false);
490 fp = fd < 0 ? 0 : fdopen (fd, "r");
491 if (!fp)
493 int open_errno = errno;
494 emacs_close (fd);
495 report_file_errno ("Loading charset map", mapfile, open_errno);
497 set_unwind_protect_ptr (count, fclose_unwind, fp);
498 unbind_to (count + 1, Qnil);
500 /* Use record_xmalloc, as `charset_map_entries' is
501 large (larger than MAX_ALLOCA). */
502 head = record_xmalloc (sizeof *head);
503 entries = head;
504 memset (entries, 0, sizeof (struct charset_map_entries));
506 n_entries = 0;
507 int ch = -1;
508 while (true)
510 bool overflow = false;
511 unsigned from = read_hex (fp, ch, &ch, &overflow), to;
512 if (ch < 0)
513 break;
514 if (ch == '-')
516 to = read_hex (fp, -1, &ch, &overflow);
517 if (ch < 0)
518 break;
520 else
522 to = from;
523 ch = -1;
525 unsigned c = read_hex (fp, ch, &ch, &overflow);
526 if (ch < 0)
527 break;
529 if (overflow)
530 continue;
531 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
532 continue;
534 if (n_entries == 0x10000)
536 entries->next = record_xmalloc (sizeof *entries->next);
537 entries = entries->next;
538 memset (entries, 0, sizeof (struct charset_map_entries));
539 n_entries = 0;
541 int idx = n_entries;
542 entries->entry[idx].from = from;
543 entries->entry[idx].to = to;
544 entries->entry[idx].c = c;
545 n_entries++;
547 fclose (fp);
548 clear_unwind_protect (count);
550 load_charset_map (charset, head, n_entries, control_flag);
551 unbind_to (count, Qnil);
554 static void
555 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
557 unsigned min_code = CHARSET_MIN_CODE (charset);
558 unsigned max_code = CHARSET_MAX_CODE (charset);
559 struct charset_map_entries *head, *entries;
560 int n_entries;
561 int len = ASIZE (vec);
562 int i;
563 USE_SAFE_ALLOCA;
565 if (len % 2 == 1)
567 add_to_log ("Failure in loading charset map: %V", vec);
568 return;
571 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
572 large (larger than MAX_ALLOCA). */
573 head = SAFE_ALLOCA (sizeof *head);
574 entries = head;
575 memset (entries, 0, sizeof (struct charset_map_entries));
577 n_entries = 0;
578 for (i = 0; i < len; i += 2)
580 Lisp_Object val, val2;
581 unsigned from, to;
582 EMACS_INT c;
583 int idx;
585 val = AREF (vec, i);
586 if (CONSP (val))
588 val2 = XCDR (val);
589 val = XCAR (val);
590 from = XFASTINT (val);
591 to = XFASTINT (val2);
593 else
594 from = to = XFASTINT (val);
595 val = AREF (vec, i + 1);
596 CHECK_NATNUM (val);
597 c = XFASTINT (val);
599 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
600 continue;
602 if (n_entries > 0 && (n_entries % 0x10000) == 0)
604 entries->next = SAFE_ALLOCA (sizeof *entries->next);
605 entries = entries->next;
606 memset (entries, 0, sizeof (struct charset_map_entries));
608 idx = n_entries % 0x10000;
609 entries->entry[idx].from = from;
610 entries->entry[idx].to = to;
611 entries->entry[idx].c = c;
612 n_entries++;
615 load_charset_map (charset, head, n_entries, control_flag);
616 SAFE_FREE ();
620 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
621 map it is (see the comment of load_charset_map for the detail). */
623 static void
624 load_charset (struct charset *charset, int control_flag)
626 Lisp_Object map;
628 if (inhibit_load_charset_map
629 && temp_charset_work
630 && charset == temp_charset_work->current
631 && ((control_flag == 2) == temp_charset_work->for_encoder))
632 return;
634 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
635 map = CHARSET_MAP (charset);
636 else
638 if (! CHARSET_UNIFIED_P (charset))
639 emacs_abort ();
640 map = CHARSET_UNIFY_MAP (charset);
642 if (STRINGP (map))
643 load_charset_map_from_file (charset, map, control_flag);
644 else
645 load_charset_map_from_vector (charset, map, control_flag);
649 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
650 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
651 (Lisp_Object object)
653 return (CHARSETP (object) ? Qt : Qnil);
657 static void
658 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
659 Lisp_Object function, Lisp_Object arg,
660 unsigned int from, unsigned int to)
662 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
663 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
664 Lisp_Object range = Fcons (Qnil, Qnil);
665 int c, stop;
667 c = temp_charset_work->min_char;
668 stop = (temp_charset_work->max_char < 0x20000
669 ? temp_charset_work->max_char : 0xFFFF);
671 while (1)
673 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
675 if (idx >= from_idx && idx <= to_idx)
677 if (NILP (XCAR (range)))
678 XSETCAR (range, make_number (c));
680 else if (! NILP (XCAR (range)))
682 XSETCDR (range, make_number (c - 1));
683 if (c_function)
684 (*c_function) (arg, range);
685 else
686 call2 (function, range, arg);
687 XSETCAR (range, Qnil);
689 if (c == stop)
691 if (c == temp_charset_work->max_char)
693 if (! NILP (XCAR (range)))
695 XSETCDR (range, make_number (c));
696 if (c_function)
697 (*c_function) (arg, range);
698 else
699 call2 (function, range, arg);
701 break;
703 c = 0x1FFFF;
704 stop = temp_charset_work->max_char;
706 c++;
710 void
711 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
712 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
714 Lisp_Object range;
715 bool partial = (from > CHARSET_MIN_CODE (charset)
716 || to < CHARSET_MAX_CODE (charset));
718 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
720 int from_idx = CODE_POINT_TO_INDEX (charset, from);
721 int to_idx = CODE_POINT_TO_INDEX (charset, to);
722 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
723 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
725 if (CHARSET_UNIFIED_P (charset))
727 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
728 load_charset (charset, 2);
729 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
730 map_char_table_for_charset (c_function, function,
731 CHARSET_DEUNIFIER (charset), arg,
732 partial ? charset : NULL, from, to);
733 else
734 map_charset_for_dump (c_function, function, arg, from, to);
737 range = Fcons (make_number (from_c), make_number (to_c));
738 if (NILP (function))
739 (*c_function) (arg, range);
740 else
741 call2 (function, range, arg);
743 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
745 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
746 load_charset (charset, 2);
747 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
748 map_char_table_for_charset (c_function, function,
749 CHARSET_ENCODER (charset), arg,
750 partial ? charset : NULL, from, to);
751 else
752 map_charset_for_dump (c_function, function, arg, from, to);
754 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
756 Lisp_Object subset_info;
757 int offset;
759 subset_info = CHARSET_SUBSET (charset);
760 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
761 offset = XINT (AREF (subset_info, 3));
762 from -= offset;
763 if (from < XFASTINT (AREF (subset_info, 1)))
764 from = XFASTINT (AREF (subset_info, 1));
765 to -= offset;
766 if (to > XFASTINT (AREF (subset_info, 2)))
767 to = XFASTINT (AREF (subset_info, 2));
768 map_charset_chars (c_function, function, arg, charset, from, to);
770 else /* i.e. CHARSET_METHOD_SUPERSET */
772 Lisp_Object parents;
774 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
775 parents = XCDR (parents))
777 int offset;
778 unsigned this_from, this_to;
780 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
781 offset = XINT (XCDR (XCAR (parents)));
782 this_from = from > offset ? from - offset : 0;
783 this_to = to > offset ? to - offset : 0;
784 if (this_from < CHARSET_MIN_CODE (charset))
785 this_from = CHARSET_MIN_CODE (charset);
786 if (this_to > CHARSET_MAX_CODE (charset))
787 this_to = CHARSET_MAX_CODE (charset);
788 map_charset_chars (c_function, function, arg, charset,
789 this_from, this_to);
794 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
795 doc: /* Call FUNCTION for all characters in CHARSET.
796 FUNCTION is called with an argument RANGE and the optional 3rd
797 argument ARG.
799 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
800 characters contained in CHARSET.
802 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
803 range of code points (in CHARSET) of target characters. */)
804 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
806 struct charset *cs;
807 unsigned from, to;
809 CHECK_CHARSET_GET_CHARSET (charset, cs);
810 if (NILP (from_code))
811 from = CHARSET_MIN_CODE (cs);
812 else
814 from = XINT (from_code);
815 if (from < CHARSET_MIN_CODE (cs))
816 from = CHARSET_MIN_CODE (cs);
818 if (NILP (to_code))
819 to = CHARSET_MAX_CODE (cs);
820 else
822 to = XINT (to_code);
823 if (to > CHARSET_MAX_CODE (cs))
824 to = CHARSET_MAX_CODE (cs);
826 map_charset_chars (NULL, function, arg, cs, from, to);
827 return Qnil;
831 /* Define a charset according to the arguments. The Nth argument is
832 the Nth attribute of the charset (the last attribute `charset-id'
833 is not included). See the docstring of `define-charset' for the
834 detail. */
836 DEFUN ("define-charset-internal", Fdefine_charset_internal,
837 Sdefine_charset_internal, charset_arg_max, MANY, 0,
838 doc: /* For internal use only.
839 usage: (define-charset-internal ...) */)
840 (ptrdiff_t nargs, Lisp_Object *args)
842 /* Charset attr vector. */
843 Lisp_Object attrs;
844 Lisp_Object val;
845 EMACS_UINT hash_code;
846 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
847 int i, j;
848 struct charset charset;
849 int id;
850 int dimension;
851 bool new_definition_p;
852 int nchars;
854 if (nargs != charset_arg_max)
855 Fsignal (Qwrong_number_of_arguments,
856 Fcons (intern ("define-charset-internal"),
857 make_number (nargs)));
859 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
861 CHECK_SYMBOL (args[charset_arg_name]);
862 ASET (attrs, charset_name, args[charset_arg_name]);
864 val = args[charset_arg_code_space];
865 for (i = 0, dimension = 0, nchars = 1; ; i++)
867 Lisp_Object min_byte_obj, max_byte_obj;
868 int min_byte, max_byte;
870 min_byte_obj = Faref (val, make_number (i * 2));
871 max_byte_obj = Faref (val, make_number (i * 2 + 1));
872 CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
873 min_byte = XINT (min_byte_obj);
874 CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
875 max_byte = XINT (max_byte_obj);
876 charset.code_space[i * 4] = min_byte;
877 charset.code_space[i * 4 + 1] = max_byte;
878 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
879 if (max_byte > 0)
880 dimension = i + 1;
881 if (i == 3)
882 break;
883 nchars *= charset.code_space[i * 4 + 2];
884 charset.code_space[i * 4 + 3] = nchars;
887 val = args[charset_arg_dimension];
888 if (NILP (val))
889 charset.dimension = dimension;
890 else
892 CHECK_RANGED_INTEGER (val, 1, 4);
893 charset.dimension = XINT (val);
896 charset.code_linear_p
897 = (charset.dimension == 1
898 || (charset.code_space[2] == 256
899 && (charset.dimension == 2
900 || (charset.code_space[6] == 256
901 && (charset.dimension == 3
902 || charset.code_space[10] == 256)))));
904 if (! charset.code_linear_p)
906 charset.code_space_mask = xzalloc (256);
907 for (i = 0; i < 4; i++)
908 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
909 j++)
910 charset.code_space_mask[j] |= (1 << i);
913 charset.iso_chars_96 = charset.code_space[2] == 96;
915 charset.min_code = (charset.code_space[0]
916 | (charset.code_space[4] << 8)
917 | (charset.code_space[8] << 16)
918 | ((unsigned) charset.code_space[12] << 24));
919 charset.max_code = (charset.code_space[1]
920 | (charset.code_space[5] << 8)
921 | (charset.code_space[9] << 16)
922 | ((unsigned) charset.code_space[13] << 24));
923 charset.char_index_offset = 0;
925 val = args[charset_arg_min_code];
926 if (! NILP (val))
928 unsigned code = cons_to_unsigned (val, UINT_MAX);
930 if (code < charset.min_code
931 || code > charset.max_code)
932 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
933 make_fixnum_or_float (charset.max_code), val);
934 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
935 charset.min_code = code;
938 val = args[charset_arg_max_code];
939 if (! NILP (val))
941 unsigned code = cons_to_unsigned (val, UINT_MAX);
943 if (code < charset.min_code
944 || code > charset.max_code)
945 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
946 make_fixnum_or_float (charset.max_code), val);
947 charset.max_code = code;
950 charset.compact_codes_p = charset.max_code < 0x10000;
952 val = args[charset_arg_invalid_code];
953 if (NILP (val))
955 if (charset.min_code > 0)
956 charset.invalid_code = 0;
957 else
959 if (charset.max_code < UINT_MAX)
960 charset.invalid_code = charset.max_code + 1;
961 else
962 error ("Attribute :invalid-code must be specified");
965 else
966 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
968 val = args[charset_arg_iso_final];
969 if (NILP (val))
970 charset.iso_final = -1;
971 else
973 CHECK_NUMBER (val);
974 if (XINT (val) < '0' || XINT (val) > 127)
975 error ("Invalid iso-final-char: %"pI"d", XINT (val));
976 charset.iso_final = XINT (val);
979 val = args[charset_arg_iso_revision];
980 if (NILP (val))
981 charset.iso_revision = -1;
982 else
984 CHECK_RANGED_INTEGER (val, -1, 63);
985 charset.iso_revision = XINT (val);
988 val = args[charset_arg_emacs_mule_id];
989 if (NILP (val))
990 charset.emacs_mule_id = -1;
991 else
993 CHECK_NATNUM (val);
994 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
995 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
996 charset.emacs_mule_id = XINT (val);
999 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1001 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1003 charset.unified_p = 0;
1005 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1007 if (! NILP (args[charset_arg_code_offset]))
1009 val = args[charset_arg_code_offset];
1010 CHECK_CHARACTER (val);
1012 charset.method = CHARSET_METHOD_OFFSET;
1013 charset.code_offset = XINT (val);
1015 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1016 if (MAX_CHAR - charset.code_offset < i)
1017 error ("Unsupported max char: %d", charset.max_char);
1018 charset.max_char = i + charset.code_offset;
1019 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1020 charset.min_char = i + charset.code_offset;
1022 i = (charset.min_char >> 7) << 7;
1023 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1024 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1025 i = (i >> 12) << 12;
1026 for (; i <= charset.max_char; i += 0x1000)
1027 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1028 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1029 charset.ascii_compatible_p = 1;
1031 else if (! NILP (args[charset_arg_map]))
1033 val = args[charset_arg_map];
1034 ASET (attrs, charset_map, val);
1035 charset.method = CHARSET_METHOD_MAP;
1037 else if (! NILP (args[charset_arg_subset]))
1039 Lisp_Object parent;
1040 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1041 struct charset *parent_charset;
1043 val = args[charset_arg_subset];
1044 parent = Fcar (val);
1045 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1046 parent_min_code = Fnth (make_number (1), val);
1047 CHECK_NATNUM (parent_min_code);
1048 parent_max_code = Fnth (make_number (2), val);
1049 CHECK_NATNUM (parent_max_code);
1050 parent_code_offset = Fnth (make_number (3), val);
1051 CHECK_NUMBER (parent_code_offset);
1052 val = make_uninit_vector (4);
1053 ASET (val, 0, make_number (parent_charset->id));
1054 ASET (val, 1, parent_min_code);
1055 ASET (val, 2, parent_max_code);
1056 ASET (val, 3, parent_code_offset);
1057 ASET (attrs, charset_subset, val);
1059 charset.method = CHARSET_METHOD_SUBSET;
1060 /* Here, we just copy the parent's fast_map. It's not accurate,
1061 but at least it works for quickly detecting which character
1062 DOESN'T belong to this charset. */
1063 memcpy (charset.fast_map, parent_charset->fast_map,
1064 sizeof charset.fast_map);
1066 /* We also copy these for parents. */
1067 charset.min_char = parent_charset->min_char;
1068 charset.max_char = parent_charset->max_char;
1070 else if (! NILP (args[charset_arg_superset]))
1072 val = args[charset_arg_superset];
1073 charset.method = CHARSET_METHOD_SUPERSET;
1074 val = Fcopy_sequence (val);
1075 ASET (attrs, charset_superset, val);
1077 charset.min_char = MAX_CHAR;
1078 charset.max_char = 0;
1079 for (; ! NILP (val); val = Fcdr (val))
1081 Lisp_Object elt, car_part, cdr_part;
1082 int this_id, offset;
1083 struct charset *this_charset;
1085 elt = Fcar (val);
1086 if (CONSP (elt))
1088 car_part = XCAR (elt);
1089 cdr_part = XCDR (elt);
1090 CHECK_CHARSET_GET_ID (car_part, this_id);
1091 CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
1092 offset = XINT (cdr_part);
1094 else
1096 CHECK_CHARSET_GET_ID (elt, this_id);
1097 offset = 0;
1099 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1101 this_charset = CHARSET_FROM_ID (this_id);
1102 if (charset.min_char > this_charset->min_char)
1103 charset.min_char = this_charset->min_char;
1104 if (charset.max_char < this_charset->max_char)
1105 charset.max_char = this_charset->max_char;
1106 for (i = 0; i < 190; i++)
1107 charset.fast_map[i] |= this_charset->fast_map[i];
1110 else
1111 error ("None of :code-offset, :map, :parents are specified");
1113 val = args[charset_arg_unify_map];
1114 if (! NILP (val) && !STRINGP (val))
1115 CHECK_VECTOR (val);
1116 ASET (attrs, charset_unify_map, val);
1118 CHECK_LIST (args[charset_arg_plist]);
1119 ASET (attrs, charset_plist, args[charset_arg_plist]);
1121 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1122 &hash_code);
1123 if (charset.hash_index >= 0)
1125 new_definition_p = 0;
1126 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1127 set_hash_value_slot (hash_table, charset.hash_index, attrs);
1129 else
1131 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1132 hash_code);
1133 if (charset_table_used == charset_table_size)
1135 /* Ensure that charset IDs fit into 'int' as well as into the
1136 restriction imposed by fixnums. Although the 'int' restriction
1137 could be removed, too much other code would need altering; for
1138 example, the IDs are stuffed into struct
1139 coding_system.charbuf[i] entries, which are 'int'. */
1140 int old_size = charset_table_size;
1141 ptrdiff_t new_size = old_size;
1142 struct charset *new_table =
1143 xpalloc (0, &new_size, 1,
1144 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1145 sizeof *charset_table);
1146 memcpy (new_table, charset_table, old_size * sizeof *new_table);
1147 charset_table = new_table;
1148 charset_table_size = new_size;
1149 /* FIXME: This leaks memory, as the old charset_table becomes
1150 unreachable. If the old charset table is charset_table_init
1151 then this leak is intentional; otherwise, it's unclear.
1152 If the latter memory leak is intentional, a
1153 comment should be added to explain this. If not, the old
1154 charset_table should be freed, by passing it as the 1st argument
1155 to xpalloc and removing the memcpy. */
1157 id = charset_table_used++;
1158 new_definition_p = 1;
1161 ASET (attrs, charset_id, make_number (id));
1162 charset.id = id;
1163 charset_table[id] = charset;
1165 if (charset.method == CHARSET_METHOD_MAP)
1167 load_charset (&charset, 0);
1168 charset_table[id] = charset;
1171 if (charset.iso_final >= 0)
1173 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1174 charset.iso_final) = id;
1175 if (new_definition_p)
1176 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1177 list1 (make_number (id)));
1178 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1179 charset_jisx0201_roman = id;
1180 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1181 charset_jisx0208_1978 = id;
1182 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1183 charset_jisx0208 = id;
1184 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1185 charset_ksc5601 = id;
1188 if (charset.emacs_mule_id >= 0)
1190 emacs_mule_charset[charset.emacs_mule_id] = id;
1191 if (charset.emacs_mule_id < 0xA0)
1192 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1193 else
1194 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1195 if (new_definition_p)
1196 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1197 list1 (make_number (id)));
1200 if (new_definition_p)
1202 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1203 if (charset.supplementary_p)
1204 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1205 list1 (make_number (id)));
1206 else
1208 Lisp_Object tail;
1210 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1212 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1214 if (cs->supplementary_p)
1215 break;
1217 if (EQ (tail, Vcharset_ordered_list))
1218 Vcharset_ordered_list = Fcons (make_number (id),
1219 Vcharset_ordered_list);
1220 else if (NILP (tail))
1221 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1222 list1 (make_number (id)));
1223 else
1225 val = Fcons (XCAR (tail), XCDR (tail));
1226 XSETCDR (tail, val);
1227 XSETCAR (tail, make_number (id));
1230 charset_ordered_list_tick++;
1233 return Qnil;
1237 /* Same as Fdefine_charset_internal but arguments are more convenient
1238 to call from C (typically in syms_of_charset). This can define a
1239 charset of `offset' method only. Return the ID of the new
1240 charset. */
1242 static int
1243 define_charset_internal (Lisp_Object name,
1244 int dimension,
1245 const char *code_space_chars,
1246 unsigned min_code, unsigned max_code,
1247 int iso_final, int iso_revision, int emacs_mule_id,
1248 bool ascii_compatible, bool supplementary,
1249 int code_offset)
1251 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1252 Lisp_Object args[charset_arg_max];
1253 Lisp_Object val;
1254 int i;
1256 args[charset_arg_name] = name;
1257 args[charset_arg_dimension] = make_number (dimension);
1258 val = make_uninit_vector (8);
1259 for (i = 0; i < 8; i++)
1260 ASET (val, i, make_number (code_space[i]));
1261 args[charset_arg_code_space] = val;
1262 args[charset_arg_min_code] = make_number (min_code);
1263 args[charset_arg_max_code] = make_number (max_code);
1264 args[charset_arg_iso_final]
1265 = (iso_final < 0 ? Qnil : make_number (iso_final));
1266 args[charset_arg_iso_revision] = make_number (iso_revision);
1267 args[charset_arg_emacs_mule_id]
1268 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1269 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1270 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1271 args[charset_arg_invalid_code] = Qnil;
1272 args[charset_arg_code_offset] = make_number (code_offset);
1273 args[charset_arg_map] = Qnil;
1274 args[charset_arg_subset] = Qnil;
1275 args[charset_arg_superset] = Qnil;
1276 args[charset_arg_unify_map] = Qnil;
1278 args[charset_arg_plist] =
1279 listn (CONSTYPE_HEAP, 14,
1280 QCname,
1281 args[charset_arg_name],
1282 intern_c_string (":dimension"),
1283 args[charset_arg_dimension],
1284 intern_c_string (":code-space"),
1285 args[charset_arg_code_space],
1286 intern_c_string (":iso-final-char"),
1287 args[charset_arg_iso_final],
1288 intern_c_string (":emacs-mule-id"),
1289 args[charset_arg_emacs_mule_id],
1290 QCascii_compatible_p,
1291 args[charset_arg_ascii_compatible_p],
1292 intern_c_string (":code-offset"),
1293 args[charset_arg_code_offset]);
1294 Fdefine_charset_internal (charset_arg_max, args);
1296 return XINT (CHARSET_SYMBOL_ID (name));
1300 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1301 Sdefine_charset_alias, 2, 2, 0,
1302 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1303 (Lisp_Object alias, Lisp_Object charset)
1305 Lisp_Object attr;
1307 CHECK_CHARSET_GET_ATTR (charset, attr);
1308 Fputhash (alias, attr, Vcharset_hash_table);
1309 Vcharset_list = Fcons (alias, Vcharset_list);
1310 return Qnil;
1314 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1315 doc: /* Return the property list of CHARSET. */)
1316 (Lisp_Object charset)
1318 Lisp_Object attrs;
1320 CHECK_CHARSET_GET_ATTR (charset, attrs);
1321 return CHARSET_ATTR_PLIST (attrs);
1325 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1326 doc: /* Set CHARSET's property list to PLIST. */)
1327 (Lisp_Object charset, Lisp_Object plist)
1329 Lisp_Object attrs;
1331 CHECK_CHARSET_GET_ATTR (charset, attrs);
1332 ASET (attrs, charset_plist, plist);
1333 return plist;
1337 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1338 doc: /* Unify characters of CHARSET with Unicode.
1339 This means reading the relevant file and installing the table defined
1340 by CHARSET's `:unify-map' property.
1342 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1343 the same meaning as the `:unify-map' attribute in the function
1344 `define-charset' (which see).
1346 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1347 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1349 int id;
1350 struct charset *cs;
1352 CHECK_CHARSET_GET_ID (charset, id);
1353 cs = CHARSET_FROM_ID (id);
1354 if (NILP (deunify)
1355 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1356 : ! CHARSET_UNIFIED_P (cs))
1357 return Qnil;
1359 CHARSET_UNIFIED_P (cs) = 0;
1360 if (NILP (deunify))
1362 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1363 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1364 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1365 if (NILP (unify_map))
1366 unify_map = CHARSET_UNIFY_MAP (cs);
1367 else
1369 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1370 signal_error ("Bad unify-map", unify_map);
1371 set_charset_attr (cs, charset_unify_map, unify_map);
1373 if (NILP (Vchar_unify_table))
1374 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1375 char_table_set_range (Vchar_unify_table,
1376 cs->min_char, cs->max_char, charset);
1377 CHARSET_UNIFIED_P (cs) = 1;
1379 else if (CHAR_TABLE_P (Vchar_unify_table))
1381 unsigned min_code = CHARSET_MIN_CODE (cs);
1382 unsigned max_code = CHARSET_MAX_CODE (cs);
1383 int min_char = DECODE_CHAR (cs, min_code);
1384 int max_char = DECODE_CHAR (cs, max_code);
1386 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1389 return Qnil;
1392 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
1393 Return true if it's a 96-character set, false if 94. */
1395 static bool
1396 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars,
1397 Lisp_Object final_char)
1399 CHECK_NUMBER (dimension);
1400 CHECK_NUMBER (chars);
1401 CHECK_CHARACTER (final_char);
1403 if (! (1 <= XINT (dimension) && XINT (dimension) <= 3))
1404 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
1405 XINT (dimension));
1407 bool chars_flag = XINT (chars) == 96;
1408 if (! (chars_flag || XINT (chars) == 94))
1409 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
1411 int final_ch = XFASTINT (final_char);
1412 if (! ('0' <= final_ch && final_ch <= '~'))
1413 error ("Invalid FINAL-CHAR `%c', it should be `0'..`~'", final_ch);
1415 return chars_flag;
1418 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1419 Sget_unused_iso_final_char, 2, 2, 0,
1420 doc: /*
1421 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1422 DIMENSION is the number of bytes to represent a character: 1 or 2.
1423 CHARS is the number of characters in a dimension: 94 or 96.
1425 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1426 If there's no unused final char for the specified kind of charset,
1427 return nil. */)
1428 (Lisp_Object dimension, Lisp_Object chars)
1430 bool chars_flag = check_iso_charset_parameter (dimension, chars,
1431 make_number ('0'));
1432 for (int final_char = '0'; final_char <= '?'; final_char++)
1433 if (ISO_CHARSET_TABLE (XINT (dimension), chars_flag, final_char) < 0)
1434 return make_number (final_char);
1435 return Qnil;
1439 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1440 4, 4, 0,
1441 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1443 On decoding by an ISO-2022 base coding system, when a charset
1444 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1445 if CHARSET is designated instead. */)
1446 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1448 int id;
1450 CHECK_CHARSET_GET_ID (charset, id);
1451 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
1452 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XFASTINT (final_char)) = id;
1453 return Qnil;
1457 /* Return information about charsets in the text at PTR of NBYTES
1458 bytes, which are NCHARS characters. The value is:
1460 0: Each character is represented by one byte. This is always
1461 true for a unibyte string. For a multibyte string, true if
1462 it contains only ASCII characters.
1464 1: No charsets other than ascii, control-1, and latin-1 are
1465 found.
1467 2: Otherwise.
1471 string_xstring_p (Lisp_Object string)
1473 const unsigned char *p = SDATA (string);
1474 const unsigned char *endp = p + SBYTES (string);
1476 if (SCHARS (string) == SBYTES (string))
1477 return 0;
1479 while (p < endp)
1481 int c = STRING_CHAR_ADVANCE (p);
1483 if (c >= 0x100)
1484 return 2;
1486 return 1;
1490 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1492 CHARSETS is a vector. If Nth element is non-nil, it means the
1493 charset whose id is N is already found.
1495 It may lookup a translation table TABLE if supplied. */
1497 static void
1498 find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1499 ptrdiff_t nbytes, Lisp_Object charsets,
1500 Lisp_Object table, bool multibyte)
1502 const unsigned char *pend = ptr + nbytes;
1504 if (nchars == nbytes)
1506 if (multibyte)
1507 ASET (charsets, charset_ascii, Qt);
1508 else
1509 while (ptr < pend)
1511 int c = *ptr++;
1513 if (!NILP (table))
1514 c = translate_char (table, c);
1515 if (ASCII_CHAR_P (c))
1516 ASET (charsets, charset_ascii, Qt);
1517 else
1518 ASET (charsets, charset_eight_bit, Qt);
1521 else
1523 while (ptr < pend)
1525 int c = STRING_CHAR_ADVANCE (ptr);
1526 struct charset *charset;
1528 if (!NILP (table))
1529 c = translate_char (table, c);
1530 charset = CHAR_CHARSET (c);
1531 ASET (charsets, CHARSET_ID (charset), Qt);
1536 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1537 2, 3, 0,
1538 doc: /* Return a list of charsets in the region between BEG and END.
1539 BEG and END are buffer positions.
1540 Optional arg TABLE if non-nil is a translation table to look up.
1542 If the current buffer is unibyte, the returned list may contain
1543 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1544 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1546 Lisp_Object charsets;
1547 ptrdiff_t from, from_byte, to, stop, stop_byte;
1548 int i;
1549 Lisp_Object val;
1550 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1552 validate_region (&beg, &end);
1553 from = XFASTINT (beg);
1554 stop = to = XFASTINT (end);
1556 if (from < GPT && GPT < to)
1558 stop = GPT;
1559 stop_byte = GPT_BYTE;
1561 else
1562 stop_byte = CHAR_TO_BYTE (stop);
1564 from_byte = CHAR_TO_BYTE (from);
1566 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1567 while (1)
1569 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1570 stop_byte - from_byte, charsets, table,
1571 multibyte);
1572 if (stop < to)
1574 from = stop, from_byte = stop_byte;
1575 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1577 else
1578 break;
1581 val = Qnil;
1582 for (i = charset_table_used - 1; i >= 0; i--)
1583 if (!NILP (AREF (charsets, i)))
1584 val = Fcons (CHARSET_NAME (charset_table + i), val);
1585 return val;
1588 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1589 1, 2, 0,
1590 doc: /* Return a list of charsets in STR.
1591 Optional arg TABLE if non-nil is a translation table to look up.
1593 If STR is unibyte, the returned list may contain
1594 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1595 (Lisp_Object str, Lisp_Object table)
1597 Lisp_Object charsets;
1598 int i;
1599 Lisp_Object val;
1601 CHECK_STRING (str);
1603 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1604 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1605 charsets, table,
1606 STRING_MULTIBYTE (str));
1607 val = Qnil;
1608 for (i = charset_table_used - 1; i >= 0; i--)
1609 if (!NILP (AREF (charsets, i)))
1610 val = Fcons (CHARSET_NAME (charset_table + i), val);
1611 return val;
1616 /* Return a unified character code for C (>= 0x110000). VAL is a
1617 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1618 charset symbol. */
1619 static int
1620 maybe_unify_char (int c, Lisp_Object val)
1622 struct charset *charset;
1624 if (INTEGERP (val))
1625 return XFASTINT (val);
1626 if (NILP (val))
1627 return c;
1629 CHECK_CHARSET_GET_CHARSET (val, charset);
1630 #ifdef REL_ALLOC
1631 /* The call to load_charset below can allocate memory, which screws
1632 callers of this function through STRING_CHAR_* macros that hold C
1633 pointers to buffer text, if REL_ALLOC is used. */
1634 r_alloc_inhibit_buffer_relocation (1);
1635 #endif
1636 load_charset (charset, 1);
1637 if (! inhibit_load_charset_map)
1639 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1640 if (! NILP (val))
1641 c = XFASTINT (val);
1643 else
1645 int code_index = c - CHARSET_CODE_OFFSET (charset);
1646 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1648 if (unified > 0)
1649 c = unified;
1651 #ifdef REL_ALLOC
1652 r_alloc_inhibit_buffer_relocation (0);
1653 #endif
1654 return c;
1658 /* Return a character corresponding to the code-point CODE of
1659 CHARSET. */
1662 decode_char (struct charset *charset, unsigned int code)
1664 int c, char_index;
1665 enum charset_method method = CHARSET_METHOD (charset);
1667 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1668 return -1;
1670 if (method == CHARSET_METHOD_SUBSET)
1672 Lisp_Object subset_info;
1674 subset_info = CHARSET_SUBSET (charset);
1675 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1676 code -= XINT (AREF (subset_info, 3));
1677 if (code < XFASTINT (AREF (subset_info, 1))
1678 || code > XFASTINT (AREF (subset_info, 2)))
1679 c = -1;
1680 else
1681 c = DECODE_CHAR (charset, code);
1683 else if (method == CHARSET_METHOD_SUPERSET)
1685 Lisp_Object parents;
1687 parents = CHARSET_SUPERSET (charset);
1688 c = -1;
1689 for (; CONSP (parents); parents = XCDR (parents))
1691 int id = XINT (XCAR (XCAR (parents)));
1692 int code_offset = XINT (XCDR (XCAR (parents)));
1693 unsigned this_code = code - code_offset;
1695 charset = CHARSET_FROM_ID (id);
1696 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1697 break;
1700 else
1702 char_index = CODE_POINT_TO_INDEX (charset, code);
1703 if (char_index < 0)
1704 return -1;
1706 if (method == CHARSET_METHOD_MAP)
1708 Lisp_Object decoder;
1710 decoder = CHARSET_DECODER (charset);
1711 if (! VECTORP (decoder))
1713 load_charset (charset, 1);
1714 decoder = CHARSET_DECODER (charset);
1716 if (VECTORP (decoder))
1717 c = XINT (AREF (decoder, char_index));
1718 else
1719 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1721 else /* method == CHARSET_METHOD_OFFSET */
1723 c = char_index + CHARSET_CODE_OFFSET (charset);
1724 if (CHARSET_UNIFIED_P (charset)
1725 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1727 /* Unify C with a Unicode character if possible. */
1728 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1729 c = maybe_unify_char (c, val);
1734 return c;
1737 /* Variable used temporarily by the macro ENCODE_CHAR. */
1738 Lisp_Object charset_work;
1740 /* Return a code-point of C in CHARSET. If C doesn't belong to
1741 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1742 use CHARSET's strict_max_char instead of max_char. */
1744 unsigned
1745 encode_char (struct charset *charset, int c)
1747 unsigned code;
1748 enum charset_method method = CHARSET_METHOD (charset);
1750 if (CHARSET_UNIFIED_P (charset))
1752 Lisp_Object deunifier;
1753 int code_index = -1;
1755 deunifier = CHARSET_DEUNIFIER (charset);
1756 if (! CHAR_TABLE_P (deunifier))
1758 load_charset (charset, 2);
1759 deunifier = CHARSET_DEUNIFIER (charset);
1761 if (CHAR_TABLE_P (deunifier))
1763 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1765 if (INTEGERP (deunified))
1766 code_index = XINT (deunified);
1768 else
1770 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1772 if (code_index >= 0)
1773 c = CHARSET_CODE_OFFSET (charset) + code_index;
1776 if (method == CHARSET_METHOD_SUBSET)
1778 Lisp_Object subset_info;
1779 struct charset *this_charset;
1781 subset_info = CHARSET_SUBSET (charset);
1782 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1783 code = ENCODE_CHAR (this_charset, c);
1784 if (code == CHARSET_INVALID_CODE (this_charset)
1785 || code < XFASTINT (AREF (subset_info, 1))
1786 || code > XFASTINT (AREF (subset_info, 2)))
1787 return CHARSET_INVALID_CODE (charset);
1788 code += XINT (AREF (subset_info, 3));
1789 return code;
1792 if (method == CHARSET_METHOD_SUPERSET)
1794 Lisp_Object parents;
1796 parents = CHARSET_SUPERSET (charset);
1797 for (; CONSP (parents); parents = XCDR (parents))
1799 int id = XINT (XCAR (XCAR (parents)));
1800 int code_offset = XINT (XCDR (XCAR (parents)));
1801 struct charset *this_charset = CHARSET_FROM_ID (id);
1803 code = ENCODE_CHAR (this_charset, c);
1804 if (code != CHARSET_INVALID_CODE (this_charset))
1805 return code + code_offset;
1807 return CHARSET_INVALID_CODE (charset);
1810 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1811 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1812 return CHARSET_INVALID_CODE (charset);
1814 if (method == CHARSET_METHOD_MAP)
1816 Lisp_Object encoder;
1817 Lisp_Object val;
1819 encoder = CHARSET_ENCODER (charset);
1820 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1822 load_charset (charset, 2);
1823 encoder = CHARSET_ENCODER (charset);
1825 if (CHAR_TABLE_P (encoder))
1827 val = CHAR_TABLE_REF (encoder, c);
1828 if (NILP (val))
1829 return CHARSET_INVALID_CODE (charset);
1830 code = XINT (val);
1831 if (! CHARSET_COMPACT_CODES_P (charset))
1832 code = INDEX_TO_CODE_POINT (charset, code);
1834 else
1836 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1837 code = INDEX_TO_CODE_POINT (charset, code);
1840 else /* method == CHARSET_METHOD_OFFSET */
1842 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1844 code = INDEX_TO_CODE_POINT (charset, code_index);
1847 return code;
1851 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 2, 0,
1852 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1853 Return nil if CODE-POINT is not valid in CHARSET.
1855 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1856 (Lisp_Object charset, Lisp_Object code_point)
1858 int c, id;
1859 unsigned code;
1860 struct charset *charsetp;
1862 CHECK_CHARSET_GET_ID (charset, id);
1863 code = cons_to_unsigned (code_point, UINT_MAX);
1864 charsetp = CHARSET_FROM_ID (id);
1865 c = DECODE_CHAR (charsetp, code);
1866 return (c >= 0 ? make_number (c) : Qnil);
1870 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 2, 0,
1871 doc: /* Encode the character CH into a code-point of CHARSET.
1872 Return nil if CHARSET doesn't include CH. */)
1873 (Lisp_Object ch, Lisp_Object charset)
1875 int c, id;
1876 unsigned code;
1877 struct charset *charsetp;
1879 CHECK_CHARSET_GET_ID (charset, id);
1880 CHECK_CHARACTER (ch);
1881 c = XFASTINT (ch);
1882 charsetp = CHARSET_FROM_ID (id);
1883 code = ENCODE_CHAR (charsetp, c);
1884 if (code == CHARSET_INVALID_CODE (charsetp))
1885 return Qnil;
1886 return INTEGER_TO_CONS (code);
1890 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1891 doc:
1892 /* Return a character of CHARSET whose position codes are CODEn.
1894 CODE1 through CODE4 are optional, but if you don't supply sufficient
1895 position codes, it is assumed that the minimum code in each dimension
1896 is specified. */)
1897 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1899 int id, dimension;
1900 struct charset *charsetp;
1901 unsigned code;
1902 int c;
1904 CHECK_CHARSET_GET_ID (charset, id);
1905 charsetp = CHARSET_FROM_ID (id);
1907 dimension = CHARSET_DIMENSION (charsetp);
1908 if (NILP (code1))
1909 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1910 ? 0 : CHARSET_MIN_CODE (charsetp));
1911 else
1913 CHECK_NATNUM (code1);
1914 if (XFASTINT (code1) >= 0x100)
1915 args_out_of_range (make_number (0xFF), code1);
1916 code = XFASTINT (code1);
1918 if (dimension > 1)
1920 code <<= 8;
1921 if (NILP (code2))
1922 code |= charsetp->code_space[(dimension - 2) * 4];
1923 else
1925 CHECK_NATNUM (code2);
1926 if (XFASTINT (code2) >= 0x100)
1927 args_out_of_range (make_number (0xFF), code2);
1928 code |= XFASTINT (code2);
1931 if (dimension > 2)
1933 code <<= 8;
1934 if (NILP (code3))
1935 code |= charsetp->code_space[(dimension - 3) * 4];
1936 else
1938 CHECK_NATNUM (code3);
1939 if (XFASTINT (code3) >= 0x100)
1940 args_out_of_range (make_number (0xFF), code3);
1941 code |= XFASTINT (code3);
1944 if (dimension > 3)
1946 code <<= 8;
1947 if (NILP (code4))
1948 code |= charsetp->code_space[0];
1949 else
1951 CHECK_NATNUM (code4);
1952 if (XFASTINT (code4) >= 0x100)
1953 args_out_of_range (make_number (0xFF), code4);
1954 code |= XFASTINT (code4);
1961 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1962 code &= 0x7F7F7F7F;
1963 c = DECODE_CHAR (charsetp, code);
1964 if (c < 0)
1965 error ("Invalid code(s)");
1966 return make_number (c);
1970 /* Return the first charset in CHARSET_LIST that contains C.
1971 CHARSET_LIST is a list of charset IDs. If it is nil, use
1972 Vcharset_ordered_list. */
1974 struct charset *
1975 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1977 bool maybe_null = 0;
1979 if (NILP (charset_list))
1980 charset_list = Vcharset_ordered_list;
1981 else
1982 maybe_null = 1;
1984 while (CONSP (charset_list))
1986 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1987 unsigned code = ENCODE_CHAR (charset, c);
1989 if (code != CHARSET_INVALID_CODE (charset))
1991 if (code_return)
1992 *code_return = code;
1993 return charset;
1995 charset_list = XCDR (charset_list);
1996 if (! maybe_null
1997 && c <= MAX_UNICODE_CHAR
1998 && EQ (charset_list, Vcharset_non_preferred_head))
1999 return CHARSET_FROM_ID (charset_unicode);
2001 return (maybe_null ? NULL
2002 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2003 : CHARSET_FROM_ID (charset_eight_bit));
2007 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2008 doc:
2009 /*Return list of charset and one to four position-codes of CH.
2010 The charset is decided by the current priority order of charsets.
2011 A position-code is a byte value of each dimension of the code-point of
2012 CH in the charset. */)
2013 (Lisp_Object ch)
2015 struct charset *charset;
2016 int c, dimension;
2017 unsigned code;
2018 Lisp_Object val;
2020 CHECK_CHARACTER (ch);
2021 c = XFASTINT (ch);
2022 charset = CHAR_CHARSET (c);
2023 if (! charset)
2024 emacs_abort ();
2025 code = ENCODE_CHAR (charset, c);
2026 if (code == CHARSET_INVALID_CODE (charset))
2027 emacs_abort ();
2028 dimension = CHARSET_DIMENSION (charset);
2029 for (val = Qnil; dimension > 0; dimension--)
2031 val = Fcons (make_number (code & 0xFF), val);
2032 code >>= 8;
2034 return Fcons (CHARSET_NAME (charset), val);
2038 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2039 doc: /* Return the charset of highest priority that contains CH.
2040 ASCII characters are an exception: for them, this function always
2041 returns `ascii'.
2042 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2043 from which to find the charset. It may also be a coding system. In
2044 that case, find the charset from what supported by that coding system. */)
2045 (Lisp_Object ch, Lisp_Object restriction)
2047 struct charset *charset;
2049 CHECK_CHARACTER (ch);
2050 if (NILP (restriction))
2051 charset = CHAR_CHARSET (XINT (ch));
2052 else
2054 if (CONSP (restriction))
2056 int c = XFASTINT (ch);
2058 for (; CONSP (restriction); restriction = XCDR (restriction))
2060 struct charset *rcharset;
2062 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2063 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2064 return XCAR (restriction);
2066 return Qnil;
2068 restriction = coding_system_charset_list (restriction);
2069 charset = char_charset (XINT (ch), restriction, NULL);
2070 if (! charset)
2071 return Qnil;
2073 return (CHARSET_NAME (charset));
2077 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2078 doc: /*
2079 Return charset of a character in the current buffer at position POS.
2080 If POS is nil, it defaults to the current point.
2081 If POS is out of range, the value is nil. */)
2082 (Lisp_Object pos)
2084 Lisp_Object ch;
2085 struct charset *charset;
2087 ch = Fchar_after (pos);
2088 if (! INTEGERP (ch))
2089 return ch;
2090 charset = CHAR_CHARSET (XINT (ch));
2091 return (CHARSET_NAME (charset));
2095 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2096 doc: /*
2097 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2099 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2100 by their DIMENSION, CHARS, and FINAL-CHAR,
2101 whereas Emacs distinguishes them by charset symbol.
2102 See the documentation of the function `charset-info' for the meanings of
2103 DIMENSION, CHARS, and FINAL-CHAR. */)
2104 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2106 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
2107 int id = ISO_CHARSET_TABLE (XINT (dimension), chars_flag,
2108 XFASTINT (final_char));
2109 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2113 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2114 0, 0, 0,
2115 doc: /*
2116 Internal use only.
2117 Clear temporary charset mapping tables.
2118 It should be called only from temacs invoked for dumping. */)
2119 (void)
2121 if (temp_charset_work)
2123 xfree (temp_charset_work);
2124 temp_charset_work = NULL;
2127 if (CHAR_TABLE_P (Vchar_unify_table))
2128 Foptimize_char_table (Vchar_unify_table, Qnil);
2130 return Qnil;
2133 DEFUN ("charset-priority-list", Fcharset_priority_list,
2134 Scharset_priority_list, 0, 1, 0,
2135 doc: /* Return the list of charsets ordered by priority.
2136 HIGHESTP non-nil means just return the highest priority one. */)
2137 (Lisp_Object highestp)
2139 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2141 if (!NILP (highestp))
2142 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2144 while (!NILP (list))
2146 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2147 list = XCDR (list);
2149 return Fnreverse (val);
2152 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2153 1, MANY, 0,
2154 doc: /* Assign higher priority to the charsets given as arguments.
2155 usage: (set-charset-priority &rest charsets) */)
2156 (ptrdiff_t nargs, Lisp_Object *args)
2158 Lisp_Object new_head, old_list;
2159 Lisp_Object list_2022, list_emacs_mule;
2160 ptrdiff_t i;
2161 int id;
2163 old_list = Fcopy_sequence (Vcharset_ordered_list);
2164 new_head = Qnil;
2165 for (i = 0; i < nargs; i++)
2167 CHECK_CHARSET_GET_ID (args[i], id);
2168 if (! NILP (Fmemq (make_number (id), old_list)))
2170 old_list = Fdelq (make_number (id), old_list);
2171 new_head = Fcons (make_number (id), new_head);
2174 Vcharset_non_preferred_head = old_list;
2175 Vcharset_ordered_list = CALLN (Fnconc, Fnreverse (new_head), old_list);
2177 charset_ordered_list_tick++;
2179 charset_unibyte = -1;
2180 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2181 CONSP (old_list); old_list = XCDR (old_list))
2183 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2184 list_2022 = Fcons (XCAR (old_list), list_2022);
2185 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2186 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2187 if (charset_unibyte < 0)
2189 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2191 if (CHARSET_DIMENSION (charset) == 1
2192 && CHARSET_ASCII_COMPATIBLE_P (charset)
2193 && CHARSET_MAX_CHAR (charset) >= 0x80)
2194 charset_unibyte = CHARSET_ID (charset);
2197 Viso_2022_charset_list = Fnreverse (list_2022);
2198 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2199 if (charset_unibyte < 0)
2200 charset_unibyte = charset_iso_8859_1;
2202 return Qnil;
2205 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2206 0, 1, 0,
2207 doc: /* Internal use only.
2208 Return charset identification number of CHARSET. */)
2209 (Lisp_Object charset)
2211 int id;
2213 CHECK_CHARSET_GET_ID (charset, id);
2214 return make_number (id);
2217 struct charset_sort_data
2219 Lisp_Object charset;
2220 int id;
2221 ptrdiff_t priority;
2224 static int
2225 charset_compare (const void *d1, const void *d2)
2227 const struct charset_sort_data *data1 = d1, *data2 = d2;
2228 if (data1->priority != data2->priority)
2229 return data1->priority < data2->priority ? -1 : 1;
2230 return 0;
2233 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2234 doc: /* Sort charset list CHARSETS by a priority of each charset.
2235 Return the sorted list. CHARSETS is modified by side effects.
2236 See also `charset-priority-list' and `set-charset-priority'. */)
2237 (Lisp_Object charsets)
2239 Lisp_Object len = Flength (charsets);
2240 ptrdiff_t n = XFASTINT (len), i, j;
2241 int done;
2242 Lisp_Object tail, elt, attrs;
2243 struct charset_sort_data *sort_data;
2244 int id, min_id = INT_MAX, max_id = INT_MIN;
2245 USE_SAFE_ALLOCA;
2247 if (n == 0)
2248 return Qnil;
2249 SAFE_NALLOCA (sort_data, 1, n);
2250 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2252 elt = XCAR (tail);
2253 CHECK_CHARSET_GET_ATTR (elt, attrs);
2254 sort_data[i].charset = elt;
2255 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2256 if (id < min_id)
2257 min_id = id;
2258 if (id > max_id)
2259 max_id = id;
2261 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2262 done < n && CONSP (tail); tail = XCDR (tail), i++)
2264 elt = XCAR (tail);
2265 id = XFASTINT (elt);
2266 if (id >= min_id && id <= max_id)
2267 for (j = 0; j < n; j++)
2268 if (sort_data[j].id == id)
2270 sort_data[j].priority = i;
2271 done++;
2274 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2275 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2276 XSETCAR (tail, sort_data[i].charset);
2277 SAFE_FREE ();
2278 return charsets;
2282 void
2283 init_charset (void)
2285 Lisp_Object tempdir;
2286 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2287 if (! file_accessible_directory_p (tempdir))
2289 /* This used to be non-fatal (dir_warning), but it should not
2290 happen, and if it does sooner or later it will cause some
2291 obscure problem (eg bug#6401), so better abort. */
2292 fprintf (stderr, "Error: charsets directory not found:\n\
2293 %s\n\
2294 Emacs will not function correctly without the character map files.\n%s\
2295 Please check your installation!\n",
2296 SDATA (tempdir),
2297 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2298 variable is set, maybe it has the wrong value?\n" : "");
2299 exit (1);
2302 Vcharset_map_path = list1 (tempdir);
2306 void
2307 init_charset_once (void)
2309 int i, j, k;
2311 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2312 for (j = 0; j < ISO_MAX_CHARS; j++)
2313 for (k = 0; k < ISO_MAX_FINAL; k++)
2314 iso_charset_table[i][j][k] = -1;
2316 for (i = 0; i < 256; i++)
2317 emacs_mule_charset[i] = -1;
2319 charset_jisx0201_roman = -1;
2320 charset_jisx0208_1978 = -1;
2321 charset_jisx0208 = -1;
2322 charset_ksc5601 = -1;
2325 #ifdef emacs
2327 /* Allocate an initial charset table that is large enough to handle
2328 Emacs while it is bootstrapping. As of September 2011, the size
2329 needs to be at least 166; make it a bit bigger to allow for future
2330 expansion.
2332 Don't make the value so small that the table is reallocated during
2333 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2334 during an initial bootstrap wreak havoc after dumping; see the
2335 M_MMAP_THRESHOLD value in alloc.c, plus there is an extra overhead
2336 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2337 static struct charset charset_table_init[180];
2339 void
2340 syms_of_charset (void)
2342 DEFSYM (Qcharsetp, "charsetp");
2344 /* Special charset symbols. */
2345 DEFSYM (Qascii, "ascii");
2346 DEFSYM (Qunicode, "unicode");
2347 DEFSYM (Qemacs, "emacs");
2348 DEFSYM (Qeight_bit, "eight-bit");
2349 DEFSYM (Qiso_8859_1, "iso-8859-1");
2351 staticpro (&Vcharset_ordered_list);
2352 Vcharset_ordered_list = Qnil;
2354 staticpro (&Viso_2022_charset_list);
2355 Viso_2022_charset_list = Qnil;
2357 staticpro (&Vemacs_mule_charset_list);
2358 Vemacs_mule_charset_list = Qnil;
2360 staticpro (&Vcharset_hash_table);
2361 Vcharset_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
2363 charset_table = charset_table_init;
2364 charset_table_size = ARRAYELTS (charset_table_init);
2365 charset_table_used = 0;
2367 defsubr (&Scharsetp);
2368 defsubr (&Smap_charset_chars);
2369 defsubr (&Sdefine_charset_internal);
2370 defsubr (&Sdefine_charset_alias);
2371 defsubr (&Scharset_plist);
2372 defsubr (&Sset_charset_plist);
2373 defsubr (&Sunify_charset);
2374 defsubr (&Sget_unused_iso_final_char);
2375 defsubr (&Sdeclare_equiv_charset);
2376 defsubr (&Sfind_charset_region);
2377 defsubr (&Sfind_charset_string);
2378 defsubr (&Sdecode_char);
2379 defsubr (&Sencode_char);
2380 defsubr (&Ssplit_char);
2381 defsubr (&Smake_char);
2382 defsubr (&Schar_charset);
2383 defsubr (&Scharset_after);
2384 defsubr (&Siso_charset);
2385 defsubr (&Sclear_charset_maps);
2386 defsubr (&Scharset_priority_list);
2387 defsubr (&Sset_charset_priority);
2388 defsubr (&Scharset_id_internal);
2389 defsubr (&Ssort_charsets);
2391 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2392 doc: /* List of directories to search for charset map files. */);
2393 Vcharset_map_path = Qnil;
2395 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2396 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2397 inhibit_load_charset_map = 0;
2399 DEFVAR_LISP ("charset-list", Vcharset_list,
2400 doc: /* List of all charsets ever defined. */);
2401 Vcharset_list = Qnil;
2403 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2404 doc: /* ISO639 language mnemonic symbol for the current language environment.
2405 If the current language environment is for multiple languages (e.g. "Latin-1"),
2406 the value may be a list of mnemonics. */);
2407 Vcurrent_iso639_language = Qnil;
2409 charset_ascii
2410 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2411 0, 127, 'B', -1, 0, 1, 0, 0);
2412 charset_iso_8859_1
2413 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
2414 0, 255, -1, -1, -1, 1, 0, 0);
2415 charset_unicode
2416 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2417 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2418 charset_emacs
2419 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2420 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2421 charset_eight_bit
2422 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
2423 128, 255, -1, 0, -1, 0, 1,
2424 MAX_5_BYTE_CHAR + 1);
2425 charset_unibyte = charset_iso_8859_1;
2428 #endif /* emacs */