Add separator.pbm tool-bar image
[emacs.git] / src / charset.c
blobeeebf172bdf6b0264eeb385e28255ec1905742de
1 /* Basic character set support.
3 Copyright (C) 2001-2015 Free Software Foundation, Inc.
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
14 This file is part of GNU Emacs.
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or
19 (at your option) any later version.
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
29 #include <config.h>
31 #include <errno.h>
32 #include <stdio.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <sys/types.h>
36 #include <c-ctype.h>
37 #include "lisp.h"
38 #include "character.h"
39 #include "charset.h"
40 #include "coding.h"
41 #include "disptab.h"
42 #include "buffer.h"
44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
46 A coded character set ("charset" hereafter) is a meaningful
47 collection (i.e. language, culture, functionality, etc.) of
48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
55 charset_table as a struct charset.
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table;
63 /* Table of struct charset. */
64 struct charset *charset_table;
66 static ptrdiff_t charset_table_size;
67 static int charset_table_used;
69 /* Special charsets corresponding to symbols. */
70 int charset_ascii;
71 int charset_eight_bit;
72 static int charset_iso_8859_1;
73 int charset_unicode;
74 static int charset_emacs;
76 /* The other special charsets. */
77 int charset_jisx0201_roman;
78 int charset_jisx0208_1978;
79 int charset_jisx0208;
80 int charset_ksc5601;
82 /* Charset of unibyte characters. */
83 int charset_unibyte;
85 /* List of charsets ordered by the priority. */
86 Lisp_Object Vcharset_ordered_list;
88 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
89 charsets. */
90 Lisp_Object Vcharset_non_preferred_head;
92 /* Incremented every time we change the priority of charsets.
93 Wraps around. */
94 EMACS_UINT charset_ordered_list_tick;
96 /* List of iso-2022 charsets. */
97 Lisp_Object Viso_2022_charset_list;
99 /* List of emacs-mule charsets. */
100 Lisp_Object Vemacs_mule_charset_list;
102 int emacs_mule_charset[256];
104 /* Mapping table from ISO2022's charset (specified by DIMENSION,
105 CHARS, and FINAL-CHAR) to Emacs' charset. */
106 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
108 #define CODE_POINT_TO_INDEX(charset, code) \
109 ((charset)->code_linear_p \
110 ? (int) ((code) - (charset)->min_code) \
111 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
112 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
113 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
114 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
115 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
116 * (charset)->code_space[11]) \
117 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
118 * (charset)->code_space[7]) \
119 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
120 * (charset)->code_space[3]) \
121 + (((code) & 0xFF) - (charset)->code_space[0]) \
122 - ((charset)->char_index_offset)) \
123 : -1)
126 /* Return the code-point for the character index IDX in CHARSET.
127 IDX should be an unsigned int variable in a valid range (which is
128 always in nonnegative int range too). IDX contains garbage afterwards. */
130 #define INDEX_TO_CODE_POINT(charset, idx) \
131 ((charset)->code_linear_p \
132 ? (idx) + (charset)->min_code \
133 : (idx += (charset)->char_index_offset, \
134 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
135 | (((charset)->code_space[4] \
136 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
137 << 8) \
138 | (((charset)->code_space[8] \
139 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
140 << 16) \
141 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
142 << 24))))
144 /* Structure to hold mapping tables for a charset. Used by temacs
145 invoked for dumping. */
147 static struct
149 /* The current charset for which the following tables are setup. */
150 struct charset *current;
152 /* 1 iff the following table is used for encoder. */
153 short for_encoder;
155 /* When the following table is used for encoding, minimum and
156 maximum character of the current charset. */
157 int min_char, max_char;
159 /* A Unicode character corresponding to the code index 0 (i.e. the
160 minimum code-point) of the current charset, or -1 if the code
161 index 0 is not a Unicode character. This is checked when
162 table.encoder[CHAR] is zero. */
163 int zero_index_char;
165 union {
166 /* Table mapping code-indices (not code-points) of the current
167 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
168 doesn't belong to the current charset. */
169 int decoder[0x10000];
170 /* Table mapping Unicode characters to code-indices of the current
171 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
172 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
173 (0x20000..0x2FFFF). Note that there is no charset map that
174 uses both SMP and SIP. */
175 unsigned short encoder[0x20000];
176 } table;
177 } *temp_charset_work;
179 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
180 do { \
181 if ((CODE) == 0) \
182 temp_charset_work->zero_index_char = (C); \
183 else if ((C) < 0x20000) \
184 temp_charset_work->table.encoder[(C)] = (CODE); \
185 else \
186 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
187 } while (0)
189 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
190 ((C) == temp_charset_work->zero_index_char ? 0 \
191 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
192 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
193 : temp_charset_work->table.encoder[(C) - 0x10000] \
194 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
196 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
197 (temp_charset_work->table.decoder[(CODE)] = (C))
199 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
200 (temp_charset_work->table.decoder[(CODE)])
203 /* Set to 1 to warn that a charset map is loaded and thus a buffer
204 text and a string data may be relocated. */
205 bool charset_map_loaded;
207 struct charset_map_entries
209 struct {
210 unsigned from, to;
211 int c;
212 } entry[0x10000];
213 struct charset_map_entries *next;
216 /* Load the mapping information of CHARSET from ENTRIES for
217 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
218 encoding (CONTROL_FLAG == 2).
220 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
221 and CHARSET->fast_map.
223 If CONTROL_FLAG is 1, setup the following tables according to
224 CHARSET->method and inhibit_load_charset_map.
226 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
227 ----------------------+--------------------+---------------------------
228 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
229 ----------------------+--------------------+---------------------------
230 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
232 If CONTROL_FLAG is 2, setup the following tables.
234 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
235 ----------------------+--------------------+---------------------------
236 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
237 ----------------------+--------------------+--------------------------
238 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
241 static void
242 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
244 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
245 unsigned max_code = CHARSET_MAX_CODE (charset);
246 bool ascii_compatible_p = charset->ascii_compatible_p;
247 int min_char, max_char, nonascii_min_char;
248 int i;
249 unsigned char *fast_map = charset->fast_map;
251 if (n_entries <= 0)
252 return;
254 if (control_flag)
256 if (! inhibit_load_charset_map)
258 if (control_flag == 1)
260 if (charset->method == CHARSET_METHOD_MAP)
262 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
264 vec = Fmake_vector (make_number (n), make_number (-1));
265 set_charset_attr (charset, charset_decoder, vec);
267 else
269 char_table_set_range (Vchar_unify_table,
270 charset->min_char, charset->max_char,
271 Qnil);
274 else
276 table = Fmake_char_table (Qnil, Qnil);
277 set_charset_attr (charset,
278 (charset->method == CHARSET_METHOD_MAP
279 ? charset_encoder : charset_deunifier),
280 table);
283 else
285 if (! temp_charset_work)
286 temp_charset_work = xmalloc (sizeof *temp_charset_work);
287 if (control_flag == 1)
289 memset (temp_charset_work->table.decoder, -1,
290 sizeof (int) * 0x10000);
292 else
294 memset (temp_charset_work->table.encoder, 0,
295 sizeof (unsigned short) * 0x20000);
296 temp_charset_work->zero_index_char = -1;
298 temp_charset_work->current = charset;
299 temp_charset_work->for_encoder = (control_flag == 2);
300 control_flag += 2;
302 charset_map_loaded = 1;
305 min_char = max_char = entries->entry[0].c;
306 nonascii_min_char = MAX_CHAR;
307 for (i = 0; i < n_entries; i++)
309 unsigned from, to;
310 int from_index, to_index, lim_index;
311 int from_c, to_c;
312 int idx = i % 0x10000;
314 if (i > 0 && idx == 0)
315 entries = entries->next;
316 from = entries->entry[idx].from;
317 to = entries->entry[idx].to;
318 from_c = entries->entry[idx].c;
319 from_index = CODE_POINT_TO_INDEX (charset, from);
320 if (from == to)
322 to_index = from_index;
323 to_c = from_c;
325 else
327 to_index = CODE_POINT_TO_INDEX (charset, to);
328 to_c = from_c + (to_index - from_index);
330 if (from_index < 0 || to_index < 0)
331 continue;
332 lim_index = to_index + 1;
334 if (to_c > max_char)
335 max_char = to_c;
336 else if (from_c < min_char)
337 min_char = from_c;
339 if (control_flag == 1)
341 if (charset->method == CHARSET_METHOD_MAP)
342 for (; from_index < lim_index; from_index++, from_c++)
343 ASET (vec, from_index, make_number (from_c));
344 else
345 for (; from_index < lim_index; from_index++, from_c++)
346 CHAR_TABLE_SET (Vchar_unify_table,
347 CHARSET_CODE_OFFSET (charset) + from_index,
348 make_number (from_c));
350 else if (control_flag == 2)
352 if (charset->method == CHARSET_METHOD_MAP
353 && CHARSET_COMPACT_CODES_P (charset))
354 for (; from_index < lim_index; from_index++, from_c++)
356 unsigned code = from_index;
357 code = INDEX_TO_CODE_POINT (charset, code);
359 if (NILP (CHAR_TABLE_REF (table, from_c)))
360 CHAR_TABLE_SET (table, from_c, make_number (code));
362 else
363 for (; from_index < lim_index; from_index++, from_c++)
365 if (NILP (CHAR_TABLE_REF (table, from_c)))
366 CHAR_TABLE_SET (table, from_c, make_number (from_index));
369 else if (control_flag == 3)
370 for (; from_index < lim_index; from_index++, from_c++)
371 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
372 else if (control_flag == 4)
373 for (; from_index < lim_index; from_index++, from_c++)
374 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
375 else /* control_flag == 0 */
377 if (ascii_compatible_p)
379 if (! ASCII_CHAR_P (from_c))
381 if (from_c < nonascii_min_char)
382 nonascii_min_char = from_c;
384 else if (! ASCII_CHAR_P (to_c))
386 nonascii_min_char = 0x80;
390 for (; from_c <= to_c; from_c++)
391 CHARSET_FAST_MAP_SET (from_c, fast_map);
395 if (control_flag == 0)
397 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
398 ? nonascii_min_char : min_char);
399 CHARSET_MAX_CHAR (charset) = max_char;
401 else if (control_flag == 4)
403 temp_charset_work->min_char = min_char;
404 temp_charset_work->max_char = max_char;
409 /* Read a hexadecimal number (preceded by "0x") from the file FP while
410 paying attention to comment character '#'. */
412 static unsigned
413 read_hex (FILE *fp, bool *eof, bool *overflow)
415 int c;
416 unsigned n;
418 while ((c = getc (fp)) != EOF)
420 if (c == '#')
422 while ((c = getc (fp)) != EOF && c != '\n');
424 else if (c == '0')
426 if ((c = getc (fp)) == EOF || c == 'x')
427 break;
430 if (c == EOF)
432 *eof = 1;
433 return 0;
435 n = 0;
436 while (c_isxdigit (c = getc (fp)))
438 if (UINT_MAX >> 4 < n)
439 *overflow = 1;
440 n = ((n << 4)
441 | (c - ('0' <= c && c <= '9' ? '0'
442 : 'A' <= c && c <= 'F' ? 'A' - 10
443 : 'a' - 10)));
445 if (c != EOF)
446 ungetc (c, fp);
447 return n;
450 /* Return a mapping vector for CHARSET loaded from MAPFILE.
451 Each line of MAPFILE has this form
452 0xAAAA 0xCCCC
453 where 0xAAAA is a code-point and 0xCCCC is the corresponding
454 character code, or this form
455 0xAAAA-0xBBBB 0xCCCC
456 where 0xAAAA and 0xBBBB are code-points specifying a range, and
457 0xCCCC is the first character code of the range.
459 The returned vector has this form:
460 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
461 where CODE1 is a code-point or a cons of code-points specifying a
462 range.
464 Note that this function uses `openp' to open MAPFILE but ignores
465 `file-name-handler-alist' to avoid running any Lisp code. */
467 static void
468 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
469 int control_flag)
471 unsigned min_code = CHARSET_MIN_CODE (charset);
472 unsigned max_code = CHARSET_MAX_CODE (charset);
473 int fd;
474 FILE *fp;
475 struct charset_map_entries *head, *entries;
476 int n_entries;
477 AUTO_STRING (map, ".map");
478 AUTO_STRING (txt, ".txt");
479 AUTO_LIST2 (suffixes, map, txt);
480 ptrdiff_t count = SPECPDL_INDEX ();
481 record_unwind_protect_nothing ();
482 specbind (Qfile_name_handler_alist, Qnil);
483 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false);
484 fp = fd < 0 ? 0 : fdopen (fd, "r");
485 if (!fp)
487 int open_errno = errno;
488 emacs_close (fd);
489 report_file_errno ("Loading charset map", mapfile, open_errno);
491 set_unwind_protect_ptr (count, fclose_unwind, fp);
492 unbind_to (count + 1, Qnil);
494 /* Use record_xmalloc, as `charset_map_entries' is
495 large (larger than MAX_ALLOCA). */
496 head = record_xmalloc (sizeof *head);
497 entries = head;
498 memset (entries, 0, sizeof (struct charset_map_entries));
500 n_entries = 0;
501 while (1)
503 unsigned from, to, c;
504 int idx;
505 bool eof = 0, overflow = 0;
507 from = read_hex (fp, &eof, &overflow);
508 if (eof)
509 break;
510 if (getc (fp) == '-')
511 to = read_hex (fp, &eof, &overflow);
512 else
513 to = from;
514 if (eof)
515 break;
516 c = read_hex (fp, &eof, &overflow);
517 if (eof)
518 break;
520 if (overflow)
521 continue;
522 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
523 continue;
525 if (n_entries == 0x10000)
527 entries->next = record_xmalloc (sizeof *entries->next);
528 entries = entries->next;
529 memset (entries, 0, sizeof (struct charset_map_entries));
530 n_entries = 0;
532 idx = n_entries;
533 entries->entry[idx].from = from;
534 entries->entry[idx].to = to;
535 entries->entry[idx].c = c;
536 n_entries++;
538 fclose (fp);
539 clear_unwind_protect (count);
541 load_charset_map (charset, head, n_entries, control_flag);
542 unbind_to (count, Qnil);
545 static void
546 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
548 unsigned min_code = CHARSET_MIN_CODE (charset);
549 unsigned max_code = CHARSET_MAX_CODE (charset);
550 struct charset_map_entries *head, *entries;
551 int n_entries;
552 int len = ASIZE (vec);
553 int i;
554 USE_SAFE_ALLOCA;
556 if (len % 2 == 1)
558 add_to_log ("Failure in loading charset map: %V", vec);
559 return;
562 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
563 large (larger than MAX_ALLOCA). */
564 head = SAFE_ALLOCA (sizeof *head);
565 entries = head;
566 memset (entries, 0, sizeof (struct charset_map_entries));
568 n_entries = 0;
569 for (i = 0; i < len; i += 2)
571 Lisp_Object val, val2;
572 unsigned from, to;
573 EMACS_INT c;
574 int idx;
576 val = AREF (vec, i);
577 if (CONSP (val))
579 val2 = XCDR (val);
580 val = XCAR (val);
581 from = XFASTINT (val);
582 to = XFASTINT (val2);
584 else
585 from = to = XFASTINT (val);
586 val = AREF (vec, i + 1);
587 CHECK_NATNUM (val);
588 c = XFASTINT (val);
590 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
591 continue;
593 if (n_entries > 0 && (n_entries % 0x10000) == 0)
595 entries->next = SAFE_ALLOCA (sizeof *entries->next);
596 entries = entries->next;
597 memset (entries, 0, sizeof (struct charset_map_entries));
599 idx = n_entries % 0x10000;
600 entries->entry[idx].from = from;
601 entries->entry[idx].to = to;
602 entries->entry[idx].c = c;
603 n_entries++;
606 load_charset_map (charset, head, n_entries, control_flag);
607 SAFE_FREE ();
611 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
612 map it is (see the comment of load_charset_map for the detail). */
614 static void
615 load_charset (struct charset *charset, int control_flag)
617 Lisp_Object map;
619 if (inhibit_load_charset_map
620 && temp_charset_work
621 && charset == temp_charset_work->current
622 && ((control_flag == 2) == temp_charset_work->for_encoder))
623 return;
625 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
626 map = CHARSET_MAP (charset);
627 else
629 if (! CHARSET_UNIFIED_P (charset))
630 emacs_abort ();
631 map = CHARSET_UNIFY_MAP (charset);
633 if (STRINGP (map))
634 load_charset_map_from_file (charset, map, control_flag);
635 else
636 load_charset_map_from_vector (charset, map, control_flag);
640 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
641 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
642 (Lisp_Object object)
644 return (CHARSETP (object) ? Qt : Qnil);
648 static void
649 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
650 Lisp_Object function, Lisp_Object arg,
651 unsigned int from, unsigned int to)
653 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
654 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
655 Lisp_Object range = Fcons (Qnil, Qnil);
656 int c, stop;
658 c = temp_charset_work->min_char;
659 stop = (temp_charset_work->max_char < 0x20000
660 ? temp_charset_work->max_char : 0xFFFF);
662 while (1)
664 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
666 if (idx >= from_idx && idx <= to_idx)
668 if (NILP (XCAR (range)))
669 XSETCAR (range, make_number (c));
671 else if (! NILP (XCAR (range)))
673 XSETCDR (range, make_number (c - 1));
674 if (c_function)
675 (*c_function) (arg, range);
676 else
677 call2 (function, range, arg);
678 XSETCAR (range, Qnil);
680 if (c == stop)
682 if (c == temp_charset_work->max_char)
684 if (! NILP (XCAR (range)))
686 XSETCDR (range, make_number (c));
687 if (c_function)
688 (*c_function) (arg, range);
689 else
690 call2 (function, range, arg);
692 break;
694 c = 0x1FFFF;
695 stop = temp_charset_work->max_char;
697 c++;
701 void
702 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
703 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
705 Lisp_Object range;
706 bool partial = (from > CHARSET_MIN_CODE (charset)
707 || to < CHARSET_MAX_CODE (charset));
709 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
711 int from_idx = CODE_POINT_TO_INDEX (charset, from);
712 int to_idx = CODE_POINT_TO_INDEX (charset, to);
713 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
714 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
716 if (CHARSET_UNIFIED_P (charset))
718 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
719 load_charset (charset, 2);
720 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
721 map_char_table_for_charset (c_function, function,
722 CHARSET_DEUNIFIER (charset), arg,
723 partial ? charset : NULL, from, to);
724 else
725 map_charset_for_dump (c_function, function, arg, from, to);
728 range = Fcons (make_number (from_c), make_number (to_c));
729 if (NILP (function))
730 (*c_function) (arg, range);
731 else
732 call2 (function, range, arg);
734 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
736 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
737 load_charset (charset, 2);
738 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
739 map_char_table_for_charset (c_function, function,
740 CHARSET_ENCODER (charset), arg,
741 partial ? charset : NULL, from, to);
742 else
743 map_charset_for_dump (c_function, function, arg, from, to);
745 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
747 Lisp_Object subset_info;
748 int offset;
750 subset_info = CHARSET_SUBSET (charset);
751 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
752 offset = XINT (AREF (subset_info, 3));
753 from -= offset;
754 if (from < XFASTINT (AREF (subset_info, 1)))
755 from = XFASTINT (AREF (subset_info, 1));
756 to -= offset;
757 if (to > XFASTINT (AREF (subset_info, 2)))
758 to = XFASTINT (AREF (subset_info, 2));
759 map_charset_chars (c_function, function, arg, charset, from, to);
761 else /* i.e. CHARSET_METHOD_SUPERSET */
763 Lisp_Object parents;
765 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
766 parents = XCDR (parents))
768 int offset;
769 unsigned this_from, this_to;
771 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
772 offset = XINT (XCDR (XCAR (parents)));
773 this_from = from > offset ? from - offset : 0;
774 this_to = to > offset ? to - offset : 0;
775 if (this_from < CHARSET_MIN_CODE (charset))
776 this_from = CHARSET_MIN_CODE (charset);
777 if (this_to > CHARSET_MAX_CODE (charset))
778 this_to = CHARSET_MAX_CODE (charset);
779 map_charset_chars (c_function, function, arg, charset,
780 this_from, this_to);
785 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
786 doc: /* Call FUNCTION for all characters in CHARSET.
787 FUNCTION is called with an argument RANGE and the optional 3rd
788 argument ARG.
790 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
791 characters contained in CHARSET.
793 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
794 range of code points (in CHARSET) of target characters. */)
795 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
797 struct charset *cs;
798 unsigned from, to;
800 CHECK_CHARSET_GET_CHARSET (charset, cs);
801 if (NILP (from_code))
802 from = CHARSET_MIN_CODE (cs);
803 else
805 from = XINT (from_code);
806 if (from < CHARSET_MIN_CODE (cs))
807 from = CHARSET_MIN_CODE (cs);
809 if (NILP (to_code))
810 to = CHARSET_MAX_CODE (cs);
811 else
813 to = XINT (to_code);
814 if (to > CHARSET_MAX_CODE (cs))
815 to = CHARSET_MAX_CODE (cs);
817 map_charset_chars (NULL, function, arg, cs, from, to);
818 return Qnil;
822 /* Define a charset according to the arguments. The Nth argument is
823 the Nth attribute of the charset (the last attribute `charset-id'
824 is not included). See the docstring of `define-charset' for the
825 detail. */
827 DEFUN ("define-charset-internal", Fdefine_charset_internal,
828 Sdefine_charset_internal, charset_arg_max, MANY, 0,
829 doc: /* For internal use only.
830 usage: (define-charset-internal ...) */)
831 (ptrdiff_t nargs, Lisp_Object *args)
833 /* Charset attr vector. */
834 Lisp_Object attrs;
835 Lisp_Object val;
836 EMACS_UINT hash_code;
837 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
838 int i, j;
839 struct charset charset;
840 int id;
841 int dimension;
842 bool new_definition_p;
843 int nchars;
845 if (nargs != charset_arg_max)
846 return Fsignal (Qwrong_number_of_arguments,
847 Fcons (intern ("define-charset-internal"),
848 make_number (nargs)));
850 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
852 CHECK_SYMBOL (args[charset_arg_name]);
853 ASET (attrs, charset_name, args[charset_arg_name]);
855 val = args[charset_arg_code_space];
856 for (i = 0, dimension = 0, nchars = 1; ; i++)
858 Lisp_Object min_byte_obj, max_byte_obj;
859 int min_byte, max_byte;
861 min_byte_obj = Faref (val, make_number (i * 2));
862 max_byte_obj = Faref (val, make_number (i * 2 + 1));
863 CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
864 min_byte = XINT (min_byte_obj);
865 CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
866 max_byte = XINT (max_byte_obj);
867 charset.code_space[i * 4] = min_byte;
868 charset.code_space[i * 4 + 1] = max_byte;
869 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
870 if (max_byte > 0)
871 dimension = i + 1;
872 if (i == 3)
873 break;
874 nchars *= charset.code_space[i * 4 + 2];
875 charset.code_space[i * 4 + 3] = nchars;
878 val = args[charset_arg_dimension];
879 if (NILP (val))
880 charset.dimension = dimension;
881 else
883 CHECK_RANGED_INTEGER (val, 1, 4);
884 charset.dimension = XINT (val);
887 charset.code_linear_p
888 = (charset.dimension == 1
889 || (charset.code_space[2] == 256
890 && (charset.dimension == 2
891 || (charset.code_space[6] == 256
892 && (charset.dimension == 3
893 || charset.code_space[10] == 256)))));
895 if (! charset.code_linear_p)
897 charset.code_space_mask = xzalloc (256);
898 for (i = 0; i < 4; i++)
899 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
900 j++)
901 charset.code_space_mask[j] |= (1 << i);
904 charset.iso_chars_96 = charset.code_space[2] == 96;
906 charset.min_code = (charset.code_space[0]
907 | (charset.code_space[4] << 8)
908 | (charset.code_space[8] << 16)
909 | ((unsigned) charset.code_space[12] << 24));
910 charset.max_code = (charset.code_space[1]
911 | (charset.code_space[5] << 8)
912 | (charset.code_space[9] << 16)
913 | ((unsigned) charset.code_space[13] << 24));
914 charset.char_index_offset = 0;
916 val = args[charset_arg_min_code];
917 if (! NILP (val))
919 unsigned code = cons_to_unsigned (val, UINT_MAX);
921 if (code < charset.min_code
922 || code > charset.max_code)
923 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
924 make_fixnum_or_float (charset.max_code), val);
925 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
926 charset.min_code = code;
929 val = args[charset_arg_max_code];
930 if (! NILP (val))
932 unsigned code = cons_to_unsigned (val, UINT_MAX);
934 if (code < charset.min_code
935 || code > charset.max_code)
936 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
937 make_fixnum_or_float (charset.max_code), val);
938 charset.max_code = code;
941 charset.compact_codes_p = charset.max_code < 0x10000;
943 val = args[charset_arg_invalid_code];
944 if (NILP (val))
946 if (charset.min_code > 0)
947 charset.invalid_code = 0;
948 else
950 if (charset.max_code < UINT_MAX)
951 charset.invalid_code = charset.max_code + 1;
952 else
953 error ("Attribute :invalid-code must be specified");
956 else
957 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
959 val = args[charset_arg_iso_final];
960 if (NILP (val))
961 charset.iso_final = -1;
962 else
964 CHECK_NUMBER (val);
965 if (XINT (val) < '0' || XINT (val) > 127)
966 error ("Invalid iso-final-char: %"pI"d", XINT (val));
967 charset.iso_final = XINT (val);
970 val = args[charset_arg_iso_revision];
971 if (NILP (val))
972 charset.iso_revision = -1;
973 else
975 CHECK_RANGED_INTEGER (val, -1, 63);
976 charset.iso_revision = XINT (val);
979 val = args[charset_arg_emacs_mule_id];
980 if (NILP (val))
981 charset.emacs_mule_id = -1;
982 else
984 CHECK_NATNUM (val);
985 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
986 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
987 charset.emacs_mule_id = XINT (val);
990 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
992 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
994 charset.unified_p = 0;
996 memset (charset.fast_map, 0, sizeof (charset.fast_map));
998 if (! NILP (args[charset_arg_code_offset]))
1000 val = args[charset_arg_code_offset];
1001 CHECK_CHARACTER (val);
1003 charset.method = CHARSET_METHOD_OFFSET;
1004 charset.code_offset = XINT (val);
1006 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1007 if (MAX_CHAR - charset.code_offset < i)
1008 error ("Unsupported max char: %d", charset.max_char);
1009 charset.max_char = i + charset.code_offset;
1010 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1011 charset.min_char = i + charset.code_offset;
1013 i = (charset.min_char >> 7) << 7;
1014 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1015 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1016 i = (i >> 12) << 12;
1017 for (; i <= charset.max_char; i += 0x1000)
1018 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1019 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1020 charset.ascii_compatible_p = 1;
1022 else if (! NILP (args[charset_arg_map]))
1024 val = args[charset_arg_map];
1025 ASET (attrs, charset_map, val);
1026 charset.method = CHARSET_METHOD_MAP;
1028 else if (! NILP (args[charset_arg_subset]))
1030 Lisp_Object parent;
1031 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1032 struct charset *parent_charset;
1034 val = args[charset_arg_subset];
1035 parent = Fcar (val);
1036 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1037 parent_min_code = Fnth (make_number (1), val);
1038 CHECK_NATNUM (parent_min_code);
1039 parent_max_code = Fnth (make_number (2), val);
1040 CHECK_NATNUM (parent_max_code);
1041 parent_code_offset = Fnth (make_number (3), val);
1042 CHECK_NUMBER (parent_code_offset);
1043 val = make_uninit_vector (4);
1044 ASET (val, 0, make_number (parent_charset->id));
1045 ASET (val, 1, parent_min_code);
1046 ASET (val, 2, parent_max_code);
1047 ASET (val, 3, parent_code_offset);
1048 ASET (attrs, charset_subset, val);
1050 charset.method = CHARSET_METHOD_SUBSET;
1051 /* Here, we just copy the parent's fast_map. It's not accurate,
1052 but at least it works for quickly detecting which character
1053 DOESN'T belong to this charset. */
1054 for (i = 0; i < 190; i++)
1055 charset.fast_map[i] = parent_charset->fast_map[i];
1057 /* We also copy these for parents. */
1058 charset.min_char = parent_charset->min_char;
1059 charset.max_char = parent_charset->max_char;
1061 else if (! NILP (args[charset_arg_superset]))
1063 val = args[charset_arg_superset];
1064 charset.method = CHARSET_METHOD_SUPERSET;
1065 val = Fcopy_sequence (val);
1066 ASET (attrs, charset_superset, val);
1068 charset.min_char = MAX_CHAR;
1069 charset.max_char = 0;
1070 for (; ! NILP (val); val = Fcdr (val))
1072 Lisp_Object elt, car_part, cdr_part;
1073 int this_id, offset;
1074 struct charset *this_charset;
1076 elt = Fcar (val);
1077 if (CONSP (elt))
1079 car_part = XCAR (elt);
1080 cdr_part = XCDR (elt);
1081 CHECK_CHARSET_GET_ID (car_part, this_id);
1082 CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
1083 offset = XINT (cdr_part);
1085 else
1087 CHECK_CHARSET_GET_ID (elt, this_id);
1088 offset = 0;
1090 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1092 this_charset = CHARSET_FROM_ID (this_id);
1093 if (charset.min_char > this_charset->min_char)
1094 charset.min_char = this_charset->min_char;
1095 if (charset.max_char < this_charset->max_char)
1096 charset.max_char = this_charset->max_char;
1097 for (i = 0; i < 190; i++)
1098 charset.fast_map[i] |= this_charset->fast_map[i];
1101 else
1102 error ("None of :code-offset, :map, :parents are specified");
1104 val = args[charset_arg_unify_map];
1105 if (! NILP (val) && !STRINGP (val))
1106 CHECK_VECTOR (val);
1107 ASET (attrs, charset_unify_map, val);
1109 CHECK_LIST (args[charset_arg_plist]);
1110 ASET (attrs, charset_plist, args[charset_arg_plist]);
1112 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1113 &hash_code);
1114 if (charset.hash_index >= 0)
1116 new_definition_p = 0;
1117 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1118 set_hash_value_slot (hash_table, charset.hash_index, attrs);
1120 else
1122 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1123 hash_code);
1124 if (charset_table_used == charset_table_size)
1126 /* Ensure that charset IDs fit into 'int' as well as into the
1127 restriction imposed by fixnums. Although the 'int' restriction
1128 could be removed, too much other code would need altering; for
1129 example, the IDs are stuffed into struct
1130 coding_system.charbuf[i] entries, which are 'int'. */
1131 int old_size = charset_table_size;
1132 ptrdiff_t new_size = old_size;
1133 struct charset *new_table =
1134 xpalloc (0, &new_size, 1,
1135 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1136 sizeof *charset_table);
1137 memcpy (new_table, charset_table, old_size * sizeof *new_table);
1138 charset_table = new_table;
1139 charset_table_size = new_size;
1140 /* FIXME: This leaks memory, as the old charset_table becomes
1141 unreachable. If the old charset table is charset_table_init
1142 then this leak is intentional; otherwise, it's unclear.
1143 If the latter memory leak is intentional, a
1144 comment should be added to explain this. If not, the old
1145 charset_table should be freed, by passing it as the 1st argument
1146 to xpalloc and removing the memcpy. */
1148 id = charset_table_used++;
1149 new_definition_p = 1;
1152 ASET (attrs, charset_id, make_number (id));
1153 charset.id = id;
1154 charset_table[id] = charset;
1156 if (charset.method == CHARSET_METHOD_MAP)
1158 load_charset (&charset, 0);
1159 charset_table[id] = charset;
1162 if (charset.iso_final >= 0)
1164 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1165 charset.iso_final) = id;
1166 if (new_definition_p)
1167 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1168 list1 (make_number (id)));
1169 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1170 charset_jisx0201_roman = id;
1171 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1172 charset_jisx0208_1978 = id;
1173 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1174 charset_jisx0208 = id;
1175 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1176 charset_ksc5601 = id;
1179 if (charset.emacs_mule_id >= 0)
1181 emacs_mule_charset[charset.emacs_mule_id] = id;
1182 if (charset.emacs_mule_id < 0xA0)
1183 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1184 else
1185 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1186 if (new_definition_p)
1187 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1188 list1 (make_number (id)));
1191 if (new_definition_p)
1193 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1194 if (charset.supplementary_p)
1195 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1196 list1 (make_number (id)));
1197 else
1199 Lisp_Object tail;
1201 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1203 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1205 if (cs->supplementary_p)
1206 break;
1208 if (EQ (tail, Vcharset_ordered_list))
1209 Vcharset_ordered_list = Fcons (make_number (id),
1210 Vcharset_ordered_list);
1211 else if (NILP (tail))
1212 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1213 list1 (make_number (id)));
1214 else
1216 val = Fcons (XCAR (tail), XCDR (tail));
1217 XSETCDR (tail, val);
1218 XSETCAR (tail, make_number (id));
1221 charset_ordered_list_tick++;
1224 return Qnil;
1228 /* Same as Fdefine_charset_internal but arguments are more convenient
1229 to call from C (typically in syms_of_charset). This can define a
1230 charset of `offset' method only. Return the ID of the new
1231 charset. */
1233 static int
1234 define_charset_internal (Lisp_Object name,
1235 int dimension,
1236 const char *code_space_chars,
1237 unsigned min_code, unsigned max_code,
1238 int iso_final, int iso_revision, int emacs_mule_id,
1239 bool ascii_compatible, bool supplementary,
1240 int code_offset)
1242 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1243 Lisp_Object args[charset_arg_max];
1244 Lisp_Object val;
1245 int i;
1247 args[charset_arg_name] = name;
1248 args[charset_arg_dimension] = make_number (dimension);
1249 val = make_uninit_vector (8);
1250 for (i = 0; i < 8; i++)
1251 ASET (val, i, make_number (code_space[i]));
1252 args[charset_arg_code_space] = val;
1253 args[charset_arg_min_code] = make_number (min_code);
1254 args[charset_arg_max_code] = make_number (max_code);
1255 args[charset_arg_iso_final]
1256 = (iso_final < 0 ? Qnil : make_number (iso_final));
1257 args[charset_arg_iso_revision] = make_number (iso_revision);
1258 args[charset_arg_emacs_mule_id]
1259 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1260 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1261 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1262 args[charset_arg_invalid_code] = Qnil;
1263 args[charset_arg_code_offset] = make_number (code_offset);
1264 args[charset_arg_map] = Qnil;
1265 args[charset_arg_subset] = Qnil;
1266 args[charset_arg_superset] = Qnil;
1267 args[charset_arg_unify_map] = Qnil;
1269 args[charset_arg_plist] =
1270 listn (CONSTYPE_HEAP, 14,
1271 QCname,
1272 args[charset_arg_name],
1273 intern_c_string (":dimension"),
1274 args[charset_arg_dimension],
1275 intern_c_string (":code-space"),
1276 args[charset_arg_code_space],
1277 intern_c_string (":iso-final-char"),
1278 args[charset_arg_iso_final],
1279 intern_c_string (":emacs-mule-id"),
1280 args[charset_arg_emacs_mule_id],
1281 QCascii_compatible_p,
1282 args[charset_arg_ascii_compatible_p],
1283 intern_c_string (":code-offset"),
1284 args[charset_arg_code_offset]);
1285 Fdefine_charset_internal (charset_arg_max, args);
1287 return XINT (CHARSET_SYMBOL_ID (name));
1291 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1292 Sdefine_charset_alias, 2, 2, 0,
1293 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1294 (Lisp_Object alias, Lisp_Object charset)
1296 Lisp_Object attr;
1298 CHECK_CHARSET_GET_ATTR (charset, attr);
1299 Fputhash (alias, attr, Vcharset_hash_table);
1300 Vcharset_list = Fcons (alias, Vcharset_list);
1301 return Qnil;
1305 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1306 doc: /* Return the property list of CHARSET. */)
1307 (Lisp_Object charset)
1309 Lisp_Object attrs;
1311 CHECK_CHARSET_GET_ATTR (charset, attrs);
1312 return CHARSET_ATTR_PLIST (attrs);
1316 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1317 doc: /* Set CHARSET's property list to PLIST. */)
1318 (Lisp_Object charset, Lisp_Object plist)
1320 Lisp_Object attrs;
1322 CHECK_CHARSET_GET_ATTR (charset, attrs);
1323 ASET (attrs, charset_plist, plist);
1324 return plist;
1328 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1329 doc: /* Unify characters of CHARSET with Unicode.
1330 This means reading the relevant file and installing the table defined
1331 by CHARSET's `:unify-map' property.
1333 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1334 the same meaning as the `:unify-map' attribute in the function
1335 `define-charset' (which see).
1337 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1338 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1340 int id;
1341 struct charset *cs;
1343 CHECK_CHARSET_GET_ID (charset, id);
1344 cs = CHARSET_FROM_ID (id);
1345 if (NILP (deunify)
1346 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1347 : ! CHARSET_UNIFIED_P (cs))
1348 return Qnil;
1350 CHARSET_UNIFIED_P (cs) = 0;
1351 if (NILP (deunify))
1353 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1354 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1355 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1356 if (NILP (unify_map))
1357 unify_map = CHARSET_UNIFY_MAP (cs);
1358 else
1360 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1361 signal_error ("Bad unify-map", unify_map);
1362 set_charset_attr (cs, charset_unify_map, unify_map);
1364 if (NILP (Vchar_unify_table))
1365 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1366 char_table_set_range (Vchar_unify_table,
1367 cs->min_char, cs->max_char, charset);
1368 CHARSET_UNIFIED_P (cs) = 1;
1370 else if (CHAR_TABLE_P (Vchar_unify_table))
1372 unsigned min_code = CHARSET_MIN_CODE (cs);
1373 unsigned max_code = CHARSET_MAX_CODE (cs);
1374 int min_char = DECODE_CHAR (cs, min_code);
1375 int max_char = DECODE_CHAR (cs, max_code);
1377 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1380 return Qnil;
1383 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
1384 Return true if it's a 96-character set, false if 94. */
1386 static bool
1387 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars,
1388 Lisp_Object final_char)
1390 CHECK_NUMBER (dimension);
1391 CHECK_NUMBER (chars);
1392 CHECK_CHARACTER (final_char);
1394 if (! (1 <= XINT (dimension) && XINT (dimension) <= 3))
1395 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
1396 XINT (dimension));
1398 bool chars_flag = XINT (chars) == 96;
1399 if (! (chars_flag || XINT (chars) == 94))
1400 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
1402 int final_ch = XFASTINT (final_char);
1403 if (! ('0' <= final_ch && final_ch <= '~'))
1404 error ("Invalid FINAL-CHAR '%c', it should be '0'..'~'", final_ch);
1406 return chars_flag;
1409 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1410 Sget_unused_iso_final_char, 2, 2, 0,
1411 doc: /*
1412 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1413 DIMENSION is the number of bytes to represent a character: 1 or 2.
1414 CHARS is the number of characters in a dimension: 94 or 96.
1416 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1417 If there's no unused final char for the specified kind of charset,
1418 return nil. */)
1419 (Lisp_Object dimension, Lisp_Object chars)
1421 bool chars_flag = check_iso_charset_parameter (dimension, chars,
1422 make_number ('0'));
1423 for (int final_char = '0'; final_char <= '?'; final_char++)
1424 if (ISO_CHARSET_TABLE (XINT (dimension), chars_flag, final_char) < 0)
1425 return make_number (final_char);
1426 return Qnil;
1430 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1431 4, 4, 0,
1432 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1434 On decoding by an ISO-2022 base coding system, when a charset
1435 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1436 if CHARSET is designated instead. */)
1437 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1439 int id;
1441 CHECK_CHARSET_GET_ID (charset, id);
1442 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
1443 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XFASTINT (final_char)) = id;
1444 return Qnil;
1448 /* Return information about charsets in the text at PTR of NBYTES
1449 bytes, which are NCHARS characters. The value is:
1451 0: Each character is represented by one byte. This is always
1452 true for a unibyte string. For a multibyte string, true if
1453 it contains only ASCII characters.
1455 1: No charsets other than ascii, control-1, and latin-1 are
1456 found.
1458 2: Otherwise.
1462 string_xstring_p (Lisp_Object string)
1464 const unsigned char *p = SDATA (string);
1465 const unsigned char *endp = p + SBYTES (string);
1467 if (SCHARS (string) == SBYTES (string))
1468 return 0;
1470 while (p < endp)
1472 int c = STRING_CHAR_ADVANCE (p);
1474 if (c >= 0x100)
1475 return 2;
1477 return 1;
1481 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1483 CHARSETS is a vector. If Nth element is non-nil, it means the
1484 charset whose id is N is already found.
1486 It may lookup a translation table TABLE if supplied. */
1488 static void
1489 find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1490 ptrdiff_t nbytes, Lisp_Object charsets,
1491 Lisp_Object table, bool multibyte)
1493 const unsigned char *pend = ptr + nbytes;
1495 if (nchars == nbytes)
1497 if (multibyte)
1498 ASET (charsets, charset_ascii, Qt);
1499 else
1500 while (ptr < pend)
1502 int c = *ptr++;
1504 if (!NILP (table))
1505 c = translate_char (table, c);
1506 if (ASCII_CHAR_P (c))
1507 ASET (charsets, charset_ascii, Qt);
1508 else
1509 ASET (charsets, charset_eight_bit, Qt);
1512 else
1514 while (ptr < pend)
1516 int c = STRING_CHAR_ADVANCE (ptr);
1517 struct charset *charset;
1519 if (!NILP (table))
1520 c = translate_char (table, c);
1521 charset = CHAR_CHARSET (c);
1522 ASET (charsets, CHARSET_ID (charset), Qt);
1527 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1528 2, 3, 0,
1529 doc: /* Return a list of charsets in the region between BEG and END.
1530 BEG and END are buffer positions.
1531 Optional arg TABLE if non-nil is a translation table to look up.
1533 If the current buffer is unibyte, the returned list may contain
1534 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1535 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1537 Lisp_Object charsets;
1538 ptrdiff_t from, from_byte, to, stop, stop_byte;
1539 int i;
1540 Lisp_Object val;
1541 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1543 validate_region (&beg, &end);
1544 from = XFASTINT (beg);
1545 stop = to = XFASTINT (end);
1547 if (from < GPT && GPT < to)
1549 stop = GPT;
1550 stop_byte = GPT_BYTE;
1552 else
1553 stop_byte = CHAR_TO_BYTE (stop);
1555 from_byte = CHAR_TO_BYTE (from);
1557 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1558 while (1)
1560 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1561 stop_byte - from_byte, charsets, table,
1562 multibyte);
1563 if (stop < to)
1565 from = stop, from_byte = stop_byte;
1566 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1568 else
1569 break;
1572 val = Qnil;
1573 for (i = charset_table_used - 1; i >= 0; i--)
1574 if (!NILP (AREF (charsets, i)))
1575 val = Fcons (CHARSET_NAME (charset_table + i), val);
1576 return val;
1579 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1580 1, 2, 0,
1581 doc: /* Return a list of charsets in STR.
1582 Optional arg TABLE if non-nil is a translation table to look up.
1584 If STR is unibyte, the returned list may contain
1585 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1586 (Lisp_Object str, Lisp_Object table)
1588 Lisp_Object charsets;
1589 int i;
1590 Lisp_Object val;
1592 CHECK_STRING (str);
1594 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1595 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1596 charsets, table,
1597 STRING_MULTIBYTE (str));
1598 val = Qnil;
1599 for (i = charset_table_used - 1; i >= 0; i--)
1600 if (!NILP (AREF (charsets, i)))
1601 val = Fcons (CHARSET_NAME (charset_table + i), val);
1602 return val;
1607 /* Return a unified character code for C (>= 0x110000). VAL is a
1608 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1609 charset symbol. */
1610 static int
1611 maybe_unify_char (int c, Lisp_Object val)
1613 struct charset *charset;
1615 if (INTEGERP (val))
1616 return XFASTINT (val);
1617 if (NILP (val))
1618 return c;
1620 CHECK_CHARSET_GET_CHARSET (val, charset);
1621 #ifdef REL_ALLOC
1622 /* The call to load_charset below can allocate memory, which screws
1623 callers of this function through STRING_CHAR_* macros that hold C
1624 pointers to buffer text, if REL_ALLOC is used. */
1625 r_alloc_inhibit_buffer_relocation (1);
1626 #endif
1627 load_charset (charset, 1);
1628 if (! inhibit_load_charset_map)
1630 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1631 if (! NILP (val))
1632 c = XFASTINT (val);
1634 else
1636 int code_index = c - CHARSET_CODE_OFFSET (charset);
1637 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1639 if (unified > 0)
1640 c = unified;
1642 #ifdef REL_ALLOC
1643 r_alloc_inhibit_buffer_relocation (0);
1644 #endif
1645 return c;
1649 /* Return a character corresponding to the code-point CODE of
1650 CHARSET. */
1653 decode_char (struct charset *charset, unsigned int code)
1655 int c, char_index;
1656 enum charset_method method = CHARSET_METHOD (charset);
1658 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1659 return -1;
1661 if (method == CHARSET_METHOD_SUBSET)
1663 Lisp_Object subset_info;
1665 subset_info = CHARSET_SUBSET (charset);
1666 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1667 code -= XINT (AREF (subset_info, 3));
1668 if (code < XFASTINT (AREF (subset_info, 1))
1669 || code > XFASTINT (AREF (subset_info, 2)))
1670 c = -1;
1671 else
1672 c = DECODE_CHAR (charset, code);
1674 else if (method == CHARSET_METHOD_SUPERSET)
1676 Lisp_Object parents;
1678 parents = CHARSET_SUPERSET (charset);
1679 c = -1;
1680 for (; CONSP (parents); parents = XCDR (parents))
1682 int id = XINT (XCAR (XCAR (parents)));
1683 int code_offset = XINT (XCDR (XCAR (parents)));
1684 unsigned this_code = code - code_offset;
1686 charset = CHARSET_FROM_ID (id);
1687 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1688 break;
1691 else
1693 char_index = CODE_POINT_TO_INDEX (charset, code);
1694 if (char_index < 0)
1695 return -1;
1697 if (method == CHARSET_METHOD_MAP)
1699 Lisp_Object decoder;
1701 decoder = CHARSET_DECODER (charset);
1702 if (! VECTORP (decoder))
1704 load_charset (charset, 1);
1705 decoder = CHARSET_DECODER (charset);
1707 if (VECTORP (decoder))
1708 c = XINT (AREF (decoder, char_index));
1709 else
1710 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1712 else /* method == CHARSET_METHOD_OFFSET */
1714 c = char_index + CHARSET_CODE_OFFSET (charset);
1715 if (CHARSET_UNIFIED_P (charset)
1716 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1718 /* Unify C with a Unicode character if possible. */
1719 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1720 c = maybe_unify_char (c, val);
1725 return c;
1728 /* Variable used temporarily by the macro ENCODE_CHAR. */
1729 Lisp_Object charset_work;
1731 /* Return a code-point of C in CHARSET. If C doesn't belong to
1732 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1733 use CHARSET's strict_max_char instead of max_char. */
1735 unsigned
1736 encode_char (struct charset *charset, int c)
1738 unsigned code;
1739 enum charset_method method = CHARSET_METHOD (charset);
1741 if (CHARSET_UNIFIED_P (charset))
1743 Lisp_Object deunifier;
1744 int code_index = -1;
1746 deunifier = CHARSET_DEUNIFIER (charset);
1747 if (! CHAR_TABLE_P (deunifier))
1749 load_charset (charset, 2);
1750 deunifier = CHARSET_DEUNIFIER (charset);
1752 if (CHAR_TABLE_P (deunifier))
1754 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1756 if (INTEGERP (deunified))
1757 code_index = XINT (deunified);
1759 else
1761 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1763 if (code_index >= 0)
1764 c = CHARSET_CODE_OFFSET (charset) + code_index;
1767 if (method == CHARSET_METHOD_SUBSET)
1769 Lisp_Object subset_info;
1770 struct charset *this_charset;
1772 subset_info = CHARSET_SUBSET (charset);
1773 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1774 code = ENCODE_CHAR (this_charset, c);
1775 if (code == CHARSET_INVALID_CODE (this_charset)
1776 || code < XFASTINT (AREF (subset_info, 1))
1777 || code > XFASTINT (AREF (subset_info, 2)))
1778 return CHARSET_INVALID_CODE (charset);
1779 code += XINT (AREF (subset_info, 3));
1780 return code;
1783 if (method == CHARSET_METHOD_SUPERSET)
1785 Lisp_Object parents;
1787 parents = CHARSET_SUPERSET (charset);
1788 for (; CONSP (parents); parents = XCDR (parents))
1790 int id = XINT (XCAR (XCAR (parents)));
1791 int code_offset = XINT (XCDR (XCAR (parents)));
1792 struct charset *this_charset = CHARSET_FROM_ID (id);
1794 code = ENCODE_CHAR (this_charset, c);
1795 if (code != CHARSET_INVALID_CODE (this_charset))
1796 return code + code_offset;
1798 return CHARSET_INVALID_CODE (charset);
1801 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1802 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1803 return CHARSET_INVALID_CODE (charset);
1805 if (method == CHARSET_METHOD_MAP)
1807 Lisp_Object encoder;
1808 Lisp_Object val;
1810 encoder = CHARSET_ENCODER (charset);
1811 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1813 load_charset (charset, 2);
1814 encoder = CHARSET_ENCODER (charset);
1816 if (CHAR_TABLE_P (encoder))
1818 val = CHAR_TABLE_REF (encoder, c);
1819 if (NILP (val))
1820 return CHARSET_INVALID_CODE (charset);
1821 code = XINT (val);
1822 if (! CHARSET_COMPACT_CODES_P (charset))
1823 code = INDEX_TO_CODE_POINT (charset, code);
1825 else
1827 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1828 code = INDEX_TO_CODE_POINT (charset, code);
1831 else /* method == CHARSET_METHOD_OFFSET */
1833 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1835 code = INDEX_TO_CODE_POINT (charset, code_index);
1838 return code;
1842 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1843 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1844 Return nil if CODE-POINT is not valid in CHARSET.
1846 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1847 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1849 int c, id;
1850 unsigned code;
1851 struct charset *charsetp;
1853 CHECK_CHARSET_GET_ID (charset, id);
1854 code = cons_to_unsigned (code_point, UINT_MAX);
1855 charsetp = CHARSET_FROM_ID (id);
1856 c = DECODE_CHAR (charsetp, code);
1857 return (c >= 0 ? make_number (c) : Qnil);
1861 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1862 doc: /* Encode the character CH into a code-point of CHARSET.
1863 Return nil if CHARSET doesn't include CH. */)
1864 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1866 int c, id;
1867 unsigned code;
1868 struct charset *charsetp;
1870 CHECK_CHARSET_GET_ID (charset, id);
1871 CHECK_CHARACTER (ch);
1872 c = XFASTINT (ch);
1873 charsetp = CHARSET_FROM_ID (id);
1874 code = ENCODE_CHAR (charsetp, c);
1875 if (code == CHARSET_INVALID_CODE (charsetp))
1876 return Qnil;
1877 return INTEGER_TO_CONS (code);
1881 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1882 doc:
1883 /* Return a character of CHARSET whose position codes are CODEn.
1885 CODE1 through CODE4 are optional, but if you don't supply sufficient
1886 position codes, it is assumed that the minimum code in each dimension
1887 is specified. */)
1888 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1890 int id, dimension;
1891 struct charset *charsetp;
1892 unsigned code;
1893 int c;
1895 CHECK_CHARSET_GET_ID (charset, id);
1896 charsetp = CHARSET_FROM_ID (id);
1898 dimension = CHARSET_DIMENSION (charsetp);
1899 if (NILP (code1))
1900 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1901 ? 0 : CHARSET_MIN_CODE (charsetp));
1902 else
1904 CHECK_NATNUM (code1);
1905 if (XFASTINT (code1) >= 0x100)
1906 args_out_of_range (make_number (0xFF), code1);
1907 code = XFASTINT (code1);
1909 if (dimension > 1)
1911 code <<= 8;
1912 if (NILP (code2))
1913 code |= charsetp->code_space[(dimension - 2) * 4];
1914 else
1916 CHECK_NATNUM (code2);
1917 if (XFASTINT (code2) >= 0x100)
1918 args_out_of_range (make_number (0xFF), code2);
1919 code |= XFASTINT (code2);
1922 if (dimension > 2)
1924 code <<= 8;
1925 if (NILP (code3))
1926 code |= charsetp->code_space[(dimension - 3) * 4];
1927 else
1929 CHECK_NATNUM (code3);
1930 if (XFASTINT (code3) >= 0x100)
1931 args_out_of_range (make_number (0xFF), code3);
1932 code |= XFASTINT (code3);
1935 if (dimension > 3)
1937 code <<= 8;
1938 if (NILP (code4))
1939 code |= charsetp->code_space[0];
1940 else
1942 CHECK_NATNUM (code4);
1943 if (XFASTINT (code4) >= 0x100)
1944 args_out_of_range (make_number (0xFF), code4);
1945 code |= XFASTINT (code4);
1952 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1953 code &= 0x7F7F7F7F;
1954 c = DECODE_CHAR (charsetp, code);
1955 if (c < 0)
1956 error ("Invalid code(s)");
1957 return make_number (c);
1961 /* Return the first charset in CHARSET_LIST that contains C.
1962 CHARSET_LIST is a list of charset IDs. If it is nil, use
1963 Vcharset_ordered_list. */
1965 struct charset *
1966 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1968 bool maybe_null = 0;
1970 if (NILP (charset_list))
1971 charset_list = Vcharset_ordered_list;
1972 else
1973 maybe_null = 1;
1975 while (CONSP (charset_list))
1977 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1978 unsigned code = ENCODE_CHAR (charset, c);
1980 if (code != CHARSET_INVALID_CODE (charset))
1982 if (code_return)
1983 *code_return = code;
1984 return charset;
1986 charset_list = XCDR (charset_list);
1987 if (! maybe_null
1988 && c <= MAX_UNICODE_CHAR
1989 && EQ (charset_list, Vcharset_non_preferred_head))
1990 return CHARSET_FROM_ID (charset_unicode);
1992 return (maybe_null ? NULL
1993 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
1994 : CHARSET_FROM_ID (charset_eight_bit));
1998 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1999 doc:
2000 /*Return list of charset and one to four position-codes of CH.
2001 The charset is decided by the current priority order of charsets.
2002 A position-code is a byte value of each dimension of the code-point of
2003 CH in the charset. */)
2004 (Lisp_Object ch)
2006 struct charset *charset;
2007 int c, dimension;
2008 unsigned code;
2009 Lisp_Object val;
2011 CHECK_CHARACTER (ch);
2012 c = XFASTINT (ch);
2013 charset = CHAR_CHARSET (c);
2014 if (! charset)
2015 emacs_abort ();
2016 code = ENCODE_CHAR (charset, c);
2017 if (code == CHARSET_INVALID_CODE (charset))
2018 emacs_abort ();
2019 dimension = CHARSET_DIMENSION (charset);
2020 for (val = Qnil; dimension > 0; dimension--)
2022 val = Fcons (make_number (code & 0xFF), val);
2023 code >>= 8;
2025 return Fcons (CHARSET_NAME (charset), val);
2029 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2030 doc: /* Return the charset of highest priority that contains CH.
2031 ASCII characters are an exception: for them, this function always
2032 returns `ascii'.
2033 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2034 from which to find the charset. It may also be a coding system. In
2035 that case, find the charset from what supported by that coding system. */)
2036 (Lisp_Object ch, Lisp_Object restriction)
2038 struct charset *charset;
2040 CHECK_CHARACTER (ch);
2041 if (NILP (restriction))
2042 charset = CHAR_CHARSET (XINT (ch));
2043 else
2045 if (CONSP (restriction))
2047 int c = XFASTINT (ch);
2049 for (; CONSP (restriction); restriction = XCDR (restriction))
2051 struct charset *rcharset;
2053 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2054 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2055 return XCAR (restriction);
2057 return Qnil;
2059 restriction = coding_system_charset_list (restriction);
2060 charset = char_charset (XINT (ch), restriction, NULL);
2061 if (! charset)
2062 return Qnil;
2064 return (CHARSET_NAME (charset));
2068 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2069 doc: /*
2070 Return charset of a character in the current buffer at position POS.
2071 If POS is nil, it defaults to the current point.
2072 If POS is out of range, the value is nil. */)
2073 (Lisp_Object pos)
2075 Lisp_Object ch;
2076 struct charset *charset;
2078 ch = Fchar_after (pos);
2079 if (! INTEGERP (ch))
2080 return ch;
2081 charset = CHAR_CHARSET (XINT (ch));
2082 return (CHARSET_NAME (charset));
2086 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2087 doc: /*
2088 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2090 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2091 by their DIMENSION, CHARS, and FINAL-CHAR,
2092 whereas Emacs distinguishes them by charset symbol.
2093 See the documentation of the function `charset-info' for the meanings of
2094 DIMENSION, CHARS, and FINAL-CHAR. */)
2095 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2097 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
2098 int id = ISO_CHARSET_TABLE (XINT (dimension), chars_flag,
2099 XFASTINT (final_char));
2100 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2104 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2105 0, 0, 0,
2106 doc: /*
2107 Internal use only.
2108 Clear temporary charset mapping tables.
2109 It should be called only from temacs invoked for dumping. */)
2110 (void)
2112 if (temp_charset_work)
2114 xfree (temp_charset_work);
2115 temp_charset_work = NULL;
2118 if (CHAR_TABLE_P (Vchar_unify_table))
2119 Foptimize_char_table (Vchar_unify_table, Qnil);
2121 return Qnil;
2124 DEFUN ("charset-priority-list", Fcharset_priority_list,
2125 Scharset_priority_list, 0, 1, 0,
2126 doc: /* Return the list of charsets ordered by priority.
2127 HIGHESTP non-nil means just return the highest priority one. */)
2128 (Lisp_Object highestp)
2130 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2132 if (!NILP (highestp))
2133 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2135 while (!NILP (list))
2137 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2138 list = XCDR (list);
2140 return Fnreverse (val);
2143 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2144 1, MANY, 0,
2145 doc: /* Assign higher priority to the charsets given as arguments.
2146 usage: (set-charset-priority &rest charsets) */)
2147 (ptrdiff_t nargs, Lisp_Object *args)
2149 Lisp_Object new_head, old_list;
2150 Lisp_Object list_2022, list_emacs_mule;
2151 ptrdiff_t i;
2152 int id;
2154 old_list = Fcopy_sequence (Vcharset_ordered_list);
2155 new_head = Qnil;
2156 for (i = 0; i < nargs; i++)
2158 CHECK_CHARSET_GET_ID (args[i], id);
2159 if (! NILP (Fmemq (make_number (id), old_list)))
2161 old_list = Fdelq (make_number (id), old_list);
2162 new_head = Fcons (make_number (id), new_head);
2165 Vcharset_non_preferred_head = old_list;
2166 Vcharset_ordered_list = CALLN (Fnconc, Fnreverse (new_head), old_list);
2168 charset_ordered_list_tick++;
2170 charset_unibyte = -1;
2171 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2172 CONSP (old_list); old_list = XCDR (old_list))
2174 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2175 list_2022 = Fcons (XCAR (old_list), list_2022);
2176 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2177 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2178 if (charset_unibyte < 0)
2180 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2182 if (CHARSET_DIMENSION (charset) == 1
2183 && CHARSET_ASCII_COMPATIBLE_P (charset)
2184 && CHARSET_MAX_CHAR (charset) >= 0x80)
2185 charset_unibyte = CHARSET_ID (charset);
2188 Viso_2022_charset_list = Fnreverse (list_2022);
2189 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2190 if (charset_unibyte < 0)
2191 charset_unibyte = charset_iso_8859_1;
2193 return Qnil;
2196 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2197 0, 1, 0,
2198 doc: /* Internal use only.
2199 Return charset identification number of CHARSET. */)
2200 (Lisp_Object charset)
2202 int id;
2204 CHECK_CHARSET_GET_ID (charset, id);
2205 return make_number (id);
2208 struct charset_sort_data
2210 Lisp_Object charset;
2211 int id;
2212 ptrdiff_t priority;
2215 static int
2216 charset_compare (const void *d1, const void *d2)
2218 const struct charset_sort_data *data1 = d1, *data2 = d2;
2219 if (data1->priority != data2->priority)
2220 return data1->priority < data2->priority ? -1 : 1;
2221 return 0;
2224 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2225 doc: /* Sort charset list CHARSETS by a priority of each charset.
2226 Return the sorted list. CHARSETS is modified by side effects.
2227 See also `charset-priority-list' and `set-charset-priority'. */)
2228 (Lisp_Object charsets)
2230 Lisp_Object len = Flength (charsets);
2231 ptrdiff_t n = XFASTINT (len), i, j;
2232 int done;
2233 Lisp_Object tail, elt, attrs;
2234 struct charset_sort_data *sort_data;
2235 int id, min_id = INT_MAX, max_id = INT_MIN;
2236 USE_SAFE_ALLOCA;
2238 if (n == 0)
2239 return Qnil;
2240 SAFE_NALLOCA (sort_data, 1, n);
2241 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2243 elt = XCAR (tail);
2244 CHECK_CHARSET_GET_ATTR (elt, attrs);
2245 sort_data[i].charset = elt;
2246 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2247 if (id < min_id)
2248 min_id = id;
2249 if (id > max_id)
2250 max_id = id;
2252 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2253 done < n && CONSP (tail); tail = XCDR (tail), i++)
2255 elt = XCAR (tail);
2256 id = XFASTINT (elt);
2257 if (id >= min_id && id <= max_id)
2258 for (j = 0; j < n; j++)
2259 if (sort_data[j].id == id)
2261 sort_data[j].priority = i;
2262 done++;
2265 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2266 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2267 XSETCAR (tail, sort_data[i].charset);
2268 SAFE_FREE ();
2269 return charsets;
2273 void
2274 init_charset (void)
2276 Lisp_Object tempdir;
2277 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2278 if (! file_accessible_directory_p (tempdir))
2280 /* This used to be non-fatal (dir_warning), but it should not
2281 happen, and if it does sooner or later it will cause some
2282 obscure problem (eg bug#6401), so better abort. */
2283 fprintf (stderr, "Error: charsets directory not found:\n\
2284 %s\n\
2285 Emacs will not function correctly without the character map files.\n%s\
2286 Please check your installation!\n",
2287 SDATA (tempdir),
2288 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2289 variable is set, maybe it has the wrong value?\n" : "");
2290 exit (1);
2293 Vcharset_map_path = list1 (tempdir);
2297 void
2298 init_charset_once (void)
2300 int i, j, k;
2302 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2303 for (j = 0; j < ISO_MAX_CHARS; j++)
2304 for (k = 0; k < ISO_MAX_FINAL; k++)
2305 iso_charset_table[i][j][k] = -1;
2307 for (i = 0; i < 256; i++)
2308 emacs_mule_charset[i] = -1;
2310 charset_jisx0201_roman = -1;
2311 charset_jisx0208_1978 = -1;
2312 charset_jisx0208 = -1;
2313 charset_ksc5601 = -1;
2316 #ifdef emacs
2318 /* Allocate an initial charset table that is large enough to handle
2319 Emacs while it is bootstrapping. As of September 2011, the size
2320 needs to be at least 166; make it a bit bigger to allow for future
2321 expansion.
2323 Don't make the value so small that the table is reallocated during
2324 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2325 during an initial bootstrap wreak havoc after dumping; see the
2326 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2327 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2328 static struct charset charset_table_init[180];
2330 void
2331 syms_of_charset (void)
2333 DEFSYM (Qcharsetp, "charsetp");
2335 /* Special charset symbols. */
2336 DEFSYM (Qascii, "ascii");
2337 DEFSYM (Qunicode, "unicode");
2338 DEFSYM (Qemacs, "emacs");
2339 DEFSYM (Qeight_bit, "eight-bit");
2340 DEFSYM (Qiso_8859_1, "iso-8859-1");
2342 staticpro (&Vcharset_ordered_list);
2343 Vcharset_ordered_list = Qnil;
2345 staticpro (&Viso_2022_charset_list);
2346 Viso_2022_charset_list = Qnil;
2348 staticpro (&Vemacs_mule_charset_list);
2349 Vemacs_mule_charset_list = Qnil;
2351 staticpro (&Vcharset_hash_table);
2352 Vcharset_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
2354 charset_table = charset_table_init;
2355 charset_table_size = ARRAYELTS (charset_table_init);
2356 charset_table_used = 0;
2358 defsubr (&Scharsetp);
2359 defsubr (&Smap_charset_chars);
2360 defsubr (&Sdefine_charset_internal);
2361 defsubr (&Sdefine_charset_alias);
2362 defsubr (&Scharset_plist);
2363 defsubr (&Sset_charset_plist);
2364 defsubr (&Sunify_charset);
2365 defsubr (&Sget_unused_iso_final_char);
2366 defsubr (&Sdeclare_equiv_charset);
2367 defsubr (&Sfind_charset_region);
2368 defsubr (&Sfind_charset_string);
2369 defsubr (&Sdecode_char);
2370 defsubr (&Sencode_char);
2371 defsubr (&Ssplit_char);
2372 defsubr (&Smake_char);
2373 defsubr (&Schar_charset);
2374 defsubr (&Scharset_after);
2375 defsubr (&Siso_charset);
2376 defsubr (&Sclear_charset_maps);
2377 defsubr (&Scharset_priority_list);
2378 defsubr (&Sset_charset_priority);
2379 defsubr (&Scharset_id_internal);
2380 defsubr (&Ssort_charsets);
2382 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2383 doc: /* List of directories to search for charset map files. */);
2384 Vcharset_map_path = Qnil;
2386 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2387 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2388 inhibit_load_charset_map = 0;
2390 DEFVAR_LISP ("charset-list", Vcharset_list,
2391 doc: /* List of all charsets ever defined. */);
2392 Vcharset_list = Qnil;
2394 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2395 doc: /* ISO639 language mnemonic symbol for the current language environment.
2396 If the current language environment is for multiple languages (e.g. "Latin-1"),
2397 the value may be a list of mnemonics. */);
2398 Vcurrent_iso639_language = Qnil;
2400 charset_ascii
2401 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2402 0, 127, 'B', -1, 0, 1, 0, 0);
2403 charset_iso_8859_1
2404 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
2405 0, 255, -1, -1, -1, 1, 0, 0);
2406 charset_unicode
2407 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2408 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2409 charset_emacs
2410 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2411 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2412 charset_eight_bit
2413 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
2414 128, 255, -1, 0, -1, 0, 1,
2415 MAX_5_BYTE_CHAR + 1);
2416 charset_unibyte = charset_iso_8859_1;
2419 #endif /* emacs */