Avoid leaving garbage on screen when using 'raise' display property
[emacs.git] / src / charset.c
blobf0b41400843cca88f82e07188f1cd67a314faf1f
1 /* Basic character set support.
3 Copyright (C) 2001-2017 Free Software Foundation, Inc.
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
14 This file is part of GNU Emacs.
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or (at
19 your option) any later version.
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
29 #include <config.h>
31 #include <errno.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <limits.h>
36 #include <sys/types.h>
37 #include <c-ctype.h>
38 #include "lisp.h"
39 #include "character.h"
40 #include "charset.h"
41 #include "coding.h"
42 #include "buffer.h"
44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
46 A coded character set ("charset" hereafter) is a meaningful
47 collection (i.e. language, culture, functionality, etc.) of
48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
55 charset_table as a struct charset.
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table;
63 /* Table of struct charset. */
64 struct charset *charset_table;
66 static ptrdiff_t charset_table_size;
67 static int charset_table_used;
69 /* Special charsets corresponding to symbols. */
70 int charset_ascii;
71 int charset_eight_bit;
72 static int charset_iso_8859_1;
73 int charset_unicode;
74 static int charset_emacs;
76 /* The other special charsets. */
77 int charset_jisx0201_roman;
78 int charset_jisx0208_1978;
79 int charset_jisx0208;
80 int charset_ksc5601;
82 /* Charset of unibyte characters. */
83 int charset_unibyte;
85 /* List of charsets ordered by the priority. */
86 Lisp_Object Vcharset_ordered_list;
88 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
89 charsets. */
90 Lisp_Object Vcharset_non_preferred_head;
92 /* Incremented every time we change the priority of charsets.
93 Wraps around. */
94 EMACS_UINT charset_ordered_list_tick;
96 /* List of iso-2022 charsets. */
97 Lisp_Object Viso_2022_charset_list;
99 /* List of emacs-mule charsets. */
100 Lisp_Object Vemacs_mule_charset_list;
102 int emacs_mule_charset[256];
104 /* Mapping table from ISO2022's charset (specified by DIMENSION,
105 CHARS, and FINAL-CHAR) to Emacs' charset. */
106 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
108 #define CODE_POINT_TO_INDEX(charset, code) \
109 ((charset)->code_linear_p \
110 ? (int) ((code) - (charset)->min_code) \
111 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
112 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
113 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
114 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
115 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
116 * (charset)->code_space[11]) \
117 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
118 * (charset)->code_space[7]) \
119 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
120 * (charset)->code_space[3]) \
121 + (((code) & 0xFF) - (charset)->code_space[0]) \
122 - ((charset)->char_index_offset)) \
123 : -1)
126 /* Return the code-point for the character index IDX in CHARSET.
127 IDX should be an unsigned int variable in a valid range (which is
128 always in nonnegative int range too). IDX contains garbage afterwards. */
130 #define INDEX_TO_CODE_POINT(charset, idx) \
131 ((charset)->code_linear_p \
132 ? (idx) + (charset)->min_code \
133 : (idx += (charset)->char_index_offset, \
134 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
135 | (((charset)->code_space[4] \
136 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
137 << 8) \
138 | (((charset)->code_space[8] \
139 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
140 << 16) \
141 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
142 << 24))))
144 /* Structure to hold mapping tables for a charset. Used by temacs
145 invoked for dumping. */
147 static struct
149 /* The current charset for which the following tables are setup. */
150 struct charset *current;
152 /* 1 iff the following table is used for encoder. */
153 short for_encoder;
155 /* When the following table is used for encoding, minimum and
156 maximum character of the current charset. */
157 int min_char, max_char;
159 /* A Unicode character corresponding to the code index 0 (i.e. the
160 minimum code-point) of the current charset, or -1 if the code
161 index 0 is not a Unicode character. This is checked when
162 table.encoder[CHAR] is zero. */
163 int zero_index_char;
165 union {
166 /* Table mapping code-indices (not code-points) of the current
167 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
168 doesn't belong to the current charset. */
169 int decoder[0x10000];
170 /* Table mapping Unicode characters to code-indices of the current
171 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
172 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
173 (0x20000..0x2FFFF). Note that there is no charset map that
174 uses both SMP and SIP. */
175 unsigned short encoder[0x20000];
176 } table;
177 } *temp_charset_work;
179 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
180 do { \
181 if ((CODE) == 0) \
182 temp_charset_work->zero_index_char = (C); \
183 else if ((C) < 0x20000) \
184 temp_charset_work->table.encoder[(C)] = (CODE); \
185 else \
186 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
187 } while (0)
189 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
190 ((C) == temp_charset_work->zero_index_char ? 0 \
191 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
192 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
193 : temp_charset_work->table.encoder[(C) - 0x10000] \
194 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
196 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
197 (temp_charset_work->table.decoder[(CODE)] = (C))
199 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
200 (temp_charset_work->table.decoder[(CODE)])
203 /* Set to 1 to warn that a charset map is loaded and thus a buffer
204 text and a string data may be relocated. */
205 bool charset_map_loaded;
207 struct charset_map_entries
209 struct {
210 unsigned from, to;
211 int c;
212 } entry[0x10000];
213 struct charset_map_entries *next;
216 /* Load the mapping information of CHARSET from ENTRIES for
217 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
218 encoding (CONTROL_FLAG == 2).
220 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
221 and CHARSET->fast_map.
223 If CONTROL_FLAG is 1, setup the following tables according to
224 CHARSET->method and inhibit_load_charset_map.
226 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
227 ----------------------+--------------------+---------------------------
228 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
229 ----------------------+--------------------+---------------------------
230 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
232 If CONTROL_FLAG is 2, setup the following tables.
234 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
235 ----------------------+--------------------+---------------------------
236 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
237 ----------------------+--------------------+--------------------------
238 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
241 static void
242 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
244 Lisp_Object vec UNINIT;
245 Lisp_Object table UNINIT;
246 unsigned max_code = CHARSET_MAX_CODE (charset);
247 bool ascii_compatible_p = charset->ascii_compatible_p;
248 int min_char, max_char, nonascii_min_char;
249 int i;
250 unsigned char *fast_map = charset->fast_map;
252 if (n_entries <= 0)
253 return;
255 if (control_flag)
257 if (! inhibit_load_charset_map)
259 if (control_flag == 1)
261 if (charset->method == CHARSET_METHOD_MAP)
263 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
265 vec = Fmake_vector (make_number (n), make_number (-1));
266 set_charset_attr (charset, charset_decoder, vec);
268 else
270 char_table_set_range (Vchar_unify_table,
271 charset->min_char, charset->max_char,
272 Qnil);
275 else
277 table = Fmake_char_table (Qnil, Qnil);
278 set_charset_attr (charset,
279 (charset->method == CHARSET_METHOD_MAP
280 ? charset_encoder : charset_deunifier),
281 table);
284 else
286 if (! temp_charset_work)
287 temp_charset_work = xmalloc (sizeof *temp_charset_work);
288 if (control_flag == 1)
290 memset (temp_charset_work->table.decoder, -1,
291 sizeof (int) * 0x10000);
293 else
295 memset (temp_charset_work->table.encoder, 0,
296 sizeof (unsigned short) * 0x20000);
297 temp_charset_work->zero_index_char = -1;
299 temp_charset_work->current = charset;
300 temp_charset_work->for_encoder = (control_flag == 2);
301 control_flag += 2;
303 charset_map_loaded = 1;
306 min_char = max_char = entries->entry[0].c;
307 nonascii_min_char = MAX_CHAR;
308 for (i = 0; i < n_entries; i++)
310 unsigned from, to;
311 int from_index, to_index, lim_index;
312 int from_c, to_c;
313 int idx = i % 0x10000;
315 if (i > 0 && idx == 0)
316 entries = entries->next;
317 from = entries->entry[idx].from;
318 to = entries->entry[idx].to;
319 from_c = entries->entry[idx].c;
320 from_index = CODE_POINT_TO_INDEX (charset, from);
321 if (from == to)
323 to_index = from_index;
324 to_c = from_c;
326 else
328 to_index = CODE_POINT_TO_INDEX (charset, to);
329 to_c = from_c + (to_index - from_index);
331 if (from_index < 0 || to_index < 0)
332 continue;
333 lim_index = to_index + 1;
335 if (to_c > max_char)
336 max_char = to_c;
337 else if (from_c < min_char)
338 min_char = from_c;
340 if (control_flag == 1)
342 if (charset->method == CHARSET_METHOD_MAP)
343 for (; from_index < lim_index; from_index++, from_c++)
344 ASET (vec, from_index, make_number (from_c));
345 else
346 for (; from_index < lim_index; from_index++, from_c++)
347 CHAR_TABLE_SET (Vchar_unify_table,
348 CHARSET_CODE_OFFSET (charset) + from_index,
349 make_number (from_c));
351 else if (control_flag == 2)
353 if (charset->method == CHARSET_METHOD_MAP
354 && CHARSET_COMPACT_CODES_P (charset))
355 for (; from_index < lim_index; from_index++, from_c++)
357 unsigned code = from_index;
358 code = INDEX_TO_CODE_POINT (charset, code);
360 if (NILP (CHAR_TABLE_REF (table, from_c)))
361 CHAR_TABLE_SET (table, from_c, make_number (code));
363 else
364 for (; from_index < lim_index; from_index++, from_c++)
366 if (NILP (CHAR_TABLE_REF (table, from_c)))
367 CHAR_TABLE_SET (table, from_c, make_number (from_index));
370 else if (control_flag == 3)
371 for (; from_index < lim_index; from_index++, from_c++)
372 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
373 else if (control_flag == 4)
374 for (; from_index < lim_index; from_index++, from_c++)
375 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
376 else /* control_flag == 0 */
378 if (ascii_compatible_p)
380 if (! ASCII_CHAR_P (from_c))
382 if (from_c < nonascii_min_char)
383 nonascii_min_char = from_c;
385 else if (! ASCII_CHAR_P (to_c))
387 nonascii_min_char = 0x80;
391 for (; from_c <= to_c; from_c++)
392 CHARSET_FAST_MAP_SET (from_c, fast_map);
396 if (control_flag == 0)
398 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
399 ? nonascii_min_char : min_char);
400 CHARSET_MAX_CHAR (charset) = max_char;
402 else if (control_flag == 4)
404 temp_charset_work->min_char = min_char;
405 temp_charset_work->max_char = max_char;
410 /* Read a hexadecimal number (preceded by "0x") from the file FP while
411 paying attention to comment character '#'. */
413 static unsigned
414 read_hex (FILE *fp, bool *eof, bool *overflow)
416 int c;
417 unsigned n;
419 while ((c = getc (fp)) != EOF)
421 if (c == '#')
423 while ((c = getc (fp)) != EOF && c != '\n');
425 else if (c == '0')
427 if ((c = getc (fp)) == EOF || c == 'x')
428 break;
431 if (c == EOF)
433 *eof = 1;
434 return 0;
436 n = 0;
437 while (c_isxdigit (c = getc (fp)))
439 if (INT_LEFT_SHIFT_OVERFLOW (n, 4))
440 *overflow = 1;
441 n = ((n << 4)
442 | (c - ('0' <= c && c <= '9' ? '0'
443 : 'A' <= c && c <= 'F' ? 'A' - 10
444 : 'a' - 10)));
446 if (c != EOF)
447 ungetc (c, fp);
448 return n;
451 /* Return a mapping vector for CHARSET loaded from MAPFILE.
452 Each line of MAPFILE has this form
453 0xAAAA 0xCCCC
454 where 0xAAAA is a code-point and 0xCCCC is the corresponding
455 character code, or this form
456 0xAAAA-0xBBBB 0xCCCC
457 where 0xAAAA and 0xBBBB are code-points specifying a range, and
458 0xCCCC is the first character code of the range.
460 The returned vector has this form:
461 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
462 where CODE1 is a code-point or a cons of code-points specifying a
463 range.
465 Note that this function uses `openp' to open MAPFILE but ignores
466 `file-name-handler-alist' to avoid running any Lisp code. */
468 static void
469 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
470 int control_flag)
472 unsigned min_code = CHARSET_MIN_CODE (charset);
473 unsigned max_code = CHARSET_MAX_CODE (charset);
474 int fd;
475 FILE *fp;
476 struct charset_map_entries *head, *entries;
477 int n_entries;
478 AUTO_STRING (map, ".map");
479 AUTO_STRING (txt, ".txt");
480 AUTO_LIST2 (suffixes, map, txt);
481 ptrdiff_t count = SPECPDL_INDEX ();
482 record_unwind_protect_nothing ();
483 specbind (Qfile_name_handler_alist, Qnil);
484 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false);
485 fp = fd < 0 ? 0 : fdopen (fd, "r");
486 if (!fp)
488 int open_errno = errno;
489 emacs_close (fd);
490 report_file_errno ("Loading charset map", mapfile, open_errno);
492 set_unwind_protect_ptr (count, fclose_unwind, fp);
493 unbind_to (count + 1, Qnil);
495 /* Use record_xmalloc, as `charset_map_entries' is
496 large (larger than MAX_ALLOCA). */
497 head = record_xmalloc (sizeof *head);
498 entries = head;
499 memset (entries, 0, sizeof (struct charset_map_entries));
501 n_entries = 0;
502 while (1)
504 unsigned from, to, c;
505 int idx;
506 bool eof = 0, overflow = 0;
508 from = read_hex (fp, &eof, &overflow);
509 if (eof)
510 break;
511 if (getc (fp) == '-')
512 to = read_hex (fp, &eof, &overflow);
513 else
514 to = from;
515 if (eof)
516 break;
517 c = read_hex (fp, &eof, &overflow);
518 if (eof)
519 break;
521 if (overflow)
522 continue;
523 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
524 continue;
526 if (n_entries == 0x10000)
528 entries->next = record_xmalloc (sizeof *entries->next);
529 entries = entries->next;
530 memset (entries, 0, sizeof (struct charset_map_entries));
531 n_entries = 0;
533 idx = n_entries;
534 entries->entry[idx].from = from;
535 entries->entry[idx].to = to;
536 entries->entry[idx].c = c;
537 n_entries++;
539 fclose (fp);
540 clear_unwind_protect (count);
542 load_charset_map (charset, head, n_entries, control_flag);
543 unbind_to (count, Qnil);
546 static void
547 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
549 unsigned min_code = CHARSET_MIN_CODE (charset);
550 unsigned max_code = CHARSET_MAX_CODE (charset);
551 struct charset_map_entries *head, *entries;
552 int n_entries;
553 int len = ASIZE (vec);
554 int i;
555 USE_SAFE_ALLOCA;
557 if (len % 2 == 1)
559 add_to_log ("Failure in loading charset map: %V", vec);
560 return;
563 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
564 large (larger than MAX_ALLOCA). */
565 head = SAFE_ALLOCA (sizeof *head);
566 entries = head;
567 memset (entries, 0, sizeof (struct charset_map_entries));
569 n_entries = 0;
570 for (i = 0; i < len; i += 2)
572 Lisp_Object val, val2;
573 unsigned from, to;
574 EMACS_INT c;
575 int idx;
577 val = AREF (vec, i);
578 if (CONSP (val))
580 val2 = XCDR (val);
581 val = XCAR (val);
582 from = XFASTINT (val);
583 to = XFASTINT (val2);
585 else
586 from = to = XFASTINT (val);
587 val = AREF (vec, i + 1);
588 CHECK_NATNUM (val);
589 c = XFASTINT (val);
591 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
592 continue;
594 if (n_entries > 0 && (n_entries % 0x10000) == 0)
596 entries->next = SAFE_ALLOCA (sizeof *entries->next);
597 entries = entries->next;
598 memset (entries, 0, sizeof (struct charset_map_entries));
600 idx = n_entries % 0x10000;
601 entries->entry[idx].from = from;
602 entries->entry[idx].to = to;
603 entries->entry[idx].c = c;
604 n_entries++;
607 load_charset_map (charset, head, n_entries, control_flag);
608 SAFE_FREE ();
612 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
613 map it is (see the comment of load_charset_map for the detail). */
615 static void
616 load_charset (struct charset *charset, int control_flag)
618 Lisp_Object map;
620 if (inhibit_load_charset_map
621 && temp_charset_work
622 && charset == temp_charset_work->current
623 && ((control_flag == 2) == temp_charset_work->for_encoder))
624 return;
626 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
627 map = CHARSET_MAP (charset);
628 else
630 if (! CHARSET_UNIFIED_P (charset))
631 emacs_abort ();
632 map = CHARSET_UNIFY_MAP (charset);
634 if (STRINGP (map))
635 load_charset_map_from_file (charset, map, control_flag);
636 else
637 load_charset_map_from_vector (charset, map, control_flag);
641 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
642 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
643 (Lisp_Object object)
645 return (CHARSETP (object) ? Qt : Qnil);
649 static void
650 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
651 Lisp_Object function, Lisp_Object arg,
652 unsigned int from, unsigned int to)
654 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
655 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
656 Lisp_Object range = Fcons (Qnil, Qnil);
657 int c, stop;
659 c = temp_charset_work->min_char;
660 stop = (temp_charset_work->max_char < 0x20000
661 ? temp_charset_work->max_char : 0xFFFF);
663 while (1)
665 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
667 if (idx >= from_idx && idx <= to_idx)
669 if (NILP (XCAR (range)))
670 XSETCAR (range, make_number (c));
672 else if (! NILP (XCAR (range)))
674 XSETCDR (range, make_number (c - 1));
675 if (c_function)
676 (*c_function) (arg, range);
677 else
678 call2 (function, range, arg);
679 XSETCAR (range, Qnil);
681 if (c == stop)
683 if (c == temp_charset_work->max_char)
685 if (! NILP (XCAR (range)))
687 XSETCDR (range, make_number (c));
688 if (c_function)
689 (*c_function) (arg, range);
690 else
691 call2 (function, range, arg);
693 break;
695 c = 0x1FFFF;
696 stop = temp_charset_work->max_char;
698 c++;
702 void
703 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
704 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
706 Lisp_Object range;
707 bool partial = (from > CHARSET_MIN_CODE (charset)
708 || to < CHARSET_MAX_CODE (charset));
710 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
712 int from_idx = CODE_POINT_TO_INDEX (charset, from);
713 int to_idx = CODE_POINT_TO_INDEX (charset, to);
714 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
715 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
717 if (CHARSET_UNIFIED_P (charset))
719 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
720 load_charset (charset, 2);
721 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
722 map_char_table_for_charset (c_function, function,
723 CHARSET_DEUNIFIER (charset), arg,
724 partial ? charset : NULL, from, to);
725 else
726 map_charset_for_dump (c_function, function, arg, from, to);
729 range = Fcons (make_number (from_c), make_number (to_c));
730 if (NILP (function))
731 (*c_function) (arg, range);
732 else
733 call2 (function, range, arg);
735 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
737 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
738 load_charset (charset, 2);
739 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
740 map_char_table_for_charset (c_function, function,
741 CHARSET_ENCODER (charset), arg,
742 partial ? charset : NULL, from, to);
743 else
744 map_charset_for_dump (c_function, function, arg, from, to);
746 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
748 Lisp_Object subset_info;
749 int offset;
751 subset_info = CHARSET_SUBSET (charset);
752 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
753 offset = XINT (AREF (subset_info, 3));
754 from -= offset;
755 if (from < XFASTINT (AREF (subset_info, 1)))
756 from = XFASTINT (AREF (subset_info, 1));
757 to -= offset;
758 if (to > XFASTINT (AREF (subset_info, 2)))
759 to = XFASTINT (AREF (subset_info, 2));
760 map_charset_chars (c_function, function, arg, charset, from, to);
762 else /* i.e. CHARSET_METHOD_SUPERSET */
764 Lisp_Object parents;
766 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
767 parents = XCDR (parents))
769 int offset;
770 unsigned this_from, this_to;
772 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
773 offset = XINT (XCDR (XCAR (parents)));
774 this_from = from > offset ? from - offset : 0;
775 this_to = to > offset ? to - offset : 0;
776 if (this_from < CHARSET_MIN_CODE (charset))
777 this_from = CHARSET_MIN_CODE (charset);
778 if (this_to > CHARSET_MAX_CODE (charset))
779 this_to = CHARSET_MAX_CODE (charset);
780 map_charset_chars (c_function, function, arg, charset,
781 this_from, this_to);
786 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
787 doc: /* Call FUNCTION for all characters in CHARSET.
788 FUNCTION is called with an argument RANGE and the optional 3rd
789 argument ARG.
791 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
792 characters contained in CHARSET.
794 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
795 range of code points (in CHARSET) of target characters. */)
796 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
798 struct charset *cs;
799 unsigned from, to;
801 CHECK_CHARSET_GET_CHARSET (charset, cs);
802 if (NILP (from_code))
803 from = CHARSET_MIN_CODE (cs);
804 else
806 from = XINT (from_code);
807 if (from < CHARSET_MIN_CODE (cs))
808 from = CHARSET_MIN_CODE (cs);
810 if (NILP (to_code))
811 to = CHARSET_MAX_CODE (cs);
812 else
814 to = XINT (to_code);
815 if (to > CHARSET_MAX_CODE (cs))
816 to = CHARSET_MAX_CODE (cs);
818 map_charset_chars (NULL, function, arg, cs, from, to);
819 return Qnil;
823 /* Define a charset according to the arguments. The Nth argument is
824 the Nth attribute of the charset (the last attribute `charset-id'
825 is not included). See the docstring of `define-charset' for the
826 detail. */
828 DEFUN ("define-charset-internal", Fdefine_charset_internal,
829 Sdefine_charset_internal, charset_arg_max, MANY, 0,
830 doc: /* For internal use only.
831 usage: (define-charset-internal ...) */)
832 (ptrdiff_t nargs, Lisp_Object *args)
834 /* Charset attr vector. */
835 Lisp_Object attrs;
836 Lisp_Object val;
837 EMACS_UINT hash_code;
838 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
839 int i, j;
840 struct charset charset;
841 int id;
842 int dimension;
843 bool new_definition_p;
844 int nchars;
846 if (nargs != charset_arg_max)
847 Fsignal (Qwrong_number_of_arguments,
848 Fcons (intern ("define-charset-internal"),
849 make_number (nargs)));
851 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
853 CHECK_SYMBOL (args[charset_arg_name]);
854 ASET (attrs, charset_name, args[charset_arg_name]);
856 val = args[charset_arg_code_space];
857 for (i = 0, dimension = 0, nchars = 1; ; i++)
859 Lisp_Object min_byte_obj, max_byte_obj;
860 int min_byte, max_byte;
862 min_byte_obj = Faref (val, make_number (i * 2));
863 max_byte_obj = Faref (val, make_number (i * 2 + 1));
864 CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
865 min_byte = XINT (min_byte_obj);
866 CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
867 max_byte = XINT (max_byte_obj);
868 charset.code_space[i * 4] = min_byte;
869 charset.code_space[i * 4 + 1] = max_byte;
870 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
871 if (max_byte > 0)
872 dimension = i + 1;
873 if (i == 3)
874 break;
875 nchars *= charset.code_space[i * 4 + 2];
876 charset.code_space[i * 4 + 3] = nchars;
879 val = args[charset_arg_dimension];
880 if (NILP (val))
881 charset.dimension = dimension;
882 else
884 CHECK_RANGED_INTEGER (val, 1, 4);
885 charset.dimension = XINT (val);
888 charset.code_linear_p
889 = (charset.dimension == 1
890 || (charset.code_space[2] == 256
891 && (charset.dimension == 2
892 || (charset.code_space[6] == 256
893 && (charset.dimension == 3
894 || charset.code_space[10] == 256)))));
896 if (! charset.code_linear_p)
898 charset.code_space_mask = xzalloc (256);
899 for (i = 0; i < 4; i++)
900 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
901 j++)
902 charset.code_space_mask[j] |= (1 << i);
905 charset.iso_chars_96 = charset.code_space[2] == 96;
907 charset.min_code = (charset.code_space[0]
908 | (charset.code_space[4] << 8)
909 | (charset.code_space[8] << 16)
910 | ((unsigned) charset.code_space[12] << 24));
911 charset.max_code = (charset.code_space[1]
912 | (charset.code_space[5] << 8)
913 | (charset.code_space[9] << 16)
914 | ((unsigned) charset.code_space[13] << 24));
915 charset.char_index_offset = 0;
917 val = args[charset_arg_min_code];
918 if (! NILP (val))
920 unsigned code = cons_to_unsigned (val, UINT_MAX);
922 if (code < charset.min_code
923 || code > charset.max_code)
924 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
925 make_fixnum_or_float (charset.max_code), val);
926 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
927 charset.min_code = code;
930 val = args[charset_arg_max_code];
931 if (! NILP (val))
933 unsigned code = cons_to_unsigned (val, UINT_MAX);
935 if (code < charset.min_code
936 || code > charset.max_code)
937 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
938 make_fixnum_or_float (charset.max_code), val);
939 charset.max_code = code;
942 charset.compact_codes_p = charset.max_code < 0x10000;
944 val = args[charset_arg_invalid_code];
945 if (NILP (val))
947 if (charset.min_code > 0)
948 charset.invalid_code = 0;
949 else
951 if (charset.max_code < UINT_MAX)
952 charset.invalid_code = charset.max_code + 1;
953 else
954 error ("Attribute :invalid-code must be specified");
957 else
958 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
960 val = args[charset_arg_iso_final];
961 if (NILP (val))
962 charset.iso_final = -1;
963 else
965 CHECK_NUMBER (val);
966 if (XINT (val) < '0' || XINT (val) > 127)
967 error ("Invalid iso-final-char: %"pI"d", XINT (val));
968 charset.iso_final = XINT (val);
971 val = args[charset_arg_iso_revision];
972 if (NILP (val))
973 charset.iso_revision = -1;
974 else
976 CHECK_RANGED_INTEGER (val, -1, 63);
977 charset.iso_revision = XINT (val);
980 val = args[charset_arg_emacs_mule_id];
981 if (NILP (val))
982 charset.emacs_mule_id = -1;
983 else
985 CHECK_NATNUM (val);
986 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
987 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
988 charset.emacs_mule_id = XINT (val);
991 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
993 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
995 charset.unified_p = 0;
997 memset (charset.fast_map, 0, sizeof (charset.fast_map));
999 if (! NILP (args[charset_arg_code_offset]))
1001 val = args[charset_arg_code_offset];
1002 CHECK_CHARACTER (val);
1004 charset.method = CHARSET_METHOD_OFFSET;
1005 charset.code_offset = XINT (val);
1007 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1008 if (MAX_CHAR - charset.code_offset < i)
1009 error ("Unsupported max char: %d", charset.max_char);
1010 charset.max_char = i + charset.code_offset;
1011 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1012 charset.min_char = i + charset.code_offset;
1014 i = (charset.min_char >> 7) << 7;
1015 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1016 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1017 i = (i >> 12) << 12;
1018 for (; i <= charset.max_char; i += 0x1000)
1019 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1020 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1021 charset.ascii_compatible_p = 1;
1023 else if (! NILP (args[charset_arg_map]))
1025 val = args[charset_arg_map];
1026 ASET (attrs, charset_map, val);
1027 charset.method = CHARSET_METHOD_MAP;
1029 else if (! NILP (args[charset_arg_subset]))
1031 Lisp_Object parent;
1032 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1033 struct charset *parent_charset;
1035 val = args[charset_arg_subset];
1036 parent = Fcar (val);
1037 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1038 parent_min_code = Fnth (make_number (1), val);
1039 CHECK_NATNUM (parent_min_code);
1040 parent_max_code = Fnth (make_number (2), val);
1041 CHECK_NATNUM (parent_max_code);
1042 parent_code_offset = Fnth (make_number (3), val);
1043 CHECK_NUMBER (parent_code_offset);
1044 val = make_uninit_vector (4);
1045 ASET (val, 0, make_number (parent_charset->id));
1046 ASET (val, 1, parent_min_code);
1047 ASET (val, 2, parent_max_code);
1048 ASET (val, 3, parent_code_offset);
1049 ASET (attrs, charset_subset, val);
1051 charset.method = CHARSET_METHOD_SUBSET;
1052 /* Here, we just copy the parent's fast_map. It's not accurate,
1053 but at least it works for quickly detecting which character
1054 DOESN'T belong to this charset. */
1055 memcpy (charset.fast_map, parent_charset->fast_map,
1056 sizeof charset.fast_map);
1058 /* We also copy these for parents. */
1059 charset.min_char = parent_charset->min_char;
1060 charset.max_char = parent_charset->max_char;
1062 else if (! NILP (args[charset_arg_superset]))
1064 val = args[charset_arg_superset];
1065 charset.method = CHARSET_METHOD_SUPERSET;
1066 val = Fcopy_sequence (val);
1067 ASET (attrs, charset_superset, val);
1069 charset.min_char = MAX_CHAR;
1070 charset.max_char = 0;
1071 for (; ! NILP (val); val = Fcdr (val))
1073 Lisp_Object elt, car_part, cdr_part;
1074 int this_id, offset;
1075 struct charset *this_charset;
1077 elt = Fcar (val);
1078 if (CONSP (elt))
1080 car_part = XCAR (elt);
1081 cdr_part = XCDR (elt);
1082 CHECK_CHARSET_GET_ID (car_part, this_id);
1083 CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
1084 offset = XINT (cdr_part);
1086 else
1088 CHECK_CHARSET_GET_ID (elt, this_id);
1089 offset = 0;
1091 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1093 this_charset = CHARSET_FROM_ID (this_id);
1094 if (charset.min_char > this_charset->min_char)
1095 charset.min_char = this_charset->min_char;
1096 if (charset.max_char < this_charset->max_char)
1097 charset.max_char = this_charset->max_char;
1098 for (i = 0; i < 190; i++)
1099 charset.fast_map[i] |= this_charset->fast_map[i];
1102 else
1103 error ("None of :code-offset, :map, :parents are specified");
1105 val = args[charset_arg_unify_map];
1106 if (! NILP (val) && !STRINGP (val))
1107 CHECK_VECTOR (val);
1108 ASET (attrs, charset_unify_map, val);
1110 CHECK_LIST (args[charset_arg_plist]);
1111 ASET (attrs, charset_plist, args[charset_arg_plist]);
1113 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1114 &hash_code);
1115 if (charset.hash_index >= 0)
1117 new_definition_p = 0;
1118 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1119 set_hash_value_slot (hash_table, charset.hash_index, attrs);
1121 else
1123 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1124 hash_code);
1125 if (charset_table_used == charset_table_size)
1127 /* Ensure that charset IDs fit into 'int' as well as into the
1128 restriction imposed by fixnums. Although the 'int' restriction
1129 could be removed, too much other code would need altering; for
1130 example, the IDs are stuffed into struct
1131 coding_system.charbuf[i] entries, which are 'int'. */
1132 int old_size = charset_table_size;
1133 ptrdiff_t new_size = old_size;
1134 struct charset *new_table =
1135 xpalloc (0, &new_size, 1,
1136 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1137 sizeof *charset_table);
1138 memcpy (new_table, charset_table, old_size * sizeof *new_table);
1139 charset_table = new_table;
1140 charset_table_size = new_size;
1141 /* FIXME: This leaks memory, as the old charset_table becomes
1142 unreachable. If the old charset table is charset_table_init
1143 then this leak is intentional; otherwise, it's unclear.
1144 If the latter memory leak is intentional, a
1145 comment should be added to explain this. If not, the old
1146 charset_table should be freed, by passing it as the 1st argument
1147 to xpalloc and removing the memcpy. */
1149 id = charset_table_used++;
1150 new_definition_p = 1;
1153 ASET (attrs, charset_id, make_number (id));
1154 charset.id = id;
1155 charset_table[id] = charset;
1157 if (charset.method == CHARSET_METHOD_MAP)
1159 load_charset (&charset, 0);
1160 charset_table[id] = charset;
1163 if (charset.iso_final >= 0)
1165 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1166 charset.iso_final) = id;
1167 if (new_definition_p)
1168 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1169 list1 (make_number (id)));
1170 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1171 charset_jisx0201_roman = id;
1172 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1173 charset_jisx0208_1978 = id;
1174 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1175 charset_jisx0208 = id;
1176 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1177 charset_ksc5601 = id;
1180 if (charset.emacs_mule_id >= 0)
1182 emacs_mule_charset[charset.emacs_mule_id] = id;
1183 if (charset.emacs_mule_id < 0xA0)
1184 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1185 else
1186 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1187 if (new_definition_p)
1188 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1189 list1 (make_number (id)));
1192 if (new_definition_p)
1194 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1195 if (charset.supplementary_p)
1196 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1197 list1 (make_number (id)));
1198 else
1200 Lisp_Object tail;
1202 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1204 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1206 if (cs->supplementary_p)
1207 break;
1209 if (EQ (tail, Vcharset_ordered_list))
1210 Vcharset_ordered_list = Fcons (make_number (id),
1211 Vcharset_ordered_list);
1212 else if (NILP (tail))
1213 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1214 list1 (make_number (id)));
1215 else
1217 val = Fcons (XCAR (tail), XCDR (tail));
1218 XSETCDR (tail, val);
1219 XSETCAR (tail, make_number (id));
1222 charset_ordered_list_tick++;
1225 return Qnil;
1229 /* Same as Fdefine_charset_internal but arguments are more convenient
1230 to call from C (typically in syms_of_charset). This can define a
1231 charset of `offset' method only. Return the ID of the new
1232 charset. */
1234 static int
1235 define_charset_internal (Lisp_Object name,
1236 int dimension,
1237 const char *code_space_chars,
1238 unsigned min_code, unsigned max_code,
1239 int iso_final, int iso_revision, int emacs_mule_id,
1240 bool ascii_compatible, bool supplementary,
1241 int code_offset)
1243 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1244 Lisp_Object args[charset_arg_max];
1245 Lisp_Object val;
1246 int i;
1248 args[charset_arg_name] = name;
1249 args[charset_arg_dimension] = make_number (dimension);
1250 val = make_uninit_vector (8);
1251 for (i = 0; i < 8; i++)
1252 ASET (val, i, make_number (code_space[i]));
1253 args[charset_arg_code_space] = val;
1254 args[charset_arg_min_code] = make_number (min_code);
1255 args[charset_arg_max_code] = make_number (max_code);
1256 args[charset_arg_iso_final]
1257 = (iso_final < 0 ? Qnil : make_number (iso_final));
1258 args[charset_arg_iso_revision] = make_number (iso_revision);
1259 args[charset_arg_emacs_mule_id]
1260 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1261 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1262 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1263 args[charset_arg_invalid_code] = Qnil;
1264 args[charset_arg_code_offset] = make_number (code_offset);
1265 args[charset_arg_map] = Qnil;
1266 args[charset_arg_subset] = Qnil;
1267 args[charset_arg_superset] = Qnil;
1268 args[charset_arg_unify_map] = Qnil;
1270 args[charset_arg_plist] =
1271 listn (CONSTYPE_HEAP, 14,
1272 QCname,
1273 args[charset_arg_name],
1274 intern_c_string (":dimension"),
1275 args[charset_arg_dimension],
1276 intern_c_string (":code-space"),
1277 args[charset_arg_code_space],
1278 intern_c_string (":iso-final-char"),
1279 args[charset_arg_iso_final],
1280 intern_c_string (":emacs-mule-id"),
1281 args[charset_arg_emacs_mule_id],
1282 QCascii_compatible_p,
1283 args[charset_arg_ascii_compatible_p],
1284 intern_c_string (":code-offset"),
1285 args[charset_arg_code_offset]);
1286 Fdefine_charset_internal (charset_arg_max, args);
1288 return XINT (CHARSET_SYMBOL_ID (name));
1292 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1293 Sdefine_charset_alias, 2, 2, 0,
1294 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1295 (Lisp_Object alias, Lisp_Object charset)
1297 Lisp_Object attr;
1299 CHECK_CHARSET_GET_ATTR (charset, attr);
1300 Fputhash (alias, attr, Vcharset_hash_table);
1301 Vcharset_list = Fcons (alias, Vcharset_list);
1302 return Qnil;
1306 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1307 doc: /* Return the property list of CHARSET. */)
1308 (Lisp_Object charset)
1310 Lisp_Object attrs;
1312 CHECK_CHARSET_GET_ATTR (charset, attrs);
1313 return CHARSET_ATTR_PLIST (attrs);
1317 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1318 doc: /* Set CHARSET's property list to PLIST. */)
1319 (Lisp_Object charset, Lisp_Object plist)
1321 Lisp_Object attrs;
1323 CHECK_CHARSET_GET_ATTR (charset, attrs);
1324 ASET (attrs, charset_plist, plist);
1325 return plist;
1329 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1330 doc: /* Unify characters of CHARSET with Unicode.
1331 This means reading the relevant file and installing the table defined
1332 by CHARSET's `:unify-map' property.
1334 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1335 the same meaning as the `:unify-map' attribute in the function
1336 `define-charset' (which see).
1338 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1339 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1341 int id;
1342 struct charset *cs;
1344 CHECK_CHARSET_GET_ID (charset, id);
1345 cs = CHARSET_FROM_ID (id);
1346 if (NILP (deunify)
1347 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1348 : ! CHARSET_UNIFIED_P (cs))
1349 return Qnil;
1351 CHARSET_UNIFIED_P (cs) = 0;
1352 if (NILP (deunify))
1354 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1355 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1356 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1357 if (NILP (unify_map))
1358 unify_map = CHARSET_UNIFY_MAP (cs);
1359 else
1361 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1362 signal_error ("Bad unify-map", unify_map);
1363 set_charset_attr (cs, charset_unify_map, unify_map);
1365 if (NILP (Vchar_unify_table))
1366 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1367 char_table_set_range (Vchar_unify_table,
1368 cs->min_char, cs->max_char, charset);
1369 CHARSET_UNIFIED_P (cs) = 1;
1371 else if (CHAR_TABLE_P (Vchar_unify_table))
1373 unsigned min_code = CHARSET_MIN_CODE (cs);
1374 unsigned max_code = CHARSET_MAX_CODE (cs);
1375 int min_char = DECODE_CHAR (cs, min_code);
1376 int max_char = DECODE_CHAR (cs, max_code);
1378 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1381 return Qnil;
1384 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
1385 Return true if it's a 96-character set, false if 94. */
1387 static bool
1388 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars,
1389 Lisp_Object final_char)
1391 CHECK_NUMBER (dimension);
1392 CHECK_NUMBER (chars);
1393 CHECK_CHARACTER (final_char);
1395 if (! (1 <= XINT (dimension) && XINT (dimension) <= 3))
1396 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
1397 XINT (dimension));
1399 bool chars_flag = XINT (chars) == 96;
1400 if (! (chars_flag || XINT (chars) == 94))
1401 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
1403 int final_ch = XFASTINT (final_char);
1404 if (! ('0' <= final_ch && final_ch <= '~'))
1405 error ("Invalid FINAL-CHAR `%c', it should be `0'..`~'", final_ch);
1407 return chars_flag;
1410 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1411 Sget_unused_iso_final_char, 2, 2, 0,
1412 doc: /*
1413 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1414 DIMENSION is the number of bytes to represent a character: 1 or 2.
1415 CHARS is the number of characters in a dimension: 94 or 96.
1417 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1418 If there's no unused final char for the specified kind of charset,
1419 return nil. */)
1420 (Lisp_Object dimension, Lisp_Object chars)
1422 bool chars_flag = check_iso_charset_parameter (dimension, chars,
1423 make_number ('0'));
1424 for (int final_char = '0'; final_char <= '?'; final_char++)
1425 if (ISO_CHARSET_TABLE (XINT (dimension), chars_flag, final_char) < 0)
1426 return make_number (final_char);
1427 return Qnil;
1431 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1432 4, 4, 0,
1433 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1435 On decoding by an ISO-2022 base coding system, when a charset
1436 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1437 if CHARSET is designated instead. */)
1438 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1440 int id;
1442 CHECK_CHARSET_GET_ID (charset, id);
1443 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
1444 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XFASTINT (final_char)) = id;
1445 return Qnil;
1449 /* Return information about charsets in the text at PTR of NBYTES
1450 bytes, which are NCHARS characters. The value is:
1452 0: Each character is represented by one byte. This is always
1453 true for a unibyte string. For a multibyte string, true if
1454 it contains only ASCII characters.
1456 1: No charsets other than ascii, control-1, and latin-1 are
1457 found.
1459 2: Otherwise.
1463 string_xstring_p (Lisp_Object string)
1465 const unsigned char *p = SDATA (string);
1466 const unsigned char *endp = p + SBYTES (string);
1468 if (SCHARS (string) == SBYTES (string))
1469 return 0;
1471 while (p < endp)
1473 int c = STRING_CHAR_ADVANCE (p);
1475 if (c >= 0x100)
1476 return 2;
1478 return 1;
1482 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1484 CHARSETS is a vector. If Nth element is non-nil, it means the
1485 charset whose id is N is already found.
1487 It may lookup a translation table TABLE if supplied. */
1489 static void
1490 find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1491 ptrdiff_t nbytes, Lisp_Object charsets,
1492 Lisp_Object table, bool multibyte)
1494 const unsigned char *pend = ptr + nbytes;
1496 if (nchars == nbytes)
1498 if (multibyte)
1499 ASET (charsets, charset_ascii, Qt);
1500 else
1501 while (ptr < pend)
1503 int c = *ptr++;
1505 if (!NILP (table))
1506 c = translate_char (table, c);
1507 if (ASCII_CHAR_P (c))
1508 ASET (charsets, charset_ascii, Qt);
1509 else
1510 ASET (charsets, charset_eight_bit, Qt);
1513 else
1515 while (ptr < pend)
1517 int c = STRING_CHAR_ADVANCE (ptr);
1518 struct charset *charset;
1520 if (!NILP (table))
1521 c = translate_char (table, c);
1522 charset = CHAR_CHARSET (c);
1523 ASET (charsets, CHARSET_ID (charset), Qt);
1528 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1529 2, 3, 0,
1530 doc: /* Return a list of charsets in the region between BEG and END.
1531 BEG and END are buffer positions.
1532 Optional arg TABLE if non-nil is a translation table to look up.
1534 If the current buffer is unibyte, the returned list may contain
1535 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1536 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1538 Lisp_Object charsets;
1539 ptrdiff_t from, from_byte, to, stop, stop_byte;
1540 int i;
1541 Lisp_Object val;
1542 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1544 validate_region (&beg, &end);
1545 from = XFASTINT (beg);
1546 stop = to = XFASTINT (end);
1548 if (from < GPT && GPT < to)
1550 stop = GPT;
1551 stop_byte = GPT_BYTE;
1553 else
1554 stop_byte = CHAR_TO_BYTE (stop);
1556 from_byte = CHAR_TO_BYTE (from);
1558 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1559 while (1)
1561 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1562 stop_byte - from_byte, charsets, table,
1563 multibyte);
1564 if (stop < to)
1566 from = stop, from_byte = stop_byte;
1567 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1569 else
1570 break;
1573 val = Qnil;
1574 for (i = charset_table_used - 1; i >= 0; i--)
1575 if (!NILP (AREF (charsets, i)))
1576 val = Fcons (CHARSET_NAME (charset_table + i), val);
1577 return val;
1580 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1581 1, 2, 0,
1582 doc: /* Return a list of charsets in STR.
1583 Optional arg TABLE if non-nil is a translation table to look up.
1585 If STR is unibyte, the returned list may contain
1586 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1587 (Lisp_Object str, Lisp_Object table)
1589 Lisp_Object charsets;
1590 int i;
1591 Lisp_Object val;
1593 CHECK_STRING (str);
1595 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1596 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1597 charsets, table,
1598 STRING_MULTIBYTE (str));
1599 val = Qnil;
1600 for (i = charset_table_used - 1; i >= 0; i--)
1601 if (!NILP (AREF (charsets, i)))
1602 val = Fcons (CHARSET_NAME (charset_table + i), val);
1603 return val;
1608 /* Return a unified character code for C (>= 0x110000). VAL is a
1609 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1610 charset symbol. */
1611 static int
1612 maybe_unify_char (int c, Lisp_Object val)
1614 struct charset *charset;
1616 if (INTEGERP (val))
1617 return XFASTINT (val);
1618 if (NILP (val))
1619 return c;
1621 CHECK_CHARSET_GET_CHARSET (val, charset);
1622 #ifdef REL_ALLOC
1623 /* The call to load_charset below can allocate memory, which screws
1624 callers of this function through STRING_CHAR_* macros that hold C
1625 pointers to buffer text, if REL_ALLOC is used. */
1626 r_alloc_inhibit_buffer_relocation (1);
1627 #endif
1628 load_charset (charset, 1);
1629 if (! inhibit_load_charset_map)
1631 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1632 if (! NILP (val))
1633 c = XFASTINT (val);
1635 else
1637 int code_index = c - CHARSET_CODE_OFFSET (charset);
1638 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1640 if (unified > 0)
1641 c = unified;
1643 #ifdef REL_ALLOC
1644 r_alloc_inhibit_buffer_relocation (0);
1645 #endif
1646 return c;
1650 /* Return a character corresponding to the code-point CODE of
1651 CHARSET. */
1654 decode_char (struct charset *charset, unsigned int code)
1656 int c, char_index;
1657 enum charset_method method = CHARSET_METHOD (charset);
1659 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1660 return -1;
1662 if (method == CHARSET_METHOD_SUBSET)
1664 Lisp_Object subset_info;
1666 subset_info = CHARSET_SUBSET (charset);
1667 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1668 code -= XINT (AREF (subset_info, 3));
1669 if (code < XFASTINT (AREF (subset_info, 1))
1670 || code > XFASTINT (AREF (subset_info, 2)))
1671 c = -1;
1672 else
1673 c = DECODE_CHAR (charset, code);
1675 else if (method == CHARSET_METHOD_SUPERSET)
1677 Lisp_Object parents;
1679 parents = CHARSET_SUPERSET (charset);
1680 c = -1;
1681 for (; CONSP (parents); parents = XCDR (parents))
1683 int id = XINT (XCAR (XCAR (parents)));
1684 int code_offset = XINT (XCDR (XCAR (parents)));
1685 unsigned this_code = code - code_offset;
1687 charset = CHARSET_FROM_ID (id);
1688 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1689 break;
1692 else
1694 char_index = CODE_POINT_TO_INDEX (charset, code);
1695 if (char_index < 0)
1696 return -1;
1698 if (method == CHARSET_METHOD_MAP)
1700 Lisp_Object decoder;
1702 decoder = CHARSET_DECODER (charset);
1703 if (! VECTORP (decoder))
1705 load_charset (charset, 1);
1706 decoder = CHARSET_DECODER (charset);
1708 if (VECTORP (decoder))
1709 c = XINT (AREF (decoder, char_index));
1710 else
1711 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1713 else /* method == CHARSET_METHOD_OFFSET */
1715 c = char_index + CHARSET_CODE_OFFSET (charset);
1716 if (CHARSET_UNIFIED_P (charset)
1717 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1719 /* Unify C with a Unicode character if possible. */
1720 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1721 c = maybe_unify_char (c, val);
1726 return c;
1729 /* Variable used temporarily by the macro ENCODE_CHAR. */
1730 Lisp_Object charset_work;
1732 /* Return a code-point of C in CHARSET. If C doesn't belong to
1733 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1734 use CHARSET's strict_max_char instead of max_char. */
1736 unsigned
1737 encode_char (struct charset *charset, int c)
1739 unsigned code;
1740 enum charset_method method = CHARSET_METHOD (charset);
1742 if (CHARSET_UNIFIED_P (charset))
1744 Lisp_Object deunifier;
1745 int code_index = -1;
1747 deunifier = CHARSET_DEUNIFIER (charset);
1748 if (! CHAR_TABLE_P (deunifier))
1750 load_charset (charset, 2);
1751 deunifier = CHARSET_DEUNIFIER (charset);
1753 if (CHAR_TABLE_P (deunifier))
1755 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1757 if (INTEGERP (deunified))
1758 code_index = XINT (deunified);
1760 else
1762 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1764 if (code_index >= 0)
1765 c = CHARSET_CODE_OFFSET (charset) + code_index;
1768 if (method == CHARSET_METHOD_SUBSET)
1770 Lisp_Object subset_info;
1771 struct charset *this_charset;
1773 subset_info = CHARSET_SUBSET (charset);
1774 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1775 code = ENCODE_CHAR (this_charset, c);
1776 if (code == CHARSET_INVALID_CODE (this_charset)
1777 || code < XFASTINT (AREF (subset_info, 1))
1778 || code > XFASTINT (AREF (subset_info, 2)))
1779 return CHARSET_INVALID_CODE (charset);
1780 code += XINT (AREF (subset_info, 3));
1781 return code;
1784 if (method == CHARSET_METHOD_SUPERSET)
1786 Lisp_Object parents;
1788 parents = CHARSET_SUPERSET (charset);
1789 for (; CONSP (parents); parents = XCDR (parents))
1791 int id = XINT (XCAR (XCAR (parents)));
1792 int code_offset = XINT (XCDR (XCAR (parents)));
1793 struct charset *this_charset = CHARSET_FROM_ID (id);
1795 code = ENCODE_CHAR (this_charset, c);
1796 if (code != CHARSET_INVALID_CODE (this_charset))
1797 return code + code_offset;
1799 return CHARSET_INVALID_CODE (charset);
1802 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1803 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1804 return CHARSET_INVALID_CODE (charset);
1806 if (method == CHARSET_METHOD_MAP)
1808 Lisp_Object encoder;
1809 Lisp_Object val;
1811 encoder = CHARSET_ENCODER (charset);
1812 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1814 load_charset (charset, 2);
1815 encoder = CHARSET_ENCODER (charset);
1817 if (CHAR_TABLE_P (encoder))
1819 val = CHAR_TABLE_REF (encoder, c);
1820 if (NILP (val))
1821 return CHARSET_INVALID_CODE (charset);
1822 code = XINT (val);
1823 if (! CHARSET_COMPACT_CODES_P (charset))
1824 code = INDEX_TO_CODE_POINT (charset, code);
1826 else
1828 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1829 code = INDEX_TO_CODE_POINT (charset, code);
1832 else /* method == CHARSET_METHOD_OFFSET */
1834 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1836 code = INDEX_TO_CODE_POINT (charset, code_index);
1839 return code;
1843 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 2, 0,
1844 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1845 Return nil if CODE-POINT is not valid in CHARSET.
1847 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1848 (Lisp_Object charset, Lisp_Object code_point)
1850 int c, id;
1851 unsigned code;
1852 struct charset *charsetp;
1854 CHECK_CHARSET_GET_ID (charset, id);
1855 code = cons_to_unsigned (code_point, UINT_MAX);
1856 charsetp = CHARSET_FROM_ID (id);
1857 c = DECODE_CHAR (charsetp, code);
1858 return (c >= 0 ? make_number (c) : Qnil);
1862 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 2, 0,
1863 doc: /* Encode the character CH into a code-point of CHARSET.
1864 Return nil if CHARSET doesn't include CH. */)
1865 (Lisp_Object ch, Lisp_Object charset)
1867 int c, id;
1868 unsigned code;
1869 struct charset *charsetp;
1871 CHECK_CHARSET_GET_ID (charset, id);
1872 CHECK_CHARACTER (ch);
1873 c = XFASTINT (ch);
1874 charsetp = CHARSET_FROM_ID (id);
1875 code = ENCODE_CHAR (charsetp, c);
1876 if (code == CHARSET_INVALID_CODE (charsetp))
1877 return Qnil;
1878 return INTEGER_TO_CONS (code);
1882 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1883 doc:
1884 /* Return a character of CHARSET whose position codes are CODEn.
1886 CODE1 through CODE4 are optional, but if you don't supply sufficient
1887 position codes, it is assumed that the minimum code in each dimension
1888 is specified. */)
1889 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1891 int id, dimension;
1892 struct charset *charsetp;
1893 unsigned code;
1894 int c;
1896 CHECK_CHARSET_GET_ID (charset, id);
1897 charsetp = CHARSET_FROM_ID (id);
1899 dimension = CHARSET_DIMENSION (charsetp);
1900 if (NILP (code1))
1901 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1902 ? 0 : CHARSET_MIN_CODE (charsetp));
1903 else
1905 CHECK_NATNUM (code1);
1906 if (XFASTINT (code1) >= 0x100)
1907 args_out_of_range (make_number (0xFF), code1);
1908 code = XFASTINT (code1);
1910 if (dimension > 1)
1912 code <<= 8;
1913 if (NILP (code2))
1914 code |= charsetp->code_space[(dimension - 2) * 4];
1915 else
1917 CHECK_NATNUM (code2);
1918 if (XFASTINT (code2) >= 0x100)
1919 args_out_of_range (make_number (0xFF), code2);
1920 code |= XFASTINT (code2);
1923 if (dimension > 2)
1925 code <<= 8;
1926 if (NILP (code3))
1927 code |= charsetp->code_space[(dimension - 3) * 4];
1928 else
1930 CHECK_NATNUM (code3);
1931 if (XFASTINT (code3) >= 0x100)
1932 args_out_of_range (make_number (0xFF), code3);
1933 code |= XFASTINT (code3);
1936 if (dimension > 3)
1938 code <<= 8;
1939 if (NILP (code4))
1940 code |= charsetp->code_space[0];
1941 else
1943 CHECK_NATNUM (code4);
1944 if (XFASTINT (code4) >= 0x100)
1945 args_out_of_range (make_number (0xFF), code4);
1946 code |= XFASTINT (code4);
1953 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1954 code &= 0x7F7F7F7F;
1955 c = DECODE_CHAR (charsetp, code);
1956 if (c < 0)
1957 error ("Invalid code(s)");
1958 return make_number (c);
1962 /* Return the first charset in CHARSET_LIST that contains C.
1963 CHARSET_LIST is a list of charset IDs. If it is nil, use
1964 Vcharset_ordered_list. */
1966 struct charset *
1967 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1969 bool maybe_null = 0;
1971 if (NILP (charset_list))
1972 charset_list = Vcharset_ordered_list;
1973 else
1974 maybe_null = 1;
1976 while (CONSP (charset_list))
1978 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1979 unsigned code = ENCODE_CHAR (charset, c);
1981 if (code != CHARSET_INVALID_CODE (charset))
1983 if (code_return)
1984 *code_return = code;
1985 return charset;
1987 charset_list = XCDR (charset_list);
1988 if (! maybe_null
1989 && c <= MAX_UNICODE_CHAR
1990 && EQ (charset_list, Vcharset_non_preferred_head))
1991 return CHARSET_FROM_ID (charset_unicode);
1993 return (maybe_null ? NULL
1994 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
1995 : CHARSET_FROM_ID (charset_eight_bit));
1999 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2000 doc:
2001 /*Return list of charset and one to four position-codes of CH.
2002 The charset is decided by the current priority order of charsets.
2003 A position-code is a byte value of each dimension of the code-point of
2004 CH in the charset. */)
2005 (Lisp_Object ch)
2007 struct charset *charset;
2008 int c, dimension;
2009 unsigned code;
2010 Lisp_Object val;
2012 CHECK_CHARACTER (ch);
2013 c = XFASTINT (ch);
2014 charset = CHAR_CHARSET (c);
2015 if (! charset)
2016 emacs_abort ();
2017 code = ENCODE_CHAR (charset, c);
2018 if (code == CHARSET_INVALID_CODE (charset))
2019 emacs_abort ();
2020 dimension = CHARSET_DIMENSION (charset);
2021 for (val = Qnil; dimension > 0; dimension--)
2023 val = Fcons (make_number (code & 0xFF), val);
2024 code >>= 8;
2026 return Fcons (CHARSET_NAME (charset), val);
2030 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2031 doc: /* Return the charset of highest priority that contains CH.
2032 ASCII characters are an exception: for them, this function always
2033 returns `ascii'.
2034 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2035 from which to find the charset. It may also be a coding system. In
2036 that case, find the charset from what supported by that coding system. */)
2037 (Lisp_Object ch, Lisp_Object restriction)
2039 struct charset *charset;
2041 CHECK_CHARACTER (ch);
2042 if (NILP (restriction))
2043 charset = CHAR_CHARSET (XINT (ch));
2044 else
2046 if (CONSP (restriction))
2048 int c = XFASTINT (ch);
2050 for (; CONSP (restriction); restriction = XCDR (restriction))
2052 struct charset *rcharset;
2054 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2055 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2056 return XCAR (restriction);
2058 return Qnil;
2060 restriction = coding_system_charset_list (restriction);
2061 charset = char_charset (XINT (ch), restriction, NULL);
2062 if (! charset)
2063 return Qnil;
2065 return (CHARSET_NAME (charset));
2069 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2070 doc: /*
2071 Return charset of a character in the current buffer at position POS.
2072 If POS is nil, it defaults to the current point.
2073 If POS is out of range, the value is nil. */)
2074 (Lisp_Object pos)
2076 Lisp_Object ch;
2077 struct charset *charset;
2079 ch = Fchar_after (pos);
2080 if (! INTEGERP (ch))
2081 return ch;
2082 charset = CHAR_CHARSET (XINT (ch));
2083 return (CHARSET_NAME (charset));
2087 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2088 doc: /*
2089 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2091 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2092 by their DIMENSION, CHARS, and FINAL-CHAR,
2093 whereas Emacs distinguishes them by charset symbol.
2094 See the documentation of the function `charset-info' for the meanings of
2095 DIMENSION, CHARS, and FINAL-CHAR. */)
2096 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2098 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
2099 int id = ISO_CHARSET_TABLE (XINT (dimension), chars_flag,
2100 XFASTINT (final_char));
2101 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2105 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2106 0, 0, 0,
2107 doc: /*
2108 Internal use only.
2109 Clear temporary charset mapping tables.
2110 It should be called only from temacs invoked for dumping. */)
2111 (void)
2113 if (temp_charset_work)
2115 xfree (temp_charset_work);
2116 temp_charset_work = NULL;
2119 if (CHAR_TABLE_P (Vchar_unify_table))
2120 Foptimize_char_table (Vchar_unify_table, Qnil);
2122 return Qnil;
2125 DEFUN ("charset-priority-list", Fcharset_priority_list,
2126 Scharset_priority_list, 0, 1, 0,
2127 doc: /* Return the list of charsets ordered by priority.
2128 HIGHESTP non-nil means just return the highest priority one. */)
2129 (Lisp_Object highestp)
2131 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2133 if (!NILP (highestp))
2134 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2136 while (!NILP (list))
2138 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2139 list = XCDR (list);
2141 return Fnreverse (val);
2144 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2145 1, MANY, 0,
2146 doc: /* Assign higher priority to the charsets given as arguments.
2147 usage: (set-charset-priority &rest charsets) */)
2148 (ptrdiff_t nargs, Lisp_Object *args)
2150 Lisp_Object new_head, old_list;
2151 Lisp_Object list_2022, list_emacs_mule;
2152 ptrdiff_t i;
2153 int id;
2155 old_list = Fcopy_sequence (Vcharset_ordered_list);
2156 new_head = Qnil;
2157 for (i = 0; i < nargs; i++)
2159 CHECK_CHARSET_GET_ID (args[i], id);
2160 if (! NILP (Fmemq (make_number (id), old_list)))
2162 old_list = Fdelq (make_number (id), old_list);
2163 new_head = Fcons (make_number (id), new_head);
2166 Vcharset_non_preferred_head = old_list;
2167 Vcharset_ordered_list = CALLN (Fnconc, Fnreverse (new_head), old_list);
2169 charset_ordered_list_tick++;
2171 charset_unibyte = -1;
2172 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2173 CONSP (old_list); old_list = XCDR (old_list))
2175 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2176 list_2022 = Fcons (XCAR (old_list), list_2022);
2177 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2178 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2179 if (charset_unibyte < 0)
2181 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2183 if (CHARSET_DIMENSION (charset) == 1
2184 && CHARSET_ASCII_COMPATIBLE_P (charset)
2185 && CHARSET_MAX_CHAR (charset) >= 0x80)
2186 charset_unibyte = CHARSET_ID (charset);
2189 Viso_2022_charset_list = Fnreverse (list_2022);
2190 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2191 if (charset_unibyte < 0)
2192 charset_unibyte = charset_iso_8859_1;
2194 return Qnil;
2197 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2198 0, 1, 0,
2199 doc: /* Internal use only.
2200 Return charset identification number of CHARSET. */)
2201 (Lisp_Object charset)
2203 int id;
2205 CHECK_CHARSET_GET_ID (charset, id);
2206 return make_number (id);
2209 struct charset_sort_data
2211 Lisp_Object charset;
2212 int id;
2213 ptrdiff_t priority;
2216 static int
2217 charset_compare (const void *d1, const void *d2)
2219 const struct charset_sort_data *data1 = d1, *data2 = d2;
2220 if (data1->priority != data2->priority)
2221 return data1->priority < data2->priority ? -1 : 1;
2222 return 0;
2225 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2226 doc: /* Sort charset list CHARSETS by a priority of each charset.
2227 Return the sorted list. CHARSETS is modified by side effects.
2228 See also `charset-priority-list' and `set-charset-priority'. */)
2229 (Lisp_Object charsets)
2231 Lisp_Object len = Flength (charsets);
2232 ptrdiff_t n = XFASTINT (len), i, j;
2233 int done;
2234 Lisp_Object tail, elt, attrs;
2235 struct charset_sort_data *sort_data;
2236 int id, min_id = INT_MAX, max_id = INT_MIN;
2237 USE_SAFE_ALLOCA;
2239 if (n == 0)
2240 return Qnil;
2241 SAFE_NALLOCA (sort_data, 1, n);
2242 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2244 elt = XCAR (tail);
2245 CHECK_CHARSET_GET_ATTR (elt, attrs);
2246 sort_data[i].charset = elt;
2247 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2248 if (id < min_id)
2249 min_id = id;
2250 if (id > max_id)
2251 max_id = id;
2253 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2254 done < n && CONSP (tail); tail = XCDR (tail), i++)
2256 elt = XCAR (tail);
2257 id = XFASTINT (elt);
2258 if (id >= min_id && id <= max_id)
2259 for (j = 0; j < n; j++)
2260 if (sort_data[j].id == id)
2262 sort_data[j].priority = i;
2263 done++;
2266 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2267 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2268 XSETCAR (tail, sort_data[i].charset);
2269 SAFE_FREE ();
2270 return charsets;
2274 void
2275 init_charset (void)
2277 Lisp_Object tempdir;
2278 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2279 if (! file_accessible_directory_p (tempdir))
2281 /* This used to be non-fatal (dir_warning), but it should not
2282 happen, and if it does sooner or later it will cause some
2283 obscure problem (eg bug#6401), so better abort. */
2284 fprintf (stderr, "Error: charsets directory not found:\n\
2285 %s\n\
2286 Emacs will not function correctly without the character map files.\n%s\
2287 Please check your installation!\n",
2288 SDATA (tempdir),
2289 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2290 variable is set, maybe it has the wrong value?\n" : "");
2291 exit (1);
2294 Vcharset_map_path = list1 (tempdir);
2298 void
2299 init_charset_once (void)
2301 int i, j, k;
2303 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2304 for (j = 0; j < ISO_MAX_CHARS; j++)
2305 for (k = 0; k < ISO_MAX_FINAL; k++)
2306 iso_charset_table[i][j][k] = -1;
2308 for (i = 0; i < 256; i++)
2309 emacs_mule_charset[i] = -1;
2311 charset_jisx0201_roman = -1;
2312 charset_jisx0208_1978 = -1;
2313 charset_jisx0208 = -1;
2314 charset_ksc5601 = -1;
2317 #ifdef emacs
2319 /* Allocate an initial charset table that is large enough to handle
2320 Emacs while it is bootstrapping. As of September 2011, the size
2321 needs to be at least 166; make it a bit bigger to allow for future
2322 expansion.
2324 Don't make the value so small that the table is reallocated during
2325 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2326 during an initial bootstrap wreak havoc after dumping; see the
2327 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2328 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2329 static struct charset charset_table_init[180];
2331 void
2332 syms_of_charset (void)
2334 DEFSYM (Qcharsetp, "charsetp");
2336 /* Special charset symbols. */
2337 DEFSYM (Qascii, "ascii");
2338 DEFSYM (Qunicode, "unicode");
2339 DEFSYM (Qemacs, "emacs");
2340 DEFSYM (Qeight_bit, "eight-bit");
2341 DEFSYM (Qiso_8859_1, "iso-8859-1");
2343 staticpro (&Vcharset_ordered_list);
2344 Vcharset_ordered_list = Qnil;
2346 staticpro (&Viso_2022_charset_list);
2347 Viso_2022_charset_list = Qnil;
2349 staticpro (&Vemacs_mule_charset_list);
2350 Vemacs_mule_charset_list = Qnil;
2352 staticpro (&Vcharset_hash_table);
2353 Vcharset_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
2355 charset_table = charset_table_init;
2356 charset_table_size = ARRAYELTS (charset_table_init);
2357 charset_table_used = 0;
2359 defsubr (&Scharsetp);
2360 defsubr (&Smap_charset_chars);
2361 defsubr (&Sdefine_charset_internal);
2362 defsubr (&Sdefine_charset_alias);
2363 defsubr (&Scharset_plist);
2364 defsubr (&Sset_charset_plist);
2365 defsubr (&Sunify_charset);
2366 defsubr (&Sget_unused_iso_final_char);
2367 defsubr (&Sdeclare_equiv_charset);
2368 defsubr (&Sfind_charset_region);
2369 defsubr (&Sfind_charset_string);
2370 defsubr (&Sdecode_char);
2371 defsubr (&Sencode_char);
2372 defsubr (&Ssplit_char);
2373 defsubr (&Smake_char);
2374 defsubr (&Schar_charset);
2375 defsubr (&Scharset_after);
2376 defsubr (&Siso_charset);
2377 defsubr (&Sclear_charset_maps);
2378 defsubr (&Scharset_priority_list);
2379 defsubr (&Sset_charset_priority);
2380 defsubr (&Scharset_id_internal);
2381 defsubr (&Ssort_charsets);
2383 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2384 doc: /* List of directories to search for charset map files. */);
2385 Vcharset_map_path = Qnil;
2387 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2388 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2389 inhibit_load_charset_map = 0;
2391 DEFVAR_LISP ("charset-list", Vcharset_list,
2392 doc: /* List of all charsets ever defined. */);
2393 Vcharset_list = Qnil;
2395 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2396 doc: /* ISO639 language mnemonic symbol for the current language environment.
2397 If the current language environment is for multiple languages (e.g. "Latin-1"),
2398 the value may be a list of mnemonics. */);
2399 Vcurrent_iso639_language = Qnil;
2401 charset_ascii
2402 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2403 0, 127, 'B', -1, 0, 1, 0, 0);
2404 charset_iso_8859_1
2405 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
2406 0, 255, -1, -1, -1, 1, 0, 0);
2407 charset_unicode
2408 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2409 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2410 charset_emacs
2411 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2412 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2413 charset_eight_bit
2414 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
2415 128, 255, -1, 0, -1, 0, 1,
2416 MAX_5_BYTE_CHAR + 1);
2417 charset_unibyte = charset_iso_8859_1;
2420 #endif /* emacs */