Don't suggest Mailutils on MS-Windows
[emacs.git] / src / chartab.c
blobfa5a8e411643a2a8ab742d48096c6d8566b1e906
1 /* chartab.c -- char-table support
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H13PRO009
6 This file is part of GNU Emacs.
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include "lisp.h"
24 #include "character.h"
25 #include "charset.h"
27 /* 64/16/32/128 */
29 /* Number of elements in Nth level char-table. */
30 const int chartab_size[4] =
31 { (1 << CHARTAB_SIZE_BITS_0),
32 (1 << CHARTAB_SIZE_BITS_1),
33 (1 << CHARTAB_SIZE_BITS_2),
34 (1 << CHARTAB_SIZE_BITS_3) };
36 /* Number of characters each element of Nth level char-table
37 covers. */
38 static const int chartab_chars[4] =
39 { (1 << (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
40 (1 << (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
41 (1 << CHARTAB_SIZE_BITS_3),
42 1 };
44 /* Number of characters (in bits) each element of Nth level char-table
45 covers. */
46 static const int chartab_bits[4] =
47 { (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
48 (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
49 CHARTAB_SIZE_BITS_3,
50 0 };
52 #define CHARTAB_IDX(c, depth, min_char) \
53 (((c) - (min_char)) >> chartab_bits[(depth)])
56 /* Preamble for uniprop (Unicode character property) tables. See the
57 comment of "Unicode character property tables". */
59 /* Types of decoder and encoder functions for uniprop values. */
60 typedef Lisp_Object (*uniprop_decoder_t) (Lisp_Object, Lisp_Object);
61 typedef Lisp_Object (*uniprop_encoder_t) (Lisp_Object, Lisp_Object);
63 static Lisp_Object uniprop_table_uncompress (Lisp_Object, int);
64 static uniprop_decoder_t uniprop_get_decoder (Lisp_Object);
66 /* 1 iff TABLE is a uniprop table. */
67 #define UNIPROP_TABLE_P(TABLE) \
68 (EQ (XCHAR_TABLE (TABLE)->purpose, Qchar_code_property_table) \
69 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (TABLE)) == 5)
71 /* Return a decoder for values in the uniprop table TABLE. */
72 #define UNIPROP_GET_DECODER(TABLE) \
73 (UNIPROP_TABLE_P (TABLE) ? uniprop_get_decoder (TABLE) : NULL)
75 /* Nonzero iff OBJ is a string representing uniprop values of 128
76 succeeding characters (the bottom level of a char-table) by a
77 compressed format. We are sure that no property value has a string
78 starting with '\001' nor '\002'. */
79 #define UNIPROP_COMPRESSED_FORM_P(OBJ) \
80 (STRINGP (OBJ) && SCHARS (OBJ) > 0 \
81 && ((SREF (OBJ, 0) == 1 || (SREF (OBJ, 0) == 2))))
83 static void
84 CHECK_CHAR_TABLE (Lisp_Object x)
86 CHECK_TYPE (CHAR_TABLE_P (x), Qchar_table_p, x);
89 static void
90 set_char_table_ascii (Lisp_Object table, Lisp_Object val)
92 XCHAR_TABLE (table)->ascii = val;
94 static void
95 set_char_table_parent (Lisp_Object table, Lisp_Object val)
97 XCHAR_TABLE (table)->parent = val;
100 DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
101 doc: /* Return a newly created char-table, with purpose PURPOSE.
102 Each element is initialized to INIT, which defaults to nil.
104 PURPOSE should be a symbol. If it has a `char-table-extra-slots'
105 property, the property's value should be an integer between 0 and 10
106 that specifies how many extra slots the char-table has. Otherwise,
107 the char-table has no extra slot. */)
108 (register Lisp_Object purpose, Lisp_Object init)
110 Lisp_Object vector;
111 Lisp_Object n;
112 int n_extras;
113 int size;
115 CHECK_SYMBOL (purpose);
116 n = Fget (purpose, Qchar_table_extra_slots);
117 if (NILP (n))
118 n_extras = 0;
119 else
121 CHECK_NATNUM (n);
122 if (XINT (n) > 10)
123 args_out_of_range (n, Qnil);
124 n_extras = XINT (n);
127 size = CHAR_TABLE_STANDARD_SLOTS + n_extras;
128 vector = Fmake_vector (make_number (size), init);
129 XSETPVECTYPE (XVECTOR (vector), PVEC_CHAR_TABLE);
130 set_char_table_parent (vector, Qnil);
131 set_char_table_purpose (vector, purpose);
132 XSETCHAR_TABLE (vector, XCHAR_TABLE (vector));
133 return vector;
136 static Lisp_Object
137 make_sub_char_table (int depth, int min_char, Lisp_Object defalt)
139 int i;
140 Lisp_Object table = make_uninit_sub_char_table (depth, min_char);
142 for (i = 0; i < chartab_size[depth]; i++)
143 XSUB_CHAR_TABLE (table)->contents[i] = defalt;
144 return table;
147 static Lisp_Object
148 char_table_ascii (Lisp_Object table)
150 Lisp_Object sub, val;
152 sub = XCHAR_TABLE (table)->contents[0];
153 if (! SUB_CHAR_TABLE_P (sub))
154 return sub;
155 sub = XSUB_CHAR_TABLE (sub)->contents[0];
156 if (! SUB_CHAR_TABLE_P (sub))
157 return sub;
158 val = XSUB_CHAR_TABLE (sub)->contents[0];
159 if (UNIPROP_TABLE_P (table) && UNIPROP_COMPRESSED_FORM_P (val))
160 val = uniprop_table_uncompress (sub, 0);
161 return val;
164 static Lisp_Object
165 copy_sub_char_table (Lisp_Object table)
167 int depth = XSUB_CHAR_TABLE (table)->depth;
168 int min_char = XSUB_CHAR_TABLE (table)->min_char;
169 Lisp_Object copy = make_sub_char_table (depth, min_char, Qnil);
170 int i;
172 /* Recursively copy any sub char-tables. */
173 for (i = 0; i < chartab_size[depth]; i++)
175 Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[i];
176 set_sub_char_table_contents
177 (copy, i, SUB_CHAR_TABLE_P (val) ? copy_sub_char_table (val) : val);
180 return copy;
184 Lisp_Object
185 copy_char_table (Lisp_Object table)
187 Lisp_Object copy;
188 int size = XCHAR_TABLE (table)->header.size & PSEUDOVECTOR_SIZE_MASK;
189 int i;
191 copy = Fmake_vector (make_number (size), Qnil);
192 XSETPVECTYPE (XVECTOR (copy), PVEC_CHAR_TABLE);
193 set_char_table_defalt (copy, XCHAR_TABLE (table)->defalt);
194 set_char_table_parent (copy, XCHAR_TABLE (table)->parent);
195 set_char_table_purpose (copy, XCHAR_TABLE (table)->purpose);
196 for (i = 0; i < chartab_size[0]; i++)
197 set_char_table_contents
198 (copy, i,
199 (SUB_CHAR_TABLE_P (XCHAR_TABLE (table)->contents[i])
200 ? copy_sub_char_table (XCHAR_TABLE (table)->contents[i])
201 : XCHAR_TABLE (table)->contents[i]));
202 set_char_table_ascii (copy, char_table_ascii (copy));
203 size -= CHAR_TABLE_STANDARD_SLOTS;
204 for (i = 0; i < size; i++)
205 set_char_table_extras (copy, i, XCHAR_TABLE (table)->extras[i]);
207 XSETCHAR_TABLE (copy, XCHAR_TABLE (copy));
208 return copy;
211 static Lisp_Object
212 sub_char_table_ref (Lisp_Object table, int c, bool is_uniprop)
214 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
215 Lisp_Object val;
216 int idx = CHARTAB_IDX (c, tbl->depth, tbl->min_char);
218 val = tbl->contents[idx];
219 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
220 val = uniprop_table_uncompress (table, idx);
221 if (SUB_CHAR_TABLE_P (val))
222 val = sub_char_table_ref (val, c, is_uniprop);
223 return val;
226 Lisp_Object
227 char_table_ref (Lisp_Object table, int c)
229 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
230 Lisp_Object val;
232 if (ASCII_CHAR_P (c))
234 val = tbl->ascii;
235 if (SUB_CHAR_TABLE_P (val))
236 val = XSUB_CHAR_TABLE (val)->contents[c];
238 else
240 val = tbl->contents[CHARTAB_IDX (c, 0, 0)];
241 if (SUB_CHAR_TABLE_P (val))
242 val = sub_char_table_ref (val, c, UNIPROP_TABLE_P (table));
244 if (NILP (val))
246 val = tbl->defalt;
247 if (NILP (val) && CHAR_TABLE_P (tbl->parent))
248 val = char_table_ref (tbl->parent, c);
250 return val;
253 static Lisp_Object
254 sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to,
255 Lisp_Object defalt, bool is_uniprop)
257 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
258 int depth = tbl->depth, min_char = tbl->min_char;
259 int chartab_idx = CHARTAB_IDX (c, depth, min_char), idx;
260 Lisp_Object val;
262 val = tbl->contents[chartab_idx];
263 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
264 val = uniprop_table_uncompress (table, chartab_idx);
265 if (SUB_CHAR_TABLE_P (val))
266 val = sub_char_table_ref_and_range (val, c, from, to, defalt, is_uniprop);
267 else if (NILP (val))
268 val = defalt;
270 idx = chartab_idx;
271 while (idx > 0 && *from < min_char + idx * chartab_chars[depth])
273 Lisp_Object this_val;
275 c = min_char + idx * chartab_chars[depth] - 1;
276 idx--;
277 this_val = tbl->contents[idx];
278 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
279 this_val = uniprop_table_uncompress (table, idx);
280 if (SUB_CHAR_TABLE_P (this_val))
281 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
282 is_uniprop);
283 else if (NILP (this_val))
284 this_val = defalt;
286 if (! EQ (this_val, val))
288 *from = c + 1;
289 break;
292 while (((c = (chartab_idx + 1) * chartab_chars[depth])
293 < chartab_chars[depth - 1])
294 && (c += min_char) <= *to)
296 Lisp_Object this_val;
298 chartab_idx++;
299 this_val = tbl->contents[chartab_idx];
300 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
301 this_val = uniprop_table_uncompress (table, chartab_idx);
302 if (SUB_CHAR_TABLE_P (this_val))
303 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
304 is_uniprop);
305 else if (NILP (this_val))
306 this_val = defalt;
307 if (! EQ (this_val, val))
309 *to = c - 1;
310 break;
314 return val;
318 /* Return the value for C in char-table TABLE. Shrink the range *FROM
319 and *TO to cover characters (containing C) that have the same value
320 as C. It is not assured that the values of (*FROM - 1) and (*TO +
321 1) are different from that of C. */
323 Lisp_Object
324 char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
326 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
327 int chartab_idx = CHARTAB_IDX (c, 0, 0), idx;
328 Lisp_Object val;
329 bool is_uniprop = UNIPROP_TABLE_P (table);
331 val = tbl->contents[chartab_idx];
332 if (*from < 0)
333 *from = 0;
334 if (*to < 0)
335 *to = MAX_CHAR;
336 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
337 val = uniprop_table_uncompress (table, chartab_idx);
338 if (SUB_CHAR_TABLE_P (val))
339 val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt,
340 is_uniprop);
341 else if (NILP (val))
342 val = tbl->defalt;
343 idx = chartab_idx;
344 while (*from < idx * chartab_chars[0])
346 Lisp_Object this_val;
348 c = idx * chartab_chars[0] - 1;
349 idx--;
350 this_val = tbl->contents[idx];
351 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
352 this_val = uniprop_table_uncompress (table, idx);
353 if (SUB_CHAR_TABLE_P (this_val))
354 this_val = sub_char_table_ref_and_range (this_val, c, from, to,
355 tbl->defalt, is_uniprop);
356 else if (NILP (this_val))
357 this_val = tbl->defalt;
359 if (! EQ (this_val, val))
361 *from = c + 1;
362 break;
365 while (*to >= (chartab_idx + 1) * chartab_chars[0])
367 Lisp_Object this_val;
369 chartab_idx++;
370 c = chartab_idx * chartab_chars[0];
371 this_val = tbl->contents[chartab_idx];
372 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
373 this_val = uniprop_table_uncompress (table, chartab_idx);
374 if (SUB_CHAR_TABLE_P (this_val))
375 this_val = sub_char_table_ref_and_range (this_val, c, from, to,
376 tbl->defalt, is_uniprop);
377 else if (NILP (this_val))
378 this_val = tbl->defalt;
379 if (! EQ (this_val, val))
381 *to = c - 1;
382 break;
386 return val;
390 static void
391 sub_char_table_set (Lisp_Object table, int c, Lisp_Object val, bool is_uniprop)
393 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
394 int depth = tbl->depth, min_char = tbl->min_char;
395 int i = CHARTAB_IDX (c, depth, min_char);
396 Lisp_Object sub;
398 if (depth == 3)
399 set_sub_char_table_contents (table, i, val);
400 else
402 sub = tbl->contents[i];
403 if (! SUB_CHAR_TABLE_P (sub))
405 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
406 sub = uniprop_table_uncompress (table, i);
407 else
409 sub = make_sub_char_table (depth + 1,
410 min_char + i * chartab_chars[depth],
411 sub);
412 set_sub_char_table_contents (table, i, sub);
415 sub_char_table_set (sub, c, val, is_uniprop);
419 void
420 char_table_set (Lisp_Object table, int c, Lisp_Object val)
422 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
424 if (ASCII_CHAR_P (c)
425 && SUB_CHAR_TABLE_P (tbl->ascii))
426 set_sub_char_table_contents (tbl->ascii, c, val);
427 else
429 int i = CHARTAB_IDX (c, 0, 0);
430 Lisp_Object sub;
432 sub = tbl->contents[i];
433 if (! SUB_CHAR_TABLE_P (sub))
435 sub = make_sub_char_table (1, i * chartab_chars[0], sub);
436 set_char_table_contents (table, i, sub);
438 sub_char_table_set (sub, c, val, UNIPROP_TABLE_P (table));
439 if (ASCII_CHAR_P (c))
440 set_char_table_ascii (table, char_table_ascii (table));
444 static void
445 sub_char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val,
446 bool is_uniprop)
448 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
449 int depth = tbl->depth, min_char = tbl->min_char;
450 int chars_in_block = chartab_chars[depth];
451 int i, c, lim = chartab_size[depth];
453 if (from < min_char)
454 from = min_char;
455 i = CHARTAB_IDX (from, depth, min_char);
456 c = min_char + chars_in_block * i;
457 for (; i < lim; i++, c += chars_in_block)
459 if (c > to)
460 break;
461 if (from <= c && c + chars_in_block - 1 <= to)
462 set_sub_char_table_contents (table, i, val);
463 else
465 Lisp_Object sub = tbl->contents[i];
466 if (! SUB_CHAR_TABLE_P (sub))
468 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
469 sub = uniprop_table_uncompress (table, i);
470 else
472 sub = make_sub_char_table (depth + 1, c, sub);
473 set_sub_char_table_contents (table, i, sub);
476 sub_char_table_set_range (sub, from, to, val, is_uniprop);
482 void
483 char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val)
485 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
487 if (from == to)
488 char_table_set (table, from, val);
489 else
491 bool is_uniprop = UNIPROP_TABLE_P (table);
492 int lim = CHARTAB_IDX (to, 0, 0);
493 int i, c;
495 for (i = CHARTAB_IDX (from, 0, 0), c = i * chartab_chars[0]; i <= lim;
496 i++, c += chartab_chars[0])
498 if (c > to)
499 break;
500 if (from <= c && c + chartab_chars[0] - 1 <= to)
501 set_char_table_contents (table, i, val);
502 else
504 Lisp_Object sub = tbl->contents[i];
505 if (! SUB_CHAR_TABLE_P (sub))
507 sub = make_sub_char_table (1, i * chartab_chars[0], sub);
508 set_char_table_contents (table, i, sub);
510 sub_char_table_set_range (sub, from, to, val, is_uniprop);
513 if (ASCII_CHAR_P (from))
514 set_char_table_ascii (table, char_table_ascii (table));
519 DEFUN ("char-table-subtype", Fchar_table_subtype, Schar_table_subtype,
520 1, 1, 0,
521 doc: /*
522 Return the subtype of char-table CHAR-TABLE. The value is a symbol. */)
523 (Lisp_Object char_table)
525 CHECK_CHAR_TABLE (char_table);
527 return XCHAR_TABLE (char_table)->purpose;
530 DEFUN ("char-table-parent", Fchar_table_parent, Schar_table_parent,
531 1, 1, 0,
532 doc: /* Return the parent char-table of CHAR-TABLE.
533 The value is either nil or another char-table.
534 If CHAR-TABLE holds nil for a given character,
535 then the actual applicable value is inherited from the parent char-table
536 \(or from its parents, if necessary). */)
537 (Lisp_Object char_table)
539 CHECK_CHAR_TABLE (char_table);
541 return XCHAR_TABLE (char_table)->parent;
544 DEFUN ("set-char-table-parent", Fset_char_table_parent, Sset_char_table_parent,
545 2, 2, 0,
546 doc: /* Set the parent char-table of CHAR-TABLE to PARENT.
547 Return PARENT. PARENT must be either nil or another char-table. */)
548 (Lisp_Object char_table, Lisp_Object parent)
550 Lisp_Object temp;
552 CHECK_CHAR_TABLE (char_table);
554 if (!NILP (parent))
556 CHECK_CHAR_TABLE (parent);
558 for (temp = parent; !NILP (temp); temp = XCHAR_TABLE (temp)->parent)
559 if (EQ (temp, char_table))
560 error ("Attempt to make a chartable be its own parent");
563 set_char_table_parent (char_table, parent);
565 return parent;
568 DEFUN ("char-table-extra-slot", Fchar_table_extra_slot, Schar_table_extra_slot,
569 2, 2, 0,
570 doc: /* Return the value of CHAR-TABLE's extra-slot number N. */)
571 (Lisp_Object char_table, Lisp_Object n)
573 CHECK_CHAR_TABLE (char_table);
574 CHECK_NUMBER (n);
575 if (XINT (n) < 0
576 || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
577 args_out_of_range (char_table, n);
579 return XCHAR_TABLE (char_table)->extras[XINT (n)];
582 DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
583 Sset_char_table_extra_slot,
584 3, 3, 0,
585 doc: /* Set CHAR-TABLE's extra-slot number N to VALUE. */)
586 (Lisp_Object char_table, Lisp_Object n, Lisp_Object value)
588 CHECK_CHAR_TABLE (char_table);
589 CHECK_NUMBER (n);
590 if (XINT (n) < 0
591 || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
592 args_out_of_range (char_table, n);
594 set_char_table_extras (char_table, XINT (n), value);
595 return value;
598 DEFUN ("char-table-range", Fchar_table_range, Schar_table_range,
599 2, 2, 0,
600 doc: /* Return the value in CHAR-TABLE for a range of characters RANGE.
601 RANGE should be nil (for the default value),
602 a cons of character codes (for characters in the range), or a character code. */)
603 (Lisp_Object char_table, Lisp_Object range)
605 Lisp_Object val;
606 CHECK_CHAR_TABLE (char_table);
608 if (EQ (range, Qnil))
609 val = XCHAR_TABLE (char_table)->defalt;
610 else if (CHARACTERP (range))
611 val = CHAR_TABLE_REF (char_table, XFASTINT (range));
612 else if (CONSP (range))
614 int from, to;
616 CHECK_CHARACTER_CAR (range);
617 CHECK_CHARACTER_CDR (range);
618 from = XFASTINT (XCAR (range));
619 to = XFASTINT (XCDR (range));
620 val = char_table_ref_and_range (char_table, from, &from, &to);
621 /* Not yet implemented. */
623 else
624 error ("Invalid RANGE argument to `char-table-range'");
625 return val;
628 DEFUN ("set-char-table-range", Fset_char_table_range, Sset_char_table_range,
629 3, 3, 0,
630 doc: /* Set the value in CHAR-TABLE for a range of characters RANGE to VALUE.
631 RANGE should be t (for all characters), nil (for the default value),
632 a cons of character codes (for characters in the range),
633 or a character code. Return VALUE. */)
634 (Lisp_Object char_table, Lisp_Object range, Lisp_Object value)
636 CHECK_CHAR_TABLE (char_table);
637 if (EQ (range, Qt))
639 int i;
641 set_char_table_ascii (char_table, value);
642 for (i = 0; i < chartab_size[0]; i++)
643 set_char_table_contents (char_table, i, value);
645 else if (EQ (range, Qnil))
646 set_char_table_defalt (char_table, value);
647 else if (CHARACTERP (range))
648 char_table_set (char_table, XINT (range), value);
649 else if (CONSP (range))
651 CHECK_CHARACTER_CAR (range);
652 CHECK_CHARACTER_CDR (range);
653 char_table_set_range (char_table,
654 XINT (XCAR (range)), XINT (XCDR (range)), value);
656 else
657 error ("Invalid RANGE argument to `set-char-table-range'");
659 return value;
662 static Lisp_Object
663 optimize_sub_char_table (Lisp_Object table, Lisp_Object test)
665 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
666 int i, depth = tbl->depth;
667 Lisp_Object elt, this;
668 bool optimizable;
670 elt = XSUB_CHAR_TABLE (table)->contents[0];
671 if (SUB_CHAR_TABLE_P (elt))
673 elt = optimize_sub_char_table (elt, test);
674 set_sub_char_table_contents (table, 0, elt);
676 optimizable = SUB_CHAR_TABLE_P (elt) ? 0 : 1;
677 for (i = 1; i < chartab_size[depth]; i++)
679 this = XSUB_CHAR_TABLE (table)->contents[i];
680 if (SUB_CHAR_TABLE_P (this))
682 this = optimize_sub_char_table (this, test);
683 set_sub_char_table_contents (table, i, this);
685 if (optimizable
686 && (NILP (test) ? NILP (Fequal (this, elt)) /* defaults to `equal'. */
687 : EQ (test, Qeq) ? !EQ (this, elt) /* Optimize `eq' case. */
688 : NILP (call2 (test, this, elt))))
689 optimizable = 0;
692 return (optimizable ? elt : table);
695 DEFUN ("optimize-char-table", Foptimize_char_table, Soptimize_char_table,
696 1, 2, 0,
697 doc: /* Optimize CHAR-TABLE.
698 TEST is the comparison function used to decide whether two entries are
699 equivalent and can be merged. It defaults to `equal'. */)
700 (Lisp_Object char_table, Lisp_Object test)
702 Lisp_Object elt;
703 int i;
705 CHECK_CHAR_TABLE (char_table);
707 for (i = 0; i < chartab_size[0]; i++)
709 elt = XCHAR_TABLE (char_table)->contents[i];
710 if (SUB_CHAR_TABLE_P (elt))
711 set_char_table_contents
712 (char_table, i, optimize_sub_char_table (elt, test));
714 /* Reset the `ascii' cache, in case it got optimized away. */
715 set_char_table_ascii (char_table, char_table_ascii (char_table));
717 return Qnil;
721 /* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table),
722 calling it for each character or group of characters that share a
723 value. RANGE is a cons (FROM . TO) specifying the range of target
724 characters, VAL is a value of FROM in TABLE, TOP is the top
725 char-table.
727 ARG is passed to C_FUNCTION when that is called.
729 It returns the value of last character covered by TABLE (not the
730 value inherited from the parent), and by side-effect, the car part
731 of RANGE is updated to the minimum character C where C and all the
732 following characters in TABLE have the same value. */
734 static Lisp_Object
735 map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
736 Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val,
737 Lisp_Object range, Lisp_Object top)
739 /* Depth of TABLE. */
740 int depth;
741 /* Minimum and maximum characters covered by TABLE. */
742 int min_char, max_char;
743 /* Number of characters covered by one element of TABLE. */
744 int chars_in_block;
745 int from = XINT (XCAR (range)), to = XINT (XCDR (range));
746 int i, c;
747 bool is_uniprop = UNIPROP_TABLE_P (top);
748 uniprop_decoder_t decoder = UNIPROP_GET_DECODER (top);
750 if (SUB_CHAR_TABLE_P (table))
752 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
754 depth = tbl->depth;
755 min_char = tbl->min_char;
756 max_char = min_char + chartab_chars[depth - 1] - 1;
758 else
760 depth = 0;
761 min_char = 0;
762 max_char = MAX_CHAR;
764 chars_in_block = chartab_chars[depth];
766 if (to < max_char)
767 max_char = to;
768 /* Set I to the index of the first element to check. */
769 if (from <= min_char)
770 i = 0;
771 else
772 i = (from - min_char) / chars_in_block;
773 for (c = min_char + chars_in_block * i; c <= max_char;
774 i++, c += chars_in_block)
776 Lisp_Object this = (SUB_CHAR_TABLE_P (table)
777 ? XSUB_CHAR_TABLE (table)->contents[i]
778 : XCHAR_TABLE (table)->contents[i]);
779 int nextc = c + chars_in_block;
781 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this))
782 this = uniprop_table_uncompress (table, i);
783 if (SUB_CHAR_TABLE_P (this))
785 if (to >= nextc)
786 XSETCDR (range, make_number (nextc - 1));
787 val = map_sub_char_table (c_function, function, this, arg,
788 val, range, top);
790 else
792 if (NILP (this))
793 this = XCHAR_TABLE (top)->defalt;
794 if (!EQ (val, this))
796 bool different_value = 1;
798 if (NILP (val))
800 if (! NILP (XCHAR_TABLE (top)->parent))
802 Lisp_Object parent = XCHAR_TABLE (top)->parent;
803 Lisp_Object temp = XCHAR_TABLE (parent)->parent;
805 /* This is to get a value of FROM in PARENT
806 without checking the parent of PARENT. */
807 set_char_table_parent (parent, Qnil);
808 val = CHAR_TABLE_REF (parent, from);
809 set_char_table_parent (parent, temp);
810 XSETCDR (range, make_number (c - 1));
811 val = map_sub_char_table (c_function, function,
812 parent, arg, val, range,
813 parent);
814 if (EQ (val, this))
815 different_value = 0;
818 if (! NILP (val) && different_value)
820 XSETCDR (range, make_number (c - 1));
821 if (EQ (XCAR (range), XCDR (range)))
823 if (c_function)
824 (*c_function) (arg, XCAR (range), val);
825 else
827 if (decoder)
828 val = decoder (top, val);
829 call2 (function, XCAR (range), val);
832 else
834 if (c_function)
835 (*c_function) (arg, range, val);
836 else
838 if (decoder)
839 val = decoder (top, val);
840 call2 (function, range, val);
844 val = this;
845 from = c;
846 XSETCAR (range, make_number (c));
849 XSETCDR (range, make_number (to));
851 return val;
855 /* Map C_FUNCTION or FUNCTION over TABLE, calling it for each
856 character or group of characters that share a value.
858 ARG is passed to C_FUNCTION when that is called. */
860 void
861 map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
862 Lisp_Object function, Lisp_Object table, Lisp_Object arg)
864 Lisp_Object range, val, parent;
865 uniprop_decoder_t decoder = UNIPROP_GET_DECODER (table);
867 range = Fcons (make_number (0), make_number (MAX_CHAR));
868 parent = XCHAR_TABLE (table)->parent;
870 val = XCHAR_TABLE (table)->ascii;
871 if (SUB_CHAR_TABLE_P (val))
872 val = XSUB_CHAR_TABLE (val)->contents[0];
873 val = map_sub_char_table (c_function, function, table, arg, val, range,
874 table);
876 /* If VAL is nil and TABLE has a parent, we must consult the parent
877 recursively. */
878 while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent))
880 Lisp_Object temp;
881 int from = XINT (XCAR (range));
883 parent = XCHAR_TABLE (table)->parent;
884 temp = XCHAR_TABLE (parent)->parent;
885 /* This is to get a value of FROM in PARENT without checking the
886 parent of PARENT. */
887 set_char_table_parent (parent, Qnil);
888 val = CHAR_TABLE_REF (parent, from);
889 set_char_table_parent (parent, temp);
890 val = map_sub_char_table (c_function, function, parent, arg, val, range,
891 parent);
892 table = parent;
895 if (! NILP (val))
897 if (EQ (XCAR (range), XCDR (range)))
899 if (c_function)
900 (*c_function) (arg, XCAR (range), val);
901 else
903 if (decoder)
904 val = decoder (table, val);
905 call2 (function, XCAR (range), val);
908 else
910 if (c_function)
911 (*c_function) (arg, range, val);
912 else
914 if (decoder)
915 val = decoder (table, val);
916 call2 (function, range, val);
922 DEFUN ("map-char-table", Fmap_char_table, Smap_char_table,
923 2, 2, 0,
924 doc: /* Call FUNCTION for each character in CHAR-TABLE that has non-nil value.
925 FUNCTION is called with two arguments, KEY and VALUE.
926 KEY is a character code or a cons of character codes specifying a
927 range of characters that have the same value.
928 VALUE is what (char-table-range CHAR-TABLE KEY) returns. */)
929 (Lisp_Object function, Lisp_Object char_table)
931 CHECK_CHAR_TABLE (char_table);
933 map_char_table (NULL, function, char_table, char_table);
934 return Qnil;
938 static void
939 map_sub_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
940 Lisp_Object function, Lisp_Object table, Lisp_Object arg,
941 Lisp_Object range, struct charset *charset,
942 unsigned from, unsigned to)
944 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
945 int i, c = tbl->min_char, depth = tbl->depth;
947 if (depth < 3)
948 for (i = 0; i < chartab_size[depth]; i++, c += chartab_chars[depth])
950 Lisp_Object this;
952 this = tbl->contents[i];
953 if (SUB_CHAR_TABLE_P (this))
954 map_sub_char_table_for_charset (c_function, function, this, arg,
955 range, charset, from, to);
956 else
958 if (! NILP (XCAR (range)))
960 XSETCDR (range, make_number (c - 1));
961 if (c_function)
962 (*c_function) (arg, range);
963 else
964 call2 (function, range, arg);
966 XSETCAR (range, Qnil);
969 else
970 for (i = 0; i < chartab_size[depth]; i++, c++)
972 Lisp_Object this;
973 unsigned code;
975 this = tbl->contents[i];
976 if (NILP (this)
977 || (charset
978 && (code = ENCODE_CHAR (charset, c),
979 (code < from || code > to))))
981 if (! NILP (XCAR (range)))
983 XSETCDR (range, make_number (c - 1));
984 if (c_function)
985 (*c_function) (arg, range);
986 else
987 call2 (function, range, arg);
988 XSETCAR (range, Qnil);
991 else
993 if (NILP (XCAR (range)))
994 XSETCAR (range, make_number (c));
1000 /* Support function for `map-charset-chars'. Map C_FUNCTION or
1001 FUNCTION over TABLE, calling it for each character or a group of
1002 succeeding characters that have non-nil value in TABLE. TABLE is a
1003 "mapping table" or a "deunifier table" of a certain charset.
1005 If CHARSET is not NULL (this is the case that `map-charset-chars'
1006 is called with non-nil FROM-CODE and TO-CODE), it is a charset who
1007 owns TABLE, and the function is called only on a character in the
1008 range FROM and TO. FROM and TO are not character codes, but code
1009 points of a character in CHARSET.
1011 This function is called in these two cases:
1013 (1) A charset has a mapping file name in :map property.
1015 (2) A charset has an upper code space in :offset property and a
1016 mapping file name in :unify-map property. In this case, this
1017 function is called only for characters in the Unicode code space.
1018 Characters in upper code space are handled directly in
1019 map_charset_chars. */
1021 void
1022 map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
1023 Lisp_Object function, Lisp_Object table, Lisp_Object arg,
1024 struct charset *charset,
1025 unsigned from, unsigned to)
1027 Lisp_Object range;
1028 int c, i;
1030 range = Fcons (Qnil, Qnil);
1032 for (i = 0, c = 0; i < chartab_size[0]; i++, c += chartab_chars[0])
1034 Lisp_Object this;
1036 this = XCHAR_TABLE (table)->contents[i];
1037 if (SUB_CHAR_TABLE_P (this))
1038 map_sub_char_table_for_charset (c_function, function, this, arg,
1039 range, charset, from, to);
1040 else
1042 if (! NILP (XCAR (range)))
1044 XSETCDR (range, make_number (c - 1));
1045 if (c_function)
1046 (*c_function) (arg, range);
1047 else
1048 call2 (function, range, arg);
1050 XSETCAR (range, Qnil);
1053 if (! NILP (XCAR (range)))
1055 XSETCDR (range, make_number (c - 1));
1056 if (c_function)
1057 (*c_function) (arg, range);
1058 else
1059 call2 (function, range, arg);
1064 /* Unicode character property tables.
1066 This section provides a convenient and efficient way to get Unicode
1067 character properties of characters from C code (from Lisp, you must
1068 use get-char-code-property).
1070 The typical usage is to get a char-table object for a specific
1071 property like this (use of the "bidi-class" property below is just
1072 an example):
1074 Lisp_Object bidi_class_table = uniprop_table (intern ("bidi-class"));
1076 (uniprop_table can return nil if it fails to find data for the
1077 named property, or if it fails to load the appropriate Lisp support
1078 file, so the return value should be tested to be non-nil, before it
1079 is used.)
1081 To get a property value for character CH use CHAR_TABLE_REF:
1083 Lisp_Object bidi_class = CHAR_TABLE_REF (bidi_class_table, CH);
1085 In this case, what you actually get is an index number to the
1086 vector of property values (symbols nil, L, R, etc).
1088 The full list of Unicode character properties supported by Emacs is
1089 documented in the ELisp manual, in the node "Character Properties".
1091 A table for Unicode character property has these characteristics:
1093 o The purpose is `char-code-property-table', which implies that the
1094 table has 5 extra slots.
1096 o The second extra slot is a Lisp function, an index (integer) to
1097 the array uniprop_decoder[], or nil. If it is a Lisp function, we
1098 can't use such a table from C (at the moment). If it is nil, it
1099 means that we don't have to decode values.
1101 o The third extra slot is a Lisp function, an index (integer) to
1102 the array uniprop_encoder[], or nil. If it is a Lisp function, we
1103 can't use such a table from C (at the moment). If it is nil, it
1104 means that we don't have to encode values. */
1107 /* Uncompress the IDXth element of sub-char-table TABLE. */
1109 static Lisp_Object
1110 uniprop_table_uncompress (Lisp_Object table, int idx)
1112 Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[idx];
1113 int min_char = XSUB_CHAR_TABLE (table)->min_char + chartab_chars[2] * idx;
1114 Lisp_Object sub = make_sub_char_table (3, min_char, Qnil);
1115 const unsigned char *p, *pend;
1117 set_sub_char_table_contents (table, idx, sub);
1118 p = SDATA (val), pend = p + SBYTES (val);
1119 if (*p == 1)
1121 /* SIMPLE TABLE */
1122 p++;
1123 idx = STRING_CHAR_ADVANCE (p);
1124 while (p < pend && idx < chartab_chars[2])
1126 int v = STRING_CHAR_ADVANCE (p);
1127 set_sub_char_table_contents
1128 (sub, idx++, v > 0 ? make_number (v) : Qnil);
1131 else if (*p == 2)
1133 /* RUN-LENGTH TABLE */
1134 p++;
1135 for (idx = 0; p < pend; )
1137 int v = STRING_CHAR_ADVANCE (p);
1138 int count = 1;
1139 int len;
1141 if (p < pend)
1143 count = STRING_CHAR_AND_LENGTH (p, len);
1144 if (count < 128)
1145 count = 1;
1146 else
1148 count -= 128;
1149 p += len;
1152 while (count-- > 0)
1153 set_sub_char_table_contents (sub, idx++, make_number (v));
1156 /* It seems that we don't need this function because C code won't need
1157 to get a property that is compressed in this form. */
1158 #if 0
1159 else if (*p == 0)
1161 /* WORD-LIST TABLE */
1163 #endif
1164 return sub;
1168 /* Decode VALUE as an element of char-table TABLE. */
1170 static Lisp_Object
1171 uniprop_decode_value_run_length (Lisp_Object table, Lisp_Object value)
1173 if (VECTORP (XCHAR_TABLE (table)->extras[4]))
1175 Lisp_Object valvec = XCHAR_TABLE (table)->extras[4];
1177 if (XINT (value) >= 0 && XINT (value) < ASIZE (valvec))
1178 value = AREF (valvec, XINT (value));
1180 return value;
1183 static uniprop_decoder_t uniprop_decoder [] =
1184 { uniprop_decode_value_run_length };
1186 static const int uniprop_decoder_count = ARRAYELTS (uniprop_decoder);
1188 /* Return the decoder of char-table TABLE or nil if none. */
1190 static uniprop_decoder_t
1191 uniprop_get_decoder (Lisp_Object table)
1193 EMACS_INT i;
1195 if (! INTEGERP (XCHAR_TABLE (table)->extras[1]))
1196 return NULL;
1197 i = XINT (XCHAR_TABLE (table)->extras[1]);
1198 if (i < 0 || i >= uniprop_decoder_count)
1199 return NULL;
1200 return uniprop_decoder[i];
1204 /* Encode VALUE as an element of char-table TABLE which contains
1205 characters as elements. */
1207 static Lisp_Object
1208 uniprop_encode_value_character (Lisp_Object table, Lisp_Object value)
1210 if (! NILP (value) && ! CHARACTERP (value))
1211 wrong_type_argument (Qintegerp, value);
1212 return value;
1216 /* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
1217 compression. */
1219 static Lisp_Object
1220 uniprop_encode_value_run_length (Lisp_Object table, Lisp_Object value)
1222 Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
1223 int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
1225 for (i = 0; i < size; i++)
1226 if (EQ (value, value_table[i]))
1227 break;
1228 if (i == size)
1229 wrong_type_argument (build_string ("Unicode property value"), value);
1230 return make_number (i);
1234 /* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
1235 compression and contains numbers as elements. */
1237 static Lisp_Object
1238 uniprop_encode_value_numeric (Lisp_Object table, Lisp_Object value)
1240 Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
1241 int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
1243 CHECK_NUMBER (value);
1244 for (i = 0; i < size; i++)
1245 if (EQ (value, value_table[i]))
1246 break;
1247 value = make_number (i);
1248 if (i == size)
1249 set_char_table_extras (table, 4,
1250 CALLN (Fvconcat,
1251 XCHAR_TABLE (table)->extras[4],
1252 Fmake_vector (make_number (1), value)));
1253 return make_number (i);
1256 static uniprop_encoder_t uniprop_encoder[] =
1257 { uniprop_encode_value_character,
1258 uniprop_encode_value_run_length,
1259 uniprop_encode_value_numeric };
1261 static const int uniprop_encoder_count = ARRAYELTS (uniprop_encoder);
1263 /* Return the encoder of char-table TABLE or nil if none. */
1265 static uniprop_decoder_t
1266 uniprop_get_encoder (Lisp_Object table)
1268 EMACS_INT i;
1270 if (! INTEGERP (XCHAR_TABLE (table)->extras[2]))
1271 return NULL;
1272 i = XINT (XCHAR_TABLE (table)->extras[2]);
1273 if (i < 0 || i >= uniprop_encoder_count)
1274 return NULL;
1275 return uniprop_encoder[i];
1278 /* Return a char-table for Unicode character property PROP. This
1279 function may load a Lisp file and thus may cause
1280 garbage-collection. */
1282 Lisp_Object
1283 uniprop_table (Lisp_Object prop)
1285 Lisp_Object val, table, result;
1287 val = Fassq (prop, Vchar_code_property_alist);
1288 if (! CONSP (val))
1289 return Qnil;
1290 table = XCDR (val);
1291 if (STRINGP (table))
1293 AUTO_STRING (intl, "international/");
1294 result = Fload (concat2 (intl, table), Qt, Qt, Qt, Qt);
1295 if (NILP (result))
1296 return Qnil;
1297 table = XCDR (val);
1299 if (! CHAR_TABLE_P (table)
1300 || ! UNIPROP_TABLE_P (table))
1301 return Qnil;
1302 val = XCHAR_TABLE (table)->extras[1];
1303 if (INTEGERP (val)
1304 ? (XINT (val) < 0 || XINT (val) >= uniprop_decoder_count)
1305 : ! NILP (val))
1306 return Qnil;
1307 /* Prepare ASCII values in advance for CHAR_TABLE_REF. */
1308 set_char_table_ascii (table, char_table_ascii (table));
1309 return table;
1312 DEFUN ("unicode-property-table-internal", Funicode_property_table_internal,
1313 Sunicode_property_table_internal, 1, 1, 0,
1314 doc: /* Return a char-table for Unicode character property PROP.
1315 Use `get-unicode-property-internal' and
1316 `put-unicode-property-internal' instead of `aref' and `aset' to get
1317 and put an element value. */)
1318 (Lisp_Object prop)
1320 Lisp_Object table = uniprop_table (prop);
1322 if (CHAR_TABLE_P (table))
1323 return table;
1324 return Fcdr (Fassq (prop, Vchar_code_property_alist));
1327 DEFUN ("get-unicode-property-internal", Fget_unicode_property_internal,
1328 Sget_unicode_property_internal, 2, 2, 0,
1329 doc: /* Return an element of CHAR-TABLE for character CH.
1330 CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
1331 (Lisp_Object char_table, Lisp_Object ch)
1333 Lisp_Object val;
1334 uniprop_decoder_t decoder;
1336 CHECK_CHAR_TABLE (char_table);
1337 CHECK_CHARACTER (ch);
1338 if (! UNIPROP_TABLE_P (char_table))
1339 error ("Invalid Unicode property table");
1340 val = CHAR_TABLE_REF (char_table, XINT (ch));
1341 decoder = uniprop_get_decoder (char_table);
1342 return (decoder ? decoder (char_table, val) : val);
1345 DEFUN ("put-unicode-property-internal", Fput_unicode_property_internal,
1346 Sput_unicode_property_internal, 3, 3, 0,
1347 doc: /* Set an element of CHAR-TABLE for character CH to VALUE.
1348 CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
1349 (Lisp_Object char_table, Lisp_Object ch, Lisp_Object value)
1351 uniprop_encoder_t encoder;
1353 CHECK_CHAR_TABLE (char_table);
1354 CHECK_CHARACTER (ch);
1355 if (! UNIPROP_TABLE_P (char_table))
1356 error ("Invalid Unicode property table");
1357 encoder = uniprop_get_encoder (char_table);
1358 if (encoder)
1359 value = encoder (char_table, value);
1360 CHAR_TABLE_SET (char_table, XINT (ch), value);
1361 return Qnil;
1365 void
1366 syms_of_chartab (void)
1368 /* Purpose of uniprop tables. */
1369 DEFSYM (Qchar_code_property_table, "char-code-property-table");
1371 defsubr (&Smake_char_table);
1372 defsubr (&Schar_table_parent);
1373 defsubr (&Schar_table_subtype);
1374 defsubr (&Sset_char_table_parent);
1375 defsubr (&Schar_table_extra_slot);
1376 defsubr (&Sset_char_table_extra_slot);
1377 defsubr (&Schar_table_range);
1378 defsubr (&Sset_char_table_range);
1379 defsubr (&Soptimize_char_table);
1380 defsubr (&Smap_char_table);
1381 defsubr (&Sunicode_property_table_internal);
1382 defsubr (&Sget_unicode_property_internal);
1383 defsubr (&Sput_unicode_property_internal);
1385 /* Each element has the form (PROP . TABLE).
1386 PROP is a symbol representing a character property.
1387 TABLE is a char-table containing the property value for each character.
1388 TABLE may be a name of file to load to build a char-table.
1389 This variable should be modified only through
1390 `define-char-code-property'. */
1392 DEFVAR_LISP ("char-code-property-alist", Vchar_code_property_alist,
1393 doc: /* Alist of character property name vs char-table containing property values.
1394 Internal use only. */);
1395 Vchar_code_property_alist = Qnil;