* lisp/subr.el (filter): Remove. Use `cl-remove-if-not' or `seq-filter'.
[emacs.git] / src / chartab.c
blobbfbbf798f0c17e88f07e22170df008bf02ab1070
1 /* chartab.c -- char-table support
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H13PRO009
6 This file is part of GNU Emacs.
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include "lisp.h"
24 #include "character.h"
25 #include "charset.h"
26 #include "ccl.h"
28 /* 64/16/32/128 */
30 /* Number of elements in Nth level char-table. */
31 const int chartab_size[4] =
32 { (1 << CHARTAB_SIZE_BITS_0),
33 (1 << CHARTAB_SIZE_BITS_1),
34 (1 << CHARTAB_SIZE_BITS_2),
35 (1 << CHARTAB_SIZE_BITS_3) };
37 /* Number of characters each element of Nth level char-table
38 covers. */
39 static const int chartab_chars[4] =
40 { (1 << (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
41 (1 << (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
42 (1 << CHARTAB_SIZE_BITS_3),
43 1 };
45 /* Number of characters (in bits) each element of Nth level char-table
46 covers. */
47 static const int chartab_bits[4] =
48 { (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
49 (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
50 CHARTAB_SIZE_BITS_3,
51 0 };
53 #define CHARTAB_IDX(c, depth, min_char) \
54 (((c) - (min_char)) >> chartab_bits[(depth)])
57 /* Preamble for uniprop (Unicode character property) tables. See the
58 comment of "Unicode character property tables". */
60 /* Purpose of uniprop tables. */
61 static Lisp_Object Qchar_code_property_table;
63 /* Types of decoder and encoder functions for uniprop values. */
64 typedef Lisp_Object (*uniprop_decoder_t) (Lisp_Object, Lisp_Object);
65 typedef Lisp_Object (*uniprop_encoder_t) (Lisp_Object, Lisp_Object);
67 static Lisp_Object uniprop_table_uncompress (Lisp_Object, int);
68 static uniprop_decoder_t uniprop_get_decoder (Lisp_Object);
70 /* 1 iff TABLE is a uniprop table. */
71 #define UNIPROP_TABLE_P(TABLE) \
72 (EQ (XCHAR_TABLE (TABLE)->purpose, Qchar_code_property_table) \
73 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (TABLE)) == 5)
75 /* Return a decoder for values in the uniprop table TABLE. */
76 #define UNIPROP_GET_DECODER(TABLE) \
77 (UNIPROP_TABLE_P (TABLE) ? uniprop_get_decoder (TABLE) : NULL)
79 /* Nonzero iff OBJ is a string representing uniprop values of 128
80 succeeding characters (the bottom level of a char-table) by a
81 compressed format. We are sure that no property value has a string
82 starting with '\001' nor '\002'. */
83 #define UNIPROP_COMPRESSED_FORM_P(OBJ) \
84 (STRINGP (OBJ) && SCHARS (OBJ) > 0 \
85 && ((SREF (OBJ, 0) == 1 || (SREF (OBJ, 0) == 2))))
87 static void
88 CHECK_CHAR_TABLE (Lisp_Object x)
90 CHECK_TYPE (CHAR_TABLE_P (x), Qchar_table_p, x);
93 static void
94 set_char_table_ascii (Lisp_Object table, Lisp_Object val)
96 XCHAR_TABLE (table)->ascii = val;
98 static void
99 set_char_table_parent (Lisp_Object table, Lisp_Object val)
101 XCHAR_TABLE (table)->parent = val;
104 DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
105 doc: /* Return a newly created char-table, with purpose PURPOSE.
106 Each element is initialized to INIT, which defaults to nil.
108 PURPOSE should be a symbol. If it has a `char-table-extra-slots'
109 property, the property's value should be an integer between 0 and 10
110 that specifies how many extra slots the char-table has. Otherwise,
111 the char-table has no extra slot. */)
112 (register Lisp_Object purpose, Lisp_Object init)
114 Lisp_Object vector;
115 Lisp_Object n;
116 int n_extras;
117 int size;
119 CHECK_SYMBOL (purpose);
120 n = Fget (purpose, Qchar_table_extra_slots);
121 if (NILP (n))
122 n_extras = 0;
123 else
125 CHECK_NATNUM (n);
126 if (XINT (n) > 10)
127 args_out_of_range (n, Qnil);
128 n_extras = XINT (n);
131 size = CHAR_TABLE_STANDARD_SLOTS + n_extras;
132 vector = Fmake_vector (make_number (size), init);
133 XSETPVECTYPE (XVECTOR (vector), PVEC_CHAR_TABLE);
134 set_char_table_parent (vector, Qnil);
135 set_char_table_purpose (vector, purpose);
136 XSETCHAR_TABLE (vector, XCHAR_TABLE (vector));
137 return vector;
140 static Lisp_Object
141 make_sub_char_table (int depth, int min_char, Lisp_Object defalt)
143 int i;
144 Lisp_Object table = make_uninit_sub_char_table (depth, min_char);
146 for (i = 0; i < chartab_size[depth]; i++)
147 XSUB_CHAR_TABLE (table)->contents[i] = defalt;
148 return table;
151 static Lisp_Object
152 char_table_ascii (Lisp_Object table)
154 Lisp_Object sub, val;
156 sub = XCHAR_TABLE (table)->contents[0];
157 if (! SUB_CHAR_TABLE_P (sub))
158 return sub;
159 sub = XSUB_CHAR_TABLE (sub)->contents[0];
160 if (! SUB_CHAR_TABLE_P (sub))
161 return sub;
162 val = XSUB_CHAR_TABLE (sub)->contents[0];
163 if (UNIPROP_TABLE_P (table) && UNIPROP_COMPRESSED_FORM_P (val))
164 val = uniprop_table_uncompress (sub, 0);
165 return val;
168 static Lisp_Object
169 copy_sub_char_table (Lisp_Object table)
171 int depth = XSUB_CHAR_TABLE (table)->depth;
172 int min_char = XSUB_CHAR_TABLE (table)->min_char;
173 Lisp_Object copy = make_sub_char_table (depth, min_char, Qnil);
174 int i;
176 /* Recursively copy any sub char-tables. */
177 for (i = 0; i < chartab_size[depth]; i++)
179 Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[i];
180 set_sub_char_table_contents
181 (copy, i, SUB_CHAR_TABLE_P (val) ? copy_sub_char_table (val) : val);
184 return copy;
188 Lisp_Object
189 copy_char_table (Lisp_Object table)
191 Lisp_Object copy;
192 int size = XCHAR_TABLE (table)->header.size & PSEUDOVECTOR_SIZE_MASK;
193 int i;
195 copy = Fmake_vector (make_number (size), Qnil);
196 XSETPVECTYPE (XVECTOR (copy), PVEC_CHAR_TABLE);
197 set_char_table_defalt (copy, XCHAR_TABLE (table)->defalt);
198 set_char_table_parent (copy, XCHAR_TABLE (table)->parent);
199 set_char_table_purpose (copy, XCHAR_TABLE (table)->purpose);
200 for (i = 0; i < chartab_size[0]; i++)
201 set_char_table_contents
202 (copy, i,
203 (SUB_CHAR_TABLE_P (XCHAR_TABLE (table)->contents[i])
204 ? copy_sub_char_table (XCHAR_TABLE (table)->contents[i])
205 : XCHAR_TABLE (table)->contents[i]));
206 set_char_table_ascii (copy, char_table_ascii (copy));
207 size -= CHAR_TABLE_STANDARD_SLOTS;
208 for (i = 0; i < size; i++)
209 set_char_table_extras (copy, i, XCHAR_TABLE (table)->extras[i]);
211 XSETCHAR_TABLE (copy, XCHAR_TABLE (copy));
212 return copy;
215 static Lisp_Object
216 sub_char_table_ref (Lisp_Object table, int c, bool is_uniprop)
218 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
219 Lisp_Object val;
220 int idx = CHARTAB_IDX (c, tbl->depth, tbl->min_char);
222 val = tbl->contents[idx];
223 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
224 val = uniprop_table_uncompress (table, idx);
225 if (SUB_CHAR_TABLE_P (val))
226 val = sub_char_table_ref (val, c, is_uniprop);
227 return val;
230 Lisp_Object
231 char_table_ref (Lisp_Object table, int c)
233 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
234 Lisp_Object val;
236 if (ASCII_CHAR_P (c))
238 val = tbl->ascii;
239 if (SUB_CHAR_TABLE_P (val))
240 val = XSUB_CHAR_TABLE (val)->contents[c];
242 else
244 val = tbl->contents[CHARTAB_IDX (c, 0, 0)];
245 if (SUB_CHAR_TABLE_P (val))
246 val = sub_char_table_ref (val, c, UNIPROP_TABLE_P (table));
248 if (NILP (val))
250 val = tbl->defalt;
251 if (NILP (val) && CHAR_TABLE_P (tbl->parent))
252 val = char_table_ref (tbl->parent, c);
254 return val;
257 static Lisp_Object
258 sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to,
259 Lisp_Object defalt, bool is_uniprop)
261 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
262 int depth = tbl->depth, min_char = tbl->min_char;
263 int chartab_idx = CHARTAB_IDX (c, depth, min_char), idx;
264 Lisp_Object val;
266 val = tbl->contents[chartab_idx];
267 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
268 val = uniprop_table_uncompress (table, chartab_idx);
269 if (SUB_CHAR_TABLE_P (val))
270 val = sub_char_table_ref_and_range (val, c, from, to, defalt, is_uniprop);
271 else if (NILP (val))
272 val = defalt;
274 idx = chartab_idx;
275 while (idx > 0 && *from < min_char + idx * chartab_chars[depth])
277 Lisp_Object this_val;
279 c = min_char + idx * chartab_chars[depth] - 1;
280 idx--;
281 this_val = tbl->contents[idx];
282 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
283 this_val = uniprop_table_uncompress (table, idx);
284 if (SUB_CHAR_TABLE_P (this_val))
285 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
286 is_uniprop);
287 else if (NILP (this_val))
288 this_val = defalt;
290 if (! EQ (this_val, val))
292 *from = c + 1;
293 break;
296 while (((c = (chartab_idx + 1) * chartab_chars[depth])
297 < chartab_chars[depth - 1])
298 && (c += min_char) <= *to)
300 Lisp_Object this_val;
302 chartab_idx++;
303 this_val = tbl->contents[chartab_idx];
304 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
305 this_val = uniprop_table_uncompress (table, chartab_idx);
306 if (SUB_CHAR_TABLE_P (this_val))
307 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
308 is_uniprop);
309 else if (NILP (this_val))
310 this_val = defalt;
311 if (! EQ (this_val, val))
313 *to = c - 1;
314 break;
318 return val;
322 /* Return the value for C in char-table TABLE. Shrink the range *FROM
323 and *TO to cover characters (containing C) that have the same value
324 as C. It is not assured that the values of (*FROM - 1) and (*TO +
325 1) are different from that of C. */
327 Lisp_Object
328 char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
330 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
331 int chartab_idx = CHARTAB_IDX (c, 0, 0), idx;
332 Lisp_Object val;
333 bool is_uniprop = UNIPROP_TABLE_P (table);
335 val = tbl->contents[chartab_idx];
336 if (*from < 0)
337 *from = 0;
338 if (*to < 0)
339 *to = MAX_CHAR;
340 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
341 val = uniprop_table_uncompress (table, chartab_idx);
342 if (SUB_CHAR_TABLE_P (val))
343 val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt,
344 is_uniprop);
345 else if (NILP (val))
346 val = tbl->defalt;
347 idx = chartab_idx;
348 while (*from < idx * chartab_chars[0])
350 Lisp_Object this_val;
352 c = idx * chartab_chars[0] - 1;
353 idx--;
354 this_val = tbl->contents[idx];
355 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
356 this_val = uniprop_table_uncompress (table, idx);
357 if (SUB_CHAR_TABLE_P (this_val))
358 this_val = sub_char_table_ref_and_range (this_val, c, from, to,
359 tbl->defalt, is_uniprop);
360 else if (NILP (this_val))
361 this_val = tbl->defalt;
363 if (! EQ (this_val, val))
365 *from = c + 1;
366 break;
369 while (*to >= (chartab_idx + 1) * chartab_chars[0])
371 Lisp_Object this_val;
373 chartab_idx++;
374 c = chartab_idx * chartab_chars[0];
375 this_val = tbl->contents[chartab_idx];
376 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
377 this_val = uniprop_table_uncompress (table, chartab_idx);
378 if (SUB_CHAR_TABLE_P (this_val))
379 this_val = sub_char_table_ref_and_range (this_val, c, from, to,
380 tbl->defalt, is_uniprop);
381 else if (NILP (this_val))
382 this_val = tbl->defalt;
383 if (! EQ (this_val, val))
385 *to = c - 1;
386 break;
390 return val;
394 static void
395 sub_char_table_set (Lisp_Object table, int c, Lisp_Object val, bool is_uniprop)
397 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
398 int depth = tbl->depth, min_char = tbl->min_char;
399 int i = CHARTAB_IDX (c, depth, min_char);
400 Lisp_Object sub;
402 if (depth == 3)
403 set_sub_char_table_contents (table, i, val);
404 else
406 sub = tbl->contents[i];
407 if (! SUB_CHAR_TABLE_P (sub))
409 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
410 sub = uniprop_table_uncompress (table, i);
411 else
413 sub = make_sub_char_table (depth + 1,
414 min_char + i * chartab_chars[depth],
415 sub);
416 set_sub_char_table_contents (table, i, sub);
419 sub_char_table_set (sub, c, val, is_uniprop);
423 void
424 char_table_set (Lisp_Object table, int c, Lisp_Object val)
426 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
428 if (ASCII_CHAR_P (c)
429 && SUB_CHAR_TABLE_P (tbl->ascii))
430 set_sub_char_table_contents (tbl->ascii, c, val);
431 else
433 int i = CHARTAB_IDX (c, 0, 0);
434 Lisp_Object sub;
436 sub = tbl->contents[i];
437 if (! SUB_CHAR_TABLE_P (sub))
439 sub = make_sub_char_table (1, i * chartab_chars[0], sub);
440 set_char_table_contents (table, i, sub);
442 sub_char_table_set (sub, c, val, UNIPROP_TABLE_P (table));
443 if (ASCII_CHAR_P (c))
444 set_char_table_ascii (table, char_table_ascii (table));
448 static void
449 sub_char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val,
450 bool is_uniprop)
452 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
453 int depth = tbl->depth, min_char = tbl->min_char;
454 int chars_in_block = chartab_chars[depth];
455 int i, c, lim = chartab_size[depth];
457 if (from < min_char)
458 from = min_char;
459 i = CHARTAB_IDX (from, depth, min_char);
460 c = min_char + chars_in_block * i;
461 for (; i < lim; i++, c += chars_in_block)
463 if (c > to)
464 break;
465 if (from <= c && c + chars_in_block - 1 <= to)
466 set_sub_char_table_contents (table, i, val);
467 else
469 Lisp_Object sub = tbl->contents[i];
470 if (! SUB_CHAR_TABLE_P (sub))
472 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
473 sub = uniprop_table_uncompress (table, i);
474 else
476 sub = make_sub_char_table (depth + 1, c, sub);
477 set_sub_char_table_contents (table, i, sub);
480 sub_char_table_set_range (sub, from, to, val, is_uniprop);
486 void
487 char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val)
489 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
491 if (from == to)
492 char_table_set (table, from, val);
493 else
495 bool is_uniprop = UNIPROP_TABLE_P (table);
496 int lim = CHARTAB_IDX (to, 0, 0);
497 int i, c;
499 for (i = CHARTAB_IDX (from, 0, 0), c = 0; i <= lim;
500 i++, c += chartab_chars[0])
502 if (c > to)
503 break;
504 if (from <= c && c + chartab_chars[0] - 1 <= to)
505 set_char_table_contents (table, i, val);
506 else
508 Lisp_Object sub = tbl->contents[i];
509 if (! SUB_CHAR_TABLE_P (sub))
511 sub = make_sub_char_table (1, i * chartab_chars[0], sub);
512 set_char_table_contents (table, i, sub);
514 sub_char_table_set_range (sub, from, to, val, is_uniprop);
517 if (ASCII_CHAR_P (from))
518 set_char_table_ascii (table, char_table_ascii (table));
523 DEFUN ("char-table-subtype", Fchar_table_subtype, Schar_table_subtype,
524 1, 1, 0,
525 doc: /*
526 Return the subtype of char-table CHAR-TABLE. The value is a symbol. */)
527 (Lisp_Object char_table)
529 CHECK_CHAR_TABLE (char_table);
531 return XCHAR_TABLE (char_table)->purpose;
534 DEFUN ("char-table-parent", Fchar_table_parent, Schar_table_parent,
535 1, 1, 0,
536 doc: /* Return the parent char-table of CHAR-TABLE.
537 The value is either nil or another char-table.
538 If CHAR-TABLE holds nil for a given character,
539 then the actual applicable value is inherited from the parent char-table
540 \(or from its parents, if necessary). */)
541 (Lisp_Object char_table)
543 CHECK_CHAR_TABLE (char_table);
545 return XCHAR_TABLE (char_table)->parent;
548 DEFUN ("set-char-table-parent", Fset_char_table_parent, Sset_char_table_parent,
549 2, 2, 0,
550 doc: /* Set the parent char-table of CHAR-TABLE to PARENT.
551 Return PARENT. PARENT must be either nil or another char-table. */)
552 (Lisp_Object char_table, Lisp_Object parent)
554 Lisp_Object temp;
556 CHECK_CHAR_TABLE (char_table);
558 if (!NILP (parent))
560 CHECK_CHAR_TABLE (parent);
562 for (temp = parent; !NILP (temp); temp = XCHAR_TABLE (temp)->parent)
563 if (EQ (temp, char_table))
564 error ("Attempt to make a chartable be its own parent");
567 set_char_table_parent (char_table, parent);
569 return parent;
572 DEFUN ("char-table-extra-slot", Fchar_table_extra_slot, Schar_table_extra_slot,
573 2, 2, 0,
574 doc: /* Return the value of CHAR-TABLE's extra-slot number N. */)
575 (Lisp_Object char_table, Lisp_Object n)
577 CHECK_CHAR_TABLE (char_table);
578 CHECK_NUMBER (n);
579 if (XINT (n) < 0
580 || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
581 args_out_of_range (char_table, n);
583 return XCHAR_TABLE (char_table)->extras[XINT (n)];
586 DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
587 Sset_char_table_extra_slot,
588 3, 3, 0,
589 doc: /* Set CHAR-TABLE's extra-slot number N to VALUE. */)
590 (Lisp_Object char_table, Lisp_Object n, Lisp_Object value)
592 CHECK_CHAR_TABLE (char_table);
593 CHECK_NUMBER (n);
594 if (XINT (n) < 0
595 || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
596 args_out_of_range (char_table, n);
598 set_char_table_extras (char_table, XINT (n), value);
599 return value;
602 DEFUN ("char-table-range", Fchar_table_range, Schar_table_range,
603 2, 2, 0,
604 doc: /* Return the value in CHAR-TABLE for a range of characters RANGE.
605 RANGE should be nil (for the default value),
606 a cons of character codes (for characters in the range), or a character code. */)
607 (Lisp_Object char_table, Lisp_Object range)
609 Lisp_Object val;
610 CHECK_CHAR_TABLE (char_table);
612 if (EQ (range, Qnil))
613 val = XCHAR_TABLE (char_table)->defalt;
614 else if (CHARACTERP (range))
615 val = CHAR_TABLE_REF (char_table, XFASTINT (range));
616 else if (CONSP (range))
618 int from, to;
620 CHECK_CHARACTER_CAR (range);
621 CHECK_CHARACTER_CDR (range);
622 from = XFASTINT (XCAR (range));
623 to = XFASTINT (XCDR (range));
624 val = char_table_ref_and_range (char_table, from, &from, &to);
625 /* Not yet implemented. */
627 else
628 error ("Invalid RANGE argument to `char-table-range'");
629 return val;
632 DEFUN ("set-char-table-range", Fset_char_table_range, Sset_char_table_range,
633 3, 3, 0,
634 doc: /* Set the value in CHAR-TABLE for a range of characters RANGE to VALUE.
635 RANGE should be t (for all characters), nil (for the default value),
636 a cons of character codes (for characters in the range),
637 or a character code. Return VALUE. */)
638 (Lisp_Object char_table, Lisp_Object range, Lisp_Object value)
640 CHECK_CHAR_TABLE (char_table);
641 if (EQ (range, Qt))
643 int i;
645 set_char_table_ascii (char_table, value);
646 for (i = 0; i < chartab_size[0]; i++)
647 set_char_table_contents (char_table, i, value);
649 else if (EQ (range, Qnil))
650 set_char_table_defalt (char_table, value);
651 else if (CHARACTERP (range))
652 char_table_set (char_table, XINT (range), value);
653 else if (CONSP (range))
655 CHECK_CHARACTER_CAR (range);
656 CHECK_CHARACTER_CDR (range);
657 char_table_set_range (char_table,
658 XINT (XCAR (range)), XINT (XCDR (range)), value);
660 else
661 error ("Invalid RANGE argument to `set-char-table-range'");
663 return value;
666 static Lisp_Object
667 optimize_sub_char_table (Lisp_Object table, Lisp_Object test)
669 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
670 int i, depth = tbl->depth;
671 Lisp_Object elt, this;
672 bool optimizable;
674 elt = XSUB_CHAR_TABLE (table)->contents[0];
675 if (SUB_CHAR_TABLE_P (elt))
677 elt = optimize_sub_char_table (elt, test);
678 set_sub_char_table_contents (table, 0, elt);
680 optimizable = SUB_CHAR_TABLE_P (elt) ? 0 : 1;
681 for (i = 1; i < chartab_size[depth]; i++)
683 this = XSUB_CHAR_TABLE (table)->contents[i];
684 if (SUB_CHAR_TABLE_P (this))
686 this = optimize_sub_char_table (this, test);
687 set_sub_char_table_contents (table, i, this);
689 if (optimizable
690 && (NILP (test) ? NILP (Fequal (this, elt)) /* defaults to `equal'. */
691 : EQ (test, Qeq) ? !EQ (this, elt) /* Optimize `eq' case. */
692 : NILP (call2 (test, this, elt))))
693 optimizable = 0;
696 return (optimizable ? elt : table);
699 DEFUN ("optimize-char-table", Foptimize_char_table, Soptimize_char_table,
700 1, 2, 0,
701 doc: /* Optimize CHAR-TABLE.
702 TEST is the comparison function used to decide whether two entries are
703 equivalent and can be merged. It defaults to `equal'. */)
704 (Lisp_Object char_table, Lisp_Object test)
706 Lisp_Object elt;
707 int i;
709 CHECK_CHAR_TABLE (char_table);
711 for (i = 0; i < chartab_size[0]; i++)
713 elt = XCHAR_TABLE (char_table)->contents[i];
714 if (SUB_CHAR_TABLE_P (elt))
715 set_char_table_contents
716 (char_table, i, optimize_sub_char_table (elt, test));
718 /* Reset the `ascii' cache, in case it got optimized away. */
719 set_char_table_ascii (char_table, char_table_ascii (char_table));
721 return Qnil;
725 /* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table),
726 calling it for each character or group of characters that share a
727 value. RANGE is a cons (FROM . TO) specifying the range of target
728 characters, VAL is a value of FROM in TABLE, TOP is the top
729 char-table.
731 ARG is passed to C_FUNCTION when that is called.
733 It returns the value of last character covered by TABLE (not the
734 value inherited from the parent), and by side-effect, the car part
735 of RANGE is updated to the minimum character C where C and all the
736 following characters in TABLE have the same value. */
738 static Lisp_Object
739 map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
740 Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val,
741 Lisp_Object range, Lisp_Object top)
743 /* Depth of TABLE. */
744 int depth;
745 /* Minimum and maximum characters covered by TABLE. */
746 int min_char, max_char;
747 /* Number of characters covered by one element of TABLE. */
748 int chars_in_block;
749 int from = XINT (XCAR (range)), to = XINT (XCDR (range));
750 int i, c;
751 bool is_uniprop = UNIPROP_TABLE_P (top);
752 uniprop_decoder_t decoder = UNIPROP_GET_DECODER (top);
754 if (SUB_CHAR_TABLE_P (table))
756 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
758 depth = tbl->depth;
759 min_char = tbl->min_char;
760 max_char = min_char + chartab_chars[depth - 1] - 1;
762 else
764 depth = 0;
765 min_char = 0;
766 max_char = MAX_CHAR;
768 chars_in_block = chartab_chars[depth];
770 if (to < max_char)
771 max_char = to;
772 /* Set I to the index of the first element to check. */
773 if (from <= min_char)
774 i = 0;
775 else
776 i = (from - min_char) / chars_in_block;
777 for (c = min_char + chars_in_block * i; c <= max_char;
778 i++, c += chars_in_block)
780 Lisp_Object this = (SUB_CHAR_TABLE_P (table)
781 ? XSUB_CHAR_TABLE (table)->contents[i]
782 : XCHAR_TABLE (table)->contents[i]);
783 int nextc = c + chars_in_block;
785 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this))
786 this = uniprop_table_uncompress (table, i);
787 if (SUB_CHAR_TABLE_P (this))
789 if (to >= nextc)
790 XSETCDR (range, make_number (nextc - 1));
791 val = map_sub_char_table (c_function, function, this, arg,
792 val, range, top);
794 else
796 if (NILP (this))
797 this = XCHAR_TABLE (top)->defalt;
798 if (!EQ (val, this))
800 bool different_value = 1;
802 if (NILP (val))
804 if (! NILP (XCHAR_TABLE (top)->parent))
806 Lisp_Object parent = XCHAR_TABLE (top)->parent;
807 Lisp_Object temp = XCHAR_TABLE (parent)->parent;
809 /* This is to get a value of FROM in PARENT
810 without checking the parent of PARENT. */
811 set_char_table_parent (parent, Qnil);
812 val = CHAR_TABLE_REF (parent, from);
813 set_char_table_parent (parent, temp);
814 XSETCDR (range, make_number (c - 1));
815 val = map_sub_char_table (c_function, function,
816 parent, arg, val, range,
817 parent);
818 if (EQ (val, this))
819 different_value = 0;
822 if (! NILP (val) && different_value)
824 XSETCDR (range, make_number (c - 1));
825 if (EQ (XCAR (range), XCDR (range)))
827 if (c_function)
828 (*c_function) (arg, XCAR (range), val);
829 else
831 if (decoder)
832 val = decoder (top, val);
833 call2 (function, XCAR (range), val);
836 else
838 if (c_function)
839 (*c_function) (arg, range, val);
840 else
842 if (decoder)
843 val = decoder (top, val);
844 call2 (function, range, val);
848 val = this;
849 from = c;
850 XSETCAR (range, make_number (c));
853 XSETCDR (range, make_number (to));
855 return val;
859 /* Map C_FUNCTION or FUNCTION over TABLE, calling it for each
860 character or group of characters that share a value.
862 ARG is passed to C_FUNCTION when that is called. */
864 void
865 map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
866 Lisp_Object function, Lisp_Object table, Lisp_Object arg)
868 Lisp_Object range, val, parent;
869 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
870 uniprop_decoder_t decoder = UNIPROP_GET_DECODER (table);
872 range = Fcons (make_number (0), make_number (MAX_CHAR));
873 parent = XCHAR_TABLE (table)->parent;
875 GCPRO4 (table, arg, range, parent);
876 val = XCHAR_TABLE (table)->ascii;
877 if (SUB_CHAR_TABLE_P (val))
878 val = XSUB_CHAR_TABLE (val)->contents[0];
879 val = map_sub_char_table (c_function, function, table, arg, val, range,
880 table);
882 /* If VAL is nil and TABLE has a parent, we must consult the parent
883 recursively. */
884 while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent))
886 Lisp_Object temp;
887 int from = XINT (XCAR (range));
889 parent = XCHAR_TABLE (table)->parent;
890 temp = XCHAR_TABLE (parent)->parent;
891 /* This is to get a value of FROM in PARENT without checking the
892 parent of PARENT. */
893 set_char_table_parent (parent, Qnil);
894 val = CHAR_TABLE_REF (parent, from);
895 set_char_table_parent (parent, temp);
896 val = map_sub_char_table (c_function, function, parent, arg, val, range,
897 parent);
898 table = parent;
901 if (! NILP (val))
903 if (EQ (XCAR (range), XCDR (range)))
905 if (c_function)
906 (*c_function) (arg, XCAR (range), val);
907 else
909 if (decoder)
910 val = decoder (table, val);
911 call2 (function, XCAR (range), val);
914 else
916 if (c_function)
917 (*c_function) (arg, range, val);
918 else
920 if (decoder)
921 val = decoder (table, val);
922 call2 (function, range, val);
927 UNGCPRO;
930 DEFUN ("map-char-table", Fmap_char_table, Smap_char_table,
931 2, 2, 0,
932 doc: /* Call FUNCTION for each character in CHAR-TABLE that has non-nil value.
933 FUNCTION is called with two arguments, KEY and VALUE.
934 KEY is a character code or a cons of character codes specifying a
935 range of characters that have the same value.
936 VALUE is what (char-table-range CHAR-TABLE KEY) returns. */)
937 (Lisp_Object function, Lisp_Object char_table)
939 CHECK_CHAR_TABLE (char_table);
941 map_char_table (NULL, function, char_table, char_table);
942 return Qnil;
946 static void
947 map_sub_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
948 Lisp_Object function, Lisp_Object table, Lisp_Object arg,
949 Lisp_Object range, struct charset *charset,
950 unsigned from, unsigned to)
952 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
953 int i, c = tbl->min_char, depth = tbl->depth;
955 if (depth < 3)
956 for (i = 0; i < chartab_size[depth]; i++, c += chartab_chars[depth])
958 Lisp_Object this;
960 this = tbl->contents[i];
961 if (SUB_CHAR_TABLE_P (this))
962 map_sub_char_table_for_charset (c_function, function, this, arg,
963 range, charset, from, to);
964 else
966 if (! NILP (XCAR (range)))
968 XSETCDR (range, make_number (c - 1));
969 if (c_function)
970 (*c_function) (arg, range);
971 else
972 call2 (function, range, arg);
974 XSETCAR (range, Qnil);
977 else
978 for (i = 0; i < chartab_size[depth]; i++, c++)
980 Lisp_Object this;
981 unsigned code;
983 this = tbl->contents[i];
984 if (NILP (this)
985 || (charset
986 && (code = ENCODE_CHAR (charset, c),
987 (code < from || code > to))))
989 if (! NILP (XCAR (range)))
991 XSETCDR (range, make_number (c - 1));
992 if (c_function)
993 (*c_function) (arg, range);
994 else
995 call2 (function, range, arg);
996 XSETCAR (range, Qnil);
999 else
1001 if (NILP (XCAR (range)))
1002 XSETCAR (range, make_number (c));
1008 /* Support function for `map-charset-chars'. Map C_FUNCTION or
1009 FUNCTION over TABLE, calling it for each character or a group of
1010 succeeding characters that have non-nil value in TABLE. TABLE is a
1011 "mapping table" or a "deunifier table" of a certain charset.
1013 If CHARSET is not NULL (this is the case that `map-charset-chars'
1014 is called with non-nil FROM-CODE and TO-CODE), it is a charset who
1015 owns TABLE, and the function is called only on a character in the
1016 range FROM and TO. FROM and TO are not character codes, but code
1017 points of a character in CHARSET.
1019 This function is called in these two cases:
1021 (1) A charset has a mapping file name in :map property.
1023 (2) A charset has an upper code space in :offset property and a
1024 mapping file name in :unify-map property. In this case, this
1025 function is called only for characters in the Unicode code space.
1026 Characters in upper code space are handled directly in
1027 map_charset_chars. */
1029 void
1030 map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
1031 Lisp_Object function, Lisp_Object table, Lisp_Object arg,
1032 struct charset *charset,
1033 unsigned from, unsigned to)
1035 Lisp_Object range;
1036 int c, i;
1037 struct gcpro gcpro1;
1039 range = Fcons (Qnil, Qnil);
1040 GCPRO1 (range);
1042 for (i = 0, c = 0; i < chartab_size[0]; i++, c += chartab_chars[0])
1044 Lisp_Object this;
1046 this = XCHAR_TABLE (table)->contents[i];
1047 if (SUB_CHAR_TABLE_P (this))
1048 map_sub_char_table_for_charset (c_function, function, this, arg,
1049 range, charset, from, to);
1050 else
1052 if (! NILP (XCAR (range)))
1054 XSETCDR (range, make_number (c - 1));
1055 if (c_function)
1056 (*c_function) (arg, range);
1057 else
1058 call2 (function, range, arg);
1060 XSETCAR (range, Qnil);
1063 if (! NILP (XCAR (range)))
1065 XSETCDR (range, make_number (c - 1));
1066 if (c_function)
1067 (*c_function) (arg, range);
1068 else
1069 call2 (function, range, arg);
1072 UNGCPRO;
1076 /* Unicode character property tables.
1078 This section provides a convenient and efficient way to get Unicode
1079 character properties of characters from C code (from Lisp, you must
1080 use get-char-code-property).
1082 The typical usage is to get a char-table object for a specific
1083 property like this (use of the "bidi-class" property below is just
1084 an example):
1086 Lisp_Object bidi_class_table = uniprop_table (intern ("bidi-class"));
1088 (uniprop_table can return nil if it fails to find data for the
1089 named property, or if it fails to load the appropriate Lisp support
1090 file, so the return value should be tested to be non-nil, before it
1091 is used.)
1093 To get a property value for character CH use CHAR_TABLE_REF:
1095 Lisp_Object bidi_class = CHAR_TABLE_REF (bidi_class_table, CH);
1097 In this case, what you actually get is an index number to the
1098 vector of property values (symbols nil, L, R, etc).
1100 The full list of Unicode character properties supported by Emacs is
1101 documented in the ELisp manual, in the node "Character Properties".
1103 A table for Unicode character property has these characteristics:
1105 o The purpose is `char-code-property-table', which implies that the
1106 table has 5 extra slots.
1108 o The second extra slot is a Lisp function, an index (integer) to
1109 the array uniprop_decoder[], or nil. If it is a Lisp function, we
1110 can't use such a table from C (at the moment). If it is nil, it
1111 means that we don't have to decode values.
1113 o The third extra slot is a Lisp function, an index (integer) to
1114 the array uniprop_encoder[], or nil. If it is a Lisp function, we
1115 can't use such a table from C (at the moment). If it is nil, it
1116 means that we don't have to encode values. */
1119 /* Uncompress the IDXth element of sub-char-table TABLE. */
1121 static Lisp_Object
1122 uniprop_table_uncompress (Lisp_Object table, int idx)
1124 Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[idx];
1125 int min_char = XSUB_CHAR_TABLE (table)->min_char + chartab_chars[2] * idx;
1126 Lisp_Object sub = make_sub_char_table (3, min_char, Qnil);
1127 const unsigned char *p, *pend;
1129 set_sub_char_table_contents (table, idx, sub);
1130 p = SDATA (val), pend = p + SBYTES (val);
1131 if (*p == 1)
1133 /* SIMPLE TABLE */
1134 p++;
1135 idx = STRING_CHAR_ADVANCE (p);
1136 while (p < pend && idx < chartab_chars[2])
1138 int v = STRING_CHAR_ADVANCE (p);
1139 set_sub_char_table_contents
1140 (sub, idx++, v > 0 ? make_number (v) : Qnil);
1143 else if (*p == 2)
1145 /* RUN-LENGTH TABLE */
1146 p++;
1147 for (idx = 0; p < pend; )
1149 int v = STRING_CHAR_ADVANCE (p);
1150 int count = 1;
1151 int len;
1153 if (p < pend)
1155 count = STRING_CHAR_AND_LENGTH (p, len);
1156 if (count < 128)
1157 count = 1;
1158 else
1160 count -= 128;
1161 p += len;
1164 while (count-- > 0)
1165 set_sub_char_table_contents (sub, idx++, make_number (v));
1168 /* It seems that we don't need this function because C code won't need
1169 to get a property that is compressed in this form. */
1170 #if 0
1171 else if (*p == 0)
1173 /* WORD-LIST TABLE */
1175 #endif
1176 return sub;
1180 /* Decode VALUE as an element of char-table TABLE. */
1182 static Lisp_Object
1183 uniprop_decode_value_run_length (Lisp_Object table, Lisp_Object value)
1185 if (VECTORP (XCHAR_TABLE (table)->extras[4]))
1187 Lisp_Object valvec = XCHAR_TABLE (table)->extras[4];
1189 if (XINT (value) >= 0 && XINT (value) < ASIZE (valvec))
1190 value = AREF (valvec, XINT (value));
1192 return value;
1195 static uniprop_decoder_t uniprop_decoder [] =
1196 { uniprop_decode_value_run_length };
1198 static const int uniprop_decoder_count = ARRAYELTS (uniprop_decoder);
1200 /* Return the decoder of char-table TABLE or nil if none. */
1202 static uniprop_decoder_t
1203 uniprop_get_decoder (Lisp_Object table)
1205 EMACS_INT i;
1207 if (! INTEGERP (XCHAR_TABLE (table)->extras[1]))
1208 return NULL;
1209 i = XINT (XCHAR_TABLE (table)->extras[1]);
1210 if (i < 0 || i >= uniprop_decoder_count)
1211 return NULL;
1212 return uniprop_decoder[i];
1216 /* Encode VALUE as an element of char-table TABLE which contains
1217 characters as elements. */
1219 static Lisp_Object
1220 uniprop_encode_value_character (Lisp_Object table, Lisp_Object value)
1222 if (! NILP (value) && ! CHARACTERP (value))
1223 wrong_type_argument (Qintegerp, value);
1224 return value;
1228 /* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
1229 compression. */
1231 static Lisp_Object
1232 uniprop_encode_value_run_length (Lisp_Object table, Lisp_Object value)
1234 Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
1235 int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
1237 for (i = 0; i < size; i++)
1238 if (EQ (value, value_table[i]))
1239 break;
1240 if (i == size)
1241 wrong_type_argument (build_string ("Unicode property value"), value);
1242 return make_number (i);
1246 /* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
1247 compression and contains numbers as elements. */
1249 static Lisp_Object
1250 uniprop_encode_value_numeric (Lisp_Object table, Lisp_Object value)
1252 Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
1253 int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
1255 CHECK_NUMBER (value);
1256 for (i = 0; i < size; i++)
1257 if (EQ (value, value_table[i]))
1258 break;
1259 value = make_number (i);
1260 if (i == size)
1261 set_char_table_extras (table, 4, Fvconcat (2, ((Lisp_Object []) {
1262 XCHAR_TABLE (table)->extras[4], Fmake_vector (make_number (1), value) })));
1263 return make_number (i);
1266 static uniprop_encoder_t uniprop_encoder[] =
1267 { uniprop_encode_value_character,
1268 uniprop_encode_value_run_length,
1269 uniprop_encode_value_numeric };
1271 static const int uniprop_encoder_count = ARRAYELTS (uniprop_encoder);
1273 /* Return the encoder of char-table TABLE or nil if none. */
1275 static uniprop_decoder_t
1276 uniprop_get_encoder (Lisp_Object table)
1278 EMACS_INT i;
1280 if (! INTEGERP (XCHAR_TABLE (table)->extras[2]))
1281 return NULL;
1282 i = XINT (XCHAR_TABLE (table)->extras[2]);
1283 if (i < 0 || i >= uniprop_encoder_count)
1284 return NULL;
1285 return uniprop_encoder[i];
1288 /* Return a char-table for Unicode character property PROP. This
1289 function may load a Lisp file and thus may cause
1290 garbage-collection. */
1292 Lisp_Object
1293 uniprop_table (Lisp_Object prop)
1295 Lisp_Object val, table, result;
1297 val = Fassq (prop, Vchar_code_property_alist);
1298 if (! CONSP (val))
1299 return Qnil;
1300 table = XCDR (val);
1301 if (STRINGP (table))
1303 struct gcpro gcpro1;
1304 GCPRO1 (val);
1305 AUTO_STRING (intl, "international/");
1306 result = Fload (concat2 (intl, table), Qt, Qt, Qt, Qt);
1307 UNGCPRO;
1308 if (NILP (result))
1309 return Qnil;
1310 table = XCDR (val);
1312 if (! CHAR_TABLE_P (table)
1313 || ! UNIPROP_TABLE_P (table))
1314 return Qnil;
1315 val = XCHAR_TABLE (table)->extras[1];
1316 if (INTEGERP (val)
1317 ? (XINT (val) < 0 || XINT (val) >= uniprop_decoder_count)
1318 : ! NILP (val))
1319 return Qnil;
1320 /* Prepare ASCII values in advance for CHAR_TABLE_REF. */
1321 set_char_table_ascii (table, char_table_ascii (table));
1322 return table;
1325 DEFUN ("unicode-property-table-internal", Funicode_property_table_internal,
1326 Sunicode_property_table_internal, 1, 1, 0,
1327 doc: /* Return a char-table for Unicode character property PROP.
1328 Use `get-unicode-property-internal' and
1329 `put-unicode-property-internal' instead of `aref' and `aset' to get
1330 and put an element value. */)
1331 (Lisp_Object prop)
1333 Lisp_Object table = uniprop_table (prop);
1335 if (CHAR_TABLE_P (table))
1336 return table;
1337 return Fcdr (Fassq (prop, Vchar_code_property_alist));
1340 DEFUN ("get-unicode-property-internal", Fget_unicode_property_internal,
1341 Sget_unicode_property_internal, 2, 2, 0,
1342 doc: /* Return an element of CHAR-TABLE for character CH.
1343 CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
1344 (Lisp_Object char_table, Lisp_Object ch)
1346 Lisp_Object val;
1347 uniprop_decoder_t decoder;
1349 CHECK_CHAR_TABLE (char_table);
1350 CHECK_CHARACTER (ch);
1351 if (! UNIPROP_TABLE_P (char_table))
1352 error ("Invalid Unicode property table");
1353 val = CHAR_TABLE_REF (char_table, XINT (ch));
1354 decoder = uniprop_get_decoder (char_table);
1355 return (decoder ? decoder (char_table, val) : val);
1358 DEFUN ("put-unicode-property-internal", Fput_unicode_property_internal,
1359 Sput_unicode_property_internal, 3, 3, 0,
1360 doc: /* Set an element of CHAR-TABLE for character CH to VALUE.
1361 CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
1362 (Lisp_Object char_table, Lisp_Object ch, Lisp_Object value)
1364 uniprop_encoder_t encoder;
1366 CHECK_CHAR_TABLE (char_table);
1367 CHECK_CHARACTER (ch);
1368 if (! UNIPROP_TABLE_P (char_table))
1369 error ("Invalid Unicode property table");
1370 encoder = uniprop_get_encoder (char_table);
1371 if (encoder)
1372 value = encoder (char_table, value);
1373 CHAR_TABLE_SET (char_table, XINT (ch), value);
1374 return Qnil;
1378 void
1379 syms_of_chartab (void)
1381 DEFSYM (Qchar_code_property_table, "char-code-property-table");
1383 defsubr (&Smake_char_table);
1384 defsubr (&Schar_table_parent);
1385 defsubr (&Schar_table_subtype);
1386 defsubr (&Sset_char_table_parent);
1387 defsubr (&Schar_table_extra_slot);
1388 defsubr (&Sset_char_table_extra_slot);
1389 defsubr (&Schar_table_range);
1390 defsubr (&Sset_char_table_range);
1391 defsubr (&Soptimize_char_table);
1392 defsubr (&Smap_char_table);
1393 defsubr (&Sunicode_property_table_internal);
1394 defsubr (&Sget_unicode_property_internal);
1395 defsubr (&Sput_unicode_property_internal);
1397 /* Each element has the form (PROP . TABLE).
1398 PROP is a symbol representing a character property.
1399 TABLE is a char-table containing the property value for each character.
1400 TABLE may be a name of file to load to build a char-table.
1401 This variable should be modified only through
1402 `define-char-code-property'. */
1404 DEFVAR_LISP ("char-code-property-alist", Vchar_code_property_alist,
1405 doc: /* Alist of character property name vs char-table containing property values.
1406 Internal use only. */);
1407 Vchar_code_property_alist = Qnil;