ASCIIfy curved quotes on displays lacking them
[emacs.git] / src / chartab.c
blobacaabce54a7ec12c8a5a01f12ad6540fa675a8af
1 /* chartab.c -- char-table support
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H13PRO009
6 This file is part of GNU Emacs.
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include "lisp.h"
24 #include "character.h"
25 #include "charset.h"
26 #include "ccl.h"
28 /* 64/16/32/128 */
30 /* Number of elements in Nth level char-table. */
31 const int chartab_size[4] =
32 { (1 << CHARTAB_SIZE_BITS_0),
33 (1 << CHARTAB_SIZE_BITS_1),
34 (1 << CHARTAB_SIZE_BITS_2),
35 (1 << CHARTAB_SIZE_BITS_3) };
37 /* Number of characters each element of Nth level char-table
38 covers. */
39 static const int chartab_chars[4] =
40 { (1 << (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
41 (1 << (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
42 (1 << CHARTAB_SIZE_BITS_3),
43 1 };
45 /* Number of characters (in bits) each element of Nth level char-table
46 covers. */
47 static const int chartab_bits[4] =
48 { (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
49 (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
50 CHARTAB_SIZE_BITS_3,
51 0 };
53 #define CHARTAB_IDX(c, depth, min_char) \
54 (((c) - (min_char)) >> chartab_bits[(depth)])
57 /* Preamble for uniprop (Unicode character property) tables. See the
58 comment of "Unicode character property tables". */
60 /* Types of decoder and encoder functions for uniprop values. */
61 typedef Lisp_Object (*uniprop_decoder_t) (Lisp_Object, Lisp_Object);
62 typedef Lisp_Object (*uniprop_encoder_t) (Lisp_Object, Lisp_Object);
64 static Lisp_Object uniprop_table_uncompress (Lisp_Object, int);
65 static uniprop_decoder_t uniprop_get_decoder (Lisp_Object);
67 /* 1 iff TABLE is a uniprop table. */
68 #define UNIPROP_TABLE_P(TABLE) \
69 (EQ (XCHAR_TABLE (TABLE)->purpose, Qchar_code_property_table) \
70 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (TABLE)) == 5)
72 /* Return a decoder for values in the uniprop table TABLE. */
73 #define UNIPROP_GET_DECODER(TABLE) \
74 (UNIPROP_TABLE_P (TABLE) ? uniprop_get_decoder (TABLE) : NULL)
76 /* Nonzero iff OBJ is a string representing uniprop values of 128
77 succeeding characters (the bottom level of a char-table) by a
78 compressed format. We are sure that no property value has a string
79 starting with '\001' nor '\002'. */
80 #define UNIPROP_COMPRESSED_FORM_P(OBJ) \
81 (STRINGP (OBJ) && SCHARS (OBJ) > 0 \
82 && ((SREF (OBJ, 0) == 1 || (SREF (OBJ, 0) == 2))))
84 static void
85 CHECK_CHAR_TABLE (Lisp_Object x)
87 CHECK_TYPE (CHAR_TABLE_P (x), Qchar_table_p, x);
90 static void
91 set_char_table_ascii (Lisp_Object table, Lisp_Object val)
93 XCHAR_TABLE (table)->ascii = val;
95 static void
96 set_char_table_parent (Lisp_Object table, Lisp_Object val)
98 XCHAR_TABLE (table)->parent = val;
101 DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
102 doc: /* Return a newly created char-table, with purpose PURPOSE.
103 Each element is initialized to INIT, which defaults to nil.
105 PURPOSE should be a symbol. If it has a `char-table-extra-slots'
106 property, the property's value should be an integer between 0 and 10
107 that specifies how many extra slots the char-table has. Otherwise,
108 the char-table has no extra slot. */)
109 (register Lisp_Object purpose, Lisp_Object init)
111 Lisp_Object vector;
112 Lisp_Object n;
113 int n_extras;
114 int size;
116 CHECK_SYMBOL (purpose);
117 n = Fget (purpose, Qchar_table_extra_slots);
118 if (NILP (n))
119 n_extras = 0;
120 else
122 CHECK_NATNUM (n);
123 if (XINT (n) > 10)
124 args_out_of_range (n, Qnil);
125 n_extras = XINT (n);
128 size = CHAR_TABLE_STANDARD_SLOTS + n_extras;
129 vector = Fmake_vector (make_number (size), init);
130 XSETPVECTYPE (XVECTOR (vector), PVEC_CHAR_TABLE);
131 set_char_table_parent (vector, Qnil);
132 set_char_table_purpose (vector, purpose);
133 XSETCHAR_TABLE (vector, XCHAR_TABLE (vector));
134 return vector;
137 static Lisp_Object
138 make_sub_char_table (int depth, int min_char, Lisp_Object defalt)
140 int i;
141 Lisp_Object table = make_uninit_sub_char_table (depth, min_char);
143 for (i = 0; i < chartab_size[depth]; i++)
144 XSUB_CHAR_TABLE (table)->contents[i] = defalt;
145 return table;
148 static Lisp_Object
149 char_table_ascii (Lisp_Object table)
151 Lisp_Object sub, val;
153 sub = XCHAR_TABLE (table)->contents[0];
154 if (! SUB_CHAR_TABLE_P (sub))
155 return sub;
156 sub = XSUB_CHAR_TABLE (sub)->contents[0];
157 if (! SUB_CHAR_TABLE_P (sub))
158 return sub;
159 val = XSUB_CHAR_TABLE (sub)->contents[0];
160 if (UNIPROP_TABLE_P (table) && UNIPROP_COMPRESSED_FORM_P (val))
161 val = uniprop_table_uncompress (sub, 0);
162 return val;
165 static Lisp_Object
166 copy_sub_char_table (Lisp_Object table)
168 int depth = XSUB_CHAR_TABLE (table)->depth;
169 int min_char = XSUB_CHAR_TABLE (table)->min_char;
170 Lisp_Object copy = make_sub_char_table (depth, min_char, Qnil);
171 int i;
173 /* Recursively copy any sub char-tables. */
174 for (i = 0; i < chartab_size[depth]; i++)
176 Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[i];
177 set_sub_char_table_contents
178 (copy, i, SUB_CHAR_TABLE_P (val) ? copy_sub_char_table (val) : val);
181 return copy;
185 Lisp_Object
186 copy_char_table (Lisp_Object table)
188 Lisp_Object copy;
189 int size = XCHAR_TABLE (table)->header.size & PSEUDOVECTOR_SIZE_MASK;
190 int i;
192 copy = Fmake_vector (make_number (size), Qnil);
193 XSETPVECTYPE (XVECTOR (copy), PVEC_CHAR_TABLE);
194 set_char_table_defalt (copy, XCHAR_TABLE (table)->defalt);
195 set_char_table_parent (copy, XCHAR_TABLE (table)->parent);
196 set_char_table_purpose (copy, XCHAR_TABLE (table)->purpose);
197 for (i = 0; i < chartab_size[0]; i++)
198 set_char_table_contents
199 (copy, i,
200 (SUB_CHAR_TABLE_P (XCHAR_TABLE (table)->contents[i])
201 ? copy_sub_char_table (XCHAR_TABLE (table)->contents[i])
202 : XCHAR_TABLE (table)->contents[i]));
203 set_char_table_ascii (copy, char_table_ascii (copy));
204 size -= CHAR_TABLE_STANDARD_SLOTS;
205 for (i = 0; i < size; i++)
206 set_char_table_extras (copy, i, XCHAR_TABLE (table)->extras[i]);
208 XSETCHAR_TABLE (copy, XCHAR_TABLE (copy));
209 return copy;
212 static Lisp_Object
213 sub_char_table_ref (Lisp_Object table, int c, bool is_uniprop)
215 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
216 Lisp_Object val;
217 int idx = CHARTAB_IDX (c, tbl->depth, tbl->min_char);
219 val = tbl->contents[idx];
220 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
221 val = uniprop_table_uncompress (table, idx);
222 if (SUB_CHAR_TABLE_P (val))
223 val = sub_char_table_ref (val, c, is_uniprop);
224 return val;
227 Lisp_Object
228 char_table_ref (Lisp_Object table, int c)
230 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
231 Lisp_Object val;
233 if (ASCII_CHAR_P (c))
235 val = tbl->ascii;
236 if (SUB_CHAR_TABLE_P (val))
237 val = XSUB_CHAR_TABLE (val)->contents[c];
239 else
241 val = tbl->contents[CHARTAB_IDX (c, 0, 0)];
242 if (SUB_CHAR_TABLE_P (val))
243 val = sub_char_table_ref (val, c, UNIPROP_TABLE_P (table));
245 if (NILP (val))
247 val = tbl->defalt;
248 if (NILP (val) && CHAR_TABLE_P (tbl->parent))
249 val = char_table_ref (tbl->parent, c);
251 return val;
254 static Lisp_Object
255 sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to,
256 Lisp_Object defalt, bool is_uniprop)
258 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
259 int depth = tbl->depth, min_char = tbl->min_char;
260 int chartab_idx = CHARTAB_IDX (c, depth, min_char), idx;
261 Lisp_Object val;
263 val = tbl->contents[chartab_idx];
264 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
265 val = uniprop_table_uncompress (table, chartab_idx);
266 if (SUB_CHAR_TABLE_P (val))
267 val = sub_char_table_ref_and_range (val, c, from, to, defalt, is_uniprop);
268 else if (NILP (val))
269 val = defalt;
271 idx = chartab_idx;
272 while (idx > 0 && *from < min_char + idx * chartab_chars[depth])
274 Lisp_Object this_val;
276 c = min_char + idx * chartab_chars[depth] - 1;
277 idx--;
278 this_val = tbl->contents[idx];
279 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
280 this_val = uniprop_table_uncompress (table, idx);
281 if (SUB_CHAR_TABLE_P (this_val))
282 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
283 is_uniprop);
284 else if (NILP (this_val))
285 this_val = defalt;
287 if (! EQ (this_val, val))
289 *from = c + 1;
290 break;
293 while (((c = (chartab_idx + 1) * chartab_chars[depth])
294 < chartab_chars[depth - 1])
295 && (c += min_char) <= *to)
297 Lisp_Object this_val;
299 chartab_idx++;
300 this_val = tbl->contents[chartab_idx];
301 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
302 this_val = uniprop_table_uncompress (table, chartab_idx);
303 if (SUB_CHAR_TABLE_P (this_val))
304 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
305 is_uniprop);
306 else if (NILP (this_val))
307 this_val = defalt;
308 if (! EQ (this_val, val))
310 *to = c - 1;
311 break;
315 return val;
319 /* Return the value for C in char-table TABLE. Shrink the range *FROM
320 and *TO to cover characters (containing C) that have the same value
321 as C. It is not assured that the values of (*FROM - 1) and (*TO +
322 1) are different from that of C. */
324 Lisp_Object
325 char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
327 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
328 int chartab_idx = CHARTAB_IDX (c, 0, 0), idx;
329 Lisp_Object val;
330 bool is_uniprop = UNIPROP_TABLE_P (table);
332 val = tbl->contents[chartab_idx];
333 if (*from < 0)
334 *from = 0;
335 if (*to < 0)
336 *to = MAX_CHAR;
337 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
338 val = uniprop_table_uncompress (table, chartab_idx);
339 if (SUB_CHAR_TABLE_P (val))
340 val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt,
341 is_uniprop);
342 else if (NILP (val))
343 val = tbl->defalt;
344 idx = chartab_idx;
345 while (*from < idx * chartab_chars[0])
347 Lisp_Object this_val;
349 c = idx * chartab_chars[0] - 1;
350 idx--;
351 this_val = tbl->contents[idx];
352 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
353 this_val = uniprop_table_uncompress (table, idx);
354 if (SUB_CHAR_TABLE_P (this_val))
355 this_val = sub_char_table_ref_and_range (this_val, c, from, to,
356 tbl->defalt, is_uniprop);
357 else if (NILP (this_val))
358 this_val = tbl->defalt;
360 if (! EQ (this_val, val))
362 *from = c + 1;
363 break;
366 while (*to >= (chartab_idx + 1) * chartab_chars[0])
368 Lisp_Object this_val;
370 chartab_idx++;
371 c = chartab_idx * chartab_chars[0];
372 this_val = tbl->contents[chartab_idx];
373 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
374 this_val = uniprop_table_uncompress (table, chartab_idx);
375 if (SUB_CHAR_TABLE_P (this_val))
376 this_val = sub_char_table_ref_and_range (this_val, c, from, to,
377 tbl->defalt, is_uniprop);
378 else if (NILP (this_val))
379 this_val = tbl->defalt;
380 if (! EQ (this_val, val))
382 *to = c - 1;
383 break;
387 return val;
391 static void
392 sub_char_table_set (Lisp_Object table, int c, Lisp_Object val, bool is_uniprop)
394 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
395 int depth = tbl->depth, min_char = tbl->min_char;
396 int i = CHARTAB_IDX (c, depth, min_char);
397 Lisp_Object sub;
399 if (depth == 3)
400 set_sub_char_table_contents (table, i, val);
401 else
403 sub = tbl->contents[i];
404 if (! SUB_CHAR_TABLE_P (sub))
406 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
407 sub = uniprop_table_uncompress (table, i);
408 else
410 sub = make_sub_char_table (depth + 1,
411 min_char + i * chartab_chars[depth],
412 sub);
413 set_sub_char_table_contents (table, i, sub);
416 sub_char_table_set (sub, c, val, is_uniprop);
420 void
421 char_table_set (Lisp_Object table, int c, Lisp_Object val)
423 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
425 if (ASCII_CHAR_P (c)
426 && SUB_CHAR_TABLE_P (tbl->ascii))
427 set_sub_char_table_contents (tbl->ascii, c, val);
428 else
430 int i = CHARTAB_IDX (c, 0, 0);
431 Lisp_Object sub;
433 sub = tbl->contents[i];
434 if (! SUB_CHAR_TABLE_P (sub))
436 sub = make_sub_char_table (1, i * chartab_chars[0], sub);
437 set_char_table_contents (table, i, sub);
439 sub_char_table_set (sub, c, val, UNIPROP_TABLE_P (table));
440 if (ASCII_CHAR_P (c))
441 set_char_table_ascii (table, char_table_ascii (table));
445 static void
446 sub_char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val,
447 bool is_uniprop)
449 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
450 int depth = tbl->depth, min_char = tbl->min_char;
451 int chars_in_block = chartab_chars[depth];
452 int i, c, lim = chartab_size[depth];
454 if (from < min_char)
455 from = min_char;
456 i = CHARTAB_IDX (from, depth, min_char);
457 c = min_char + chars_in_block * i;
458 for (; i < lim; i++, c += chars_in_block)
460 if (c > to)
461 break;
462 if (from <= c && c + chars_in_block - 1 <= to)
463 set_sub_char_table_contents (table, i, val);
464 else
466 Lisp_Object sub = tbl->contents[i];
467 if (! SUB_CHAR_TABLE_P (sub))
469 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
470 sub = uniprop_table_uncompress (table, i);
471 else
473 sub = make_sub_char_table (depth + 1, c, sub);
474 set_sub_char_table_contents (table, i, sub);
477 sub_char_table_set_range (sub, from, to, val, is_uniprop);
483 void
484 char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val)
486 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
488 if (from == to)
489 char_table_set (table, from, val);
490 else
492 bool is_uniprop = UNIPROP_TABLE_P (table);
493 int lim = CHARTAB_IDX (to, 0, 0);
494 int i, c;
496 for (i = CHARTAB_IDX (from, 0, 0), c = 0; i <= lim;
497 i++, c += chartab_chars[0])
499 if (c > to)
500 break;
501 if (from <= c && c + chartab_chars[0] - 1 <= to)
502 set_char_table_contents (table, i, val);
503 else
505 Lisp_Object sub = tbl->contents[i];
506 if (! SUB_CHAR_TABLE_P (sub))
508 sub = make_sub_char_table (1, i * chartab_chars[0], sub);
509 set_char_table_contents (table, i, sub);
511 sub_char_table_set_range (sub, from, to, val, is_uniprop);
514 if (ASCII_CHAR_P (from))
515 set_char_table_ascii (table, char_table_ascii (table));
520 DEFUN ("char-table-subtype", Fchar_table_subtype, Schar_table_subtype,
521 1, 1, 0,
522 doc: /*
523 Return the subtype of char-table CHAR-TABLE. The value is a symbol. */)
524 (Lisp_Object char_table)
526 CHECK_CHAR_TABLE (char_table);
528 return XCHAR_TABLE (char_table)->purpose;
531 DEFUN ("char-table-parent", Fchar_table_parent, Schar_table_parent,
532 1, 1, 0,
533 doc: /* Return the parent char-table of CHAR-TABLE.
534 The value is either nil or another char-table.
535 If CHAR-TABLE holds nil for a given character,
536 then the actual applicable value is inherited from the parent char-table
537 \(or from its parents, if necessary). */)
538 (Lisp_Object char_table)
540 CHECK_CHAR_TABLE (char_table);
542 return XCHAR_TABLE (char_table)->parent;
545 DEFUN ("set-char-table-parent", Fset_char_table_parent, Sset_char_table_parent,
546 2, 2, 0,
547 doc: /* Set the parent char-table of CHAR-TABLE to PARENT.
548 Return PARENT. PARENT must be either nil or another char-table. */)
549 (Lisp_Object char_table, Lisp_Object parent)
551 Lisp_Object temp;
553 CHECK_CHAR_TABLE (char_table);
555 if (!NILP (parent))
557 CHECK_CHAR_TABLE (parent);
559 for (temp = parent; !NILP (temp); temp = XCHAR_TABLE (temp)->parent)
560 if (EQ (temp, char_table))
561 error ("Attempt to make a chartable be its own parent");
564 set_char_table_parent (char_table, parent);
566 return parent;
569 DEFUN ("char-table-extra-slot", Fchar_table_extra_slot, Schar_table_extra_slot,
570 2, 2, 0,
571 doc: /* Return the value of CHAR-TABLE's extra-slot number N. */)
572 (Lisp_Object char_table, Lisp_Object n)
574 CHECK_CHAR_TABLE (char_table);
575 CHECK_NUMBER (n);
576 if (XINT (n) < 0
577 || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
578 args_out_of_range (char_table, n);
580 return XCHAR_TABLE (char_table)->extras[XINT (n)];
583 DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
584 Sset_char_table_extra_slot,
585 3, 3, 0,
586 doc: /* Set CHAR-TABLE's extra-slot number N to VALUE. */)
587 (Lisp_Object char_table, Lisp_Object n, Lisp_Object value)
589 CHECK_CHAR_TABLE (char_table);
590 CHECK_NUMBER (n);
591 if (XINT (n) < 0
592 || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
593 args_out_of_range (char_table, n);
595 set_char_table_extras (char_table, XINT (n), value);
596 return value;
599 DEFUN ("char-table-range", Fchar_table_range, Schar_table_range,
600 2, 2, 0,
601 doc: /* Return the value in CHAR-TABLE for a range of characters RANGE.
602 RANGE should be nil (for the default value),
603 a cons of character codes (for characters in the range), or a character code. */)
604 (Lisp_Object char_table, Lisp_Object range)
606 Lisp_Object val;
607 CHECK_CHAR_TABLE (char_table);
609 if (EQ (range, Qnil))
610 val = XCHAR_TABLE (char_table)->defalt;
611 else if (CHARACTERP (range))
612 val = CHAR_TABLE_REF (char_table, XFASTINT (range));
613 else if (CONSP (range))
615 int from, to;
617 CHECK_CHARACTER_CAR (range);
618 CHECK_CHARACTER_CDR (range);
619 from = XFASTINT (XCAR (range));
620 to = XFASTINT (XCDR (range));
621 val = char_table_ref_and_range (char_table, from, &from, &to);
622 /* Not yet implemented. */
624 else
625 error ("Invalid RANGE argument to `char-table-range'");
626 return val;
629 DEFUN ("set-char-table-range", Fset_char_table_range, Sset_char_table_range,
630 3, 3, 0,
631 doc: /* Set the value in CHAR-TABLE for a range of characters RANGE to VALUE.
632 RANGE should be t (for all characters), nil (for the default value),
633 a cons of character codes (for characters in the range),
634 or a character code. Return VALUE. */)
635 (Lisp_Object char_table, Lisp_Object range, Lisp_Object value)
637 CHECK_CHAR_TABLE (char_table);
638 if (EQ (range, Qt))
640 int i;
642 set_char_table_ascii (char_table, value);
643 for (i = 0; i < chartab_size[0]; i++)
644 set_char_table_contents (char_table, i, value);
646 else if (EQ (range, Qnil))
647 set_char_table_defalt (char_table, value);
648 else if (CHARACTERP (range))
649 char_table_set (char_table, XINT (range), value);
650 else if (CONSP (range))
652 CHECK_CHARACTER_CAR (range);
653 CHECK_CHARACTER_CDR (range);
654 char_table_set_range (char_table,
655 XINT (XCAR (range)), XINT (XCDR (range)), value);
657 else
658 error ("Invalid RANGE argument to `set-char-table-range'");
660 return value;
663 static Lisp_Object
664 optimize_sub_char_table (Lisp_Object table, Lisp_Object test)
666 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
667 int i, depth = tbl->depth;
668 Lisp_Object elt, this;
669 bool optimizable;
671 elt = XSUB_CHAR_TABLE (table)->contents[0];
672 if (SUB_CHAR_TABLE_P (elt))
674 elt = optimize_sub_char_table (elt, test);
675 set_sub_char_table_contents (table, 0, elt);
677 optimizable = SUB_CHAR_TABLE_P (elt) ? 0 : 1;
678 for (i = 1; i < chartab_size[depth]; i++)
680 this = XSUB_CHAR_TABLE (table)->contents[i];
681 if (SUB_CHAR_TABLE_P (this))
683 this = optimize_sub_char_table (this, test);
684 set_sub_char_table_contents (table, i, this);
686 if (optimizable
687 && (NILP (test) ? NILP (Fequal (this, elt)) /* defaults to `equal'. */
688 : EQ (test, Qeq) ? !EQ (this, elt) /* Optimize `eq' case. */
689 : NILP (call2 (test, this, elt))))
690 optimizable = 0;
693 return (optimizable ? elt : table);
696 DEFUN ("optimize-char-table", Foptimize_char_table, Soptimize_char_table,
697 1, 2, 0,
698 doc: /* Optimize CHAR-TABLE.
699 TEST is the comparison function used to decide whether two entries are
700 equivalent and can be merged. It defaults to `equal'. */)
701 (Lisp_Object char_table, Lisp_Object test)
703 Lisp_Object elt;
704 int i;
706 CHECK_CHAR_TABLE (char_table);
708 for (i = 0; i < chartab_size[0]; i++)
710 elt = XCHAR_TABLE (char_table)->contents[i];
711 if (SUB_CHAR_TABLE_P (elt))
712 set_char_table_contents
713 (char_table, i, optimize_sub_char_table (elt, test));
715 /* Reset the `ascii' cache, in case it got optimized away. */
716 set_char_table_ascii (char_table, char_table_ascii (char_table));
718 return Qnil;
722 /* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table),
723 calling it for each character or group of characters that share a
724 value. RANGE is a cons (FROM . TO) specifying the range of target
725 characters, VAL is a value of FROM in TABLE, TOP is the top
726 char-table.
728 ARG is passed to C_FUNCTION when that is called.
730 It returns the value of last character covered by TABLE (not the
731 value inherited from the parent), and by side-effect, the car part
732 of RANGE is updated to the minimum character C where C and all the
733 following characters in TABLE have the same value. */
735 static Lisp_Object
736 map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
737 Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val,
738 Lisp_Object range, Lisp_Object top)
740 /* Depth of TABLE. */
741 int depth;
742 /* Minimum and maximum characters covered by TABLE. */
743 int min_char, max_char;
744 /* Number of characters covered by one element of TABLE. */
745 int chars_in_block;
746 int from = XINT (XCAR (range)), to = XINT (XCDR (range));
747 int i, c;
748 bool is_uniprop = UNIPROP_TABLE_P (top);
749 uniprop_decoder_t decoder = UNIPROP_GET_DECODER (top);
751 if (SUB_CHAR_TABLE_P (table))
753 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
755 depth = tbl->depth;
756 min_char = tbl->min_char;
757 max_char = min_char + chartab_chars[depth - 1] - 1;
759 else
761 depth = 0;
762 min_char = 0;
763 max_char = MAX_CHAR;
765 chars_in_block = chartab_chars[depth];
767 if (to < max_char)
768 max_char = to;
769 /* Set I to the index of the first element to check. */
770 if (from <= min_char)
771 i = 0;
772 else
773 i = (from - min_char) / chars_in_block;
774 for (c = min_char + chars_in_block * i; c <= max_char;
775 i++, c += chars_in_block)
777 Lisp_Object this = (SUB_CHAR_TABLE_P (table)
778 ? XSUB_CHAR_TABLE (table)->contents[i]
779 : XCHAR_TABLE (table)->contents[i]);
780 int nextc = c + chars_in_block;
782 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this))
783 this = uniprop_table_uncompress (table, i);
784 if (SUB_CHAR_TABLE_P (this))
786 if (to >= nextc)
787 XSETCDR (range, make_number (nextc - 1));
788 val = map_sub_char_table (c_function, function, this, arg,
789 val, range, top);
791 else
793 if (NILP (this))
794 this = XCHAR_TABLE (top)->defalt;
795 if (!EQ (val, this))
797 bool different_value = 1;
799 if (NILP (val))
801 if (! NILP (XCHAR_TABLE (top)->parent))
803 Lisp_Object parent = XCHAR_TABLE (top)->parent;
804 Lisp_Object temp = XCHAR_TABLE (parent)->parent;
806 /* This is to get a value of FROM in PARENT
807 without checking the parent of PARENT. */
808 set_char_table_parent (parent, Qnil);
809 val = CHAR_TABLE_REF (parent, from);
810 set_char_table_parent (parent, temp);
811 XSETCDR (range, make_number (c - 1));
812 val = map_sub_char_table (c_function, function,
813 parent, arg, val, range,
814 parent);
815 if (EQ (val, this))
816 different_value = 0;
819 if (! NILP (val) && different_value)
821 XSETCDR (range, make_number (c - 1));
822 if (EQ (XCAR (range), XCDR (range)))
824 if (c_function)
825 (*c_function) (arg, XCAR (range), val);
826 else
828 if (decoder)
829 val = decoder (top, val);
830 call2 (function, XCAR (range), val);
833 else
835 if (c_function)
836 (*c_function) (arg, range, val);
837 else
839 if (decoder)
840 val = decoder (top, val);
841 call2 (function, range, val);
845 val = this;
846 from = c;
847 XSETCAR (range, make_number (c));
850 XSETCDR (range, make_number (to));
852 return val;
856 /* Map C_FUNCTION or FUNCTION over TABLE, calling it for each
857 character or group of characters that share a value.
859 ARG is passed to C_FUNCTION when that is called. */
861 void
862 map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
863 Lisp_Object function, Lisp_Object table, Lisp_Object arg)
865 Lisp_Object range, val, parent;
866 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
867 uniprop_decoder_t decoder = UNIPROP_GET_DECODER (table);
869 range = Fcons (make_number (0), make_number (MAX_CHAR));
870 parent = XCHAR_TABLE (table)->parent;
872 GCPRO4 (table, arg, range, parent);
873 val = XCHAR_TABLE (table)->ascii;
874 if (SUB_CHAR_TABLE_P (val))
875 val = XSUB_CHAR_TABLE (val)->contents[0];
876 val = map_sub_char_table (c_function, function, table, arg, val, range,
877 table);
879 /* If VAL is nil and TABLE has a parent, we must consult the parent
880 recursively. */
881 while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent))
883 Lisp_Object temp;
884 int from = XINT (XCAR (range));
886 parent = XCHAR_TABLE (table)->parent;
887 temp = XCHAR_TABLE (parent)->parent;
888 /* This is to get a value of FROM in PARENT without checking the
889 parent of PARENT. */
890 set_char_table_parent (parent, Qnil);
891 val = CHAR_TABLE_REF (parent, from);
892 set_char_table_parent (parent, temp);
893 val = map_sub_char_table (c_function, function, parent, arg, val, range,
894 parent);
895 table = parent;
898 if (! NILP (val))
900 if (EQ (XCAR (range), XCDR (range)))
902 if (c_function)
903 (*c_function) (arg, XCAR (range), val);
904 else
906 if (decoder)
907 val = decoder (table, val);
908 call2 (function, XCAR (range), val);
911 else
913 if (c_function)
914 (*c_function) (arg, range, val);
915 else
917 if (decoder)
918 val = decoder (table, val);
919 call2 (function, range, val);
924 UNGCPRO;
927 DEFUN ("map-char-table", Fmap_char_table, Smap_char_table,
928 2, 2, 0,
929 doc: /* Call FUNCTION for each character in CHAR-TABLE that has non-nil value.
930 FUNCTION is called with two arguments, KEY and VALUE.
931 KEY is a character code or a cons of character codes specifying a
932 range of characters that have the same value.
933 VALUE is what (char-table-range CHAR-TABLE KEY) returns. */)
934 (Lisp_Object function, Lisp_Object char_table)
936 CHECK_CHAR_TABLE (char_table);
938 map_char_table (NULL, function, char_table, char_table);
939 return Qnil;
943 static void
944 map_sub_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
945 Lisp_Object function, Lisp_Object table, Lisp_Object arg,
946 Lisp_Object range, struct charset *charset,
947 unsigned from, unsigned to)
949 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
950 int i, c = tbl->min_char, depth = tbl->depth;
952 if (depth < 3)
953 for (i = 0; i < chartab_size[depth]; i++, c += chartab_chars[depth])
955 Lisp_Object this;
957 this = tbl->contents[i];
958 if (SUB_CHAR_TABLE_P (this))
959 map_sub_char_table_for_charset (c_function, function, this, arg,
960 range, charset, from, to);
961 else
963 if (! NILP (XCAR (range)))
965 XSETCDR (range, make_number (c - 1));
966 if (c_function)
967 (*c_function) (arg, range);
968 else
969 call2 (function, range, arg);
971 XSETCAR (range, Qnil);
974 else
975 for (i = 0; i < chartab_size[depth]; i++, c++)
977 Lisp_Object this;
978 unsigned code;
980 this = tbl->contents[i];
981 if (NILP (this)
982 || (charset
983 && (code = ENCODE_CHAR (charset, c),
984 (code < from || code > to))))
986 if (! NILP (XCAR (range)))
988 XSETCDR (range, make_number (c - 1));
989 if (c_function)
990 (*c_function) (arg, range);
991 else
992 call2 (function, range, arg);
993 XSETCAR (range, Qnil);
996 else
998 if (NILP (XCAR (range)))
999 XSETCAR (range, make_number (c));
1005 /* Support function for `map-charset-chars'. Map C_FUNCTION or
1006 FUNCTION over TABLE, calling it for each character or a group of
1007 succeeding characters that have non-nil value in TABLE. TABLE is a
1008 "mapping table" or a "deunifier table" of a certain charset.
1010 If CHARSET is not NULL (this is the case that `map-charset-chars'
1011 is called with non-nil FROM-CODE and TO-CODE), it is a charset who
1012 owns TABLE, and the function is called only on a character in the
1013 range FROM and TO. FROM and TO are not character codes, but code
1014 points of a character in CHARSET.
1016 This function is called in these two cases:
1018 (1) A charset has a mapping file name in :map property.
1020 (2) A charset has an upper code space in :offset property and a
1021 mapping file name in :unify-map property. In this case, this
1022 function is called only for characters in the Unicode code space.
1023 Characters in upper code space are handled directly in
1024 map_charset_chars. */
1026 void
1027 map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
1028 Lisp_Object function, Lisp_Object table, Lisp_Object arg,
1029 struct charset *charset,
1030 unsigned from, unsigned to)
1032 Lisp_Object range;
1033 int c, i;
1034 struct gcpro gcpro1;
1036 range = Fcons (Qnil, Qnil);
1037 GCPRO1 (range);
1039 for (i = 0, c = 0; i < chartab_size[0]; i++, c += chartab_chars[0])
1041 Lisp_Object this;
1043 this = XCHAR_TABLE (table)->contents[i];
1044 if (SUB_CHAR_TABLE_P (this))
1045 map_sub_char_table_for_charset (c_function, function, this, arg,
1046 range, charset, from, to);
1047 else
1049 if (! NILP (XCAR (range)))
1051 XSETCDR (range, make_number (c - 1));
1052 if (c_function)
1053 (*c_function) (arg, range);
1054 else
1055 call2 (function, range, arg);
1057 XSETCAR (range, Qnil);
1060 if (! NILP (XCAR (range)))
1062 XSETCDR (range, make_number (c - 1));
1063 if (c_function)
1064 (*c_function) (arg, range);
1065 else
1066 call2 (function, range, arg);
1069 UNGCPRO;
1073 /* Unicode character property tables.
1075 This section provides a convenient and efficient way to get Unicode
1076 character properties of characters from C code (from Lisp, you must
1077 use get-char-code-property).
1079 The typical usage is to get a char-table object for a specific
1080 property like this (use of the "bidi-class" property below is just
1081 an example):
1083 Lisp_Object bidi_class_table = uniprop_table (intern ("bidi-class"));
1085 (uniprop_table can return nil if it fails to find data for the
1086 named property, or if it fails to load the appropriate Lisp support
1087 file, so the return value should be tested to be non-nil, before it
1088 is used.)
1090 To get a property value for character CH use CHAR_TABLE_REF:
1092 Lisp_Object bidi_class = CHAR_TABLE_REF (bidi_class_table, CH);
1094 In this case, what you actually get is an index number to the
1095 vector of property values (symbols nil, L, R, etc).
1097 The full list of Unicode character properties supported by Emacs is
1098 documented in the ELisp manual, in the node "Character Properties".
1100 A table for Unicode character property has these characteristics:
1102 o The purpose is `char-code-property-table', which implies that the
1103 table has 5 extra slots.
1105 o The second extra slot is a Lisp function, an index (integer) to
1106 the array uniprop_decoder[], or nil. If it is a Lisp function, we
1107 can't use such a table from C (at the moment). If it is nil, it
1108 means that we don't have to decode values.
1110 o The third extra slot is a Lisp function, an index (integer) to
1111 the array uniprop_encoder[], or nil. If it is a Lisp function, we
1112 can't use such a table from C (at the moment). If it is nil, it
1113 means that we don't have to encode values. */
1116 /* Uncompress the IDXth element of sub-char-table TABLE. */
1118 static Lisp_Object
1119 uniprop_table_uncompress (Lisp_Object table, int idx)
1121 Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[idx];
1122 int min_char = XSUB_CHAR_TABLE (table)->min_char + chartab_chars[2] * idx;
1123 Lisp_Object sub = make_sub_char_table (3, min_char, Qnil);
1124 const unsigned char *p, *pend;
1126 set_sub_char_table_contents (table, idx, sub);
1127 p = SDATA (val), pend = p + SBYTES (val);
1128 if (*p == 1)
1130 /* SIMPLE TABLE */
1131 p++;
1132 idx = STRING_CHAR_ADVANCE (p);
1133 while (p < pend && idx < chartab_chars[2])
1135 int v = STRING_CHAR_ADVANCE (p);
1136 set_sub_char_table_contents
1137 (sub, idx++, v > 0 ? make_number (v) : Qnil);
1140 else if (*p == 2)
1142 /* RUN-LENGTH TABLE */
1143 p++;
1144 for (idx = 0; p < pend; )
1146 int v = STRING_CHAR_ADVANCE (p);
1147 int count = 1;
1148 int len;
1150 if (p < pend)
1152 count = STRING_CHAR_AND_LENGTH (p, len);
1153 if (count < 128)
1154 count = 1;
1155 else
1157 count -= 128;
1158 p += len;
1161 while (count-- > 0)
1162 set_sub_char_table_contents (sub, idx++, make_number (v));
1165 /* It seems that we don't need this function because C code won't need
1166 to get a property that is compressed in this form. */
1167 #if 0
1168 else if (*p == 0)
1170 /* WORD-LIST TABLE */
1172 #endif
1173 return sub;
1177 /* Decode VALUE as an element of char-table TABLE. */
1179 static Lisp_Object
1180 uniprop_decode_value_run_length (Lisp_Object table, Lisp_Object value)
1182 if (VECTORP (XCHAR_TABLE (table)->extras[4]))
1184 Lisp_Object valvec = XCHAR_TABLE (table)->extras[4];
1186 if (XINT (value) >= 0 && XINT (value) < ASIZE (valvec))
1187 value = AREF (valvec, XINT (value));
1189 return value;
1192 static uniprop_decoder_t uniprop_decoder [] =
1193 { uniprop_decode_value_run_length };
1195 static const int uniprop_decoder_count = ARRAYELTS (uniprop_decoder);
1197 /* Return the decoder of char-table TABLE or nil if none. */
1199 static uniprop_decoder_t
1200 uniprop_get_decoder (Lisp_Object table)
1202 EMACS_INT i;
1204 if (! INTEGERP (XCHAR_TABLE (table)->extras[1]))
1205 return NULL;
1206 i = XINT (XCHAR_TABLE (table)->extras[1]);
1207 if (i < 0 || i >= uniprop_decoder_count)
1208 return NULL;
1209 return uniprop_decoder[i];
1213 /* Encode VALUE as an element of char-table TABLE which contains
1214 characters as elements. */
1216 static Lisp_Object
1217 uniprop_encode_value_character (Lisp_Object table, Lisp_Object value)
1219 if (! NILP (value) && ! CHARACTERP (value))
1220 wrong_type_argument (Qintegerp, value);
1221 return value;
1225 /* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
1226 compression. */
1228 static Lisp_Object
1229 uniprop_encode_value_run_length (Lisp_Object table, Lisp_Object value)
1231 Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
1232 int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
1234 for (i = 0; i < size; i++)
1235 if (EQ (value, value_table[i]))
1236 break;
1237 if (i == size)
1238 wrong_type_argument (build_string ("Unicode property value"), value);
1239 return make_number (i);
1243 /* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
1244 compression and contains numbers as elements. */
1246 static Lisp_Object
1247 uniprop_encode_value_numeric (Lisp_Object table, Lisp_Object value)
1249 Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
1250 int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
1252 CHECK_NUMBER (value);
1253 for (i = 0; i < size; i++)
1254 if (EQ (value, value_table[i]))
1255 break;
1256 value = make_number (i);
1257 if (i == size)
1258 set_char_table_extras (table, 4,
1259 CALLN (Fvconcat,
1260 XCHAR_TABLE (table)->extras[4],
1261 Fmake_vector (make_number (1), value)));
1262 return make_number (i);
1265 static uniprop_encoder_t uniprop_encoder[] =
1266 { uniprop_encode_value_character,
1267 uniprop_encode_value_run_length,
1268 uniprop_encode_value_numeric };
1270 static const int uniprop_encoder_count = ARRAYELTS (uniprop_encoder);
1272 /* Return the encoder of char-table TABLE or nil if none. */
1274 static uniprop_decoder_t
1275 uniprop_get_encoder (Lisp_Object table)
1277 EMACS_INT i;
1279 if (! INTEGERP (XCHAR_TABLE (table)->extras[2]))
1280 return NULL;
1281 i = XINT (XCHAR_TABLE (table)->extras[2]);
1282 if (i < 0 || i >= uniprop_encoder_count)
1283 return NULL;
1284 return uniprop_encoder[i];
1287 /* Return a char-table for Unicode character property PROP. This
1288 function may load a Lisp file and thus may cause
1289 garbage-collection. */
1291 Lisp_Object
1292 uniprop_table (Lisp_Object prop)
1294 Lisp_Object val, table, result;
1296 val = Fassq (prop, Vchar_code_property_alist);
1297 if (! CONSP (val))
1298 return Qnil;
1299 table = XCDR (val);
1300 if (STRINGP (table))
1302 struct gcpro gcpro1;
1303 GCPRO1 (val);
1304 AUTO_STRING (intl, "international/");
1305 result = Fload (concat2 (intl, table), Qt, Qt, Qt, Qt);
1306 UNGCPRO;
1307 if (NILP (result))
1308 return Qnil;
1309 table = XCDR (val);
1311 if (! CHAR_TABLE_P (table)
1312 || ! UNIPROP_TABLE_P (table))
1313 return Qnil;
1314 val = XCHAR_TABLE (table)->extras[1];
1315 if (INTEGERP (val)
1316 ? (XINT (val) < 0 || XINT (val) >= uniprop_decoder_count)
1317 : ! NILP (val))
1318 return Qnil;
1319 /* Prepare ASCII values in advance for CHAR_TABLE_REF. */
1320 set_char_table_ascii (table, char_table_ascii (table));
1321 return table;
1324 DEFUN ("unicode-property-table-internal", Funicode_property_table_internal,
1325 Sunicode_property_table_internal, 1, 1, 0,
1326 doc: /* Return a char-table for Unicode character property PROP.
1327 Use `get-unicode-property-internal' and
1328 `put-unicode-property-internal' instead of `aref' and `aset' to get
1329 and put an element value. */)
1330 (Lisp_Object prop)
1332 Lisp_Object table = uniprop_table (prop);
1334 if (CHAR_TABLE_P (table))
1335 return table;
1336 return Fcdr (Fassq (prop, Vchar_code_property_alist));
1339 DEFUN ("get-unicode-property-internal", Fget_unicode_property_internal,
1340 Sget_unicode_property_internal, 2, 2, 0,
1341 doc: /* Return an element of CHAR-TABLE for character CH.
1342 CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
1343 (Lisp_Object char_table, Lisp_Object ch)
1345 Lisp_Object val;
1346 uniprop_decoder_t decoder;
1348 CHECK_CHAR_TABLE (char_table);
1349 CHECK_CHARACTER (ch);
1350 if (! UNIPROP_TABLE_P (char_table))
1351 error ("Invalid Unicode property table");
1352 val = CHAR_TABLE_REF (char_table, XINT (ch));
1353 decoder = uniprop_get_decoder (char_table);
1354 return (decoder ? decoder (char_table, val) : val);
1357 DEFUN ("put-unicode-property-internal", Fput_unicode_property_internal,
1358 Sput_unicode_property_internal, 3, 3, 0,
1359 doc: /* Set an element of CHAR-TABLE for character CH to VALUE.
1360 CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
1361 (Lisp_Object char_table, Lisp_Object ch, Lisp_Object value)
1363 uniprop_encoder_t encoder;
1365 CHECK_CHAR_TABLE (char_table);
1366 CHECK_CHARACTER (ch);
1367 if (! UNIPROP_TABLE_P (char_table))
1368 error ("Invalid Unicode property table");
1369 encoder = uniprop_get_encoder (char_table);
1370 if (encoder)
1371 value = encoder (char_table, value);
1372 CHAR_TABLE_SET (char_table, XINT (ch), value);
1373 return Qnil;
1377 void
1378 syms_of_chartab (void)
1380 /* Purpose of uniprop tables. */
1381 DEFSYM (Qchar_code_property_table, "char-code-property-table");
1383 defsubr (&Smake_char_table);
1384 defsubr (&Schar_table_parent);
1385 defsubr (&Schar_table_subtype);
1386 defsubr (&Sset_char_table_parent);
1387 defsubr (&Schar_table_extra_slot);
1388 defsubr (&Sset_char_table_extra_slot);
1389 defsubr (&Schar_table_range);
1390 defsubr (&Sset_char_table_range);
1391 defsubr (&Soptimize_char_table);
1392 defsubr (&Smap_char_table);
1393 defsubr (&Sunicode_property_table_internal);
1394 defsubr (&Sget_unicode_property_internal);
1395 defsubr (&Sput_unicode_property_internal);
1397 /* Each element has the form (PROP . TABLE).
1398 PROP is a symbol representing a character property.
1399 TABLE is a char-table containing the property value for each character.
1400 TABLE may be a name of file to load to build a char-table.
1401 This variable should be modified only through
1402 `define-char-code-property'. */
1404 DEFVAR_LISP ("char-code-property-alist", Vchar_code_property_alist,
1405 doc: /* Alist of character property name vs char-table containing property values.
1406 Internal use only. */);
1407 Vchar_code_property_alist = Qnil;