Merge from trunk.
[emacs.git] / src / category.c
blob5c38f351d014dffa897eb0247d61a7def20aa8ae
1 /* GNU Emacs routines to deal with category tables.
2 Copyright (C) 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8 Copyright (C) 2003
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
12 This file is part of GNU Emacs.
14 GNU Emacs is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
19 GNU Emacs is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28 /* Here we handle three objects: category, category set, and category
29 table. Read comments in the file category.h to understand them. */
31 #include <config.h>
32 #include <ctype.h>
33 #include <setjmp.h>
34 #include "lisp.h"
35 #include "buffer.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "category.h"
39 #include "keymap.h"
41 /* The version number of the latest category table. Each category
42 table has a unique version number. It is assigned a new number
43 also when it is modified. When a regular expression is compiled
44 into the struct re_pattern_buffer, the version number of the
45 category table (of the current buffer) at that moment is also
46 embedded in the structure.
48 For the moment, we are not using this feature. */
49 static int category_table_version;
51 Lisp_Object Qcategory_table, Qcategoryp, Qcategorysetp, Qcategory_table_p;
53 /* Variables to determine word boundary. */
54 Lisp_Object Vword_combining_categories, Vword_separating_categories;
56 /* Temporary internal variable used in macro CHAR_HAS_CATEGORY. */
57 Lisp_Object _temp_category_set;
60 /* Category set stuff. */
62 static Lisp_Object hash_get_category_set (Lisp_Object, Lisp_Object);
64 static Lisp_Object
65 hash_get_category_set (Lisp_Object table, Lisp_Object category_set)
67 Lisp_Object val;
68 struct Lisp_Hash_Table *h;
69 int i;
70 unsigned hash;
72 if (NILP (XCHAR_TABLE (table)->extras[1]))
73 XCHAR_TABLE (table)->extras[1]
74 = make_hash_table (Qequal, make_number (DEFAULT_HASH_SIZE),
75 make_float (DEFAULT_REHASH_SIZE),
76 make_float (DEFAULT_REHASH_THRESHOLD),
77 Qnil, Qnil, Qnil);
78 h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
79 i = hash_lookup (h, category_set, &hash);
80 if (i >= 0)
81 return HASH_KEY (h, i);
82 hash_put (h, category_set, Qnil, hash);
83 return category_set;
87 DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
88 doc: /* Return a newly created category-set which contains CATEGORIES.
89 CATEGORIES is a string of category mnemonics.
90 The value is a bool-vector which has t at the indices corresponding to
91 those categories. */)
92 (Lisp_Object categories)
94 Lisp_Object val;
95 int len;
97 CHECK_STRING (categories);
98 val = MAKE_CATEGORY_SET;
100 if (STRING_MULTIBYTE (categories))
101 error ("Multibyte string in `make-category-set'");
103 len = SCHARS (categories);
104 while (--len >= 0)
106 Lisp_Object category;
108 XSETFASTINT (category, SREF (categories, len));
109 CHECK_CATEGORY (category);
110 SET_CATEGORY_SET (val, category, Qt);
112 return val;
116 /* Category stuff. */
118 Lisp_Object check_category_table (Lisp_Object table);
120 DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
121 doc: /* Define CATEGORY as a category which is described by DOCSTRING.
122 CATEGORY should be an ASCII printing character in the range ` ' to `~'.
123 DOCSTRING is the documentation string of the category. The first line
124 should be a terse text (preferably less than 16 characters),
125 and the rest lines should be the full description.
126 The category is defined only in category table TABLE, which defaults to
127 the current buffer's category table. */)
128 (Lisp_Object category, Lisp_Object docstring, Lisp_Object table)
130 CHECK_CATEGORY (category);
131 CHECK_STRING (docstring);
132 table = check_category_table (table);
134 if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
135 error ("Category `%c' is already defined", XFASTINT (category));
136 if (!NILP (Vpurify_flag))
137 docstring = Fpurecopy (docstring);
138 CATEGORY_DOCSTRING (table, XFASTINT (category)) = docstring;
140 return Qnil;
143 DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
144 doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
145 TABLE should be a category table and defaults to the current buffer's
146 category table. */)
147 (Lisp_Object category, Lisp_Object table)
149 CHECK_CATEGORY (category);
150 table = check_category_table (table);
152 return CATEGORY_DOCSTRING (table, XFASTINT (category));
155 DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
156 0, 1, 0,
157 doc: /* Return a category which is not yet defined in TABLE.
158 If no category remains available, return nil.
159 The optional argument TABLE specifies which category table to modify;
160 it defaults to the current buffer's category table. */)
161 (Lisp_Object table)
163 int i;
165 table = check_category_table (table);
167 for (i = ' '; i <= '~'; i++)
168 if (NILP (CATEGORY_DOCSTRING (table, i)))
169 return make_number (i);
171 return Qnil;
175 /* Category-table stuff. */
177 DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
178 doc: /* Return t if ARG is a category table. */)
179 (Lisp_Object arg)
181 if (CHAR_TABLE_P (arg)
182 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
183 return Qt;
184 return Qnil;
187 /* If TABLE is nil, return the current category table. If TABLE is
188 not nil, check the validity of TABLE as a category table. If
189 valid, return TABLE itself, but if not valid, signal an error of
190 wrong-type-argument. */
192 Lisp_Object
193 check_category_table (Lisp_Object table)
195 if (NILP (table))
196 return current_buffer->category_table;
197 CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
198 return table;
201 DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
202 doc: /* Return the current category table.
203 This is the one specified by the current buffer. */)
204 (void)
206 return current_buffer->category_table;
209 DEFUN ("standard-category-table", Fstandard_category_table,
210 Sstandard_category_table, 0, 0, 0,
211 doc: /* Return the standard category table.
212 This is the one used for new buffers. */)
213 (void)
215 return Vstandard_category_table;
219 static void
220 copy_category_entry (Lisp_Object table, Lisp_Object c, Lisp_Object val)
222 val = Fcopy_sequence (val);
223 if (CONSP (c))
224 char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val);
225 else
226 char_table_set (table, XINT (c), val);
229 /* Return a copy of category table TABLE. We can't simply use the
230 function copy-sequence because no contents should be shared between
231 the original and the copy. This function is called recursively by
232 binding TABLE to a sub char table. */
234 Lisp_Object
235 copy_category_table (Lisp_Object table)
237 table = copy_char_table (table);
239 if (! NILP (XCHAR_TABLE (table)->defalt))
240 XCHAR_TABLE (table)->defalt
241 = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
242 XCHAR_TABLE (table)->extras[0]
243 = Fcopy_sequence (XCHAR_TABLE (table)->extras[0]);
244 map_char_table (copy_category_entry, Qnil, table, table);
246 return table;
249 DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
250 0, 1, 0,
251 doc: /* Construct a new category table and return it.
252 It is a copy of the TABLE, which defaults to the standard category table. */)
253 (Lisp_Object table)
255 if (!NILP (table))
256 check_category_table (table);
257 else
258 table = Vstandard_category_table;
260 return copy_category_table (table);
263 DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
264 0, 0, 0,
265 doc: /* Construct a new and empty category table and return it. */)
266 (void)
268 Lisp_Object val;
269 int i;
271 val = Fmake_char_table (Qcategory_table, Qnil);
272 XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
273 for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
274 XCHAR_TABLE (val)->contents[i] = MAKE_CATEGORY_SET;
275 Fset_char_table_extra_slot (val, make_number (0),
276 Fmake_vector (make_number (95), Qnil));
277 return val;
280 DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
281 doc: /* Specify TABLE as the category table for the current buffer.
282 Return TABLE. */)
283 (Lisp_Object table)
285 int idx;
286 table = check_category_table (table);
287 current_buffer->category_table = table;
288 /* Indicate that this buffer now has a specified category table. */
289 idx = PER_BUFFER_VAR_IDX (category_table);
290 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
291 return table;
295 Lisp_Object
296 char_category_set (int c)
298 return CHAR_TABLE_REF (current_buffer->category_table, c);
301 DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
302 doc: /* Return the category set of CHAR.
303 usage: (char-category-set CHAR) */)
304 (Lisp_Object ch)
306 CHECK_NUMBER (ch);
307 return CATEGORY_SET (XFASTINT (ch));
310 DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
311 Scategory_set_mnemonics, 1, 1, 0,
312 doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
313 CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
314 that are indexes where t occurs in the bool-vector.
315 The return value is a string containing those same categories. */)
316 (Lisp_Object category_set)
318 int i, j;
319 char str[96];
321 CHECK_CATEGORY_SET (category_set);
323 j = 0;
324 for (i = 32; i < 127; i++)
325 if (CATEGORY_MEMBER (i, category_set))
326 str[j++] = i;
327 str[j] = '\0';
329 return build_string (str);
332 void
333 set_category_set (Lisp_Object category_set, Lisp_Object category, Lisp_Object val)
335 do {
336 int idx = XINT (category) / 8;
337 unsigned char bits = 1 << (XINT (category) % 8);
339 if (NILP (val))
340 XCATEGORY_SET (category_set)->data[idx] &= ~bits;
341 else
342 XCATEGORY_SET (category_set)->data[idx] |= bits;
343 } while (0);
346 DEFUN ("modify-category-entry", Fmodify_category_entry,
347 Smodify_category_entry, 2, 4, 0,
348 doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
349 The category is changed only for table TABLE, which defaults to
350 the current buffer's category table.
351 CHARACTER can be either a single character or a cons representing the
352 lower and upper ends of an inclusive character range to modify.
353 If optional fourth argument RESET is non-nil,
354 then delete CATEGORY from the category set instead of adding it. */)
355 (Lisp_Object character, Lisp_Object category, Lisp_Object table, Lisp_Object reset)
357 Lisp_Object set_value; /* Actual value to be set in category sets. */
358 Lisp_Object category_set;
359 int start, end;
360 int from, to;
362 if (INTEGERP (character))
364 CHECK_CHARACTER (character);
365 start = end = XFASTINT (character);
367 else
369 CHECK_CONS (character);
370 CHECK_CHARACTER_CAR (character);
371 CHECK_CHARACTER_CDR (character);
372 start = XFASTINT (XCAR (character));
373 end = XFASTINT (XCDR (character));
376 CHECK_CATEGORY (category);
377 table = check_category_table (table);
379 if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
380 error ("Undefined category: %c", XFASTINT (category));
382 set_value = NILP (reset) ? Qt : Qnil;
384 while (start <= end)
386 from = start, to = end;
387 category_set = char_table_ref_and_range (table, start, &from, &to);
388 if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
390 category_set = Fcopy_sequence (category_set);
391 SET_CATEGORY_SET (category_set, category, set_value);
392 category_set = hash_get_category_set (table, category_set);
393 char_table_set_range (table, start, to, category_set);
395 start = to + 1;
398 return Qnil;
401 /* Return 1 if there is a word boundary between two word-constituent
402 characters C1 and C2 if they appear in this order, else return 0.
403 Use the macro WORD_BOUNDARY_P instead of calling this function
404 directly. */
407 word_boundary_p (int c1, int c2)
409 Lisp_Object category_set1, category_set2;
410 Lisp_Object tail;
411 int default_result;
413 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
414 CHAR_TABLE_REF (Vchar_script_table, c2)))
416 tail = Vword_separating_categories;
417 default_result = 0;
419 else
421 tail = Vword_combining_categories;
422 default_result = 1;
425 category_set1 = CATEGORY_SET (c1);
426 if (NILP (category_set1))
427 return default_result;
428 category_set2 = CATEGORY_SET (c2);
429 if (NILP (category_set2))
430 return default_result;
432 for (; CONSP (tail); tail = XCDR (tail))
434 Lisp_Object elt = XCAR (tail);
436 if (CONSP (elt)
437 && (NILP (XCAR (elt))
438 || (CATEGORYP (XCAR (elt))
439 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)
440 && ! CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set2)))
441 && (NILP (XCDR (elt))
442 || (CATEGORYP (XCDR (elt))
443 && ! CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set1)
444 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))))
445 return !default_result;
447 return default_result;
451 void
452 init_category_once (void)
454 /* This has to be done here, before we call Fmake_char_table. */
455 Qcategory_table = intern_c_string ("category-table");
456 staticpro (&Qcategory_table);
458 /* Intern this now in case it isn't already done.
459 Setting this variable twice is harmless.
460 But don't staticpro it here--that is done in alloc.c. */
461 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
463 /* Now we are ready to set up this property, so we can
464 create category tables. */
465 Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
467 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
468 /* Set a category set which contains nothing to the default. */
469 XCHAR_TABLE (Vstandard_category_table)->defalt = MAKE_CATEGORY_SET;
470 Fset_char_table_extra_slot (Vstandard_category_table, make_number (0),
471 Fmake_vector (make_number (95), Qnil));
474 void
475 syms_of_category (void)
477 Qcategoryp = intern_c_string ("categoryp");
478 staticpro (&Qcategoryp);
479 Qcategorysetp = intern_c_string ("categorysetp");
480 staticpro (&Qcategorysetp);
481 Qcategory_table_p = intern_c_string ("category-table-p");
482 staticpro (&Qcategory_table_p);
484 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories,
485 doc: /* List of pair (cons) of categories to determine word boundary.
487 Emacs treats a sequence of word constituent characters as a single
488 word (i.e. finds no word boundary between them) only if they belong to
489 the same script. But, exceptions are allowed in the following cases.
491 \(1) The case that characters are in different scripts is controlled
492 by the variable `word-combining-categories'.
494 Emacs finds no word boundary between characters of different scripts
495 if they have categories matching some element of this list.
497 More precisely, if an element of this list is a cons of category CAT1
498 and CAT2, and a multibyte character C1 which has CAT1 is followed by
499 C2 which has CAT2, there's no word boundary between C1 and C2.
501 For instance, to tell that Han characters followed by Hiragana
502 characters can form a single word, the element `(?C . ?H)' should be
503 in this list.
505 \(2) The case that character are in the same script is controlled by
506 the variable `word-separating-categories'.
508 Emacs finds a word boundary between characters of the same script
509 if they have categories matching some element of this list.
511 More precisely, if an element of this list is a cons of category CAT1
512 and CAT2, and a multibyte character C1 which has CAT1 but not CAT2 is
513 followed by C2 which has CAT2 but not CAT1, there's a word boundary
514 between C1 and C2.
516 For instance, to tell that there's a word boundary between Hiragana
517 and Katakana (both are in the same script `kana'),
518 the element `(?H . ?K) should be in this list. */);
520 Vword_combining_categories = Qnil;
522 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories,
523 doc: /* List of pair (cons) of categories to determine word boundary.
524 See the documentation of the variable `word-combining-categories'. */);
526 Vword_separating_categories = Qnil;
528 defsubr (&Smake_category_set);
529 defsubr (&Sdefine_category);
530 defsubr (&Scategory_docstring);
531 defsubr (&Sget_unused_category);
532 defsubr (&Scategory_table_p);
533 defsubr (&Scategory_table);
534 defsubr (&Sstandard_category_table);
535 defsubr (&Scopy_category_table);
536 defsubr (&Smake_category_table);
537 defsubr (&Sset_category_table);
538 defsubr (&Schar_category_set);
539 defsubr (&Scategory_set_mnemonics);
540 defsubr (&Smodify_category_entry);
542 category_table_version = 0;
545 /* arch-tag: 74ebf524-121b-4d9c-bd68-07f8d708b211
546 (do not change this comment) */