; * doc/emacs/search.texi (Word Search): Update for changes in bug#27341.
[emacs.git] / src / category.c
blobb633f65532a506658b3ffc638532e1f6de21b6bc
1 /* GNU Emacs routines to deal with category tables.
3 Copyright (C) 1998, 2001-2017 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8 Copyright (C) 2003
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
12 This file is part of GNU Emacs.
14 GNU Emacs is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or (at
17 your option) any later version.
19 GNU Emacs is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28 /* Here we handle three objects: category, category set, and category
29 table. Read comments in the file category.h to understand them. */
31 #include <config.h>
33 #include "lisp.h"
34 #include "character.h"
35 #include "buffer.h"
36 #include "category.h"
38 /* This setter is used only in this file, so it can be private. */
39 static void
40 bset_category_table (struct buffer *b, Lisp_Object val)
42 b->category_table_ = val;
45 /* The version number of the latest category table. Each category
46 table has a unique version number. It is assigned a new number
47 also when it is modified. When a regular expression is compiled
48 into the struct re_pattern_buffer, the version number of the
49 category table (of the current buffer) at that moment is also
50 embedded in the structure.
52 For the moment, we are not using this feature. */
53 static int category_table_version;
55 /* Category set staff. */
57 static Lisp_Object
58 hash_get_category_set (Lisp_Object table, Lisp_Object category_set)
60 struct Lisp_Hash_Table *h;
61 ptrdiff_t i;
62 EMACS_UINT hash;
64 if (NILP (XCHAR_TABLE (table)->extras[1]))
65 set_char_table_extras
66 (table, 1,
67 make_hash_table (hashtest_equal, DEFAULT_HASH_SIZE,
68 DEFAULT_REHASH_SIZE, DEFAULT_REHASH_THRESHOLD,
69 Qnil, false));
70 h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
71 i = hash_lookup (h, category_set, &hash);
72 if (i >= 0)
73 return HASH_KEY (h, i);
74 hash_put (h, category_set, Qnil, hash);
75 return category_set;
78 /* Make CATEGORY_SET include (if VAL) or exclude (if !VAL) CATEGORY. */
80 static void
81 set_category_set (Lisp_Object category_set, EMACS_INT category, bool val)
83 bool_vector_set (category_set, category, val);
86 DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
87 doc: /* Return a newly created category-set which contains CATEGORIES.
88 CATEGORIES is a string of category mnemonics.
89 The value is a bool-vector which has t at the indices corresponding to
90 those categories. */)
91 (Lisp_Object categories)
93 Lisp_Object val;
94 ptrdiff_t len;
96 CHECK_STRING (categories);
97 val = MAKE_CATEGORY_SET;
99 if (STRING_MULTIBYTE (categories))
100 error ("Multibyte string in `make-category-set'");
102 len = SCHARS (categories);
103 while (--len >= 0)
105 unsigned char cat = SREF (categories, len);
106 Lisp_Object category = make_number (cat);
108 CHECK_CATEGORY (category);
109 set_category_set (val, cat, 1);
111 return val;
115 /* Category staff. */
117 static Lisp_Object check_category_table (Lisp_Object table);
119 DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
120 doc: /* Define CATEGORY as a category which is described by DOCSTRING.
121 CATEGORY should be an ASCII printing character in the range ` ' to `~'.
122 DOCSTRING is the documentation string of the category. The first line
123 should be a terse text (preferably less than 16 characters),
124 and the rest lines should be the full description.
125 The category is defined only in category table TABLE, which defaults to
126 the current buffer's category table. */)
127 (Lisp_Object category, Lisp_Object docstring, Lisp_Object table)
129 CHECK_CATEGORY (category);
130 CHECK_STRING (docstring);
131 table = check_category_table (table);
133 if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
134 error ("Category `%c' is already defined", (int) XFASTINT (category));
135 if (!NILP (Vpurify_flag))
136 docstring = Fpurecopy (docstring);
137 SET_CATEGORY_DOCSTRING (table, XFASTINT (category), docstring);
139 return Qnil;
142 DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
143 doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
144 TABLE should be a category table and defaults to the current buffer's
145 category table. */)
146 (Lisp_Object category, Lisp_Object table)
148 CHECK_CATEGORY (category);
149 table = check_category_table (table);
151 return CATEGORY_DOCSTRING (table, XFASTINT (category));
154 DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
155 0, 1, 0,
156 doc: /* Return a category which is not yet defined in TABLE.
157 If no category remains available, return nil.
158 The optional argument TABLE specifies which category table to modify;
159 it defaults to the current buffer's category table. */)
160 (Lisp_Object table)
162 int i;
164 table = check_category_table (table);
166 for (i = ' '; i <= '~'; i++)
167 if (NILP (CATEGORY_DOCSTRING (table, i)))
168 return make_number (i);
170 return Qnil;
174 /* Category-table staff. */
176 DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
177 doc: /* Return t if ARG is a category table. */)
178 (Lisp_Object arg)
180 if (CHAR_TABLE_P (arg)
181 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
182 return Qt;
183 return Qnil;
186 /* If TABLE is nil, return the current category table. If TABLE is
187 not nil, check the validity of TABLE as a category table. If
188 valid, return TABLE itself, but if not valid, signal an error of
189 wrong-type-argument. */
191 static Lisp_Object
192 check_category_table (Lisp_Object table)
194 if (NILP (table))
195 return BVAR (current_buffer, category_table);
196 CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
197 return table;
200 DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
201 doc: /* Return the current category table.
202 This is the one specified by the current buffer. */)
203 (void)
205 return BVAR (current_buffer, category_table);
208 DEFUN ("standard-category-table", Fstandard_category_table,
209 Sstandard_category_table, 0, 0, 0,
210 doc: /* Return the standard category table.
211 This is the one used for new buffers. */)
212 (void)
214 return Vstandard_category_table;
218 static void
219 copy_category_entry (Lisp_Object table, Lisp_Object c, Lisp_Object val)
221 val = Fcopy_sequence (val);
222 if (CONSP (c))
223 char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val);
224 else
225 char_table_set (table, XINT (c), val);
228 /* Return a copy of category table TABLE. We can't simply use the
229 function copy-sequence because no contents should be shared between
230 the original and the copy. This function is called recursively by
231 binding TABLE to a sub char table. */
233 static Lisp_Object
234 copy_category_table (Lisp_Object table)
236 table = copy_char_table (table);
238 if (! NILP (XCHAR_TABLE (table)->defalt))
239 set_char_table_defalt (table,
240 Fcopy_sequence (XCHAR_TABLE (table)->defalt));
241 set_char_table_extras
242 (table, 0, Fcopy_sequence (XCHAR_TABLE (table)->extras[0]));
243 map_char_table (copy_category_entry, Qnil, table, table);
245 return table;
248 DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
249 0, 1, 0,
250 doc: /* Construct a new category table and return it.
251 It is a copy of the TABLE, which defaults to the standard category table. */)
252 (Lisp_Object table)
254 if (!NILP (table))
255 check_category_table (table);
256 else
257 table = Vstandard_category_table;
259 return copy_category_table (table);
262 DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
263 0, 0, 0,
264 doc: /* Construct a new and empty category table and return it. */)
265 (void)
267 Lisp_Object val;
268 int i;
270 val = Fmake_char_table (Qcategory_table, Qnil);
271 set_char_table_defalt (val, MAKE_CATEGORY_SET);
272 for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
273 set_char_table_contents (val, i, MAKE_CATEGORY_SET);
274 Fset_char_table_extra_slot (val, make_number (0),
275 Fmake_vector (make_number (95), Qnil));
276 return val;
279 DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
280 doc: /* Specify TABLE as the category table for the current buffer.
281 Return TABLE. */)
282 (Lisp_Object table)
284 int idx;
285 table = check_category_table (table);
286 bset_category_table (current_buffer, table);
287 /* Indicate that this buffer now has a specified category table. */
288 idx = PER_BUFFER_VAR_IDX (category_table);
289 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
290 return table;
294 Lisp_Object
295 char_category_set (int c)
297 return CHAR_TABLE_REF (BVAR (current_buffer, category_table), c);
300 DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
301 doc: /* Return the category set of CHAR.
302 usage: (char-category-set CHAR) */)
303 (Lisp_Object ch)
305 CHECK_CHARACTER (ch);
306 return CATEGORY_SET (XFASTINT (ch));
309 DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
310 Scategory_set_mnemonics, 1, 1, 0,
311 doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
312 CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
313 that are indexes where t occurs in the bool-vector.
314 The return value is a string containing those same categories. */)
315 (Lisp_Object category_set)
317 int i, j;
318 char str[96];
320 CHECK_CATEGORY_SET (category_set);
322 j = 0;
323 for (i = 32; i < 127; i++)
324 if (CATEGORY_MEMBER (i, category_set))
325 str[j++] = i;
326 str[j] = '\0';
328 return build_string (str);
331 DEFUN ("modify-category-entry", Fmodify_category_entry,
332 Smodify_category_entry, 2, 4, 0,
333 doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
334 The category is changed only for table TABLE, which defaults to
335 the current buffer's category table.
336 CHARACTER can be either a single character or a cons representing the
337 lower and upper ends of an inclusive character range to modify.
338 CATEGORY must be a category name (a character between ` ' and `~').
339 Use `describe-categories' to see existing category names.
340 If optional fourth argument RESET is non-nil,
341 then delete CATEGORY from the category set instead of adding it. */)
342 (Lisp_Object character, Lisp_Object category, Lisp_Object table, Lisp_Object reset)
344 bool set_value; /* Actual value to be set in category sets. */
345 Lisp_Object category_set;
346 int start, end;
347 int from, to;
349 if (INTEGERP (character))
351 CHECK_CHARACTER (character);
352 start = end = XFASTINT (character);
354 else
356 CHECK_CONS (character);
357 CHECK_CHARACTER_CAR (character);
358 CHECK_CHARACTER_CDR (character);
359 start = XFASTINT (XCAR (character));
360 end = XFASTINT (XCDR (character));
363 CHECK_CATEGORY (category);
364 table = check_category_table (table);
366 if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
367 error ("Undefined category: %c", (int) XFASTINT (category));
369 set_value = NILP (reset);
371 while (start <= end)
373 from = start, to = end;
374 category_set = char_table_ref_and_range (table, start, &from, &to);
375 if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
377 category_set = Fcopy_sequence (category_set);
378 set_category_set (category_set, XFASTINT (category), set_value);
379 category_set = hash_get_category_set (table, category_set);
380 char_table_set_range (table, start, to, category_set);
382 start = to + 1;
385 return Qnil;
388 /* Return true if there is a word boundary between two word-constituent
389 characters C1 and C2 if they appear in this order.
390 Use the macro WORD_BOUNDARY_P instead of calling this function
391 directly. */
393 bool
394 word_boundary_p (int c1, int c2)
396 Lisp_Object category_set1, category_set2;
397 Lisp_Object tail;
398 bool default_result;
400 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
401 CHAR_TABLE_REF (Vchar_script_table, c2)))
403 tail = Vword_separating_categories;
404 default_result = 0;
406 else
408 tail = Vword_combining_categories;
409 default_result = 1;
412 category_set1 = CATEGORY_SET (c1);
413 if (NILP (category_set1))
414 return default_result;
415 category_set2 = CATEGORY_SET (c2);
416 if (NILP (category_set2))
417 return default_result;
419 for (; CONSP (tail); tail = XCDR (tail))
421 Lisp_Object elt = XCAR (tail);
423 if (CONSP (elt)
424 && (NILP (XCAR (elt))
425 || (CATEGORYP (XCAR (elt))
426 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)
427 && ! CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set2)))
428 && (NILP (XCDR (elt))
429 || (CATEGORYP (XCDR (elt))
430 && ! CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set1)
431 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))))
432 return !default_result;
434 return default_result;
438 void
439 init_category_once (void)
441 /* This has to be done here, before we call Fmake_char_table. */
442 DEFSYM (Qcategory_table, "category-table");
443 Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
445 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
446 /* Set a category set which contains nothing to the default. */
447 set_char_table_defalt (Vstandard_category_table, MAKE_CATEGORY_SET);
448 Fset_char_table_extra_slot (Vstandard_category_table, make_number (0),
449 Fmake_vector (make_number (95), Qnil));
452 void
453 syms_of_category (void)
455 DEFSYM (Qcategoryp, "categoryp");
456 DEFSYM (Qcategorysetp, "categorysetp");
457 DEFSYM (Qcategory_table_p, "category-table-p");
459 DEFVAR_LISP ("word-combining-categories", Vword_combining_categories,
460 doc: /* List of pair (cons) of categories to determine word boundary.
462 Emacs treats a sequence of word constituent characters as a single
463 word (i.e. finds no word boundary between them) only if they belong to
464 the same script. But, exceptions are allowed in the following cases.
466 \(1) The case that characters are in different scripts is controlled
467 by the variable `word-combining-categories'.
469 Emacs finds no word boundary between characters of different scripts
470 if they have categories matching some element of this list.
472 More precisely, if an element of this list is a cons of category CAT1
473 and CAT2, and a multibyte character C1 which has CAT1 is followed by
474 C2 which has CAT2, there's no word boundary between C1 and C2.
476 For instance, to tell that Han characters followed by Hiragana
477 characters can form a single word, the element `(?C . ?H)' should be
478 in this list.
480 \(2) The case that character are in the same script is controlled by
481 the variable `word-separating-categories'.
483 Emacs finds a word boundary between characters of the same script
484 if they have categories matching some element of this list.
486 More precisely, if an element of this list is a cons of category CAT1
487 and CAT2, and a multibyte character C1 which has CAT1 but not CAT2 is
488 followed by C2 which has CAT2 but not CAT1, there's a word boundary
489 between C1 and C2.
491 For instance, to tell that there's a word boundary between Hiragana
492 and Katakana (both are in the same script `kana'),
493 the element `(?H . ?K)' should be in this list. */);
495 Vword_combining_categories = Qnil;
497 DEFVAR_LISP ("word-separating-categories", Vword_separating_categories,
498 doc: /* List of pair (cons) of categories to determine word boundary.
499 See the documentation of the variable `word-combining-categories'. */);
501 Vword_separating_categories = Qnil;
503 defsubr (&Smake_category_set);
504 defsubr (&Sdefine_category);
505 defsubr (&Scategory_docstring);
506 defsubr (&Sget_unused_category);
507 defsubr (&Scategory_table_p);
508 defsubr (&Scategory_table);
509 defsubr (&Sstandard_category_table);
510 defsubr (&Scopy_category_table);
511 defsubr (&Smake_category_table);
512 defsubr (&Sset_category_table);
513 defsubr (&Schar_category_set);
514 defsubr (&Scategory_set_mnemonics);
515 defsubr (&Smodify_category_entry);
517 category_table_version = 0;