* mark.texi (Mark): Clarify introduction.
[emacs.git] / src / category.c
blob84af413120fbab13d026e111ad51313bff66451d
1 /* GNU Emacs routines to deal with category tables.
2 Copyright (C) 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8 Copyright (C) 2003
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
12 This file is part of GNU Emacs.
14 GNU Emacs is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
19 GNU Emacs is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28 /* Here we handle three objects: category, category set, and category
29 table. Read comments in the file category.h to understand them. */
31 #include <config.h>
32 #include <ctype.h>
33 #include "lisp.h"
34 #include "buffer.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "category.h"
38 #include "keymap.h"
40 /* The version number of the latest category table. Each category
41 table has a unique version number. It is assigned a new number
42 also when it is modified. When a regular expression is compiled
43 into the struct re_pattern_buffer, the version number of the
44 category table (of the current buffer) at that moment is also
45 embedded in the structure.
47 For the moment, we are not using this feature. */
48 static int category_table_version;
50 Lisp_Object Qcategory_table, Qcategoryp, Qcategorysetp, Qcategory_table_p;
52 /* Variables to determine word boundary. */
53 Lisp_Object Vword_combining_categories, Vword_separating_categories;
55 /* Temporary internal variable used in macro CHAR_HAS_CATEGORY. */
56 Lisp_Object _temp_category_set;
59 /* Category set staff. */
61 static Lisp_Object hash_get_category_set P_ ((Lisp_Object, Lisp_Object));
63 static Lisp_Object
64 hash_get_category_set (table, category_set)
65 Lisp_Object table, category_set;
67 Lisp_Object val;
68 struct Lisp_Hash_Table *h;
69 int i;
70 unsigned hash;
72 if (NILP (XCHAR_TABLE (table)->extras[1]))
73 XCHAR_TABLE (table)->extras[1]
74 = make_hash_table (Qequal, make_number (DEFAULT_HASH_SIZE),
75 make_float (DEFAULT_REHASH_SIZE),
76 make_float (DEFAULT_REHASH_THRESHOLD),
77 Qnil, Qnil, Qnil);
78 h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
79 i = hash_lookup (h, category_set, &hash);
80 if (i >= 0)
81 return HASH_KEY (h, i);
82 hash_put (h, category_set, Qnil, hash);
83 return category_set;
87 DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
88 doc: /* Return a newly created category-set which contains CATEGORIES.
89 CATEGORIES is a string of category mnemonics.
90 The value is a bool-vector which has t at the indices corresponding to
91 those categories. */)
92 (categories)
93 Lisp_Object categories;
95 Lisp_Object val;
96 int len;
98 CHECK_STRING (categories);
99 val = MAKE_CATEGORY_SET;
101 if (STRING_MULTIBYTE (categories))
102 error ("Multibyte string in `make-category-set'");
104 len = SCHARS (categories);
105 while (--len >= 0)
107 Lisp_Object category;
109 XSETFASTINT (category, SREF (categories, len));
110 CHECK_CATEGORY (category);
111 SET_CATEGORY_SET (val, category, Qt);
113 return val;
117 /* Category staff. */
119 Lisp_Object check_category_table ();
121 DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
122 doc: /* Define CATEGORY as a category which is described by DOCSTRING.
123 CATEGORY should be an ASCII printing character in the range ` ' to `~'.
124 DOCSTRING is the documentation string of the category. The first line
125 should be a terse text (preferably less than 16 characters),
126 and the rest lines should be the full description.
127 The category is defined only in category table TABLE, which defaults to
128 the current buffer's category table. */)
129 (category, docstring, table)
130 Lisp_Object category, docstring, table;
132 CHECK_CATEGORY (category);
133 CHECK_STRING (docstring);
134 table = check_category_table (table);
136 if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
137 error ("Category `%c' is already defined", XFASTINT (category));
138 CATEGORY_DOCSTRING (table, XFASTINT (category)) = docstring;
140 return Qnil;
143 DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
144 doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
145 TABLE should be a category table and defaults to the current buffer's
146 category table. */)
147 (category, table)
148 Lisp_Object category, table;
150 CHECK_CATEGORY (category);
151 table = check_category_table (table);
153 return CATEGORY_DOCSTRING (table, XFASTINT (category));
156 DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
157 0, 1, 0,
158 doc: /* Return a category which is not yet defined in TABLE.
159 If no category remains available, return nil.
160 The optional argument TABLE specifies which category table to modify;
161 it defaults to the current buffer's category table. */)
162 (table)
163 Lisp_Object table;
165 int i;
167 table = check_category_table (table);
169 for (i = ' '; i <= '~'; i++)
170 if (NILP (CATEGORY_DOCSTRING (table, i)))
171 return make_number (i);
173 return Qnil;
177 /* Category-table staff. */
179 DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
180 doc: /* Return t if ARG is a category table. */)
181 (arg)
182 Lisp_Object arg;
184 if (CHAR_TABLE_P (arg)
185 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
186 return Qt;
187 return Qnil;
190 /* If TABLE is nil, return the current category table. If TABLE is
191 not nil, check the validity of TABLE as a category table. If
192 valid, return TABLE itself, but if not valid, signal an error of
193 wrong-type-argument. */
195 Lisp_Object
196 check_category_table (table)
197 Lisp_Object table;
199 if (NILP (table))
200 return current_buffer->category_table;
201 CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
202 return table;
205 DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
206 doc: /* Return the current category table.
207 This is the one specified by the current buffer. */)
210 return current_buffer->category_table;
213 DEFUN ("standard-category-table", Fstandard_category_table,
214 Sstandard_category_table, 0, 0, 0,
215 doc: /* Return the standard category table.
216 This is the one used for new buffers. */)
219 return Vstandard_category_table;
223 static void
224 copy_category_entry (table, c, val)
225 Lisp_Object table, c, val;
227 val = Fcopy_sequence (val);
228 if (CONSP (c))
229 char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val);
230 else
231 char_table_set (table, XINT (c), val);
234 /* Return a copy of category table TABLE. We can't simply use the
235 function copy-sequence because no contents should be shared between
236 the original and the copy. This function is called recursively by
237 binding TABLE to a sub char table. */
239 Lisp_Object
240 copy_category_table (table)
241 Lisp_Object table;
243 table = copy_char_table (table);
245 if (! NILP (XCHAR_TABLE (table)->defalt))
246 XCHAR_TABLE (table)->defalt
247 = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
248 XCHAR_TABLE (table)->extras[0]
249 = Fcopy_sequence (XCHAR_TABLE (table)->extras[0]);
250 map_char_table (copy_category_entry, Qnil, table, table);
252 return table;
255 DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
256 0, 1, 0,
257 doc: /* Construct a new category table and return it.
258 It is a copy of the TABLE, which defaults to the standard category table. */)
259 (table)
260 Lisp_Object table;
262 if (!NILP (table))
263 check_category_table (table);
264 else
265 table = Vstandard_category_table;
267 return copy_category_table (table);
270 DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
271 0, 0, 0,
272 doc: /* Construct a new and empty category table and return it. */)
275 Lisp_Object val;
276 int i;
278 val = Fmake_char_table (Qcategory_table, Qnil);
279 XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
280 for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
281 XCHAR_TABLE (val)->contents[i] = MAKE_CATEGORY_SET;
282 Fset_char_table_extra_slot (val, make_number (0),
283 Fmake_vector (make_number (95), Qnil));
284 return val;
287 DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
288 doc: /* Specify TABLE as the category table for the current buffer.
289 Return TABLE. */)
290 (table)
291 Lisp_Object table;
293 int idx;
294 table = check_category_table (table);
295 current_buffer->category_table = table;
296 /* Indicate that this buffer now has a specified category table. */
297 idx = PER_BUFFER_VAR_IDX (category_table);
298 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
299 return table;
303 Lisp_Object
304 char_category_set (c)
305 int c;
307 return CHAR_TABLE_REF (current_buffer->category_table, c);
310 DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
311 doc: /* Return the category set of CHAR.
312 usage: (char-category-set CHAR) */)
313 (ch)
314 Lisp_Object ch;
316 CHECK_NUMBER (ch);
317 return CATEGORY_SET (XFASTINT (ch));
320 DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
321 Scategory_set_mnemonics, 1, 1, 0,
322 doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
323 CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
324 that are indexes where t occurs in the bool-vector.
325 The return value is a string containing those same categories. */)
326 (category_set)
327 Lisp_Object category_set;
329 int i, j;
330 char str[96];
332 CHECK_CATEGORY_SET (category_set);
334 j = 0;
335 for (i = 32; i < 127; i++)
336 if (CATEGORY_MEMBER (i, category_set))
337 str[j++] = i;
338 str[j] = '\0';
340 return build_string (str);
343 void
344 set_category_set (category_set, category, val)
345 Lisp_Object category_set, category, val;
347 do {
348 int idx = XINT (category) / 8;
349 unsigned char bits = 1 << (XINT (category) % 8);
351 if (NILP (val))
352 XCATEGORY_SET (category_set)->data[idx] &= ~bits;
353 else
354 XCATEGORY_SET (category_set)->data[idx] |= bits;
355 } while (0);
358 DEFUN ("modify-category-entry", Fmodify_category_entry,
359 Smodify_category_entry, 2, 4, 0,
360 doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
361 The category is changed only for table TABLE, which defaults to
362 the current buffer's category table.
363 CHARACTER can be either a single character or a cons representing the
364 lower and upper ends of an inclusive character range to modify.
365 If optional fourth argument RESET is non-nil,
366 then delete CATEGORY from the category set instead of adding it. */)
367 (character, category, table, reset)
368 Lisp_Object character, category, table, reset;
370 Lisp_Object set_value; /* Actual value to be set in category sets. */
371 Lisp_Object category_set;
372 int start, end;
373 int from, to;
375 if (INTEGERP (character))
377 CHECK_CHARACTER (character);
378 start = end = XFASTINT (character);
380 else
382 CHECK_CONS (character);
383 CHECK_CHARACTER_CAR (character);
384 CHECK_CHARACTER_CDR (character);
385 start = XFASTINT (XCAR (character));
386 end = XFASTINT (XCDR (character));
389 CHECK_CATEGORY (category);
390 table = check_category_table (table);
392 if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
393 error ("Undefined category: %c", XFASTINT (category));
395 set_value = NILP (reset) ? Qt : Qnil;
397 while (start <= end)
399 from = start, to = end;
400 category_set = char_table_ref_and_range (table, start, &from, &to);
401 if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
403 category_set = Fcopy_sequence (category_set);
404 SET_CATEGORY_SET (category_set, category, set_value);
405 category_set = hash_get_category_set (table, category_set);
406 char_table_set_range (table, start, to, category_set);
408 start = to + 1;
411 return Qnil;
414 /* Return 1 if there is a word boundary between two word-constituent
415 characters C1 and C2 if they appear in this order, else return 0.
416 Use the macro WORD_BOUNDARY_P instead of calling this function
417 directly. */
420 word_boundary_p (c1, c2)
421 int c1, c2;
423 Lisp_Object category_set1, category_set2;
424 Lisp_Object tail;
425 int default_result;
427 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
428 CHAR_TABLE_REF (Vchar_script_table, c2)))
430 tail = Vword_separating_categories;
431 default_result = 0;
433 else
435 tail = Vword_combining_categories;
436 default_result = 1;
439 category_set1 = CATEGORY_SET (c1);
440 if (NILP (category_set1))
441 return default_result;
442 category_set2 = CATEGORY_SET (c2);
443 if (NILP (category_set2))
444 return default_result;
446 for (; CONSP (tail); tail = XCDR (tail))
448 Lisp_Object elt = XCAR (tail);
450 if (CONSP (elt)
451 && (NILP (XCAR (elt))
452 || (CATEGORYP (XCAR (elt))
453 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)))
454 && (NILP (XCDR (elt))
455 || (CATEGORYP (XCDR (elt))
456 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))))
457 return !default_result;
459 return default_result;
463 void
464 init_category_once ()
466 /* This has to be done here, before we call Fmake_char_table. */
467 Qcategory_table = intern ("category-table");
468 staticpro (&Qcategory_table);
470 /* Intern this now in case it isn't already done.
471 Setting this variable twice is harmless.
472 But don't staticpro it here--that is done in alloc.c. */
473 Qchar_table_extra_slots = intern ("char-table-extra-slots");
475 /* Now we are ready to set up this property, so we can
476 create category tables. */
477 Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
479 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
480 /* Set a category set which contains nothing to the default. */
481 XCHAR_TABLE (Vstandard_category_table)->defalt = MAKE_CATEGORY_SET;
482 Fset_char_table_extra_slot (Vstandard_category_table, make_number (0),
483 Fmake_vector (make_number (95), Qnil));
486 void
487 syms_of_category ()
489 Qcategoryp = intern ("categoryp");
490 staticpro (&Qcategoryp);
491 Qcategorysetp = intern ("categorysetp");
492 staticpro (&Qcategorysetp);
493 Qcategory_table_p = intern ("category-table-p");
494 staticpro (&Qcategory_table_p);
496 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories,
497 doc: /* List of pair (cons) of categories to determine word boundary.
499 Emacs treats a sequence of word constituent characters as a single
500 word (i.e. finds no word boundary between them) only if they belong to
501 the same script. But, exceptions are allowed in the following cases.
503 \(1) The case that characters are in different scripts is controlled
504 by the variable `word-combining-categories'.
506 Emacs finds no word boundary between characters of different scripts
507 if they have categories matching some element of this list.
509 More precisely, if an element of this list is a cons of category CAT1
510 and CAT2, and a multibyte character C1 which has CAT1 is followed by
511 C2 which has CAT2, there's no word boundary between C1 and C2.
513 For instance, to tell that Han characters followed by Hiragana
514 characters can form a single word, the element `(?C . ?H)' should be
515 in this list.
517 \(2) The case that character are in the same script is controlled by
518 the variable `word-separating-categories'.
520 Emacs finds a word boundary between characters of the same script
521 if they have categories matching some element of this list.
523 More precisely, if an element of this list is a cons of category CAT1
524 and CAT2, and a multibyte character C1 which has CAT1 is followed by
525 C2 which has CAT2, there's a word boundary between C1 and C2.
527 For instance, to tell that there's a word boundary between Hiragana
528 and Katakana (both are in the same script `kana'),
529 the element `(?H . ?K) should be in this list. */);
531 Vword_combining_categories = Qnil;
533 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories,
534 doc: /* List of pair (cons) of categories to determine word boundary.
535 See the documentation of the variable `word-combining-categories'. */);
537 Vword_separating_categories = Qnil;
539 defsubr (&Smake_category_set);
540 defsubr (&Sdefine_category);
541 defsubr (&Scategory_docstring);
542 defsubr (&Sget_unused_category);
543 defsubr (&Scategory_table_p);
544 defsubr (&Scategory_table);
545 defsubr (&Sstandard_category_table);
546 defsubr (&Scopy_category_table);
547 defsubr (&Smake_category_table);
548 defsubr (&Sset_category_table);
549 defsubr (&Schar_category_set);
550 defsubr (&Scategory_set_mnemonics);
551 defsubr (&Smodify_category_entry);
553 category_table_version = 0;
556 /* arch-tag: 74ebf524-121b-4d9c-bd68-07f8d708b211
557 (do not change this comment) */