(displayed-month, displayed-year): Move declarations where needed.
[emacs.git] / src / character.c
blob457d57f9251ce53320b5193daf7f0919cd869c5f
1 /* Basic character support.
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 Free Software Foundation, Inc.
6 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H13PRO009
10 This file is part of GNU Emacs.
12 GNU Emacs is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 3, or (at your option)
15 any later version.
17 GNU Emacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with GNU Emacs; see the file COPYING. If not, write to the
24 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 Boston, MA 02110-1301, USA. */
27 /* At first, see the document in `character.h' to understand the code
28 in this file. */
30 #ifdef emacs
31 #include <config.h>
32 #endif
34 #include <stdio.h>
36 #ifdef emacs
38 #include <sys/types.h>
39 #include "lisp.h"
40 #include "character.h"
41 #include "buffer.h"
42 #include "charset.h"
43 #include "composite.h"
44 #include "disptab.h"
46 #else /* not emacs */
48 #include "mulelib.h"
50 #endif /* emacs */
52 Lisp_Object Qcharacterp;
54 /* Vector of translation table ever defined.
55 ID of a translation table is used to index this vector. */
56 Lisp_Object Vtranslation_table_vector;
58 /* A char-table for characters which may invoke auto-filling. */
59 Lisp_Object Vauto_fill_chars;
61 Lisp_Object Qauto_fill_chars;
63 /* Char-table of information about which character to unify to which
64 Unicode character. */
65 Lisp_Object Vchar_unify_table;
67 /* A char-table. An element is non-nil iff the corresponding
68 character has a printable glyph. */
69 Lisp_Object Vprintable_chars;
71 /* A char-table. An elemnent is a column-width of the corresponding
72 character. */
73 Lisp_Object Vchar_width_table;
75 /* A char-table. An element is a symbol indicating the direction
76 property of corresponding character. */
77 Lisp_Object Vchar_direction_table;
79 /* Variable used locally in the macro FETCH_MULTIBYTE_CHAR. */
80 unsigned char *_fetch_multibyte_char_p;
82 /* Char table of scripts. */
83 Lisp_Object Vchar_script_table;
85 /* Alist of scripts vs representative characters. */
86 Lisp_Object Vscript_representative_chars;
88 static Lisp_Object Qchar_script_table;
90 /* Mapping table from unibyte chars to multibyte chars. */
91 int unibyte_to_multibyte_table[256];
93 /* Nth element is 1 iff unibyte char N can be mapped to a multibyte
94 char. */
95 char unibyte_has_multibyte_table[256];
99 /* If character code C has modifier masks, reflect them to the
100 character code if possible. Return the resulting code. */
103 char_resolve_modifier_mask (c)
104 int c;
106 /* A non-ASCII character can't reflect modifier bits to the code. */
107 if (! ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
108 return c;
110 /* For Meta, Shift, and Control modifiers, we need special care. */
111 if (c & CHAR_SHIFT)
113 /* Shift modifier is valid only with [A-Za-z]. */
114 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
115 c &= ~CHAR_SHIFT;
116 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
117 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
118 /* Shift modifier for control characters and SPC is ignored. */
119 else if ((c & ~CHAR_MODIFIER_MASK) <= 0x20)
120 c &= ~CHAR_SHIFT;
122 if (c & CHAR_CTL)
124 /* Simulate the code in lread.c. */
125 /* Allow `\C- ' and `\C-?'. */
126 if ((c & 0377) == ' ')
127 c &= ~0177 & ~ CHAR_CTL;
128 else if ((c & 0377) == '?')
129 c = 0177 | (c & ~0177 & ~CHAR_CTL);
130 /* ASCII control chars are made from letters (both cases),
131 as well as the non-letters within 0100...0137. */
132 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
133 c &= (037 | (~0177 & ~CHAR_CTL));
134 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
135 c &= (037 | (~0177 & ~CHAR_CTL));
137 if (c & CHAR_META)
139 /* Move the meta bit to the right place for a string. */
140 c = (c & ~CHAR_META) | 0x80;
143 return c;
147 /* Store multibyte form of character C at P. If C has modifier bits,
148 handle them appropriately. */
151 char_string (c, p)
152 unsigned c;
153 unsigned char *p;
155 int bytes;
157 if (c & CHAR_MODIFIER_MASK)
159 c = (unsigned) char_resolve_modifier_mask ((int) c);
160 /* If C still has any modifier bits, just ignore it. */
161 c &= ~CHAR_MODIFIER_MASK;
164 MAYBE_UNIFY_CHAR (c);
166 if (c <= MAX_3_BYTE_CHAR)
168 bytes = CHAR_STRING (c, p);
170 else if (c <= MAX_4_BYTE_CHAR)
172 p[0] = (0xF0 | (c >> 18));
173 p[1] = (0x80 | ((c >> 12) & 0x3F));
174 p[2] = (0x80 | ((c >> 6) & 0x3F));
175 p[3] = (0x80 | (c & 0x3F));
176 bytes = 4;
178 else if (c <= MAX_5_BYTE_CHAR)
180 p[0] = 0xF8;
181 p[1] = (0x80 | ((c >> 18) & 0x0F));
182 p[2] = (0x80 | ((c >> 12) & 0x3F));
183 p[3] = (0x80 | ((c >> 6) & 0x3F));
184 p[4] = (0x80 | (c & 0x3F));
185 bytes = 5;
187 else if (c <= MAX_CHAR)
189 c = CHAR_TO_BYTE8 (c);
190 bytes = BYTE8_STRING (c, p);
192 else
193 error ("Invalid character: %d", c);
195 return bytes;
199 /* Return a character whose multibyte form is at P. Set LEN is not
200 NULL, it must be a pointer to integer. In that case, set *LEN to
201 the byte length of the multibyte form. If ADVANCED is not NULL, is
202 must be a pointer to unsigned char. In that case, set *ADVANCED to
203 the ending address (i.e. the starting address of the next
204 character) of the multibyte form. */
207 string_char (p, advanced, len)
208 const unsigned char *p;
209 const unsigned char **advanced;
210 int *len;
212 int c;
213 const unsigned char *saved_p = p;
215 if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
217 c = STRING_CHAR_ADVANCE (p);
219 else if (! (*p & 0x08))
221 c = ((((p)[0] & 0xF) << 18)
222 | (((p)[1] & 0x3F) << 12)
223 | (((p)[2] & 0x3F) << 6)
224 | ((p)[3] & 0x3F));
225 p += 4;
227 else
229 c = ((((p)[1] & 0x3F) << 18)
230 | (((p)[2] & 0x3F) << 12)
231 | (((p)[3] & 0x3F) << 6)
232 | ((p)[4] & 0x3F));
233 p += 5;
236 MAYBE_UNIFY_CHAR (c);
238 if (len)
239 *len = p - saved_p;
240 if (advanced)
241 *advanced = p;
242 return c;
246 /* Translate character C by translation table TABLE. If C is
247 negative, translate a character specified by CHARSET and CODE. If
248 no translation is found in TABLE, return the untranslated
249 character. If TABLE is a list, elements are char tables. In this
250 case, translace C by all tables. */
253 translate_char (table, c)
254 Lisp_Object table;
255 int c;
257 if (CHAR_TABLE_P (table))
259 Lisp_Object ch;
261 ch = CHAR_TABLE_REF (table, c);
262 if (CHARACTERP (ch))
263 c = XINT (ch);
265 else
267 for (; CONSP (table); table = XCDR (table))
268 c = translate_char (XCAR (table), c);
270 return c;
273 /* Convert the multibyte character C to unibyte 8-bit character based
274 on the current value of charset_unibyte. If dimension of
275 charset_unibyte is more than one, return (C & 0xFF).
277 The argument REV_TBL is now ignored. It will be removed in the
278 future. */
281 multibyte_char_to_unibyte (c, rev_tbl)
282 int c;
283 Lisp_Object rev_tbl;
285 struct charset *charset;
286 unsigned c1;
288 if (CHAR_BYTE8_P (c))
289 return CHAR_TO_BYTE8 (c);
290 charset = CHARSET_FROM_ID (charset_unibyte);
291 c1 = ENCODE_CHAR (charset, c);
292 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
295 /* Like multibyte_char_to_unibyte, but return -1 if C is not supported
296 by charset_unibyte. */
299 multibyte_char_to_unibyte_safe (c)
300 int c;
302 struct charset *charset;
303 unsigned c1;
305 if (CHAR_BYTE8_P (c))
306 return CHAR_TO_BYTE8 (c);
307 charset = CHARSET_FROM_ID (charset_unibyte);
308 c1 = ENCODE_CHAR (charset, c);
309 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1);
312 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
313 doc: /* Return non-nil if OBJECT is a character. */)
314 (object, ignore)
315 Lisp_Object object, ignore;
317 return (CHARACTERP (object) ? Qt : Qnil);
320 DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
321 doc: /* Return the character of the maximum code. */)
324 return make_number (MAX_CHAR);
327 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
328 Sunibyte_char_to_multibyte, 1, 1, 0,
329 doc: /* Convert the unibyte character CH to multibyte character.
330 The multibyte character is a result of decoding CH by
331 the current unibyte charset (see `unibyte-charset'). */)
332 (ch)
333 Lisp_Object ch;
335 int c;
336 struct charset *charset;
338 CHECK_CHARACTER (ch);
339 c = XFASTINT (ch);
340 if (c >= 0400)
341 error ("Invalid unibyte character: %d", c);
342 charset = CHARSET_FROM_ID (charset_unibyte);
343 c = DECODE_CHAR (charset, c);
344 if (c < 0)
345 c = BYTE8_TO_CHAR (XFASTINT (ch));
346 return make_number (c);
349 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
350 Smultibyte_char_to_unibyte, 1, 1, 0,
351 doc: /* Convert the multibyte character CH to unibyte character.\n\
352 The unibyte character is a result of encoding CH by
353 the current primary charset (value of `charset-primary'). */)
354 (ch)
355 Lisp_Object ch;
357 int c;
359 CHECK_CHARACTER (ch);
360 c = XFASTINT (ch);
361 c = CHAR_TO_BYTE8 (c);
362 return make_number (c);
365 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
366 doc: /* Return 1 regardless of the argument CHAR.
367 This is now an obsolete function. We keep it just for backward compatibility. */)
368 (ch)
369 Lisp_Object ch;
371 CHECK_CHARACTER (ch);
372 return make_number (1);
375 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
376 doc: /* Return width of CHAR when displayed in the current buffer.
377 The width is measured by how many columns it occupies on the screen.
378 Tab is taken to occupy `tab-width' columns. */)
379 (ch)
380 Lisp_Object ch;
382 Lisp_Object disp;
383 int c, width;
384 struct Lisp_Char_Table *dp = buffer_display_table ();
386 CHECK_CHARACTER (ch);
387 c = XINT (ch);
389 /* Get the way the display table would display it. */
390 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
392 if (VECTORP (disp))
393 width = ASIZE (disp);
394 else
395 width = CHAR_WIDTH (c);
397 return make_number (width);
400 /* Return width of string STR of length LEN when displayed in the
401 current buffer. The width is measured by how many columns it
402 occupies on the screen. If PRECISION > 0, return the width of
403 longest substring that doesn't exceed PRECISION, and set number of
404 characters and bytes of the substring in *NCHARS and *NBYTES
405 respectively. */
408 c_string_width (str, len, precision, nchars, nbytes)
409 const unsigned char *str;
410 int precision, *nchars, *nbytes;
412 int i = 0, i_byte = 0;
413 int width = 0;
414 struct Lisp_Char_Table *dp = buffer_display_table ();
416 while (i_byte < len)
418 int bytes, thiswidth;
419 Lisp_Object val;
420 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
422 if (dp)
424 val = DISP_CHAR_VECTOR (dp, c);
425 if (VECTORP (val))
426 thiswidth = XVECTOR (val)->size;
427 else
428 thiswidth = CHAR_WIDTH (c);
430 else
432 thiswidth = CHAR_WIDTH (c);
435 if (precision > 0
436 && (width + thiswidth > precision))
438 *nchars = i;
439 *nbytes = i_byte;
440 return width;
442 i++;
443 i_byte += bytes;
444 width += thiswidth;
447 if (precision > 0)
449 *nchars = i;
450 *nbytes = i_byte;
453 return width;
456 /* Return width of string STR of length LEN when displayed in the
457 current buffer. The width is measured by how many columns it
458 occupies on the screen. */
461 strwidth (str, len)
462 unsigned char *str;
463 int len;
465 return c_string_width (str, len, -1, NULL, NULL);
468 /* Return width of Lisp string STRING when displayed in the current
469 buffer. The width is measured by how many columns it occupies on
470 the screen while paying attention to compositions. If PRECISION >
471 0, return the width of longest substring that doesn't exceed
472 PRECISION, and set number of characters and bytes of the substring
473 in *NCHARS and *NBYTES respectively. */
476 lisp_string_width (string, precision, nchars, nbytes)
477 Lisp_Object string;
478 int precision, *nchars, *nbytes;
480 int len = SCHARS (string);
481 /* This set multibyte to 0 even if STRING is multibyte when it
482 contains only ascii and eight-bit-graphic, but that's
483 intentional. */
484 int multibyte = len < SBYTES (string);
485 unsigned char *str = SDATA (string);
486 int i = 0, i_byte = 0;
487 int width = 0;
488 struct Lisp_Char_Table *dp = buffer_display_table ();
490 while (i < len)
492 int chars, bytes, thiswidth;
493 Lisp_Object val;
494 int cmp_id;
495 EMACS_INT ignore, end;
497 if (find_composition (i, -1, &ignore, &end, &val, string)
498 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
499 >= 0))
501 thiswidth = composition_table[cmp_id]->width;
502 chars = end - i;
503 bytes = string_char_to_byte (string, end) - i_byte;
505 else
507 int c;
509 if (multibyte)
510 c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
511 else
512 c = str[i_byte], bytes = 1;
513 chars = 1;
514 if (dp)
516 val = DISP_CHAR_VECTOR (dp, c);
517 if (VECTORP (val))
518 thiswidth = XVECTOR (val)->size;
519 else
520 thiswidth = CHAR_WIDTH (c);
522 else
524 thiswidth = CHAR_WIDTH (c);
528 if (precision > 0
529 && (width + thiswidth > precision))
531 *nchars = i;
532 *nbytes = i_byte;
533 return width;
535 i += chars;
536 i_byte += bytes;
537 width += thiswidth;
540 if (precision > 0)
542 *nchars = i;
543 *nbytes = i_byte;
546 return width;
549 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
550 doc: /* Return width of STRING when displayed in the current buffer.
551 Width is measured by how many columns it occupies on the screen.
552 When calculating width of a multibyte character in STRING,
553 only the base leading-code is considered; the validity of
554 the following bytes is not checked. Tabs in STRING are always
555 taken to occupy `tab-width' columns. */)
556 (str)
557 Lisp_Object str;
559 Lisp_Object val;
561 CHECK_STRING (str);
562 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
563 return val;
566 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
567 doc: /* Return the direction of CHAR.
568 The returned value is 0 for left-to-right and 1 for right-to-left. */)
569 (ch)
570 Lisp_Object ch;
572 int c;
574 CHECK_CHARACTER (ch);
575 c = XINT (ch);
576 return CHAR_TABLE_REF (Vchar_direction_table, c);
579 /* Return the number of characters in the NBYTES bytes at PTR.
580 This works by looking at the contents and checking for multibyte
581 sequences while assuming that there's no invalid sequence.
582 However, if the current buffer has enable-multibyte-characters =
583 nil, we treat each byte as a character. */
585 EMACS_INT
586 chars_in_text (ptr, nbytes)
587 const unsigned char *ptr;
588 EMACS_INT nbytes;
590 /* current_buffer is null at early stages of Emacs initialization. */
591 if (current_buffer == 0
592 || NILP (current_buffer->enable_multibyte_characters))
593 return nbytes;
595 return multibyte_chars_in_text (ptr, nbytes);
598 /* Return the number of characters in the NBYTES bytes at PTR.
599 This works by looking at the contents and checking for multibyte
600 sequences while assuming that there's no invalid sequence. It
601 ignores enable-multibyte-characters. */
603 EMACS_INT
604 multibyte_chars_in_text (ptr, nbytes)
605 const unsigned char *ptr;
606 EMACS_INT nbytes;
608 const unsigned char *endp = ptr + nbytes;
609 int chars = 0;
611 while (ptr < endp)
613 int len = MULTIBYTE_LENGTH (ptr, endp);
615 if (len == 0)
616 abort ();
617 ptr += len;
618 chars++;
621 return chars;
624 /* Parse unibyte text at STR of LEN bytes as a multibyte text, count
625 characters and bytes in it, and store them in *NCHARS and *NBYTES
626 respectively. On counting bytes, pay attention to that 8-bit
627 characters not constructing a valid multibyte sequence are
628 represented by 2-byte in a multibyte text. */
630 void
631 parse_str_as_multibyte (str, len, nchars, nbytes)
632 const unsigned char *str;
633 int len, *nchars, *nbytes;
635 const unsigned char *endp = str + len;
636 int n, chars = 0, bytes = 0;
638 if (len >= MAX_MULTIBYTE_LENGTH)
640 const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
641 while (str < adjusted_endp)
643 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
644 str += n, bytes += n;
645 else
646 str++, bytes += 2;
647 chars++;
650 while (str < endp)
652 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
653 str += n, bytes += n;
654 else
655 str++, bytes += 2;
656 chars++;
659 *nchars = chars;
660 *nbytes = bytes;
661 return;
664 /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
665 It actually converts only such 8-bit characters that don't contruct
666 a multibyte sequence to multibyte forms of Latin-1 characters. If
667 NCHARS is nonzero, set *NCHARS to the number of characters in the
668 text. It is assured that we can use LEN bytes at STR as a work
669 area and that is enough. Return the number of bytes of the
670 resulting text. */
673 str_as_multibyte (str, len, nbytes, nchars)
674 unsigned char *str;
675 int len, nbytes, *nchars;
677 unsigned char *p = str, *endp = str + nbytes;
678 unsigned char *to;
679 int chars = 0;
680 int n;
682 if (nbytes >= MAX_MULTIBYTE_LENGTH)
684 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
685 while (p < adjusted_endp
686 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
687 p += n, chars++;
689 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
690 p += n, chars++;
691 if (nchars)
692 *nchars = chars;
693 if (p == endp)
694 return nbytes;
696 to = p;
697 nbytes = endp - p;
698 endp = str + len;
699 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
700 p = endp - nbytes;
702 if (nbytes >= MAX_MULTIBYTE_LENGTH)
704 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
705 while (p < adjusted_endp)
707 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
709 while (n--)
710 *to++ = *p++;
712 else
714 int c = *p++;
715 c = BYTE8_TO_CHAR (c);
716 to += CHAR_STRING (c, to);
719 chars++;
721 while (p < endp)
723 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
725 while (n--)
726 *to++ = *p++;
728 else
730 int c = *p++;
731 c = BYTE8_TO_CHAR (c);
732 to += CHAR_STRING (c, to);
734 chars++;
736 if (nchars)
737 *nchars = chars;
738 return (to - str);
741 /* Parse unibyte string at STR of LEN bytes, and return the number of
742 bytes it may ocupy when converted to multibyte string by
743 `str_to_multibyte'. */
746 parse_str_to_multibyte (str, len)
747 unsigned char *str;
748 int len;
750 unsigned char *endp = str + len;
751 int bytes;
753 for (bytes = 0; str < endp; str++)
754 bytes += (*str < 0x80) ? 1 : 2;
755 return bytes;
759 /* Convert unibyte text at STR of NBYTES bytes to a multibyte text
760 that contains the same single-byte characters. It actually
761 converts all 8-bit characters to multibyte forms. It is assured
762 that we can use LEN bytes at STR as a work area and that is
763 enough. */
766 str_to_multibyte (str, len, bytes)
767 unsigned char *str;
768 int len, bytes;
770 unsigned char *p = str, *endp = str + bytes;
771 unsigned char *to;
773 while (p < endp && *p < 0x80) p++;
774 if (p == endp)
775 return bytes;
776 to = p;
777 bytes = endp - p;
778 endp = str + len;
779 safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
780 p = endp - bytes;
781 while (p < endp)
783 int c = *p++;
785 if (c >= 0x80)
786 c = BYTE8_TO_CHAR (c);
787 to += CHAR_STRING (c, to);
789 return (to - str);
792 /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
793 actually converts characters in the range 0x80..0xFF to
794 unibyte. */
797 str_as_unibyte (str, bytes)
798 unsigned char *str;
799 int bytes;
801 const unsigned char *p = str, *endp = str + bytes;
802 unsigned char *to;
803 int c, len;
805 while (p < endp)
807 c = *p;
808 len = BYTES_BY_CHAR_HEAD (c);
809 if (CHAR_BYTE8_HEAD_P (c))
810 break;
811 p += len;
813 to = str + (p - str);
814 while (p < endp)
816 c = *p;
817 len = BYTES_BY_CHAR_HEAD (c);
818 if (CHAR_BYTE8_HEAD_P (c))
820 c = STRING_CHAR_ADVANCE (p);
821 *to++ = CHAR_TO_BYTE8 (c);
823 else
825 while (len--) *to++ = *p++;
828 return (to - str);
832 string_count_byte8 (string)
833 Lisp_Object string;
835 int multibyte = STRING_MULTIBYTE (string);
836 int nbytes = SBYTES (string);
837 unsigned char *p = SDATA (string);
838 unsigned char *pend = p + nbytes;
839 int count = 0;
840 int c, len;
842 if (multibyte)
843 while (p < pend)
845 c = *p;
846 len = BYTES_BY_CHAR_HEAD (c);
848 if (CHAR_BYTE8_HEAD_P (c))
849 count++;
850 p += len;
852 else
853 while (p < pend)
855 if (*p++ >= 0x80)
856 count++;
858 return count;
862 Lisp_Object
863 string_escape_byte8 (string)
864 Lisp_Object string;
866 int nchars = SCHARS (string);
867 int nbytes = SBYTES (string);
868 int multibyte = STRING_MULTIBYTE (string);
869 int byte8_count;
870 const unsigned char *src, *src_end;
871 unsigned char *dst;
872 Lisp_Object val;
873 int c, len;
875 if (multibyte && nchars == nbytes)
876 return string;
878 byte8_count = string_count_byte8 (string);
880 if (byte8_count == 0)
881 return string;
883 if (multibyte)
884 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
885 val = make_uninit_multibyte_string (nchars + byte8_count * 3,
886 nbytes + byte8_count * 2);
887 else
888 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
889 val = make_uninit_string (nbytes + byte8_count * 3);
891 src = SDATA (string);
892 src_end = src + nbytes;
893 dst = SDATA (val);
894 if (multibyte)
895 while (src < src_end)
897 c = *src;
898 len = BYTES_BY_CHAR_HEAD (c);
900 if (CHAR_BYTE8_HEAD_P (c))
902 c = STRING_CHAR_ADVANCE (src);
903 c = CHAR_TO_BYTE8 (c);
904 sprintf ((char *) dst, "\\%03o", c);
905 dst += 4;
907 else
908 while (len--) *dst++ = *src++;
910 else
911 while (src < src_end)
913 c = *src++;
914 if (c >= 0x80)
916 sprintf ((char *) dst, "\\%03o", c);
917 dst += 4;
919 else
920 *dst++ = c;
922 return val;
926 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
927 doc: /*
928 Concatenate all the argument characters and make the result a string.
929 usage: (string &rest CHARACTERS) */)
930 (n, args)
931 int n;
932 Lisp_Object *args;
934 int i;
935 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
936 unsigned char *p = buf;
937 int c;
939 for (i = 0; i < n; i++)
941 CHECK_CHARACTER (args[i]);
942 c = XINT (args[i]);
943 p += CHAR_STRING (c, p);
946 return make_string_from_bytes ((char *) buf, n, p - buf);
949 DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0,
950 doc: /* Concatenate all the argument bytes and make the result a unibyte string.
951 usage: (unibyte-string &rest BYTES) */)
952 (n, args)
953 int n;
954 Lisp_Object *args;
956 int i;
957 unsigned char *buf = (unsigned char *) alloca (n);
958 unsigned char *p = buf;
959 unsigned c;
961 for (i = 0; i < n; i++)
963 CHECK_NATNUM (args[i]);
964 c = XFASTINT (args[i]);
965 if (c >= 256)
966 args_out_of_range_3 (args[i], make_number (0), make_number (255));
967 *p++ = c;
970 return make_string_from_bytes ((char *) buf, n, p - buf);
973 DEFUN ("char-resolve-modifers", Fchar_resolve_modifiers,
974 Schar_resolve_modifiers, 1, 1, 0,
975 doc: /* Resolve modifiers in the character CHAR.
976 The value is a character with modifiers resolved into the character
977 code. Unresolved modifiers are kept in the value.
978 usage: (char-resolve-modifers CHAR) */)
979 (character)
980 Lisp_Object character;
982 int c;
984 CHECK_NUMBER (character);
985 c = XINT (character);
986 return make_number (char_resolve_modifier_mask (c));
989 void
990 init_character_once ()
994 #ifdef emacs
996 void
997 syms_of_character ()
999 DEFSYM (Qcharacterp, "characterp");
1000 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
1002 staticpro (&Vchar_unify_table);
1003 Vchar_unify_table = Qnil;
1005 defsubr (&Smax_char);
1006 defsubr (&Scharacterp);
1007 defsubr (&Sunibyte_char_to_multibyte);
1008 defsubr (&Smultibyte_char_to_unibyte);
1009 defsubr (&Schar_bytes);
1010 defsubr (&Schar_width);
1011 defsubr (&Sstring_width);
1012 defsubr (&Schar_direction);
1013 defsubr (&Sstring);
1014 defsubr (&Sunibyte_string);
1015 defsubr (&Schar_resolve_modifiers);
1017 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
1018 doc: /*
1019 Vector recording all translation tables ever defined.
1020 Each element is a pair (SYMBOL . TABLE) relating the table to the
1021 symbol naming it. The ID of a translation table is an index into this vector. */);
1022 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1024 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1025 doc: /*
1026 A char-table for characters which invoke auto-filling.
1027 Such characters have value t in this table. */);
1028 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1029 CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
1030 CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
1032 DEFVAR_LISP ("char-width-table", &Vchar_width_table,
1033 doc: /*
1034 A char-table for width (columns) of each character. */);
1035 Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
1036 char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4));
1037 char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
1038 make_number (4));
1040 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
1041 doc: /* A char-table for direction of each character. */);
1042 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
1044 DEFVAR_LISP ("printable-chars", &Vprintable_chars,
1045 doc: /* A char-table for each printable character. */);
1046 Vprintable_chars = Fmake_char_table (Qnil, Qnil);
1047 Fset_char_table_range (Vprintable_chars,
1048 Fcons (make_number (32), make_number (126)), Qt);
1049 Fset_char_table_range (Vprintable_chars,
1050 Fcons (make_number (160),
1051 make_number (MAX_5_BYTE_CHAR)), Qt);
1053 DEFVAR_LISP ("char-script-table", &Vchar_script_table,
1054 doc: /* Char table of script symbols.
1055 It has one extra slot whose value is a list of script symbols. */);
1057 /* Intern this now in case it isn't already done.
1058 Setting this variable twice is harmless.
1059 But don't staticpro it here--that is done in alloc.c. */
1060 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1061 DEFSYM (Qchar_script_table, "char-script-table");
1062 Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
1063 Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
1065 DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars,
1066 doc: /* Alist of scripts vs the representative characters. */);
1067 Vscript_representative_chars = Qnil;
1070 #endif /* emacs */
1072 /* arch-tag: b6665960-3c3d-4184-85cd-af4318197999
1073 (do not change this comment) */