1 /* Basic character support.
3 Copyright (C) 2001-2013 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
5 Licensed to the Free Software Foundation.
6 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H13PRO009
10 This file is part of GNU Emacs.
12 GNU Emacs is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
17 GNU Emacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
25 /* At first, see the document in `character.h' to understand the code
32 #define CHARACTER_INLINE EXTERN_INLINE
38 #include <sys/types.h>
41 #include "character.h"
44 #include "composite.h"
53 Lisp_Object Qcharacterp
;
55 static Lisp_Object Qauto_fill_chars
;
57 /* Char-table of information about which character to unify to which
58 Unicode character. Mainly used by the macro MAYBE_UNIFY_CHAR. */
59 Lisp_Object Vchar_unify_table
;
61 static Lisp_Object Qchar_script_table
;
65 /* If character code C has modifier masks, reflect them to the
66 character code if possible. Return the resulting code. */
69 char_resolve_modifier_mask (EMACS_INT c
)
71 /* A non-ASCII character can't reflect modifier bits to the code. */
72 if (! ASCII_CHAR_P ((c
& ~CHAR_MODIFIER_MASK
)))
75 /* For Meta, Shift, and Control modifiers, we need special care. */
78 /* Shift modifier is valid only with [A-Za-z]. */
79 if ((c
& 0377) >= 'A' && (c
& 0377) <= 'Z')
81 else if ((c
& 0377) >= 'a' && (c
& 0377) <= 'z')
82 c
= (c
& ~CHAR_SHIFT
) - ('a' - 'A');
83 /* Shift modifier for control characters and SPC is ignored. */
84 else if ((c
& ~CHAR_MODIFIER_MASK
) <= 0x20)
89 /* Simulate the code in lread.c. */
90 /* Allow `\C- ' and `\C-?'. */
91 if ((c
& 0377) == ' ')
92 c
&= ~0177 & ~ CHAR_CTL
;
93 else if ((c
& 0377) == '?')
94 c
= 0177 | (c
& ~0177 & ~CHAR_CTL
);
95 /* ASCII control chars are made from letters (both cases),
96 as well as the non-letters within 0100...0137. */
97 else if ((c
& 0137) >= 0101 && (c
& 0137) <= 0132)
98 c
&= (037 | (~0177 & ~CHAR_CTL
));
99 else if ((c
& 0177) >= 0100 && (c
& 0177) <= 0137)
100 c
&= (037 | (~0177 & ~CHAR_CTL
));
102 #if 0 /* This is outside the scope of this function. (bug#4751) */
105 /* Move the meta bit to the right place for a string. */
106 c
= (c
& ~CHAR_META
) | 0x80;
114 /* Store multibyte form of character C at P. If C has modifier bits,
115 handle them appropriately. */
118 char_string (unsigned int c
, unsigned char *p
)
122 if (c
& CHAR_MODIFIER_MASK
)
124 c
= char_resolve_modifier_mask (c
);
125 /* If C still has any modifier bits, just ignore it. */
126 c
&= ~CHAR_MODIFIER_MASK
;
129 if (c
<= MAX_3_BYTE_CHAR
)
131 bytes
= CHAR_STRING (c
, p
);
133 else if (c
<= MAX_4_BYTE_CHAR
)
135 p
[0] = (0xF0 | (c
>> 18));
136 p
[1] = (0x80 | ((c
>> 12) & 0x3F));
137 p
[2] = (0x80 | ((c
>> 6) & 0x3F));
138 p
[3] = (0x80 | (c
& 0x3F));
141 else if (c
<= MAX_5_BYTE_CHAR
)
144 p
[1] = (0x80 | ((c
>> 18) & 0x0F));
145 p
[2] = (0x80 | ((c
>> 12) & 0x3F));
146 p
[3] = (0x80 | ((c
>> 6) & 0x3F));
147 p
[4] = (0x80 | (c
& 0x3F));
150 else if (c
<= MAX_CHAR
)
152 c
= CHAR_TO_BYTE8 (c
);
153 bytes
= BYTE8_STRING (c
, p
);
156 error ("Invalid character: %x", c
);
162 /* Return a character whose multibyte form is at P. If LEN is not
163 NULL, it must be a pointer to integer. In that case, set *LEN to
164 the byte length of the multibyte form. If ADVANCED is not NULL, it
165 must be a pointer to unsigned char. In that case, set *ADVANCED to
166 the ending address (i.e., the starting address of the next
167 character) of the multibyte form. */
170 string_char (const unsigned char *p
, const unsigned char **advanced
, int *len
)
173 const unsigned char *saved_p
= p
;
175 if (*p
< 0x80 || ! (*p
& 0x20) || ! (*p
& 0x10))
177 /* 1-, 2-, and 3-byte sequences can be handled by the macro. */
178 c
= STRING_CHAR_ADVANCE (p
);
180 else if (! (*p
& 0x08))
182 /* A 4-byte sequence of this form:
183 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
184 c
= ((((p
)[0] & 0x7) << 18)
185 | (((p
)[1] & 0x3F) << 12)
186 | (((p
)[2] & 0x3F) << 6)
192 /* A 5-byte sequence of this form:
194 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
196 Note that the top 4 `x's are always 0, so shifting p[1] can
197 never exceed the maximum valid character codepoint. */
198 c
= (/* (((p)[0] & 0x3) << 24) ... always 0, so no need to shift. */
199 (((p
)[1] & 0x3F) << 18)
200 | (((p
)[2] & 0x3F) << 12)
201 | (((p
)[3] & 0x3F) << 6)
214 /* Translate character C by translation table TABLE. If no translation is
215 found in TABLE, return the untranslated character. If TABLE is a list,
216 elements are char tables. In that case, recursively translate C by all the
217 tables in the list. */
220 translate_char (Lisp_Object table
, int c
)
222 if (CHAR_TABLE_P (table
))
226 ch
= CHAR_TABLE_REF (table
, c
);
232 for (; CONSP (table
); table
= XCDR (table
))
233 c
= translate_char (XCAR (table
), c
);
238 /* Convert ASCII or 8-bit character C to unibyte. If C is neither
239 ASCII nor 8-bit, return (C & 0xFF). */
242 multibyte_char_to_unibyte (int c
)
246 if (CHAR_BYTE8_P (c
))
247 return CHAR_TO_BYTE8 (c
);
251 /* Like multibyte_char_to_unibyte, but return -1 if C is not supported
252 by charset_unibyte. */
255 multibyte_char_to_unibyte_safe (int c
)
259 if (CHAR_BYTE8_P (c
))
260 return CHAR_TO_BYTE8 (c
);
264 DEFUN ("characterp", Fcharacterp
, Scharacterp
, 1, 2, 0,
265 doc
: /* Return non-nil if OBJECT is a character.
266 In Emacs Lisp, characters are represented by character codes, which
267 are non-negative integers. The function `max-char' returns the
268 maximum character code.
269 usage: (characterp OBJECT) */)
270 (Lisp_Object object
, Lisp_Object ignore
)
272 return (CHARACTERP (object
) ? Qt
: Qnil
);
275 DEFUN ("max-char", Fmax_char
, Smax_char
, 0, 0, 0,
276 doc
: /* Return the character of the maximum code. */)
279 return make_number (MAX_CHAR
);
282 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte
,
283 Sunibyte_char_to_multibyte
, 1, 1, 0,
284 doc
: /* Convert the byte CH to multibyte character. */)
289 CHECK_CHARACTER (ch
);
292 error ("Not a unibyte character: %d", c
);
293 MAKE_CHAR_MULTIBYTE (c
);
294 return make_number (c
);
297 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte
,
298 Smultibyte_char_to_unibyte
, 1, 1, 0,
299 doc
: /* Convert the multibyte character CH to a byte.
300 If the multibyte character does not represent a byte, return -1. */)
305 CHECK_CHARACTER (ch
);
308 /* Can't distinguish a byte read from a unibyte buffer from
309 a latin1 char, so let's let it slide. */
313 int cu
= CHAR_TO_BYTE_SAFE (cm
);
314 return make_number (cu
);
319 /* Return width (columns) of C considering the buffer display table DP. */
322 char_width (int c
, struct Lisp_Char_Table
*dp
)
324 ptrdiff_t width
= CHAR_WIDTH (c
);
328 Lisp_Object disp
= DISP_CHAR_VECTOR (dp
, c
), ch
;
332 for (i
= 0, width
= 0; i
< ASIZE (disp
); i
++)
337 int w
= CHAR_WIDTH (XFASTINT (ch
));
338 if (INT_ADD_OVERFLOW (width
, w
))
348 DEFUN ("char-width", Fchar_width
, Schar_width
, 1, 1, 0,
349 doc
: /* Return width of CHAR when displayed in the current buffer.
350 The width is measured by how many columns it occupies on the screen.
351 Tab is taken to occupy `tab-width' columns.
352 usage: (char-width CHAR) */)
358 CHECK_CHARACTER (ch
);
360 width
= char_width (c
, buffer_display_table ());
361 return make_number (width
);
364 /* Return width of string STR of length LEN when displayed in the
365 current buffer. The width is measured by how many columns it
366 occupies on the screen. If PRECISION > 0, return the width of
367 longest substring that doesn't exceed PRECISION, and set number of
368 characters and bytes of the substring in *NCHARS and *NBYTES
372 c_string_width (const unsigned char *str
, ptrdiff_t len
, int precision
,
373 ptrdiff_t *nchars
, ptrdiff_t *nbytes
)
375 ptrdiff_t i
= 0, i_byte
= 0;
377 struct Lisp_Char_Table
*dp
= buffer_display_table ();
382 int c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, bytes
);
383 ptrdiff_t thiswidth
= char_width (c
, dp
);
387 if (INT_ADD_OVERFLOW (width
, thiswidth
))
390 else if (precision
- width
< thiswidth
)
410 /* Return width of string STR of length LEN when displayed in the
411 current buffer. The width is measured by how many columns it
412 occupies on the screen. */
415 strwidth (const char *str
, ptrdiff_t len
)
417 return c_string_width ((const unsigned char *) str
, len
, -1, NULL
, NULL
);
420 /* Return the width of Lisp string STRING when displayed in the current
421 buffer. The width is measured by how many columns it occupies on
422 the screen, taking compositions into account. If PRECISION >
423 0, return the width of the longest substring that doesn't exceed
424 PRECISION, and store the number of characters and bytes of that substring
425 in *NCHARS and *NBYTES respectively. */
428 lisp_string_width (Lisp_Object string
, ptrdiff_t precision
,
429 ptrdiff_t *nchars
, ptrdiff_t *nbytes
)
431 ptrdiff_t len
= SCHARS (string
);
432 /* This sets multibyte to 0 even if STRING is multibyte when it
433 contains only ascii and eight-bit-graphic, but that's
435 bool multibyte
= len
< SBYTES (string
);
436 unsigned char *str
= SDATA (string
);
437 ptrdiff_t i
= 0, i_byte
= 0;
439 struct Lisp_Char_Table
*dp
= buffer_display_table ();
443 ptrdiff_t chars
, bytes
, thiswidth
;
446 ptrdiff_t ignore
, end
;
448 if (find_composition (i
, -1, &ignore
, &end
, &val
, string
)
449 && ((cmp_id
= get_composition_id (i
, i_byte
, end
- i
, val
, string
))
452 thiswidth
= composition_table
[cmp_id
]->width
;
454 bytes
= string_char_to_byte (string
, end
) - i_byte
;
463 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, cbytes
);
467 c
= str
[i_byte
], bytes
= 1;
469 thiswidth
= char_width (c
, dp
);
475 if (INT_ADD_OVERFLOW (width
, thiswidth
))
479 else if (precision
- width
< thiswidth
)
499 DEFUN ("string-width", Fstring_width
, Sstring_width
, 1, 1, 0,
500 doc
: /* Return width of STRING when displayed in the current buffer.
501 Width is measured by how many columns it occupies on the screen.
502 When calculating width of a multibyte character in STRING,
503 only the base leading-code is considered; the validity of
504 the following bytes is not checked. Tabs in STRING are always
505 taken to occupy `tab-width' columns.
506 usage: (string-width STRING) */)
512 XSETFASTINT (val
, lisp_string_width (str
, -1, NULL
, NULL
));
516 /* Return the number of characters in the NBYTES bytes at PTR.
517 This works by looking at the contents and checking for multibyte
518 sequences while assuming that there's no invalid sequence.
519 However, if the current buffer has enable-multibyte-characters =
520 nil, we treat each byte as a character. */
523 chars_in_text (const unsigned char *ptr
, ptrdiff_t nbytes
)
525 /* current_buffer is null at early stages of Emacs initialization. */
526 if (current_buffer
== 0
527 || NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
530 return multibyte_chars_in_text (ptr
, nbytes
);
533 /* Return the number of characters in the NBYTES bytes at PTR.
534 This works by looking at the contents and checking for multibyte
535 sequences while assuming that there's no invalid sequence. It
536 ignores enable-multibyte-characters. */
539 multibyte_chars_in_text (const unsigned char *ptr
, ptrdiff_t nbytes
)
541 const unsigned char *endp
= ptr
+ nbytes
;
546 int len
= MULTIBYTE_LENGTH (ptr
, endp
);
557 /* Parse unibyte text at STR of LEN bytes as multibyte text, count
558 the characters and bytes in it, and store the counts in *NCHARS and
559 *NBYTES respectively. When counting bytes, note that 8-bit
560 characters that do not form a valid multibyte sequence are
561 represented by 2 bytes in multibyte text. */
564 parse_str_as_multibyte (const unsigned char *str
, ptrdiff_t len
,
565 ptrdiff_t *nchars
, ptrdiff_t *nbytes
)
567 const unsigned char *endp
= str
+ len
;
569 ptrdiff_t chars
= 0, bytes
= 0;
571 if (len
>= MAX_MULTIBYTE_LENGTH
)
573 const unsigned char *adjusted_endp
= endp
- MAX_MULTIBYTE_LENGTH
;
574 while (str
< adjusted_endp
)
576 if (! CHAR_BYTE8_HEAD_P (*str
)
577 && (n
= MULTIBYTE_LENGTH_NO_CHECK (str
)) > 0)
578 str
+= n
, bytes
+= n
;
586 if (! CHAR_BYTE8_HEAD_P (*str
)
587 && (n
= MULTIBYTE_LENGTH (str
, endp
)) > 0)
588 str
+= n
, bytes
+= n
;
599 /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
600 It actually converts only such 8-bit characters that don't construct
601 a multibyte sequence to multibyte forms of Latin-1 characters. If
602 NCHARS is nonzero, set *NCHARS to the number of characters in the
603 text. It is assured that we can use LEN bytes at STR as a work
604 area and that is enough. Return the number of bytes of the
608 str_as_multibyte (unsigned char *str
, ptrdiff_t len
, ptrdiff_t nbytes
,
611 unsigned char *p
= str
, *endp
= str
+ nbytes
;
616 if (nbytes
>= MAX_MULTIBYTE_LENGTH
)
618 unsigned char *adjusted_endp
= endp
- MAX_MULTIBYTE_LENGTH
;
619 while (p
< adjusted_endp
620 && ! CHAR_BYTE8_HEAD_P (*p
)
621 && (n
= MULTIBYTE_LENGTH_NO_CHECK (p
)) > 0)
625 && ! CHAR_BYTE8_HEAD_P (*p
)
626 && (n
= MULTIBYTE_LENGTH (p
, endp
)) > 0)
636 memmove (endp
- nbytes
, p
, nbytes
);
639 if (nbytes
>= MAX_MULTIBYTE_LENGTH
)
641 unsigned char *adjusted_endp
= endp
- MAX_MULTIBYTE_LENGTH
;
642 while (p
< adjusted_endp
)
644 if (! CHAR_BYTE8_HEAD_P (*p
)
645 && (n
= MULTIBYTE_LENGTH_NO_CHECK (p
)) > 0)
653 c
= BYTE8_TO_CHAR (c
);
654 to
+= CHAR_STRING (c
, to
);
661 if (! CHAR_BYTE8_HEAD_P (*p
)
662 && (n
= MULTIBYTE_LENGTH (p
, endp
)) > 0)
670 c
= BYTE8_TO_CHAR (c
);
671 to
+= CHAR_STRING (c
, to
);
680 /* Parse unibyte string at STR of LEN bytes, and return the number of
681 bytes it may occupy when converted to multibyte string by
682 `str_to_multibyte'. */
685 count_size_as_multibyte (const unsigned char *str
, ptrdiff_t len
)
687 const unsigned char *endp
= str
+ len
;
690 for (bytes
= 0; str
< endp
; str
++)
692 int n
= *str
< 0x80 ? 1 : 2;
693 if (INT_ADD_OVERFLOW (bytes
, n
))
701 /* Convert unibyte text at STR of BYTES bytes to a multibyte text
702 that contains the same single-byte characters. It actually
703 converts all 8-bit characters to multibyte forms. It is assured
704 that we can use LEN bytes at STR as a work area and that is
708 str_to_multibyte (unsigned char *str
, ptrdiff_t len
, ptrdiff_t bytes
)
710 unsigned char *p
= str
, *endp
= str
+ bytes
;
713 while (p
< endp
&& *p
< 0x80) p
++;
719 memmove (endp
- bytes
, p
, bytes
);
726 c
= BYTE8_TO_CHAR (c
);
727 to
+= CHAR_STRING (c
, to
);
732 /* Arrange multibyte text at STR of BYTES bytes as a unibyte text. It
733 actually converts characters in the range 0x80..0xFF to
737 str_as_unibyte (unsigned char *str
, ptrdiff_t bytes
)
739 const unsigned char *p
= str
, *endp
= str
+ bytes
;
746 len
= BYTES_BY_CHAR_HEAD (c
);
747 if (CHAR_BYTE8_HEAD_P (c
))
751 to
= str
+ (p
- str
);
755 len
= BYTES_BY_CHAR_HEAD (c
);
756 if (CHAR_BYTE8_HEAD_P (c
))
758 c
= STRING_CHAR_ADVANCE (p
);
759 *to
++ = CHAR_TO_BYTE8 (c
);
763 while (len
--) *to
++ = *p
++;
769 /* Convert eight-bit chars in SRC (in multibyte form) to the
770 corresponding byte and store in DST. CHARS is the number of
771 characters in SRC. The value is the number of bytes stored in DST.
772 Usually, the value is the same as CHARS, but is less than it if SRC
773 contains a non-ASCII, non-eight-bit character. */
776 str_to_unibyte (const unsigned char *src
, unsigned char *dst
, ptrdiff_t chars
)
780 for (i
= 0; i
< chars
; i
++)
782 int c
= STRING_CHAR_ADVANCE (src
);
784 if (CHAR_BYTE8_P (c
))
785 c
= CHAR_TO_BYTE8 (c
);
786 else if (! ASCII_CHAR_P (c
))
795 string_count_byte8 (Lisp_Object string
)
797 bool multibyte
= STRING_MULTIBYTE (string
);
798 ptrdiff_t nbytes
= SBYTES (string
);
799 unsigned char *p
= SDATA (string
);
800 unsigned char *pend
= p
+ nbytes
;
808 len
= BYTES_BY_CHAR_HEAD (c
);
810 if (CHAR_BYTE8_HEAD_P (c
))
825 string_escape_byte8 (Lisp_Object string
)
827 ptrdiff_t nchars
= SCHARS (string
);
828 ptrdiff_t nbytes
= SBYTES (string
);
829 bool multibyte
= STRING_MULTIBYTE (string
);
830 ptrdiff_t byte8_count
;
831 const unsigned char *src
, *src_end
;
836 if (multibyte
&& nchars
== nbytes
)
839 byte8_count
= string_count_byte8 (string
);
841 if (byte8_count
== 0)
846 if ((MOST_POSITIVE_FIXNUM
- nchars
) / 3 < byte8_count
847 || (STRING_BYTES_BOUND
- nbytes
) / 2 < byte8_count
)
850 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
851 val
= make_uninit_multibyte_string (nchars
+ byte8_count
* 3,
852 nbytes
+ byte8_count
* 2);
856 if ((STRING_BYTES_BOUND
- nbytes
) / 3 < byte8_count
)
859 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
860 val
= make_uninit_string (nbytes
+ byte8_count
* 3);
863 src
= SDATA (string
);
864 src_end
= src
+ nbytes
;
867 while (src
< src_end
)
870 len
= BYTES_BY_CHAR_HEAD (c
);
872 if (CHAR_BYTE8_HEAD_P (c
))
874 c
= STRING_CHAR_ADVANCE (src
);
875 c
= CHAR_TO_BYTE8 (c
);
876 dst
+= sprintf ((char *) dst
, "\\%03o", c
);
879 while (len
--) *dst
++ = *src
++;
882 while (src
< src_end
)
886 dst
+= sprintf ((char *) dst
, "\\%03o", c
);
894 DEFUN ("string", Fstring
, Sstring
, 0, MANY
, 0,
896 Concatenate all the argument characters and make the result a string.
897 usage: (string &rest CHARACTERS) */)
898 (ptrdiff_t n
, Lisp_Object
*args
)
902 unsigned char *buf
, *p
;
906 SAFE_NALLOCA (buf
, MAX_MULTIBYTE_LENGTH
, n
);
909 for (i
= 0; i
< n
; i
++)
911 CHECK_CHARACTER (args
[i
]);
913 p
+= CHAR_STRING (c
, p
);
916 str
= make_string_from_bytes ((char *) buf
, n
, p
- buf
);
921 DEFUN ("unibyte-string", Funibyte_string
, Sunibyte_string
, 0, MANY
, 0,
922 doc
: /* Concatenate all the argument bytes and make the result a unibyte string.
923 usage: (unibyte-string &rest BYTES) */)
924 (ptrdiff_t n
, Lisp_Object
*args
)
929 unsigned char *buf
= SAFE_ALLOCA (n
);
930 unsigned char *p
= buf
;
932 for (i
= 0; i
< n
; i
++)
934 CHECK_RANGED_INTEGER (args
[i
], 0, 255);
935 *p
++ = XINT (args
[i
]);
938 str
= make_string_from_bytes ((char *) buf
, n
, p
- buf
);
943 DEFUN ("char-resolve-modifiers", Fchar_resolve_modifiers
,
944 Schar_resolve_modifiers
, 1, 1, 0,
945 doc
: /* Resolve modifiers in the character CHAR.
946 The value is a character with modifiers resolved into the character
947 code. Unresolved modifiers are kept in the value.
948 usage: (char-resolve-modifiers CHAR) */)
949 (Lisp_Object character
)
953 CHECK_NUMBER (character
);
954 c
= XINT (character
);
955 return make_number (char_resolve_modifier_mask (c
));
958 DEFUN ("get-byte", Fget_byte
, Sget_byte
, 0, 2, 0,
959 doc
: /* Return a byte value of a character at point.
960 Optional 1st arg POSITION, if non-nil, is a position of a character to get
962 Optional 2nd arg STRING, if non-nil, is a string of which first
963 character is a target to get a byte value. In this case, POSITION, if
964 non-nil, is an index of a target character in the string.
966 If the current buffer (or STRING) is multibyte, and the target
967 character is not ASCII nor 8-bit character, an error is signaled. */)
968 (Lisp_Object position
, Lisp_Object string
)
982 CHECK_NUMBER_COERCE_MARKER (position
);
983 if (XINT (position
) < BEGV
|| XINT (position
) >= ZV
)
984 args_out_of_range_3 (position
, make_number (BEGV
), make_number (ZV
));
985 pos
= XFASTINT (position
);
986 p
= CHAR_POS_ADDR (pos
);
988 if (NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
989 return make_number (*p
);
993 CHECK_STRING (string
);
1000 CHECK_NATNUM (position
);
1001 if (XINT (position
) >= SCHARS (string
))
1002 args_out_of_range (string
, position
);
1003 pos
= XFASTINT (position
);
1004 p
= SDATA (string
) + string_char_to_byte (string
, pos
);
1006 if (! STRING_MULTIBYTE (string
))
1007 return make_number (*p
);
1009 c
= STRING_CHAR (p
);
1010 if (CHAR_BYTE8_P (c
))
1011 c
= CHAR_TO_BYTE8 (c
);
1012 else if (! ASCII_CHAR_P (c
))
1013 error ("Not an ASCII nor an 8-bit character: %d", c
);
1014 return make_number (c
);
1020 syms_of_character (void)
1022 DEFSYM (Qcharacterp
, "characterp");
1023 DEFSYM (Qauto_fill_chars
, "auto-fill-chars");
1025 staticpro (&Vchar_unify_table
);
1026 Vchar_unify_table
= Qnil
;
1028 defsubr (&Smax_char
);
1029 defsubr (&Scharacterp
);
1030 defsubr (&Sunibyte_char_to_multibyte
);
1031 defsubr (&Smultibyte_char_to_unibyte
);
1032 defsubr (&Schar_width
);
1033 defsubr (&Sstring_width
);
1035 defsubr (&Sunibyte_string
);
1036 defsubr (&Schar_resolve_modifiers
);
1037 defsubr (&Sget_byte
);
1039 DEFVAR_LISP ("translation-table-vector", Vtranslation_table_vector
,
1041 Vector recording all translation tables ever defined.
1042 Each element is a pair (SYMBOL . TABLE) relating the table to the
1043 symbol naming it. The ID of a translation table is an index into this vector. */);
1044 Vtranslation_table_vector
= Fmake_vector (make_number (16), Qnil
);
1046 DEFVAR_LISP ("auto-fill-chars", Vauto_fill_chars
,
1048 A char-table for characters which invoke auto-filling.
1049 Such characters have value t in this table. */);
1050 Vauto_fill_chars
= Fmake_char_table (Qauto_fill_chars
, Qnil
);
1051 CHAR_TABLE_SET (Vauto_fill_chars
, ' ', Qt
);
1052 CHAR_TABLE_SET (Vauto_fill_chars
, '\n', Qt
);
1054 DEFVAR_LISP ("char-width-table", Vchar_width_table
,
1056 A char-table for width (columns) of each character. */);
1057 Vchar_width_table
= Fmake_char_table (Qnil
, make_number (1));
1058 char_table_set_range (Vchar_width_table
, 0x80, 0x9F, make_number (4));
1059 char_table_set_range (Vchar_width_table
, MAX_5_BYTE_CHAR
+ 1, MAX_CHAR
,
1062 DEFVAR_LISP ("printable-chars", Vprintable_chars
,
1063 doc
: /* A char-table for each printable character. */);
1064 Vprintable_chars
= Fmake_char_table (Qnil
, Qnil
);
1065 Fset_char_table_range (Vprintable_chars
,
1066 Fcons (make_number (32), make_number (126)), Qt
);
1067 Fset_char_table_range (Vprintable_chars
,
1068 Fcons (make_number (160),
1069 make_number (MAX_5_BYTE_CHAR
)), Qt
);
1071 DEFVAR_LISP ("char-script-table", Vchar_script_table
,
1072 doc
: /* Char table of script symbols.
1073 It has one extra slot whose value is a list of script symbols. */);
1075 DEFSYM (Qchar_script_table
, "char-script-table");
1076 Fput (Qchar_script_table
, Qchar_table_extra_slots
, make_number (1));
1077 Vchar_script_table
= Fmake_char_table (Qchar_script_table
, Qnil
);
1079 DEFVAR_LISP ("script-representative-chars", Vscript_representative_chars
,
1080 doc
: /* Alist of scripts vs the representative characters.
1081 Each element is a cons (SCRIPT . CHARS).
1082 SCRIPT is a symbol representing a script or a subgroup of a script.
1083 CHARS is a list or a vector of characters.
1084 If it is a list, all characters in the list are necessary for supporting SCRIPT.
1085 If it is a vector, one of the characters in the vector is necessary.
1086 This variable is used to find a font for a specific script. */);
1087 Vscript_representative_chars
= Qnil
;
1089 DEFVAR_LISP ("unicode-category-table", Vunicode_category_table
,
1090 doc
: /* Char table of Unicode's "General Category".
1091 All Unicode characters have one of the following values (symbol):
1092 Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po,
1093 Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn
1094 See The Unicode Standard for the meaning of those values. */);
1095 /* The correct char-table is set up in characters.el. */
1096 Vunicode_category_table
= Qnil
;