src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21 Boston, MA 02110-1301, USA.  */
  22
  23 /* At first, see the document in `charset.h' to understand the code in
  24    this file.  */
  25
  26 #ifdef emacs
  27 #include <config.h>
  28 #endif
  29
  30 #include <stdio.h>
  31
  32 #ifdef emacs
  33
  34 #include <sys/types.h>
  35 #include "lisp.h"
  36 #include "buffer.h"
  37 #include "charset.h"
  38 #include "composite.h"
  39 #include "coding.h"
  40 #include "disptab.h"
  41
  42 #else  /* not emacs */
  43
  44 #include "mulelib.h"
  45
  46 #endif /* emacs */
  47
  48 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  49 Lisp_Object Qunknown;
  50
  51 /* Declaration of special leading-codes.  */
  52 EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
  53 EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
  54 EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
  55 EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
  56
  57 /* Declaration of special charsets.  The values are set by
  58    Fsetup_special_charsets.  */
  59 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  60 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  61 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  62 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  63 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  64 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  65 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  66 int charset_mule_unicode_0100_24ff;
  67 int charset_mule_unicode_2500_33ff;
  68 int charset_mule_unicode_e000_ffff;
  69
  70 Lisp_Object Qcharset_table;
  71
  72 /* A char-table containing information of each character set.  */
  73 Lisp_Object Vcharset_table;
  74
  75 /* A vector of charset symbol indexed by charset-id.  This is used
  76    only for returning charset symbol from C functions.  */
  77 Lisp_Object Vcharset_symbol_table;
  78
  79 /* A list of charset symbols ever defined.  */
  80 Lisp_Object Vcharset_list;
  81
  82 /* Vector of translation table ever defined.
  83    ID of a translation table is used to index this vector.  */
  84 Lisp_Object Vtranslation_table_vector;
  85
  86 /* A char-table for characters which may invoke auto-filling.  */
  87 Lisp_Object Vauto_fill_chars;
  88
  89 Lisp_Object Qauto_fill_chars;
  90
  91 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  92 int bytes_by_char_head[256];
  93 int width_by_char_head[256];
  94
  95 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  96    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  97 int iso_charset_table[2][2][128];
  98
  99 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
 100 unsigned char *_fetch_multibyte_char_p;
 101 int _fetch_multibyte_char_len;
 102
 103 /* Offset to add to a non-ASCII value when inserting it.  */
 104 EMACS_INT nonascii_insert_offset;
 105
 106 /* Translation table for converting non-ASCII unibyte characters
 107    to multibyte codes, or nil.  */
 108 Lisp_Object Vnonascii_translation_table;
 109
 110 /* List of all possible generic characters.  */
 111 Lisp_Object Vgeneric_character_list;
 112
 113 \f
 114 void
 115 invalid_character (c)
 116      int c;
 117 {
 118   error ("Invalid character: %d, #o%o, #x%x", c, c, c);
 119 }
 120
 121 /* Parse string STR of length LENGTH and fetch information of a
 122    character at STR.  Set BYTES to the byte length the character
 123    occupies, CHARSET, C1, C2 to proper values of the character. */
 124
 125 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 126   do {                                                                       \
 127     (c1) = *(str);                                                           \
 128     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 129     if ((bytes) == 1)                                                        \
 130       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 131     else if ((bytes) == 2)                                                   \
 132       {                                                                      \
 133         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 134           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 135         else                                                                 \
 136           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 137       }                                                                      \
 138     else if ((bytes) == 3)                                                   \
 139       {                                                                      \
 140         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 141           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 142         else                                                                 \
 143           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 144       }                                                                      \
 145     else                                                                     \
 146       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 147   } while (0)
 148
 149 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
 150    Note that this intentionally allows invalid components, such
 151    as 0xA0 0xA0, because there exist many files that contain
 152    such invalid byte sequences, especially in EUC-GB. */
 153 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 154   ((charset) == CHARSET_ASCII                           \
 155    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 156    : ((charset) == CHARSET_8_BIT_CONTROL                \
 157       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 158       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 159          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 160          : (CHARSET_DIMENSION (charset) == 1            \
 161             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 162             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 163                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 164
 165 /* Store multi-byte form of the character C in STR.  The caller should
 166    allocate at least 4-byte area at STR in advance.  Returns the
 167    length of the multi-byte form.  If C is an invalid character code,
 168    return -1.  */
 169
 170 int
 171 char_to_string_1 (c, str)
 172      int c;
 173      unsigned char *str;
 174 {
 175   unsigned char *p = str;
 176
 177   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 178     {
 179       /* Multibyte character can't have a modifier bit.  */
 180       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 181         return -1;
 182
 183       /* For Meta, Shift, and Control modifiers, we need special care.  */
 184       if (c & CHAR_META)
 185         {
 186           /* Move the meta bit to the right place for a string.  */
 187           c = (c & ~CHAR_META) | 0x80;
 188         }
 189       if (c & CHAR_SHIFT)
 190         {
 191           /* Shift modifier is valid only with [A-Za-z].  */
 192           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 193             c &= ~CHAR_SHIFT;
 194           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 195             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 196         }
 197       if (c & CHAR_CTL)
 198         {
 199           /* Simulate the code in lread.c.  */
 200           /* Allow `\C- ' and `\C-?'.  */
 201           if (c == (CHAR_CTL | ' '))
 202             c = 0;
 203           else if (c == (CHAR_CTL | '?'))
 204             c = 127;
 205           /* ASCII control chars are made from letters (both cases),
 206              as well as the non-letters within 0100...0137.  */
 207           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 208             c &= (037 | (~0177 & ~CHAR_CTL));
 209           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 210             c &= (037 | (~0177 & ~CHAR_CTL));
 211         }
 212
 213       /* If C still has any modifier bits, just ignore it.  */
 214       c &= ~CHAR_MODIFIER_MASK;
 215     }
 216
 217   if (SINGLE_BYTE_CHAR_P (c))
 218     {
 219       if (ASCII_BYTE_P (c) || c >= 0xA0)
 220         *p++ = c;
 221       else
 222         {
 223           *p++ = LEADING_CODE_8_BIT_CONTROL;
 224           *p++ = c + 0x20;
 225         }
 226     }
 227   else if (CHAR_VALID_P (c, 0))
 228     {
 229       int charset, c1, c2;
 230
 231       SPLIT_CHAR (c, charset, c1, c2);
 232
 233       if (charset >= LEADING_CODE_EXT_11)
 234         *p++ = (charset < LEADING_CODE_EXT_12
 235                 ? LEADING_CODE_PRIVATE_11
 236                 : (charset < LEADING_CODE_EXT_21
 237                    ? LEADING_CODE_PRIVATE_12
 238                    : (charset < LEADING_CODE_EXT_22
 239                       ? LEADING_CODE_PRIVATE_21
 240                       : LEADING_CODE_PRIVATE_22)));
 241       *p++ = charset;
 242       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 243         return -1;
 244       if (c1)
 245         {
 246           *p++ = c1 | 0x80;
 247           if (c2 > 0)
 248             *p++ = c2 | 0x80;
 249         }
 250     }
 251   else
 252     return -1;
 253
 254   return (p - str);
 255 }
 256
 257
 258 /* Store multi-byte form of the character C in STR.  The caller should
 259    allocate at least 4-byte area at STR in advance.  Returns the
 260    length of the multi-byte form.  If C is an invalid character code,
 261    signal an error.
 262
 263    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 264    directly if C can be an ASCII character.  */
 265
 266 int
 267 char_to_string (c, str)
 268      int c;
 269      unsigned char *str;
 270 {
 271   int len;
 272   len = char_to_string_1 (c, str);
 273   if (len == -1)
 274     invalid_character (c);
 275   return len;
 276 }
 277
 278
 279 /* Return the non-ASCII character corresponding to multi-byte form at
 280    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 281    length of the multibyte form in *ACTUAL_LEN.
 282
 283    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 284    this function directly if you want ot handle ASCII characters as
 285    well.  */
 286
 287 int
 288 string_to_char (str, len, actual_len)
 289      const unsigned char *str;
 290      int len, *actual_len;
 291 {
 292   int c, bytes, charset, c1, c2;
 293
 294   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 295   c = MAKE_CHAR (charset, c1, c2);
 296   if (actual_len)
 297     *actual_len = bytes;
 298   return c;
 299 }
 300
 301 /* Return the length of the multi-byte form at string STR of length LEN.
 302    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 303 int
 304 multibyte_form_length (str, len)
 305      const unsigned char *str;
 306      int len;
 307 {
 308   int bytes;
 309
 310   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 311   return bytes;
 312 }
 313
 314 /* Check multibyte form at string STR of length LEN and set variables
 315    pointed by CHARSET, C1, and C2 to charset and position codes of the
 316    character at STR, and return 0.  If there's no multibyte character,
 317    return -1.  This should be used only in the macro SPLIT_STRING
 318    which checks range of STR in advance.  */
 319
 320 int
 321 split_string (str, len, charset, c1, c2)
 322      const unsigned char *str;
 323      unsigned char *c1, *c2;
 324      int len, *charset;
 325 {
 326   register int bytes, cs, code1, code2 = -1;
 327
 328   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 329   if (cs == CHARSET_ASCII)
 330     return -1;
 331   *charset = cs;
 332   *c1 = code1;
 333   *c2 = code2;
 334   return 0;
 335 }
 336
 337 /* Return 1 iff character C has valid printable glyph.
 338    Use the macro CHAR_PRINTABLE_P instead.  */
 339 int
 340 char_printable_p (c)
 341      int c;
 342 {
 343   int charset, c1, c2;
 344
 345   if (ASCII_BYTE_P (c))
 346     return 1;
 347   else if (SINGLE_BYTE_CHAR_P (c))
 348     return 0;
 349   else if (c >= MAX_CHAR)
 350     return 0;
 351
 352   SPLIT_CHAR (c, charset, c1, c2);
 353   if (! CHARSET_DEFINED_P (charset))
 354     return 0;
 355   if (CHARSET_CHARS (charset) == 94
 356       ? c1 <= 32 || c1 >= 127
 357       : c1 < 32)
 358     return 0;
 359   if (CHARSET_DIMENSION (charset) == 2
 360       && (CHARSET_CHARS (charset) == 94
 361           ? c2 <= 32 || c2 >= 127
 362           : c2 < 32))
 363     return 0;
 364   return 1;
 365 }
 366
 367 /* Translate character C by translation table TABLE.  If C
 368    is negative, translate a character specified by CHARSET, C1, and C2
 369    (C1 and C2 are code points of the character).  If no translation is
 370    found in TABLE, return C.  */
 371 int
 372 translate_char (table, c, charset, c1, c2)
 373      Lisp_Object table;
 374      int c, charset, c1, c2;
 375 {
 376   Lisp_Object ch;
 377   int alt_charset, alt_c1, alt_c2, dimension;
 378
 379   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 380   if (!CHAR_TABLE_P (table)
 381       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 382     return c;
 383
 384   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 385   dimension = CHARSET_DIMENSION (alt_charset);
 386   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 387     /* CH is not a generic character, just return it.  */
 388     return XFASTINT (ch);
 389
 390   /* Since CH is a generic character, we must return a specific
 391      charater which has the same position codes as C from CH.  */
 392   if (charset < 0)
 393     SPLIT_CHAR (c, charset, c1, c2);
 394   if (dimension != CHARSET_DIMENSION (charset))
 395     /* We can't make such a character because of dimension mismatch.  */
 396     return c;
 397   return MAKE_CHAR (alt_charset, c1, c2);
 398 }
 399
 400 /* Convert the unibyte character C to multibyte based on
 401    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 402    convert C to a valid multibyte character, convert it based on
 403    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 404
 405 int
 406 unibyte_char_to_multibyte (c)
 407      int c;
 408 {
 409   if (c < 0400 && c >= 0200)
 410     {
 411       int c_save = c;
 412
 413       if (! NILP (Vnonascii_translation_table))
 414         {
 415           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 416           if (c >= 0400 && ! char_valid_p (c, 0))
 417             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 418         }
 419       else if (c >= 0240 && nonascii_insert_offset > 0)
 420         {
 421           c += nonascii_insert_offset;
 422           if (c < 0400 || ! char_valid_p (c, 0))
 423             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 424         }
 425       else if (c >= 0240)
 426         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 427     }
 428   return c;
 429 }
 430
 431
 432 /* Convert the multibyte character C to unibyte 8-bit character based
 433    on Vnonascii_translation_table or nonascii_insert_offset.  If
 434    REV_TBL is non-nil, it should be a reverse table of
 435    Vnonascii_translation_table, i.e. what given by:
 436      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 437
 438 int
 439 multibyte_char_to_unibyte (c, rev_tbl)
 440      int c;
 441      Lisp_Object rev_tbl;
 442 {
 443   if (!SINGLE_BYTE_CHAR_P (c))
 444     {
 445       int c_save = c;
 446
 447       if (! CHAR_TABLE_P (rev_tbl)
 448           && CHAR_TABLE_P (Vnonascii_translation_table))
 449         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 450                                           make_number (0));
 451       if (CHAR_TABLE_P (rev_tbl))
 452         {
 453           Lisp_Object temp;
 454           temp = Faref (rev_tbl, make_number (c));
 455           if (INTEGERP (temp))
 456             c = XINT (temp);
 457           if (c >= 256)
 458             c = (c_save & 0177) + 0200;
 459         }
 460       else
 461         {
 462           if (nonascii_insert_offset > 0)
 463             c -= nonascii_insert_offset;
 464           if (c < 128 || c >= 256)
 465             c = (c_save & 0177) + 0200;
 466         }
 467     }
 468
 469   return c;
 470 }
 471
 472 \f
 473 /* Update the table Vcharset_table with the given arguments (see the
 474    document of `define-charset' for the meaning of each argument).
 475    Several other table contents are also updated.  The caller should
 476    check the validity of CHARSET-ID and the remaining arguments in
 477    advance.  */
 478
 479 void
 480 update_charset_table (charset_id, dimension, chars, width, direction,
 481                       iso_final_char, iso_graphic_plane,
 482                       short_name, long_name, description)
 483      Lisp_Object charset_id, dimension, chars, width, direction;
 484      Lisp_Object iso_final_char, iso_graphic_plane;
 485      Lisp_Object short_name, long_name, description;
 486 {
 487   int charset = XINT (charset_id);
 488   int bytes;
 489   unsigned char leading_code_base, leading_code_ext;
 490
 491   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 492     CHARSET_TABLE_ENTRY (charset)
 493       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 494
 495   if (NILP (long_name))
 496     long_name = short_name;
 497   if (NILP (description))
 498     description = long_name;
 499
 500   /* Get byte length of multibyte form, base leading-code, and
 501      extended leading-code of the charset.  See the comment under the
 502      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 503   bytes = XINT (dimension);
 504   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 505     {
 506       /* Official charset, it doesn't have an extended leading-code.  */
 507       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 508         bytes += 1; /* For a base leading-code.  */
 509       leading_code_base = charset;
 510       leading_code_ext = 0;
 511     }
 512   else
 513     {
 514       /* Private charset.  */
 515       bytes += 2; /* For base and extended leading-codes.  */
 516       leading_code_base
 517         = (charset < LEADING_CODE_EXT_12
 518            ? LEADING_CODE_PRIVATE_11
 519            : (charset < LEADING_CODE_EXT_21
 520               ? LEADING_CODE_PRIVATE_12
 521               : (charset < LEADING_CODE_EXT_22
 522                  ? LEADING_CODE_PRIVATE_21
 523                  : LEADING_CODE_PRIVATE_22)));
 524       leading_code_ext = charset;
 525       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 526         error ("Invalid dimension for the charset-ID %d", charset);
 527     }
 528
 529   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 530   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 531   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 532   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 533   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 534   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 535   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 536     = make_number (leading_code_base);
 537   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 538     = make_number (leading_code_ext);
 539   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 540   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 541     = iso_graphic_plane;
 542   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 543   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 544   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 545   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 546
 547   {
 548     /* If we have already defined a charset which has the same
 549        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 550        DIRECTION, we must update the entry REVERSE-CHARSET of both
 551        charsets.  If there's no such charset, the value of the entry
 552        is set to nil.  */
 553     int i;
 554
 555     for (i = 0; i <= MAX_CHARSET; i++)
 556       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 557         {
 558           if (CHARSET_DIMENSION (i) == XINT (dimension)
 559               && CHARSET_CHARS (i) == XINT (chars)
 560               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 561               && CHARSET_DIRECTION (i) != XINT (direction))
 562             {
 563               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 564                 = make_number (i);
 565               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 566               break;
 567             }
 568         }
 569     if (i > MAX_CHARSET)
 570       /* No such a charset.  */
 571       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 572         = make_number (-1);
 573   }
 574
 575   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 576       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 577     {
 578       bytes_by_char_head[leading_code_base] = bytes;
 579       width_by_char_head[leading_code_base] = XINT (width);
 580
 581       /* Update table emacs_code_class.  */
 582       emacs_code_class[charset] = (bytes == 2
 583                                    ? EMACS_leading_code_2
 584                                    : (bytes == 3
 585                                       ? EMACS_leading_code_3
 586                                       : EMACS_leading_code_4));
 587     }
 588
 589   /* Update table iso_charset_table.  */
 590   if (XINT (iso_final_char) >= 0
 591       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 592     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 593 }
 594
 595 #ifdef emacs
 596
 597 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 598    is invalid.  */
 599 int
 600 get_charset_id (charset_symbol)
 601      Lisp_Object charset_symbol;
 602 {
 603   Lisp_Object val;
 604   int charset;
 605
 606   /* This originally used a ?: operator, but reportedly the HP-UX
 607      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 608   if (SYMBOLP (charset_symbol)
 609       && VECTORP (val = Fget (charset_symbol, Qcharset))
 610       && CHARSET_VALID_P (charset =
 611                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 612     return charset;
 613   else
 614     return -1;
 615 }
 616
 617 /* Return an identification number for a new private charset of
 618    DIMENSION and WIDTH.  If there's no more room for the new charset,
 619    return 0.  */
 620 Lisp_Object
 621 get_new_private_charset_id (dimension, width)
 622      int dimension, width;
 623 {
 624   int charset, from, to;
 625
 626   if (dimension == 1)
 627     {
 628       from = LEADING_CODE_EXT_11;
 629       to = LEADING_CODE_EXT_21;
 630     }
 631   else
 632     {
 633       from = LEADING_CODE_EXT_21;
 634       to = LEADING_CODE_EXT_MAX + 1;
 635     }
 636
 637   for (charset = from; charset < to; charset++)
 638     if (!CHARSET_DEFINED_P (charset)) break;
 639
 640   return make_number (charset < to ? charset : 0);
 641 }
 642
 643 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 644        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 645 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 646  treated as a private charset.
 647 INFO-VECTOR is a vector of the format:
 648    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 649     SHORT-NAME LONG-NAME DESCRIPTION]
 650 The meanings of each elements is as follows:
 651 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 652 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 653 WIDTH (integer) is the number of columns a character in the charset
 654 occupies on the screen: one of 0, 1, and 2.
 655
 656 DIRECTION (integer) is the rendering direction of characters in the
 657 charset when rendering.  If 0, render from left to right, else
 658 render from right to left.
 659
 660 ISO-FINAL-CHAR (character) is the final character of the
 661 corresponding ISO 2022 charset.
 662 It may be -1 if the charset is internal use only.
 663
 664 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 665 while encoding to variants of ISO 2022 coding system, one of the
 666 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 667 It may be -1 if the charset is internal use only.
 668
 669 SHORT-NAME (string) is the short name to refer to the charset.
 670
 671 LONG-NAME (string) is the long name to refer to the charset.
 672
 673 DESCRIPTION (string) is the description string of the charset.  */)
 674        (charset_id, charset_symbol, info_vector)
 675      Lisp_Object charset_id, charset_symbol, info_vector;
 676 {
 677   Lisp_Object *vec;
 678
 679   if (!NILP (charset_id))
 680     CHECK_NUMBER (charset_id);
 681   CHECK_SYMBOL (charset_symbol);
 682   CHECK_VECTOR (info_vector);
 683
 684   if (! NILP (charset_id))
 685     {
 686       if (! CHARSET_VALID_P (XINT (charset_id)))
 687         error ("Invalid CHARSET: %d", XINT (charset_id));
 688       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 689         error ("Already defined charset: %d", XINT (charset_id));
 690     }
 691
 692   vec = XVECTOR (info_vector)->contents;
 693   if (XVECTOR (info_vector)->size != 9
 694       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 695       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 696       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 697       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 698       || !INTEGERP (vec[4])
 699       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 700       || !INTEGERP (vec[5])
 701       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 702       || !STRINGP (vec[6])
 703       || !STRINGP (vec[7])
 704       || !STRINGP (vec[8]))
 705     error ("Invalid info-vector argument for defining charset %s",
 706            SDATA (SYMBOL_NAME (charset_symbol)));
 707
 708   if (NILP (charset_id))
 709     {
 710       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 711       if (XINT (charset_id) == 0)
 712         error ("There's no room for a new private charset %s",
 713                SDATA (SYMBOL_NAME (charset_symbol)));
 714     }
 715
 716   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 717                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 718   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 719   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 720   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 721   Fupdate_coding_systems_internal ();
 722   return Qnil;
 723 }
 724
 725 DEFUN ("generic-character-list", Fgeneric_character_list,
 726        Sgeneric_character_list, 0, 0, 0,
 727        doc: /* Return a list of all possible generic characters.
 728 It includes a generic character for a charset not yet defined.  */)
 729      ()
 730 {
 731   return Vgeneric_character_list;
 732 }
 733
 734 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 735        Sget_unused_iso_final_char, 2, 2, 0,
 736        doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
 737 DIMENSION is the number of bytes to represent a character: 1 or 2.
 738 CHARS is the number of characters in a dimension: 94 or 96.
 739
 740 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 741 If there's no unused final char for the specified kind of charset,
 742 return nil.  */)
 743      (dimension, chars)
 744      Lisp_Object dimension, chars;
 745 {
 746   int final_char;
 747
 748   CHECK_NUMBER (dimension);
 749   CHECK_NUMBER (chars);
 750   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 751     error ("Invalid charset dimension %d, it should be 1 or 2",
 752            XINT (dimension));
 753   if (XINT (chars) != 94 && XINT (chars) != 96)
 754     error ("Invalid charset chars %d, it should be 94 or 96",
 755            XINT (chars));
 756   for (final_char = '0'; final_char <= '?'; final_char++)
 757     {
 758       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 759         break;
 760     }
 761   return (final_char <= '?' ? make_number (final_char) : Qnil);
 762 }
 763
 764 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 765        4, 4, 0,
 766        doc: /* Declare an equivalent charset for ISO-2022 decoding.
 767
 768 On decoding by an ISO-2022 base coding system, when a charset
 769 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
 770 if CHARSET is designated instead.  */)
 771      (dimension, chars, final_char, charset)
 772      Lisp_Object dimension, chars, final_char, charset;
 773 {
 774   int charset_id;
 775
 776   CHECK_NUMBER (dimension);
 777   CHECK_NUMBER (chars);
 778   CHECK_NUMBER (final_char);
 779   CHECK_SYMBOL (charset);
 780
 781   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 782     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 783   if (XINT (chars) != 94 && XINT (chars) != 96)
 784     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 785   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 786     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 787   if ((charset_id = get_charset_id (charset)) < 0)
 788     error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
 789
 790   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
 791   return Qnil;
 792 }
 793
 794 /* Return information about charsets in the text at PTR of NBYTES
 795    bytes, which are NCHARS characters.  The value is:
 796
 797         0: Each character is represented by one byte.  This is always
 798            true for unibyte text.
 799         1: No charsets other than ascii eight-bit-control,
 800            eight-bit-graphic, and latin-1 are found.
 801         2: Otherwise.
 802
 803    In addition, if CHARSETS is nonzero, for each found charset N, set
 804    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 805    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 806    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 807    1 (note that there's no charset whose ID is 1).  */
 808
 809 int
 810 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 811      const unsigned char *ptr;
 812      int nchars, nbytes, *charsets;
 813      Lisp_Object table;
 814 {
 815   if (nchars == nbytes)
 816     {
 817       if (charsets && nbytes > 0)
 818         {
 819           const unsigned char *endp = ptr + nbytes;
 820           int maskbits = 0;
 821
 822           while (ptr < endp && maskbits != 7)
 823             {
 824               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 825               ptr++;
 826             }
 827
 828           if (maskbits & 1)
 829             charsets[CHARSET_ASCII] = 1;
 830           if (maskbits & 2)
 831             charsets[CHARSET_8_BIT_CONTROL] = 1;
 832           if (maskbits & 4)
 833             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 834         }
 835       return 0;
 836     }
 837   else
 838     {
 839       int return_val = 1;
 840       int bytes, charset, c1, c2;
 841
 842       if (! CHAR_TABLE_P (table))
 843         table = Qnil;
 844
 845       while (nchars-- > 0)
 846         {
 847           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 848           ptr += bytes;
 849
 850           if (!CHARSET_DEFINED_P (charset))
 851             charset = 1;
 852           else if (! NILP (table))
 853             {
 854               int c = translate_char (table, -1, charset, c1, c2);
 855               if (c >= 0)
 856                 charset = CHAR_CHARSET (c);
 857             }
 858
 859           if (return_val == 1
 860               && charset != CHARSET_ASCII
 861               && charset != CHARSET_8_BIT_CONTROL
 862               && charset != CHARSET_8_BIT_GRAPHIC
 863               && charset != charset_latin_iso8859_1)
 864             return_val = 2;
 865
 866           if (charsets)
 867             charsets[charset] = 1;
 868           else if (return_val == 2)
 869             break;
 870         }
 871       return return_val;
 872     }
 873 }
 874
 875 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 876        2, 3, 0,
 877        doc: /* Return a list of charsets in the region between BEG and END.
 878 BEG and END are buffer positions.
 879 Optional arg TABLE if non-nil is a translation table to look up.
 880
 881 If the region contains invalid multibyte characters,
 882 `unknown' is included in the returned list.
 883
 884 If the current buffer is unibyte, the returned list may contain
 885 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 886      (beg, end, table)
 887      Lisp_Object beg, end, table;
 888 {
 889   int charsets[MAX_CHARSET + 1];
 890   int from, from_byte, to, stop, stop_byte, i;
 891   Lisp_Object val;
 892
 893   validate_region (&beg, &end);
 894   from = XFASTINT (beg);
 895   stop = to = XFASTINT (end);
 896
 897   if (from < GPT && GPT < to)
 898     {
 899       stop = GPT;
 900       stop_byte = GPT_BYTE;
 901     }
 902   else
 903     stop_byte = CHAR_TO_BYTE (stop);
 904
 905   from_byte = CHAR_TO_BYTE (from);
 906
 907   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 908   while (1)
 909     {
 910       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 911                             stop_byte - from_byte, charsets, table);
 912       if (stop < to)
 913         {
 914           from = stop, from_byte = stop_byte;
 915           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 916         }
 917       else
 918         break;
 919     }
 920
 921   val = Qnil;
 922   if (charsets[1])
 923     val = Fcons (Qunknown, val);
 924   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 925     if (charsets[i])
 926       val = Fcons (CHARSET_SYMBOL (i), val);
 927   if (charsets[0])
 928     val = Fcons (Qascii, val);
 929   return val;
 930 }
 931
 932 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 933        1, 2, 0,
 934        doc: /* Return a list of charsets in STR.
 935 Optional arg TABLE if non-nil is a translation table to look up.
 936
 937 If the string contains invalid multibyte characters,
 938 `unknown' is included in the returned list.
 939
 940 If STR is unibyte, the returned list may contain
 941 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 942      (str, table)
 943      Lisp_Object str, table;
 944 {
 945   int charsets[MAX_CHARSET + 1];
 946   int i;
 947   Lisp_Object val;
 948
 949   CHECK_STRING (str);
 950
 951   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 952   find_charset_in_text (SDATA (str), SCHARS (str),
 953                         SBYTES (str), charsets, table);
 954
 955   val = Qnil;
 956   if (charsets[1])
 957     val = Fcons (Qunknown, val);
 958   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 959     if (charsets[i])
 960       val = Fcons (CHARSET_SYMBOL (i), val);
 961   if (charsets[0])
 962     val = Fcons (Qascii, val);
 963   return val;
 964 }
 965
 966 \f
 967 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 968        doc: /* Return a character made from arguments.
 969 Internal use only.  */)
 970      (charset, code1, code2)
 971      Lisp_Object charset, code1, code2;
 972 {
 973   int charset_id, c1, c2;
 974
 975   CHECK_NUMBER (charset);
 976   charset_id = XINT (charset);
 977   if (!CHARSET_DEFINED_P (charset_id))
 978     error ("Invalid charset ID: %d", XINT (charset));
 979
 980   if (NILP (code1))
 981     c1 = 0;
 982   else
 983     {
 984       CHECK_NUMBER (code1);
 985       c1 = XINT (code1);
 986     }
 987   if (NILP (code2))
 988     c2 = 0;
 989   else
 990     {
 991       CHECK_NUMBER (code2);
 992       c2 = XINT (code2);
 993     }
 994
 995   if (charset_id == CHARSET_ASCII)
 996     {
 997       if (c1 < 0 || c1 > 0x7F)
 998         goto invalid_code_posints;
 999       return make_number (c1);
1000     }
1001   else if (charset_id == CHARSET_8_BIT_CONTROL)
1002     {
1003       if (NILP (code1))
1004         c1 = 0x80;
1005       else if (c1 < 0x80 || c1 > 0x9F)
1006         goto invalid_code_posints;
1007       return make_number (c1);
1008     }
1009   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1010     {
1011       if (NILP (code1))
1012         c1 = 0xA0;
1013       else if (c1 < 0xA0 || c1 > 0xFF)
1014         goto invalid_code_posints;
1015       return make_number (c1);
1016     }
1017   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1018     goto invalid_code_posints;
1019   c1 &= 0x7F;
1020   c2 &= 0x7F;
1021   if (c1 == 0
1022       ? c2 != 0
1023       : (c2 == 0
1024          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1025          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1026     goto invalid_code_posints;
1027   return make_number (MAKE_CHAR (charset_id, c1, c2));
1028
1029  invalid_code_posints:
1030   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1031 }
1032
1033 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1034        doc: /* Return list of charset and one or two position-codes of CH.
1035 If CH is invalid as a character code,
1036 return a list of symbol `unknown' and CH.  */)
1037      (ch)
1038      Lisp_Object ch;
1039 {
1040   int c, charset, c1, c2;
1041
1042   CHECK_NUMBER (ch);
1043   c = XFASTINT (ch);
1044   if (!CHAR_VALID_P (c, 1))
1045     return Fcons (Qunknown, Fcons (ch, Qnil));
1046   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1047   return (c2 >= 0
1048           ? Fcons (CHARSET_SYMBOL (charset),
1049                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1050           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1051 }
1052
1053 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1054        doc: /* Return charset of CH.  */)
1055      (ch)
1056      Lisp_Object ch;
1057 {
1058   CHECK_NUMBER (ch);
1059
1060   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1061 }
1062
1063 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1064        doc: /* Return charset of a character in the current buffer at position POS.
1065 If POS is nil, it defauls to the current point.
1066 If POS is out of range, the value is nil.  */)
1067      (pos)
1068      Lisp_Object pos;
1069 {
1070   Lisp_Object ch;
1071   int charset;
1072
1073   ch = Fchar_after (pos);
1074   if (! INTEGERP (ch))
1075     return ch;
1076   charset = CHAR_CHARSET (XINT (ch));
1077   return CHARSET_SYMBOL (charset);
1078 }
1079
1080 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1081        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1082
1083 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1084 by their DIMENSION, CHARS, and FINAL-CHAR,
1085 where as Emacs distinguishes them by charset symbol.
1086 See the documentation of the function `charset-info' for the meanings of
1087 DIMENSION, CHARS, and FINAL-CHAR.  */)
1088      (dimension, chars, final_char)
1089      Lisp_Object dimension, chars, final_char;
1090 {
1091   int charset;
1092
1093   CHECK_NUMBER (dimension);
1094   CHECK_NUMBER (chars);
1095   CHECK_NUMBER (final_char);
1096
1097   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1098     return Qnil;
1099   return CHARSET_SYMBOL (charset);
1100 }
1101
1102 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1103    generic character.  If GENERICP is zero, return nonzero iff C is a
1104    valid normal character.  Do not call this function directly,
1105    instead use macro CHAR_VALID_P.  */
1106 int
1107 char_valid_p (c, genericp)
1108      int c, genericp;
1109 {
1110   int charset, c1, c2;
1111
1112   if (c < 0 || c >= MAX_CHAR)
1113     return 0;
1114   if (SINGLE_BYTE_CHAR_P (c))
1115     return 1;
1116   SPLIT_CHAR (c, charset, c1, c2);
1117   if (genericp)
1118     {
1119       if (c1)
1120         {
1121           if (c2 <= 0) c2 = 0x20;
1122         }
1123       else
1124         {
1125           if (c2 <= 0) c1 = c2 = 0x20;
1126         }
1127     }
1128   return (CHARSET_DEFINED_P (charset)
1129           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1130 }
1131
1132 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1133        doc: /* Return t if OBJECT is a valid normal character.
1134 If optional arg GENERICP is non-nil, also return t if OBJECT is
1135 a valid generic character.  */)
1136      (object, genericp)
1137      Lisp_Object object, genericp;
1138 {
1139   if (! NATNUMP (object))
1140     return Qnil;
1141   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1142 }
1143
1144 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1145        Sunibyte_char_to_multibyte, 1, 1, 0,
1146        doc: /* Convert the unibyte character CH to multibyte character.
1147 The conversion is done based on `nonascii-translation-table' (which see)
1148  or `nonascii-insert-offset' (which see).  */)
1149      (ch)
1150      Lisp_Object ch;
1151 {
1152   int c;
1153
1154   CHECK_NUMBER (ch);
1155   c = XINT (ch);
1156   if (c < 0 || c >= 0400)
1157     error ("Invalid unibyte character: %d", c);
1158   c = unibyte_char_to_multibyte (c);
1159   if (c < 0)
1160     error ("Can't convert to multibyte character: %d", XINT (ch));
1161   return make_number (c);
1162 }
1163
1164 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1165        Smultibyte_char_to_unibyte, 1, 1, 0,
1166        doc: /* Convert the multibyte character CH to unibyte character.
1167 The conversion is done based on `nonascii-translation-table' (which see)
1168  or `nonascii-insert-offset' (which see).  */)
1169      (ch)
1170      Lisp_Object ch;
1171 {
1172   int c;
1173
1174   CHECK_NUMBER (ch);
1175   c = XINT (ch);
1176   if (! CHAR_VALID_P (c, 0))
1177     error ("Invalid multibyte character: %d", c);
1178   c = multibyte_char_to_unibyte (c, Qnil);
1179   if (c < 0)
1180     error ("Can't convert to unibyte character: %d", XINT (ch));
1181   return make_number (c);
1182 }
1183
1184 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1185        doc: /* Return 1 regardless of the argument CH.  */)
1186      (ch)
1187      Lisp_Object ch;
1188 {
1189   CHECK_NUMBER (ch);
1190   return make_number (1);
1191 }
1192
1193 /* Return how many bytes C will occupy in a multibyte buffer.
1194    Don't call this function directly, instead use macro CHAR_BYTES.  */
1195 int
1196 char_bytes (c)
1197      int c;
1198 {
1199   int charset;
1200
1201   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1202     return 1;
1203   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1204     return 1;
1205
1206   charset = CHAR_CHARSET (c);
1207   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1208 }
1209
1210 /* Return the width of character of which multi-byte form starts with
1211    C.  The width is measured by how many columns occupied on the
1212    screen when displayed in the current buffer.  */
1213
1214 #define ONE_BYTE_CHAR_WIDTH(c)                                          \
1215   (c < 0x20                                                             \
1216    ? (c == '\t'                                                         \
1217       ? XFASTINT (current_buffer->tab_width)                            \
1218       : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))   \
1219    : (c < 0x7f                                                          \
1220       ? 1                                                               \
1221       : (c == 0x7F                                                      \
1222          ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
1223          : ((! NILP (current_buffer->enable_multibyte_characters)       \
1224              && BASE_LEADING_CODE_P (c))                                \
1225             ? WIDTH_BY_CHAR_HEAD (c)                                    \
1226             : 4))))
1227
1228 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1229        doc: /* Return width of CH when displayed in the current buffer.
1230 The width is measured by how many columns it occupies on the screen.
1231 Tab is taken to occupy `tab-width' columns.  */)
1232      (ch)
1233      Lisp_Object ch;
1234 {
1235   Lisp_Object val, disp;
1236   int c;
1237   struct Lisp_Char_Table *dp = buffer_display_table ();
1238
1239   CHECK_NUMBER (ch);
1240
1241   c = XINT (ch);
1242
1243   /* Get the way the display table would display it.  */
1244   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1245
1246   if (VECTORP (disp))
1247     XSETINT (val, XVECTOR (disp)->size);
1248   else if (SINGLE_BYTE_CHAR_P (c))
1249     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1250   else
1251     {
1252       int charset = CHAR_CHARSET (c);
1253
1254       XSETFASTINT (val, CHARSET_WIDTH (charset));
1255     }
1256   return val;
1257 }
1258
1259 /* Return width of string STR of length LEN when displayed in the
1260    current buffer.  The width is measured by how many columns it
1261    occupies on the screen.  */
1262
1263 int
1264 strwidth (str, len)
1265      unsigned char *str;
1266      int len;
1267 {
1268   return c_string_width (str, len, -1, NULL, NULL);
1269 }
1270
1271 /* Return width of string STR of length LEN when displayed in the
1272    current buffer.  The width is measured by how many columns it
1273    occupies on the screen.  If PRECISION > 0, return the width of
1274    longest substring that doesn't exceed PRECISION, and set number of
1275    characters and bytes of the substring in *NCHARS and *NBYTES
1276    respectively.  */
1277
1278 int
1279 c_string_width (str, len, precision, nchars, nbytes)
1280      const unsigned char *str;
1281      int len, precision, *nchars, *nbytes;
1282 {
1283   int i = 0, i_byte = 0;
1284   int width = 0;
1285   int chars;
1286   struct Lisp_Char_Table *dp = buffer_display_table ();
1287
1288   while (i_byte < len)
1289     {
1290       int bytes, thiswidth;
1291       Lisp_Object val;
1292
1293       if (dp)
1294         {
1295           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1296
1297           chars = 1;
1298           val = DISP_CHAR_VECTOR (dp, c);
1299           if (VECTORP (val))
1300             thiswidth = XVECTOR (val)->size;
1301           else
1302             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1303         }
1304       else
1305         {
1306           chars = 1;
1307           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1308           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1309         }
1310
1311       if (precision > 0
1312           && (width + thiswidth > precision))
1313         {
1314           *nchars = i;
1315           *nbytes = i_byte;
1316           return width;
1317         }
1318       i++;
1319       i_byte += bytes;
1320       width += thiswidth;
1321   }
1322
1323   if (precision > 0)
1324     {
1325       *nchars = i;
1326       *nbytes = i_byte;
1327     }
1328
1329   return width;
1330 }
1331
1332 /* Return width of Lisp string STRING when displayed in the current
1333    buffer.  The width is measured by how many columns it occupies on
1334    the screen while paying attention to compositions.  If PRECISION >
1335    0, return the width of longest substring that doesn't exceed
1336    PRECISION, and set number of characters and bytes of the substring
1337    in *NCHARS and *NBYTES respectively.  */
1338
1339 int
1340 lisp_string_width (string, precision, nchars, nbytes)
1341      Lisp_Object string;
1342      int precision, *nchars, *nbytes;
1343 {
1344   int len = SCHARS (string);
1345   int len_byte = SBYTES (string);
1346   const unsigned char *str = SDATA (string);
1347   int i = 0, i_byte = 0;
1348   int width = 0;
1349   struct Lisp_Char_Table *dp = buffer_display_table ();
1350
1351   while (i < len)
1352     {
1353       int chars, bytes, thiswidth;
1354       Lisp_Object val;
1355       int cmp_id;
1356       int ignore, end;
1357
1358       if (find_composition (i, -1, &ignore, &end, &val, string)
1359           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1360               >= 0))
1361         {
1362           thiswidth = composition_table[cmp_id]->width;
1363           chars = end - i;
1364           bytes = string_char_to_byte (string, end) - i_byte;
1365         }
1366       else if (dp)
1367         {
1368           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1369
1370           chars = 1;
1371           val = DISP_CHAR_VECTOR (dp, c);
1372           if (VECTORP (val))
1373             thiswidth = XVECTOR (val)->size;
1374           else
1375             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1376         }
1377       else
1378         {
1379           chars = 1;
1380           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1381           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1382         }
1383
1384       if (precision > 0
1385           && (width + thiswidth > precision))
1386         {
1387           *nchars = i;
1388           *nbytes = i_byte;
1389           return width;
1390         }
1391       i += chars;
1392       i_byte += bytes;
1393       width += thiswidth;
1394   }
1395
1396   if (precision > 0)
1397     {
1398       *nchars = i;
1399       *nbytes = i_byte;
1400     }
1401
1402   return width;
1403 }
1404
1405 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1406        doc: /* Return width of STRING when displayed in the current buffer.
1407 Width is measured by how many columns it occupies on the screen.
1408 When calculating width of a multibyte character in STRING,
1409 only the base leading-code is considered; the validity of
1410 the following bytes is not checked.  Tabs in STRING are always
1411 taken to occupy `tab-width' columns.  */)
1412      (string)
1413      Lisp_Object string;
1414 {
1415   Lisp_Object val;
1416
1417   CHECK_STRING (string);
1418   XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
1419   return val;
1420 }
1421
1422 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1423        doc: /* Return the direction of CH.
1424 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1425      (ch)
1426      Lisp_Object ch;
1427 {
1428   int charset;
1429
1430   CHECK_NUMBER (ch);
1431   charset = CHAR_CHARSET (XFASTINT (ch));
1432   if (!CHARSET_DEFINED_P (charset))
1433     invalid_character (XINT (ch));
1434   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1435 }
1436
1437 /* Return the number of characters in the NBYTES bytes at PTR.
1438    This works by looking at the contents and checking for multibyte sequences.
1439    However, if the current buffer has enable-multibyte-characters = nil,
1440    we treat each byte as a character.  */
1441
1442 int
1443 chars_in_text (ptr, nbytes)
1444      const unsigned char *ptr;
1445      int nbytes;
1446 {
1447   /* current_buffer is null at early stages of Emacs initialization.  */
1448   if (current_buffer == 0
1449       || NILP (current_buffer->enable_multibyte_characters))
1450     return nbytes;
1451
1452   return multibyte_chars_in_text (ptr, nbytes);
1453 }
1454
1455 /* Return the number of characters in the NBYTES bytes at PTR.
1456    This works by looking at the contents and checking for multibyte sequences.
1457    It ignores enable-multibyte-characters.  */
1458
1459 int
1460 multibyte_chars_in_text (ptr, nbytes)
1461      const unsigned char *ptr;
1462      int nbytes;
1463 {
1464   const unsigned char *endp;
1465   int chars, bytes;
1466
1467   endp = ptr + nbytes;
1468   chars = 0;
1469
1470   while (ptr < endp)
1471     {
1472       PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
1473       ptr += bytes;
1474       chars++;
1475     }
1476
1477   return chars;
1478 }
1479
1480 /* Parse unibyte text at STR of LEN bytes as multibyte text, and
1481    count the numbers of characters and bytes in it.  On counting
1482    bytes, pay attention to the fact that 8-bit characters in the range
1483    0x80..0x9F are represented by 2 bytes in multibyte text.  */
1484 void
1485 parse_str_as_multibyte (str, len, nchars, nbytes)
1486      const unsigned char *str;
1487      int len, *nchars, *nbytes;
1488 {
1489   const unsigned char *endp = str + len;
1490   int n, chars = 0, bytes = 0;
1491
1492   while (str < endp)
1493     {
1494       if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
1495         str += n, bytes += n;
1496       else
1497         str++, bytes += 2;
1498       chars++;
1499     }
1500   *nchars = chars;
1501   *nbytes = bytes;
1502   return;
1503 }
1504
1505 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1506    It actually converts only 8-bit characters in the range 0x80..0x9F
1507    that don't contruct multibyte characters to multibyte forms.  If
1508    NCHARS is nonzero, set *NCHARS to the number of characters in the
1509    text.  It is assured that we can use LEN bytes at STR as a work
1510    area and that is enough.  Return the number of bytes of the
1511    resulting text.  */
1512
1513 int
1514 str_as_multibyte (str, len, nbytes, nchars)
1515      unsigned char *str;
1516      int len, nbytes, *nchars;
1517 {
1518   unsigned char *p = str, *endp = str + nbytes;
1519   unsigned char *to;
1520   int chars = 0;
1521   int n;
1522
1523   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1524     p += n, chars++;
1525   if (nchars)
1526     *nchars = chars;
1527   if (p == endp)
1528     return nbytes;
1529
1530   to = p;
1531   nbytes = endp - p;
1532   endp = str + len;
1533   safe_bcopy (p, endp - nbytes, nbytes);
1534   p = endp - nbytes;
1535   while (p < endp)
1536     {
1537       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1538         {
1539           while (n--)
1540             *to++ = *p++;
1541         }
1542       else
1543         {
1544           *to++ = LEADING_CODE_8_BIT_CONTROL;
1545           *to++ = *p++ + 0x20;
1546         }
1547       chars++;
1548     }
1549   if (nchars)
1550     *nchars = chars;
1551   return (to - str);
1552 }
1553
1554 /* Parse unibyte string at STR of LEN bytes, and return the number of
1555    bytes it may ocupy when converted to multibyte string by
1556    `str_to_multibyte'.  */
1557
1558 int
1559 parse_str_to_multibyte (str, len)
1560      unsigned char *str;
1561      int len;
1562 {
1563   unsigned char *endp = str + len;
1564   int bytes;
1565
1566   for (bytes = 0; str < endp; str++)
1567     bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
1568   return bytes;
1569 }
1570
1571 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1572    that contains the same single-byte characters.  It actually
1573    converts all 8-bit characters to multibyte forms.  It is assured
1574    that we can use LEN bytes at STR as a work area and that is
1575    enough.  */
1576
1577 int
1578 str_to_multibyte (str, len, bytes)
1579      unsigned char *str;
1580      int len, bytes;
1581 {
1582   unsigned char *p = str, *endp = str + bytes;
1583   unsigned char *to;
1584
1585   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1586   if (p == endp)
1587     return bytes;
1588   to = p;
1589   bytes = endp - p;
1590   endp = str + len;
1591   safe_bcopy (p, endp - bytes, bytes);
1592   p = endp - bytes;
1593   while (p < endp)
1594     {
1595       if (*p < 0x80 || *p >= 0xA0)
1596         *to++ = *p++;
1597       else
1598         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1599     }
1600   return (to - str);
1601 }
1602
1603 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1604    actually converts only 8-bit characters in the range 0x80..0x9F to
1605    unibyte forms.  */
1606
1607 int
1608 str_as_unibyte (str, bytes)
1609      unsigned char *str;
1610      int bytes;
1611 {
1612   unsigned char *p = str, *endp = str + bytes;
1613   unsigned char *to = str;
1614
1615   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1616   to = p;
1617   while (p < endp)
1618     {
1619       if (*p == LEADING_CODE_8_BIT_CONTROL)
1620         *to++ = *(p + 1) - 0x20, p += 2;
1621       else
1622         *to++ = *p++;
1623     }
1624   return (to - str);
1625 }
1626
1627 \f
1628 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1629   doc: /* Concatenate all the argument characters and make the result a string.
1630 usage: (string &rest CHARACTERS)  */)
1631      (n, args)
1632      int n;
1633      Lisp_Object *args;
1634 {
1635   int i, bufsize;
1636   unsigned char *buf, *p;
1637   int c;
1638   int multibyte = 0;
1639   Lisp_Object ret;
1640   USE_SAFE_ALLOCA;
1641
1642   bufsize = MAX_MULTIBYTE_LENGTH * n;
1643   SAFE_ALLOCA (buf, unsigned char *, bufsize);
1644   p = buf;
1645
1646   for (i = 0; i < n; i++)
1647     {
1648       CHECK_NUMBER (args[i]);
1649       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1650         multibyte = 1;
1651     }
1652
1653   for (i = 0; i < n; i++)
1654     {
1655       c = XINT (args[i]);
1656       if (multibyte)
1657         p += CHAR_STRING (c, p);
1658       else
1659         *p++ = c;
1660     }
1661
1662   ret = make_string_from_bytes (buf, n, p - buf);
1663   SAFE_FREE ();
1664
1665   return ret;
1666 }
1667
1668 #endif /* emacs */
1669 \f
1670 int
1671 charset_id_internal (charset_name)
1672      char *charset_name;
1673 {
1674   Lisp_Object val;
1675
1676   val= Fget (intern (charset_name), Qcharset);
1677   if (!VECTORP (val))
1678     error ("Charset %s is not defined", charset_name);
1679
1680   return (XINT (XVECTOR (val)->contents[0]));
1681 }
1682
1683 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1684        Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
1685      ()
1686 {
1687   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1688   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1689   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1690   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1691   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1692   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1693   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1694   charset_mule_unicode_0100_24ff
1695     = charset_id_internal ("mule-unicode-0100-24ff");
1696   charset_mule_unicode_2500_33ff
1697     = charset_id_internal ("mule-unicode-2500-33ff");
1698   charset_mule_unicode_e000_ffff
1699     = charset_id_internal ("mule-unicode-e000-ffff");
1700   return Qnil;
1701 }
1702
1703 void
1704 init_charset_once ()
1705 {
1706   int i, j, k;
1707
1708   staticpro (&Vcharset_table);
1709   staticpro (&Vcharset_symbol_table);
1710   staticpro (&Vgeneric_character_list);
1711
1712   /* This has to be done here, before we call Fmake_char_table.  */
1713   Qcharset_table = intern ("charset-table");
1714   staticpro (&Qcharset_table);
1715
1716   /* Intern this now in case it isn't already done.
1717      Setting this variable twice is harmless.
1718      But don't staticpro it here--that is done in alloc.c.  */
1719   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1720
1721   /* Now we are ready to set up this property, so we can
1722      create the charset table.  */
1723   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1724   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1725
1726   Qunknown = intern ("unknown");
1727   staticpro (&Qunknown);
1728   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1729                                         Qunknown);
1730
1731   /* Setup tables.  */
1732   for (i = 0; i < 2; i++)
1733     for (j = 0; j < 2; j++)
1734       for (k = 0; k < 128; k++)
1735         iso_charset_table [i][j][k] = -1;
1736
1737   for (i = 0; i < 256; i++)
1738     bytes_by_char_head[i] = 1;
1739   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1740   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1741   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1742   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1743
1744   for (i = 0; i < 128; i++)
1745     width_by_char_head[i] = 1;
1746   for (; i < 256; i++)
1747     width_by_char_head[i] = 4;
1748   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1749   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1750   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1751   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1752
1753   {
1754     Lisp_Object val;
1755
1756     val = Qnil;
1757     for (i = 0x81; i < 0x90; i++)
1758       val = Fcons (make_number ((i - 0x70) << 7), val);
1759     for (; i < 0x9A; i++)
1760       val = Fcons (make_number ((i - 0x8F) << 14), val);
1761     for (i = 0xA0; i < 0xF0; i++)
1762       val = Fcons (make_number ((i - 0x70) << 7), val);
1763     for (; i < 0xFF; i++)
1764       val = Fcons (make_number ((i - 0xE0) << 14), val);
1765     Vgeneric_character_list = Fnreverse (val);
1766   }
1767
1768   nonascii_insert_offset = 0;
1769   Vnonascii_translation_table = Qnil;
1770 }
1771
1772 #ifdef emacs
1773
1774 void
1775 syms_of_charset ()
1776 {
1777   Qcharset = intern ("charset");
1778   staticpro (&Qcharset);
1779
1780   Qascii = intern ("ascii");
1781   staticpro (&Qascii);
1782
1783   Qeight_bit_control = intern ("eight-bit-control");
1784   staticpro (&Qeight_bit_control);
1785
1786   Qeight_bit_graphic = intern ("eight-bit-graphic");
1787   staticpro (&Qeight_bit_graphic);
1788
1789   /* Define special charsets ascii, eight-bit-control, and
1790      eight-bit-graphic.  */
1791   update_charset_table (make_number (CHARSET_ASCII),
1792                         make_number (1), make_number (94),
1793                         make_number (1),
1794                         make_number (0),
1795                         make_number ('B'),
1796                         make_number (0),
1797                         build_string ("ASCII"),
1798                         Qnil,   /* same as above */
1799                         build_string ("ASCII (ISO646 IRV)"));
1800   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1801   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1802
1803   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1804                         make_number (1), make_number (96),
1805                         make_number (4),
1806                         make_number (0),
1807                         make_number (-1),
1808                         make_number (-1),
1809                         build_string ("8-bit control code (0x80..0x9F)"),
1810                         Qnil,   /* same as above */
1811                         Qnil);  /* same as above */
1812   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1813   Fput (Qeight_bit_control, Qcharset,
1814         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1815
1816   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1817                         make_number (1), make_number (96),
1818                         make_number (4),
1819                         make_number (0),
1820                         make_number (-1),
1821                         make_number (-1),
1822                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1823                         Qnil,   /* same as above */
1824                         Qnil);  /* same as above */
1825   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1826   Fput (Qeight_bit_graphic, Qcharset,
1827         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1828
1829   Qauto_fill_chars = intern ("auto-fill-chars");
1830   staticpro (&Qauto_fill_chars);
1831   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1832
1833   defsubr (&Sdefine_charset);
1834   defsubr (&Sgeneric_character_list);
1835   defsubr (&Sget_unused_iso_final_char);
1836   defsubr (&Sdeclare_equiv_charset);
1837   defsubr (&Sfind_charset_region);
1838   defsubr (&Sfind_charset_string);
1839   defsubr (&Smake_char_internal);
1840   defsubr (&Ssplit_char);
1841   defsubr (&Schar_charset);
1842   defsubr (&Scharset_after);
1843   defsubr (&Siso_charset);
1844   defsubr (&Schar_valid_p);
1845   defsubr (&Sunibyte_char_to_multibyte);
1846   defsubr (&Smultibyte_char_to_unibyte);
1847   defsubr (&Schar_bytes);
1848   defsubr (&Schar_width);
1849   defsubr (&Sstring_width);
1850   defsubr (&Schar_direction);
1851   defsubr (&Sstring);
1852   defsubr (&Ssetup_special_charsets);
1853
1854   DEFVAR_LISP ("charset-list", &Vcharset_list,
1855                doc: /* List of charsets ever defined.  */);
1856   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1857                                         Fcons (Qeight_bit_graphic, Qnil)));
1858
1859   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1860                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1861 An ID of a translation table is an index of this vector.  */);
1862   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1863
1864   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1865               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1866   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1867
1868   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1869               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1870   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1871
1872   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1873               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1874   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1875
1876   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1877               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1878   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1879
1880   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1881               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1882 This is used for converting unibyte text to multibyte,
1883 and for inserting character codes specified by number.
1884
1885 This serves to convert a Latin-1 or similar 8-bit character code
1886 to the corresponding Emacs multibyte character code.
1887 Typically the value should be (- (make-char CHARSET 0) 128),
1888 for your choice of character set.
1889 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1890   nonascii_insert_offset = 0;
1891
1892   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1893                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1894 This is used for converting unibyte text to multibyte,
1895 and for inserting character codes specified by number.
1896
1897 Conversion is performed only when multibyte characters are enabled,
1898 and it serves to convert a Latin-1 or similar 8-bit character code
1899 to the corresponding Emacs character code.
1900
1901 If this is nil, `nonascii-insert-offset' is used instead.
1902 See also the docstring of `make-translation-table'.  */);
1903   Vnonascii_translation_table = Qnil;
1904
1905   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1906                doc: /* A char-table for characters which invoke auto-filling.
1907 Such characters have value t in this table.  */);
1908   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1909   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1910   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1911 }
1912
1913 #endif /* emacs */
1914
1915 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1916    (do not change this comment) */