src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
   4      National Institute of Advanced Industrial Science and Technology (AIST)
   5      Registration Number H14PRO021
   6
   7 This file is part of GNU Emacs.
   8
   9 GNU Emacs is free software; you can redistribute it and/or modify
  10 it under the terms of the GNU General Public License as published by
  11 the Free Software Foundation; either version 2, or (at your option)
  12 any later version.
  13
  14 GNU Emacs is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GNU Emacs; see the file COPYING.  If not, write to
  21 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  22 Boston, MA 02110-1301, USA.  */
  23
  24 /* At first, see the document in `charset.h' to understand the code in
  25    this file.  */
  26
  27 #ifdef emacs
  28 #include <config.h>
  29 #endif
  30
  31 #include <stdio.h>
  32
  33 #ifdef emacs
  34
  35 #include <sys/types.h>
  36 #include "lisp.h"
  37 #include "buffer.h"
  38 #include "charset.h"
  39 #include "composite.h"
  40 #include "coding.h"
  41 #include "disptab.h"
  42
  43 #else  /* not emacs */
  44
  45 #include "mulelib.h"
  46
  47 #endif /* emacs */
  48
  49 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  50 Lisp_Object Qunknown;
  51
  52 /* Declaration of special leading-codes.  */
  53 EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
  54 EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
  55 EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
  56 EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
  57
  58 /* Declaration of special charsets.  The values are set by
  59    Fsetup_special_charsets.  */
  60 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  61 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  62 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  63 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  64 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  65 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  66 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  67 int charset_mule_unicode_0100_24ff;
  68 int charset_mule_unicode_2500_33ff;
  69 int charset_mule_unicode_e000_ffff;
  70
  71 Lisp_Object Qcharset_table;
  72
  73 /* A char-table containing information of each character set.  */
  74 Lisp_Object Vcharset_table;
  75
  76 /* A vector of charset symbol indexed by charset-id.  This is used
  77    only for returning charset symbol from C functions.  */
  78 Lisp_Object Vcharset_symbol_table;
  79
  80 /* A list of charset symbols ever defined.  */
  81 Lisp_Object Vcharset_list;
  82
  83 /* Vector of translation table ever defined.
  84    ID of a translation table is used to index this vector.  */
  85 Lisp_Object Vtranslation_table_vector;
  86
  87 /* A char-table for characters which may invoke auto-filling.  */
  88 Lisp_Object Vauto_fill_chars;
  89
  90 Lisp_Object Qauto_fill_chars;
  91
  92 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  93 int bytes_by_char_head[256];
  94 int width_by_char_head[256];
  95
  96 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  97    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  98 int iso_charset_table[2][2][128];
  99
 100 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
 101 unsigned char *_fetch_multibyte_char_p;
 102 int _fetch_multibyte_char_len;
 103
 104 /* Offset to add to a non-ASCII value when inserting it.  */
 105 EMACS_INT nonascii_insert_offset;
 106
 107 /* Translation table for converting non-ASCII unibyte characters
 108    to multibyte codes, or nil.  */
 109 Lisp_Object Vnonascii_translation_table;
 110
 111 /* List of all possible generic characters.  */
 112 Lisp_Object Vgeneric_character_list;
 113
 114 \f
 115 void
 116 invalid_character (c)
 117      int c;
 118 {
 119   error ("Invalid character: %d, #o%o, #x%x", c, c, c);
 120 }
 121
 122 /* Parse string STR of length LENGTH and fetch information of a
 123    character at STR.  Set BYTES to the byte length the character
 124    occupies, CHARSET, C1, C2 to proper values of the character. */
 125
 126 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 127   do {                                                                       \
 128     (c1) = *(str);                                                           \
 129     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 130     if ((bytes) == 1)                                                        \
 131       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 132     else if ((bytes) == 2)                                                   \
 133       {                                                                      \
 134         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 135           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 136         else                                                                 \
 137           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 138       }                                                                      \
 139     else if ((bytes) == 3)                                                   \
 140       {                                                                      \
 141         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 142           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 143         else                                                                 \
 144           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 145       }                                                                      \
 146     else                                                                     \
 147       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 148   } while (0)
 149
 150 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
 151    Note that this intentionally allows invalid components, such
 152    as 0xA0 0xA0, because there exist many files that contain
 153    such invalid byte sequences, especially in EUC-GB. */
 154 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 155   ((charset) == CHARSET_ASCII                           \
 156    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 157    : ((charset) == CHARSET_8_BIT_CONTROL                \
 158       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 159       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 160          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 161          : (CHARSET_DIMENSION (charset) == 1            \
 162             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 163             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 164                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 165
 166 /* Store multi-byte form of the character C in STR.  The caller should
 167    allocate at least 4-byte area at STR in advance.  Returns the
 168    length of the multi-byte form.  If C is an invalid character code,
 169    return -1.  */
 170
 171 int
 172 char_to_string_1 (c, str)
 173      int c;
 174      unsigned char *str;
 175 {
 176   unsigned char *p = str;
 177
 178   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 179     {
 180       /* Multibyte character can't have a modifier bit.  */
 181       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 182         return -1;
 183
 184       /* For Meta, Shift, and Control modifiers, we need special care.  */
 185       if (c & CHAR_META)
 186         {
 187           /* Move the meta bit to the right place for a string.  */
 188           c = (c & ~CHAR_META) | 0x80;
 189         }
 190       if (c & CHAR_SHIFT)
 191         {
 192           /* Shift modifier is valid only with [A-Za-z].  */
 193           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 194             c &= ~CHAR_SHIFT;
 195           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 196             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 197         }
 198       if (c & CHAR_CTL)
 199         {
 200           /* Simulate the code in lread.c.  */
 201           /* Allow `\C- ' and `\C-?'.  */
 202           if (c == (CHAR_CTL | ' '))
 203             c = 0;
 204           else if (c == (CHAR_CTL | '?'))
 205             c = 127;
 206           /* ASCII control chars are made from letters (both cases),
 207              as well as the non-letters within 0100...0137.  */
 208           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 209             c &= (037 | (~0177 & ~CHAR_CTL));
 210           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 211             c &= (037 | (~0177 & ~CHAR_CTL));
 212         }
 213
 214       /* If C still has any modifier bits, just ignore it.  */
 215       c &= ~CHAR_MODIFIER_MASK;
 216     }
 217
 218   if (SINGLE_BYTE_CHAR_P (c))
 219     {
 220       if (ASCII_BYTE_P (c) || c >= 0xA0)
 221         *p++ = c;
 222       else
 223         {
 224           *p++ = LEADING_CODE_8_BIT_CONTROL;
 225           *p++ = c + 0x20;
 226         }
 227     }
 228   else if (CHAR_VALID_P (c, 0))
 229     {
 230       int charset, c1, c2;
 231
 232       SPLIT_CHAR (c, charset, c1, c2);
 233
 234       if (charset >= LEADING_CODE_EXT_11)
 235         *p++ = (charset < LEADING_CODE_EXT_12
 236                 ? LEADING_CODE_PRIVATE_11
 237                 : (charset < LEADING_CODE_EXT_21
 238                    ? LEADING_CODE_PRIVATE_12
 239                    : (charset < LEADING_CODE_EXT_22
 240                       ? LEADING_CODE_PRIVATE_21
 241                       : LEADING_CODE_PRIVATE_22)));
 242       *p++ = charset;
 243       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 244         return -1;
 245       if (c1)
 246         {
 247           *p++ = c1 | 0x80;
 248           if (c2 > 0)
 249             *p++ = c2 | 0x80;
 250         }
 251     }
 252   else
 253     return -1;
 254
 255   return (p - str);
 256 }
 257
 258
 259 /* Store multi-byte form of the character C in STR.  The caller should
 260    allocate at least 4-byte area at STR in advance.  Returns the
 261    length of the multi-byte form.  If C is an invalid character code,
 262    signal an error.
 263
 264    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 265    directly if C can be an ASCII character.  */
 266
 267 int
 268 char_to_string (c, str)
 269      int c;
 270      unsigned char *str;
 271 {
 272   int len;
 273   len = char_to_string_1 (c, str);
 274   if (len == -1)
 275     invalid_character (c);
 276   return len;
 277 }
 278
 279
 280 /* Return the non-ASCII character corresponding to multi-byte form at
 281    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 282    length of the multibyte form in *ACTUAL_LEN.
 283
 284    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 285    this function directly if you want ot handle ASCII characters as
 286    well.  */
 287
 288 int
 289 string_to_char (str, len, actual_len)
 290      const unsigned char *str;
 291      int len, *actual_len;
 292 {
 293   int c, bytes, charset, c1, c2;
 294
 295   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 296   c = MAKE_CHAR (charset, c1, c2);
 297   if (actual_len)
 298     *actual_len = bytes;
 299   return c;
 300 }
 301
 302 /* Return the length of the multi-byte form at string STR of length LEN.
 303    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 304 int
 305 multibyte_form_length (str, len)
 306      const unsigned char *str;
 307      int len;
 308 {
 309   int bytes;
 310
 311   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 312   return bytes;
 313 }
 314
 315 /* Check multibyte form at string STR of length LEN and set variables
 316    pointed by CHARSET, C1, and C2 to charset and position codes of the
 317    character at STR, and return 0.  If there's no multibyte character,
 318    return -1.  This should be used only in the macro SPLIT_STRING
 319    which checks range of STR in advance.  */
 320
 321 int
 322 split_string (str, len, charset, c1, c2)
 323      const unsigned char *str;
 324      unsigned char *c1, *c2;
 325      int len, *charset;
 326 {
 327   register int bytes, cs, code1, code2 = -1;
 328
 329   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 330   if (cs == CHARSET_ASCII)
 331     return -1;
 332   *charset = cs;
 333   *c1 = code1;
 334   *c2 = code2;
 335   return 0;
 336 }
 337
 338 /* Return 1 iff character C has valid printable glyph.
 339    Use the macro CHAR_PRINTABLE_P instead.  */
 340 int
 341 char_printable_p (c)
 342      int c;
 343 {
 344   int charset, c1, c2;
 345
 346   if (ASCII_BYTE_P (c))
 347     return 1;
 348   else if (SINGLE_BYTE_CHAR_P (c))
 349     return 0;
 350   else if (c >= MAX_CHAR)
 351     return 0;
 352
 353   SPLIT_CHAR (c, charset, c1, c2);
 354   if (! CHARSET_DEFINED_P (charset))
 355     return 0;
 356   if (CHARSET_CHARS (charset) == 94
 357       ? c1 <= 32 || c1 >= 127
 358       : c1 < 32)
 359     return 0;
 360   if (CHARSET_DIMENSION (charset) == 2
 361       && (CHARSET_CHARS (charset) == 94
 362           ? c2 <= 32 || c2 >= 127
 363           : c2 < 32))
 364     return 0;
 365   return 1;
 366 }
 367
 368 /* Translate character C by translation table TABLE.  If C
 369    is negative, translate a character specified by CHARSET, C1, and C2
 370    (C1 and C2 are code points of the character).  If no translation is
 371    found in TABLE, return C.  */
 372 int
 373 translate_char (table, c, charset, c1, c2)
 374      Lisp_Object table;
 375      int c, charset, c1, c2;
 376 {
 377   Lisp_Object ch;
 378   int alt_charset, alt_c1, alt_c2, dimension;
 379
 380   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 381   if (!CHAR_TABLE_P (table)
 382       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 383     return c;
 384
 385   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 386   dimension = CHARSET_DIMENSION (alt_charset);
 387   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 388     /* CH is not a generic character, just return it.  */
 389     return XFASTINT (ch);
 390
 391   /* Since CH is a generic character, we must return a specific
 392      charater which has the same position codes as C from CH.  */
 393   if (charset < 0)
 394     SPLIT_CHAR (c, charset, c1, c2);
 395   if (dimension != CHARSET_DIMENSION (charset))
 396     /* We can't make such a character because of dimension mismatch.  */
 397     return c;
 398   return MAKE_CHAR (alt_charset, c1, c2);
 399 }
 400
 401 /* Convert the unibyte character C to multibyte based on
 402    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 403    convert C to a valid multibyte character, convert it based on
 404    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 405
 406 int
 407 unibyte_char_to_multibyte (c)
 408      int c;
 409 {
 410   if (c < 0400 && c >= 0200)
 411     {
 412       int c_save = c;
 413
 414       if (! NILP (Vnonascii_translation_table))
 415         {
 416           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 417           if (c >= 0400 && ! char_valid_p (c, 0))
 418             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 419         }
 420       else if (c >= 0240 && nonascii_insert_offset > 0)
 421         {
 422           c += nonascii_insert_offset;
 423           if (c < 0400 || ! char_valid_p (c, 0))
 424             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 425         }
 426       else if (c >= 0240)
 427         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 428     }
 429   return c;
 430 }
 431
 432
 433 /* Convert the multibyte character C to unibyte 8-bit character based
 434    on Vnonascii_translation_table or nonascii_insert_offset.  If
 435    REV_TBL is non-nil, it should be a reverse table of
 436    Vnonascii_translation_table, i.e. what given by:
 437      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 438
 439 int
 440 multibyte_char_to_unibyte (c, rev_tbl)
 441      int c;
 442      Lisp_Object rev_tbl;
 443 {
 444   if (!SINGLE_BYTE_CHAR_P (c))
 445     {
 446       int c_save = c;
 447
 448       if (! CHAR_TABLE_P (rev_tbl)
 449           && CHAR_TABLE_P (Vnonascii_translation_table))
 450         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 451                                           make_number (0));
 452       if (CHAR_TABLE_P (rev_tbl))
 453         {
 454           Lisp_Object temp;
 455           temp = Faref (rev_tbl, make_number (c));
 456           if (INTEGERP (temp))
 457             c = XINT (temp);
 458           if (c >= 256)
 459             c = (c_save & 0177) + 0200;
 460         }
 461       else
 462         {
 463           if (nonascii_insert_offset > 0)
 464             c -= nonascii_insert_offset;
 465           if (c < 128 || c >= 256)
 466             c = (c_save & 0177) + 0200;
 467         }
 468     }
 469
 470   return c;
 471 }
 472
 473 \f
 474 /* Update the table Vcharset_table with the given arguments (see the
 475    document of `define-charset' for the meaning of each argument).
 476    Several other table contents are also updated.  The caller should
 477    check the validity of CHARSET-ID and the remaining arguments in
 478    advance.  */
 479
 480 void
 481 update_charset_table (charset_id, dimension, chars, width, direction,
 482                       iso_final_char, iso_graphic_plane,
 483                       short_name, long_name, description)
 484      Lisp_Object charset_id, dimension, chars, width, direction;
 485      Lisp_Object iso_final_char, iso_graphic_plane;
 486      Lisp_Object short_name, long_name, description;
 487 {
 488   int charset = XINT (charset_id);
 489   int bytes;
 490   unsigned char leading_code_base, leading_code_ext;
 491
 492   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 493     CHARSET_TABLE_ENTRY (charset)
 494       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 495
 496   if (NILP (long_name))
 497     long_name = short_name;
 498   if (NILP (description))
 499     description = long_name;
 500
 501   /* Get byte length of multibyte form, base leading-code, and
 502      extended leading-code of the charset.  See the comment under the
 503      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 504   bytes = XINT (dimension);
 505   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 506     {
 507       /* Official charset, it doesn't have an extended leading-code.  */
 508       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 509         bytes += 1; /* For a base leading-code.  */
 510       leading_code_base = charset;
 511       leading_code_ext = 0;
 512     }
 513   else
 514     {
 515       /* Private charset.  */
 516       bytes += 2; /* For base and extended leading-codes.  */
 517       leading_code_base
 518         = (charset < LEADING_CODE_EXT_12
 519            ? LEADING_CODE_PRIVATE_11
 520            : (charset < LEADING_CODE_EXT_21
 521               ? LEADING_CODE_PRIVATE_12
 522               : (charset < LEADING_CODE_EXT_22
 523                  ? LEADING_CODE_PRIVATE_21
 524                  : LEADING_CODE_PRIVATE_22)));
 525       leading_code_ext = charset;
 526       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 527         error ("Invalid dimension for the charset-ID %d", charset);
 528     }
 529
 530   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 531   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 532   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 533   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 534   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 535   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 536   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 537     = make_number (leading_code_base);
 538   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 539     = make_number (leading_code_ext);
 540   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 541   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 542     = iso_graphic_plane;
 543   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 544   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 545   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 546   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 547
 548   {
 549     /* If we have already defined a charset which has the same
 550        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 551        DIRECTION, we must update the entry REVERSE-CHARSET of both
 552        charsets.  If there's no such charset, the value of the entry
 553        is set to nil.  */
 554     int i;
 555
 556     for (i = 0; i <= MAX_CHARSET; i++)
 557       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 558         {
 559           if (CHARSET_DIMENSION (i) == XINT (dimension)
 560               && CHARSET_CHARS (i) == XINT (chars)
 561               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 562               && CHARSET_DIRECTION (i) != XINT (direction))
 563             {
 564               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 565                 = make_number (i);
 566               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 567               break;
 568             }
 569         }
 570     if (i > MAX_CHARSET)
 571       /* No such a charset.  */
 572       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 573         = make_number (-1);
 574   }
 575
 576   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 577       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 578     {
 579       bytes_by_char_head[leading_code_base] = bytes;
 580       width_by_char_head[leading_code_base] = XINT (width);
 581
 582       /* Update table emacs_code_class.  */
 583       emacs_code_class[charset] = (bytes == 2
 584                                    ? EMACS_leading_code_2
 585                                    : (bytes == 3
 586                                       ? EMACS_leading_code_3
 587                                       : EMACS_leading_code_4));
 588     }
 589
 590   /* Update table iso_charset_table.  */
 591   if (XINT (iso_final_char) >= 0
 592       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 593     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 594 }
 595
 596 #ifdef emacs
 597
 598 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 599    is invalid.  */
 600 int
 601 get_charset_id (charset_symbol)
 602      Lisp_Object charset_symbol;
 603 {
 604   Lisp_Object val;
 605   int charset;
 606
 607   /* This originally used a ?: operator, but reportedly the HP-UX
 608      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 609   if (SYMBOLP (charset_symbol)
 610       && VECTORP (val = Fget (charset_symbol, Qcharset))
 611       && CHARSET_VALID_P (charset =
 612                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 613     return charset;
 614   else
 615     return -1;
 616 }
 617
 618 /* Return an identification number for a new private charset of
 619    DIMENSION and WIDTH.  If there's no more room for the new charset,
 620    return 0.  */
 621 Lisp_Object
 622 get_new_private_charset_id (dimension, width)
 623      int dimension, width;
 624 {
 625   int charset, from, to;
 626
 627   if (dimension == 1)
 628     {
 629       from = LEADING_CODE_EXT_11;
 630       to = LEADING_CODE_EXT_21;
 631     }
 632   else
 633     {
 634       from = LEADING_CODE_EXT_21;
 635       to = LEADING_CODE_EXT_MAX + 1;
 636     }
 637
 638   for (charset = from; charset < to; charset++)
 639     if (!CHARSET_DEFINED_P (charset)) break;
 640
 641   return make_number (charset < to ? charset : 0);
 642 }
 643
 644 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 645        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 646 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 647  treated as a private charset.
 648 INFO-VECTOR is a vector of the format:
 649    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 650     SHORT-NAME LONG-NAME DESCRIPTION]
 651 The meanings of each elements is as follows:
 652 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 653 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 654 WIDTH (integer) is the number of columns a character in the charset
 655 occupies on the screen: one of 0, 1, and 2.
 656
 657 DIRECTION (integer) is the rendering direction of characters in the
 658 charset when rendering.  If 0, render from left to right, else
 659 render from right to left.
 660
 661 ISO-FINAL-CHAR (character) is the final character of the
 662 corresponding ISO 2022 charset.
 663 It may be -1 if the charset is internal use only.
 664
 665 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 666 while encoding to variants of ISO 2022 coding system, one of the
 667 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 668 It may be -1 if the charset is internal use only.
 669
 670 SHORT-NAME (string) is the short name to refer to the charset.
 671
 672 LONG-NAME (string) is the long name to refer to the charset.
 673
 674 DESCRIPTION (string) is the description string of the charset.  */)
 675        (charset_id, charset_symbol, info_vector)
 676      Lisp_Object charset_id, charset_symbol, info_vector;
 677 {
 678   Lisp_Object *vec;
 679
 680   if (!NILP (charset_id))
 681     CHECK_NUMBER (charset_id);
 682   CHECK_SYMBOL (charset_symbol);
 683   CHECK_VECTOR (info_vector);
 684
 685   if (! NILP (charset_id))
 686     {
 687       if (! CHARSET_VALID_P (XINT (charset_id)))
 688         error ("Invalid CHARSET: %d", XINT (charset_id));
 689       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 690         error ("Already defined charset: %d", XINT (charset_id));
 691     }
 692
 693   vec = XVECTOR (info_vector)->contents;
 694   if (XVECTOR (info_vector)->size != 9
 695       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 696       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 697       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 698       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 699       || !INTEGERP (vec[4])
 700       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 701       || !INTEGERP (vec[5])
 702       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 703       || !STRINGP (vec[6])
 704       || !STRINGP (vec[7])
 705       || !STRINGP (vec[8]))
 706     error ("Invalid info-vector argument for defining charset %s",
 707            SDATA (SYMBOL_NAME (charset_symbol)));
 708
 709   if (NILP (charset_id))
 710     {
 711       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 712       if (XINT (charset_id) == 0)
 713         error ("There's no room for a new private charset %s",
 714                SDATA (SYMBOL_NAME (charset_symbol)));
 715     }
 716
 717   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 718                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 719   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 720   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 721   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 722   Fupdate_coding_systems_internal ();
 723   return Qnil;
 724 }
 725
 726 DEFUN ("generic-character-list", Fgeneric_character_list,
 727        Sgeneric_character_list, 0, 0, 0,
 728        doc: /* Return a list of all possible generic characters.
 729 It includes a generic character for a charset not yet defined.  */)
 730      ()
 731 {
 732   return Vgeneric_character_list;
 733 }
 734
 735 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 736        Sget_unused_iso_final_char, 2, 2, 0,
 737        doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
 738 DIMENSION is the number of bytes to represent a character: 1 or 2.
 739 CHARS is the number of characters in a dimension: 94 or 96.
 740
 741 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 742 If there's no unused final char for the specified kind of charset,
 743 return nil.  */)
 744      (dimension, chars)
 745      Lisp_Object dimension, chars;
 746 {
 747   int final_char;
 748
 749   CHECK_NUMBER (dimension);
 750   CHECK_NUMBER (chars);
 751   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 752     error ("Invalid charset dimension %d, it should be 1 or 2",
 753            XINT (dimension));
 754   if (XINT (chars) != 94 && XINT (chars) != 96)
 755     error ("Invalid charset chars %d, it should be 94 or 96",
 756            XINT (chars));
 757   for (final_char = '0'; final_char <= '?'; final_char++)
 758     {
 759       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 760         break;
 761     }
 762   return (final_char <= '?' ? make_number (final_char) : Qnil);
 763 }
 764
 765 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 766        4, 4, 0,
 767        doc: /* Declare an equivalent charset for ISO-2022 decoding.
 768
 769 On decoding by an ISO-2022 base coding system, when a charset
 770 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
 771 if CHARSET is designated instead.  */)
 772      (dimension, chars, final_char, charset)
 773      Lisp_Object dimension, chars, final_char, charset;
 774 {
 775   int charset_id;
 776
 777   CHECK_NUMBER (dimension);
 778   CHECK_NUMBER (chars);
 779   CHECK_NUMBER (final_char);
 780   CHECK_SYMBOL (charset);
 781
 782   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 783     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 784   if (XINT (chars) != 94 && XINT (chars) != 96)
 785     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 786   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 787     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 788   if ((charset_id = get_charset_id (charset)) < 0)
 789     error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
 790
 791   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
 792   return Qnil;
 793 }
 794
 795 /* Return information about charsets in the text at PTR of NBYTES
 796    bytes, which are NCHARS characters.  The value is:
 797
 798         0: Each character is represented by one byte.  This is always
 799            true for unibyte text.
 800         1: No charsets other than ascii eight-bit-control,
 801            eight-bit-graphic, and latin-1 are found.
 802         2: Otherwise.
 803
 804    In addition, if CHARSETS is nonzero, for each found charset N, set
 805    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 806    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 807    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 808    1 (note that there's no charset whose ID is 1).  */
 809
 810 int
 811 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 812      const unsigned char *ptr;
 813      int nchars, nbytes, *charsets;
 814      Lisp_Object table;
 815 {
 816   if (nchars == nbytes)
 817     {
 818       if (charsets && nbytes > 0)
 819         {
 820           const unsigned char *endp = ptr + nbytes;
 821           int maskbits = 0;
 822
 823           while (ptr < endp && maskbits != 7)
 824             {
 825               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 826               ptr++;
 827             }
 828
 829           if (maskbits & 1)
 830             charsets[CHARSET_ASCII] = 1;
 831           if (maskbits & 2)
 832             charsets[CHARSET_8_BIT_CONTROL] = 1;
 833           if (maskbits & 4)
 834             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 835         }
 836       return 0;
 837     }
 838   else
 839     {
 840       int return_val = 1;
 841       int bytes, charset, c1, c2;
 842
 843       if (! CHAR_TABLE_P (table))
 844         table = Qnil;
 845
 846       while (nchars-- > 0)
 847         {
 848           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 849           ptr += bytes;
 850
 851           if (!CHARSET_DEFINED_P (charset))
 852             charset = 1;
 853           else if (! NILP (table))
 854             {
 855               int c = translate_char (table, -1, charset, c1, c2);
 856               if (c >= 0)
 857                 charset = CHAR_CHARSET (c);
 858             }
 859
 860           if (return_val == 1
 861               && charset != CHARSET_ASCII
 862               && charset != CHARSET_8_BIT_CONTROL
 863               && charset != CHARSET_8_BIT_GRAPHIC
 864               && charset != charset_latin_iso8859_1)
 865             return_val = 2;
 866
 867           if (charsets)
 868             charsets[charset] = 1;
 869           else if (return_val == 2)
 870             break;
 871         }
 872       return return_val;
 873     }
 874 }
 875
 876 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 877        2, 3, 0,
 878        doc: /* Return a list of charsets in the region between BEG and END.
 879 BEG and END are buffer positions.
 880 Optional arg TABLE if non-nil is a translation table to look up.
 881
 882 If the region contains invalid multibyte characters,
 883 `unknown' is included in the returned list.
 884
 885 If the current buffer is unibyte, the returned list may contain
 886 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 887      (beg, end, table)
 888      Lisp_Object beg, end, table;
 889 {
 890   int charsets[MAX_CHARSET + 1];
 891   int from, from_byte, to, stop, stop_byte, i;
 892   Lisp_Object val;
 893
 894   validate_region (&beg, &end);
 895   from = XFASTINT (beg);
 896   stop = to = XFASTINT (end);
 897
 898   if (from < GPT && GPT < to)
 899     {
 900       stop = GPT;
 901       stop_byte = GPT_BYTE;
 902     }
 903   else
 904     stop_byte = CHAR_TO_BYTE (stop);
 905
 906   from_byte = CHAR_TO_BYTE (from);
 907
 908   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 909   while (1)
 910     {
 911       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 912                             stop_byte - from_byte, charsets, table);
 913       if (stop < to)
 914         {
 915           from = stop, from_byte = stop_byte;
 916           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 917         }
 918       else
 919         break;
 920     }
 921
 922   val = Qnil;
 923   if (charsets[1])
 924     val = Fcons (Qunknown, val);
 925   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 926     if (charsets[i])
 927       val = Fcons (CHARSET_SYMBOL (i), val);
 928   if (charsets[0])
 929     val = Fcons (Qascii, val);
 930   return val;
 931 }
 932
 933 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 934        1, 2, 0,
 935        doc: /* Return a list of charsets in STR.
 936 Optional arg TABLE if non-nil is a translation table to look up.
 937
 938 If the string contains invalid multibyte characters,
 939 `unknown' is included in the returned list.
 940
 941 If STR is unibyte, the returned list may contain
 942 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 943      (str, table)
 944      Lisp_Object str, table;
 945 {
 946   int charsets[MAX_CHARSET + 1];
 947   int i;
 948   Lisp_Object val;
 949
 950   CHECK_STRING (str);
 951
 952   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 953   find_charset_in_text (SDATA (str), SCHARS (str),
 954                         SBYTES (str), charsets, table);
 955
 956   val = Qnil;
 957   if (charsets[1])
 958     val = Fcons (Qunknown, val);
 959   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 960     if (charsets[i])
 961       val = Fcons (CHARSET_SYMBOL (i), val);
 962   if (charsets[0])
 963     val = Fcons (Qascii, val);
 964   return val;
 965 }
 966
 967 \f
 968 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 969        doc: /* Return a character made from arguments.
 970 Internal use only.  */)
 971      (charset, code1, code2)
 972      Lisp_Object charset, code1, code2;
 973 {
 974   int charset_id, c1, c2;
 975
 976   CHECK_NUMBER (charset);
 977   charset_id = XINT (charset);
 978   if (!CHARSET_DEFINED_P (charset_id))
 979     error ("Invalid charset ID: %d", XINT (charset));
 980
 981   if (NILP (code1))
 982     c1 = 0;
 983   else
 984     {
 985       CHECK_NUMBER (code1);
 986       c1 = XINT (code1);
 987     }
 988   if (NILP (code2))
 989     c2 = 0;
 990   else
 991     {
 992       CHECK_NUMBER (code2);
 993       c2 = XINT (code2);
 994     }
 995
 996   if (charset_id == CHARSET_ASCII)
 997     {
 998       if (c1 < 0 || c1 > 0x7F)
 999         goto invalid_code_posints;
1000       return make_number (c1);
1001     }
1002   else if (charset_id == CHARSET_8_BIT_CONTROL)
1003     {
1004       if (NILP (code1))
1005         c1 = 0x80;
1006       else if (c1 < 0x80 || c1 > 0x9F)
1007         goto invalid_code_posints;
1008       return make_number (c1);
1009     }
1010   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1011     {
1012       if (NILP (code1))
1013         c1 = 0xA0;
1014       else if (c1 < 0xA0 || c1 > 0xFF)
1015         goto invalid_code_posints;
1016       return make_number (c1);
1017     }
1018   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1019     goto invalid_code_posints;
1020   c1 &= 0x7F;
1021   c2 &= 0x7F;
1022   if (c1 == 0
1023       ? c2 != 0
1024       : (c2 == 0
1025          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1026          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1027     goto invalid_code_posints;
1028   return make_number (MAKE_CHAR (charset_id, c1, c2));
1029
1030  invalid_code_posints:
1031   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1032 }
1033
1034 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1035        doc: /* Return list of charset and one or two position-codes of CH.
1036 If CH is invalid as a character code,
1037 return a list of symbol `unknown' and CH.  */)
1038      (ch)
1039      Lisp_Object ch;
1040 {
1041   int c, charset, c1, c2;
1042
1043   CHECK_NUMBER (ch);
1044   c = XFASTINT (ch);
1045   if (!CHAR_VALID_P (c, 1))
1046     return Fcons (Qunknown, Fcons (ch, Qnil));
1047   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1048   return (c2 >= 0
1049           ? Fcons (CHARSET_SYMBOL (charset),
1050                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1051           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1052 }
1053
1054 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1055        doc: /* Return charset of CH.  */)
1056      (ch)
1057      Lisp_Object ch;
1058 {
1059   CHECK_NUMBER (ch);
1060
1061   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1062 }
1063
1064 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1065        doc: /* Return charset of a character in the current buffer at position POS.
1066 If POS is nil, it defauls to the current point.
1067 If POS is out of range, the value is nil.  */)
1068      (pos)
1069      Lisp_Object pos;
1070 {
1071   Lisp_Object ch;
1072   int charset;
1073
1074   ch = Fchar_after (pos);
1075   if (! INTEGERP (ch))
1076     return ch;
1077   charset = CHAR_CHARSET (XINT (ch));
1078   return CHARSET_SYMBOL (charset);
1079 }
1080
1081 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1082        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1083
1084 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1085 by their DIMENSION, CHARS, and FINAL-CHAR,
1086 where as Emacs distinguishes them by charset symbol.
1087 See the documentation of the function `charset-info' for the meanings of
1088 DIMENSION, CHARS, and FINAL-CHAR.  */)
1089      (dimension, chars, final_char)
1090      Lisp_Object dimension, chars, final_char;
1091 {
1092   int charset;
1093
1094   CHECK_NUMBER (dimension);
1095   CHECK_NUMBER (chars);
1096   CHECK_NUMBER (final_char);
1097
1098   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1099     return Qnil;
1100   return CHARSET_SYMBOL (charset);
1101 }
1102
1103 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1104    generic character.  If GENERICP is zero, return nonzero iff C is a
1105    valid normal character.  Do not call this function directly,
1106    instead use macro CHAR_VALID_P.  */
1107 int
1108 char_valid_p (c, genericp)
1109      int c, genericp;
1110 {
1111   int charset, c1, c2;
1112
1113   if (c < 0 || c >= MAX_CHAR)
1114     return 0;
1115   if (SINGLE_BYTE_CHAR_P (c))
1116     return 1;
1117   SPLIT_CHAR (c, charset, c1, c2);
1118   if (genericp)
1119     {
1120       if (c1)
1121         {
1122           if (c2 <= 0) c2 = 0x20;
1123         }
1124       else
1125         {
1126           if (c2 <= 0) c1 = c2 = 0x20;
1127         }
1128     }
1129   return (CHARSET_DEFINED_P (charset)
1130           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1131 }
1132
1133 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1134        doc: /* Return t if OBJECT is a valid normal character.
1135 If optional arg GENERICP is non-nil, also return t if OBJECT is
1136 a valid generic character.  */)
1137      (object, genericp)
1138      Lisp_Object object, genericp;
1139 {
1140   if (! NATNUMP (object))
1141     return Qnil;
1142   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1143 }
1144
1145 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1146        Sunibyte_char_to_multibyte, 1, 1, 0,
1147        doc: /* Convert the unibyte character CH to multibyte character.
1148 The conversion is done based on `nonascii-translation-table' (which see)
1149  or `nonascii-insert-offset' (which see).  */)
1150      (ch)
1151      Lisp_Object ch;
1152 {
1153   int c;
1154
1155   CHECK_NUMBER (ch);
1156   c = XINT (ch);
1157   if (c < 0 || c >= 0400)
1158     error ("Invalid unibyte character: %d", c);
1159   c = unibyte_char_to_multibyte (c);
1160   if (c < 0)
1161     error ("Can't convert to multibyte character: %d", XINT (ch));
1162   return make_number (c);
1163 }
1164
1165 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1166        Smultibyte_char_to_unibyte, 1, 1, 0,
1167        doc: /* Convert the multibyte character CH to unibyte character.
1168 The conversion is done based on `nonascii-translation-table' (which see)
1169  or `nonascii-insert-offset' (which see).  */)
1170      (ch)
1171      Lisp_Object ch;
1172 {
1173   int c;
1174
1175   CHECK_NUMBER (ch);
1176   c = XINT (ch);
1177   if (! CHAR_VALID_P (c, 0))
1178     error ("Invalid multibyte character: %d", c);
1179   c = multibyte_char_to_unibyte (c, Qnil);
1180   if (c < 0)
1181     error ("Can't convert to unibyte character: %d", XINT (ch));
1182   return make_number (c);
1183 }
1184
1185 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1186        doc: /* Return 1 regardless of the argument CH.  */)
1187      (ch)
1188      Lisp_Object ch;
1189 {
1190   CHECK_NUMBER (ch);
1191   return make_number (1);
1192 }
1193
1194 /* Return how many bytes C will occupy in a multibyte buffer.
1195    Don't call this function directly, instead use macro CHAR_BYTES.  */
1196 int
1197 char_bytes (c)
1198      int c;
1199 {
1200   int charset;
1201
1202   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1203     return 1;
1204   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1205     return 1;
1206
1207   charset = CHAR_CHARSET (c);
1208   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1209 }
1210
/* Return the width of character of which multi-byte form starts with
   C.  The width is measured by how many columns occupied on the
   screen when displayed in the current buffer.

   The result is:
     - `tab-width' of the current buffer for a tab, 0 for a newline;
     - 2 or 4 for the other bytes below 0x20 and for 0x7F, depending
       on whether the buffer's `ctl-arrow' is non-nil or nil;
     - 1 for bytes in 0x20..0x7E;
     - for bytes of 0x80 and above, WIDTH_BY_CHAR_HEAD (C) when the
       buffer is multibyte and C is a base leading code, else 4.

   C is evaluated several times, so it must not have side effects.
   Every use of C is parenthesized so that an arbitrary expression
   can be passed safely.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1228
1229 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1230        doc: /* Return width of CH when displayed in the current buffer.
1231 The width is measured by how many columns it occupies on the screen.
1232 Tab is taken to occupy `tab-width' columns.  */)
1233      (ch)
1234      Lisp_Object ch;
1235 {
1236   Lisp_Object val, disp;
1237   int c;
1238   struct Lisp_Char_Table *dp = buffer_display_table ();
1239
1240   CHECK_NUMBER (ch);
1241
1242   c = XINT (ch);
1243
1244   /* Get the way the display table would display it.  */
1245   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1246
1247   if (VECTORP (disp))
1248     XSETINT (val, XVECTOR (disp)->size);
1249   else if (SINGLE_BYTE_CHAR_P (c))
1250     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1251   else
1252     {
1253       int charset = CHAR_CHARSET (c);
1254
1255       XSETFASTINT (val, CHARSET_WIDTH (charset));
1256     }
1257   return val;
1258 }
1259
1260 /* Return width of string STR of length LEN when displayed in the
1261    current buffer.  The width is measured by how many columns it
1262    occupies on the screen.  */
1263
1264 int
1265 strwidth (str, len)
1266      unsigned char *str;
1267      int len;
1268 {
1269   return c_string_width (str, len, -1, NULL, NULL);
1270 }
1271
1272 /* Return width of string STR of length LEN when displayed in the
1273    current buffer.  The width is measured by how many columns it
1274    occupies on the screen.  If PRECISION > 0, return the width of
1275    longest substring that doesn't exceed PRECISION, and set number of
1276    characters and bytes of the substring in *NCHARS and *NBYTES
1277    respectively.  */
1278
1279 int
1280 c_string_width (str, len, precision, nchars, nbytes)
1281      const unsigned char *str;
1282      int len, precision, *nchars, *nbytes;
1283 {
1284   int i = 0, i_byte = 0;
1285   int width = 0;
1286   int chars;
1287   struct Lisp_Char_Table *dp = buffer_display_table ();
1288
1289   while (i_byte < len)
1290     {
1291       int bytes, thiswidth;
1292       Lisp_Object val;
1293
1294       if (dp)
1295         {
1296           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1297
1298           chars = 1;
1299           val = DISP_CHAR_VECTOR (dp, c);
1300           if (VECTORP (val))
1301             thiswidth = XVECTOR (val)->size;
1302           else
1303             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1304         }
1305       else
1306         {
1307           chars = 1;
1308           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1309           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1310         }
1311
1312       if (precision > 0
1313           && (width + thiswidth > precision))
1314         {
1315           *nchars = i;
1316           *nbytes = i_byte;
1317           return width;
1318         }
1319       i++;
1320       i_byte += bytes;
1321       width += thiswidth;
1322   }
1323
1324   if (precision > 0)
1325     {
1326       *nchars = i;
1327       *nbytes = i_byte;
1328     }
1329
1330   return width;
1331 }
1332
1333 /* Return width of Lisp string STRING when displayed in the current
1334    buffer.  The width is measured by how many columns it occupies on
1335    the screen while paying attention to compositions.  If PRECISION >
1336    0, return the width of longest substring that doesn't exceed
1337    PRECISION, and set number of characters and bytes of the substring
1338    in *NCHARS and *NBYTES respectively.  */
1339
1340 int
1341 lisp_string_width (string, precision, nchars, nbytes)
1342      Lisp_Object string;
1343      int precision, *nchars, *nbytes;
1344 {
1345   int len = SCHARS (string);
1346   int len_byte = SBYTES (string);
1347   /* This set multibyte to 0 even if STRING is multibyte when it
1348      contains only ascii and eight-bit-graphic, but that's
1349      intentional.  */
1350   int multibyte = len < len_byte;
1351   const unsigned char *str = SDATA (string);
1352   int i = 0, i_byte = 0;
1353   int width = 0;
1354   struct Lisp_Char_Table *dp = buffer_display_table ();
1355
1356   while (i < len)
1357     {
1358       int chars, bytes, thiswidth;
1359       Lisp_Object val;
1360       int cmp_id;
1361       int ignore, end;
1362
1363       if (find_composition (i, -1, &ignore, &end, &val, string)
1364           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1365               >= 0))
1366         {
1367           thiswidth = composition_table[cmp_id]->width;
1368           chars = end - i;
1369           bytes = string_char_to_byte (string, end) - i_byte;
1370         }
1371       else if (dp)
1372         {
1373           int c;
1374
1375           if (multibyte)
1376             c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1377           else
1378             c = str[i_byte], bytes = 1;
1379           chars = 1;
1380           val = DISP_CHAR_VECTOR (dp, c);
1381           if (VECTORP (val))
1382             thiswidth = XVECTOR (val)->size;
1383           else
1384             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1385         }
1386       else
1387         {
1388           chars = 1;
1389           if (multibyte)
1390             PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1391           else
1392             bytes = 1;
1393           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1394         }
1395
1396       if (precision > 0
1397           && (width + thiswidth > precision))
1398         {
1399           *nchars = i;
1400           *nbytes = i_byte;
1401           return width;
1402         }
1403       i += chars;
1404       i_byte += bytes;
1405       width += thiswidth;
1406   }
1407
1408   if (precision > 0)
1409     {
1410       *nchars = i;
1411       *nbytes = i_byte;
1412     }
1413
1414   return width;
1415 }
1416
1417 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1418        doc: /* Return width of STRING when displayed in the current buffer.
1419 Width is measured by how many columns it occupies on the screen.
1420 When calculating width of a multibyte character in STRING,
1421 only the base leading-code is considered; the validity of
1422 the following bytes is not checked.  Tabs in STRING are always
1423 taken to occupy `tab-width' columns.  */)
1424      (string)
1425      Lisp_Object string;
1426 {
1427   Lisp_Object val;
1428
1429   CHECK_STRING (string);
1430   XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
1431   return val;
1432 }
1433
1434 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1435        doc: /* Return the direction of CH.
1436 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1437      (ch)
1438      Lisp_Object ch;
1439 {
1440   int charset;
1441
1442   CHECK_NUMBER (ch);
1443   charset = CHAR_CHARSET (XFASTINT (ch));
1444   if (!CHARSET_DEFINED_P (charset))
1445     invalid_character (XINT (ch));
1446   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1447 }
1448
1449 /* Return the number of characters in the NBYTES bytes at PTR.
1450    This works by looking at the contents and checking for multibyte sequences.
1451    However, if the current buffer has enable-multibyte-characters = nil,
1452    we treat each byte as a character.  */
1453
1454 int
1455 chars_in_text (ptr, nbytes)
1456      const unsigned char *ptr;
1457      int nbytes;
1458 {
1459   /* current_buffer is null at early stages of Emacs initialization.  */
1460   if (current_buffer == 0
1461       || NILP (current_buffer->enable_multibyte_characters))
1462     return nbytes;
1463
1464   return multibyte_chars_in_text (ptr, nbytes);
1465 }
1466
/* Count the characters in the NBYTES bytes at PTR by stepping through
   the contents one multibyte sequence at a time.  The value of
   enable-multibyte-characters is not consulted.  */

int
multibyte_chars_in_text (ptr, nbytes)
     const unsigned char *ptr;
     int nbytes;
{
  const unsigned char *limit = ptr + nbytes;
  int count, seq_len;

  for (count = 0; ptr < limit; count++)
    {
      /* SEQ_LEN receives the length of the sequence at PTR.  */
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seq_len);
      ptr += seq_len;
    }

  return count;
}
1491
/* Parse the LEN bytes of unibyte text at STR as if they were
   multibyte text, storing the number of characters in *NCHARS and
   the number of bytes in *NBYTES.  A byte that does not start a
   sequence accepted by UNIBYTE_STR_AS_MULTIBYTE_P is counted as one
   character of 2 bytes, because 8-bit characters in the range
   0x80..0x9F are represented by 2 bytes in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     const unsigned char *str;
     int len, *nchars, *nbytes;
{
  const unsigned char *limit = str + len;
  int seq_len;
  int char_count = 0, byte_count = 0;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, seq_len))
        {
          /* The sequence is taken over as it is.  */
          str += seq_len;
          byte_count += seq_len;
        }
      else
        {
          /* A lone byte will need a two-byte form.  */
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1516
/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
   It actually converts only 8-bit characters in the range 0x80..0x9F
   that don't construct multibyte characters to multibyte forms.  If
   NCHARS is nonzero, set *NCHARS to the number of characters in the
   text.  It is assured that we can use LEN bytes at STR as a work
   area and that is enough.  Return the number of bytes of the
   resulting text.  */

int
str_as_multibyte (str, len, nbytes, nchars)
     unsigned char *str;
     int len, nbytes, *nchars;
{
  unsigned char *p = str, *endp = str + nbytes;
  unsigned char *to;
  int chars = 0;
  int n;

  /* Skip the leading part that UNIBYTE_STR_AS_MULTIBYTE_P accepts as
     it is; the macro stores the length of each accepted sequence in
     N.  (Presumably it tests for a well-formed multibyte sequence at
     P -- confirm in charset.h.)  */
  while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
    p += n, chars++;
  if (nchars)
    *nchars = chars;
  /* Everything was accepted: the text needs no change.  */
  if (p == endp)
    return nbytes;

  /* Move the unprocessed tail to the end of the LEN-byte work area,
     then re-read it from there while writing the result at TO, which
     starts where the accepted prefix ended.  */
  to = p;
  nbytes = endp - p;
  endp = str + len;
  safe_bcopy (p, endp - nbytes, nbytes);
  p = endp - nbytes;
  while (p < endp)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
        {
          /* Copy an accepted sequence of N bytes unchanged.  */
          while (n--)
            *to++ = *p++;
        }
      else
        {
          /* Any other byte becomes a two-byte form: the 8-bit-control
             leading code followed by the byte plus 0x20.  */
          *to++ = LEADING_CODE_8_BIT_CONTROL;
          *to++ = *p++ + 0x20;
        }
      chars++;
    }
  if (nchars)
    *nchars = chars;
  return (to - str);
}
1565
/* Return the number of bytes that the unibyte string at STR of LEN
   bytes may occupy once it is converted to a multibyte string by
   `str_to_multibyte'.  Bytes in the range 0x80..0x9F count as two
   bytes, every other byte as one.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      if (str[i] >= 0x80 && str[i] < 0xA0)
        total += 2;
      else
        total += 1;
    }
  return total;
}
1582
1583 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1584    that contains the same single-byte characters.  It actually
1585    converts all 8-bit characters to multibyte forms.  It is assured
1586    that we can use LEN bytes at STR as a work area and that is
1587    enough.  */
1588
1589 int
1590 str_to_multibyte (str, len, bytes)
1591      unsigned char *str;
1592      int len, bytes;
1593 {
1594   unsigned char *p = str, *endp = str + bytes;
1595   unsigned char *to;
1596
1597   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1598   if (p == endp)
1599     return bytes;
1600   to = p;
1601   bytes = endp - p;
1602   endp = str + len;
1603   safe_bcopy (p, endp - bytes, bytes);
1604   p = endp - bytes;
1605   while (p < endp)
1606     {
1607       if (*p < 0x80 || *p >= 0xA0)
1608         *to++ = *p++;
1609       else
1610         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1611     }
1612   return (to - str);
1613 }
1614
1615 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1616    actually converts only 8-bit characters in the range 0x80..0x9F to
1617    unibyte forms.  */
1618
1619 int
1620 str_as_unibyte (str, bytes)
1621      unsigned char *str;
1622      int bytes;
1623 {
1624   unsigned char *p = str, *endp = str + bytes;
1625   unsigned char *to = str;
1626
1627   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1628   to = p;
1629   while (p < endp)
1630     {
1631       if (*p == LEADING_CODE_8_BIT_CONTROL)
1632         *to++ = *(p + 1) - 0x20, p += 2;
1633       else
1634         *to++ = *p++;
1635     }
1636   return (to - str);
1637 }
1638
1639 \f
1640 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1641   doc: /* Concatenate all the argument characters and make the result a string.
1642 usage: (string &rest CHARACTERS)  */)
1643      (n, args)
1644      int n;
1645      Lisp_Object *args;
1646 {
1647   int i, bufsize;
1648   unsigned char *buf, *p;
1649   int c;
1650   int multibyte = 0;
1651   Lisp_Object ret;
1652   USE_SAFE_ALLOCA;
1653
1654   bufsize = MAX_MULTIBYTE_LENGTH * n;
1655   SAFE_ALLOCA (buf, unsigned char *, bufsize);
1656   p = buf;
1657
1658   for (i = 0; i < n; i++)
1659     {
1660       CHECK_NUMBER (args[i]);
1661       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1662         multibyte = 1;
1663     }
1664
1665   for (i = 0; i < n; i++)
1666     {
1667       c = XINT (args[i]);
1668       if (multibyte)
1669         p += CHAR_STRING (c, p);
1670       else
1671         *p++ = c;
1672     }
1673
1674   ret = make_string_from_bytes (buf, n, p - buf);
1675   SAFE_FREE ();
1676
1677   return ret;
1678 }
1679
1680 #endif /* emacs */
1681 \f
1682 int
1683 charset_id_internal (charset_name)
1684      char *charset_name;
1685 {
1686   Lisp_Object val;
1687
1688   val= Fget (intern (charset_name), Qcharset);
1689   if (!VECTORP (val))
1690     error ("Charset %s is not defined", charset_name);
1691
1692   return (XINT (XVECTOR (val)->contents[0]));
1693 }
1694
DEFUN ("setup-special-charsets", Fsetup_special_charsets,
       Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
     ()
{
  /* Cache the IDs of the charsets that the C code refers to through
     dedicated variables.  charset_id_internal signals an error if any
     of these charsets has not been defined yet.  */
  charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
  charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
  charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
  charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
  charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
  charset_big5_1 = charset_id_internal ("chinese-big5-1");
  charset_big5_2 = charset_id_internal ("chinese-big5-2");
  charset_mule_unicode_0100_24ff
    = charset_id_internal ("mule-unicode-0100-24ff");
  charset_mule_unicode_2500_33ff
    = charset_id_internal ("mule-unicode-2500-33ff");
  charset_mule_unicode_e000_ffff
    = charset_id_internal ("mule-unicode-e000-ffff");
  return Qnil;
}
1714
/* Set up the charset tables and related variables: create
   Vcharset_table and Vcharset_symbol_table, reset the ISO charset
   table and the per-leading-byte byte/width tables, build
   Vgeneric_character_list, and clear the non-ASCII translation
   settings.  */
void
init_charset_once ()
{
  int i, j, k;

  staticpro (&Vcharset_table);
  staticpro (&Vcharset_symbol_table);
  staticpro (&Vgeneric_character_list);

  /* This has to be done here, before we call Fmake_char_table.  */
  Qcharset_table = intern ("charset-table");
  staticpro (&Qcharset_table);

  /* Intern this now in case it isn't already done.
     Setting this variable twice is harmless.
     But don't staticpro it here--that is done in alloc.c.  */
  Qchar_table_extra_slots = intern ("char-table-extra-slots");

  /* Now we are ready to set up this property, so we can
     create the charset table.  */
  Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
  Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);

  /* Every slot of the symbol table starts out as `unknown'.  */
  Qunknown = intern ("unknown");
  staticpro (&Qunknown);
  Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
                                        Qunknown);

  /* Setup tables.  */
  /* Mark every entry of the 2 x 2 x 128 ISO charset table with -1;
     the lookups in this file treat a negative entry as "no
     charset".  */
  for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)
      for (k = 0; k < 128; k++)
        iso_charset_table [i][j][k] = -1;

  /* Sequence length by leading byte: 1 by default, 3 or 4 for the
     four private leading codes.  */
  for (i = 0; i < 256; i++)
    bytes_by_char_head[i] = 1;
  bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
  bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;

  /* Display width by leading byte: 1 below 128 and 4 from 128 up,
     except for the private leading codes, which get 1 or 2.  */
  for (i = 0; i < 128; i++)
    width_by_char_head[i] = 1;
  for (; i < 256; i++)
    width_by_char_head[i] = 4;
  width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
  width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;

  {
    Lisp_Object val;

    /* Collect one value per I in 0x81..0x99 and 0xA0..0xFE, in
       ascending order of I (the list is built reversed and then
       flipped by Fnreverse).
       NOTE(review): presumably I is a charset ID and each value is
       that charset's generic character, with the shift of 7 or 14
       depending on the charset's dimension -- confirm against
       charset.h.  */
    val = Qnil;
    for (i = 0x81; i < 0x90; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0x9A; i++)
      val = Fcons (make_number ((i - 0x8F) << 14), val);
    for (i = 0xA0; i < 0xF0; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0xFF; i++)
      val = Fcons (make_number ((i - 0xE0) << 14), val);
    Vgeneric_character_list = Fnreverse (val);
  }

  /* Start with no offset and no translation table for non-ASCII
     characters.  */
  nonascii_insert_offset = 0;
  Vnonascii_translation_table = Qnil;
}
1783
1784 #ifdef emacs
1785 /* Define the Lisp-visible parts of the charset module: intern the
        symbols used from C and staticpro the variables holding them,
        register the three built-in charsets, declare the primitives with
        defsubr, and declare the Lisp variables together with their
        initial values.  Takes no arguments and returns nothing.  */
1786 void
1787 syms_of_charset ()
1788 {
1789   Qcharset = intern ("charset");
1790   staticpro (&Qcharset);
1791
1792   Qascii = intern ("ascii");
1793   staticpro (&Qascii);
1794
1795   Qeight_bit_control = intern ("eight-bit-control");
1796   staticpro (&Qeight_bit_control);
1797
1798   Qeight_bit_graphic = intern ("eight-bit-graphic");
1799   staticpro (&Qeight_bit_graphic);
1800
1801   /* Define special charsets ascii, eight-bit-control, and
1802      eight-bit-graphic.  Each one is registered with
          update_charset_table, then linked to its symbol in both
          directions: CHARSET_SYMBOL records the symbol for the charset,
          and the symbol's `charset' property gets the charset's
          CHARSET_TABLE_ENTRY.
          NOTE(review): the meaning of update_charset_table's positional
          arguments is not visible in this part of the file -- confirm
          against its definition before changing any of them.  */
1803   update_charset_table (make_number (CHARSET_ASCII),
1804                         make_number (1), make_number (94),
1805                         make_number (1),
1806                         make_number (0),
1807                         make_number ('B'),
1808                         make_number (0),
1809                         build_string ("ASCII"),
1810                         Qnil,   /* same as above */
1811                         build_string ("ASCII (ISO646 IRV)"));
1812   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1813   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1814
1815   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1816                         make_number (1), make_number (96),
1817                         make_number (4),
1818                         make_number (0),
1819                         make_number (-1),
1820                         make_number (-1),
1821                         build_string ("8-bit control code (0x80..0x9F)"),
1822                         Qnil,   /* same as above */
1823                         Qnil);  /* same as above */
1824   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1825   Fput (Qeight_bit_control, Qcharset,
1826         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1827
1828   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1829                         make_number (1), make_number (96),
1830                         make_number (4),
1831                         make_number (0),
1832                         make_number (-1),
1833                         make_number (-1),
1834                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1835                         Qnil,   /* same as above */
1836                         Qnil);  /* same as above */
1837   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1838   Fput (Qeight_bit_graphic, Qcharset,
1839         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1840   /* `auto-fill-chars' is passed to Fmake_char_table below, so give it a char-table-extra-slots property (0) first.  */
1841   Qauto_fill_chars = intern ("auto-fill-chars");
1842   staticpro (&Qauto_fill_chars);
1843   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1844   /* Register the Lisp primitives.  */
1845   defsubr (&Sdefine_charset);
1846   defsubr (&Sgeneric_character_list);
1847   defsubr (&Sget_unused_iso_final_char);
1848   defsubr (&Sdeclare_equiv_charset);
1849   defsubr (&Sfind_charset_region);
1850   defsubr (&Sfind_charset_string);
1851   defsubr (&Smake_char_internal);
1852   defsubr (&Ssplit_char);
1853   defsubr (&Schar_charset);
1854   defsubr (&Scharset_after);
1855   defsubr (&Siso_charset);
1856   defsubr (&Schar_valid_p);
1857   defsubr (&Sunibyte_char_to_multibyte);
1858   defsubr (&Smultibyte_char_to_unibyte);
1859   defsubr (&Schar_bytes);
1860   defsubr (&Schar_width);
1861   defsubr (&Sstring_width);
1862   defsubr (&Schar_direction);
1863   defsubr (&Sstring);
1864   defsubr (&Ssetup_special_charsets);
1865   /* Declare the Lisp variables; each one is initialized right after its declaration.  */
1866   DEFVAR_LISP ("charset-list", &Vcharset_list,
1867                doc: /* List of charsets ever defined.  */);
1868   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1869                                         Fcons (Qeight_bit_graphic, Qnil)));
1870
1871   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1872                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1873 An ID of a translation table is an index of this vector.  */);
1874   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1875   /* The four leading-code-private-* variables are set from the matching LEADING_CODE_PRIVATE_* macros.  */
1876   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1877               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1878   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1879
1880   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1881               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1882   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1883
1884   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1885               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1886   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1887
1888   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1889               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1890   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1891
1892   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1893               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1894 This is used for converting unibyte text to multibyte,
1895 and for inserting character codes specified by number.
1896
1897 This serves to convert a Latin-1 or similar 8-bit character code
1898 to the corresponding Emacs multibyte character code.
1899 Typically the value should be (- (make-char CHARSET 0) 128),
1900 for your choice of character set.
1901 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1902   nonascii_insert_offset = 0;
1903
1904   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1905                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1906 This is used for converting unibyte text to multibyte,
1907 and for inserting character codes specified by number.
1908
1909 Conversion is performed only when multibyte characters are enabled,
1910 and it serves to convert a Latin-1 or similar 8-bit character code
1911 to the corresponding Emacs character code.
1912
1913 If this is nil, `nonascii-insert-offset' is used instead.
1914 See also the docstring of `make-translation-table'.  */);
1915   Vnonascii_translation_table = Qnil;
1916
1917   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1918                doc: /* A char-table for characters which invoke auto-filling.
1919 Such characters have value t in this table.  */);
1920   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
       /* The table is created with nil as its initial value; only space
          and newline are then set to t.  */
1921   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1922   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1923 }
1924
1925 #endif /* emacs */
1926
1927 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1928    (do not change this comment) */