src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /* At first, see the document in `charset.h' to understand the code in
  24    this file.  */
  25
  26 #ifdef emacs
  27 #include <config.h>
  28 #endif
  29
  30 #include <stdio.h>
  31
  32 #ifdef emacs
  33
  34 #include <sys/types.h>
  35 #include "lisp.h"
  36 #include "buffer.h"
  37 #include "charset.h"
  38 #include "composite.h"
  39 #include "coding.h"
  40 #include "disptab.h"
  41
  42 #else  /* not emacs */
  43
  44 #include "mulelib.h"
  45
  46 #endif /* emacs */
  47
  48 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  49 Lisp_Object Qunknown;
  50
  51 /* Declaration of special leading-codes.  */
  52 EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
  53 EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
  54 EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
  55 EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
  56
  57 /* Declaration of special charsets.  The values are set by
  58    Fsetup_special_charsets.  */
  59 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  60 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  61 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  62 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  63 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  64 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  65 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  66
  67 Lisp_Object Qcharset_table;
  68
  69 /* A char-table containing information of each character set.  */
  70 Lisp_Object Vcharset_table;
  71
  72 /* A vector of charset symbol indexed by charset-id.  This is used
  73    only for returning charset symbol from C functions.  */
  74 Lisp_Object Vcharset_symbol_table;
  75
  76 /* A list of charset symbols ever defined.  */
  77 Lisp_Object Vcharset_list;
  78
/* Vector of all translation tables ever defined.
   The ID of a translation table is its index in this vector.  */
  81 Lisp_Object Vtranslation_table_vector;
  82
  83 /* A char-table for characters which may invoke auto-filling.  */
  84 Lisp_Object Vauto_fill_chars;
  85
  86 Lisp_Object Qauto_fill_chars;
  87
  88 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  89 int bytes_by_char_head[256];
  90 int width_by_char_head[256];
  91
  92 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  93    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  94 int iso_charset_table[2][2][128];
  95
  96 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  97 unsigned char *_fetch_multibyte_char_p;
  98 int _fetch_multibyte_char_len;
  99
 100 /* Offset to add to a non-ASCII value when inserting it.  */
 101 EMACS_INT nonascii_insert_offset;
 102
 103 /* Translation table for converting non-ASCII unibyte characters
 104    to multibyte codes, or nil.  */
 105 Lisp_Object Vnonascii_translation_table;
 106
 107 /* List of all possible generic characters.  */
 108 Lisp_Object Vgeneric_character_list;
 109
 110 \f
 111 void
 112 invalid_character (c)
 113      int c;
 114 {
 115   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 116 }
 117
 118 /* Parse string STR of length LENGTH and fetch information of a
 119    character at STR.  Set BYTES to the byte length the character
 120    occupies, CHARSET, C1, C2 to proper values of the character. */
 121
 122 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 123   do {                                                                       \
 124     (c1) = *(str);                                                           \
 125     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 126     if ((bytes) == 1)                                                        \
 127       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 128     else if ((bytes) == 2)                                                   \
 129       {                                                                      \
 130         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 131           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 132         else                                                                 \
 133           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 134       }                                                                      \
 135     else if ((bytes) == 3)                                                   \
 136       {                                                                      \
 137         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 138           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 139         else                                                                 \
 140           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 141       }                                                                      \
 142     else                                                                     \
 143       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 144   } while (0)
 145
 146 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
 147    Note that this intentionally allows invalid components, such
 148    as 0xA0 0xA0, because there exist many files that contain
 149    such invalid byte sequences, especially in EUC-GB. */
 150 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 151   ((charset) == CHARSET_ASCII                           \
 152    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 153    : ((charset) == CHARSET_8_BIT_CONTROL                \
 154       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 155       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 156          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 157          : (CHARSET_DIMENSION (charset) == 1            \
 158             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 159             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 160                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 161
 162 /* Store multi-byte form of the character C in STR.  The caller should
 163    allocate at least 4-byte area at STR in advance.  Returns the
 164    length of the multi-byte form.  If C is an invalid character code,
 165    return -1.  */
 166
 167 int
 168 char_to_string_1 (c, str)
 169      int c;
 170      unsigned char *str;
 171 {
 172   unsigned char *p = str;
 173
 174   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 175     {
 176       /* Multibyte character can't have a modifier bit.  */
 177       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 178         return -1;
 179
 180       /* For Meta, Shift, and Control modifiers, we need special care.  */
 181       if (c & CHAR_META)
 182         {
 183           /* Move the meta bit to the right place for a string.  */
 184           c = (c & ~CHAR_META) | 0x80;
 185         }
 186       if (c & CHAR_SHIFT)
 187         {
 188           /* Shift modifier is valid only with [A-Za-z].  */
 189           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 190             c &= ~CHAR_SHIFT;
 191           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 192             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 193         }
 194       if (c & CHAR_CTL)
 195         {
 196           /* Simulate the code in lread.c.  */
 197           /* Allow `\C- ' and `\C-?'.  */
 198           if (c == (CHAR_CTL | ' '))
 199             c = 0;
 200           else if (c == (CHAR_CTL | '?'))
 201             c = 127;
 202           /* ASCII control chars are made from letters (both cases),
 203              as well as the non-letters within 0100...0137.  */
 204           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 205             c &= (037 | (~0177 & ~CHAR_CTL));
 206           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 207             c &= (037 | (~0177 & ~CHAR_CTL));
 208         }
 209
 210       /* If C still has any modifier bits, just ignore it.  */
 211       c &= ~CHAR_MODIFIER_MASK;
 212     }
 213
 214   if (SINGLE_BYTE_CHAR_P (c))
 215     {
 216       if (ASCII_BYTE_P (c) || c >= 0xA0)
 217         *p++ = c;
 218       else
 219         {
 220           *p++ = LEADING_CODE_8_BIT_CONTROL;
 221           *p++ = c + 0x20;
 222         }
 223     }
 224   else if (CHAR_VALID_P (c, 0))
 225     {
 226       int charset, c1, c2;
 227
 228       SPLIT_CHAR (c, charset, c1, c2);
 229
 230       if (charset >= LEADING_CODE_EXT_11)
 231         *p++ = (charset < LEADING_CODE_EXT_12
 232                 ? LEADING_CODE_PRIVATE_11
 233                 : (charset < LEADING_CODE_EXT_21
 234                    ? LEADING_CODE_PRIVATE_12
 235                    : (charset < LEADING_CODE_EXT_22
 236                       ? LEADING_CODE_PRIVATE_21
 237                       : LEADING_CODE_PRIVATE_22)));
 238       *p++ = charset;
 239       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 240         return -1;
 241       if (c1)
 242         {
 243           *p++ = c1 | 0x80;
 244           if (c2 > 0)
 245             *p++ = c2 | 0x80;
 246         }
 247     }
 248   else
 249     return -1;
 250
 251   return (p - str);
 252 }
 253
 254
 255 /* Store multi-byte form of the character C in STR.  The caller should
 256    allocate at least 4-byte area at STR in advance.  Returns the
 257    length of the multi-byte form.  If C is an invalid character code,
 258    signal an error.
 259
 260    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 261    directly if C can be an ASCII character.  */
 262
 263 int
 264 char_to_string (c, str)
 265      int c;
 266      unsigned char *str;
 267 {
 268   int len;
 269   len = char_to_string_1 (c, str);
 270   if (len == -1)
 271     invalid_character (c);
 272   return len;
 273 }
 274
 275
 276 /* Return the non-ASCII character corresponding to multi-byte form at
 277    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 278    length of the multibyte form in *ACTUAL_LEN.
 279
 280    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 281    this function directly if you want ot handle ASCII characters as
 282    well.  */
 283
 284 int
 285 string_to_char (str, len, actual_len)
 286      const unsigned char *str;
 287      int len, *actual_len;
 288 {
 289   int c, bytes, charset, c1, c2;
 290
 291   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 292   c = MAKE_CHAR (charset, c1, c2);
 293   if (actual_len)
 294     *actual_len = bytes;
 295   return c;
 296 }
 297
 298 /* Return the length of the multi-byte form at string STR of length LEN.
 299    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 300 int
 301 multibyte_form_length (str, len)
 302      const unsigned char *str;
 303      int len;
 304 {
 305   int bytes;
 306
 307   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 308   return bytes;
 309 }
 310
 311 /* Check multibyte form at string STR of length LEN and set variables
 312    pointed by CHARSET, C1, and C2 to charset and position codes of the
 313    character at STR, and return 0.  If there's no multibyte character,
 314    return -1.  This should be used only in the macro SPLIT_STRING
 315    which checks range of STR in advance.  */
 316
 317 int
 318 split_string (str, len, charset, c1, c2)
 319      const unsigned char *str;
 320      unsigned char *c1, *c2;
 321      int len, *charset;
 322 {
 323   register int bytes, cs, code1, code2 = -1;
 324
 325   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 326   if (cs == CHARSET_ASCII)
 327     return -1;
 328   *charset = cs;
 329   *c1 = code1;
 330   *c2 = code2;
 331   return 0;
 332 }
 333
 334 /* Return 1 iff character C has valid printable glyph.
 335    Use the macro CHAR_PRINTABLE_P instead.  */
 336 int
 337 char_printable_p (c)
 338      int c;
 339 {
 340   int charset, c1, c2;
 341
 342   if (ASCII_BYTE_P (c))
 343     return 1;
 344   else if (SINGLE_BYTE_CHAR_P (c))
 345     return 0;
 346   else if (c >= MAX_CHAR)
 347     return 0;
 348
 349   SPLIT_CHAR (c, charset, c1, c2);
 350   if (! CHARSET_DEFINED_P (charset))
 351     return 0;
 352   if (CHARSET_CHARS (charset) == 94
 353       ? c1 <= 32 || c1 >= 127
 354       : c1 < 32)
 355     return 0;
 356   if (CHARSET_DIMENSION (charset) == 2
 357       && (CHARSET_CHARS (charset) == 94
 358           ? c2 <= 32 || c2 >= 127
 359           : c2 < 32))
 360     return 0;
 361   return 1;
 362 }
 363
 364 /* Translate character C by translation table TABLE.  If C
 365    is negative, translate a character specified by CHARSET, C1, and C2
 366    (C1 and C2 are code points of the character).  If no translation is
 367    found in TABLE, return C.  */
 368 int
 369 translate_char (table, c, charset, c1, c2)
 370      Lisp_Object table;
 371      int c, charset, c1, c2;
 372 {
 373   Lisp_Object ch;
 374   int alt_charset, alt_c1, alt_c2, dimension;
 375
 376   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 377   if (!CHAR_TABLE_P (table)
 378       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 379     return c;
 380
 381   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 382   dimension = CHARSET_DIMENSION (alt_charset);
 383   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 384     /* CH is not a generic character, just return it.  */
 385     return XFASTINT (ch);
 386
 387   /* Since CH is a generic character, we must return a specific
 388      charater which has the same position codes as C from CH.  */
 389   if (charset < 0)
 390     SPLIT_CHAR (c, charset, c1, c2);
 391   if (dimension != CHARSET_DIMENSION (charset))
 392     /* We can't make such a character because of dimension mismatch.  */
 393     return c;
 394   return MAKE_CHAR (alt_charset, c1, c2);
 395 }
 396
 397 /* Convert the unibyte character C to multibyte based on
 398    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 399    convert C to a valid multibyte character, convert it based on
 400    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 401
 402 int
 403 unibyte_char_to_multibyte (c)
 404      int c;
 405 {
 406   if (c < 0400 && c >= 0200)
 407     {
 408       int c_save = c;
 409
 410       if (! NILP (Vnonascii_translation_table))
 411         {
 412           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 413           if (c >= 0400 && ! char_valid_p (c, 0))
 414             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 415         }
 416       else if (c >= 0240 && nonascii_insert_offset > 0)
 417         {
 418           c += nonascii_insert_offset;
 419           if (c < 0400 || ! char_valid_p (c, 0))
 420             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 421         }
 422       else if (c >= 0240)
 423         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 424     }
 425   return c;
 426 }
 427
 428
 429 /* Convert the multibyte character C to unibyte 8-bit character based
 430    on Vnonascii_translation_table or nonascii_insert_offset.  If
 431    REV_TBL is non-nil, it should be a reverse table of
 432    Vnonascii_translation_table, i.e. what given by:
 433      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 434
 435 int
 436 multibyte_char_to_unibyte (c, rev_tbl)
 437      int c;
 438      Lisp_Object rev_tbl;
 439 {
 440   if (!SINGLE_BYTE_CHAR_P (c))
 441     {
 442       int c_save = c;
 443
 444       if (! CHAR_TABLE_P (rev_tbl)
 445           && CHAR_TABLE_P (Vnonascii_translation_table))
 446         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 447                                           make_number (0));
 448       if (CHAR_TABLE_P (rev_tbl))
 449         {
 450           Lisp_Object temp;
 451           temp = Faref (rev_tbl, make_number (c));
 452           if (INTEGERP (temp))
 453             c = XINT (temp);
 454           if (c >= 256)
 455             c = (c_save & 0177) + 0200;
 456         }
 457       else
 458         {
 459           if (nonascii_insert_offset > 0)
 460             c -= nonascii_insert_offset;
 461           if (c < 128 || c >= 256)
 462             c = (c_save & 0177) + 0200;
 463         }
 464     }
 465
 466   return c;
 467 }
 468
 469 \f
 470 /* Update the table Vcharset_table with the given arguments (see the
 471    document of `define-charset' for the meaning of each argument).
 472    Several other table contents are also updated.  The caller should
 473    check the validity of CHARSET-ID and the remaining arguments in
 474    advance.  */
 475
 476 void
 477 update_charset_table (charset_id, dimension, chars, width, direction,
 478                       iso_final_char, iso_graphic_plane,
 479                       short_name, long_name, description)
 480      Lisp_Object charset_id, dimension, chars, width, direction;
 481      Lisp_Object iso_final_char, iso_graphic_plane;
 482      Lisp_Object short_name, long_name, description;
 483 {
 484   int charset = XINT (charset_id);
 485   int bytes;
 486   unsigned char leading_code_base, leading_code_ext;
 487
 488   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 489     CHARSET_TABLE_ENTRY (charset)
 490       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 491
 492   if (NILP (long_name))
 493     long_name = short_name;
 494   if (NILP (description))
 495     description = long_name;
 496
 497   /* Get byte length of multibyte form, base leading-code, and
 498      extended leading-code of the charset.  See the comment under the
 499      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 500   bytes = XINT (dimension);
 501   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 502     {
 503       /* Official charset, it doesn't have an extended leading-code.  */
 504       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 505         bytes += 1; /* For a base leading-code.  */
 506       leading_code_base = charset;
 507       leading_code_ext = 0;
 508     }
 509   else
 510     {
 511       /* Private charset.  */
 512       bytes += 2; /* For base and extended leading-codes.  */
 513       leading_code_base
 514         = (charset < LEADING_CODE_EXT_12
 515            ? LEADING_CODE_PRIVATE_11
 516            : (charset < LEADING_CODE_EXT_21
 517               ? LEADING_CODE_PRIVATE_12
 518               : (charset < LEADING_CODE_EXT_22
 519                  ? LEADING_CODE_PRIVATE_21
 520                  : LEADING_CODE_PRIVATE_22)));
 521       leading_code_ext = charset;
 522       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 523         error ("Invalid dimension for the charset-ID %d", charset);
 524     }
 525
 526   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 527   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 528   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 529   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 530   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 531   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 532   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 533     = make_number (leading_code_base);
 534   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 535     = make_number (leading_code_ext);
 536   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 537   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 538     = iso_graphic_plane;
 539   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 540   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 541   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 542   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 543
 544   {
 545     /* If we have already defined a charset which has the same
 546        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 547        DIRECTION, we must update the entry REVERSE-CHARSET of both
 548        charsets.  If there's no such charset, the value of the entry
 549        is set to nil.  */
 550     int i;
 551
 552     for (i = 0; i <= MAX_CHARSET; i++)
 553       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 554         {
 555           if (CHARSET_DIMENSION (i) == XINT (dimension)
 556               && CHARSET_CHARS (i) == XINT (chars)
 557               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 558               && CHARSET_DIRECTION (i) != XINT (direction))
 559             {
 560               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 561                 = make_number (i);
 562               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 563               break;
 564             }
 565         }
 566     if (i > MAX_CHARSET)
 567       /* No such a charset.  */
 568       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 569         = make_number (-1);
 570   }
 571
 572   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 573       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 574     {
 575       bytes_by_char_head[leading_code_base] = bytes;
 576       width_by_char_head[leading_code_base] = XINT (width);
 577
 578       /* Update table emacs_code_class.  */
 579       emacs_code_class[charset] = (bytes == 2
 580                                    ? EMACS_leading_code_2
 581                                    : (bytes == 3
 582                                       ? EMACS_leading_code_3
 583                                       : EMACS_leading_code_4));
 584     }
 585
 586   /* Update table iso_charset_table.  */
 587   if (XINT (iso_final_char) >= 0
 588       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 589     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 590 }
 591
 592 #ifdef emacs
 593
 594 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 595    is invalid.  */
 596 int
 597 get_charset_id (charset_symbol)
 598      Lisp_Object charset_symbol;
 599 {
 600   Lisp_Object val;
 601   int charset;
 602
 603   /* This originally used a ?: operator, but reportedly the HP-UX
 604      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 605   if (SYMBOLP (charset_symbol)
 606       && VECTORP (val = Fget (charset_symbol, Qcharset))
 607       && CHARSET_VALID_P (charset =
 608                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 609     return charset;
 610   else
 611     return -1;
 612 }
 613
 614 /* Return an identification number for a new private charset of
 615    DIMENSION and WIDTH.  If there's no more room for the new charset,
 616    return 0.  */
 617 Lisp_Object
 618 get_new_private_charset_id (dimension, width)
 619      int dimension, width;
 620 {
 621   int charset, from, to;
 622
 623   if (dimension == 1)
 624     {
 625       from = LEADING_CODE_EXT_11;
 626       to = LEADING_CODE_EXT_21;
 627     }
 628   else
 629     {
 630       from = LEADING_CODE_EXT_21;
 631       to = LEADING_CODE_EXT_MAX + 1;
 632     }
 633
 634   for (charset = from; charset < to; charset++)
 635     if (!CHARSET_DEFINED_P (charset)) break;
 636
 637   return make_number (charset < to ? charset : 0);
 638 }
 639
 640 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 641        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 642 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 643  treated as a private charset.
 644 INFO-VECTOR is a vector of the format:
 645    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 646     SHORT-NAME LONG-NAME DESCRIPTION]
 647 The meanings of each elements is as follows:
 648 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 649 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 650 WIDTH (integer) is the number of columns a character in the charset
 651 occupies on the screen: one of 0, 1, and 2.
 652
 653 DIRECTION (integer) is the rendering direction of characters in the
 654 charset when rendering.  If 0, render from left to right, else
 655 render from right to left.
 656
 657 ISO-FINAL-CHAR (character) is the final character of the
 658 corresponding ISO 2022 charset.
 659 It may be -1 if the charset is internal use only.
 660
 661 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 662 while encoding to variants of ISO 2022 coding system, one of the
 663 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 664 It may be -1 if the charset is internal use only.
 665
 666 SHORT-NAME (string) is the short name to refer to the charset.
 667
 668 LONG-NAME (string) is the long name to refer to the charset.
 669
 670 DESCRIPTION (string) is the description string of the charset.  */)
 671        (charset_id, charset_symbol, info_vector)
 672      Lisp_Object charset_id, charset_symbol, info_vector;
 673 {
 674   Lisp_Object *vec;
 675
 676   if (!NILP (charset_id))
 677     CHECK_NUMBER (charset_id);
 678   CHECK_SYMBOL (charset_symbol);
 679   CHECK_VECTOR (info_vector);
 680
 681   if (! NILP (charset_id))
 682     {
 683       if (! CHARSET_VALID_P (XINT (charset_id)))
 684         error ("Invalid CHARSET: %d", XINT (charset_id));
 685       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 686         error ("Already defined charset: %d", XINT (charset_id));
 687     }
 688
 689   vec = XVECTOR (info_vector)->contents;
 690   if (XVECTOR (info_vector)->size != 9
 691       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 692       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 693       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 694       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 695       || !INTEGERP (vec[4])
 696       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 697       || !INTEGERP (vec[5])
 698       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 699       || !STRINGP (vec[6])
 700       || !STRINGP (vec[7])
 701       || !STRINGP (vec[8]))
 702     error ("Invalid info-vector argument for defining charset %s",
 703            XSTRING (SYMBOL_NAME (charset_symbol))->data);
 704
 705   if (NILP (charset_id))
 706     {
 707       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 708       if (XINT (charset_id) == 0)
 709         error ("There's no room for a new private charset %s",
 710                XSTRING (SYMBOL_NAME (charset_symbol))->data);
 711     }
 712
 713   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 714                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 715   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 716   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 717   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 718   Fupdate_coding_systems_internal ();
 719   return Qnil;
 720 }
 721
 722 DEFUN ("generic-character-list", Fgeneric_character_list,
 723        Sgeneric_character_list, 0, 0, 0,
 724        doc: /* Return a list of all possible generic characters.
 725 It includes a generic character for a charset not yet defined.  */)
 726      ()
 727 {
 728   return Vgeneric_character_list;
 729 }
 730
 731 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 732        Sget_unused_iso_final_char, 2, 2, 0,
 733        doc: /* Return an unsed ISO's final char for a charset of DIMENISION and CHARS.
 734 DIMENSION is the number of bytes to represent a character: 1 or 2.
 735 CHARS is the number of characters in a dimension: 94 or 96.
 736
 737 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 738 If there's no unused final char for the specified kind of charset,
 739 return nil.  */)
 740      (dimension, chars)
 741      Lisp_Object dimension, chars;
 742 {
 743   int final_char;
 744
 745   CHECK_NUMBER (dimension);
 746   CHECK_NUMBER (chars);
 747   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 748     error ("Invalid charset dimension %d, it should be 1 or 2",
 749            XINT (dimension));
 750   if (XINT (chars) != 94 && XINT (chars) != 96)
 751     error ("Invalid charset chars %d, it should be 94 or 96",
 752            XINT (chars));
 753   for (final_char = '0'; final_char <= '?'; final_char++)
 754     {
 755       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 756         break;
 757     }
 758   return (final_char <= '?' ? make_number (final_char) : Qnil);
 759 }
 760
 761 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 762        4, 4, 0,
 763        doc: /* Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
 764 CHARSET should be defined by `defined-charset' in advance.  */)
 765      (dimension, chars, final_char, charset_symbol)
 766      Lisp_Object dimension, chars, final_char, charset_symbol;
 767 {
 768   int charset;
 769
 770   CHECK_NUMBER (dimension);
 771   CHECK_NUMBER (chars);
 772   CHECK_NUMBER (final_char);
 773   CHECK_SYMBOL (charset_symbol);
 774
 775   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 776     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 777   if (XINT (chars) != 94 && XINT (chars) != 96)
 778     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 779   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 780     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 781   if ((charset = get_charset_id (charset_symbol)) < 0)
 782     error ("Invalid charset %s", XSTRING (SYMBOL_NAME (charset_symbol))->data);
 783
 784   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 785   return Qnil;
 786 }
 787
 788 /* Return information about charsets in the text at PTR of NBYTES
 789    bytes, which are NCHARS characters.  The value is:
 790
 791         0: Each character is represented by one byte.  This is always
 792            true for unibyte text.
 793         1: No charsets other than ascii eight-bit-control,
 794            eight-bit-graphic, and latin-1 are found.
 795         2: Otherwise.
 796
 797    In addition, if CHARSETS is nonzero, for each found charset N, set
 798    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 799    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 800    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 801    1 (note that there's no charset whose ID is 1).  */
 802
 803 int
 804 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 805      unsigned char *ptr;
 806      int nchars, nbytes, *charsets;
 807      Lisp_Object table;
 808 {
 809   if (nchars == nbytes)
 810     {
 811       if (charsets && nbytes > 0)
 812         {
 813           unsigned char *endp = ptr + nbytes;
 814           int maskbits = 0;
 815
 816           while (ptr < endp && maskbits != 7)
 817             {
 818               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 819               ptr++;
 820             }
 821
 822           if (maskbits & 1)
 823             charsets[CHARSET_ASCII] = 1;
 824           if (maskbits & 2)
 825             charsets[CHARSET_8_BIT_CONTROL] = 1;
 826           if (maskbits & 4)
 827             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 828         }
 829       return 0;
 830     }
 831   else
 832     {
 833       int return_val = 1;
 834       int bytes, charset, c1, c2;
 835
 836       if (! CHAR_TABLE_P (table))
 837         table = Qnil;
 838
 839       while (nchars-- > 0)
 840         {
 841           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 842           ptr += bytes;
 843
 844           if (!CHARSET_DEFINED_P (charset))
 845             charset = 1;
 846           else if (! NILP (table))
 847             {
 848               int c = translate_char (table, -1, charset, c1, c2);
 849               if (c >= 0)
 850                 charset = CHAR_CHARSET (c);
 851             }
 852
 853           if (return_val == 1
 854               && charset != CHARSET_ASCII
 855               && charset != CHARSET_8_BIT_CONTROL
 856               && charset != CHARSET_8_BIT_GRAPHIC
 857               && charset != charset_latin_iso8859_1)
 858             return_val = 2;
 859
 860           if (charsets)
 861             charsets[charset] = 1;
 862           else if (return_val == 2)
 863             break;
 864         }
 865       return return_val;
 866     }
 867 }
 868
 869 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 870        2, 3, 0,
 871        doc: /* Return a list of charsets in the region between BEG and END.
 872 BEG and END are buffer positions.
 873 Optional arg TABLE if non-nil is a translation table to look up.
 874
 875 If the region contains invalid multibyte characters,
 876 `unknown' is included in the returned list.
 877
 878 If the current buffer is unibyte, the returned list may contain
 879 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 880      (beg, end, table)
 881      Lisp_Object beg, end, table;
 882 {
 883   int charsets[MAX_CHARSET + 1];
 884   int from, from_byte, to, stop, stop_byte, i;
 885   Lisp_Object val;
 886
 887   validate_region (&beg, &end);
 888   from = XFASTINT (beg);
 889   stop = to = XFASTINT (end);
 890
 891   if (from < GPT && GPT < to)
 892     {
 893       stop = GPT;
 894       stop_byte = GPT_BYTE;
 895     }
 896   else
 897     stop_byte = CHAR_TO_BYTE (stop);
 898
 899   from_byte = CHAR_TO_BYTE (from);
 900
 901   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 902   while (1)
 903     {
 904       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 905                             stop_byte - from_byte, charsets, table);
 906       if (stop < to)
 907         {
 908           from = stop, from_byte = stop_byte;
 909           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 910         }
 911       else
 912         break;
 913     }
 914
 915   val = Qnil;
 916   if (charsets[1])
 917     val = Fcons (Qunknown, val);
 918   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 919     if (charsets[i])
 920       val = Fcons (CHARSET_SYMBOL (i), val);
 921   if (charsets[0])
 922     val = Fcons (Qascii, val);
 923   return val;
 924 }
 925
 926 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 927        1, 2, 0,
 928        doc: /* Return a list of charsets in STR.
 929 Optional arg TABLE if non-nil is a translation table to look up.
 930
 931 If the string contains invalid multibyte characters,
 932 `unknown' is included in the returned list.
 933
 934 If STR is unibyte, the returned list may contain
 935 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 936      (str, table)
 937      Lisp_Object str, table;
 938 {
 939   int charsets[MAX_CHARSET + 1];
 940   int i;
 941   Lisp_Object val;
 942
 943   CHECK_STRING (str);
 944
 945   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 946   find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
 947                         STRING_BYTES (XSTRING (str)), charsets, table);
 948
 949   val = Qnil;
 950   if (charsets[1])
 951     val = Fcons (Qunknown, val);
 952   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 953     if (charsets[i])
 954       val = Fcons (CHARSET_SYMBOL (i), val);
 955   if (charsets[0])
 956     val = Fcons (Qascii, val);
 957   return val;
 958 }
 959
 960 \f
 961 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 962        doc: /* Return a character made from arguments.
 963 Internal use only.  */)
 964      (charset, code1, code2)
 965      Lisp_Object charset, code1, code2;
 966 {
 967   int charset_id, c1, c2;
 968
 969   CHECK_NUMBER (charset);
 970   charset_id = XINT (charset);
 971   if (!CHARSET_DEFINED_P (charset_id))
 972     error ("Invalid charset ID: %d", XINT (charset));
 973
 974   if (NILP (code1))
 975     c1 = 0;
 976   else
 977     {
 978       CHECK_NUMBER (code1);
 979       c1 = XINT (code1);
 980     }
 981   if (NILP (code2))
 982     c2 = 0;
 983   else
 984     {
 985       CHECK_NUMBER (code2);
 986       c2 = XINT (code2);
 987     }
 988
 989   if (charset_id == CHARSET_ASCII)
 990     {
 991       if (c1 < 0 || c1 > 0x7F)
 992         goto invalid_code_posints;
 993       return make_number (c1);
 994     }
 995   else if (charset_id == CHARSET_8_BIT_CONTROL)
 996     {
 997       if (NILP (code1))
 998         c1 = 0x80;
 999       else if (c1 < 0x80 || c1 > 0x9F)
1000         goto invalid_code_posints;
1001       return make_number (c1);
1002     }
1003   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1004     {
1005       if (NILP (code1))
1006         c1 = 0xA0;
1007       else if (c1 < 0xA0 || c1 > 0xFF)
1008         goto invalid_code_posints;
1009       return make_number (c1);
1010     }
1011   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1012     goto invalid_code_posints;
1013   c1 &= 0x7F;
1014   c2 &= 0x7F;
1015   if (c1 == 0
1016       ? c2 != 0
1017       : (c2 == 0
1018          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1019          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1020     goto invalid_code_posints;
1021   return make_number (MAKE_CHAR (charset_id, c1, c2));
1022
1023  invalid_code_posints:
1024   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1025 }
1026
1027 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1028        doc: /* Return list of charset and one or two position-codes of CHAR.
1029 If CHAR is invalid as a character code,
1030 return a list of symbol `unknown' and CHAR.  */)
1031      (ch)
1032      Lisp_Object ch;
1033 {
1034   int c, charset, c1, c2;
1035
1036   CHECK_NUMBER (ch);
1037   c = XFASTINT (ch);
1038   if (!CHAR_VALID_P (c, 1))
1039     return Fcons (Qunknown, Fcons (ch, Qnil));
1040   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1041   return (c2 >= 0
1042           ? Fcons (CHARSET_SYMBOL (charset),
1043                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1044           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1045 }
1046
1047 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1048        doc: /* Return charset of CHAR.  */)
1049      (ch)
1050      Lisp_Object ch;
1051 {
1052   CHECK_NUMBER (ch);
1053
1054   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1055 }
1056
1057 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1058        doc: /* Return charset of a character in the current buffer at position POS.
1059 If POS is nil, it defauls to the current point.
1060 If POS is out of range, the value is nil.  */)
1061      (pos)
1062      Lisp_Object pos;
1063 {
1064   Lisp_Object ch;
1065   int charset;
1066
1067   ch = Fchar_after (pos);
1068   if (! INTEGERP (ch))
1069     return ch;
1070   charset = CHAR_CHARSET (XINT (ch));
1071   return CHARSET_SYMBOL (charset);
1072 }
1073
1074 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1075        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1076
1077 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1078 by their DIMENSION, CHARS, and FINAL-CHAR,
1079 where as Emacs distinguishes them by charset symbol.
1080 See the documentation of the function `charset-info' for the meanings of
1081 DIMENSION, CHARS, and FINAL-CHAR.  */)
1082      (dimension, chars, final_char)
1083      Lisp_Object dimension, chars, final_char;
1084 {
1085   int charset;
1086
1087   CHECK_NUMBER (dimension);
1088   CHECK_NUMBER (chars);
1089   CHECK_NUMBER (final_char);
1090
1091   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1092     return Qnil;
1093   return CHARSET_SYMBOL (charset);
1094 }
1095
1096 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1097    generic character.  If GENERICP is zero, return nonzero iff C is a
1098    valid normal character.  Do not call this function directly,
1099    instead use macro CHAR_VALID_P.  */
1100 int
1101 char_valid_p (c, genericp)
1102      int c, genericp;
1103 {
1104   int charset, c1, c2;
1105
1106   if (c < 0 || c >= MAX_CHAR)
1107     return 0;
1108   if (SINGLE_BYTE_CHAR_P (c))
1109     return 1;
1110   SPLIT_CHAR (c, charset, c1, c2);
1111   if (genericp)
1112     {
1113       if (c1)
1114         {
1115           if (c2 <= 0) c2 = 0x20;
1116         }
1117       else
1118         {
1119           if (c2 <= 0) c1 = c2 = 0x20;
1120         }
1121     }
1122   return (CHARSET_DEFINED_P (charset)
1123           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1124 }
1125
1126 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1127        doc: /* Return t if OBJECT is a valid normal character.
1128 If optional arg GENERICP is non-nil, also return t if OBJECT is
1129 a valid generic character.  */)
1130      (object, genericp)
1131      Lisp_Object object, genericp;
1132 {
1133   if (! NATNUMP (object))
1134     return Qnil;
1135   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1136 }
1137
1138 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1139        Sunibyte_char_to_multibyte, 1, 1, 0,
1140        doc: /* Convert the unibyte character CH to multibyte character.
1141 The conversion is done based on `nonascii-translation-table' (which see)
1142  or `nonascii-insert-offset' (which see).  */)
1143      (ch)
1144      Lisp_Object ch;
1145 {
1146   int c;
1147
1148   CHECK_NUMBER (ch);
1149   c = XINT (ch);
1150   if (c < 0 || c >= 0400)
1151     error ("Invalid unibyte character: %d", c);
1152   c = unibyte_char_to_multibyte (c);
1153   if (c < 0)
1154     error ("Can't convert to multibyte character: %d", XINT (ch));
1155   return make_number (c);
1156 }
1157
1158 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1159        Smultibyte_char_to_unibyte, 1, 1, 0,
1160        doc: /* Convert the multibyte character CH to unibyte character.
1161 The conversion is done based on `nonascii-translation-table' (which see)
1162  or `nonascii-insert-offset' (which see).  */)
1163      (ch)
1164      Lisp_Object ch;
1165 {
1166   int c;
1167
1168   CHECK_NUMBER (ch);
1169   c = XINT (ch);
1170   if (! CHAR_VALID_P (c, 0))
1171     error ("Invalid multibyte character: %d", c);
1172   c = multibyte_char_to_unibyte (c, Qnil);
1173   if (c < 0)
1174     error ("Can't convert to unibyte character: %d", XINT (ch));
1175   return make_number (c);
1176 }
1177
1178 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1179        doc: /* Return 1 regardless of the argument CHAR.
1180 This is now an obsolete function.  We keep it just for backward compatibility.  */)
1181      (ch)
1182      Lisp_Object ch;
1183 {
1184   CHECK_NUMBER (ch);
1185   return make_number (1);
1186 }
1187
1188 /* Return how many bytes C will occupy in a multibyte buffer.
1189    Don't call this function directly, instead use macro CHAR_BYTES.  */
1190 int
1191 char_bytes (c)
1192      int c;
1193 {
1194   int charset;
1195
1196   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1197     return 1;
1198   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1199     return 1;
1200
1201   charset = CHAR_CHARSET (c);
1202   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1203 }
1204
/* Return the width of character of which multi-byte form starts with
   C.  The width is measured by how many columns occupied on the
   screen when displayed in the current buffer.

   C is parenthesized at every use so that any expression can be
   passed safely.  It is still evaluated more than once, so it must
   not have side effects.  */

#define ONE_BYTE_CHAR_WIDTH(c)						\
  ((c) < 0x20								\
   ? ((c) == '\t'							\
      ? XFASTINT (current_buffer->tab_width)				\
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))	\
   : ((c) < 0x7f							\
      ? 1								\
      : ((c) == 0x7F							\
	 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)			\
	 : ((! NILP (current_buffer->enable_multibyte_characters)	\
	     && BASE_LEADING_CODE_P (c))				\
	    ? WIDTH_BY_CHAR_HEAD (c)					\
	    : 4))))
1222
1223 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1224        doc: /* Return width of CHAR when displayed in the current buffer.
1225 The width is measured by how many columns it occupies on the screen.
1226 Tab is taken to occupy `tab-width' columns.  */)
1227      (ch)
1228      Lisp_Object ch;
1229 {
1230   Lisp_Object val, disp;
1231   int c;
1232   struct Lisp_Char_Table *dp = buffer_display_table ();
1233
1234   CHECK_NUMBER (ch);
1235
1236   c = XINT (ch);
1237
1238   /* Get the way the display table would display it.  */
1239   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1240
1241   if (VECTORP (disp))
1242     XSETINT (val, XVECTOR (disp)->size);
1243   else if (SINGLE_BYTE_CHAR_P (c))
1244     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1245   else
1246     {
1247       int charset = CHAR_CHARSET (c);
1248
1249       XSETFASTINT (val, CHARSET_WIDTH (charset));
1250     }
1251   return val;
1252 }
1253
1254 /* Return width of string STR of length LEN when displayed in the
1255    current buffer.  The width is measured by how many columns it
1256    occupies on the screen.  */
1257
1258 int
1259 strwidth (str, len)
1260      unsigned char *str;
1261      int len;
1262 {
1263   return c_string_width (str, len, -1, NULL, NULL);
1264 }
1265
1266 /* Return width of string STR of length LEN when displayed in the
1267    current buffer.  The width is measured by how many columns it
1268    occupies on the screen.  If PRECISION > 0, return the width of
1269    longest substring that doesn't exceed PRECISION, and set number of
1270    characters and bytes of the substring in *NCHARS and *NBYTES
1271    respectively.  */
1272
1273 int
1274 c_string_width (str, len, precision, nchars, nbytes)
1275      unsigned char *str;
1276      int precision, *nchars, *nbytes;
1277 {
1278   int i = 0, i_byte = 0;
1279   int width = 0;
1280   int chars;
1281   struct Lisp_Char_Table *dp = buffer_display_table ();
1282
1283   while (i_byte < len)
1284     {
1285       int bytes, thiswidth;
1286       Lisp_Object val;
1287
1288       if (dp)
1289         {
1290           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1291
1292           chars = 1;
1293           val = DISP_CHAR_VECTOR (dp, c);
1294           if (VECTORP (val))
1295             thiswidth = XVECTOR (val)->size;
1296           else
1297             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1298         }
1299       else
1300         {
1301           chars = 1;
1302           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1303           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1304         }
1305
1306       if (precision > 0
1307           && (width + thiswidth > precision))
1308         {
1309           *nchars = i;
1310           *nbytes = i_byte;
1311           return width;
1312         }
1313       i++;
1314       i_byte += bytes;
1315       width += thiswidth;
1316   }
1317
1318   if (precision > 0)
1319     {
1320       *nchars = i;
1321       *nbytes = i_byte;
1322     }
1323
1324   return width;
1325 }
1326
1327 /* Return width of Lisp string STRING when displayed in the current
1328    buffer.  The width is measured by how many columns it occupies on
1329    the screen while paying attention to compositions.  If PRECISION >
1330    0, return the width of longest substring that doesn't exceed
1331    PRECISION, and set number of characters and bytes of the substring
1332    in *NCHARS and *NBYTES respectively.  */
1333
1334 int
1335 lisp_string_width (string, precision, nchars, nbytes)
1336      Lisp_Object string;
1337      int precision, *nchars, *nbytes;
1338 {
1339   int len = XSTRING (string)->size;
1340   int len_byte = STRING_BYTES (XSTRING (string));
1341   unsigned char *str = XSTRING (string)->data;
1342   int i = 0, i_byte = 0;
1343   int width = 0;
1344   struct Lisp_Char_Table *dp = buffer_display_table ();
1345
1346   while (i < len)
1347     {
1348       int chars, bytes, thiswidth;
1349       Lisp_Object val;
1350       int cmp_id;
1351       int ignore, end;
1352
1353       if (find_composition (i, -1, &ignore, &end, &val, string)
1354           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1355               >= 0))
1356         {
1357           thiswidth = composition_table[cmp_id]->width;
1358           chars = end - i;
1359           bytes = string_char_to_byte (string, end) - i_byte;
1360         }
1361       else if (dp)
1362         {
1363           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1364
1365           chars = 1;
1366           val = DISP_CHAR_VECTOR (dp, c);
1367           if (VECTORP (val))
1368             thiswidth = XVECTOR (val)->size;
1369           else
1370             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1371         }
1372       else
1373         {
1374           chars = 1;
1375           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1376           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1377         }
1378
1379       if (precision > 0
1380           && (width + thiswidth > precision))
1381         {
1382           *nchars = i;
1383           *nbytes = i_byte;
1384           return width;
1385         }
1386       i += chars;
1387       i_byte += bytes;
1388       width += thiswidth;
1389   }
1390
1391   if (precision > 0)
1392     {
1393       *nchars = i;
1394       *nbytes = i_byte;
1395     }
1396
1397   return width;
1398 }
1399
1400 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1401        doc: /* Return width of STRING when displayed in the current buffer.
1402 Width is measured by how many columns it occupies on the screen.
1403 When calculating width of a multibyte character in STRING,
1404 only the base leading-code is considered; the validity of
1405 the following bytes is not checked.  Tabs in STRING are always
1406 taken to occupy `tab-width' columns.  */)
1407      (str)
1408      Lisp_Object str;
1409 {
1410   Lisp_Object val;
1411
1412   CHECK_STRING (str);
1413   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
1414   return val;
1415 }
1416
1417 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1418        doc: /* Return the direction of CHAR.
1419 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1420      (ch)
1421      Lisp_Object ch;
1422 {
1423   int charset;
1424
1425   CHECK_NUMBER (ch);
1426   charset = CHAR_CHARSET (XFASTINT (ch));
1427   if (!CHARSET_DEFINED_P (charset))
1428     invalid_character (XINT (ch));
1429   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1430 }
1431
1432 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1433        doc: /* Return number of characters between BEG and END.  */)
1434      (beg, end)
1435      Lisp_Object beg, end;
1436 {
1437   int from, to;
1438
1439   CHECK_NUMBER_COERCE_MARKER (beg);
1440   CHECK_NUMBER_COERCE_MARKER (end);
1441
1442   from = min (XFASTINT (beg), XFASTINT (end));
1443   to = max (XFASTINT (beg), XFASTINT (end));
1444
1445   return make_number (to - from);
1446 }
1447
1448 /* Return the number of characters in the NBYTES bytes at PTR.
1449    This works by looking at the contents and checking for multibyte sequences.
1450    However, if the current buffer has enable-multibyte-characters = nil,
1451    we treat each byte as a character.  */
1452
1453 int
1454 chars_in_text (ptr, nbytes)
1455      unsigned char *ptr;
1456      int nbytes;
1457 {
1458   /* current_buffer is null at early stages of Emacs initialization.  */
1459   if (current_buffer == 0
1460       || NILP (current_buffer->enable_multibyte_characters))
1461     return nbytes;
1462
1463   return multibyte_chars_in_text (ptr, nbytes);
1464 }
1465
/* Return the number of characters in the NBYTES bytes at PTR,
   recognizing multibyte sequences in the text.  Unlike chars_in_text,
   this ignores enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count, seq_len;

  /* Step over one sequence per iteration, counting as we go.  */
  for (count = 0; ptr < limit; count++)
    {
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seq_len);
      ptr += seq_len;
    }

  return count;
}
1490
/* Parse the unibyte text at STR of LEN bytes as if it were multibyte
   text, and store the resulting numbers of characters and bytes in
   *NCHARS and *NBYTES.  When counting bytes, take into account that
   8-bit characters in the range 0x80..0x9F are represented by 2 bytes
   in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     unsigned char *str;
     int len, *nchars, *nbytes;
{
  unsigned char *limit = str + len;
  int n;
  int char_count = 0, byte_count = 0;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, n))
	{
	  /* Already a multibyte sequence of N bytes; kept as it is.  */
	  str += n;
	  byte_count += n;
	}
      else
	{
	  /* A single byte that will take two bytes.  */
	  str++;
	  byte_count += 2;
	}
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1515
1516 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1517    It actually converts only 8-bit characters in the range 0x80..0x9F
1518    that don't contruct multibyte characters to multibyte forms.  If
1519    NCHARS is nonzero, set *NCHARS to the number of characters in the
1520    text.  It is assured that we can use LEN bytes at STR as a work
1521    area and that is enough.  Return the number of bytes of the
1522    resulting text.  */
1523
1524 int
1525 str_as_multibyte (str, len, nbytes, nchars)
1526      unsigned char *str;
1527      int len, nbytes, *nchars;
1528 {
1529   unsigned char *p = str, *endp = str + nbytes;
1530   unsigned char *to;
1531   int chars = 0;
1532   int n;
1533
1534   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1535     p += n, chars++;
1536   if (nchars)
1537     *nchars = chars;
1538   if (p == endp)
1539     return nbytes;
1540
1541   to = p;
1542   nbytes = endp - p;
1543   endp = str + len;
1544   safe_bcopy (p, endp - nbytes, nbytes);
1545   p = endp - nbytes;
1546   while (p < endp)
1547     {
1548       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1549         {
1550           while (n--)
1551             *to++ = *p++;
1552         }
1553       else
1554         {
1555           *to++ = LEADING_CODE_8_BIT_CONTROL;
1556           *to++ = *p++ + 0x20;
1557         }
1558       chars++;
1559     }
1560   if (nchars)
1561     *nchars = chars;
1562   return (to - str);
1563 }
1564
/* Parse unibyte string at STR of LEN bytes, and return the number of
   bytes it may occupy when converted to multibyte string by
   `str_to_multibyte'.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  /* Bytes in 0x80..0x9F take two bytes each; all others take one.  */
  for (i = 0; i < len; i++)
    {
      if (str[i] >= 0x80 && str[i] < 0xA0)
	total += 2;
      else
	total += 1;
    }
  return total;
}
1581
1582 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1583    that contains the same single-byte characters.  It actually
1584    converts all 8-bit characters to multibyte forms.  It is assured
1585    that we can use LEN bytes at STR as a work area and that is
1586    enough.  */
1587
1588 int
1589 str_to_multibyte (str, len, bytes)
1590      unsigned char *str;
1591      int len, bytes;
1592 {
1593   unsigned char *p = str, *endp = str + bytes;
1594   unsigned char *to;
1595
1596   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1597   if (p == endp)
1598     return bytes;
1599   to = p;
1600   bytes = endp - p;
1601   endp = str + len;
1602   safe_bcopy (p, endp - bytes, bytes);
1603   p = endp - bytes;
1604   while (p < endp)
1605     {
1606       if (*p < 0x80 || *p >= 0xA0)
1607         *to++ = *p++;
1608       else
1609         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1610     }
1611   return (to - str);
1612 }
1613
1614 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1615    actually converts only 8-bit characters in the range 0x80..0x9F to
1616    unibyte forms.  */
1617
1618 int
1619 str_as_unibyte (str, bytes)
1620      unsigned char *str;
1621      int bytes;
1622 {
1623   unsigned char *p = str, *endp = str + bytes;
1624   unsigned char *to = str;
1625
1626   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1627   to = p;
1628   while (p < endp)
1629     {
1630       if (*p == LEADING_CODE_8_BIT_CONTROL)
1631         *to++ = *(p + 1) - 0x20, p += 2;
1632       else
1633         *to++ = *p++;
1634     }
1635   return (to - str);
1636 }
1637
1638 \f
1639 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1640   doc: /* Concatenate all the argument characters and make the result a string.
1641 usage: (string &rest CHARACTERS)  */)
1642      (n, args)
1643      int n;
1644      Lisp_Object *args;
1645 {
1646   int i;
1647   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1648   unsigned char *p = buf;
1649   int c;
1650   int multibyte = 0;
1651
1652   for (i = 0; i < n; i++)
1653     {
1654       CHECK_NUMBER (args[i]);
1655       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1656         multibyte = 1;
1657     }
1658
1659   for (i = 0; i < n; i++)
1660     {
1661       c = XINT (args[i]);
1662       if (multibyte)
1663         p += CHAR_STRING (c, p);
1664       else
1665         *p++ = c;
1666     }
1667
1668   return make_string_from_bytes (buf, n, p - buf);
1669 }
1670
1671 #endif /* emacs */
1672 \f
1673 int
1674 charset_id_internal (charset_name)
1675      char *charset_name;
1676 {
1677   Lisp_Object val;
1678
1679   val= Fget (intern (charset_name), Qcharset);
1680   if (!VECTORP (val))
1681     error ("Charset %s is not defined", charset_name);
1682
1683   return (XINT (XVECTOR (val)->contents[0]));
1684 }
1685
/* Look up the IDs of the charsets below by name and cache them in
   the corresponding global variables.  charset_id_internal signals an
   error if a charset is not defined yet, so all of these charsets
   must exist before this is called.  */
DEFUN ("setup-special-charsets", Fsetup_special_charsets,
       Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
     ()
{
  charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
  charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
  charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
  charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
  charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
  charset_big5_1 = charset_id_internal ("chinese-big5-1");
  charset_big5_2 = charset_id_internal ("chinese-big5-2");
  return Qnil;
}
1699
/* Create and initialize the charset tables: the charset table and
   charset symbol table, the ISO charset lookup table, the per-leading
   byte length and width tables, and the generic character list.  It
   also resets the non-ASCII translation settings.  The order of the
   first steps matters; see the comments below.  */
void
init_charset_once ()
{
  int i, j, k;

  staticpro (&Vcharset_table);
  staticpro (&Vcharset_symbol_table);
  staticpro (&Vgeneric_character_list);

  /* This has to be done here, before we call Fmake_char_table.  */
  Qcharset_table = intern ("charset-table");
  staticpro (&Qcharset_table);

  /* Intern this now in case it isn't already done.
     Setting this variable twice is harmless.
     But don't staticpro it here--that is done in alloc.c.  */
  Qchar_table_extra_slots = intern ("char-table-extra-slots");

  /* Now we are ready to set up this property, so we can
     create the charset table.  */
  Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
  Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);

  /* Every slot of the symbol table starts out as `unknown'.  */
  Qunknown = intern ("unknown");
  staticpro (&Qunknown);
  Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
                                        Qunknown);

  /* Setup tables.  */
  /* Mark every entry of the 2 x 2 x 128 ISO charset table as
     unassigned (-1).  */
  for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)
      for (k = 0; k < 128; k++)
        iso_charset_table [i][j][k] = -1;

  /* By default a leading byte starts a 1-byte sequence; only the four
     private leading codes get longer lengths here.  */
  for (i = 0; i < 256; i++)
    bytes_by_char_head[i] = 1;
  bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
  bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;

  /* Default widths: 1 column for bytes below 128, 4 columns for the
     rest; the private leading codes get width 1 or 2.  */
  for (i = 0; i < 128; i++)
    width_by_char_head[i] = 1;
  for (; i < 256; i++)
    width_by_char_head[i] = 4;
  width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
  width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;

  {
    Lisp_Object val;

    /* Build the generic character list from four ranges of I.  The
       list is consed in reverse and put back in order at the end.
       NOTE(review): I presumably ranges over charset IDs, and the
       shift by 7 or 14 presumably selects the dimension-1 or
       dimension-2 character field -- confirm against charset.h.  */
    val = Qnil;
    for (i = 0x81; i < 0x90; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0x9A; i++)
      val = Fcons (make_number ((i - 0x8F) << 14), val);
    for (i = 0xA0; i < 0xF0; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0xFF; i++)
      val = Fcons (make_number ((i - 0xE0) << 14), val);
    Vgeneric_character_list = Fnreverse (val);
  }

  /* Start with no offset and no translation table for non-ASCII
     characters.  */
  nonascii_insert_offset = 0;
  Vnonascii_translation_table = Qnil;
}
1768
1769 #ifdef emacs
1770
1771 void
1772 syms_of_charset ()
1773 {
       /* Set up the Lisp-visible state owned by this file: intern the
          charset symbols, define the three special charsets, register
          the primitives with defsubr, and declare the Lisp variables
          together with their initial values.  Takes no arguments and
          returns nothing.  */

       /* Intern each symbol and pass the address of the variable that
          holds it to staticpro.  */
1774   Qcharset = intern ("charset");
1775   staticpro (&Qcharset);
1776
1777   Qascii = intern ("ascii");
1778   staticpro (&Qascii);
1779
1780   Qeight_bit_control = intern ("eight-bit-control");
1781   staticpro (&Qeight_bit_control);
1782
1783   Qeight_bit_graphic = intern ("eight-bit-graphic");
1784   staticpro (&Qeight_bit_graphic);
1785
1786   /* Define special charsets ascii, eight-bit-control, and
1787      eight-bit-graphic.  */
       /* NOTE(review): the meaning of each positional argument is fixed
          by update_charset_table, which is not defined in this function;
          consult its definition before changing any value below.  */
1788   update_charset_table (make_number (CHARSET_ASCII),
1789                         make_number (1), make_number (94),
1790                         make_number (1),
1791                         make_number (0),
1792                         make_number ('B'),
1793                         make_number (0),
1794                         build_string ("ASCII"),
1795                         Qnil,   /* same as above */
1796                         build_string ("ASCII (ISO646 IRV)"));
       /* Associate the symbol with the charset via CHARSET_SYMBOL, and
          put the charset's table entry on the symbol as its Qcharset
          property.  The same two steps follow each definition below.  */
1797   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1798   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1799
1800   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1801                         make_number (1), make_number (96),
1802                         make_number (4),
1803                         make_number (0),
1804                         make_number (-1),
1805                         make_number (-1),
1806                         build_string ("8-bit control code (0x80..0x9F)"),
1807                         Qnil,   /* same as above */
1808                         Qnil);  /* same as above */
1809   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1810   Fput (Qeight_bit_control, Qcharset,
1811         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1812
1813   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1814                         make_number (1), make_number (96),
1815                         make_number (4),
1816                         make_number (0),
1817                         make_number (-1),
1818                         make_number (-1),
1819                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1820                         Qnil,   /* same as above */
1821                         Qnil);  /* same as above */
1822   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1823   Fput (Qeight_bit_graphic, Qcharset,
1824         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1825
       /* Qauto_fill_chars is the symbol handed to Fmake_char_table at
          the end of this function; give it a Qchar_table_extra_slots
          property of 0 before that call.  */
1826   Qauto_fill_chars = intern ("auto-fill-chars");
1827   staticpro (&Qauto_fill_chars);
1828   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1829
       /* Register the primitives.  NOTE(review): the S... objects are
          not defined in this function; presumably they come from
          definitions earlier in the file -- confirm.  */
1830   defsubr (&Sdefine_charset);
1831   defsubr (&Sgeneric_character_list);
1832   defsubr (&Sget_unused_iso_final_char);
1833   defsubr (&Sdeclare_equiv_charset);
1834   defsubr (&Sfind_charset_region);
1835   defsubr (&Sfind_charset_string);
1836   defsubr (&Smake_char_internal);
1837   defsubr (&Ssplit_char);
1838   defsubr (&Schar_charset);
1839   defsubr (&Scharset_after);
1840   defsubr (&Siso_charset);
1841   defsubr (&Schar_valid_p);
1842   defsubr (&Sunibyte_char_to_multibyte);
1843   defsubr (&Smultibyte_char_to_unibyte);
1844   defsubr (&Schar_bytes);
1845   defsubr (&Schar_width);
1846   defsubr (&Sstring_width);
1847   defsubr (&Schar_direction);
1848   defsubr (&Schars_in_region);
1849   defsubr (&Sstring);
1850   defsubr (&Ssetup_special_charsets);
1851
1852   DEFVAR_LISP ("charset-list", &Vcharset_list,
1853                doc: /* List of charsets ever defined.  */);
       /* Start the list with the symbols of the three special charsets
          defined above.  */
1854   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1855                                         Fcons (Qeight_bit_graphic, Qnil)));
1856
1857   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1858                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1859 An ID of a translation table is an index of this vector.  */);
       /* The vector starts out with 16 elements, all nil.  */
1860   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1861
       /* Each of the four variables below is declared with DEFVAR_INT
          and then set to the matching LEADING_CODE_PRIVATE_* macro.  */
1862   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1863               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1864   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1865
1866   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1867               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1868   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1869
1870   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1871               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1872   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1873
1874   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1875               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1876   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1877
1878   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1879               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1880 This is used for converting unibyte text to multibyte,
1881 and for inserting character codes specified by number.
1882
1883 This serves to convert a Latin-1 or similar 8-bit character code
1884 to the corresponding Emacs multibyte character code.
1885 Typically the value should be (- (make-char CHARSET 0) 128),
1886 for your choice of character set.
1887 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
       /* The function just above this one assigns the same initial
          values to nonascii_insert_offset and
          Vnonascii_translation_table.  */
1888   nonascii_insert_offset = 0;
1889
1890   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1891                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1892 This is used for converting unibyte text to multibyte,
1893 and for inserting character codes specified by number.
1894
1895 Conversion is performed only when multibyte characters are enabled,
1896 and it serves to convert a Latin-1 or similar 8-bit character code
1897 to the corresponding Emacs character code.
1898
1899 If this is nil, `nonascii-insert-offset' is used instead.
1900 See also the docstring of `make-translation-table'.  */);
1901   Vnonascii_translation_table = Qnil;
1902
1903   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1904                doc: /* A char-table for characters which invoke auto-filling.
1905 Such characters have value t in this table.  */);
       /* Create the char-table (second argument Qnil), then set the
          entries for space and newline to t, matching the doc string.  */
1906   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1907   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1908   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1909 }
1910
1911 #endif /* emacs */