src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /* At first, see the document in `charset.h' to understand the code in
  24    this file.  */
  25
  26 #ifdef emacs
  27 #include <config.h>
  28 #endif
  29
  30 #include <stdio.h>
  31
  32 #ifdef emacs
  33
  34 #include <sys/types.h>
  35 #include "lisp.h"
  36 #include "buffer.h"
  37 #include "charset.h"
  38 #include "composite.h"
  39 #include "coding.h"
  40 #include "disptab.h"
  41
  42 #else  /* not emacs */
  43
  44 #include "mulelib.h"
  45
  46 #endif /* emacs */
  47
  48 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  49 Lisp_Object Qunknown;
  50
  51 /* Declaration of special leading-codes.  */
  52 EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
  53 EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
  54 EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
  55 EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
  56
  57 /* Declaration of special charsets.  The values are set by
  58    Fsetup_special_charsets.  */
  59 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  60 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  61 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  62 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  63 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  64 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  65 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  66
  67 Lisp_Object Qcharset_table;
  68
  69 /* A char-table containing information of each character set.  */
  70 Lisp_Object Vcharset_table;
  71
  72 /* A vector of charset symbol indexed by charset-id.  This is used
  73    only for returning charset symbol from C functions.  */
  74 Lisp_Object Vcharset_symbol_table;
  75
  76 /* A list of charset symbols ever defined.  */
  77 Lisp_Object Vcharset_list;
  78
  79 /* Vector of translation table ever defined.
  80    ID of a translation table is used to index this vector.  */
  81 Lisp_Object Vtranslation_table_vector;
  82
  83 /* A char-table for characters which may invoke auto-filling.  */
  84 Lisp_Object Vauto_fill_chars;
  85
  86 Lisp_Object Qauto_fill_chars;
  87
  88 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  89 int bytes_by_char_head[256];
  90 int width_by_char_head[256];
  91
  92 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  93    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  94 int iso_charset_table[2][2][128];
  95
  96 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  97 unsigned char *_fetch_multibyte_char_p;
  98 int _fetch_multibyte_char_len;
  99
 100 /* Offset to add to a non-ASCII value when inserting it.  */
 101 EMACS_INT nonascii_insert_offset;
 102
 103 /* Translation table for converting non-ASCII unibyte characters
 104    to multibyte codes, or nil.  */
 105 Lisp_Object Vnonascii_translation_table;
 106
 107 /* List of all possible generic characters.  */
 108 Lisp_Object Vgeneric_character_list;
 109
 110 \f
 111 void
 112 invalid_character (c)
 113      int c;
 114 {
 115   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 116 }
 117
 118 /* Parse string STR of length LENGTH and fetch information of a
 119    character at STR.  Set BYTES to the byte length the character
 120    occupies, CHARSET, C1, C2 to proper values of the character. */
 121
 122 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 123   do {                                                                       \
 124     (c1) = *(str);                                                           \
 125     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 126     if ((bytes) == 1)                                                        \
 127       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 128     else if ((bytes) == 2)                                                   \
 129       {                                                                      \
 130         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 131           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 132         else                                                                 \
 133           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 134       }                                                                      \
 135     else if ((bytes) == 3)                                                   \
 136       {                                                                      \
 137         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 138           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 139         else                                                                 \
 140           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 141       }                                                                      \
 142     else                                                                     \
 143       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 144   } while (0)
 145
 146 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
 147    Note that this intentionally allows invalid components, such
 148    as 0xA0 0xA0, because there exist many files that contain
 149    such invalid byte sequences, especially in EUC-GB. */
 150 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 151   ((charset) == CHARSET_ASCII                           \
 152    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 153    : ((charset) == CHARSET_8_BIT_CONTROL                \
 154       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 155       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 156          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 157          : (CHARSET_DIMENSION (charset) == 1            \
 158             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 159             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 160                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 161
 162 /* Store multi-byte form of the character C in STR.  The caller should
 163    allocate at least 4-byte area at STR in advance.  Returns the
 164    length of the multi-byte form.  If C is an invalid character code,
 165    return -1.  */
 166
 167 int
 168 char_to_string_1 (c, str)
 169      int c;
 170      unsigned char *str;
 171 {
 172   unsigned char *p = str;
 173
 174   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 175     {
 176       /* Multibyte character can't have a modifier bit.  */
 177       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 178         return -1;
 179
 180       /* For Meta, Shift, and Control modifiers, we need special care.  */
 181       if (c & CHAR_META)
 182         {
 183           /* Move the meta bit to the right place for a string.  */
 184           c = (c & ~CHAR_META) | 0x80;
 185         }
 186       if (c & CHAR_SHIFT)
 187         {
 188           /* Shift modifier is valid only with [A-Za-z].  */
 189           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 190             c &= ~CHAR_SHIFT;
 191           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 192             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 193         }
 194       if (c & CHAR_CTL)
 195         {
 196           /* Simulate the code in lread.c.  */
 197           /* Allow `\C- ' and `\C-?'.  */
 198           if (c == (CHAR_CTL | ' '))
 199             c = 0;
 200           else if (c == (CHAR_CTL | '?'))
 201             c = 127;
 202           /* ASCII control chars are made from letters (both cases),
 203              as well as the non-letters within 0100...0137.  */
 204           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 205             c &= (037 | (~0177 & ~CHAR_CTL));
 206           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 207             c &= (037 | (~0177 & ~CHAR_CTL));
 208         }
 209
 210       /* If C still has any modifier bits, just ignore it.  */
 211       c &= ~CHAR_MODIFIER_MASK;
 212     }
 213
 214   if (SINGLE_BYTE_CHAR_P (c))
 215     {
 216       if (ASCII_BYTE_P (c) || c >= 0xA0)
 217         *p++ = c;
 218       else
 219         {
 220           *p++ = LEADING_CODE_8_BIT_CONTROL;
 221           *p++ = c + 0x20;
 222         }
 223     }
 224   else if (CHAR_VALID_P (c, 0))
 225     {
 226       int charset, c1, c2;
 227
 228       SPLIT_CHAR (c, charset, c1, c2);
 229
 230       if (charset >= LEADING_CODE_EXT_11)
 231         *p++ = (charset < LEADING_CODE_EXT_12
 232                 ? LEADING_CODE_PRIVATE_11
 233                 : (charset < LEADING_CODE_EXT_21
 234                    ? LEADING_CODE_PRIVATE_12
 235                    : (charset < LEADING_CODE_EXT_22
 236                       ? LEADING_CODE_PRIVATE_21
 237                       : LEADING_CODE_PRIVATE_22)));
 238       *p++ = charset;
 239       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 240         return -1;
 241       if (c1)
 242         {
 243           *p++ = c1 | 0x80;
 244           if (c2 > 0)
 245             *p++ = c2 | 0x80;
 246         }
 247     }
 248   else
 249     return -1;
 250
 251   return (p - str);
 252 }
 253
 254
 255 /* Store multi-byte form of the character C in STR.  The caller should
 256    allocate at least 4-byte area at STR in advance.  Returns the
 257    length of the multi-byte form.  If C is an invalid character code,
 258    signal an error.
 259
 260    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 261    directly if C can be an ASCII character.  */
 262
 263 int
 264 char_to_string (c, str)
 265      int c;
 266      unsigned char *str;
 267 {
 268   int len;
 269   len = char_to_string_1 (c, str);
 270   if (len == -1)
 271     invalid_character (c);
 272   return len;
 273 }
 274
 275
 276 /* Return the non-ASCII character corresponding to multi-byte form at
 277    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 278    length of the multibyte form in *ACTUAL_LEN.
 279
 280    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 281    this function directly if you want ot handle ASCII characters as
 282    well.  */
 283
 284 int
 285 string_to_char (str, len, actual_len)
 286      const unsigned char *str;
 287      int len, *actual_len;
 288 {
 289   int c, bytes, charset, c1, c2;
 290
 291   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 292   c = MAKE_CHAR (charset, c1, c2);
 293   if (actual_len)
 294     *actual_len = bytes;
 295   return c;
 296 }
 297
 298 /* Return the length of the multi-byte form at string STR of length LEN.
 299    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 300 int
 301 multibyte_form_length (str, len)
 302      const unsigned char *str;
 303      int len;
 304 {
 305   int bytes;
 306
 307   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 308   return bytes;
 309 }
 310
 311 /* Check multibyte form at string STR of length LEN and set variables
 312    pointed by CHARSET, C1, and C2 to charset and position codes of the
 313    character at STR, and return 0.  If there's no multibyte character,
 314    return -1.  This should be used only in the macro SPLIT_STRING
 315    which checks range of STR in advance.  */
 316
 317 int
 318 split_string (str, len, charset, c1, c2)
 319      const unsigned char *str;
 320      unsigned char *c1, *c2;
 321      int len, *charset;
 322 {
 323   register int bytes, cs, code1, code2 = -1;
 324
 325   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 326   if (cs == CHARSET_ASCII)
 327     return -1;
 328   *charset = cs;
 329   *c1 = code1;
 330   *c2 = code2;
 331   return 0;
 332 }
 333
 334 /* Return 1 iff character C has valid printable glyph.
 335    Use the macro CHAR_PRINTABLE_P instead.  */
 336 int
 337 char_printable_p (c)
 338      int c;
 339 {
 340   int charset, c1, c2;
 341
 342   if (ASCII_BYTE_P (c))
 343     return 1;
 344   else if (SINGLE_BYTE_CHAR_P (c))
 345     return 0;
 346   else if (c >= MAX_CHAR)
 347     return 0;
 348
 349   SPLIT_CHAR (c, charset, c1, c2);
 350   if (! CHARSET_DEFINED_P (charset))
 351     return 0;
 352   if (CHARSET_CHARS (charset) == 94
 353       ? c1 <= 32 || c1 >= 127
 354       : c1 < 32)
 355     return 0;
 356   if (CHARSET_DIMENSION (charset) == 2
 357       && (CHARSET_CHARS (charset) == 94
 358           ? c2 <= 32 || c2 >= 127
 359           : c2 < 32))
 360     return 0;
 361   return 1;
 362 }
 363
 364 /* Translate character C by translation table TABLE.  If C
 365    is negative, translate a character specified by CHARSET, C1, and C2
 366    (C1 and C2 are code points of the character).  If no translation is
 367    found in TABLE, return C.  */
 368 int
 369 translate_char (table, c, charset, c1, c2)
 370      Lisp_Object table;
 371      int c, charset, c1, c2;
 372 {
 373   Lisp_Object ch;
 374   int alt_charset, alt_c1, alt_c2, dimension;
 375
 376   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 377   if (!CHAR_TABLE_P (table)
 378       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 379     return c;
 380
 381   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 382   dimension = CHARSET_DIMENSION (alt_charset);
 383   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 384     /* CH is not a generic character, just return it.  */
 385     return XFASTINT (ch);
 386
 387   /* Since CH is a generic character, we must return a specific
 388      charater which has the same position codes as C from CH.  */
 389   if (charset < 0)
 390     SPLIT_CHAR (c, charset, c1, c2);
 391   if (dimension != CHARSET_DIMENSION (charset))
 392     /* We can't make such a character because of dimension mismatch.  */
 393     return c;
 394   return MAKE_CHAR (alt_charset, c1, c2);
 395 }
 396
 397 /* Convert the unibyte character C to multibyte based on
 398    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 399    convert C to a valid multibyte character, convert it based on
 400    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 401
 402 int
 403 unibyte_char_to_multibyte (c)
 404      int c;
 405 {
 406   if (c < 0400 && c >= 0200)
 407     {
 408       int c_save = c;
 409
 410       if (! NILP (Vnonascii_translation_table))
 411         {
 412           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 413           if (c >= 0400 && ! char_valid_p (c, 0))
 414             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 415         }
 416       else if (c >= 0240 && nonascii_insert_offset > 0)
 417         {
 418           c += nonascii_insert_offset;
 419           if (c < 0400 || ! char_valid_p (c, 0))
 420             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 421         }
 422       else if (c >= 0240)
 423         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 424     }
 425   return c;
 426 }
 427
 428
 429 /* Convert the multibyte character C to unibyte 8-bit character based
 430    on Vnonascii_translation_table or nonascii_insert_offset.  If
 431    REV_TBL is non-nil, it should be a reverse table of
 432    Vnonascii_translation_table, i.e. what given by:
 433      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 434
 435 int
 436 multibyte_char_to_unibyte (c, rev_tbl)
 437      int c;
 438      Lisp_Object rev_tbl;
 439 {
 440   if (!SINGLE_BYTE_CHAR_P (c))
 441     {
 442       int c_save = c;
 443
 444       if (! CHAR_TABLE_P (rev_tbl)
 445           && CHAR_TABLE_P (Vnonascii_translation_table))
 446         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 447                                           make_number (0));
 448       if (CHAR_TABLE_P (rev_tbl))
 449         {
 450           Lisp_Object temp;
 451           temp = Faref (rev_tbl, make_number (c));
 452           if (INTEGERP (temp))
 453             c = XINT (temp);
 454           if (c >= 256)
 455             c = (c_save & 0177) + 0200;
 456         }
 457       else
 458         {
 459           if (nonascii_insert_offset > 0)
 460             c -= nonascii_insert_offset;
 461           if (c < 128 || c >= 256)
 462             c = (c_save & 0177) + 0200;
 463         }
 464     }
 465
 466   return c;
 467 }
 468
 469 \f
 470 /* Update the table Vcharset_table with the given arguments (see the
 471    document of `define-charset' for the meaning of each argument).
 472    Several other table contents are also updated.  The caller should
 473    check the validity of CHARSET-ID and the remaining arguments in
 474    advance.  */
 475
 476 void
 477 update_charset_table (charset_id, dimension, chars, width, direction,
 478                       iso_final_char, iso_graphic_plane,
 479                       short_name, long_name, description)
 480      Lisp_Object charset_id, dimension, chars, width, direction;
 481      Lisp_Object iso_final_char, iso_graphic_plane;
 482      Lisp_Object short_name, long_name, description;
 483 {
 484   int charset = XINT (charset_id);
 485   int bytes;
 486   unsigned char leading_code_base, leading_code_ext;
 487
 488   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 489     CHARSET_TABLE_ENTRY (charset)
 490       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 491
 492   if (NILP (long_name))
 493     long_name = short_name;
 494   if (NILP (description))
 495     description = long_name;
 496
 497   /* Get byte length of multibyte form, base leading-code, and
 498      extended leading-code of the charset.  See the comment under the
 499      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 500   bytes = XINT (dimension);
 501   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 502     {
 503       /* Official charset, it doesn't have an extended leading-code.  */
 504       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 505         bytes += 1; /* For a base leading-code.  */
 506       leading_code_base = charset;
 507       leading_code_ext = 0;
 508     }
 509   else
 510     {
 511       /* Private charset.  */
 512       bytes += 2; /* For base and extended leading-codes.  */
 513       leading_code_base
 514         = (charset < LEADING_CODE_EXT_12
 515            ? LEADING_CODE_PRIVATE_11
 516            : (charset < LEADING_CODE_EXT_21
 517               ? LEADING_CODE_PRIVATE_12
 518               : (charset < LEADING_CODE_EXT_22
 519                  ? LEADING_CODE_PRIVATE_21
 520                  : LEADING_CODE_PRIVATE_22)));
 521       leading_code_ext = charset;
 522       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 523         error ("Invalid dimension for the charset-ID %d", charset);
 524     }
 525
 526   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 527   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 528   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 529   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 530   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 531   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 532   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 533     = make_number (leading_code_base);
 534   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 535     = make_number (leading_code_ext);
 536   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 537   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 538     = iso_graphic_plane;
 539   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 540   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 541   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 542   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 543
 544   {
 545     /* If we have already defined a charset which has the same
 546        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 547        DIRECTION, we must update the entry REVERSE-CHARSET of both
 548        charsets.  If there's no such charset, the value of the entry
 549        is set to nil.  */
 550     int i;
 551
 552     for (i = 0; i <= MAX_CHARSET; i++)
 553       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 554         {
 555           if (CHARSET_DIMENSION (i) == XINT (dimension)
 556               && CHARSET_CHARS (i) == XINT (chars)
 557               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 558               && CHARSET_DIRECTION (i) != XINT (direction))
 559             {
 560               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 561                 = make_number (i);
 562               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 563               break;
 564             }
 565         }
 566     if (i > MAX_CHARSET)
 567       /* No such a charset.  */
 568       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 569         = make_number (-1);
 570   }
 571
 572   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 573       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 574     {
 575       bytes_by_char_head[leading_code_base] = bytes;
 576       width_by_char_head[leading_code_base] = XINT (width);
 577
 578       /* Update table emacs_code_class.  */
 579       emacs_code_class[charset] = (bytes == 2
 580                                    ? EMACS_leading_code_2
 581                                    : (bytes == 3
 582                                       ? EMACS_leading_code_3
 583                                       : EMACS_leading_code_4));
 584     }
 585
 586   /* Update table iso_charset_table.  */
 587   if (XINT (iso_final_char) >= 0
 588       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 589     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 590 }
 591
 592 #ifdef emacs
 593
 594 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 595    is invalid.  */
 596 int
 597 get_charset_id (charset_symbol)
 598      Lisp_Object charset_symbol;
 599 {
 600   Lisp_Object val;
 601   int charset;
 602
 603   /* This originally used a ?: operator, but reportedly the HP-UX
 604      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 605   if (SYMBOLP (charset_symbol)
 606       && VECTORP (val = Fget (charset_symbol, Qcharset))
 607       && CHARSET_VALID_P (charset =
 608                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 609     return charset;
 610   else
 611     return -1;
 612 }
 613
 614 /* Return an identification number for a new private charset of
 615    DIMENSION and WIDTH.  If there's no more room for the new charset,
 616    return 0.  */
 617 Lisp_Object
 618 get_new_private_charset_id (dimension, width)
 619      int dimension, width;
 620 {
 621   int charset, from, to;
 622
 623   if (dimension == 1)
 624     {
 625       from = LEADING_CODE_EXT_11;
 626       to = LEADING_CODE_EXT_21;
 627     }
 628   else
 629     {
 630       from = LEADING_CODE_EXT_21;
 631       to = LEADING_CODE_EXT_MAX + 1;
 632     }
 633
 634   for (charset = from; charset < to; charset++)
 635     if (!CHARSET_DEFINED_P (charset)) break;
 636
 637   return make_number (charset < to ? charset : 0);
 638 }
 639
 640 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 641        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 642 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 643  treated as a private charset.
 644 INFO-VECTOR is a vector of the format:
 645    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 646     SHORT-NAME LONG-NAME DESCRIPTION]
 647 The meanings of each elements is as follows:
 648 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 649 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 650 WIDTH (integer) is the number of columns a character in the charset
 651 occupies on the screen: one of 0, 1, and 2.
 652
 653 DIRECTION (integer) is the rendering direction of characters in the
 654 charset when rendering.  If 0, render from left to right, else
 655 render from right to left.
 656
 657 ISO-FINAL-CHAR (character) is the final character of the
 658 corresponding ISO 2022 charset.
 659 It may be -1 if the charset is internal use only.
 660
 661 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 662 while encoding to variants of ISO 2022 coding system, one of the
 663 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 664 It may be -1 if the charset is internal use only.
 665
 666 SHORT-NAME (string) is the short name to refer to the charset.
 667
 668 LONG-NAME (string) is the long name to refer to the charset.
 669
 670 DESCRIPTION (string) is the description string of the charset.  */)
 671        (charset_id, charset_symbol, info_vector)
 672      Lisp_Object charset_id, charset_symbol, info_vector;
 673 {
 674   Lisp_Object *vec;
 675
 676   if (!NILP (charset_id))
 677     CHECK_NUMBER (charset_id);
 678   CHECK_SYMBOL (charset_symbol);
 679   CHECK_VECTOR (info_vector);
 680
 681   if (! NILP (charset_id))
 682     {
 683       if (! CHARSET_VALID_P (XINT (charset_id)))
 684         error ("Invalid CHARSET: %d", XINT (charset_id));
 685       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 686         error ("Already defined charset: %d", XINT (charset_id));
 687     }
 688
 689   vec = XVECTOR (info_vector)->contents;
 690   if (XVECTOR (info_vector)->size != 9
 691       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 692       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 693       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 694       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 695       || !INTEGERP (vec[4])
 696       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 697       || !INTEGERP (vec[5])
 698       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 699       || !STRINGP (vec[6])
 700       || !STRINGP (vec[7])
 701       || !STRINGP (vec[8]))
 702     error ("Invalid info-vector argument for defining charset %s",
 703            SDATA (SYMBOL_NAME (charset_symbol)));
 704
 705   if (NILP (charset_id))
 706     {
 707       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 708       if (XINT (charset_id) == 0)
 709         error ("There's no room for a new private charset %s",
 710                SDATA (SYMBOL_NAME (charset_symbol)));
 711     }
 712
 713   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 714                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 715   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 716   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 717   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 718   Fupdate_coding_systems_internal ();
 719   return Qnil;
 720 }
 721
 722 DEFUN ("generic-character-list", Fgeneric_character_list,
 723        Sgeneric_character_list, 0, 0, 0,
 724        doc: /* Return a list of all possible generic characters.
 725 It includes a generic character for a charset not yet defined.  */)
 726      ()
 727 {
 728   return Vgeneric_character_list;
 729 }
 730
 731 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 732        Sget_unused_iso_final_char, 2, 2, 0,
 733        doc: /* Return an unsed ISO's final char for a charset of DIMENISION and CHARS.
 734 DIMENSION is the number of bytes to represent a character: 1 or 2.
 735 CHARS is the number of characters in a dimension: 94 or 96.
 736
 737 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 738 If there's no unused final char for the specified kind of charset,
 739 return nil.  */)
 740      (dimension, chars)
 741      Lisp_Object dimension, chars;
 742 {
 743   int final_char;
 744
 745   CHECK_NUMBER (dimension);
 746   CHECK_NUMBER (chars);
 747   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 748     error ("Invalid charset dimension %d, it should be 1 or 2",
 749            XINT (dimension));
 750   if (XINT (chars) != 94 && XINT (chars) != 96)
 751     error ("Invalid charset chars %d, it should be 94 or 96",
 752            XINT (chars));
 753   for (final_char = '0'; final_char <= '?'; final_char++)
 754     {
 755       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 756         break;
 757     }
 758   return (final_char <= '?' ? make_number (final_char) : Qnil);
 759 }
 760
 761 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 762        4, 4, 0,
 763        doc: /* Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
 764 CHARSET should be defined by `defined-charset' in advance.  */)
 765      (dimension, chars, final_char, charset_symbol)
 766      Lisp_Object dimension, chars, final_char, charset_symbol;
 767 {
 768   int charset;
 769
 770   CHECK_NUMBER (dimension);
 771   CHECK_NUMBER (chars);
 772   CHECK_NUMBER (final_char);
 773   CHECK_SYMBOL (charset_symbol);
 774
 775   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 776     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 777   if (XINT (chars) != 94 && XINT (chars) != 96)
 778     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 779   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 780     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 781   if ((charset = get_charset_id (charset_symbol)) < 0)
 782     error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset_symbol)));
 783
 784   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 785   return Qnil;
 786 }
 787
 788 /* Return information about charsets in the text at PTR of NBYTES
 789    bytes, which are NCHARS characters.  The value is:
 790
 791         0: Each character is represented by one byte.  This is always
 792            true for unibyte text.
 793         1: No charsets other than ascii eight-bit-control,
 794            eight-bit-graphic, and latin-1 are found.
 795         2: Otherwise.
 796
 797    In addition, if CHARSETS is nonzero, for each found charset N, set
 798    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 799    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 800    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 801    1 (note that there's no charset whose ID is 1).  */
 802
 803 int
 804 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 805      const unsigned char *ptr;
 806      int nchars, nbytes, *charsets;
 807      Lisp_Object table;
 808 {
 809   if (nchars == nbytes)
 810     {
 811       if (charsets && nbytes > 0)
 812         {
 813           const unsigned char *endp = ptr + nbytes;
 814           int maskbits = 0;
 815
 816           while (ptr < endp && maskbits != 7)
 817             {
 818               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 819               ptr++;
 820             }
 821
 822           if (maskbits & 1)
 823             charsets[CHARSET_ASCII] = 1;
 824           if (maskbits & 2)
 825             charsets[CHARSET_8_BIT_CONTROL] = 1;
 826           if (maskbits & 4)
 827             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 828         }
 829       return 0;
 830     }
 831   else
 832     {
 833       int return_val = 1;
 834       int bytes, charset, c1, c2;
 835
 836       if (! CHAR_TABLE_P (table))
 837         table = Qnil;
 838
 839       while (nchars-- > 0)
 840         {
 841           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 842           ptr += bytes;
 843
 844           if (!CHARSET_DEFINED_P (charset))
 845             charset = 1;
 846           else if (! NILP (table))
 847             {
 848               int c = translate_char (table, -1, charset, c1, c2);
 849               if (c >= 0)
 850                 charset = CHAR_CHARSET (c);
 851             }
 852
 853           if (return_val == 1
 854               && charset != CHARSET_ASCII
 855               && charset != CHARSET_8_BIT_CONTROL
 856               && charset != CHARSET_8_BIT_GRAPHIC
 857               && charset != charset_latin_iso8859_1)
 858             return_val = 2;
 859
 860           if (charsets)
 861             charsets[charset] = 1;
 862           else if (return_val == 2)
 863             break;
 864         }
 865       return return_val;
 866     }
 867 }
 868
 869 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 870        2, 3, 0,
 871        doc: /* Return a list of charsets in the region between BEG and END.
 872 BEG and END are buffer positions.
 873 Optional arg TABLE if non-nil is a translation table to look up.
 874
 875 If the region contains invalid multibyte characters,
 876 `unknown' is included in the returned list.
 877
 878 If the current buffer is unibyte, the returned list may contain
 879 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 880      (beg, end, table)
 881      Lisp_Object beg, end, table;
 882 {
 883   int charsets[MAX_CHARSET + 1];
 884   int from, from_byte, to, stop, stop_byte, i;
 885   Lisp_Object val;
 886
 887   validate_region (&beg, &end);
 888   from = XFASTINT (beg);
 889   stop = to = XFASTINT (end);
 890
 891   if (from < GPT && GPT < to)
 892     {
 893       stop = GPT;
 894       stop_byte = GPT_BYTE;
 895     }
 896   else
 897     stop_byte = CHAR_TO_BYTE (stop);
 898
 899   from_byte = CHAR_TO_BYTE (from);
 900
 901   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 902   while (1)
 903     {
 904       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 905                             stop_byte - from_byte, charsets, table);
 906       if (stop < to)
 907         {
 908           from = stop, from_byte = stop_byte;
 909           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 910         }
 911       else
 912         break;
 913     }
 914
 915   val = Qnil;
 916   if (charsets[1])
 917     val = Fcons (Qunknown, val);
 918   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 919     if (charsets[i])
 920       val = Fcons (CHARSET_SYMBOL (i), val);
 921   if (charsets[0])
 922     val = Fcons (Qascii, val);
 923   return val;
 924 }
 925
 926 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 927        1, 2, 0,
 928        doc: /* Return a list of charsets in STR.
 929 Optional arg TABLE if non-nil is a translation table to look up.
 930
 931 If the string contains invalid multibyte characters,
 932 `unknown' is included in the returned list.
 933
 934 If STR is unibyte, the returned list may contain
 935 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 936      (str, table)
 937      Lisp_Object str, table;
 938 {
 939   int charsets[MAX_CHARSET + 1];
 940   int i;
 941   Lisp_Object val;
 942
 943   CHECK_STRING (str);
 944
 945   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 946   find_charset_in_text (SDATA (str), SCHARS (str),
 947                         SBYTES (str), charsets, table);
 948
 949   val = Qnil;
 950   if (charsets[1])
 951     val = Fcons (Qunknown, val);
 952   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 953     if (charsets[i])
 954       val = Fcons (CHARSET_SYMBOL (i), val);
 955   if (charsets[0])
 956     val = Fcons (Qascii, val);
 957   return val;
 958 }
 959
 960 \f
 961 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 962        doc: /* Return a character made from arguments.
 963 Internal use only.  */)
 964      (charset, code1, code2)
 965      Lisp_Object charset, code1, code2;
 966 {
 967   int charset_id, c1, c2;
 968
 969   CHECK_NUMBER (charset);
 970   charset_id = XINT (charset);
 971   if (!CHARSET_DEFINED_P (charset_id))
 972     error ("Invalid charset ID: %d", XINT (charset));
 973
 974   if (NILP (code1))
 975     c1 = 0;
 976   else
 977     {
 978       CHECK_NUMBER (code1);
 979       c1 = XINT (code1);
 980     }
 981   if (NILP (code2))
 982     c2 = 0;
 983   else
 984     {
 985       CHECK_NUMBER (code2);
 986       c2 = XINT (code2);
 987     }
 988
 989   if (charset_id == CHARSET_ASCII)
 990     {
 991       if (c1 < 0 || c1 > 0x7F)
 992         goto invalid_code_posints;
 993       return make_number (c1);
 994     }
 995   else if (charset_id == CHARSET_8_BIT_CONTROL)
 996     {
 997       if (NILP (code1))
 998         c1 = 0x80;
 999       else if (c1 < 0x80 || c1 > 0x9F)
1000         goto invalid_code_posints;
1001       return make_number (c1);
1002     }
1003   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1004     {
1005       if (NILP (code1))
1006         c1 = 0xA0;
1007       else if (c1 < 0xA0 || c1 > 0xFF)
1008         goto invalid_code_posints;
1009       return make_number (c1);
1010     }
1011   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1012     goto invalid_code_posints;
1013   c1 &= 0x7F;
1014   c2 &= 0x7F;
1015   if (c1 == 0
1016       ? c2 != 0
1017       : (c2 == 0
1018          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1019          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1020     goto invalid_code_posints;
1021   return make_number (MAKE_CHAR (charset_id, c1, c2));
1022
1023  invalid_code_posints:
1024   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1025 }
1026
1027 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1028        doc: /* Return list of charset and one or two position-codes of CHAR.
1029 If CHAR is invalid as a character code,
1030 return a list of symbol `unknown' and CHAR.  */)
1031      (ch)
1032      Lisp_Object ch;
1033 {
1034   int c, charset, c1, c2;
1035
1036   CHECK_NUMBER (ch);
1037   c = XFASTINT (ch);
1038   if (!CHAR_VALID_P (c, 1))
1039     return Fcons (Qunknown, Fcons (ch, Qnil));
1040   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1041   return (c2 >= 0
1042           ? Fcons (CHARSET_SYMBOL (charset),
1043                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1044           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1045 }
1046
1047 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1048        doc: /* Return charset of CHAR.  */)
1049      (ch)
1050      Lisp_Object ch;
1051 {
1052   CHECK_NUMBER (ch);
1053
1054   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1055 }
1056
1057 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1058        doc: /* Return charset of a character in the current buffer at position POS.
1059 If POS is nil, it defauls to the current point.
1060 If POS is out of range, the value is nil.  */)
1061      (pos)
1062      Lisp_Object pos;
1063 {
1064   Lisp_Object ch;
1065   int charset;
1066
1067   ch = Fchar_after (pos);
1068   if (! INTEGERP (ch))
1069     return ch;
1070   charset = CHAR_CHARSET (XINT (ch));
1071   return CHARSET_SYMBOL (charset);
1072 }
1073
1074 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1075        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1076
1077 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1078 by their DIMENSION, CHARS, and FINAL-CHAR,
1079 where as Emacs distinguishes them by charset symbol.
1080 See the documentation of the function `charset-info' for the meanings of
1081 DIMENSION, CHARS, and FINAL-CHAR.  */)
1082      (dimension, chars, final_char)
1083      Lisp_Object dimension, chars, final_char;
1084 {
1085   int charset;
1086
1087   CHECK_NUMBER (dimension);
1088   CHECK_NUMBER (chars);
1089   CHECK_NUMBER (final_char);
1090
1091   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1092     return Qnil;
1093   return CHARSET_SYMBOL (charset);
1094 }
1095
1096 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1097    generic character.  If GENERICP is zero, return nonzero iff C is a
1098    valid normal character.  Do not call this function directly,
1099    instead use macro CHAR_VALID_P.  */
1100 int
1101 char_valid_p (c, genericp)
1102      int c, genericp;
1103 {
1104   int charset, c1, c2;
1105
1106   if (c < 0 || c >= MAX_CHAR)
1107     return 0;
1108   if (SINGLE_BYTE_CHAR_P (c))
1109     return 1;
1110   SPLIT_CHAR (c, charset, c1, c2);
1111   if (genericp)
1112     {
1113       if (c1)
1114         {
1115           if (c2 <= 0) c2 = 0x20;
1116         }
1117       else
1118         {
1119           if (c2 <= 0) c1 = c2 = 0x20;
1120         }
1121     }
1122   return (CHARSET_DEFINED_P (charset)
1123           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1124 }
1125
1126 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1127        doc: /* Return t if OBJECT is a valid normal character.
1128 If optional arg GENERICP is non-nil, also return t if OBJECT is
1129 a valid generic character.  */)
1130      (object, genericp)
1131      Lisp_Object object, genericp;
1132 {
1133   if (! NATNUMP (object))
1134     return Qnil;
1135   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1136 }
1137
1138 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1139        Sunibyte_char_to_multibyte, 1, 1, 0,
1140        doc: /* Convert the unibyte character CH to multibyte character.
1141 The conversion is done based on `nonascii-translation-table' (which see)
1142  or `nonascii-insert-offset' (which see).  */)
1143      (ch)
1144      Lisp_Object ch;
1145 {
1146   int c;
1147
1148   CHECK_NUMBER (ch);
1149   c = XINT (ch);
1150   if (c < 0 || c >= 0400)
1151     error ("Invalid unibyte character: %d", c);
1152   c = unibyte_char_to_multibyte (c);
1153   if (c < 0)
1154     error ("Can't convert to multibyte character: %d", XINT (ch));
1155   return make_number (c);
1156 }
1157
1158 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1159        Smultibyte_char_to_unibyte, 1, 1, 0,
1160        doc: /* Convert the multibyte character CH to unibyte character.
1161 The conversion is done based on `nonascii-translation-table' (which see)
1162  or `nonascii-insert-offset' (which see).  */)
1163      (ch)
1164      Lisp_Object ch;
1165 {
1166   int c;
1167
1168   CHECK_NUMBER (ch);
1169   c = XINT (ch);
1170   if (! CHAR_VALID_P (c, 0))
1171     error ("Invalid multibyte character: %d", c);
1172   c = multibyte_char_to_unibyte (c, Qnil);
1173   if (c < 0)
1174     error ("Can't convert to unibyte character: %d", XINT (ch));
1175   return make_number (c);
1176 }
1177
1178 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1179        doc: /* Return 1 regardless of the argument CHAR.  */)
1180      (ch)
1181      Lisp_Object ch;
1182 {
1183   CHECK_NUMBER (ch);
1184   return make_number (1);
1185 }
1186
1187 /* Return how many bytes C will occupy in a multibyte buffer.
1188    Don't call this function directly, instead use macro CHAR_BYTES.  */
1189 int
1190 char_bytes (c)
1191      int c;
1192 {
1193   int charset;
1194
1195   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1196     return 1;
1197   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1198     return 1;
1199
1200   charset = CHAR_CHARSET (c);
1201   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1202 }
1203
/* Return the width of character of which multi-byte form starts with
   C.  The width is measured by how many columns occupied on the
   screen when displayed in the current buffer.

   Tab takes `tab_width' columns of the current buffer and newline
   takes none.  Other codes below 0x20, and 0x7F, take 2 columns when
   the buffer's `ctl_arrow' is non-nil and 4 when it is nil.  Codes
   above 0x7F take WIDTH_BY_CHAR_HEAD (C) columns when the buffer is
   multibyte and C is a base leading code, and 4 otherwise.

   C is parenthesized at every use so that an expression argument
   (e.g. `x & 0xFF') is evaluated as a unit.  C is still evaluated
   more than once, so it must not have side effects.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1221
1222 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1223        doc: /* Return width of CHAR when displayed in the current buffer.
1224 The width is measured by how many columns it occupies on the screen.
1225 Tab is taken to occupy `tab-width' columns.  */)
1226      (ch)
1227      Lisp_Object ch;
1228 {
1229   Lisp_Object val, disp;
1230   int c;
1231   struct Lisp_Char_Table *dp = buffer_display_table ();
1232
1233   CHECK_NUMBER (ch);
1234
1235   c = XINT (ch);
1236
1237   /* Get the way the display table would display it.  */
1238   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1239
1240   if (VECTORP (disp))
1241     XSETINT (val, XVECTOR (disp)->size);
1242   else if (SINGLE_BYTE_CHAR_P (c))
1243     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1244   else
1245     {
1246       int charset = CHAR_CHARSET (c);
1247
1248       XSETFASTINT (val, CHARSET_WIDTH (charset));
1249     }
1250   return val;
1251 }
1252
/* Return the number of screen columns that the LEN bytes at STR
   occupy when displayed in the current buffer.  This is
   c_string_width without a precision limit.  */

int
strwidth (str, len)
     unsigned char *str;
     int len;
{
  int columns;

  /* A precision of -1 means "no limit", so no count is reported back
     and the two output pointers may be null.  */
  columns = c_string_width (str, len, -1, NULL, NULL);
  return columns;
}
1264
1265 /* Return width of string STR of length LEN when displayed in the
1266    current buffer.  The width is measured by how many columns it
1267    occupies on the screen.  If PRECISION > 0, return the width of
1268    longest substring that doesn't exceed PRECISION, and set number of
1269    characters and bytes of the substring in *NCHARS and *NBYTES
1270    respectively.  */
1271
1272 int
1273 c_string_width (str, len, precision, nchars, nbytes)
1274      const unsigned char *str;
1275      int precision, *nchars, *nbytes;
1276 {
1277   int i = 0, i_byte = 0;
1278   int width = 0;
1279   int chars;
1280   struct Lisp_Char_Table *dp = buffer_display_table ();
1281
1282   while (i_byte < len)
1283     {
1284       int bytes, thiswidth;
1285       Lisp_Object val;
1286
1287       if (dp)
1288         {
1289           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1290
1291           chars = 1;
1292           val = DISP_CHAR_VECTOR (dp, c);
1293           if (VECTORP (val))
1294             thiswidth = XVECTOR (val)->size;
1295           else
1296             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1297         }
1298       else
1299         {
1300           chars = 1;
1301           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1302           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1303         }
1304
1305       if (precision > 0
1306           && (width + thiswidth > precision))
1307         {
1308           *nchars = i;
1309           *nbytes = i_byte;
1310           return width;
1311         }
1312       i++;
1313       i_byte += bytes;
1314       width += thiswidth;
1315   }
1316
1317   if (precision > 0)
1318     {
1319       *nchars = i;
1320       *nbytes = i_byte;
1321     }
1322
1323   return width;
1324 }
1325
1326 /* Return width of Lisp string STRING when displayed in the current
1327    buffer.  The width is measured by how many columns it occupies on
1328    the screen while paying attention to compositions.  If PRECISION >
1329    0, return the width of longest substring that doesn't exceed
1330    PRECISION, and set number of characters and bytes of the substring
1331    in *NCHARS and *NBYTES respectively.  */
1332
1333 int
1334 lisp_string_width (string, precision, nchars, nbytes)
1335      Lisp_Object string;
1336      int precision, *nchars, *nbytes;
1337 {
1338   int len = SCHARS (string);
1339   int len_byte = SBYTES (string);
1340   const unsigned char *str = SDATA (string);
1341   int i = 0, i_byte = 0;
1342   int width = 0;
1343   struct Lisp_Char_Table *dp = buffer_display_table ();
1344
1345   while (i < len)
1346     {
1347       int chars, bytes, thiswidth;
1348       Lisp_Object val;
1349       int cmp_id;
1350       int ignore, end;
1351
1352       if (find_composition (i, -1, &ignore, &end, &val, string)
1353           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1354               >= 0))
1355         {
1356           thiswidth = composition_table[cmp_id]->width;
1357           chars = end - i;
1358           bytes = string_char_to_byte (string, end) - i_byte;
1359         }
1360       else if (dp)
1361         {
1362           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1363
1364           chars = 1;
1365           val = DISP_CHAR_VECTOR (dp, c);
1366           if (VECTORP (val))
1367             thiswidth = XVECTOR (val)->size;
1368           else
1369             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1370         }
1371       else
1372         {
1373           chars = 1;
1374           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1375           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1376         }
1377
1378       if (precision > 0
1379           && (width + thiswidth > precision))
1380         {
1381           *nchars = i;
1382           *nbytes = i_byte;
1383           return width;
1384         }
1385       i += chars;
1386       i_byte += bytes;
1387       width += thiswidth;
1388   }
1389
1390   if (precision > 0)
1391     {
1392       *nchars = i;
1393       *nbytes = i_byte;
1394     }
1395
1396   return width;
1397 }
1398
1399 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1400        doc: /* Return width of STRING when displayed in the current buffer.
1401 Width is measured by how many columns it occupies on the screen.
1402 When calculating width of a multibyte character in STRING,
1403 only the base leading-code is considered; the validity of
1404 the following bytes is not checked.  Tabs in STRING are always
1405 taken to occupy `tab-width' columns.  */)
1406      (str)
1407      Lisp_Object str;
1408 {
1409   Lisp_Object val;
1410
1411   CHECK_STRING (str);
1412   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
1413   return val;
1414 }
1415
1416 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1417        doc: /* Return the direction of CHAR.
1418 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1419      (ch)
1420      Lisp_Object ch;
1421 {
1422   int charset;
1423
1424   CHECK_NUMBER (ch);
1425   charset = CHAR_CHARSET (XFASTINT (ch));
1426   if (!CHARSET_DEFINED_P (charset))
1427     invalid_character (XINT (ch));
1428   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1429 }
1430
1431 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1432        doc: /* Return number of characters between BEG and END.  */)
1433      (beg, end)
1434      Lisp_Object beg, end;
1435 {
1436   int from, to;
1437
1438   CHECK_NUMBER_COERCE_MARKER (beg);
1439   CHECK_NUMBER_COERCE_MARKER (end);
1440
1441   from = min (XFASTINT (beg), XFASTINT (end));
1442   to = max (XFASTINT (beg), XFASTINT (end));
1443
1444   return make_number (to - from);
1445 }
1446
1447 /* Return the number of characters in the NBYTES bytes at PTR.
1448    This works by looking at the contents and checking for multibyte sequences.
1449    However, if the current buffer has enable-multibyte-characters = nil,
1450    we treat each byte as a character.  */
1451
1452 int
1453 chars_in_text (ptr, nbytes)
1454      const unsigned char *ptr;
1455      int nbytes;
1456 {
1457   /* current_buffer is null at early stages of Emacs initialization.  */
1458   if (current_buffer == 0
1459       || NILP (current_buffer->enable_multibyte_characters))
1460     return nbytes;
1461
1462   return multibyte_chars_in_text (ptr, nbytes);
1463 }
1464
/* Return the number of characters in the NBYTES bytes at PTR, found
   by parsing the contents as a succession of multibyte sequences.
   Unlike chars_in_text, this ignores enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     const unsigned char *ptr;
     int nbytes;
{
  const unsigned char *limit = ptr + nbytes;
  int count, seq_len;

  /* Each pass consumes one sequence and counts one character.  */
  for (count = 0; ptr < limit; count++)
    {
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seq_len);
      ptr += seq_len;
    }

  return count;
}
1489
/* Parse the LEN bytes of unibyte text at STR as if they were
   multibyte text, and store the resulting numbers of characters and
   bytes in *NCHARS and *NBYTES.  A byte that does not start a
   sequence accepted by UNIBYTE_STR_AS_MULTIBYTE_P is counted as 2
   bytes, because 8-bit characters in the range 0x80..0x9F are
   represented by 2 bytes in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     const unsigned char *str;
     int len, *nchars, *nbytes;
{
  const unsigned char *limit = str + len;
  int seq_len;
  int char_count = 0, byte_count = 0;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, seq_len))
        {
          /* The sequence is taken over as it is.  */
          str += seq_len;
          byte_count += seq_len;
        }
      else
        {
          /* A single byte that needs two bytes in multibyte form.  */
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1514
/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
   It actually converts only 8-bit characters in the range 0x80..0x9F
   that don't construct multibyte characters to multibyte forms.  If
   NCHARS is nonzero, set *NCHARS to the number of characters in the
   text.  It is assured that we can use LEN bytes at STR as a work
   area and that is enough.  Return the number of bytes of the
   resulting text.

   The conversion is done in place: the part that needs rewriting is
   first moved to the end of the LEN-byte work area and then copied
   back toward the front, expanding bytes on the way.  */

int
str_as_multibyte (str, len, nbytes, nchars)
     unsigned char *str;
     int len, nbytes, *nchars;
{
  unsigned char *p = str, *endp = str + nbytes;
  unsigned char *to;
  int chars = 0;
  int n;

  /* Skip the leading part that is accepted as multibyte sequences
     already; it stays where it is.  */
  while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
    p += n, chars++;
  if (nchars)
    *nchars = chars;
  /* The whole text was accepted: nothing to convert.  */
  if (p == endp)
    return nbytes;

  /* TO is where converted text is written.  The NBYTES bytes that
     remain are moved to the very end of the work area, and P and ENDP
     are re-pointed at that copy.  */
  to = p;
  nbytes = endp - p;
  endp = str + len;
  safe_bcopy (p, endp - nbytes, nbytes);
  p = endp - nbytes;
  while (p < endp)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
	{
	  /* An accepted sequence is copied over byte by byte.  */
	  while (n--)
	    *to++ = *p++;
	}
      else
	{
	  /* Any other byte becomes two bytes: the 8-bit-control
	     leading code followed by the byte plus 0x20.  */
	  *to++ = LEADING_CODE_8_BIT_CONTROL;
	  *to++ = *p++ + 0x20;
	}
      chars++;
    }
  if (nchars)
    *nchars = chars;
  return (to - str);
}
1563
/* Scan the LEN bytes of the unibyte string at STR and return the
   number of bytes it may occupy when converted to multibyte string by
   `str_to_multibyte': 2 for every byte in the range 0x80..0x9F and 1
   for every other byte.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      if (str[i] >= 0x80 && str[i] < 0xA0)
        total += 2;
      else
        total += 1;
    }
  return total;
}
1580
1581 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1582    that contains the same single-byte characters.  It actually
1583    converts all 8-bit characters to multibyte forms.  It is assured
1584    that we can use LEN bytes at STR as a work area and that is
1585    enough.  */
1586
1587 int
1588 str_to_multibyte (str, len, bytes)
1589      unsigned char *str;
1590      int len, bytes;
1591 {
1592   unsigned char *p = str, *endp = str + bytes;
1593   unsigned char *to;
1594
1595   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1596   if (p == endp)
1597     return bytes;
1598   to = p;
1599   bytes = endp - p;
1600   endp = str + len;
1601   safe_bcopy (p, endp - bytes, bytes);
1602   p = endp - bytes;
1603   while (p < endp)
1604     {
1605       if (*p < 0x80 || *p >= 0xA0)
1606         *to++ = *p++;
1607       else
1608         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1609     }
1610   return (to - str);
1611 }
1612
1613 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1614    actually converts only 8-bit characters in the range 0x80..0x9F to
1615    unibyte forms.  */
1616
1617 int
1618 str_as_unibyte (str, bytes)
1619      unsigned char *str;
1620      int bytes;
1621 {
1622   unsigned char *p = str, *endp = str + bytes;
1623   unsigned char *to = str;
1624
1625   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1626   to = p;
1627   while (p < endp)
1628     {
1629       if (*p == LEADING_CODE_8_BIT_CONTROL)
1630         *to++ = *(p + 1) - 0x20, p += 2;
1631       else
1632         *to++ = *p++;
1633     }
1634   return (to - str);
1635 }
1636
1637 \f
1638 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1639   doc: /* Concatenate all the argument characters and make the result a string.
1640 usage: (string &rest CHARACTERS)  */)
1641      (n, args)
1642      int n;
1643      Lisp_Object *args;
1644 {
1645   int i;
1646   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1647   unsigned char *p = buf;
1648   int c;
1649   int multibyte = 0;
1650
1651   for (i = 0; i < n; i++)
1652     {
1653       CHECK_NUMBER (args[i]);
1654       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1655         multibyte = 1;
1656     }
1657
1658   for (i = 0; i < n; i++)
1659     {
1660       c = XINT (args[i]);
1661       if (multibyte)
1662         p += CHAR_STRING (c, p);
1663       else
1664         *p++ = c;
1665     }
1666
1667   return make_string_from_bytes (buf, n, p - buf);
1668 }
1669
1670 #endif /* emacs */
1671 \f
1672 int
1673 charset_id_internal (charset_name)
1674      char *charset_name;
1675 {
1676   Lisp_Object val;
1677
1678   val= Fget (intern (charset_name), Qcharset);
1679   if (!VECTORP (val))
1680     error ("Charset %s is not defined", charset_name);
1681
1682   return (XINT (XVECTOR (val)->contents[0]));
1683 }
1684
/* Look up, by name, the ids of the charsets that the C code refers to
   through dedicated variables, and cache them in those variables.
   Each lookup goes through charset_id_internal, which signals an
   error if the named charset has not been defined yet.  */
DEFUN ("setup-special-charsets", Fsetup_special_charsets,
       Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
     ()
{
  charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
  charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
  charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
  charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
  charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
  charset_big5_1 = charset_id_internal ("chinese-big5-1");
  charset_big5_2 = charset_id_internal ("chinese-big5-2");
  return Qnil;
}
1698
/* Create and initialize the charset tables and the other file-level
   state set up below: the charset table and charset symbol table, the
   ISO charset table, the per-leading-byte byte-count and width tables,
   the generic character list, and the unibyte/multibyte conversion
   defaults.  NOTE(review): the name suggests this runs once at
   startup -- confirm against the caller.  */
void
init_charset_once ()
{
  int i, j, k;

  /* Protect the Lisp objects kept in C variables from garbage
     collection.  */
  staticpro (&Vcharset_table);
  staticpro (&Vcharset_symbol_table);
  staticpro (&Vgeneric_character_list);

  /* This has to be done here, before we call Fmake_char_table.  */
  Qcharset_table = intern ("charset-table");
  staticpro (&Qcharset_table);

  /* Intern this now in case it isn't already done.
     Setting this variable twice is harmless.
     But don't staticpro it here--that is done in alloc.c.  */
  Qchar_table_extra_slots = intern ("char-table-extra-slots");

  /* Now we are ready to set up this property, so we can
     create the charset table.  */
  Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
  Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);

  /* Every slot of the symbol table starts out as `unknown'.  */
  Qunknown = intern ("unknown");
  staticpro (&Qunknown);
  Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
                                        Qunknown);

  /* Setup tables.  */
  /* -1 in the ISO charset table means "no charset registered".  */
  for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)
      for (k = 0; k < 128; k++)
        iso_charset_table [i][j][k] = -1;

  /* Byte count by first byte: 1 by default, 3 or 4 for the four
     private leading codes.  */
  for (i = 0; i < 256; i++)
    bytes_by_char_head[i] = 1;
  bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
  bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;

  /* Width by first byte: 1 below 128 and 4 from 128 upward by
     default, 1 or 2 for the four private leading codes.  */
  for (i = 0; i < 128; i++)
    width_by_char_head[i] = 1;
  for (; i < 256; i++)
    width_by_char_head[i] = 4;
  width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
  width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;

  {
    Lisp_Object val;

    /* Build the generic character list: one number for each I in
       0x81..0x99 and 0xA0..0xFE, shifted left by 7 bits in the first
       part of each range and by 14 bits in the second part.  The
       list is consed in reverse and put back in order at the end.
       NOTE(review): the shift amounts presumably tell dimension-1
       charsets from dimension-2 ones -- confirm against charset.h.  */
    val = Qnil;
    for (i = 0x81; i < 0x90; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0x9A; i++)
      val = Fcons (make_number ((i - 0x8F) << 14), val);
    for (i = 0xA0; i < 0xF0; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0xFF; i++)
      val = Fcons (make_number ((i - 0xE0) << 14), val);
    Vgeneric_character_list = Fnreverse (val);
  }

  /* By default there is neither an insert offset nor a translation
     table for converting between unibyte and multibyte.  */
  nonascii_insert_offset = 0;
  Vnonascii_translation_table = Qnil;
}
1767
1768 #ifdef emacs
1769
1770 void
1771 syms_of_charset ()
1772 {
       /* Set up what this file exposes to Lisp.  In order: intern and
          staticpro the charset-related symbols, register the three
          built-in charsets (ascii, eight-bit-control,
          eight-bit-graphic), pass every primitive to defsubr, and
          declare the Lisp variables together with their initial
          values.  Takes no arguments and returns nothing.  */

       /* Each symbol is created with intern and the C variable holding
          it is then handed to staticpro.  */
1773   Qcharset = intern ("charset");
1774   staticpro (&Qcharset);
1775
1776   Qascii = intern ("ascii");
1777   staticpro (&Qascii);
1778
1779   Qeight_bit_control = intern ("eight-bit-control");
1780   staticpro (&Qeight_bit_control);
1781
1782   Qeight_bit_graphic = intern ("eight-bit-graphic");
1783   staticpro (&Qeight_bit_graphic);
1784
1785   /* Define special charsets ascii, eight-bit-control, and
1786      eight-bit-graphic.  */
       /* NOTE(review): update_charset_table is defined outside this
          function.  Its arguments are presumably charset id, dimension,
          chars, width, direction, ISO final char, ISO graphic plane,
          short name, long name and description, with a Qnil name
          meaning "reuse the preceding string" (hence the `same as
          above' remarks) -- confirm against its definition.  */
1787   update_charset_table (make_number (CHARSET_ASCII),
1788                         make_number (1), make_number (94),
1789                         make_number (1),
1790                         make_number (0),
1791                         make_number ('B'),
1792                         make_number (0),
1793                         build_string ("ASCII"),
1794                         Qnil,   /* same as above */
1795                         build_string ("ASCII (ISO646 IRV)"));
       /* Associate the symbol with the charset id through CHARSET_SYMBOL,
          then store the charset's table entry on the symbol under the
          `charset' property.  The same two steps follow each of the
          other two registrations below.  */
1796   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1797   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1798
       /* eight-bit-control differs from ascii in passing 96 instead of
          94, 4 instead of 1, and -1 for both the sixth and seventh
          arguments (presumably: 96 characters, display width 4, and no
          ISO final character or graphic plane -- TODO confirm).  */
1799   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1800                         make_number (1), make_number (96),
1801                         make_number (4),
1802                         make_number (0),
1803                         make_number (-1),
1804                         make_number (-1),
1805                         build_string ("8-bit control code (0x80..0x9F)"),
1806                         Qnil,   /* same as above */
1807                         Qnil);  /* same as above */
1808   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1809   Fput (Qeight_bit_control, Qcharset,
1810         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1811
       /* eight-bit-graphic is registered with the same numeric
          arguments as eight-bit-control; only the id and the name
          string differ.  */
1812   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1813                         make_number (1), make_number (96),
1814                         make_number (4),
1815                         make_number (0),
1816                         make_number (-1),
1817                         make_number (-1),
1818                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1819                         Qnil,   /* same as above */
1820                         Qnil);  /* same as above */
1821   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1822   Fput (Qeight_bit_graphic, Qcharset,
1823         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1824
       /* This symbol is passed as the first argument of
          Fmake_char_table at the end of this function; give it a
          char-table-extra-slots property of 0 before that call.  */
1825   Qauto_fill_chars = intern ("auto-fill-chars");
1826   staticpro (&Qauto_fill_chars);
1827   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1828
       /* Hand each S... object to defsubr.  These are presumably the
          primitives defined with DEFUN elsewhere in this file.  */
1829   defsubr (&Sdefine_charset);
1830   defsubr (&Sgeneric_character_list);
1831   defsubr (&Sget_unused_iso_final_char);
1832   defsubr (&Sdeclare_equiv_charset);
1833   defsubr (&Sfind_charset_region);
1834   defsubr (&Sfind_charset_string);
1835   defsubr (&Smake_char_internal);
1836   defsubr (&Ssplit_char);
1837   defsubr (&Schar_charset);
1838   defsubr (&Scharset_after);
1839   defsubr (&Siso_charset);
1840   defsubr (&Schar_valid_p);
1841   defsubr (&Sunibyte_char_to_multibyte);
1842   defsubr (&Smultibyte_char_to_unibyte);
1843   defsubr (&Schar_bytes);
1844   defsubr (&Schar_width);
1845   defsubr (&Sstring_width);
1846   defsubr (&Schar_direction);
1847   defsubr (&Schars_in_region);
1848   defsubr (&Sstring);
1849   defsubr (&Ssetup_special_charsets);
1850
       /* Lisp variables.  Each declaration is followed directly by the
          assignment of its initial value.  charset-list starts out
          holding the three charsets registered above.  */
1851   DEFVAR_LISP ("charset-list", &Vcharset_list,
1852                doc: /* List of charsets ever defined.  */);
1853   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1854                                         Fcons (Qeight_bit_graphic, Qnil)));
1855
       /* Starts as a vector of 16 elements, all nil.  */
1856   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1857                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1858 An ID of a translation table is an index of this vector.  */);
1859   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1860
       /* The four leading-code-private-* integer variables are each
          initialised from the LEADING_CODE_PRIVATE_* constant with the
          matching suffix.  */
1861   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1862               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1863   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1864
1865   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1866               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1867   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1868
1869   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1870               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1871   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1872
1873   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1874               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1875   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1876
       /* Initial value is 0.  */
1877   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1878               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1879 This is used for converting unibyte text to multibyte,
1880 and for inserting character codes specified by number.
1881
1882 This serves to convert a Latin-1 or similar 8-bit character code
1883 to the corresponding Emacs multibyte character code.
1884 Typically the value should be (- (make-char CHARSET 0) 128),
1885 for your choice of character set.
1886 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1887   nonascii_insert_offset = 0;
1888
       /* Initial value is nil.  */
1889   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1890                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1891 This is used for converting unibyte text to multibyte,
1892 and for inserting character codes specified by number.
1893
1894 Conversion is performed only when multibyte characters are enabled,
1895 and it serves to convert a Latin-1 or similar 8-bit character code
1896 to the corresponding Emacs character code.
1897
1898 If this is nil, `nonascii-insert-offset' is used instead.
1899 See also the docstring of `make-translation-table'.  */);
1900   Vnonascii_translation_table = Qnil;
1901
       /* The table is created with Qauto_fill_chars and Qnil as
          arguments, after which the entries for space and newline are
          set to t.  */
1902   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1903                doc: /* A char-table for characters which invoke auto-filling.
1904 Such characters have value t in this table.  */);
1905   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1906   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1907   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1908 }
1909
1910 #endif /* emacs */