src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /* At first, see the document in `charset.h' to understand the code in
  24    this file.  */
  25
  26 #ifdef emacs
  27 #include <config.h>
  28 #endif
  29
  30 #include <stdio.h>
  31
  32 #ifdef emacs
  33
  34 #include <sys/types.h>
  35 #include "lisp.h"
  36 #include "buffer.h"
  37 #include "charset.h"
  38 #include "composite.h"
  39 #include "coding.h"
  40 #include "disptab.h"
  41
  42 #else  /* not emacs */
  43
  44 #include "mulelib.h"
  45
  46 #endif /* emacs */
  47
  48 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  49 Lisp_Object Qunknown;
  50
  51 /* Declaration of special leading-codes.  */
  52 int leading_code_private_11;    /* for private DIMENSION1 of 1-column */
  53 int leading_code_private_12;    /* for private DIMENSION1 of 2-column */
  54 int leading_code_private_21;    /* for private DIMENSION2 of 1-column */
  55 int leading_code_private_22;    /* for private DIMENSION2 of 2-column */
  56
  57 /* Declaration of special charsets.  The values are set by
  58    Fsetup_special_charsets.  */
  59 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  60 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  61 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  62 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  63 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  64 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  65 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  66
  67 Lisp_Object Qcharset_table;
  68
  69 /* A char-table containing information of each character set.  */
  70 Lisp_Object Vcharset_table;
  71
  72 /* A vector of charset symbol indexed by charset-id.  This is used
  73    only for returning charset symbol from C functions.  */
  74 Lisp_Object Vcharset_symbol_table;
  75
  76 /* A list of charset symbols ever defined.  */
  77 Lisp_Object Vcharset_list;
  78
  79 /* Vector of translation table ever defined.
  80    ID of a translation table is used to index this vector.  */
  81 Lisp_Object Vtranslation_table_vector;
  82
  83 /* A char-table for characters which may invoke auto-filling.  */
  84 Lisp_Object Vauto_fill_chars;
  85
  86 Lisp_Object Qauto_fill_chars;
  87
  88 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  89 int bytes_by_char_head[256];
  90 int width_by_char_head[256];
  91
  92 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  93    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  94 int iso_charset_table[2][2][128];
  95
  96 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  97 unsigned char *_fetch_multibyte_char_p;
  98 int _fetch_multibyte_char_len;
  99
 100 /* Offset to add to a non-ASCII value when inserting it.  */
 101 int nonascii_insert_offset;
 102
 103 /* Translation table for converting non-ASCII unibyte characters
 104    to multibyte codes, or nil.  */
 105 Lisp_Object Vnonascii_translation_table;
 106
 107 /* List of all possible generic characters.  */
 108 Lisp_Object Vgeneric_character_list;
 109
 110 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
 111 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
 112 \f
 113 void
 114 invalid_character (c)
 115      int c;
 116 {
 117   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 118 }
 119
 120 /* Parse string STR of length LENGTH and fetch information of a
 121    character at STR.  Set BYTES to the byte length the character
 122    occupies, CHARSET, C1, C2 to proper values of the character. */
 123
 124 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 125   do {                                                                       \
 126     (c1) = *(str);                                                           \
 127     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 128     if ((bytes) == 1)                                                        \
 129       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 130     else if ((bytes) == 2)                                                   \
 131       {                                                                      \
 132         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 133           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 134         else                                                                 \
 135           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 136       }                                                                      \
 137     else if ((bytes) == 3)                                                   \
 138       {                                                                      \
 139         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 140           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 141         else                                                                 \
 142           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 143       }                                                                      \
 144     else                                                                     \
 145       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 146   } while (0)
 147
 148 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.  */
 149 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 150   ((charset) == CHARSET_ASCII                           \
 151    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 152    : ((charset) == CHARSET_8_BIT_CONTROL                \
 153       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 154       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 155          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 156          : (CHARSET_DIMENSION (charset) == 1            \
 157             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 158             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 159                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 160
 161 /* Store multi-byte form of the character C in STR.  The caller should
 162    allocate at least 4-byte area at STR in advance.  Returns the
 163    length of the multi-byte form.  If C is an invalid character code,
 164    return -1.  */
 165
 166 int
 167 char_to_string_1 (c, str)
 168      int c;
 169      unsigned char *str;
 170 {
 171   unsigned char *p = str;
 172
 173   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 174     {
 175       /* Multibyte character can't have a modifier bit.  */
 176       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 177         return -1;
 178
 179       /* For Meta, Shift, and Control modifiers, we need special care.  */
 180       if (c & CHAR_META)
 181         {
 182           /* Move the meta bit to the right place for a string.  */
 183           c = (c & ~CHAR_META) | 0x80;
 184         }
 185       if (c & CHAR_SHIFT)
 186         {
 187           /* Shift modifier is valid only with [A-Za-z].  */
 188           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 189             c &= ~CHAR_SHIFT;
 190           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 191             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 192         }
 193       if (c & CHAR_CTL)
 194         {
 195           /* Simulate the code in lread.c.  */
 196           /* Allow `\C- ' and `\C-?'.  */
 197           if (c == (CHAR_CTL | ' '))
 198             c = 0;
 199           else if (c == (CHAR_CTL | '?'))
 200             c = 127;
 201           /* ASCII control chars are made from letters (both cases),
 202              as well as the non-letters within 0100...0137.  */
 203           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 204             c &= (037 | (~0177 & ~CHAR_CTL));
 205           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 206             c &= (037 | (~0177 & ~CHAR_CTL));
 207         }
 208
 209       /* If C still has any modifier bits, just ignore it.  */
 210       c &= ~CHAR_MODIFIER_MASK;
 211     }
 212
 213   if (SINGLE_BYTE_CHAR_P (c))
 214     {
 215       if (ASCII_BYTE_P (c) || c >= 0xA0)
 216         *p++ = c;
 217       else
 218         {
 219           *p++ = LEADING_CODE_8_BIT_CONTROL;
 220           *p++ = c + 0x20;
 221         }
 222     }
 223   else if (CHAR_VALID_P (c, 0))
 224     {
 225       int charset, c1, c2;
 226
 227       SPLIT_CHAR (c, charset, c1, c2);
 228
 229       if (charset >= LEADING_CODE_EXT_11)
 230         *p++ = (charset < LEADING_CODE_EXT_12
 231                 ? LEADING_CODE_PRIVATE_11
 232                 : (charset < LEADING_CODE_EXT_21
 233                    ? LEADING_CODE_PRIVATE_12
 234                    : (charset < LEADING_CODE_EXT_22
 235                       ? LEADING_CODE_PRIVATE_21
 236                       : LEADING_CODE_PRIVATE_22)));
 237       *p++ = charset;
 238       if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
 239         return -1;
 240       if (c1)
 241         {
 242           *p++ = c1 | 0x80;
 243           if (c2 > 0)
 244             *p++ = c2 | 0x80;
 245         }
 246     }
 247   else
 248     return -1;
 249
 250   return (p - str);
 251 }
 252
 253
 254 /* Store multi-byte form of the character C in STR.  The caller should
 255    allocate at least 4-byte area at STR in advance.  Returns the
 256    length of the multi-byte form.  If C is an invalid character code,
 257    signal an error.
 258
 259    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 260    directly if C can be an ASCII character.  */
 261
 262 int
 263 char_to_string (c, str)
 264      int c;
 265      unsigned char *str;
 266 {
 267   int len;
 268   len = char_to_string_1 (c, str);
 269   if (len == -1)
 270     invalid_character (c);
 271   return len;
 272 }
 273
 274
 275 /* Return the non-ASCII character corresponding to multi-byte form at
 276    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 277    length of the multibyte form in *ACTUAL_LEN.
 278
 279    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 280    this function directly if you want ot handle ASCII characters as
 281    well.  */
 282
 283 int
 284 string_to_char (str, len, actual_len)
 285      const unsigned char *str;
 286      int len, *actual_len;
 287 {
 288   int c, bytes, charset, c1, c2;
 289
 290   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 291   c = MAKE_CHAR (charset, c1, c2);
 292   if (actual_len)
 293     *actual_len = bytes;
 294   return c;
 295 }
 296
 297 /* Return the length of the multi-byte form at string STR of length LEN.
 298    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 299 int
 300 multibyte_form_length (str, len)
 301      const unsigned char *str;
 302      int len;
 303 {
 304   int bytes;
 305
 306   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 307   return bytes;
 308 }
 309
 310 /* Check multibyte form at string STR of length LEN and set variables
 311    pointed by CHARSET, C1, and C2 to charset and position codes of the
 312    character at STR, and return 0.  If there's no multibyte character,
 313    return -1.  This should be used only in the macro SPLIT_STRING
 314    which checks range of STR in advance.  */
 315
 316 int
 317 split_string (str, len, charset, c1, c2)
 318      const unsigned char *str;
 319      unsigned char *c1, *c2;
 320      int len, *charset;
 321 {
 322   register int bytes, cs, code1, code2 = -1;
 323
 324   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 325   if (cs == CHARSET_ASCII)
 326     return -1;
 327   *charset = cs;
 328   *c1 = code1;
 329   *c2 = code2;
 330   return 0;
 331 }
 332
 333 /* Return 1 iff character C has valid printable glyph.
 334    Use the macro CHAR_PRINTABLE_P instead.  */
 335 int
 336 char_printable_p (c)
 337      int c;
 338 {
 339   int charset, c1, c2;
 340
 341   if (ASCII_BYTE_P (c))
 342     return 1;
 343   else if (SINGLE_BYTE_CHAR_P (c))
 344     return 0;
 345   else if (c >= MAX_CHAR)
 346     return 0;
 347
 348   SPLIT_CHAR (c, charset, c1, c2);
 349   if (! CHARSET_DEFINED_P (charset))
 350     return 0;
 351   if (CHARSET_CHARS (charset) == 94
 352       ? c1 <= 32 || c1 >= 127
 353       : c1 < 32)
 354     return 0;
 355   if (CHARSET_DIMENSION (charset) == 2
 356       && (CHARSET_CHARS (charset) == 94
 357           ? c2 <= 32 || c2 >= 127
 358           : c2 < 32))
 359     return 0;
 360   return 1;
 361 }
 362
 363 /* Translate character C by translation table TABLE.  If C
 364    is negative, translate a character specified by CHARSET, C1, and C2
 365    (C1 and C2 are code points of the character).  If no translation is
 366    found in TABLE, return C.  */
 367 int
 368 translate_char (table, c, charset, c1, c2)
 369      Lisp_Object table;
 370      int c, charset, c1, c2;
 371 {
 372   Lisp_Object ch;
 373   int alt_charset, alt_c1, alt_c2, dimension;
 374
 375   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 376   if (!CHAR_TABLE_P (table)
 377       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 378     return c;
 379
 380   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 381   dimension = CHARSET_DIMENSION (alt_charset);
 382   if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
 383     /* CH is not a generic character, just return it.  */
 384     return XFASTINT (ch);
 385
 386   /* Since CH is a generic character, we must return a specific
 387      charater which has the same position codes as C from CH.  */
 388   if (charset < 0)
 389     SPLIT_CHAR (c, charset, c1, c2);
 390   if (dimension != CHARSET_DIMENSION (charset))
 391     /* We can't make such a character because of dimension mismatch.  */
 392     return c;
 393   return MAKE_CHAR (alt_charset, c1, c2);
 394 }
 395
 396 /* Convert the unibyte character C to multibyte based on
 397    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 398    convert C to a valid multibyte character, convert it based on
 399    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 400
 401 int
 402 unibyte_char_to_multibyte (c)
 403      int c;
 404 {
 405   if (c < 0400 && c >= 0200)
 406     {
 407       int c_save = c;
 408
 409       if (! NILP (Vnonascii_translation_table))
 410         {
 411           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 412           if (c >= 0400 && ! char_valid_p (c, 0))
 413             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 414         }
 415       else if (c >= 0240 && nonascii_insert_offset > 0)
 416         {
 417           c += nonascii_insert_offset;
 418           if (c < 0400 || ! char_valid_p (c, 0))
 419             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 420         }
 421       else if (c >= 0240)
 422         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 423     }
 424   return c;
 425 }
 426
 427
 428 /* Convert the multibyte character C to unibyte 8-bit character based
 429    on Vnonascii_translation_table or nonascii_insert_offset.  If
 430    REV_TBL is non-nil, it should be a reverse table of
 431    Vnonascii_translation_table, i.e. what given by:
 432      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 433
 434 int
 435 multibyte_char_to_unibyte (c, rev_tbl)
 436      int c;
 437      Lisp_Object rev_tbl;
 438 {
 439   if (!SINGLE_BYTE_CHAR_P (c))
 440     {
 441       int c_save = c;
 442
 443       if (! CHAR_TABLE_P (rev_tbl)
 444           && CHAR_TABLE_P (Vnonascii_translation_table))
 445         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 446                                           make_number (0));
 447       if (CHAR_TABLE_P (rev_tbl))
 448         {
 449           Lisp_Object temp;
 450           temp = Faref (rev_tbl, make_number (c));
 451           if (INTEGERP (temp))
 452             c = XINT (temp);
 453           if (c >= 256)
 454             c = (c_save & 0177) + 0200;
 455         }
 456       else
 457         {
 458           if (nonascii_insert_offset > 0)
 459             c -= nonascii_insert_offset;
 460           if (c < 128 || c >= 256)
 461             c = (c_save & 0177) + 0200;
 462         }
 463     }
 464
 465   return c;
 466 }
 467
 468 \f
 469 /* Update the table Vcharset_table with the given arguments (see the
 470    document of `define-charset' for the meaning of each argument).
 471    Several other table contents are also updated.  The caller should
 472    check the validity of CHARSET-ID and the remaining arguments in
 473    advance.  */
 474
 475 void
 476 update_charset_table (charset_id, dimension, chars, width, direction,
 477                       iso_final_char, iso_graphic_plane,
 478                       short_name, long_name, description)
 479      Lisp_Object charset_id, dimension, chars, width, direction;
 480      Lisp_Object iso_final_char, iso_graphic_plane;
 481      Lisp_Object short_name, long_name, description;
 482 {
 483   int charset = XINT (charset_id);
 484   int bytes;
 485   unsigned char leading_code_base, leading_code_ext;
 486
 487   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 488     CHARSET_TABLE_ENTRY (charset)
 489       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 490
 491   if (NILP (long_name))
 492     long_name = short_name;
 493   if (NILP (description))
 494     description = long_name;
 495
 496   /* Get byte length of multibyte form, base leading-code, and
 497      extended leading-code of the charset.  See the comment under the
 498      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 499   bytes = XINT (dimension);
 500   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 501     {
 502       /* Official charset, it doesn't have an extended leading-code.  */
 503       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 504         bytes += 1; /* For a base leading-code.  */
 505       leading_code_base = charset;
 506       leading_code_ext = 0;
 507     }
 508   else
 509     {
 510       /* Private charset.  */
 511       bytes += 2; /* For base and extended leading-codes.  */
 512       leading_code_base
 513         = (charset < LEADING_CODE_EXT_12
 514            ? LEADING_CODE_PRIVATE_11
 515            : (charset < LEADING_CODE_EXT_21
 516               ? LEADING_CODE_PRIVATE_12
 517               : (charset < LEADING_CODE_EXT_22
 518                  ? LEADING_CODE_PRIVATE_21
 519                  : LEADING_CODE_PRIVATE_22)));
 520       leading_code_ext = charset;
 521       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 522         error ("Invalid dimension for the charset-ID %d", charset);
 523     }
 524
 525   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 526   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 527   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 528   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 529   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 530   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 531   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 532     = make_number (leading_code_base);
 533   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 534     = make_number (leading_code_ext);
 535   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 536   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 537     = iso_graphic_plane;
 538   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 539   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 540   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 541   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 542
 543   {
 544     /* If we have already defined a charset which has the same
 545        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 546        DIRECTION, we must update the entry REVERSE-CHARSET of both
 547        charsets.  If there's no such charset, the value of the entry
 548        is set to nil.  */
 549     int i;
 550
 551     for (i = 0; i <= MAX_CHARSET; i++)
 552       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 553         {
 554           if (CHARSET_DIMENSION (i) == XINT (dimension)
 555               && CHARSET_CHARS (i) == XINT (chars)
 556               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 557               && CHARSET_DIRECTION (i) != XINT (direction))
 558             {
 559               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 560                 = make_number (i);
 561               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 562               break;
 563             }
 564         }
 565     if (i > MAX_CHARSET)
 566       /* No such a charset.  */
 567       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 568         = make_number (-1);
 569   }
 570
 571   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 572       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 573     {
 574       bytes_by_char_head[leading_code_base] = bytes;
 575       width_by_char_head[leading_code_base] = XINT (width);
 576
 577       /* Update table emacs_code_class.  */
 578       emacs_code_class[charset] = (bytes == 2
 579                                    ? EMACS_leading_code_2
 580                                    : (bytes == 3
 581                                       ? EMACS_leading_code_3
 582                                       : EMACS_leading_code_4));
 583     }
 584
 585   /* Update table iso_charset_table.  */
 586   if (XINT (iso_final_char) >= 0
 587       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 588     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 589 }
 590
 591 #ifdef emacs
 592
 593 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 594    is invalid.  */
 595 int
 596 get_charset_id (charset_symbol)
 597      Lisp_Object charset_symbol;
 598 {
 599   Lisp_Object val;
 600   int charset;
 601
 602   /* This originally used a ?: operator, but reportedly the HP-UX
 603      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 604   if (SYMBOLP (charset_symbol)
 605       && VECTORP (val = Fget (charset_symbol, Qcharset))
 606       && CHARSET_VALID_P (charset =
 607                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 608     return charset;
 609   else
 610     return -1;
 611 }
 612
 613 /* Return an identification number for a new private charset of
 614    DIMENSION and WIDTH.  If there's no more room for the new charset,
 615    return 0.  */
 616 Lisp_Object
 617 get_new_private_charset_id (dimension, width)
 618      int dimension, width;
 619 {
 620   int charset, from, to;
 621
 622   if (dimension == 1)
 623     {
 624       from = LEADING_CODE_EXT_11;
 625       to = LEADING_CODE_EXT_21;
 626     }
 627   else
 628     {
 629       from = LEADING_CODE_EXT_21;
 630       to = LEADING_CODE_EXT_MAX + 1;
 631     }
 632
 633   for (charset = from; charset < to; charset++)
 634     if (!CHARSET_DEFINED_P (charset)) break;
 635
 636   return make_number (charset < to ? charset : 0);
 637 }
 638
 639 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 640   "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
 641 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
 642  treated as a private charset.\n\
 643 INFO-VECTOR is a vector of the format:\n\
 644    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
 645     SHORT-NAME LONG-NAME DESCRIPTION]\n\
 646 The meanings of each elements is as follows:\n\
 647 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
 648 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
 649 WIDTH (integer) is the number of columns a character in the charset\n\
 650 occupies on the screen: one of 0, 1, and 2.\n\
 651 \n\
 652 DIRECTION (integer) is the rendering direction of characters in the\n\
 653 charset when rendering.  If 0, render from left to right, else\n\
 654 render from right to left.\n\
 655 \n\
 656 ISO-FINAL-CHAR (character) is the final character of the\n\
 657 corresponding ISO 2022 charset.\n\
 658 It may be -1 if the charset is internal use only.\n\
 659 \n\
 660 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
 661 while encoding to variants of ISO 2022 coding system, one of the\n\
 662 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
 663 It may be -1 if the charset is internal use only.\n\
 664 \n\
 665 SHORT-NAME (string) is the short name to refer to the charset.\n\
 666 \n\
 667 LONG-NAME (string) is the long name to refer to the charset.\n\
 668 \n\
 669 DESCRIPTION (string) is the description string of the charset.")
 670   (charset_id, charset_symbol, info_vector)
 671      Lisp_Object charset_id, charset_symbol, info_vector;
 672 {
 673   Lisp_Object *vec;
 674
 675   if (!NILP (charset_id))
 676     CHECK_NUMBER (charset_id, 0);
 677   CHECK_SYMBOL (charset_symbol, 1);
 678   CHECK_VECTOR (info_vector, 2);
 679
 680   if (! NILP (charset_id))
 681     {
 682       if (! CHARSET_VALID_P (XINT (charset_id)))
 683         error ("Invalid CHARSET: %d", XINT (charset_id));
 684       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 685         error ("Already defined charset: %d", XINT (charset_id));
 686     }
 687
 688   vec = XVECTOR (info_vector)->contents;
 689   if (XVECTOR (info_vector)->size != 9
 690       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 691       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 692       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 693       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 694       || !INTEGERP (vec[4])
 695       || !(XINT (vec[4]) == -1 || XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
 696       || !INTEGERP (vec[5])
 697       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 698       || !STRINGP (vec[6])
 699       || !STRINGP (vec[7])
 700       || !STRINGP (vec[8]))
 701     error ("Invalid info-vector argument for defining charset %s",
 702            XSYMBOL (charset_symbol)->name->data);
 703
 704   if (NILP (charset_id))
 705     {
 706       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 707       if (XINT (charset_id) == 0)
 708         error ("There's no room for a new private charset %s",
 709                XSYMBOL (charset_symbol)->name->data);
 710     }
 711
 712   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 713                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 714   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 715   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 716   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 717   Fupdate_coding_systems_internal ();
 718   return Qnil;
 719 }
 720
 721 DEFUN ("generic-character-list", Fgeneric_character_list,
 722        Sgeneric_character_list, 0, 0, 0,
 723   "Return a list of all possible generic characters.\n\
 724 It includes a generic character for a charset not yet defined.")
 725   ()
 726 {
 727   return Vgeneric_character_list;
 728 }
 729
 730 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 731        Sget_unused_iso_final_char, 2, 2, 0,
 732   "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
 733 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
 734 CHARS is the number of characters in a dimension: 94 or 96.\n\
 735 \n\
 736 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
 737 If there's no unused final char for the specified kind of charset,\n\
 738 return nil.")
 739   (dimension, chars)
 740      Lisp_Object dimension, chars;
 741 {
 742   int final_char;
 743
 744   CHECK_NUMBER (dimension, 0);
 745   CHECK_NUMBER (chars, 1);
 746   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 747     error ("Invalid charset dimension %d, it should be 1 or 2",
 748            XINT (dimension));
 749   if (XINT (chars) != 94 && XINT (chars) != 96)
 750     error ("Invalid charset chars %d, it should be 94 or 96",
 751            XINT (chars));
 752   for (final_char = '0'; final_char <= '?'; final_char++)
 753     {
 754       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 755         break;
 756     }
 757   return (final_char <= '?' ? make_number (final_char) : Qnil);
 758 }
 759
 760 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 761        4, 4, 0,
 762   "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
 763 CHARSET should be defined by `defined-charset' in advance.")
 764   (dimension, chars, final_char, charset_symbol)
 765      Lisp_Object dimension, chars, final_char, charset_symbol;
 766 {
 767   int charset;
 768
 769   CHECK_NUMBER (dimension, 0);
 770   CHECK_NUMBER (chars, 1);
 771   CHECK_NUMBER (final_char, 2);
 772   CHECK_SYMBOL (charset_symbol, 3);
 773
 774   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 775     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 776   if (XINT (chars) != 94 && XINT (chars) != 96)
 777     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 778   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 779     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 780   if ((charset = get_charset_id (charset_symbol)) < 0)
 781     error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
 782
 783   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 784   return Qnil;
 785 }
 786
 787 /* Return information about charsets in the text at PTR of NBYTES
 788    bytes, which are NCHARS characters.  The value is:
 789
 790         0: Each character is represented by one byte.  This is always
 791            true for unibyte text.
 792         1: No charsets other than ascii eight-bit-control,
 793            eight-bit-graphic, and latin-1 are found.
 794         2: Otherwise.
 795
 796    In addition, if CHARSETS is nonzero, for each found charset N, set
 797    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 798    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 799    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 800    1 (note that there's no charset whose ID is 1).  */
 801
 802 int
 803 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 804      unsigned char *ptr;
 805      int nchars, nbytes, *charsets;
 806      Lisp_Object table;
 807 {
 808   if (nchars == nbytes)
 809     {
 810       if (charsets && nbytes > 0)
 811         {
 812           unsigned char *endp = ptr + nbytes;
 813           int maskbits = 0;
 814
 815           while (ptr < endp && maskbits != 7)
 816             {
 817               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 818               ptr++;
 819             }
 820
 821           if (maskbits & 1)
 822             charsets[CHARSET_ASCII] = 1;
 823           if (maskbits & 2)
 824             charsets[CHARSET_8_BIT_CONTROL] = 1;
 825           if (maskbits & 4)
 826             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 827         }
 828       return 0;
 829     }
 830   else
 831     {
 832       int return_val = 1;
 833       int bytes, charset, c1, c2;
 834
 835       if (! CHAR_TABLE_P (table))
 836         table = Qnil;
 837
 838       while (nchars-- > 0)
 839         {
 840           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 841           ptr += bytes;
 842
 843           if (!CHARSET_DEFINED_P (charset))
 844             charset = 1;
 845           else if (! NILP (table))
 846             {
 847               int c = translate_char (table, -1, charset, c1, c2);
 848               if (c >= 0)
 849                 charset = CHAR_CHARSET (c);
 850             }
 851
 852           if (return_val == 1
 853               && charset != CHARSET_ASCII
 854               && charset != CHARSET_8_BIT_CONTROL
 855               && charset != CHARSET_8_BIT_GRAPHIC
 856               && charset != charset_latin_iso8859_1)
 857             return_val = 2;
 858
 859           if (charsets)
 860             charsets[charset] = 1;
 861           else if (return_val == 2)
 862             break;
 863         }
 864       return return_val;
 865     }
 866 }
 867
 868 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 869        2, 3, 0,
 870   "Return a list of charsets in the region between BEG and END.\n\
 871 BEG and END are buffer positions.\n\
 872 Optional arg TABLE if non-nil is a translation table to look up.\n\
 873 \n\
 874 If the region contains invalid multibyte characters,\n\
 875 `unknown' is included in the returned list.\n\
 876 \n\
 877 If the current buffer is unibyte, the returned list may contain\n\
 878 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 879   (beg, end, table)
 880      Lisp_Object beg, end, table;
 881 {
 882   int charsets[MAX_CHARSET + 1];
 883   int from, from_byte, to, stop, stop_byte, i;
 884   Lisp_Object val;
 885
 886   validate_region (&beg, &end);
 887   from = XFASTINT (beg);
 888   stop = to = XFASTINT (end);
 889
 890   if (from < GPT && GPT < to)
 891     {
 892       stop = GPT;
 893       stop_byte = GPT_BYTE;
 894     }
 895   else
 896     stop_byte = CHAR_TO_BYTE (stop);
 897
 898   from_byte = CHAR_TO_BYTE (from);
 899
 900   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 901   while (1)
 902     {
 903       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 904                             stop_byte - from_byte, charsets, table);
 905       if (stop < to)
 906         {
 907           from = stop, from_byte = stop_byte;
 908           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 909         }
 910       else
 911         break;
 912     }
 913
 914   val = Qnil;
 915   if (charsets[1])
 916     val = Fcons (Qunknown, val);
 917   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 918     if (charsets[i])
 919       val = Fcons (CHARSET_SYMBOL (i), val);
 920   if (charsets[0])
 921     val = Fcons (Qascii, val);
 922   return val;
 923 }
 924
 925 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 926        1, 2, 0,
 927   "Return a list of charsets in STR.\n\
 928 Optional arg TABLE if non-nil is a translation table to look up.\n\
 929 \n\
 930 If the string contains invalid multibyte characters,\n\
 931 `unknown' is included in the returned list.\n\
 932 \n\
 933 If STR is unibyte, the returned list may contain\n\
 934 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 935   (str, table)
 936      Lisp_Object str, table;
 937 {
 938   int charsets[MAX_CHARSET + 1];
 939   int i;
 940   Lisp_Object val;
 941
 942   CHECK_STRING (str, 0);
 943
 944   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 945   find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
 946                         STRING_BYTES (XSTRING (str)), charsets, table);
 947
 948   val = Qnil;
 949   if (charsets[1])
 950     val = Fcons (Qunknown, val);
 951   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 952     if (charsets[i])
 953       val = Fcons (CHARSET_SYMBOL (i), val);
 954   if (charsets[0])
 955     val = Fcons (Qascii, val);
 956   return val;
 957 }
 958
 959 \f
 960 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 961   "")
 962   (charset, code1, code2)
 963      Lisp_Object charset, code1, code2;
 964 {
 965   int charset_id, c1, c2;
 966
 967   CHECK_NUMBER (charset, 0);
 968   charset_id = XINT (charset);
 969   if (!CHARSET_DEFINED_P (charset_id))
 970     error ("Invalid charset ID: %d", XINT (charset));
 971
 972   if (NILP (code1))
 973     c1 = 0;
 974   else
 975     {
 976       CHECK_NUMBER (code1, 1);
 977       c1 = XINT (code1);
 978     }
 979   if (NILP (code2))
 980     c2 = 0;
 981   else
 982     {
 983       CHECK_NUMBER (code2, 2);
 984       c2 = XINT (code2);
 985     }
 986
 987   if (charset_id == CHARSET_ASCII)
 988     {
 989       if (c1 < 0 || c1 > 0x7F)
 990         goto invalid_code_posints;
 991       return make_number (c1);
 992     }
 993   else if (charset_id == CHARSET_8_BIT_CONTROL)
 994     {
 995       if (NILP (code1))
 996         c1 = 0x80;
 997       else if (c1 < 0x80 || c1 > 0x9F)
 998         goto invalid_code_posints;
 999       return make_number (c1);
1000     }
1001   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1002     {
1003       if (NILP (code1))
1004         c1 = 0xA0;
1005       else if (c1 < 0xA0 || c1 > 0xFF)
1006         goto invalid_code_posints;
1007       return make_number (c1);
1008     }
1009   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1010     goto invalid_code_posints;
1011   c1 &= 0x7F;
1012   c2 &= 0x7F;
1013   if (c1 == 0
1014       ? c2 != 0
1015       : (c2 == 0
1016          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1017          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1018     goto invalid_code_posints;
1019   return make_number (MAKE_CHAR (charset_id, c1, c2));
1020
1021  invalid_code_posints:
1022   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1023 }
1024
1025 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1026   "Return list of charset and one or two position-codes of CHAR.\n\
1027 If CHAR is invalid as a character code,\n\
1028 return a list of symbol `unknown' and CHAR.")
1029   (ch)
1030      Lisp_Object ch;
1031 {
1032   int c, charset, c1, c2;
1033
1034   CHECK_NUMBER (ch, 0);
1035   c = XFASTINT (ch);
1036   if (!CHAR_VALID_P (c, 1))
1037     return Fcons (Qunknown, Fcons (ch, Qnil));
1038   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1039   return (c2 >= 0
1040           ? Fcons (CHARSET_SYMBOL (charset),
1041                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1042           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1043 }
1044
1045 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1046   "Return charset of CHAR.")
1047   (ch)
1048      Lisp_Object ch;
1049 {
1050   CHECK_NUMBER (ch, 0);
1051
1052   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1053 }
1054
1055 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1056   "Return charset of a character in the current buffer at position POS.\n\
1057 If POS is nil, it defauls to the current point.\n\
1058 If POS is out of range, the value is nil.")
1059   (pos)
1060      Lisp_Object pos;
1061 {
1062   Lisp_Object ch;
1063   int charset;
1064
1065   ch = Fchar_after (pos);
1066   if (! INTEGERP (ch))
1067     return ch;
1068   charset = CHAR_CHARSET (XINT (ch));
1069   return CHARSET_SYMBOL (charset);
1070 }
1071
1072 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1073   "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1074 \n\
1075 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1076 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1077 where as Emacs distinguishes them by charset symbol.\n\
1078 See the documentation of the function `charset-info' for the meanings of\n\
1079 DIMENSION, CHARS, and FINAL-CHAR.")
1080   (dimension, chars, final_char)
1081      Lisp_Object dimension, chars, final_char;
1082 {
1083   int charset;
1084
1085   CHECK_NUMBER (dimension, 0);
1086   CHECK_NUMBER (chars, 1);
1087   CHECK_NUMBER (final_char, 2);
1088
1089   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1090     return Qnil;
1091   return CHARSET_SYMBOL (charset);
1092 }
1093
1094 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1095    generic character.  If GENERICP is zero, return nonzero iff C is a
1096    valid normal character.  Do not call this function directly,
1097    instead use macro CHAR_VALID_P.  */
1098 int
1099 char_valid_p (c, genericp)
1100      int c, genericp;
1101 {
1102   int charset, c1, c2;
1103
1104   if (c < 0 || c >= MAX_CHAR)
1105     return 0;
1106   if (SINGLE_BYTE_CHAR_P (c))
1107     return 1;
1108   SPLIT_CHAR (c, charset, c1, c2);
1109   if (genericp)
1110     {
1111       if (c1)
1112         {
1113           if (c2 <= 0) c2 = 0x20;
1114         }
1115       else
1116         {
1117           if (c2 <= 0) c1 = c2 = 0x20;
1118         }
1119     }
1120   return (CHARSET_DEFINED_P (charset)
1121           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1122 }
1123
1124 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1125   "Return t if OBJECT is a valid normal character.\n\
1126 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1127 a valid generic character.")
1128   (object, genericp)
1129      Lisp_Object object, genericp;
1130 {
1131   if (! NATNUMP (object))
1132     return Qnil;
1133   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1134 }
1135
1136 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1137        Sunibyte_char_to_multibyte, 1, 1, 0,
1138   "Convert the unibyte character CH to multibyte character.\n\
1139 The conversion is done based on `nonascii-translation-table' (which see)\n\
1140  or `nonascii-insert-offset' (which see).")
1141   (ch)
1142      Lisp_Object ch;
1143 {
1144   int c;
1145
1146   CHECK_NUMBER (ch, 0);
1147   c = XINT (ch);
1148   if (c < 0 || c >= 0400)
1149     error ("Invalid unibyte character: %d", c);
1150   c = unibyte_char_to_multibyte (c);
1151   if (c < 0)
1152     error ("Can't convert to multibyte character: %d", XINT (ch));
1153   return make_number (c);
1154 }
1155
1156 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1157        Smultibyte_char_to_unibyte, 1, 1, 0,
1158   "Convert the multibyte character CH to unibyte character.\n\
1159 The conversion is done based on `nonascii-translation-table' (which see)\n\
1160  or `nonascii-insert-offset' (which see).")
1161   (ch)
1162      Lisp_Object ch;
1163 {
1164   int c;
1165
1166   CHECK_NUMBER (ch, 0);
1167   c = XINT (ch);
1168   if (! CHAR_VALID_P (c, 0))
1169     error ("Invalid multibyte character: %d", c);
1170   c = multibyte_char_to_unibyte (c, Qnil);
1171   if (c < 0)
1172     error ("Can't convert to unibyte character: %d", XINT (ch));
1173   return make_number (c);
1174 }
1175
1176 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1177   "Return 1 regardless of the argument CHAR.\n\
1178 This is now an obsolete function.  We keep it just for backward compatibility.")
1179   (ch)
1180      Lisp_Object ch;
1181 {
1182   CHECK_NUMBER (ch, 0);
1183   return make_number (1);
1184 }
1185
1186 /* Return how many bytes C will occupy in a multibyte buffer.
1187    Don't call this function directly, instead use macro CHAR_BYTES.  */
1188 int
1189 char_bytes (c)
1190      int c;
1191 {
1192   int charset;
1193
1194   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1195     return 1;
1196   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1197     return 1;
1198
1199   charset = CHAR_CHARSET (c);
1200   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1201 }
1202
1203 /* Return the width of character of which multi-byte form starts with
1204    C.  The width is measured by how many columns occupied on the
1205    screen when displayed in the current buffer.  */
1206
1207 #define ONE_BYTE_CHAR_WIDTH(c)                                          \
1208   (c < 0x20                                                             \
1209    ? (c == '\t'                                                         \
1210       ? XFASTINT (current_buffer->tab_width)                            \
1211       : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))   \
1212    : (c < 0x7f                                                          \
1213       ? 1                                                               \
1214       : (c == 0x7F                                                      \
1215          ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
1216          : ((! NILP (current_buffer->enable_multibyte_characters)       \
1217              && BASE_LEADING_CODE_P (c))                                \
1218             ? WIDTH_BY_CHAR_HEAD (c)                                    \
1219             : 4))))
1220
1221 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1222   "Return width of CHAR when displayed in the current buffer.\n\
1223 The width is measured by how many columns it occupies on the screen.\n\
1224 Tab is taken to occupy `tab-width' columns.")
1225   (ch)
1226        Lisp_Object ch;
1227 {
1228   Lisp_Object val, disp;
1229   int c;
1230   struct Lisp_Char_Table *dp = buffer_display_table ();
1231
1232   CHECK_NUMBER (ch, 0);
1233
1234   c = XINT (ch);
1235
1236   /* Get the way the display table would display it.  */
1237   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1238
1239   if (VECTORP (disp))
1240     XSETINT (val, XVECTOR (disp)->size);
1241   else if (SINGLE_BYTE_CHAR_P (c))
1242     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1243   else
1244     {
1245       int charset = CHAR_CHARSET (c);
1246
1247       XSETFASTINT (val, CHARSET_WIDTH (charset));
1248     }
1249   return val;
1250 }
1251
1252 /* Return width of string STR of length LEN when displayed in the
1253    current buffer.  The width is measured by how many columns it
1254    occupies on the screen.  */
1255
1256 int
1257 strwidth (str, len)
1258      unsigned char *str;
1259      int len;
1260 {
1261   return c_string_width (str, len, -1, NULL, NULL);
1262 }
1263
1264 /* Return width of string STR of length LEN when displayed in the
1265    current buffer.  The width is measured by how many columns it
1266    occupies on the screen.  If PRECISION > 0, return the width of
1267    longest substring that doesn't exceed PRECISION, and set number of
1268    characters and bytes of the substring in *NCHARS and *NBYTES
1269    respectively.  */
1270
1271 int
1272 c_string_width (str, len, precision, nchars, nbytes)
1273      unsigned char *str;
1274      int precision, *nchars, *nbytes;
1275 {
1276   int i = 0, i_byte = 0;
1277   int width = 0;
1278   int chars;
1279   struct Lisp_Char_Table *dp = buffer_display_table ();
1280
1281   while (i_byte < len)
1282     {
1283       int bytes, thiswidth;
1284       Lisp_Object val;
1285
1286       if (dp)
1287         {
1288           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1289
1290           chars = 1;
1291           val = DISP_CHAR_VECTOR (dp, c);
1292           if (VECTORP (val))
1293             thiswidth = XVECTOR (val)->size;
1294           else
1295             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1296         }
1297       else
1298         {
1299           chars = 1;
1300           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1301           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1302         }
1303
1304       if (precision > 0
1305           && (width + thiswidth > precision))
1306         {
1307           *nchars = i;
1308           *nbytes = i_byte;
1309           return width;
1310         }
1311       i++;
1312       i_byte += bytes;
1313       width += thiswidth;
1314   }
1315
1316   if (precision > 0)
1317     {
1318       *nchars = i;
1319       *nbytes = i_byte;
1320     }
1321
1322   return width;
1323 }
1324
1325 /* Return width of Lisp string STRING when displayed in the current
1326    buffer.  The width is measured by how many columns it occupies on
1327    the screen while paying attention to compositions.  If PRECISION >
1328    0, return the width of longest substring that doesn't exceed
1329    PRECISION, and set number of characters and bytes of the substring
1330    in *NCHARS and *NBYTES respectively.  */
1331
1332 int
1333 lisp_string_width (string, precision, nchars, nbytes)
1334      Lisp_Object string;
1335      int precision, *nchars, *nbytes;
1336 {
1337   int len = XSTRING (string)->size;
1338   int len_byte = STRING_BYTES (XSTRING (string));
1339   unsigned char *str = XSTRING (string)->data;
1340   int i = 0, i_byte = 0;
1341   int width = 0;
1342   struct Lisp_Char_Table *dp = buffer_display_table ();
1343
1344   while (i < len)
1345     {
1346       int chars, bytes, thiswidth;
1347       Lisp_Object val;
1348       int cmp_id;
1349       int ignore, end;
1350
1351       if (find_composition (i, -1, &ignore, &end, &val, string)
1352           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1353               >= 0))
1354         {
1355           thiswidth = composition_table[cmp_id]->width;
1356           chars = end - i;
1357           bytes = string_char_to_byte (string, end) - i_byte;
1358         }
1359       else if (dp)
1360         {
1361           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1362
1363           chars = 1;
1364           val = DISP_CHAR_VECTOR (dp, c);
1365           if (VECTORP (val))
1366             thiswidth = XVECTOR (val)->size;
1367           else
1368             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1369         }
1370       else
1371         {
1372           chars = 1;
1373           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1374           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1375         }
1376
1377       if (precision > 0
1378           && (width + thiswidth > precision))
1379         {
1380           *nchars = i;
1381           *nbytes = i_byte;
1382           return width;
1383         }
1384       i += chars;
1385       i_byte += bytes;
1386       width += thiswidth;
1387   }
1388
1389   if (precision > 0)
1390     {
1391       *nchars = i;
1392       *nbytes = i_byte;
1393     }
1394
1395   return width;
1396 }
1397
1398 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1399   "Return width of STRING when displayed in the current buffer.\n\
1400 Width is measured by how many columns it occupies on the screen.\n\
1401 When calculating width of a multibyte character in STRING,\n\
1402 only the base leading-code is considered; the validity of\n\
1403 the following bytes is not checked.  Tabs in STRING are always\n\
1404 taken to occupy `tab-width' columns.")
1405   (str)
1406      Lisp_Object str;
1407 {
1408   Lisp_Object val;
1409
1410   CHECK_STRING (str, 0);
1411   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
1412   return val;
1413 }
1414
1415 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1416   "Return the direction of CHAR.\n\
1417 The returned value is 0 for left-to-right and 1 for right-to-left.")
1418   (ch)
1419      Lisp_Object ch;
1420 {
1421   int charset;
1422
1423   CHECK_NUMBER (ch, 0);
1424   charset = CHAR_CHARSET (XFASTINT (ch));
1425   if (!CHARSET_DEFINED_P (charset))
1426     invalid_character (XINT (ch));
1427   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1428 }
1429
1430 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1431   "Return number of characters between BEG and END.")
1432   (beg, end)
1433      Lisp_Object beg, end;
1434 {
1435   int from, to;
1436
1437   CHECK_NUMBER_COERCE_MARKER (beg, 0);
1438   CHECK_NUMBER_COERCE_MARKER (end, 1);
1439
1440   from = min (XFASTINT (beg), XFASTINT (end));
1441   to = max (XFASTINT (beg), XFASTINT (end));
1442
1443   return make_number (to - from);
1444 }
1445
1446 /* Return the number of characters in the NBYTES bytes at PTR.
1447    This works by looking at the contents and checking for multibyte sequences.
1448    However, if the current buffer has enable-multibyte-characters = nil,
1449    we treat each byte as a character.  */
1450
1451 int
1452 chars_in_text (ptr, nbytes)
1453      unsigned char *ptr;
1454      int nbytes;
1455 {
1456   /* current_buffer is null at early stages of Emacs initialization.  */
1457   if (current_buffer == 0
1458       || NILP (current_buffer->enable_multibyte_characters))
1459     return nbytes;
1460
1461   return multibyte_chars_in_text (ptr, nbytes);
1462 }
1463
1464 /* Return the number of characters in the NBYTES bytes at PTR.
1465    This works by looking at the contents and checking for multibyte sequences.
1466    It ignores enable-multibyte-characters.  */
1467
1468 int
1469 multibyte_chars_in_text (ptr, nbytes)
1470      unsigned char *ptr;
1471      int nbytes;
1472 {
1473   unsigned char *endp;
1474   int chars, bytes;
1475
1476   endp = ptr + nbytes;
1477   chars = 0;
1478
1479   while (ptr < endp)
1480     {
1481       PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
1482       ptr += bytes;
1483       chars++;
1484     }
1485
1486   return chars;
1487 }
1488
1489 /* Parse unibyte text at STR of LEN bytes as multibyte text, and
1490    count the numbers of characters and bytes in it.  On counting
1491    bytes, pay attention to the fact that 8-bit characters in the range
1492    0x80..0x9F are represented by 2 bytes in multibyte text.  */
1493 void
1494 parse_str_as_multibyte (str, len, nchars, nbytes)
1495      unsigned char *str;
1496      int len, *nchars, *nbytes;
1497 {
1498   unsigned char *endp = str + len;
1499   int n, chars = 0, bytes = 0;
1500
1501   while (str < endp)
1502     {
1503       if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
1504         str += n, bytes += n;
1505       else
1506         str++, bytes += 2;
1507       chars++;
1508     }
1509   *nchars = chars;
1510   *nbytes = bytes;
1511   return;
1512 }
1513
1514 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1515    It actually converts only 8-bit characters in the range 0x80..0x9F
1516    that don't contruct multibyte characters to multibyte forms.  If
1517    NCHARS is nonzero, set *NCHARS to the number of characters in the
1518    text.  It is assured that we can use LEN bytes at STR as a work
1519    area and that is enough.  Return the number of bytes of the
1520    resulting text.  */
1521
1522 int
1523 str_as_multibyte (str, len, nbytes, nchars)
1524      unsigned char *str;
1525      int len, nbytes, *nchars;
1526 {
1527   unsigned char *p = str, *endp = str + nbytes;
1528   unsigned char *to;
1529   int chars = 0;
1530   int n;
1531
1532   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1533     p += n, chars++;
1534   if (nchars)
1535     *nchars = chars;
1536   if (p == endp)
1537     return nbytes;
1538
1539   to = p;
1540   nbytes = endp - p;
1541   endp = str + len;
1542   safe_bcopy (p, endp - nbytes, nbytes);
1543   p = endp - nbytes;
1544   while (p < endp)
1545     {
1546       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1547         {
1548           while (n--)
1549             *to++ = *p++;
1550         }
1551       else
1552         {
1553           *to++ = LEADING_CODE_8_BIT_CONTROL;
1554           *to++ = *p++ + 0x20;
1555         }
1556       chars++;
1557     }
1558   if (nchars)
1559     *nchars = chars;
1560   return (to - str);
1561 }
1562
1563 /* Parse unibyte string at STR of LEN bytes, and return the number of
1564    bytes it may ocupy when converted to multibyte string by
1565    `str_to_multibyte'.  */
1566
1567 int
1568 parse_str_to_multibyte (str, len)
1569      unsigned char *str;
1570      int len;
1571 {
1572   unsigned char *endp = str + len;
1573   int bytes;
1574
1575   for (bytes = 0; str < endp; str++)
1576     bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
1577   return bytes;
1578 }
1579
1580 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1581    that contains the same single-byte characters.  It actually
1582    converts all 8-bit characters to multibyte forms.  It is assured
1583    that we can use LEN bytes at STR as a work area and that is
1584    enough.  */
1585
1586 int
1587 str_to_multibyte (str, len, bytes)
1588      unsigned char *str;
1589      int len, bytes;
1590 {
1591   unsigned char *p = str, *endp = str + bytes;
1592   unsigned char *to;
1593
1594   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1595   if (p == endp)
1596     return bytes;
1597   to = p;
1598   bytes = endp - p;
1599   endp = str + len;
1600   safe_bcopy (p, endp - bytes, bytes);
1601   p = endp - bytes;
1602   while (p < endp)
1603     {
1604       if (*p < 0x80 || *p >= 0xA0)
1605         *to++ = *p++;
1606       else
1607         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1608     }
1609   return (to - str);
1610 }
1611
1612 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1613    actually converts only 8-bit characters in the range 0x80..0x9F to
1614    unibyte forms.  */
1615
1616 int
1617 str_as_unibyte (str, bytes)
1618      unsigned char *str;
1619      int bytes;
1620 {
1621   unsigned char *p = str, *endp = str + bytes;
1622   unsigned char *to = str;
1623
1624   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1625   to = p;
1626   while (p < endp)
1627     {
1628       if (*p == LEADING_CODE_8_BIT_CONTROL)
1629         *to++ = *(p + 1) - 0x20, p += 2;
1630       else
1631         *to++ = *p++;
1632     }
1633   return (to - str);
1634 }
1635
1636 \f
1637 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1638   "Concatenate all the argument characters and make the result a string.")
1639   (n, args)
1640      int n;
1641      Lisp_Object *args;
1642 {
1643   int i;
1644   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1645   unsigned char *p = buf;
1646   int c;
1647   int multibyte = 0;
1648
1649   for (i = 0; i < n; i++)
1650     {
1651       CHECK_NUMBER (args[i], 0);
1652       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1653         multibyte = 1;
1654     }
1655
1656   for (i = 0; i < n; i++)
1657     {
1658       c = XINT (args[i]);
1659       if (multibyte)
1660         p += CHAR_STRING (c, p);
1661       else
1662         *p++ = c;
1663     }
1664
1665   return make_string_from_bytes (buf, n, p - buf);
1666 }
1667
1668 #endif /* emacs */
1669 \f
1670 int
1671 charset_id_internal (charset_name)
1672      char *charset_name;
1673 {
1674   Lisp_Object val;
1675
1676   val= Fget (intern (charset_name), Qcharset);
1677   if (!VECTORP (val))
1678     error ("Charset %s is not defined", charset_name);
1679
1680   return (XINT (XVECTOR (val)->contents[0]));
1681 }
1682
1683 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1684        Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1685    ()
1686 {
1687   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1688   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1689   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1690   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1691   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1692   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1693   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1694   return Qnil;
1695 }
1696
1697 void
1698 init_charset_once ()
1699 {
1700   int i, j, k;
1701
1702   staticpro (&Vcharset_table);
1703   staticpro (&Vcharset_symbol_table);
1704   staticpro (&Vgeneric_character_list);
1705
1706   /* This has to be done here, before we call Fmake_char_table.  */
1707   Qcharset_table = intern ("charset-table");
1708   staticpro (&Qcharset_table);
1709
1710   /* Intern this now in case it isn't already done.
1711      Setting this variable twice is harmless.
1712      But don't staticpro it here--that is done in alloc.c.  */
1713   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1714
1715   /* Now we are ready to set up this property, so we can
1716      create the charset table.  */
1717   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1718   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1719
1720   Qunknown = intern ("unknown");
1721   staticpro (&Qunknown);
1722   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1723                                         Qunknown);
1724
1725   /* Setup tables.  */
1726   for (i = 0; i < 2; i++)
1727     for (j = 0; j < 2; j++)
1728       for (k = 0; k < 128; k++)
1729         iso_charset_table [i][j][k] = -1;
1730
1731   for (i = 0; i < 256; i++)
1732     bytes_by_char_head[i] = 1;
1733   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1734   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1735   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1736   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1737
1738   for (i = 0; i < 128; i++)
1739     width_by_char_head[i] = 1;
1740   for (; i < 256; i++)
1741     width_by_char_head[i] = 4;
1742   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1743   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1744   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1745   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1746
1747   {
1748     Lisp_Object val;
1749
1750     val = Qnil;
1751     for (i = 0x81; i < 0x90; i++)
1752       val = Fcons (make_number ((i - 0x70) << 7), val);
1753     for (; i < 0x9A; i++)
1754       val = Fcons (make_number ((i - 0x8F) << 14), val);
1755     for (i = 0xA0; i < 0xF0; i++)
1756       val = Fcons (make_number ((i - 0x70) << 7), val);
1757     for (; i < 0xFF; i++)
1758       val = Fcons (make_number ((i - 0xE0) << 14), val);
1759     Vgeneric_character_list = Fnreverse (val);
1760   }
1761
1762   nonascii_insert_offset = 0;
1763   Vnonascii_translation_table = Qnil;
1764 }
1765
1766 #ifdef emacs
1767
1768 void
1769 syms_of_charset ()
1770 {
1771   Qcharset = intern ("charset");
1772   staticpro (&Qcharset);
1773
1774   Qascii = intern ("ascii");
1775   staticpro (&Qascii);
1776
1777   Qeight_bit_control = intern ("eight-bit-control");
1778   staticpro (&Qeight_bit_control);
1779
1780   Qeight_bit_graphic = intern ("eight-bit-graphic");
1781   staticpro (&Qeight_bit_graphic);
1782
1783   /* Define special charsets ascii, eight-bit-control, and
1784      eight-bit-graphic.  */
1785   update_charset_table (make_number (CHARSET_ASCII),
1786                         make_number (1), make_number (94),
1787                         make_number (1),
1788                         make_number (0),
1789                         make_number ('B'),
1790                         make_number (0),
1791                         build_string ("ASCII"),
1792                         Qnil,   /* same as above */
1793                         build_string ("ASCII (ISO646 IRV)"));
1794   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1795   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1796
1797   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1798                         make_number (1), make_number (96),
1799                         make_number (4),
1800                         make_number (0),
1801                         make_number (-1),
1802                         make_number (-1),
1803                         build_string ("8-bit control code (0x80..0x9F)"),
1804                         Qnil,   /* same as above */
1805                         Qnil);  /* same as above */
1806   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1807   Fput (Qeight_bit_control, Qcharset,
1808         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1809
1810   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1811                         make_number (1), make_number (96),
1812                         make_number (4),
1813                         make_number (0),
1814                         make_number (-1),
1815                         make_number (-1),
1816                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1817                         Qnil,   /* same as above */
1818                         Qnil);  /* same as above */
1819   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1820   Fput (Qeight_bit_graphic, Qcharset,
1821         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1822
1823   Qauto_fill_chars = intern ("auto-fill-chars");
1824   staticpro (&Qauto_fill_chars);
1825   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1826
1827   defsubr (&Sdefine_charset);
1828   defsubr (&Sgeneric_character_list);
1829   defsubr (&Sget_unused_iso_final_char);
1830   defsubr (&Sdeclare_equiv_charset);
1831   defsubr (&Sfind_charset_region);
1832   defsubr (&Sfind_charset_string);
1833   defsubr (&Smake_char_internal);
1834   defsubr (&Ssplit_char);
1835   defsubr (&Schar_charset);
1836   defsubr (&Scharset_after);
1837   defsubr (&Siso_charset);
1838   defsubr (&Schar_valid_p);
1839   defsubr (&Sunibyte_char_to_multibyte);
1840   defsubr (&Smultibyte_char_to_unibyte);
1841   defsubr (&Schar_bytes);
1842   defsubr (&Schar_width);
1843   defsubr (&Sstring_width);
1844   defsubr (&Schar_direction);
1845   defsubr (&Schars_in_region);
1846   defsubr (&Sstring);
1847   defsubr (&Ssetup_special_charsets);
1848
1849   DEFVAR_LISP ("charset-list", &Vcharset_list,
1850     "List of charsets ever defined.");
1851   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1852                                         Fcons (Qeight_bit_graphic, Qnil)));
1853
1854   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1855     "Vector of cons cell of a symbol and translation table ever defined.\n\
1856 An ID of a translation table is an index of this vector.");
1857   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1858
1859   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1860     "Leading-code of private TYPE9N charset of column-width 1.");
1861   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1862
1863   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1864     "Leading-code of private TYPE9N charset of column-width 2.");
1865   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1866
1867   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1868     "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1869   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1870
1871   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1872     "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1873   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1874
1875   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1876     "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1877 This is used for converting unibyte text to multibyte,\n\
1878 and for inserting character codes specified by number.\n\n\
1879 This serves to convert a Latin-1 or similar 8-bit character code\n\
1880 to the corresponding Emacs multibyte character code.\n\
1881 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1882 for your choice of character set.\n\
1883 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1884   nonascii_insert_offset = 0;
1885
1886   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1887     "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1888 This is used for converting unibyte text to multibyte,\n\
1889 and for inserting character codes specified by number.\n\n\
1890 Conversion is performed only when multibyte characters are enabled,\n\
1891 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1892 to the corresponding Emacs character code.\n\n\
1893 If this is nil, `nonascii-insert-offset' is used instead.\n\
1894 See also the docstring of `make-translation-table'.");
1895   Vnonascii_translation_table = Qnil;
1896
1897   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1898     "A char-table for characters which invoke auto-filling.\n\
1899 Such characters have value t in this table.");
1900   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1901   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1902   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1903 }
1904
1905 #endif /* emacs */