src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H14PRO021
   7
   8 This file is part of GNU Emacs.
   9
  10 GNU Emacs is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 2, or (at your option)
  13 any later version.
  14
  15 GNU Emacs is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GNU Emacs; see the file COPYING.  If not, write to
  22 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  23 Boston, MA 02110-1301, USA.  */
  24
  25 /* At first, see the document in `charset.h' to understand the code in
  26    this file.  */
  27
  28 #ifdef emacs
  29 #include <config.h>
  30 #endif
  31
  32 #include <stdio.h>
  33
  34 #ifdef emacs
  35
  36 #include <sys/types.h>
  37 #include "lisp.h"
  38 #include "buffer.h"
  39 #include "charset.h"
  40 #include "composite.h"
  41 #include "coding.h"
  42 #include "disptab.h"
  43
  44 #else  /* not emacs */
  45
  46 #include "mulelib.h"
  47
  48 #endif /* emacs */
  49
  50 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  51 Lisp_Object Qunknown;
  52
  53 /* Declaration of special leading-codes.  */
  54 EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
  55 EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
  56 EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
  57 EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
  58
  59 /* Declaration of special charsets.  The values are set by
  60    Fsetup_special_charsets.  */
  61 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  62 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  63 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  64 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  65 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  66 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  67 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  68 int charset_mule_unicode_0100_24ff;
  69 int charset_mule_unicode_2500_33ff;
  70 int charset_mule_unicode_e000_ffff;
  71
  72 Lisp_Object Qcharset_table;
  73
  74 /* A char-table containing information of each character set.  */
  75 Lisp_Object Vcharset_table;
  76
  77 /* A vector of charset symbol indexed by charset-id.  This is used
  78    only for returning charset symbol from C functions.  */
  79 Lisp_Object Vcharset_symbol_table;
  80
  81 /* A list of charset symbols ever defined.  */
  82 Lisp_Object Vcharset_list;
  83
  84 /* Vector of translation table ever defined.
  85    ID of a translation table is used to index this vector.  */
  86 Lisp_Object Vtranslation_table_vector;
  87
  88 /* A char-table for characters which may invoke auto-filling.  */
  89 Lisp_Object Vauto_fill_chars;
  90
  91 Lisp_Object Qauto_fill_chars;
  92
  93 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  94 int bytes_by_char_head[256];
  95 int width_by_char_head[256];
  96
  97 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  98    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  99 int iso_charset_table[2][2][128];
 100
 101 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
 102 unsigned char *_fetch_multibyte_char_p;
 103 int _fetch_multibyte_char_len;
 104
 105 /* Offset to add to a non-ASCII value when inserting it.  */
 106 EMACS_INT nonascii_insert_offset;
 107
 108 /* Translation table for converting non-ASCII unibyte characters
 109    to multibyte codes, or nil.  */
 110 Lisp_Object Vnonascii_translation_table;
 111
 112 /* List of all possible generic characters.  */
 113 Lisp_Object Vgeneric_character_list;
 114
 115 \f
 116 void
 117 invalid_character (c)
 118      int c;
 119 {
 120   error ("Invalid character: %d, #o%o, #x%x", c, c, c);
 121 }
 122
 123 /* Parse string STR of length LENGTH and fetch information of a
 124    character at STR.  Set BYTES to the byte length the character
 125    occupies, CHARSET, C1, C2 to proper values of the character. */
 126
 127 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 128   do {                                                                       \
 129     (c1) = *(str);                                                           \
 130     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 131     if ((bytes) == 1)                                                        \
 132       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 133     else if ((bytes) == 2)                                                   \
 134       {                                                                      \
 135         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 136           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 137         else                                                                 \
 138           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 139       }                                                                      \
 140     else if ((bytes) == 3)                                                   \
 141       {                                                                      \
 142         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 143           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 144         else                                                                 \
 145           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 146       }                                                                      \
 147     else                                                                     \
 148       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 149   } while (0)
 150
 151 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
 152    Note that this intentionally allows invalid components, such
 153    as 0xA0 0xA0, because there exist many files that contain
 154    such invalid byte sequences, especially in EUC-GB. */
 155 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 156   ((charset) == CHARSET_ASCII                           \
 157    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 158    : ((charset) == CHARSET_8_BIT_CONTROL                \
 159       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 160       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 161          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 162          : (CHARSET_DIMENSION (charset) == 1            \
 163             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 164             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 165                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 166
 167 /* Store multi-byte form of the character C in STR.  The caller should
 168    allocate at least 4-byte area at STR in advance.  Returns the
 169    length of the multi-byte form.  If C is an invalid character code,
 170    return -1.  */
 171
 172 int
 173 char_to_string_1 (c, str)
 174      int c;
 175      unsigned char *str;
 176 {
 177   unsigned char *p = str;
 178
 179   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 180     {
 181       /* Multibyte character can't have a modifier bit.  */
 182       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 183         return -1;
 184
 185       /* For Meta, Shift, and Control modifiers, we need special care.  */
 186       if (c & CHAR_META)
 187         {
 188           /* Move the meta bit to the right place for a string.  */
 189           c = (c & ~CHAR_META) | 0x80;
 190         }
 191       if (c & CHAR_SHIFT)
 192         {
 193           /* Shift modifier is valid only with [A-Za-z].  */
 194           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 195             c &= ~CHAR_SHIFT;
 196           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 197             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 198         }
 199       if (c & CHAR_CTL)
 200         {
 201           /* Simulate the code in lread.c.  */
 202           /* Allow `\C- ' and `\C-?'.  */
 203           if (c == (CHAR_CTL | ' '))
 204             c = 0;
 205           else if (c == (CHAR_CTL | '?'))
 206             c = 127;
 207           /* ASCII control chars are made from letters (both cases),
 208              as well as the non-letters within 0100...0137.  */
 209           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 210             c &= (037 | (~0177 & ~CHAR_CTL));
 211           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 212             c &= (037 | (~0177 & ~CHAR_CTL));
 213         }
 214
 215       /* If C still has any modifier bits, just ignore it.  */
 216       c &= ~CHAR_MODIFIER_MASK;
 217     }
 218
 219   if (SINGLE_BYTE_CHAR_P (c))
 220     {
 221       if (ASCII_BYTE_P (c) || c >= 0xA0)
 222         *p++ = c;
 223       else
 224         {
 225           *p++ = LEADING_CODE_8_BIT_CONTROL;
 226           *p++ = c + 0x20;
 227         }
 228     }
 229   else if (CHAR_VALID_P (c, 0))
 230     {
 231       int charset, c1, c2;
 232
 233       SPLIT_CHAR (c, charset, c1, c2);
 234
 235       if (charset >= LEADING_CODE_EXT_11)
 236         *p++ = (charset < LEADING_CODE_EXT_12
 237                 ? LEADING_CODE_PRIVATE_11
 238                 : (charset < LEADING_CODE_EXT_21
 239                    ? LEADING_CODE_PRIVATE_12
 240                    : (charset < LEADING_CODE_EXT_22
 241                       ? LEADING_CODE_PRIVATE_21
 242                       : LEADING_CODE_PRIVATE_22)));
 243       *p++ = charset;
 244       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 245         return -1;
 246       if (c1)
 247         {
 248           *p++ = c1 | 0x80;
 249           if (c2 > 0)
 250             *p++ = c2 | 0x80;
 251         }
 252     }
 253   else
 254     return -1;
 255
 256   return (p - str);
 257 }
 258
 259
 260 /* Store multi-byte form of the character C in STR.  The caller should
 261    allocate at least 4-byte area at STR in advance.  Returns the
 262    length of the multi-byte form.  If C is an invalid character code,
 263    signal an error.
 264
 265    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 266    directly if C can be an ASCII character.  */
 267
 268 int
 269 char_to_string (c, str)
 270      int c;
 271      unsigned char *str;
 272 {
 273   int len;
 274   len = char_to_string_1 (c, str);
 275   if (len == -1)
 276     invalid_character (c);
 277   return len;
 278 }
 279
 280
 281 /* Return the non-ASCII character corresponding to multi-byte form at
 282    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 283    length of the multibyte form in *ACTUAL_LEN.
 284
 285    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 286    this function directly if you want ot handle ASCII characters as
 287    well.  */
 288
 289 int
 290 string_to_char (str, len, actual_len)
 291      const unsigned char *str;
 292      int len, *actual_len;
 293 {
 294   int c, bytes, charset, c1, c2;
 295
 296   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 297   c = MAKE_CHAR (charset, c1, c2);
 298   if (actual_len)
 299     *actual_len = bytes;
 300   return c;
 301 }
 302
 303 /* Return the length of the multi-byte form at string STR of length LEN.
 304    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 305 int
 306 multibyte_form_length (str, len)
 307      const unsigned char *str;
 308      int len;
 309 {
 310   int bytes;
 311
 312   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 313   return bytes;
 314 }
 315
 316 /* Check multibyte form at string STR of length LEN and set variables
 317    pointed by CHARSET, C1, and C2 to charset and position codes of the
 318    character at STR, and return 0.  If there's no multibyte character,
 319    return -1.  This should be used only in the macro SPLIT_STRING
 320    which checks range of STR in advance.  */
 321
 322 int
 323 split_string (str, len, charset, c1, c2)
 324      const unsigned char *str;
 325      unsigned char *c1, *c2;
 326      int len, *charset;
 327 {
 328   register int bytes, cs, code1, code2 = -1;
 329
 330   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 331   if (cs == CHARSET_ASCII)
 332     return -1;
 333   *charset = cs;
 334   *c1 = code1;
 335   *c2 = code2;
 336   return 0;
 337 }
 338
 339 /* Return 1 iff character C has valid printable glyph.
 340    Use the macro CHAR_PRINTABLE_P instead.  */
 341 int
 342 char_printable_p (c)
 343      int c;
 344 {
 345   int charset, c1, c2;
 346
 347   if (ASCII_BYTE_P (c))
 348     return 1;
 349   else if (SINGLE_BYTE_CHAR_P (c))
 350     return 0;
 351   else if (c >= MAX_CHAR)
 352     return 0;
 353
 354   SPLIT_CHAR (c, charset, c1, c2);
 355   if (! CHARSET_DEFINED_P (charset))
 356     return 0;
 357   if (CHARSET_CHARS (charset) == 94
 358       ? c1 <= 32 || c1 >= 127
 359       : c1 < 32)
 360     return 0;
 361   if (CHARSET_DIMENSION (charset) == 2
 362       && (CHARSET_CHARS (charset) == 94
 363           ? c2 <= 32 || c2 >= 127
 364           : c2 < 32))
 365     return 0;
 366   return 1;
 367 }
 368
 369 /* Translate character C by translation table TABLE.  If C
 370    is negative, translate a character specified by CHARSET, C1, and C2
 371    (C1 and C2 are code points of the character).  If no translation is
 372    found in TABLE, return C.  */
 373 int
 374 translate_char (table, c, charset, c1, c2)
 375      Lisp_Object table;
 376      int c, charset, c1, c2;
 377 {
 378   Lisp_Object ch;
 379   int alt_charset, alt_c1, alt_c2, dimension;
 380
 381   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 382   if (!CHAR_TABLE_P (table)
 383       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 384     return c;
 385
 386   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 387   dimension = CHARSET_DIMENSION (alt_charset);
 388   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 389     /* CH is not a generic character, just return it.  */
 390     return XFASTINT (ch);
 391
 392   /* Since CH is a generic character, we must return a specific
 393      charater which has the same position codes as C from CH.  */
 394   if (charset < 0)
 395     SPLIT_CHAR (c, charset, c1, c2);
 396   if (dimension != CHARSET_DIMENSION (charset))
 397     /* We can't make such a character because of dimension mismatch.  */
 398     return c;
 399   return MAKE_CHAR (alt_charset, c1, c2);
 400 }
 401
 402 /* Convert the unibyte character C to multibyte based on
 403    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 404    convert C to a valid multibyte character, convert it based on
 405    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 406
 407 int
 408 unibyte_char_to_multibyte (c)
 409      int c;
 410 {
 411   if (c < 0400 && c >= 0200)
 412     {
 413       int c_save = c;
 414
 415       if (! NILP (Vnonascii_translation_table))
 416         {
 417           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 418           if (c >= 0400 && ! char_valid_p (c, 0))
 419             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 420         }
 421       else if (c >= 0240 && nonascii_insert_offset > 0)
 422         {
 423           c += nonascii_insert_offset;
 424           if (c < 0400 || ! char_valid_p (c, 0))
 425             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 426         }
 427       else if (c >= 0240)
 428         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 429     }
 430   return c;
 431 }
 432
 433
 434 /* Convert the multibyte character C to unibyte 8-bit character based
 435    on Vnonascii_translation_table or nonascii_insert_offset.  If
 436    REV_TBL is non-nil, it should be a reverse table of
 437    Vnonascii_translation_table, i.e. what given by:
 438      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 439
 440 int
 441 multibyte_char_to_unibyte (c, rev_tbl)
 442      int c;
 443      Lisp_Object rev_tbl;
 444 {
 445   if (!SINGLE_BYTE_CHAR_P (c))
 446     {
 447       int c_save = c;
 448
 449       if (! CHAR_TABLE_P (rev_tbl)
 450           && CHAR_TABLE_P (Vnonascii_translation_table))
 451         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 452                                           make_number (0));
 453       if (CHAR_TABLE_P (rev_tbl))
 454         {
 455           Lisp_Object temp;
 456           temp = Faref (rev_tbl, make_number (c));
 457           if (INTEGERP (temp))
 458             c = XINT (temp);
 459           if (c >= 256)
 460             c = (c_save & 0177) + 0200;
 461         }
 462       else
 463         {
 464           if (nonascii_insert_offset > 0)
 465             c -= nonascii_insert_offset;
 466           if (c < 128 || c >= 256)
 467             c = (c_save & 0177) + 0200;
 468         }
 469     }
 470
 471   return c;
 472 }
 473
 474 \f
 475 /* Update the table Vcharset_table with the given arguments (see the
 476    document of `define-charset' for the meaning of each argument).
 477    Several other table contents are also updated.  The caller should
 478    check the validity of CHARSET-ID and the remaining arguments in
 479    advance.  */
 480
 481 void
 482 update_charset_table (charset_id, dimension, chars, width, direction,
 483                       iso_final_char, iso_graphic_plane,
 484                       short_name, long_name, description)
 485      Lisp_Object charset_id, dimension, chars, width, direction;
 486      Lisp_Object iso_final_char, iso_graphic_plane;
 487      Lisp_Object short_name, long_name, description;
 488 {
 489   int charset = XINT (charset_id);
 490   int bytes;
 491   unsigned char leading_code_base, leading_code_ext;
 492
 493   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 494     CHARSET_TABLE_ENTRY (charset)
 495       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 496
 497   if (NILP (long_name))
 498     long_name = short_name;
 499   if (NILP (description))
 500     description = long_name;
 501
 502   /* Get byte length of multibyte form, base leading-code, and
 503      extended leading-code of the charset.  See the comment under the
 504      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 505   bytes = XINT (dimension);
 506   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 507     {
 508       /* Official charset, it doesn't have an extended leading-code.  */
 509       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 510         bytes += 1; /* For a base leading-code.  */
 511       leading_code_base = charset;
 512       leading_code_ext = 0;
 513     }
 514   else
 515     {
 516       /* Private charset.  */
 517       bytes += 2; /* For base and extended leading-codes.  */
 518       leading_code_base
 519         = (charset < LEADING_CODE_EXT_12
 520            ? LEADING_CODE_PRIVATE_11
 521            : (charset < LEADING_CODE_EXT_21
 522               ? LEADING_CODE_PRIVATE_12
 523               : (charset < LEADING_CODE_EXT_22
 524                  ? LEADING_CODE_PRIVATE_21
 525                  : LEADING_CODE_PRIVATE_22)));
 526       leading_code_ext = charset;
 527       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 528         error ("Invalid dimension for the charset-ID %d", charset);
 529     }
 530
 531   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 532   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 533   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 534   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 535   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 536   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 537   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 538     = make_number (leading_code_base);
 539   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 540     = make_number (leading_code_ext);
 541   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 542   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 543     = iso_graphic_plane;
 544   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 545   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 546   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 547   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 548
 549   {
 550     /* If we have already defined a charset which has the same
 551        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 552        DIRECTION, we must update the entry REVERSE-CHARSET of both
 553        charsets.  If there's no such charset, the value of the entry
 554        is set to nil.  */
 555     int i;
 556
 557     for (i = 0; i <= MAX_CHARSET; i++)
 558       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 559         {
 560           if (CHARSET_DIMENSION (i) == XINT (dimension)
 561               && CHARSET_CHARS (i) == XINT (chars)
 562               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 563               && CHARSET_DIRECTION (i) != XINT (direction))
 564             {
 565               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 566                 = make_number (i);
 567               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 568               break;
 569             }
 570         }
 571     if (i > MAX_CHARSET)
 572       /* No such a charset.  */
 573       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 574         = make_number (-1);
 575   }
 576
 577   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 578       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 579     {
 580       bytes_by_char_head[leading_code_base] = bytes;
 581       width_by_char_head[leading_code_base] = XINT (width);
 582
 583       /* Update table emacs_code_class.  */
 584       emacs_code_class[charset] = (bytes == 2
 585                                    ? EMACS_leading_code_2
 586                                    : (bytes == 3
 587                                       ? EMACS_leading_code_3
 588                                       : EMACS_leading_code_4));
 589     }
 590
 591   /* Update table iso_charset_table.  */
 592   if (XINT (iso_final_char) >= 0
 593       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 594     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 595 }
 596
 597 #ifdef emacs
 598
 599 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 600    is invalid.  */
 601 int
 602 get_charset_id (charset_symbol)
 603      Lisp_Object charset_symbol;
 604 {
 605   Lisp_Object val;
 606   int charset;
 607
 608   /* This originally used a ?: operator, but reportedly the HP-UX
 609      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 610   if (SYMBOLP (charset_symbol)
 611       && VECTORP (val = Fget (charset_symbol, Qcharset))
 612       && CHARSET_VALID_P (charset =
 613                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 614     return charset;
 615   else
 616     return -1;
 617 }
 618
 619 /* Return an identification number for a new private charset of
 620    DIMENSION and WIDTH.  If there's no more room for the new charset,
 621    return 0.  */
 622 Lisp_Object
 623 get_new_private_charset_id (dimension, width)
 624      int dimension, width;
 625 {
 626   int charset, from, to;
 627
 628   if (dimension == 1)
 629     {
 630       from = LEADING_CODE_EXT_11;
 631       to = LEADING_CODE_EXT_21;
 632     }
 633   else
 634     {
 635       from = LEADING_CODE_EXT_21;
 636       to = LEADING_CODE_EXT_MAX + 1;
 637     }
 638
 639   for (charset = from; charset < to; charset++)
 640     if (!CHARSET_DEFINED_P (charset)) break;
 641
 642   return make_number (charset < to ? charset : 0);
 643 }
 644
 645 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 646        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 647 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 648  treated as a private charset.
 649 INFO-VECTOR is a vector of the format:
 650    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 651     SHORT-NAME LONG-NAME DESCRIPTION]
 652 The meanings of each elements is as follows:
 653 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 654 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 655 WIDTH (integer) is the number of columns a character in the charset
 656 occupies on the screen: one of 0, 1, and 2.
 657
 658 DIRECTION (integer) is the rendering direction of characters in the
 659 charset when rendering.  If 0, render from left to right, else
 660 render from right to left.
 661
 662 ISO-FINAL-CHAR (character) is the final character of the
 663 corresponding ISO 2022 charset.
 664 It may be -1 if the charset is internal use only.
 665
 666 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 667 while encoding to variants of ISO 2022 coding system, one of the
 668 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 669 It may be -1 if the charset is internal use only.
 670
 671 SHORT-NAME (string) is the short name to refer to the charset.
 672
 673 LONG-NAME (string) is the long name to refer to the charset.
 674
 675 DESCRIPTION (string) is the description string of the charset.  */)
 676        (charset_id, charset_symbol, info_vector)
 677      Lisp_Object charset_id, charset_symbol, info_vector;
 678 {
 679   Lisp_Object *vec;
 680
 681   if (!NILP (charset_id))
 682     CHECK_NUMBER (charset_id);
 683   CHECK_SYMBOL (charset_symbol);
 684   CHECK_VECTOR (info_vector);
 685
 686   if (! NILP (charset_id))
 687     {
 688       if (! CHARSET_VALID_P (XINT (charset_id)))
 689         error ("Invalid CHARSET: %d", XINT (charset_id));
 690       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 691         error ("Already defined charset: %d", XINT (charset_id));
 692     }
 693
 694   vec = XVECTOR (info_vector)->contents;
 695   if (XVECTOR (info_vector)->size != 9
 696       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 697       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 698       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 699       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 700       || !INTEGERP (vec[4])
 701       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 702       || !INTEGERP (vec[5])
 703       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 704       || !STRINGP (vec[6])
 705       || !STRINGP (vec[7])
 706       || !STRINGP (vec[8]))
 707     error ("Invalid info-vector argument for defining charset %s",
 708            SDATA (SYMBOL_NAME (charset_symbol)));
 709
 710   if (NILP (charset_id))
 711     {
 712       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 713       if (XINT (charset_id) == 0)
 714         error ("There's no room for a new private charset %s",
 715                SDATA (SYMBOL_NAME (charset_symbol)));
 716     }
 717
 718   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 719                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 720   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 721   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 722   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 723   Fupdate_coding_systems_internal ();
 724   return Qnil;
 725 }
 726
 727 DEFUN ("generic-character-list", Fgeneric_character_list,
 728        Sgeneric_character_list, 0, 0, 0,
 729        doc: /* Return a list of all possible generic characters.
 730 It includes a generic character for a charset not yet defined.  */)
 731      ()
 732 {
 733   return Vgeneric_character_list;
 734 }
 735
 736 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 737        Sget_unused_iso_final_char, 2, 2, 0,
 738        doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
 739 DIMENSION is the number of bytes to represent a character: 1 or 2.
 740 CHARS is the number of characters in a dimension: 94 or 96.
 741
 742 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 743 If there's no unused final char for the specified kind of charset,
 744 return nil.  */)
 745      (dimension, chars)
 746      Lisp_Object dimension, chars;
 747 {
 748   int final_char;
 749
 750   CHECK_NUMBER (dimension);
 751   CHECK_NUMBER (chars);
 752   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 753     error ("Invalid charset dimension %d, it should be 1 or 2",
 754            XINT (dimension));
 755   if (XINT (chars) != 94 && XINT (chars) != 96)
 756     error ("Invalid charset chars %d, it should be 94 or 96",
 757            XINT (chars));
 758   for (final_char = '0'; final_char <= '?'; final_char++)
 759     {
 760       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 761         break;
 762     }
 763   return (final_char <= '?' ? make_number (final_char) : Qnil);
 764 }
 765
 766 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 767        4, 4, 0,
 768        doc: /* Declare an equivalent charset for ISO-2022 decoding.
 769
 770 On decoding by an ISO-2022 base coding system, when a charset
 771 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
 772 if CHARSET is designated instead.  */)
 773      (dimension, chars, final_char, charset)
 774      Lisp_Object dimension, chars, final_char, charset;
 775 {
 776   int charset_id;
 777
 778   CHECK_NUMBER (dimension);
 779   CHECK_NUMBER (chars);
 780   CHECK_NUMBER (final_char);
 781   CHECK_SYMBOL (charset);
 782
 783   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 784     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 785   if (XINT (chars) != 94 && XINT (chars) != 96)
 786     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 787   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 788     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 789   if ((charset_id = get_charset_id (charset)) < 0)
 790     error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
 791
 792   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
 793   return Qnil;
 794 }
 795
 796 /* Return information about charsets in the text at PTR of NBYTES
 797    bytes, which are NCHARS characters.  The value is:
 798
 799         0: Each character is represented by one byte.  This is always
 800            true for unibyte text.
 801         1: No charsets other than ascii eight-bit-control,
 802            eight-bit-graphic, and latin-1 are found.
 803         2: Otherwise.
 804
 805    In addition, if CHARSETS is nonzero, for each found charset N, set
 806    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 807    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 808    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 809    1 (note that there's no charset whose ID is 1).  */
 810
 811 int
 812 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 813      const unsigned char *ptr;
 814      int nchars, nbytes, *charsets;
 815      Lisp_Object table;
 816 {
 817   if (nchars == nbytes)
 818     {
 819       if (charsets && nbytes > 0)
 820         {
 821           const unsigned char *endp = ptr + nbytes;
 822           int maskbits = 0;
 823
 824           while (ptr < endp && maskbits != 7)
 825             {
 826               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 827               ptr++;
 828             }
 829
 830           if (maskbits & 1)
 831             charsets[CHARSET_ASCII] = 1;
 832           if (maskbits & 2)
 833             charsets[CHARSET_8_BIT_CONTROL] = 1;
 834           if (maskbits & 4)
 835             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 836         }
 837       return 0;
 838     }
 839   else
 840     {
 841       int return_val = 1;
 842       int bytes, charset, c1, c2;
 843
 844       if (! CHAR_TABLE_P (table))
 845         table = Qnil;
 846
 847       while (nchars-- > 0)
 848         {
 849           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 850           ptr += bytes;
 851
 852           if (!CHARSET_DEFINED_P (charset))
 853             charset = 1;
 854           else if (! NILP (table))
 855             {
 856               int c = translate_char (table, -1, charset, c1, c2);
 857               if (c >= 0)
 858                 charset = CHAR_CHARSET (c);
 859             }
 860
 861           if (return_val == 1
 862               && charset != CHARSET_ASCII
 863               && charset != CHARSET_8_BIT_CONTROL
 864               && charset != CHARSET_8_BIT_GRAPHIC
 865               && charset != charset_latin_iso8859_1)
 866             return_val = 2;
 867
 868           if (charsets)
 869             charsets[charset] = 1;
 870           else if (return_val == 2)
 871             break;
 872         }
 873       return return_val;
 874     }
 875 }
 876
 877 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 878        2, 3, 0,
 879        doc: /* Return a list of charsets in the region between BEG and END.
 880 BEG and END are buffer positions.
 881 Optional arg TABLE if non-nil is a translation table to look up.
 882
 883 If the region contains invalid multibyte characters,
 884 `unknown' is included in the returned list.
 885
 886 If the current buffer is unibyte, the returned list may contain
 887 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 888      (beg, end, table)
 889      Lisp_Object beg, end, table;
 890 {
 891   int charsets[MAX_CHARSET + 1];
 892   int from, from_byte, to, stop, stop_byte, i;
 893   Lisp_Object val;
 894
 895   validate_region (&beg, &end);
 896   from = XFASTINT (beg);
 897   stop = to = XFASTINT (end);
 898
 899   if (from < GPT && GPT < to)
 900     {
 901       stop = GPT;
 902       stop_byte = GPT_BYTE;
 903     }
 904   else
 905     stop_byte = CHAR_TO_BYTE (stop);
 906
 907   from_byte = CHAR_TO_BYTE (from);
 908
 909   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 910   while (1)
 911     {
 912       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 913                             stop_byte - from_byte, charsets, table);
 914       if (stop < to)
 915         {
 916           from = stop, from_byte = stop_byte;
 917           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 918         }
 919       else
 920         break;
 921     }
 922
 923   val = Qnil;
 924   if (charsets[1])
 925     val = Fcons (Qunknown, val);
 926   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 927     if (charsets[i])
 928       val = Fcons (CHARSET_SYMBOL (i), val);
 929   if (charsets[0])
 930     val = Fcons (Qascii, val);
 931   return val;
 932 }
 933
 934 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 935        1, 2, 0,
 936        doc: /* Return a list of charsets in STR.
 937 Optional arg TABLE if non-nil is a translation table to look up.
 938
 939 If the string contains invalid multibyte characters,
 940 `unknown' is included in the returned list.
 941
 942 If STR is unibyte, the returned list may contain
 943 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 944      (str, table)
 945      Lisp_Object str, table;
 946 {
 947   int charsets[MAX_CHARSET + 1];
 948   int i;
 949   Lisp_Object val;
 950
 951   CHECK_STRING (str);
 952
 953   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 954   find_charset_in_text (SDATA (str), SCHARS (str),
 955                         SBYTES (str), charsets, table);
 956
 957   val = Qnil;
 958   if (charsets[1])
 959     val = Fcons (Qunknown, val);
 960   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 961     if (charsets[i])
 962       val = Fcons (CHARSET_SYMBOL (i), val);
 963   if (charsets[0])
 964     val = Fcons (Qascii, val);
 965   return val;
 966 }
 967
 968 \f
 969 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 970        doc: /* Return a character made from arguments.
 971 Internal use only.  */)
 972      (charset, code1, code2)
 973      Lisp_Object charset, code1, code2;
 974 {
 975   int charset_id, c1, c2;
 976
 977   CHECK_NUMBER (charset);
 978   charset_id = XINT (charset);
 979   if (!CHARSET_DEFINED_P (charset_id))
 980     error ("Invalid charset ID: %d", XINT (charset));
 981
 982   if (NILP (code1))
 983     c1 = 0;
 984   else
 985     {
 986       CHECK_NUMBER (code1);
 987       c1 = XINT (code1);
 988     }
 989   if (NILP (code2))
 990     c2 = 0;
 991   else
 992     {
 993       CHECK_NUMBER (code2);
 994       c2 = XINT (code2);
 995     }
 996
 997   if (charset_id == CHARSET_ASCII)
 998     {
 999       if (c1 < 0 || c1 > 0x7F)
1000         goto invalid_code_posints;
1001       return make_number (c1);
1002     }
1003   else if (charset_id == CHARSET_8_BIT_CONTROL)
1004     {
1005       if (NILP (code1))
1006         c1 = 0x80;
1007       else if (c1 < 0x80 || c1 > 0x9F)
1008         goto invalid_code_posints;
1009       return make_number (c1);
1010     }
1011   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1012     {
1013       if (NILP (code1))
1014         c1 = 0xA0;
1015       else if (c1 < 0xA0 || c1 > 0xFF)
1016         goto invalid_code_posints;
1017       return make_number (c1);
1018     }
1019   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1020     goto invalid_code_posints;
1021   c1 &= 0x7F;
1022   c2 &= 0x7F;
1023   if (c1 == 0
1024       ? c2 != 0
1025       : (c2 == 0
1026          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1027          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1028     goto invalid_code_posints;
1029   return make_number (MAKE_CHAR (charset_id, c1, c2));
1030
1031  invalid_code_posints:
1032   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1033 }
1034
1035 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1036        doc: /* Return list of charset and one or two position-codes of CH.
1037 If CH is invalid as a character code,
1038 return a list of symbol `unknown' and CH.  */)
1039      (ch)
1040      Lisp_Object ch;
1041 {
1042   int c, charset, c1, c2;
1043
1044   CHECK_NUMBER (ch);
1045   c = XFASTINT (ch);
1046   if (!CHAR_VALID_P (c, 1))
1047     return Fcons (Qunknown, Fcons (ch, Qnil));
1048   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1049   return (c2 >= 0
1050           ? Fcons (CHARSET_SYMBOL (charset),
1051                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1052           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1053 }
1054
1055 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1056        doc: /* Return charset of CH.  */)
1057      (ch)
1058      Lisp_Object ch;
1059 {
1060   CHECK_NUMBER (ch);
1061
1062   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1063 }
1064
1065 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1066        doc: /* Return charset of a character in the current buffer at position POS.
1067 If POS is nil, it defauls to the current point.
1068 If POS is out of range, the value is nil.  */)
1069      (pos)
1070      Lisp_Object pos;
1071 {
1072   Lisp_Object ch;
1073   int charset;
1074
1075   ch = Fchar_after (pos);
1076   if (! INTEGERP (ch))
1077     return ch;
1078   charset = CHAR_CHARSET (XINT (ch));
1079   return CHARSET_SYMBOL (charset);
1080 }
1081
1082 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1083        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1084
1085 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1086 by their DIMENSION, CHARS, and FINAL-CHAR,
1087 where as Emacs distinguishes them by charset symbol.
1088 See the documentation of the function `charset-info' for the meanings of
1089 DIMENSION, CHARS, and FINAL-CHAR.  */)
1090      (dimension, chars, final_char)
1091      Lisp_Object dimension, chars, final_char;
1092 {
1093   int charset;
1094
1095   CHECK_NUMBER (dimension);
1096   CHECK_NUMBER (chars);
1097   CHECK_NUMBER (final_char);
1098
1099   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1100     return Qnil;
1101   return CHARSET_SYMBOL (charset);
1102 }
1103
1104 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1105    generic character.  If GENERICP is zero, return nonzero iff C is a
1106    valid normal character.  Do not call this function directly,
1107    instead use macro CHAR_VALID_P.  */
1108 int
1109 char_valid_p (c, genericp)
1110      int c, genericp;
1111 {
1112   int charset, c1, c2;
1113
1114   if (c < 0 || c >= MAX_CHAR)
1115     return 0;
1116   if (SINGLE_BYTE_CHAR_P (c))
1117     return 1;
1118   SPLIT_CHAR (c, charset, c1, c2);
1119   if (genericp)
1120     {
1121       if (c1)
1122         {
1123           if (c2 <= 0) c2 = 0x20;
1124         }
1125       else
1126         {
1127           if (c2 <= 0) c1 = c2 = 0x20;
1128         }
1129     }
1130   return (CHARSET_DEFINED_P (charset)
1131           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1132 }
1133
1134 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1135        doc: /* Return t if OBJECT is a valid normal character.
1136 If optional arg GENERICP is non-nil, also return t if OBJECT is
1137 a valid generic character.  */)
1138      (object, genericp)
1139      Lisp_Object object, genericp;
1140 {
1141   if (! NATNUMP (object))
1142     return Qnil;
1143   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1144 }
1145
1146 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1147        Sunibyte_char_to_multibyte, 1, 1, 0,
1148        doc: /* Convert the unibyte character CH to multibyte character.
1149 The conversion is done based on `nonascii-translation-table' (which see)
1150  or `nonascii-insert-offset' (which see).  */)
1151      (ch)
1152      Lisp_Object ch;
1153 {
1154   int c;
1155
1156   CHECK_NUMBER (ch);
1157   c = XINT (ch);
1158   if (c < 0 || c >= 0400)
1159     error ("Invalid unibyte character: %d", c);
1160   c = unibyte_char_to_multibyte (c);
1161   if (c < 0)
1162     error ("Can't convert to multibyte character: %d", XINT (ch));
1163   return make_number (c);
1164 }
1165
1166 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1167        Smultibyte_char_to_unibyte, 1, 1, 0,
1168        doc: /* Convert the multibyte character CH to unibyte character.
1169 The conversion is done based on `nonascii-translation-table' (which see)
1170  or `nonascii-insert-offset' (which see).  */)
1171      (ch)
1172      Lisp_Object ch;
1173 {
1174   int c;
1175
1176   CHECK_NUMBER (ch);
1177   c = XINT (ch);
1178   if (! CHAR_VALID_P (c, 0))
1179     error ("Invalid multibyte character: %d", c);
1180   c = multibyte_char_to_unibyte (c, Qnil);
1181   if (c < 0)
1182     error ("Can't convert to unibyte character: %d", XINT (ch));
1183   return make_number (c);
1184 }
1185
1186 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1187        doc: /* Return 1 regardless of the argument CH.  */)
1188      (ch)
1189      Lisp_Object ch;
1190 {
1191   CHECK_NUMBER (ch);
1192   return make_number (1);
1193 }
1194
1195 /* Return how many bytes C will occupy in a multibyte buffer.
1196    Don't call this function directly, instead use macro CHAR_BYTES.  */
1197 int
1198 char_bytes (c)
1199      int c;
1200 {
1201   int charset;
1202
1203   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1204     return 1;
1205   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1206     return 1;
1207
1208   charset = CHAR_CHARSET (c);
1209   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1210 }
1211
1212 /* Return the width of character of which multi-byte form starts with
1213    C.  The width is measured by how many columns occupied on the
1214    screen when displayed in the current buffer.  */
1215
1216 #define ONE_BYTE_CHAR_WIDTH(c)                                          \
1217   (c < 0x20                                                             \
1218    ? (c == '\t'                                                         \
1219       ? XFASTINT (current_buffer->tab_width)                            \
1220       : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))   \
1221    : (c < 0x7f                                                          \
1222       ? 1                                                               \
1223       : (c == 0x7F                                                      \
1224          ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
1225          : ((! NILP (current_buffer->enable_multibyte_characters)       \
1226              && BASE_LEADING_CODE_P (c))                                \
1227             ? WIDTH_BY_CHAR_HEAD (c)                                    \
1228             : 4))))
1229
1230 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1231        doc: /* Return width of CH when displayed in the current buffer.
1232 The width is measured by how many columns it occupies on the screen.
1233 Tab is taken to occupy `tab-width' columns.  */)
1234      (ch)
1235      Lisp_Object ch;
1236 {
1237   Lisp_Object val, disp;
1238   int c;
1239   struct Lisp_Char_Table *dp = buffer_display_table ();
1240
1241   CHECK_NUMBER (ch);
1242
1243   c = XINT (ch);
1244
1245   /* Get the way the display table would display it.  */
1246   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1247
1248   if (VECTORP (disp))
1249     XSETINT (val, XVECTOR (disp)->size);
1250   else if (SINGLE_BYTE_CHAR_P (c))
1251     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1252   else
1253     {
1254       int charset = CHAR_CHARSET (c);
1255
1256       XSETFASTINT (val, CHARSET_WIDTH (charset));
1257     }
1258   return val;
1259 }
1260
1261 /* Return width of string STR of length LEN when displayed in the
1262    current buffer.  The width is measured by how many columns it
1263    occupies on the screen.  */
1264
1265 int
1266 strwidth (str, len)
1267      unsigned char *str;
1268      int len;
1269 {
1270   return c_string_width (str, len, -1, NULL, NULL);
1271 }
1272
1273 /* Return width of string STR of length LEN when displayed in the
1274    current buffer.  The width is measured by how many columns it
1275    occupies on the screen.  If PRECISION > 0, return the width of
1276    longest substring that doesn't exceed PRECISION, and set number of
1277    characters and bytes of the substring in *NCHARS and *NBYTES
1278    respectively.  */
1279
1280 int
1281 c_string_width (str, len, precision, nchars, nbytes)
1282      const unsigned char *str;
1283      int len, precision, *nchars, *nbytes;
1284 {
1285   int i = 0, i_byte = 0;
1286   int width = 0;
1287   int chars;
1288   struct Lisp_Char_Table *dp = buffer_display_table ();
1289
1290   while (i_byte < len)
1291     {
1292       int bytes, thiswidth;
1293       Lisp_Object val;
1294
1295       if (dp)
1296         {
1297           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1298
1299           chars = 1;
1300           val = DISP_CHAR_VECTOR (dp, c);
1301           if (VECTORP (val))
1302             thiswidth = XVECTOR (val)->size;
1303           else
1304             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1305         }
1306       else
1307         {
1308           chars = 1;
1309           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1310           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1311         }
1312
1313       if (precision > 0
1314           && (width + thiswidth > precision))
1315         {
1316           *nchars = i;
1317           *nbytes = i_byte;
1318           return width;
1319         }
1320       i++;
1321       i_byte += bytes;
1322       width += thiswidth;
1323   }
1324
1325   if (precision > 0)
1326     {
1327       *nchars = i;
1328       *nbytes = i_byte;
1329     }
1330
1331   return width;
1332 }
1333
1334 /* Return width of Lisp string STRING when displayed in the current
1335    buffer.  The width is measured by how many columns it occupies on
1336    the screen while paying attention to compositions.  If PRECISION >
1337    0, return the width of longest substring that doesn't exceed
1338    PRECISION, and set number of characters and bytes of the substring
1339    in *NCHARS and *NBYTES respectively.  */
1340
1341 int
1342 lisp_string_width (string, precision, nchars, nbytes)
1343      Lisp_Object string;
1344      int precision, *nchars, *nbytes;
1345 {
1346   int len = SCHARS (string);
1347   int len_byte = SBYTES (string);
1348   /* This set multibyte to 0 even if STRING is multibyte when it
1349      contains only ascii and eight-bit-graphic, but that's
1350      intentional.  */
1351   int multibyte = len < len_byte;
1352   const unsigned char *str = SDATA (string);
1353   int i = 0, i_byte = 0;
1354   int width = 0;
1355   struct Lisp_Char_Table *dp = buffer_display_table ();
1356
1357   while (i < len)
1358     {
1359       int chars, bytes, thiswidth;
1360       Lisp_Object val;
1361       int cmp_id;
1362       int ignore, end;
1363
1364       if (find_composition (i, -1, &ignore, &end, &val, string)
1365           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1366               >= 0))
1367         {
1368           thiswidth = composition_table[cmp_id]->width;
1369           chars = end - i;
1370           bytes = string_char_to_byte (string, end) - i_byte;
1371         }
1372       else if (dp)
1373         {
1374           int c;
1375
1376           if (multibyte)
1377             c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1378           else
1379             c = str[i_byte], bytes = 1;
1380           chars = 1;
1381           val = DISP_CHAR_VECTOR (dp, c);
1382           if (VECTORP (val))
1383             thiswidth = XVECTOR (val)->size;
1384           else
1385             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1386         }
1387       else
1388         {
1389           chars = 1;
1390           if (multibyte)
1391             PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1392           else
1393             bytes = 1;
1394           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1395         }
1396
1397       if (precision > 0
1398           && (width + thiswidth > precision))
1399         {
1400           *nchars = i;
1401           *nbytes = i_byte;
1402           return width;
1403         }
1404       i += chars;
1405       i_byte += bytes;
1406       width += thiswidth;
1407   }
1408
1409   if (precision > 0)
1410     {
1411       *nchars = i;
1412       *nbytes = i_byte;
1413     }
1414
1415   return width;
1416 }
1417
1418 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1419        doc: /* Return width of STRING when displayed in the current buffer.
1420 Width is measured by how many columns it occupies on the screen.
1421 When calculating width of a multibyte character in STRING,
1422 only the base leading-code is considered; the validity of
1423 the following bytes is not checked.  Tabs in STRING are always
1424 taken to occupy `tab-width' columns.  */)
1425      (string)
1426      Lisp_Object string;
1427 {
1428   Lisp_Object val;
1429
1430   CHECK_STRING (string);
1431   XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
1432   return val;
1433 }
1434
1435 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1436        doc: /* Return the direction of CH.
1437 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1438      (ch)
1439      Lisp_Object ch;
1440 {
1441   int charset;
1442
1443   CHECK_NUMBER (ch);
1444   charset = CHAR_CHARSET (XFASTINT (ch));
1445   if (!CHARSET_DEFINED_P (charset))
1446     invalid_character (XINT (ch));
1447   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1448 }
1449
1450 /* Return the number of characters in the NBYTES bytes at PTR.
1451    This works by looking at the contents and checking for multibyte sequences.
1452    However, if the current buffer has enable-multibyte-characters = nil,
1453    we treat each byte as a character.  */
1454
1455 int
1456 chars_in_text (ptr, nbytes)
1457      const unsigned char *ptr;
1458      int nbytes;
1459 {
1460   /* current_buffer is null at early stages of Emacs initialization.  */
1461   if (current_buffer == 0
1462       || NILP (current_buffer->enable_multibyte_characters))
1463     return nbytes;
1464
1465   return multibyte_chars_in_text (ptr, nbytes);
1466 }
1467
1468 /* Return the number of characters in the NBYTES bytes at PTR.
1469    This works by looking at the contents and checking for multibyte sequences.
1470    It ignores enable-multibyte-characters.  */
1471
1472 int
1473 multibyte_chars_in_text (ptr, nbytes)
1474      const unsigned char *ptr;
1475      int nbytes;
1476 {
1477   const unsigned char *endp;
1478   int chars, bytes;
1479
1480   endp = ptr + nbytes;
1481   chars = 0;
1482
1483   while (ptr < endp)
1484     {
1485       PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
1486       ptr += bytes;
1487       chars++;
1488     }
1489
1490   return chars;
1491 }
1492
1493 /* Parse unibyte text at STR of LEN bytes as multibyte text, and
1494    count the numbers of characters and bytes in it.  On counting
1495    bytes, pay attention to the fact that 8-bit characters in the range
1496    0x80..0x9F are represented by 2 bytes in multibyte text.  */
1497 void
1498 parse_str_as_multibyte (str, len, nchars, nbytes)
1499      const unsigned char *str;
1500      int len, *nchars, *nbytes;
1501 {
1502   const unsigned char *endp = str + len;
1503   int n, chars = 0, bytes = 0;
1504
1505   while (str < endp)
1506     {
1507       if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
1508         str += n, bytes += n;
1509       else
1510         str++, bytes += 2;
1511       chars++;
1512     }
1513   *nchars = chars;
1514   *nbytes = bytes;
1515   return;
1516 }
1517
1518 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1519    It actually converts only 8-bit characters in the range 0x80..0x9F
1520    that don't contruct multibyte characters to multibyte forms.  If
1521    NCHARS is nonzero, set *NCHARS to the number of characters in the
1522    text.  It is assured that we can use LEN bytes at STR as a work
1523    area and that is enough.  Return the number of bytes of the
1524    resulting text.  */
1525
1526 int
1527 str_as_multibyte (str, len, nbytes, nchars)
1528      unsigned char *str;
1529      int len, nbytes, *nchars;
1530 {
1531   unsigned char *p = str, *endp = str + nbytes;
1532   unsigned char *to;
1533   int chars = 0;
1534   int n;
1535
1536   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1537     p += n, chars++;
1538   if (nchars)
1539     *nchars = chars;
1540   if (p == endp)
1541     return nbytes;
1542
1543   to = p;
1544   nbytes = endp - p;
1545   endp = str + len;
1546   safe_bcopy (p, endp - nbytes, nbytes);
1547   p = endp - nbytes;
1548   while (p < endp)
1549     {
1550       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1551         {
1552           while (n--)
1553             *to++ = *p++;
1554         }
1555       else
1556         {
1557           *to++ = LEADING_CODE_8_BIT_CONTROL;
1558           *to++ = *p++ + 0x20;
1559         }
1560       chars++;
1561     }
1562   if (nchars)
1563     *nchars = chars;
1564   return (to - str);
1565 }
1566
1567 /* Parse unibyte string at STR of LEN bytes, and return the number of
1568    bytes it may ocupy when converted to multibyte string by
1569    `str_to_multibyte'.  */
1570
1571 int
1572 parse_str_to_multibyte (str, len)
1573      unsigned char *str;
1574      int len;
1575 {
1576   unsigned char *endp = str + len;
1577   int bytes;
1578
1579   for (bytes = 0; str < endp; str++)
1580     bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
1581   return bytes;
1582 }
1583
1584 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1585    that contains the same single-byte characters.  It actually
1586    converts all 8-bit characters to multibyte forms.  It is assured
1587    that we can use LEN bytes at STR as a work area and that is
1588    enough.  */
1589
1590 int
1591 str_to_multibyte (str, len, bytes)
1592      unsigned char *str;
1593      int len, bytes;
1594 {
1595   unsigned char *p = str, *endp = str + bytes;
1596   unsigned char *to;
1597
1598   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1599   if (p == endp)
1600     return bytes;
1601   to = p;
1602   bytes = endp - p;
1603   endp = str + len;
1604   safe_bcopy (p, endp - bytes, bytes);
1605   p = endp - bytes;
1606   while (p < endp)
1607     {
1608       if (*p < 0x80 || *p >= 0xA0)
1609         *to++ = *p++;
1610       else
1611         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1612     }
1613   return (to - str);
1614 }
1615
1616 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1617    actually converts only 8-bit characters in the range 0x80..0x9F to
1618    unibyte forms.  */
1619
1620 int
1621 str_as_unibyte (str, bytes)
1622      unsigned char *str;
1623      int bytes;
1624 {
1625   unsigned char *p = str, *endp = str + bytes;
1626   unsigned char *to = str;
1627
1628   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1629   to = p;
1630   while (p < endp)
1631     {
1632       if (*p == LEADING_CODE_8_BIT_CONTROL)
1633         *to++ = *(p + 1) - 0x20, p += 2;
1634       else
1635         *to++ = *p++;
1636     }
1637   return (to - str);
1638 }
1639
1640 \f
1641 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1642   doc: /* Concatenate all the argument characters and make the result a string.
1643 usage: (string &rest CHARACTERS)  */)
1644      (n, args)
1645      int n;
1646      Lisp_Object *args;
1647 {
1648   int i, bufsize;
1649   unsigned char *buf, *p;
1650   int c;
1651   int multibyte = 0;
1652   Lisp_Object ret;
1653   USE_SAFE_ALLOCA;
1654
1655   bufsize = MAX_MULTIBYTE_LENGTH * n;
1656   SAFE_ALLOCA (buf, unsigned char *, bufsize);
1657   p = buf;
1658
1659   for (i = 0; i < n; i++)
1660     {
1661       CHECK_NUMBER (args[i]);
1662       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1663         multibyte = 1;
1664     }
1665
1666   for (i = 0; i < n; i++)
1667     {
1668       c = XINT (args[i]);
1669       if (multibyte)
1670         p += CHAR_STRING (c, p);
1671       else
1672         *p++ = c;
1673     }
1674
1675   ret = make_string_from_bytes (buf, n, p - buf);
1676   SAFE_FREE ();
1677
1678   return ret;
1679 }
1680
1681 #endif /* emacs */
1682 \f
1683 int
1684 charset_id_internal (charset_name)
1685      char *charset_name;
1686 {
1687   Lisp_Object val;
1688
1689   val= Fget (intern (charset_name), Qcharset);
1690   if (!VECTORP (val))
1691     error ("Charset %s is not defined", charset_name);
1692
1693   return (XINT (XVECTOR (val)->contents[0]));
1694 }
1695
1696 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1697        Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
1698      ()
1699 {
1700   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1701   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1702   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1703   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1704   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1705   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1706   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1707   charset_mule_unicode_0100_24ff
1708     = charset_id_internal ("mule-unicode-0100-24ff");
1709   charset_mule_unicode_2500_33ff
1710     = charset_id_internal ("mule-unicode-2500-33ff");
1711   charset_mule_unicode_e000_ffff
1712     = charset_id_internal ("mule-unicode-e000-ffff");
1713   return Qnil;
1714 }
1715
1716 void
1717 init_charset_once ()
1718 {
1719   int i, j, k;
1720
1721   staticpro (&Vcharset_table);
1722   staticpro (&Vcharset_symbol_table);
1723   staticpro (&Vgeneric_character_list);
1724
1725   /* This has to be done here, before we call Fmake_char_table.  */
1726   Qcharset_table = intern ("charset-table");
1727   staticpro (&Qcharset_table);
1728
1729   /* Intern this now in case it isn't already done.
1730      Setting this variable twice is harmless.
1731      But don't staticpro it here--that is done in alloc.c.  */
1732   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1733
1734   /* Now we are ready to set up this property, so we can
1735      create the charset table.  */
1736   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1737   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1738
1739   Qunknown = intern ("unknown");
1740   staticpro (&Qunknown);
1741   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1742                                         Qunknown);
1743
1744   /* Setup tables.  */
1745   for (i = 0; i < 2; i++)
1746     for (j = 0; j < 2; j++)
1747       for (k = 0; k < 128; k++)
1748         iso_charset_table [i][j][k] = -1;
1749
1750   for (i = 0; i < 256; i++)
1751     bytes_by_char_head[i] = 1;
1752   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1753   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1754   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1755   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1756
1757   for (i = 0; i < 128; i++)
1758     width_by_char_head[i] = 1;
1759   for (; i < 256; i++)
1760     width_by_char_head[i] = 4;
1761   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1762   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1763   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1764   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1765
1766   {
1767     Lisp_Object val;
1768
1769     val = Qnil;
1770     for (i = 0x81; i < 0x90; i++)
1771       val = Fcons (make_number ((i - 0x70) << 7), val);
1772     for (; i < 0x9A; i++)
1773       val = Fcons (make_number ((i - 0x8F) << 14), val);
1774     for (i = 0xA0; i < 0xF0; i++)
1775       val = Fcons (make_number ((i - 0x70) << 7), val);
1776     for (; i < 0xFF; i++)
1777       val = Fcons (make_number ((i - 0xE0) << 14), val);
1778     Vgeneric_character_list = Fnreverse (val);
1779   }
1780
1781   nonascii_insert_offset = 0;
1782   Vnonascii_translation_table = Qnil;
1783 }
1784
1785 #ifdef emacs
1786
1787 void
1788 syms_of_charset ()
1789 {
1790   Qcharset = intern ("charset");
1791   staticpro (&Qcharset);
1792
1793   Qascii = intern ("ascii");
1794   staticpro (&Qascii);
1795
1796   Qeight_bit_control = intern ("eight-bit-control");
1797   staticpro (&Qeight_bit_control);
1798
1799   Qeight_bit_graphic = intern ("eight-bit-graphic");
1800   staticpro (&Qeight_bit_graphic);
1801
1802   /* Define special charsets ascii, eight-bit-control, and
1803      eight-bit-graphic.  */
1804   update_charset_table (make_number (CHARSET_ASCII),
1805                         make_number (1), make_number (94),
1806                         make_number (1),
1807                         make_number (0),
1808                         make_number ('B'),
1809                         make_number (0),
1810                         build_string ("ASCII"),
1811                         Qnil,   /* same as above */
1812                         build_string ("ASCII (ISO646 IRV)"));
1813   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1814   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1815
1816   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1817                         make_number (1), make_number (96),
1818                         make_number (4),
1819                         make_number (0),
1820                         make_number (-1),
1821                         make_number (-1),
1822                         build_string ("8-bit control code (0x80..0x9F)"),
1823                         Qnil,   /* same as above */
1824                         Qnil);  /* same as above */
1825   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1826   Fput (Qeight_bit_control, Qcharset,
1827         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1828
1829   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1830                         make_number (1), make_number (96),
1831                         make_number (4),
1832                         make_number (0),
1833                         make_number (-1),
1834                         make_number (-1),
1835                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1836                         Qnil,   /* same as above */
1837                         Qnil);  /* same as above */
1838   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1839   Fput (Qeight_bit_graphic, Qcharset,
1840         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1841
1842   Qauto_fill_chars = intern ("auto-fill-chars");
1843   staticpro (&Qauto_fill_chars);
1844   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1845
1846   defsubr (&Sdefine_charset);
1847   defsubr (&Sgeneric_character_list);
1848   defsubr (&Sget_unused_iso_final_char);
1849   defsubr (&Sdeclare_equiv_charset);
1850   defsubr (&Sfind_charset_region);
1851   defsubr (&Sfind_charset_string);
1852   defsubr (&Smake_char_internal);
1853   defsubr (&Ssplit_char);
1854   defsubr (&Schar_charset);
1855   defsubr (&Scharset_after);
1856   defsubr (&Siso_charset);
1857   defsubr (&Schar_valid_p);
1858   defsubr (&Sunibyte_char_to_multibyte);
1859   defsubr (&Smultibyte_char_to_unibyte);
1860   defsubr (&Schar_bytes);
1861   defsubr (&Schar_width);
1862   defsubr (&Sstring_width);
1863   defsubr (&Schar_direction);
1864   defsubr (&Sstring);
1865   defsubr (&Ssetup_special_charsets);
1866
1867   DEFVAR_LISP ("charset-list", &Vcharset_list,
1868                doc: /* List of charsets ever defined.  */);
1869   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1870                                         Fcons (Qeight_bit_graphic, Qnil)));
1871
1872   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1873                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1874 An ID of a translation table is an index of this vector.  */);
1875   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1876
1877   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1878               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1879   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1880
1881   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1882               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1883   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1884
1885   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1886               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1887   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1888
1889   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1890               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1891   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1892
1893   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1894               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1895 This is used for converting unibyte text to multibyte,
1896 and for inserting character codes specified by number.
1897
1898 This serves to convert a Latin-1 or similar 8-bit character code
1899 to the corresponding Emacs multibyte character code.
1900 Typically the value should be (- (make-char CHARSET 0) 128),
1901 for your choice of character set.
1902 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1903   nonascii_insert_offset = 0;
1904
1905   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1906                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1907 This is used for converting unibyte text to multibyte,
1908 and for inserting character codes specified by number.
1909
1910 Conversion is performed only when multibyte characters are enabled,
1911 and it serves to convert a Latin-1 or similar 8-bit character code
1912 to the corresponding Emacs character code.
1913
1914 If this is nil, `nonascii-insert-offset' is used instead.
1915 See also the docstring of `make-translation-table'.  */);
1916   Vnonascii_translation_table = Qnil;
1917
1918   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1919                doc: /* A char-table for characters which invoke auto-filling.
1920 Such characters have value t in this table.  */);
1921   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1922   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1923   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1924 }
1925
1926 #endif /* emacs */
1927
1928 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1929    (do not change this comment) */