src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2004 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /* At first, see the document in `charset.h' to understand the code in
  24    this file.  */
  25
  26 #ifdef emacs
  27 #include <config.h>
  28 #endif
  29
  30 #include <stdio.h>
  31
  32 #ifdef emacs
  33
  34 #include <sys/types.h>
  35 #include "lisp.h"
  36 #include "buffer.h"
  37 #include "charset.h"
  38 #include "composite.h"
  39 #include "coding.h"
  40 #include "disptab.h"
  41
  42 #else  /* not emacs */
  43
  44 #include "mulelib.h"
  45
  46 #endif /* emacs */
  47
  48 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  49 Lisp_Object Qunknown;
  50
  51 /* Declaration of special leading-codes.  */
  52 EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
  53 EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
  54 EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
  55 EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
  56
  57 /* Declaration of special charsets.  The values are set by
  58    Fsetup_special_charsets.  */
  59 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  60 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  61 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  62 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  63 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  64 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  65 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  66
  67 Lisp_Object Qcharset_table;
  68
  69 /* A char-table containing information of each character set.  */
  70 Lisp_Object Vcharset_table;
  71
  72 /* A vector of charset symbol indexed by charset-id.  This is used
  73    only for returning charset symbol from C functions.  */
  74 Lisp_Object Vcharset_symbol_table;
  75
  76 /* A list of charset symbols ever defined.  */
  77 Lisp_Object Vcharset_list;
  78
  79 /* Vector of translation table ever defined.
  80    ID of a translation table is used to index this vector.  */
  81 Lisp_Object Vtranslation_table_vector;
  82
  83 /* A char-table for characters which may invoke auto-filling.  */
  84 Lisp_Object Vauto_fill_chars;
  85
  86 Lisp_Object Qauto_fill_chars;
  87
  88 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  89 int bytes_by_char_head[256];
  90 int width_by_char_head[256];
  91
  92 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  93    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  94 int iso_charset_table[2][2][128];
  95
  96 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  97 unsigned char *_fetch_multibyte_char_p;
  98 int _fetch_multibyte_char_len;
  99
 100 /* Offset to add to a non-ASCII value when inserting it.  */
 101 EMACS_INT nonascii_insert_offset;
 102
 103 /* Translation table for converting non-ASCII unibyte characters
 104    to multibyte codes, or nil.  */
 105 Lisp_Object Vnonascii_translation_table;
 106
 107 /* List of all possible generic characters.  */
 108 Lisp_Object Vgeneric_character_list;
 109
 110 \f
 111 void
 112 invalid_character (c)
 113      int c;
 114 {
 115   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 116 }
 117
 118 /* Parse string STR of length LENGTH and fetch information of a
 119    character at STR.  Set BYTES to the byte length the character
 120    occupies, CHARSET, C1, C2 to proper values of the character. */
 121
 122 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 123   do {                                                                       \
 124     (c1) = *(str);                                                           \
 125     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 126     if ((bytes) == 1)                                                        \
 127       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 128     else if ((bytes) == 2)                                                   \
 129       {                                                                      \
 130         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 131           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 132         else                                                                 \
 133           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 134       }                                                                      \
 135     else if ((bytes) == 3)                                                   \
 136       {                                                                      \
 137         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 138           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 139         else                                                                 \
 140           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 141       }                                                                      \
 142     else                                                                     \
 143       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 144   } while (0)
 145
 146 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
 147    Note that this intentionally allows invalid components, such
 148    as 0xA0 0xA0, because there exist many files that contain
 149    such invalid byte sequences, especially in EUC-GB. */
 150 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 151   ((charset) == CHARSET_ASCII                           \
 152    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 153    : ((charset) == CHARSET_8_BIT_CONTROL                \
 154       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 155       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 156          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 157          : (CHARSET_DIMENSION (charset) == 1            \
 158             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 159             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 160                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 161
 162 /* Store multi-byte form of the character C in STR.  The caller should
 163    allocate at least 4-byte area at STR in advance.  Returns the
 164    length of the multi-byte form.  If C is an invalid character code,
 165    return -1.  */
 166
 167 int
 168 char_to_string_1 (c, str)
 169      int c;
 170      unsigned char *str;
 171 {
 172   unsigned char *p = str;
 173
 174   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 175     {
 176       /* Multibyte character can't have a modifier bit.  */
 177       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 178         return -1;
 179
 180       /* For Meta, Shift, and Control modifiers, we need special care.  */
 181       if (c & CHAR_META)
 182         {
 183           /* Move the meta bit to the right place for a string.  */
 184           c = (c & ~CHAR_META) | 0x80;
 185         }
 186       if (c & CHAR_SHIFT)
 187         {
 188           /* Shift modifier is valid only with [A-Za-z].  */
 189           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 190             c &= ~CHAR_SHIFT;
 191           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 192             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 193         }
 194       if (c & CHAR_CTL)
 195         {
 196           /* Simulate the code in lread.c.  */
 197           /* Allow `\C- ' and `\C-?'.  */
 198           if (c == (CHAR_CTL | ' '))
 199             c = 0;
 200           else if (c == (CHAR_CTL | '?'))
 201             c = 127;
 202           /* ASCII control chars are made from letters (both cases),
 203              as well as the non-letters within 0100...0137.  */
 204           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 205             c &= (037 | (~0177 & ~CHAR_CTL));
 206           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 207             c &= (037 | (~0177 & ~CHAR_CTL));
 208         }
 209
 210       /* If C still has any modifier bits, just ignore it.  */
 211       c &= ~CHAR_MODIFIER_MASK;
 212     }
 213
 214   if (SINGLE_BYTE_CHAR_P (c))
 215     {
 216       if (ASCII_BYTE_P (c) || c >= 0xA0)
 217         *p++ = c;
 218       else
 219         {
 220           *p++ = LEADING_CODE_8_BIT_CONTROL;
 221           *p++ = c + 0x20;
 222         }
 223     }
 224   else if (CHAR_VALID_P (c, 0))
 225     {
 226       int charset, c1, c2;
 227
 228       SPLIT_CHAR (c, charset, c1, c2);
 229
 230       if (charset >= LEADING_CODE_EXT_11)
 231         *p++ = (charset < LEADING_CODE_EXT_12
 232                 ? LEADING_CODE_PRIVATE_11
 233                 : (charset < LEADING_CODE_EXT_21
 234                    ? LEADING_CODE_PRIVATE_12
 235                    : (charset < LEADING_CODE_EXT_22
 236                       ? LEADING_CODE_PRIVATE_21
 237                       : LEADING_CODE_PRIVATE_22)));
 238       *p++ = charset;
 239       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 240         return -1;
 241       if (c1)
 242         {
 243           *p++ = c1 | 0x80;
 244           if (c2 > 0)
 245             *p++ = c2 | 0x80;
 246         }
 247     }
 248   else
 249     return -1;
 250
 251   return (p - str);
 252 }
 253
 254
 255 /* Store multi-byte form of the character C in STR.  The caller should
 256    allocate at least 4-byte area at STR in advance.  Returns the
 257    length of the multi-byte form.  If C is an invalid character code,
 258    signal an error.
 259
 260    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 261    directly if C can be an ASCII character.  */
 262
 263 int
 264 char_to_string (c, str)
 265      int c;
 266      unsigned char *str;
 267 {
 268   int len;
 269   len = char_to_string_1 (c, str);
 270   if (len == -1)
 271     invalid_character (c);
 272   return len;
 273 }
 274
 275
 276 /* Return the non-ASCII character corresponding to multi-byte form at
 277    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 278    length of the multibyte form in *ACTUAL_LEN.
 279
 280    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 281    this function directly if you want ot handle ASCII characters as
 282    well.  */
 283
 284 int
 285 string_to_char (str, len, actual_len)
 286      const unsigned char *str;
 287      int len, *actual_len;
 288 {
 289   int c, bytes, charset, c1, c2;
 290
 291   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 292   c = MAKE_CHAR (charset, c1, c2);
 293   if (actual_len)
 294     *actual_len = bytes;
 295   return c;
 296 }
 297
 298 /* Return the length of the multi-byte form at string STR of length LEN.
 299    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 300 int
 301 multibyte_form_length (str, len)
 302      const unsigned char *str;
 303      int len;
 304 {
 305   int bytes;
 306
 307   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 308   return bytes;
 309 }
 310
 311 /* Check multibyte form at string STR of length LEN and set variables
 312    pointed by CHARSET, C1, and C2 to charset and position codes of the
 313    character at STR, and return 0.  If there's no multibyte character,
 314    return -1.  This should be used only in the macro SPLIT_STRING
 315    which checks range of STR in advance.  */
 316
 317 int
 318 split_string (str, len, charset, c1, c2)
 319      const unsigned char *str;
 320      unsigned char *c1, *c2;
 321      int len, *charset;
 322 {
 323   register int bytes, cs, code1, code2 = -1;
 324
 325   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 326   if (cs == CHARSET_ASCII)
 327     return -1;
 328   *charset = cs;
 329   *c1 = code1;
 330   *c2 = code2;
 331   return 0;
 332 }
 333
 334 /* Return 1 iff character C has valid printable glyph.
 335    Use the macro CHAR_PRINTABLE_P instead.  */
 336 int
 337 char_printable_p (c)
 338      int c;
 339 {
 340   int charset, c1, c2;
 341
 342   if (ASCII_BYTE_P (c))
 343     return 1;
 344   else if (SINGLE_BYTE_CHAR_P (c))
 345     return 0;
 346   else if (c >= MAX_CHAR)
 347     return 0;
 348
 349   SPLIT_CHAR (c, charset, c1, c2);
 350   if (! CHARSET_DEFINED_P (charset))
 351     return 0;
 352   if (CHARSET_CHARS (charset) == 94
 353       ? c1 <= 32 || c1 >= 127
 354       : c1 < 32)
 355     return 0;
 356   if (CHARSET_DIMENSION (charset) == 2
 357       && (CHARSET_CHARS (charset) == 94
 358           ? c2 <= 32 || c2 >= 127
 359           : c2 < 32))
 360     return 0;
 361   return 1;
 362 }
 363
 364 /* Translate character C by translation table TABLE.  If C
 365    is negative, translate a character specified by CHARSET, C1, and C2
 366    (C1 and C2 are code points of the character).  If no translation is
 367    found in TABLE, return C.  */
 368 int
 369 translate_char (table, c, charset, c1, c2)
 370      Lisp_Object table;
 371      int c, charset, c1, c2;
 372 {
 373   Lisp_Object ch;
 374   int alt_charset, alt_c1, alt_c2, dimension;
 375
 376   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 377   if (!CHAR_TABLE_P (table)
 378       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 379     return c;
 380
 381   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 382   dimension = CHARSET_DIMENSION (alt_charset);
 383   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 384     /* CH is not a generic character, just return it.  */
 385     return XFASTINT (ch);
 386
 387   /* Since CH is a generic character, we must return a specific
 388      charater which has the same position codes as C from CH.  */
 389   if (charset < 0)
 390     SPLIT_CHAR (c, charset, c1, c2);
 391   if (dimension != CHARSET_DIMENSION (charset))
 392     /* We can't make such a character because of dimension mismatch.  */
 393     return c;
 394   return MAKE_CHAR (alt_charset, c1, c2);
 395 }
 396
 397 /* Convert the unibyte character C to multibyte based on
 398    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 399    convert C to a valid multibyte character, convert it based on
 400    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 401
 402 int
 403 unibyte_char_to_multibyte (c)
 404      int c;
 405 {
 406   if (c < 0400 && c >= 0200)
 407     {
 408       int c_save = c;
 409
 410       if (! NILP (Vnonascii_translation_table))
 411         {
 412           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 413           if (c >= 0400 && ! char_valid_p (c, 0))
 414             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 415         }
 416       else if (c >= 0240 && nonascii_insert_offset > 0)
 417         {
 418           c += nonascii_insert_offset;
 419           if (c < 0400 || ! char_valid_p (c, 0))
 420             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 421         }
 422       else if (c >= 0240)
 423         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 424     }
 425   return c;
 426 }
 427
 428
 429 /* Convert the multibyte character C to unibyte 8-bit character based
 430    on Vnonascii_translation_table or nonascii_insert_offset.  If
 431    REV_TBL is non-nil, it should be a reverse table of
 432    Vnonascii_translation_table, i.e. what given by:
 433      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 434
 435 int
 436 multibyte_char_to_unibyte (c, rev_tbl)
 437      int c;
 438      Lisp_Object rev_tbl;
 439 {
 440   if (!SINGLE_BYTE_CHAR_P (c))
 441     {
 442       int c_save = c;
 443
 444       if (! CHAR_TABLE_P (rev_tbl)
 445           && CHAR_TABLE_P (Vnonascii_translation_table))
 446         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 447                                           make_number (0));
 448       if (CHAR_TABLE_P (rev_tbl))
 449         {
 450           Lisp_Object temp;
 451           temp = Faref (rev_tbl, make_number (c));
 452           if (INTEGERP (temp))
 453             c = XINT (temp);
 454           if (c >= 256)
 455             c = (c_save & 0177) + 0200;
 456         }
 457       else
 458         {
 459           if (nonascii_insert_offset > 0)
 460             c -= nonascii_insert_offset;
 461           if (c < 128 || c >= 256)
 462             c = (c_save & 0177) + 0200;
 463         }
 464     }
 465
 466   return c;
 467 }
 468
 469 \f
 470 /* Update the table Vcharset_table with the given arguments (see the
 471    document of `define-charset' for the meaning of each argument).
 472    Several other table contents are also updated.  The caller should
 473    check the validity of CHARSET-ID and the remaining arguments in
 474    advance.  */
 475
 476 void
 477 update_charset_table (charset_id, dimension, chars, width, direction,
 478                       iso_final_char, iso_graphic_plane,
 479                       short_name, long_name, description)
 480      Lisp_Object charset_id, dimension, chars, width, direction;
 481      Lisp_Object iso_final_char, iso_graphic_plane;
 482      Lisp_Object short_name, long_name, description;
 483 {
 484   int charset = XINT (charset_id);
 485   int bytes;
 486   unsigned char leading_code_base, leading_code_ext;
 487
 488   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 489     CHARSET_TABLE_ENTRY (charset)
 490       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 491
 492   if (NILP (long_name))
 493     long_name = short_name;
 494   if (NILP (description))
 495     description = long_name;
 496
 497   /* Get byte length of multibyte form, base leading-code, and
 498      extended leading-code of the charset.  See the comment under the
 499      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 500   bytes = XINT (dimension);
 501   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 502     {
 503       /* Official charset, it doesn't have an extended leading-code.  */
 504       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 505         bytes += 1; /* For a base leading-code.  */
 506       leading_code_base = charset;
 507       leading_code_ext = 0;
 508     }
 509   else
 510     {
 511       /* Private charset.  */
 512       bytes += 2; /* For base and extended leading-codes.  */
 513       leading_code_base
 514         = (charset < LEADING_CODE_EXT_12
 515            ? LEADING_CODE_PRIVATE_11
 516            : (charset < LEADING_CODE_EXT_21
 517               ? LEADING_CODE_PRIVATE_12
 518               : (charset < LEADING_CODE_EXT_22
 519                  ? LEADING_CODE_PRIVATE_21
 520                  : LEADING_CODE_PRIVATE_22)));
 521       leading_code_ext = charset;
 522       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 523         error ("Invalid dimension for the charset-ID %d", charset);
 524     }
 525
 526   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 527   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 528   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 529   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 530   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 531   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 532   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 533     = make_number (leading_code_base);
 534   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 535     = make_number (leading_code_ext);
 536   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 537   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 538     = iso_graphic_plane;
 539   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 540   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 541   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 542   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 543
 544   {
 545     /* If we have already defined a charset which has the same
 546        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 547        DIRECTION, we must update the entry REVERSE-CHARSET of both
 548        charsets.  If there's no such charset, the value of the entry
 549        is set to nil.  */
 550     int i;
 551
 552     for (i = 0; i <= MAX_CHARSET; i++)
 553       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 554         {
 555           if (CHARSET_DIMENSION (i) == XINT (dimension)
 556               && CHARSET_CHARS (i) == XINT (chars)
 557               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 558               && CHARSET_DIRECTION (i) != XINT (direction))
 559             {
 560               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 561                 = make_number (i);
 562               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 563               break;
 564             }
 565         }
 566     if (i > MAX_CHARSET)
 567       /* No such a charset.  */
 568       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 569         = make_number (-1);
 570   }
 571
 572   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 573       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 574     {
 575       bytes_by_char_head[leading_code_base] = bytes;
 576       width_by_char_head[leading_code_base] = XINT (width);
 577
 578       /* Update table emacs_code_class.  */
 579       emacs_code_class[charset] = (bytes == 2
 580                                    ? EMACS_leading_code_2
 581                                    : (bytes == 3
 582                                       ? EMACS_leading_code_3
 583                                       : EMACS_leading_code_4));
 584     }
 585
 586   /* Update table iso_charset_table.  */
 587   if (XINT (iso_final_char) >= 0
 588       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 589     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 590 }
 591
 592 #ifdef emacs
 593
 594 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 595    is invalid.  */
 596 int
 597 get_charset_id (charset_symbol)
 598      Lisp_Object charset_symbol;
 599 {
 600   Lisp_Object val;
 601   int charset;
 602
 603   /* This originally used a ?: operator, but reportedly the HP-UX
 604      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 605   if (SYMBOLP (charset_symbol)
 606       && VECTORP (val = Fget (charset_symbol, Qcharset))
 607       && CHARSET_VALID_P (charset =
 608                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 609     return charset;
 610   else
 611     return -1;
 612 }
 613
 614 /* Return an identification number for a new private charset of
 615    DIMENSION and WIDTH.  If there's no more room for the new charset,
 616    return 0.  */
 617 Lisp_Object
 618 get_new_private_charset_id (dimension, width)
 619      int dimension, width;
 620 {
 621   int charset, from, to;
 622
 623   if (dimension == 1)
 624     {
 625       from = LEADING_CODE_EXT_11;
 626       to = LEADING_CODE_EXT_21;
 627     }
 628   else
 629     {
 630       from = LEADING_CODE_EXT_21;
 631       to = LEADING_CODE_EXT_MAX + 1;
 632     }
 633
 634   for (charset = from; charset < to; charset++)
 635     if (!CHARSET_DEFINED_P (charset)) break;
 636
 637   return make_number (charset < to ? charset : 0);
 638 }
 639
 640 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 641        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 642 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 643  treated as a private charset.
 644 INFO-VECTOR is a vector of the format:
 645    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 646     SHORT-NAME LONG-NAME DESCRIPTION]
 647 The meanings of each elements is as follows:
 648 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 649 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 650 WIDTH (integer) is the number of columns a character in the charset
 651 occupies on the screen: one of 0, 1, and 2.
 652
 653 DIRECTION (integer) is the rendering direction of characters in the
 654 charset when rendering.  If 0, render from left to right, else
 655 render from right to left.
 656
 657 ISO-FINAL-CHAR (character) is the final character of the
 658 corresponding ISO 2022 charset.
 659 It may be -1 if the charset is internal use only.
 660
 661 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 662 while encoding to variants of ISO 2022 coding system, one of the
 663 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 664 It may be -1 if the charset is internal use only.
 665
 666 SHORT-NAME (string) is the short name to refer to the charset.
 667
 668 LONG-NAME (string) is the long name to refer to the charset.
 669
 670 DESCRIPTION (string) is the description string of the charset.  */)
 671        (charset_id, charset_symbol, info_vector)
 672      Lisp_Object charset_id, charset_symbol, info_vector;
 673 {
 674   Lisp_Object *vec;
 675
 676   if (!NILP (charset_id))
 677     CHECK_NUMBER (charset_id);
 678   CHECK_SYMBOL (charset_symbol);
 679   CHECK_VECTOR (info_vector);
 680
 681   if (! NILP (charset_id))
 682     {
 683       if (! CHARSET_VALID_P (XINT (charset_id)))
 684         error ("Invalid CHARSET: %d", XINT (charset_id));
 685       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 686         error ("Already defined charset: %d", XINT (charset_id));
 687     }
 688
 689   vec = XVECTOR (info_vector)->contents;
 690   if (XVECTOR (info_vector)->size != 9
 691       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 692       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 693       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 694       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 695       || !INTEGERP (vec[4])
 696       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 697       || !INTEGERP (vec[5])
 698       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 699       || !STRINGP (vec[6])
 700       || !STRINGP (vec[7])
 701       || !STRINGP (vec[8]))
 702     error ("Invalid info-vector argument for defining charset %s",
 703            SDATA (SYMBOL_NAME (charset_symbol)));
 704
 705   if (NILP (charset_id))
 706     {
 707       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 708       if (XINT (charset_id) == 0)
 709         error ("There's no room for a new private charset %s",
 710                SDATA (SYMBOL_NAME (charset_symbol)));
 711     }
 712
 713   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 714                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 715   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 716   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 717   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 718   Fupdate_coding_systems_internal ();
 719   return Qnil;
 720 }
 721
 722 DEFUN ("generic-character-list", Fgeneric_character_list,
 723        Sgeneric_character_list, 0, 0, 0,
 724        doc: /* Return a list of all possible generic characters.
 725 It includes a generic character for a charset not yet defined.  */)
 726      ()
 727 {
 728   return Vgeneric_character_list;
 729 }
 730
 731 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 732        Sget_unused_iso_final_char, 2, 2, 0,
 733        doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
 734 DIMENSION is the number of bytes to represent a character: 1 or 2.
 735 CHARS is the number of characters in a dimension: 94 or 96.
 736
 737 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 738 If there's no unused final char for the specified kind of charset,
 739 return nil.  */)
 740      (dimension, chars)
 741      Lisp_Object dimension, chars;
 742 {
 743   int final_char;
 744
 745   CHECK_NUMBER (dimension);
 746   CHECK_NUMBER (chars);
 747   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 748     error ("Invalid charset dimension %d, it should be 1 or 2",
 749            XINT (dimension));
 750   if (XINT (chars) != 94 && XINT (chars) != 96)
 751     error ("Invalid charset chars %d, it should be 94 or 96",
 752            XINT (chars));
 753   for (final_char = '0'; final_char <= '?'; final_char++)
 754     {
 755       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 756         break;
 757     }
 758   return (final_char <= '?' ? make_number (final_char) : Qnil);
 759 }
 760
 761 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 762        4, 4, 0,
 763        doc: /* Declare an equivalent charset for ISO-2022 decoding.
 764
 765 On decoding by an ISO-2022 base coding system, when a charset
 766 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
 767 if CHARSET is designated instead.  */)
 768      (dimension, chars, final_char, charset)
 769      Lisp_Object dimension, chars, final_char, charset;
 770 {
 771   int charset_id;
 772
 773   CHECK_NUMBER (dimension);
 774   CHECK_NUMBER (chars);
 775   CHECK_NUMBER (final_char);
 776   CHECK_SYMBOL (charset);
 777
 778   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 779     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 780   if (XINT (chars) != 94 && XINT (chars) != 96)
 781     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 782   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 783     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 784   if ((charset_id = get_charset_id (charset)) < 0)
 785     error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
 786
 787   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
 788   return Qnil;
 789 }
 790
 791 /* Return information about charsets in the text at PTR of NBYTES
 792    bytes, which are NCHARS characters.  The value is:
 793
 794         0: Each character is represented by one byte.  This is always
 795            true for unibyte text.
 796         1: No charsets other than ascii eight-bit-control,
 797            eight-bit-graphic, and latin-1 are found.
 798         2: Otherwise.
 799
 800    In addition, if CHARSETS is nonzero, for each found charset N, set
 801    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 802    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 803    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 804    1 (note that there's no charset whose ID is 1).  */
 805
 806 int
 807 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 808      const unsigned char *ptr;
 809      int nchars, nbytes, *charsets;
 810      Lisp_Object table;
 811 {
 812   if (nchars == nbytes)
 813     {
 814       if (charsets && nbytes > 0)
 815         {
 816           const unsigned char *endp = ptr + nbytes;
 817           int maskbits = 0;
 818
 819           while (ptr < endp && maskbits != 7)
 820             {
 821               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 822               ptr++;
 823             }
 824
 825           if (maskbits & 1)
 826             charsets[CHARSET_ASCII] = 1;
 827           if (maskbits & 2)
 828             charsets[CHARSET_8_BIT_CONTROL] = 1;
 829           if (maskbits & 4)
 830             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 831         }
 832       return 0;
 833     }
 834   else
 835     {
 836       int return_val = 1;
 837       int bytes, charset, c1, c2;
 838
 839       if (! CHAR_TABLE_P (table))
 840         table = Qnil;
 841
 842       while (nchars-- > 0)
 843         {
 844           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 845           ptr += bytes;
 846
 847           if (!CHARSET_DEFINED_P (charset))
 848             charset = 1;
 849           else if (! NILP (table))
 850             {
 851               int c = translate_char (table, -1, charset, c1, c2);
 852               if (c >= 0)
 853                 charset = CHAR_CHARSET (c);
 854             }
 855
 856           if (return_val == 1
 857               && charset != CHARSET_ASCII
 858               && charset != CHARSET_8_BIT_CONTROL
 859               && charset != CHARSET_8_BIT_GRAPHIC
 860               && charset != charset_latin_iso8859_1)
 861             return_val = 2;
 862
 863           if (charsets)
 864             charsets[charset] = 1;
 865           else if (return_val == 2)
 866             break;
 867         }
 868       return return_val;
 869     }
 870 }
 871
 872 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 873        2, 3, 0,
 874        doc: /* Return a list of charsets in the region between BEG and END.
 875 BEG and END are buffer positions.
 876 Optional arg TABLE if non-nil is a translation table to look up.
 877
 878 If the region contains invalid multibyte characters,
 879 `unknown' is included in the returned list.
 880
 881 If the current buffer is unibyte, the returned list may contain
 882 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 883      (beg, end, table)
 884      Lisp_Object beg, end, table;
 885 {
 886   int charsets[MAX_CHARSET + 1];
 887   int from, from_byte, to, stop, stop_byte, i;
 888   Lisp_Object val;
 889
 890   validate_region (&beg, &end);
 891   from = XFASTINT (beg);
 892   stop = to = XFASTINT (end);
 893
 894   if (from < GPT && GPT < to)
 895     {
 896       stop = GPT;
 897       stop_byte = GPT_BYTE;
 898     }
 899   else
 900     stop_byte = CHAR_TO_BYTE (stop);
 901
 902   from_byte = CHAR_TO_BYTE (from);
 903
 904   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 905   while (1)
 906     {
 907       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 908                             stop_byte - from_byte, charsets, table);
 909       if (stop < to)
 910         {
 911           from = stop, from_byte = stop_byte;
 912           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 913         }
 914       else
 915         break;
 916     }
 917
 918   val = Qnil;
 919   if (charsets[1])
 920     val = Fcons (Qunknown, val);
 921   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 922     if (charsets[i])
 923       val = Fcons (CHARSET_SYMBOL (i), val);
 924   if (charsets[0])
 925     val = Fcons (Qascii, val);
 926   return val;
 927 }
 928
 929 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 930        1, 2, 0,
 931        doc: /* Return a list of charsets in STR.
 932 Optional arg TABLE if non-nil is a translation table to look up.
 933
 934 If the string contains invalid multibyte characters,
 935 `unknown' is included in the returned list.
 936
 937 If STR is unibyte, the returned list may contain
 938 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 939      (str, table)
 940      Lisp_Object str, table;
 941 {
 942   int charsets[MAX_CHARSET + 1];
 943   int i;
 944   Lisp_Object val;
 945
 946   CHECK_STRING (str);
 947
 948   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 949   find_charset_in_text (SDATA (str), SCHARS (str),
 950                         SBYTES (str), charsets, table);
 951
 952   val = Qnil;
 953   if (charsets[1])
 954     val = Fcons (Qunknown, val);
 955   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 956     if (charsets[i])
 957       val = Fcons (CHARSET_SYMBOL (i), val);
 958   if (charsets[0])
 959     val = Fcons (Qascii, val);
 960   return val;
 961 }
 962
 963 \f
 964 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 965        doc: /* Return a character made from arguments.
 966 Internal use only.  */)
 967      (charset, code1, code2)
 968      Lisp_Object charset, code1, code2;
 969 {
 970   int charset_id, c1, c2;
 971
 972   CHECK_NUMBER (charset);
 973   charset_id = XINT (charset);
 974   if (!CHARSET_DEFINED_P (charset_id))
 975     error ("Invalid charset ID: %d", XINT (charset));
 976
 977   if (NILP (code1))
 978     c1 = 0;
 979   else
 980     {
 981       CHECK_NUMBER (code1);
 982       c1 = XINT (code1);
 983     }
 984   if (NILP (code2))
 985     c2 = 0;
 986   else
 987     {
 988       CHECK_NUMBER (code2);
 989       c2 = XINT (code2);
 990     }
 991
 992   if (charset_id == CHARSET_ASCII)
 993     {
 994       if (c1 < 0 || c1 > 0x7F)
 995         goto invalid_code_posints;
 996       return make_number (c1);
 997     }
 998   else if (charset_id == CHARSET_8_BIT_CONTROL)
 999     {
1000       if (NILP (code1))
1001         c1 = 0x80;
1002       else if (c1 < 0x80 || c1 > 0x9F)
1003         goto invalid_code_posints;
1004       return make_number (c1);
1005     }
1006   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1007     {
1008       if (NILP (code1))
1009         c1 = 0xA0;
1010       else if (c1 < 0xA0 || c1 > 0xFF)
1011         goto invalid_code_posints;
1012       return make_number (c1);
1013     }
1014   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1015     goto invalid_code_posints;
1016   c1 &= 0x7F;
1017   c2 &= 0x7F;
1018   if (c1 == 0
1019       ? c2 != 0
1020       : (c2 == 0
1021          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1022          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1023     goto invalid_code_posints;
1024   return make_number (MAKE_CHAR (charset_id, c1, c2));
1025
1026  invalid_code_posints:
1027   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1028 }
1029
1030 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1031        doc: /* Return list of charset and one or two position-codes of CH.
1032 If CH is invalid as a character code,
1033 return a list of symbol `unknown' and CH.  */)
1034      (ch)
1035      Lisp_Object ch;
1036 {
1037   int c, charset, c1, c2;
1038
1039   CHECK_NUMBER (ch);
1040   c = XFASTINT (ch);
1041   if (!CHAR_VALID_P (c, 1))
1042     return Fcons (Qunknown, Fcons (ch, Qnil));
1043   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1044   return (c2 >= 0
1045           ? Fcons (CHARSET_SYMBOL (charset),
1046                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1047           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1048 }
1049
1050 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1051        doc: /* Return charset of CH.  */)
1052      (ch)
1053      Lisp_Object ch;
1054 {
1055   CHECK_NUMBER (ch);
1056
1057   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1058 }
1059
1060 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1061        doc: /* Return charset of a character in the current buffer at position POS.
1062 If POS is nil, it defauls to the current point.
1063 If POS is out of range, the value is nil.  */)
1064      (pos)
1065      Lisp_Object pos;
1066 {
1067   Lisp_Object ch;
1068   int charset;
1069
1070   ch = Fchar_after (pos);
1071   if (! INTEGERP (ch))
1072     return ch;
1073   charset = CHAR_CHARSET (XINT (ch));
1074   return CHARSET_SYMBOL (charset);
1075 }
1076
1077 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1078        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1079
1080 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1081 by their DIMENSION, CHARS, and FINAL-CHAR,
1082 where as Emacs distinguishes them by charset symbol.
1083 See the documentation of the function `charset-info' for the meanings of
1084 DIMENSION, CHARS, and FINAL-CHAR.  */)
1085      (dimension, chars, final_char)
1086      Lisp_Object dimension, chars, final_char;
1087 {
1088   int charset;
1089
1090   CHECK_NUMBER (dimension);
1091   CHECK_NUMBER (chars);
1092   CHECK_NUMBER (final_char);
1093
1094   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1095     return Qnil;
1096   return CHARSET_SYMBOL (charset);
1097 }
1098
1099 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1100    generic character.  If GENERICP is zero, return nonzero iff C is a
1101    valid normal character.  Do not call this function directly,
1102    instead use macro CHAR_VALID_P.  */
1103 int
1104 char_valid_p (c, genericp)
1105      int c, genericp;
1106 {
1107   int charset, c1, c2;
1108
1109   if (c < 0 || c >= MAX_CHAR)
1110     return 0;
1111   if (SINGLE_BYTE_CHAR_P (c))
1112     return 1;
1113   SPLIT_CHAR (c, charset, c1, c2);
1114   if (genericp)
1115     {
1116       if (c1)
1117         {
1118           if (c2 <= 0) c2 = 0x20;
1119         }
1120       else
1121         {
1122           if (c2 <= 0) c1 = c2 = 0x20;
1123         }
1124     }
1125   return (CHARSET_DEFINED_P (charset)
1126           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1127 }
1128
1129 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1130        doc: /* Return t if OBJECT is a valid normal character.
1131 If optional arg GENERICP is non-nil, also return t if OBJECT is
1132 a valid generic character.  */)
1133      (object, genericp)
1134      Lisp_Object object, genericp;
1135 {
1136   if (! NATNUMP (object))
1137     return Qnil;
1138   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1139 }
1140
1141 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1142        Sunibyte_char_to_multibyte, 1, 1, 0,
1143        doc: /* Convert the unibyte character CH to multibyte character.
1144 The conversion is done based on `nonascii-translation-table' (which see)
1145  or `nonascii-insert-offset' (which see).  */)
1146      (ch)
1147      Lisp_Object ch;
1148 {
1149   int c;
1150
1151   CHECK_NUMBER (ch);
1152   c = XINT (ch);
1153   if (c < 0 || c >= 0400)
1154     error ("Invalid unibyte character: %d", c);
1155   c = unibyte_char_to_multibyte (c);
1156   if (c < 0)
1157     error ("Can't convert to multibyte character: %d", XINT (ch));
1158   return make_number (c);
1159 }
1160
1161 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1162        Smultibyte_char_to_unibyte, 1, 1, 0,
1163        doc: /* Convert the multibyte character CH to unibyte character.
1164 The conversion is done based on `nonascii-translation-table' (which see)
1165  or `nonascii-insert-offset' (which see).  */)
1166      (ch)
1167      Lisp_Object ch;
1168 {
1169   int c;
1170
1171   CHECK_NUMBER (ch);
1172   c = XINT (ch);
1173   if (! CHAR_VALID_P (c, 0))
1174     error ("Invalid multibyte character: %d", c);
1175   c = multibyte_char_to_unibyte (c, Qnil);
1176   if (c < 0)
1177     error ("Can't convert to unibyte character: %d", XINT (ch));
1178   return make_number (c);
1179 }
1180
1181 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1182        doc: /* Return 1 regardless of the argument CH.  */)
1183      (ch)
1184      Lisp_Object ch;
1185 {
1186   CHECK_NUMBER (ch);
1187   return make_number (1);
1188 }
1189
1190 /* Return how many bytes C will occupy in a multibyte buffer.
1191    Don't call this function directly, instead use macro CHAR_BYTES.  */
1192 int
1193 char_bytes (c)
1194      int c;
1195 {
1196   int charset;
1197
1198   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1199     return 1;
1200   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1201     return 1;
1202
1203   charset = CHAR_CHARSET (c);
1204   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1205 }
1206
/* Return the width of character of which multi-byte form starts with
   C.  The width is measured by how many columns occupied on the
   screen when displayed in the current buffer.

   A tab takes `tab_width' columns and a newline none; the other
   control codes below 0x20, and 0x7F, take 2 or 4 columns depending
   on whether the buffer's `ctl_arrow' is non-nil; 0x20..0x7E take 1.
   A byte above 0x7F takes the width recorded for it as a leading
   code when the buffer is multibyte and the byte is a base leading
   code, and 4 columns otherwise.

   C is evaluated several times, so it must be free of side effects;
   it is parenthesized in the expansion so that any expression can be
   passed safely.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1224
1225 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1226        doc: /* Return width of CH when displayed in the current buffer.
1227 The width is measured by how many columns it occupies on the screen.
1228 Tab is taken to occupy `tab-width' columns.  */)
1229      (ch)
1230      Lisp_Object ch;
1231 {
1232   Lisp_Object val, disp;
1233   int c;
1234   struct Lisp_Char_Table *dp = buffer_display_table ();
1235
1236   CHECK_NUMBER (ch);
1237
1238   c = XINT (ch);
1239
1240   /* Get the way the display table would display it.  */
1241   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1242
1243   if (VECTORP (disp))
1244     XSETINT (val, XVECTOR (disp)->size);
1245   else if (SINGLE_BYTE_CHAR_P (c))
1246     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1247   else
1248     {
1249       int charset = CHAR_CHARSET (c);
1250
1251       XSETFASTINT (val, CHARSET_WIDTH (charset));
1252     }
1253   return val;
1254 }
1255
1256 /* Return width of string STR of length LEN when displayed in the
1257    current buffer.  The width is measured by how many columns it
1258    occupies on the screen.  */
1259
1260 int
1261 strwidth (str, len)
1262      unsigned char *str;
1263      int len;
1264 {
1265   return c_string_width (str, len, -1, NULL, NULL);
1266 }
1267
1268 /* Return width of string STR of length LEN when displayed in the
1269    current buffer.  The width is measured by how many columns it
1270    occupies on the screen.  If PRECISION > 0, return the width of
1271    longest substring that doesn't exceed PRECISION, and set number of
1272    characters and bytes of the substring in *NCHARS and *NBYTES
1273    respectively.  */
1274
1275 int
1276 c_string_width (str, len, precision, nchars, nbytes)
1277      const unsigned char *str;
1278      int len, precision, *nchars, *nbytes;
1279 {
1280   int i = 0, i_byte = 0;
1281   int width = 0;
1282   int chars;
1283   struct Lisp_Char_Table *dp = buffer_display_table ();
1284
1285   while (i_byte < len)
1286     {
1287       int bytes, thiswidth;
1288       Lisp_Object val;
1289
1290       if (dp)
1291         {
1292           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1293
1294           chars = 1;
1295           val = DISP_CHAR_VECTOR (dp, c);
1296           if (VECTORP (val))
1297             thiswidth = XVECTOR (val)->size;
1298           else
1299             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1300         }
1301       else
1302         {
1303           chars = 1;
1304           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1305           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1306         }
1307
1308       if (precision > 0
1309           && (width + thiswidth > precision))
1310         {
1311           *nchars = i;
1312           *nbytes = i_byte;
1313           return width;
1314         }
1315       i++;
1316       i_byte += bytes;
1317       width += thiswidth;
1318   }
1319
1320   if (precision > 0)
1321     {
1322       *nchars = i;
1323       *nbytes = i_byte;
1324     }
1325
1326   return width;
1327 }
1328
1329 /* Return width of Lisp string STRING when displayed in the current
1330    buffer.  The width is measured by how many columns it occupies on
1331    the screen while paying attention to compositions.  If PRECISION >
1332    0, return the width of longest substring that doesn't exceed
1333    PRECISION, and set number of characters and bytes of the substring
1334    in *NCHARS and *NBYTES respectively.  */
1335
1336 int
1337 lisp_string_width (string, precision, nchars, nbytes)
1338      Lisp_Object string;
1339      int precision, *nchars, *nbytes;
1340 {
1341   int len = SCHARS (string);
1342   int len_byte = SBYTES (string);
1343   const unsigned char *str = SDATA (string);
1344   int i = 0, i_byte = 0;
1345   int width = 0;
1346   struct Lisp_Char_Table *dp = buffer_display_table ();
1347
1348   while (i < len)
1349     {
1350       int chars, bytes, thiswidth;
1351       Lisp_Object val;
1352       int cmp_id;
1353       int ignore, end;
1354
1355       if (find_composition (i, -1, &ignore, &end, &val, string)
1356           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1357               >= 0))
1358         {
1359           thiswidth = composition_table[cmp_id]->width;
1360           chars = end - i;
1361           bytes = string_char_to_byte (string, end) - i_byte;
1362         }
1363       else if (dp)
1364         {
1365           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1366
1367           chars = 1;
1368           val = DISP_CHAR_VECTOR (dp, c);
1369           if (VECTORP (val))
1370             thiswidth = XVECTOR (val)->size;
1371           else
1372             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1373         }
1374       else
1375         {
1376           chars = 1;
1377           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1378           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1379         }
1380
1381       if (precision > 0
1382           && (width + thiswidth > precision))
1383         {
1384           *nchars = i;
1385           *nbytes = i_byte;
1386           return width;
1387         }
1388       i += chars;
1389       i_byte += bytes;
1390       width += thiswidth;
1391   }
1392
1393   if (precision > 0)
1394     {
1395       *nchars = i;
1396       *nbytes = i_byte;
1397     }
1398
1399   return width;
1400 }
1401
1402 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1403        doc: /* Return width of STRING when displayed in the current buffer.
1404 Width is measured by how many columns it occupies on the screen.
1405 When calculating width of a multibyte character in STRING,
1406 only the base leading-code is considered; the validity of
1407 the following bytes is not checked.  Tabs in STRING are always
1408 taken to occupy `tab-width' columns.  */)
1409      (string)
1410      Lisp_Object string;
1411 {
1412   Lisp_Object val;
1413
1414   CHECK_STRING (string);
1415   XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
1416   return val;
1417 }
1418
1419 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1420        doc: /* Return the direction of CH.
1421 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1422      (ch)
1423      Lisp_Object ch;
1424 {
1425   int charset;
1426
1427   CHECK_NUMBER (ch);
1428   charset = CHAR_CHARSET (XFASTINT (ch));
1429   if (!CHARSET_DEFINED_P (charset))
1430     invalid_character (XINT (ch));
1431   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1432 }
1433
1434 /* Return the number of characters in the NBYTES bytes at PTR.
1435    This works by looking at the contents and checking for multibyte sequences.
1436    However, if the current buffer has enable-multibyte-characters = nil,
1437    we treat each byte as a character.  */
1438
1439 int
1440 chars_in_text (ptr, nbytes)
1441      const unsigned char *ptr;
1442      int nbytes;
1443 {
1444   /* current_buffer is null at early stages of Emacs initialization.  */
1445   if (current_buffer == 0
1446       || NILP (current_buffer->enable_multibyte_characters))
1447     return nbytes;
1448
1449   return multibyte_chars_in_text (ptr, nbytes);
1450 }
1451
/* Return the number of characters in the NBYTES bytes at PTR, found
   by parsing the contents as multibyte sequences.  The value of
   enable-multibyte-characters is not consulted.  */

int
multibyte_chars_in_text (ptr, nbytes)
     const unsigned char *ptr;
     int nbytes;
{
  const unsigned char *endp = ptr + nbytes;
  int chars, bytes;

  /* Step over one multibyte sequence per iteration.  */
  for (chars = 0; ptr < endp; chars++)
    {
      PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
      ptr += bytes;
    }

  return chars;
}
1476
/* Parse the unibyte text at STR of LEN bytes as if it were multibyte
   text, and store the resulting numbers of characters and bytes in
   *NCHARS and *NBYTES.  A byte that does not start a sequence
   accepted as multibyte counts as one character of 2 bytes, because
   8-bit characters in the range 0x80..0x9F are represented by 2
   bytes in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     const unsigned char *str;
     int len, *nchars, *nbytes;
{
  const unsigned char *endp = str + len;
  int n, chars = 0, bytes = 0;

  for (; str < endp; chars++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
        {
          /* An N-byte sequence that stays as it is.  */
          str += n;
          bytes += n;
        }
      else
        {
          /* A lone byte that will need a 2-byte form.  */
          str++;
          bytes += 2;
        }
    }

  *nchars = chars;
  *nbytes = bytes;
}
1501
1502 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1503    It actually converts only 8-bit characters in the range 0x80..0x9F
1504    that don't contruct multibyte characters to multibyte forms.  If
1505    NCHARS is nonzero, set *NCHARS to the number of characters in the
1506    text.  It is assured that we can use LEN bytes at STR as a work
1507    area and that is enough.  Return the number of bytes of the
1508    resulting text.  */
1509
1510 int
1511 str_as_multibyte (str, len, nbytes, nchars)
1512      unsigned char *str;
1513      int len, nbytes, *nchars;
1514 {
1515   unsigned char *p = str, *endp = str + nbytes;
1516   unsigned char *to;
1517   int chars = 0;
1518   int n;
1519
1520   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1521     p += n, chars++;
1522   if (nchars)
1523     *nchars = chars;
1524   if (p == endp)
1525     return nbytes;
1526
1527   to = p;
1528   nbytes = endp - p;
1529   endp = str + len;
1530   safe_bcopy (p, endp - nbytes, nbytes);
1531   p = endp - nbytes;
1532   while (p < endp)
1533     {
1534       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1535         {
1536           while (n--)
1537             *to++ = *p++;
1538         }
1539       else
1540         {
1541           *to++ = LEADING_CODE_8_BIT_CONTROL;
1542           *to++ = *p++ + 0x20;
1543         }
1544       chars++;
1545     }
1546   if (nchars)
1547     *nchars = chars;
1548   return (to - str);
1549 }
1550
/* Parse the unibyte string at STR of LEN bytes, and return the number
   of bytes it may occupy when converted to a multibyte string by
   `str_to_multibyte'.  Bytes in the range 0x80..0x9F count as 2
   bytes, all other bytes as 1.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  unsigned char *p;
  unsigned char *limit = str + len;
  int total = 0;

  for (p = str; p < limit; p++)
    {
      if (*p >= 0x80 && *p < 0xA0)
        total += 2;
      else
        total += 1;
    }
  return total;
}
1567
1568 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1569    that contains the same single-byte characters.  It actually
1570    converts all 8-bit characters to multibyte forms.  It is assured
1571    that we can use LEN bytes at STR as a work area and that is
1572    enough.  */
1573
1574 int
1575 str_to_multibyte (str, len, bytes)
1576      unsigned char *str;
1577      int len, bytes;
1578 {
1579   unsigned char *p = str, *endp = str + bytes;
1580   unsigned char *to;
1581
1582   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1583   if (p == endp)
1584     return bytes;
1585   to = p;
1586   bytes = endp - p;
1587   endp = str + len;
1588   safe_bcopy (p, endp - bytes, bytes);
1589   p = endp - bytes;
1590   while (p < endp)
1591     {
1592       if (*p < 0x80 || *p >= 0xA0)
1593         *to++ = *p++;
1594       else
1595         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1596     }
1597   return (to - str);
1598 }
1599
1600 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1601    actually converts only 8-bit characters in the range 0x80..0x9F to
1602    unibyte forms.  */
1603
1604 int
1605 str_as_unibyte (str, bytes)
1606      unsigned char *str;
1607      int bytes;
1608 {
1609   unsigned char *p = str, *endp = str + bytes;
1610   unsigned char *to = str;
1611
1612   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1613   to = p;
1614   while (p < endp)
1615     {
1616       if (*p == LEADING_CODE_8_BIT_CONTROL)
1617         *to++ = *(p + 1) - 0x20, p += 2;
1618       else
1619         *to++ = *p++;
1620     }
1621   return (to - str);
1622 }
1623
1624 \f
1625 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1626   doc: /* Concatenate all the argument characters and make the result a string.
1627 usage: (string &rest CHARACTERS)  */)
1628      (n, args)
1629      int n;
1630      Lisp_Object *args;
1631 {
1632   int i, bufsize;
1633   unsigned char *buf, *p;
1634   int c;
1635   int multibyte = 0;
1636   Lisp_Object ret;
1637   USE_SAFE_ALLOCA;
1638
1639   bufsize = MAX_MULTIBYTE_LENGTH * n;
1640   SAFE_ALLOCA (buf, unsigned char *, bufsize);
1641   p = buf;
1642
1643   for (i = 0; i < n; i++)
1644     {
1645       CHECK_NUMBER (args[i]);
1646       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1647         multibyte = 1;
1648     }
1649
1650   for (i = 0; i < n; i++)
1651     {
1652       c = XINT (args[i]);
1653       if (multibyte)
1654         p += CHAR_STRING (c, p);
1655       else
1656         *p++ = c;
1657     }
1658
1659   ret = make_string_from_bytes (buf, n, p - buf);
1660   SAFE_FREE ();
1661
1662   return ret;
1663 }
1664
1665 #endif /* emacs */
1666 \f
1667 int
1668 charset_id_internal (charset_name)
1669      char *charset_name;
1670 {
1671   Lisp_Object val;
1672
1673   val= Fget (intern (charset_name), Qcharset);
1674   if (!VECTORP (val))
1675     error ("Charset %s is not defined", charset_name);
1676
1677   return (XINT (XVECTOR (val)->contents[0]));
1678 }
1679
DEFUN ("setup-special-charsets", Fsetup_special_charsets,
       Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
     ()
{
  /* Cache in C variables the IDs of charsets that are looked up by
     name.  charset_id_internal signals an error for a charset that
     is not defined, in which case the variables after it in this
     list are left unassigned.  */
  charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
  charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
  charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
  charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
  charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
  charset_big5_1 = charset_id_internal ("chinese-big5-1");
  charset_big5_2 = charset_id_internal ("chinese-big5-2");
  return Qnil;
}
1693
/* Set up the charset tables and the Lisp objects they rely on:
   Vcharset_table, Vcharset_symbol_table, iso_charset_table,
   bytes_by_char_head, width_by_char_head, Vgeneric_character_list,
   and the defaults of the unibyte/multibyte conversion variables.  */
void
init_charset_once ()
{
  int i, j, k;

  /* Register the table variables with staticpro before they receive
     their values below.  */
  staticpro (&Vcharset_table);
  staticpro (&Vcharset_symbol_table);
  staticpro (&Vgeneric_character_list);

  /* This has to be done here, before we call Fmake_char_table.  */
  Qcharset_table = intern ("charset-table");
  staticpro (&Qcharset_table);

  /* Intern this now in case it isn't already done.
     Setting this variable twice is harmless.
     But don't staticpro it here--that is done in alloc.c.  */
  Qchar_table_extra_slots = intern ("char-table-extra-slots");

  /* Now we are ready to set up this property, so we can
     create the charset table.  */
  Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
  Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);

  /* Every slot of the symbol table starts out as `unknown'.  */
  Qunknown = intern ("unknown");
  staticpro (&Qunknown);
  Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
                                        Qunknown);

  /* Setup tables.  */
  /* -1 in iso_charset_table means that no charset is registered for
     that combination; Fiso_charset returns nil for a negative
     entry.  */
  for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)
      for (k = 0; k < 128; k++)
        iso_charset_table [i][j][k] = -1;

  /* By default a leading byte stands for a 1-byte sequence; only the
     private leading codes get longer lengths here.  */
  for (i = 0; i < 256; i++)
    bytes_by_char_head[i] = 1;
  bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
  bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;

  /* Default widths: 1 column for the bytes below 128 and 4 columns
     for the rest, with the private leading codes set to 1 or 2
     columns.  */
  for (i = 0; i < 128; i++)
    width_by_char_head[i] = 1;
  for (; i < 256; i++)
    width_by_char_head[i] = 4;
  width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
  width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;

  {
    Lisp_Object val;

    /* Build Vgeneric_character_list with one number per value of I
       in the ranges 0x81..0x99 and 0xA0..0xFE.
       NOTE(review): these numbers are presumably the generic
       character codes of the corresponding charsets -- confirm
       against charset.h.  Fcons prepends, so the list is reversed at
       the end to put it in the order the loops produce.  */
    val = Qnil;
    for (i = 0x81; i < 0x90; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0x9A; i++)
      val = Fcons (make_number ((i - 0x8F) << 14), val);
    for (i = 0xA0; i < 0xF0; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0xFF; i++)
      val = Fcons (make_number ((i - 0xE0) << 14), val);
    Vgeneric_character_list = Fnreverse (val);
  }

  /* Initial values of the variables that the unibyte <-> multibyte
     character conversions are documented to be based on.  */
  nonascii_insert_offset = 0;
  Vnonascii_translation_table = Qnil;
}
1762
1763 #ifdef emacs
1764
1765 void
1766 syms_of_charset ()
1767 {
1768   Qcharset = intern ("charset");
1769   staticpro (&Qcharset);
1770
1771   Qascii = intern ("ascii");
1772   staticpro (&Qascii);
1773
1774   Qeight_bit_control = intern ("eight-bit-control");
1775   staticpro (&Qeight_bit_control);
1776
1777   Qeight_bit_graphic = intern ("eight-bit-graphic");
1778   staticpro (&Qeight_bit_graphic);
1779
1780   /* Define special charsets ascii, eight-bit-control, and
1781      eight-bit-graphic.  */
1782   update_charset_table (make_number (CHARSET_ASCII),
1783                         make_number (1), make_number (94),
1784                         make_number (1),
1785                         make_number (0),
1786                         make_number ('B'),
1787                         make_number (0),
1788                         build_string ("ASCII"),
1789                         Qnil,   /* same as above */
1790                         build_string ("ASCII (ISO646 IRV)"));
1791   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1792   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1793
1794   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1795                         make_number (1), make_number (96),
1796                         make_number (4),
1797                         make_number (0),
1798                         make_number (-1),
1799                         make_number (-1),
1800                         build_string ("8-bit control code (0x80..0x9F)"),
1801                         Qnil,   /* same as above */
1802                         Qnil);  /* same as above */
1803   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1804   Fput (Qeight_bit_control, Qcharset,
1805         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1806
1807   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1808                         make_number (1), make_number (96),
1809                         make_number (4),
1810                         make_number (0),
1811                         make_number (-1),
1812                         make_number (-1),
1813                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1814                         Qnil,   /* same as above */
1815                         Qnil);  /* same as above */
1816   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1817   Fput (Qeight_bit_graphic, Qcharset,
1818         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1819
1820   Qauto_fill_chars = intern ("auto-fill-chars");
1821   staticpro (&Qauto_fill_chars);
1822   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1823
1824   defsubr (&Sdefine_charset);
1825   defsubr (&Sgeneric_character_list);
1826   defsubr (&Sget_unused_iso_final_char);
1827   defsubr (&Sdeclare_equiv_charset);
1828   defsubr (&Sfind_charset_region);
1829   defsubr (&Sfind_charset_string);
1830   defsubr (&Smake_char_internal);
1831   defsubr (&Ssplit_char);
1832   defsubr (&Schar_charset);
1833   defsubr (&Scharset_after);
1834   defsubr (&Siso_charset);
1835   defsubr (&Schar_valid_p);
1836   defsubr (&Sunibyte_char_to_multibyte);
1837   defsubr (&Smultibyte_char_to_unibyte);
1838   defsubr (&Schar_bytes);
1839   defsubr (&Schar_width);
1840   defsubr (&Sstring_width);
1841   defsubr (&Schar_direction);
1842   defsubr (&Sstring);
1843   defsubr (&Ssetup_special_charsets);
1844
1845   DEFVAR_LISP ("charset-list", &Vcharset_list,
1846                doc: /* List of charsets ever defined.  */);
1847   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1848                                         Fcons (Qeight_bit_graphic, Qnil)));
1849
1850   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1851                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1852 An ID of a translation table is an index of this vector.  */);
1853   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1854
1855   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1856               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1857   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1858
1859   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1860               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1861   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1862
1863   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1864               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1865   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1866
1867   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1868               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1869   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1870
1871   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1872               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1873 This is used for converting unibyte text to multibyte,
1874 and for inserting character codes specified by number.
1875
1876 This serves to convert a Latin-1 or similar 8-bit character code
1877 to the corresponding Emacs multibyte character code.
1878 Typically the value should be (- (make-char CHARSET 0) 128),
1879 for your choice of character set.
1880 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1881   nonascii_insert_offset = 0;
1882
1883   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1884                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1885 This is used for converting unibyte text to multibyte,
1886 and for inserting character codes specified by number.
1887
1888 Conversion is performed only when multibyte characters are enabled,
1889 and it serves to convert a Latin-1 or similar 8-bit character code
1890 to the corresponding Emacs character code.
1891
1892 If this is nil, `nonascii-insert-offset' is used instead.
1893 See also the docstring of `make-translation-table'.  */);
1894   Vnonascii_translation_table = Qnil;
1895
1896   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1897                doc: /* A char-table for characters which invoke auto-filling.
1898 Such characters have value t in this table.  */);
1899   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1900   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1901   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1902 }
1903
1904 #endif /* emacs */
1905
1906 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1907    (do not change this comment) */