src/character.c

   1 /* Basic character support.
   2    Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001 Free Software Foundation, Inc.
   5    Copyright (C) 2001, 2002
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
/* Before reading this file, see the documentation in `character.h';
   it explains the concepts the code below relies on.  */
  28
  29 #ifdef emacs
  30 #include <config.h>
  31 #endif
  32
  33 #include <stdio.h>
  34
  35 #ifdef emacs
  36
  37 #include <sys/types.h>
  38 #include "lisp.h"
  39 #include "character.h"
  40 #include "buffer.h"
  41 #include "charset.h"
  42 #include "composite.h"
  43 #include "disptab.h"
  44
  45 #else  /* not emacs */
  46
  47 #include "mulelib.h"
  48
  49 #endif /* emacs */
  50
  51 Lisp_Object Qcharacterp;
  52
/* Vector of all translation tables ever defined.
   The ID of a translation table is its index in this vector.  */
  55 Lisp_Object Vtranslation_table_vector;
  56
  57 /* A char-table for characters which may invoke auto-filling.  */
  58 Lisp_Object Vauto_fill_chars;
  59
  60 Lisp_Object Qauto_fill_chars;
  61
  62 Lisp_Object Vchar_unify_table;
  63
  64 /* A char-table.  An element is non-nil iff the corresponding
  65    character has a printable glyph.  */
  66 Lisp_Object Vprintable_chars;
  67
/* A char-table.  An element is the column width of the corresponding
   character.  */
  70 Lisp_Object Vchar_width_table;
  71
  72 /* A char-table.  An element is a symbol indicating the direction
  73    property of corresponding character.  */
  74 Lisp_Object Vchar_direction_table;
  75
  76 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  77 unsigned char *_fetch_multibyte_char_p;
  78 int _fetch_multibyte_char_len;
  79
  80 \f
  81
  82 int
  83 char_string_with_unification (c, p)
  84      int c;
  85      unsigned char *p;
  86 {
  87   int bytes;
  88
  89   MAYBE_UNIFY_CHAR (c);
  90
  91   if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
  92     {
  93       bytes = CHAR_STRING (c, p);
  94     }
  95   else if (c <= MAX_4_BYTE_CHAR)
  96     {
  97       p[0] = (0xF0 | (c >> 18));
  98       p[1] = (0x80 | ((c >> 12) & 0x3F));
  99       p[2] = (0x80 | ((c >> 6) & 0x3F));
 100       p[3] = (0x80 | (c & 0x3F));
 101       bytes = 4;
 102     }
 103   else
 104     {
 105       p[0] = 0xF8;
 106       p[1] = (0x80 | ((c >> 18) & 0x0F));
 107       p[2] = (0x80 | ((c >> 12) & 0x3F));
 108       p[3] = (0x80 | ((c >> 6) & 0x3F));
 109       p[4] = (0x80 | (c & 0x3F));
 110       bytes = 5;
 111     }
 112
 113   return bytes;
 114 }
 115
 116
 117 int
 118 string_char_with_unification (p, advanced, len)
 119      unsigned char *p, **advanced;
 120      int *len;
 121 {
 122   int c;
 123   unsigned char *saved_p = p;
 124
 125   if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
 126     {
 127       c = STRING_CHAR_ADVANCE (p);
 128     }
 129   else if (! (*p & 0x08))
 130     {
 131       c = ((((p)[0] & 0xF) << 18)
 132            | (((p)[1] & 0x3F) << 12)
 133            | (((p)[2] & 0x3F) << 6)
 134            | ((p)[3] & 0x3F));
 135       p += 4;
 136     }
 137   else
 138     {
 139       c = ((((p)[1] & 0x3F) << 18)
 140            | (((p)[2] & 0x3F) << 12)
 141            | (((p)[3] & 0x3F) << 6)
 142            | ((p)[4] & 0x3F));
 143       p += 5;
 144     }
 145
 146   MAYBE_UNIFY_CHAR (c);
 147
 148   if (len)
 149     *len = p - saved_p;
 150   if (advanced)
 151     *advanced = p;
 152   return c;
 153 }
 154
 155
 156 /* Translate character C by translation table TABLE.  If C is
 157    negative, translate a character specified by CHARSET and CODE.  If
 158    no translation is found in TABLE, return the untranslated
 159    character.  */
 160
 161 int
 162 translate_char (table, c)
 163      Lisp_Object table;
 164      int c;
 165 {
 166   Lisp_Object ch;
 167
 168   if (! CHAR_TABLE_P (table))
 169     return c;
 170   ch = CHAR_TABLE_REF (table, c);
 171   if (! CHARACTERP (ch))
 172     return c;
 173   return XINT (ch);
 174 }
 175
 176 /* Convert the unibyte character C to the corresponding multibyte
 177    character based on the current value of charset_primary.  If C
 178    can't be converted, return C.  */
 179
 180 int
 181 unibyte_char_to_multibyte (c)
 182      int c;
 183 {
 184   struct charset *charset = CHARSET_FROM_ID (charset_primary);
 185   int c1 = DECODE_CHAR (charset, c);
 186
 187   return ((c1 >= 0) ? c1 : c);
 188 }
 189
 190
 191 /* Convert the multibyte character C to unibyte 8-bit character based
 192    on the current value of charset_primary.  If dimension of
 193    charset_primary is more than one, return (C & 0xFF).
 194
 195    The argument REV_TBL is now ignored.  It will be removed in the
 196    future.  */
 197
 198 int
 199 multibyte_char_to_unibyte (c, rev_tbl)
 200      int c;
 201      Lisp_Object rev_tbl;
 202 {
 203   struct charset *charset = CHARSET_FROM_ID (charset_primary);
 204   unsigned c1 = ENCODE_CHAR (charset, c);
 205
 206   return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
 207 }
 208
 209
 210 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
 211        doc: /* Return non-nil if OBJECT is a character.  */)
 212      (object, ignore)
 213      Lisp_Object object, ignore;
 214 {
 215   return (CHARACTERP (object) ? Qt : Qnil);
 216 }
 217
 218 DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
 219        doc: /* Return the character of the maximum code.  */)
 220      ()
 221 {
 222   return make_number (MAX_CHAR);
 223 }
 224
 225 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
 226        Sunibyte_char_to_multibyte, 1, 1, 0,
 227        doc: /* Convert the unibyte character CH to multibyte character.
 228 The multibyte character is a result of decoding CH by
 229 the current primary charset (value of `charset-primary').  */)
 230      (ch)
 231      Lisp_Object ch;
 232 {
 233   int c;
 234   struct charset *charset;
 235
 236   CHECK_CHARACTER (ch);
 237   c = XFASTINT (ch);
 238   if (c >= 0400)
 239     error ("Invalid unibyte character: %d", c);
 240   charset = CHARSET_FROM_ID (charset_primary);
 241   c = DECODE_CHAR (charset, c);
 242   if (c < 0)
 243     error ("Can't convert to multibyte character: %d", XINT (ch));
 244   return make_number (c);
 245 }
 246
 247 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
 248        Smultibyte_char_to_unibyte, 1, 1, 0,
 249        doc: /* Convert the multibyte character CH to unibyte character.\n\
 250 The unibyte character is a result of encoding CH by
 251 the current primary charset (value of `charset-primary').  */)
 252      (ch)
 253      Lisp_Object ch;
 254 {
 255   int c;
 256   unsigned code;
 257   struct charset *charset;
 258
 259   CHECK_CHARACTER (ch);
 260   c = XFASTINT (ch);
 261   charset = CHARSET_FROM_ID (charset_primary);
 262   code = ENCODE_CHAR (charset, c);
 263   if (code < CHARSET_MIN_CODE (charset)
 264       || code > CHARSET_MAX_CODE (charset))
 265     error ("Can't convert to unibyte character: %d", XINT (ch));
 266   return make_number (code);
 267 }
 268
 269 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
 270        doc: /* Return 1 regardless of the argument CHAR.
 271 This is now an obsolete function.  We keep it just for backward compatibility.   */)
 272      (ch)
 273      Lisp_Object ch;
 274 {
 275   CHECK_CHARACTER (ch);
 276   return make_number (1);
 277 }
 278
 279 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
 280        doc: /* Return width of CHAR when displayed in the current buffer.
 281 The width is measured by how many columns it occupies on the screen.
 282 Tab is taken to occupy `tab-width' columns.  */)
 283      (ch)
 284        Lisp_Object ch;
 285 {
 286   Lisp_Object disp;
 287   int c, width;
 288   struct Lisp_Char_Table *dp = buffer_display_table ();
 289
 290   CHECK_CHARACTER (ch);
 291   c = XINT (ch);
 292
 293   /* Get the way the display table would display it.  */
 294   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
 295
 296   if (VECTORP (disp))
 297     width = ASIZE (disp);
 298   else
 299     width = CHAR_WIDTH (c);
 300
 301   return make_number (width);
 302 }
 303
 304 /* Return width of string STR of length LEN when displayed in the
 305    current buffer.  The width is measured by how many columns it
 306    occupies on the screen.  If PRECISION > 0, return the width of
 307    longest substring that doesn't exceed PRECISION, and set number of
 308    characters and bytes of the substring in *NCHARS and *NBYTES
 309    respectively.  */
 310
 311 int
 312 c_string_width (str, len, precision, nchars, nbytes)
 313      unsigned char *str;
 314      int precision, *nchars, *nbytes;
 315 {
 316   int i = 0, i_byte = 0;
 317   int width = 0;
 318   struct Lisp_Char_Table *dp = buffer_display_table ();
 319
 320   while (i_byte < len)
 321     {
 322       int bytes, thiswidth;
 323       Lisp_Object val;
 324       int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
 325
 326       if (dp)
 327         {
 328           val = DISP_CHAR_VECTOR (dp, c);
 329           if (VECTORP (val))
 330             thiswidth = XVECTOR (val)->size;
 331           else
 332             thiswidth = CHAR_WIDTH (c);
 333         }
 334       else
 335         {
 336           thiswidth = CHAR_WIDTH (c);
 337         }
 338
 339       if (precision > 0
 340           && (width + thiswidth > precision))
 341         {
 342           *nchars = i;
 343           *nbytes = i_byte;
 344           return width;
 345         }
 346       i++;
 347       i_byte += bytes;
 348       width += thiswidth;
 349   }
 350
 351   if (precision > 0)
 352     {
 353       *nchars = i;
 354       *nbytes = i_byte;
 355     }
 356
 357   return width;
 358 }
 359
 360 /* Return width of string STR of length LEN when displayed in the
 361    current buffer.  The width is measured by how many columns it
 362    occupies on the screen.  */
 363
 364 int
 365 strwidth (str, len)
 366      unsigned char *str;
 367      int len;
 368 {
 369   return c_string_width (str, len, -1, NULL, NULL);
 370 }
 371
 372 /* Return width of Lisp string STRING when displayed in the current
 373    buffer.  The width is measured by how many columns it occupies on
 374    the screen while paying attention to compositions.  If PRECISION >
 375    0, return the width of longest substring that doesn't exceed
 376    PRECISION, and set number of characters and bytes of the substring
 377    in *NCHARS and *NBYTES respectively.  */
 378
 379 int
 380 lisp_string_width (string, precision, nchars, nbytes)
 381      Lisp_Object string;
 382      int precision, *nchars, *nbytes;
 383 {
 384   int len = XSTRING (string)->size;
 385   unsigned char *str = XSTRING (string)->data;
 386   int i = 0, i_byte = 0;
 387   int width = 0;
 388   struct Lisp_Char_Table *dp = buffer_display_table ();
 389
 390   while (i < len)
 391     {
 392       int chars, bytes, thiswidth;
 393       Lisp_Object val;
 394       int cmp_id;
 395       int ignore, end;
 396
 397       if (find_composition (i, -1, &ignore, &end, &val, string)
 398           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
 399               >= 0))
 400         {
 401           thiswidth = composition_table[cmp_id]->width;
 402           chars = end - i;
 403           bytes = string_char_to_byte (string, end) - i_byte;
 404         }
 405       else if (dp)
 406         {
 407           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
 408
 409           chars = 1;
 410           val = DISP_CHAR_VECTOR (dp, c);
 411           if (VECTORP (val))
 412             thiswidth = XVECTOR (val)->size;
 413           else
 414             thiswidth = CHAR_WIDTH (c);
 415         }
 416       else
 417         {
 418           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
 419
 420           chars = 1;
 421           thiswidth = CHAR_WIDTH (c);
 422         }
 423
 424       if (precision > 0
 425           && (width + thiswidth > precision))
 426         {
 427           *nchars = i;
 428           *nbytes = i_byte;
 429           return width;
 430         }
 431       i += chars;
 432       i_byte += bytes;
 433       width += thiswidth;
 434   }
 435
 436   if (precision > 0)
 437     {
 438       *nchars = i;
 439       *nbytes = i_byte;
 440     }
 441
 442   return width;
 443 }
 444
 445 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
 446        doc: /* Return width of STRING when displayed in the current buffer.
 447 Width is measured by how many columns it occupies on the screen.
 448 When calculating width of a multibyte character in STRING,
 449 only the base leading-code is considered; the validity of
 450 the following bytes is not checked.  Tabs in STRING are always
 451 taken to occupy `tab-width' columns.  */)
 452      (str)
 453      Lisp_Object str;
 454 {
 455   Lisp_Object val;
 456
 457   CHECK_STRING (str);
 458   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
 459   return val;
 460 }
 461
 462 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
 463        doc: /* Return the direction of CHAR.
 464 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
 465      (ch)
 466      Lisp_Object ch;
 467 {
 468   int c;
 469
 470   CHECK_CHARACTER (ch);
 471   c = XINT (ch);
 472   return CHAR_TABLE_REF (Vchar_direction_table, c);
 473 }
 474
 475 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
 476        doc: /* Return number of characters between BEG and END.
 477 This is now an obsolete function.  We keep it just for backward compatibility.  */)
 478      (beg, end)
 479      Lisp_Object beg, end;
 480 {
 481   int from, to;
 482
 483   CHECK_NUMBER_COERCE_MARKER (beg);
 484   CHECK_NUMBER_COERCE_MARKER (end);
 485
 486   from = min (XFASTINT (beg), XFASTINT (end));
 487   to = max (XFASTINT (beg), XFASTINT (end));
 488
 489   return make_number (to - from);
 490 }
 491
 492 /* Return the number of characters in the NBYTES bytes at PTR.
 493    This works by looking at the contents and checking for multibyte
 494    sequences while assuming that there's no invalid sequence.
 495    However, if the current buffer has enable-multibyte-characters =
 496    nil, we treat each byte as a character.  */
 497
 498 int
 499 chars_in_text (ptr, nbytes)
 500      unsigned char *ptr;
 501      int nbytes;
 502 {
 503   /* current_buffer is null at early stages of Emacs initialization.  */
 504   if (current_buffer == 0
 505       || NILP (current_buffer->enable_multibyte_characters))
 506     return nbytes;
 507
 508   return multibyte_chars_in_text (ptr, nbytes);
 509 }
 510
 511 /* Return the number of characters in the NBYTES bytes at PTR.
 512    This works by looking at the contents and checking for multibyte
 513    sequences while assuming that there's no invalid sequence.  It
 514    ignores enable-multibyte-characters.  */
 515
 516 int
 517 multibyte_chars_in_text (ptr, nbytes)
 518      unsigned char *ptr;
 519      int nbytes;
 520 {
 521   unsigned char *endp = ptr + nbytes;
 522   int chars = 0;
 523
 524   while (ptr < endp)
 525     {
 526       int len = MULTIBYTE_LENGTH (ptr, endp);
 527
 528       if (len == 0)
 529         abort ();
 530       ptr += len;
 531       chars++;
 532     }
 533
 534   return chars;
 535 }
 536
 537 /* Parse unibyte text at STR of LEN bytes as a multibyte text, count
 538    characters and bytes in it, and store them in *NCHARS and *NBYTES
 539    respectively.  On counting bytes, pay attention to that 8-bit
 540    characters not constructing a valid multibyte sequence are
 541    represented by 2-byte in a multibyte text.  */
 542
 543 void
 544 parse_str_as_multibyte (str, len, nchars, nbytes)
 545      unsigned char *str;
 546      int len, *nchars, *nbytes;
 547 {
 548   unsigned char *endp = str + len;
 549   int n, chars = 0, bytes = 0;
 550
 551   if (len >= MAX_MULTIBYTE_LENGTH)
 552     {
 553       unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
 554       while (str < adjusted_endp)
 555         {
 556           if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
 557             str += n, bytes += n;
 558           else
 559             str++, bytes += 2;
 560           chars++;
 561         }
 562     }
 563   while (str < endp)
 564     {
 565       if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
 566         str += n, bytes += n;
 567       else
 568         str++, bytes += 2;
 569       chars++;
 570     }
 571
 572   *nchars = chars;
 573   *nbytes = bytes;
 574   return;
 575 }
 576
 577 /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
 578    It actually converts only such 8-bit characters that don't contruct
 579    a multibyte sequence to multibyte forms of Latin-1 characters.  If
 580    NCHARS is nonzero, set *NCHARS to the number of characters in the
 581    text.  It is assured that we can use LEN bytes at STR as a work
 582    area and that is enough.  Return the number of bytes of the
 583    resulting text.  */
 584
 585 int
 586 str_as_multibyte (str, len, nbytes, nchars)
 587      unsigned char *str;
 588      int len, nbytes, *nchars;
 589 {
 590   unsigned char *p = str, *endp = str + nbytes;
 591   unsigned char *to;
 592   int chars = 0;
 593   int n;
 594
 595   if (nbytes >= MAX_MULTIBYTE_LENGTH)
 596     {
 597       unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
 598       while (p < adjusted_endp
 599              && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
 600         p += n, chars++;
 601     }
 602   while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
 603     p += n, chars++;
 604   if (nchars)
 605     *nchars = chars;
 606   if (p == endp)
 607     return nbytes;
 608
 609   to = p;
 610   nbytes = endp - p;
 611   endp = str + len;
 612   safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
 613   p = endp - nbytes;
 614
 615   if (nbytes >= MAX_MULTIBYTE_LENGTH)
 616     {
 617       unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
 618       while (p < adjusted_endp)
 619         {
 620           if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
 621             {
 622               while (n--)
 623                 *to++ = *p++;
 624             }
 625           else
 626             {
 627               int c = *p++;
 628               c = BYTE8_TO_CHAR (c);
 629               to += CHAR_STRING (c, to);
 630             }
 631         }
 632       chars++;
 633     }
 634   while (p < endp)
 635     {
 636       if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
 637         {
 638           while (n--)
 639             *to++ = *p++;
 640         }
 641       else
 642         {
 643           int c = *p++;
 644           c = BYTE8_TO_CHAR (c);
 645           to += CHAR_STRING (c, to);
 646         }
 647       chars++;
 648     }
 649   if (nchars)
 650     *nchars = chars;
 651   return (to - str);
 652 }
 653
 654 /* Parse unibyte string at STR of LEN bytes, and return the number of
 655    bytes it may ocupy when converted to multibyte string by
 656    `str_to_multibyte'.  */
 657
 658 int
 659 parse_str_to_multibyte (str, len)
 660      unsigned char *str;
 661      int len;
 662 {
 663   unsigned char *endp = str + len;
 664   int bytes;
 665
 666   for (bytes = 0; str < endp; str++)
 667     bytes += (*str < 0x80) ? 1 : 2;
 668   return bytes;
 669 }
 670
 671
 672 /* Convert unibyte text at STR of NBYTES bytes to a multibyte text
 673    that contains the same single-byte characters.  It actually
 674    converts all 8-bit characters to multibyte forms.  It is assured
 675    that we can use LEN bytes at STR as a work area and that is
 676    enough.  */
 677
 678 int
 679 str_to_multibyte (str, len, bytes)
 680      unsigned char *str;
 681      int len, bytes;
 682 {
 683   unsigned char *p = str, *endp = str + bytes;
 684   unsigned char *to;
 685
 686   while (p < endp && *p < 0x80) p++;
 687   if (p == endp)
 688     return bytes;
 689   to = p;
 690   bytes = endp - p;
 691   endp = str + len;
 692   safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
 693   p = endp - bytes;
 694   while (p < endp)
 695     {
 696       int c = *p++;
 697
 698       if (c >= 0x80)
 699         c = BYTE8_TO_CHAR (c);
 700       to += CHAR_STRING (c, to);
 701     }
 702   return (to - str);
 703 }
 704
 705 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
 706    actually converts characters in the range 0x80..0xFF to
 707    unibyte.  */
 708
 709 int
 710 str_as_unibyte (str, bytes)
 711      unsigned char *str;
 712      int bytes;
 713 {
 714   unsigned char *p = str, *endp = str + bytes;
 715   unsigned char *to = str;
 716   int c, len;
 717
 718   while (p < endp)
 719     {
 720       c = *p;
 721       len = BYTES_BY_CHAR_HEAD (c);
 722       if (CHAR_BYTE8_HEAD_P (c))
 723         break;
 724       p += len;
 725     }
 726   to = p;
 727   while (p < endp)
 728     {
 729       c = *p;
 730       len = BYTES_BY_CHAR_HEAD (c);
 731       if (CHAR_BYTE8_HEAD_P (c))
 732         {
 733           c = STRING_CHAR_ADVANCE (p);
 734           *to++ = CHAR_TO_BYTE8 (c);
 735         }
 736       else
 737         {
 738           while (len--) *to++ = *p++;
 739         }
 740     }
 741   return (to - str);
 742 }
 743
 744 int
 745 string_count_byte8 (string)
 746      Lisp_Object string;
 747 {
 748   int multibyte = STRING_MULTIBYTE (string);
 749   int nbytes = STRING_BYTES (XSTRING (string));
 750   unsigned char *p = XSTRING (string)->data;
 751   unsigned char *pend = p + nbytes;
 752   int count = 0;
 753   int c, len;
 754
 755   if (multibyte)
 756     while (p < pend)
 757       {
 758         c = *p;
 759         len = BYTES_BY_CHAR_HEAD (c);
 760
 761         if (CHAR_BYTE8_HEAD_P (c))
 762           count++;
 763         p += len;
 764       }
 765   else
 766     while (p < pend)
 767       {
 768         if (*p++ >= 0x80)
 769           count++;
 770       }
 771   return count;
 772 }
 773
 774
 775 Lisp_Object
 776 string_escape_byte8 (string)
 777      Lisp_Object string;
 778 {
 779   int nchars = XSTRING (string)->size;
 780   int nbytes = STRING_BYTES (XSTRING (string));
 781   int multibyte = STRING_MULTIBYTE (string);
 782   int byte8_count;
 783   unsigned char *src, *src_end, *dst;
 784   Lisp_Object val;
 785   int c, len;
 786
 787   if (multibyte && nchars == nbytes)
 788     return string;
 789
 790   byte8_count = string_count_byte8 (string);
 791
 792   if (byte8_count == 0)
 793     return string;
 794
 795   if (multibyte)
 796     /* Convert 2-byte sequence of byte8 chars to 4-byte octal.  */
 797     val = make_uninit_multibyte_string (nchars + byte8_count * 3,
 798                                         nbytes + byte8_count * 2);
 799   else
 800     /* Convert 1-byte sequence of byte8 chars to 4-byte octal.  */
 801     val = make_uninit_string (nbytes + byte8_count * 3);
 802
 803   src = XSTRING (string)->data;
 804   src_end = src + nbytes;
 805   dst = XSTRING (val)->data;
 806   if (multibyte)
 807     while (src < src_end)
 808       {
 809         c = *src;
 810         len = BYTES_BY_CHAR_HEAD (c);
 811
 812         if (CHAR_BYTE8_HEAD_P (c))
 813           {
 814             c = STRING_CHAR_ADVANCE (src);
 815             c = CHAR_TO_BYTE8 (c);
 816             sprintf ((char *) dst, "\\%03o", c);
 817             dst += 4;
 818           }
 819         else
 820           while (len--) *dst++ = *src++;
 821       }
 822   else
 823     while (src < src_end)
 824       {
 825         c = *src++;
 826         if (c >= 0x80)
 827           {
 828             sprintf ((char *) dst, "\\%03o", c);
 829             dst += 4;
 830           }
 831         else
 832           *dst++ = c;
 833       }
 834   return val;
 835 }
 836
 837 \f
 838 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
 839        doc: /*
 840 Concatenate all the argument characters and make the result a string.
 841 usage: (string &rest CHARACTERS)  */)
 842      (n, args)
 843      int n;
 844      Lisp_Object *args;
 845 {
 846   int i;
 847   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
 848   unsigned char *p = buf;
 849   int c;
 850
 851   for (i = 0; i < n; i++)
 852     {
 853       CHECK_CHARACTER (args[i]);
 854       c = XINT (args[i]);
 855       p += CHAR_STRING (c, p);
 856     }
 857
 858   return make_string_from_bytes ((char *) buf, n, p - buf);
 859 }
 860
 861 void
 862 init_character_once ()
 863 {
 864 }
 865
 866 #ifdef emacs
 867
 868 void
 869 syms_of_character ()
 870 {
 871   DEFSYM (Qcharacterp, "characterp");
 872   DEFSYM (Qauto_fill_chars, "auto-fill-chars");
 873
 874   staticpro (&Vchar_unify_table);
 875   Vchar_unify_table = Qnil;
 876
 877   defsubr (&Smax_char);
 878   defsubr (&Scharacterp);
 879   defsubr (&Sunibyte_char_to_multibyte);
 880   defsubr (&Smultibyte_char_to_unibyte);
 881   defsubr (&Schar_bytes);
 882   defsubr (&Schar_width);
 883   defsubr (&Sstring_width);
 884   defsubr (&Schar_direction);
 885   defsubr (&Schars_in_region);
 886   defsubr (&Sstring);
 887
 888   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
 889                doc: /*
 890 Vector of cons cell of a symbol and translation table ever defined.
 891 An ID of a translation table is an index of this vector.  */);
 892   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
 893
 894   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
 895                doc: /*
 896 A char-table for characters which invoke auto-filling.
 897 Such characters have value t in this table.  */);
 898   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
 899   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
 900   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
 901
 902   DEFVAR_LISP ("char-width-table", &Vchar_width_table,
 903                doc: /*
 904 A char-table for width (columns) of each character.  */);
 905   Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
 906
 907   DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
 908                doc: /* A char-table for direction of each character.  */);
 909   Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
 910
 911   DEFVAR_LISP ("printable-chars", &Vprintable_chars,
 912                doc: /* A char-table for each printable character.  */);
 913   Vprintable_chars = Fmake_char_table (Qnil, Qt);
 914 }
 915
 916 #endif /* emacs */