src/character.h

   1 /* Header for multibyte character handler.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H13PRO009
   7
   8 This file is part of GNU Emacs.
   9
  10 GNU Emacs is free software: you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation, either version 3 of the License, or
  13 (at your option) any later version.
  14
  15 GNU Emacs is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  22
  23 #ifndef EMACS_CHARACTER_H
  24 #define EMACS_CHARACTER_H
  25
  26 /* character code       1st byte   byte sequence
  27    --------------       --------   -------------
  28         0-7F            00..7F     0xxxxxxx
  29        80-7FF           C2..DF     110xxxxx 10xxxxxx
  30       800-FFFF          E0..EF     1110xxxx 10xxxxxx 10xxxxxx
  31     10000-1FFFFF        F0..F7     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  32    200000-3FFF7F        F8         11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
  33    3FFF80-3FFFFF        C0..C1     1100000x 10xxxxxx (for eight-bit-char)
  34    400000-...           invalid
  35
  36    invalid 1st byte     80..BF     10xxxxxx
  37                         F9..FF     11111xxx (xxx != 000)
  38 */
  39
   40 /* Maximum character code, i.e. (1 << CHARACTERBITS) - 1.  */
   41 #define MAX_CHAR  0x3FFFFF
   42
   43 /* Maximum character code that is still a Unicode code point.  */
   44 #define MAX_UNICODE_CHAR 0x10FFFF
   45
   46 /* Maximum character codes whose multibyte forms are N bytes long.  */
   47 #define MAX_1_BYTE_CHAR 0x7F
   48 #define MAX_2_BYTE_CHAR 0x7FF
   49 #define MAX_3_BYTE_CHAR 0xFFFF
   50 #define MAX_4_BYTE_CHAR 0x1FFFFF
   51 #define MAX_5_BYTE_CHAR 0x3FFF7F
   52
   53 /* Minimum leading code of multibyte characters (head of a raw 8-bit byte).  */
   54 #define MIN_MULTIBYTE_LEADING_CODE 0xC0
   55 /* Maximum leading code of multibyte characters (head of a 5-byte form).  */
   56 #define MAX_MULTIBYTE_LEADING_CODE 0xF8
  57
   58 /* Nonzero iff C is a character that corresponds to a raw 8-bit
   59    byte, i.e. iff C is greater than MAX_5_BYTE_CHAR.  */
   60 #define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
  61
   62 /* Return the character code for raw 8-bit byte BYTE (BYTE + 0x3FFF00).  */
   63 #define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
  64
  65 /* Return the raw 8-bit byte for character C.  */
  66 #define CHAR_TO_BYTE8(c)        \
  67   (CHAR_BYTE8_P (c)             \
  68    ? (c) - 0x3FFF00             \
  69    : multibyte_char_to_unibyte (c, Qnil))
  70
  71 /* Return the raw 8-bit byte for character C,
  72    or -1 if C doesn't correspond to a byte.  */
  73 #define CHAR_TO_BYTE_SAFE(c)    \
  74   (CHAR_BYTE8_P (c)             \
  75    ? (c) - 0x3FFF00             \
  76    : multibyte_char_to_unibyte_safe (c))
  77
   78 /* Nonzero iff BYTE is the 1st byte (0xC0 or 0xC1) of a multibyte form
   79    of a character that corresponds to a raw 8-bit byte.  */
   80 #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
  81
   82 /* Mapping table from unibyte chars (indices 0..255) to multibyte chars.  */
   83 extern int unibyte_to_multibyte_table[256];
  84
  85 /* Convert the unibyte character C to the corresponding multibyte
  86    character.  If C can't be converted, return C.  */
  87 #define unibyte_char_to_multibyte(c)    \
  88   ((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
  89
   90 /* Nth element is 1 iff unibyte char N can be mapped to a multibyte
   91    char.  */
   92 extern char unibyte_has_multibyte_table[256];
   93 /* Look up unibyte char C in the table above; C is not range-checked.  */
   94 #define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)])
  95
   96 /* If C is not ASCII, replace C (an lvalue) with CHAR_TO_BYTE8 (C).  */
   97 #define MAKE_CHAR_UNIBYTE(c)    \
   98   do {                          \
   99     if (! ASCII_CHAR_P (c))     \
  100       c = CHAR_TO_BYTE8 (c);    \
  101   } while (0)
 102
 103
  104 /* Replace lvalue C with unibyte_to_multibyte_table[C].  Asserts 0 <= C < 256.  */
  105 #define MAKE_CHAR_MULTIBYTE(c) \
  106   (eassert ((c) >= 0 && (c) < 256), (c) = unibyte_to_multibyte_table[(c)])
 107
  108 /* Maximum byte length of the multibyte form of a single character.  */
  109 #define MAX_MULTIBYTE_LENGTH 5
 110
  111 /* Return a Lisp character whose character code is C, built with
  112    make_number.  Assumes C is a valid character code.  */
  113 #define make_char(c) make_number (c)
 114
  115 /* Nonzero iff C is an ASCII byte, i.e. less than 0x80 as unsigned.  */
  116 #define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80)
 117
  118 /* Nonzero iff Lisp object X is a character (NATNUMP and <= MAX_CHAR).  */
  119 #define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
 120
  121 /* Nonzero iff C, taken as unsigned, is <= MAX_CHAR.  GENERICP is not used.  */
  122 #define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR)
 123
  124 /* Check that Lisp object X is a character, via CHECK_TYPE and Qcharacterp.  */
  125 #define CHECK_CHARACTER(x) \
  126   CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
  127 /* Apply CHECK_CHARACTER to the car of X, then store that car back into X.  */
  128 #define CHECK_CHARACTER_CAR(x) \
  129   do {                                  \
  130     Lisp_Object tmp = XCAR (x);         \
  131     CHECK_CHARACTER (tmp);              \
  132     XSETCAR ((x), tmp);                 \
  133   } while (0)
  134 /* Apply CHECK_CHARACTER to the cdr of X, then store that cdr back into X.  */
  135 #define CHECK_CHARACTER_CDR(x) \
  136   do {                                  \
  137     Lisp_Object tmp = XCDR (x);         \
  138     CHECK_CHARACTER (tmp);              \
  139     XSETCDR ((x), tmp);                 \
  140   } while (0)
 141
  142 /* Nonzero iff C is an ASCII character (below 0x80 as unsigned).  */
  143 #define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
 144
  145 /* Nonzero iff C is a character of code less than 0x100 (as unsigned).  */
  146 #define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
 147
 148 /* Nonzero if character C has a printable glyph.  */
 149 #define CHAR_PRINTABLE_P(c)     \
 150   (((c) >= 32 && ((c) < 127)    \
 151     || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c)))))
 152
  153 /* Return byte length of multibyte form for character C (2 above MAX_5_BYTE_CHAR).  */
  154 #define CHAR_BYTES(c)                   \
  155   ( (c) <= MAX_1_BYTE_CHAR ? 1          \
  156     : (c) <= MAX_2_BYTE_CHAR ? 2        \
  157     : (c) <= MAX_3_BYTE_CHAR ? 3        \
  158     : (c) <= MAX_4_BYTE_CHAR ? 4        \
  159     : (c) <= MAX_5_BYTE_CHAR ? 5        \
  160     : 2)
 161
 162
  163 /* Return the leading code (first byte) of the multibyte form of C.  */
  164 #define CHAR_LEADING_CODE(c)                            \
  165   ((c) <= MAX_1_BYTE_CHAR ? c                           \
  166    : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6))       \
  167    : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12))      \
  168    : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18))      \
  169    : (c) <= MAX_5_BYTE_CHAR ? 0xF8                      \
  170    : (0xC0 | (((c) >> 6) & 0x01)))
 171
 172
 173 /* Store multibyte form of the character C in P.  The caller should
 174    allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
 175    Returns the length of the multibyte form.  */
 176
 177 #define CHAR_STRING(c, p)                       \
 178   ((unsigned) (c) <= MAX_1_BYTE_CHAR            \
 179    ? ((p)[0] = (c),                             \
 180       1)                                        \
 181    : (unsigned) (c) <= MAX_2_BYTE_CHAR          \
 182    ? ((p)[0] = (0xC0 | ((c) >> 6)),             \
 183       (p)[1] = (0x80 | ((c) & 0x3F)),           \
 184       2)                                        \
 185    : (unsigned) (c) <= MAX_3_BYTE_CHAR          \
 186    ? ((p)[0] = (0xE0 | ((c) >> 12)),            \
 187       (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),    \
 188       (p)[2] = (0x80 | ((c) & 0x3F)),           \
 189       3)                                        \
 190    : char_string ((unsigned) c, p))
 191
  192 /* Store the two-byte multibyte form of raw byte B in P and return 2.
  193    The caller is asked to allocate at least MAX_MULTIBYTE_LENGTH
  194    bytes at P in advance, although only P[0] and P[1] are written.  */
  195
  196 #define BYTE8_STRING(b, p)                      \
  197   ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),       \
  198    (p)[1] = (0x80 | ((b) & 0x3F)),              \
  199    2)
 200
 201
  202 /* Store multibyte form of the character C in P and advance P to the
  203    end of the multibyte form (P must be an lvalue).  The caller should
  204    allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.  */
  205
  206 #define CHAR_STRING_ADVANCE(c, p)               \
  207   do {                                          \
  208     if ((c) <= MAX_1_BYTE_CHAR)                 \
  209       *(p)++ = (c);                             \
  210     else if ((c) <= MAX_2_BYTE_CHAR)            \
  211       *(p)++ = (0xC0 | ((c) >> 6)),             \
  212         *(p)++ = (0x80 | ((c) & 0x3F));         \
  213     else if ((c) <= MAX_3_BYTE_CHAR)            \
  214       *(p)++ = (0xE0 | ((c) >> 12)),            \
  215         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
  216         *(p)++ = (0x80 | ((c) & 0x3F));         \
  217     else                                        \
  218       (p) += char_string ((c), (p));            \
  219   } while (0)
 220
 221
  222 /* Nonzero iff BYTE starts a non-ASCII character in a multibyte
  223    form, i.e. iff its two highest bits (mask 0xC0) are both set.  */
  224 #define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
 225
  226 /* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
  227    multibyte form, i.e. iff it has the bit pattern 10xxxxxx.  */
  228 #define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
 229
  230 /* Nonzero iff BYTE starts a character in a multibyte form, i.e. iff
  231    BYTE is not a trailing code.  This is equivalent to:
  232         (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte))  */
  233 #define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
 234
  235 /* Old name for LEADING_CODE_P, kept for backward compatibility.  This
  236    macro will be removed in the future.  */
  237 #define BASE_LEADING_CODE_P LEADING_CODE_P
 238
 239 /* How many bytes a character that starts with BYTE occupies in a
 240    multibyte form.  */
 241 #define BYTES_BY_CHAR_HEAD(byte)        \
 242   (!((byte) & 0x80) ? 1                 \
 243    : !((byte) & 0x20) ? 2               \
 244    : !((byte) & 0x10) ? 3               \
 245    : !((byte) & 0x08) ? 4               \
 246    : 5)
 247
 248
 249 /* Return the length of the multi-byte form at string STR of length
 250    LEN while assuming that STR points a valid multi-byte form.  As
 251    this macro isn't necessary anymore, all callers will be changed to
 252    use BYTES_BY_CHAR_HEAD directly in the future.  */
 253
 254 #define MULTIBYTE_FORM_LENGTH(str, len)         \
 255   BYTES_BY_CHAR_HEAD (*(str))
 256
 257 /* Parse multibyte string STR of length LENGTH and set BYTES to the
 258    byte length of a character at STR while assuming that STR points a
 259    valid multibyte form.  As this macro isn't necessary anymore, all
 260    callers will be changed to use BYTES_BY_CHAR_HEAD directly in the
 261    future.  */
 262
 263 #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
 264   (bytes) = BYTES_BY_CHAR_HEAD (*(str))
 265
 266 /* The byte length of multibyte form at unibyte string P ending at
 267    PEND.  If STR doesn't point to a valid multibyte form, return 0.  */
 268
 269 #define MULTIBYTE_LENGTH(p, pend)                               \
 270   (p >= pend ? 0                                                \
 271    : !((p)[0] & 0x80) ? 1                                       \
 272    : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0         \
 273    : ((p)[0] & 0xE0) == 0xC0 ? 2                                \
 274    : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0         \
 275    : ((p)[0] & 0xF0) == 0xE0 ? 3                                \
 276    : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0         \
 277    : ((p)[0] & 0xF8) == 0xF0 ? 4                                \
 278    : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0         \
 279    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5              \
 280    : 0)
 281
 282
  283 /* Like MULTIBYTE_LENGTH, but don't check the ending address (reads up to P[4]).  */
  284
  285 #define MULTIBYTE_LENGTH_NO_CHECK(p)                    \
  286   (!((p)[0] & 0x80) ? 1                                 \
  287    : ((p)[1] & 0xC0) != 0x80 ? 0                        \
  288    : ((p)[0] & 0xE0) == 0xC0 ? 2                        \
  289    : ((p)[2] & 0xC0) != 0x80 ? 0                        \
  290    : ((p)[0] & 0xF0) == 0xE0 ? 3                        \
  291    : ((p)[3] & 0xC0) != 0x80 ? 0                        \
  292    : ((p)[0] & 0xF8) == 0xF0 ? 4                        \
  293    : ((p)[4] & 0xC0) != 0x80 ? 0                        \
  294    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5      \
  295    : 0)
 296
  297 /* If P is before LIMIT, advance P to the next character boundary.
  298    Assumes that P is already at a character boundary of the same
  299    multibyte form whose end address is LIMIT.  */
  300
  301 #define NEXT_CHAR_BOUNDARY(p, limit)    \
  302   do {                                  \
  303     if ((p) < (limit))                  \
  304       (p) += BYTES_BY_CHAR_HEAD (*(p)); \
  305   } while (0)
 306
 307
 308 /* If P is after LIMIT, advance P to the previous character boundary.
 309    Assumes that P is already at a character boundary of the same
 310    mulitbyte form whose beginning address is LIMIT.  */
 311
 312 #define PREV_CHAR_BOUNDARY(p, limit)                                    \
 313   do {                                                                  \
 314     if ((p) > (limit))                                                  \
 315       {                                                                 \
 316         const unsigned char *p0 = (p);                                  \
 317         do {                                                            \
 318           p0--;                                                         \
 319         } while (p0 >= limit && ! CHAR_HEAD_P (*p0));                   \
 320         (p) = (BYTES_BY_CHAR_HEAD (*p0) == (p) - p0) ? p0 : (p) - 1;    \
 321       }                                                                 \
 322   } while (0)
 323
  324 /* Return the character code of character whose multibyte form is at
  325    P.  Forms longer than 3 bytes are decoded by string_char.  The
  326    argument LEN is ignored; it will be removed in the future.  */
  327
  328 #define STRING_CHAR(p, len)                                     \
  329   (!((p)[0] & 0x80)                                             \
  330    ? (p)[0]                                                     \
  331    : ! ((p)[0] & 0x20)                                          \
  332    ? (((((p)[0] & 0x1F) << 6)                                   \
  333        | ((p)[1] & 0x3F))                                       \
  334       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))       \
  335    : ! ((p)[0] & 0x10)                                          \
  336    ? ((((p)[0] & 0x0F) << 12)                                   \
  337       | (((p)[1] & 0x3F) << 6)                                  \
  338       | ((p)[2] & 0x3F))                                        \
  339    : string_char ((p), NULL, NULL))
 340
 341
  342 /* Like STRING_CHAR, but set ACTUAL_LEN (an lvalue) to the length of
  343    the multibyte form.  The argument LEN is ignored.  It will be
  344    removed in the future.  */
  345
  346 #define STRING_CHAR_AND_LENGTH(p, len, actual_len)              \
  347   (!((p)[0] & 0x80)                                             \
  348    ? ((actual_len) = 1, (p)[0])                                 \
  349    : ! ((p)[0] & 0x20)                                          \
  350    ? ((actual_len) = 2,                                         \
  351       (((((p)[0] & 0x1F) << 6)                                  \
  352         | ((p)[1] & 0x3F))                                      \
  353        + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))     \
  354    : ! ((p)[0] & 0x10)                                          \
  355    ? ((actual_len) = 3,                                         \
  356       ((((p)[0] & 0x0F) << 12)                                  \
  357        | (((p)[1] & 0x3F) << 6)                                 \
  358        | ((p)[2] & 0x3F)))                                      \
  359    : string_char ((p), NULL, &actual_len))
 360
 361
  362 /* Like STRING_CHAR, but advance P (an lvalue) to the end of multibyte form.  */
  363
  364 #define STRING_CHAR_ADVANCE(p)                                  \
  365   (!((p)[0] & 0x80)                                             \
  366    ? *(p)++                                                     \
  367    : ! ((p)[0] & 0x20)                                          \
  368    ? ((p) += 2,                                                 \
  369       ((((p)[-2] & 0x1F) << 6)                                  \
  370        | ((p)[-1] & 0x3F)                                       \
  371        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
  372    : ! ((p)[0] & 0x10)                                          \
  373    ? ((p) += 3,                                                 \
  374       ((((p)[-3] & 0x0F) << 12)                                 \
  375        | (((p)[-2] & 0x3F) << 6)                                \
  376        | ((p)[-1] & 0x3F)))                                     \
  377    : string_char ((p), &(p), NULL))
 378
 379
 380 /* Fetch the "next" character from Lisp string STRING at byte position
 381    BYTEIDX, character position CHARIDX.  Store it into OUTPUT.
 382
 383    All the args must be side-effect-free.
 384    BYTEIDX and CHARIDX must be lvalues;
 385    we increment them past the character fetched.  */
 386
 387 #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)     \
 388   do                                                                    \
 389     {                                                                   \
 390       CHARIDX++;                                                        \
 391       if (STRING_MULTIBYTE (STRING))                                    \
 392         {                                                               \
 393           unsigned char *ptr = &SDATA (STRING)[BYTEIDX];                \
 394           int len;                                                      \
 395                                                                         \
 396           OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);                \
 397           BYTEIDX += len;                                               \
 398         }                                                               \
 399       else                                                              \
 400         {                                                               \
 401           OUTPUT = SREF (STRING, BYTEIDX);                              \
 402           BYTEIDX++;                                                    \
 403         }                                                               \
 404     }                                                                   \
 405   while (0)
 406
 407 /* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
 408    even if STRING is unibyte.  */
 409
 410 #define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 411   do                                                                          \
 412     {                                                                         \
 413       CHARIDX++;                                                              \
 414       if (STRING_MULTIBYTE (STRING))                                          \
 415         {                                                                     \
 416           unsigned char *ptr = &SDATA (STRING)[BYTEIDX];                      \
 417           int len;                                                            \
 418                                                                               \
 419           OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);                      \
 420           BYTEIDX += len;                                                     \
 421         }                                                                     \
 422       else                                                                    \
 423         {                                                                     \
 424           OUTPUT = SREF (STRING, BYTEIDX);                                    \
 425           BYTEIDX++;                                                          \
 426           MAKE_CHAR_MULTIBYTE (OUTPUT);                                       \
 427         }                                                                     \
 428     }                                                                         \
 429   while (0)
 430
 431
 432 /* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte.  */
 433
 434 #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 435   do                                                                         \
 436     {                                                                        \
 437       unsigned char *ptr = &SDATA (STRING)[BYTEIDX];                         \
 438       int len;                                                               \
 439                                                                              \
 440       OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);                         \
 441       BYTEIDX += len;                                                        \
 442       CHARIDX++;                                                             \
 443     }                                                                        \
 444   while (0)
 445
 446
 447 /* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
 448    buffer.  */
 449
 450 #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)            \
 451   do                                                            \
 452     {                                                           \
 453       CHARIDX++;                                                \
 454       if (!NILP (current_buffer->enable_multibyte_characters))  \
 455         {                                                       \
 456           unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX);         \
 457           int len;                                              \
 458                                                                 \
 459           OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len);         \
 460           BYTEIDX += len;                                       \
 461         }                                                       \
 462       else                                                      \
 463         {                                                       \
 464           OUTPUT = *(BYTE_POS_ADDR (BYTEIDX));                  \
 465           BYTEIDX++;                                            \
 466         }                                                       \
 467     }                                                           \
 468   while (0)
 469
 470
 471 /* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte.  */
 472
 473 #define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)   \
 474   do                                                            \
 475     {                                                           \
 476       unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX);             \
 477       int len;                                                  \
 478                                                                 \
 479       OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len);             \
 480       BYTEIDX += len;                                           \
 481       CHARIDX++;                                                \
 482     }                                                           \
 483   while (0)
 484
 485
  486 /* Increment the buffer byte position POS_BYTE of the current buffer to
  487    the next character boundary.  No range checking of POS_BYTE.  */
  488
  489 #define INC_POS(pos_byte)                               \
  490   do {                                                  \
  491     unsigned char *p = BYTE_POS_ADDR (pos_byte);        \
  492     pos_byte += BYTES_BY_CHAR_HEAD (*p);                \
  493   } while (0)
 494
 495
 496 /* Decrement the buffer byte position POS_BYTE of the current buffer to
 497    the previous character boundary.  No range checking of POS.  */
 498
 499 #define DEC_POS(pos_byte)                       \
 500   do {                                          \
 501     unsigned char *p;                           \
 502                                                 \
 503     pos_byte--;                                 \
 504     if (pos_byte < GPT_BYTE)                    \
 505       p = BEG_ADDR + pos_byte - BEG_BYTE;       \
 506     else                                        \
 507       p = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE;\
 508     while (!CHAR_HEAD_P (*p))                   \
 509       {                                         \
 510         p--;                                    \
 511         pos_byte--;                             \
 512       }                                         \
 513   } while (0)
 514
 515 /* Increment both CHARPOS and BYTEPOS, each in the appropriate way.  */
 516
 517 #define INC_BOTH(charpos, bytepos)                              \
 518   do                                                            \
 519     {                                                           \
 520       (charpos)++;                                              \
 521       if (NILP (current_buffer->enable_multibyte_characters))   \
 522         (bytepos)++;                                            \
 523       else                                                      \
 524         INC_POS ((bytepos));                                    \
 525     }                                                           \
 526   while (0)
 527
 528
 529 /* Decrement both CHARPOS and BYTEPOS, each in the appropriate way.  */
 530
 531 #define DEC_BOTH(charpos, bytepos)                              \
 532   do                                                            \
 533     {                                                           \
 534       (charpos)--;                                              \
 535       if (NILP (current_buffer->enable_multibyte_characters))   \
 536         (bytepos)--;                                            \
 537       else                                                      \
 538         DEC_POS ((bytepos));                                    \
 539     }                                                           \
 540   while (0)
 541
 542
  543 /* Increment the buffer byte position POS_BYTE of buffer BUF to
  544    the next character boundary.  This macro relies on the fact that
  545    *GPT_ADDR and *Z_ADDR are always accessible and the values are
  546    '\0'.  No range checking of POS_BYTE.  */
  547
  548 #define BUF_INC_POS(buf, pos_byte)                              \
  549   do {                                                          \
  550     unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte);        \
  551     pos_byte += BYTES_BY_CHAR_HEAD (*p);                        \
  552   } while (0)
 553
 554
 555 /* Decrement the buffer byte position POS_BYTE of the current buffer to
 556    the previous character boundary.  No range checking of POS_BYTE.  */
 557
 558 #define BUF_DEC_POS(buf, pos_byte)                                      \
 559   do {                                                                  \
 560     unsigned char *p;                                                   \
 561     pos_byte--;                                                         \
 562     if (pos_byte < BUF_GPT_BYTE (buf))                                  \
 563       p = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE;                     \
 564     else                                                                \
 565       p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
 566     while (!CHAR_HEAD_P (*p))                                           \
 567       {                                                                 \
 568         p--;                                                            \
 569         pos_byte--;                                                     \
 570       }                                                                 \
 571   } while (0)
 572
 573
 574 /* If C is a character to be unified with a Unicode character, return
 575    the unified Unicode character.  */
 576
 577 #define MAYBE_UNIFY_CHAR(c)                             \
 578   do {                                                  \
 579     if (c > MAX_UNICODE_CHAR && c <= MAX_5_BYTE_CHAR)   \
 580       {                                                 \
 581         Lisp_Object val;                                \
 582         val = CHAR_TABLE_REF (Vchar_unify_table, c);    \
 583         if (INTEGERP (val))                             \
 584           c = XINT (val);                               \
 585         else if (! NILP (val))                          \
 586           c = maybe_unify_char (c, val);                \
 587       }                                                 \
 588   } while (0)
 589
 590
 591 /* Return the width of ASCII character C.  The width is measured by
 592    how many columns C will occupy on the screen when displayed in the
 593    current buffer.  */
 594
 595 #define ASCII_CHAR_WIDTH(c)                                             \
 596   (c < 0x20                                                             \
 597    ? (c == '\t'                                                         \
 598       ? XFASTINT (current_buffer->tab_width)                            \
 599       : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))   \
 600    : (c < 0x7f                                                          \
 601       ? 1                                                               \
 602       : ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))
 603
 604 /* Return the width of character C.  The width is measured by how many
 605    columns C will occupy on the screen when displayed in the current
 606    buffer.  */
 607
 608 #define CHAR_WIDTH(c)           \
 609   (ASCII_CHAR_P (c)             \
 610    ? ASCII_CHAR_WIDTH (c)       \
 611    : XINT (CHAR_TABLE_REF (Vchar_width_table, c)))
 612
  613 /* If C is a variation selector, return the index number of the
  614    variation selector (1..256).  Otherwise, return 0.  */
  615
  616 #define CHAR_VARIATION_SELECTOR_P(c)            \
  617   ((c) < 0xFE00 ? 0                             \
  618    : (c) <= 0xFE0F ? (c) - 0xFE00 + 1           \
  619    : (c) < 0xE0100 ? 0                          \
  620    : (c) <= 0xE01EF ? (c) - 0xE0100 + 17        \
  621    : 0)
 622
  623 /* If C is a high surrogate, return 1.  If C is a low surrogate,
  624    return 2.  Otherwise, return 0.  */
  625
  626 #define CHAR_SURROGATE_PAIR_P(c)        \
  627   ((c) < 0xD800 ? 0                     \
  628    : (c) <= 0xDBFF ? 1                  \
  629    : (c) <= 0xDFFF ? 2                  \
  630    : 0)
 631
 632
 633 extern int char_resolve_modifier_mask P_ ((int));
 634 extern int char_string P_ ((unsigned, unsigned char *));
 635 extern int string_char P_ ((const unsigned char *,
 636                             const unsigned char **, int *));
 637
 638 extern int translate_char P_ ((Lisp_Object, int c));
 639 extern int char_printable_p P_ ((int c));
 640 extern void parse_str_as_multibyte P_ ((const unsigned char *, int, int *,
 641                                         int *));
 642 extern int parse_str_to_multibyte P_ ((unsigned char *, int));
 643 extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
 644 extern int str_to_multibyte P_ ((unsigned char *, int, int));
 645 extern int str_as_unibyte P_ ((unsigned char *, int));
 646 extern EMACS_INT str_to_unibyte P_ ((const unsigned char *, unsigned char *,
 647                                      EMACS_INT, int));
 648 extern int strwidth P_ ((unsigned char *, int));
 649 extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *));
 650 extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
 651
 652 extern Lisp_Object Vprintable_chars;
 653
 654 extern Lisp_Object Qcharacterp, Qauto_fill_chars;
 655 extern Lisp_Object Vtranslation_table_vector;
 656 extern Lisp_Object Vchar_width_table;
 657 extern Lisp_Object Vchar_direction_table;
 658 extern Lisp_Object Vchar_unify_table;
 659 extern Lisp_Object Vunicode_category_table;
 660
 661 extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object));
 662
  663 /* Return translation table number ID: the cdr of Vtranslation_table_vector[ID].  */
  664 #define GET_TRANSLATION_TABLE(id) \
  665   (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
 666
 667 /* A char-table for characters which may invoke auto-filling.  */
 668 extern Lisp_Object Vauto_fill_chars;
 669
 670 extern Lisp_Object Vchar_script_table;
 671 extern Lisp_Object Vscript_representative_chars;
 672
 673 /* Copy LEN bytes from FROM to TO.  This macro should be used only
 674    when a caller knows that LEN is short and the obvious copy loop is
 675    faster than calling bcopy which has some overhead.  Copying a
 676    multibyte sequence of a character is the typical case.  */
 677
 678 #define BCOPY_SHORT(from, to, len)              \
 679   do {                                          \
 680     int i = len;                                \
 681     unsigned char *from_p = from, *to_p = to;   \
 682     while (i--) *to_p++ = *from_p++;            \
 683   } while (0)
  684 /* Set SYM to intern (NAME) and pass SYM's address to staticpro.  */
  685 #define DEFSYM(sym, name)       \
  686   do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
 687
 688 #endif /* EMACS_CHARACTER_H */
 689
 690 /* arch-tag: 4ef86004-2eff-4073-8cea-cfcbcf7188ac
 691    (do not change this comment) */