src/character.h

   1 /* Header for multibyte character handler.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H13PRO009
   7
   8 This file is part of GNU Emacs.
   9
  10 GNU Emacs is free software: you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation, either version 3 of the License, or
  13 (at your option) any later version.
  14
  15 GNU Emacs is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  22
  23 #ifndef EMACS_CHARACTER_H
  24 #define EMACS_CHARACTER_H
  25
  26 #include <verify.h>
  27
  28 /* character code       1st byte   byte sequence
  29    --------------       --------   -------------
  30         0-7F            00..7F     0xxxxxxx
  31        80-7FF           C2..DF     110xxxxx 10xxxxxx
  32       800-FFFF          E0..EF     1110xxxx 10xxxxxx 10xxxxxx
  33     10000-1FFFFF        F0..F7     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  34    200000-3FFF7F        F8         11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
  35    3FFF80-3FFFFF        C0..C1     1100000x 10xxxxxx (for eight-bit-char)
  36    400000-...           invalid
  37
  38    invalid 1st byte     80..BF     10xxxxxx
  39                         F9..FF     11111xxx (xxx != 000)
  40 */
  41
  42 /* Maximum character code ((1 << CHARACTERBITS) - 1).  */
  43 #define MAX_CHAR  0x3FFFFF
  44
  45 /* Maximum Unicode character code.  */
  46 #define MAX_UNICODE_CHAR 0x10FFFF
  47
  48 /* Maximum N-byte character codes.  */
  49 #define MAX_1_BYTE_CHAR 0x7F
  50 #define MAX_2_BYTE_CHAR 0x7FF
  51 #define MAX_3_BYTE_CHAR 0xFFFF
  52 #define MAX_4_BYTE_CHAR 0x1FFFFF
  53 #define MAX_5_BYTE_CHAR 0x3FFF7F
  54
  55 /* Minimum leading code of multibyte characters.  */
  56 #define MIN_MULTIBYTE_LEADING_CODE 0xC0
  57 /* Maximum leading code of multibyte characters.  */
  58 #define MAX_MULTIBYTE_LEADING_CODE 0xF8
  59
  60 /* Nonzero iff C is a character that corresponds to a raw 8-bit
  61    byte.  */
  62 #define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
  63
  64 /* Return the character code for raw 8-bit byte BYTE.  */
  65 #define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
  66
  67 #define UNIBYTE_TO_CHAR(byte) \
  68   (ASCII_BYTE_P (byte) ? (byte) : BYTE8_TO_CHAR (byte))
  69
  70 /* Return the raw 8-bit byte for character C.  */
  71 #define CHAR_TO_BYTE8(c)        \
  72   (CHAR_BYTE8_P (c)             \
  73    ? (c) - 0x3FFF00             \
  74    : multibyte_char_to_unibyte (c))
  75
  76 /* Return the raw 8-bit byte for character C,
  77    or -1 if C doesn't correspond to a byte.  */
  78 #define CHAR_TO_BYTE_SAFE(c)    \
  79   (CHAR_BYTE8_P (c)             \
  80    ? (c) - 0x3FFF00             \
  81    : multibyte_char_to_unibyte_safe (c))
  82
  83 /* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
  84    that corresponds to a raw 8-bit byte.  */
  85 #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
  86
  87 /* If C is not ASCII, make it unibyte. */
  88 #define MAKE_CHAR_UNIBYTE(c)    \
  89   do {                          \
  90     if (! ASCII_CHAR_P (c))     \
  91       c = CHAR_TO_BYTE8 (c);    \
  92   } while (0)
  93
  94
  95 /* If C is not ASCII, make it multibyte.  Assumes C < 256.  */
  96 #define MAKE_CHAR_MULTIBYTE(c) \
  97   (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c))
  98
  99 /* This is the maximum byte length of multibyte form.  */
 100 #define MAX_MULTIBYTE_LENGTH 5
 101
 102 /* Return a Lisp character whose character code is C.  Assumes C is
 103    a valid character code.  */
 104 #define make_char(c) make_number (c)
 105
 106 /* Nonzero iff C is an ASCII byte.  */
 107 #define ASCII_BYTE_P(c) UNSIGNED_CMP (c, <, 0x80)
 108
 109 /* Nonzero iff X is a character.  */
 110 #define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
 111
 112 /* Nonzero iff C is valid as a character code.  */
 113 #define CHAR_VALID_P(c) UNSIGNED_CMP (c, <=, MAX_CHAR)
 114
 115 /* Check if Lisp object X is a character or not.  */
 116 #define CHECK_CHARACTER(x) \
 117   CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
 118
 119 #define CHECK_CHARACTER_CAR(x) \
 120   do {                                  \
 121     Lisp_Object tmp = XCAR (x);         \
 122     CHECK_CHARACTER (tmp);              \
 123     XSETCAR ((x), tmp);                 \
 124   } while (0)
 125
 126 #define CHECK_CHARACTER_CDR(x) \
 127   do {                                  \
 128     Lisp_Object tmp = XCDR (x);         \
 129     CHECK_CHARACTER (tmp);              \
 130     XSETCDR ((x), tmp);                 \
 131   } while (0)
 132
 133 /* Nonzero iff C is a character of code less than 0x100.  */
 134 #define SINGLE_BYTE_CHAR_P(c) UNSIGNED_CMP (c, <, 0x100)
 135
 136 /* Nonzero if character C has a printable glyph.  */
 137 #define CHAR_PRINTABLE_P(c)     \
 138   (((c) >= 32 && (c) < 127)     \
 139    || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))
 140
 141 /* Return byte length of multibyte form for character C.  */
 142 #define CHAR_BYTES(c)                   \
 143   ( (c) <= MAX_1_BYTE_CHAR ? 1          \
 144     : (c) <= MAX_2_BYTE_CHAR ? 2        \
 145     : (c) <= MAX_3_BYTE_CHAR ? 3        \
 146     : (c) <= MAX_4_BYTE_CHAR ? 4        \
 147     : (c) <= MAX_5_BYTE_CHAR ? 5        \
 148     : 2)
 149
 150
 151 /* Return the leading code of multibyte form of C.  */
 152 #define CHAR_LEADING_CODE(c)                            \
 153   ((c) <= MAX_1_BYTE_CHAR ? c                           \
 154    : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6))       \
 155    : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12))      \
 156    : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18))      \
 157    : (c) <= MAX_5_BYTE_CHAR ? 0xF8                      \
 158    : (0xC0 | (((c) >> 6) & 0x01)))
 159
 160
 161 /* Store multibyte form of the character C in P.  The caller should
 162    allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
 163    Returns the length of the multibyte form.  */
 164
 165 #define CHAR_STRING(c, p)                       \
 166   (UNSIGNED_CMP (c, <=, MAX_1_BYTE_CHAR)        \
 167    ? ((p)[0] = (c),                             \
 168       1)                                        \
 169    : UNSIGNED_CMP (c, <=, MAX_2_BYTE_CHAR)      \
 170    ? ((p)[0] = (0xC0 | ((c) >> 6)),             \
 171       (p)[1] = (0x80 | ((c) & 0x3F)),           \
 172       2)                                        \
 173    : UNSIGNED_CMP (c, <=, MAX_3_BYTE_CHAR)      \
 174    ? ((p)[0] = (0xE0 | ((c) >> 12)),            \
 175       (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),    \
 176       (p)[2] = (0x80 | ((c) & 0x3F)),           \
 177       3)                                        \
 178    : verify_expr (sizeof (c) <= sizeof (unsigned), char_string (c, p)))
 179
 180 /* Store multibyte form of byte B in P.  The caller should allocate at
 181    least MAX_MULTIBYTE_LENGTH bytes area at P in advance.  Returns the
 182    length of the multibyte form.  */
 183
 184 #define BYTE8_STRING(b, p)                      \
 185   ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),       \
 186    (p)[1] = (0x80 | ((b) & 0x3F)),              \
 187    2)
 188
 189
 190 /* Store multibyte form of the character C in P and advance P to the
 191    end of the multibyte form.  The caller should allocate at least
 192    MAX_MULTIBYTE_LENGTH bytes area at P in advance.  */
 193
 194 #define CHAR_STRING_ADVANCE(c, p)               \
 195   do {                                          \
 196     if ((c) <= MAX_1_BYTE_CHAR)                 \
 197       *(p)++ = (c);                             \
 198     else if ((c) <= MAX_2_BYTE_CHAR)            \
 199       *(p)++ = (0xC0 | ((c) >> 6)),             \
 200         *(p)++ = (0x80 | ((c) & 0x3F));         \
 201     else if ((c) <= MAX_3_BYTE_CHAR)            \
 202       *(p)++ = (0xE0 | ((c) >> 12)),            \
 203         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
 204         *(p)++ = (0x80 | ((c) & 0x3F));         \
 205     else                                        \
 206       {                                         \
 207         verify (sizeof (c) <= sizeof (unsigned));       \
 208         (p) += char_string (c, p);              \
 209       }                                         \
 210   } while (0)
 211
 212
 213 /* Nonzero iff BYTE starts a non-ASCII character in a multibyte
 214    form.  */
 215 #define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
 216
 217 /* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
 218    multibyte form.  */
 219 #define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
 220
 221 /* Nonzero iff BYTE starts a character in a multibyte form.
 222    This is equivalent to:
 223         (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte))  */
 224 #define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
 225
 226 /* How many bytes a character that starts with BYTE occupies in a
 227    multibyte form.  */
 228 #define BYTES_BY_CHAR_HEAD(byte)        \
 229   (!((byte) & 0x80) ? 1                 \
 230    : !((byte) & 0x20) ? 2               \
 231    : !((byte) & 0x10) ? 3               \
 232    : !((byte) & 0x08) ? 4               \
 233    : 5)
 234
 235
 236 /* The byte length of multibyte form at unibyte string P ending at
 237    PEND.  If STR doesn't point to a valid multibyte form, return 0.  */
 238
 239 #define MULTIBYTE_LENGTH(p, pend)                               \
 240   (p >= pend ? 0                                                \
 241    : !((p)[0] & 0x80) ? 1                                       \
 242    : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0         \
 243    : ((p)[0] & 0xE0) == 0xC0 ? 2                                \
 244    : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0         \
 245    : ((p)[0] & 0xF0) == 0xE0 ? 3                                \
 246    : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0         \
 247    : ((p)[0] & 0xF8) == 0xF0 ? 4                                \
 248    : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0         \
 249    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5              \
 250    : 0)
 251
 252
 253 /* Like MULTIBYTE_LENGTH, but don't check the ending address.  */
 254
 255 #define MULTIBYTE_LENGTH_NO_CHECK(p)                    \
 256   (!((p)[0] & 0x80) ? 1                                 \
 257    : ((p)[1] & 0xC0) != 0x80 ? 0                        \
 258    : ((p)[0] & 0xE0) == 0xC0 ? 2                        \
 259    : ((p)[2] & 0xC0) != 0x80 ? 0                        \
 260    : ((p)[0] & 0xF0) == 0xE0 ? 3                        \
 261    : ((p)[3] & 0xC0) != 0x80 ? 0                        \
 262    : ((p)[0] & 0xF8) == 0xF0 ? 4                        \
 263    : ((p)[4] & 0xC0) != 0x80 ? 0                        \
 264    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5      \
 265    : 0)
 266
 267 /* If P is before LIMIT, advance P to the next character boundary.
 268    Assumes that P is already at a character boundary of the same
 269    multibyte form whose end address is LIMIT.  */
 270
 271 #define NEXT_CHAR_BOUNDARY(p, limit)    \
 272   do {                                  \
 273     if ((p) < (limit))                  \
 274       (p) += BYTES_BY_CHAR_HEAD (*(p)); \
 275   } while (0)
 276
 277
 278 /* If P is after LIMIT, advance P to the previous character boundary.
 279    Assumes that P is already at a character boundary of the same
 280    multibyte form whose beginning address is LIMIT.  */
 281
 282 #define PREV_CHAR_BOUNDARY(p, limit)                                    \
 283   do {                                                                  \
 284     if ((p) > (limit))                                                  \
 285       {                                                                 \
 286         const unsigned char *chp = (p);                                 \
 287         do {                                                            \
 288           chp--;                                                        \
 289         } while (chp >= limit && ! CHAR_HEAD_P (*chp));                 \
 290         (p) = (BYTES_BY_CHAR_HEAD (*chp) == (p) - chp) ? chp : (p) - 1; \
 291       }                                                                 \
 292   } while (0)
 293
 294 /* Return the character code of character whose multibyte form is at
 295    P.  Note that this macro unifies CJK characters whose codepoints
 296    are in the Private Use Areas (PUAs), so it might return a different
 297    codepoint from the one actually stored at P.  */
 298
 299 #define STRING_CHAR(p)                                          \
 300   (!((p)[0] & 0x80)                                             \
 301    ? (p)[0]                                                     \
 302    : ! ((p)[0] & 0x20)                                          \
 303    ? (((((p)[0] & 0x1F) << 6)                                   \
 304        | ((p)[1] & 0x3F))                                       \
 305       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))       \
 306    : ! ((p)[0] & 0x10)                                          \
 307    ? ((((p)[0] & 0x0F) << 12)                                   \
 308       | (((p)[1] & 0x3F) << 6)                                  \
 309       | ((p)[2] & 0x3F))                                        \
 310    : string_char ((p), NULL, NULL))
 311
 312
 313 /* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
 314    form.
 315
 316    Note: This macro returns the actual length of the character's
 317    multibyte sequence as it is stored in a buffer or string.  The
 318    character it returns might have a different codepoint that has a
 319    different multibyte sequence of a different length, due to possible
 320    unification of CJK characters inside string_char.  Therefore do NOT
 321    assume that the length returned by this macro is identical to the
 322    length of the multibyte sequence of the character it returns.  */
 323
 324 #define STRING_CHAR_AND_LENGTH(p, actual_len)                   \
 325   (!((p)[0] & 0x80)                                             \
 326    ? ((actual_len) = 1, (p)[0])                                 \
 327    : ! ((p)[0] & 0x20)                                          \
 328    ? ((actual_len) = 2,                                         \
 329       (((((p)[0] & 0x1F) << 6)                                  \
 330         | ((p)[1] & 0x3F))                                      \
 331        + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))     \
 332    : ! ((p)[0] & 0x10)                                          \
 333    ? ((actual_len) = 3,                                         \
 334       ((((p)[0] & 0x0F) << 12)                                  \
 335        | (((p)[1] & 0x3F) << 6)                                 \
 336        | ((p)[2] & 0x3F)))                                      \
 337    : string_char ((p), NULL, &actual_len))
 338
 339
 340 /* Like STRING_CHAR, but advance P to the end of multibyte form.  */
 341
 342 #define STRING_CHAR_ADVANCE(p)                                  \
 343   (!((p)[0] & 0x80)                                             \
 344    ? *(p)++                                                     \
 345    : ! ((p)[0] & 0x20)                                          \
 346    ? ((p) += 2,                                                 \
 347       ((((p)[-2] & 0x1F) << 6)                                  \
 348        | ((p)[-1] & 0x3F)                                       \
 349        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
 350    : ! ((p)[0] & 0x10)                                          \
 351    ? ((p) += 3,                                                 \
 352       ((((p)[-3] & 0x0F) << 12)                                 \
 353        | (((p)[-2] & 0x3F) << 6)                                \
 354        | ((p)[-1] & 0x3F)))                                     \
 355    : string_char ((p), &(p), NULL))
 356
 357
 358 /* Fetch the "next" character from Lisp string STRING at byte position
 359    BYTEIDX, character position CHARIDX.  Store it into OUTPUT.
 360
 361    All the args must be side-effect-free.
 362    BYTEIDX and CHARIDX must be lvalues;
 363    we increment them past the character fetched.  */
 364
 365 #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)     \
 366   do                                                                    \
 367     {                                                                   \
 368       CHARIDX++;                                                        \
 369       if (STRING_MULTIBYTE (STRING))                                    \
 370         {                                                               \
 371           unsigned char *chp = &SDATA (STRING)[BYTEIDX];                \
 372           int chlen;                                                    \
 373                                                                         \
 374           OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);                 \
 375           BYTEIDX += chlen;                                             \
 376         }                                                               \
 377       else                                                              \
 378         {                                                               \
 379           OUTPUT = SREF (STRING, BYTEIDX);                              \
 380           BYTEIDX++;                                                    \
 381         }                                                               \
 382     }                                                                   \
 383   while (0)
 384
 385 /* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
 386    even if STRING is unibyte.  */
 387
 388 #define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 389   do                                                                          \
 390     {                                                                         \
 391       CHARIDX++;                                                              \
 392       if (STRING_MULTIBYTE (STRING))                                          \
 393         {                                                                     \
 394           unsigned char *chp = &SDATA (STRING)[BYTEIDX];                      \
 395           int chlen;                                                          \
 396                                                                               \
 397           OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);                       \
 398           BYTEIDX += chlen;                                                   \
 399         }                                                                     \
 400       else                                                                    \
 401         {                                                                     \
 402           OUTPUT = SREF (STRING, BYTEIDX);                                    \
 403           BYTEIDX++;                                                          \
 404           MAKE_CHAR_MULTIBYTE (OUTPUT);                                       \
 405         }                                                                     \
 406     }                                                                         \
 407   while (0)
 408
 409
 410 /* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte.  */
 411
 412 #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 413   do                                                                         \
 414     {                                                                        \
 415       unsigned char *fetch_ptr = &SDATA (STRING)[BYTEIDX];                   \
 416       int fetch_len;                                                         \
 417                                                                              \
 418       OUTPUT = STRING_CHAR_AND_LENGTH (fetch_ptr, fetch_len);                \
 419       BYTEIDX += fetch_len;                                                  \
 420       CHARIDX++;                                                             \
 421     }                                                                        \
 422   while (0)
 423
 424
 425 /* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
 426    buffer.  */
 427
 428 #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)            \
 429   do                                                            \
 430     {                                                           \
 431       CHARIDX++;                                                \
 432       if (!NILP (BVAR (current_buffer, enable_multibyte_characters)))   \
 433         {                                                       \
 434           unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);         \
 435           int chlen;                                            \
 436                                                                 \
 437           OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);         \
 438           BYTEIDX += chlen;                                     \
 439         }                                                       \
 440       else                                                      \
 441         {                                                       \
 442           OUTPUT = *(BYTE_POS_ADDR (BYTEIDX));                  \
 443           BYTEIDX++;                                            \
 444         }                                                       \
 445     }                                                           \
 446   while (0)
 447
 448
 449 /* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte.  */
 450
 451 #define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)   \
 452   do                                                            \
 453     {                                                           \
 454       unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);             \
 455       int chlen;                                                        \
 456                                                                 \
 457       OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);             \
 458       BYTEIDX += chlen;                                         \
 459       CHARIDX++;                                                \
 460     }                                                           \
 461   while (0)
 462
 463
 464 /* Increment the buffer byte position POS_BYTE of the current buffer to
 465    the next character boundary.  No range checking of POS.  */
 466
 467 #define INC_POS(pos_byte)                               \
 468   do {                                                  \
 469     unsigned char *chp = BYTE_POS_ADDR (pos_byte);      \
 470     pos_byte += BYTES_BY_CHAR_HEAD (*chp);              \
 471   } while (0)
 472
 473
 474 /* Decrement the buffer byte position POS_BYTE of the current buffer to
 475    the previous character boundary.  No range checking of POS.  */
 476
 477 #define DEC_POS(pos_byte)                       \
 478   do {                                          \
 479     unsigned char *chp;                         \
 480                                                 \
 481     pos_byte--;                                 \
 482     if (pos_byte < GPT_BYTE)                    \
 483       chp = BEG_ADDR + pos_byte - BEG_BYTE;     \
 484     else                                        \
 485       chp = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE; \
 486     while (!CHAR_HEAD_P (*chp))                 \
 487       {                                         \
 488         chp--;                                  \
 489         pos_byte--;                             \
 490       }                                         \
 491   } while (0)
 492
 493 /* Increment both CHARPOS and BYTEPOS, each in the appropriate way.  */
 494
 495 #define INC_BOTH(charpos, bytepos)                              \
 496   do                                                            \
 497     {                                                           \
 498       (charpos)++;                                              \
 499       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))    \
 500         (bytepos)++;                                            \
 501       else                                                      \
 502         INC_POS ((bytepos));                                    \
 503     }                                                           \
 504   while (0)
 505
 506
 507 /* Decrement both CHARPOS and BYTEPOS, each in the appropriate way.  */
 508
 509 #define DEC_BOTH(charpos, bytepos)                              \
 510   do                                                            \
 511     {                                                           \
 512       (charpos)--;                                              \
 513       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))    \
 514         (bytepos)--;                                            \
 515       else                                                      \
 516         DEC_POS ((bytepos));                                    \
 517     }                                                           \
 518   while (0)
 519
 520
 521 /* Increment the buffer byte position POS_BYTE of the current buffer to
 522    the next character boundary.  This macro relies on the fact that
 523    *GPT_ADDR and *Z_ADDR are always accessible and the values are
 524    '\0'.  No range checking of POS_BYTE.  */
 525
 526 #define BUF_INC_POS(buf, pos_byte)                              \
 527   do {                                                          \
 528     unsigned char *chp = BUF_BYTE_ADDRESS (buf, pos_byte);      \
 529     pos_byte += BYTES_BY_CHAR_HEAD (*chp);                      \
 530   } while (0)
 531
 532
 533 /* Decrement the buffer byte position POS_BYTE of the current buffer to
 534    the previous character boundary.  No range checking of POS_BYTE.  */
 535
 536 #define BUF_DEC_POS(buf, pos_byte)                                      \
 537   do {                                                                  \
 538     unsigned char *chp;                                                 \
 539     pos_byte--;                                                         \
 540     if (pos_byte < BUF_GPT_BYTE (buf))                                  \
 541       chp = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE;                   \
 542     else                                                                \
 543       chp = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
 544     while (!CHAR_HEAD_P (*chp))                                         \
 545       {                                                                 \
 546         chp--;                                                          \
 547         pos_byte--;                                                     \
 548       }                                                                 \
 549   } while (0)
 550
 551
 552 /* If C is a character to be unified with a Unicode character, return
 553    the unified Unicode character.  */
 554
 555 #define MAYBE_UNIFY_CHAR(c)                             \
 556   do {                                                  \
 557     if (c > MAX_UNICODE_CHAR && c <= MAX_5_BYTE_CHAR)   \
 558       {                                                 \
 559         Lisp_Object val;                                \
 560         val = CHAR_TABLE_REF (Vchar_unify_table, c);    \
 561         if (INTEGERP (val))                             \
 562           c = XFASTINT (val);                           \
 563         else if (! NILP (val))                          \
 564           c = maybe_unify_char (c, val);                \
 565       }                                                 \
 566   } while (0)
 567
 568
 569 /* Return a non-outlandish value for the tab width.  */
 570
 571 #define SANE_TAB_WIDTH(buf) \
 572   sanitize_tab_width (XFASTINT (BVAR (buf, tab_width)))
 573 static inline int
 574 sanitize_tab_width (EMACS_INT width)
 575 {
 576   return 0 < width && width <= 1000 ? width : 8;
 577 }
 578
 579 /* Return the width of ASCII character C.  The width is measured by
 580    how many columns C will occupy on the screen when displayed in the
 581    current buffer.  */
 582
 583 #define ASCII_CHAR_WIDTH(c)                                             \
 584   (c < 0x20                                                             \
 585    ? (c == '\t'                                                         \
 586       ? SANE_TAB_WIDTH (current_buffer)                                 \
 587       : (c == '\n' ? 0 : (NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2)))    \
 588    : (c < 0x7f                                                          \
 589       ? 1                                                               \
 590       : ((NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))))
 591
 592 /* Return a non-outlandish value for a character width.  */
 593
 594 static inline int
 595 sanitize_char_width (EMACS_INT width)
 596 {
 597   return 0 <= width && width <= 1000 ? width : 1000;
 598 }
 599
 600 /* Return the width of character C.  The width is measured by how many
 601    columns C will occupy on the screen when displayed in the current
 602    buffer.  */
 603
 604 #define CHAR_WIDTH(c)           \
 605   (ASCII_CHAR_P (c)             \
 606    ? ASCII_CHAR_WIDTH (c)       \
 607    : sanitize_char_width (XINT (CHAR_TABLE_REF (Vchar_width_table, c))))
 608
 609 /* If C is a variation selector, return the index of the
 610    variation selector (1..256).  Otherwise, return 0.  */
 611
 612 #define CHAR_VARIATION_SELECTOR_P(c)            \
 613   ((c) < 0xFE00 ? 0                             \
 614    : (c) <= 0xFE0F ? (c) - 0xFE00 + 1           \
 615    : (c) < 0xE0100 ? 0                          \
 616    : (c) <= 0xE01EF ? (c) - 0xE0100 + 17        \
 617    : 0)
 618
 619 /* If C is a high surrogate, return 1.  If C is a low surrogate,
 620    return 0.  Otherwise, return 0.  */
 621
 622 #define CHAR_SURROGATE_PAIR_P(c)        \
 623   ((c) < 0xD800 ? 0                     \
 624    : (c) <= 0xDBFF ? 1                  \
 625    : (c) <= 0xDFFF ? 2                  \
 626    : 0)
 627
 628 /* Data type for Unicode general category.
 629
 630    The order of members must be in sync with the 8th element of the
 631    member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for
 632    Unicode character property `general-category'.  */
 633
 634 typedef enum {
 635   UNICODE_CATEGORY_UNKNOWN = 0,
 636   UNICODE_CATEGORY_Lu,
 637   UNICODE_CATEGORY_Ll,
 638   UNICODE_CATEGORY_Lt,
 639   UNICODE_CATEGORY_Lm,
 640   UNICODE_CATEGORY_Lo,
 641   UNICODE_CATEGORY_Mn,
 642   UNICODE_CATEGORY_Mc,
 643   UNICODE_CATEGORY_Me,
 644   UNICODE_CATEGORY_Nd,
 645   UNICODE_CATEGORY_Nl,
 646   UNICODE_CATEGORY_No,
 647   UNICODE_CATEGORY_Pc,
 648   UNICODE_CATEGORY_Pd,
 649   UNICODE_CATEGORY_Ps,
 650   UNICODE_CATEGORY_Pe,
 651   UNICODE_CATEGORY_Pi,
 652   UNICODE_CATEGORY_Pf,
 653   UNICODE_CATEGORY_Po,
 654   UNICODE_CATEGORY_Sm,
 655   UNICODE_CATEGORY_Sc,
 656   UNICODE_CATEGORY_Sk,
 657   UNICODE_CATEGORY_So,
 658   UNICODE_CATEGORY_Zs,
 659   UNICODE_CATEGORY_Zl,
 660   UNICODE_CATEGORY_Zp,
 661   UNICODE_CATEGORY_Cc,
 662   UNICODE_CATEGORY_Cf,
 663   UNICODE_CATEGORY_Cs,
 664   UNICODE_CATEGORY_Co,
 665   UNICODE_CATEGORY_Cn
 666 } unicode_category_t;
 667
 668 extern EMACS_INT char_resolve_modifier_mask (EMACS_INT);
 669 extern int char_string (unsigned, unsigned char *);
 670 extern int string_char (const unsigned char *,
 671                         const unsigned char **, int *);
 672
 673 extern int translate_char (Lisp_Object, int c);
 674 extern int char_printable_p (int c);
 675 extern void parse_str_as_multibyte (const unsigned char *,
 676                                     ptrdiff_t, ptrdiff_t *, ptrdiff_t *);
 677 extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
 678 extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
 679                                    ptrdiff_t *);
 680 extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
 681 extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
 682 extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
 683                                  ptrdiff_t, int);
 684 extern ptrdiff_t strwidth (const char *, ptrdiff_t);
 685 extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
 686                                  ptrdiff_t *, ptrdiff_t *);
 687 extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t,
 688                                     ptrdiff_t *, ptrdiff_t *);
 689
 690 extern Lisp_Object Qcharacterp;
 691 extern Lisp_Object Vchar_unify_table;
 692 extern Lisp_Object string_escape_byte8 (Lisp_Object);
 693
 694 /* Return a translation table of id number ID.  */
 695 #define GET_TRANSLATION_TABLE(id) \
 696   (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
 697
 698 #endif /* EMACS_CHARACTER_H */