src/character.h

   1 /* Header for multibyte character handler.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H13PRO009
   7
   8 This file is part of GNU Emacs.
   9
  10 GNU Emacs is free software: you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation, either version 3 of the License, or
  13 (at your option) any later version.
  14
  15 GNU Emacs is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  22
  23 #ifndef EMACS_CHARACTER_H
  24 #define EMACS_CHARACTER_H
  25
  26 #include <verify.h>
  27
  28 INLINE_HEADER_BEGIN
  29
  30 /* character code       1st byte   byte sequence
  31    --------------       --------   -------------
  32         0-7F            00..7F     0xxxxxxx
  33        80-7FF           C2..DF     110xxxxx 10xxxxxx
  34       800-FFFF          E0..EF     1110xxxx 10xxxxxx 10xxxxxx
  35     10000-1FFFFF        F0..F7     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  36    200000-3FFF7F        F8         11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
  37    3FFF80-3FFFFF        C0..C1     1100000x 10xxxxxx (for eight-bit-char)
  38    400000-...           invalid
  39
  40    invalid 1st byte     80..BF     10xxxxxx
  41                         F9..FF     11111xxx (xxx != 000)
  42 */
  43
  44 /* Maximum character code ((1 << CHARACTERBITS) - 1).  */
  45 #define MAX_CHAR  0x3FFFFF
  46
  47 /* Maximum Unicode character code.  */
  48 #define MAX_UNICODE_CHAR 0x10FFFF
  49
  50 /* Maximum N-byte character codes.  */
  51 #define MAX_1_BYTE_CHAR 0x7F
  52 #define MAX_2_BYTE_CHAR 0x7FF
  53 #define MAX_3_BYTE_CHAR 0xFFFF
  54 #define MAX_4_BYTE_CHAR 0x1FFFFF
  55 #define MAX_5_BYTE_CHAR 0x3FFF7F
  56
  57 /* Minimum leading code of multibyte characters.  */
  58 #define MIN_MULTIBYTE_LEADING_CODE 0xC0
  59 /* Maximum leading code of multibyte characters.  */
  60 #define MAX_MULTIBYTE_LEADING_CODE 0xF8
  61
  62 /* Nonzero iff C is a character that corresponds to a raw 8-bit
  63    byte.  */
  64 #define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
  65
  66 /* Return the character code for raw 8-bit byte BYTE.  */
  67 #define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
  68
  69 #define UNIBYTE_TO_CHAR(byte) \
  70   (ASCII_BYTE_P (byte) ? (byte) : BYTE8_TO_CHAR (byte))
  71
  72 /* Return the raw 8-bit byte for character C.  */
  73 #define CHAR_TO_BYTE8(c)        \
  74   (CHAR_BYTE8_P (c)             \
  75    ? (c) - 0x3FFF00             \
  76    : multibyte_char_to_unibyte (c))
  77
  78 /* Return the raw 8-bit byte for character C,
  79    or -1 if C doesn't correspond to a byte.  */
  80 #define CHAR_TO_BYTE_SAFE(c)    \
  81   (CHAR_BYTE8_P (c)             \
  82    ? (c) - 0x3FFF00             \
  83    : multibyte_char_to_unibyte_safe (c))
  84
  85 /* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
  86    that corresponds to a raw 8-bit byte.  */
  87 #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
  88
  89 /* If C is not ASCII, make it unibyte. */
  90 #define MAKE_CHAR_UNIBYTE(c)    \
  91   do {                          \
  92     if (! ASCII_CHAR_P (c))     \
  93       c = CHAR_TO_BYTE8 (c);    \
  94   } while (false)
  95
  96
  97 /* If C is not ASCII, make it multibyte.  Assumes C < 256.  */
  98 #define MAKE_CHAR_MULTIBYTE(c) \
  99   (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c))
 100
 101 /* This is the maximum byte length of multibyte form.  */
 102 #define MAX_MULTIBYTE_LENGTH 5
 103
 104 /* Return a Lisp character whose character code is C.  Assumes C is
 105    a valid character code.  */
 106 #define make_char(c) make_number (c)
 107
 108 /* Nonzero iff C is an ASCII byte.  */
 109 #define ASCII_BYTE_P(c) UNSIGNED_CMP (c, <, 0x80)
 110
 111 /* Nonzero iff X is a character.  */
 112 #define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
 113
 114 /* Nonzero iff C is valid as a character code.  */
 115 #define CHAR_VALID_P(c) UNSIGNED_CMP (c, <=, MAX_CHAR)
 116
 117 /* Check if Lisp object X is a character or not.  */
 118 #define CHECK_CHARACTER(x) \
 119   CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
 120
 121 #define CHECK_CHARACTER_CAR(x) \
 122   do {                                  \
 123     Lisp_Object tmp = XCAR (x);         \
 124     CHECK_CHARACTER (tmp);              \
 125     XSETCAR ((x), tmp);                 \
 126   } while (false)
 127
 128 #define CHECK_CHARACTER_CDR(x) \
 129   do {                                  \
 130     Lisp_Object tmp = XCDR (x);         \
 131     CHECK_CHARACTER (tmp);              \
 132     XSETCDR ((x), tmp);                 \
 133   } while (false)
 134
 135 /* Nonzero iff C is a character of code less than 0x100.  */
 136 #define SINGLE_BYTE_CHAR_P(c) UNSIGNED_CMP (c, <, 0x100)
 137
 138 /* Nonzero if character C has a printable glyph.  */
 139 #define CHAR_PRINTABLE_P(c)     \
 140   (((c) >= 32 && (c) < 127)     \
 141    || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))
 142
 143 /* Return byte length of multibyte form for character C.  */
 144 #define CHAR_BYTES(c)                   \
 145   ( (c) <= MAX_1_BYTE_CHAR ? 1          \
 146     : (c) <= MAX_2_BYTE_CHAR ? 2        \
 147     : (c) <= MAX_3_BYTE_CHAR ? 3        \
 148     : (c) <= MAX_4_BYTE_CHAR ? 4        \
 149     : (c) <= MAX_5_BYTE_CHAR ? 5        \
 150     : 2)
 151
 152
 153 /* Return the leading code of multibyte form of C.  */
 154 #define CHAR_LEADING_CODE(c)                            \
 155   ((c) <= MAX_1_BYTE_CHAR ? c                           \
 156    : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6))       \
 157    : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12))      \
 158    : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18))      \
 159    : (c) <= MAX_5_BYTE_CHAR ? 0xF8                      \
 160    : (0xC0 | (((c) >> 6) & 0x01)))
 161
 162
 163 /* Store multibyte form of the character C in P.  The caller should
 164    allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
 165    Returns the length of the multibyte form.  */
 166
 167 #define CHAR_STRING(c, p)                       \
 168   (UNSIGNED_CMP (c, <=, MAX_1_BYTE_CHAR)        \
 169    ? ((p)[0] = (c),                             \
 170       1)                                        \
 171    : UNSIGNED_CMP (c, <=, MAX_2_BYTE_CHAR)      \
 172    ? ((p)[0] = (0xC0 | ((c) >> 6)),             \
 173       (p)[1] = (0x80 | ((c) & 0x3F)),           \
 174       2)                                        \
 175    : UNSIGNED_CMP (c, <=, MAX_3_BYTE_CHAR)      \
 176    ? ((p)[0] = (0xE0 | ((c) >> 12)),            \
 177       (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),    \
 178       (p)[2] = (0x80 | ((c) & 0x3F)),           \
 179       3)                                        \
 180    : verify_expr (sizeof (c) <= sizeof (unsigned), char_string (c, p)))
 181
 182 /* Store multibyte form of byte B in P.  The caller should allocate at
 183    least MAX_MULTIBYTE_LENGTH bytes area at P in advance.  Returns the
 184    length of the multibyte form.  */
 185
 186 #define BYTE8_STRING(b, p)                      \
 187   ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),       \
 188    (p)[1] = (0x80 | ((b) & 0x3F)),              \
 189    2)
 190
 191
 192 /* Store multibyte form of the character C in P and advance P to the
 193    end of the multibyte form.  The caller should allocate at least
 194    MAX_MULTIBYTE_LENGTH bytes area at P in advance.  */
 195
 196 #define CHAR_STRING_ADVANCE(c, p)               \
 197   do {                                          \
 198     if ((c) <= MAX_1_BYTE_CHAR)                 \
 199       *(p)++ = (c);                             \
 200     else if ((c) <= MAX_2_BYTE_CHAR)            \
 201       *(p)++ = (0xC0 | ((c) >> 6)),             \
 202         *(p)++ = (0x80 | ((c) & 0x3F));         \
 203     else if ((c) <= MAX_3_BYTE_CHAR)            \
 204       *(p)++ = (0xE0 | ((c) >> 12)),            \
 205         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
 206         *(p)++ = (0x80 | ((c) & 0x3F));         \
 207     else                                        \
 208       {                                         \
 209         verify (sizeof (c) <= sizeof (unsigned));       \
 210         (p) += char_string (c, p);              \
 211       }                                         \
 212   } while (false)
 213
 214
 215 /* Nonzero iff BYTE starts a non-ASCII character in a multibyte
 216    form.  */
 217 #define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
 218
 219 /* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
 220    multibyte form.  */
 221 #define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
 222
 223 /* Nonzero iff BYTE starts a character in a multibyte form.
 224    This is equivalent to:
 225         (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte))  */
 226 #define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
 227
 228 /* How many bytes a character that starts with BYTE occupies in a
 229    multibyte form.  */
 230 #define BYTES_BY_CHAR_HEAD(byte)        \
 231   (!((byte) & 0x80) ? 1                 \
 232    : !((byte) & 0x20) ? 2               \
 233    : !((byte) & 0x10) ? 3               \
 234    : !((byte) & 0x08) ? 4               \
 235    : 5)
 236
 237
 238 /* The byte length of multibyte form at unibyte string P ending at
 239    PEND.  If STR doesn't point to a valid multibyte form, return 0.  */
 240
 241 #define MULTIBYTE_LENGTH(p, pend)                               \
 242   (p >= pend ? 0                                                \
 243    : !((p)[0] & 0x80) ? 1                                       \
 244    : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0         \
 245    : ((p)[0] & 0xE0) == 0xC0 ? 2                                \
 246    : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0         \
 247    : ((p)[0] & 0xF0) == 0xE0 ? 3                                \
 248    : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0         \
 249    : ((p)[0] & 0xF8) == 0xF0 ? 4                                \
 250    : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0         \
 251    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5              \
 252    : 0)
 253
 254
 255 /* Like MULTIBYTE_LENGTH, but don't check the ending address.  */
 256
 257 #define MULTIBYTE_LENGTH_NO_CHECK(p)                    \
 258   (!((p)[0] & 0x80) ? 1                                 \
 259    : ((p)[1] & 0xC0) != 0x80 ? 0                        \
 260    : ((p)[0] & 0xE0) == 0xC0 ? 2                        \
 261    : ((p)[2] & 0xC0) != 0x80 ? 0                        \
 262    : ((p)[0] & 0xF0) == 0xE0 ? 3                        \
 263    : ((p)[3] & 0xC0) != 0x80 ? 0                        \
 264    : ((p)[0] & 0xF8) == 0xF0 ? 4                        \
 265    : ((p)[4] & 0xC0) != 0x80 ? 0                        \
 266    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5      \
 267    : 0)
 268
 269 /* If P is before LIMIT, advance P to the next character boundary.
 270    Assumes that P is already at a character boundary of the same
 271    multibyte form whose end address is LIMIT.  */
 272
 273 #define NEXT_CHAR_BOUNDARY(p, limit)    \
 274   do {                                  \
 275     if ((p) < (limit))                  \
 276       (p) += BYTES_BY_CHAR_HEAD (*(p)); \
 277   } while (false)
 278
 279
 280 /* If P is after LIMIT, advance P to the previous character boundary.
 281    Assumes that P is already at a character boundary of the same
 282    multibyte form whose beginning address is LIMIT.  */
 283
 284 #define PREV_CHAR_BOUNDARY(p, limit)                                    \
 285   do {                                                                  \
 286     if ((p) > (limit))                                                  \
 287       {                                                                 \
 288         const unsigned char *chp = (p);                                 \
 289         do {                                                            \
 290           chp--;                                                        \
 291         } while (chp >= limit && ! CHAR_HEAD_P (*chp));                 \
 292         (p) = (BYTES_BY_CHAR_HEAD (*chp) == (p) - chp) ? chp : (p) - 1; \
 293       }                                                                 \
 294   } while (false)
 295
 296 /* Return the character code of character whose multibyte form is at
 297    P.  Note that this macro unifies CJK characters whose codepoints
 298    are in the Private Use Areas (PUAs), so it might return a different
 299    codepoint from the one actually stored at P.  */
 300
 301 #define STRING_CHAR(p)                                          \
 302   (!((p)[0] & 0x80)                                             \
 303    ? (p)[0]                                                     \
 304    : ! ((p)[0] & 0x20)                                          \
 305    ? (((((p)[0] & 0x1F) << 6)                                   \
 306        | ((p)[1] & 0x3F))                                       \
 307       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))       \
 308    : ! ((p)[0] & 0x10)                                          \
 309    ? ((((p)[0] & 0x0F) << 12)                                   \
 310       | (((p)[1] & 0x3F) << 6)                                  \
 311       | ((p)[2] & 0x3F))                                        \
 312    : string_char ((p), NULL, NULL))
 313
 314
 315 /* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
 316    form.
 317
 318    Note: This macro returns the actual length of the character's
 319    multibyte sequence as it is stored in a buffer or string.  The
 320    character it returns might have a different codepoint that has a
 321    different multibyte sequence of a different length, due to possible
 322    unification of CJK characters inside string_char.  Therefore do NOT
 323    assume that the length returned by this macro is identical to the
 324    length of the multibyte sequence of the character it returns.  */
 325
 326 #define STRING_CHAR_AND_LENGTH(p, actual_len)                   \
 327   (!((p)[0] & 0x80)                                             \
 328    ? ((actual_len) = 1, (p)[0])                                 \
 329    : ! ((p)[0] & 0x20)                                          \
 330    ? ((actual_len) = 2,                                         \
 331       (((((p)[0] & 0x1F) << 6)                                  \
 332         | ((p)[1] & 0x3F))                                      \
 333        + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))     \
 334    : ! ((p)[0] & 0x10)                                          \
 335    ? ((actual_len) = 3,                                         \
 336       ((((p)[0] & 0x0F) << 12)                                  \
 337        | (((p)[1] & 0x3F) << 6)                                 \
 338        | ((p)[2] & 0x3F)))                                      \
 339    : string_char ((p), NULL, &actual_len))
 340
 341
 342 /* Like STRING_CHAR, but advance P to the end of multibyte form.  */
 343
 344 #define STRING_CHAR_ADVANCE(p)                                  \
 345   (!((p)[0] & 0x80)                                             \
 346    ? *(p)++                                                     \
 347    : ! ((p)[0] & 0x20)                                          \
 348    ? ((p) += 2,                                                 \
 349       ((((p)[-2] & 0x1F) << 6)                                  \
 350        | ((p)[-1] & 0x3F)                                       \
 351        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
 352    : ! ((p)[0] & 0x10)                                          \
 353    ? ((p) += 3,                                                 \
 354       ((((p)[-3] & 0x0F) << 12)                                 \
 355        | (((p)[-2] & 0x3F) << 6)                                \
 356        | ((p)[-1] & 0x3F)))                                     \
 357    : string_char ((p), &(p), NULL))
 358
 359
 360 /* Fetch the "next" character from Lisp string STRING at byte position
 361    BYTEIDX, character position CHARIDX.  Store it into OUTPUT.
 362
 363    All the args must be side-effect-free.
 364    BYTEIDX and CHARIDX must be lvalues;
 365    we increment them past the character fetched.  */
 366
 367 #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)     \
 368   do                                                                    \
 369     {                                                                   \
 370       CHARIDX++;                                                        \
 371       if (STRING_MULTIBYTE (STRING))                                    \
 372         {                                                               \
 373           unsigned char *chp = &SDATA (STRING)[BYTEIDX];                \
 374           int chlen;                                                    \
 375                                                                         \
 376           OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);                 \
 377           BYTEIDX += chlen;                                             \
 378         }                                                               \
 379       else                                                              \
 380         {                                                               \
 381           OUTPUT = SREF (STRING, BYTEIDX);                              \
 382           BYTEIDX++;                                                    \
 383         }                                                               \
 384     }                                                                   \
 385   while (false)
 386
 387 /* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
 388    even if STRING is unibyte.  */
 389
 390 #define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 391   do                                                                          \
 392     {                                                                         \
 393       CHARIDX++;                                                              \
 394       if (STRING_MULTIBYTE (STRING))                                          \
 395         {                                                                     \
 396           unsigned char *chp = &SDATA (STRING)[BYTEIDX];                      \
 397           int chlen;                                                          \
 398                                                                               \
 399           OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);                       \
 400           BYTEIDX += chlen;                                                   \
 401         }                                                                     \
 402       else                                                                    \
 403         {                                                                     \
 404           OUTPUT = SREF (STRING, BYTEIDX);                                    \
 405           BYTEIDX++;                                                          \
 406           MAKE_CHAR_MULTIBYTE (OUTPUT);                                       \
 407         }                                                                     \
 408     }                                                                         \
 409   while (false)
 410
 411
 412 /* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte.  */
 413
 414 #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 415   do                                                                         \
 416     {                                                                        \
 417       unsigned char *fetch_ptr = &SDATA (STRING)[BYTEIDX];                   \
 418       int fetch_len;                                                         \
 419                                                                              \
 420       OUTPUT = STRING_CHAR_AND_LENGTH (fetch_ptr, fetch_len);                \
 421       BYTEIDX += fetch_len;                                                  \
 422       CHARIDX++;                                                             \
 423     }                                                                        \
 424   while (false)
 425
 426
 427 /* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
 428    buffer.  */
 429
 430 #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)            \
 431   do                                                            \
 432     {                                                           \
 433       CHARIDX++;                                                \
 434       if (!NILP (BVAR (current_buffer, enable_multibyte_characters)))   \
 435         {                                                       \
 436           unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);         \
 437           int chlen;                                            \
 438                                                                 \
 439           OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);         \
 440           BYTEIDX += chlen;                                     \
 441         }                                                       \
 442       else                                                      \
 443         {                                                       \
 444           OUTPUT = *(BYTE_POS_ADDR (BYTEIDX));                  \
 445           BYTEIDX++;                                            \
 446         }                                                       \
 447     }                                                           \
 448   while (false)
 449
 450
 451 /* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte.  */
 452
 453 #define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)   \
 454   do                                                            \
 455     {                                                           \
 456       unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);             \
 457       int chlen;                                                        \
 458                                                                 \
 459       OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);             \
 460       BYTEIDX += chlen;                                         \
 461       CHARIDX++;                                                \
 462     }                                                           \
 463   while (false)
 464
 465
 466 /* Increment the buffer byte position POS_BYTE of the current buffer to
 467    the next character boundary.  No range checking of POS.  */
 468
 469 #define INC_POS(pos_byte)                               \
 470   do {                                                  \
 471     unsigned char *chp = BYTE_POS_ADDR (pos_byte);      \
 472     pos_byte += BYTES_BY_CHAR_HEAD (*chp);              \
 473   } while (false)
 474
 475
 476 /* Decrement the buffer byte position POS_BYTE of the current buffer to
 477    the previous character boundary.  No range checking of POS.  */
 478
 479 #define DEC_POS(pos_byte)                       \
 480   do {                                          \
 481     unsigned char *chp;                         \
 482                                                 \
 483     pos_byte--;                                 \
 484     if (pos_byte < GPT_BYTE)                    \
 485       chp = BEG_ADDR + pos_byte - BEG_BYTE;     \
 486     else                                        \
 487       chp = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE; \
 488     while (!CHAR_HEAD_P (*chp))                 \
 489       {                                         \
 490         chp--;                                  \
 491         pos_byte--;                             \
 492       }                                         \
 493   } while (false)
 494
 495 /* Increment both CHARPOS and BYTEPOS, each in the appropriate way.  */
 496
 497 #define INC_BOTH(charpos, bytepos)                              \
 498   do                                                            \
 499     {                                                           \
 500       (charpos)++;                                              \
 501       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))    \
 502         (bytepos)++;                                            \
 503       else                                                      \
 504         INC_POS ((bytepos));                                    \
 505     }                                                           \
 506   while (false)
 507
 508
 509 /* Decrement both CHARPOS and BYTEPOS, each in the appropriate way.  */
 510
 511 #define DEC_BOTH(charpos, bytepos)                              \
 512   do                                                            \
 513     {                                                           \
 514       (charpos)--;                                              \
 515       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))    \
 516         (bytepos)--;                                            \
 517       else                                                      \
 518         DEC_POS ((bytepos));                                    \
 519     }                                                           \
 520   while (false)
 521
 522
 523 /* Increment the buffer byte position POS_BYTE of the current buffer to
 524    the next character boundary.  This macro relies on the fact that
 525    *GPT_ADDR and *Z_ADDR are always accessible and the values are
 526    '\0'.  No range checking of POS_BYTE.  */
 527
 528 #define BUF_INC_POS(buf, pos_byte)                              \
 529   do {                                                          \
 530     unsigned char *chp = BUF_BYTE_ADDRESS (buf, pos_byte);      \
 531     pos_byte += BYTES_BY_CHAR_HEAD (*chp);                      \
 532   } while (false)
 533
 534
 535 /* Decrement the buffer byte position POS_BYTE of the current buffer to
 536    the previous character boundary.  No range checking of POS_BYTE.  */
 537
 538 #define BUF_DEC_POS(buf, pos_byte)                                      \
 539   do {                                                                  \
 540     unsigned char *chp;                                                 \
 541     pos_byte--;                                                         \
 542     if (pos_byte < BUF_GPT_BYTE (buf))                                  \
 543       chp = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE;                   \
 544     else                                                                \
 545       chp = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
 546     while (!CHAR_HEAD_P (*chp))                                         \
 547       {                                                                 \
 548         chp--;                                                          \
 549         pos_byte--;                                                     \
 550       }                                                                 \
 551   } while (false)
 552
 553
 554 /* Return a non-outlandish value for the tab width.  */
 555
 556 #define SANE_TAB_WIDTH(buf) \
 557   sanitize_tab_width (XFASTINT (BVAR (buf, tab_width)))
 558 INLINE int
 559 sanitize_tab_width (EMACS_INT width)
 560 {
 561   return 0 < width && width <= 1000 ? width : 8;
 562 }
 563
 564 /* Return the width of ASCII character C.  The width is measured by
 565    how many columns C will occupy on the screen when displayed in the
 566    current buffer.  */
 567
 568 #define ASCII_CHAR_WIDTH(c)                                             \
 569   (c < 0x20                                                             \
 570    ? (c == '\t'                                                         \
 571       ? SANE_TAB_WIDTH (current_buffer)                                 \
 572       : (c == '\n' ? 0 : (NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2)))    \
 573    : (c < 0x7f                                                          \
 574       ? 1                                                               \
 575       : ((NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))))
 576
 577 /* Return a non-outlandish value for a character width.  */
 578
 579 INLINE int
 580 sanitize_char_width (EMACS_INT width)
 581 {
 582   return 0 <= width && width <= 1000 ? width : 1000;
 583 }
 584
 585 /* Return the width of character C.  The width is measured by how many
 586    columns C will occupy on the screen when displayed in the current
 587    buffer.  */
 588
 589 #define CHAR_WIDTH(c)           \
 590   (ASCII_CHAR_P (c)             \
 591    ? ASCII_CHAR_WIDTH (c)       \
 592    : sanitize_char_width (XINT (CHAR_TABLE_REF (Vchar_width_table, c))))
 593
 594 /* If C is a variation selector, return the index of the
 595    variation selector (1..256).  Otherwise, return 0.  */
 596
 597 #define CHAR_VARIATION_SELECTOR_P(c)            \
 598   ((c) < 0xFE00 ? 0                             \
 599    : (c) <= 0xFE0F ? (c) - 0xFE00 + 1           \
 600    : (c) < 0xE0100 ? 0                          \
 601    : (c) <= 0xE01EF ? (c) - 0xE0100 + 17        \
 602    : 0)
 603
 604 /* If C is a high surrogate, return 1.  If C is a low surrogate,
 605    return 2.  Otherwise, return 0.  */
 606
 607 #define CHAR_SURROGATE_PAIR_P(c)        \
 608   ((c) < 0xD800 ? 0                     \
 609    : (c) <= 0xDBFF ? 1                  \
 610    : (c) <= 0xDFFF ? 2                  \
 611    : 0)
 612
 613 /* Data type for Unicode general category.
 614
 615    The order of members must be in sync with the 8th element of the
 616    member of unidata-prop-alist (in admin/unidata/unidata-gen.el) for
 617    Unicode character property `general-category'.  */
 618
 619 typedef enum {
 620   UNICODE_CATEGORY_UNKNOWN = 0,
 621   UNICODE_CATEGORY_Lu,
 622   UNICODE_CATEGORY_Ll,
 623   UNICODE_CATEGORY_Lt,
 624   UNICODE_CATEGORY_Lm,
 625   UNICODE_CATEGORY_Lo,
 626   UNICODE_CATEGORY_Mn,
 627   UNICODE_CATEGORY_Mc,
 628   UNICODE_CATEGORY_Me,
 629   UNICODE_CATEGORY_Nd,
 630   UNICODE_CATEGORY_Nl,
 631   UNICODE_CATEGORY_No,
 632   UNICODE_CATEGORY_Pc,
 633   UNICODE_CATEGORY_Pd,
 634   UNICODE_CATEGORY_Ps,
 635   UNICODE_CATEGORY_Pe,
 636   UNICODE_CATEGORY_Pi,
 637   UNICODE_CATEGORY_Pf,
 638   UNICODE_CATEGORY_Po,
 639   UNICODE_CATEGORY_Sm,
 640   UNICODE_CATEGORY_Sc,
 641   UNICODE_CATEGORY_Sk,
 642   UNICODE_CATEGORY_So,
 643   UNICODE_CATEGORY_Zs,
 644   UNICODE_CATEGORY_Zl,
 645   UNICODE_CATEGORY_Zp,
 646   UNICODE_CATEGORY_Cc,
 647   UNICODE_CATEGORY_Cf,
 648   UNICODE_CATEGORY_Cs,
 649   UNICODE_CATEGORY_Co,
 650   UNICODE_CATEGORY_Cn
 651 } unicode_category_t;
 652
 653 extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST;
 654 extern int char_string (unsigned, unsigned char *);
 655 extern int string_char (const unsigned char *,
 656                         const unsigned char **, int *);
 657
 658 extern int translate_char (Lisp_Object, int c);
 659 extern void parse_str_as_multibyte (const unsigned char *,
 660                                     ptrdiff_t, ptrdiff_t *, ptrdiff_t *);
 661 extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
 662 extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
 663                                    ptrdiff_t *);
 664 extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
 665 extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
 666 extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
 667                                  ptrdiff_t);
 668 extern ptrdiff_t strwidth (const char *, ptrdiff_t);
 669 extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
 670                                  ptrdiff_t *, ptrdiff_t *);
 671 extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t,
 672                                     ptrdiff_t *, ptrdiff_t *);
 673
 674 extern Lisp_Object Qcharacterp;
 675 extern Lisp_Object Vchar_unify_table;
 676 extern Lisp_Object string_escape_byte8 (Lisp_Object);
 677
 678 /* Return a translation table of id number ID.  */
 679 #define GET_TRANSLATION_TABLE(id) \
 680   (XCDR (XVECTOR (Vtranslation_table_vector)->contents[(id)]))
 681
 682 INLINE_HEADER_END
 683
 684 #endif /* EMACS_CHARACTER_H */