src/character.h

   1 /* Header for multibyte character handler.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H13PRO009
   7
   8 This file is part of GNU Emacs.
   9
  10 GNU Emacs is free software: you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation, either version 3 of the License, or
  13 (at your option) any later version.
  14
  15 GNU Emacs is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  22
  23 #ifndef EMACS_CHARACTER_H
  24 #define EMACS_CHARACTER_H
  25
  26 #include <verify.h>
  27
  28 INLINE_HEADER_BEGIN
  29 #ifndef CHARACTER_INLINE
  30 # define CHARACTER_INLINE INLINE
  31 #endif
  32
  33 /* character code       1st byte   byte sequence
  34    --------------       --------   -------------
  35         0-7F            00..7F     0xxxxxxx
  36        80-7FF           C2..DF     110xxxxx 10xxxxxx
  37       800-FFFF          E0..EF     1110xxxx 10xxxxxx 10xxxxxx
  38     10000-1FFFFF        F0..F7     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  39    200000-3FFF7F        F8         11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
  40    3FFF80-3FFFFF        C0..C1     1100000x 10xxxxxx (for eight-bit-char)
  41    400000-...           invalid
  42
  43    invalid 1st byte     80..BF     10xxxxxx
  44                         F9..FF     11111xxx (xxx != 000)
  45 */
  46
  47 /* Maximum character code ((1 << CHARACTERBITS) - 1).  */
  48 #define MAX_CHAR  0x3FFFFF
  49
  50 /* Maximum Unicode character code.  */
  51 #define MAX_UNICODE_CHAR 0x10FFFF
  52
  53 /* Maximum N-byte character codes.  */
  54 #define MAX_1_BYTE_CHAR 0x7F
  55 #define MAX_2_BYTE_CHAR 0x7FF
  56 #define MAX_3_BYTE_CHAR 0xFFFF
  57 #define MAX_4_BYTE_CHAR 0x1FFFFF
  58 #define MAX_5_BYTE_CHAR 0x3FFF7F
  59
  60 /* Minimum leading code of multibyte characters.  */
  61 #define MIN_MULTIBYTE_LEADING_CODE 0xC0
  62 /* Maximum leading code of multibyte characters.  */
  63 #define MAX_MULTIBYTE_LEADING_CODE 0xF8
  64
  65 /* Nonzero iff C is a character that corresponds to a raw 8-bit
  66    byte.  */
  67 #define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
  68
  69 /* Return the character code for raw 8-bit byte BYTE.  */
  70 #define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
  71
  72 #define UNIBYTE_TO_CHAR(byte) \
  73   (ASCII_BYTE_P (byte) ? (byte) : BYTE8_TO_CHAR (byte))
  74
  75 /* Return the raw 8-bit byte for character C.  */
  76 #define CHAR_TO_BYTE8(c)        \
  77   (CHAR_BYTE8_P (c)             \
  78    ? (c) - 0x3FFF00             \
  79    : multibyte_char_to_unibyte (c))
  80
  81 /* Return the raw 8-bit byte for character C,
  82    or -1 if C doesn't correspond to a byte.  */
  83 #define CHAR_TO_BYTE_SAFE(c)    \
  84   (CHAR_BYTE8_P (c)             \
  85    ? (c) - 0x3FFF00             \
  86    : multibyte_char_to_unibyte_safe (c))
  87
  88 /* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
  89    that corresponds to a raw 8-bit byte.  */
  90 #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
  91
  92 /* If C is not ASCII, make it unibyte. */
  93 #define MAKE_CHAR_UNIBYTE(c)    \
  94   do {                          \
  95     if (! ASCII_CHAR_P (c))     \
  96       c = CHAR_TO_BYTE8 (c);    \
  97   } while (0)
  98
  99
 100 /* If C is not ASCII, make it multibyte.  Assumes C < 256.  */
 101 #define MAKE_CHAR_MULTIBYTE(c) \
 102   (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c))
 103
 104 /* This is the maximum byte length of multibyte form.  */
 105 #define MAX_MULTIBYTE_LENGTH 5
 106
 107 /* Return a Lisp character whose character code is C.  Assumes C is
 108    a valid character code.  */
 109 #define make_char(c) make_number (c)
 110
 111 /* Nonzero iff C is an ASCII byte.  */
 112 #define ASCII_BYTE_P(c) UNSIGNED_CMP (c, <, 0x80)
 113
 114 /* Nonzero iff X is a character.  */
 115 #define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
 116
 117 /* Nonzero iff C is valid as a character code.  */
 118 #define CHAR_VALID_P(c) UNSIGNED_CMP (c, <=, MAX_CHAR)
 119
 120 /* Check if Lisp object X is a character or not.  */
 121 #define CHECK_CHARACTER(x) \
 122   CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
 123
 124 #define CHECK_CHARACTER_CAR(x) \
 125   do {                                  \
 126     Lisp_Object tmp = XCAR (x);         \
 127     CHECK_CHARACTER (tmp);              \
 128     XSETCAR ((x), tmp);                 \
 129   } while (0)
 130
 131 #define CHECK_CHARACTER_CDR(x) \
 132   do {                                  \
 133     Lisp_Object tmp = XCDR (x);         \
 134     CHECK_CHARACTER (tmp);              \
 135     XSETCDR ((x), tmp);                 \
 136   } while (0)
 137
 138 /* Nonzero iff C is a character of code less than 0x100.  */
 139 #define SINGLE_BYTE_CHAR_P(c) UNSIGNED_CMP (c, <, 0x100)
 140
 141 /* Nonzero if character C has a printable glyph.  */
 142 #define CHAR_PRINTABLE_P(c)     \
 143   (((c) >= 32 && (c) < 127)     \
 144    || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))
 145
 146 /* Return byte length of multibyte form for character C.  */
 147 #define CHAR_BYTES(c)                   \
 148   ( (c) <= MAX_1_BYTE_CHAR ? 1          \
 149     : (c) <= MAX_2_BYTE_CHAR ? 2        \
 150     : (c) <= MAX_3_BYTE_CHAR ? 3        \
 151     : (c) <= MAX_4_BYTE_CHAR ? 4        \
 152     : (c) <= MAX_5_BYTE_CHAR ? 5        \
 153     : 2)
 154
 155
 156 /* Return the leading code of multibyte form of C.  */
 157 #define CHAR_LEADING_CODE(c)                            \
 158   ((c) <= MAX_1_BYTE_CHAR ? c                           \
 159    : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6))       \
 160    : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12))      \
 161    : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18))      \
 162    : (c) <= MAX_5_BYTE_CHAR ? 0xF8                      \
 163    : (0xC0 | (((c) >> 6) & 0x01)))
 164
 165
 166 /* Store multibyte form of the character C in P.  The caller should
 167    allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
 168    Returns the length of the multibyte form.  */
 169
 170 #define CHAR_STRING(c, p)                       \
 171   (UNSIGNED_CMP (c, <=, MAX_1_BYTE_CHAR)        \
 172    ? ((p)[0] = (c),                             \
 173       1)                                        \
 174    : UNSIGNED_CMP (c, <=, MAX_2_BYTE_CHAR)      \
 175    ? ((p)[0] = (0xC0 | ((c) >> 6)),             \
 176       (p)[1] = (0x80 | ((c) & 0x3F)),           \
 177       2)                                        \
 178    : UNSIGNED_CMP (c, <=, MAX_3_BYTE_CHAR)      \
 179    ? ((p)[0] = (0xE0 | ((c) >> 12)),            \
 180       (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),    \
 181       (p)[2] = (0x80 | ((c) & 0x3F)),           \
 182       3)                                        \
 183    : verify_expr (sizeof (c) <= sizeof (unsigned), char_string (c, p)))
 184
 185 /* Store multibyte form of byte B in P.  The caller should allocate at
 186    least MAX_MULTIBYTE_LENGTH bytes area at P in advance.  Returns the
 187    length of the multibyte form.  */
 188
 189 #define BYTE8_STRING(b, p)                      \
 190   ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),       \
 191    (p)[1] = (0x80 | ((b) & 0x3F)),              \
 192    2)
 193
 194
 195 /* Store multibyte form of the character C in P and advance P to the
 196    end of the multibyte form.  The caller should allocate at least
 197    MAX_MULTIBYTE_LENGTH bytes area at P in advance.  */
 198
 199 #define CHAR_STRING_ADVANCE(c, p)               \
 200   do {                                          \
 201     if ((c) <= MAX_1_BYTE_CHAR)                 \
 202       *(p)++ = (c);                             \
 203     else if ((c) <= MAX_2_BYTE_CHAR)            \
 204       *(p)++ = (0xC0 | ((c) >> 6)),             \
 205         *(p)++ = (0x80 | ((c) & 0x3F));         \
 206     else if ((c) <= MAX_3_BYTE_CHAR)            \
 207       *(p)++ = (0xE0 | ((c) >> 12)),            \
 208         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
 209         *(p)++ = (0x80 | ((c) & 0x3F));         \
 210     else                                        \
 211       {                                         \
 212         verify (sizeof (c) <= sizeof (unsigned));       \
 213         (p) += char_string (c, p);              \
 214       }                                         \
 215   } while (0)
 216
 217
 218 /* Nonzero iff BYTE starts a non-ASCII character in a multibyte
 219    form.  */
 220 #define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
 221
 222 /* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
 223    multibyte form.  */
 224 #define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
 225
 226 /* Nonzero iff BYTE starts a character in a multibyte form.
 227    This is equivalent to:
 228         (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte))  */
 229 #define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
 230
 231 /* How many bytes a character that starts with BYTE occupies in a
 232    multibyte form.  */
 233 #define BYTES_BY_CHAR_HEAD(byte)        \
 234   (!((byte) & 0x80) ? 1                 \
 235    : !((byte) & 0x20) ? 2               \
 236    : !((byte) & 0x10) ? 3               \
 237    : !((byte) & 0x08) ? 4               \
 238    : 5)
 239
 240
 241 /* The byte length of multibyte form at unibyte string P ending at
 242    PEND.  If STR doesn't point to a valid multibyte form, return 0.  */
 243
 244 #define MULTIBYTE_LENGTH(p, pend)                               \
 245   (p >= pend ? 0                                                \
 246    : !((p)[0] & 0x80) ? 1                                       \
 247    : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0         \
 248    : ((p)[0] & 0xE0) == 0xC0 ? 2                                \
 249    : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0         \
 250    : ((p)[0] & 0xF0) == 0xE0 ? 3                                \
 251    : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0         \
 252    : ((p)[0] & 0xF8) == 0xF0 ? 4                                \
 253    : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0         \
 254    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5              \
 255    : 0)
 256
 257
 258 /* Like MULTIBYTE_LENGTH, but don't check the ending address.  */
 259
 260 #define MULTIBYTE_LENGTH_NO_CHECK(p)                    \
 261   (!((p)[0] & 0x80) ? 1                                 \
 262    : ((p)[1] & 0xC0) != 0x80 ? 0                        \
 263    : ((p)[0] & 0xE0) == 0xC0 ? 2                        \
 264    : ((p)[2] & 0xC0) != 0x80 ? 0                        \
 265    : ((p)[0] & 0xF0) == 0xE0 ? 3                        \
 266    : ((p)[3] & 0xC0) != 0x80 ? 0                        \
 267    : ((p)[0] & 0xF8) == 0xF0 ? 4                        \
 268    : ((p)[4] & 0xC0) != 0x80 ? 0                        \
 269    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5      \
 270    : 0)
 271
 272 /* If P is before LIMIT, advance P to the next character boundary.
 273    Assumes that P is already at a character boundary of the same
 274    multibyte form whose end address is LIMIT.  */
 275
 276 #define NEXT_CHAR_BOUNDARY(p, limit)    \
 277   do {                                  \
 278     if ((p) < (limit))                  \
 279       (p) += BYTES_BY_CHAR_HEAD (*(p)); \
 280   } while (0)
 281
 282
 283 /* If P is after LIMIT, advance P to the previous character boundary.
 284    Assumes that P is already at a character boundary of the same
 285    multibyte form whose beginning address is LIMIT.  */
 286
 287 #define PREV_CHAR_BOUNDARY(p, limit)                                    \
 288   do {                                                                  \
 289     if ((p) > (limit))                                                  \
 290       {                                                                 \
 291         const unsigned char *chp = (p);                                 \
 292         do {                                                            \
 293           chp--;                                                        \
 294         } while (chp >= limit && ! CHAR_HEAD_P (*chp));                 \
 295         (p) = (BYTES_BY_CHAR_HEAD (*chp) == (p) - chp) ? chp : (p) - 1; \
 296       }                                                                 \
 297   } while (0)
 298
 299 /* Return the character code of character whose multibyte form is at
 300    P.  Note that this macro unifies CJK characters whose codepoints
 301    are in the Private Use Areas (PUAs), so it might return a different
 302    codepoint from the one actually stored at P.  */
 303
 304 #define STRING_CHAR(p)                                          \
 305   (!((p)[0] & 0x80)                                             \
 306    ? (p)[0]                                                     \
 307    : ! ((p)[0] & 0x20)                                          \
 308    ? (((((p)[0] & 0x1F) << 6)                                   \
 309        | ((p)[1] & 0x3F))                                       \
 310       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))       \
 311    : ! ((p)[0] & 0x10)                                          \
 312    ? ((((p)[0] & 0x0F) << 12)                                   \
 313       | (((p)[1] & 0x3F) << 6)                                  \
 314       | ((p)[2] & 0x3F))                                        \
 315    : string_char ((p), NULL, NULL))
 316
 317
 318 /* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
 319    form.
 320
 321    Note: This macro returns the actual length of the character's
 322    multibyte sequence as it is stored in a buffer or string.  The
 323    character it returns might have a different codepoint that has a
 324    different multibyte sequence of a different length, due to possible
 325    unification of CJK characters inside string_char.  Therefore do NOT
 326    assume that the length returned by this macro is identical to the
 327    length of the multibyte sequence of the character it returns.  */
 328
 329 #define STRING_CHAR_AND_LENGTH(p, actual_len)                   \
 330   (!((p)[0] & 0x80)                                             \
 331    ? ((actual_len) = 1, (p)[0])                                 \
 332    : ! ((p)[0] & 0x20)                                          \
 333    ? ((actual_len) = 2,                                         \
 334       (((((p)[0] & 0x1F) << 6)                                  \
 335         | ((p)[1] & 0x3F))                                      \
 336        + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))     \
 337    : ! ((p)[0] & 0x10)                                          \
 338    ? ((actual_len) = 3,                                         \
 339       ((((p)[0] & 0x0F) << 12)                                  \
 340        | (((p)[1] & 0x3F) << 6)                                 \
 341        | ((p)[2] & 0x3F)))                                      \
 342    : string_char ((p), NULL, &actual_len))
 343
 344
 345 /* Like STRING_CHAR, but advance P to the end of multibyte form.  */
 346
 347 #define STRING_CHAR_ADVANCE(p)                                  \
 348   (!((p)[0] & 0x80)                                             \
 349    ? *(p)++                                                     \
 350    : ! ((p)[0] & 0x20)                                          \
 351    ? ((p) += 2,                                                 \
 352       ((((p)[-2] & 0x1F) << 6)                                  \
 353        | ((p)[-1] & 0x3F)                                       \
 354        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
 355    : ! ((p)[0] & 0x10)                                          \
 356    ? ((p) += 3,                                                 \
 357       ((((p)[-3] & 0x0F) << 12)                                 \
 358        | (((p)[-2] & 0x3F) << 6)                                \
 359        | ((p)[-1] & 0x3F)))                                     \
 360    : string_char ((p), &(p), NULL))
 361
 362
 363 /* Fetch the "next" character from Lisp string STRING at byte position
 364    BYTEIDX, character position CHARIDX.  Store it into OUTPUT.
 365
 366    All the args must be side-effect-free.
 367    BYTEIDX and CHARIDX must be lvalues;
 368    we increment them past the character fetched.  */
 369
 370 #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)     \
 371   do                                                                    \
 372     {                                                                   \
 373       CHARIDX++;                                                        \
 374       if (STRING_MULTIBYTE (STRING))                                    \
 375         {                                                               \
 376           unsigned char *chp = &SDATA (STRING)[BYTEIDX];                \
 377           int chlen;                                                    \
 378                                                                         \
 379           OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);                 \
 380           BYTEIDX += chlen;                                             \
 381         }                                                               \
 382       else                                                              \
 383         {                                                               \
 384           OUTPUT = SREF (STRING, BYTEIDX);                              \
 385           BYTEIDX++;                                                    \
 386         }                                                               \
 387     }                                                                   \
 388   while (0)
 389
 390 /* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
 391    even if STRING is unibyte.  */
 392
 393 #define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 394   do                                                                          \
 395     {                                                                         \
 396       CHARIDX++;                                                              \
 397       if (STRING_MULTIBYTE (STRING))                                          \
 398         {                                                                     \
 399           unsigned char *chp = &SDATA (STRING)[BYTEIDX];                      \
 400           int chlen;                                                          \
 401                                                                               \
 402           OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);                       \
 403           BYTEIDX += chlen;                                                   \
 404         }                                                                     \
 405       else                                                                    \
 406         {                                                                     \
 407           OUTPUT = SREF (STRING, BYTEIDX);                                    \
 408           BYTEIDX++;                                                          \
 409           MAKE_CHAR_MULTIBYTE (OUTPUT);                                       \
 410         }                                                                     \
 411     }                                                                         \
 412   while (0)
 413
 414
 415 /* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte.  */
 416
 417 #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 418   do                                                                         \
 419     {                                                                        \
 420       unsigned char *fetch_ptr = &SDATA (STRING)[BYTEIDX];                   \
 421       int fetch_len;                                                         \
 422                                                                              \
 423       OUTPUT = STRING_CHAR_AND_LENGTH (fetch_ptr, fetch_len);                \
 424       BYTEIDX += fetch_len;                                                  \
 425       CHARIDX++;                                                             \
 426     }                                                                        \
 427   while (0)
 428
 429
 430 /* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
 431    buffer.  */
 432
 433 #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)            \
 434   do                                                            \
 435     {                                                           \
 436       CHARIDX++;                                                \
 437       if (!NILP (BVAR (current_buffer, enable_multibyte_characters)))   \
 438         {                                                       \
 439           unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);         \
 440           int chlen;                                            \
 441                                                                 \
 442           OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);         \
 443           BYTEIDX += chlen;                                     \
 444         }                                                       \
 445       else                                                      \
 446         {                                                       \
 447           OUTPUT = *(BYTE_POS_ADDR (BYTEIDX));                  \
 448           BYTEIDX++;                                            \
 449         }                                                       \
 450     }                                                           \
 451   while (0)
 452
 453
 454 /* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte.  */
 455
 456 #define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)   \
 457   do                                                            \
 458     {                                                           \
 459       unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);             \
 460       int chlen;                                                        \
 461                                                                 \
 462       OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);             \
 463       BYTEIDX += chlen;                                         \
 464       CHARIDX++;                                                \
 465     }                                                           \
 466   while (0)
 467
 468
 469 /* Increment the buffer byte position POS_BYTE of the current buffer to
 470    the next character boundary.  No range checking of POS.  */
 471
 472 #define INC_POS(pos_byte)                               \
 473   do {                                                  \
 474     unsigned char *chp = BYTE_POS_ADDR (pos_byte);      \
 475     pos_byte += BYTES_BY_CHAR_HEAD (*chp);              \
 476   } while (0)
 477
 478
 479 /* Decrement the buffer byte position POS_BYTE of the current buffer to
 480    the previous character boundary.  No range checking of POS.  */
 481
 482 #define DEC_POS(pos_byte)                       \
 483   do {                                          \
 484     unsigned char *chp;                         \
 485                                                 \
 486     pos_byte--;                                 \
 487     if (pos_byte < GPT_BYTE)                    \
 488       chp = BEG_ADDR + pos_byte - BEG_BYTE;     \
 489     else                                        \
 490       chp = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE; \
 491     while (!CHAR_HEAD_P (*chp))                 \
 492       {                                         \
 493         chp--;                                  \
 494         pos_byte--;                             \
 495       }                                         \
 496   } while (0)
 497
 498 /* Increment both CHARPOS and BYTEPOS, each in the appropriate way.  */
 499
 500 #define INC_BOTH(charpos, bytepos)                              \
 501   do                                                            \
 502     {                                                           \
 503       (charpos)++;                                              \
 504       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))    \
 505         (bytepos)++;                                            \
 506       else                                                      \
 507         INC_POS ((bytepos));                                    \
 508     }                                                           \
 509   while (0)
 510
 511
 512 /* Decrement both CHARPOS and BYTEPOS, each in the appropriate way.  */
 513
 514 #define DEC_BOTH(charpos, bytepos)                              \
 515   do                                                            \
 516     {                                                           \
 517       (charpos)--;                                              \
 518       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))    \
 519         (bytepos)--;                                            \
 520       else                                                      \
 521         DEC_POS ((bytepos));                                    \
 522     }                                                           \
 523   while (0)
 524
 525
 526 /* Increment the buffer byte position POS_BYTE of the current buffer to
 527    the next character boundary.  This macro relies on the fact that
 528    *GPT_ADDR and *Z_ADDR are always accessible and the values are
 529    '\0'.  No range checking of POS_BYTE.  */
 530
 531 #define BUF_INC_POS(buf, pos_byte)                              \
 532   do {                                                          \
 533     unsigned char *chp = BUF_BYTE_ADDRESS (buf, pos_byte);      \
 534     pos_byte += BYTES_BY_CHAR_HEAD (*chp);                      \
 535   } while (0)
 536
 537
 538 /* Decrement the buffer byte position POS_BYTE of the current buffer to
 539    the previous character boundary.  No range checking of POS_BYTE.  */
 540
 541 #define BUF_DEC_POS(buf, pos_byte)                                      \
 542   do {                                                                  \
 543     unsigned char *chp;                                                 \
 544     pos_byte--;                                                         \
 545     if (pos_byte < BUF_GPT_BYTE (buf))                                  \
 546       chp = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE;                   \
 547     else                                                                \
 548       chp = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
 549     while (!CHAR_HEAD_P (*chp))                                         \
 550       {                                                                 \
 551         chp--;                                                          \
 552         pos_byte--;                                                     \
 553       }                                                                 \
 554   } while (0)
 555
 556
 557 /* Return a non-outlandish value for the tab width.  */
 558
 559 #define SANE_TAB_WIDTH(buf) \
 560   sanitize_tab_width (XFASTINT (BVAR (buf, tab_width)))
 561 CHARACTER_INLINE int
 562 sanitize_tab_width (EMACS_INT width)
 563 {
 564   return 0 < width && width <= 1000 ? width : 8;
 565 }
 566
 567 /* Return the width of ASCII character C.  The width is measured by
 568    how many columns C will occupy on the screen when displayed in the
 569    current buffer.  */
 570
 571 #define ASCII_CHAR_WIDTH(c)                                             \
 572   (c < 0x20                                                             \
 573    ? (c == '\t'                                                         \
 574       ? SANE_TAB_WIDTH (current_buffer)                                 \
 575       : (c == '\n' ? 0 : (NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2)))    \
 576    : (c < 0x7f                                                          \
 577       ? 1                                                               \
 578       : ((NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))))
 579
 580 /* Return a non-outlandish value for a character width.  */
 581
 582 CHARACTER_INLINE int
 583 sanitize_char_width (EMACS_INT width)
 584 {
 585   return 0 <= width && width <= 1000 ? width : 1000;
 586 }
 587
 588 /* Return the width of character C.  The width is measured by how many
 589    columns C will occupy on the screen when displayed in the current
 590    buffer.  */
 591
 592 #define CHAR_WIDTH(c)           \
 593   (ASCII_CHAR_P (c)             \
 594    ? ASCII_CHAR_WIDTH (c)       \
 595    : sanitize_char_width (XINT (CHAR_TABLE_REF (Vchar_width_table, c))))
 596
 597 /* If C is a variation selector, return the index of the
 598    variation selector (1..256).  Otherwise, return 0.  */
 599
 600 #define CHAR_VARIATION_SELECTOR_P(c)            \
 601   ((c) < 0xFE00 ? 0                             \
 602    : (c) <= 0xFE0F ? (c) - 0xFE00 + 1           \
 603    : (c) < 0xE0100 ? 0                          \
 604    : (c) <= 0xE01EF ? (c) - 0xE0100 + 17        \
 605    : 0)
 606
 607 /* If C is a high surrogate, return 1.  If C is a low surrogate,
 608    return 0.  Otherwise, return 0.  */
 609
 610 #define CHAR_SURROGATE_PAIR_P(c)        \
 611   ((c) < 0xD800 ? 0                     \
 612    : (c) <= 0xDBFF ? 1                  \
 613    : (c) <= 0xDFFF ? 2                  \
 614    : 0)
 615
 616 /* Data type for Unicode general category.
 617
 618    The order of members must be in sync with the 8th element of the
 619    member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for
 620    Unicode character property `general-category'.  */
 621
 622 typedef enum {
 623   UNICODE_CATEGORY_UNKNOWN = 0,
 624   UNICODE_CATEGORY_Lu,
 625   UNICODE_CATEGORY_Ll,
 626   UNICODE_CATEGORY_Lt,
 627   UNICODE_CATEGORY_Lm,
 628   UNICODE_CATEGORY_Lo,
 629   UNICODE_CATEGORY_Mn,
 630   UNICODE_CATEGORY_Mc,
 631   UNICODE_CATEGORY_Me,
 632   UNICODE_CATEGORY_Nd,
 633   UNICODE_CATEGORY_Nl,
 634   UNICODE_CATEGORY_No,
 635   UNICODE_CATEGORY_Pc,
 636   UNICODE_CATEGORY_Pd,
 637   UNICODE_CATEGORY_Ps,
 638   UNICODE_CATEGORY_Pe,
 639   UNICODE_CATEGORY_Pi,
 640   UNICODE_CATEGORY_Pf,
 641   UNICODE_CATEGORY_Po,
 642   UNICODE_CATEGORY_Sm,
 643   UNICODE_CATEGORY_Sc,
 644   UNICODE_CATEGORY_Sk,
 645   UNICODE_CATEGORY_So,
 646   UNICODE_CATEGORY_Zs,
 647   UNICODE_CATEGORY_Zl,
 648   UNICODE_CATEGORY_Zp,
 649   UNICODE_CATEGORY_Cc,
 650   UNICODE_CATEGORY_Cf,
 651   UNICODE_CATEGORY_Cs,
 652   UNICODE_CATEGORY_Co,
 653   UNICODE_CATEGORY_Cn
 654 } unicode_category_t;
 655
 656 extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST;
 657 extern int char_string (unsigned, unsigned char *);
 658 extern int string_char (const unsigned char *,
 659                         const unsigned char **, int *);
 660
 661 extern int translate_char (Lisp_Object, int c);
 662 extern void parse_str_as_multibyte (const unsigned char *,
 663                                     ptrdiff_t, ptrdiff_t *, ptrdiff_t *);
 664 extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
 665 extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
 666                                    ptrdiff_t *);
 667 extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
 668 extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
 669 extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
 670                                  ptrdiff_t);
 671 extern ptrdiff_t strwidth (const char *, ptrdiff_t);
 672 extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
 673                                  ptrdiff_t *, ptrdiff_t *);
 674 extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t,
 675                                     ptrdiff_t *, ptrdiff_t *);
 676
 677 extern Lisp_Object Qcharacterp;
 678 extern Lisp_Object Vchar_unify_table;
 679 extern Lisp_Object string_escape_byte8 (Lisp_Object);
 680
 681 /* Return a translation table of id number ID.  */
 682 #define GET_TRANSLATION_TABLE(id) \
 683   (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
 684
 685 INLINE_HEADER_END
 686
 687 #endif /* EMACS_CHARACTER_H */