src/character.h

   1 /* Header for multibyte character handler.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H13PRO009
   7
   8 This file is part of GNU Emacs.
   9
  10 GNU Emacs is free software: you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation, either version 3 of the License, or
  13 (at your option) any later version.
  14
  15 GNU Emacs is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  22
  23 #ifndef EMACS_CHARACTER_H
  24 #define EMACS_CHARACTER_H
  25
  26 #include <verify.h>
  27
  28 INLINE_HEADER_BEGIN
  29
  30 /* character code       1st byte   byte sequence
  31    --------------       --------   -------------
  32         0-7F            00..7F     0xxxxxxx
  33        80-7FF           C2..DF     110xxxxx 10xxxxxx
  34       800-FFFF          E0..EF     1110xxxx 10xxxxxx 10xxxxxx
  35     10000-1FFFFF        F0..F7     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  36    200000-3FFF7F        F8         11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
  37    3FFF80-3FFFFF        C0..C1     1100000x 10xxxxxx (for eight-bit-char)
  38    400000-...           invalid
  39
  40    invalid 1st byte     80..BF     10xxxxxx
  41                         F9..FF     11111xxx (xxx != 000)
  42 */
  43
/* Maximum character code ((1 << CHARACTERBITS) - 1).  Codes above
   MAX_5_BYTE_CHAR and up to this value are the eight-bit characters
   (see the encoding table at the top of this file).  */
#define MAX_CHAR  0x3FFFFF

/* Maximum Unicode character code.  */
#define MAX_UNICODE_CHAR 0x10FFFF

/* Maximum N-byte character codes: the largest character code whose
   multibyte form occupies N bytes.  */
#define MAX_1_BYTE_CHAR 0x7F
#define MAX_2_BYTE_CHAR 0x7FF
#define MAX_3_BYTE_CHAR 0xFFFF
#define MAX_4_BYTE_CHAR 0x1FFFFF
#define MAX_5_BYTE_CHAR 0x3FFF7F

/* Minimum leading code of multibyte characters.  0xC0 and 0xC1 are
   the leading codes used for eight-bit characters.  */
#define MIN_MULTIBYTE_LEADING_CODE 0xC0
/* Maximum leading code of multibyte characters.  0xF8 starts the
   five-byte form.  */
#define MAX_MULTIBYTE_LEADING_CODE 0xF8
  61
  62 /* Nonzero iff C is a character that corresponds to a raw 8-bit
  63    byte.  */
  64 #define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
  65
  66 /* Return the character code for raw 8-bit byte BYTE.  */
  67 #define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
  68
  69 #define UNIBYTE_TO_CHAR(byte) \
  70   (ASCII_CHAR_P (byte) ? (byte) : BYTE8_TO_CHAR (byte))
  71
  72 /* Return the raw 8-bit byte for character C.  */
  73 #define CHAR_TO_BYTE8(c) (CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 : (c & 0xFF))
  74
  75 /* Return the raw 8-bit byte for character C,
  76    or -1 if C doesn't correspond to a byte.  */
  77 #define CHAR_TO_BYTE_SAFE(c)                                            \
  78   (ASCII_CHAR_P (c) ? c : (CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 : -1))
  79
  80 /* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
  81    that corresponds to a raw 8-bit byte.  */
  82 #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
  83
/* If C is not ASCII, make it unibyte by replacing it with
   CHAR_TO_BYTE8 (C).  This is a statement macro: C must be a
   modifiable lvalue and is evaluated more than once.  */
#define MAKE_CHAR_UNIBYTE(c)    \
  do {                          \
    if (! ASCII_CHAR_P (c))     \
      c = CHAR_TO_BYTE8 (c);    \
  } while (false)


/* If C is not ASCII, make it multibyte.  Assumes C < 256; the
   assumption 0 <= C < 256 is checked with eassert.  This is an
   expression macro whose value is the new value of C: C must be a
   modifiable lvalue and is evaluated more than once.  */
#define MAKE_CHAR_MULTIBYTE(c) \
  (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c))
  95
/* This is the maximum byte length of multibyte form.  Callers that
   store a multibyte form (e.g. with CHAR_STRING) must provide at
   least this many bytes.  */
#define MAX_MULTIBYTE_LENGTH 5

/* Nonzero iff Lisp object X is a character: a non-negative integer
   (NATNUMP) not greater than MAX_CHAR.  X is evaluated twice.  */
#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)

/* Nonzero iff C (a C integer, not a Lisp object) is valid as a
   character code, i.e. does not exceed MAX_CHAR under the comparison
   done by UNSIGNED_CMP.  */
#define CHAR_VALID_P(c) UNSIGNED_CMP (c, <=, MAX_CHAR)

/* Check if Lisp object X is a character or not, via CHECK_TYPE with
   the predicate symbol Qcharacterp.  */
#define CHECK_CHARACTER(x) \
  CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
 108
/* Check that the car of cons X is a character.  The car is copied
   into a local named `tmp', checked with CHECK_CHARACTER, and stored
   back with XSETCAR.  X is evaluated twice and must not be an
   expression that mentions `tmp'.  */
#define CHECK_CHARACTER_CAR(x) \
  do {                                  \
    Lisp_Object tmp = XCAR (x);         \
    CHECK_CHARACTER (tmp);              \
    XSETCAR ((x), tmp);                 \
  } while (false)

/* Like CHECK_CHARACTER_CAR, but check (and store back) the cdr of
   cons X.  */
#define CHECK_CHARACTER_CDR(x) \
  do {                                  \
    Lisp_Object tmp = XCDR (x);         \
    CHECK_CHARACTER (tmp);              \
    XSETCDR ((x), tmp);                 \
  } while (false)
 122
/* Nonzero iff C is a character of code less than 0x100.  */
#define SINGLE_BYTE_CHAR_P(c) UNSIGNED_CMP (c, <, 0x100)

/* Nonzero if character C has a printable glyph: either C is in the
   range 32..126, or its entry in the char-table Vprintable_chars is
   non-nil.  C is evaluated more than once.  */
#define CHAR_PRINTABLE_P(c)     \
  (((c) >= 32 && (c) < 127)     \
   || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))
 130
 131 /* Return byte length of multibyte form for character C.  */
 132 #define CHAR_BYTES(c)                   \
 133   ( (c) <= MAX_1_BYTE_CHAR ? 1          \
 134     : (c) <= MAX_2_BYTE_CHAR ? 2        \
 135     : (c) <= MAX_3_BYTE_CHAR ? 3        \
 136     : (c) <= MAX_4_BYTE_CHAR ? 4        \
 137     : (c) <= MAX_5_BYTE_CHAR ? 5        \
 138     : 2)
 139
 140
 141 /* Return the leading code of multibyte form of C.  */
 142 #define CHAR_LEADING_CODE(c)                            \
 143   ((c) <= MAX_1_BYTE_CHAR ? c                           \
 144    : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6))       \
 145    : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12))      \
 146    : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18))      \
 147    : (c) <= MAX_5_BYTE_CHAR ? 0xF8                      \
 148    : (0xC0 | (((c) >> 6) & 0x01)))
 149
 150
/* Store multibyte form of the character C in P.  The caller should
   allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
   Returns the length of the multibyte form.

   One-, two- and three-byte forms are written inline.  Every other
   value of C is handed to the function char_string, guarded by a
   verify_expr check that C is no wider than `unsigned'.  C and P are
   evaluated more than once.  */

#define CHAR_STRING(c, p)                       \
  (UNSIGNED_CMP (c, <=, MAX_1_BYTE_CHAR)        \
   ? ((p)[0] = (c),                             \
      1)                                        \
   : UNSIGNED_CMP (c, <=, MAX_2_BYTE_CHAR)      \
   ? ((p)[0] = (0xC0 | ((c) >> 6)),             \
      (p)[1] = (0x80 | ((c) & 0x3F)),           \
      2)                                        \
   : UNSIGNED_CMP (c, <=, MAX_3_BYTE_CHAR)      \
   ? ((p)[0] = (0xE0 | ((c) >> 12)),            \
      (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),    \
      (p)[2] = (0x80 | ((c) & 0x3F)),           \
      3)                                        \
   : verify_expr (sizeof (c) <= sizeof (unsigned), char_string (c, p)))
 169
 170 /* Store multibyte form of byte B in P.  The caller should allocate at
 171    least MAX_MULTIBYTE_LENGTH bytes area at P in advance.  Returns the
 172    length of the multibyte form.  */
 173
 174 #define BYTE8_STRING(b, p)                      \
 175   ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),       \
 176    (p)[1] = (0x80 | ((b) & 0x3F)),              \
 177    2)
 178
 179
/* Store multibyte form of the character C in P and advance P to the
   end of the multibyte form.  The caller should allocate at least
   MAX_MULTIBYTE_LENGTH bytes area at P in advance.

   This is a statement macro; P must be a modifiable pointer lvalue,
   and both C and P are evaluated more than once.  Forms longer than
   three bytes are produced by the function char_string.

   NOTE(review): unlike CHAR_STRING, the range tests here are plain
   `<=' comparisons rather than UNSIGNED_CMP, so a negative C of
   signed type takes the one-byte branch.  Confirm whether that
   difference is intended.  */

#define CHAR_STRING_ADVANCE(c, p)               \
  do {                                          \
    if ((c) <= MAX_1_BYTE_CHAR)                 \
      *(p)++ = (c);                             \
    else if ((c) <= MAX_2_BYTE_CHAR)            \
      *(p)++ = (0xC0 | ((c) >> 6)),             \
        *(p)++ = (0x80 | ((c) & 0x3F));         \
    else if ((c) <= MAX_3_BYTE_CHAR)            \
      *(p)++ = (0xE0 | ((c) >> 12)),            \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
        *(p)++ = (0x80 | ((c) & 0x3F));         \
    else                                        \
      {                                         \
        verify (sizeof (c) <= sizeof (unsigned));       \
        (p) += char_string (c, p);              \
      }                                         \
  } while (false)
 201
 202
 203 /* Nonzero iff BYTE starts a non-ASCII character in a multibyte
 204    form.  */
 205 #define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
 206
 207 /* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
 208    multibyte form.  */
 209 #define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
 210
 211 /* Nonzero iff BYTE starts a character in a multibyte form.
 212    This is equivalent to:
 213         (ASCII_CHAR_P (byte) || LEADING_CODE_P (byte))  */
 214 #define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
 215
 216 /* How many bytes a character that starts with BYTE occupies in a
 217    multibyte form.  */
 218 #define BYTES_BY_CHAR_HEAD(byte)        \
 219   (!((byte) & 0x80) ? 1                 \
 220    : !((byte) & 0x20) ? 2               \
 221    : !((byte) & 0x10) ? 3               \
 222    : !((byte) & 0x08) ? 4               \
 223    : 5)
 224
 225
 226 /* The byte length of multibyte form at unibyte string P ending at
 227    PEND.  If STR doesn't point to a valid multibyte form, return 0.  */
 228
 229 #define MULTIBYTE_LENGTH(p, pend)                               \
 230   (p >= pend ? 0                                                \
 231    : !((p)[0] & 0x80) ? 1                                       \
 232    : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0         \
 233    : ((p)[0] & 0xE0) == 0xC0 ? 2                                \
 234    : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0         \
 235    : ((p)[0] & 0xF0) == 0xE0 ? 3                                \
 236    : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0         \
 237    : ((p)[0] & 0xF8) == 0xF0 ? 4                                \
 238    : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0         \
 239    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5              \
 240    : 0)
 241
 242
/* Like MULTIBYTE_LENGTH, but don't check the ending address.

   Returns 0 if P does not point to a valid multibyte form.  Trailing
   bytes are examined one at a time, and only as far as needed to
   decide the result, so up to P[4] may be read.  P is evaluated more
   than once.  */

#define MULTIBYTE_LENGTH_NO_CHECK(p)                    \
  (!((p)[0] & 0x80) ? 1                                 \
   : ((p)[1] & 0xC0) != 0x80 ? 0                        \
   : ((p)[0] & 0xE0) == 0xC0 ? 2                        \
   : ((p)[2] & 0xC0) != 0x80 ? 0                        \
   : ((p)[0] & 0xF0) == 0xE0 ? 3                        \
   : ((p)[3] & 0xC0) != 0x80 ? 0                        \
   : ((p)[0] & 0xF8) == 0xF0 ? 4                        \
   : ((p)[4] & 0xC0) != 0x80 ? 0                        \
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5      \
   : 0)
 256
/* If P is before LIMIT, advance P to the next character boundary.
   Assumes that P is already at a character boundary of the same
   multibyte form whose end address is LIMIT.

   The step size is taken from the byte at P alone
   (BYTES_BY_CHAR_HEAD); the result is not compared with LIMIT again.
   P must be a modifiable pointer lvalue and is evaluated more than
   once.  */

#define NEXT_CHAR_BOUNDARY(p, limit)    \
  do {                                  \
    if ((p) < (limit))                  \
      (p) += BYTES_BY_CHAR_HEAD (*(p)); \
  } while (false)
 266
 267
 268 /* If P is after LIMIT, advance P to the previous character boundary.
 269    Assumes that P is already at a character boundary of the same
 270    multibyte form whose beginning address is LIMIT.  */
 271
 272 #define PREV_CHAR_BOUNDARY(p, limit)                                    \
 273   do {                                                                  \
 274     if ((p) > (limit))                                                  \
 275       {                                                                 \
 276         const unsigned char *chp = (p);                                 \
 277         do {                                                            \
 278           chp--;                                                        \
 279         } while (chp >= limit && ! CHAR_HEAD_P (*chp));                 \
 280         (p) = (BYTES_BY_CHAR_HEAD (*chp) == (p) - chp) ? chp : (p) - 1; \
 281       }                                                                 \
 282   } while (false)
 283
/* Return the character code of character whose multibyte form is at
   P.  Note that this macro unifies CJK characters whose codepoints
   are in the Private Use Areas (PUAs), so it might return a different
   codepoint from the one actually stored at P.

   One-, two- and three-byte forms are decoded inline; anything longer
   is passed to the function string_char.  In the two-byte case a
   leading byte below 0xC2 (i.e. 0xC0 or 0xC1) denotes an eight-bit
   character, so 0x3FFF80 is added to the decoded value.  P is
   evaluated more than once.  */

#define STRING_CHAR(p)                                          \
  (!((p)[0] & 0x80)                                             \
   ? (p)[0]                                                     \
   : ! ((p)[0] & 0x20)                                          \
   ? (((((p)[0] & 0x1F) << 6)                                   \
       | ((p)[1] & 0x3F))                                       \
      + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))       \
   : ! ((p)[0] & 0x10)                                          \
   ? ((((p)[0] & 0x0F) << 12)                                   \
      | (((p)[1] & 0x3F) << 6)                                  \
      | ((p)[2] & 0x3F))                                        \
   : string_char ((p), NULL, NULL))
 301
 302
/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
   form.

   Note: This macro returns the actual length of the character's
   multibyte sequence as it is stored in a buffer or string.  The
   character it returns might have a different codepoint that has a
   different multibyte sequence of a different length, due to possible
   unification of CJK characters inside string_char.  Therefore do NOT
   assume that the length returned by this macro is identical to the
   length of the multibyte sequence of the character it returns.

   ACTUAL_LEN must be a modifiable lvalue.  It is assigned directly in
   the inline one- to three-byte cases, and its address is passed to
   string_char for longer forms.  P is evaluated more than once.  */

#define STRING_CHAR_AND_LENGTH(p, actual_len)                   \
  (!((p)[0] & 0x80)                                             \
   ? ((actual_len) = 1, (p)[0])                                 \
   : ! ((p)[0] & 0x20)                                          \
   ? ((actual_len) = 2,                                         \
      (((((p)[0] & 0x1F) << 6)                                  \
        | ((p)[1] & 0x3F))                                      \
       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))     \
   : ! ((p)[0] & 0x10)                                          \
   ? ((actual_len) = 3,                                         \
      ((((p)[0] & 0x0F) << 12)                                  \
       | (((p)[1] & 0x3F) << 6)                                 \
       | ((p)[2] & 0x3F)))                                      \
   : string_char ((p), NULL, &actual_len))
 328
 329
/* Like STRING_CHAR, but advance P to the end of multibyte form.

   P must be a modifiable pointer lvalue.  In the two- and three-byte
   cases P is advanced first and the bytes are then read back through
   negative indices.  For longer forms the address of P is passed to
   string_char, which is expected to advance it.  */

#define STRING_CHAR_ADVANCE(p)                                  \
  (!((p)[0] & 0x80)                                             \
   ? *(p)++                                                     \
   : ! ((p)[0] & 0x20)                                          \
   ? ((p) += 2,                                                 \
      ((((p)[-2] & 0x1F) << 6)                                  \
       | ((p)[-1] & 0x3F)                                       \
       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
   : ! ((p)[0] & 0x10)                                          \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : string_char ((p), &(p), NULL))
 346
 347
/* Fetch the "next" character from Lisp string STRING at byte position
   BYTEIDX, character position CHARIDX.  Store it into OUTPUT.

   All the args must be side-effect-free.
   BYTEIDX and CHARIDX must be lvalues;
   we increment them past the character fetched.

   For a multibyte STRING the character is decoded with
   STRING_CHAR_AND_LENGTH and BYTEIDX advances by its byte length; for
   a unibyte STRING the byte itself is stored and BYTEIDX advances by
   one.  The macro declares locals named `chp' and `chlen', so the
   arguments must not be expressions that mention those names.  */

#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)     \
  do                                                                    \
    {                                                                   \
      CHARIDX++;                                                        \
      if (STRING_MULTIBYTE (STRING))                                    \
        {                                                               \
          unsigned char *chp = &SDATA (STRING)[BYTEIDX];                \
          int chlen;                                                    \
                                                                        \
          OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);                 \
          BYTEIDX += chlen;                                             \
        }                                                               \
      else                                                              \
        {                                                               \
          OUTPUT = SREF (STRING, BYTEIDX);                              \
          BYTEIDX++;                                                    \
        }                                                               \
    }                                                                   \
  while (false)
 374
/* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
   even if STRING is unibyte: in the unibyte case the fetched byte is
   converted in place with MAKE_CHAR_MULTIBYTE.  OUTPUT, CHARIDX and
   BYTEIDX must be lvalues, and the arguments must not mention the
   local names `chp' and `chlen'.  */

#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  do                                                                          \
    {                                                                         \
      CHARIDX++;                                                              \
      if (STRING_MULTIBYTE (STRING))                                          \
        {                                                                     \
          unsigned char *chp = &SDATA (STRING)[BYTEIDX];                      \
          int chlen;                                                          \
                                                                              \
          OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);                       \
          BYTEIDX += chlen;                                                   \
        }                                                                     \
      else                                                                    \
        {                                                                     \
          OUTPUT = SREF (STRING, BYTEIDX);                                    \
          BYTEIDX++;                                                          \
          MAKE_CHAR_MULTIBYTE (OUTPUT);                                       \
        }                                                                     \
    }                                                                         \
  while (false)
 398
 399
/* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte:
   STRING_MULTIBYTE is not consulted and the character is always
   decoded with STRING_CHAR_AND_LENGTH.  The macro declares locals
   named `fetch_ptr' and `fetch_len', so the arguments must not
   mention those names.  */

#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  do                                                                         \
    {                                                                        \
      unsigned char *fetch_ptr = &SDATA (STRING)[BYTEIDX];                   \
      int fetch_len;                                                         \
                                                                             \
      OUTPUT = STRING_CHAR_AND_LENGTH (fetch_ptr, fetch_len);                \
      BYTEIDX += fetch_len;                                                  \
      CHARIDX++;                                                             \
    }                                                                        \
  while (false)
 413
 414
/* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
   buffer.

   BYTEIDX is a buffer byte position, turned into an address with
   BYTE_POS_ADDR.  Whether the text is decoded as multibyte depends on
   the current buffer's enable_multibyte_characters being non-nil.
   OUTPUT, CHARIDX and BYTEIDX must be lvalues, and the arguments must
   not mention the local names `chp' and `chlen'.  */

#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)            \
  do                                                            \
    {                                                           \
      CHARIDX++;                                                \
      if (!NILP (BVAR (current_buffer, enable_multibyte_characters)))   \
        {                                                       \
          unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);         \
          int chlen;                                            \
                                                                \
          OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);         \
          BYTEIDX += chlen;                                     \
        }                                                       \
      else                                                      \
        {                                                       \
          OUTPUT = *(BYTE_POS_ADDR (BYTEIDX));                  \
          BYTEIDX++;                                            \
        }                                                       \
    }                                                           \
  while (false)
 437
 438
/* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte:
   enable_multibyte_characters is not consulted and the character is
   always decoded with STRING_CHAR_AND_LENGTH.  The arguments must not
   mention the local names `chp' and `chlen'.  */

#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)   \
  do                                                            \
    {                                                           \
      unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);             \
      int chlen;                                                        \
                                                                \
      OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);             \
      BYTEIDX += chlen;                                         \
      CHARIDX++;                                                \
    }                                                           \
  while (false)
 452
 453
/* Increment the buffer byte position POS_BYTE of the current buffer to
   the next character boundary.  No range checking of POS_BYTE.

   The step is the length implied by the byte at POS_BYTE
   (BYTES_BY_CHAR_HEAD).  POS_BYTE must be a modifiable lvalue that
   does not mention the local name `chp'.  */

#define INC_POS(pos_byte)                               \
  do {                                                  \
    unsigned char *chp = BYTE_POS_ADDR (pos_byte);      \
    pos_byte += BYTES_BY_CHAR_HEAD (*chp);              \
  } while (false)
 462
 463
/* Decrement the buffer byte position POS_BYTE of the current buffer to
   the previous character boundary.  No range checking of POS_BYTE.

   POS_BYTE is first decremented by one and converted to an address;
   positions at or after GPT_BYTE are offset by GAP_SIZE.  Then both
   the address and POS_BYTE step back one byte at a time until the
   byte is a character head (CHAR_HEAD_P).  POS_BYTE must be a
   modifiable lvalue that does not mention the local name `chp'.  */

#define DEC_POS(pos_byte)                       \
  do {                                          \
    unsigned char *chp;                         \
                                                \
    pos_byte--;                                 \
    if (pos_byte < GPT_BYTE)                    \
      chp = BEG_ADDR + pos_byte - BEG_BYTE;     \
    else                                        \
      chp = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE; \
    while (!CHAR_HEAD_P (*chp))                 \
      {                                         \
        chp--;                                  \
        pos_byte--;                             \
      }                                         \
  } while (false)
 482
/* Increment both CHARPOS and BYTEPOS, each in the appropriate way:
   CHARPOS always advances by one; BYTEPOS advances by one byte when
   the current buffer's enable_multibyte_characters is nil, and by one
   whole character (INC_POS) otherwise.  Both must be lvalues.  */

#define INC_BOTH(charpos, bytepos)                              \
  do                                                            \
    {                                                           \
      (charpos)++;                                              \
      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))    \
        (bytepos)++;                                            \
      else                                                      \
        INC_POS ((bytepos));                                    \
    }                                                           \
  while (false)


/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way:
   CHARPOS always goes back by one; BYTEPOS goes back by one byte when
   the current buffer's enable_multibyte_characters is nil, and by one
   whole character (DEC_POS) otherwise.  Both must be lvalues.  */

#define DEC_BOTH(charpos, bytepos)                              \
  do                                                            \
    {                                                           \
      (charpos)--;                                              \
      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))    \
        (bytepos)--;                                            \
      else                                                      \
        DEC_POS ((bytepos));                                    \
    }                                                           \
  while (false)
 509
 510
/* Increment the buffer byte position POS_BYTE of buffer BUF to
   the next character boundary.  This macro relies on the fact that
   *GPT_ADDR and *Z_ADDR are always accessible and the values are
   '\0'.  No range checking of POS_BYTE.

   POS_BYTE must be a modifiable lvalue that does not mention the
   local name `chp'.  */

#define BUF_INC_POS(buf, pos_byte)                              \
  do {                                                          \
    unsigned char *chp = BUF_BYTE_ADDRESS (buf, pos_byte);      \
    pos_byte += BYTES_BY_CHAR_HEAD (*chp);                      \
  } while (false)
 521
 522
/* Decrement the buffer byte position POS_BYTE of buffer BUF to
   the previous character boundary.  No range checking of POS_BYTE.

   Works like DEC_POS, but takes the gap position, gap size and text
   start address from BUF (BUF_GPT_BYTE, BUF_GAP_SIZE, BUF_BEG_ADDR).
   POS_BYTE must be a modifiable lvalue that does not mention the
   local name `chp'.  */

#define BUF_DEC_POS(buf, pos_byte)                                      \
  do {                                                                  \
    unsigned char *chp;                                                 \
    pos_byte--;                                                         \
    if (pos_byte < BUF_GPT_BYTE (buf))                                  \
      chp = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE;                   \
    else                                                                \
      chp = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
    while (!CHAR_HEAD_P (*chp))                                         \
      {                                                                 \
        chp--;                                                          \
        pos_byte--;                                                     \
      }                                                                 \
  } while (false)
 540
 541
 542 /* Return a non-outlandish value for the tab width.  */
 543
 544 #define SANE_TAB_WIDTH(buf) \
 545   sanitize_tab_width (XFASTINT (BVAR (buf, tab_width)))
 546 INLINE int
 547 sanitize_tab_width (EMACS_INT width)
 548 {
 549   return 0 < width && width <= 1000 ? width : 8;
 550 }
 551
 552 /* Return the width of ASCII character C.  The width is measured by
 553    how many columns C will occupy on the screen when displayed in the
 554    current buffer.  */
 555
 556 #define ASCII_CHAR_WIDTH(c)                                             \
 557   (c < 0x20                                                             \
 558    ? (c == '\t'                                                         \
 559       ? SANE_TAB_WIDTH (current_buffer)                                 \
 560       : (c == '\n' ? 0 : (NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2)))    \
 561    : (c < 0x7f                                                          \
 562       ? 1                                                               \
 563       : ((NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))))
 564
 565 /* Return a non-outlandish value for a character width.  */
 566
 567 INLINE int
 568 sanitize_char_width (EMACS_INT width)
 569 {
 570   return 0 <= width && width <= 1000 ? width : 1000;
 571 }
 572
/* Return the width of character C.  The width is measured by how many
   columns C will occupy on the screen when displayed in the current
   buffer.

   ASCII characters are handled by ASCII_CHAR_WIDTH.  For any other
   character the width is looked up in the char-table
   Vchar_width_table and passed through sanitize_char_width.  C is
   evaluated more than once.  */

#define CHAR_WIDTH(c)           \
  (ASCII_CHAR_P (c)             \
   ? ASCII_CHAR_WIDTH (c)       \
   : sanitize_char_width (XINT (CHAR_TABLE_REF (Vchar_width_table, c))))
 581
 582 /* If C is a variation selector, return the index of the
 583    variation selector (1..256).  Otherwise, return 0.  */
 584
 585 #define CHAR_VARIATION_SELECTOR_P(c)            \
 586   ((c) < 0xFE00 ? 0                             \
 587    : (c) <= 0xFE0F ? (c) - 0xFE00 + 1           \
 588    : (c) < 0xE0100 ? 0                          \
 589    : (c) <= 0xE01EF ? (c) - 0xE0100 + 17        \
 590    : 0)
 591
 592 /* If C is a high surrogate, return 1.  If C is a low surrogate,
 593    return 2.  Otherwise, return 0.  */
 594
 595 #define CHAR_SURROGATE_PAIR_P(c)        \
 596   ((c) < 0xD800 ? 0                     \
 597    : (c) <= 0xDBFF ? 1                  \
 598    : (c) <= 0xDFFF ? 2                  \
 599    : 0)
 600
/* Data type for Unicode general category.

   The order of members must be in sync with the 8th element of the
   member of unidata-prop-alist (in admin/unidata/unidata-gen.el) for
   Unicode character property `general-category'.

   Each member other than UNICODE_CATEGORY_UNKNOWN is named after the
   two-letter abbreviation of a general category.  Do not reorder,
   insert or remove members without updating unidata-gen.el.  */

typedef enum {
  UNICODE_CATEGORY_UNKNOWN = 0,
  /* Letters.  */
  UNICODE_CATEGORY_Lu,
  UNICODE_CATEGORY_Ll,
  UNICODE_CATEGORY_Lt,
  UNICODE_CATEGORY_Lm,
  UNICODE_CATEGORY_Lo,
  /* Marks.  */
  UNICODE_CATEGORY_Mn,
  UNICODE_CATEGORY_Mc,
  UNICODE_CATEGORY_Me,
  /* Numbers.  */
  UNICODE_CATEGORY_Nd,
  UNICODE_CATEGORY_Nl,
  UNICODE_CATEGORY_No,
  /* Punctuation.  */
  UNICODE_CATEGORY_Pc,
  UNICODE_CATEGORY_Pd,
  UNICODE_CATEGORY_Ps,
  UNICODE_CATEGORY_Pe,
  UNICODE_CATEGORY_Pi,
  UNICODE_CATEGORY_Pf,
  UNICODE_CATEGORY_Po,
  /* Symbols.  */
  UNICODE_CATEGORY_Sm,
  UNICODE_CATEGORY_Sc,
  UNICODE_CATEGORY_Sk,
  UNICODE_CATEGORY_So,
  /* Separators.  */
  UNICODE_CATEGORY_Zs,
  UNICODE_CATEGORY_Zl,
  UNICODE_CATEGORY_Zp,
  /* Others (control, format, surrogate, private use, unassigned).  */
  UNICODE_CATEGORY_Cc,
  UNICODE_CATEGORY_Cf,
  UNICODE_CATEGORY_Cs,
  UNICODE_CATEGORY_Co,
  UNICODE_CATEGORY_Cn
} unicode_category_t;
 640
/* Functions and variables that this header declares but does not
   define.  char_string and string_char are the out-of-line helpers
   called by the CHAR_STRING* and STRING_CHAR* macros above.  */
extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST;
extern int char_string (unsigned, unsigned char *);
extern int string_char (const unsigned char *,
                        const unsigned char **, int *);

extern int translate_char (Lisp_Object, int c);
extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
                                   ptrdiff_t *);
extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
                                 ptrdiff_t);
extern ptrdiff_t strwidth (const char *, ptrdiff_t);
extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
                                 ptrdiff_t *, ptrdiff_t *);
extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t,
                                    ptrdiff_t *, ptrdiff_t *);

extern Lisp_Object Vchar_unify_table;
extern Lisp_Object string_escape_byte8 (Lisp_Object);
 662
/* Return a translation table of id number ID: the cdr of element ID
   of the vector Vtranslation_table_vector.  ID is used as an index
   without any range check.  */
#define GET_TRANSLATION_TABLE(id) \
  (XCDR (XVECTOR (Vtranslation_table_vector)->contents[(id)]))
 666
 667 /* Look up the element in char table OBJ at index CH, and return it as
 668    an integer.  If the element is not a character, return CH itself.  */
 669
 670 INLINE int
 671 char_table_translate (Lisp_Object obj, int ch)
 672 {
 673   /* This internal function is expected to be called with valid arguments,
 674      so there is a eassert instead of CHECK_xxx for the sake of speed.  */
 675   eassert (CHAR_VALID_P (ch));
 676   eassert (CHAR_TABLE_P (obj));
 677   obj = CHAR_TABLE_REF (obj, ch);
 678   return CHARACTERP (obj) ? XINT (obj) : ch;
 679 }
 680
 681 INLINE_HEADER_END
 682
 683 #endif /* EMACS_CHARACTER_H */