src/charset.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 #include "vim.h"
  11
  12 #ifdef FEAT_LINEBREAK
  13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
  14 #endif
  15
  16 #ifdef FEAT_MBYTE
  17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
  18 #endif
  19
  20 static unsigned nr2hex __ARGS((unsigned c));
  21
  22 static int    chartab_initialized = FALSE;
  23
  24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
  25  * characters 0-255. */
  26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
  27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
  28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
  29
  30 /*
  31  * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
  32  * characters for current buffer.
  33  *
  34  * Depends on the option settings 'iskeyword', 'isident', 'isfname',
  35  * 'isprint' and 'encoding'.
  36  *
  37  * The index in chartab[] depends on 'encoding':
  38  * - For non-multi-byte index with the byte (same as the character).
  39  * - For DBCS index with the first byte.
  40  * - For UTF-8 index with the character (when first byte is up to 0x80 it is
  41  *   the same as the character, if the first byte is 0x80 and above it depends
  42  *   on further bytes).
  43  *
  44  * The contents of chartab[]:
  45  * - The lower two bits, masked by CT_CELL_MASK, give the number of display
  46  *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
  47  * - CT_PRINT_CHAR bit is set when the character is printable (no need to
  48  *   translate the character before displaying it).  Note that only DBCS
  49  *   characters can have 2 display cells and still be printable.
  50  * - CT_FNAME_CHAR bit is set when the character can be in a file name.
  51  * - CT_ID_CHAR bit is set when the character can be in an identifier.
  52  *
  53  * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
  54  * error, OK otherwise.
  55  */
    int
init_chartab()
{
    /* Do the work for the current buffer; TRUE requests that the global
     * chartab[] is filled too, not only curbuf->b_chartab[]. */
    return buf_init_chartab(curbuf, TRUE);
}
  61
  62     int
  63 buf_init_chartab(buf, global)
  64     buf_T       *buf;
  65     int         global;         /* FALSE: only set buf->b_chartab[] */
  66 {
  67     int         c;
  68     int         c2;
  69     char_u      *p;
  70     int         i;
  71     int         tilde;
  72     int         do_isalpha;
  73
  74     if (global)
  75     {
  76         /*
  77          * Set the default size for printable characters:
  78          * From <Space> to '~' is 1 (printable), others are 2 (not printable).
  79          * This also inits all 'isident' and 'isfname' flags to FALSE.
  80          *
  81          * EBCDIC: all chars below ' ' are not printable, all others are
  82          * printable.
  83          */
  84         c = 0;
  85         while (c < ' ')
  86             chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
  87 #ifdef EBCDIC
  88         while (c < 255)
  89 #else
  90         while (c <= '~')
  91 #endif
  92             chartab[c++] = 1 + CT_PRINT_CHAR;
  93 #ifdef FEAT_FKMAP
  94         if (p_altkeymap)
  95         {
  96             while (c < YE)
  97                 chartab[c++] = 1 + CT_PRINT_CHAR;
  98         }
  99 #endif
 100         while (c < 256)
 101         {
 102 #ifdef FEAT_MBYTE
 103             /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
 104             if (enc_utf8 && c >= 0xa0)
 105                 chartab[c++] = CT_PRINT_CHAR + 1;
 106             /* euc-jp characters starting with 0x8e are single width */
 107             else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
 108                 chartab[c++] = CT_PRINT_CHAR + 1;
 109             /* other double-byte chars can be printable AND double-width */
 110             else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
 111                 chartab[c++] = CT_PRINT_CHAR + 2;
 112             else
 113 #endif
 114                 /* the rest is unprintable by default */
 115                 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
 116         }
 117
 118 #ifdef FEAT_MBYTE
 119         /* Assume that every multi-byte char is a filename character. */
 120         for (c = 1; c < 256; ++c)
 121             if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
 122                     || (enc_dbcs == DBCS_JPNU && c == 0x8e)
 123                     || (enc_utf8 && c >= 0xa0))
 124                 chartab[c] |= CT_FNAME_CHAR;
 125 #endif
 126     }
 127
 128     /*
 129      * Init word char flags all to FALSE
 130      */
 131     vim_memset(buf->b_chartab, 0, (size_t)32);
 132 #ifdef FEAT_MBYTE
 133     if (enc_dbcs != 0)
 134         for (c = 0; c < 256; ++c)
 135         {
 136             /* double-byte characters are probably word characters */
 137             if (MB_BYTE2LEN(c) == 2)
 138                 SET_CHARTAB(buf, c);
 139         }
 140 #endif
 141
 142 #ifdef FEAT_LISP
 143     /*
 144      * In lisp mode the '-' character is included in keywords.
 145      */
 146     if (buf->b_p_lisp)
 147         SET_CHARTAB(buf, '-');
 148 #endif
 149
 150     /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
 151      * options Each option is a list of characters, character numbers or
 152      * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
 153      */
 154     for (i = global ? 0 : 3; i <= 3; ++i)
 155     {
 156         if (i == 0)
 157             p = p_isi;          /* first round: 'isident' */
 158         else if (i == 1)
 159             p = p_isp;          /* second round: 'isprint' */
 160         else if (i == 2)
 161             p = p_isf;          /* third round: 'isfname' */
 162         else    /* i == 3 */
 163             p = buf->b_p_isk;   /* fourth round: 'iskeyword' */
 164
 165         while (*p)
 166         {
 167             tilde = FALSE;
 168             do_isalpha = FALSE;
 169             if (*p == '^' && p[1] != NUL)
 170             {
 171                 tilde = TRUE;
 172                 ++p;
 173             }
 174             if (VIM_ISDIGIT(*p))
 175                 c = getdigits(&p);
 176             else
 177                 c = *p++;
 178             c2 = -1;
 179             if (*p == '-' && p[1] != NUL)
 180             {
 181                 ++p;
 182                 if (VIM_ISDIGIT(*p))
 183                     c2 = getdigits(&p);
 184                 else
 185                     c2 = *p++;
 186             }
 187             if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
 188                                                  || !(*p == NUL || *p == ','))
 189                 return FAIL;
 190
 191             if (c2 == -1)       /* not a range */
 192             {
 193                 /*
 194                  * A single '@' (not "@-@"):
 195                  * Decide on letters being ID/printable/keyword chars with
 196                  * standard function isalpha(). This takes care of locale for
 197                  * single-byte characters).
 198                  */
 199                 if (c == '@')
 200                 {
 201                     do_isalpha = TRUE;
 202                     c = 1;
 203                     c2 = 255;
 204                 }
 205                 else
 206                     c2 = c;
 207             }
 208             while (c <= c2)
 209             {
 210                 /* Use the MB_ functions here, because isalpha() doesn't
 211                  * work properly when 'encoding' is "latin1" and the locale is
 212                  * "C".  */
 213                 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
 214 #ifdef FEAT_FKMAP
 215                         || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
 216 #endif
 217                             )
 218                 {
 219                     if (i == 0)                 /* (re)set ID flag */
 220                     {
 221                         if (tilde)
 222                             chartab[c] &= ~CT_ID_CHAR;
 223                         else
 224                             chartab[c] |= CT_ID_CHAR;
 225                     }
 226                     else if (i == 1)            /* (re)set printable */
 227                     {
 228                         if ((c < ' '
 229 #ifndef EBCDIC
 230                                     || c > '~'
 231 #endif
 232 #ifdef FEAT_FKMAP
 233                                     || (p_altkeymap
 234                                         && (F_isalpha(c) || F_isdigit(c)))
 235 #endif
 236                             )
 237 #ifdef FEAT_MBYTE
 238                                 /* For double-byte we keep the cell width, so
 239                                  * that we can detect it from the first byte. */
 240                                 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
 241 #endif
 242                            )
 243                         {
 244                             if (tilde)
 245                             {
 246                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
 247                                              + ((dy_flags & DY_UHEX) ? 4 : 2);
 248                                 chartab[c] &= ~CT_PRINT_CHAR;
 249                             }
 250                             else
 251                             {
 252                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
 253                                 chartab[c] |= CT_PRINT_CHAR;
 254                             }
 255                         }
 256                     }
 257                     else if (i == 2)            /* (re)set fname flag */
 258                     {
 259                         if (tilde)
 260                             chartab[c] &= ~CT_FNAME_CHAR;
 261                         else
 262                             chartab[c] |= CT_FNAME_CHAR;
 263                     }
 264                     else /* i == 3 */           /* (re)set keyword flag */
 265                     {
 266                         if (tilde)
 267                             RESET_CHARTAB(buf, c);
 268                         else
 269                             SET_CHARTAB(buf, c);
 270                     }
 271                 }
 272                 ++c;
 273             }
 274             p = skip_to_option_part(p);
 275         }
 276     }
 277     chartab_initialized = TRUE;
 278     return OK;
 279 }
 280
 281 /*
 282  * Translate any special characters in buf[bufsize] in-place.
 283  * The result is a string with only printable characters, but if there is not
 284  * enough room, not all characters will be translated.
 285  */
 286     void
 287 trans_characters(buf, bufsize)
 288     char_u      *buf;
 289     int         bufsize;
 290 {
 291     int         len;            /* length of string needing translation */
 292     int         room;           /* room in buffer after string */
 293     char_u      *trs;           /* translated character */
 294     int         trs_len;        /* length of trs[] */
 295
 296     len = (int)STRLEN(buf);
 297     room = bufsize - len;
 298     while (*buf != 0)
 299     {
 300 # ifdef FEAT_MBYTE
 301         /* Assume a multi-byte character doesn't need translation. */
 302         if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
 303             len -= trs_len;
 304         else
 305 # endif
 306         {
 307             trs = transchar_byte(*buf);
 308             trs_len = (int)STRLEN(trs);
 309             if (trs_len > 1)
 310             {
 311                 room -= trs_len - 1;
 312                 if (room <= 0)
 313                     return;
 314                 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
 315             }
 316             mch_memmove(buf, trs, (size_t)trs_len);
 317             --len;
 318         }
 319         buf += trs_len;
 320     }
 321 }
 322
 323 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
 324         || defined(PROTO)
 325 /*
 326  * Translate a string into allocated memory, replacing special chars with
 327  * printable chars.  Returns NULL when out of memory.
 328  */
 329     char_u *
 330 transstr(s)
 331     char_u      *s;
 332 {
 333     char_u      *res;
 334     char_u      *p;
 335 #ifdef FEAT_MBYTE
 336     int         l, len, c;
 337     char_u      hexbuf[11];
 338 #endif
 339
 340 #ifdef FEAT_MBYTE
 341     if (has_mbyte)
 342     {
 343         /* Compute the length of the result, taking account of unprintable
 344          * multi-byte characters. */
 345         len = 0;
 346         p = s;
 347         while (*p != NUL)
 348         {
 349             if ((l = (*mb_ptr2len)(p)) > 1)
 350             {
 351                 c = (*mb_ptr2char)(p);
 352                 p += l;
 353                 if (vim_isprintc(c))
 354                     len += l;
 355                 else
 356                 {
 357                     transchar_hex(hexbuf, c);
 358                     len += (int)STRLEN(hexbuf);
 359                 }
 360             }
 361             else
 362             {
 363                 l = byte2cells(*p++);
 364                 if (l > 0)
 365                     len += l;
 366                 else
 367                     len += 4;   /* illegal byte sequence */
 368             }
 369         }
 370         res = alloc((unsigned)(len + 1));
 371     }
 372     else
 373 #endif
 374         res = alloc((unsigned)(vim_strsize(s) + 1));
 375     if (res != NULL)
 376     {
 377         *res = NUL;
 378         p = s;
 379         while (*p != NUL)
 380         {
 381 #ifdef FEAT_MBYTE
 382             if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 383             {
 384                 c = (*mb_ptr2char)(p);
 385                 if (vim_isprintc(c))
 386                     STRNCAT(res, p, l); /* append printable multi-byte char */
 387                 else
 388                     transchar_hex(res + STRLEN(res), c);
 389                 p += l;
 390             }
 391             else
 392 #endif
 393                 STRCAT(res, transchar_byte(*p++));
 394         }
 395     }
 396     return res;
 397 }
 398 #endif
 399
 400 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
 401 /*
 402  * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 403  * current locale.
 404  * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 405  * Otherwise puts the result in "buf[buflen]".
 406  */
 407     char_u *
 408 str_foldcase(str, orglen, buf, buflen)
 409     char_u      *str;
 410     int         orglen;
 411     char_u      *buf;
 412     int         buflen;
 413 {
 414     garray_T    ga;
 415     int         i;
 416     int         len = orglen;
 417
 418 #define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
 419 #define GA_PTR(i)   ((char_u *)ga.ga_data + i)
 420 #define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
 421 #define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)
 422
 423     /* Copy "str" into "buf" or allocated memory, unmodified. */
 424     if (buf == NULL)
 425     {
 426         ga_init2(&ga, 1, 10);
 427         if (ga_grow(&ga, len + 1) == FAIL)
 428             return NULL;
 429         mch_memmove(ga.ga_data, str, (size_t)len);
 430         ga.ga_len = len;
 431     }
 432     else
 433     {
 434         if (len >= buflen)          /* Ugly! */
 435             len = buflen - 1;
 436         mch_memmove(buf, str, (size_t)len);
 437     }
 438     if (buf == NULL)
 439         GA_CHAR(len) = NUL;
 440     else
 441         buf[len] = NUL;
 442
 443     /* Make each character lower case. */
 444     i = 0;
 445     while (STR_CHAR(i) != NUL)
 446     {
 447 #ifdef FEAT_MBYTE
 448         if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
 449         {
 450             if (enc_utf8)
 451             {
 452                 int     c = utf_ptr2char(STR_PTR(i));
 453                 int     ol = utf_ptr2len(STR_PTR(i));
 454                 int     lc = utf_tolower(c);
 455
 456                 /* Only replace the character when it is not an invalid
 457                  * sequence (ASCII character or more than one byte) and
 458                  * utf_tolower() doesn't return the original character. */
 459                 if ((c < 0x80 || ol > 1) && c != lc)
 460                 {
 461                     int     nl = utf_char2len(lc);
 462
 463                     /* If the byte length changes need to shift the following
 464                      * characters forward or backward. */
 465                     if (ol != nl)
 466                     {
 467                         if (nl > ol)
 468                         {
 469                             if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
 470                                                     : len + nl - ol >= buflen)
 471                             {
 472                                 /* out of memory, keep old char */
 473                                 lc = c;
 474                                 nl = ol;
 475                             }
 476                         }
 477                         if (ol != nl)
 478                         {
 479                             if (buf == NULL)
 480                             {
 481                                 STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
 482                                 ga.ga_len += nl - ol;
 483                             }
 484                             else
 485                             {
 486                                 STRMOVE(buf + i + nl, buf + i + ol);
 487                                 len += nl - ol;
 488                             }
 489                         }
 490                     }
 491                     (void)utf_char2bytes(lc, STR_PTR(i));
 492                 }
 493             }
 494             /* skip to next multi-byte char */
 495             i += (*mb_ptr2len)(STR_PTR(i));
 496         }
 497         else
 498 #endif
 499         {
 500             if (buf == NULL)
 501                 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
 502             else
 503                 buf[i] = TOLOWER_LOC(buf[i]);
 504             ++i;
 505         }
 506     }
 507
 508     if (buf == NULL)
 509         return (char_u *)ga.ga_data;
 510     return buf;
 511 }
 512 #endif
 513
 514 /*
 515  * Catch 22: chartab[] can't be initialized before the options are
 516  * initialized, and initializing options may cause transchar() to be called!
 517  * When chartab_initialized == FALSE don't use chartab[].
 518  * Does NOT work for multi-byte characters, c must be <= 255.
 519  * Also doesn't work for the first byte of a multi-byte, "c" must be a
 520  * character!
 521  */
 522 static char_u   transchar_buf[7];
 523
 524     char_u *
 525 transchar(c)
 526     int         c;
 527 {
 528     int                 i;
 529
 530     i = 0;
 531     if (IS_SPECIAL(c))      /* special key code, display as ~@ char */
 532     {
 533         transchar_buf[0] = '~';
 534         transchar_buf[1] = '@';
 535         i = 2;
 536         c = K_SECOND(c);
 537     }
 538
 539     if ((!chartab_initialized && (
 540 #ifdef EBCDIC
 541                     (c >= 64 && c < 255)
 542 #else
 543                     (c >= ' ' && c <= '~')
 544 #endif
 545 #ifdef FEAT_FKMAP
 546                         || F_ischar(c)
 547 #endif
 548                 )) || (c < 256 && vim_isprintc_strict(c)))
 549     {
 550         /* printable character */
 551         transchar_buf[i] = c;
 552         transchar_buf[i + 1] = NUL;
 553     }
 554     else
 555         transchar_nonprint(transchar_buf + i, c);
 556     return transchar_buf;
 557 }
 558
 559 #if defined(FEAT_MBYTE) || defined(PROTO)
 560 /*
 561  * Like transchar(), but called with a byte instead of a character.  Checks
 562  * for an illegal UTF-8 byte.
 563  */
 564     char_u *
 565 transchar_byte(c)
 566     int         c;
 567 {
 568     if (enc_utf8 && c >= 0x80)
 569     {
 570         transchar_nonprint(transchar_buf, c);
 571         return transchar_buf;
 572     }
 573     return transchar(c);
 574 }
 575 #endif
 576
 577 /*
 578  * Convert non-printable character to two or more printable characters in
 579  * "buf[]".  "buf" needs to be able to hold five bytes.
 580  * Does NOT work for multi-byte characters, c must be <= 255.
 581  */
 582     void
 583 transchar_nonprint(buf, c)
 584     char_u      *buf;
 585     int         c;
 586 {
 587     if (c == NL)
 588         c = NUL;                /* we use newline in place of a NUL */
 589     else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
 590         c = NL;                 /* we use CR in place of  NL in this case */
 591
 592     if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
 593         transchar_hex(buf, c);
 594
 595 #ifdef EBCDIC
 596     /* For EBCDIC only the characters 0-63 and 255 are not printable */
 597     else if (CtrlChar(c) != 0 || c == DEL)
 598 #else
 599     else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
 600 #endif
 601     {
 602         buf[0] = '^';
 603 #ifdef EBCDIC
 604         if (c == DEL)
 605             buf[1] = '?';               /* DEL displayed as ^? */
 606         else
 607             buf[1] = CtrlChar(c);
 608 #else
 609         buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
 610 #endif
 611
 612         buf[2] = NUL;
 613     }
 614 #ifdef FEAT_MBYTE
 615     else if (enc_utf8 && c >= 0x80)
 616     {
 617         transchar_hex(buf, c);
 618     }
 619 #endif
 620 #ifndef EBCDIC
 621     else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
 622     {
 623         buf[0] = '|';
 624         buf[1] = c - 0x80;
 625         buf[2] = NUL;
 626     }
 627 #else
 628     else if (c < 64)
 629     {
 630         buf[0] = '~';
 631         buf[1] = MetaChar(c);
 632         buf[2] = NUL;
 633     }
 634 #endif
 635     else                                            /* 0x80 - 0x9f and 0xff */
 636     {
 637         /*
 638          * TODO: EBCDIC I don't know what to do with this chars, so I display
 639          * them as '~?' for now
 640          */
 641         buf[0] = '~';
 642 #ifdef EBCDIC
 643         buf[1] = '?';                   /* 0xff displayed as ~? */
 644 #else
 645         buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
 646 #endif
 647         buf[2] = NUL;
 648     }
 649 }
 650
 651     void
 652 transchar_hex(buf, c)
 653     char_u      *buf;
 654     int         c;
 655 {
 656     int         i = 0;
 657
 658     buf[0] = '<';
 659 #ifdef FEAT_MBYTE
 660     if (c > 255)
 661     {
 662         buf[++i] = nr2hex((unsigned)c >> 12);
 663         buf[++i] = nr2hex((unsigned)c >> 8);
 664     }
 665 #endif
 666     buf[++i] = nr2hex((unsigned)c >> 4);
 667     buf[++i] = nr2hex((unsigned)c);
 668     buf[++i] = '>';
 669     buf[++i] = NUL;
 670 }
 671
 672 /*
 673  * Convert the lower 4 bits of byte "c" to its hex character.
 674  * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 675  * function key 1.
 676  */
 677     static unsigned
 678 nr2hex(c)
 679     unsigned    c;
 680 {
 681     if ((c & 0xf) <= 9)
 682         return (c & 0xf) + '0';
 683     return (c & 0xf) - 10 + 'a';
 684 }
 685
 686 /*
 687  * Return number of display cells occupied by byte "b".
 688  * Caller must make sure 0 <= b <= 255.
 689  * For multi-byte mode "b" must be the first byte of a character.
 690  * A TAB is counted as two cells: "^I".
 691  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
 692  * cells depends on further bytes.
 693  */
 694     int
 695 byte2cells(b)
 696     int         b;
 697 {
 698 #ifdef FEAT_MBYTE
 699     if (enc_utf8 && b >= 0x80)
 700         return 0;
 701 #endif
 702     return (chartab[b] & CT_CELL_MASK);
 703 }
 704
 705 /*
 706  * Return number of display cells occupied by character "c".
 707  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
 708  * A TAB is counted as two cells: "^I" or four: "<09>".
 709  */
 710     int
 711 char2cells(c)
 712     int         c;
 713 {
 714     if (IS_SPECIAL(c))
 715         return char2cells(K_SECOND(c)) + 2;
 716 #ifdef FEAT_MBYTE
 717     if (c >= 0x80)
 718     {
 719         /* UTF-8: above 0x80 need to check the value */
 720         if (enc_utf8)
 721             return utf_char2cells(c);
 722         /* DBCS: double-byte means double-width, except for euc-jp with first
 723          * byte 0x8e */
 724         if (enc_dbcs != 0 && c >= 0x100)
 725         {
 726             if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
 727                 return 1;
 728             return 2;
 729         }
 730     }
 731 #endif
 732     return (chartab[c & 0xff] & CT_CELL_MASK);
 733 }
 734
 735 /*
 736  * Return number of display cells occupied by character at "*p".
 737  * A TAB is counted as two cells: "^I" or four: "<09>".
 738  */
 739     int
 740 ptr2cells(p)
 741     char_u      *p;
 742 {
 743 #ifdef FEAT_MBYTE
 744     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
 745     if (enc_utf8 && *p >= 0x80)
 746         return utf_ptr2cells(p);
 747     /* For DBCS we can tell the cell count from the first byte. */
 748 #endif
 749     return (chartab[*p] & CT_CELL_MASK);
 750 }
 751
 752 /*
 753  * Return the number of characters string "s" will take on the screen,
 754  * counting TABs as two characters: "^I".
 755  */
 756     int
 757 vim_strsize(s)
 758     char_u      *s;
 759 {
 760     return vim_strnsize(s, (int)MAXCOL);
 761 }
 762
 763 /*
 764  * Return the number of characters string "s[len]" will take on the screen,
 765  * counting TABs as two characters: "^I".
 766  */
 767     int
 768 vim_strnsize(s, len)
 769     char_u      *s;
 770     int         len;
 771 {
 772     int         size = 0;
 773
 774     while (*s != NUL && --len >= 0)
 775     {
 776 #ifdef FEAT_MBYTE
 777         if (has_mbyte)
 778         {
 779             int     l = (*mb_ptr2len)(s);
 780
 781             size += ptr2cells(s);
 782             s += l;
 783             len -= l - 1;
 784         }
 785         else
 786 #endif
 787             size += byte2cells(*s++);
 788     }
 789     return size;
 790 }
 791
 792 /*
 793  * Return the number of characters 'c' will take on the screen, taking
 794  * into account the size of a tab.
 795  * Use a define to make it fast, this is used very often!!!
 796  * Also see getvcol() below.
 797  */
 798
 799 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 800     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 801     { \
 802         int ts; \
 803         ts = (buf)->b_p_ts; \
 804         return (int)(ts - (col % ts)); \
 805     } \
 806     else \
 807         return ptr2cells(p);
 808
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
    int
chartabsize(p, col)
    char_u      *p;
    colnr_T     col;
{
    /* Size of the character at "p" when displayed at column "col", using
     * the current window and buffer.  The macro contains the "return"
     * statements: a TAB takes the cells up to the next multiple of the
     * buffer's b_p_ts (unless w_p_list is set and lcs_tab1 is zero),
     * everything else is measured with ptr2cells(). */
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
 819
#ifdef FEAT_LINEBREAK
    static int
win_chartabsize(wp, p, col)
    win_T       *wp;
    char_u      *p;
    colnr_T     col;
{
    /* Size of the character at "p" when displayed at column "col", using
     * window "wp" and its buffer instead of the current ones.  The macro
     * contains the "return" statements. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
 830
 831 /*
 832  * return the number of characters the string 's' will take on the screen,
 833  * taking into account the size of a tab
 834  */
 835     int
 836 linetabsize(s)
 837     char_u      *s;
 838 {
 839     colnr_T     col = 0;
 840
 841     while (*s != NUL)
 842         col += lbr_chartabsize_adv(&s, col);
 843     return (int)col;
 844 }
 845
 846 /*
 847  * Like linetabsize(), but for a given window instead of the current one.
 848  */
 849     int
 850 win_linetabsize(wp, p, len)
 851     win_T       *wp;
 852     char_u      *p;
 853     colnr_T     len;
 854 {
 855     colnr_T     col = 0;
 856     char_u      *s;
 857
 858     for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
 859         col += win_lbr_chartabsize(wp, s, col, NULL);
 860     return (int)col;
 861 }
 862
 863 /*
 864  * Return TRUE if 'c' is a normal identifier character:
 865  * Letters and characters from the 'isident' option.
 866  */
 867     int
 868 vim_isIDc(c)
 869     int c;
 870 {
 871     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
 872 }
 873
 874 /*
 875  * return TRUE if 'c' is a keyword character: Letters and characters from
 876  * 'iskeyword' option for current buffer.
 877  * For multi-byte characters mb_get_class() is used (builtin rules).
 878  */
 879     int
 880 vim_iswordc(c)
 881     int c;
 882 {
 883 #ifdef FEAT_MBYTE
 884     if (c >= 0x100)
 885     {
 886         if (enc_dbcs != 0)
 887             return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
 888         if (enc_utf8)
 889             return utf_class(c) >= 2;
 890     }
 891 #endif
 892     return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
 893 }
 894
 895 /*
 896  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
 897  */
 898     int
 899 vim_iswordp(p)
 900     char_u *p;
 901 {
 902 #ifdef FEAT_MBYTE
 903     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 904         return mb_get_class(p) >= 2;
 905 #endif
 906     return GET_CHARTAB(curbuf, *p) != 0;
 907 }
 908
 909 #if defined(FEAT_SYN_HL) || defined(PROTO)
 910     int
 911 vim_iswordc_buf(p, buf)
 912     char_u      *p;
 913     buf_T       *buf;
 914 {
 915 # ifdef FEAT_MBYTE
 916     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 917         return mb_get_class(p) >= 2;
 918 # endif
 919     return (GET_CHARTAB(buf, *p) != 0);
 920 }
 921 #endif
 922
 923 /*
 924  * return TRUE if 'c' is a valid file-name character
 925  * Assume characters above 0x100 are valid (multi-byte).
 926  */
 927     int
 928 vim_isfilec(c)
 929     int c;
 930 {
 931     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
 932 }
 933
 934 /*
 935  * return TRUE if 'c' is a valid file-name character or a wildcard character
 936  * Assume characters above 0x100 are valid (multi-byte).
 937  * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
 938  * returns false.
 939  */
 940     int
 941 vim_isfilec_or_wc(c)
 942     int c;
 943 {
 944     char_u buf[2];
 945
 946     buf[0] = (char_u)c;
 947     buf[1] = NUL;
 948     return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
 949 }
 950
 951 /*
 952  * return TRUE if 'c' is a printable character
 953  * Assume characters above 0x100 are printable (multi-byte), except for
 954  * Unicode.
 955  */
 956     int
 957 vim_isprintc(c)
 958     int c;
 959 {
 960 #ifdef FEAT_MBYTE
 961     if (enc_utf8 && c >= 0x100)
 962         return utf_printable(c);
 963 #endif
 964     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 965 }
 966
 967 /*
 968  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 969  * byte of a double-byte character.
 970  */
 971     int
 972 vim_isprintc_strict(c)
 973     int c;
 974 {
 975 #ifdef FEAT_MBYTE
 976     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
 977         return FALSE;
 978     if (enc_utf8 && c >= 0x100)
 979         return utf_printable(c);
 980 #endif
 981     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 982 }
 983
/*
 * Like chartabsize(), but also check for line breaks on the screen.
 * Returns the size of the character at "s" when displayed at column "col" in
 * the current window.
 */
    int
lbr_chartabsize(s, col)
    unsigned char       *s;
    colnr_T             col;
{
#ifdef FEAT_LINEBREAK
    /* No 'linebreak' and 'showbreak': the quick way can be used. */
    if (!curwin->w_p_lbr && *p_sbr == NUL)
    {
#endif
#ifdef FEAT_MBYTE
        /* In a wrapping window win_nolbr_chartabsize() computes the size. */
        if (curwin->w_p_wrap)
            return win_nolbr_chartabsize(curwin, s, col, NULL);
#endif
        /* The macro returns the TAB size or the result of ptr2cells(). */
        RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
#ifdef FEAT_LINEBREAK
    }
    /* 'linebreak' or 'showbreak' is in use: do it the elaborate way. */
    return win_lbr_chartabsize(curwin, s, col, NULL);
#endif
}
1006
1007 /*
1008  * Return lbr_chartabsize() for the character at "*s" and then move "*s" on
1009  * to the next character with mb_ptr_adv().
1010  */
1011     int
1012 lbr_chartabsize_adv(s, col)
1013     char_u      **s;
1014     colnr_T     col;
1015 {
1016     int         size = lbr_chartabsize(*s, col);
1017
1018     mb_ptr_adv(*s);             /* only after the size was computed */
1019     return size;
1020 }
1021
1022 /*
1023  * Like lbr_chartabsize(), but for window "wp".  Used very often, keep it
1024  * fast!!!!
1025  * If "headp" is not NULL, set *headp to the size of what is added for the
1026  * 'showbreak' string at the start of the line, plus one for a ">" filler.
1027  * Warning: *headp is not set on every path, init to 0 before calling.
1028  */
1029     int
1030 win_lbr_chartabsize(wp, s, col, headp)
1031     win_T       *wp;
1032     char_u      *s;
1033     colnr_T     col;
1034     int         *headp UNUSED;
1035 {
1036 #ifdef FEAT_LINEBREAK
1037     int         c;
1038     int         size;
1039     colnr_T     col2;
1040     colnr_T     colmax;
1041     int         added;          /* what goes into *headp for 'showbreak' */
1042 # ifdef FEAT_MBYTE
1043     int         mb_added = 0;
1044 # else
1045 #  define mb_added 0
1046 # endif
1047     int         numberextra;
1048     char_u      *ps;
1049     int         tab_corr = (*s == TAB);     /* TRUE when "s" is at a TAB */
1050     int         n;
1051
1052     /*
1053      * No 'linebreak' and 'showbreak': return quickly.
1054      */
1055     if (!wp->w_p_lbr && *p_sbr == NUL)
1056 #endif
1057     {
1058 #ifdef FEAT_MBYTE
1059         if (wp->w_p_wrap)
1060             return win_nolbr_chartabsize(wp, s, col, headp);
1061 #endif
1062         RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1063     }
1064
1065 #ifdef FEAT_LINEBREAK
1066     /*
1067      * First get normal size, without 'linebreak'
1068      */
1069     size = win_chartabsize(wp, s, col);
1070     c = *s;
1071
1072     /*
1073      * If 'linebreak' set check at a blank before a non-blank if the line
1074      * needs a break here
1075      */
1076     if (wp->w_p_lbr
1077             && vim_isbreak(c)
1078             && !vim_isbreak(s[1])
1079             && !wp->w_p_list
1080             && wp->w_p_wrap
1081 # ifdef FEAT_VERTSPLIT
1082             && wp->w_width != 0
1083 # endif
1084        )
1085     {
1086         /*
1087          * Count all characters from first non-blank after a blank up to next
1088          * non-blank after a blank.
1089          */
1090         numberextra = win_col_off(wp);
1091         col2 = col;
1092         colmax = (colnr_T)(W_WIDTH(wp) - numberextra);
1093         if (col >= colmax)
1094         {
1095             n = colmax + win_col_off2(wp);
1096             if (n > 0)
1097                 colmax += (((col - colmax) / n) + 1) * n;
1098         }
1099
1100         for (;;)
1101         {
1102             ps = s;
1103             mb_ptr_adv(s);
1104             c = *s;
1105             if (!(c != NUL
1106                     && (vim_isbreak(c)
1107                         || (!vim_isbreak(c)
1108                             && (col2 == col || !vim_isbreak(*ps))))))
1109                 break;
1110
1111             col2 += win_chartabsize(wp, s, col2);
1112             if (col2 >= colmax)         /* doesn't fit */
1113             {
1114                 size = colmax - col;
1115                 tab_corr = FALSE;
1116                 break;
1117             }
1118         }
1119     }
1120 # ifdef FEAT_MBYTE
1121     else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1122                                     && wp->w_p_wrap && in_win_border(wp, col))
1123     {
1124         ++size;         /* Count the ">" in the last column. */
1125         mb_added = 1;
1126     }
1127 # endif
1128
1129     /*
1130      * May have to add something for 'showbreak' string at start of line
1131      * Set *headp to the size of what we add.
1132      */
1133     added = 0;
1134     if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1135     {
1136         numberextra = win_col_off(wp);
1137         col += numberextra + mb_added;
1138         if (col >= (colnr_T)W_WIDTH(wp))
1139         {
1140             col -= W_WIDTH(wp);
1141             numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1142             if (numberextra > 0)
1143                 col = col % numberextra;
1144         }
1145         if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1146         {
1147             added = vim_strsize(p_sbr);
1148             if (tab_corr)
1149                 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1150             else
1151                 size += added;
1152             if (col != 0)
1153                 added = 0;
1154         }
1155     }
1156     if (headp != NULL)
1157         *headp = added + mb_added;
1158     return size;
1159 #endif
1160 }
1161
1162 #if defined(FEAT_MBYTE) || defined(PROTO)
1163 /*
1164  * Variant of win_lbr_chartabsize() for when 'linebreak' is off and 'wrap' is
1165  * on.  Only a double-width character that does not fit in the last column of
1166  * a screen line is special: it counts as 3 cells and sets *headp to 1.
1167  */
1168     static int
1169 win_nolbr_chartabsize(wp, s, col, headp)
1170     win_T       *wp;
1171     char_u      *s;
1172     colnr_T     col;
1173     int         *headp;
1174 {
1175     int         cells;
1176
1177     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1178     {
1179         /* A TAB runs up to the next multiple of b_p_ts. */
1180         cells = wp->w_buffer->b_p_ts;
1181         return (int)(cells - (col % cells));
1182     }
1183     cells = ptr2cells(s);
1184     if (cells != 2 || MB_BYTE2LEN(*s) <= 1 || !in_win_border(wp, col))
1185         return cells;
1186
1187     /* Add one cell for a double-width character in the last column of the
1188      * window, displayed with a ">". */
1189     if (headp != NULL)
1190         *headp = 1;
1191     return 3;
1192 }
1193
1194 /*
1195  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1196  * "wp".  Return FALSE when follow-up screen lines have no room for text.
1197  */
1198     int
1199 in_win_border(wp, vcol)
1200     win_T       *wp;
1201     colnr_T     vcol;
1202 {
1203     int         width1;         /* width of first line (after line number) */
1204     int         width2;         /* width of further lines */
1205
1206 #ifdef FEAT_VERTSPLIT
1207     if (wp->w_width == 0)       /* there is no border */
1208         return FALSE;
1209 #endif
1210     width1 = W_WIDTH(wp) - win_col_off(wp);
1211     if ((int)vcol <= width1 - 1)        /* in the first screen line */
1212         return (int)vcol == width1 - 1;
1213     width2 = width1 + win_col_off2(wp);
1214     if (width2 <= 0)            /* no text fits, avoid dividing by zero */
1215         return FALSE;
1216     return ((vcol - width1) % width2 == width2 - 1);
1217 }
1218 #endif /* FEAT_MBYTE */
1219
1220 /*
1221  * Get virtual column number of "pos" in window "wp".  Each of the three
1222  * result pointers may be NULL, in which case that value is not stored.
1223  *  start: on the first position of this character (TAB, ctrl)
1224  * cursor: where the cursor is on this character (first char, except for TAB)
1225  *    end: on the last position of this character (TAB, ctrl)
1226  * This is used very often, keep it fast!
1227  */
1228     void
1229 getvcol(wp, pos, start, cursor, end)
1230     win_T       *wp;
1231     pos_T       *pos;
1232     colnr_T     *start;
1233     colnr_T     *cursor;
1234     colnr_T     *end;
1235 {
1236     colnr_T     vcol;
1237     char_u      *ptr;           /* points to current char */
1238     char_u      *posptr;        /* points to char at pos->col */
1239     int         incr;           /* columns taken by the current char */
1240     int         head;           /* columns in front of the current char */
1241     int         ts = wp->w_buffer->b_p_ts;
1242     int         c;
1243
1244     vcol = 0;
1245     ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1246     posptr = ptr + pos->col;
1247
1248     /*
1249      * This function is used very often, do some speed optimizations.
1250      * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1251      * Also use this when 'list' is set but tabs take their normal size.
1252      */
1253     if ((!wp->w_p_list || lcs_tab1 != NUL)
1254 #ifdef FEAT_LINEBREAK
1255             && !wp->w_p_lbr && *p_sbr == NUL
1256 #endif
1257        )
1258     {
1259 #ifndef FEAT_MBYTE
1260         head = 0;
1261 #endif
1262         for (;;)
1263         {
1264 #ifdef FEAT_MBYTE
1265             head = 0;
1266 #endif
1267             c = *ptr;
1268             /* make sure we don't go past the end of the line */
1269             if (c == NUL)
1270             {
1271                 incr = 1;       /* NUL at end of line only takes one column */
1272                 break;
1273             }
1274             /* A tab gets expanded, depending on the current column */
1275             if (c == TAB)
1276                 incr = ts - (vcol % ts);
1277             else
1278             {
1279 #ifdef FEAT_MBYTE
1280                 if (has_mbyte)
1281                 {
1282                     /* For utf-8, if the byte is >= 0x80, need to look at
1283                      * further bytes to find the cell width. */
1284                     if (enc_utf8 && c >= 0x80)
1285                         incr = utf_ptr2cells(ptr);
1286                     else
1287                         incr = CHARSIZE(c);
1288
1289                     /* If a double-cell char doesn't fit at the end of a line
1290                      * it wraps to the next line, it's like this char is three
1291                      * cells wide. */
1292                     if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1293                             && in_win_border(wp, vcol))
1294                     {
1295                         ++incr;
1296                         head = 1;
1297                     }
1298                 }
1299                 else
1300 #endif
1301                     incr = CHARSIZE(c);
1302             }
1303
1304             if (ptr >= posptr)  /* character at pos->col */
1305                 break;
1306
1307             vcol += incr;
1308             mb_ptr_adv(ptr);
1309         }
1310     }
1311     else
1312     {
1313         for (;;)
1314         {
1315             /* Let win_lbr_chartabsize() compute the size and the head. */
1316             head = 0;
1317             incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1318             /* make sure we don't go past the end of the line */
1319             if (*ptr == NUL)
1320             {
1321                 incr = 1;       /* NUL at end of line only takes one column */
1322                 break;
1323             }
1324
1325             if (ptr >= posptr)  /* character at pos->col */
1326                 break;
1327
1328             vcol += incr;
1329             mb_ptr_adv(ptr);
1330         }
1331     }
1332     if (start != NULL)
1333         *start = vcol + head;
1334     if (end != NULL)
1335         *end = vcol + incr - 1;
1336     if (cursor != NULL)
1337     {
1338         if (*ptr == TAB
1339                 && (State & NORMAL)
1340                 && !wp->w_p_list
1341                 && !virtual_active()
1342 #ifdef FEAT_VISUAL
1343                 && !(VIsual_active
1344                                    && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1345 #endif
1346                 )
1347             *cursor = vcol + incr - 1;      /* cursor at end */
1348         else
1349             *cursor = vcol + head;          /* cursor at start */
1350     }
1351 }
1352
1353 /*
1354  * Return the cursor vcol of "posp" in curwin with 'list' temporarily off.
1355  */
1356     colnr_T
1357 getvcol_nolist(posp)
1358     pos_T       *posp;
1359 {
1360     colnr_T     cursor_col;
1361     int         was_list = curwin->w_p_list;
1362
1363     curwin->w_p_list = FALSE;           /* pretend 'list' is not set */
1364     getvcol(curwin, posp, NULL, &cursor_col, NULL);
1365     curwin->w_p_list = was_list;        /* and put the real value back */
1366     return cursor_col;
1367 }
1368
1369 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1370 /*
1371  * Get virtual column in virtual mode.  Without virtual editing active this
1372  * is the same as getvcol().
1373  */
1374     void
1375 getvvcol(wp, pos, start, cursor, end)
1376     win_T       *wp;
1377     pos_T       *pos;
1378     colnr_T     *start;
1379     colnr_T     *cursor;
1380     colnr_T     *end;
1381 {
1382     colnr_T     vcol;           /* start column of the character at "pos" */
1383     colnr_T     extra;          /* offset taken from pos->coladd */
1384     colnr_T     tail = 0;       /* added to the column to get "end" */
1385 # ifdef FEAT_MBYTE
1386     char_u      *line;
1387     int         c;
1388 # endif
1389
1390     if (!virtual_active())
1391     {
1392         getvcol(wp, pos, start, cursor, end);
1393         return;
1394     }
1395
1396     /* For virtual mode, only want one value */
1397     getvcol(wp, pos, &vcol, NULL, NULL);
1398     extra = pos->coladd;
1399 # ifdef FEAT_MBYTE
1400     /* Cannot put the cursor on part of a wide character. */
1401     line = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1402     if (pos->col < (colnr_T)STRLEN(line))
1403     {
1404         c = (*mb_ptr2char)(line + pos->col);
1405         if (c != TAB && vim_isprintc(c))
1406         {
1407             tail = (colnr_T)(char2cells(c) - 1);
1408             if (extra > tail)           /* past end of line */
1409                 tail = 0;
1410             else
1411                 extra = 0;
1412         }
1413     }
1414 # endif
1415     vcol += extra;
1416     if (start != NULL)
1417         *start = vcol;
1418     if (cursor != NULL)
1419         *cursor = vcol;
1420     if (end != NULL)
1421         *end = vcol + tail;
1422 }
1423 #endif
1424
1425 #if defined(FEAT_VISUAL) || defined(PROTO)
1426 /*
1427  * Get the leftmost and rightmost virtual column of pos1 and pos2.
1428  * Used for Visual block mode.  When p_sel starts with 'e' the right edge may
1429  * end just before the start column of the later position.
1430  */
1431     void
1432 getvcols(wp, pos1, pos2, left, right)
1433     win_T       *wp;
1434     pos_T       *pos1, *pos2;
1435     colnr_T     *left, *right;
1436 {
1437     pos_T       *first = pos2;
1438     pos_T       *second = pos1;
1439     colnr_T     first_start, first_end;
1440     colnr_T     second_start, second_end;
1441
1442     /* Measure the position that ltp() puts first before the other one. */
1443     if (ltp(pos1, pos2))
1444     {
1445         first = pos1;
1446         second = pos2;
1447     }
1448     getvvcol(wp, first, &first_start, NULL, &first_end);
1449     getvvcol(wp, second, &second_start, NULL, &second_end);
1450
1451     /* The left edge is the smaller of the two start columns. */
1452     *left = (second_start < first_start) ? second_start : first_start;
1453
1454     /* The right edge is the larger end column, but see above for p_sel. */
1455     if (second_end <= first_end)
1456         *right = first_end;
1457     else if (*p_sel == 'e' && second_start - 1 >= first_end)
1458         *right = second_start - 1;
1459     else
1460         *right = second_end;
1461 }
1462 #endif
1463
1464 /*
1465  * skipwhite: skip over ' ' and '\t', return a pointer to what follows.
1466  */
1467     char_u *
1468 skipwhite(q)
1469     char_u      *q;
1470 {
1471     char_u      *cur;
1472
1473     for (cur = q; vim_iswhite(*cur); ++cur)
1474         ;
1475     return cur;
1476 }
1477
1478 /*
1479  * skipdigits: skip over digits, return a pointer to the first non-digit.
1480  */
1481     char_u *
1482 skipdigits(q)
1483     char_u      *q;
1484 {
1485     char_u      *cur;
1486
1487     for (cur = q; VIM_ISDIGIT(*cur); ++cur)
1488         ;
1489     return cur;
1490 }
1491
1492 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1493 /*
1494  * Skip over digits and hex characters, return a pointer to what follows.
1495  */
1496     char_u *
1497 skiphex(q)
1498     char_u      *q;
1499 {
1500     char_u      *cur;
1501
1502     for (cur = q; vim_isxdigit(*cur); ++cur)
1503         ;
1504     return cur;
1505 }
1506 #endif
1507
1508 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1509 /*
1510  * Return a pointer to the first digit in "q", or to its NUL when none.
1511  */
1512     char_u *
1513 skiptodigit(q)
1514     char_u      *q;
1515 {
1516     char_u      *cur;
1517
1518     for (cur = q; !(*cur == NUL || VIM_ISDIGIT(*cur)); ++cur)
1519         ;
1520     return cur;
1521 }
1522
1523 /*
1524  * Return a pointer to the first hex character in "q", or to its NUL.
1525  */
1526     char_u *
1527 skiptohex(q)
1528     char_u      *q;
1529 {
1530     char_u      *cur;
1531
1532     for (cur = q; !(*cur == NUL || vim_isxdigit(*cur)); ++cur)
1533         ;
1534     return cur;
1535 }
1536 #endif
1537
1538 /*
1539  * Variant of isdigit() that can handle characters > 0x100.
1540  * Only '0' to '9' count; isdigit() is avoided because on some systems it
1541  * also considers superscript 1 to be a digit.
1542  * Use the VIM_ISDIGIT() macro for simple arguments.
1543  */
1544     int
1545 vim_isdigit(c)
1546     int         c;
1547 {
1548     return !(c < '0' || c > '9');
1549 }
1550
1551 /*
1552  * Variant of isxdigit() that can handle characters > 0x100 and, unlike some
1553  * system isxdigit() versions, never accepts superscript digits.
1554  */
1555     int
1556 vim_isxdigit(c)
1557     int         c;
1558 {
1559     int is_decimal = (c >= '0' && c <= '9');
1560     int is_hex_letter = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
1561
1562     return is_decimal || is_hex_letter;
1563 }
1564
1565 #if defined(FEAT_MBYTE) || defined(PROTO)
1566 /*
1567  * Vim's own character class functions.  These exist because many library
1568  * islower()/toupper() etc. do not work properly: they crash when used with
1569  * invalid values or can't handle latin1 when the locale is C.
1570  * Speed is most important here.
1571  */
1572 #define LATIN1LOWER 'l'
1573 #define LATIN1UPPER 'U'
1574
1575 /* Tables indexed by byte value, written in pieces of 16 entries.  Bytes above 0x7f are hex escapes, so that the tables do not depend on the encoding of this file.  In latin1flags 'U' marks an upper case letter, 'l' a lower case letter. */
1576 static char_u latin1flags[257] = "                " "                " "                " "                " " UUUUUUUUUUUUUUU" "UUUUUUUUUUU     " " lllllllllllllll" "lllllllllll     " "                " "                " "                " "                " "UUUUUUUUUUUUUUUU" "UUUUUUU UUUUUUUl" "llllllllllllllll" "lllllll llllllll";
1577 static char_u latin1upper[257] = "                " "                " " !\"#$%&'()*+,-./" "0123456789:;<=>?" "@ABCDEFGHIJKLMNO" "PQRSTUVWXYZ[\\]^_" "`ABCDEFGHIJKLMNO" "PQRSTUVWXYZ{|}~\x7f" "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1578 static char_u latin1lower[257] = "                " "                " " !\"#$%&'()*+,-./" "0123456789:;<=>?" "@abcdefghijklmno" "pqrstuvwxyz[\\]^_" "`abcdefghijklmno" "pqrstuvwxyz{|}~\x7f" "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf" "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
1579
1580     int
1581 vim_islower(c)
1582     int     c;
1583 {
1584     /* Values up to '@' are never reported as lower case. */
1585     if (c <= '@')
1586         return FALSE;
1587     if (c < 0x80)
1588         return islower(c);
1589     if (enc_utf8)
1590         return utf_islower(c);
1591     if (c < 0x100)
1592     {
1593         /* Single byte above ASCII: use the latin1 table when it applies. */
1594         if (enc_latin1like)
1595             return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1596         return islower(c);
1597     }
1598 #ifdef HAVE_ISWLOWER
1599     if (has_mbyte)
1600         return iswlower(c);
1601 #endif
1602     return FALSE;       /* islower() can't handle these and may crash */
1603 }
1604
1605     int
1606 vim_isupper(c)
1607     int     c;
1608 {
1609     /* Values up to '@' are never reported as upper case. */
1610     if (c <= '@')
1611         return FALSE;
1612     if (c < 0x80)
1613         return isupper(c);
1614     if (enc_utf8)
1615         return utf_isupper(c);
1616     if (c < 0x100)
1617     {
1618         /* Single byte above ASCII: use the latin1 table when it applies. */
1619         if (enc_latin1like)
1620             return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1621         return isupper(c);
1622     }
1623 #ifdef HAVE_ISWUPPER
1624     if (has_mbyte)
1625         return iswupper(c);
1626 #endif
1627     return FALSE;       /* isupper() can't handle these and may crash */
1628 }
1629
1630     int
1631 vim_toupper(c)
1632     int     c;
1633 {
1634     /* Values up to '@' are returned unchanged. */
1635     if (c <= '@')
1636         return c;
1637     if (c < 0x80)
1638         return TOUPPER_LOC(c);
1639     if (enc_utf8)
1640         return utf_toupper(c);
1641     if (c < 0x100)
1642     {
1643         /* Single byte above ASCII: use the latin1 table when it applies. */
1644         if (enc_latin1like)
1645             return latin1upper[c];
1646         return TOUPPER_LOC(c);
1647     }
1648 #ifdef HAVE_TOWUPPER
1649     if (has_mbyte)
1650         return towupper(c);
1651 #endif
1652     return c;           /* toupper() can't handle these and may crash */
1653 }
1654
1655     int
1656 vim_tolower(c)
1657     int     c;
1658 {
1659     /* Values up to '@' are returned unchanged. */
1660     if (c <= '@')
1661         return c;
1662     if (c < 0x80)
1663         return TOLOWER_LOC(c);
1664     if (enc_utf8)
1665         return utf_tolower(c);
1666     if (c < 0x100)
1667     {
1668         /* Single byte above ASCII: use the latin1 table when it applies. */
1669         if (enc_latin1like)
1670             return latin1lower[c];
1671         return TOLOWER_LOC(c);
1672     }
1673 #ifdef HAVE_TOWLOWER
1674     if (has_mbyte)
1675         return towlower(c);
1676 #endif
1677     return c;           /* tolower() can't handle these and may crash */
1678 }
1679 #endif
1680
1681 /*
1682  * skiptowhite: return a pointer to the first ' ', '\t' or NUL in "p".
1683  */
1684     char_u *
1685 skiptowhite(p)
1686     char_u      *p;
1687 {
1688     for ( ; !(*p == ' ' || *p == '\t' || *p == NUL); ++p)
1689         ;
1690     return p;
1691 }
1692
1693 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1694         || defined(PROTO)
1695 /*
1696  * skiptowhite_esc: Like skiptowhite(), but a backslash or CTRL-V also takes
1697  * the following character along, so an escaped blank does not end the skip.
1698  */
1699     char_u *
1700 skiptowhite_esc(p)
1701     char_u      *p;
1702 {
1703     for ( ; *p != NUL && *p != ' ' && *p != '\t'; ++p)
1704     {
1705         if ((*p == Ctrl_V || *p == '\\') && p[1] != NUL)
1706             ++p;                /* move on to the escaped character */
1707     }
1708     return p;
1709 }
1710 #endif
1711
1712 /*
1713  * Getdigits: Get a number from a string and skip over it.
1714  * An optional leading '-' is accepted.  A value that does not fit in a long
1715  * is clamped to LONG_MIN/LONG_MAX by strtol() instead of being undefined.
1716  * Note: the argument is a pointer to a char_u pointer!
1717  */
1718     long
1719 getdigits(pp)
1720     char_u **pp;
1721 {
1722     char_u      *p = *pp;
1723     long        retval;
1724
1725     retval = strtol((char *)p, NULL, 10);
1726     if (*p == '-')              /* skip negative sign */
1727         ++p;
1728     *pp = skipdigits(p);        /* leave "*pp" at the next non-digit */
1729     return retval;
1730 }
1731
1732 /*
1733  * Return TRUE if "lbuf" holds nothing but white space up to its end or up
1734  * to the first CR or NL.
1735  */
1736     int
1737 vim_isblankline(lbuf)
1738     char_u      *lbuf;
1739 {
1740     int         c = *skipwhite(lbuf);   /* first non-white character */
1741
1742     return c == NUL || c == '\n' || c == '\r';
1743 }
1744
1745 /*
1746  * Convert the text at "start" into a long and/or unsigned long, taking care
1747  * of hexadecimal and octal numbers.  Accepts a '-' sign.
1748  * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1749  *  0       decimal
1750  *  '0'     octal
1751  *  'X'     hex
1752  *  'x'     hex
1753  * If "len" is not NULL, the length of the number in characters is returned.
1754  * If "nptr" is not NULL, the signed result is returned in it.
1755  * If "unptr" is not NULL, the unsigned result is returned in it.
1756  * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1757  * octal number.
1758  * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1759  * hex number.
1760  * The length includes the '-' and a "0x" or "0X" prefix.  Conversion stops at
1761  * the first character that is not a digit in the base that is used; there
1762  * is no check for overflow.
1763  */
1764     void
1765 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1766     char_u              *start;
1767     int                 *hexp;      /* return: type of number 0 = decimal, 'x'
1768                                        or 'X' is hex, '0' = octal */
1769     int                 *len;       /* return: detected length of number */
1770     int                 dooct;      /* recognize octal number */
1771     int                 dohex;      /* recognize hex number */
1772     long                *nptr;      /* return: signed result */
1773     unsigned long       *unptr;     /* return: unsigned result */
1774 {
1775     char_u          *cur = start;       /* next character to look at */
1776     unsigned long   value = 0;          /* the number, without its sign */
1777     int             kind = 0;           /* result for "hexp", 0 is decimal */
1778     int             is_negative = (start[0] == '-');
1779     int             idx;
1780
1781     if (is_negative)
1782         ++cur;
1783
1784     /*
1785      * Recognize hex and octal: both need a leading '0' that is not followed
1786      * by '8' or '9'.
1787      */
1788     if (cur[0] == '0' && cur[1] != '8' && cur[1] != '9')
1789     {
1790         if (dohex && (cur[1] == 'X' || cur[1] == 'x') && vim_isxdigit(cur[2]))
1791         {
1792             kind = cur[1];              /* hexadecimal */
1793             cur += 2;                   /* continue after the "0x" */
1794         }
1795         else if (dooct)
1796         {
1797             /* Don't interpret "0", "08" or "0129" as octal. */
1798             for (idx = 1; VIM_ISDIGIT(cur[idx]); ++idx)
1799             {
1800                 if (cur[idx] > '7')
1801                 {
1802                     kind = 0;           /* can't be octal */
1803                     break;
1804                 }
1805                 if (cur[idx] > '0')
1806                     kind = '0';         /* assume octal */
1807             }
1808         }
1809     }
1810
1811     /*
1812      * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1813      * A "dooct" or "dohex" above one selects that base without any prefix.
1814      */
1815     if (kind == '0' || dooct > 1)
1816     {
1817         /* octal */
1818         for ( ; '0' <= *cur && *cur <= '7'; ++cur)
1819             value = 8 * value + (unsigned long)(*cur - '0');
1820     }
1821     else if (kind != 0 || dohex > 1)
1822     {
1823         /* hex */
1824         for ( ; vim_isxdigit(*cur); ++cur)
1825             value = 16 * value + (unsigned long)hex2nr(*cur);
1826     }
1827     else
1828     {
1829         /* decimal */
1830         for ( ; VIM_ISDIGIT(*cur); ++cur)
1831             value = 10 * value + (unsigned long)(*cur - '0');
1832     }
1833
1834     /*
1835      * Store only the results that the caller asked for.
1836      */
1837     if (hexp != NULL)
1838         *hexp = kind;
1839
1840     if (len != NULL)
1841         *len = (int)(cur - start);
1842
1843     if (nptr != NULL)
1844     {
1845         /* account for leading '-' for decimal numbers */
1846         if (is_negative)
1847             *nptr = -(long)value;
1848         else
1849             *nptr = (long)value;
1850     }
1851
1852     if (unptr != NULL)
1853         *unptr = value;
1854 }
1855
1856 /*
1857  * Return the value of a single hex character: 0 - 9 for '0' - '9' and
1858  * 10 - 15 for 'a' - 'f' or 'A' - 'F'.
1859  * Only valid when the argument is one of those characters.
1860  */
1861     int
1862 hex2nr(c)
1863     int         c;
1864 {
1865     return (c >= 'a' && c <= 'f') ? c - 'a' + 10
1866          : (c >= 'A' && c <= 'F') ? c - 'A' + 10
1867          : c - '0';
1868
1869 }
1870
1871 #if defined(FEAT_TERMRESPONSE) \
1872         || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1873 /*
1874  * Convert the two hex characters at "p" to a byte, first one is the high
1875  * half.  Return -1 if one of the characters is not hex.
1876  */
1877     int
1878 hexhex2nr(p)
1879     char_u      *p;
1880 {
1881     if (vim_isxdigit(p[0]) && vim_isxdigit(p[1]))
1882         return hex2nr(p[0]) * 16 + hex2nr(p[1]);
1883     return -1;
1884 }
1885 #endif
1886
1887 /*
1888  * Return TRUE if "str" starts with a backslash that should be removed.
1889  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1890  * backslash is not a normal file name character.  '$' is a valid file name
1891  * character there, thus "C:\$VIM\doc" is taken literally and only "$VIM\doc"
1892  * works.  Although "\ name" is valid, the backslash in "Program\ files" must
1893  * be removed: assume a file name doesn't start with a space.
1894  * For multi-byte names a backslash before a non-ASCII byte is never removed,
1895  * all multi-byte characters are assumed to be valid file name characters.
1896  */
1897     int
1898 rem_backslash(str)
1899     char_u  *str;
1900 {
1901     int     next;               /* the byte after the backslash */
1902
1903     if (str[0] != '\\')
1904         return FALSE;
1905     next = str[1];
1906 #ifdef BACKSLASH_IN_FILENAME
1907 # ifdef FEAT_MBYTE
1908     if (next >= 0x80)
1909         return FALSE;
1910 # endif
1911     if (next == ' ')
1912         return TRUE;
1913     return next != NUL && next != '*' && next != '?' && !vim_isfilec(next);
1914 #else
1915     return next != NUL;
1916 #endif
1917 }
1918
1919 /*
1920  * Halve the backslashes in file name "p" in place, see rem_backslash().
1921  */
1922     void
1923 backslash_halve(p)
1924     char_u      *p;
1925 {
1926     char_u      *cur = p;
1927
1928     for ( ; *cur; ++cur)
1929         if (rem_backslash(cur))
1930             STRMOVE(cur, cur + 1);      /* drop the backslash at "cur" */
1931 }
1932
1933 /*
1934  * Return a copy of "p", made with vim_strsave(), in which backslashes were
1935  * halved.  When the copy cannot be made "p" itself is returned, unchanged.
1936  */
1937     char_u *
1938 backslash_halve_save(p)
1939     char_u      *p;
1940 {
1941     char_u      *copy = vim_strsave(p);
1942
1943     if (copy == NULL)
1944         return p;               /* no copy: hand back the original */
1945     backslash_halve(copy);
1946     return copy;
1947 }
1948
1949 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1950 /*
1951  * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1952  * Indexed by the EBCDIC byte, every entry is the result byte in octal.
1953  * The first 64 entries were added to map the control characters of ascii.h.
1954  */
1955 static char_u ebcdic2ascii_tab[256] =
1956 {
1957     0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1958     0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1959     0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1960     0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1961     0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1962     0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1963     0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1964     0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1965     0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1966     0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1967     0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1968     0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1969     0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1970     0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1971     0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1972     0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1973     0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1974     0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1975     0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1976     0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1977     0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1978     0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1979     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1980     0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1981     0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1982     0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1983     0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1984     0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1985     0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1986     0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1987     0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1988     0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1989 };
1990
1991 /*
1992  * Convert "len" bytes in "buffer" from EBCDIC to ASCII, in place, by looking
1993  * up every byte in ebcdic2ascii_tab[].  Nothing is done when "len" is zero
1994  * or negative.
1995  * Only useful if wanting 7-bit ASCII characters out the other end.
1996  */
1997     void
1998 ebcdic2ascii(buffer, len)
1999     char_u      *buffer;
2000     int         len;
2001 {
2002     for ( ; len > 0; --len, ++buffer)
2003         *buffer = ebcdic2ascii_tab[*buffer];
2004 }
2005 #endif