src/charset.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 #include "vim.h"
  11
  12 #ifdef FEAT_LINEBREAK
  13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
  14 #endif
  15
  16 #ifdef FEAT_MBYTE
  17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
  18 #endif
  19
  20 static int nr2hex __ARGS((int c));
  21
  22 static int    chartab_initialized = FALSE;
  23
  24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
  25  * characters 0-255. */
  26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
  27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
  28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
  29
  30 /*
  31  * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
  32  * characters for current buffer.
  33  *
  34  * Depends on the option settings 'iskeyword', 'isident', 'isfname',
  35  * 'isprint' and 'encoding'.
  36  *
  37  * The index in chartab[] depends on 'encoding':
  38  * - For non-multi-byte index with the byte (same as the character).
  39  * - For DBCS index with the first byte.
  40  * - For UTF-8 index with the character (when first byte is up to 0x80 it is
  41  *   the same as the character, if the first byte is 0x80 and above it depends
  42  *   on further bytes).
  43  *
  44  * The contents of chartab[]:
  45  * - The lower two bits, masked by CT_CELL_MASK, give the number of display
  46  *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
  47  * - CT_PRINT_CHAR bit is set when the character is printable (no need to
  48  *   translate the character before displaying it).  Note that only DBCS
  49  *   characters can have 2 display cells and still be printable.
  50  * - CT_FNAME_CHAR bit is set when the character can be in a file name.
  51  * - CT_ID_CHAR bit is set when the character can be in an identifier.
  52  *
  53  * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
  54  * error, OK otherwise.
  55  */
  56     int
  57 init_chartab()
  58 {
  59     return buf_init_chartab(curbuf, TRUE);
  60 }
  61
  62     int
  63 buf_init_chartab(buf, global)
  64     buf_T       *buf;
  65     int         global;         /* FALSE: only set buf->b_chartab[] */
  66 {
  67     int         c;
  68     int         c2;
  69     char_u      *p;
  70     int         i;
  71     int         tilde;
  72     int         do_isalpha;
  73
  74     if (global)
  75     {
  76         /*
  77          * Set the default size for printable characters:
  78          * From <Space> to '~' is 1 (printable), others are 2 (not printable).
  79          * This also inits all 'isident' and 'isfname' flags to FALSE.
  80          *
  81          * EBCDIC: all chars below ' ' are not printable, all others are
  82          * printable.
  83          */
  84         c = 0;
  85         while (c < ' ')
  86             chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
  87 #ifdef EBCDIC
  88         while (c < 255)
  89 #else
  90         while (c <= '~')
  91 #endif
  92             chartab[c++] = 1 + CT_PRINT_CHAR;
  93 #ifdef FEAT_FKMAP
  94         if (p_altkeymap)
  95         {
  96             while (c < YE)
  97                 chartab[c++] = 1 + CT_PRINT_CHAR;
  98         }
  99 #endif
 100         while (c < 256)
 101         {
 102 #ifdef FEAT_MBYTE
 103             /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
 104             if (enc_utf8 && c >= 0xa0)
 105                 chartab[c++] = CT_PRINT_CHAR + 1;
 106             /* euc-jp characters starting with 0x8e are single width */
 107             else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
 108                 chartab[c++] = CT_PRINT_CHAR + 1;
 109             /* other double-byte chars can be printable AND double-width */
 110             else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
 111                 chartab[c++] = CT_PRINT_CHAR + 2;
 112             else
 113 #endif
 114                 /* the rest is unprintable by default */
 115                 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
 116         }
 117
 118 #ifdef FEAT_MBYTE
 119         /* Assume that every multi-byte char is a filename character. */
 120         for (c = 1; c < 256; ++c)
 121             if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
 122                     || (enc_dbcs == DBCS_JPNU && c == 0x8e)
 123                     || (enc_utf8 && c >= 0xa0))
 124                 chartab[c] |= CT_FNAME_CHAR;
 125 #endif
 126     }
 127
 128     /*
 129      * Init word char flags all to FALSE
 130      */
 131     vim_memset(buf->b_chartab, 0, (size_t)32);
 132 #ifdef FEAT_MBYTE
 133     if (enc_dbcs != 0)
 134         for (c = 0; c < 256; ++c)
 135         {
 136             /* double-byte characters are probably word characters */
 137             if (MB_BYTE2LEN(c) == 2)
 138                 SET_CHARTAB(buf, c);
 139         }
 140 #endif
 141
 142 #ifdef FEAT_LISP
 143     /*
 144      * In lisp mode the '-' character is included in keywords.
 145      */
 146     if (buf->b_p_lisp)
 147         SET_CHARTAB(buf, '-');
 148 #endif
 149
 150     /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
 151      * options Each option is a list of characters, character numbers or
 152      * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
 153      */
 154     for (i = global ? 0 : 3; i <= 3; ++i)
 155     {
 156         if (i == 0)
 157             p = p_isi;          /* first round: 'isident' */
 158         else if (i == 1)
 159             p = p_isp;          /* second round: 'isprint' */
 160         else if (i == 2)
 161             p = p_isf;          /* third round: 'isfname' */
 162         else    /* i == 3 */
 163             p = buf->b_p_isk;   /* fourth round: 'iskeyword' */
 164
 165         while (*p)
 166         {
 167             tilde = FALSE;
 168             do_isalpha = FALSE;
 169             if (*p == '^' && p[1] != NUL)
 170             {
 171                 tilde = TRUE;
 172                 ++p;
 173             }
 174             if (VIM_ISDIGIT(*p))
 175                 c = getdigits(&p);
 176             else
 177                 c = *p++;
 178             c2 = -1;
 179             if (*p == '-' && p[1] != NUL)
 180             {
 181                 ++p;
 182                 if (VIM_ISDIGIT(*p))
 183                     c2 = getdigits(&p);
 184                 else
 185                     c2 = *p++;
 186             }
 187             if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
 188                                                  || !(*p == NUL || *p == ','))
 189                 return FAIL;
 190
 191             if (c2 == -1)       /* not a range */
 192             {
 193                 /*
 194                  * A single '@' (not "@-@"):
 195                  * Decide on letters being ID/printable/keyword chars with
 196                  * standard function isalpha(). This takes care of locale for
 197                  * single-byte characters).
 198                  */
 199                 if (c == '@')
 200                 {
 201                     do_isalpha = TRUE;
 202                     c = 1;
 203                     c2 = 255;
 204                 }
 205                 else
 206                     c2 = c;
 207             }
 208             while (c <= c2)
 209             {
 210                 /* Use the MB_ functions here, because isalpha() doesn't
 211                  * work properly when 'encoding' is "latin1" and the locale is
 212                  * "C".  */
 213                 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
 214 #ifdef FEAT_FKMAP
 215                         || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
 216 #endif
 217                             )
 218                 {
 219                     if (i == 0)                 /* (re)set ID flag */
 220                     {
 221                         if (tilde)
 222                             chartab[c] &= ~CT_ID_CHAR;
 223                         else
 224                             chartab[c] |= CT_ID_CHAR;
 225                     }
 226                     else if (i == 1)            /* (re)set printable */
 227                     {
 228                         if ((c < ' '
 229 #ifndef EBCDIC
 230                                     || c > '~'
 231 #endif
 232 #ifdef FEAT_FKMAP
 233                                     || (p_altkeymap
 234                                         && (F_isalpha(c) || F_isdigit(c)))
 235 #endif
 236                             )
 237 #ifdef FEAT_MBYTE
 238                                 /* For double-byte we keep the cell width, so
 239                                  * that we can detect it from the first byte. */
 240                                 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
 241 #endif
 242                            )
 243                         {
 244                             if (tilde)
 245                             {
 246                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
 247                                              + ((dy_flags & DY_UHEX) ? 4 : 2);
 248                                 chartab[c] &= ~CT_PRINT_CHAR;
 249                             }
 250                             else
 251                             {
 252                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
 253                                 chartab[c] |= CT_PRINT_CHAR;
 254                             }
 255                         }
 256                     }
 257                     else if (i == 2)            /* (re)set fname flag */
 258                     {
 259                         if (tilde)
 260                             chartab[c] &= ~CT_FNAME_CHAR;
 261                         else
 262                             chartab[c] |= CT_FNAME_CHAR;
 263                     }
 264                     else /* i == 3 */           /* (re)set keyword flag */
 265                     {
 266                         if (tilde)
 267                             RESET_CHARTAB(buf, c);
 268                         else
 269                             SET_CHARTAB(buf, c);
 270                     }
 271                 }
 272                 ++c;
 273             }
 274             p = skip_to_option_part(p);
 275         }
 276     }
 277     chartab_initialized = TRUE;
 278     return OK;
 279 }
 280
 281 /*
 282  * Translate any special characters in buf[bufsize] in-place.
 283  * The result is a string with only printable characters, but if there is not
 284  * enough room, not all characters will be translated.
 285  */
 286     void
 287 trans_characters(buf, bufsize)
 288     char_u      *buf;
 289     int         bufsize;
 290 {
 291     int         len;            /* length of string needing translation */
 292     int         room;           /* room in buffer after string */
 293     char_u      *trs;           /* translated character */
 294     int         trs_len;        /* length of trs[] */
 295
 296     len = (int)STRLEN(buf);
 297     room = bufsize - len;
 298     while (*buf != 0)
 299     {
 300 # ifdef FEAT_MBYTE
 301         /* Assume a multi-byte character doesn't need translation. */
 302         if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
 303             len -= trs_len;
 304         else
 305 # endif
 306         {
 307             trs = transchar_byte(*buf);
 308             trs_len = (int)STRLEN(trs);
 309             if (trs_len > 1)
 310             {
 311                 room -= trs_len - 1;
 312                 if (room <= 0)
 313                     return;
 314                 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
 315             }
 316             mch_memmove(buf, trs, (size_t)trs_len);
 317             --len;
 318         }
 319         buf += trs_len;
 320     }
 321 }
 322
 323 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
 324         || defined(PROTO)
 325 /*
 326  * Translate a string into allocated memory, replacing special chars with
 327  * printable chars.  Returns NULL when out of memory.
 328  */
 329     char_u *
 330 transstr(s)
 331     char_u      *s;
 332 {
 333     char_u      *res;
 334     char_u      *p;
 335 #ifdef FEAT_MBYTE
 336     int         l, len, c;
 337     char_u      hexbuf[11];
 338 #endif
 339
 340 #ifdef FEAT_MBYTE
 341     if (has_mbyte)
 342     {
 343         /* Compute the length of the result, taking account of unprintable
 344          * multi-byte characters. */
 345         len = 0;
 346         p = s;
 347         while (*p != NUL)
 348         {
 349             if ((l = (*mb_ptr2len)(p)) > 1)
 350             {
 351                 c = (*mb_ptr2char)(p);
 352                 p += l;
 353                 if (vim_isprintc(c))
 354                     len += l;
 355                 else
 356                 {
 357                     transchar_hex(hexbuf, c);
 358                     len += (int)STRLEN(hexbuf);
 359                 }
 360             }
 361             else
 362             {
 363                 l = byte2cells(*p++);
 364                 if (l > 0)
 365                     len += l;
 366                 else
 367                     len += 4;   /* illegal byte sequence */
 368             }
 369         }
 370         res = alloc((unsigned)(len + 1));
 371     }
 372     else
 373 #endif
 374         res = alloc((unsigned)(vim_strsize(s) + 1));
 375     if (res != NULL)
 376     {
 377         *res = NUL;
 378         p = s;
 379         while (*p != NUL)
 380         {
 381 #ifdef FEAT_MBYTE
 382             if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 383             {
 384                 c = (*mb_ptr2char)(p);
 385                 if (vim_isprintc(c))
 386                     STRNCAT(res, p, l); /* append printable multi-byte char */
 387                 else
 388                     transchar_hex(res + STRLEN(res), c);
 389                 p += l;
 390             }
 391             else
 392 #endif
 393                 STRCAT(res, transchar_byte(*p++));
 394         }
 395     }
 396     return res;
 397 }
 398 #endif
 399
 400 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
 401 /*
 402  * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 403  * current locale.
 404  * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 405  * Otherwise puts the result in "buf[buflen]".
 406  */
 407     char_u *
 408 str_foldcase(str, orglen, buf, buflen)
 409     char_u      *str;
 410     int         orglen;
 411     char_u      *buf;
 412     int         buflen;
 413 {
 414     garray_T    ga;
 415     int         i;
 416     int         len = orglen;
 417
 418 #define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
 419 #define GA_PTR(i)   ((char_u *)ga.ga_data + i)
 420 #define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
 421 #define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)
 422
 423     /* Copy "str" into "buf" or allocated memory, unmodified. */
 424     if (buf == NULL)
 425     {
 426         ga_init2(&ga, 1, 10);
 427         if (ga_grow(&ga, len + 1) == FAIL)
 428             return NULL;
 429         mch_memmove(ga.ga_data, str, (size_t)len);
 430         ga.ga_len = len;
 431     }
 432     else
 433     {
 434         if (len >= buflen)          /* Ugly! */
 435             len = buflen - 1;
 436         mch_memmove(buf, str, (size_t)len);
 437     }
 438     if (buf == NULL)
 439         GA_CHAR(len) = NUL;
 440     else
 441         buf[len] = NUL;
 442
 443     /* Make each character lower case. */
 444     i = 0;
 445     while (STR_CHAR(i) != NUL)
 446     {
 447 #ifdef FEAT_MBYTE
 448         if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
 449         {
 450             if (enc_utf8)
 451             {
 452                 int     c, lc;
 453
 454                 c = utf_ptr2char(STR_PTR(i));
 455                 lc = utf_tolower(c);
 456                 if (c != lc)
 457                 {
 458                     int     ol = utf_char2len(c);
 459                     int     nl = utf_char2len(lc);
 460
 461                     /* If the byte length changes need to shift the following
 462                      * characters forward or backward. */
 463                     if (ol != nl)
 464                     {
 465                         if (nl > ol)
 466                         {
 467                             if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
 468                                                     : len + nl - ol >= buflen)
 469                             {
 470                                 /* out of memory, keep old char */
 471                                 lc = c;
 472                                 nl = ol;
 473                             }
 474                         }
 475                         if (ol != nl)
 476                         {
 477                             if (buf == NULL)
 478                             {
 479                                 mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
 480                                                   STRLEN(GA_PTR(i) + ol) + 1);
 481                                 ga.ga_len += nl - ol;
 482                             }
 483                             else
 484                             {
 485                                 mch_memmove(buf + i + nl, buf + i + ol,
 486                                                     STRLEN(buf + i + ol) + 1);
 487                                 len += nl - ol;
 488                             }
 489                         }
 490                     }
 491                     (void)utf_char2bytes(lc, STR_PTR(i));
 492                 }
 493             }
 494             /* skip to next multi-byte char */
 495             i += (*mb_ptr2len)(STR_PTR(i));
 496         }
 497         else
 498 #endif
 499         {
 500             if (buf == NULL)
 501                 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
 502             else
 503                 buf[i] = TOLOWER_LOC(buf[i]);
 504             ++i;
 505         }
 506     }
 507
 508     if (buf == NULL)
 509         return (char_u *)ga.ga_data;
 510     return buf;
 511 }
 512 #endif
 513
 514 /*
 515  * Catch 22: chartab[] can't be initialized before the options are
 516  * initialized, and initializing options may cause transchar() to be called!
 517  * When chartab_initialized == FALSE don't use chartab[].
 518  * Does NOT work for multi-byte characters, c must be <= 255.
 519  * Also doesn't work for the first byte of a multi-byte, "c" must be a
 520  * character!
 521  */
 522 static char_u   transchar_buf[7];
 523
 524     char_u *
 525 transchar(c)
 526     int         c;
 527 {
 528     int                 i;
 529
 530     i = 0;
 531     if (IS_SPECIAL(c))      /* special key code, display as ~@ char */
 532     {
 533         transchar_buf[0] = '~';
 534         transchar_buf[1] = '@';
 535         i = 2;
 536         c = K_SECOND(c);
 537     }
 538
 539     if ((!chartab_initialized && (
 540 #ifdef EBCDIC
 541                     (c >= 64 && c < 255)
 542 #else
 543                     (c >= ' ' && c <= '~')
 544 #endif
 545 #ifdef FEAT_FKMAP
 546                         || F_ischar(c)
 547 #endif
 548                 )) || (c < 256 && vim_isprintc_strict(c)))
 549     {
 550         /* printable character */
 551         transchar_buf[i] = c;
 552         transchar_buf[i + 1] = NUL;
 553     }
 554     else
 555         transchar_nonprint(transchar_buf + i, c);
 556     return transchar_buf;
 557 }
 558
 559 #if defined(FEAT_MBYTE) || defined(PROTO)
 560 /*
 561  * Like transchar(), but called with a byte instead of a character.  Checks
 562  * for an illegal UTF-8 byte.
 563  */
 564     char_u *
 565 transchar_byte(c)
 566     int         c;
 567 {
 568     if (enc_utf8 && c >= 0x80)
 569     {
 570         transchar_nonprint(transchar_buf, c);
 571         return transchar_buf;
 572     }
 573     return transchar(c);
 574 }
 575 #endif
 576
 577 /*
 578  * Convert non-printable character to two or more printable characters in
 579  * "buf[]".  "buf" needs to be able to hold five bytes.
 580  * Does NOT work for multi-byte characters, c must be <= 255.
 581  */
 582     void
 583 transchar_nonprint(buf, c)
 584     char_u      *buf;
 585     int         c;
 586 {
 587     if (c == NL)
 588         c = NUL;                /* we use newline in place of a NUL */
 589     else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
 590         c = NL;                 /* we use CR in place of  NL in this case */
 591
 592     if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
 593         transchar_hex(buf, c);
 594
 595 #ifdef EBCDIC
 596     /* For EBCDIC only the characters 0-63 and 255 are not printable */
 597     else if (CtrlChar(c) != 0 || c == DEL)
 598 #else
 599     else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
 600 #endif
 601     {
 602         buf[0] = '^';
 603 #ifdef EBCDIC
 604         if (c == DEL)
 605             buf[1] = '?';               /* DEL displayed as ^? */
 606         else
 607             buf[1] = CtrlChar(c);
 608 #else
 609         buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
 610 #endif
 611
 612         buf[2] = NUL;
 613     }
 614 #ifdef FEAT_MBYTE
 615     else if (enc_utf8 && c >= 0x80)
 616     {
 617         transchar_hex(buf, c);
 618     }
 619 #endif
 620 #ifndef EBCDIC
 621     else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
 622     {
 623         buf[0] = '|';
 624         buf[1] = c - 0x80;
 625         buf[2] = NUL;
 626     }
 627 #else
 628     else if (c < 64)
 629     {
 630         buf[0] = '~';
 631         buf[1] = MetaChar(c);
 632         buf[2] = NUL;
 633     }
 634 #endif
 635     else                                            /* 0x80 - 0x9f and 0xff */
 636     {
 637         /*
 638          * TODO: EBCDIC I don't know what to do with this chars, so I display
 639          * them as '~?' for now
 640          */
 641         buf[0] = '~';
 642 #ifdef EBCDIC
 643         buf[1] = '?';                   /* 0xff displayed as ~? */
 644 #else
 645         buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
 646 #endif
 647         buf[2] = NUL;
 648     }
 649 }
 650
 651     void
 652 transchar_hex(buf, c)
 653     char_u      *buf;
 654     int         c;
 655 {
 656     int         i = 0;
 657
 658     buf[0] = '<';
 659 #ifdef FEAT_MBYTE
 660     if (c > 255)
 661     {
 662         buf[++i] = nr2hex((unsigned)c >> 12);
 663         buf[++i] = nr2hex((unsigned)c >> 8);
 664     }
 665 #endif
 666     buf[++i] = nr2hex((unsigned)c >> 4);
 667     buf[++i] = nr2hex(c);
 668     buf[++i] = '>';
 669     buf[++i] = NUL;
 670 }
 671
 672 /*
 673  * Convert the lower 4 bits of byte "c" to its hex character.
 674  * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 675  * function key 1.
 676  */
 677     static int
 678 nr2hex(c)
 679     int         c;
 680 {
 681     if ((c & 0xf) <= 9)
 682         return (c & 0xf) + '0';
 683     return (c & 0xf) - 10 + 'a';
 684 }
 685
 686 /*
 687  * Return number of display cells occupied by byte "b".
 688  * Caller must make sure 0 <= b <= 255.
 689  * For multi-byte mode "b" must be the first byte of a character.
 690  * A TAB is counted as two cells: "^I".
 691  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
 692  * cells depends on further bytes.
 693  */
 694     int
 695 byte2cells(b)
 696     int         b;
 697 {
 698 #ifdef FEAT_MBYTE
 699     if (enc_utf8 && b >= 0x80)
 700         return 0;
 701 #endif
 702     return (chartab[b] & CT_CELL_MASK);
 703 }
 704
 705 /*
 706  * Return number of display cells occupied by character "c".
 707  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
 708  * A TAB is counted as two cells: "^I" or four: "<09>".
 709  */
 710     int
 711 char2cells(c)
 712     int         c;
 713 {
 714     if (IS_SPECIAL(c))
 715         return char2cells(K_SECOND(c)) + 2;
 716 #ifdef FEAT_MBYTE
 717     if (c >= 0x80)
 718     {
 719         /* UTF-8: above 0x80 need to check the value */
 720         if (enc_utf8)
 721             return utf_char2cells(c);
 722         /* DBCS: double-byte means double-width, except for euc-jp with first
 723          * byte 0x8e */
 724         if (enc_dbcs != 0 && c >= 0x100)
 725         {
 726             if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
 727                 return 1;
 728             return 2;
 729         }
 730     }
 731 #endif
 732     return (chartab[c & 0xff] & CT_CELL_MASK);
 733 }
 734
 735 /*
 736  * Return number of display cells occupied by character at "*p".
 737  * A TAB is counted as two cells: "^I" or four: "<09>".
 738  */
 739     int
 740 ptr2cells(p)
 741     char_u      *p;
 742 {
 743 #ifdef FEAT_MBYTE
 744     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
 745     if (enc_utf8 && *p >= 0x80)
 746         return utf_ptr2cells(p);
 747     /* For DBCS we can tell the cell count from the first byte. */
 748 #endif
 749     return (chartab[*p] & CT_CELL_MASK);
 750 }
 751
 752 /*
 753  * Return the number of characters string "s" will take on the screen,
 754  * counting TABs as two characters: "^I".
 755  */
 756     int
 757 vim_strsize(s)
 758     char_u      *s;
 759 {
 760     return vim_strnsize(s, (int)MAXCOL);
 761 }
 762
 763 /*
 764  * Return the number of characters string "s[len]" will take on the screen,
 765  * counting TABs as two characters: "^I".
 766  */
 767     int
 768 vim_strnsize(s, len)
 769     char_u      *s;
 770     int         len;
 771 {
 772     int         size = 0;
 773
 774     while (*s != NUL && --len >= 0)
 775     {
 776 #ifdef FEAT_MBYTE
 777         if (has_mbyte)
 778         {
 779             int     l = (*mb_ptr2len)(s);
 780
 781             size += ptr2cells(s);
 782             s += l;
 783             len -= l - 1;
 784         }
 785         else
 786 #endif
 787             size += byte2cells(*s++);
 788     }
 789     return size;
 790 }
 791
 792 /*
 793  * Return the number of characters 'c' will take on the screen, taking
 794  * into account the size of a tab.
 795  * Use a define to make it fast, this is used very often!!!
 796  * Also see getvcol() below.
 797  */
 798
 799 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 800     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 801     { \
 802         int ts; \
 803         ts = (buf)->b_p_ts; \
 804         return (int)(ts - (col % ts)); \
 805     } \
 806     else \
 807         return ptr2cells(p);
 808
 809 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
 810         || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
 811     int
 812 chartabsize(p, col)
 813     char_u      *p;
 814     colnr_T     col;
 815 {
 816     RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
 817 }
 818 #endif
 819
 820 #ifdef FEAT_LINEBREAK
 821     static int
 822 win_chartabsize(wp, p, col)
 823     win_T       *wp;
 824     char_u      *p;
 825     colnr_T     col;
 826 {
 827     RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
 828 }
 829 #endif
 830
 831 /*
 832  * return the number of characters the string 's' will take on the screen,
 833  * taking into account the size of a tab
 834  */
 835     int
 836 linetabsize(s)
 837     char_u      *s;
 838 {
 839     colnr_T     col = 0;
 840
 841     while (*s != NUL)
 842         col += lbr_chartabsize_adv(&s, col);
 843     return (int)col;
 844 }
 845
 846 /*
 847  * Like linetabsize(), but for a given window instead of the current one.
 848  */
 849     int
 850 win_linetabsize(wp, p, len)
 851     win_T       *wp;
 852     char_u      *p;
 853     colnr_T     len;
 854 {
 855     colnr_T     col = 0;
 856     char_u      *s;
 857
 858     for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
 859         col += win_lbr_chartabsize(wp, s, col, NULL);
 860     return (int)col;
 861 }
 862
 863 /*
 864  * Return TRUE if 'c' is a normal identifier character:
 865  * Letters and characters from the 'isident' option.
 866  */
 867     int
 868 vim_isIDc(c)
 869     int c;
 870 {
 871     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
 872 }
 873
 874 /*
 875  * return TRUE if 'c' is a keyword character: Letters and characters from
 876  * 'iskeyword' option for current buffer.
 877  * For multi-byte characters mb_get_class() is used (builtin rules).
 878  */
 879     int
 880 vim_iswordc(c)
 881     int c;
 882 {
 883 #ifdef FEAT_MBYTE
 884     if (c >= 0x100)
 885     {
 886         if (enc_dbcs != 0)
 887             return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
 888         if (enc_utf8)
 889             return utf_class(c) >= 2;
 890     }
 891 #endif
 892     return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
 893 }
 894
 895 /*
 896  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
 897  */
 898     int
 899 vim_iswordp(p)
 900     char_u *p;
 901 {
 902 #ifdef FEAT_MBYTE
 903     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 904         return mb_get_class(p) >= 2;
 905 #endif
 906     return GET_CHARTAB(curbuf, *p) != 0;
 907 }
 908
 909 #if defined(FEAT_SYN_HL) || defined(PROTO)
 910     int
 911 vim_iswordc_buf(p, buf)
 912     char_u      *p;
 913     buf_T       *buf;
 914 {
 915 # ifdef FEAT_MBYTE
 916     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 917         return mb_get_class(p) >= 2;
 918 # endif
 919     return (GET_CHARTAB(buf, *p) != 0);
 920 }
 921 #endif
 922
 923 /*
 924  * return TRUE if 'c' is a valid file-name character
 925  * Assume characters above 0x100 are valid (multi-byte).
 926  */
 927     int
 928 vim_isfilec(c)
 929     int c;
 930 {
 931     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
 932 }
 933
 934 /*
 935  * return TRUE if 'c' is a valid file-name character or a wildcard character
 936  * Assume characters above 0x100 are valid (multi-byte).
 937  * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
 938  * returns false.
 939  */
 940     int
 941 vim_isfilec_or_wc(c)
 942     int c;
 943 {
 944     char_u buf[2];
 945
 946     buf[0] = (char_u)c;
 947     buf[1] = NUL;
 948     return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
 949 }
 950
 951 /*
 952  * return TRUE if 'c' is a printable character
 953  * Assume characters above 0x100 are printable (multi-byte), except for
 954  * Unicode.
 955  */
 956     int
 957 vim_isprintc(c)
 958     int c;
 959 {
 960 #ifdef FEAT_MBYTE
 961     if (enc_utf8 && c >= 0x100)
 962         return utf_printable(c);
 963 #endif
 964     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 965 }
 966
 967 /*
 968  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 969  * byte of a double-byte character.
 970  */
 971     int
 972 vim_isprintc_strict(c)
 973     int c;
 974 {
 975 #ifdef FEAT_MBYTE
 976     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
 977         return FALSE;
 978     if (enc_utf8 && c >= 0x100)
 979         return utf_printable(c);
 980 #endif
 981     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 982 }
 983
 984 /*
 985  * like chartabsize(), but also check for line breaks on the screen
 986  */
 987     int
 988 lbr_chartabsize(s, col)
 989     unsigned char       *s;
 990     colnr_T             col;
 991 {
 992 #ifdef FEAT_LINEBREAK
 993     if (!curwin->w_p_lbr && *p_sbr == NUL)
 994     {
 995 #endif
 996 #ifdef FEAT_MBYTE
 997         if (curwin->w_p_wrap)
 998             return win_nolbr_chartabsize(curwin, s, col, NULL);
 999 #endif
1000         RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1001 #ifdef FEAT_LINEBREAK
1002     }
1003     return win_lbr_chartabsize(curwin, s, col, NULL);
1004 #endif
1005 }
1006
1007 /*
1008  * Call lbr_chartabsize() and advance the pointer.
1009  */
1010     int
1011 lbr_chartabsize_adv(s, col)
1012     char_u      **s;
1013     colnr_T     col;
1014 {
1015     int         retval;
1016
1017     retval = lbr_chartabsize(*s, col);
1018     mb_ptr_adv(*s);
1019     return retval;
1020 }
1021
1022 /*
1023  * This function is used very often, keep it fast!!!!
1024  *
      * Return the number of screen cells used by the character at "s" when it
      * starts at virtual column "col" of window "wp".  On top of the plain
      * character size this accounts for:
      * - 'linebreak': a break character followed by text that doesn't fit
      *   before the end of the screen line fills up the rest of that line;
      * - a double-wide character in the last column of a wrapping window,
      *   which takes one extra cell;
      * - 'showbreak': its display width is added for a character that starts
      *   a wrapped screen line or doesn't fit on the current one.
      *
1025  * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
1026  * string at start of line.  Warning: *headp is only set if it's a non-zero
1027  * value, init to 0 before calling.
1028  */
1029 /*ARGSUSED*/
1030     int
1031 win_lbr_chartabsize(wp, s, col, headp)
1032     win_T       *wp;
1033     char_u      *s;
1034     colnr_T     col;
1035     int         *headp;
1036 {
1037 #ifdef FEAT_LINEBREAK
1038     int         c;
1039     int         size;
1040     colnr_T     col2;
1041     colnr_T     colmax;
1042     int         added;
1043 # ifdef FEAT_MBYTE
1044     int         mb_added = 0;
1045 # else
1046 #  define mb_added 0
1047 # endif
1048     int         numberextra;
1049     char_u      *ps;
1050     int         tab_corr = (*s == TAB);
1051     int         n;
1052
1053     /*
1054      * No 'linebreak' and 'showbreak': return quickly.
          * Without FEAT_LINEBREAK the block below is the whole function.
1055      */
1056     if (!wp->w_p_lbr && *p_sbr == NUL)
1057 #endif
1058     {
1059 #ifdef FEAT_MBYTE
1060         if (wp->w_p_wrap)
1061             return win_nolbr_chartabsize(wp, s, col, headp);
1062 #endif
1063         RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1064     }
1065
1066 #ifdef FEAT_LINEBREAK
1067     /*
1068      * First get normal size, without 'linebreak'
1069      */
1070     size = win_chartabsize(wp, s, col);
1071     c = *s;
1072
1073     /*
1074      * If 'linebreak' set check at a blank before a non-blank if the line
1075      * needs a break here
1076      */
1077     if (wp->w_p_lbr
1078             && vim_isbreak(c)
1079             && !vim_isbreak(s[1])
1080             && !wp->w_p_list
1081             && wp->w_p_wrap
1082 # ifdef FEAT_VERTSPLIT
1083             && wp->w_width != 0
1084 # endif
1085        )
1086     {
1087         /*
1088          * Count all characters from first non-blank after a blank up to next
1089          * non-blank after a blank.
1090          */
1091         numberextra = win_col_off(wp);
1092         col2 = col;
1093         colmax = W_WIDTH(wp) - numberextra;
             /* "col" is not in the first screen line: move "colmax" to the end
              * of the screen line that contains "col". */
1094         if (col >= colmax)
1095         {
1096             n = colmax + win_col_off2(wp);
1097             if (n > 0)
1098                 colmax += (((col - colmax) / n) + 1) * n;
1099         }
1100
1101         for (;;)
1102         {
1103             ps = s;
1104             mb_ptr_adv(s);
1105             c = *s;
1106             if (!(c != NUL
1107                     && (vim_isbreak(c)
1108                         || (!vim_isbreak(c)
1109                             && (col2 == col || !vim_isbreak(*ps))))))
1110                 break;
1111
1112             col2 += win_chartabsize(wp, s, col2);
1113             if (col2 >= colmax)         /* doesn't fit */
1114             {
                     /* The break character takes up the rest of the line. */
1115                 size = colmax - col;
1116                 tab_corr = FALSE;
1117                 break;
1118             }
1119         }
1120     }
1121 # ifdef FEAT_MBYTE
1122     else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1123                                     && wp->w_p_wrap && in_win_border(wp, col))
1124     {
1125         ++size;         /* Count the ">" in the last column. */
1126         mb_added = 1;
1127     }
1128 # endif
1129
1130     /*
1131      * May have to add something for 'showbreak' string at start of line
1132      * Set *headp to the size of what we add.
1133      */
1134     added = 0;
1135     if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1136     {
1137         numberextra = win_col_off(wp);
1138         col += numberextra + mb_added;
             /* Past the first screen line: reduce "col" to the column within
              * the screen line it is in. */
1139         if (col >= (colnr_T)W_WIDTH(wp))
1140         {
1141             col -= W_WIDTH(wp);
1142             numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1143             if (numberextra > 0)
1144                 col = col % numberextra;
1145         }
1146         if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1147         {
1148             added = vim_strsize(p_sbr);
                 /* For a TAB only add whole multiples of 'tabstop'. */
1149             if (tab_corr)
1150                 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1151             else
1152                 size += added;
                 /* Only report the 'showbreak' size in *headp when the
                  * character is at the start of the screen line. */
1153             if (col != 0)
1154                 added = 0;
1155         }
1156     }
1157     if (headp != NULL)
1158         *headp = added + mb_added;
1159     return size;
1160 #endif
1161 }
1162
1163 #if defined(FEAT_MBYTE) || defined(PROTO)
1164 /*
1165  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1166  * 'wrap' is on.  This means we need to check for a double-byte character that
1167  * doesn't fit at the end of the screen line.
1168  */
1169     static int
1170 win_nolbr_chartabsize(wp, s, col, headp)
1171     win_T       *wp;
1172     char_u      *s;
1173     colnr_T     col;
1174     int         *headp;
1175 {
1176     int         n;
1177
1178     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1179     {
1180         n = wp->w_buffer->b_p_ts;
1181         return (int)(n - (col % n));
1182     }
1183     n = ptr2cells(s);
1184     /* Add one cell for a double-width character in the last column of the
1185      * window, displayed with a ">". */
1186     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1187     {
1188         if (headp != NULL)
1189             *headp = 1;
1190         return 3;
1191     }
1192     return n;
1193 }
1194
1195 /*
1196  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1197  * "wp".
1198  */
1199     int
1200 in_win_border(wp, vcol)
1201     win_T       *wp;
1202     colnr_T     vcol;
1203 {
1204     colnr_T     width1;         /* width of first line (after line number) */
1205     colnr_T     width2;         /* width of further lines */
1206
1207 #ifdef FEAT_VERTSPLIT
1208     if (wp->w_width == 0)       /* there is no border */
1209         return FALSE;
1210 #endif
1211     width1 = W_WIDTH(wp) - win_col_off(wp);
1212     if (vcol < width1 - 1)
1213         return FALSE;
1214     if (vcol == width1 - 1)
1215         return TRUE;
1216     width2 = width1 + win_col_off2(wp);
1217     return ((vcol - width1) % width2 == width2 - 1);
1218 }
1219 #endif /* FEAT_MBYTE */
1220
1221 /*
1222  * Get virtual column number of pos.
1223  *  start: on the first position of this character (TAB, ctrl)
1224  * cursor: where the cursor is on this character (first char, except for TAB)
1225  *    end: on the last position of this character (TAB, ctrl)
1226  *
      * The text is taken from line pos->lnum in the buffer of window "wp".
      * Each of "start", "cursor" and "end" may be NULL when that value is not
      * wanted.
      * When pos->col is beyond the end of the line the values for the NUL that
      * terminates the line are returned; it counts as one column.
      *
1227  * This is used very often, keep it fast!
1228  */
1229     void
1230 getvcol(wp, pos, start, cursor, end)
1231     win_T       *wp;
1232     pos_T       *pos;
1233     colnr_T     *start;
1234     colnr_T     *cursor;
1235     colnr_T     *end;
1236 {
1237     colnr_T     vcol;
1238     char_u      *ptr;           /* points to current char */
1239     char_u      *posptr;        /* points to char at pos->col */
1240     int         incr;
1241     int         head;
1242     int         ts = wp->w_buffer->b_p_ts;
1243     int         c;
1244
1245     vcol = 0;
1246     ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1247     posptr = ptr + pos->col;
1248
1249     /*
1250      * This function is used very often, do some speed optimizations.
1251      * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1252      * Also use this when 'list' is set but tabs take their normal size.
1253      */
1254     if ((!wp->w_p_list || lcs_tab1 != NUL)
1255 #ifdef FEAT_LINEBREAK
1256             && !wp->w_p_lbr && *p_sbr == NUL
1257 #endif
1258        )
1259     {
             /* Without FEAT_MBYTE "head" never changes, set it only once. */
1260 #ifndef FEAT_MBYTE
1261         head = 0;
1262 #endif
1263         for (;;)
1264         {
1265 #ifdef FEAT_MBYTE
1266             head = 0;
1267 #endif
1268             c = *ptr;
1269             /* make sure we don't go past the end of the line */
1270             if (c == NUL)
1271             {
1272                 incr = 1;       /* NUL at end of line only takes one column */
1273                 break;
1274             }
1275             /* A tab gets expanded, depending on the current column */
1276             if (c == TAB)
1277                 incr = ts - (vcol % ts);
1278             else
1279             {
1280 #ifdef FEAT_MBYTE
1281                 if (has_mbyte)
1282                 {
1283                     /* For utf-8, if the byte is >= 0x80, need to look at
1284                      * further bytes to find the cell width. */
1285                     if (enc_utf8 && c >= 0x80)
1286                         incr = utf_ptr2cells(ptr);
1287                     else
1288                         incr = CHARSIZE(c);
1289
1290                     /* If a double-cell char doesn't fit at the end of a line
1291                      * it wraps to the next line, it's like this char is three
1292                      * cells wide. */
1293                     if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
1294                     {
1295                         ++incr;
1296                         head = 1;
1297                     }
1298                 }
1299                 else
1300 #endif
1301                     incr = CHARSIZE(c);
1302             }
1303
1304             if (ptr >= posptr)  /* character at pos->col */
1305                 break;
1306
1307             vcol += incr;
1308             mb_ptr_adv(ptr);
1309         }
1310     }
1311     else
1312     {
             /* Slow loop: let win_lbr_chartabsize() compute the size of every
              * character. */
1313         for (;;)
1314         {
1315             /* A tab gets expanded, depending on the current column */
1316             head = 0;
1317             incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1318             /* make sure we don't go past the end of the line */
1319             if (*ptr == NUL)
1320             {
1321                 incr = 1;       /* NUL at end of line only takes one column */
1322                 break;
1323             }
1324
1325             if (ptr >= posptr)  /* character at pos->col */
1326                 break;
1327
1328             vcol += incr;
1329             mb_ptr_adv(ptr);
1330         }
1331     }
         /* Here "vcol" is the column where the cells of the character start,
          * "incr" the number of cells it takes and "head" the number of those
          * cells that come before the character itself. */
1332     if (start != NULL)
1333         *start = vcol + head;
1334     if (end != NULL)
1335         *end = vcol + incr - 1;
1336     if (cursor != NULL)
1337     {
1338         if (*ptr == TAB
1339                 && (State & NORMAL)
1340                 && !wp->w_p_list
1341                 && !virtual_active()
1342 #ifdef FEAT_VISUAL
1343                 && !(VIsual_active
1344                                    && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1345 #endif
1346                 )
1347             *cursor = vcol + incr - 1;      /* cursor at end */
1348         else
1349             *cursor = vcol + head;          /* cursor at start */
1350     }
1351 }
1352
1353 /*
1354  * Get virtual cursor column in the current window, pretending 'list' is off.
1355  */
1356     colnr_T
1357 getvcol_nolist(posp)
1358     pos_T       *posp;
1359 {
1360     int         list_save = curwin->w_p_list;
1361     colnr_T     vcol;
1362
1363     curwin->w_p_list = FALSE;
1364     getvcol(curwin, posp, NULL, &vcol, NULL);
1365     curwin->w_p_list = list_save;
1366     return vcol;
1367 }
1368
1369 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1370 /*
1371  * Get virtual column in virtual mode.
1372  */
1373     void
1374 getvvcol(wp, pos, start, cursor, end)
1375     win_T       *wp;
1376     pos_T       *pos;
1377     colnr_T     *start;
1378     colnr_T     *cursor;
1379     colnr_T     *end;
1380 {
1381     colnr_T     col;
1382     colnr_T     coladd;
1383     colnr_T     endadd;
1384 # ifdef FEAT_MBYTE
1385     char_u      *ptr;
1386 # endif
1387
1388     if (virtual_active())
1389     {
1390         /* For virtual mode, only want one value */
1391         getvcol(wp, pos, &col, NULL, NULL);
1392
1393         coladd = pos->coladd;
1394         endadd = 0;
1395 # ifdef FEAT_MBYTE
1396         /* Cannot put the cursor on part of a wide character. */
1397         ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1398         if (pos->col < STRLEN(ptr))
1399         {
1400             int c = (*mb_ptr2char)(ptr + pos->col);
1401
1402             if (c != TAB && vim_isprintc(c))
1403             {
1404                 endadd = char2cells(c) - 1;
1405                 if (coladd > endadd)    /* past end of line */
1406                     endadd = 0;
1407                 else
1408                     coladd = 0;
1409             }
1410         }
1411 # endif
1412         col += coladd;
1413         if (start != NULL)
1414             *start = col;
1415         if (cursor != NULL)
1416             *cursor = col;
1417         if (end != NULL)
1418             *end = col + endadd;
1419     }
1420     else
1421         getvcol(wp, pos, start, cursor, end);
1422 }
1423 #endif
1424
1425 #if defined(FEAT_VISUAL) || defined(PROTO)
1426 /*
1427  * Get the leftmost and rightmost virtual column of pos1 and pos2.
1428  * Used for Visual block mode.
1429  */
1430     void
1431 getvcols(wp, pos1, pos2, left, right)
1432     win_T       *wp;
1433     pos_T       *pos1, *pos2;
1434     colnr_T     *left, *right;
1435 {
1436     colnr_T     from1, from2, to1, to2;
1437
1438     if (ltp(pos1, pos2))
1439     {
1440         getvvcol(wp, pos1, &from1, NULL, &to1);
1441         getvvcol(wp, pos2, &from2, NULL, &to2);
1442     }
1443     else
1444     {
1445         getvvcol(wp, pos2, &from1, NULL, &to1);
1446         getvvcol(wp, pos1, &from2, NULL, &to2);
1447     }
1448     if (from2 < from1)
1449         *left = from2;
1450     else
1451         *left = from1;
1452     if (to2 > to1)
1453     {
1454         if (*p_sel == 'e' && from2 - 1 >= to1)
1455             *right = from2 - 1;
1456         else
1457             *right = to2;
1458     }
1459     else
1460         *right = to1;
1461 }
1462 #endif
1463
1464 /*
1465  * skipwhite: skip over ' ' and '\t'.
1466  */
1467     char_u *
1468 skipwhite(p)
1469     char_u      *p;
1470 {
1471     while (vim_iswhite(*p)) /* skip to next non-white */
1472         ++p;
1473     return p;
1474 }
1475
1476 /*
1477  * skip over digits
1478  */
1479     char_u *
1480 skipdigits(p)
1481     char_u      *p;
1482 {
1483     while (VIM_ISDIGIT(*p))     /* skip to next non-digit */
1484         ++p;
1485     return p;
1486 }
1487
1488 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1489 /*
1490  * skip over digits and hex characters
1491  */
1492     char_u *
1493 skiphex(p)
1494     char_u      *p;
1495 {
1496     while (vim_isxdigit(*p))    /* skip to next non-digit */
1497         ++p;
1498     return p;
1499 }
1500 #endif
1501
1502 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1503 /*
1504  * skip to digit (or NUL after the string)
1505  */
1506     char_u *
1507 skiptodigit(p)
1508     char_u      *p;
1509 {
1510     while (*p != NUL && !VIM_ISDIGIT(*p))       /* skip to next digit */
1511         ++p;
1512     return p;
1513 }
1514
1515 /*
1516  * skip to hex character (or NUL after the string)
1517  */
1518     char_u *
1519 skiptohex(p)
1520     char_u      *p;
1521 {
1522     while (*p != NUL && !vim_isxdigit(*p))      /* skip to next digit */
1523         ++p;
1524     return p;
1525 }
1526 #endif
1527
/*
 * Return TRUE when "c" is one of the digits '0' to '9'.
 * This variant of isdigit() can handle characters > 0x100.  The library
 * isdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    return !(c < '0' || c > '9');
}
1540
/*
 * Return TRUE when "c" is a hex character: '0' - '9', 'a' - 'f' or
 * 'A' - 'F'.
 * This variant of isxdigit() can handle characters > 0x100.  The library
 * isxdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 */
    int
vim_isxdigit(c)
    int         c;
{
    int         is_decimal = !(c < '0' || c > '9');
    int         is_lower_hex = !(c < 'a' || c > 'f');
    int         is_upper_hex = !(c < 'A' || c > 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1554
1555 #if defined(FEAT_MBYTE) || defined(PROTO)
1556 /*
1557  * Vim's own character class functions.  These exist because many library
1558  * islower()/toupper() etc. do not work properly: they crash when used with
1559  * invalid values or can't handle latin1 when the locale is C.
1560  * Speed is most important here.
1561  */
1562 #define LATIN1LOWER 'l'     /* latin1flags[] value for a lower-case letter */
1563 #define LATIN1UPPER 'U'     /* latin1flags[] value for an upper-case letter */
1564
1565 /*                                                                 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~                                  ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1566 static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1567 static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1568 static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1569
1570     int
1571 vim_islower(c)
1572     int     c;
1573 {
1574     if (c <= '@')
1575         return FALSE;
1576     if (c >= 0x80)
1577     {
1578         if (enc_utf8)
1579             return utf_islower(c);
1580         if (c >= 0x100)
1581         {
1582 #ifdef HAVE_ISWLOWER
1583             if (has_mbyte)
1584                 return iswlower(c);
1585 #endif
1586             /* islower() can't handle these chars and may crash */
1587             return FALSE;
1588         }
1589         if (enc_latin1like)
1590             return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1591     }
1592     return islower(c);
1593 }
1594
1595     int
1596 vim_isupper(c)
1597     int     c;
1598 {
1599     if (c <= '@')
1600         return FALSE;
1601     if (c >= 0x80)
1602     {
1603         if (enc_utf8)
1604             return utf_isupper(c);
1605         if (c >= 0x100)
1606         {
1607 #ifdef HAVE_ISWUPPER
1608             if (has_mbyte)
1609                 return iswupper(c);
1610 #endif
1611             /* islower() can't handle these chars and may crash */
1612             return FALSE;
1613         }
1614         if (enc_latin1like)
1615             return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1616     }
1617     return isupper(c);
1618 }
1619
1620     int
1621 vim_toupper(c)
1622     int     c;
1623 {
1624     if (c <= '@')
1625         return c;
1626     if (c >= 0x80)
1627     {
1628         if (enc_utf8)
1629             return utf_toupper(c);
1630         if (c >= 0x100)
1631         {
1632 #ifdef HAVE_TOWUPPER
1633             if (has_mbyte)
1634                 return towupper(c);
1635 #endif
1636             /* toupper() can't handle these chars and may crash */
1637             return c;
1638         }
1639         if (enc_latin1like)
1640             return latin1upper[c];
1641     }
1642     return TOUPPER_LOC(c);
1643 }
1644
1645     int
1646 vim_tolower(c)
1647     int     c;
1648 {
1649     if (c <= '@')
1650         return c;
1651     if (c >= 0x80)
1652     {
1653         if (enc_utf8)
1654             return utf_tolower(c);
1655         if (c >= 0x100)
1656         {
1657 #ifdef HAVE_TOWLOWER
1658             if (has_mbyte)
1659                 return towlower(c);
1660 #endif
1661             /* tolower() can't handle these chars and may crash */
1662             return c;
1663         }
1664         if (enc_latin1like)
1665             return latin1lower[c];
1666     }
1667     return TOLOWER_LOC(c);
1668 }
1669 #endif
1670
1671 /*
1672  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1673  */
1674     char_u *
1675 skiptowhite(p)
1676     char_u      *p;
1677 {
1678     while (*p != ' ' && *p != '\t' && *p != NUL)
1679         ++p;
1680     return p;
1681 }
1682
1683 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1684         || defined(PROTO)
1685 /*
1686  * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1687  */
1688     char_u *
1689 skiptowhite_esc(p)
1690     char_u      *p;
1691 {
1692     while (*p != ' ' && *p != '\t' && *p != NUL)
1693     {
1694         if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1695             ++p;
1696         ++p;
1697     }
1698     return p;
1699 }
1700 #endif
1701
1702 /*
1703  * Getdigits: Get a number from a string and skip over it.
1704  * Note: the argument is a pointer to a char_u pointer!
1705  */
1706     long
1707 getdigits(pp)
1708     char_u **pp;
1709 {
1710     char_u      *p;
1711     long        retval;
1712
1713     p = *pp;
1714     retval = atol((char *)p);
1715     if (*p == '-')              /* skip negative sign */
1716         ++p;
1717     p = skipdigits(p);          /* skip to next non-digit */
1718     *pp = p;
1719     return retval;
1720 }
1721
1722 /*
1723  * Return TRUE if "lbuf" is empty or only contains blanks.
1724  */
1725     int
1726 vim_isblankline(lbuf)
1727     char_u      *lbuf;
1728 {
1729     char_u      *p;
1730
1731     p = skipwhite(lbuf);
1732     return (*p == NUL || *p == '\r' || *p == '\n');
1733 }
1734
1735 /*
1736  * Convert a string into a long and/or unsigned long, taking care of
1737  * hexadecimal and octal numbers.  Accepts a '-' sign.
1738  * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1739  *  0       decimal
1740  *  '0'     octal
1741  *  'X'     hex
1742  *  'x'     hex
1743  * If "len" is not NULL, the length of the number in characters is returned.
1744  * If "nptr" is not NULL, the signed result is returned in it.
1745  * If "unptr" is not NULL, the unsigned result is returned in it.
1746  * If "unptr" is not NULL, the unsigned result is returned in it.
1747  * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1748  * octal number.
1749  * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1750  * hex number.
1751  */
1752     void
1753 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1754     char_u              *start;
1755     int                 *hexp;      /* return: type of number 0 = decimal, 'x'
1756                                        or 'X' is hex, '0' = octal */
1757     int                 *len;       /* return: detected length of number */
1758     int                 dooct;      /* recognize octal number */
1759     int                 dohex;      /* recognize hex number */
1760     long                *nptr;      /* return: signed result */
1761     unsigned long       *unptr;     /* return: unsigned result */
1762 {
1763     char_u          *ptr = start;
1764     int             hex = 0;            /* default is decimal */
1765     int             negative = FALSE;
1766     unsigned long   un = 0;
1767     int             n;
1768
1769     if (ptr[0] == '-')
1770     {
1771         negative = TRUE;
1772         ++ptr;
1773     }
1774
1775     /* Recognize hex and octal. */
1776     if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
1777     {
1778         hex = ptr[1];
1779         if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1780             ptr += 2;                   /* hexadecimal */
1781         else
1782         {
1783             hex = 0;                    /* default is decimal */
1784             if (dooct)
1785             {
1786                 /* Don't interpret "0", "08" or "0129" as octal. */
1787                 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1788                 {
1789                     if (ptr[n] > '7')
1790                     {
1791                         hex = 0;        /* can't be octal */
1792                         break;
1793                     }
1794                     if (ptr[n] > '0')
1795                         hex = '0';      /* assume octal */
1796                 }
1797             }
1798         }
1799     }
1800
1801     /*
1802      * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1803      */
1804     if (hex == '0' || dooct > 1)
1805     {
1806         /* octal */
1807         while ('0' <= *ptr && *ptr <= '7')
1808         {
1809             un = 8 * un + (unsigned long)(*ptr - '0');
1810             ++ptr;
1811         }
1812     }
1813     else if (hex != 0 || dohex > 1)
1814     {
1815         /* hex */
1816         while (vim_isxdigit(*ptr))
1817         {
1818             un = 16 * un + (unsigned long)hex2nr(*ptr);
1819             ++ptr;
1820         }
1821     }
1822     else
1823     {
1824         /* decimal */
1825         while (VIM_ISDIGIT(*ptr))
1826         {
1827             un = 10 * un + (unsigned long)(*ptr - '0');
1828             ++ptr;
1829         }
1830     }
1831
1832     if (hexp != NULL)
1833         *hexp = hex;
1834     if (len != NULL)
1835         *len = (int)(ptr - start);
1836     if (nptr != NULL)
1837     {
1838         if (negative)   /* account for leading '-' for decimal numbers */
1839             *nptr = -(long)un;
1840         else
1841             *nptr = (long)un;
1842     }
1843     if (unptr != NULL)
1844         *unptr = un;
1845 }
1846
/*
 * Return the value of the single hex character "c".
 * The result is only valid when "c" is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 */
    int
hex2nr(c)
    int         c;
{
    int         value = c - '0';    /* right for a digit */

    if (c >= 'A' && c <= 'F')
        value = 10 + (c - 'A');
    else if (c >= 'a' && c <= 'f')
        value = 10 + (c - 'a');
    return value;
}
1861
1862 #if defined(FEAT_TERMRESPONSE) \
1863         || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1864 /*
1865  * Convert two hex characters to a byte.
1866  * Return -1 if one of the characters is not hex.
1867  */
1868     int
1869 hexhex2nr(p)
1870     char_u      *p;
1871 {
1872     if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1873         return -1;
1874     return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1875 }
1876 #endif
1877
1878 /*
1879  * Return TRUE if "str" starts with a backslash that should be removed.
1880  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1881  * backslash is not a normal file name character.
1882  * '$' is a valid file name character, we don't remove the backslash before
1883  * it.  This means it is not possible to use an environment variable after a
1884  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1885  * Although "\ name" is valid, the backslash in "Program\ files" must be
1886  * removed.  Assume a file name doesn't start with a space.
1887  * For multi-byte names, never remove a backslash before a non-ascii
1888  * character, assume that all multi-byte characters are valid file name
1889  * characters.
1890  */
1891     int
1892 rem_backslash(str)
1893     char_u  *str;
1894 {
1895 #ifdef BACKSLASH_IN_FILENAME
1896     return (str[0] == '\\'
1897 # ifdef FEAT_MBYTE
1898             && str[1] < 0x80
1899 # endif
1900             && (str[1] == ' '
1901                 || (str[1] != NUL
1902                     && str[1] != '*'
1903                     && str[1] != '?'
1904                     && !vim_isfilec(str[1]))));
1905 #else
1906     return (str[0] == '\\' && str[1] != NUL);
1907 #endif
1908 }
1909
1910 /*
1911  * Halve the number of backslashes in a file name argument.
1912  * For MS-DOS we only do this if the character after the backslash
1913  * is not a normal file character.
1914  */
1915     void
1916 backslash_halve(p)
1917     char_u      *p;
1918 {
1919     for ( ; *p; ++p)
1920         if (rem_backslash(p))
1921             mch_memmove(p, p + 1, STRLEN(p));
1922 }
1923
1924 /*
1925  * backslash_halve() plus save the result in allocated memory.
1926  */
1927     char_u *
1928 backslash_halve_save(p)
1929     char_u      *p;
1930 {
1931     char_u      *res;
1932
1933     res = vim_strsave(p);
1934     if (res == NULL)
1935         return p;
1936     backslash_halve(res);
1937     return res;
1938 }
1939
1940 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1941 /*
1942  * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1943  * The first 64 entries have been added to map control characters defined in
1944  * ascii.h
      *
      * The table is indexed with the EBCDIC byte value and holds the resulting
      * byte value, written in octal.  Each row below has eight entries.
1945  */
1946 static char_u ebcdic2ascii_tab[256] =
1947 {
1948     0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1949     0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1950     0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1951     0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1952     0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1953     0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1954     0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1955     0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1956     0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1957     0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1958     0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1959     0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1960     0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1961     0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1962     0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1963     0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1964     0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1965     0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1966     0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1967     0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1968     0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1969     0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1970     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1971     0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1972     0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1973     0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1974     0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1975     0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1976     0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1977     0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1978     0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1979     0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1980 };
1981
1982 /*
1983  * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
1984  * wanting 7-bit ASCII characters out the other end.
1985  */
1986     void
1987 ebcdic2ascii(buffer, len)
1988     char_u      *buffer;
1989     int         len;
1990 {
1991     int         i;
1992
1993     for (i = 0; i < len; i++)
1994         buffer[i] = ebcdic2ascii_tab[buffer[i]];
1995 }
1996 #endif