src/charset.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 #include "vim.h"
  11
  12 #ifdef FEAT_LINEBREAK
  13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
  14 #endif
  15
  16 #ifdef FEAT_MBYTE
  17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
  18 #endif
  19
  20 static int nr2hex __ARGS((int c));
  21
  22 static int    chartab_initialized = FALSE;
  23
  24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
  25  * characters 0-255. */
  26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
  27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
  28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
  29
  30 /*
  31  * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
  32  * characters for current buffer.
  33  *
  34  * Depends on the option settings 'iskeyword', 'isident', 'isfname',
  35  * 'isprint' and 'encoding'.
  36  *
  37  * The index in chartab[] depends on 'encoding':
  38  * - For non-multi-byte index with the byte (same as the character).
  39  * - For DBCS index with the first byte.
  40  * - For UTF-8 index with the character (when first byte is up to 0x80 it is
  41  *   the same as the character, if the first byte is 0x80 and above it depends
  42  *   on further bytes).
  43  *
  44  * The contents of chartab[]:
  45  * - The lower two bits, masked by CT_CELL_MASK, give the number of display
  46  *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
  47  * - CT_PRINT_CHAR bit is set when the character is printable (no need to
  48  *   translate the character before displaying it).  Note that only DBCS
  49  *   characters can have 2 display cells and still be printable.
  50  * - CT_FNAME_CHAR bit is set when the character can be in a file name.
  51  * - CT_ID_CHAR bit is set when the character can be in an identifier.
  52  *
  53  * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
  54  * error, OK otherwise.
  55  */
  56     int
  57 init_chartab()
  58 {
  59     return buf_init_chartab(curbuf, TRUE);
  60 }
  61
  62     int
  63 buf_init_chartab(buf, global)
  64     buf_T       *buf;
  65     int         global;         /* FALSE: only set buf->b_chartab[] */
  66 {
  67     int         c;
  68     int         c2;
  69     char_u      *p;
  70     int         i;
  71     int         tilde;
  72     int         do_isalpha;
  73
  74     if (global)
  75     {
  76         /*
  77          * Set the default size for printable characters:
  78          * From <Space> to '~' is 1 (printable), others are 2 (not printable).
  79          * This also inits all 'isident' and 'isfname' flags to FALSE.
  80          *
  81          * EBCDIC: all chars below ' ' are not printable, all others are
  82          * printable.
  83          */
  84         c = 0;
  85         while (c < ' ')
  86             chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
  87 #ifdef EBCDIC
  88         while (c < 255)
  89 #else
  90         while (c <= '~')
  91 #endif
  92             chartab[c++] = 1 + CT_PRINT_CHAR;
  93 #ifdef FEAT_FKMAP
  94         if (p_altkeymap)
  95         {
  96             while (c < YE)
  97                 chartab[c++] = 1 + CT_PRINT_CHAR;
  98         }
  99 #endif
 100         while (c < 256)
 101         {
 102 #ifdef FEAT_MBYTE
 103             /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
 104             if (enc_utf8 && c >= 0xa0)
 105                 chartab[c++] = CT_PRINT_CHAR + 1;
 106             /* euc-jp characters starting with 0x8e are single width */
 107             else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
 108                 chartab[c++] = CT_PRINT_CHAR + 1;
 109             /* other double-byte chars can be printable AND double-width */
 110             else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
 111                 chartab[c++] = CT_PRINT_CHAR + 2;
 112             else
 113 #endif
 114                 /* the rest is unprintable by default */
 115                 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
 116         }
 117
 118 #ifdef FEAT_MBYTE
 119         /* Assume that every multi-byte char is a filename character. */
 120         for (c = 1; c < 256; ++c)
 121             if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
 122                     || (enc_dbcs == DBCS_JPNU && c == 0x8e)
 123                     || (enc_utf8 && c >= 0xa0))
 124                 chartab[c] |= CT_FNAME_CHAR;
 125 #endif
 126     }
 127
 128     /*
 129      * Init word char flags all to FALSE
 130      */
 131     vim_memset(buf->b_chartab, 0, (size_t)32);
 132 #ifdef FEAT_MBYTE
 133     if (enc_dbcs != 0)
 134         for (c = 0; c < 256; ++c)
 135         {
 136             /* double-byte characters are probably word characters */
 137             if (MB_BYTE2LEN(c) == 2)
 138                 SET_CHARTAB(buf, c);
 139         }
 140 #endif
 141
 142 #ifdef FEAT_LISP
 143     /*
 144      * In lisp mode the '-' character is included in keywords.
 145      */
 146     if (buf->b_p_lisp)
 147         SET_CHARTAB(buf, '-');
 148 #endif
 149
 150     /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
 151      * options Each option is a list of characters, character numbers or
 152      * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
 153      */
 154     for (i = global ? 0 : 3; i <= 3; ++i)
 155     {
 156         if (i == 0)
 157             p = p_isi;          /* first round: 'isident' */
 158         else if (i == 1)
 159             p = p_isp;          /* second round: 'isprint' */
 160         else if (i == 2)
 161             p = p_isf;          /* third round: 'isfname' */
 162         else    /* i == 3 */
 163             p = buf->b_p_isk;   /* fourth round: 'iskeyword' */
 164
 165         while (*p)
 166         {
 167             tilde = FALSE;
 168             do_isalpha = FALSE;
 169             if (*p == '^' && p[1] != NUL)
 170             {
 171                 tilde = TRUE;
 172                 ++p;
 173             }
 174             if (VIM_ISDIGIT(*p))
 175                 c = getdigits(&p);
 176             else
 177                 c = *p++;
 178             c2 = -1;
 179             if (*p == '-' && p[1] != NUL)
 180             {
 181                 ++p;
 182                 if (VIM_ISDIGIT(*p))
 183                     c2 = getdigits(&p);
 184                 else
 185                     c2 = *p++;
 186             }
 187             if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
 188                                                  || !(*p == NUL || *p == ','))
 189                 return FAIL;
 190
 191             if (c2 == -1)       /* not a range */
 192             {
 193                 /*
 194                  * A single '@' (not "@-@"):
 195                  * Decide on letters being ID/printable/keyword chars with
 196                  * standard function isalpha(). This takes care of locale for
 197                  * single-byte characters).
 198                  */
 199                 if (c == '@')
 200                 {
 201                     do_isalpha = TRUE;
 202                     c = 1;
 203                     c2 = 255;
 204                 }
 205                 else
 206                     c2 = c;
 207             }
 208             while (c <= c2)
 209             {
 210                 /* Use the MB_ functions here, because isalpha() doesn't
 211                  * work properly when 'encoding' is "latin1" and the locale is
 212                  * "C".  */
 213                 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
 214 #ifdef FEAT_FKMAP
 215                         || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
 216 #endif
 217                             )
 218                 {
 219                     if (i == 0)                 /* (re)set ID flag */
 220                     {
 221                         if (tilde)
 222                             chartab[c] &= ~CT_ID_CHAR;
 223                         else
 224                             chartab[c] |= CT_ID_CHAR;
 225                     }
 226                     else if (i == 1)            /* (re)set printable */
 227                     {
 228                         if ((c < ' '
 229 #ifndef EBCDIC
 230                                     || c > '~'
 231 #endif
 232 #ifdef FEAT_FKMAP
 233                                     || (p_altkeymap
 234                                         && (F_isalpha(c) || F_isdigit(c)))
 235 #endif
 236                             )
 237 #ifdef FEAT_MBYTE
 238                                 /* For double-byte we keep the cell width, so
 239                                  * that we can detect it from the first byte. */
 240                                 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
 241 #endif
 242                            )
 243                         {
 244                             if (tilde)
 245                             {
 246                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
 247                                              + ((dy_flags & DY_UHEX) ? 4 : 2);
 248                                 chartab[c] &= ~CT_PRINT_CHAR;
 249                             }
 250                             else
 251                             {
 252                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
 253                                 chartab[c] |= CT_PRINT_CHAR;
 254                             }
 255                         }
 256                     }
 257                     else if (i == 2)            /* (re)set fname flag */
 258                     {
 259                         if (tilde)
 260                             chartab[c] &= ~CT_FNAME_CHAR;
 261                         else
 262                             chartab[c] |= CT_FNAME_CHAR;
 263                     }
 264                     else /* i == 3 */           /* (re)set keyword flag */
 265                     {
 266                         if (tilde)
 267                             RESET_CHARTAB(buf, c);
 268                         else
 269                             SET_CHARTAB(buf, c);
 270                     }
 271                 }
 272                 ++c;
 273             }
 274             p = skip_to_option_part(p);
 275         }
 276     }
 277     chartab_initialized = TRUE;
 278     return OK;
 279 }
 280
 281 /*
 282  * Translate any special characters in buf[bufsize] in-place.
 283  * The result is a string with only printable characters, but if there is not
 284  * enough room, not all characters will be translated.
 285  */
 286     void
 287 trans_characters(buf, bufsize)
 288     char_u      *buf;
 289     int         bufsize;
 290 {
 291     int         len;            /* length of string needing translation */
 292     int         room;           /* room in buffer after string */
 293     char_u      *trs;           /* translated character */
 294     int         trs_len;        /* length of trs[] */
 295
 296     len = (int)STRLEN(buf);
 297     room = bufsize - len;
 298     while (*buf != 0)
 299     {
 300 # ifdef FEAT_MBYTE
 301         /* Assume a multi-byte character doesn't need translation. */
 302         if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
 303             len -= trs_len;
 304         else
 305 # endif
 306         {
 307             trs = transchar_byte(*buf);
 308             trs_len = (int)STRLEN(trs);
 309             if (trs_len > 1)
 310             {
 311                 room -= trs_len - 1;
 312                 if (room <= 0)
 313                     return;
 314                 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
 315             }
 316             mch_memmove(buf, trs, (size_t)trs_len);
 317             --len;
 318         }
 319         buf += trs_len;
 320     }
 321 }
 322
 323 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
 324         || defined(PROTO)
 325 /*
 326  * Translate a string into allocated memory, replacing special chars with
 327  * printable chars.  Returns NULL when out of memory.
 328  */
 329     char_u *
 330 transstr(s)
 331     char_u      *s;
 332 {
 333     char_u      *res;
 334     char_u      *p;
 335 #ifdef FEAT_MBYTE
 336     int         l, len, c;
 337     char_u      hexbuf[11];
 338 #endif
 339
 340 #ifdef FEAT_MBYTE
 341     if (has_mbyte)
 342     {
 343         /* Compute the length of the result, taking account of unprintable
 344          * multi-byte characters. */
 345         len = 0;
 346         p = s;
 347         while (*p != NUL)
 348         {
 349             if ((l = (*mb_ptr2len)(p)) > 1)
 350             {
 351                 c = (*mb_ptr2char)(p);
 352                 p += l;
 353                 if (vim_isprintc(c))
 354                     len += l;
 355                 else
 356                 {
 357                     transchar_hex(hexbuf, c);
 358                     len += (int)STRLEN(hexbuf);
 359                 }
 360             }
 361             else
 362             {
 363                 l = byte2cells(*p++);
 364                 if (l > 0)
 365                     len += l;
 366                 else
 367                     len += 4;   /* illegal byte sequence */
 368             }
 369         }
 370         res = alloc((unsigned)(len + 1));
 371     }
 372     else
 373 #endif
 374         res = alloc((unsigned)(vim_strsize(s) + 1));
 375     if (res != NULL)
 376     {
 377         *res = NUL;
 378         p = s;
 379         while (*p != NUL)
 380         {
 381 #ifdef FEAT_MBYTE
 382             if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 383             {
 384                 c = (*mb_ptr2char)(p);
 385                 if (vim_isprintc(c))
 386                     STRNCAT(res, p, l); /* append printable multi-byte char */
 387                 else
 388                     transchar_hex(res + STRLEN(res), c);
 389                 p += l;
 390             }
 391             else
 392 #endif
 393                 STRCAT(res, transchar_byte(*p++));
 394         }
 395     }
 396     return res;
 397 }
 398 #endif
 399
 400 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
 401 /*
 402  * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 403  * current locale.
 404  * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 405  * Otherwise puts the result in "buf[buflen]".
 406  */
 407     char_u *
 408 str_foldcase(str, orglen, buf, buflen)
 409     char_u      *str;
 410     int         orglen;
 411     char_u      *buf;
 412     int         buflen;
 413 {
 414     garray_T    ga;
 415     int         i;
 416     int         len = orglen;
 417
 418 #define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
 419 #define GA_PTR(i)   ((char_u *)ga.ga_data + i)
 420 #define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
 421 #define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)
 422
 423     /* Copy "str" into "buf" or allocated memory, unmodified. */
 424     if (buf == NULL)
 425     {
 426         ga_init2(&ga, 1, 10);
 427         if (ga_grow(&ga, len + 1) == FAIL)
 428             return NULL;
 429         mch_memmove(ga.ga_data, str, (size_t)len);
 430         ga.ga_len = len;
 431     }
 432     else
 433     {
 434         if (len >= buflen)          /* Ugly! */
 435             len = buflen - 1;
 436         mch_memmove(buf, str, (size_t)len);
 437     }
 438     if (buf == NULL)
 439         GA_CHAR(len) = NUL;
 440     else
 441         buf[len] = NUL;
 442
 443     /* Make each character lower case. */
 444     i = 0;
 445     while (STR_CHAR(i) != NUL)
 446     {
 447 #ifdef FEAT_MBYTE
 448         if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
 449         {
 450             if (enc_utf8)
 451             {
 452                 int     c, lc;
 453
 454                 c = utf_ptr2char(STR_PTR(i));
 455                 lc = utf_tolower(c);
 456                 if (c != lc)
 457                 {
 458                     int     ol = utf_char2len(c);
 459                     int     nl = utf_char2len(lc);
 460
 461                     /* If the byte length changes need to shift the following
 462                      * characters forward or backward. */
 463                     if (ol != nl)
 464                     {
 465                         if (nl > ol)
 466                         {
 467                             if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
 468                                                     : len + nl - ol >= buflen)
 469                             {
 470                                 /* out of memory, keep old char */
 471                                 lc = c;
 472                                 nl = ol;
 473                             }
 474                         }
 475                         if (ol != nl)
 476                         {
 477                             if (buf == NULL)
 478                             {
 479                                 mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
 480                                                   STRLEN(GA_PTR(i) + ol) + 1);
 481                                 ga.ga_len += nl - ol;
 482                             }
 483                             else
 484                             {
 485                                 mch_memmove(buf + i + nl, buf + i + ol,
 486                                                     STRLEN(buf + i + ol) + 1);
 487                                 len += nl - ol;
 488                             }
 489                         }
 490                     }
 491                     (void)utf_char2bytes(lc, STR_PTR(i));
 492                 }
 493             }
 494             /* skip to next multi-byte char */
 495             i += (*mb_ptr2len)(STR_PTR(i));
 496         }
 497         else
 498 #endif
 499         {
 500             if (buf == NULL)
 501                 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
 502             else
 503                 buf[i] = TOLOWER_LOC(buf[i]);
 504             ++i;
 505         }
 506     }
 507
 508     if (buf == NULL)
 509         return (char_u *)ga.ga_data;
 510     return buf;
 511 }
 512 #endif
 513
 514 /*
 515  * Catch 22: chartab[] can't be initialized before the options are
 516  * initialized, and initializing options may cause transchar() to be called!
 517  * When chartab_initialized == FALSE don't use chartab[].
 518  * Does NOT work for multi-byte characters, c must be <= 255.
 519  * Also doesn't work for the first byte of a multi-byte, "c" must be a
 520  * character!
 521  */
 522 static char_u   transchar_buf[7];
 523
 524     char_u *
 525 transchar(c)
 526     int         c;
 527 {
 528     int                 i;
 529
 530     i = 0;
 531     if (IS_SPECIAL(c))      /* special key code, display as ~@ char */
 532     {
 533         transchar_buf[0] = '~';
 534         transchar_buf[1] = '@';
 535         i = 2;
 536         c = K_SECOND(c);
 537     }
 538
 539     if ((!chartab_initialized && (
 540 #ifdef EBCDIC
 541                     (c >= 64 && c < 255)
 542 #else
 543                     (c >= ' ' && c <= '~')
 544 #endif
 545 #ifdef FEAT_FKMAP
 546                         || F_ischar(c)
 547 #endif
 548                 )) || (c < 256 && vim_isprintc_strict(c)))
 549     {
 550         /* printable character */
 551         transchar_buf[i] = c;
 552         transchar_buf[i + 1] = NUL;
 553     }
 554     else
 555         transchar_nonprint(transchar_buf + i, c);
 556     return transchar_buf;
 557 }
 558
 559 #if defined(FEAT_MBYTE) || defined(PROTO)
 560 /*
 561  * Like transchar(), but called with a byte instead of a character.  Checks
 562  * for an illegal UTF-8 byte.
 563  */
 564     char_u *
 565 transchar_byte(c)
 566     int         c;
 567 {
 568     if (enc_utf8 && c >= 0x80)
 569     {
 570         transchar_nonprint(transchar_buf, c);
 571         return transchar_buf;
 572     }
 573     return transchar(c);
 574 }
 575 #endif
 576
 577 /*
 578  * Convert non-printable character to two or more printable characters in
 579  * "buf[]".  "buf" needs to be able to hold five bytes.
 580  * Does NOT work for multi-byte characters, c must be <= 255.
 581  */
 582     void
 583 transchar_nonprint(buf, c)
 584     char_u      *buf;
 585     int         c;
 586 {
 587     if (c == NL)
 588         c = NUL;                /* we use newline in place of a NUL */
 589     else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
 590         c = NL;                 /* we use CR in place of  NL in this case */
 591
 592     if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
 593         transchar_hex(buf, c);
 594
 595 #ifdef EBCDIC
 596     /* For EBCDIC only the characters 0-63 and 255 are not printable */
 597     else if (CtrlChar(c) != 0 || c == DEL)
 598 #else
 599     else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
 600 #endif
 601     {
 602         buf[0] = '^';
 603 #ifdef EBCDIC
 604         if (c == DEL)
 605             buf[1] = '?';               /* DEL displayed as ^? */
 606         else
 607             buf[1] = CtrlChar(c);
 608 #else
 609         buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
 610 #endif
 611
 612         buf[2] = NUL;
 613     }
 614 #ifdef FEAT_MBYTE
 615     else if (enc_utf8 && c >= 0x80)
 616     {
 617         transchar_hex(buf, c);
 618     }
 619 #endif
 620 #ifndef EBCDIC
 621     else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
 622     {
 623         buf[0] = '|';
 624         buf[1] = c - 0x80;
 625         buf[2] = NUL;
 626     }
 627 #else
 628     else if (c < 64)
 629     {
 630         buf[0] = '~';
 631         buf[1] = MetaChar(c);
 632         buf[2] = NUL;
 633     }
 634 #endif
 635     else                                            /* 0x80 - 0x9f and 0xff */
 636     {
 637         /*
 638          * TODO: EBCDIC I don't know what to do with this chars, so I display
 639          * them as '~?' for now
 640          */
 641         buf[0] = '~';
 642 #ifdef EBCDIC
 643         buf[1] = '?';                   /* 0xff displayed as ~? */
 644 #else
 645         buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
 646 #endif
 647         buf[2] = NUL;
 648     }
 649 }
 650
 651     void
 652 transchar_hex(buf, c)
 653     char_u      *buf;
 654     int         c;
 655 {
 656     int         i = 0;
 657
 658     buf[0] = '<';
 659 #ifdef FEAT_MBYTE
 660     if (c > 255)
 661     {
 662         buf[++i] = nr2hex((unsigned)c >> 12);
 663         buf[++i] = nr2hex((unsigned)c >> 8);
 664     }
 665 #endif
 666     buf[++i] = nr2hex((unsigned)c >> 4);
 667     buf[++i] = nr2hex(c);
 668     buf[++i] = '>';
 669     buf[++i] = NUL;
 670 }
 671
 672 /*
 673  * Convert the lower 4 bits of byte "c" to its hex character.
 674  * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 675  * function key 1.
 676  */
 677     static int
 678 nr2hex(c)
 679     int         c;
 680 {
 681     if ((c & 0xf) <= 9)
 682         return (c & 0xf) + '0';
 683     return (c & 0xf) - 10 + 'a';
 684 }
 685
 686 /*
 687  * Return number of display cells occupied by byte "b".
 688  * Caller must make sure 0 <= b <= 255.
 689  * For multi-byte mode "b" must be the first byte of a character.
 690  * A TAB is counted as two cells: "^I".
 691  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
 692  * cells depends on further bytes.
 693  */
 694     int
 695 byte2cells(b)
 696     int         b;
 697 {
 698 #ifdef FEAT_MBYTE
 699     if (enc_utf8 && b >= 0x80)
 700         return 0;
 701 #endif
 702     return (chartab[b] & CT_CELL_MASK);
 703 }
 704
 705 /*
 706  * Return number of display cells occupied by character "c".
 707  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
 708  * A TAB is counted as two cells: "^I" or four: "<09>".
 709  */
 710     int
 711 char2cells(c)
 712     int         c;
 713 {
 714     if (IS_SPECIAL(c))
 715         return char2cells(K_SECOND(c)) + 2;
 716 #ifdef FEAT_MBYTE
 717     if (c >= 0x80)
 718     {
 719         /* UTF-8: above 0x80 need to check the value */
 720         if (enc_utf8)
 721             return utf_char2cells(c);
 722         /* DBCS: double-byte means double-width, except for euc-jp with first
 723          * byte 0x8e */
 724         if (enc_dbcs != 0 && c >= 0x100)
 725         {
 726             if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
 727                 return 1;
 728             return 2;
 729         }
 730     }
 731 #endif
 732     return (chartab[c & 0xff] & CT_CELL_MASK);
 733 }
 734
 735 /*
 736  * Return number of display cells occupied by character at "*p".
 737  * A TAB is counted as two cells: "^I" or four: "<09>".
 738  */
 739     int
 740 ptr2cells(p)
 741     char_u      *p;
 742 {
 743 #ifdef FEAT_MBYTE
 744     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
 745     if (enc_utf8 && *p >= 0x80)
 746         return utf_ptr2cells(p);
 747     /* For DBCS we can tell the cell count from the first byte. */
 748 #endif
 749     return (chartab[*p] & CT_CELL_MASK);
 750 }
 751
 752 /*
 753  * Return the number of characters string "s" will take on the screen,
 754  * counting TABs as two characters: "^I".
 755  */
 756     int
 757 vim_strsize(s)
 758     char_u      *s;
 759 {
 760     return vim_strnsize(s, (int)MAXCOL);
 761 }
 762
 763 /*
 764  * Return the number of characters string "s[len]" will take on the screen,
 765  * counting TABs as two characters: "^I".
 766  */
 767     int
 768 vim_strnsize(s, len)
 769     char_u      *s;
 770     int         len;
 771 {
 772     int         size = 0;
 773
 774     while (*s != NUL && --len >= 0)
 775     {
 776 #ifdef FEAT_MBYTE
 777         if (has_mbyte)
 778         {
 779             int     l = (*mb_ptr2len)(s);
 780
 781             size += ptr2cells(s);
 782             s += l;
 783             len -= l - 1;
 784         }
 785         else
 786 #endif
 787             size += byte2cells(*s++);
 788     }
 789     return size;
 790 }
 791
 792 /*
 793  * Return the number of characters 'c' will take on the screen, taking
 794  * into account the size of a tab.
 795  * Use a define to make it fast, this is used very often!!!
 796  * Also see getvcol() below.
 797  */
 798
 799 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 800     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 801     { \
 802         int ts; \
 803         ts = (buf)->b_p_ts; \
 804         return (int)(ts - (col % ts)); \
 805     } \
 806     else \
 807         return ptr2cells(p);
 808
 809 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
 810         || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
 811     int
 812 chartabsize(p, col)
 813     char_u      *p;
 814     colnr_T     col;
 815 {
 816     RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
 817 }
 818 #endif
 819
 820 #ifdef FEAT_LINEBREAK
 821     static int
 822 win_chartabsize(wp, p, col)
 823     win_T       *wp;
 824     char_u      *p;
 825     colnr_T     col;
 826 {
 827     RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
 828 }
 829 #endif
 830
 831 /*
 832  * return the number of characters the string 's' will take on the screen,
 833  * taking into account the size of a tab
 834  */
 835     int
 836 linetabsize(s)
 837     char_u      *s;
 838 {
 839     colnr_T     col = 0;
 840
 841     while (*s != NUL)
 842         col += lbr_chartabsize_adv(&s, col);
 843     return (int)col;
 844 }
 845
 846 /*
 847  * Like linetabsize(), but for a given window instead of the current one.
 848  */
 849     int
 850 win_linetabsize(wp, p, len)
 851     win_T       *wp;
 852     char_u      *p;
 853     colnr_T     len;
 854 {
 855     colnr_T     col = 0;
 856     char_u      *s;
 857
 858     for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
 859         col += win_lbr_chartabsize(wp, s, col, NULL);
 860     return (int)col;
 861 }
 862
 863 /*
 864  * Return TRUE if 'c' is a normal identifier character:
 865  * Letters and characters from the 'isident' option.
 866  */
 867     int
 868 vim_isIDc(c)
 869     int c;
 870 {
 871     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
 872 }
 873
 874 /*
 875  * return TRUE if 'c' is a keyword character: Letters and characters from
 876  * 'iskeyword' option for current buffer.
 877  * For multi-byte characters mb_get_class() is used (builtin rules).
 878  */
 879     int
 880 vim_iswordc(c)
 881     int c;
 882 {
 883 #ifdef FEAT_MBYTE
 884     if (c >= 0x100)
 885     {
 886         if (enc_dbcs != 0)
 887             return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
 888         if (enc_utf8)
 889             return utf_class(c) >= 2;
 890     }
 891 #endif
 892     return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
 893 }
 894
 895 /*
 896  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
 897  */
 898     int
 899 vim_iswordp(p)
 900     char_u *p;
 901 {
 902 #ifdef FEAT_MBYTE
 903     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 904         return mb_get_class(p) >= 2;
 905 #endif
 906     return GET_CHARTAB(curbuf, *p) != 0;
 907 }
 908
 909 #if defined(FEAT_SYN_HL) || defined(PROTO)
 910     int
 911 vim_iswordc_buf(p, buf)
 912     char_u      *p;
 913     buf_T       *buf;
 914 {
 915 # ifdef FEAT_MBYTE
 916     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 917         return mb_get_class(p) >= 2;
 918 # endif
 919     return (GET_CHARTAB(buf, *p) != 0);
 920 }
 921 #endif
 922
 923 /*
 924  * return TRUE if 'c' is a valid file-name character
 925  * Assume characters above 0x100 are valid (multi-byte).
 926  */
 927     int
 928 vim_isfilec(c)
 929     int c;
 930 {
 931     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
 932 }
 933
 934 /*
 935  * return TRUE if 'c' is a valid file-name character or a wildcard character
 936  * Assume characters above 0x100 are valid (multi-byte).
 937  * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
 938  * returns false.
 939  */
 940     int
 941 vim_isfilec_or_wc(c)
 942     int c;
 943 {
 944     char_u buf[2];
 945
 946     buf[0] = (char_u)c;
 947     buf[1] = NUL;
 948     return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
 949 }
 950
 951 /*
 952  * return TRUE if 'c' is a printable character
 953  * Assume characters above 0x100 are printable (multi-byte), except for
 954  * Unicode.
 955  */
 956     int
 957 vim_isprintc(c)
 958     int c;
 959 {
 960 #ifdef FEAT_MBYTE
 961     if (enc_utf8 && c >= 0x100)
 962         return utf_printable(c);
 963 #endif
 964     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 965 }
 966
 967 /*
 968  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 969  * byte of a double-byte character.
 970  */
 971     int
 972 vim_isprintc_strict(c)
 973     int c;
 974 {
 975 #ifdef FEAT_MBYTE
 976     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
 977         return FALSE;
 978     if (enc_utf8 && c >= 0x100)
 979         return utf_printable(c);
 980 #endif
 981     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 982 }
 983
/*
 * Like chartabsize(), but also check for line breaks on the screen.
 * Always works on the current window (curwin) and current buffer (curbuf).
 *
 * "s" points to the character to measure and "col" is the column passed on
 * to the helper that does the actual work.
 *
 * Note that the braces below are split over several #ifdef blocks: without
 * FEAT_LINEBREAK only the "fast path" statements remain.
 */
    int
lbr_chartabsize(s, col)
    unsigned char       *s;
    colnr_T             col;
{
#ifdef FEAT_LINEBREAK
    /* Neither 'linebreak' nor 'showbreak' is in use: take the fast path. */
    if (!curwin->w_p_lbr && *p_sbr == NUL)
    {
#endif
#ifdef FEAT_MBYTE
        /* With 'wrap' set a double-width character may not fit at the end of
         * the screen line, win_nolbr_chartabsize() takes care of that. */
        if (curwin->w_p_wrap)
            return win_nolbr_chartabsize(curwin, s, col, NULL);
#endif
        /* NOTE(review): no statement follows this macro when FEAT_LINEBREAK
         * is not defined, so it presumably contains the "return" itself --
         * confirm against its definition. */
        RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
#ifdef FEAT_LINEBREAK
    }
    /* 'linebreak' or 'showbreak' is in use: do the full computation. */
    return win_lbr_chartabsize(curwin, s, col, NULL);
#endif
}
1006
1007 /*
1008  * Call lbr_chartabsize() and advance the pointer.
1009  */
1010     int
1011 lbr_chartabsize_adv(s, col)
1012     char_u      **s;
1013     colnr_T     col;
1014 {
1015     int         retval;
1016
1017     retval = lbr_chartabsize(*s, col);
1018     mb_ptr_adv(*s);
1019     return retval;
1020 }
1021
/*
 * This function is used very often, keep it fast!!!!
 *
 * Compute the size of the character at "s", which starts in column "col" of
 * window "wp", taking 'linebreak' and 'showbreak' into account.
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line.  Warning: *headp is only set if it's a non-zero
 * value, init to 0 before calling.
 *
 * Without FEAT_LINEBREAK only the block with the quick return remains.
 */
/*ARGSUSED*/
    int
win_lbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp;
{
#ifdef FEAT_LINEBREAK
    int         c;              /* character currently looked at */
    int         size;           /* result: size of the character at "s" */
    colnr_T     col2;           /* column while scanning ahead */
    colnr_T     colmax;         /* column limit used for the "fits" check */
    int         added;          /* size added for 'showbreak' */
# ifdef FEAT_MBYTE
    int         mb_added = 0;   /* 1 when a cell was added for a double-width
                                   character in the last column */
# else
    /* Without multi-byte support nothing is ever added for this. */
#  define mb_added 0
# endif
    int         numberextra;
    char_u      *ps;            /* position before the last advance of "s" */
    int         tab_corr = (*s == TAB); /* measured character is a TAB */
    int         n;

    /*
     * No 'linebreak' and 'showbreak': return quickly.
     */
    if (!wp->w_p_lbr && *p_sbr == NUL)
#endif
    {
#ifdef FEAT_MBYTE
        /* With 'wrap' a double-width character may need an extra cell. */
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
#endif
        /* NOTE(review): presumably this macro contains the "return",
         * nothing follows it when FEAT_LINEBREAK is not defined. */
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
            && vim_isbreak(c)
            && !vim_isbreak(s[1])
            && !wp->w_p_list
            && wp->w_p_wrap
# ifdef FEAT_VERTSPLIT
            && wp->w_width != 0
# endif
       )
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = W_WIDTH(wp) - numberextra;
        if (col >= colmax)
        {
            /* "col" is at or past the first limit: advance "colmax" in steps
             * of "n" until it is beyond "col".  The "n > 0" check avoids a
             * division by zero. */
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n;
        }

        for (;;)
        {
            ps = s;
            mb_ptr_adv(s);
            c = *s;
            /* Stop at the end of the line, or at a non-break character that
             * follows a break character (except for the very first step,
             * where "col2" still equals "col"). */
            if (!(c != NUL
                    && (vim_isbreak(c)
                        || (!vim_isbreak(c)
                            && (col2 == col || !vim_isbreak(*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax)         /* doesn't fit */
            {
                /* Make the character fill the rest up to "colmax". */
                size = colmax - col;
                tab_corr = FALSE;
                break;
            }
        }
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size;         /* Count the ">" in the last column. */
        mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'showbreak' string at start of line
     * Set *headp to the size of what we add.
     */
    added = 0;
    if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
    {
        /* Reduce "col" to a column relative to the start of its screen
         * line, including the offset columns. */
        numberextra = win_col_off(wp);
        col += numberextra + mb_added;
        if (col >= (colnr_T)W_WIDTH(wp))
        {
            col -= W_WIDTH(wp);
            numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
            if (numberextra > 0)        /* avoid dividing by zero */
                col = col % numberextra;
        }
        if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
        {
            added = vim_strsize(p_sbr);
            if (tab_corr)
                /* For a TAB only add whole multiples of 'tabstop'. */
                size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
            else
                size += added;
            /* Only report the added size through "headp" when the character
             * is at the start of the screen line. */
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1162
1163 #if defined(FEAT_MBYTE) || defined(PROTO)
1164 /*
1165  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1166  * 'wrap' is on.  This means we need to check for a double-byte character that
1167  * doesn't fit at the end of the screen line.
1168  */
1169     static int
1170 win_nolbr_chartabsize(wp, s, col, headp)
1171     win_T       *wp;
1172     char_u      *s;
1173     colnr_T     col;
1174     int         *headp;
1175 {
1176     int         n;
1177
1178     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1179     {
1180         n = wp->w_buffer->b_p_ts;
1181         return (int)(n - (col % n));
1182     }
1183     n = ptr2cells(s);
1184     /* Add one cell for a double-width character in the last column of the
1185      * window, displayed with a ">". */
1186     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1187     {
1188         if (headp != NULL)
1189             *headp = 1;
1190         return 3;
1191     }
1192     return n;
1193 }
1194
1195 /*
1196  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1197  * "wp".
1198  */
1199     int
1200 in_win_border(wp, vcol)
1201     win_T       *wp;
1202     colnr_T     vcol;
1203 {
1204     colnr_T     width1;         /* width of first line (after line number) */
1205     colnr_T     width2;         /* width of further lines */
1206
1207 #ifdef FEAT_VERTSPLIT
1208     if (wp->w_width == 0)       /* there is no border */
1209         return FALSE;
1210 #endif
1211     width1 = W_WIDTH(wp) - win_col_off(wp);
1212     if (vcol < width1 - 1)
1213         return FALSE;
1214     if (vcol == width1 - 1)
1215         return TRUE;
1216     width2 = width1 + win_col_off2(wp);
1217     return ((vcol - width1) % width2 == width2 - 1);
1218 }
1219 #endif /* FEAT_MBYTE */
1220
/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * "wp" is the window whose buffer and options are used, "pos" gives the line
 * number and byte index of the character.  Each of "start", "cursor" and
 * "end" may be NULL when the caller does not need that value.
 *
 * The line is walked from its start, adding up the width of every character
 * before "pos".  The walk also stops at the NUL that ends the line.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     vcol;           /* virtual column where current char starts */
    char_u      *ptr;           /* points to current char */
    char_u      *posptr;        /* points to char at pos->col */
    int         incr;           /* width of the current char */
    int         head;           /* cells counted before the char itself */
    int         ts = wp->w_buffer->b_p_ts;
    int         c;

    vcol = 0;
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    posptr = ptr + pos->col;

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
            && !wp->w_p_lbr && *p_sbr == NUL
#endif
       )
    {
#ifndef FEAT_MBYTE
        /* Without multi-byte support "head" never changes. */
        head = 0;
#endif
        for (;;)
        {
#ifdef FEAT_MBYTE
            /* May be set to 1 below, reset it for every character. */
            head = 0;
#endif
            c = *ptr;
            /* make sure we don't go past the end of the line */
            if (c == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }
            /* A tab gets expanded, depending on the current column */
            if (c == TAB)
                incr = ts - (vcol % ts);
            else
            {
#ifdef FEAT_MBYTE
                if (has_mbyte)
                {
                    /* For utf-8, if the byte is >= 0x80, need to look at
                     * further bytes to find the cell width. */
                    if (enc_utf8 && c >= 0x80)
                        incr = utf_ptr2cells(ptr);
                    else
                        incr = CHARSIZE(c);

                    /* If a double-cell char doesn't fit at the end of a line
                     * it wraps to the next line, it's like this char is three
                     * cells wide. */
                    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
                            && in_win_border(wp, vcol))
                    {
                        ++incr;
                        head = 1;
                    }
                }
                else
#endif
                    incr = CHARSIZE(c);
            }

            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    else
    {
        /* Slower loop: let win_lbr_chartabsize() compute the width of every
         * character, it may store a value in "head". */
        for (;;)
        {
            /* A tab gets expanded, depending on the current column */
            head = 0;
            incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
            /* make sure we don't go past the end of the line */
            if (*ptr == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }

            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }

    /* Here "vcol" is the column where the character starts, "incr" its width
     * and "head" the number of cells that come before the character itself. */
    if (start != NULL)
        *start = vcol + head;
    if (end != NULL)
        *end = vcol + incr - 1;
    if (cursor != NULL)
    {
        /* On a TAB the cursor is put on its last cell, but only in Normal
         * mode, without 'list', when virtual editing is not active and not
         * in the Visual mode situation checked below. */
        if (*ptr == TAB
                && (State & NORMAL)
                && !wp->w_p_list
                && !virtual_active()
#ifdef FEAT_VISUAL
                && !(VIsual_active
                                   && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
#endif
                )
            *cursor = vcol + incr - 1;      /* cursor at end */
        else
            *cursor = vcol + head;          /* cursor at start */
    }
}
1353
1354 /*
1355  * Get virtual cursor column in the current window, pretending 'list' is off.
1356  */
1357     colnr_T
1358 getvcol_nolist(posp)
1359     pos_T       *posp;
1360 {
1361     int         list_save = curwin->w_p_list;
1362     colnr_T     vcol;
1363
1364     curwin->w_p_list = FALSE;
1365     getvcol(curwin, posp, NULL, &vcol, NULL);
1366     curwin->w_p_list = list_save;
1367     return vcol;
1368 }
1369
1370 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1371 /*
1372  * Get virtual column in virtual mode.
1373  */
1374     void
1375 getvvcol(wp, pos, start, cursor, end)
1376     win_T       *wp;
1377     pos_T       *pos;
1378     colnr_T     *start;
1379     colnr_T     *cursor;
1380     colnr_T     *end;
1381 {
1382     colnr_T     col;
1383     colnr_T     coladd;
1384     colnr_T     endadd;
1385 # ifdef FEAT_MBYTE
1386     char_u      *ptr;
1387 # endif
1388
1389     if (virtual_active())
1390     {
1391         /* For virtual mode, only want one value */
1392         getvcol(wp, pos, &col, NULL, NULL);
1393
1394         coladd = pos->coladd;
1395         endadd = 0;
1396 # ifdef FEAT_MBYTE
1397         /* Cannot put the cursor on part of a wide character. */
1398         ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1399         if (pos->col < STRLEN(ptr))
1400         {
1401             int c = (*mb_ptr2char)(ptr + pos->col);
1402
1403             if (c != TAB && vim_isprintc(c))
1404             {
1405                 endadd = char2cells(c) - 1;
1406                 if (coladd > endadd)    /* past end of line */
1407                     endadd = 0;
1408                 else
1409                     coladd = 0;
1410             }
1411         }
1412 # endif
1413         col += coladd;
1414         if (start != NULL)
1415             *start = col;
1416         if (cursor != NULL)
1417             *cursor = col;
1418         if (end != NULL)
1419             *end = col + endadd;
1420     }
1421     else
1422         getvcol(wp, pos, start, cursor, end);
1423 }
1424 #endif
1425
1426 #if defined(FEAT_VISUAL) || defined(PROTO)
1427 /*
1428  * Get the leftmost and rightmost virtual column of pos1 and pos2.
1429  * Used for Visual block mode.
1430  */
1431     void
1432 getvcols(wp, pos1, pos2, left, right)
1433     win_T       *wp;
1434     pos_T       *pos1, *pos2;
1435     colnr_T     *left, *right;
1436 {
1437     colnr_T     from1, from2, to1, to2;
1438
1439     if (ltp(pos1, pos2))
1440     {
1441         getvvcol(wp, pos1, &from1, NULL, &to1);
1442         getvvcol(wp, pos2, &from2, NULL, &to2);
1443     }
1444     else
1445     {
1446         getvvcol(wp, pos2, &from1, NULL, &to1);
1447         getvvcol(wp, pos1, &from2, NULL, &to2);
1448     }
1449     if (from2 < from1)
1450         *left = from2;
1451     else
1452         *left = from1;
1453     if (to2 > to1)
1454     {
1455         if (*p_sel == 'e' && from2 - 1 >= to1)
1456             *right = from2 - 1;
1457         else
1458             *right = to2;
1459     }
1460     else
1461         *right = to1;
1462 }
1463 #endif
1464
1465 /*
1466  * skipwhite: skip over ' ' and '\t'.
1467  */
1468     char_u *
1469 skipwhite(p)
1470     char_u      *p;
1471 {
1472     while (vim_iswhite(*p)) /* skip to next non-white */
1473         ++p;
1474     return p;
1475 }
1476
1477 /*
1478  * skip over digits
1479  */
1480     char_u *
1481 skipdigits(p)
1482     char_u      *p;
1483 {
1484     while (VIM_ISDIGIT(*p))     /* skip to next non-digit */
1485         ++p;
1486     return p;
1487 }
1488
1489 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1490 /*
1491  * skip over digits and hex characters
1492  */
1493     char_u *
1494 skiphex(p)
1495     char_u      *p;
1496 {
1497     while (vim_isxdigit(*p))    /* skip to next non-digit */
1498         ++p;
1499     return p;
1500 }
1501 #endif
1502
1503 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1504 /*
1505  * skip to digit (or NUL after the string)
1506  */
1507     char_u *
1508 skiptodigit(p)
1509     char_u      *p;
1510 {
1511     while (*p != NUL && !VIM_ISDIGIT(*p))       /* skip to next digit */
1512         ++p;
1513     return p;
1514 }
1515
1516 /*
1517  * skip to hex character (or NUL after the string)
1518  */
1519     char_u *
1520 skiptohex(p)
1521     char_u      *p;
1522 {
1523     while (*p != NUL && !vim_isxdigit(*p))      /* skip to next digit */
1524         ++p;
1525     return p;
1526 }
1527 #endif
1528
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * Only '0' to '9' are digits.  isdigit() is not used here, because on some
 * systems it also considers superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    if (c < '0')
        return 0;
    return c <= '9';
}
1541
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * Only '0' to '9', 'a' to 'f' and 'A' to 'F' are accepted.  isxdigit() is
 * not used here, because on some systems it also considers superscript 1 to
 * be a digit.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'A' && c <= 'F')
        return 1;
    return c >= 'a' && c <= 'f';
}
1555
1556 #if defined(FEAT_MBYTE) || defined(PROTO)
1557 /*
1558  * Vim's own character class functions.  These exist because many library
1559  * islower()/toupper() etc. do not work properly: they crash when used with
1560  * invalid values or can't handle latin1 when the locale is C.
1561  * Speed is most important here.
1562  */
/* Flag characters used in latin1flags[]: the entry for a character is
 * LATIN1LOWER for a lower-case letter, LATIN1UPPER for an upper-case letter
 * and a space for anything else. */
#define LATIN1LOWER 'l'
#define LATIN1UPPER 'U'
1565
1566 /*                                                                 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~                                  ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1567 static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1568 static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1569 static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1570
1571     int
1572 vim_islower(c)
1573     int     c;
1574 {
1575     if (c <= '@')
1576         return FALSE;
1577     if (c >= 0x80)
1578     {
1579         if (enc_utf8)
1580             return utf_islower(c);
1581         if (c >= 0x100)
1582         {
1583 #ifdef HAVE_ISWLOWER
1584             if (has_mbyte)
1585                 return iswlower(c);
1586 #endif
1587             /* islower() can't handle these chars and may crash */
1588             return FALSE;
1589         }
1590         if (enc_latin1like)
1591             return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1592     }
1593     return islower(c);
1594 }
1595
1596     int
1597 vim_isupper(c)
1598     int     c;
1599 {
1600     if (c <= '@')
1601         return FALSE;
1602     if (c >= 0x80)
1603     {
1604         if (enc_utf8)
1605             return utf_isupper(c);
1606         if (c >= 0x100)
1607         {
1608 #ifdef HAVE_ISWUPPER
1609             if (has_mbyte)
1610                 return iswupper(c);
1611 #endif
1612             /* islower() can't handle these chars and may crash */
1613             return FALSE;
1614         }
1615         if (enc_latin1like)
1616             return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1617     }
1618     return isupper(c);
1619 }
1620
1621     int
1622 vim_toupper(c)
1623     int     c;
1624 {
1625     if (c <= '@')
1626         return c;
1627     if (c >= 0x80)
1628     {
1629         if (enc_utf8)
1630             return utf_toupper(c);
1631         if (c >= 0x100)
1632         {
1633 #ifdef HAVE_TOWUPPER
1634             if (has_mbyte)
1635                 return towupper(c);
1636 #endif
1637             /* toupper() can't handle these chars and may crash */
1638             return c;
1639         }
1640         if (enc_latin1like)
1641             return latin1upper[c];
1642     }
1643     return TOUPPER_LOC(c);
1644 }
1645
1646     int
1647 vim_tolower(c)
1648     int     c;
1649 {
1650     if (c <= '@')
1651         return c;
1652     if (c >= 0x80)
1653     {
1654         if (enc_utf8)
1655             return utf_tolower(c);
1656         if (c >= 0x100)
1657         {
1658 #ifdef HAVE_TOWLOWER
1659             if (has_mbyte)
1660                 return towlower(c);
1661 #endif
1662             /* tolower() can't handle these chars and may crash */
1663             return c;
1664         }
1665         if (enc_latin1like)
1666             return latin1lower[c];
1667     }
1668     return TOLOWER_LOC(c);
1669 }
1670 #endif
1671
1672 /*
1673  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1674  */
1675     char_u *
1676 skiptowhite(p)
1677     char_u      *p;
1678 {
1679     while (*p != ' ' && *p != '\t' && *p != NUL)
1680         ++p;
1681     return p;
1682 }
1683
1684 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1685         || defined(PROTO)
1686 /*
1687  * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1688  */
1689     char_u *
1690 skiptowhite_esc(p)
1691     char_u      *p;
1692 {
1693     while (*p != ' ' && *p != '\t' && *p != NUL)
1694     {
1695         if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1696             ++p;
1697         ++p;
1698     }
1699     return p;
1700 }
1701 #endif
1702
1703 /*
1704  * Getdigits: Get a number from a string and skip over it.
1705  * Note: the argument is a pointer to a char_u pointer!
1706  */
1707     long
1708 getdigits(pp)
1709     char_u **pp;
1710 {
1711     char_u      *p;
1712     long        retval;
1713
1714     p = *pp;
1715     retval = atol((char *)p);
1716     if (*p == '-')              /* skip negative sign */
1717         ++p;
1718     p = skipdigits(p);          /* skip to next non-digit */
1719     *pp = p;
1720     return retval;
1721 }
1722
1723 /*
1724  * Return TRUE if "lbuf" is empty or only contains blanks.
1725  */
1726     int
1727 vim_isblankline(lbuf)
1728     char_u      *lbuf;
1729 {
1730     char_u      *p;
1731
1732     p = skipwhite(lbuf);
1733     return (*p == NUL || *p == '\r' || *p == '\n');
1734 }
1735
/*
 * Convert a string into a long and/or unsigned long, taking care of
 * hexadecimal and octal numbers.  Accepts a '-' sign.
 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
 *  0       decimal
 *  '0'     octal
 *  'X'     hex
 *  'x'     hex
 * If "len" is not NULL, the length of the number in characters is returned.
 * If "nptr" is not NULL, the signed result is returned in it.
 * If "unptr" is not NULL, the unsigned result is returned in it.
 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
 * octal number.
 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
 * hex number.
 *
 * The length stored in "len" includes the '-' sign and a "0x" or "0X"
 * prefix.  The value stored in "unptr" does not take the sign into account.
 */
    void
vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
    char_u              *start;
    int                 *hexp;      /* return: type of number 0 = decimal, 'x'
                                       or 'X' is hex, '0' = octal */
    int                 *len;       /* return: detected length of number */
    int                 dooct;      /* recognize octal number */
    int                 dohex;      /* recognize hex number */
    long                *nptr;      /* return: signed result */
    unsigned long       *unptr;     /* return: unsigned result */
{
    char_u          *ptr = start;
    int             hex = 0;            /* default is decimal */
    int             negative = FALSE;
    unsigned long   un = 0;
    int             n;

    if (ptr[0] == '-')
    {
        negative = TRUE;
        ++ptr;
    }

    /* Recognize hex and octal. */
    if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
    {
        /* Tentatively store the character after the '0', it is only kept
         * when it turns out to be a valid hex prefix. */
        hex = ptr[1];
        if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
            ptr += 2;                   /* hexadecimal */
        else
        {
            hex = 0;                    /* default is decimal */
            if (dooct)
            {
                /* Don't interpret "0", "08" or "0129" as octal. */
                for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
                {
                    if (ptr[n] > '7')
                    {
                        hex = 0;        /* can't be octal */
                        break;
                    }
                    if (ptr[n] > '0')
                        hex = '0';      /* assume octal */
                }
            }
        }
    }

    /*
     * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
     * No check for overflow is done.
     */
    if (hex == '0' || dooct > 1)
    {
        /* octal */
        while ('0' <= *ptr && *ptr <= '7')
        {
            un = 8 * un + (unsigned long)(*ptr - '0');
            ++ptr;
        }
    }
    else if (hex != 0 || dohex > 1)
    {
        /* hex */
        while (vim_isxdigit(*ptr))
        {
            un = 16 * un + (unsigned long)hex2nr(*ptr);
            ++ptr;
        }
    }
    else
    {
        /* decimal */
        while (VIM_ISDIGIT(*ptr))
        {
            un = 10 * un + (unsigned long)(*ptr - '0');
            ++ptr;
        }
    }

    if (hexp != NULL)
        *hexp = hex;
    if (len != NULL)
        *len = (int)(ptr - start);
    if (nptr != NULL)
    {
        if (negative)   /* account for leading '-' for decimal numbers */
            *nptr = -(long)un;
        else
            *nptr = (long)un;
    }
    if (unptr != NULL)
        *unptr = un;
}
1847
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * Any other argument is handled like a decimal digit: "c" - '0'.
 */
    int
hex2nr(c)
    int         c;
{
    int         base = '0';     /* character that stands for "bias" */
    int         bias = 0;       /* value of the first character in range */

    if (c >= 'a' && c <= 'f')
    {
        base = 'a';
        bias = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        base = 'A';
        bias = 10;
    }
    return c - base + bias;
}
1862
1863 #if defined(FEAT_TERMRESPONSE) \
1864         || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1865 /*
1866  * Convert two hex characters to a byte.
1867  * Return -1 if one of the characters is not hex.
1868  */
1869     int
1870 hexhex2nr(p)
1871     char_u      *p;
1872 {
1873     if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1874         return -1;
1875     return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1876 }
1877 #endif
1878
1879 /*
1880  * Return TRUE if "str" starts with a backslash that should be removed.
1881  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1882  * backslash is not a normal file name character.
1883  * '$' is a valid file name character, we don't remove the backslash before
1884  * it.  This means it is not possible to use an environment variable after a
1885  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1886  * Although "\ name" is valid, the backslash in "Program\ files" must be
1887  * removed.  Assume a file name doesn't start with a space.
1888  * For multi-byte names, never remove a backslash before a non-ascii
1889  * character, assume that all multi-byte characters are valid file name
1890  * characters.
1891  */
1892     int
1893 rem_backslash(str)
1894     char_u  *str;
1895 {
1896 #ifdef BACKSLASH_IN_FILENAME
1897     return (str[0] == '\\'
1898 # ifdef FEAT_MBYTE
1899             && str[1] < 0x80
1900 # endif
1901             && (str[1] == ' '
1902                 || (str[1] != NUL
1903                     && str[1] != '*'
1904                     && str[1] != '?'
1905                     && !vim_isfilec(str[1]))));
1906 #else
1907     return (str[0] == '\\' && str[1] != NUL);
1908 #endif
1909 }
1910
1911 /*
1912  * Halve the number of backslashes in a file name argument.
1913  * For MS-DOS we only do this if the character after the backslash
1914  * is not a normal file character.
1915  */
1916     void
1917 backslash_halve(p)
1918     char_u      *p;
1919 {
1920     for ( ; *p; ++p)
1921         if (rem_backslash(p))
1922             mch_memmove(p, p + 1, STRLEN(p));
1923 }
1924
1925 /*
1926  * backslash_halve() plus save the result in allocated memory.
1927  */
1928     char_u *
1929 backslash_halve_save(p)
1930     char_u      *p;
1931 {
1932     char_u      *res;
1933
1934     res = vim_strsave(p);
1935     if (res == NULL)
1936         return p;
1937     backslash_halve(res);
1938     return res;
1939 }
1940
1941 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed with the EBCDIC byte value, the entry is the
 * replacement byte, written as an octal number.  There are eight entries per
 * row, thus row N starts at index 8 * N.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};
1982
1983 /*
1984  * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
1985  * wanting 7-bit ASCII characters out the other end.
1986  */
1987     void
1988 ebcdic2ascii(buffer, len)
1989     char_u      *buffer;
1990     int         len;
1991 {
1992     int         i;
1993
1994     for (i = 0; i < len; i++)
1995         buffer[i] = ebcdic2ascii_tab[buffer[i]];
1996 }
1997 #endif