src/charset.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 #include "vim.h"
  11
/* Prototypes for the file-local (static) helper functions. */
#ifdef FEAT_LINEBREAK
static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
#endif

#ifdef FEAT_MBYTE
static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
#endif

static int nr2hex __ARGS((int c));

/* Set to TRUE at the end of buf_init_chartab().  While it is still FALSE
 * transchar() uses a fixed printable range instead of consulting chartab[]. */
static int    chartab_initialized = FALSE;
  23
  24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
  25  * characters 0-255. */
  26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
  27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
  28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
  29
/*
 * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
 * characters for current buffer.
 *
 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
 * 'isprint' and 'encoding'.
 *
 * The index in chartab[] depends on 'encoding':
 * - For non-multi-byte index with the byte (same as the character).
 * - For DBCS index with the first byte.
 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
 *   the same as the character, if the first byte is 0x80 and above it depends
 *   on further bytes).
 *
 * The contents of chartab[]:
 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
 *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
 *   translate the character before displaying it).  Note that only DBCS
 *   characters can have 2 display cells and still be printable.
 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
 * - CT_ID_CHAR bit is set when the character can be in an identifier.
 *
 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
 * error, OK otherwise.
 *
 * init_chartab() is the entry point for the current buffer: it simply calls
 * buf_init_chartab() with curbuf and "global" set to TRUE, so that both the
 * global chartab[] and curbuf->b_chartab[] are rebuilt.
 */
    int
init_chartab()
{
    /* TRUE: also rebuild the global chartab[], not only the keyword flags */
    return buf_init_chartab(curbuf, TRUE);
}
  61
  62     int
  63 buf_init_chartab(buf, global)
  64     buf_T       *buf;
  65     int         global;         /* FALSE: only set buf->b_chartab[] */
  66 {
  67     int         c;
  68     int         c2;
  69     char_u      *p;
  70     int         i;
  71     int         tilde;
  72     int         do_isalpha;
  73
  74     if (global)
  75     {
  76         /*
  77          * Set the default size for printable characters:
  78          * From <Space> to '~' is 1 (printable), others are 2 (not printable).
  79          * This also inits all 'isident' and 'isfname' flags to FALSE.
  80          *
  81          * EBCDIC: all chars below ' ' are not printable, all others are
  82          * printable.
  83          */
  84         c = 0;
  85         while (c < ' ')
  86             chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
  87 #ifdef EBCDIC
  88         while (c < 255)
  89 #else
  90         while (c <= '~')
  91 #endif
  92             chartab[c++] = 1 + CT_PRINT_CHAR;
  93 #ifdef FEAT_FKMAP
  94         if (p_altkeymap)
  95         {
  96             while (c < YE)
  97                 chartab[c++] = 1 + CT_PRINT_CHAR;
  98         }
  99 #endif
 100         while (c < 256)
 101         {
 102 #ifdef FEAT_MBYTE
 103             /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
 104             if (enc_utf8 && c >= 0xa0)
 105                 chartab[c++] = CT_PRINT_CHAR + 1;
 106             /* euc-jp characters starting with 0x8e are single width */
 107             else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
 108                 chartab[c++] = CT_PRINT_CHAR + 1;
 109             /* other double-byte chars can be printable AND double-width */
 110             else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
 111                 chartab[c++] = CT_PRINT_CHAR + 2;
 112             else
 113 #endif
 114                 /* the rest is unprintable by default */
 115                 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
 116         }
 117
 118 #ifdef FEAT_MBYTE
 119         /* Assume that every multi-byte char is a filename character. */
 120         for (c = 1; c < 256; ++c)
 121             if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
 122                     || (enc_dbcs == DBCS_JPNU && c == 0x8e)
 123                     || (enc_utf8 && c >= 0xa0))
 124                 chartab[c] |= CT_FNAME_CHAR;
 125 #endif
 126     }
 127
 128     /*
 129      * Init word char flags all to FALSE
 130      */
 131     vim_memset(buf->b_chartab, 0, (size_t)32);
 132 #ifdef FEAT_MBYTE
 133     if (enc_dbcs != 0)
 134         for (c = 0; c < 256; ++c)
 135         {
 136             /* double-byte characters are probably word characters */
 137             if (MB_BYTE2LEN(c) == 2)
 138                 SET_CHARTAB(buf, c);
 139         }
 140 #endif
 141
 142 #ifdef FEAT_LISP
 143     /*
 144      * In lisp mode the '-' character is included in keywords.
 145      */
 146     if (buf->b_p_lisp)
 147         SET_CHARTAB(buf, '-');
 148 #endif
 149
 150     /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
 151      * options Each option is a list of characters, character numbers or
 152      * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
 153      */
 154     for (i = global ? 0 : 3; i <= 3; ++i)
 155     {
 156         if (i == 0)
 157             p = p_isi;          /* first round: 'isident' */
 158         else if (i == 1)
 159             p = p_isp;          /* second round: 'isprint' */
 160         else if (i == 2)
 161             p = p_isf;          /* third round: 'isfname' */
 162         else    /* i == 3 */
 163             p = buf->b_p_isk;   /* fourth round: 'iskeyword' */
 164
 165         while (*p)
 166         {
 167             tilde = FALSE;
 168             do_isalpha = FALSE;
 169             if (*p == '^' && p[1] != NUL)
 170             {
 171                 tilde = TRUE;
 172                 ++p;
 173             }
 174             if (VIM_ISDIGIT(*p))
 175                 c = getdigits(&p);
 176             else
 177                 c = *p++;
 178             c2 = -1;
 179             if (*p == '-' && p[1] != NUL)
 180             {
 181                 ++p;
 182                 if (VIM_ISDIGIT(*p))
 183                     c2 = getdigits(&p);
 184                 else
 185                     c2 = *p++;
 186             }
 187             if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
 188                                                  || !(*p == NUL || *p == ','))
 189                 return FAIL;
 190
 191             if (c2 == -1)       /* not a range */
 192             {
 193                 /*
 194                  * A single '@' (not "@-@"):
 195                  * Decide on letters being ID/printable/keyword chars with
 196                  * standard function isalpha(). This takes care of locale for
 197                  * single-byte characters).
 198                  */
 199                 if (c == '@')
 200                 {
 201                     do_isalpha = TRUE;
 202                     c = 1;
 203                     c2 = 255;
 204                 }
 205                 else
 206                     c2 = c;
 207             }
 208             while (c <= c2)
 209             {
 210                 if (!do_isalpha || isalpha(c)
 211 #ifdef FEAT_FKMAP
 212                         || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
 213 #endif
 214                             )
 215                 {
 216                     if (i == 0)                 /* (re)set ID flag */
 217                     {
 218                         if (tilde)
 219                             chartab[c] &= ~CT_ID_CHAR;
 220                         else
 221                             chartab[c] |= CT_ID_CHAR;
 222                     }
 223                     else if (i == 1)            /* (re)set printable */
 224                     {
 225                         if ((c < ' '
 226 #ifndef EBCDIC
 227                                     || c > '~'
 228 #endif
 229 #ifdef FEAT_FKMAP
 230                                     || (p_altkeymap
 231                                         && (F_isalpha(c) || F_isdigit(c)))
 232 #endif
 233                             )
 234 #ifdef FEAT_MBYTE
 235                                 /* For double-byte we keep the cell width, so
 236                                  * that we can detect it from the first byte. */
 237                                 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
 238 #endif
 239                            )
 240                         {
 241                             if (tilde)
 242                             {
 243                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
 244                                              + ((dy_flags & DY_UHEX) ? 4 : 2);
 245                                 chartab[c] &= ~CT_PRINT_CHAR;
 246                             }
 247                             else
 248                             {
 249                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
 250                                 chartab[c] |= CT_PRINT_CHAR;
 251                             }
 252                         }
 253                     }
 254                     else if (i == 2)            /* (re)set fname flag */
 255                     {
 256                         if (tilde)
 257                             chartab[c] &= ~CT_FNAME_CHAR;
 258                         else
 259                             chartab[c] |= CT_FNAME_CHAR;
 260                     }
 261                     else /* i == 3 */           /* (re)set keyword flag */
 262                     {
 263                         if (tilde)
 264                             RESET_CHARTAB(buf, c);
 265                         else
 266                             SET_CHARTAB(buf, c);
 267                     }
 268                 }
 269                 ++c;
 270             }
 271             p = skip_to_option_part(p);
 272         }
 273     }
 274     chartab_initialized = TRUE;
 275     return OK;
 276 }
 277
 278 /*
 279  * Translate any special characters in buf[bufsize] in-place.
 280  * The result is a string with only printable characters, but if there is not
 281  * enough room, not all characters will be translated.
 282  */
 283     void
 284 trans_characters(buf, bufsize)
 285     char_u      *buf;
 286     int         bufsize;
 287 {
 288     int         len;            /* length of string needing translation */
 289     int         room;           /* room in buffer after string */
 290     char_u      *trs;           /* translated character */
 291     int         trs_len;        /* length of trs[] */
 292
 293     len = (int)STRLEN(buf);
 294     room = bufsize - len;
 295     while (*buf != 0)
 296     {
 297 # ifdef FEAT_MBYTE
 298         /* Assume a multi-byte character doesn't need translation. */
 299         if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
 300             len -= trs_len;
 301         else
 302 # endif
 303         {
 304             trs = transchar_byte(*buf);
 305             trs_len = (int)STRLEN(trs);
 306             if (trs_len > 1)
 307             {
 308                 room -= trs_len - 1;
 309                 if (room <= 0)
 310                     return;
 311                 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
 312             }
 313             mch_memmove(buf, trs, (size_t)trs_len);
 314             --len;
 315         }
 316         buf += trs_len;
 317     }
 318 }
 319
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Translate a string into allocated memory, replacing special chars with
 * printable chars.  Returns NULL when out of memory.
 *
 * "s" is not modified.  The work is done in two passes: first the size of
 * the result is computed, so that it can be allocated in one go, then the
 * translated text is appended to the result piece by piece.
 */
    char_u *
transstr(s)
    char_u      *s;
{
    char_u      *res;
    char_u      *p;
#ifdef FEAT_MBYTE
    int         l, len, c;
    char_u      hexbuf[11];     /* scratch for transchar_hex(), only used to
                                   find the length of the hex form */
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        /* Compute the length of the result, taking account of unprintable
         * multi-byte characters. */
        len = 0;
        p = s;
        while (*p != NUL)
        {
            if ((l = (*mb_ptr2len)(p)) > 1)
            {
                c = (*mb_ptr2char)(p);
                p += l;
                if (vim_isprintc(c))
                    len += l;           /* copied as-is */
                else
                {
                    /* replaced by its hex form */
                    transchar_hex(hexbuf, c);
                    len += (int)STRLEN(hexbuf);
                }
            }
            else
            {
                /* byte2cells() returns zero for a UTF-8 byte >= 0x80 */
                l = byte2cells(*p++);
                if (l > 0)
                    len += l;
                else
                    len += 4;   /* illegal byte sequence */
            }
        }
        res = alloc((unsigned)(len + 1));
    }
    else
#endif
        /* single-byte: the number of cells equals the number of bytes */
        res = alloc((unsigned)(vim_strsize(s) + 1));
    if (res != NULL)
    {
        /* Second pass: build the result by appending to "res". */
        *res = NUL;
        p = s;
        while (*p != NUL)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
            {
                c = (*mb_ptr2char)(p);
                if (vim_isprintc(c))
                    STRNCAT(res, p, l); /* append printable multi-byte char */
                else
                    transchar_hex(res + STRLEN(res), c);
                p += l;
            }
            else
#endif
                STRCAT(res, transchar_byte(*p++));
        }
    }
    return res;
}
#endif
 396
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]".
 *
 * When "buf" is given and "str" does not fit, the text is truncated to
 * "buflen - 1" bytes.  The result is always NUL-terminated.
 * With UTF-8 every character is converted with utf_tolower().  The lower
 * case form may have a different byte length, in which case the rest of the
 * text is shifted; when there is no room to grow the character is kept as it
 * is.  Multi-byte characters in other encodings are skipped unchanged,
 * single bytes are converted with TOLOWER_LOC().
 */
    char_u *
str_foldcase(str, orglen, buf, buflen)
    char_u      *str;
    int         orglen;
    char_u      *buf;
    int         buflen;
{
    garray_T    ga;             /* holds the result when "buf" is NULL */
    int         i;
    int         len = orglen;   /* current length of the result in "buf" */

/* Access to the result, which is either in the growarray or in "buf". */
#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified. */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
    }
    else
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
    }
    if (buf == NULL)
        GA_CHAR(len) = NUL;
    else
        buf[len] = NUL;

    /* Make each character lower case. */
    i = 0;
    while (STR_CHAR(i) != NUL)
    {
#ifdef FEAT_MBYTE
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int     c, lc;

                c = utf_ptr2char(STR_PTR(i));
                lc = utf_tolower(c);
                if (c != lc)
                {
                    int     ol = utf_char2len(c);       /* old byte length */
                    int     nl = utf_char2len(lc);      /* new byte length */

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (ol != nl)
                    {
                        if (nl > ol)
                        {
                            /* Growing: make room in the growarray or check
                             * that the result still fits in "buf". */
                            if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
                                                    : len + nl - ol >= buflen)
                            {
                                /* out of memory, keep old char */
                                lc = c;
                                nl = ol;
                            }
                        }
                        if (ol != nl)
                        {
                            /* Move the tail, including the NUL. */
                            if (buf == NULL)
                            {
                                mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
                                                  STRLEN(GA_PTR(i) + ol) + 1);
                                ga.ga_len += nl - ol;
                            }
                            else
                            {
                                mch_memmove(buf + i + nl, buf + i + ol,
                                                    STRLEN(buf + i + ol) + 1);
                                len += nl - ol;
                            }
                        }
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* skip to next multi-byte char */
            i += (*mb_ptr2len)(STR_PTR(i));
        }
        else
#endif
        {
            if (buf == NULL)
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            else
                buf[i] = TOLOWER_LOC(buf[i]);
            ++i;
        }
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
#endif
 510
/*
 * Catch 22: chartab[] can't be initialized before the options are
 * initialized, and initializing options may cause transchar() to be called!
 * When chartab_initialized == FALSE don't use chartab[].
 * Does NOT work for multi-byte characters, c must be <= 255.
 * Also doesn't work for the first byte of a multi-byte, "c" must be a
 * character!
 *
 * Returns a pointer to the static buffer transchar_buf[], which holds the
 * printable representation of "c" as a NUL-terminated string.  The buffer is
 * shared with transchar_byte(), thus the result is overwritten by the next
 * call to either function.
 */
static char_u   transchar_buf[7];

    char_u *
transchar(c)
    int         c;
{
    int                 i;      /* offset in transchar_buf[] to write at */

    i = 0;
    if (IS_SPECIAL(c))      /* special key code, display as ~@ char */
    {
        transchar_buf[0] = '~';
        transchar_buf[1] = '@';
        i = 2;
        c = K_SECOND(c);
    }

    /* Printable when either chartab[] is not available yet and "c" is in
     * the fixed printable range, or chartab[] says so. */
    if ((!chartab_initialized && (
#ifdef EBCDIC
                    (c >= 64 && c < 255)
#else
                    (c >= ' ' && c <= '~')
#endif
#ifdef FEAT_FKMAP
                        || F_ischar(c)
#endif
                )) || (c < 256 && vim_isprintc_strict(c)))
    {
        /* printable character */
        transchar_buf[i] = c;
        transchar_buf[i + 1] = NUL;
    }
    else
        transchar_nonprint(transchar_buf + i, c);
    return transchar_buf;
}
 555
 556 #if defined(FEAT_MBYTE) || defined(PROTO)
 557 /*
 558  * Like transchar(), but called with a byte instead of a character.  Checks
 559  * for an illegal UTF-8 byte.
 560  */
 561     char_u *
 562 transchar_byte(c)
 563     int         c;
 564 {
 565     if (enc_utf8 && c >= 0x80)
 566     {
 567         transchar_nonprint(transchar_buf, c);
 568         return transchar_buf;
 569     }
 570     return transchar(c);
 571 }
 572 #endif
 573
/*
 * Convert non-printable character to two or more printable characters in
 * "buf[]".  "buf" needs to be able to hold five bytes.
 * Does NOT work for multi-byte characters, c must be <= 255.
 *
 * The form used is, in this order of preference:
 * - "<xx>" hex form when 'display' contains "uhex";
 * - "^X" for control characters and DEL;
 * - "<xx>" hex form for bytes >= 0x80 with UTF-8 encoding;
 * - "|x" or "~x" for the remaining characters.
 * The result is NUL-terminated.
 */
    void
transchar_nonprint(buf, c)
    char_u      *buf;
    int         c;
{
    if (c == NL)
        c = NUL;                /* we use newline in place of a NUL */
    else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
        c = NL;                 /* we use CR in place of  NL in this case */

    if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
        transchar_hex(buf, c);

#ifdef EBCDIC
    /* For EBCDIC only the characters 0-63 and 255 are not printable */
    else if (CtrlChar(c) != 0 || c == DEL)
#else
    else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
#endif
    {
        buf[0] = '^';
#ifdef EBCDIC
        if (c == DEL)
            buf[1] = '?';               /* DEL displayed as ^? */
        else
            buf[1] = CtrlChar(c);
#else
        /* flipping bit 0x40 turns 0x01 into 'A', 0x7f into '?' */
        buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
#endif

        buf[2] = NUL;
    }
#ifdef FEAT_MBYTE
    else if (enc_utf8 && c >= 0x80)
    {
        transchar_hex(buf, c);
    }
#endif
#ifndef EBCDIC
    else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
    {
        /* '|' followed by the character with the high bit removed */
        buf[0] = '|';
        buf[1] = c - 0x80;
        buf[2] = NUL;
    }
#else
    else if (c < 64)
    {
        buf[0] = '~';
        buf[1] = MetaChar(c);
        buf[2] = NUL;
    }
#endif
    else                                            /* 0x80 - 0x9f and 0xff */
    {
        /*
         * TODO: EBCDIC I don't know what to do with this chars, so I display
         * them as '~?' for now
         */
        buf[0] = '~';
#ifdef EBCDIC
        buf[1] = '?';                   /* 0xff displayed as ~? */
#else
        /* like the "^X" form, after removing the high bit */
        buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
#endif
        buf[2] = NUL;
    }
}
 647
 648     void
 649 transchar_hex(buf, c)
 650     char_u      *buf;
 651     int         c;
 652 {
 653     int         i = 0;
 654
 655     buf[0] = '<';
 656 #ifdef FEAT_MBYTE
 657     if (c > 255)
 658     {
 659         buf[++i] = nr2hex((unsigned)c >> 12);
 660         buf[++i] = nr2hex((unsigned)c >> 8);
 661     }
 662 #endif
 663     buf[++i] = nr2hex((unsigned)c >> 4);
 664     buf[++i] = nr2hex(c);
 665     buf[++i] = '>';
 666     buf[++i] = NUL;
 667 }
 668
 669 /*
 670  * Convert the lower 4 bits of byte "c" to its hex character.
 671  * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 672  * function key 1.
 673  */
 674     static int
 675 nr2hex(c)
 676     int         c;
 677 {
 678     if ((c & 0xf) <= 9)
 679         return (c & 0xf) + '0';
 680     return (c & 0xf) - 10 + 'a';
 681 }
 682
 683 /*
 684  * Return number of display cells occupied by byte "b".
 685  * Caller must make sure 0 <= b <= 255.
 686  * For multi-byte mode "b" must be the first byte of a character.
 687  * A TAB is counted as two cells: "^I".
 688  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
 689  * cells depends on further bytes.
 690  */
 691     int
 692 byte2cells(b)
 693     int         b;
 694 {
 695 #ifdef FEAT_MBYTE
 696     if (enc_utf8 && b >= 0x80)
 697         return 0;
 698 #endif
 699     return (chartab[b] & CT_CELL_MASK);
 700 }
 701
 702 /*
 703  * Return number of display cells occupied by character "c".
 704  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
 705  * A TAB is counted as two cells: "^I" or four: "<09>".
 706  */
 707     int
 708 char2cells(c)
 709     int         c;
 710 {
 711     if (IS_SPECIAL(c))
 712         return char2cells(K_SECOND(c)) + 2;
 713 #ifdef FEAT_MBYTE
 714     if (c >= 0x80)
 715     {
 716         /* UTF-8: above 0x80 need to check the value */
 717         if (enc_utf8)
 718             return utf_char2cells(c);
 719         /* DBCS: double-byte means double-width, except for euc-jp with first
 720          * byte 0x8e */
 721         if (enc_dbcs != 0 && c >= 0x100)
 722         {
 723             if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
 724                 return 1;
 725             return 2;
 726         }
 727     }
 728 #endif
 729     return (chartab[c & 0xff] & CT_CELL_MASK);
 730 }
 731
 732 /*
 733  * Return number of display cells occupied by character at "*p".
 734  * A TAB is counted as two cells: "^I" or four: "<09>".
 735  */
 736     int
 737 ptr2cells(p)
 738     char_u      *p;
 739 {
 740 #ifdef FEAT_MBYTE
 741     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
 742     if (enc_utf8 && *p >= 0x80)
 743         return utf_ptr2cells(p);
 744     /* For DBCS we can tell the cell count from the first byte. */
 745 #endif
 746     return (chartab[*p] & CT_CELL_MASK);
 747 }
 748
 749 /*
 750  * Return the number of characters string "s" will take on the screen,
 751  * counting TABs as two characters: "^I".
 752  */
 753     int
 754 vim_strsize(s)
 755     char_u      *s;
 756 {
 757     return vim_strnsize(s, (int)MAXCOL);
 758 }
 759
 760 /*
 761  * Return the number of characters string "s[len]" will take on the screen,
 762  * counting TABs as two characters: "^I".
 763  */
 764     int
 765 vim_strnsize(s, len)
 766     char_u      *s;
 767     int         len;
 768 {
 769     int         size = 0;
 770
 771     while (*s != NUL && --len >= 0)
 772     {
 773 #ifdef FEAT_MBYTE
 774         if (has_mbyte)
 775         {
 776             int     l = (*mb_ptr2len)(s);
 777
 778             size += ptr2cells(s);
 779             s += l;
 780             len -= l - 1;
 781         }
 782         else
 783 #endif
 784             size += byte2cells(*s++);
 785     }
 786     return size;
 787 }
 788
 789 /*
 790  * Return the number of characters 'c' will take on the screen, taking
 791  * into account the size of a tab.
 792  * Use a define to make it fast, this is used very often!!!
 793  * Also see getvcol() below.
 794  */
 795
 796 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 797     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 798     { \
 799         int ts; \
 800         ts = (buf)->b_p_ts; \
 801         return (int)(ts - (col % ts)); \
 802     } \
 803     else \
 804         return ptr2cells(p);
 805
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of cells the character at "p" takes when displayed at
 * column "col", using the 'list' setting of the current window and the
 * 'tabstop' setting of the current buffer.
 */
    int
chartabsize(p, col)
    char_u      *p;
    colnr_T     col;
{
    /* the macro contains the return statements */
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
 816
#ifdef FEAT_LINEBREAK
/*
 * Return the number of cells the character at "p" takes when displayed at
 * column "col", using the 'list' setting of window "wp" and the 'tabstop'
 * setting of its buffer.
 */
    static int
win_chartabsize(wp, p, col)
    win_T       *wp;
    char_u      *p;
    colnr_T     col;
{
    /* the macro contains the return statements */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
 827
 828 /*
 829  * return the number of characters the string 's' will take on the screen,
 830  * taking into account the size of a tab
 831  */
 832     int
 833 linetabsize(s)
 834     char_u      *s;
 835 {
 836     colnr_T     col = 0;
 837
 838     while (*s != NUL)
 839         col += lbr_chartabsize_adv(&s, col);
 840     return (int)col;
 841 }
 842
 843 /*
 844  * Like linetabsize(), but for a given window instead of the current one.
 845  */
 846     int
 847 win_linetabsize(wp, p, len)
 848     win_T       *wp;
 849     char_u      *p;
 850     colnr_T     len;
 851 {
 852     colnr_T     col = 0;
 853     char_u      *s;
 854
 855     for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
 856         col += win_lbr_chartabsize(wp, s, col, NULL);
 857     return (int)col;
 858 }
 859
 860 /*
 861  * Return TRUE if 'c' is a normal identifier character:
 862  * Letters and characters from the 'isident' option.
 863  */
 864     int
 865 vim_isIDc(c)
 866     int c;
 867 {
 868     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
 869 }
 870
 871 /*
 872  * return TRUE if 'c' is a keyword character: Letters and characters from
 873  * 'iskeyword' option for current buffer.
 874  * For multi-byte characters mb_get_class() is used (builtin rules).
 875  */
 876     int
 877 vim_iswordc(c)
 878     int c;
 879 {
 880 #ifdef FEAT_MBYTE
 881     if (c >= 0x100)
 882     {
 883         if (enc_dbcs != 0)
 884             return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
 885         if (enc_utf8)
 886             return utf_class(c) >= 2;
 887     }
 888 #endif
 889     return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
 890 }
 891
 892 /*
 893  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
 894  */
 895     int
 896 vim_iswordp(p)
 897     char_u *p;
 898 {
 899 #ifdef FEAT_MBYTE
 900     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 901         return mb_get_class(p) >= 2;
 902 #endif
 903     return GET_CHARTAB(curbuf, *p) != 0;
 904 }
 905
 906 #if defined(FEAT_SYN_HL) || defined(PROTO)
 907     int
 908 vim_iswordc_buf(p, buf)
 909     char_u      *p;
 910     buf_T       *buf;
 911 {
 912 # ifdef FEAT_MBYTE
 913     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 914         return mb_get_class(p) >= 2;
 915 # endif
 916     return (GET_CHARTAB(buf, *p) != 0);
 917 }
 918 #endif
 919
 920 /*
 921  * return TRUE if 'c' is a valid file-name character
 922  * Assume characters above 0x100 are valid (multi-byte).
 923  */
 924     int
 925 vim_isfilec(c)
 926     int c;
 927 {
 928     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
 929 }
 930
 931 /*
 932  * return TRUE if 'c' is a printable character
 933  * Assume characters above 0x100 are printable (multi-byte), except for
 934  * Unicode.
 935  */
 936     int
 937 vim_isprintc(c)
 938     int c;
 939 {
 940 #ifdef FEAT_MBYTE
 941     if (enc_utf8 && c >= 0x100)
 942         return utf_printable(c);
 943 #endif
 944     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 945 }
 946
 947 /*
 948  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 949  * byte of a double-byte character.
 950  */
 951     int
 952 vim_isprintc_strict(c)
 953     int c;
 954 {
 955 #ifdef FEAT_MBYTE
 956     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
 957         return FALSE;
 958     if (enc_utf8 && c >= 0x100)
 959         return utf_printable(c);
 960 #endif
 961     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 962 }
 963
/*
 * like chartabsize(), but also check for line breaks on the screen
 *
 * Returns the number of cells for the character at "s" when displayed at
 * column "col" in the current window.
 * Without the +linebreak feature only the part inside the braces remains and
 * the function always takes the simple path.
 */
    int
lbr_chartabsize(s, col)
    unsigned char       *s;
    colnr_T             col;
{
#ifdef FEAT_LINEBREAK
    /* Neither 'linebreak' nor 'showbreak' is in use: take the simple path. */
    if (!curwin->w_p_lbr && *p_sbr == NUL)
    {
#endif
#ifdef FEAT_MBYTE
        if (curwin->w_p_wrap)
            return win_nolbr_chartabsize(curwin, s, col, NULL);
#endif
        /* the macro contains the return statements */
        RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
#ifdef FEAT_LINEBREAK
    }
    return win_lbr_chartabsize(curwin, s, col, NULL);
#endif
}
 986
 987 /*
 988  * Call lbr_chartabsize() and advance the pointer.
 989  */
 990     int
 991 lbr_chartabsize_adv(s, col)
 992     char_u      **s;
 993     colnr_T     col;
 994 {
 995     int         retval;
 996
 997     retval = lbr_chartabsize(*s, col);
 998     mb_ptr_adv(*s);
 999     return retval;
1000 }
1001
1002 /*
1003  * This function is used very often, keep it fast!!!!
1004  *
1005  * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1006  * string at start of line.  Warning: *headp is only set if it's a non-zero
1007  * value, init to 0 before calling.
      *
      * Returns the number of cells counted for the character at "s" when it
      * starts in virtual column "col" of window "wp".  On top of the normal
      * size this includes the padding inserted for a 'linebreak' break, the
      * 'showbreak' string and the extra cell for a double-wide character that
      * does not fit at the end of the screen line.
      *
      * Without FEAT_LINEBREAK only the quick path below is compiled.
1008  */
1009 /*ARGSUSED*/
1010     int
1011 win_lbr_chartabsize(wp, s, col, headp)
1012     win_T       *wp;
1013     char_u      *s;
1014     colnr_T     col;
1015     int         *headp;
1016 {
1017 #ifdef FEAT_LINEBREAK
1018     int         c;
1019     int         size;
1020     colnr_T     col2;
1021     colnr_T     colmax;
1022     int         added;
1023 # ifdef FEAT_MBYTE
1024     int         mb_added = 0;
1025 # else
1026 #  define mb_added 0
1027 # endif
1028     int         numberextra;
1029     char_u      *ps;
         /* Remember whether we started on a TAB: "s" is advanced below. */
1030     int         tab_corr = (*s == TAB);
1031     int         n;
1032
1033     /*
1034      * No 'linebreak' and 'showbreak': return quickly.
1035      */
1036     if (!wp->w_p_lbr && *p_sbr == NUL)
1037 #endif
1038     {
1039 #ifdef FEAT_MBYTE
1040         if (wp->w_p_wrap)
1041             return win_nolbr_chartabsize(wp, s, col, headp);
1042 #endif
1043         RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1044     }
1045
1046 #ifdef FEAT_LINEBREAK
1047     /*
1048      * First get normal size, without 'linebreak'
1049      */
1050     size = win_chartabsize(wp, s, col);
1051     c = *s;
1052
1053     /*
1054      * If 'linebreak' set check at a blank before a non-blank if the line
1055      * needs a break here
1056      */
1057     if (wp->w_p_lbr
1058             && vim_isbreak(c)
1059             && !vim_isbreak(s[1])
1060             && !wp->w_p_list
1061             && wp->w_p_wrap
1062 # ifdef FEAT_VERTSPLIT
1063             && wp->w_width != 0
1064 # endif
1065        )
1066     {
1067         /*
1068          * Count all characters from first non-blank after a blank up to next
1069          * non-blank after a blank.
1070          */
1071         numberextra = win_col_off(wp);
1072         col2 = col;
1073         colmax = W_WIDTH(wp) - numberextra;
             /* When "col" is at or past the first colmax, move colmax forward
              * in steps of "n" until it is beyond "col". */
1074         if (col >= colmax)
1075         {
1076             n = colmax + win_col_off2(wp);
1077             if (n > 0)
1078                 colmax += (((col - colmax) / n) + 1) * n;
1079         }
1080
1081         for (;;)
1082         {
1083             ps = s;
1084             mb_ptr_adv(s);
1085             c = *s;
                 /* Stop at the end of the line, or at a non-break character
                  * that follows a break character (except directly after the
                  * start position). */
1086             if (!(c != NUL
1087                     && (vim_isbreak(c)
1088                         || (!vim_isbreak(c)
1089                             && (col2 == col || !vim_isbreak(*ps))))))
1090                 break;
1091
1092             col2 += win_chartabsize(wp, s, col2);
1093             if (col2 >= colmax)         /* doesn't fit */
1094             {
                     /* The character at "col" takes up the rest of the space
                      * up to colmax. */
1095                 size = colmax - col;
1096                 tab_corr = FALSE;
1097                 break;
1098             }
1099         }
1100     }
1101 # ifdef FEAT_MBYTE
1102     else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1103                                     && wp->w_p_wrap && in_win_border(wp, col))
1104     {
1105         ++size;         /* Count the ">" in the last column. */
1106         mb_added = 1;
1107     }
1108 # endif
1109
1110     /*
1111      * May have to add something for 'showbreak' string at start of line
1112      * Set *headp to the size of what we add.
1113      */
1114     added = 0;
1115     if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1116     {
1117         numberextra = win_col_off(wp);
1118         col += numberextra + mb_added;
             /* Past the first window width: reduce "col" modulo the width
              * computed for the following lines. */
1119         if (col >= (colnr_T)W_WIDTH(wp))
1120         {
1121             col -= W_WIDTH(wp);
1122             numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1123             if (numberextra > 0)
1124                 col = col % numberextra;
1125         }
             /* Add 'showbreak' when the reduced "col" is zero or when the
              * character would extend past the window width. */
1126         if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1127         {
1128             added = vim_strsize(p_sbr);
                 /* For a TAB only whole multiples of b_p_ts are added. */
1129             if (tab_corr)
1130                 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1131             else
1132                 size += added;
                 /* "added" is only reported through *headp when col == 0. */
1133             if (col != 0)
1134                 added = 0;
1135         }
1136     }
1137     if (headp != NULL)
1138         *headp = added + mb_added;
1139     return size;
1140 #endif
1141 }
1142
1143 #if defined(FEAT_MBYTE) || defined(PROTO)
1144 /*
1145  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1146  * 'wrap' is on.  This means we need to check for a double-byte character that
1147  * doesn't fit at the end of the screen line.
1148  */
1149     static int
1150 win_nolbr_chartabsize(wp, s, col, headp)
1151     win_T       *wp;
1152     char_u      *s;
1153     colnr_T     col;
1154     int         *headp;
1155 {
1156     int         n;
1157
1158     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1159     {
1160         n = wp->w_buffer->b_p_ts;
1161         return (int)(n - (col % n));
1162     }
1163     n = ptr2cells(s);
1164     /* Add one cell for a double-width character in the last column of the
1165      * window, displayed with a ">". */
1166     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1167     {
1168         if (headp != NULL)
1169             *headp = 1;
1170         return 3;
1171     }
1172     return n;
1173 }
1174
1175 /*
1176  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1177  * "wp".
1178  */
1179     int
1180 in_win_border(wp, vcol)
1181     win_T       *wp;
1182     colnr_T     vcol;
1183 {
1184     colnr_T     width1;         /* width of first line (after line number) */
1185     colnr_T     width2;         /* width of further lines */
1186
1187 #ifdef FEAT_VERTSPLIT
1188     if (wp->w_width == 0)       /* there is no border */
1189         return FALSE;
1190 #endif
1191     width1 = W_WIDTH(wp) - win_col_off(wp);
1192     if (vcol < width1 - 1)
1193         return FALSE;
1194     if (vcol == width1 - 1)
1195         return TRUE;
1196     width2 = width1 + win_col_off2(wp);
1197     return ((vcol - width1) % width2 == width2 - 1);
1198 }
1199 #endif /* FEAT_MBYTE */
1200
1201 /*
1202  * Get virtual column number of pos.
1203  *  start: on the first position of this character (TAB, ctrl)
1204  * cursor: where the cursor is on this character (first char, except for TAB)
1205  *    end: on the last position of this character (TAB, ctrl)
1206  *
1207  * This is used very often, keep it fast!
      *
      * The line is taken from the buffer of window "wp".  Each of "start",
      * "cursor" and "end" may be NULL, in which case that value is not stored.
      * When pos->col is beyond the end of the line the values are for the
      * terminating NUL, which counts as one column.
1208  */
1209     void
1210 getvcol(wp, pos, start, cursor, end)
1211     win_T       *wp;
1212     pos_T       *pos;
1213     colnr_T     *start;
1214     colnr_T     *cursor;
1215     colnr_T     *end;
1216 {
1217     colnr_T     vcol;
1218     char_u      *ptr;           /* points to current char */
1219     char_u      *posptr;        /* points to char at pos->col */
1220     int         incr;
1221     int         head;
1222     int         ts = wp->w_buffer->b_p_ts;
1223     int         c;
1224
1225     vcol = 0;
1226     ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1227     posptr = ptr + pos->col;
1228
1229     /*
1230      * This function is used very often, do some speed optimizations.
1231      * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1232      * Also use this when 'list' is set but tabs take their normal size.
1233      */
1234     if ((!wp->w_p_list || lcs_tab1 != NUL)
1235 #ifdef FEAT_LINEBREAK
1236             && !wp->w_p_lbr && *p_sbr == NUL
1237 #endif
1238        )
1239     {
1240 #ifndef FEAT_MBYTE
             /* Without multi-byte support "head" never changes in the loop. */
1241         head = 0;
1242 #endif
1243         for (;;)
1244         {
1245 #ifdef FEAT_MBYTE
1246             head = 0;
1247 #endif
1248             c = *ptr;
1249             /* make sure we don't go past the end of the line */
1250             if (c == NUL)
1251             {
1252                 incr = 1;       /* NUL at end of line only takes one column */
1253                 break;
1254             }
1255             /* A tab gets expanded, depending on the current column */
1256             if (c == TAB)
1257                 incr = ts - (vcol % ts);
1258             else
1259             {
1260 #ifdef FEAT_MBYTE
1261                 if (has_mbyte)
1262                 {
1263                     /* For utf-8, if the byte is >= 0x80, need to look at
1264                      * further bytes to find the cell width. */
1265                     if (enc_utf8 && c >= 0x80)
1266                         incr = utf_ptr2cells(ptr);
1267                     else
1268                         incr = CHARSIZE(c);
1269
1270                     /* If a double-cell char doesn't fit at the end of a line
1271                      * it wraps to the next line, it's like this char is three
1272                      * cells wide. */
1273                     if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
1274                     {
1275                         ++incr;
1276                         head = 1;
1277                     }
1278                 }
1279                 else
1280 #endif
1281                     incr = CHARSIZE(c);
1282             }
1283
                 /* Stop before adding "incr": "vcol" stays at the start of
                  * the character at pos->col. */
1284             if (ptr >= posptr)  /* character at pos->col */
1285                 break;
1286
1287             vcol += incr;
1288             mb_ptr_adv(ptr);
1289         }
1290     }
1291     else
1292     {
             /* Slower loop: win_lbr_chartabsize() computes the size of each
              * character and sets "head". */
1293         for (;;)
1294         {
1295             /* A tab gets expanded, depending on the current column */
1296             head = 0;
1297             incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1298             /* make sure we don't go past the end of the line */
1299             if (*ptr == NUL)
1300             {
1301                 incr = 1;       /* NUL at end of line only takes one column */
1302                 break;
1303             }
1304
1305             if (ptr >= posptr)  /* character at pos->col */
1306                 break;
1307
1308             vcol += incr;
1309             mb_ptr_adv(ptr);
1310         }
1311     }
         /* Here "vcol" is the column where the character starts, "incr" its
          * width and "head" the part of "incr" that comes before the
          * character itself. */
1312     if (start != NULL)
1313         *start = vcol + head;
1314     if (end != NULL)
1315         *end = vcol + incr - 1;
1316     if (cursor != NULL)
1317     {
             /* On a TAB in Normal mode the cursor goes to the end of the TAB,
              * unless 'list' is set, virtual editing is active or (with
              * FEAT_VISUAL) the Visual mode condition below applies. */
1318         if (*ptr == TAB
1319                 && (State & NORMAL)
1320                 && !wp->w_p_list
1321                 && !virtual_active()
1322 #ifdef FEAT_VISUAL
1323                 && !(VIsual_active
1324                                    && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1325 #endif
1326                 )
1327             *cursor = vcol + incr - 1;      /* cursor at end */
1328         else
1329             *cursor = vcol + head;          /* cursor at start */
1330     }
1331 }
1332
1333 /*
1334  * Get virtual cursor column in the current window, pretending 'list' is off.
1335  */
1336     colnr_T
1337 getvcol_nolist(posp)
1338     pos_T       *posp;
1339 {
1340     int         list_save = curwin->w_p_list;
1341     colnr_T     vcol;
1342
1343     curwin->w_p_list = FALSE;
1344     getvcol(curwin, posp, NULL, &vcol, NULL);
1345     curwin->w_p_list = list_save;
1346     return vcol;
1347 }
1348
1349 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1350 /*
1351  * Get virtual column in virtual mode.
1352  */
1353     void
1354 getvvcol(wp, pos, start, cursor, end)
1355     win_T       *wp;
1356     pos_T       *pos;
1357     colnr_T     *start;
1358     colnr_T     *cursor;
1359     colnr_T     *end;
1360 {
1361     colnr_T     col;
1362     colnr_T     coladd;
1363     colnr_T     endadd;
1364 # ifdef FEAT_MBYTE
1365     char_u      *ptr;
1366 # endif
1367
1368     if (virtual_active())
1369     {
1370         /* For virtual mode, only want one value */
1371         getvcol(wp, pos, &col, NULL, NULL);
1372
1373         coladd = pos->coladd;
1374         endadd = 0;
1375 # ifdef FEAT_MBYTE
1376         /* Cannot put the cursor on part of a wide character. */
1377         ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1378         if (pos->col < STRLEN(ptr))
1379         {
1380             int c = (*mb_ptr2char)(ptr + pos->col);
1381
1382             if (c != TAB && vim_isprintc(c))
1383             {
1384                 endadd = char2cells(c) - 1;
1385                 if (coladd > endadd)    /* past end of line */
1386                     endadd = 0;
1387                 else
1388                     coladd = 0;
1389             }
1390         }
1391 # endif
1392         col += coladd;
1393         if (start != NULL)
1394             *start = col;
1395         if (cursor != NULL)
1396             *cursor = col;
1397         if (end != NULL)
1398             *end = col + endadd;
1399     }
1400     else
1401         getvcol(wp, pos, start, cursor, end);
1402 }
1403 #endif
1404
1405 #if defined(FEAT_VISUAL) || defined(PROTO)
1406 /*
1407  * Get the leftmost and rightmost virtual column of pos1 and pos2.
1408  * Used for Visual block mode.
1409  */
1410     void
1411 getvcols(wp, pos1, pos2, left, right)
1412     win_T       *wp;
1413     pos_T       *pos1, *pos2;
1414     colnr_T     *left, *right;
1415 {
1416     colnr_T     from1, from2, to1, to2;
1417
1418     if (ltp(pos1, pos2))
1419     {
1420         getvvcol(wp, pos1, &from1, NULL, &to1);
1421         getvvcol(wp, pos2, &from2, NULL, &to2);
1422     }
1423     else
1424     {
1425         getvvcol(wp, pos2, &from1, NULL, &to1);
1426         getvvcol(wp, pos1, &from2, NULL, &to2);
1427     }
1428     if (from2 < from1)
1429         *left = from2;
1430     else
1431         *left = from1;
1432     if (to2 > to1)
1433     {
1434         if (*p_sel == 'e' && from2 - 1 >= to1)
1435             *right = from2 - 1;
1436         else
1437             *right = to2;
1438     }
1439     else
1440         *right = to1;
1441 }
1442 #endif
1443
1444 /*
1445  * skipwhite: skip over ' ' and '\t'.
1446  */
1447     char_u *
1448 skipwhite(p)
1449     char_u      *p;
1450 {
1451     while (vim_iswhite(*p)) /* skip to next non-white */
1452         ++p;
1453     return p;
1454 }
1455
1456 /*
1457  * skip over digits
1458  */
1459     char_u *
1460 skipdigits(p)
1461     char_u      *p;
1462 {
1463     while (VIM_ISDIGIT(*p))     /* skip to next non-digit */
1464         ++p;
1465     return p;
1466 }
1467
1468 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1469 /*
1470  * skip over digits and hex characters
1471  */
1472     char_u *
1473 skiphex(p)
1474     char_u      *p;
1475 {
1476     while (vim_isxdigit(*p))    /* skip to next non-digit */
1477         ++p;
1478     return p;
1479 }
1480 #endif
1481
1482 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1483 /*
1484  * skip to digit (or NUL after the string)
1485  */
1486     char_u *
1487 skiptodigit(p)
1488     char_u      *p;
1489 {
1490     while (*p != NUL && !VIM_ISDIGIT(*p))       /* skip to next digit */
1491         ++p;
1492     return p;
1493 }
1494
1495 /*
1496  * skip to hex character (or NUL after the string)
1497  */
1498     char_u *
1499 skiptohex(p)
1500     char_u      *p;
1501 {
1502     while (*p != NUL && !vim_isxdigit(*p))      /* skip to next digit */
1503         ++p;
1504     return p;
1505 }
1506 #endif
1507
/*
 * Return non-zero when "c" is one of the ASCII digits '0' - '9'.
 * Works for any value of "c", also above 0x100.
 * isdigit() is avoided, because on some systems it also accepts other
 * characters, such as superscript 1.
 * For simple arguments the VIM_ISDIGIT() macro can be used instead.
 */
    int
vim_isdigit(c)
    int         c;
{
    if (c < '0' || c > '9')
        return 0;
    return 1;
}
1520
/*
 * Return non-zero when "c" is an ASCII hex digit: '0' - '9', 'a' - 'f' or
 * 'A' - 'F'.  Works for any value of "c", also above 0x100.
 * isxdigit() is avoided, because on some systems it also accepts other
 * characters, such as superscript 1.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F');
}
1534
1535 #if defined(FEAT_MBYTE) || defined(PROTO)
1536 /*
1537  * Vim's own character class functions.  These exist because many library
1538  * islower()/toupper() etc. do not work properly: they crash when used with
1539  * invalid values or can't handle latin1 when the locale is C.
1540  * Speed is most important here.
1541  */
1542 #define LATIN1LOWER 'l'
1543 #define LATIN1UPPER 'U'
1544
1545 /*                                                                 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~                                  ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1546 static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1547 static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1548 static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1549
1550     int
1551 vim_islower(c)
1552     int     c;
1553 {
1554     if (c <= '@')
1555         return FALSE;
1556     if (c >= 0x80)
1557     {
1558         if (enc_utf8)
1559             return utf_islower(c);
1560         if (c >= 0x100)
1561         {
1562 #ifdef HAVE_ISWLOWER
1563             if (has_mbyte)
1564                 return iswlower(c);
1565 #endif
1566             /* islower() can't handle these chars and may crash */
1567             return FALSE;
1568         }
1569         if (enc_latin1like)
1570             return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1571     }
1572     return islower(c);
1573 }
1574
1575     int
1576 vim_isupper(c)
1577     int     c;
1578 {
1579     if (c <= '@')
1580         return FALSE;
1581     if (c >= 0x80)
1582     {
1583         if (enc_utf8)
1584             return utf_isupper(c);
1585         if (c >= 0x100)
1586         {
1587 #ifdef HAVE_ISWUPPER
1588             if (has_mbyte)
1589                 return iswupper(c);
1590 #endif
1591             /* islower() can't handle these chars and may crash */
1592             return FALSE;
1593         }
1594         if (enc_latin1like)
1595             return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1596     }
1597     return isupper(c);
1598 }
1599
1600     int
1601 vim_toupper(c)
1602     int     c;
1603 {
1604     if (c <= '@')
1605         return c;
1606     if (c >= 0x80)
1607     {
1608         if (enc_utf8)
1609             return utf_toupper(c);
1610         if (c >= 0x100)
1611         {
1612 #ifdef HAVE_TOWUPPER
1613             if (has_mbyte)
1614                 return towupper(c);
1615 #endif
1616             /* toupper() can't handle these chars and may crash */
1617             return c;
1618         }
1619         if (enc_latin1like)
1620             return latin1upper[c];
1621     }
1622     return TOUPPER_LOC(c);
1623 }
1624
1625     int
1626 vim_tolower(c)
1627     int     c;
1628 {
1629     if (c <= '@')
1630         return c;
1631     if (c >= 0x80)
1632     {
1633         if (enc_utf8)
1634             return utf_tolower(c);
1635         if (c >= 0x100)
1636         {
1637 #ifdef HAVE_TOWLOWER
1638             if (has_mbyte)
1639                 return towlower(c);
1640 #endif
1641             /* tolower() can't handle these chars and may crash */
1642             return c;
1643         }
1644         if (enc_latin1like)
1645             return latin1lower[c];
1646     }
1647     return TOLOWER_LOC(c);
1648 }
1649 #endif
1650
1651 /*
1652  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1653  */
1654     char_u *
1655 skiptowhite(p)
1656     char_u      *p;
1657 {
1658     while (*p != ' ' && *p != '\t' && *p != NUL)
1659         ++p;
1660     return p;
1661 }
1662
1663 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1664         || defined(PROTO)
1665 /*
1666  * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1667  */
1668     char_u *
1669 skiptowhite_esc(p)
1670     char_u      *p;
1671 {
1672     while (*p != ' ' && *p != '\t' && *p != NUL)
1673     {
1674         if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1675             ++p;
1676         ++p;
1677     }
1678     return p;
1679 }
1680 #endif
1681
1682 /*
1683  * Getdigits: Get a number from a string and skip over it.
1684  * Note: the argument is a pointer to a char_u pointer!
1685  */
1686     long
1687 getdigits(pp)
1688     char_u **pp;
1689 {
1690     char_u      *p;
1691     long        retval;
1692
1693     p = *pp;
1694     retval = atol((char *)p);
1695     if (*p == '-')              /* skip negative sign */
1696         ++p;
1697     p = skipdigits(p);          /* skip to next non-digit */
1698     *pp = p;
1699     return retval;
1700 }
1701
1702 /*
1703  * Return TRUE if "lbuf" is empty or only contains blanks.
1704  */
1705     int
1706 vim_isblankline(lbuf)
1707     char_u      *lbuf;
1708 {
1709     char_u      *p;
1710
1711     p = skipwhite(lbuf);
1712     return (*p == NUL || *p == '\r' || *p == '\n');
1713 }
1714
1715 /*
1716  * Convert a string into a long and/or unsigned long, taking care of
1717  * hexadecimal and octal numbers.  Accepts a '-' sign.
1718  * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1719  *  0       decimal
1720  *  '0'     octal
1721  *  'X'     hex
1722  *  'x'     hex
1723  * If "len" is not NULL, the length of the number in characters is returned.
1724  * If "nptr" is not NULL, the signed result is returned in it.
1725  * If "unptr" is not NULL, the unsigned result is returned in it.
1726  * If "unptr" is not NULL, the unsigned result is returned in it.
1727  * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1728  * octal number.
1729  * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1730  * hex number.
1731  */
1732     void
1733 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1734     char_u              *start;
1735     int                 *hexp;      /* return: type of number 0 = decimal, 'x'
1736                                        or 'X' is hex, '0' = octal */
1737     int                 *len;       /* return: detected length of number */
1738     int                 dooct;      /* recognize octal number */
1739     int                 dohex;      /* recognize hex number */
1740     long                *nptr;      /* return: signed result */
1741     unsigned long       *unptr;     /* return: unsigned result */
1742 {
1743     char_u          *ptr = start;
1744     int             hex = 0;            /* default is decimal */
1745     int             negative = FALSE;
1746     unsigned long   un = 0;
1747     int             n;
1748
1749     if (ptr[0] == '-')
1750     {
1751         negative = TRUE;
1752         ++ptr;
1753     }
1754
1755     /* Recognize hex and octal. */
1756     if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
1757     {
1758         hex = ptr[1];
1759         if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1760             ptr += 2;                   /* hexadecimal */
1761         else
1762         {
1763             hex = 0;                    /* default is decimal */
1764             if (dooct)
1765             {
1766                 /* Don't interpret "0", "08" or "0129" as octal. */
1767                 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1768                 {
1769                     if (ptr[n] > '7')
1770                     {
1771                         hex = 0;        /* can't be octal */
1772                         break;
1773                     }
1774                     if (ptr[n] > '0')
1775                         hex = '0';      /* assume octal */
1776                 }
1777             }
1778         }
1779     }
1780
1781     /*
1782      * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1783      */
1784     if (hex == '0' || dooct > 1)
1785     {
1786         /* octal */
1787         while ('0' <= *ptr && *ptr <= '7')
1788         {
1789             un = 8 * un + (unsigned long)(*ptr - '0');
1790             ++ptr;
1791         }
1792     }
1793     else if (hex != 0 || dohex > 1)
1794     {
1795         /* hex */
1796         while (vim_isxdigit(*ptr))
1797         {
1798             un = 16 * un + (unsigned long)hex2nr(*ptr);
1799             ++ptr;
1800         }
1801     }
1802     else
1803     {
1804         /* decimal */
1805         while (VIM_ISDIGIT(*ptr))
1806         {
1807             un = 10 * un + (unsigned long)(*ptr - '0');
1808             ++ptr;
1809         }
1810     }
1811
1812     if (hexp != NULL)
1813         *hexp = hex;
1814     if (len != NULL)
1815         *len = (int)(ptr - start);
1816     if (nptr != NULL)
1817     {
1818         if (negative)   /* account for leading '-' for decimal numbers */
1819             *nptr = -(long)un;
1820         else
1821             *nptr = (long)un;
1822     }
1823     if (unptr != NULL)
1824         *unptr = un;
1825 }
1826
/*
 * Return the value of the single hex character "c".
 * The result is only meaningful when "c" is '0' - '9', 'A' - 'F' or
 * 'a' - 'f'; any other value is handled like a decimal digit.
 */
    int
hex2nr(c)
    int         c;
{
    int         base;   /* character value that corresponds to zero */

    if (c >= 'a' && c <= 'f')
        base = 'a' - 10;
    else if (c >= 'A' && c <= 'F')
        base = 'A' - 10;
    else
        base = '0';
    return c - base;
}
1841
1842 #if defined(FEAT_TERMRESPONSE) \
1843         || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1844 /*
1845  * Convert two hex characters to a byte.
1846  * Return -1 if one of the characters is not hex.
1847  */
1848     int
1849 hexhex2nr(p)
1850     char_u      *p;
1851 {
1852     if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1853         return -1;
1854     return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1855 }
1856 #endif
1857
1858 /*
1859  * Return TRUE if "str" starts with a backslash that should be removed.
1860  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1861  * backslash is not a normal file name character.
1862  * '$' is a valid file name character, we don't remove the backslash before
1863  * it.  This means it is not possible to use an environment variable after a
1864  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1865  * Although "\ name" is valid, the backslash in "Program\ files" must be
1866  * removed.  Assume a file name doesn't start with a space.
1867  * For multi-byte names, never remove a backslash before a non-ascii
1868  * character, assume that all multi-byte characters are valid file name
1869  * characters.
1870  */
1871     int
1872 rem_backslash(str)
1873     char_u  *str;
1874 {
1875 #ifdef BACKSLASH_IN_FILENAME
1876     return (str[0] == '\\'
1877 # ifdef FEAT_MBYTE
1878             && str[1] < 0x80
1879 # endif
1880             && (str[1] == ' '
1881                 || (str[1] != NUL
1882                     && str[1] != '*'
1883                     && str[1] != '?'
1884                     && !vim_isfilec(str[1]))));
1885 #else
1886     return (str[0] == '\\' && str[1] != NUL);
1887 #endif
1888 }
1889
1890 /*
1891  * Halve the number of backslashes in a file name argument.
1892  * For MS-DOS we only do this if the character after the backslash
1893  * is not a normal file character.
1894  */
1895     void
1896 backslash_halve(p)
1897     char_u      *p;
1898 {
1899     for ( ; *p; ++p)
1900         if (rem_backslash(p))
1901             STRCPY(p, p + 1);
1902 }
1903
1904 /*
1905  * backslash_halve() plus save the result in allocated memory.
1906  */
1907     char_u *
1908 backslash_halve_save(p)
1909     char_u      *p;
1910 {
1911     char_u      *res;
1912
1913     res = vim_strsave(p);
1914     if (res == NULL)
1915         return p;
1916     backslash_halve(res);
1917     return res;
1918 }
1919
1920 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1921 /*
1922  * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1923  * The first 64 entries have been added to map control characters defined in
1924  * ascii.h
      *
      * The table is indexed with the EBCDIC byte value, see ebcdic2ascii().
      * The entries are written in octal, eight entries per row.
1925  */
1926 static char_u ebcdic2ascii_tab[256] =
1927 {
1928     0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1929     0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1930     0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1931     0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1932     0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1933     0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1934     0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1935     0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1936     0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1937     0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1938     0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1939     0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1940     0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1941     0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1942     0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1943     0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1944     0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1945     0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1946     0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1947     0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1948     0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1949     0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1950     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1951     0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1952     0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1953     0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1954     0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1955     0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1956     0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1957     0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1958     0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1959     0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1960 };
1961
1962 /*
1963  * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
1964  * wanting 7-bit ASCII characters out the other end.
1965  */
1966     void
1967 ebcdic2ascii(buffer, len)
1968     char_u      *buffer;
1969     int         len;
1970 {
1971     int         i;
1972
1973     for (i = 0; i < len; i++)
1974         buffer[i] = ebcdic2ascii_tab[buffer[i]];
1975 }
1976 #endif