src/charset.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 #include "vim.h"
  11
  12 #ifdef FEAT_LINEBREAK
  13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
  14 #endif
  15
  16 #ifdef FEAT_MBYTE
  17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
  18 #endif
  19
  20 static int nr2hex __ARGS((int c));
  21
  22 static int    chartab_initialized = FALSE;
  23
  24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
  25  * characters 0-255. */
  26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
  27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
  28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
  29
  30 /*
  31  * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
  32  * characters for current buffer.
  33  *
  34  * Depends on the option settings 'iskeyword', 'isident', 'isfname',
  35  * 'isprint' and 'encoding'.
  36  *
  37  * The index in chartab[] depends on 'encoding':
  38  * - For non-multi-byte index with the byte (same as the character).
  39  * - For DBCS index with the first byte.
  40  * - For UTF-8 index with the character (when first byte is up to 0x80 it is
  41  *   the same as the character, if the first byte is 0x80 and above it depends
  42  *   on further bytes).
  43  *
  44  * The contents of chartab[]:
  45  * - The lower two bits, masked by CT_CELL_MASK, give the number of display
  46  *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
  47  * - CT_PRINT_CHAR bit is set when the character is printable (no need to
  48  *   translate the character before displaying it).  Note that only DBCS
  49  *   characters can have 2 display cells and still be printable.
  50  * - CT_FNAME_CHAR bit is set when the character can be in a file name.
  51  * - CT_ID_CHAR bit is set when the character can be in an identifier.
  52  *
  53  * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
  54  * error, OK otherwise.
  55  */
  56     int
  57 init_chartab()
  58 {
  59     return buf_init_chartab(curbuf, TRUE);
  60 }
  61
  62     int
  63 buf_init_chartab(buf, global)
  64     buf_T       *buf;
  65     int         global;         /* FALSE: only set buf->b_chartab[] */
  66 {
  67     int         c;
  68     int         c2;
  69     char_u      *p;
  70     int         i;
  71     int         tilde;
  72     int         do_isalpha;
  73
  74     if (global)
  75     {
  76         /*
  77          * Set the default size for printable characters:
  78          * From <Space> to '~' is 1 (printable), others are 2 (not printable).
  79          * This also inits all 'isident' and 'isfname' flags to FALSE.
  80          *
  81          * EBCDIC: all chars below ' ' are not printable, all others are
  82          * printable.
  83          */
  84         c = 0;
  85         while (c < ' ')
  86             chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
  87 #ifdef EBCDIC
  88         while (c < 255)
  89 #else
  90         while (c <= '~')
  91 #endif
  92             chartab[c++] = 1 + CT_PRINT_CHAR;
  93 #ifdef FEAT_FKMAP
  94         if (p_altkeymap)
  95         {
  96             while (c < YE)
  97                 chartab[c++] = 1 + CT_PRINT_CHAR;
  98         }
  99 #endif
 100         while (c < 256)
 101         {
 102 #ifdef FEAT_MBYTE
 103             /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
 104             if (enc_utf8 && c >= 0xa0)
 105                 chartab[c++] = CT_PRINT_CHAR + 1;
 106             /* euc-jp characters starting with 0x8e are single width */
 107             else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
 108                 chartab[c++] = CT_PRINT_CHAR + 1;
 109             /* other double-byte chars can be printable AND double-width */
 110             else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
 111                 chartab[c++] = CT_PRINT_CHAR + 2;
 112             else
 113 #endif
 114                 /* the rest is unprintable by default */
 115                 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
 116         }
 117
 118 #ifdef FEAT_MBYTE
 119         /* Assume that every multi-byte char is a filename character. */
 120         for (c = 1; c < 256; ++c)
 121             if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
 122                     || (enc_dbcs == DBCS_JPNU && c == 0x8e)
 123                     || (enc_utf8 && c >= 0xa0))
 124                 chartab[c] |= CT_FNAME_CHAR;
 125 #endif
 126     }
 127
 128     /*
 129      * Init word char flags all to FALSE
 130      */
 131     vim_memset(buf->b_chartab, 0, (size_t)32);
 132 #ifdef FEAT_MBYTE
 133     if (enc_dbcs != 0)
 134         for (c = 0; c < 256; ++c)
 135         {
 136             /* double-byte characters are probably word characters */
 137             if (MB_BYTE2LEN(c) == 2)
 138                 SET_CHARTAB(buf, c);
 139         }
 140 #endif
 141
 142 #ifdef FEAT_LISP
 143     /*
 144      * In lisp mode the '-' character is included in keywords.
 145      */
 146     if (buf->b_p_lisp)
 147         SET_CHARTAB(buf, '-');
 148 #endif
 149
 150     /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
 151      * options Each option is a list of characters, character numbers or
 152      * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
 153      */
 154     for (i = global ? 0 : 3; i <= 3; ++i)
 155     {
 156         if (i == 0)
 157             p = p_isi;          /* first round: 'isident' */
 158         else if (i == 1)
 159             p = p_isp;          /* second round: 'isprint' */
 160         else if (i == 2)
 161             p = p_isf;          /* third round: 'isfname' */
 162         else    /* i == 3 */
 163             p = buf->b_p_isk;   /* fourth round: 'iskeyword' */
 164
 165         while (*p)
 166         {
 167             tilde = FALSE;
 168             do_isalpha = FALSE;
 169             if (*p == '^' && p[1] != NUL)
 170             {
 171                 tilde = TRUE;
 172                 ++p;
 173             }
 174             if (VIM_ISDIGIT(*p))
 175                 c = getdigits(&p);
 176             else
 177                 c = *p++;
 178             c2 = -1;
 179             if (*p == '-' && p[1] != NUL)
 180             {
 181                 ++p;
 182                 if (VIM_ISDIGIT(*p))
 183                     c2 = getdigits(&p);
 184                 else
 185                     c2 = *p++;
 186             }
 187             if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
 188                                                  || !(*p == NUL || *p == ','))
 189                 return FAIL;
 190
 191             if (c2 == -1)       /* not a range */
 192             {
 193                 /*
 194                  * A single '@' (not "@-@"):
 195                  * Decide on letters being ID/printable/keyword chars with
 196                  * standard function isalpha(). This takes care of locale for
 197                  * single-byte characters).
 198                  */
 199                 if (c == '@')
 200                 {
 201                     do_isalpha = TRUE;
 202                     c = 1;
 203                     c2 = 255;
 204                 }
 205                 else
 206                     c2 = c;
 207             }
 208             while (c <= c2)
 209             {
 210                 if (!do_isalpha || isalpha(c)
 211 #ifdef FEAT_FKMAP
 212                         || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
 213 #endif
 214                             )
 215                 {
 216                     if (i == 0)                 /* (re)set ID flag */
 217                     {
 218                         if (tilde)
 219                             chartab[c] &= ~CT_ID_CHAR;
 220                         else
 221                             chartab[c] |= CT_ID_CHAR;
 222                     }
 223                     else if (i == 1)            /* (re)set printable */
 224                     {
 225                         if ((c < ' '
 226 #ifndef EBCDIC
 227                                     || c > '~'
 228 #endif
 229 #ifdef FEAT_FKMAP
 230                                     || (p_altkeymap
 231                                         && (F_isalpha(c) || F_isdigit(c)))
 232 #endif
 233                             )
 234 #ifdef FEAT_MBYTE
 235                                 /* For double-byte we keep the cell width, so
 236                                  * that we can detect it from the first byte. */
 237                                 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
 238 #endif
 239                            )
 240                         {
 241                             if (tilde)
 242                             {
 243                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
 244                                              + ((dy_flags & DY_UHEX) ? 4 : 2);
 245                                 chartab[c] &= ~CT_PRINT_CHAR;
 246                             }
 247                             else
 248                             {
 249                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
 250                                 chartab[c] |= CT_PRINT_CHAR;
 251                             }
 252                         }
 253                     }
 254                     else if (i == 2)            /* (re)set fname flag */
 255                     {
 256                         if (tilde)
 257                             chartab[c] &= ~CT_FNAME_CHAR;
 258                         else
 259                             chartab[c] |= CT_FNAME_CHAR;
 260                     }
 261                     else /* i == 3 */           /* (re)set keyword flag */
 262                     {
 263                         if (tilde)
 264                             RESET_CHARTAB(buf, c);
 265                         else
 266                             SET_CHARTAB(buf, c);
 267                     }
 268                 }
 269                 ++c;
 270             }
 271             p = skip_to_option_part(p);
 272         }
 273     }
 274     chartab_initialized = TRUE;
 275     return OK;
 276 }
 277
 278 /*
 279  * Translate any special characters in buf[bufsize] in-place.
 280  * The result is a string with only printable characters, but if there is not
 281  * enough room, not all characters will be translated.
 282  */
 283     void
 284 trans_characters(buf, bufsize)
 285     char_u      *buf;
 286     int         bufsize;
 287 {
 288     int         len;            /* length of string needing translation */
 289     int         room;           /* room in buffer after string */
 290     char_u      *trs;           /* translated character */
 291     int         trs_len;        /* length of trs[] */
 292
 293     len = (int)STRLEN(buf);
 294     room = bufsize - len;
 295     while (*buf != 0)
 296     {
 297 # ifdef FEAT_MBYTE
 298         /* Assume a multi-byte character doesn't need translation. */
 299         if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
 300             len -= trs_len;
 301         else
 302 # endif
 303         {
 304             trs = transchar_byte(*buf);
 305             trs_len = (int)STRLEN(trs);
 306             if (trs_len > 1)
 307             {
 308                 room -= trs_len - 1;
 309                 if (room <= 0)
 310                     return;
 311                 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
 312             }
 313             mch_memmove(buf, trs, (size_t)trs_len);
 314             --len;
 315         }
 316         buf += trs_len;
 317     }
 318 }
 319
 320 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(PROTO)
 321 /*
 322  * Translate a string into allocated memory, replacing special chars with
 323  * printable chars.  Returns NULL when out of memory.
 324  */
 325     char_u *
 326 transstr(s)
 327     char_u      *s;
 328 {
 329     char_u      *res;
 330     char_u      *p;
 331 #ifdef FEAT_MBYTE
 332     int         l, len, c;
 333     char_u      hexbuf[11];
 334 #endif
 335
 336 #ifdef FEAT_MBYTE
 337     if (has_mbyte)
 338     {
 339         /* Compute the length of the result, taking account of unprintable
 340          * multi-byte characters. */
 341         len = 0;
 342         p = s;
 343         while (*p != NUL)
 344         {
 345             if ((l = (*mb_ptr2len)(p)) > 1)
 346             {
 347                 c = (*mb_ptr2char)(p);
 348                 p += l;
 349                 if (vim_isprintc(c))
 350                     len += l;
 351                 else
 352                 {
 353                     transchar_hex(hexbuf, c);
 354                     len += (int)STRLEN(hexbuf);
 355                 }
 356             }
 357             else
 358             {
 359                 l = byte2cells(*p++);
 360                 if (l > 0)
 361                     len += l;
 362                 else
 363                     len += 4;   /* illegal byte sequence */
 364             }
 365         }
 366         res = alloc((unsigned)(len + 1));
 367     }
 368     else
 369 #endif
 370         res = alloc((unsigned)(vim_strsize(s) + 1));
 371     if (res != NULL)
 372     {
 373         *res = NUL;
 374         p = s;
 375         while (*p != NUL)
 376         {
 377 #ifdef FEAT_MBYTE
 378             if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 379             {
 380                 c = (*mb_ptr2char)(p);
 381                 if (vim_isprintc(c))
 382                     STRNCAT(res, p, l); /* append printable multi-byte char */
 383                 else
 384                     transchar_hex(res + STRLEN(res), c);
 385                 p += l;
 386             }
 387             else
 388 #endif
 389                 STRCAT(res, transchar_byte(*p++));
 390         }
 391     }
 392     return res;
 393 }
 394 #endif
 395
 396 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
 397 /*
 398  * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 399  * current locale.
 400  * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 401  * Otherwise puts the result in "buf[buflen]".
 402  */
 403     char_u *
 404 str_foldcase(str, orglen, buf, buflen)
 405     char_u      *str;
 406     int         orglen;
 407     char_u      *buf;
 408     int         buflen;
 409 {
 410     garray_T    ga;
 411     int         i;
 412     int         len = orglen;
 413
 414 #define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
 415 #define GA_PTR(i)   ((char_u *)ga.ga_data + i)
 416 #define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
 417 #define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)
 418
 419     /* Copy "str" into "buf" or allocated memory, unmodified. */
 420     if (buf == NULL)
 421     {
 422         ga_init2(&ga, 1, 10);
 423         if (ga_grow(&ga, len + 1) == FAIL)
 424             return NULL;
 425         mch_memmove(ga.ga_data, str, (size_t)len);
 426         ga.ga_len = len;
 427     }
 428     else
 429     {
 430         if (len >= buflen)          /* Ugly! */
 431             len = buflen - 1;
 432         mch_memmove(buf, str, (size_t)len);
 433     }
 434     if (buf == NULL)
 435         GA_CHAR(len) = NUL;
 436     else
 437         buf[len] = NUL;
 438
 439     /* Make each character lower case. */
 440     i = 0;
 441     while (STR_CHAR(i) != NUL)
 442     {
 443 #ifdef FEAT_MBYTE
 444         if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
 445         {
 446             if (enc_utf8)
 447             {
 448                 int     c, lc;
 449
 450                 c = utf_ptr2char(STR_PTR(i));
 451                 lc = utf_tolower(c);
 452                 if (c != lc)
 453                 {
 454                     int     ol = utf_char2len(c);
 455                     int     nl = utf_char2len(lc);
 456
 457                     /* If the byte length changes need to shift the following
 458                      * characters forward or backward. */
 459                     if (ol != nl)
 460                     {
 461                         if (nl > ol)
 462                         {
 463                             if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
 464                                                     : len + nl - ol >= buflen)
 465                             {
 466                                 /* out of memory, keep old char */
 467                                 lc = c;
 468                                 nl = ol;
 469                             }
 470                         }
 471                         if (ol != nl)
 472                         {
 473                             if (buf == NULL)
 474                             {
 475                                 mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
 476                                                   STRLEN(GA_PTR(i) + ol) + 1);
 477                                 ga.ga_len += nl - ol;
 478                             }
 479                             else
 480                             {
 481                                 mch_memmove(buf + i + nl, buf + i + ol,
 482                                                     STRLEN(buf + i + ol) + 1);
 483                                 len += nl - ol;
 484                             }
 485                         }
 486                     }
 487                     (void)utf_char2bytes(lc, STR_PTR(i));
 488                 }
 489             }
 490             /* skip to next multi-byte char */
 491             i += (*mb_ptr2len)(STR_PTR(i));
 492         }
 493         else
 494 #endif
 495         {
 496             if (buf == NULL)
 497                 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
 498             else
 499                 buf[i] = TOLOWER_LOC(buf[i]);
 500             ++i;
 501         }
 502     }
 503
 504     if (buf == NULL)
 505         return (char_u *)ga.ga_data;
 506     return buf;
 507 }
 508 #endif
 509
 510 /*
 511  * Catch 22: chartab[] can't be initialized before the options are
 512  * initialized, and initializing options may cause transchar() to be called!
 513  * When chartab_initialized == FALSE don't use chartab[].
 514  * Does NOT work for multi-byte characters, c must be <= 255.
 515  * Also doesn't work for the first byte of a multi-byte, "c" must be a
 516  * character!
 517  */
 518 static char_u   transchar_buf[7];
 519
 520     char_u *
 521 transchar(c)
 522     int         c;
 523 {
 524     int                 i;
 525
 526     i = 0;
 527     if (IS_SPECIAL(c))      /* special key code, display as ~@ char */
 528     {
 529         transchar_buf[0] = '~';
 530         transchar_buf[1] = '@';
 531         i = 2;
 532         c = K_SECOND(c);
 533     }
 534
 535     if ((!chartab_initialized && (
 536 #ifdef EBCDIC
 537                     (c >= 64 && c < 255)
 538 #else
 539                     (c >= ' ' && c <= '~')
 540 #endif
 541 #ifdef FEAT_FKMAP
 542                         || F_ischar(c)
 543 #endif
 544                 )) || (c < 256 && vim_isprintc_strict(c)))
 545     {
 546         /* printable character */
 547         transchar_buf[i] = c;
 548         transchar_buf[i + 1] = NUL;
 549     }
 550     else
 551         transchar_nonprint(transchar_buf + i, c);
 552     return transchar_buf;
 553 }
 554
 555 #if defined(FEAT_MBYTE) || defined(PROTO)
 556 /*
 557  * Like transchar(), but called with a byte instead of a character.  Checks
 558  * for an illegal UTF-8 byte.
 559  */
 560     char_u *
 561 transchar_byte(c)
 562     int         c;
 563 {
 564     if (enc_utf8 && c >= 0x80)
 565     {
 566         transchar_nonprint(transchar_buf, c);
 567         return transchar_buf;
 568     }
 569     return transchar(c);
 570 }
 571 #endif
 572
 573 /*
 574  * Convert non-printable character to two or more printable characters in
 575  * "buf[]".  "buf" needs to be able to hold five bytes.
 576  * Does NOT work for multi-byte characters, c must be <= 255.
 577  */
 578     void
 579 transchar_nonprint(buf, c)
 580     char_u      *buf;
 581     int         c;
 582 {
 583     if (c == NL)
 584         c = NUL;                /* we use newline in place of a NUL */
 585     else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
 586         c = NL;                 /* we use CR in place of  NL in this case */
 587
 588     if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
 589         transchar_hex(buf, c);
 590
 591 #ifdef EBCDIC
 592     /* For EBCDIC only the characters 0-63 and 255 are not printable */
 593     else if (CtrlChar(c) != 0 || c == DEL)
 594 #else
 595     else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
 596 #endif
 597     {
 598         buf[0] = '^';
 599 #ifdef EBCDIC
 600         if (c == DEL)
 601             buf[1] = '?';               /* DEL displayed as ^? */
 602         else
 603             buf[1] = CtrlChar(c);
 604 #else
 605         buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
 606 #endif
 607
 608         buf[2] = NUL;
 609     }
 610 #ifdef FEAT_MBYTE
 611     else if (enc_utf8 && c >= 0x80)
 612     {
 613         transchar_hex(buf, c);
 614     }
 615 #endif
 616 #ifndef EBCDIC
 617     else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
 618     {
 619         buf[0] = '|';
 620         buf[1] = c - 0x80;
 621         buf[2] = NUL;
 622     }
 623 #else
 624     else if (c < 64)
 625     {
 626         buf[0] = '~';
 627         buf[1] = MetaChar(c);
 628         buf[2] = NUL;
 629     }
 630 #endif
 631     else                                            /* 0x80 - 0x9f and 0xff */
 632     {
 633         /*
 634          * TODO: EBCDIC I don't know what to do with this chars, so I display
 635          * them as '~?' for now
 636          */
 637         buf[0] = '~';
 638 #ifdef EBCDIC
 639         buf[1] = '?';                   /* 0xff displayed as ~? */
 640 #else
 641         buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
 642 #endif
 643         buf[2] = NUL;
 644     }
 645 }
 646
 647     void
 648 transchar_hex(buf, c)
 649     char_u      *buf;
 650     int         c;
 651 {
 652     int         i = 0;
 653
 654     buf[0] = '<';
 655 #ifdef FEAT_MBYTE
 656     if (c > 255)
 657     {
 658         buf[++i] = nr2hex((unsigned)c >> 12);
 659         buf[++i] = nr2hex((unsigned)c >> 8);
 660     }
 661 #endif
 662     buf[++i] = nr2hex((unsigned)c >> 4);
 663     buf[++i] = nr2hex(c);
 664     buf[++i] = '>';
 665     buf[++i] = NUL;
 666 }
 667
 668 /*
 669  * Convert the lower 4 bits of byte "c" to its hex character.
 670  * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 671  * function key 1.
 672  */
 673     static int
 674 nr2hex(c)
 675     int         c;
 676 {
 677     if ((c & 0xf) <= 9)
 678         return (c & 0xf) + '0';
 679     return (c & 0xf) - 10 + 'a';
 680 }
 681
 682 /*
 683  * Return number of display cells occupied by byte "b".
 684  * Caller must make sure 0 <= b <= 255.
 685  * For multi-byte mode "b" must be the first byte of a character.
 686  * A TAB is counted as two cells: "^I".
 687  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
 688  * cells depends on further bytes.
 689  */
 690     int
 691 byte2cells(b)
 692     int         b;
 693 {
 694 #ifdef FEAT_MBYTE
 695     if (enc_utf8 && b >= 0x80)
 696         return 0;
 697 #endif
 698     return (chartab[b] & CT_CELL_MASK);
 699 }
 700
 701 /*
 702  * Return number of display cells occupied by character "c".
 703  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
 704  * A TAB is counted as two cells: "^I" or four: "<09>".
 705  */
 706     int
 707 char2cells(c)
 708     int         c;
 709 {
 710     if (IS_SPECIAL(c))
 711         return char2cells(K_SECOND(c)) + 2;
 712 #ifdef FEAT_MBYTE
 713     if (c >= 0x80)
 714     {
 715         /* UTF-8: above 0x80 need to check the value */
 716         if (enc_utf8)
 717             return utf_char2cells(c);
 718         /* DBCS: double-byte means double-width, except for euc-jp with first
 719          * byte 0x8e */
 720         if (enc_dbcs != 0 && c >= 0x100)
 721         {
 722             if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
 723                 return 1;
 724             return 2;
 725         }
 726     }
 727 #endif
 728     return (chartab[c & 0xff] & CT_CELL_MASK);
 729 }
 730
 731 /*
 732  * Return number of display cells occupied by character at "*p".
 733  * A TAB is counted as two cells: "^I" or four: "<09>".
 734  */
 735     int
 736 ptr2cells(p)
 737     char_u      *p;
 738 {
 739 #ifdef FEAT_MBYTE
 740     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
 741     if (enc_utf8 && *p >= 0x80)
 742         return utf_ptr2cells(p);
 743     /* For DBCS we can tell the cell count from the first byte. */
 744 #endif
 745     return (chartab[*p] & CT_CELL_MASK);
 746 }
 747
 748 /*
 749  * Return the number of characters string "s" will take on the screen,
 750  * counting TABs as two characters: "^I".
 751  */
 752     int
 753 vim_strsize(s)
 754     char_u      *s;
 755 {
 756     return vim_strnsize(s, (int)MAXCOL);
 757 }
 758
 759 /*
 760  * Return the number of characters string "s[len]" will take on the screen,
 761  * counting TABs as two characters: "^I".
 762  */
 763     int
 764 vim_strnsize(s, len)
 765     char_u      *s;
 766     int         len;
 767 {
 768     int         size = 0;
 769
 770     while (*s != NUL && --len >= 0)
 771     {
 772 #ifdef FEAT_MBYTE
 773         if (has_mbyte)
 774         {
 775             int     l = (*mb_ptr2len)(s);
 776
 777             size += ptr2cells(s);
 778             s += l;
 779             len -= l - 1;
 780         }
 781         else
 782 #endif
 783             size += byte2cells(*s++);
 784     }
 785     return size;
 786 }
 787
 788 /*
 789  * Return the number of characters 'c' will take on the screen, taking
 790  * into account the size of a tab.
 791  * Use a define to make it fast, this is used very often!!!
 792  * Also see getvcol() below.
 793  */
 794
 795 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 796     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 797     { \
 798         int ts; \
 799         ts = (buf)->b_p_ts; \
 800         return (int)(ts - (col % ts)); \
 801     } \
 802     else \
 803         return ptr2cells(p);
 804
 805 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
 806         || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
 807     int
 808 chartabsize(p, col)
 809     char_u      *p;
 810     colnr_T     col;
 811 {
 812     RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
 813 }
 814 #endif
 815
 816 #ifdef FEAT_LINEBREAK
 817     static int
 818 win_chartabsize(wp, p, col)
 819     win_T       *wp;
 820     char_u      *p;
 821     colnr_T     col;
 822 {
 823     RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
 824 }
 825 #endif
 826
 827 /*
 828  * return the number of characters the string 's' will take on the screen,
 829  * taking into account the size of a tab
 830  */
 831     int
 832 linetabsize(s)
 833     char_u      *s;
 834 {
 835     colnr_T     col = 0;
 836
 837     while (*s != NUL)
 838         col += lbr_chartabsize_adv(&s, col);
 839     return (int)col;
 840 }
 841
 842 /*
 843  * Like linetabsize(), but for a given window instead of the current one.
 844  */
 845     int
 846 win_linetabsize(wp, p, len)
 847     win_T       *wp;
 848     char_u      *p;
 849     colnr_T     len;
 850 {
 851     colnr_T     col = 0;
 852     char_u      *s;
 853
 854     for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
 855         col += win_lbr_chartabsize(wp, s, col, NULL);
 856     return (int)col;
 857 }
 858
 859 /*
 860  * Return TRUE if 'c' is a normal identifier character:
 861  * Letters and characters from the 'isident' option.
 862  */
 863     int
 864 vim_isIDc(c)
 865     int c;
 866 {
 867     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
 868 }
 869
 870 /*
 871  * return TRUE if 'c' is a keyword character: Letters and characters from
 872  * 'iskeyword' option for current buffer.
 873  * For multi-byte characters mb_get_class() is used (builtin rules).
 874  */
 875     int
 876 vim_iswordc(c)
 877     int c;
 878 {
 879 #ifdef FEAT_MBYTE
 880     if (c >= 0x100)
 881     {
 882         if (enc_dbcs != 0)
 883             return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
 884         if (enc_utf8)
 885             return utf_class(c) >= 2;
 886     }
 887 #endif
 888     return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
 889 }
 890
 891 /*
 892  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
 893  */
 894     int
 895 vim_iswordp(p)
 896     char_u *p;
 897 {
 898 #ifdef FEAT_MBYTE
 899     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 900         return mb_get_class(p) >= 2;
 901 #endif
 902     return GET_CHARTAB(curbuf, *p) != 0;
 903 }
 904
 905 #if defined(FEAT_SYN_HL) || defined(PROTO)
 906     int
 907 vim_iswordc_buf(p, buf)
 908     char_u      *p;
 909     buf_T       *buf;
 910 {
 911 # ifdef FEAT_MBYTE
 912     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 913         return mb_get_class(p) >= 2;
 914 # endif
 915     return (GET_CHARTAB(buf, *p) != 0);
 916 }
 917 #endif
 918
 919 /*
 920  * return TRUE if 'c' is a valid file-name character
 921  * Assume characters above 0x100 are valid (multi-byte).
 922  */
 923     int
 924 vim_isfilec(c)
 925     int c;
 926 {
 927     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
 928 }
 929
 930 /*
 931  * return TRUE if 'c' is a printable character
 932  * Assume characters above 0x100 are printable (multi-byte), except for
 933  * Unicode.
 934  */
 935     int
 936 vim_isprintc(c)
 937     int c;
 938 {
 939 #ifdef FEAT_MBYTE
 940     if (enc_utf8 && c >= 0x100)
 941         return utf_printable(c);
 942 #endif
 943     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 944 }
 945
 946 /*
 947  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 948  * byte of a double-byte character.
 949  */
 950     int
 951 vim_isprintc_strict(c)
 952     int c;
 953 {
 954 #ifdef FEAT_MBYTE
 955     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
 956         return FALSE;
 957     if (enc_utf8 && c >= 0x100)
 958         return utf_printable(c);
 959 #endif
 960     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 961 }
 962
 963 /*
 964  * like chartabsize(), but also check for line breaks on the screen
 965  */
 966     int
 967 lbr_chartabsize(s, col)
 968     unsigned char       *s;
 969     colnr_T             col;
 970 {
 971 #ifdef FEAT_LINEBREAK
 972     if (!curwin->w_p_lbr && *p_sbr == NUL)
 973     {
 974 #endif
 975 #ifdef FEAT_MBYTE
 976         if (curwin->w_p_wrap)
 977             return win_nolbr_chartabsize(curwin, s, col, NULL);
 978 #endif
 979         RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
 980 #ifdef FEAT_LINEBREAK
 981     }
 982     return win_lbr_chartabsize(curwin, s, col, NULL);
 983 #endif
 984 }
 985
 986 /*
 987  * Call lbr_chartabsize() and advance the pointer.
 988  */
 989     int
 990 lbr_chartabsize_adv(s, col)
 991     char_u      **s;
 992     colnr_T     col;
 993 {
 994     int         retval;
 995
 996     retval = lbr_chartabsize(*s, col);
 997     mb_ptr_adv(*s);
 998     return retval;
 999 }
1000
/*
 * This function is used very often, keep it fast!!!!
 *
 * Return the number of screen cells used by the character at "s" when it is
 * displayed at virtual column "col" in window "wp".  With FEAT_LINEBREAK the
 * 'linebreak' and 'showbreak' settings are taken into account.
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line.  Warning: *headp is only set if it's a non-zero
 * value, init to 0 before calling.
 * (On the quick path "headp" is only passed on to win_nolbr_chartabsize();
 * on the full path *headp is always assigned at the end.)
 */
/*ARGSUSED*/
    int
win_lbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp;
{
#ifdef FEAT_LINEBREAK
    int         c;              /* byte at "s" */
    int         size;           /* result: number of cells */
    colnr_T     col2;           /* column reached while scanning ahead */
    colnr_T     colmax;         /* scanning stops when col2 reaches this */
    int         added;          /* cells added for 'showbreak' */
# ifdef FEAT_MBYTE
    int         mb_added = 0;   /* 1 when a cell for ">" was counted */
# else
#  define mb_added 0
# endif
    int         numberextra;
    char_u      *ps;            /* character before "s" while scanning */
    int         tab_corr = (*s == TAB); /* character is a TAB; reset when the
                                           size is cut off at colmax */
    int         n;

    /*
     * No 'linebreak' and 'showbreak': return quickly.
     */
    if (!wp->w_p_lbr && *p_sbr == NUL)
#endif
    {
        /* Without FEAT_LINEBREAK this block is the whole function. */
#ifdef FEAT_MBYTE
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
#endif
        /* NOTE(review): macro defined elsewhere; it is used without a
         * semicolon and must leave the function, since without
         * FEAT_LINEBREAK nothing follows -- confirm against its definition. */
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
            && vim_isbreak(c)
            && !vim_isbreak(s[1])
            && !wp->w_p_list
            && wp->w_p_wrap
# ifdef FEAT_VERTSPLIT
            && wp->w_width != 0
# endif
       )
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = W_WIDTH(wp) - numberextra;
        if (col >= colmax)
        {
            /* "col" is not below the first value of colmax: move colmax
             * forward in steps of "n" until it is past "col".  The step is
             * skipped when "n" is not positive. */
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n;
        }

        for (;;)
        {
            ps = s;
            mb_ptr_adv(s);
            c = *s;
            /* Stop at the end of the line, or at a non-break character that
             * follows a break character (but not directly after the
             * character we started at). */
            if (!(c != NUL
                    && (vim_isbreak(c)
                        || (!vim_isbreak(c)
                            && (col2 == col || !vim_isbreak(*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax)         /* doesn't fit */
            {
                /* The character takes up everything up to colmax. */
                size = colmax - col;
                tab_corr = FALSE;
                break;
            }
        }
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size;         /* Count the ">" in the last column. */
        mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'showbreak' string at start of line
     * Set *headp to the size of what we add.
     */
    added = 0;
    if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
    {
        /* From here on "col" is changed into a column within the screen
         * line; it is not used for anything else below. */
        numberextra = win_col_off(wp);
        col += numberextra + mb_added;
        if (col >= (colnr_T)W_WIDTH(wp))
        {
            col -= W_WIDTH(wp);
            numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
            if (numberextra > 0)        /* avoid a modulo by zero */
                col = col % numberextra;
        }
        /* Add 'showbreak' when at the start of a screen line or when the
         * character does not fit in what is left of it. */
        if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
        {
            added = vim_strsize(p_sbr);
            if (tab_corr)
                /* For a TAB only add a multiple of 'tabstop'. */
                size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
            else
                size += added;
            /* Only report the added cells through "headp" when the character
             * itself is at the start of the screen line. */
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1141
1142 #if defined(FEAT_MBYTE) || defined(PROTO)
1143 /*
1144  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1145  * 'wrap' is on.  This means we need to check for a double-byte character that
1146  * doesn't fit at the end of the screen line.
1147  */
1148     static int
1149 win_nolbr_chartabsize(wp, s, col, headp)
1150     win_T       *wp;
1151     char_u      *s;
1152     colnr_T     col;
1153     int         *headp;
1154 {
1155     int         n;
1156
1157     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1158     {
1159         n = wp->w_buffer->b_p_ts;
1160         return (int)(n - (col % n));
1161     }
1162     n = ptr2cells(s);
1163     /* Add one cell for a double-width character in the last column of the
1164      * window, displayed with a ">". */
1165     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1166     {
1167         if (headp != NULL)
1168             *headp = 1;
1169         return 3;
1170     }
1171     return n;
1172 }
1173
1174 /*
1175  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1176  * "wp".
1177  */
1178     int
1179 in_win_border(wp, vcol)
1180     win_T       *wp;
1181     colnr_T     vcol;
1182 {
1183     colnr_T     width1;         /* width of first line (after line number) */
1184     colnr_T     width2;         /* width of further lines */
1185
1186 #ifdef FEAT_VERTSPLIT
1187     if (wp->w_width == 0)       /* there is no border */
1188         return FALSE;
1189 #endif
1190     width1 = W_WIDTH(wp) - win_col_off(wp);
1191     if (vcol < width1 - 1)
1192         return FALSE;
1193     if (vcol == width1 - 1)
1194         return TRUE;
1195     width2 = width1 + win_col_off2(wp);
1196     return ((vcol - width1) % width2 == width2 - 1);
1197 }
1198 #endif /* FEAT_MBYTE */
1199
/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * Each of "start", "cursor" and "end" may be NULL, that value is then not
 * stored.  The text is taken from line pos->lnum of the buffer of window
 * "wp", the character looked for is at byte index pos->col.  When the line
 * ends before that index the NUL at the end of the line is used, it counts
 * as one column.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     vcol;           /* virtual column where current char starts */
    char_u      *ptr;           /* points to current char */
    char_u      *posptr;        /* points to char at pos->col */
    int         incr;           /* number of columns of the current char */
    int         head;           /* columns before the char itself: 1 for a
                                   wrapped double-cell char, or what
                                   win_lbr_chartabsize() reported */
    int         ts = wp->w_buffer->b_p_ts;
    int         c;

    vcol = 0;
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    posptr = ptr + pos->col;

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
            && !wp->w_p_lbr && *p_sbr == NUL
#endif
       )
    {
#ifndef FEAT_MBYTE
        /* Without multi-byte support "head" never changes. */
        head = 0;
#endif
        for (;;)
        {
#ifdef FEAT_MBYTE
            /* May be set to 1 below, reset it for every character. */
            head = 0;
#endif
            c = *ptr;
            /* make sure we don't go past the end of the line */
            if (c == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }
            /* A tab gets expanded, depending on the current column */
            if (c == TAB)
                incr = ts - (vcol % ts);
            else
            {
#ifdef FEAT_MBYTE
                if (has_mbyte)
                {
                    /* For utf-8, if the byte is >= 0x80, need to look at
                     * further bytes to find the cell width. */
                    if (enc_utf8 && c >= 0x80)
                        incr = utf_ptr2cells(ptr);
                    else
                        incr = CHARSIZE(c);

                    /* If a double-cell char doesn't fit at the end of a line
                     * it wraps to the next line, it's like this char is three
                     * cells wide. */
                    if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
                    {
                        ++incr;
                        head = 1;
                    }
                }
                else
#endif
                    incr = CHARSIZE(c);
            }

            /* "incr" and "head" were computed first, they are needed for the
             * character at the position after the loop. */
            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    else
    {
        /* Slow path: win_lbr_chartabsize() computes the size of every
         * character, taking 'list', 'linebreak' and 'showbreak' into
         * account. */
        for (;;)
        {
            /* A tab gets expanded, depending on the current column */
            head = 0;
            incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
            /* make sure we don't go past the end of the line */
            if (*ptr == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }

            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    /* Here "vcol" is the column where the cell(s) of the character begin,
     * "incr" its total width and "head" the part of that before the
     * character itself. */
    if (start != NULL)
        *start = vcol + head;
    if (end != NULL)
        *end = vcol + incr - 1;
    if (cursor != NULL)
    {
        /* On a TAB the cursor goes on its last column, but only in Normal
         * mode without 'list' and virtual editing, and (with FEAT_VISUAL)
         * not when Visual mode is active while 'selection' starts with 'e'
         * or ltoreq(*pos, VIsual) holds. */
        if (*ptr == TAB
                && (State & NORMAL)
                && !wp->w_p_list
                && !virtual_active()
#ifdef FEAT_VISUAL
                && !(VIsual_active
                                   && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
#endif
                )
            *cursor = vcol + incr - 1;      /* cursor at end */
        else
            *cursor = vcol + head;          /* cursor at start */
    }
}
1331
1332 /*
1333  * Get virtual cursor column in the current window, pretending 'list' is off.
1334  */
1335     colnr_T
1336 getvcol_nolist(posp)
1337     pos_T       *posp;
1338 {
1339     int         list_save = curwin->w_p_list;
1340     colnr_T     vcol;
1341
1342     curwin->w_p_list = FALSE;
1343     getvcol(curwin, posp, NULL, &vcol, NULL);
1344     curwin->w_p_list = list_save;
1345     return vcol;
1346 }
1347
1348 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1349 /*
1350  * Get virtual column in virtual mode.
1351  */
1352     void
1353 getvvcol(wp, pos, start, cursor, end)
1354     win_T       *wp;
1355     pos_T       *pos;
1356     colnr_T     *start;
1357     colnr_T     *cursor;
1358     colnr_T     *end;
1359 {
1360     colnr_T     col;
1361     colnr_T     coladd;
1362     colnr_T     endadd;
1363 # ifdef FEAT_MBYTE
1364     char_u      *ptr;
1365 # endif
1366
1367     if (virtual_active())
1368     {
1369         /* For virtual mode, only want one value */
1370         getvcol(wp, pos, &col, NULL, NULL);
1371
1372         coladd = pos->coladd;
1373         endadd = 0;
1374 # ifdef FEAT_MBYTE
1375         /* Cannot put the cursor on part of a wide character. */
1376         ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1377         if (pos->col < STRLEN(ptr))
1378         {
1379             int c = (*mb_ptr2char)(ptr + pos->col);
1380
1381             if (c != TAB && vim_isprintc(c))
1382             {
1383                 endadd = char2cells(c) - 1;
1384                 if (coladd > endadd)    /* past end of line */
1385                     endadd = 0;
1386                 else
1387                     coladd = 0;
1388             }
1389         }
1390 # endif
1391         col += coladd;
1392         if (start != NULL)
1393             *start = col;
1394         if (cursor != NULL)
1395             *cursor = col;
1396         if (end != NULL)
1397             *end = col + endadd;
1398     }
1399     else
1400         getvcol(wp, pos, start, cursor, end);
1401 }
1402 #endif
1403
1404 #if defined(FEAT_VISUAL) || defined(PROTO)
1405 /*
1406  * Get the leftmost and rightmost virtual column of pos1 and pos2.
1407  * Used for Visual block mode.
1408  */
1409     void
1410 getvcols(wp, pos1, pos2, left, right)
1411     win_T       *wp;
1412     pos_T       *pos1, *pos2;
1413     colnr_T     *left, *right;
1414 {
1415     colnr_T     from1, from2, to1, to2;
1416
1417     if (ltp(pos1, pos2))
1418     {
1419         getvvcol(wp, pos1, &from1, NULL, &to1);
1420         getvvcol(wp, pos2, &from2, NULL, &to2);
1421     }
1422     else
1423     {
1424         getvvcol(wp, pos2, &from1, NULL, &to1);
1425         getvvcol(wp, pos1, &from2, NULL, &to2);
1426     }
1427     if (from2 < from1)
1428         *left = from2;
1429     else
1430         *left = from1;
1431     if (to2 > to1)
1432     {
1433         if (*p_sel == 'e' && from2 - 1 >= to1)
1434             *right = from2 - 1;
1435         else
1436             *right = to2;
1437     }
1438     else
1439         *right = to1;
1440 }
1441 #endif
1442
1443 /*
1444  * skipwhite: skip over ' ' and '\t'.
1445  */
1446     char_u *
1447 skipwhite(p)
1448     char_u      *p;
1449 {
1450     while (vim_iswhite(*p)) /* skip to next non-white */
1451         ++p;
1452     return p;
1453 }
1454
1455 /*
1456  * skip over digits
1457  */
1458     char_u *
1459 skipdigits(p)
1460     char_u      *p;
1461 {
1462     while (VIM_ISDIGIT(*p))     /* skip to next non-digit */
1463         ++p;
1464     return p;
1465 }
1466
1467 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1468 /*
1469  * skip over digits and hex characters
1470  */
1471     char_u *
1472 skiphex(p)
1473     char_u      *p;
1474 {
1475     while (vim_isxdigit(*p))    /* skip to next non-digit */
1476         ++p;
1477     return p;
1478 }
1479 #endif
1480
1481 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1482 /*
1483  * skip to digit (or NUL after the string)
1484  */
1485     char_u *
1486 skiptodigit(p)
1487     char_u      *p;
1488 {
1489     while (*p != NUL && !VIM_ISDIGIT(*p))       /* skip to next digit */
1490         ++p;
1491     return p;
1492 }
1493
1494 /*
1495  * skip to hex character (or NUL after the string)
1496  */
1497     char_u *
1498 skiptohex(p)
1499     char_u      *p;
1500 {
1501     while (*p != NUL && !vim_isxdigit(*p))      /* skip to next digit */
1502         ++p;
1503     return p;
1504 }
1505 #endif
1506
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Only '0' to '9' are accepted, the result is 1 or 0.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    if (c < '0')
        return 0;
    return c <= '9';
}
1519
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Accepts '0' - '9', 'a' - 'f' and 'A' - 'F', the result is 1 or 0.
 */
    int
vim_isxdigit(c)
    int         c;
{
    int         is_decimal = ('0' <= c && c <= '9');
    int         is_lower_hex = ('a' <= c && c <= 'f');
    int         is_upper_hex = ('A' <= c && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1533
1534 #if defined(FEAT_MBYTE) || defined(PROTO)
1535 /*
1536  * Vim's own character class functions.  These exist because many library
1537  * islower()/toupper() etc. do not work properly: they crash when used with
1538  * invalid values or can't handle latin1 when the locale is C.
1539  * Speed is most important here.
1540  */
1541 #define LATIN1LOWER 'l'
1542 #define LATIN1UPPER 'U'
1543
1544 /*                                                                 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~                                  ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1545 static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1546 static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1547 static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1548
1549     int
1550 vim_islower(c)
1551     int     c;
1552 {
1553     if (c <= '@')
1554         return FALSE;
1555     if (c >= 0x80)
1556     {
1557         if (enc_utf8)
1558             return utf_islower(c);
1559         if (c >= 0x100)
1560         {
1561 #ifdef HAVE_ISWLOWER
1562             if (has_mbyte)
1563                 return iswlower(c);
1564 #endif
1565             /* islower() can't handle these chars and may crash */
1566             return FALSE;
1567         }
1568         if (enc_latin1like)
1569             return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1570     }
1571     return islower(c);
1572 }
1573
1574     int
1575 vim_isupper(c)
1576     int     c;
1577 {
1578     if (c <= '@')
1579         return FALSE;
1580     if (c >= 0x80)
1581     {
1582         if (enc_utf8)
1583             return utf_isupper(c);
1584         if (c >= 0x100)
1585         {
1586 #ifdef HAVE_ISWUPPER
1587             if (has_mbyte)
1588                 return iswupper(c);
1589 #endif
1590             /* islower() can't handle these chars and may crash */
1591             return FALSE;
1592         }
1593         if (enc_latin1like)
1594             return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1595     }
1596     return isupper(c);
1597 }
1598
1599     int
1600 vim_toupper(c)
1601     int     c;
1602 {
1603     if (c <= '@')
1604         return c;
1605     if (c >= 0x80)
1606     {
1607         if (enc_utf8)
1608             return utf_toupper(c);
1609         if (c >= 0x100)
1610         {
1611 #ifdef HAVE_TOWUPPER
1612             if (has_mbyte)
1613                 return towupper(c);
1614 #endif
1615             /* toupper() can't handle these chars and may crash */
1616             return c;
1617         }
1618         if (enc_latin1like)
1619             return latin1upper[c];
1620     }
1621     return TOUPPER_LOC(c);
1622 }
1623
1624     int
1625 vim_tolower(c)
1626     int     c;
1627 {
1628     if (c <= '@')
1629         return c;
1630     if (c >= 0x80)
1631     {
1632         if (enc_utf8)
1633             return utf_tolower(c);
1634         if (c >= 0x100)
1635         {
1636 #ifdef HAVE_TOWLOWER
1637             if (has_mbyte)
1638                 return towlower(c);
1639 #endif
1640             /* tolower() can't handle these chars and may crash */
1641             return c;
1642         }
1643         if (enc_latin1like)
1644             return latin1lower[c];
1645     }
1646     return TOLOWER_LOC(c);
1647 }
1648 #endif
1649
1650 /*
1651  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1652  */
1653     char_u *
1654 skiptowhite(p)
1655     char_u      *p;
1656 {
1657     while (*p != ' ' && *p != '\t' && *p != NUL)
1658         ++p;
1659     return p;
1660 }
1661
1662 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1663         || defined(PROTO)
1664 /*
1665  * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1666  */
1667     char_u *
1668 skiptowhite_esc(p)
1669     char_u      *p;
1670 {
1671     while (*p != ' ' && *p != '\t' && *p != NUL)
1672     {
1673         if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1674             ++p;
1675         ++p;
1676     }
1677     return p;
1678 }
1679 #endif
1680
1681 /*
1682  * Getdigits: Get a number from a string and skip over it.
1683  * Note: the argument is a pointer to a char_u pointer!
1684  */
1685     long
1686 getdigits(pp)
1687     char_u **pp;
1688 {
1689     char_u      *p;
1690     long        retval;
1691
1692     p = *pp;
1693     retval = atol((char *)p);
1694     if (*p == '-')              /* skip negative sign */
1695         ++p;
1696     p = skipdigits(p);          /* skip to next non-digit */
1697     *pp = p;
1698     return retval;
1699 }
1700
1701 /*
1702  * Return TRUE if "lbuf" is empty or only contains blanks.
1703  */
1704     int
1705 vim_isblankline(lbuf)
1706     char_u      *lbuf;
1707 {
1708     char_u      *p;
1709
1710     p = skipwhite(lbuf);
1711     return (*p == NUL || *p == '\r' || *p == '\n');
1712 }
1713
1714 /*
1715  * Convert a string into a long and/or unsigned long, taking care of
1716  * hexadecimal and octal numbers.  Accepts a '-' sign.
1717  * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1718  *  0       decimal
1719  *  '0'     octal
1720  *  'X'     hex
1721  *  'x'     hex
1722  * If "len" is not NULL, the length of the number in characters is returned.
1723  * If "nptr" is not NULL, the signed result is returned in it.
1724  * If "unptr" is not NULL, the unsigned result is returned in it.
1725  * If "unptr" is not NULL, the unsigned result is returned in it.
1726  * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1727  * octal number.
1728  * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1729  * hex number.
1730  */
1731     void
1732 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1733     char_u              *start;
1734     int                 *hexp;      /* return: type of number 0 = decimal, 'x'
1735                                        or 'X' is hex, '0' = octal */
1736     int                 *len;       /* return: detected length of number */
1737     int                 dooct;      /* recognize octal number */
1738     int                 dohex;      /* recognize hex number */
1739     long                *nptr;      /* return: signed result */
1740     unsigned long       *unptr;     /* return: unsigned result */
1741 {
1742     char_u          *ptr = start;
1743     int             hex = 0;            /* default is decimal */
1744     int             negative = FALSE;
1745     unsigned long   un = 0;
1746     int             n;
1747
1748     if (ptr[0] == '-')
1749     {
1750         negative = TRUE;
1751         ++ptr;
1752     }
1753
1754     /* Recognize hex and octal. */
1755     if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
1756     {
1757         hex = ptr[1];
1758         if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1759             ptr += 2;                   /* hexadecimal */
1760         else
1761         {
1762             hex = 0;                    /* default is decimal */
1763             if (dooct)
1764             {
1765                 /* Don't interpret "0", "08" or "0129" as octal. */
1766                 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1767                 {
1768                     if (ptr[n] > '7')
1769                     {
1770                         hex = 0;        /* can't be octal */
1771                         break;
1772                     }
1773                     if (ptr[n] > '0')
1774                         hex = '0';      /* assume octal */
1775                 }
1776             }
1777         }
1778     }
1779
1780     /*
1781      * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1782      */
1783     if (hex == '0' || dooct > 1)
1784     {
1785         /* octal */
1786         while ('0' <= *ptr && *ptr <= '7')
1787         {
1788             un = 8 * un + (unsigned long)(*ptr - '0');
1789             ++ptr;
1790         }
1791     }
1792     else if (hex != 0 || dohex > 1)
1793     {
1794         /* hex */
1795         while (vim_isxdigit(*ptr))
1796         {
1797             un = 16 * un + (unsigned long)hex2nr(*ptr);
1798             ++ptr;
1799         }
1800     }
1801     else
1802     {
1803         /* decimal */
1804         while (VIM_ISDIGIT(*ptr))
1805         {
1806             un = 10 * un + (unsigned long)(*ptr - '0');
1807             ++ptr;
1808         }
1809     }
1810
1811     if (hexp != NULL)
1812         *hexp = hex;
1813     if (len != NULL)
1814         *len = (int)(ptr - start);
1815     if (nptr != NULL)
1816     {
1817         if (negative)   /* account for leading '-' for decimal numbers */
1818             *nptr = -(long)un;
1819         else
1820             *nptr = (long)un;
1821     }
1822     if (unptr != NULL)
1823         *unptr = un;
1824 }
1825
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * Anything that is not a letter in those ranges is handled like a decimal
 * digit.
 */
    int
hex2nr(c)
    int         c;
{
    int         first;          /* first character of the range "c" is in */
    int         value;          /* value of that first character */

    if ('a' <= c && c <= 'f')
    {
        first = 'a';
        value = 10;
    }
    else if ('A' <= c && c <= 'F')
    {
        first = 'A';
        value = 10;
    }
    else
    {
        first = '0';
        value = 0;
    }
    return c - first + value;
}
1840
1841 #if defined(FEAT_TERMRESPONSE) \
1842         || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1843 /*
1844  * Convert two hex characters to a byte.
1845  * Return -1 if one of the characters is not hex.
1846  */
1847     int
1848 hexhex2nr(p)
1849     char_u      *p;
1850 {
1851     if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1852         return -1;
1853     return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1854 }
1855 #endif
1856
1857 /*
1858  * Return TRUE if "str" starts with a backslash that should be removed.
1859  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1860  * backslash is not a normal file name character.
1861  * '$' is a valid file name character, we don't remove the backslash before
1862  * it.  This means it is not possible to use an environment variable after a
1863  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1864  * Although "\ name" is valid, the backslash in "Program\ files" must be
1865  * removed.  Assume a file name doesn't start with a space.
1866  * For multi-byte names, never remove a backslash before a non-ascii
1867  * character, assume that all multi-byte characters are valid file name
1868  * characters.
1869  */
1870     int
1871 rem_backslash(str)
1872     char_u  *str;
1873 {
1874 #ifdef BACKSLASH_IN_FILENAME
1875     return (str[0] == '\\'
1876 # ifdef FEAT_MBYTE
1877             && str[1] < 0x80
1878 # endif
1879             && (str[1] == ' '
1880                 || (str[1] != NUL
1881                     && str[1] != '*'
1882                     && str[1] != '?'
1883                     && !vim_isfilec(str[1]))));
1884 #else
1885     return (str[0] == '\\' && str[1] != NUL);
1886 #endif
1887 }
1888
1889 /*
1890  * Halve the number of backslashes in a file name argument.
1891  * For MS-DOS we only do this if the character after the backslash
1892  * is not a normal file character.
1893  */
1894     void
1895 backslash_halve(p)
1896     char_u      *p;
1897 {
1898     for ( ; *p; ++p)
1899         if (rem_backslash(p))
1900             STRCPY(p, p + 1);
1901 }
1902
1903 /*
1904  * backslash_halve() plus save the result in allocated memory.
1905  */
1906     char_u *
1907 backslash_halve_save(p)
1908     char_u      *p;
1909 {
1910     char_u      *res;
1911
1912     res = vim_strsave(p);
1913     if (res == NULL)
1914         return p;
1915     backslash_halve(res);
1916     return res;
1917 }
1918
1919 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1920 /*
1921  * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1922  * The first 64 entries have been added to map control characters defined in
1923  * ascii.h
1924  */
1925 static char_u ebcdic2ascii_tab[256] =
1926 {
1927     0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1928     0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1929     0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1930     0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1931     0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1932     0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1933     0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1934     0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1935     0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1936     0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1937     0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1938     0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1939     0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1940     0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1941     0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1942     0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1943     0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1944     0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1945     0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1946     0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1947     0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1948     0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1949     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1950     0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1951     0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1952     0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1953     0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1954     0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1955     0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1956     0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1957     0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1958     0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1959 };
1960
1961 /*
1962  * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
1963  * wanting 7-bit ASCII characters out the other end.
1964  */
1965     void
1966 ebcdic2ascii(buffer, len)
1967     char_u      *buffer;
1968     int         len;
1969 {
1970     int         i;
1971
1972     for (i = 0; i < len; i++)
1973         buffer[i] = ebcdic2ascii_tab[buffer[i]];
1974 }
1975 #endif