src/charset.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 #include "vim.h"
  11
/* Forward declarations of the file-local helpers. */
#ifdef FEAT_LINEBREAK
static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
#endif

#ifdef FEAT_MBYTE
static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
#endif

static unsigned nr2hex __ARGS((unsigned c));

/* Set to TRUE at the end of a successful buf_init_chartab(); while it is
 * FALSE transchar() does not rely on chartab[] for the plain printable
 * range. */
static int    chartab_initialized = FALSE;
  23
  24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
  25  * characters 0-255. */
  26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
  27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
  28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
  29
  30 /*
  31  * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
  32  * characters for current buffer.
  33  *
  34  * Depends on the option settings 'iskeyword', 'isident', 'isfname',
  35  * 'isprint' and 'encoding'.
  36  *
  37  * The index in chartab[] depends on 'encoding':
  38  * - For non-multi-byte index with the byte (same as the character).
  39  * - For DBCS index with the first byte.
  40  * - For UTF-8 index with the character (when first byte is up to 0x80 it is
  41  *   the same as the character, if the first byte is 0x80 and above it depends
  42  *   on further bytes).
  43  *
  44  * The contents of chartab[]:
  45  * - The lower two bits, masked by CT_CELL_MASK, give the number of display
  46  *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
  47  * - CT_PRINT_CHAR bit is set when the character is printable (no need to
  48  *   translate the character before displaying it).  Note that only DBCS
  49  *   characters can have 2 display cells and still be printable.
  50  * - CT_FNAME_CHAR bit is set when the character can be in a file name.
  51  * - CT_ID_CHAR bit is set when the character can be in an identifier.
  52  *
  53  * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
  54  * error, OK otherwise.
  55  */
  56     int
  57 init_chartab()
  58 {
  59     return buf_init_chartab(curbuf, TRUE);
  60 }
  61
/*
 * Fill the character tables from the option values.
 *
 * When "global" is TRUE the global chartab[] is first reset to its defaults
 * and then adjusted from 'isident', 'isprint' and 'isfname'.  In all cases
 * "buf"->b_chartab[] is cleared and rebuilt from the 'iskeyword' value of
 * "buf".
 *
 * Returns FAIL as soon as an option entry cannot be parsed or is out of
 * range; entries handled before that point have already been applied and
 * chartab_initialized is not touched.  Returns OK otherwise.
 */
    int
buf_init_chartab(buf, global)
    buf_T       *buf;
    int         global;         /* FALSE: only set buf->b_chartab[] */
{
    int         c;
    int         c2;
    char_u      *p;
    int         i;
    int         tilde;
    int         do_isalpha;

    if (global)
    {
        /*
         * Set the default size for printable characters:
         * From <Space> to '~' is 1 (printable), others are 2 (not printable),
         * or 4 when 'display' contains "uhex".
         * This also inits all 'isident' and 'isfname' flags to FALSE.
         *
         * EBCDIC: all chars below ' ' are not printable, all others are
         * printable.
         */
        c = 0;
        while (c < ' ')
            chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
#ifdef EBCDIC
        while (c < 255)
#else
        while (c <= '~')
#endif
            chartab[c++] = 1 + CT_PRINT_CHAR;
#ifdef FEAT_FKMAP
        if (p_altkeymap)
        {
            while (c < YE)
                chartab[c++] = 1 + CT_PRINT_CHAR;
        }
#endif
        /* Whatever is left up to 255 gets an encoding dependent default. */
        while (c < 256)
        {
#ifdef FEAT_MBYTE
            /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
            if (enc_utf8 && c >= 0xa0)
                chartab[c++] = CT_PRINT_CHAR + 1;
            /* euc-jp characters starting with 0x8e are single width */
            else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
                chartab[c++] = CT_PRINT_CHAR + 1;
            /* other double-byte chars can be printable AND double-width */
            else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
                chartab[c++] = CT_PRINT_CHAR + 2;
            else
#endif
                /* the rest is unprintable by default */
                chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
        }

#ifdef FEAT_MBYTE
        /* Assume that every multi-byte char is a filename character. */
        for (c = 1; c < 256; ++c)
            if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
                    || (enc_dbcs == DBCS_JPNU && c == 0x8e)
                    || (enc_utf8 && c >= 0xa0))
                chartab[c] |= CT_FNAME_CHAR;
#endif
    }

    /*
     * Init word char flags all to FALSE
     */
    vim_memset(buf->b_chartab, 0, (size_t)32);
#ifdef FEAT_MBYTE
    if (enc_dbcs != 0)
        for (c = 0; c < 256; ++c)
        {
            /* double-byte characters are probably word characters */
            if (MB_BYTE2LEN(c) == 2)
                SET_CHARTAB(buf, c);
        }
#endif

#ifdef FEAT_LISP
    /*
     * In lisp mode the '-' character is included in keywords.
     */
    if (buf->b_p_lisp)
        SET_CHARTAB(buf, '-');
#endif

    /* Walk through the 'isident', 'isprint', 'isfname' and 'iskeyword'
     * options.  Each option is a list of characters, character numbers or
     * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
     * When "global" is FALSE only the last round ('iskeyword') is done.
     */
    for (i = global ? 0 : 3; i <= 3; ++i)
    {
        if (i == 0)
            p = p_isi;          /* first round: 'isident' */
        else if (i == 1)
            p = p_isp;          /* second round: 'isprint' */
        else if (i == 2)
            p = p_isf;          /* third round: 'isfname' */
        else    /* i == 3 */
            p = buf->b_p_isk;   /* fourth round: 'iskeyword' */

        while (*p)
        {
            /* A leading '^' (when something follows it) means the entry
             * removes the flag instead of setting it. */
            tilde = FALSE;
            do_isalpha = FALSE;
            if (*p == '^' && p[1] != NUL)
            {
                tilde = TRUE;
                ++p;
            }
            /* First (or only) value: a decimal number or a literal char. */
            if (VIM_ISDIGIT(*p))
                c = getdigits(&p);
            else
#ifdef FEAT_MBYTE
                 if (has_mbyte)
                c = mb_ptr2char_adv(&p);
            else
#endif
                c = *p++;
            /* Optional "-{end}" part of a range; c2 stays -1 without it. */
            c2 = -1;
            if (*p == '-' && p[1] != NUL)
            {
                ++p;
                if (VIM_ISDIGIT(*p))
                    c2 = getdigits(&p);
                else
#ifdef FEAT_MBYTE
                     if (has_mbyte)
                    c2 = mb_ptr2char_adv(&p);
                else
#endif
                    c2 = *p++;
            }
            /* Reject values outside 1-255, a reversed range and trailing
             * text that is not a comma or the end of the option. */
            if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
                                                 || !(*p == NUL || *p == ','))
                return FAIL;

            if (c2 == -1)       /* not a range */
            {
                /*
                 * A single '@' (not "@-@"):
                 * Decide on letters being ID/printable/keyword chars by
                 * testing every value 1-255 for being alphabetic, see the
                 * loop below.
                 */
                if (c == '@')
                {
                    do_isalpha = TRUE;
                    c = 1;
                    c2 = 255;
                }
                else
                    c2 = c;
            }
            while (c <= c2)
            {
                /* Use the MB_ functions here, because isalpha() doesn't
                 * work properly when 'encoding' is "latin1" and the locale is
                 * "C".  */
                if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
#ifdef FEAT_FKMAP
                        || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
#endif
                            )
                {
                    if (i == 0)                 /* (re)set ID flag */
                    {
                        if (tilde)
                            chartab[c] &= ~CT_ID_CHAR;
                        else
                            chartab[c] |= CT_ID_CHAR;
                    }
                    else if (i == 1)            /* (re)set printable */
                    {
                        /* Only characters outside <Space> - '~' can have
                         * their printable state changed. */
                        if ((c < ' '
#ifndef EBCDIC
                                    || c > '~'
#endif
#ifdef FEAT_FKMAP
                                    || (p_altkeymap
                                        && (F_isalpha(c) || F_isdigit(c)))
#endif
                            )
#ifdef FEAT_MBYTE
                                /* For double-byte we keep the cell width, so
                                 * that we can detect it from the first byte. */
                                && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
#endif
                           )
                        {
                            if (tilde)
                            {
                                /* not printable: width of the translated
                                 * form, 2 or 4 with "uhex" */
                                chartab[c] = (chartab[c] & ~CT_CELL_MASK)
                                             + ((dy_flags & DY_UHEX) ? 4 : 2);
                                chartab[c] &= ~CT_PRINT_CHAR;
                            }
                            else
                            {
                                /* printable: one cell */
                                chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
                                chartab[c] |= CT_PRINT_CHAR;
                            }
                        }
                    }
                    else if (i == 2)            /* (re)set fname flag */
                    {
                        if (tilde)
                            chartab[c] &= ~CT_FNAME_CHAR;
                        else
                            chartab[c] |= CT_FNAME_CHAR;
                    }
                    else /* i == 3 */           /* (re)set keyword flag */
                    {
                        if (tilde)
                            RESET_CHARTAB(buf, c);
                        else
                            SET_CHARTAB(buf, c);
                    }
                }
                ++c;
            }
            /* Move on to the next entry of the option. */
            p = skip_to_option_part(p);
        }
    }
    chartab_initialized = TRUE;
    return OK;
}
 290
 291 /*
 292  * Translate any special characters in buf[bufsize] in-place.
 293  * The result is a string with only printable characters, but if there is not
 294  * enough room, not all characters will be translated.
 295  */
 296     void
 297 trans_characters(buf, bufsize)
 298     char_u      *buf;
 299     int         bufsize;
 300 {
 301     int         len;            /* length of string needing translation */
 302     int         room;           /* room in buffer after string */
 303     char_u      *trs;           /* translated character */
 304     int         trs_len;        /* length of trs[] */
 305
 306     len = (int)STRLEN(buf);
 307     room = bufsize - len;
 308     while (*buf != 0)
 309     {
 310 # ifdef FEAT_MBYTE
 311         /* Assume a multi-byte character doesn't need translation. */
 312         if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
 313             len -= trs_len;
 314         else
 315 # endif
 316         {
 317             trs = transchar_byte(*buf);
 318             trs_len = (int)STRLEN(trs);
 319             if (trs_len > 1)
 320             {
 321                 room -= trs_len - 1;
 322                 if (room <= 0)
 323                     return;
 324                 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
 325             }
 326             mch_memmove(buf, trs, (size_t)trs_len);
 327             --len;
 328         }
 329         buf += trs_len;
 330     }
 331 }
 332
 333 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
 334         || defined(PROTO)
 335 /*
 336  * Translate a string into allocated memory, replacing special chars with
 337  * printable chars.  Returns NULL when out of memory.
 338  */
 339     char_u *
 340 transstr(s)
 341     char_u      *s;
 342 {
 343     char_u      *res;
 344     char_u      *p;
 345 #ifdef FEAT_MBYTE
 346     int         l, len, c;
 347     char_u      hexbuf[11];
 348 #endif
 349
 350 #ifdef FEAT_MBYTE
 351     if (has_mbyte)
 352     {
 353         /* Compute the length of the result, taking account of unprintable
 354          * multi-byte characters. */
 355         len = 0;
 356         p = s;
 357         while (*p != NUL)
 358         {
 359             if ((l = (*mb_ptr2len)(p)) > 1)
 360             {
 361                 c = (*mb_ptr2char)(p);
 362                 p += l;
 363                 if (vim_isprintc(c))
 364                     len += l;
 365                 else
 366                 {
 367                     transchar_hex(hexbuf, c);
 368                     len += (int)STRLEN(hexbuf);
 369                 }
 370             }
 371             else
 372             {
 373                 l = byte2cells(*p++);
 374                 if (l > 0)
 375                     len += l;
 376                 else
 377                     len += 4;   /* illegal byte sequence */
 378             }
 379         }
 380         res = alloc((unsigned)(len + 1));
 381     }
 382     else
 383 #endif
 384         res = alloc((unsigned)(vim_strsize(s) + 1));
 385     if (res != NULL)
 386     {
 387         *res = NUL;
 388         p = s;
 389         while (*p != NUL)
 390         {
 391 #ifdef FEAT_MBYTE
 392             if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 393             {
 394                 c = (*mb_ptr2char)(p);
 395                 if (vim_isprintc(c))
 396                     STRNCAT(res, p, l); /* append printable multi-byte char */
 397                 else
 398                     transchar_hex(res + STRLEN(res), c);
 399                 p += l;
 400             }
 401             else
 402 #endif
 403                 STRCAT(res, transchar_byte(*p++));
 404         }
 405     }
 406     return res;
 407 }
 408 #endif
 409
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]" and returns "buf"; the text is
 * silently truncated to "buflen" - 1 bytes when it does not fit.
 */
    char_u *
str_foldcase(str, orglen, buf, buflen)
    char_u      *str;
    int         orglen;
    char_u      *buf;
    int         buflen;
{
    garray_T    ga;
    int         i;
    int         len = orglen;

/* Access the result, which lives either in the growarray "ga" (when "buf"
 * is NULL) or in "buf".  Note: these macros are not #undef'ed afterwards. */
#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified. */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
    }
    else
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
    }
    /* Terminate the copy. */
    if (buf == NULL)
        GA_CHAR(len) = NUL;
    else
        buf[len] = NUL;

    /* Make each character lower case. */
    i = 0;
    while (STR_CHAR(i) != NUL)
    {
#ifdef FEAT_MBYTE
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int     c = utf_ptr2char(STR_PTR(i));   /* original char */
                int     ol = utf_ptr2len(STR_PTR(i));   /* its byte length */
                int     lc = utf_tolower(c);            /* lower-case char */

                /* Only replace the character when it is not an invalid
                 * sequence (ASCII character or more than one byte) and
                 * utf_tolower() doesn't return the original character. */
                if ((c < 0x80 || ol > 1) && c != lc)
                {
                    int     nl = utf_char2len(lc);      /* new byte length */

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (ol != nl)
                    {
                        if (nl > ol)
                        {
                            if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
                                                    : len + nl - ol >= buflen)
                            {
                                /* out of memory or no room left in "buf",
                                 * keep old char */
                                lc = c;
                                nl = ol;
                            }
                        }
                        if (ol != nl)
                        {
                            if (buf == NULL)
                            {
                                STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
                                ga.ga_len += nl - ol;
                            }
                            else
                            {
                                STRMOVE(buf + i + nl, buf + i + ol);
                                len += nl - ol;
                            }
                        }
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* skip to next multi-byte char; without UTF-8 a multi-byte
             * character is left as it is */
            i += (*mb_ptr2len)(STR_PTR(i));
        }
        else
#endif
        {
            /* Single byte: fold with the locale-aware TOLOWER_LOC(). */
            if (buf == NULL)
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            else
                buf[i] = TOLOWER_LOC(buf[i]);
            ++i;
        }
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
#endif
 523
 524 /*
 525  * Catch 22: chartab[] can't be initialized before the options are
 526  * initialized, and initializing options may cause transchar() to be called!
 527  * When chartab_initialized == FALSE don't use chartab[].
 528  * Does NOT work for multi-byte characters, c must be <= 255.
 529  * Also doesn't work for the first byte of a multi-byte, "c" must be a
 530  * character!
 531  */
 532 static char_u   transchar_buf[7];
 533
 534     char_u *
 535 transchar(c)
 536     int         c;
 537 {
 538     int                 i;
 539
 540     i = 0;
 541     if (IS_SPECIAL(c))      /* special key code, display as ~@ char */
 542     {
 543         transchar_buf[0] = '~';
 544         transchar_buf[1] = '@';
 545         i = 2;
 546         c = K_SECOND(c);
 547     }
 548
 549     if ((!chartab_initialized && (
 550 #ifdef EBCDIC
 551                     (c >= 64 && c < 255)
 552 #else
 553                     (c >= ' ' && c <= '~')
 554 #endif
 555 #ifdef FEAT_FKMAP
 556                         || F_ischar(c)
 557 #endif
 558                 )) || (c < 256 && vim_isprintc_strict(c)))
 559     {
 560         /* printable character */
 561         transchar_buf[i] = c;
 562         transchar_buf[i + 1] = NUL;
 563     }
 564     else
 565         transchar_nonprint(transchar_buf + i, c);
 566     return transchar_buf;
 567 }
 568
 569 #if defined(FEAT_MBYTE) || defined(PROTO)
 570 /*
 571  * Like transchar(), but called with a byte instead of a character.  Checks
 572  * for an illegal UTF-8 byte.
 573  */
 574     char_u *
 575 transchar_byte(c)
 576     int         c;
 577 {
 578     if (enc_utf8 && c >= 0x80)
 579     {
 580         transchar_nonprint(transchar_buf, c);
 581         return transchar_buf;
 582     }
 583     return transchar(c);
 584 }
 585 #endif
 586
/*
 * Convert non-printable character to two or more printable characters in
 * "buf[]".  "buf" needs to be able to hold five bytes.
 * Does NOT work for multi-byte characters, c must be <= 255.
 *
 * The form used is:
 * - "<xx>" (hex) when 'display' contains "uhex", and for UTF-8 bytes of 0x80
 *   and above;
 * - "^X" for control characters and DEL;
 * - "|X" (non-EBCDIC) or "~X" for the remaining values, see below.
 */
    void
transchar_nonprint(buf, c)
    char_u      *buf;
    int         c;
{
    /* NL and (for "mac" fileformat) CR are stand-ins, show what they
     * stand for. */
    if (c == NL)
        c = NUL;                /* we use newline in place of a NUL */
    else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
        c = NL;                 /* we use CR in place of  NL in this case */

    if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
        transchar_hex(buf, c);

#ifdef EBCDIC
    /* For EBCDIC only the characters 0-63 and 255 are not printable */
    else if (CtrlChar(c) != 0 || c == DEL)
#else
    else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
#endif
    {
        buf[0] = '^';
#ifdef EBCDIC
        if (c == DEL)
            buf[1] = '?';               /* DEL displayed as ^? */
        else
            buf[1] = CtrlChar(c);
#else
        buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
#endif

        buf[2] = NUL;
    }
#ifdef FEAT_MBYTE
    else if (enc_utf8 && c >= 0x80)
    {
        /* a lone byte of a UTF-8 sequence is always shown as hex */
        transchar_hex(buf, c);
    }
#endif
#ifndef EBCDIC
    else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
    {
        /* '|' followed by the character with the high bit stripped */
        buf[0] = '|';
        buf[1] = c - 0x80;
        buf[2] = NUL;
    }
#else
    else if (c < 64)
    {
        buf[0] = '~';
        buf[1] = MetaChar(c);
        buf[2] = NUL;
    }
#endif
    else                                            /* 0x80 - 0x9f and 0xff */
    {
        /*
         * TODO: EBCDIC: it is not clear what to do with these characters,
         * they are displayed as "~?" for now.
         */
        buf[0] = '~';
#ifdef EBCDIC
        buf[1] = '?';                   /* 0xff displayed as ~? */
#else
        buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
#endif
        buf[2] = NUL;
    }
}
 660
 661     void
 662 transchar_hex(buf, c)
 663     char_u      *buf;
 664     int         c;
 665 {
 666     int         i = 0;
 667
 668     buf[0] = '<';
 669 #ifdef FEAT_MBYTE
 670     if (c > 255)
 671     {
 672         buf[++i] = nr2hex((unsigned)c >> 12);
 673         buf[++i] = nr2hex((unsigned)c >> 8);
 674     }
 675 #endif
 676     buf[++i] = nr2hex((unsigned)c >> 4);
 677     buf[++i] = nr2hex((unsigned)c);
 678     buf[++i] = '>';
 679     buf[++i] = NUL;
 680 }
 681
 682 /*
 683  * Convert the lower 4 bits of byte "c" to its hex character.
 684  * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 685  * function key 1.
 686  */
 687     static unsigned
 688 nr2hex(c)
 689     unsigned    c;
 690 {
 691     if ((c & 0xf) <= 9)
 692         return (c & 0xf) + '0';
 693     return (c & 0xf) - 10 + 'a';
 694 }
 695
 696 /*
 697  * Return number of display cells occupied by byte "b".
 698  * Caller must make sure 0 <= b <= 255.
 699  * For multi-byte mode "b" must be the first byte of a character.
 700  * A TAB is counted as two cells: "^I".
 701  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
 702  * cells depends on further bytes.
 703  */
 704     int
 705 byte2cells(b)
 706     int         b;
 707 {
 708 #ifdef FEAT_MBYTE
 709     if (enc_utf8 && b >= 0x80)
 710         return 0;
 711 #endif
 712     return (chartab[b] & CT_CELL_MASK);
 713 }
 714
 715 /*
 716  * Return number of display cells occupied by character "c".
 717  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
 718  * A TAB is counted as two cells: "^I" or four: "<09>".
 719  */
 720     int
 721 char2cells(c)
 722     int         c;
 723 {
 724     if (IS_SPECIAL(c))
 725         return char2cells(K_SECOND(c)) + 2;
 726 #ifdef FEAT_MBYTE
 727     if (c >= 0x80)
 728     {
 729         /* UTF-8: above 0x80 need to check the value */
 730         if (enc_utf8)
 731             return utf_char2cells(c);
 732         /* DBCS: double-byte means double-width, except for euc-jp with first
 733          * byte 0x8e */
 734         if (enc_dbcs != 0 && c >= 0x100)
 735         {
 736             if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
 737                 return 1;
 738             return 2;
 739         }
 740     }
 741 #endif
 742     return (chartab[c & 0xff] & CT_CELL_MASK);
 743 }
 744
 745 /*
 746  * Return number of display cells occupied by character at "*p".
 747  * A TAB is counted as two cells: "^I" or four: "<09>".
 748  */
 749     int
 750 ptr2cells(p)
 751     char_u      *p;
 752 {
 753 #ifdef FEAT_MBYTE
 754     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
 755     if (enc_utf8 && *p >= 0x80)
 756         return utf_ptr2cells(p);
 757     /* For DBCS we can tell the cell count from the first byte. */
 758 #endif
 759     return (chartab[*p] & CT_CELL_MASK);
 760 }
 761
 762 /*
 763  * Return the number of characters string "s" will take on the screen,
 764  * counting TABs as two characters: "^I".
 765  */
 766     int
 767 vim_strsize(s)
 768     char_u      *s;
 769 {
 770     return vim_strnsize(s, (int)MAXCOL);
 771 }
 772
 773 /*
 774  * Return the number of characters string "s[len]" will take on the screen,
 775  * counting TABs as two characters: "^I".
 776  */
 777     int
 778 vim_strnsize(s, len)
 779     char_u      *s;
 780     int         len;
 781 {
 782     int         size = 0;
 783
 784     while (*s != NUL && --len >= 0)
 785     {
 786 #ifdef FEAT_MBYTE
 787         if (has_mbyte)
 788         {
 789             int     l = (*mb_ptr2len)(s);
 790
 791             size += ptr2cells(s);
 792             s += l;
 793             len -= l - 1;
 794         }
 795         else
 796 #endif
 797             size += byte2cells(*s++);
 798     }
 799     return size;
 800 }
 801
 802 /*
 803  * Return the number of characters 'c' will take on the screen, taking
 804  * into account the size of a tab.
 805  * Use a define to make it fast, this is used very often!!!
 806  * Also see getvcol() below.
 807  */
 808
 809 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 810     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 811     { \
 812         int ts; \
 813         ts = (buf)->b_p_ts; \
 814         return (int)(ts - (col % ts)); \
 815     } \
 816     else \
 817         return ptr2cells(p);
 818
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of cells the character at "p" takes when displayed at
 * column "col" in the current window and buffer; a TAB is expanded
 * according to RET_WIN_BUF_CHARTABSIZE().
 */
    int
chartabsize(p, col)
    char_u      *p;
    colnr_T     col;
{
    /* The macro contains the return statements. */
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
 829
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and the buffer shown in it
 * instead of the current window and buffer.
 */
    static int
win_chartabsize(wp, p, col)
    win_T       *wp;
    char_u      *p;
    colnr_T     col;
{
    /* The macro contains the return statements. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
 840
 841 /*
 842  * return the number of characters the string 's' will take on the screen,
 843  * taking into account the size of a tab
 844  */
 845     int
 846 linetabsize(s)
 847     char_u      *s;
 848 {
 849     colnr_T     col = 0;
 850
 851     while (*s != NUL)
 852         col += lbr_chartabsize_adv(&s, col);
 853     return (int)col;
 854 }
 855
 856 /*
 857  * Like linetabsize(), but for a given window instead of the current one.
 858  */
 859     int
 860 win_linetabsize(wp, p, len)
 861     win_T       *wp;
 862     char_u      *p;
 863     colnr_T     len;
 864 {
 865     colnr_T     col = 0;
 866     char_u      *s;
 867
 868     for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
 869         col += win_lbr_chartabsize(wp, s, col, NULL);
 870     return (int)col;
 871 }
 872
 873 /*
 874  * Return TRUE if 'c' is a normal identifier character:
 875  * Letters and characters from the 'isident' option.
 876  */
 877     int
 878 vim_isIDc(c)
 879     int c;
 880 {
 881     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
 882 }
 883
 884 /*
 885  * return TRUE if 'c' is a keyword character: Letters and characters from
 886  * 'iskeyword' option for current buffer.
 887  * For multi-byte characters mb_get_class() is used (builtin rules).
 888  */
 889     int
 890 vim_iswordc(c)
 891     int c;
 892 {
 893 #ifdef FEAT_MBYTE
 894     if (c >= 0x100)
 895     {
 896         if (enc_dbcs != 0)
 897             return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
 898         if (enc_utf8)
 899             return utf_class(c) >= 2;
 900     }
 901 #endif
 902     return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
 903 }
 904
 905 /*
 906  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
 907  */
 908     int
 909 vim_iswordp(p)
 910     char_u *p;
 911 {
 912 #ifdef FEAT_MBYTE
 913     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 914         return mb_get_class(p) >= 2;
 915 #endif
 916     return GET_CHARTAB(curbuf, *p) != 0;
 917 }
 918
 919 #if defined(FEAT_SYN_HL) || defined(PROTO)
 920     int
 921 vim_iswordc_buf(p, buf)
 922     char_u      *p;
 923     buf_T       *buf;
 924 {
 925 # ifdef FEAT_MBYTE
 926     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 927         return mb_get_class(p) >= 2;
 928 # endif
 929     return (GET_CHARTAB(buf, *p) != 0);
 930 }
 931 #endif
 932
 933 /*
 934  * return TRUE if 'c' is a valid file-name character
 935  * Assume characters above 0x100 are valid (multi-byte).
 936  */
 937     int
 938 vim_isfilec(c)
 939     int c;
 940 {
 941     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
 942 }
 943
 944 /*
 945  * return TRUE if 'c' is a valid file-name character or a wildcard character
 946  * Assume characters above 0x100 are valid (multi-byte).
 947  * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
 948  * returns false.
 949  */
 950     int
 951 vim_isfilec_or_wc(c)
 952     int c;
 953 {
 954     char_u buf[2];
 955
 956     buf[0] = (char_u)c;
 957     buf[1] = NUL;
 958     return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
 959 }
 960
 961 /*
 962  * return TRUE if 'c' is a printable character
 963  * Assume characters above 0x100 are printable (multi-byte), except for
 964  * Unicode.
 965  */
 966     int
 967 vim_isprintc(c)
 968     int c;
 969 {
 970 #ifdef FEAT_MBYTE
 971     if (enc_utf8 && c >= 0x100)
 972         return utf_printable(c);
 973 #endif
 974     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 975 }
 976
 977 /*
 978  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 979  * byte of a double-byte character.
 980  */
 981     int
 982 vim_isprintc_strict(c)
 983     int c;
 984 {
 985 #ifdef FEAT_MBYTE
 986     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
 987         return FALSE;
 988     if (enc_utf8 && c >= 0x100)
 989         return utf_printable(c);
 990 #endif
 991     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 992 }
 993
 994 /*
 995  * like chartabsize(), but also check for line breaks on the screen
 996  */
 997     int
 998 lbr_chartabsize(s, col)
 999     unsigned char       *s;
1000     colnr_T             col;
1001 {
1002 #ifdef FEAT_LINEBREAK
1003     if (!curwin->w_p_lbr && *p_sbr == NUL)
1004     {
1005 #endif
1006 #ifdef FEAT_MBYTE
1007         if (curwin->w_p_wrap)
1008             return win_nolbr_chartabsize(curwin, s, col, NULL);
1009 #endif
1010         RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1011 #ifdef FEAT_LINEBREAK
1012     }
1013     return win_lbr_chartabsize(curwin, s, col, NULL);
1014 #endif
1015 }
1016
1017 /*
1018  * Call lbr_chartabsize() and advance the pointer.
1019  */
1020     int
1021 lbr_chartabsize_adv(s, col)
1022     char_u      **s;
1023     colnr_T     col;
1024 {
1025     int         retval;
1026
1027     retval = lbr_chartabsize(*s, col);
1028     mb_ptr_adv(*s);
1029     return retval;
1030 }
1031
1032 /*
1033  * This function is used very often, keep it fast!!!!
1034  *
1035  * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1036  * string at start of line.  Warning: *headp is only set if it's a non-zero
1037  * value, init to 0 before calling.
1038  */
1039     int
1040 win_lbr_chartabsize(wp, s, col, headp)
1041     win_T       *wp;
1042     char_u      *s;
1043     colnr_T     col;
1044     int         *headp UNUSED;
1045 {
1046 #ifdef FEAT_LINEBREAK
1047     int         c;
1048     int         size;
1049     colnr_T     col2;
1050     colnr_T     colmax;
1051     int         added;
1052 # ifdef FEAT_MBYTE
1053     int         mb_added = 0;
1054 # else
1055 #  define mb_added 0
1056 # endif
1057     int         numberextra;
1058     char_u      *ps;
1059     int         tab_corr = (*s == TAB);
1060     int         n;
1061
1062     /*
1063      * No 'linebreak' and 'showbreak': return quickly.
1064      */
1065     if (!wp->w_p_lbr && *p_sbr == NUL)
1066 #endif
1067     {
1068 #ifdef FEAT_MBYTE
1069         if (wp->w_p_wrap)
1070             return win_nolbr_chartabsize(wp, s, col, headp);
1071 #endif
1072         RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1073     }
1074
1075 #ifdef FEAT_LINEBREAK
1076     /*
1077      * First get normal size, without 'linebreak'
1078      */
1079     size = win_chartabsize(wp, s, col);
1080     c = *s;
1081
1082     /*
1083      * If 'linebreak' set check at a blank before a non-blank if the line
1084      * needs a break here
1085      */
1086     if (wp->w_p_lbr
1087             && vim_isbreak(c)
1088             && !vim_isbreak(s[1])
1089             && !wp->w_p_list
1090             && wp->w_p_wrap
1091 # ifdef FEAT_VERTSPLIT
1092             && wp->w_width != 0
1093 # endif
1094        )
1095     {
1096         /*
1097          * Count all characters from first non-blank after a blank up to next
1098          * non-blank after a blank.
1099          */
1100         numberextra = win_col_off(wp);
1101         col2 = col;
1102         colmax = (colnr_T)(W_WIDTH(wp) - numberextra);
1103         if (col >= colmax)
1104         {
1105             n = colmax + win_col_off2(wp);
1106             if (n > 0)
1107                 colmax += (((col - colmax) / n) + 1) * n;
1108         }
1109
1110         for (;;)
1111         {
1112             ps = s;
1113             mb_ptr_adv(s);
1114             c = *s;
1115             if (!(c != NUL
1116                     && (vim_isbreak(c)
1117                         || (!vim_isbreak(c)
1118                             && (col2 == col || !vim_isbreak(*ps))))))
1119                 break;
1120
1121             col2 += win_chartabsize(wp, s, col2);
1122             if (col2 >= colmax)         /* doesn't fit */
1123             {
1124                 size = colmax - col;
1125                 tab_corr = FALSE;
1126                 break;
1127             }
1128         }
1129     }
1130 # ifdef FEAT_MBYTE
1131     else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1132                                     && wp->w_p_wrap && in_win_border(wp, col))
1133     {
1134         ++size;         /* Count the ">" in the last column. */
1135         mb_added = 1;
1136     }
1137 # endif
1138
1139     /*
1140      * May have to add something for 'showbreak' string at start of line
1141      * Set *headp to the size of what we add.
1142      */
1143     added = 0;
1144     if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1145     {
1146         numberextra = win_col_off(wp);
1147         col += numberextra + mb_added;
1148         if (col >= (colnr_T)W_WIDTH(wp))
1149         {
1150             col -= W_WIDTH(wp);
1151             numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1152             if (numberextra > 0)
1153                 col = col % numberextra;
1154         }
1155         if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1156         {
1157             added = vim_strsize(p_sbr);
1158             if (tab_corr)
1159                 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1160             else
1161                 size += added;
1162             if (col != 0)
1163                 added = 0;
1164         }
1165     }
1166     if (headp != NULL)
1167         *headp = added + mb_added;
1168     return size;
1169 #endif
1170 }
1171
1172 #if defined(FEAT_MBYTE) || defined(PROTO)
1173 /*
1174  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1175  * 'wrap' is on.  This means we need to check for a double-byte character that
1176  * doesn't fit at the end of the screen line.
1177  */
1178     static int
1179 win_nolbr_chartabsize(wp, s, col, headp)
1180     win_T       *wp;
1181     char_u      *s;
1182     colnr_T     col;
1183     int         *headp;
1184 {
1185     int         n;
1186
1187     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1188     {
1189         n = wp->w_buffer->b_p_ts;
1190         return (int)(n - (col % n));
1191     }
1192     n = ptr2cells(s);
1193     /* Add one cell for a double-width character in the last column of the
1194      * window, displayed with a ">". */
1195     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1196     {
1197         if (headp != NULL)
1198             *headp = 1;
1199         return 3;
1200     }
1201     return n;
1202 }
1203
1204 /*
1205  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1206  * "wp".
1207  */
1208     int
1209 in_win_border(wp, vcol)
1210     win_T       *wp;
1211     colnr_T     vcol;
1212 {
1213     int         width1;         /* width of first line (after line number) */
1214     int         width2;         /* width of further lines */
1215
1216 #ifdef FEAT_VERTSPLIT
1217     if (wp->w_width == 0)       /* there is no border */
1218         return FALSE;
1219 #endif
1220     width1 = W_WIDTH(wp) - win_col_off(wp);
1221     if ((int)vcol < width1 - 1)
1222         return FALSE;
1223     if ((int)vcol == width1 - 1)
1224         return TRUE;
1225     width2 = width1 + win_col_off2(wp);
1226     if (width2 <= 0)
1227         return FALSE;
1228     return ((vcol - width1) % width2 == width2 - 1);
1229 }
1230 #endif /* FEAT_MBYTE */
1231
1232 /*
1233  * Get virtual column number of pos.
1234  *  start: on the first position of this character (TAB, ctrl)
1235  * cursor: where the cursor is on this character (first char, except for TAB)
1236  *    end: on the last position of this character (TAB, ctrl)
1237  *
1238  * This is used very often, keep it fast!
1239  */
1240     void
1241 getvcol(wp, pos, start, cursor, end)
1242     win_T       *wp;
1243     pos_T       *pos;
1244     colnr_T     *start;
1245     colnr_T     *cursor;
1246     colnr_T     *end;
1247 {
1248     colnr_T     vcol;
1249     char_u      *ptr;           /* points to current char */
1250     char_u      *posptr;        /* points to char at pos->col */
1251     int         incr;
1252     int         head;
1253     int         ts = wp->w_buffer->b_p_ts;
1254     int         c;
1255
1256     vcol = 0;
1257     ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1258     if (pos->col == MAXCOL)
1259         posptr = NULL;  /* continue until the NUL */
1260     else
1261         posptr = ptr + pos->col;
1262
1263     /*
1264      * This function is used very often, do some speed optimizations.
1265      * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1266      * Also use this when 'list' is set but tabs take their normal size.
1267      */
1268     if ((!wp->w_p_list || lcs_tab1 != NUL)
1269 #ifdef FEAT_LINEBREAK
1270             && !wp->w_p_lbr && *p_sbr == NUL
1271 #endif
1272        )
1273     {
1274 #ifndef FEAT_MBYTE
1275         head = 0;
1276 #endif
1277         for (;;)
1278         {
1279 #ifdef FEAT_MBYTE
1280             head = 0;
1281 #endif
1282             c = *ptr;
1283             /* make sure we don't go past the end of the line */
1284             if (c == NUL)
1285             {
1286                 incr = 1;       /* NUL at end of line only takes one column */
1287                 break;
1288             }
1289             /* A tab gets expanded, depending on the current column */
1290             if (c == TAB)
1291                 incr = ts - (vcol % ts);
1292             else
1293             {
1294 #ifdef FEAT_MBYTE
1295                 if (has_mbyte)
1296                 {
1297                     /* For utf-8, if the byte is >= 0x80, need to look at
1298                      * further bytes to find the cell width. */
1299                     if (enc_utf8 && c >= 0x80)
1300                         incr = utf_ptr2cells(ptr);
1301                     else
1302                         incr = CHARSIZE(c);
1303
1304                     /* If a double-cell char doesn't fit at the end of a line
1305                      * it wraps to the next line, it's like this char is three
1306                      * cells wide. */
1307                     if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1308                             && in_win_border(wp, vcol))
1309                     {
1310                         ++incr;
1311                         head = 1;
1312                     }
1313                 }
1314                 else
1315 #endif
1316                     incr = CHARSIZE(c);
1317             }
1318
1319             if (posptr != NULL && ptr >= posptr) /* character at pos->col */
1320                 break;
1321
1322             vcol += incr;
1323             mb_ptr_adv(ptr);
1324         }
1325     }
1326     else
1327     {
1328         for (;;)
1329         {
1330             /* A tab gets expanded, depending on the current column */
1331             head = 0;
1332             incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1333             /* make sure we don't go past the end of the line */
1334             if (*ptr == NUL)
1335             {
1336                 incr = 1;       /* NUL at end of line only takes one column */
1337                 break;
1338             }
1339
1340             if (posptr != NULL && ptr >= posptr) /* character at pos->col */
1341                 break;
1342
1343             vcol += incr;
1344             mb_ptr_adv(ptr);
1345         }
1346     }
1347     if (start != NULL)
1348         *start = vcol + head;
1349     if (end != NULL)
1350         *end = vcol + incr - 1;
1351     if (cursor != NULL)
1352     {
1353         if (*ptr == TAB
1354                 && (State & NORMAL)
1355                 && !wp->w_p_list
1356                 && !virtual_active()
1357 #ifdef FEAT_VISUAL
1358                 && !(VIsual_active
1359                                    && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1360 #endif
1361                 )
1362             *cursor = vcol + incr - 1;      /* cursor at end */
1363         else
1364             *cursor = vcol + head;          /* cursor at start */
1365     }
1366 }
1367
1368 /*
1369  * Get virtual cursor column in the current window, pretending 'list' is off.
1370  */
1371     colnr_T
1372 getvcol_nolist(posp)
1373     pos_T       *posp;
1374 {
1375     int         list_save = curwin->w_p_list;
1376     colnr_T     vcol;
1377
1378     curwin->w_p_list = FALSE;
1379     getvcol(curwin, posp, NULL, &vcol, NULL);
1380     curwin->w_p_list = list_save;
1381     return vcol;
1382 }
1383
#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Like getvcol(), but when virtual editing is active take pos->coladd into
 * account.  In that case "start" and "cursor" get the same value.
 */
    void
getvvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     col;
    colnr_T     coladd;
    colnr_T     endadd;
# ifdef FEAT_MBYTE
    char_u      *ptr;
# endif

    if (!virtual_active())
    {
        getvcol(wp, pos, start, cursor, end);
        return;
    }

    /* In virtual mode only the start column is needed. */
    getvcol(wp, pos, &col, NULL, NULL);

    coladd = pos->coladd;
    endadd = 0;
# ifdef FEAT_MBYTE
    /* The cursor cannot be put on part of a wide character. */
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col < (colnr_T)STRLEN(ptr))
    {
        int c = (*mb_ptr2char)(ptr + pos->col);

        if (c != TAB && vim_isprintc(c))
        {
            endadd = (colnr_T)(char2cells(c) - 1);
            if (coladd > endadd)
                endadd = 0;         /* past end of line */
            else
                coladd = 0;
        }
    }
# endif
    col += coladd;

    if (start != NULL)
        *start = col;
    if (cursor != NULL)
        *cursor = col;
    if (end != NULL)
        *end = col + endadd;
}
#endif
1439
#if defined(FEAT_VISUAL) || defined(PROTO)
/*
 * Get the leftmost ("left") and rightmost ("right") virtual column covered
 * by the positions "pos1" and "pos2" in window "wp".
 * Used for Visual block mode.
 */
    void
getvcols(wp, pos1, pos2, left, right)
    win_T       *wp;
    pos_T       *pos1, *pos2;
    colnr_T     *left, *right;
{
    colnr_T     from1, from2, to1, to2;
    pos_T       *first;
    pos_T       *second;

    /* Handle the positions in text order. */
    if (ltp(pos1, pos2))
    {
        first = pos1;
        second = pos2;
    }
    else
    {
        first = pos2;
        second = pos1;
    }
    getvvcol(wp, first, &from1, NULL, &to1);
    getvvcol(wp, second, &from2, NULL, &to2);

    *left = (from2 < from1) ? from2 : from1;

    if (to2 <= to1)
        *right = to1;
    else if (*p_sel == 'e' && from2 - 1 >= to1)
        /* 'selection' starts with 'e': stop just before the second
         * character. */
        *right = from2 - 1;
    else
        *right = to2;
}
#endif
1478
1479 /*
1480  * skipwhite: skip over ' ' and '\t'.
1481  */
1482     char_u *
1483 skipwhite(q)
1484     char_u      *q;
1485 {
1486     char_u      *p = q;
1487
1488     while (vim_iswhite(*p)) /* skip to next non-white */
1489         ++p;
1490     return p;
1491 }
1492
1493 /*
1494  * skip over digits
1495  */
1496     char_u *
1497 skipdigits(q)
1498     char_u      *q;
1499 {
1500     char_u      *p = q;
1501
1502     while (VIM_ISDIGIT(*p))     /* skip to next non-digit */
1503         ++p;
1504     return p;
1505 }
1506
#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Return a pointer to the first character in "q" that is not a digit or
 * other hex character.
 */
    char_u *
skiphex(q)
    char_u      *q;
{
    char_u      *end;

    for (end = q; vim_isxdigit(*end); ++end)
        ;
    return end;
}
#endif
1522
1523 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1524 /*
1525  * skip to digit (or NUL after the string)
1526  */
1527     char_u *
1528 skiptodigit(q)
1529     char_u      *q;
1530 {
1531     char_u      *p = q;
1532
1533     while (*p != NUL && !VIM_ISDIGIT(*p))       /* skip to next digit */
1534         ++p;
1535     return p;
1536 }
1537
1538 /*
1539  * skip to hex character (or NUL after the string)
1540  */
1541     char_u *
1542 skiptohex(q)
1543     char_u      *q;
1544 {
1545     char_u      *p = q;
1546
1547     while (*p != NUL && !vim_isxdigit(*p))      /* skip to next digit */
1548         ++p;
1549     return p;
1550 }
1551 #endif
1552
/*
 * Variant of isdigit() that can handle characters > 0x100: only '0' to '9'
 * are digits.
 * isdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    if (c < '0')
        return 0;
    return c <= '9';
}
1565
/*
 * Variant of isxdigit() that can handle characters > 0x100: only '0' - '9',
 * 'a' - 'f' and 'A' - 'F' are hex digits.
 * isxdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F');
}
1579
1580 #if defined(FEAT_MBYTE) || defined(PROTO)
1581 /*
1582  * Vim's own character class functions.  These exist because many library
1583  * islower()/toupper() etc. do not work properly: they crash when used with
1584  * invalid values or can't handle latin1 when the locale is C.
1585  * Speed is most important here.
1586  */
1587 #define LATIN1LOWER 'l'
1588 #define LATIN1UPPER 'U'
1589
1590 /*                                                                 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~                                  ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1591 static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1592 static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1593 static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1594
1595     int
1596 vim_islower(c)
1597     int     c;
1598 {
1599     if (c <= '@')
1600         return FALSE;
1601     if (c >= 0x80)
1602     {
1603         if (enc_utf8)
1604             return utf_islower(c);
1605         if (c >= 0x100)
1606         {
1607 #ifdef HAVE_ISWLOWER
1608             if (has_mbyte)
1609                 return iswlower(c);
1610 #endif
1611             /* islower() can't handle these chars and may crash */
1612             return FALSE;
1613         }
1614         if (enc_latin1like)
1615             return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1616     }
1617     return islower(c);
1618 }
1619
1620     int
1621 vim_isupper(c)
1622     int     c;
1623 {
1624     if (c <= '@')
1625         return FALSE;
1626     if (c >= 0x80)
1627     {
1628         if (enc_utf8)
1629             return utf_isupper(c);
1630         if (c >= 0x100)
1631         {
1632 #ifdef HAVE_ISWUPPER
1633             if (has_mbyte)
1634                 return iswupper(c);
1635 #endif
1636             /* islower() can't handle these chars and may crash */
1637             return FALSE;
1638         }
1639         if (enc_latin1like)
1640             return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1641     }
1642     return isupper(c);
1643 }
1644
1645     int
1646 vim_toupper(c)
1647     int     c;
1648 {
1649     if (c <= '@')
1650         return c;
1651     if (c >= 0x80)
1652     {
1653         if (enc_utf8)
1654             return utf_toupper(c);
1655         if (c >= 0x100)
1656         {
1657 #ifdef HAVE_TOWUPPER
1658             if (has_mbyte)
1659                 return towupper(c);
1660 #endif
1661             /* toupper() can't handle these chars and may crash */
1662             return c;
1663         }
1664         if (enc_latin1like)
1665             return latin1upper[c];
1666     }
1667     return TOUPPER_LOC(c);
1668 }
1669
1670     int
1671 vim_tolower(c)
1672     int     c;
1673 {
1674     if (c <= '@')
1675         return c;
1676     if (c >= 0x80)
1677     {
1678         if (enc_utf8)
1679             return utf_tolower(c);
1680         if (c >= 0x100)
1681         {
1682 #ifdef HAVE_TOWLOWER
1683             if (has_mbyte)
1684                 return towlower(c);
1685 #endif
1686             /* tolower() can't handle these chars and may crash */
1687             return c;
1688         }
1689         if (enc_latin1like)
1690             return latin1lower[c];
1691     }
1692     return TOLOWER_LOC(c);
1693 }
1694 #endif
1695
1696 /*
1697  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1698  */
1699     char_u *
1700 skiptowhite(p)
1701     char_u      *p;
1702 {
1703     while (*p != ' ' && *p != '\t' && *p != NUL)
1704         ++p;
1705     return p;
1706 }
1707
#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
        || defined(PROTO)
/*
 * skiptowhite_esc: Like skiptowhite(), but a character that follows a
 * backslash or CTRL-V is skipped as well, also when it is a blank.
 */
    char_u *
skiptowhite_esc(p)
    char_u      *p;
{
    for (;;)
    {
        if (*p == ' ' || *p == '\t' || *p == NUL)
            break;
        /* Step over the escape character, unless it is the last character
         * of the string. */
        if ((*p == '\\' || *p == Ctrl_V) && p[1] != NUL)
            ++p;
        ++p;
    }
    return p;
}
#endif
1726
1727 /*
1728  * Getdigits: Get a number from a string and skip over it.
1729  * Note: the argument is a pointer to a char_u pointer!
1730  */
1731     long
1732 getdigits(pp)
1733     char_u **pp;
1734 {
1735     char_u      *p;
1736     long        retval;
1737
1738     p = *pp;
1739     retval = atol((char *)p);
1740     if (*p == '-')              /* skip negative sign */
1741         ++p;
1742     p = skipdigits(p);          /* skip to next non-digit */
1743     *pp = p;
1744     return retval;
1745 }
1746
1747 /*
1748  * Return TRUE if "lbuf" is empty or only contains blanks.
1749  */
1750     int
1751 vim_isblankline(lbuf)
1752     char_u      *lbuf;
1753 {
1754     char_u      *p;
1755
1756     p = skipwhite(lbuf);
1757     return (*p == NUL || *p == '\r' || *p == '\n');
1758 }
1759
1760 /*
1761  * Convert a string into a long and/or unsigned long, taking care of
1762  * hexadecimal and octal numbers.  Accepts a '-' sign.
1763  * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1764  *  0       decimal
1765  *  '0'     octal
1766  *  'X'     hex
1767  *  'x'     hex
1768  * If "len" is not NULL, the length of the number in characters is returned.
1769  * If "nptr" is not NULL, the signed result is returned in it.
1770  * If "unptr" is not NULL, the unsigned result is returned in it.
1771  * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1772  * octal number.
1773  * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1774  * hex number.
1775  */
1776     void
1777 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1778     char_u              *start;
1779     int                 *hexp;      /* return: type of number 0 = decimal, 'x'
1780                                        or 'X' is hex, '0' = octal */
1781     int                 *len;       /* return: detected length of number */
1782     int                 dooct;      /* recognize octal number */
1783     int                 dohex;      /* recognize hex number */
1784     long                *nptr;      /* return: signed result */
1785     unsigned long       *unptr;     /* return: unsigned result */
1786 {
1787     char_u          *ptr = start;
1788     int             hex = 0;            /* default is decimal */
1789     int             negative = FALSE;
1790     unsigned long   un = 0;
1791     int             n;
1792
1793     if (ptr[0] == '-')
1794     {
1795         negative = TRUE;
1796         ++ptr;
1797     }
1798
1799     /* Recognize hex and octal. */
1800     if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
1801     {
1802         hex = ptr[1];
1803         if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1804             ptr += 2;                   /* hexadecimal */
1805         else
1806         {
1807             hex = 0;                    /* default is decimal */
1808             if (dooct)
1809             {
1810                 /* Don't interpret "0", "08" or "0129" as octal. */
1811                 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1812                 {
1813                     if (ptr[n] > '7')
1814                     {
1815                         hex = 0;        /* can't be octal */
1816                         break;
1817                     }
1818                     if (ptr[n] > '0')
1819                         hex = '0';      /* assume octal */
1820                 }
1821             }
1822         }
1823     }
1824
1825     /*
1826      * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1827      */
1828     if (hex == '0' || dooct > 1)
1829     {
1830         /* octal */
1831         while ('0' <= *ptr && *ptr <= '7')
1832         {
1833             un = 8 * un + (unsigned long)(*ptr - '0');
1834             ++ptr;
1835         }
1836     }
1837     else if (hex != 0 || dohex > 1)
1838     {
1839         /* hex */
1840         while (vim_isxdigit(*ptr))
1841         {
1842             un = 16 * un + (unsigned long)hex2nr(*ptr);
1843             ++ptr;
1844         }
1845     }
1846     else
1847     {
1848         /* decimal */
1849         while (VIM_ISDIGIT(*ptr))
1850         {
1851             un = 10 * un + (unsigned long)(*ptr - '0');
1852             ++ptr;
1853         }
1854     }
1855
1856     if (hexp != NULL)
1857         *hexp = hex;
1858     if (len != NULL)
1859         *len = (int)(ptr - start);
1860     if (nptr != NULL)
1861     {
1862         if (negative)   /* account for leading '-' for decimal numbers */
1863             *nptr = -(long)un;
1864         else
1865             *nptr = (long)un;
1866     }
1867     if (unptr != NULL)
1868         *unptr = un;
1869 }
1870
/*
 * Return the value of the single hex character "c".
 * The result is only valid when "c" is '0' - '9', 'A' - 'F' or 'a' - 'f';
 * for any other value the distance to '0' is returned.
 */
    int
hex2nr(c)
    int         c;
{
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    return c - '0';
}
1885
#if defined(FEAT_TERMRESPONSE) \
        || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
/*
 * Convert the two hex characters at "p" to a byte value, the first character
 * being the high nibble.
 * Return -1 if one of the characters is not hex.
 */
    int
hexhex2nr(p)
    char_u      *p;
{
    if (vim_isxdigit(p[0]) && vim_isxdigit(p[1]))
        return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
    return -1;
}
#endif
1901
1902 /*
1903  * Return TRUE if "str" starts with a backslash that should be removed.
1904  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1905  * backslash is not a normal file name character.
1906  * '$' is a valid file name character, we don't remove the backslash before
1907  * it.  This means it is not possible to use an environment variable after a
1908  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1909  * Although "\ name" is valid, the backslash in "Program\ files" must be
1910  * removed.  Assume a file name doesn't start with a space.
1911  * For multi-byte names, never remove a backslash before a non-ascii
1912  * character, assume that all multi-byte characters are valid file name
1913  * characters.
1914  */
1915     int
1916 rem_backslash(str)
1917     char_u  *str;
1918 {
1919 #ifdef BACKSLASH_IN_FILENAME
1920     return (str[0] == '\\'
1921 # ifdef FEAT_MBYTE
1922             && str[1] < 0x80
1923 # endif
1924             && (str[1] == ' '
1925                 || (str[1] != NUL
1926                     && str[1] != '*'
1927                     && str[1] != '?'
1928                     && !vim_isfilec(str[1]))));
1929 #else
1930     return (str[0] == '\\' && str[1] != NUL);
1931 #endif
1932 }
1933
1934 /*
1935  * Halve the number of backslashes in a file name argument.
1936  * For MS-DOS we only do this if the character after the backslash
1937  * is not a normal file character.
1938  */
1939     void
1940 backslash_halve(p)
1941     char_u      *p;
1942 {
1943     for ( ; *p; ++p)
1944         if (rem_backslash(p))
1945             STRMOVE(p, p + 1);
1946 }
1947
1948 /*
1949  * backslash_halve() plus save the result in allocated memory.
1950  */
1951     char_u *
1952 backslash_halve_save(p)
1953     char_u      *p;
1954 {
1955     char_u      *res;
1956
1957     res = vim_strsave(p);
1958     if (res == NULL)
1959         return p;
1960     backslash_halve(res);
1961     return res;
1962 }
1963
#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 * The table is indexed with the EBCDIC byte, the values are in octal.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,     /* 0x00 */
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,     /* 0x08 */
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,     /* 0x10 */
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,     /* 0x18 */
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,     /* 0x20 */
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,     /* 0x28 */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,     /* 0x30 */
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,     /* 0x38 */
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,     /* 0x40 */
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,     /* 0x48 */
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,     /* 0x50 */
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,     /* 0x58 */
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,     /* 0x60 */
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,     /* 0x68 */
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,     /* 0x70 */
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,     /* 0x78 */
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,     /* 0x80 */
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,     /* 0x88 */
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,     /* 0x90 */
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,     /* 0x98 */
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,     /* 0xa0 */
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,     /* 0xa8 */
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,     /* 0xb0 */
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,     /* 0xb8 */
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,     /* 0xc0 */
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,     /* 0xc8 */
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,     /* 0xd0 */
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,     /* 0xd8 */
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,     /* 0xe0 */
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,     /* 0xe8 */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,     /* 0xf0 */
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377      /* 0xf8 */
};

/*
 * Convert the "len" bytes in "buffer" from EBCDIC to ASCII, in place.  Only
 * useful if wanting 7-bit ASCII characters out the other end.
 * Nothing is done when "len" is zero or negative.
 */
    void
ebcdic2ascii(buffer, len)
    char_u      *buffer;
    int         len;
{
    int         todo;

    for (todo = len; todo > 0; --todo, ++buffer)
        *buffer = ebcdic2ascii_tab[*buffer];
}
#endif