src/charset.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 #include "vim.h"
  11
  12 #ifdef FEAT_LINEBREAK
  13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
  14 #endif
  15
  16 #ifdef FEAT_MBYTE
  17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
  18 #endif
  19
  20 static unsigned nr2hex __ARGS((unsigned c));
  21
  22 static int    chartab_initialized = FALSE;
  23
  24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
  25  * characters 0-255. */
  26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
  27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
  28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
  29
  30 /*
  31  * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
  32  * characters for current buffer.
  33  *
  34  * Depends on the option settings 'iskeyword', 'isident', 'isfname',
  35  * 'isprint' and 'encoding'.
  36  *
  37  * The index in chartab[] depends on 'encoding':
  38  * - For non-multi-byte index with the byte (same as the character).
  39  * - For DBCS index with the first byte.
  40  * - For UTF-8 index with the character (when first byte is up to 0x80 it is
  41  *   the same as the character, if the first byte is 0x80 and above it depends
  42  *   on further bytes).
  43  *
  44  * The contents of chartab[]:
  45  * - The lower two bits, masked by CT_CELL_MASK, give the number of display
  46  *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
  47  * - CT_PRINT_CHAR bit is set when the character is printable (no need to
  48  *   translate the character before displaying it).  Note that only DBCS
  49  *   characters can have 2 display cells and still be printable.
  50  * - CT_FNAME_CHAR bit is set when the character can be in a file name.
  51  * - CT_ID_CHAR bit is set when the character can be in an identifier.
  52  *
  53  * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
  54  * error, OK otherwise.
  55  */
  56     int
  57 init_chartab()
  58 {
  59     return buf_init_chartab(curbuf, TRUE);
  60 }
  61
/*
 * Set the keyword flags in buf->b_chartab[] from the 'iskeyword' value of
 * "buf".  When "global" is TRUE, first rebuild the global chartab[] with its
 * defaults and then apply 'isident', 'isprint' and 'isfname' as well.
 *
 * Each option is a comma separated list of items.  An item is a character
 * or a decimal character number, optionally followed by '-' and a second
 * one to form a range.  A leading '^' clears the flag instead of setting
 * it.  A single '@' stands for all alphabetic characters.
 *
 * Returns FAIL as soon as an invalid item is found; items handled before
 * that point have already been applied.  Returns OK otherwise, and only
 * then sets "chartab_initialized".
 */
    int
buf_init_chartab(buf, global)
    buf_T       *buf;
    int         global;         /* FALSE: only set buf->b_chartab[] */
{
    int         c;              /* character, or first character of a range */
    int         c2;             /* last character of a range, -1 if none */
    char_u      *p;             /* walks through the option value */
    int         i;              /* which option: 0 'isident', 1 'isprint',
                                 * 2 'isfname', 3 'iskeyword' */
    int         tilde;          /* TRUE when the item starts with '^' */
    int         do_isalpha;     /* TRUE when handling a single '@' */

    if (global)
    {
        /*
         * Set the default size for printable characters:
         * From <Space> to '~' is 1 (printable), others are 2 (not printable).
         * This also inits all 'isident' and 'isfname' flags to FALSE.
         *
         * EBCDIC: all chars below ' ' are not printable, all others are
         * printable.
         */
        c = 0;
        /* Unprintable: 4 cells when 'display' has "uhex", otherwise 2. */
        while (c < ' ')
            chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
#ifdef EBCDIC
        while (c < 255)
#else
        while (c <= '~')
#endif
            chartab[c++] = 1 + CT_PRINT_CHAR;
#ifdef FEAT_FKMAP
        /* With 'altkeymap' the characters up to YE are printable as well. */
        if (p_altkeymap)
        {
            while (c < YE)
                chartab[c++] = 1 + CT_PRINT_CHAR;
        }
#endif
        /* Handle the remaining byte values up to 255. */
        while (c < 256)
        {
#ifdef FEAT_MBYTE
            /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
            if (enc_utf8 && c >= 0xa0)
                chartab[c++] = CT_PRINT_CHAR + 1;
            /* euc-jp characters starting with 0x8e are single width */
            else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
                chartab[c++] = CT_PRINT_CHAR + 1;
            /* other double-byte chars can be printable AND double-width */
            else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
                chartab[c++] = CT_PRINT_CHAR + 2;
            else
#endif
                /* the rest is unprintable by default */
                chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
        }

#ifdef FEAT_MBYTE
        /* Assume that every multi-byte char is a filename character. */
        for (c = 1; c < 256; ++c)
            if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
                    || (enc_dbcs == DBCS_JPNU && c == 0x8e)
                    || (enc_utf8 && c >= 0xa0))
                chartab[c] |= CT_FNAME_CHAR;
#endif
    }

    /*
     * Init word char flags all to FALSE
     */
    vim_memset(buf->b_chartab, 0, (size_t)32);
#ifdef FEAT_MBYTE
    if (enc_dbcs != 0)
        for (c = 0; c < 256; ++c)
        {
            /* double-byte characters are probably word characters */
            if (MB_BYTE2LEN(c) == 2)
                SET_CHARTAB(buf, c);
        }
#endif

#ifdef FEAT_LISP
    /*
     * In lisp mode the '-' character is included in keywords.
     */
    if (buf->b_p_lisp)
        SET_CHARTAB(buf, '-');
#endif

    /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
     * options.  Each option is a list of characters, character numbers or
     * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
     * When "global" is FALSE only the last round ('iskeyword') is done.
     */
    for (i = global ? 0 : 3; i <= 3; ++i)
    {
        if (i == 0)
            p = p_isi;          /* first round: 'isident' */
        else if (i == 1)
            p = p_isp;          /* second round: 'isprint' */
        else if (i == 2)
            p = p_isf;          /* third round: 'isfname' */
        else    /* i == 3 */
            p = buf->b_p_isk;   /* fourth round: 'iskeyword' */

        while (*p)
        {
            tilde = FALSE;
            do_isalpha = FALSE;
            /* A '^' that is not the last character negates the item. */
            if (*p == '^' && p[1] != NUL)
            {
                tilde = TRUE;
                ++p;
            }
            /* Get the (first) character: a number or the character itself. */
            if (VIM_ISDIGIT(*p))
                c = getdigits(&p);
            else
#ifdef FEAT_MBYTE
                 if (has_mbyte)
                c = mb_ptr2char_adv(&p);
            else
#endif
                c = *p++;
            c2 = -1;
            /* A '-' that is not the last character makes this a range. */
            if (*p == '-' && p[1] != NUL)
            {
                ++p;
                if (VIM_ISDIGIT(*p))
                    c2 = getdigits(&p);
                else
#ifdef FEAT_MBYTE
                     if (has_mbyte)
                    c2 = mb_ptr2char_adv(&p);
                else
#endif
                    c2 = *p++;
            }
            /* Both ends must be in 1..255, the range must not be reversed
             * and the item must be followed by a comma or the end. */
            if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
                                                 || !(*p == NUL || *p == ','))
                return FAIL;

            if (c2 == -1)       /* not a range */
            {
                /*
                 * A single '@' (not "@-@"):
                 * Decide on letters being ID/printable/keyword chars with
                 * standard function isalpha(). This takes care of locale for
                 * single-byte characters).
                 */
                if (c == '@')
                {
                    do_isalpha = TRUE;
                    c = 1;
                    c2 = 255;
                }
                else
                    c2 = c;
            }
            /* Apply the item to every character in c..c2; for '@' only to
             * the alphabetic ones. */
            while (c <= c2)
            {
                /* Use the MB_ functions here, because isalpha() doesn't
                 * work properly when 'encoding' is "latin1" and the locale is
                 * "C".  */
                if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
#ifdef FEAT_FKMAP
                        || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
#endif
                            )
                {
                    if (i == 0)                 /* (re)set ID flag */
                    {
                        if (tilde)
                            chartab[c] &= ~CT_ID_CHAR;
                        else
                            chartab[c] |= CT_ID_CHAR;
                    }
                    else if (i == 1)            /* (re)set printable */
                    {
                        /* Characters from <Space> to '~' are skipped here,
                         * they keep their default. */
                        if ((c < ' '
#ifndef EBCDIC
                                    || c > '~'
#endif
#ifdef FEAT_FKMAP
                                    || (p_altkeymap
                                        && (F_isalpha(c) || F_isdigit(c)))
#endif
                            )
#ifdef FEAT_MBYTE
                                /* For double-byte we keep the cell width, so
                                 * that we can detect it from the first byte. */
                                && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
#endif
                           )
                        {
                            if (tilde)
                            {
                                /* unprintable: 2 cells, or 4 with "uhex" */
                                chartab[c] = (chartab[c] & ~CT_CELL_MASK)
                                             + ((dy_flags & DY_UHEX) ? 4 : 2);
                                chartab[c] &= ~CT_PRINT_CHAR;
                            }
                            else
                            {
                                /* printable: one cell */
                                chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
                                chartab[c] |= CT_PRINT_CHAR;
                            }
                        }
                    }
                    else if (i == 2)            /* (re)set fname flag */
                    {
                        if (tilde)
                            chartab[c] &= ~CT_FNAME_CHAR;
                        else
                            chartab[c] |= CT_FNAME_CHAR;
                    }
                    else /* i == 3 */           /* (re)set keyword flag */
                    {
                        if (tilde)
                            RESET_CHARTAB(buf, c);
                        else
                            SET_CHARTAB(buf, c);
                    }
                }
                ++c;
            }
            /* Advance to the next item of the option value. */
            p = skip_to_option_part(p);
        }
    }
    chartab_initialized = TRUE;
    return OK;
}
 290
 291 /*
 292  * Translate any special characters in buf[bufsize] in-place.
 293  * The result is a string with only printable characters, but if there is not
 294  * enough room, not all characters will be translated.
 295  */
 296     void
 297 trans_characters(buf, bufsize)
 298     char_u      *buf;
 299     int         bufsize;
 300 {
 301     int         len;            /* length of string needing translation */
 302     int         room;           /* room in buffer after string */
 303     char_u      *trs;           /* translated character */
 304     int         trs_len;        /* length of trs[] */
 305
 306     len = (int)STRLEN(buf);
 307     room = bufsize - len;
 308     while (*buf != 0)
 309     {
 310 # ifdef FEAT_MBYTE
 311         /* Assume a multi-byte character doesn't need translation. */
 312         if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
 313             len -= trs_len;
 314         else
 315 # endif
 316         {
 317             trs = transchar_byte(*buf);
 318             trs_len = (int)STRLEN(trs);
 319             if (trs_len > 1)
 320             {
 321                 room -= trs_len - 1;
 322                 if (room <= 0)
 323                     return;
 324                 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
 325             }
 326             mch_memmove(buf, trs, (size_t)trs_len);
 327             --len;
 328         }
 329         buf += trs_len;
 330     }
 331 }
 332
 333 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
 334         || defined(PROTO)
 335 /*
 336  * Translate a string into allocated memory, replacing special chars with
 337  * printable chars.  Returns NULL when out of memory.
 338  */
 339     char_u *
 340 transstr(s)
 341     char_u      *s;
 342 {
 343     char_u      *res;
 344     char_u      *p;
 345 #ifdef FEAT_MBYTE
 346     int         l, len, c;
 347     char_u      hexbuf[11];
 348 #endif
 349
 350 #ifdef FEAT_MBYTE
 351     if (has_mbyte)
 352     {
 353         /* Compute the length of the result, taking account of unprintable
 354          * multi-byte characters. */
 355         len = 0;
 356         p = s;
 357         while (*p != NUL)
 358         {
 359             if ((l = (*mb_ptr2len)(p)) > 1)
 360             {
 361                 c = (*mb_ptr2char)(p);
 362                 p += l;
 363                 if (vim_isprintc(c))
 364                     len += l;
 365                 else
 366                 {
 367                     transchar_hex(hexbuf, c);
 368                     len += (int)STRLEN(hexbuf);
 369                 }
 370             }
 371             else
 372             {
 373                 l = byte2cells(*p++);
 374                 if (l > 0)
 375                     len += l;
 376                 else
 377                     len += 4;   /* illegal byte sequence */
 378             }
 379         }
 380         res = alloc((unsigned)(len + 1));
 381     }
 382     else
 383 #endif
 384         res = alloc((unsigned)(vim_strsize(s) + 1));
 385     if (res != NULL)
 386     {
 387         *res = NUL;
 388         p = s;
 389         while (*p != NUL)
 390         {
 391 #ifdef FEAT_MBYTE
 392             if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 393             {
 394                 c = (*mb_ptr2char)(p);
 395                 if (vim_isprintc(c))
 396                     STRNCAT(res, p, l); /* append printable multi-byte char */
 397                 else
 398                     transchar_hex(res + STRLEN(res), c);
 399                 p += l;
 400             }
 401             else
 402 #endif
 403                 STRCAT(res, transchar_byte(*p++));
 404         }
 405     }
 406     return res;
 407 }
 408 #endif
 409
 410 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]".
 *
 * The string is first copied and then made lower case in-place.  When "buf"
 * is given and "str" does not fit, it is truncated to "buflen" - 1 bytes.
 * With UTF-8 the lower case character may take a different number of bytes;
 * the rest of the string is then shifted.  If there is no room for a longer
 * character the original character is kept.
 */
    char_u *
str_foldcase(str, orglen, buf, buflen)
    char_u      *str;
    int         orglen;
    char_u      *buf;
    int         buflen;
{
    garray_T    ga;             /* holds the result when "buf" is NULL */
    int         i;              /* byte index in the result */
    int         len = orglen;   /* length of the result when "buf" is used */

/* Access the result, no matter if it is in "ga" or in "buf". */
#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified. */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        /* one extra byte for the terminating NUL */
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
    }
    else
    {
        /* Truncate to what fits in "buf", leaving room for the NUL. */
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
    }
    if (buf == NULL)
        GA_CHAR(len) = NUL;
    else
        buf[len] = NUL;

    /* Make each character lower case. */
    i = 0;
    while (STR_CHAR(i) != NUL)
    {
#ifdef FEAT_MBYTE
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int     c = utf_ptr2char(STR_PTR(i));   /* original char */
                int     ol = utf_ptr2len(STR_PTR(i));   /* its byte length */
                int     lc = utf_tolower(c);            /* lower case char */

                /* Only replace the character when it is not an invalid
                 * sequence (ASCII character or more than one byte) and
                 * utf_tolower() doesn't return the original character. */
                if ((c < 0x80 || ol > 1) && c != lc)
                {
                    int     nl = utf_char2len(lc);  /* new byte length */

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (ol != nl)
                    {
                        if (nl > ol)
                        {
                            /* Need more room: grow the array or check that
                             * the longer string still fits in "buf". */
                            if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
                                                    : len + nl - ol >= buflen)
                            {
                                /* out of memory, keep old char */
                                lc = c;
                                nl = ol;
                            }
                        }
                        if (ol != nl)
                        {
                            /* Move the text after this character, then
                             * update the length that is kept. */
                            if (buf == NULL)
                            {
                                STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
                                ga.ga_len += nl - ol;
                            }
                            else
                            {
                                STRMOVE(buf + i + nl, buf + i + ol);
                                len += nl - ol;
                            }
                        }
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* Without UTF-8 a multi-byte character is left as it is. */
            /* skip to next multi-byte char */
            i += (*mb_ptr2len)(STR_PTR(i));
        }
        else
#endif
        {
            /* Single byte: use the locale to make it lower case. */
            if (buf == NULL)
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            else
                buf[i] = TOLOWER_LOC(buf[i]);
            ++i;
        }
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
 522 #endif
 523
 524 /*
 525  * Catch 22: chartab[] can't be initialized before the options are
 526  * initialized, and initializing options may cause transchar() to be called!
 527  * When chartab_initialized == FALSE don't use chartab[].
 528  * Does NOT work for multi-byte characters, c must be <= 255.
 529  * Also doesn't work for the first byte of a multi-byte, "c" must be a
 530  * character!
 531  */
 532 static char_u   transchar_buf[7];
 533
 534     char_u *
 535 transchar(c)
 536     int         c;
 537 {
 538     int                 i;
 539
 540     i = 0;
 541     if (IS_SPECIAL(c))      /* special key code, display as ~@ char */
 542     {
 543         transchar_buf[0] = '~';
 544         transchar_buf[1] = '@';
 545         i = 2;
 546         c = K_SECOND(c);
 547     }
 548
 549     if ((!chartab_initialized && (
 550 #ifdef EBCDIC
 551                     (c >= 64 && c < 255)
 552 #else
 553                     (c >= ' ' && c <= '~')
 554 #endif
 555 #ifdef FEAT_FKMAP
 556                         || F_ischar(c)
 557 #endif
 558                 )) || (c < 256 && vim_isprintc_strict(c)))
 559     {
 560         /* printable character */
 561         transchar_buf[i] = c;
 562         transchar_buf[i + 1] = NUL;
 563     }
 564     else
 565         transchar_nonprint(transchar_buf + i, c);
 566     return transchar_buf;
 567 }
 568
 569 #if defined(FEAT_MBYTE) || defined(PROTO)
 570 /*
 571  * Like transchar(), but called with a byte instead of a character.  Checks
 572  * for an illegal UTF-8 byte.
 573  */
 574     char_u *
 575 transchar_byte(c)
 576     int         c;
 577 {
 578     if (enc_utf8 && c >= 0x80)
 579     {
 580         transchar_nonprint(transchar_buf, c);
 581         return transchar_buf;
 582     }
 583     return transchar(c);
 584 }
 585 #endif
 586
/*
 * Convert non-printable character to two or more printable characters in
 * "buf[]".  "buf" needs to be able to hold five bytes.
 * Does NOT work for multi-byte characters, c must be <= 255.
 *
 * The result is NUL terminated.  The form that is used:
 * - <xx>   hex value, when 'display' has "uhex" and for UTF-8 bytes of
 *          0x80 and above
 * - ^X     for characters up to 0x7f
 * - |X     for 0xa0 - 0xfe (not for EBCDIC)
 * - ~X     for the rest
 */
    void
transchar_nonprint(buf, c)
    char_u      *buf;
    int         c;
{
    /* Undo the way NUL and NL are stored in the text. */
    if (c == NL)
        c = NUL;                /* we use newline in place of a NUL */
    else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
        c = NL;                 /* we use CR in place of  NL in this case */

    if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
        transchar_hex(buf, c);

#ifdef EBCDIC
    /* For EBCDIC only the characters 0-63 and 255 are not printable */
    else if (CtrlChar(c) != 0 || c == DEL)
#else
    else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
#endif
    {
        buf[0] = '^';
#ifdef EBCDIC
        if (c == DEL)
            buf[1] = '?';               /* DEL displayed as ^? */
        else
            buf[1] = CtrlChar(c);
#else
        /* Flipping bit 0x40 turns 0x00 - 0x1f into '@' - '_'. */
        buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
#endif

        buf[2] = NUL;
    }
#ifdef FEAT_MBYTE
    else if (enc_utf8 && c >= 0x80)
    {
        /* Always use the hex form for a UTF-8 byte of 0x80 and above. */
        transchar_hex(buf, c);
    }
#endif
#ifndef EBCDIC
    else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
    {
        /* '|' followed by the character with the high bit removed */
        buf[0] = '|';
        buf[1] = c - 0x80;
        buf[2] = NUL;
    }
#else
    else if (c < 64)
    {
        buf[0] = '~';
        buf[1] = MetaChar(c);
        buf[2] = NUL;
    }
#endif
    else                                            /* 0x80 - 0x9f and 0xff */
    {
        /*
         * TODO: EBCDIC I don't know what to do with this chars, so I display
         * them as '~?' for now
         */
        buf[0] = '~';
#ifdef EBCDIC
        buf[1] = '?';                   /* 0xff displayed as ~? */
#else
        /* Like the '^' form, after removing the high bit. */
        buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
#endif
        buf[2] = NUL;
    }
}
 660
 661     void
 662 transchar_hex(buf, c)
 663     char_u      *buf;
 664     int         c;
 665 {
 666     int         i = 0;
 667
 668     buf[0] = '<';
 669 #ifdef FEAT_MBYTE
 670     if (c > 255)
 671     {
 672         buf[++i] = nr2hex((unsigned)c >> 12);
 673         buf[++i] = nr2hex((unsigned)c >> 8);
 674     }
 675 #endif
 676     buf[++i] = nr2hex((unsigned)c >> 4);
 677     buf[++i] = nr2hex((unsigned)c);
 678     buf[++i] = '>';
 679     buf[++i] = NUL;
 680 }
 681
 682 /*
 683  * Convert the lower 4 bits of byte "c" to its hex character.
 684  * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 685  * function key 1.
 686  */
 687     static unsigned
 688 nr2hex(c)
 689     unsigned    c;
 690 {
 691     if ((c & 0xf) <= 9)
 692         return (c & 0xf) + '0';
 693     return (c & 0xf) - 10 + 'a';
 694 }
 695
 696 /*
 697  * Return number of display cells occupied by byte "b".
 698  * Caller must make sure 0 <= b <= 255.
 699  * For multi-byte mode "b" must be the first byte of a character.
 700  * A TAB is counted as two cells: "^I".
 701  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
 702  * cells depends on further bytes.
 703  */
 704     int
 705 byte2cells(b)
 706     int         b;
 707 {
 708 #ifdef FEAT_MBYTE
 709     if (enc_utf8 && b >= 0x80)
 710         return 0;
 711 #endif
 712     return (chartab[b] & CT_CELL_MASK);
 713 }
 714
 715 /*
 716  * Return number of display cells occupied by character "c".
 717  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
 718  * A TAB is counted as two cells: "^I" or four: "<09>".
 719  */
 720     int
 721 char2cells(c)
 722     int         c;
 723 {
 724     if (IS_SPECIAL(c))
 725         return char2cells(K_SECOND(c)) + 2;
 726 #ifdef FEAT_MBYTE
 727     if (c >= 0x80)
 728     {
 729         /* UTF-8: above 0x80 need to check the value */
 730         if (enc_utf8)
 731             return utf_char2cells(c);
 732         /* DBCS: double-byte means double-width, except for euc-jp with first
 733          * byte 0x8e */
 734         if (enc_dbcs != 0 && c >= 0x100)
 735         {
 736             if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
 737                 return 1;
 738             return 2;
 739         }
 740     }
 741 #endif
 742     return (chartab[c & 0xff] & CT_CELL_MASK);
 743 }
 744
 745 /*
 746  * Return number of display cells occupied by character at "*p".
 747  * A TAB is counted as two cells: "^I" or four: "<09>".
 748  */
 749     int
 750 ptr2cells(p)
 751     char_u      *p;
 752 {
 753 #ifdef FEAT_MBYTE
 754     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
 755     if (enc_utf8 && *p >= 0x80)
 756         return utf_ptr2cells(p);
 757     /* For DBCS we can tell the cell count from the first byte. */
 758 #endif
 759     return (chartab[*p] & CT_CELL_MASK);
 760 }
 761
 762 /*
 763  * Return the number of characters string "s" will take on the screen,
 764  * counting TABs as two characters: "^I".
 765  */
 766     int
 767 vim_strsize(s)
 768     char_u      *s;
 769 {
 770     return vim_strnsize(s, (int)MAXCOL);
 771 }
 772
 773 /*
 774  * Return the number of characters string "s[len]" will take on the screen,
 775  * counting TABs as two characters: "^I".
 776  */
 777     int
 778 vim_strnsize(s, len)
 779     char_u      *s;
 780     int         len;
 781 {
 782     int         size = 0;
 783
 784     while (*s != NUL && --len >= 0)
 785     {
 786 #ifdef FEAT_MBYTE
 787         if (has_mbyte)
 788         {
 789             int     l = (*mb_ptr2len)(s);
 790
 791             size += ptr2cells(s);
 792             s += l;
 793             len -= l - 1;
 794         }
 795         else
 796 #endif
 797             size += byte2cells(*s++);
 798     }
 799     return size;
 800 }
 801
 802 /*
 803  * Return the number of characters 'c' will take on the screen, taking
 804  * into account the size of a tab.
 805  * Use a define to make it fast, this is used very often!!!
 806  * Also see getvcol() below.
 807  */
 808
 809 #ifdef FEAT_VARTABS
 810 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 811     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 812     { \
 813         return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_ary); \
 814     } \
 815     else \
 816         return ptr2cells(p);
 817 #else
 818 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 819     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 820     { \
 821         int ts; \
 822         ts = (buf)->b_p_ts; \
 823         return (int)(ts - (col % ts)); \
 824     } \
 825     else \
 826         return ptr2cells(p);
 827 #endif
 828
 829 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
 830         || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
 831     int
 832 chartabsize(p, col)
 833     char_u      *p;
 834     colnr_T     col;
 835 {
 836     RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
 837 }
 838 #endif
 839
 840 #ifdef FEAT_LINEBREAK
 841     static int
 842 win_chartabsize(wp, p, col)
 843     win_T       *wp;
 844     char_u      *p;
 845     colnr_T     col;
 846 {
 847     RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
 848 }
 849 #endif
 850
 851 /*
 852  * return the number of characters the string 's' will take on the screen,
 853  * taking into account the size of a tab
 854  */
 855     int
 856 linetabsize(s, lnum)
 857     char_u      *s;
 858     linenr_T    lnum;
 859 {
 860     colnr_T     col = 0;
 861
 862     while (*s != NUL)
 863         col += lbr_chartabsize_adv(&s, col, lnum);
 864     return (int)col;
 865 }
 866
 867 /*
 868  * Like linetabsize(), but for a given window instead of the current one.
 869  */
 870     int
 871 win_linetabsize(wp, p, len, lnum)
 872     win_T       *wp;
 873     char_u      *p;
 874     colnr_T     len;
 875     linenr_T    lnum;
 876 {
 877     colnr_T     col = 0;
 878     char_u      *s;
 879
 880     for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
 881         col += win_lbr_chartabsize(wp, s, col, NULL, lnum);
 882     return (int)col;
 883 }
 884
 885 /*
 886  * Return TRUE if 'c' is a normal identifier character:
 887  * Letters and characters from the 'isident' option.
 888  */
 889     int
 890 vim_isIDc(c)
 891     int c;
 892 {
 893     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
 894 }
 895
 896 /*
 897  * return TRUE if 'c' is a keyword character: Letters and characters from
 898  * 'iskeyword' option for current buffer.
 899  * For multi-byte characters mb_get_class() is used (builtin rules).
 900  */
 901     int
 902 vim_iswordc(c)
 903     int c;
 904 {
 905 #ifdef FEAT_MBYTE
 906     if (c >= 0x100)
 907     {
 908         if (enc_dbcs != 0)
 909             return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
 910         if (enc_utf8)
 911             return utf_class(c) >= 2;
 912     }
 913 #endif
 914     return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
 915 }
 916
 917 /*
 918  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
 919  */
 920     int
 921 vim_iswordp(p)
 922     char_u *p;
 923 {
 924 #ifdef FEAT_MBYTE
 925     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 926         return mb_get_class(p) >= 2;
 927 #endif
 928     return GET_CHARTAB(curbuf, *p) != 0;
 929 }
 930
 931 #if defined(FEAT_SYN_HL) || defined(PROTO)
 932     int
 933 vim_iswordc_buf(p, buf)
 934     char_u      *p;
 935     buf_T       *buf;
 936 {
 937 # ifdef FEAT_MBYTE
 938     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 939         return mb_get_class(p) >= 2;
 940 # endif
 941     return (GET_CHARTAB(buf, *p) != 0);
 942 }
 943 #endif
 944
 945 /*
 946  * return TRUE if 'c' is a valid file-name character
 947  * Assume characters above 0x100 are valid (multi-byte).
 948  */
 949     int
 950 vim_isfilec(c)
 951     int c;
 952 {
 953     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
 954 }
 955
 956 /*
 957  * return TRUE if 'c' is a valid file-name character or a wildcard character
 958  * Assume characters above 0x100 are valid (multi-byte).
 959  * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
 960  * returns false.
 961  */
 962     int
 963 vim_isfilec_or_wc(c)
 964     int c;
 965 {
 966     char_u buf[2];
 967
 968     buf[0] = (char_u)c;
 969     buf[1] = NUL;
 970     return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
 971 }
 972
 973 /*
 974  * return TRUE if 'c' is a printable character
 975  * Assume characters above 0x100 are printable (multi-byte), except for
 976  * Unicode.
 977  */
 978     int
 979 vim_isprintc(c)
 980     int c;
 981 {
 982 #ifdef FEAT_MBYTE
 983     if (enc_utf8 && c >= 0x100)
 984         return utf_printable(c);
 985 #endif
 986     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 987 }
 988
 989 /*
 990  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 991  * byte of a double-byte character.
 992  */
 993     int
 994 vim_isprintc_strict(c)
 995     int c;
 996 {
 997 #ifdef FEAT_MBYTE
 998     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
 999         return FALSE;
1000     if (enc_utf8 && c >= 0x100)
1001         return utf_printable(c);
1002 #endif
1003     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1004 }
1005
1006 /*
1007  * like chartabsize(), but also check for line breaks on the screen
1008  */
1009     int
1010 lbr_chartabsize(s, col, lnum)
1011     unsigned char       *s;
1012     colnr_T             col;
1013     linenr_T            lnum;
1014 {
1015 #ifdef FEAT_LINEBREAK
1016     if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
1017     {
1018 #endif
1019 #ifdef FEAT_MBYTE
1020         if (curwin->w_p_wrap)
1021             return win_nolbr_chartabsize(curwin, s, col, NULL);
1022 #endif
1023         RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1024 #ifdef FEAT_LINEBREAK
1025     }
1026     return win_lbr_chartabsize(curwin, s, col, NULL, lnum);
1027 #endif
1028 }
1029
1030 /*
1031  * Call lbr_chartabsize() and advance the pointer.
1032  */
1033     int
1034 lbr_chartabsize_adv(s, col, lnum)
1035     char_u      **s;
1036     colnr_T     col;
1037     linenr_T    lnum;
1038 {
1039     int         retval;
1040
1041     retval = lbr_chartabsize(*s, col, lnum);
1042     mb_ptr_adv(*s);
1043     return retval;
1044 }
1045
/*
 * Return the size of the character at "s" in window "wp" when it starts at
 * virtual column "col", taking 'linebreak', 'showbreak' and 'breakindent'
 * into account.
 * This function is used very often, keep it fast!!!!
 *
 * If "headp" is not NULL, *headp is set to the size of what we add for the
 * 'showbreak' string and/or 'breakindent' at the start of the line (plus one
 * for the ">" shown when a double-width character does not fit).
 * Warning: on the quick path *headp is only set if it's a non-zero value,
 * init to 0 before calling.
 *
 * "lnum" is passed on to get_breakindent_win().  The linenr argument is
 * needed if in visual highlighting and breakindent=on, then the line
 * calculated is not current; if 0, normal functionality is preserved.
 */
    int
win_lbr_chartabsize(wp, s, col, headp, lnum)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp UNUSED;
    linenr_T    lnum;
{
#ifdef FEAT_LINEBREAK
    int         c;
    int         size;
    colnr_T     col2;
    colnr_T     colmax;
    int         added;
# ifdef FEAT_VARTABS
    colnr_T     orig_col = col;     /* "col" is modified further down */
# endif
# ifdef FEAT_MBYTE
    int         mb_added = 0;
# else
#  define mb_added 0
# endif
    int         numberextra;
    char_u      *ps;
    int         tab_corr = (*s == TAB);
    int         n;

    /*
     * No 'linebreak' and 'showbreak' and 'breakindent': return quickly.
     */
    if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
#endif
    {
#ifdef FEAT_MBYTE
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
#endif
        /* This macro is the last statement on this path; without
         * FEAT_LINEBREAK nothing follows it in this function. */
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
            && vim_isbreak(c)
            && !vim_isbreak(s[1])
            && !wp->w_p_list
            && wp->w_p_wrap
# ifdef FEAT_VERTSPLIT
            && wp->w_width != 0
# endif
       )
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = (colnr_T)(W_WIDTH(wp) - numberextra);
        if (col >= colmax)
        {
            /* "col" is past the first screen line: move "colmax" forward in
             * steps of "n" until it is beyond "col".  Skipped when "n" is
             * not positive, which also avoids dividing by zero. */
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n;
        }

        for (;;)
        {
            ps = s;             /* remember the previous character */
            mb_ptr_adv(s);
            c = *s;
            if (!(c != NUL
                    && (vim_isbreak(c)
                        || (!vim_isbreak(c)
                            && (col2 == col || !vim_isbreak(*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax)         /* doesn't fit */
            {
                size = colmax - col;    /* character fills up to "colmax" */
                tab_corr = FALSE;
                break;
            }
        }
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size;         /* Count the ">" in the last column. */
        mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'breakindent' and/or 'showbreak'
     * string at start of line.
     * Set *headp to the size of what we add.
     */
    added = 0;
    if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
    {
        numberextra = win_col_off(wp);
        col += numberextra + mb_added;
        if (col >= (colnr_T)W_WIDTH(wp))
        {
            /* Reduce "col" to an offset within its screen line; the modulo
             * is skipped when the divisor is not positive. */
            col -= W_WIDTH(wp);
            numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
            if (numberextra > 0)
                col = col % numberextra;
        }
        if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
        {
            added = 0;
            if (*p_sbr != NUL)
                added += vim_strsize(p_sbr);
            if (wp->w_p_bri)
                added += get_breakindent_win(wp,lnum);

            if (tab_corr)
            {
                /* For a TAB only whole multiples of the tabstop are added. */
# ifdef FEAT_VARTABS
                int ts = tabstop_at(orig_col, wp->w_buffer->b_p_ts,
                                              wp->w_buffer->b_p_vts_ary);
                size += (added / ts) * ts;
# else
                size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
# endif
            }
            else
                size += added;
            /* Only report "added" through *headp when "col" is zero here. */
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1206
1207 #if defined(FEAT_MBYTE) || defined(PROTO)
1208 /*
1209  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1210  * 'wrap' is on.  This means we need to check for a double-byte character that
1211  * doesn't fit at the end of the screen line.
1212  */
1213     static int
1214 win_nolbr_chartabsize(wp, s, col, headp)
1215     win_T       *wp;
1216     char_u      *s;
1217     colnr_T     col;
1218     int         *headp;
1219 {
1220     int         n;
1221
1222     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1223     {
1224 # ifdef FEAT_VARTABS
1225         return tabstop_padding(col, wp->w_buffer->b_p_ts,
1226                                     wp->w_buffer->b_p_vts_ary);
1227 # else
1228         n = wp->w_buffer->b_p_ts;
1229         return (int)(n - (col % n));
1230 # endif
1231     }
1232     n = ptr2cells(s);
1233     /* Add one cell for a double-width character in the last column of the
1234      * window, displayed with a ">". */
1235     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1236     {
1237         if (headp != NULL)
1238             *headp = 1;
1239         return 3;
1240     }
1241     return n;
1242 }
1243
1244 /*
1245  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1246  * "wp".
1247  */
1248     int
1249 in_win_border(wp, vcol)
1250     win_T       *wp;
1251     colnr_T     vcol;
1252 {
1253     int         width1;         /* width of first line (after line number) */
1254     int         width2;         /* width of further lines */
1255
1256 #ifdef FEAT_VERTSPLIT
1257     if (wp->w_width == 0)       /* there is no border */
1258         return FALSE;
1259 #endif
1260     width1 = W_WIDTH(wp) - win_col_off(wp);
1261     if ((int)vcol < width1 - 1)
1262         return FALSE;
1263     if ((int)vcol == width1 - 1)
1264         return TRUE;
1265     width2 = width1 + win_col_off2(wp);
1266     if (width2 <= 0)
1267         return FALSE;
1268     return ((vcol - width1) % width2 == width2 - 1);
1269 }
1270 #endif /* FEAT_MBYTE */
1271
/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * The line is taken from the buffer of window "wp".  Each of "start",
 * "cursor" and "end" may be NULL when that value is not wanted.
 * When pos->col is MAXCOL the columns of the NUL at the end of the line are
 * returned.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     vcol;
    char_u      *ptr;           /* points to current char */
    char_u      *posptr;        /* points to char at pos->col */
    int         incr;           /* width of the current character */
    int         head;           /* cells before the character itself */
#ifdef FEAT_VARTABS
    int         *vts = wp->w_buffer->b_p_vts_ary;
#endif
    int         ts = wp->w_buffer->b_p_ts;
    int         c;

    vcol = 0;
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col == MAXCOL)
        posptr = NULL;  /* continue until the NUL */
    else
        posptr = ptr + pos->col;

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
     * use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
            && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
#endif
       )
    {
#ifndef FEAT_MBYTE
        /* Without multi-byte support "head" never changes in this loop. */
        head = 0;
#endif
        for (;;)
        {
#ifdef FEAT_MBYTE
            head = 0;
#endif
            c = *ptr;
            /* make sure we don't go past the end of the line */
            if (c == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }
            /* A tab gets expanded, depending on the current column */
            if (c == TAB)
#ifdef FEAT_VARTABS
                incr = tabstop_padding(vcol, ts, vts);
#else
                incr = ts - (vcol % ts);
#endif
            else
            {
#ifdef FEAT_MBYTE
                if (has_mbyte)
                {
                    /* For utf-8, if the byte is >= 0x80, need to look at
                     * further bytes to find the cell width. */
                    if (enc_utf8 && c >= 0x80)
                        incr = utf_ptr2cells(ptr);
                    else
                        incr = CHARSIZE(c);

                    /* If a double-cell char doesn't fit at the end of a line
                     * it wraps to the next line, it's like this char is three
                     * cells wide. */
                    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
                            && in_win_border(wp, vcol))
                    {
                        ++incr;
                        head = 1;
                    }
                }
                else
#endif
                    incr = CHARSIZE(c);
            }

            if (posptr != NULL && ptr >= posptr) /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    else
    {
        /* Slow path: let win_lbr_chartabsize() compute each width. */
        for (;;)
        {
            /* A tab gets expanded, depending on the current column */
            head = 0;
            incr = win_lbr_chartabsize(wp, ptr, vcol, &head, pos->lnum);
            /* make sure we don't go past the end of the line */
            if (*ptr == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }

            if (posptr != NULL && ptr >= posptr) /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    /* Here "vcol" is the column where the character starts, "incr" is its
     * width and "head" the number of cells in front of it. */
    if (start != NULL)
        *start = vcol + head;
    if (end != NULL)
        *end = vcol + incr - 1;
    if (cursor != NULL)
    {
        if (*ptr == TAB
                && (State & NORMAL)
                && !wp->w_p_list
                && !virtual_active()
#ifdef FEAT_VISUAL
                && !(VIsual_active
                                   && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
#endif
                )
            *cursor = vcol + incr - 1;      /* cursor at end */
        else
            *cursor = vcol + head;          /* cursor at start */
    }
}
1415
1416 /*
1417  * Get virtual cursor column in the current window, pretending 'list' is off.
1418  */
1419     colnr_T
1420 getvcol_nolist(posp)
1421     pos_T       *posp;
1422 {
1423     int         list_save = curwin->w_p_list;
1424     colnr_T     vcol;
1425
1426     curwin->w_p_list = FALSE;
1427     getvcol(curwin, posp, NULL, &vcol, NULL);
1428     curwin->w_p_list = list_save;
1429     return vcol;
1430 }
1431
#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Get virtual column in virtual mode.
 * When virtual editing is not active this is the same as getvcol().
 * Otherwise "start" and "cursor" both get the start column of the character
 * plus pos->coladd, and "end" gets that value plus the extra cells of a
 * wide character.
 */
    void
getvvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     col;
    colnr_T     coladd;
    colnr_T     endadd;
# ifdef FEAT_MBYTE
    char_u      *ptr;
# endif

    if (!virtual_active())
    {
        getvcol(wp, pos, start, cursor, end);
        return;
    }

    /* For virtual mode, only want one value */
    getvcol(wp, pos, &col, NULL, NULL);

    coladd = pos->coladd;
    endadd = 0;
# ifdef FEAT_MBYTE
    /* Cannot put the cursor on part of a wide character. */
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col < (colnr_T)STRLEN(ptr))
    {
        int c = (*mb_ptr2char)(ptr + pos->col);

        if (c != TAB && vim_isprintc(c))
        {
            endadd = (colnr_T)(char2cells(c) - 1);
            if (coladd > endadd)        /* past end of line */
                endadd = 0;
            else
                coladd = 0;
        }
    }
# endif
    col += coladd;

    if (start != NULL)
        *start = col;
    if (cursor != NULL)
        *cursor = col;
    if (end != NULL)
        *end = col + endadd;
}
#endif
1487
#if defined(FEAT_VISUAL) || defined(PROTO)
/*
 * Get the leftmost and rightmost virtual column of pos1 and pos2.
 * Used for Visual block mode.
 * The two positions may be given in either order.
 */
    void
getvcols(wp, pos1, pos2, left, right)
    win_T       *wp;
    pos_T       *pos1, *pos2;
    colnr_T     *left, *right;
{
    pos_T       *first;
    pos_T       *second;
    colnr_T     from1, from2, to1, to2;

    /* Put the positions in order, "first" is the one that sorts lower. */
    if (ltp(pos1, pos2))
    {
        first = pos1;
        second = pos2;
    }
    else
    {
        first = pos2;
        second = pos1;
    }
    getvvcol(wp, first, &from1, NULL, &to1);
    getvvcol(wp, second, &from2, NULL, &to2);

    *left = (from2 < from1) ? from2 : from1;

    if (to2 <= to1)
        *right = to1;
    else if (*p_sel == 'e' && from2 - 1 >= to1)
        *right = from2 - 1;     /* 'selection' exclusive: omit last char */
    else
        *right = to2;
}
#endif
1526
1527 /*
1528  * skipwhite: skip over ' ' and '\t'.
1529  */
1530     char_u *
1531 skipwhite(q)
1532     char_u      *q;
1533 {
1534     char_u      *p = q;
1535
1536     while (vim_iswhite(*p)) /* skip to next non-white */
1537         ++p;
1538     return p;
1539 }
1540
1541 /*
1542  * skip over digits
1543  */
1544     char_u *
1545 skipdigits(q)
1546     char_u      *q;
1547 {
1548     char_u      *p = q;
1549
1550     while (VIM_ISDIGIT(*p))     /* skip to next non-digit */
1551         ++p;
1552     return p;
1553 }
1554
#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * skip over digits and hex characters
 * Returns a pointer to the first character in "q" that is not a hex digit.
 */
    char_u *
skiphex(q)
    char_u      *q;
{
    char_u      *p;

    for (p = q; vim_isxdigit(*p); ++p)
        ;                       /* skip to next non-hex character */
    return p;
}
#endif
1570
1571 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1572 /*
1573  * skip to digit (or NUL after the string)
1574  */
1575     char_u *
1576 skiptodigit(q)
1577     char_u      *q;
1578 {
1579     char_u      *p = q;
1580
1581     while (*p != NUL && !VIM_ISDIGIT(*p))       /* skip to next digit */
1582         ++p;
1583     return p;
1584 }
1585
1586 /*
1587  * skip to hex character (or NUL after the string)
1588  */
1589     char_u *
1590 skiptohex(q)
1591     char_u      *q;
1592 {
1593     char_u      *p = q;
1594
1595     while (*p != NUL && !vim_isxdigit(*p))      /* skip to next digit */
1596         ++p;
1597     return p;
1598 }
1599 #endif
1600
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 for '0' through '9' and 0 for everything else.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    if (c < '0' || c > '9')
        return 0;
    return 1;
}
1613
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 for '0'-'9', 'a'-'f' and 'A'-'F', 0 for everything else.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F');
}
1627
1628 #if defined(FEAT_MBYTE) || defined(PROTO)
1629 /*
1630  * Vim's own character class functions.  These exist because many library
1631  * islower()/toupper() etc. do not work properly: they crash when used with
1632  * invalid values or can't handle latin1 when the locale is C.
1633  * Speed is most important here.
1634  */
1635 #define LATIN1LOWER 'l'
1636 #define LATIN1UPPER 'U'
1637
1638 /*                                                                 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~                                  ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1639 static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1640 static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1641 static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1642
1643     int
1644 vim_islower(c)
1645     int     c;
1646 {
1647     if (c <= '@')
1648         return FALSE;
1649     if (c >= 0x80)
1650     {
1651         if (enc_utf8)
1652             return utf_islower(c);
1653         if (c >= 0x100)
1654         {
1655 #ifdef HAVE_ISWLOWER
1656             if (has_mbyte)
1657                 return iswlower(c);
1658 #endif
1659             /* islower() can't handle these chars and may crash */
1660             return FALSE;
1661         }
1662         if (enc_latin1like)
1663             return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1664     }
1665     return islower(c);
1666 }
1667
1668     int
1669 vim_isupper(c)
1670     int     c;
1671 {
1672     if (c <= '@')
1673         return FALSE;
1674     if (c >= 0x80)
1675     {
1676         if (enc_utf8)
1677             return utf_isupper(c);
1678         if (c >= 0x100)
1679         {
1680 #ifdef HAVE_ISWUPPER
1681             if (has_mbyte)
1682                 return iswupper(c);
1683 #endif
1684             /* islower() can't handle these chars and may crash */
1685             return FALSE;
1686         }
1687         if (enc_latin1like)
1688             return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1689     }
1690     return isupper(c);
1691 }
1692
1693     int
1694 vim_toupper(c)
1695     int     c;
1696 {
1697     if (c <= '@')
1698         return c;
1699     if (c >= 0x80)
1700     {
1701         if (enc_utf8)
1702             return utf_toupper(c);
1703         if (c >= 0x100)
1704         {
1705 #ifdef HAVE_TOWUPPER
1706             if (has_mbyte)
1707                 return towupper(c);
1708 #endif
1709             /* toupper() can't handle these chars and may crash */
1710             return c;
1711         }
1712         if (enc_latin1like)
1713             return latin1upper[c];
1714     }
1715     return TOUPPER_LOC(c);
1716 }
1717
1718     int
1719 vim_tolower(c)
1720     int     c;
1721 {
1722     if (c <= '@')
1723         return c;
1724     if (c >= 0x80)
1725     {
1726         if (enc_utf8)
1727             return utf_tolower(c);
1728         if (c >= 0x100)
1729         {
1730 #ifdef HAVE_TOWLOWER
1731             if (has_mbyte)
1732                 return towlower(c);
1733 #endif
1734             /* tolower() can't handle these chars and may crash */
1735             return c;
1736         }
1737         if (enc_latin1like)
1738             return latin1lower[c];
1739     }
1740     return TOLOWER_LOC(c);
1741 }
1742 #endif
1743
1744 /*
1745  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1746  */
1747     char_u *
1748 skiptowhite(p)
1749     char_u      *p;
1750 {
1751     while (*p != ' ' && *p != '\t' && *p != NUL)
1752         ++p;
1753     return p;
1754 }
1755
#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
        || defined(PROTO)
/*
 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars.
 * A backslash or CTRL-V that is not the last character of the string is
 * skipped together with the character that follows it.
 */
    char_u *
skiptowhite_esc(p)
    char_u      *p;
{
    for ( ; *p != ' ' && *p != '\t' && *p != NUL; ++p)
    {
        /* Step over the escape character; the loop increment then steps
         * over the escaped character. */
        if ((*p == '\\' || *p == Ctrl_V) && p[1] != NUL)
            ++p;
    }
    return p;
}
#endif
1774
1775 /*
1776  * Getdigits: Get a number from a string and skip over it.
1777  * Note: the argument is a pointer to a char_u pointer!
1778  */
1779     long
1780 getdigits(pp)
1781     char_u **pp;
1782 {
1783     char_u      *p;
1784     long        retval;
1785
1786     p = *pp;
1787     retval = atol((char *)p);
1788     if (*p == '-')              /* skip negative sign */
1789         ++p;
1790     p = skipdigits(p);          /* skip to next non-digit */
1791     *pp = p;
1792     return retval;
1793 }
1794
1795 /*
1796  * Return TRUE if "lbuf" is empty or only contains blanks.
1797  */
1798     int
1799 vim_isblankline(lbuf)
1800     char_u      *lbuf;
1801 {
1802     char_u      *p;
1803
1804     p = skipwhite(lbuf);
1805     return (*p == NUL || *p == '\r' || *p == '\n');
1806 }
1807
1808 /*
1809  * Convert a string into a long and/or unsigned long, taking care of
1810  * hexadecimal and octal numbers.  Accepts a '-' sign.
1811  * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1812  *  0       decimal
1813  *  '0'     octal
1814  *  'X'     hex
1815  *  'x'     hex
1816  * If "len" is not NULL, the length of the number in characters is returned.
1817  * If "nptr" is not NULL, the signed result is returned in it.
1818  * If "unptr" is not NULL, the unsigned result is returned in it.
1819  * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1820  * octal number.
1821  * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1822  * hex number.
1823  */
1824     void
1825 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1826     char_u              *start;
1827     int                 *hexp;      /* return: type of number 0 = decimal, 'x'
1828                                        or 'X' is hex, '0' = octal */
1829     int                 *len;       /* return: detected length of number */
1830     int                 dooct;      /* recognize octal number */
1831     int                 dohex;      /* recognize hex number */
1832     long                *nptr;      /* return: signed result */
1833     unsigned long       *unptr;     /* return: unsigned result */
1834 {
1835     char_u          *ptr = start;
1836     int             hex = 0;            /* default is decimal */
1837     int             negative = FALSE;
1838     unsigned long   un = 0;
1839     int             n;
1840
1841     if (ptr[0] == '-')
1842     {
1843         negative = TRUE;
1844         ++ptr;
1845     }
1846
1847     /* Recognize hex and octal. */
1848     if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
1849     {
1850         hex = ptr[1];
1851         if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1852             ptr += 2;                   /* hexadecimal */
1853         else
1854         {
1855             hex = 0;                    /* default is decimal */
1856             if (dooct)
1857             {
1858                 /* Don't interpret "0", "08" or "0129" as octal. */
1859                 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1860                 {
1861                     if (ptr[n] > '7')
1862                     {
1863                         hex = 0;        /* can't be octal */
1864                         break;
1865                     }
1866                     if (ptr[n] > '0')
1867                         hex = '0';      /* assume octal */
1868                 }
1869             }
1870         }
1871     }
1872
1873     /*
1874      * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1875      */
1876     if (hex == '0' || dooct > 1)
1877     {
1878         /* octal */
1879         while ('0' <= *ptr && *ptr <= '7')
1880         {
1881             un = 8 * un + (unsigned long)(*ptr - '0');
1882             ++ptr;
1883         }
1884     }
1885     else if (hex != 0 || dohex > 1)
1886     {
1887         /* hex */
1888         while (vim_isxdigit(*ptr))
1889         {
1890             un = 16 * un + (unsigned long)hex2nr(*ptr);
1891             ++ptr;
1892         }
1893     }
1894     else
1895     {
1896         /* decimal */
1897         while (VIM_ISDIGIT(*ptr))
1898         {
1899             un = 10 * un + (unsigned long)(*ptr - '0');
1900             ++ptr;
1901         }
1902     }
1903
1904     if (hexp != NULL)
1905         *hexp = hex;
1906     if (len != NULL)
1907         *len = (int)(ptr - start);
1908     if (nptr != NULL)
1909     {
1910         if (negative)   /* account for leading '-' for decimal numbers */
1911             *nptr = -(long)un;
1912         else
1913             *nptr = (long)un;
1914     }
1915     if (unptr != NULL)
1916         *unptr = un;
1917 }
1918
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other argument is treated like a decimal digit.
 */
    int
hex2nr(c)
    int         c;
{
    int         first;      /* first character of the range "c" is in */
    int         value;      /* value of that first character */

    if (c >= 'a' && c <= 'f')
    {
        first = 'a';
        value = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        first = 'A';
        value = 10;
    }
    else
    {
        first = '0';
        value = 0;
    }
    return c - first + value;
}
1933
#if defined(FEAT_TERMRESPONSE) \
        || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
/*
 * Convert two hex characters to a byte: p[0] is the high nibble, p[1] the
 * low nibble.
 * Return -1 if one of the characters is not hex.
 */
    int
hexhex2nr(p)
    char_u      *p;
{
    int         high;
    int         low;

    /* p[1] is only looked at when p[0] is a hex character. */
    if (!(vim_isxdigit(p[0]) && vim_isxdigit(p[1])))
        return -1;

    high = hex2nr(p[0]);
    low = hex2nr(p[1]);
    return (high << 4) + low;
}
#endif
1949
1950 /*
1951  * Return TRUE if "str" starts with a backslash that should be removed.
1952  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1953  * backslash is not a normal file name character.
1954  * '$' is a valid file name character, we don't remove the backslash before
1955  * it.  This means it is not possible to use an environment variable after a
1956  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1957  * Although "\ name" is valid, the backslash in "Program\ files" must be
1958  * removed.  Assume a file name doesn't start with a space.
1959  * For multi-byte names, never remove a backslash before a non-ascii
1960  * character, assume that all multi-byte characters are valid file name
1961  * characters.
1962  */
1963     int
1964 rem_backslash(str)
1965     char_u  *str;
1966 {
1967 #ifdef BACKSLASH_IN_FILENAME
1968     return (str[0] == '\\'
1969 # ifdef FEAT_MBYTE
1970             && str[1] < 0x80
1971 # endif
1972             && (str[1] == ' '
1973                 || (str[1] != NUL
1974                     && str[1] != '*'
1975                     && str[1] != '?'
1976                     && !vim_isfilec(str[1]))));
1977 #else
1978     return (str[0] == '\\' && str[1] != NUL);
1979 #endif
1980 }
1981
1982 /*
1983  * Halve the number of backslashes in a file name argument.
1984  * For MS-DOS we only do this if the character after the backslash
1985  * is not a normal file character.
1986  */
1987     void
1988 backslash_halve(p)
1989     char_u      *p;
1990 {
1991     for ( ; *p; ++p)
1992         if (rem_backslash(p))
1993             STRMOVE(p, p + 1);
1994 }
1995
1996 /*
1997  * backslash_halve() plus save the result in allocated memory.
1998  */
1999     char_u *
2000 backslash_halve_save(p)
2001     char_u      *p;
2002 {
2003     char_u      *res;
2004
2005     res = vim_strsave(p);
2006     if (res == NULL)
2007         return p;
2008     backslash_halve(res);
2009     return res;
2010 }
2011
2012 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion, taken from xxd.c.
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 * The table has 256 entries, written in octal, eight per row.
 * NOTE(review): presumably indexed by the EBCDIC byte value, yielding the
 * ASCII code -- confirm against ebcdic2ascii().
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};
2053
2054 /*
2055  * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
2056  * wanting 7-bit ASCII characters out the other end.
2057  */
2058     void
2059 ebcdic2ascii(buffer, len)
2060     char_u      *buffer;
2061     int         len;
2062 {
2063     int         i;
2064
2065     for (i = 0; i < len; i++)
2066         buffer[i] = ebcdic2ascii_tab[buffer[i]];
2067 }
2068 #endif