src/charset.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 #include "vim.h"
  11
     /* Forward declarations of functions that are local to this file. */
  12 #ifdef FEAT_LINEBREAK
  13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
  14 #endif
  15
  16 #ifdef FEAT_MBYTE
  17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
  18 #endif
  19
  20 static unsigned nr2hex __ARGS((unsigned c));
  21
     /* Becomes TRUE once buf_init_chartab() has completed successfully.  While it
      * is FALSE transchar() additionally accepts a fixed range of characters as
      * printable, because chartab[] has not been filled yet. */
  22 static int    chartab_initialized = FALSE;
  23
  24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
  25  * characters 0-255. */
  26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
  27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
  28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
  29
  30 /*
  31  * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
  32  * characters for current buffer.
      * This is done by calling buf_init_chartab() for curbuf with "global" set
      * to TRUE.
  33  *
  34  * Depends on the option settings 'iskeyword', 'isident', 'isfname',
  35  * 'isprint' and 'encoding'.
  36  *
  37  * The index in chartab[] depends on 'encoding':
  38  * - For non-multi-byte index with the byte (same as the character).
  39  * - For DBCS index with the first byte.
  40  * - For UTF-8 index with the character (when first byte is up to 0x80 it is
  41  *   the same as the character, if the first byte is 0x80 and above it depends
  42  *   on further bytes).
  43  *
  44  * The contents of chartab[]:
  45  * - The lower two bits, masked by CT_CELL_MASK, give the number of display
  46  *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
  47  * - CT_PRINT_CHAR bit is set when the character is printable (no need to
  48  *   translate the character before displaying it).  Note that only DBCS
  49  *   characters can have 2 display cells and still be printable.
  50  * - CT_FNAME_CHAR bit is set when the character can be in a file name.
  51  * - CT_ID_CHAR bit is set when the character can be in an identifier.
  52  *
  53  * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
  54  * error, OK otherwise.
  55  */
  56     int
  57 init_chartab()
  58 {
  59     return buf_init_chartab(curbuf, TRUE);
  60 }
  61
     /*
      * Compute the character tables for buffer "buf".
      * When "global" is TRUE chartab[] is reset to its defaults and then updated
      * from 'isident', 'isprint' and 'isfname'; buf->b_chartab[] is always
      * cleared and rebuilt from buf->b_p_isk.
      * When "global" is FALSE only buf->b_chartab[] is touched.
      * Returns FAIL as soon as an option value cannot be parsed (the tables may
      * then have been partly updated and chartab_initialized is not set),
      * OK otherwise.
      */
  62     int
  63 buf_init_chartab(buf, global)
  64     buf_T       *buf;
  65     int         global;         /* FALSE: only set buf->b_chartab[] */
  66 {
  67     int         c;
  68     int         c2;
  69     char_u      *p;
  70     int         i;
  71     int         tilde;
  72     int         do_isalpha;
  73
  74     if (global)
  75     {
  76         /*
  77          * Set the default size for printable characters:
  78          * From <Space> to '~' is 1 (printable), others are 2 (not printable).
  79          * This also inits all 'isident' and 'isfname' flags to FALSE.
  80          *
  81          * EBCDIC: all chars below ' ' are not printable, all others are
  82          * printable.
  83          */
  84         c = 0;
  85         while (c < ' ')
  86             chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
  87 #ifdef EBCDIC
  88         while (c < 255)
  89 #else
  90         while (c <= '~')
  91 #endif
  92             chartab[c++] = 1 + CT_PRINT_CHAR;
  93 #ifdef FEAT_FKMAP
  94         if (p_altkeymap)
  95         {
  96             while (c < YE)
  97                 chartab[c++] = 1 + CT_PRINT_CHAR;
  98         }
  99 #endif
 100         while (c < 256)
 101         {
 102 #ifdef FEAT_MBYTE
 103             /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
 104             if (enc_utf8 && c >= 0xa0)
 105                 chartab[c++] = CT_PRINT_CHAR + 1;
 106             /* euc-jp characters starting with 0x8e are single width */
 107             else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
 108                 chartab[c++] = CT_PRINT_CHAR + 1;
 109             /* other double-byte chars can be printable AND double-width */
 110             else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
 111                 chartab[c++] = CT_PRINT_CHAR + 2;
 112             else
 113 #endif
 114                 /* the rest is unprintable by default */
 115                 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
 116         }
 117
 118 #ifdef FEAT_MBYTE
 119         /* Assume that every multi-byte char is a filename character. */
 120         for (c = 1; c < 256; ++c)
 121             if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
 122                     || (enc_dbcs == DBCS_JPNU && c == 0x8e)
 123                     || (enc_utf8 && c >= 0xa0))
 124                 chartab[c] |= CT_FNAME_CHAR;
 125 #endif
 126     }
 127
 128     /*
 129      * Init word char flags all to FALSE
 130      */
 131     vim_memset(buf->b_chartab, 0, (size_t)32);
 132 #ifdef FEAT_MBYTE
 133     if (enc_dbcs != 0)
 134         for (c = 0; c < 256; ++c)
 135         {
 136             /* double-byte characters are probably word characters */
 137             if (MB_BYTE2LEN(c) == 2)
 138                 SET_CHARTAB(buf, c);
 139         }
 140 #endif
 141
 142 #ifdef FEAT_LISP
 143     /*
 144      * In lisp mode the '-' character is included in keywords.
 145      */
 146     if (buf->b_p_lisp)
 147         SET_CHARTAB(buf, '-');
 148 #endif
 149
 150     /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
 151      * options.  Each option is a list of characters, character numbers or
 152      * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
          * When "global" is FALSE the loop starts at the last round, so that
          * only 'iskeyword' is handled.
 153      */
 154     for (i = global ? 0 : 3; i <= 3; ++i)
 155     {
 156         if (i == 0)
 157             p = p_isi;          /* first round: 'isident' */
 158         else if (i == 1)
 159             p = p_isp;          /* second round: 'isprint' */
 160         else if (i == 2)
 161             p = p_isf;          /* third round: 'isfname' */
 162         else    /* i == 3 */
 163             p = buf->b_p_isk;   /* fourth round: 'iskeyword' */
 164
 165         while (*p)
 166         {
 167             tilde = FALSE;
 168             do_isalpha = FALSE;
                 /* A leading '^' means: remove the flag instead of setting it. */
 169             if (*p == '^' && p[1] != NUL)
 170             {
 171                 tilde = TRUE;
 172                 ++p;
 173             }
 174             if (VIM_ISDIGIT(*p))
 175                 c = getdigits(&p);
 176             else
 177 #ifdef FEAT_MBYTE
 178                  if (has_mbyte)
 179                 c = mb_ptr2char_adv(&p);
 180             else
 181 #endif
 182                 c = *p++;
 183             c2 = -1;
 184             if (*p == '-' && p[1] != NUL)
 185             {
 186                 ++p;
 187                 if (VIM_ISDIGIT(*p))
 188                     c2 = getdigits(&p);
 189                 else
 190 #ifdef FEAT_MBYTE
 191                      if (has_mbyte)
 192                     c2 = mb_ptr2char_adv(&p);
 193                 else
 194 #endif
 195                     c2 = *p++;
 196             }
                 /* Reject values outside 1-255, a range whose end is below its
                  * start and an item that is not followed by ',' or the end. */
 197             if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
 198                                                  || !(*p == NUL || *p == ','))
 199                 return FAIL;
 200
 201             if (c2 == -1)       /* not a range */
 202             {
 203                 /*
 204                  * A single '@' (not "@-@"):
 205                  * Decide on letters being ID/printable/keyword chars with
 206                  * standard function isalpha(). This takes care of locale for
 207                  * single-byte characters).
 208                  */
 209                 if (c == '@')
 210                 {
 211                     do_isalpha = TRUE;
 212                     c = 1;
 213                     c2 = 255;
 214                 }
 215                 else
 216                     c2 = c;
 217             }
 218             while (c <= c2)
 219             {
 220                 /* Use the MB_ functions here, because isalpha() doesn't
 221                  * work properly when 'encoding' is "latin1" and the locale is
 222                  * "C".  */
 223                 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
 224 #ifdef FEAT_FKMAP
 225                         || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
 226 #endif
 227                             )
 228                 {
 229                     if (i == 0)                 /* (re)set ID flag */
 230                     {
 231                         if (tilde)
 232                             chartab[c] &= ~CT_ID_CHAR;
 233                         else
 234                             chartab[c] |= CT_ID_CHAR;
 235                     }
 236                     else if (i == 1)            /* (re)set printable */
 237                     {
 238                         if ((c < ' '
 239 #ifndef EBCDIC
 240                                     || c > '~'
 241 #endif
 242 #ifdef FEAT_FKMAP
 243                                     || (p_altkeymap
 244                                         && (F_isalpha(c) || F_isdigit(c)))
 245 #endif
 246                             )
 247 #ifdef FEAT_MBYTE
 248                                 /* For double-byte we keep the cell width, so
 249                                  * that we can detect it from the first byte. */
 250                                 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
 251 #endif
 252                            )
 253                         {
 254                             if (tilde)
 255                             {
 256                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
 257                                              + ((dy_flags & DY_UHEX) ? 4 : 2);
 258                                 chartab[c] &= ~CT_PRINT_CHAR;
 259                             }
 260                             else
 261                             {
 262                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
 263                                 chartab[c] |= CT_PRINT_CHAR;
 264                             }
 265                         }
 266                     }
 267                     else if (i == 2)            /* (re)set fname flag */
 268                     {
 269                         if (tilde)
 270                             chartab[c] &= ~CT_FNAME_CHAR;
 271                         else
 272                             chartab[c] |= CT_FNAME_CHAR;
 273                     }
 274                     else /* i == 3 */           /* (re)set keyword flag */
 275                     {
 276                         if (tilde)
 277                             RESET_CHARTAB(buf, c);
 278                         else
 279                             SET_CHARTAB(buf, c);
 280                     }
 281                 }
 282                 ++c;
 283             }
 284             p = skip_to_option_part(p);
 285         }
 286     }
 287     chartab_initialized = TRUE;
 288     return OK;
 289 }
 290
 291 /*
 292  * Translate any special characters in buf[bufsize] in-place.
 293  * The result is a string with only printable characters, but if there is not
 294  * enough room, not all characters will be translated.
 295  */
 296     void
 297 trans_characters(buf, bufsize)
 298     char_u      *buf;
 299     int         bufsize;
 300 {
 301     int         len;            /* length of string needing translation */
 302     int         room;           /* room in buffer after string */
 303     char_u      *trs;           /* translated character */
 304     int         trs_len;        /* length of trs[] */
 305
 306     len = (int)STRLEN(buf);
 307     room = bufsize - len;
 308     while (*buf != 0)
 309     {
 310 # ifdef FEAT_MBYTE
 311         /* Assume a multi-byte character doesn't need translation. */
 312         if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
 313             len -= trs_len;
 314         else
 315 # endif
 316         {
 317             trs = transchar_byte(*buf);
 318             trs_len = (int)STRLEN(trs);
 319             if (trs_len > 1)
 320             {
 321                 room -= trs_len - 1;
 322                 if (room <= 0)
 323                     return;
 324                 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
 325             }
 326             mch_memmove(buf, trs, (size_t)trs_len);
 327             --len;
 328         }
 329         buf += trs_len;
 330     }
 331 }
 332
 333 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
 334         || defined(PROTO)
 335 /*
 336  * Translate a string into allocated memory, replacing special chars with
 337  * printable chars.  Returns NULL when out of memory.
 338  */
 339     char_u *
 340 transstr(s)
 341     char_u      *s;
 342 {
 343     char_u      *res;
 344     char_u      *p;
 345 #ifdef FEAT_MBYTE
 346     int         l, len, c;
 347     char_u      hexbuf[11];
 348 #endif
 349
 350 #ifdef FEAT_MBYTE
 351     if (has_mbyte)
 352     {
 353         /* Compute the length of the result, taking account of unprintable
 354          * multi-byte characters. */
 355         len = 0;
 356         p = s;
 357         while (*p != NUL)
 358         {
 359             if ((l = (*mb_ptr2len)(p)) > 1)
 360             {
 361                 c = (*mb_ptr2char)(p);
 362                 p += l;
 363                 if (vim_isprintc(c))
 364                     len += l;
 365                 else
 366                 {
 367                     transchar_hex(hexbuf, c);
 368                     len += (int)STRLEN(hexbuf);
 369                 }
 370             }
 371             else
 372             {
 373                 l = byte2cells(*p++);
 374                 if (l > 0)
 375                     len += l;
 376                 else
 377                     len += 4;   /* illegal byte sequence */
 378             }
 379         }
 380         res = alloc((unsigned)(len + 1));
 381     }
 382     else
 383 #endif
 384         res = alloc((unsigned)(vim_strsize(s) + 1));
 385     if (res != NULL)
 386     {
 387         *res = NUL;
 388         p = s;
 389         while (*p != NUL)
 390         {
 391 #ifdef FEAT_MBYTE
 392             if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 393             {
 394                 c = (*mb_ptr2char)(p);
 395                 if (vim_isprintc(c))
 396                     STRNCAT(res, p, l); /* append printable multi-byte char */
 397                 else
 398                     transchar_hex(res + STRLEN(res), c);
 399                 p += l;
 400             }
 401             else
 402 #endif
 403                 STRCAT(res, transchar_byte(*p++));
 404         }
 405     }
 406     return res;
 407 }
 408 #endif
 409
 410 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
 411 /*
 412  * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 413  * current locale.
 414  * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 415  * Otherwise puts the result in "buf[buflen]".
      * When "buf" is given and "orglen" does not fit, only the first
      * "buflen" - 1 bytes of "str" are used.
      * With UTF-8 a lower-case character can need more bytes than the original;
      * when there is no room for that the original character is kept.
 416  */
 417     char_u *
 418 str_foldcase(str, orglen, buf, buflen)
 419     char_u      *str;
 420     int         orglen;
 421     char_u      *buf;
 422     int         buflen;
 423 {
 424     garray_T    ga;
 425     int         i;
 426     int         len = orglen;
 427
     /* Accessors for the text being converted: the growarray when "buf" is NULL,
      * "buf" itself otherwise.  GA_PTR() is re-evaluated on every use, because
      * ga_grow() may change ga.ga_data. */
 428 #define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
 429 #define GA_PTR(i)   ((char_u *)ga.ga_data + i)
 430 #define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
 431 #define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)
 432
 433     /* Copy "str" into "buf" or allocated memory, unmodified. */
 434     if (buf == NULL)
 435     {
 436         ga_init2(&ga, 1, 10);
 437         if (ga_grow(&ga, len + 1) == FAIL)
 438             return NULL;
 439         mch_memmove(ga.ga_data, str, (size_t)len);
 440         ga.ga_len = len;
 441     }
 442     else
 443     {
 444         if (len >= buflen)          /* Ugly! */
 445             len = buflen - 1;
 446         mch_memmove(buf, str, (size_t)len);
 447     }
 448     if (buf == NULL)
 449         GA_CHAR(len) = NUL;
 450     else
 451         buf[len] = NUL;
 452
 453     /* Make each character lower case. */
 454     i = 0;
 455     while (STR_CHAR(i) != NUL)
 456     {
 457 #ifdef FEAT_MBYTE
 458         if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
 459         {
 460             if (enc_utf8)
 461             {
 462                 int     c = utf_ptr2char(STR_PTR(i));
 463                 int     ol = utf_ptr2len(STR_PTR(i));
 464                 int     lc = utf_tolower(c);
 465
 466                 /* Only replace the character when it is not an invalid
 467                  * sequence (ASCII character or more than one byte) and
 468                  * utf_tolower() doesn't return the original character. */
 469                 if ((c < 0x80 || ol > 1) && c != lc)
 470                 {
 471                     int     nl = utf_char2len(lc);
 472
 473                     /* If the byte length changes need to shift the following
 474                      * characters forward or backward. */
 475                     if (ol != nl)
 476                     {
 477                         if (nl > ol)
 478                         {
 479                             if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
 480                                                     : len + nl - ol >= buflen)
 481                             {
 482                                 /* out of memory, keep old char */
 483                                 lc = c;
 484                                 nl = ol;
 485                             }
 486                         }
                             /* Checked again: "nl" was reset to "ol" above when
                              * the longer character does not fit. */
 487                         if (ol != nl)
 488                         {
 489                             if (buf == NULL)
 490                             {
 491                                 STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
 492                                 ga.ga_len += nl - ol;
 493                             }
 494                             else
 495                             {
 496                                 STRMOVE(buf + i + nl, buf + i + ol);
 497                                 len += nl - ol;
 498                             }
 499                         }
 500                     }
 501                     (void)utf_char2bytes(lc, STR_PTR(i));
 502                 }
 503             }
 504             /* skip to next multi-byte char */
 505             i += (*mb_ptr2len)(STR_PTR(i));
 506         }
 507         else
 508 #endif
 509         {
 510             if (buf == NULL)
 511                 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
 512             else
 513                 buf[i] = TOLOWER_LOC(buf[i]);
 514             ++i;
 515         }
 516     }
 517
 518     if (buf == NULL)
 519         return (char_u *)ga.ga_data;
 520     return buf;
 521 }
 522 #endif
 523
 524 /*
 525  * Catch 22: chartab[] can't be initialized before the options are
 526  * initialized, and initializing options may cause transchar() to be called!
 527  * When chartab_initialized == FALSE don't use chartab[].
 528  * Does NOT work for multi-byte characters, c must be <= 255.
 529  * Also doesn't work for the first byte of a multi-byte, "c" must be a
 530  * character!
 531  */
 532 static char_u   transchar_buf[7];
 533
 534     char_u *
 535 transchar(c)
 536     int         c;
 537 {
 538     int                 i;
 539
 540     i = 0;
 541     if (IS_SPECIAL(c))      /* special key code, display as ~@ char */
 542     {
 543         transchar_buf[0] = '~';
 544         transchar_buf[1] = '@';
 545         i = 2;
 546         c = K_SECOND(c);
 547     }
 548
 549     if ((!chartab_initialized && (
 550 #ifdef EBCDIC
 551                     (c >= 64 && c < 255)
 552 #else
 553                     (c >= ' ' && c <= '~')
 554 #endif
 555 #ifdef FEAT_FKMAP
 556                         || F_ischar(c)
 557 #endif
 558                 )) || (c < 256 && vim_isprintc_strict(c)))
 559     {
 560         /* printable character */
 561         transchar_buf[i] = c;
 562         transchar_buf[i + 1] = NUL;
 563     }
 564     else
 565         transchar_nonprint(transchar_buf + i, c);
 566     return transchar_buf;
 567 }
 568
 569 #if defined(FEAT_MBYTE) || defined(PROTO)
 570 /*
 571  * Like transchar(), but called with a byte instead of a character.  Checks
 572  * for an illegal UTF-8 byte.
 573  */
 574     char_u *
 575 transchar_byte(c)
 576     int         c;
 577 {
 578     if (enc_utf8 && c >= 0x80)
 579     {
 580         transchar_nonprint(transchar_buf, c);
 581         return transchar_buf;
 582     }
 583     return transchar(c);
 584 }
 585 #endif
 586
 587 /*
 588  * Convert non-printable character to two or more printable characters in
 589  * "buf[]".  "buf" needs to be able to hold five bytes.
 590  * Does NOT work for multi-byte characters, c must be <= 255.
      * NL is translated as if it were NUL and, when get_fileformat(curbuf) is
      * EOL_MAC, CR is translated as if it were NL.
      * When 'display' has "uhex" the result always comes from transchar_hex().
 591  */
 592     void
 593 transchar_nonprint(buf, c)
 594     char_u      *buf;
 595     int         c;
 596 {
 597     if (c == NL)
 598         c = NUL;                /* we use newline in place of a NUL */
 599     else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
 600         c = NL;                 /* we use CR in place of NL in this case */
 601
 602     if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
 603         transchar_hex(buf, c);
 604
 605 #ifdef EBCDIC
 606     /* For EBCDIC only the characters 0-63 and 255 are not printable */
 607     else if (CtrlChar(c) != 0 || c == DEL)
 608 #else
 609     else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
 610 #endif
 611     {
 612         buf[0] = '^';
 613 #ifdef EBCDIC
 614         if (c == DEL)
 615             buf[1] = '?';               /* DEL displayed as ^? */
 616         else
 617             buf[1] = CtrlChar(c);
 618 #else
 619         buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
 620 #endif
 621
 622         buf[2] = NUL;
 623     }
 624 #ifdef FEAT_MBYTE
         /* UTF-8: a byte of 0x80 or above is always shown in hex */
 625     else if (enc_utf8 && c >= 0x80)
 626     {
 627         transchar_hex(buf, c);
 628     }
 629 #endif
 630 #ifndef EBCDIC
 631     else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
 632     {
 633         buf[0] = '|';
 634         buf[1] = c - 0x80;
 635         buf[2] = NUL;
 636     }
 637 #else
 638     else if (c < 64)
 639     {
 640         buf[0] = '~';
 641         buf[1] = MetaChar(c);
 642         buf[2] = NUL;
 643     }
 644 #endif
 645     else                                            /* 0x80 - 0x9f and 0xff */
 646     {
 647         /*
 648          * TODO: EBCDIC I don't know what to do with these chars, so I display
 649          * them as '~?' for now
 650          */
 651         buf[0] = '~';
 652 #ifdef EBCDIC
 653         buf[1] = '?';                   /* 0xff displayed as ~? */
 654 #else
 655         buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
 656 #endif
 657         buf[2] = NUL;
 658     }
 659 }
 660
 661     void
 662 transchar_hex(buf, c)
 663     char_u      *buf;
 664     int         c;
 665 {
 666     int         i = 0;
 667
 668     buf[0] = '<';
 669 #ifdef FEAT_MBYTE
 670     if (c > 255)
 671     {
 672         buf[++i] = nr2hex((unsigned)c >> 12);
 673         buf[++i] = nr2hex((unsigned)c >> 8);
 674     }
 675 #endif
 676     buf[++i] = nr2hex((unsigned)c >> 4);
 677     buf[++i] = nr2hex((unsigned)c);
 678     buf[++i] = '>';
 679     buf[++i] = NUL;
 680 }
 681
 682 /*
 683  * Convert the lower 4 bits of byte "c" to its hex character.
 684  * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 685  * function key 1.
 686  */
 687     static unsigned
 688 nr2hex(c)
 689     unsigned    c;
 690 {
 691     if ((c & 0xf) <= 9)
 692         return (c & 0xf) + '0';
 693     return (c & 0xf) - 10 + 'a';
 694 }
 695
 696 /*
 697  * Return number of display cells occupied by byte "b".
 698  * Caller must make sure 0 <= b <= 255.
 699  * For multi-byte mode "b" must be the first byte of a character.
 700  * A TAB is counted as two cells: "^I".
 701  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
 702  * cells depends on further bytes.
 703  */
 704     int
 705 byte2cells(b)
 706     int         b;
 707 {
 708 #ifdef FEAT_MBYTE
 709     if (enc_utf8 && b >= 0x80)
 710         return 0;
 711 #endif
 712     return (chartab[b] & CT_CELL_MASK);
 713 }
 714
 715 /*
 716  * Return number of display cells occupied by character "c".
 717  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
 718  * A TAB is counted as two cells: "^I" or four: "<09>".
 719  */
 720     int
 721 char2cells(c)
 722     int         c;
 723 {
 724     if (IS_SPECIAL(c))
 725         return char2cells(K_SECOND(c)) + 2;
 726 #ifdef FEAT_MBYTE
 727     if (c >= 0x80)
 728     {
 729         /* UTF-8: above 0x80 need to check the value */
 730         if (enc_utf8)
 731             return utf_char2cells(c);
 732         /* DBCS: double-byte means double-width, except for euc-jp with first
 733          * byte 0x8e */
 734         if (enc_dbcs != 0 && c >= 0x100)
 735         {
 736             if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
 737                 return 1;
 738             return 2;
 739         }
 740     }
 741 #endif
 742     return (chartab[c & 0xff] & CT_CELL_MASK);
 743 }
 744
 745 /*
 746  * Return number of display cells occupied by character at "*p".
 747  * A TAB is counted as two cells: "^I" or four: "<09>".
 748  */
 749     int
 750 ptr2cells(p)
 751     char_u      *p;
 752 {
 753 #ifdef FEAT_MBYTE
 754     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
 755     if (enc_utf8 && *p >= 0x80)
 756         return utf_ptr2cells(p);
 757     /* For DBCS we can tell the cell count from the first byte. */
 758 #endif
 759     return (chartab[*p] & CT_CELL_MASK);
 760 }
 761
 762 /*
 763  * Return the number of characters string "s" will take on the screen,
 764  * counting TABs as two characters: "^I".
      * Same as vim_strnsize() with MAXCOL as the length limit.
 765  */
 766     int
 767 vim_strsize(s)
 768     char_u      *s;
 769 {
 770     return vim_strnsize(s, (int)MAXCOL);
 771 }
 772
 773 /*
 774  * Return the number of characters string "s[len]" will take on the screen,
 775  * counting TABs as two characters: "^I".
 776  */
 777     int
 778 vim_strnsize(s, len)
 779     char_u      *s;
 780     int         len;
 781 {
 782     int         size = 0;
 783
 784     while (*s != NUL && --len >= 0)
 785     {
 786 #ifdef FEAT_MBYTE
 787         if (has_mbyte)
 788         {
 789             int     l = (*mb_ptr2len)(s);
 790
 791             size += ptr2cells(s);
 792             s += l;
 793             len -= l - 1;
 794         }
 795         else
 796 #endif
 797             size += byte2cells(*s++);
 798     }
 799     return size;
 800 }
 801
 802 /*
 803  * Return the number of characters 'c' will take on the screen, taking
 804  * into account the size of a tab.
 805  * Use a define to make it fast, this is used very often!!!
 806  * Also see getvcol() below.
 807  */
 808
 809 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 810     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 811     { \
 812         int ts; \
 813         ts = (buf)->b_p_ts; \
 814         return (int)(ts - (col % ts)); \
 815     } \
 816     else \
 817         return ptr2cells(p);
 818
 819 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
 820         || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
     /*
      * Return the number of cells the character at "p" takes when it is at
      * column "col", using curwin and curbuf for the TAB handling of
      * RET_WIN_BUF_CHARTABSIZE().
      */
 821     int
 822 chartabsize(p, col)
 823     char_u      *p;
 824     colnr_T     col;
 825 {
         /* the macro contains the return statements */
 826     RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
 827 }
 828 #endif
 829
 830 #ifdef FEAT_LINEBREAK
     /*
      * Return the number of cells the character at "p" takes when it is at
      * column "col", using window "wp" and its buffer wp->w_buffer for the TAB
      * handling of RET_WIN_BUF_CHARTABSIZE().
      */
 831     static int
 832 win_chartabsize(wp, p, col)
 833     win_T       *wp;
 834     char_u      *p;
 835     colnr_T     col;
 836 {
         /* the macro contains the return statements */
 837     RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
 838 }
 839 #endif
 840
 841 /*
 842  * return the number of characters the string 's' will take on the screen,
 843  * taking into account the size of a tab
 844  */
 845     int
 846 linetabsize(s)
 847     char_u      *s;
 848 {
 849     colnr_T     col = 0;
 850
 851     while (*s != NUL)
 852         col += lbr_chartabsize_adv(&s, col);
 853     return (int)col;
 854 }
 855
 856 /*
 857  * Like linetabsize(), but for a given window instead of the current one.
 858  */
 859     int
 860 win_linetabsize(wp, p, len)
 861     win_T       *wp;
 862     char_u      *p;
 863     colnr_T     len;
 864 {
 865     colnr_T     col = 0;
 866     char_u      *s;
 867
 868     for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
 869         col += win_lbr_chartabsize(wp, s, col, NULL);
 870     return (int)col;
 871 }
 872
 873 /*
 874  * Return TRUE if 'c' is a normal identifier character:
 875  * Letters and characters from the 'isident' option.
 876  */
 877     int
 878 vim_isIDc(c)
 879     int c;
 880 {
 881     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
 882 }
 883
 884 /*
 885  * return TRUE if 'c' is a keyword character: Letters and characters from
 886  * 'iskeyword' option for current buffer.
 887  * For multi-byte characters mb_get_class() is used (builtin rules).
 888  */
 889     int
 890 vim_iswordc(c)
 891     int c;
 892 {
 893 #ifdef FEAT_MBYTE
 894     if (c >= 0x100)
 895     {
 896         if (enc_dbcs != 0)
 897             return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
 898         if (enc_utf8)
 899             return utf_class(c) >= 2;
 900     }
 901 #endif
 902     return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
 903 }
 904
 905 /*
 906  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
 907  */
 908     int
 909 vim_iswordp(p)
 910     char_u *p;
 911 {
 912 #ifdef FEAT_MBYTE
 913     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 914         return mb_get_class(p) >= 2;
 915 #endif
 916     return GET_CHARTAB(curbuf, *p) != 0;
 917 }
 918
 919 #if defined(FEAT_SYN_HL) || defined(PROTO)
 920     int
 921 vim_iswordc_buf(p, buf)
 922     char_u      *p;
 923     buf_T       *buf;
 924 {
 925 # ifdef FEAT_MBYTE
 926     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 927         return mb_get_class(p) >= 2;
 928 # endif
 929     return (GET_CHARTAB(buf, *p) != 0);
 930 }
 931 #endif
 932
 933 /*
 934  * return TRUE if 'c' is a valid file-name character
 935  * Assume characters above 0x100 are valid (multi-byte).
 936  */
 937     int
 938 vim_isfilec(c)
 939     int c;
 940 {
 941     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
 942 }
 943
 944 /*
 945  * return TRUE if 'c' is a valid file-name character or a wildcard character
 946  * Assume characters above 0x100 are valid (multi-byte).
 947  * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
 948  * returns false.
 949  */
 950     int
 951 vim_isfilec_or_wc(c)
 952     int c;
 953 {
 954     char_u buf[2];
 955
 956     buf[0] = (char_u)c;
 957     buf[1] = NUL;
 958     return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
 959 }
 960
 961 /*
 962  * return TRUE if 'c' is a printable character
 963  * Assume characters above 0x100 are printable (multi-byte), except for
 964  * Unicode.
 965  */
 966     int
 967 vim_isprintc(c)
 968     int c;
 969 {
 970 #ifdef FEAT_MBYTE
 971     if (enc_utf8 && c >= 0x100)
 972         return utf_printable(c);
 973 #endif
 974     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 975 }
 976
 977 /*
 978  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 979  * byte of a double-byte character.
 980  */
 981     int
 982 vim_isprintc_strict(c)
 983     int c;
 984 {
 985 #ifdef FEAT_MBYTE
 986     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
 987         return FALSE;
 988     if (enc_utf8 && c >= 0x100)
 989         return utf_printable(c);
 990 #endif
 991     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 992 }
 993
 994 /*
 995  * like chartabsize(), but also check for line breaks on the screen
 996  */
 997     int
 998 lbr_chartabsize(s, col)
 999     unsigned char       *s;
1000     colnr_T             col;
1001 {
1002 #ifdef FEAT_LINEBREAK
1003     if (!curwin->w_p_lbr && *p_sbr == NUL)
1004     {
1005 #endif
1006 #ifdef FEAT_MBYTE
1007         if (curwin->w_p_wrap)
1008             return win_nolbr_chartabsize(curwin, s, col, NULL);
1009 #endif
1010         RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1011 #ifdef FEAT_LINEBREAK
1012     }
1013     return win_lbr_chartabsize(curwin, s, col, NULL);
1014 #endif
1015 }
1016
1017 /*
1018  * Call lbr_chartabsize() and advance the pointer.
1019  */
1020     int
1021 lbr_chartabsize_adv(s, col)
1022     char_u      **s;
1023     colnr_T     col;
1024 {
1025     int         retval;
1026
1027     retval = lbr_chartabsize(*s, col);
1028     mb_ptr_adv(*s);
1029     return retval;
1030 }
1031
/*
 * Return the display size of the character at "s" when it is placed at
 * virtual column "col" in window "wp".  Like win_chartabsize(), but also
 * takes 'linebreak' and 'showbreak' into account.  Without FEAT_LINEBREAK
 * only the quick path at the start of the function is compiled.
 *
 * This function is used very often, keep it fast!!!!
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line (plus one when a ">" was counted for a double-wide
 * character in the last column).  Warning: on the quick path *headp is only
 * set if it's a non-zero value, init to 0 before calling.
 */
    int
win_lbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp UNUSED;
{
#ifdef FEAT_LINEBREAK
    int         c;              /* byte at "s" */
    int         size;           /* result: display size of the character */
    colnr_T     col2;           /* column reached while looking ahead */
    colnr_T     colmax;         /* column where the text must wrap */
    int         added;          /* size added for 'showbreak' */
# ifdef FEAT_MBYTE
    int         mb_added = 0;   /* 1 when a ">" was counted in last column */
# else
#  define mb_added 0            /* NOTE(review): no #undef in this function */
# endif
    int         numberextra;
    char_u      *ps;            /* character before "s" in the look-ahead */
    int         tab_corr = (*s == TAB); /* round 'showbreak' size down to a
                                           multiple of 'tabstop' */
    int         n;

    /*
     * No 'linebreak' and 'showbreak': return quickly.
     */
    if (!wp->w_p_lbr && *p_sbr == NUL)
#endif
    {
#ifdef FEAT_MBYTE
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
#endif
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
            && vim_isbreak(c)
            && !vim_isbreak(s[1])
            && !wp->w_p_list
            && wp->w_p_wrap
# ifdef FEAT_VERTSPLIT
            && wp->w_width != 0
# endif
       )
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = (colnr_T)(W_WIDTH(wp) - numberextra);
        if (col >= colmax)
        {
            /* "col" is at or past the first wrap column: advance colmax in
             * steps of "n" until it is beyond "col".  Presumably "n" is the
             * text width of the following screen lines - TODO confirm. */
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n;
        }

        for (;;)
        {
            ps = s;
            mb_ptr_adv(s);
            c = *s;
            /* Stop at the end of the line, or at a non-break character that
             * follows a break character once something was counted.
             * NOTE(review): the inner "!vim_isbreak(c)" looks redundant. */
            if (!(c != NUL
                    && (vim_isbreak(c)
                        || (!vim_isbreak(c)
                            && (col2 == col || !vim_isbreak(*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax)         /* doesn't fit */
            {
                /* Let this character fill the rest of the screen line. */
                size = colmax - col;
                tab_corr = FALSE;
                break;
            }
        }
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size;         /* Count the ">" in the last column. */
        mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'showbreak' string at start of line
     * Set *headp to the size of what we add.
     */
    added = 0;
    if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
    {
        /* Reduce "col" to the column within its screen line. */
        numberextra = win_col_off(wp);
        col += numberextra + mb_added;
        if (col >= (colnr_T)W_WIDTH(wp))
        {
            col -= W_WIDTH(wp);
            numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
            if (numberextra > 0)
                col = col % numberextra;
        }
        /* 'showbreak' is needed when the character starts a screen line or
         * does not fit in the rest of this one. */
        if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
        {
            added = vim_strsize(p_sbr);
            if (tab_corr)
                size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
            else
                size += added;
            /* Only report the head when the character itself starts the
             * screen line. */
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1171
1172 #if defined(FEAT_MBYTE) || defined(PROTO)
1173 /*
1174  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1175  * 'wrap' is on.  This means we need to check for a double-byte character that
1176  * doesn't fit at the end of the screen line.
1177  */
1178     static int
1179 win_nolbr_chartabsize(wp, s, col, headp)
1180     win_T       *wp;
1181     char_u      *s;
1182     colnr_T     col;
1183     int         *headp;
1184 {
1185     int         n;
1186
1187     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1188     {
1189         n = wp->w_buffer->b_p_ts;
1190         return (int)(n - (col % n));
1191     }
1192     n = ptr2cells(s);
1193     /* Add one cell for a double-width character in the last column of the
1194      * window, displayed with a ">". */
1195     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1196     {
1197         if (headp != NULL)
1198             *headp = 1;
1199         return 3;
1200     }
1201     return n;
1202 }
1203
1204 /*
1205  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1206  * "wp".
1207  */
1208     int
1209 in_win_border(wp, vcol)
1210     win_T       *wp;
1211     colnr_T     vcol;
1212 {
1213     int         width1;         /* width of first line (after line number) */
1214     int         width2;         /* width of further lines */
1215
1216 #ifdef FEAT_VERTSPLIT
1217     if (wp->w_width == 0)       /* there is no border */
1218         return FALSE;
1219 #endif
1220     width1 = W_WIDTH(wp) - win_col_off(wp);
1221     if ((int)vcol < width1 - 1)
1222         return FALSE;
1223     if ((int)vcol == width1 - 1)
1224         return TRUE;
1225     width2 = width1 + win_col_off2(wp);
1226     if (width2 <= 0)
1227         return FALSE;
1228     return ((vcol - width1) % width2 == width2 - 1);
1229 }
1230 #endif /* FEAT_MBYTE */
1231
/*
 * Get virtual column number of pos in window "wp".
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 * Each of "start", "cursor" and "end" may be NULL when that value is not
 * wanted.
 *
 * When pos->col is beyond the end of the line, the values for the NUL at
 * the end of the line are returned; the NUL counts as one column.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     vcol;           /* virtual column where current char starts */
    char_u      *ptr;           /* points to current char */
    char_u      *posptr;        /* points to char at pos->col */
    int         incr;           /* display width of the current char */
    int         head;           /* cells displayed before the current char */
    int         ts = wp->w_buffer->b_p_ts;
    int         c;

    vcol = 0;
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    posptr = ptr + pos->col;

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
            && !wp->w_p_lbr && *p_sbr == NUL
#endif
       )
    {
#ifndef FEAT_MBYTE
        /* Without multi-byte support "head" never changes. */
        head = 0;
#endif
        for (;;)
        {
#ifdef FEAT_MBYTE
            /* Reset for every character, it is set for a wide character at
             * the window border below. */
            head = 0;
#endif
            c = *ptr;
            /* make sure we don't go past the end of the line */
            if (c == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }
            /* A tab gets expanded, depending on the current column */
            if (c == TAB)
                incr = ts - (vcol % ts);
            else
            {
#ifdef FEAT_MBYTE
                if (has_mbyte)
                {
                    /* For utf-8, if the byte is >= 0x80, need to look at
                     * further bytes to find the cell width. */
                    if (enc_utf8 && c >= 0x80)
                        incr = utf_ptr2cells(ptr);
                    else
                        incr = CHARSIZE(c);

                    /* If a double-cell char doesn't fit at the end of a line
                     * it wraps to the next line, it's like this char is three
                     * cells wide. */
                    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
                            && in_win_border(wp, vcol))
                    {
                        ++incr;
                        head = 1;
                    }
                }
                else
#endif
                    incr = CHARSIZE(c);
            }

            /* The width was computed first, "incr" and "head" are needed for
             * the character at pos->col too. */
            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    else
    {
        /* Slow path: let win_lbr_chartabsize() compute every width. */
        for (;;)
        {
            /* A tab gets expanded, depending on the current column */
            head = 0;
            incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
            /* make sure we don't go past the end of the line */
            if (*ptr == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }

            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    if (start != NULL)
        *start = vcol + head;
    if (end != NULL)
        *end = vcol + incr - 1;
    if (cursor != NULL)
    {
        /* On a Tab in Normal mode the cursor is displayed at the end of the
         * Tab, unless 'list' is set, virtual editing is active or Visual
         * mode applies as checked below. */
        if (*ptr == TAB
                && (State & NORMAL)
                && !wp->w_p_list
                && !virtual_active()
#ifdef FEAT_VISUAL
                && !(VIsual_active
                                   && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
#endif
                )
            *cursor = vcol + incr - 1;      /* cursor at end */
        else
            *cursor = vcol + head;          /* cursor at start */
    }
}
1364
1365 /*
1366  * Get virtual cursor column in the current window, pretending 'list' is off.
1367  */
1368     colnr_T
1369 getvcol_nolist(posp)
1370     pos_T       *posp;
1371 {
1372     int         list_save = curwin->w_p_list;
1373     colnr_T     vcol;
1374
1375     curwin->w_p_list = FALSE;
1376     getvcol(curwin, posp, NULL, &vcol, NULL);
1377     curwin->w_p_list = list_save;
1378     return vcol;
1379 }
1380
1381 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1382 /*
1383  * Get virtual column in virtual mode.
1384  */
1385     void
1386 getvvcol(wp, pos, start, cursor, end)
1387     win_T       *wp;
1388     pos_T       *pos;
1389     colnr_T     *start;
1390     colnr_T     *cursor;
1391     colnr_T     *end;
1392 {
1393     colnr_T     col;
1394     colnr_T     coladd;
1395     colnr_T     endadd;
1396 # ifdef FEAT_MBYTE
1397     char_u      *ptr;
1398 # endif
1399
1400     if (virtual_active())
1401     {
1402         /* For virtual mode, only want one value */
1403         getvcol(wp, pos, &col, NULL, NULL);
1404
1405         coladd = pos->coladd;
1406         endadd = 0;
1407 # ifdef FEAT_MBYTE
1408         /* Cannot put the cursor on part of a wide character. */
1409         ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1410         if (pos->col < (colnr_T)STRLEN(ptr))
1411         {
1412             int c = (*mb_ptr2char)(ptr + pos->col);
1413
1414             if (c != TAB && vim_isprintc(c))
1415             {
1416                 endadd = (colnr_T)(char2cells(c) - 1);
1417                 if (coladd > endadd)    /* past end of line */
1418                     endadd = 0;
1419                 else
1420                     coladd = 0;
1421             }
1422         }
1423 # endif
1424         col += coladd;
1425         if (start != NULL)
1426             *start = col;
1427         if (cursor != NULL)
1428             *cursor = col;
1429         if (end != NULL)
1430             *end = col + endadd;
1431     }
1432     else
1433         getvcol(wp, pos, start, cursor, end);
1434 }
1435 #endif
1436
1437 #if defined(FEAT_VISUAL) || defined(PROTO)
1438 /*
1439  * Get the leftmost and rightmost virtual column of pos1 and pos2.
1440  * Used for Visual block mode.
1441  */
1442     void
1443 getvcols(wp, pos1, pos2, left, right)
1444     win_T       *wp;
1445     pos_T       *pos1, *pos2;
1446     colnr_T     *left, *right;
1447 {
1448     colnr_T     from1, from2, to1, to2;
1449
1450     if (ltp(pos1, pos2))
1451     {
1452         getvvcol(wp, pos1, &from1, NULL, &to1);
1453         getvvcol(wp, pos2, &from2, NULL, &to2);
1454     }
1455     else
1456     {
1457         getvvcol(wp, pos2, &from1, NULL, &to1);
1458         getvvcol(wp, pos1, &from2, NULL, &to2);
1459     }
1460     if (from2 < from1)
1461         *left = from2;
1462     else
1463         *left = from1;
1464     if (to2 > to1)
1465     {
1466         if (*p_sel == 'e' && from2 - 1 >= to1)
1467             *right = from2 - 1;
1468         else
1469             *right = to2;
1470     }
1471     else
1472         *right = to1;
1473 }
1474 #endif
1475
1476 /*
1477  * skipwhite: skip over ' ' and '\t'.
1478  */
1479     char_u *
1480 skipwhite(q)
1481     char_u      *q;
1482 {
1483     char_u      *p = q;
1484
1485     while (vim_iswhite(*p)) /* skip to next non-white */
1486         ++p;
1487     return p;
1488 }
1489
1490 /*
1491  * skip over digits
1492  */
1493     char_u *
1494 skipdigits(q)
1495     char_u      *q;
1496 {
1497     char_u      *p = q;
1498
1499     while (VIM_ISDIGIT(*p))     /* skip to next non-digit */
1500         ++p;
1501     return p;
1502 }
1503
1504 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1505 /*
1506  * skip over digits and hex characters
1507  */
1508     char_u *
1509 skiphex(q)
1510     char_u      *q;
1511 {
1512     char_u      *p = q;
1513
1514     while (vim_isxdigit(*p))    /* skip to next non-digit */
1515         ++p;
1516     return p;
1517 }
1518 #endif
1519
1520 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1521 /*
1522  * skip to digit (or NUL after the string)
1523  */
1524     char_u *
1525 skiptodigit(q)
1526     char_u      *q;
1527 {
1528     char_u      *p = q;
1529
1530     while (*p != NUL && !VIM_ISDIGIT(*p))       /* skip to next digit */
1531         ++p;
1532     return p;
1533 }
1534
1535 /*
1536  * skip to hex character (or NUL after the string)
1537  */
1538     char_u *
1539 skiptohex(q)
1540     char_u      *q;
1541 {
1542     char_u      *p = q;
1543
1544     while (*p != NUL && !vim_isxdigit(*p))      /* skip to next digit */
1545         ++p;
1546     return p;
1547 }
1548 #endif
1549
/*
 * Return 1 when "c" is one of '0' - '9', 0 otherwise.
 * This replaces isdigit(): it accepts characters > 0x100 and does not depend
 * on the system, where isdigit() may also consider superscript 1 to be a
 * digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    if (c < '0' || c > '9')
        return 0;
    return 1;
}
1562
/*
 * Return 1 when "c" is one of '0' - '9', 'a' - 'f' or 'A' - 'F', 0 otherwise.
 * This replaces isxdigit(): it accepts characters > 0x100 and does not
 * depend on the system, where isxdigit() may also consider superscript 1 to
 * be a digit.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F');
}
1576
1577 #if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Vim's own character class functions.  These exist because many library
 * islower()/toupper() etc. do not work properly: they crash when used with
 * invalid values or can't handle latin1 when the locale is C.
 * Speed is most important here.
 *
 * LATIN1LOWER and LATIN1UPPER are the marker characters used in the
 * latin1flags[] table: an entry holds LATIN1LOWER for a lower-case letter,
 * LATIN1UPPER for an upper-case letter and a space for anything else.
 */
#define LATIN1LOWER 'l'
#define LATIN1UPPER 'U'
1586
1587 /*                                                                 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~                                  ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1588 static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1589 static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1590 static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1591
1592     int
1593 vim_islower(c)
1594     int     c;
1595 {
1596     if (c <= '@')
1597         return FALSE;
1598     if (c >= 0x80)
1599     {
1600         if (enc_utf8)
1601             return utf_islower(c);
1602         if (c >= 0x100)
1603         {
1604 #ifdef HAVE_ISWLOWER
1605             if (has_mbyte)
1606                 return iswlower(c);
1607 #endif
1608             /* islower() can't handle these chars and may crash */
1609             return FALSE;
1610         }
1611         if (enc_latin1like)
1612             return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1613     }
1614     return islower(c);
1615 }
1616
1617     int
1618 vim_isupper(c)
1619     int     c;
1620 {
1621     if (c <= '@')
1622         return FALSE;
1623     if (c >= 0x80)
1624     {
1625         if (enc_utf8)
1626             return utf_isupper(c);
1627         if (c >= 0x100)
1628         {
1629 #ifdef HAVE_ISWUPPER
1630             if (has_mbyte)
1631                 return iswupper(c);
1632 #endif
1633             /* islower() can't handle these chars and may crash */
1634             return FALSE;
1635         }
1636         if (enc_latin1like)
1637             return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1638     }
1639     return isupper(c);
1640 }
1641
1642     int
1643 vim_toupper(c)
1644     int     c;
1645 {
1646     if (c <= '@')
1647         return c;
1648     if (c >= 0x80)
1649     {
1650         if (enc_utf8)
1651             return utf_toupper(c);
1652         if (c >= 0x100)
1653         {
1654 #ifdef HAVE_TOWUPPER
1655             if (has_mbyte)
1656                 return towupper(c);
1657 #endif
1658             /* toupper() can't handle these chars and may crash */
1659             return c;
1660         }
1661         if (enc_latin1like)
1662             return latin1upper[c];
1663     }
1664     return TOUPPER_LOC(c);
1665 }
1666
1667     int
1668 vim_tolower(c)
1669     int     c;
1670 {
1671     if (c <= '@')
1672         return c;
1673     if (c >= 0x80)
1674     {
1675         if (enc_utf8)
1676             return utf_tolower(c);
1677         if (c >= 0x100)
1678         {
1679 #ifdef HAVE_TOWLOWER
1680             if (has_mbyte)
1681                 return towlower(c);
1682 #endif
1683             /* tolower() can't handle these chars and may crash */
1684             return c;
1685         }
1686         if (enc_latin1like)
1687             return latin1lower[c];
1688     }
1689     return TOLOWER_LOC(c);
1690 }
1691 #endif
1692
1693 /*
1694  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1695  */
1696     char_u *
1697 skiptowhite(p)
1698     char_u      *p;
1699 {
1700     while (*p != ' ' && *p != '\t' && *p != NUL)
1701         ++p;
1702     return p;
1703 }
1704
1705 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1706         || defined(PROTO)
1707 /*
1708  * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1709  */
1710     char_u *
1711 skiptowhite_esc(p)
1712     char_u      *p;
1713 {
1714     while (*p != ' ' && *p != '\t' && *p != NUL)
1715     {
1716         if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1717             ++p;
1718         ++p;
1719     }
1720     return p;
1721 }
1722 #endif
1723
1724 /*
1725  * Getdigits: Get a number from a string and skip over it.
1726  * Note: the argument is a pointer to a char_u pointer!
1727  */
1728     long
1729 getdigits(pp)
1730     char_u **pp;
1731 {
1732     char_u      *p;
1733     long        retval;
1734
1735     p = *pp;
1736     retval = atol((char *)p);
1737     if (*p == '-')              /* skip negative sign */
1738         ++p;
1739     p = skipdigits(p);          /* skip to next non-digit */
1740     *pp = p;
1741     return retval;
1742 }
1743
1744 /*
1745  * Return TRUE if "lbuf" is empty or only contains blanks.
1746  */
1747     int
1748 vim_isblankline(lbuf)
1749     char_u      *lbuf;
1750 {
1751     char_u      *p;
1752
1753     p = skipwhite(lbuf);
1754     return (*p == NUL || *p == '\r' || *p == '\n');
1755 }
1756
/*
 * Convert a string into a long and/or unsigned long, taking care of
 * hexadecimal and octal numbers.  Accepts a '-' sign.
 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
 *  0       decimal
 *  '0'     octal
 *  'X'     hex
 *  'x'     hex
 * If "len" is not NULL, the length of the number in characters is returned.
 * This includes the leading '-' and the "0x" or "0X" prefix.
 * If "nptr" is not NULL, the signed result is returned in it.
 * If "unptr" is not NULL, the unsigned result is returned in it; the '-'
 * sign is not applied to it.
 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
 * octal number.
 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
 * hex number.
 * There is no check for overflow: the value is accumulated in an unsigned
 * long, which wraps around.
 */
    void
vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
    char_u              *start;
    int                 *hexp;      /* return: type of number 0 = decimal, 'x'
                                       or 'X' is hex, '0' = octal */
    int                 *len;       /* return: detected length of number */
    int                 dooct;      /* recognize octal number */
    int                 dohex;      /* recognize hex number */
    long                *nptr;      /* return: signed result */
    unsigned long       *unptr;     /* return: unsigned result */
{
    char_u          *ptr = start;       /* moves over the number */
    int             hex = 0;            /* default is decimal */
    int             negative = FALSE;
    unsigned long   un = 0;             /* value without the sign */
    int             n;

    if (ptr[0] == '-')
    {
        negative = TRUE;
        ++ptr;
    }

    /* Recognize hex and octal. */
    if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
    {
        /* Tentatively remember the character after the '0', it is the type
         * flag when this turns out to be a hex number. */
        hex = ptr[1];
        if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
            ptr += 2;                   /* hexadecimal */
        else
        {
            hex = 0;                    /* default is decimal */
            if (dooct)
            {
                /* Don't interpret "0", "08" or "0129" as octal. */
                for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
                {
                    if (ptr[n] > '7')
                    {
                        hex = 0;        /* can't be octal */
                        break;
                    }
                    /* Only zeroes so far is still decimal zero. */
                    if (ptr[n] > '0')
                        hex = '0';      /* assume octal */
                }
            }
        }
    }

    /*
     * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
     */
    if (hex == '0' || dooct > 1)
    {
        /* octal */
        while ('0' <= *ptr && *ptr <= '7')
        {
            un = 8 * un + (unsigned long)(*ptr - '0');
            ++ptr;
        }
    }
    else if (hex != 0 || dohex > 1)
    {
        /* hex */
        while (vim_isxdigit(*ptr))
        {
            un = 16 * un + (unsigned long)hex2nr(*ptr);
            ++ptr;
        }
    }
    else
    {
        /* decimal */
        while (VIM_ISDIGIT(*ptr))
        {
            un = 10 * un + (unsigned long)(*ptr - '0');
            ++ptr;
        }
    }

    if (hexp != NULL)
        *hexp = hex;
    if (len != NULL)
        *len = (int)(ptr - start);
    if (nptr != NULL)
    {
        /* The sign is applied whatever the base is. */
        if (negative)   /* account for leading '-' for decimal numbers */
            *nptr = -(long)un;
        else
            *nptr = (long)un;
    }
    if (unptr != NULL)
        *unptr = un;
}
1867
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; for
 * anything else the result is simply the distance from '0'.
 */
    int
hex2nr(c)
    int         c;
{
    int         value;

    if ('a' <= c && c <= 'f')
        value = 10 + (c - 'a');
    else if ('A' <= c && c <= 'F')
        value = 10 + (c - 'A');
    else
        value = c - '0';
    return value;
}
1882
1883 #if defined(FEAT_TERMRESPONSE) \
1884         || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1885 /*
1886  * Convert two hex characters to a byte.
1887  * Return -1 if one of the characters is not hex.
1888  */
1889     int
1890 hexhex2nr(p)
1891     char_u      *p;
1892 {
1893     if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1894         return -1;
1895     return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1896 }
1897 #endif
1898
1899 /*
1900  * Return TRUE if "str" starts with a backslash that should be removed.
1901  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1902  * backslash is not a normal file name character.
1903  * '$' is a valid file name character, we don't remove the backslash before
1904  * it.  This means it is not possible to use an environment variable after a
1905  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1906  * Although "\ name" is valid, the backslash in "Program\ files" must be
1907  * removed.  Assume a file name doesn't start with a space.
1908  * For multi-byte names, never remove a backslash before a non-ascii
1909  * character, assume that all multi-byte characters are valid file name
1910  * characters.
1911  */
1912     int
1913 rem_backslash(str)
1914     char_u  *str;
1915 {
1916 #ifdef BACKSLASH_IN_FILENAME
1917     return (str[0] == '\\'
1918 # ifdef FEAT_MBYTE
1919             && str[1] < 0x80
1920 # endif
1921             && (str[1] == ' '
1922                 || (str[1] != NUL
1923                     && str[1] != '*'
1924                     && str[1] != '?'
1925                     && !vim_isfilec(str[1]))));
1926 #else
1927     return (str[0] == '\\' && str[1] != NUL);
1928 #endif
1929 }
1930
1931 /*
1932  * Halve the number of backslashes in a file name argument.
1933  * For MS-DOS we only do this if the character after the backslash
1934  * is not a normal file character.
1935  */
1936     void
1937 backslash_halve(p)
1938     char_u      *p;
1939 {
1940     for ( ; *p; ++p)
1941         if (rem_backslash(p))
1942             STRMOVE(p, p + 1);
1943 }
1944
1945 /*
1946  * backslash_halve() plus save the result in allocated memory.
1947  */
1948     char_u *
1949 backslash_halve_save(p)
1950     char_u      *p;
1951 {
1952     char_u      *res;
1953
1954     res = vim_strsave(p);
1955     if (res == NULL)
1956         return p;
1957     backslash_halve(res);
1958     return res;
1959 }
1960
1961 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed with the EBCDIC byte value and gives the replacement
 * byte, see ebcdic2ascii().  The values are written in octal, eight entries
 * per row.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};
2002
2003 /*
2004  * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
2005  * wanting 7-bit ASCII characters out the other end.
2006  */
2007     void
2008 ebcdic2ascii(buffer, len)
2009     char_u      *buffer;
2010     int         len;
2011 {
2012     int         i;
2013
2014     for (i = 0; i < len; i++)
2015         buffer[i] = ebcdic2ascii_tab[buffer[i]];
2016 }
2017 #endif