src/charset.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 #include "vim.h"
  11
  12 #ifdef FEAT_LINEBREAK
  13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
  14 #endif
  15
  16 #ifdef FEAT_MBYTE
  17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
  18 #endif
  19
  20 static int nr2hex __ARGS((int c));
  21
     /* Set to TRUE at the end of a successful buf_init_chartab().  transchar()
      * tests it so that plain printable characters can be shown before the
      * tables have been filled. */
  22 static int    chartab_initialized = FALSE;
  23
  24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
  25  * characters 0-255. */
     /* The byte index is "c >> 3" and the bit inside that byte is "c & 7".
      * "c" is evaluated twice by each macro, so it must not have side effects.
      * SET_CHARTAB and RESET_CHARTAB expand to an assignment expression,
      * GET_CHARTAB yields non-zero when the bit is set. */
  26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
  27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
  28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
  29
  30 /*
  31  * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
  32  * characters for current buffer.
  33  *
  34  * Depends on the option settings 'iskeyword', 'isident', 'isfname',
  35  * 'isprint' and 'encoding'.
  36  *
  37  * The index in chartab[] depends on 'encoding':
  38  * - For non-multi-byte index with the byte (same as the character).
  39  * - For DBCS index with the first byte.
  40  * - For UTF-8 index with the character (when first byte is up to 0x80 it is
  41  *   the same as the character, if the first byte is 0x80 and above it depends
  42  *   on further bytes).
  43  *
  44  * The contents of chartab[]:
  45  * - The lower two bits, masked by CT_CELL_MASK, give the number of display
  46  *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
  47  * - CT_PRINT_CHAR bit is set when the character is printable (no need to
  48  *   translate the character before displaying it).  Note that only DBCS
  49  *   characters can have 2 display cells and still be printable.
  50  * - CT_FNAME_CHAR bit is set when the character can be in a file name.
  51  * - CT_ID_CHAR bit is set when the character can be in an identifier.
  52  *
  53  * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
  54  * error, OK otherwise.
  55  */
  56     int
  57 init_chartab()
  58 {
  59     return buf_init_chartab(curbuf, TRUE);
  60 }
  61
  62     int
  63 buf_init_chartab(buf, global)
  64     buf_T       *buf;
  65     int         global;         /* FALSE: only set buf->b_chartab[] */
  66 {
  67     int         c;
  68     int         c2;
  69     char_u      *p;
  70     int         i;
  71     int         tilde;
  72     int         do_isalpha;
  73
  74     if (global)
  75     {
  76         /*
  77          * Set the default size for printable characters:
  78          * From <Space> to '~' is 1 (printable), others are 2 (not printable).
  79          * This also inits all 'isident' and 'isfname' flags to FALSE.
  80          *
  81          * EBCDIC: all chars below ' ' are not printable, all others are
  82          * printable.
  83          */
  84         c = 0;
  85         while (c < ' ')
  86             chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
  87 #ifdef EBCDIC
  88         while (c < 255)
  89 #else
  90         while (c <= '~')
  91 #endif
  92             chartab[c++] = 1 + CT_PRINT_CHAR;
  93 #ifdef FEAT_FKMAP
  94         if (p_altkeymap)
  95         {
  96             while (c < YE)
  97                 chartab[c++] = 1 + CT_PRINT_CHAR;
  98         }
  99 #endif
 100         while (c < 256)
 101         {
 102 #ifdef FEAT_MBYTE
 103             /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
 104             if (enc_utf8 && c >= 0xa0)
 105                 chartab[c++] = CT_PRINT_CHAR + 1;
 106             /* euc-jp characters starting with 0x8e are single width */
 107             else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
 108                 chartab[c++] = CT_PRINT_CHAR + 1;
 109             /* other double-byte chars can be printable AND double-width */
 110             else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
 111                 chartab[c++] = CT_PRINT_CHAR + 2;
 112             else
 113 #endif
 114                 /* the rest is unprintable by default */
 115                 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
 116         }
 117
 118 #ifdef FEAT_MBYTE
 119         /* Assume that every multi-byte char is a filename character. */
 120         for (c = 1; c < 256; ++c)
 121             if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
 122                     || (enc_dbcs == DBCS_JPNU && c == 0x8e)
 123                     || (enc_utf8 && c >= 0xa0))
 124                 chartab[c] |= CT_FNAME_CHAR;
 125 #endif
 126     }
 127
 128     /*
 129      * Init word char flags all to FALSE
 130      */
 131     vim_memset(buf->b_chartab, 0, (size_t)32);
 132 #ifdef FEAT_MBYTE
 133     if (enc_dbcs != 0)
 134         for (c = 0; c < 256; ++c)
 135         {
 136             /* double-byte characters are probably word characters */
 137             if (MB_BYTE2LEN(c) == 2)
 138                 SET_CHARTAB(buf, c);
 139         }
 140 #endif
 141
 142 #ifdef FEAT_LISP
 143     /*
 144      * In lisp mode the '-' character is included in keywords.
 145      */
 146     if (buf->b_p_lisp)
 147         SET_CHARTAB(buf, '-');
 148 #endif
 149
 150     /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
 151      * options Each option is a list of characters, character numbers or
 152      * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
 153      */
 154     for (i = global ? 0 : 3; i <= 3; ++i)
 155     {
 156         if (i == 0)
 157             p = p_isi;          /* first round: 'isident' */
 158         else if (i == 1)
 159             p = p_isp;          /* second round: 'isprint' */
 160         else if (i == 2)
 161             p = p_isf;          /* third round: 'isfname' */
 162         else    /* i == 3 */
 163             p = buf->b_p_isk;   /* fourth round: 'iskeyword' */
 164
 165         while (*p)
 166         {
 167             tilde = FALSE;
 168             do_isalpha = FALSE;
 169             if (*p == '^' && p[1] != NUL)
 170             {
 171                 tilde = TRUE;
 172                 ++p;
 173             }
 174             if (VIM_ISDIGIT(*p))
 175                 c = getdigits(&p);
 176             else
 177                 c = *p++;
 178             c2 = -1;
 179             if (*p == '-' && p[1] != NUL)
 180             {
 181                 ++p;
 182                 if (VIM_ISDIGIT(*p))
 183                     c2 = getdigits(&p);
 184                 else
 185                     c2 = *p++;
 186             }
 187             if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
 188                                                  || !(*p == NUL || *p == ','))
 189                 return FAIL;
 190
 191             if (c2 == -1)       /* not a range */
 192             {
 193                 /*
 194                  * A single '@' (not "@-@"):
 195                  * Decide on letters being ID/printable/keyword chars with
 196                  * standard function isalpha(). This takes care of locale for
 197                  * single-byte characters).
 198                  */
 199                 if (c == '@')
 200                 {
 201                     do_isalpha = TRUE;
 202                     c = 1;
 203                     c2 = 255;
 204                 }
 205                 else
 206                     c2 = c;
 207             }
 208             while (c <= c2)
 209             {
 210                 /* Use the MB_ functions here, because isalpha() doesn't
 211                  * work properly when 'encoding' is "latin1" and the locale is
 212                  * "C".  */
 213                 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
 214 #ifdef FEAT_FKMAP
 215                         || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
 216 #endif
 217                             )
 218                 {
 219                     if (i == 0)                 /* (re)set ID flag */
 220                     {
 221                         if (tilde)
 222                             chartab[c] &= ~CT_ID_CHAR;
 223                         else
 224                             chartab[c] |= CT_ID_CHAR;
 225                     }
 226                     else if (i == 1)            /* (re)set printable */
 227                     {
 228                         if ((c < ' '
 229 #ifndef EBCDIC
 230                                     || c > '~'
 231 #endif
 232 #ifdef FEAT_FKMAP
 233                                     || (p_altkeymap
 234                                         && (F_isalpha(c) || F_isdigit(c)))
 235 #endif
 236                             )
 237 #ifdef FEAT_MBYTE
 238                                 /* For double-byte we keep the cell width, so
 239                                  * that we can detect it from the first byte. */
 240                                 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
 241 #endif
 242                            )
 243                         {
 244                             if (tilde)
 245                             {
 246                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
 247                                              + ((dy_flags & DY_UHEX) ? 4 : 2);
 248                                 chartab[c] &= ~CT_PRINT_CHAR;
 249                             }
 250                             else
 251                             {
 252                                 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
 253                                 chartab[c] |= CT_PRINT_CHAR;
 254                             }
 255                         }
 256                     }
 257                     else if (i == 2)            /* (re)set fname flag */
 258                     {
 259                         if (tilde)
 260                             chartab[c] &= ~CT_FNAME_CHAR;
 261                         else
 262                             chartab[c] |= CT_FNAME_CHAR;
 263                     }
 264                     else /* i == 3 */           /* (re)set keyword flag */
 265                     {
 266                         if (tilde)
 267                             RESET_CHARTAB(buf, c);
 268                         else
 269                             SET_CHARTAB(buf, c);
 270                     }
 271                 }
 272                 ++c;
 273             }
 274             p = skip_to_option_part(p);
 275         }
 276     }
 277     chartab_initialized = TRUE;
 278     return OK;
 279 }
 280
 281 /*
 282  * Translate any special characters in buf[bufsize] in-place.
 283  * The result is a string with only printable characters, but if there is not
 284  * enough room, not all characters will be translated.
 285  */
 286     void
 287 trans_characters(buf, bufsize)
 288     char_u      *buf;
 289     int         bufsize;
 290 {
 291     int         len;            /* length of string needing translation */
 292     int         room;           /* room in buffer after string */
 293     char_u      *trs;           /* translated character */
 294     int         trs_len;        /* length of trs[] */
 295
 296     len = (int)STRLEN(buf);
 297     room = bufsize - len;
 298     while (*buf != 0)
 299     {
 300 # ifdef FEAT_MBYTE
 301         /* Assume a multi-byte character doesn't need translation. */
 302         if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
 303             len -= trs_len;
 304         else
 305 # endif
 306         {
 307             trs = transchar_byte(*buf);
 308             trs_len = (int)STRLEN(trs);
 309             if (trs_len > 1)
 310             {
 311                 room -= trs_len - 1;
 312                 if (room <= 0)
 313                     return;
 314                 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
 315             }
 316             mch_memmove(buf, trs, (size_t)trs_len);
 317             --len;
 318         }
 319         buf += trs_len;
 320     }
 321 }
 322
 323 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
 324         || defined(PROTO)
 325 /*
 326  * Translate a string into allocated memory, replacing special chars with
 327  * printable chars.  Returns NULL when out of memory.
 328  */
 329     char_u *
 330 transstr(s)
 331     char_u      *s;
 332 {
 333     char_u      *res;
 334     char_u      *p;
 335 #ifdef FEAT_MBYTE
 336     int         l, len, c;
 337     char_u      hexbuf[11];
 338 #endif
 339
 340 #ifdef FEAT_MBYTE
 341     if (has_mbyte)
 342     {
 343         /* Compute the length of the result, taking account of unprintable
 344          * multi-byte characters. */
 345         len = 0;
 346         p = s;
 347         while (*p != NUL)
 348         {
 349             if ((l = (*mb_ptr2len)(p)) > 1)
 350             {
 351                 c = (*mb_ptr2char)(p);
 352                 p += l;
 353                 if (vim_isprintc(c))
 354                     len += l;
 355                 else
 356                 {
 357                     transchar_hex(hexbuf, c);
 358                     len += (int)STRLEN(hexbuf);
 359                 }
 360             }
 361             else
 362             {
 363                 l = byte2cells(*p++);
 364                 if (l > 0)
 365                     len += l;
 366                 else
 367                     len += 4;   /* illegal byte sequence */
 368             }
 369         }
 370         res = alloc((unsigned)(len + 1));
 371     }
 372     else
 373 #endif
 374         res = alloc((unsigned)(vim_strsize(s) + 1));
 375     if (res != NULL)
 376     {
 377         *res = NUL;
 378         p = s;
 379         while (*p != NUL)
 380         {
 381 #ifdef FEAT_MBYTE
 382             if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 383             {
 384                 c = (*mb_ptr2char)(p);
 385                 if (vim_isprintc(c))
 386                     STRNCAT(res, p, l); /* append printable multi-byte char */
 387                 else
 388                     transchar_hex(res + STRLEN(res), c);
 389                 p += l;
 390             }
 391             else
 392 #endif
 393                 STRCAT(res, transchar_byte(*p++));
 394         }
 395     }
 396     return res;
 397 }
 398 #endif
 399
 400 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
 401 /*
 402  * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 403  * current locale.
 404  * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 405  * Otherwise puts the result in "buf[buflen]".
 406  */
 407     char_u *
 408 str_foldcase(str, orglen, buf, buflen)
 409     char_u      *str;
 410     int         orglen;
 411     char_u      *buf;
 412     int         buflen;
 413 {
 414     garray_T    ga;
 415     int         i;
 416     int         len = orglen;
 417
         /* Accessors for the string being modified: the growarray "ga" when
          * "buf" is NULL, the caller's "buf" otherwise.  ga.ga_data is read
          * again on every use.  These macros are not #undef'ed after the
          * function. */
 418 #define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
 419 #define GA_PTR(i)   ((char_u *)ga.ga_data + i)
 420 #define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
 421 #define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)
 422
 423     /* Copy "str" into "buf" or allocated memory, unmodified. */
 424     if (buf == NULL)
 425     {
 426         ga_init2(&ga, 1, 10);
             /* "len" bytes plus one for the terminating NUL */
 427         if (ga_grow(&ga, len + 1) == FAIL)
 428             return NULL;
 429         mch_memmove(ga.ga_data, str, (size_t)len);
 430         ga.ga_len = len;
 431     }
 432     else
 433     {
             /* Truncate without notice when "str" doesn't fit in "buf",
              * keeping one byte for the NUL. */
 434         if (len >= buflen)          /* Ugly! */
 435             len = buflen - 1;
 436         mch_memmove(buf, str, (size_t)len);
 437     }
 438     if (buf == NULL)
 439         GA_CHAR(len) = NUL;
 440     else
 441         buf[len] = NUL;
 442
 443     /* Make each character lower case. */
 444     i = 0;
 445     while (STR_CHAR(i) != NUL)
 446     {
 447 #ifdef FEAT_MBYTE
 448         if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
 449         {
                 /* Only UTF-8 characters are folded here; a multi-byte
                  * character in another encoding is skipped unchanged. */
 450             if (enc_utf8)
 451             {
                     /* c: character at "i", ol: its length in bytes,
                      * lc: what utf_tolower() returns for it */
 452                 int     c = utf_ptr2char(STR_PTR(i));
 453                 int     ol = utf_ptr2len(STR_PTR(i));
 454                 int     lc = utf_tolower(c);
 455
 456                 /* Only replace the character when it is not an invalid
 457                  * sequence (ASCII character or more than one byte) and
 458                  * utf_tolower() doesn't return the original character. */
 459                 if ((c < 0x80 || ol > 1) && c != lc)
 460                 {
                         /* nl: byte length of the replacement */
 461                     int     nl = utf_char2len(lc);
 462
 463                     /* If the byte length changes need to shift the following
 464                      * characters forward or backward. */
 465                     if (ol != nl)
 466                     {
 467                         if (nl > ol)
 468                         {
                                 /* Growing: get more space in the growarray,
                                  * or check that the longer string still
                                  * fits in "buf" including the NUL. */
 469                             if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
 470                                                     : len + nl - ol >= buflen)
 471                             {
 472                                 /* out of memory, keep old char */
 473                                 lc = c;
 474                                 nl = ol;
 475                             }
 476                         }
                             /* Checked again: "nl" was reset to "ol" above
                              * when there is no room. */
 477                         if (ol != nl)
 478                         {
 479                             if (buf == NULL)
 480                             {
 481                                 STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
 482                                 ga.ga_len += nl - ol;
 483                             }
 484                             else
 485                             {
 486                                 STRMOVE(buf + i + nl, buf + i + ol);
 487                                 len += nl - ol;
 488                             }
 489                         }
 490                     }
                         /* Store the bytes of "lc" at position "i". */
 491                     (void)utf_char2bytes(lc, STR_PTR(i));
 492                 }
 493             }
 494             /* skip to next multi-byte char */
 495             i += (*mb_ptr2len)(STR_PTR(i));
 496         }
 497         else
 498 #endif
 499         {
                 /* single byte: use TOLOWER_LOC() */
 500             if (buf == NULL)
 501                 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
 502             else
 503                 buf[i] = TOLOWER_LOC(buf[i]);
 504             ++i;
 505         }
 506     }
 507
         /* Return the allocated string, or the caller's buffer. */
 508     if (buf == NULL)
 509         return (char_u *)ga.ga_data;
 510     return buf;
 511 }
 512 #endif
 513
 514 /*
 515  * Catch 22: chartab[] can't be initialized before the options are
 516  * initialized, and initializing options may cause transchar() to be called!
 517  * When chartab_initialized == FALSE don't use chartab[].
 518  * Does NOT work for multi-byte characters, c must be <= 255.
 519  * Also doesn't work for the first byte of a multi-byte, "c" must be a
 520  * character!
 521  */
 522 static char_u   transchar_buf[7];
     /* The buffer above is shared by transchar() and transchar_byte(); both
      * return a pointer to it, so the result is overwritten by the next call.
      * Its 7 bytes hold the two byte "~@" prefix plus the at most five bytes
      * (including the NUL) that transchar_nonprint() stores for a "c" up to
      * 255. */
 523
 524     char_u *
 525 transchar(c)
 526     int         c;
 527 {
 528     int                 i;
 529
         /* "i" is the offset in transchar_buf[] where the text for "c" goes:
          * 0, or 2 when the "~@" prefix was stored. */
 530     i = 0;
 531     if (IS_SPECIAL(c))      /* special key code, display as ~@ char */
 532     {
 533         transchar_buf[0] = '~';
 534         transchar_buf[1] = '@';
 535         i = 2;
 536         c = K_SECOND(c);
 537     }
 538
         /* "c" is stored as-is in two cases:
          * - chartab[] was not initialized yet and "c" is in the fixed
          *   printable range (or F_ischar() says so with FEAT_FKMAP);
          * - "c" is below 256 and vim_isprintc_strict() accepts it.
          * Everything else goes through transchar_nonprint(). */
 539     if ((!chartab_initialized && (
 540 #ifdef EBCDIC
 541                     (c >= 64 && c < 255)
 542 #else
 543                     (c >= ' ' && c <= '~')
 544 #endif
 545 #ifdef FEAT_FKMAP
 546                         || F_ischar(c)
 547 #endif
 548                 )) || (c < 256 && vim_isprintc_strict(c)))
 549     {
 550         /* printable character */
 551         transchar_buf[i] = c;
 552         transchar_buf[i + 1] = NUL;
 553     }
 554     else
 555         transchar_nonprint(transchar_buf + i, c);
 556     return transchar_buf;
 557 }
 558
 559 #if defined(FEAT_MBYTE) || defined(PROTO)
 560 /*
 561  * Like transchar(), but called with a byte instead of a character.  Checks
 562  * for an illegal UTF-8 byte.
 563  */
 564     char_u *
 565 transchar_byte(c)
 566     int         c;
 567 {
 568     if (enc_utf8 && c >= 0x80)
 569     {
 570         transchar_nonprint(transchar_buf, c);
 571         return transchar_buf;
 572     }
 573     return transchar(c);
 574 }
 575 #endif
 576
 577 /*
 578  * Convert non-printable character to two or more printable characters in
 579  * "buf[]".  "buf" needs to be able to hold five bytes.
 580  * Does NOT work for multi-byte characters, c must be <= 255.
 581  */
 582     void
 583 transchar_nonprint(buf, c)
 584     char_u      *buf;
 585     int         c;
 586 {
         /* First adjust "c" to the character that is to be shown. */
 587     if (c == NL)
 588         c = NUL;                /* we use newline in place of a NUL */
 589     else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
 590         c = NL;                 /* we use CR in place of  NL in this case */
 591
         /* The else-if chain below stores one of these forms in "buf":
          *   "<xx>"  hex number, when 'display' has "uhex" or for UTF-8 bytes
          *           of 0x80 and higher
          *   "^x"    for "c" up to 0x7f (EBCDIC: CtrlChar() characters, DEL)
          *   "|x"    for 0xa0 - 0xfe (not for EBCDIC)
          *   "~x"    for what remains (EBCDIC: MetaChar() when "c" < 64,
          *           otherwise "~?")
          */
 592     if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
 593         transchar_hex(buf, c);
 594
 595 #ifdef EBCDIC
 596     /* For EBCDIC only the characters 0-63 and 255 are not printable */
 597     else if (CtrlChar(c) != 0 || c == DEL)
 598 #else
 599     else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
 600 #endif
 601     {
 602         buf[0] = '^';
 603 #ifdef EBCDIC
 604         if (c == DEL)
 605             buf[1] = '?';               /* DEL displayed as ^? */
 606         else
 607             buf[1] = CtrlChar(c);
 608 #else
             /* Flipping bit 0x40 turns 0x01 into 'A' and 0x7f into '?'. */
 609         buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
 610 #endif
 611
 612         buf[2] = NUL;
 613     }
 614 #ifdef FEAT_MBYTE
 615     else if (enc_utf8 && c >= 0x80)
 616     {
 617         transchar_hex(buf, c);
 618     }
 619 #endif
 620 #ifndef EBCDIC
 621     else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
 622     {
             /* '|' followed by the character with the high bit removed */
 623         buf[0] = '|';
 624         buf[1] = c - 0x80;
 625         buf[2] = NUL;
 626     }
 627 #else
 628     else if (c < 64)
 629     {
 630         buf[0] = '~';
 631         buf[1] = MetaChar(c);
 632         buf[2] = NUL;
 633     }
 634 #endif
 635     else                                            /* 0x80 - 0x9f and 0xff */
 636     {
 637         /*
 638          * TODO: EBCDIC I don't know what to do with this chars, so I display
 639          * them as '~?' for now
 640          */
 641         buf[0] = '~';
 642 #ifdef EBCDIC
 643         buf[1] = '?';                   /* 0xff displayed as ~? */
 644 #else
             /* Remove the high bit, then the same bit flip as for "^x". */
 645         buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
 646 #endif
 647         buf[2] = NUL;
 648     }
 649 }
 650
 651     void
 652 transchar_hex(buf, c)
 653     char_u      *buf;
 654     int         c;
 655 {
 656     int         i = 0;
 657
 658     buf[0] = '<';
 659 #ifdef FEAT_MBYTE
 660     if (c > 255)
 661     {
 662         buf[++i] = nr2hex((unsigned)c >> 12);
 663         buf[++i] = nr2hex((unsigned)c >> 8);
 664     }
 665 #endif
 666     buf[++i] = nr2hex((unsigned)c >> 4);
 667     buf[++i] = nr2hex(c);
 668     buf[++i] = '>';
 669     buf[++i] = NUL;
 670 }
 671
 672 /*
 673  * Convert the lower 4 bits of byte "c" to its hex character.
 674  * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 675  * function key 1.
 676  */
 677     static int
 678 nr2hex(c)
 679     int         c;
 680 {
 681     if ((c & 0xf) <= 9)
 682         return (c & 0xf) + '0';
 683     return (c & 0xf) - 10 + 'a';
 684 }
 685
 686 /*
 687  * Return number of display cells occupied by byte "b".
 688  * Caller must make sure 0 <= b <= 255.
 689  * For multi-byte mode "b" must be the first byte of a character.
 690  * A TAB is counted as two cells: "^I".
 691  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
 692  * cells depends on further bytes.
 693  */
 694     int
 695 byte2cells(b)
 696     int         b;
 697 {
 698 #ifdef FEAT_MBYTE
 699     if (enc_utf8 && b >= 0x80)
 700         return 0;
 701 #endif
 702     return (chartab[b] & CT_CELL_MASK);
 703 }
 704
 705 /*
 706  * Return number of display cells occupied by character "c".
 707  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
 708  * A TAB is counted as two cells: "^I" or four: "<09>".
 709  */
 710     int
 711 char2cells(c)
 712     int         c;
 713 {
 714     if (IS_SPECIAL(c))
 715         return char2cells(K_SECOND(c)) + 2;
 716 #ifdef FEAT_MBYTE
 717     if (c >= 0x80)
 718     {
 719         /* UTF-8: above 0x80 need to check the value */
 720         if (enc_utf8)
 721             return utf_char2cells(c);
 722         /* DBCS: double-byte means double-width, except for euc-jp with first
 723          * byte 0x8e */
 724         if (enc_dbcs != 0 && c >= 0x100)
 725         {
 726             if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
 727                 return 1;
 728             return 2;
 729         }
 730     }
 731 #endif
 732     return (chartab[c & 0xff] & CT_CELL_MASK);
 733 }
 734
 735 /*
 736  * Return number of display cells occupied by character at "*p".
 737  * A TAB is counted as two cells: "^I" or four: "<09>".
 738  */
 739     int
 740 ptr2cells(p)
 741     char_u      *p;
 742 {
 743 #ifdef FEAT_MBYTE
 744     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
 745     if (enc_utf8 && *p >= 0x80)
 746         return utf_ptr2cells(p);
 747     /* For DBCS we can tell the cell count from the first byte. */
 748 #endif
 749     return (chartab[*p] & CT_CELL_MASK);
 750 }
 751
 752 /*
 753  * Return the number of characters string "s" will take on the screen,
 754  * counting TABs as two characters: "^I".
 755  */
 756     int
 757 vim_strsize(s)
 758     char_u      *s;
 759 {
 760     return vim_strnsize(s, (int)MAXCOL);
 761 }
 762
 763 /*
 764  * Return the number of characters string "s[len]" will take on the screen,
 765  * counting TABs as two characters: "^I".
 766  */
 767     int
 768 vim_strnsize(s, len)
 769     char_u      *s;
 770     int         len;
 771 {
 772     int         size = 0;
 773
 774     while (*s != NUL && --len >= 0)
 775     {
 776 #ifdef FEAT_MBYTE
 777         if (has_mbyte)
 778         {
 779             int     l = (*mb_ptr2len)(s);
 780
 781             size += ptr2cells(s);
 782             s += l;
 783             len -= l - 1;
 784         }
 785         else
 786 #endif
 787             size += byte2cells(*s++);
 788     }
 789     return size;
 790 }
 791
 792 /*
 793  * Return the number of characters 'c' will take on the screen, taking
 794  * into account the size of a tab.
 795  * Use a define to make it fast, this is used very often!!!
 796  * Also see getvcol() below.
 797  */
 798
 799 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
 800     if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
 801     { \
 802         int ts; \
 803         ts = (buf)->b_p_ts; \
 804         return (int)(ts - (col % ts)); \
 805     } \
 806     else \
 807         return ptr2cells(p);
 808
 809 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
 810         || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
 811     int
 812 chartabsize(p, col)
 813     char_u      *p;
 814     colnr_T     col;
 815 {
 816     RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
 817 }
 818 #endif
 819
 820 #ifdef FEAT_LINEBREAK
 821     static int
 822 win_chartabsize(wp, p, col)
 823     win_T       *wp;
 824     char_u      *p;
 825     colnr_T     col;
 826 {
 827     RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
 828 }
 829 #endif
 830
 831 /*
 832  * return the number of characters the string 's' will take on the screen,
 833  * taking into account the size of a tab
 834  */
 835     int
 836 linetabsize(s)
 837     char_u      *s;
 838 {
 839     colnr_T     col = 0;
 840
 841     while (*s != NUL)
 842         col += lbr_chartabsize_adv(&s, col);
 843     return (int)col;
 844 }
 845
 846 /*
 847  * Like linetabsize(), but for a given window instead of the current one.
 848  */
 849     int
 850 win_linetabsize(wp, p, len)
 851     win_T       *wp;
 852     char_u      *p;
 853     colnr_T     len;
 854 {
 855     colnr_T     col = 0;
 856     char_u      *s;
 857
 858     for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
 859         col += win_lbr_chartabsize(wp, s, col, NULL);
 860     return (int)col;
 861 }
 862
 863 /*
 864  * Return TRUE if 'c' is a normal identifier character:
 865  * Letters and characters from the 'isident' option.
 866  */
 867     int
 868 vim_isIDc(c)
 869     int c;
 870 {
 871     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
 872 }
 873
 874 /*
 875  * return TRUE if 'c' is a keyword character: Letters and characters from
 876  * 'iskeyword' option for current buffer.
 877  * For multi-byte characters mb_get_class() is used (builtin rules).
 878  */
 879     int
 880 vim_iswordc(c)
 881     int c;
 882 {
 883 #ifdef FEAT_MBYTE
 884     if (c >= 0x100)
 885     {
 886         if (enc_dbcs != 0)
 887             return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
 888         if (enc_utf8)
 889             return utf_class(c) >= 2;
 890     }
 891 #endif
 892     return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
 893 }
 894
 895 /*
 896  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
 897  */
 898     int
 899 vim_iswordp(p)
 900     char_u *p;
 901 {
 902 #ifdef FEAT_MBYTE
 903     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 904         return mb_get_class(p) >= 2;
 905 #endif
 906     return GET_CHARTAB(curbuf, *p) != 0;
 907 }
 908
 909 #if defined(FEAT_SYN_HL) || defined(PROTO)
 910     int
 911 vim_iswordc_buf(p, buf)
 912     char_u      *p;
 913     buf_T       *buf;
 914 {
 915 # ifdef FEAT_MBYTE
 916     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 917         return mb_get_class(p) >= 2;
 918 # endif
 919     return (GET_CHARTAB(buf, *p) != 0);
 920 }
 921 #endif
 922
 923 /*
 924  * return TRUE if 'c' is a valid file-name character
 925  * Assume characters above 0x100 are valid (multi-byte).
 926  */
 927     int
 928 vim_isfilec(c)
 929     int c;
 930 {
 931     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
 932 }
 933
 934 /*
 935  * return TRUE if 'c' is a valid file-name character or a wildcard character
 936  * Assume characters above 0x100 are valid (multi-byte).
 937  * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
 938  * returns false.
 939  */
 940     int
 941 vim_isfilec_or_wc(c)
 942     int c;
 943 {
 944     char_u buf[2];
 945
 946     buf[0] = (char_u)c;
 947     buf[1] = NUL;
 948     return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
 949 }
 950
 951 /*
 952  * return TRUE if 'c' is a printable character
 953  * Assume characters above 0x100 are printable (multi-byte), except for
 954  * Unicode.
 955  */
 956     int
 957 vim_isprintc(c)
 958     int c;
 959 {
 960 #ifdef FEAT_MBYTE
 961     if (enc_utf8 && c >= 0x100)
 962         return utf_printable(c);
 963 #endif
 964     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 965 }
 966
 967 /*
 968  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 969  * byte of a double-byte character.
 970  */
 971     int
 972 vim_isprintc_strict(c)
 973     int c;
 974 {
 975 #ifdef FEAT_MBYTE
 976     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
 977         return FALSE;
 978     if (enc_utf8 && c >= 0x100)
 979         return utf_printable(c);
 980 #endif
 981     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
 982 }
 983
 984 /*
 985  * like chartabsize(), but also check for line breaks on the screen
 986  */
 987     int
 988 lbr_chartabsize(s, col)
 989     unsigned char       *s;
 990     colnr_T             col;
 991 {
         /* With FEAT_LINEBREAK the block below is only used when 'linebreak'
          * is off in the current window and 'showbreak' is empty; without
          * that feature it makes up the whole function. */
 992 #ifdef FEAT_LINEBREAK
 993     if (!curwin->w_p_lbr && *p_sbr == NUL)
 994     {
 995 #endif
 996 #ifdef FEAT_MBYTE
             /* When the window wraps, win_nolbr_chartabsize() computes the
              * size. */
 997         if (curwin->w_p_wrap)
 998             return win_nolbr_chartabsize(curwin, s, col, NULL);
 999 #endif
             /* This macro always returns, nothing after it in this block is
              * reached. */
1000         RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1001 #ifdef FEAT_LINEBREAK
1002     }
         /* 'linebreak' or 'showbreak' is in use */
1003     return win_lbr_chartabsize(curwin, s, col, NULL);
1004 #endif
1005 }
1006
1007 /*
1008  * Call lbr_chartabsize() and advance the pointer.
1009  */
1010     int
1011 lbr_chartabsize_adv(s, col)
1012     char_u      **s;
1013     colnr_T     col;
1014 {
1015     int         retval;
1016
1017     retval = lbr_chartabsize(*s, col);
1018     mb_ptr_adv(*s);
1019     return retval;
1020 }
1021
/*
 * This function is used very often, keep it fast!!!!
 *
 * Return the size of the character at "s" when it starts in virtual column
 * "col" of window "wp", taking 'linebreak' and 'showbreak' into account.
 *
 * If "headp" is not NULL, set *headp to the size of what we add for the
 * 'showbreak' string at the start of the line (plus one when a ">" was
 * counted for a double-width character that does not fit).
 * Warning: on the quick path (no 'linebreak' and no 'showbreak') *headp is
 * only set if it's a non-zero value, init to 0 before calling.
 */
/*ARGSUSED*/
    int
win_lbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp;
{
#ifdef FEAT_LINEBREAK
    int         c;
    int         size;
    colnr_T     col2;
    colnr_T     colmax;
    int         added;
# ifdef FEAT_MBYTE
    int         mb_added = 0;
# else
    /* NOTE(review): this macro is not #undef'ed in the visible code, so it
     * stays defined for the rest of the file when FEAT_MBYTE is off. */
#  define mb_added 0
# endif
    int         numberextra;
    char_u      *ps;
    int         tab_corr = (*s == TAB);     /* character is a TAB, see the
                                               'showbreak' handling below */
    int         n;

    /*
     * No 'linebreak' and 'showbreak': return quickly.
     */
    if (!wp->w_p_lbr && *p_sbr == NUL)
#endif
    {
#ifdef FEAT_MBYTE
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
#endif
        /* NOTE(review): macro defined outside this section; used without
         * ';', presumably expands to a "return" -- confirm. */
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
            && vim_isbreak(c)
            && !vim_isbreak(s[1])
            && !wp->w_p_list
            && wp->w_p_wrap
# ifdef FEAT_VERTSPLIT
            && wp->w_width != 0
# endif
       )
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = W_WIDTH(wp) - numberextra;
        if (col >= colmax)
        {
            /* "col" is past the first screen line: move "colmax" forward in
             * steps of "n" until it is beyond "col".  The "n > 0" check
             * avoids dividing by zero. */
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n;
        }

        for (;;)
        {
            ps = s;             /* remember the previous character */
            mb_ptr_adv(s);
            c = *s;
            /* Stop at the end of the text, or at a non-break character that
             * follows a break character (except directly after the starting
             * position, where "col2" still equals "col"). */
            if (!(c != NUL
                    && (vim_isbreak(c)
                        || (!vim_isbreak(c)
                            && (col2 == col || !vim_isbreak(*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax)         /* doesn't fit */
            {
                /* Make this character fill the columns up to "colmax". */
                size = colmax - col;
                tab_corr = FALSE;
                break;
            }
        }
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size;         /* Count the ">" in the last column. */
        mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'showbreak' string at start of line
     * Set *headp to the size of what we add.
     */
    added = 0;
    if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
    {
        /* Reduce "col" to a position relative to the start of a screen
         * line; the remainder is only taken when the divisor is positive. */
        numberextra = win_col_off(wp);
        col += numberextra + mb_added;
        if (col >= (colnr_T)W_WIDTH(wp))
        {
            col -= W_WIDTH(wp);
            numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
            if (numberextra > 0)
                col = col % numberextra;
        }
        if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
        {
            added = vim_strsize(p_sbr);
            if (tab_corr)
                /* For a TAB the added size is rounded down to a multiple of
                 * 'tabstop'. */
                size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
            else
                size += added;
            /* Only report the head size when the character starts at the
             * beginning of the screen line. */
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1162
1163 #if defined(FEAT_MBYTE) || defined(PROTO)
1164 /*
1165  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1166  * 'wrap' is on.  This means we need to check for a double-byte character that
1167  * doesn't fit at the end of the screen line.
1168  */
1169     static int
1170 win_nolbr_chartabsize(wp, s, col, headp)
1171     win_T       *wp;
1172     char_u      *s;
1173     colnr_T     col;
1174     int         *headp;
1175 {
1176     int         n;
1177
1178     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1179     {
1180         n = wp->w_buffer->b_p_ts;
1181         return (int)(n - (col % n));
1182     }
1183     n = ptr2cells(s);
1184     /* Add one cell for a double-width character in the last column of the
1185      * window, displayed with a ">". */
1186     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1187     {
1188         if (headp != NULL)
1189             *headp = 1;
1190         return 3;
1191     }
1192     return n;
1193 }
1194
1195 /*
1196  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1197  * "wp".
1198  */
1199     int
1200 in_win_border(wp, vcol)
1201     win_T       *wp;
1202     colnr_T     vcol;
1203 {
1204     colnr_T     width1;         /* width of first line (after line number) */
1205     colnr_T     width2;         /* width of further lines */
1206
1207 #ifdef FEAT_VERTSPLIT
1208     if (wp->w_width == 0)       /* there is no border */
1209         return FALSE;
1210 #endif
1211     width1 = W_WIDTH(wp) - win_col_off(wp);
1212     if (vcol < width1 - 1)
1213         return FALSE;
1214     if (vcol == width1 - 1)
1215         return TRUE;
1216     width2 = width1 + win_col_off2(wp);
1217     return ((vcol - width1) % width2 == width2 - 1);
1218 }
1219 #endif /* FEAT_MBYTE */
1220
/*
 * Get virtual column number of pos in window "wp".
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 * Each of "start", "cursor" and "end" may be NULL when the value is not
 * wanted.
 *
 * When pos->col is beyond the end of the line, the position of the
 * terminating NUL is used; the NUL counts as one column.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     vcol;
    char_u      *ptr;           /* points to current char */
    char_u      *posptr;        /* points to char at pos->col */
    int         incr;           /* size of the current character */
    int         head;           /* extra size in front of the character */
    int         ts = wp->w_buffer->b_p_ts;
    int         c;

    vcol = 0;
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    posptr = ptr + pos->col;

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
            && !wp->w_p_lbr && *p_sbr == NUL
#endif
       )
    {
#ifndef FEAT_MBYTE
        /* Without multi-byte support "head" never changes. */
        head = 0;
#endif
        for (;;)
        {
#ifdef FEAT_MBYTE
            /* May be set to 1 below, thus reset it for every character. */
            head = 0;
#endif
            c = *ptr;
            /* make sure we don't go past the end of the line */
            if (c == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }
            /* A tab gets expanded, depending on the current column */
            if (c == TAB)
                incr = ts - (vcol % ts);
            else
            {
#ifdef FEAT_MBYTE
                if (has_mbyte)
                {
                    /* For utf-8, if the byte is >= 0x80, need to look at
                     * further bytes to find the cell width. */
                    if (enc_utf8 && c >= 0x80)
                        incr = utf_ptr2cells(ptr);
                    else
                        incr = CHARSIZE(c);

                    /* If a double-cell char doesn't fit at the end of a line
                     * it wraps to the next line, it's like this char is three
                     * cells wide. */
                    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
                            && in_win_border(wp, vcol))
                    {
                        ++incr;
                        head = 1;
                    }
                }
                else
#endif
                    incr = CHARSIZE(c);
            }

            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    else
    {
        /* General case: let win_lbr_chartabsize() compute the size of each
         * character, including what 'linebreak' and 'showbreak' add. */
        for (;;)
        {
            /* A tab gets expanded, depending on the current column */
            head = 0;
            incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
            /* make sure we don't go past the end of the line */
            if (*ptr == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }

            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }

    /* Here "vcol" is the column where the character starts, "incr" its size
     * and "head" the part of that size that comes before the character. */
    if (start != NULL)
        *start = vcol + head;
    if (end != NULL)
        *end = vcol + incr - 1;
    if (cursor != NULL)
    {
        /* On a TAB the cursor is put at its last column, but only in Normal
         * mode, without 'list', when virtual editing is not active and not
         * for the Visual-mode cases excluded below. */
        if (*ptr == TAB
                && (State & NORMAL)
                && !wp->w_p_list
                && !virtual_active()
#ifdef FEAT_VISUAL
                && !(VIsual_active
                                   && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
#endif
                )
            *cursor = vcol + incr - 1;      /* cursor at end */
        else
            *cursor = vcol + head;          /* cursor at start */
    }
}
1353
1354 /*
1355  * Get virtual cursor column in the current window, pretending 'list' is off.
1356  */
1357     colnr_T
1358 getvcol_nolist(posp)
1359     pos_T       *posp;
1360 {
1361     int         list_save = curwin->w_p_list;
1362     colnr_T     vcol;
1363
1364     curwin->w_p_list = FALSE;
1365     getvcol(curwin, posp, NULL, &vcol, NULL);
1366     curwin->w_p_list = list_save;
1367     return vcol;
1368 }
1369
1370 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1371 /*
1372  * Get virtual column in virtual mode.
1373  */
1374     void
1375 getvvcol(wp, pos, start, cursor, end)
1376     win_T       *wp;
1377     pos_T       *pos;
1378     colnr_T     *start;
1379     colnr_T     *cursor;
1380     colnr_T     *end;
1381 {
1382     colnr_T     col;
1383     colnr_T     coladd;
1384     colnr_T     endadd;
1385 # ifdef FEAT_MBYTE
1386     char_u      *ptr;
1387 # endif
1388
1389     if (virtual_active())
1390     {
1391         /* For virtual mode, only want one value */
1392         getvcol(wp, pos, &col, NULL, NULL);
1393
1394         coladd = pos->coladd;
1395         endadd = 0;
1396 # ifdef FEAT_MBYTE
1397         /* Cannot put the cursor on part of a wide character. */
1398         ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1399         if (pos->col < STRLEN(ptr))
1400         {
1401             int c = (*mb_ptr2char)(ptr + pos->col);
1402
1403             if (c != TAB && vim_isprintc(c))
1404             {
1405                 endadd = char2cells(c) - 1;
1406                 if (coladd > endadd)    /* past end of line */
1407                     endadd = 0;
1408                 else
1409                     coladd = 0;
1410             }
1411         }
1412 # endif
1413         col += coladd;
1414         if (start != NULL)
1415             *start = col;
1416         if (cursor != NULL)
1417             *cursor = col;
1418         if (end != NULL)
1419             *end = col + endadd;
1420     }
1421     else
1422         getvcol(wp, pos, start, cursor, end);
1423 }
1424 #endif
1425
1426 #if defined(FEAT_VISUAL) || defined(PROTO)
1427 /*
1428  * Get the leftmost and rightmost virtual column of pos1 and pos2.
1429  * Used for Visual block mode.
1430  */
1431     void
1432 getvcols(wp, pos1, pos2, left, right)
1433     win_T       *wp;
1434     pos_T       *pos1, *pos2;
1435     colnr_T     *left, *right;
1436 {
1437     colnr_T     from1, from2, to1, to2;
1438
1439     if (ltp(pos1, pos2))
1440     {
1441         getvvcol(wp, pos1, &from1, NULL, &to1);
1442         getvvcol(wp, pos2, &from2, NULL, &to2);
1443     }
1444     else
1445     {
1446         getvvcol(wp, pos2, &from1, NULL, &to1);
1447         getvvcol(wp, pos1, &from2, NULL, &to2);
1448     }
1449     if (from2 < from1)
1450         *left = from2;
1451     else
1452         *left = from1;
1453     if (to2 > to1)
1454     {
1455         if (*p_sel == 'e' && from2 - 1 >= to1)
1456             *right = from2 - 1;
1457         else
1458             *right = to2;
1459     }
1460     else
1461         *right = to1;
1462 }
1463 #endif
1464
1465 /*
1466  * skipwhite: skip over ' ' and '\t'.
1467  */
1468     char_u *
1469 skipwhite(q)
1470     char_u      *q;
1471 {
1472     char_u      *p = q;
1473
1474     while (vim_iswhite(*p)) /* skip to next non-white */
1475         ++p;
1476     return p;
1477 }
1478
1479 /*
1480  * skip over digits
1481  */
1482     char_u *
1483 skipdigits(q)
1484     char_u      *q;
1485 {
1486     char_u      *p = q;
1487
1488     while (VIM_ISDIGIT(*p))     /* skip to next non-digit */
1489         ++p;
1490     return p;
1491 }
1492
1493 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1494 /*
1495  * skip over digits and hex characters
1496  */
1497     char_u *
1498 skiphex(q)
1499     char_u      *q;
1500 {
1501     char_u      *p = q;
1502
1503     while (vim_isxdigit(*p))    /* skip to next non-digit */
1504         ++p;
1505     return p;
1506 }
1507 #endif
1508
1509 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1510 /*
1511  * skip to digit (or NUL after the string)
1512  */
1513     char_u *
1514 skiptodigit(q)
1515     char_u      *q;
1516 {
1517     char_u      *p = q;
1518
1519     while (*p != NUL && !VIM_ISDIGIT(*p))       /* skip to next digit */
1520         ++p;
1521     return p;
1522 }
1523
1524 /*
1525  * skip to hex character (or NUL after the string)
1526  */
1527     char_u *
1528 skiptohex(q)
1529     char_u      *q;
1530 {
1531     char_u      *p = q;
1532
1533     while (*p != NUL && !vim_isxdigit(*p))      /* skip to next digit */
1534         ++p;
1535     return p;
1536 }
1537 #endif
1538
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * Only '0' to '9' are accepted.  isdigit() is not used here, because on some
 * systems it also considers superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    return !(c < '0' || c > '9');
}
1551
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * Only '0' - '9', 'a' - 'f' and 'A' - 'F' are accepted.  isxdigit() is not
 * used here, because on some systems it also considers superscript 1 to be a
 * digit.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'A' && c <= 'F')
        return 1;
    return (c >= 'a' && c <= 'f');
}
1565
1566 #if defined(FEAT_MBYTE) || defined(PROTO)
1567 /*
1568  * Vim's own character class functions.  These exist because many library
1569  * islower()/toupper() etc. do not work properly: they crash when used with
1570  * invalid values or can't handle latin1 when the locale is C.
1571  * Speed is most important here.
1572  */
/* Flag characters stored in latin1flags[]: 'l' marks a lower-case letter and
 * 'U' an upper-case letter.  All other entries are a space. */
#define LATIN1LOWER 'l'
#define LATIN1UPPER 'U'
1575
1576 /*                                                                 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~                                  ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1577 static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1578 static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1579 static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1580
1581     int
1582 vim_islower(c)
1583     int     c;
1584 {
1585     if (c <= '@')
1586         return FALSE;
1587     if (c >= 0x80)
1588     {
1589         if (enc_utf8)
1590             return utf_islower(c);
1591         if (c >= 0x100)
1592         {
1593 #ifdef HAVE_ISWLOWER
1594             if (has_mbyte)
1595                 return iswlower(c);
1596 #endif
1597             /* islower() can't handle these chars and may crash */
1598             return FALSE;
1599         }
1600         if (enc_latin1like)
1601             return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1602     }
1603     return islower(c);
1604 }
1605
1606     int
1607 vim_isupper(c)
1608     int     c;
1609 {
1610     if (c <= '@')
1611         return FALSE;
1612     if (c >= 0x80)
1613     {
1614         if (enc_utf8)
1615             return utf_isupper(c);
1616         if (c >= 0x100)
1617         {
1618 #ifdef HAVE_ISWUPPER
1619             if (has_mbyte)
1620                 return iswupper(c);
1621 #endif
1622             /* islower() can't handle these chars and may crash */
1623             return FALSE;
1624         }
1625         if (enc_latin1like)
1626             return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1627     }
1628     return isupper(c);
1629 }
1630
1631     int
1632 vim_toupper(c)
1633     int     c;
1634 {
1635     if (c <= '@')
1636         return c;
1637     if (c >= 0x80)
1638     {
1639         if (enc_utf8)
1640             return utf_toupper(c);
1641         if (c >= 0x100)
1642         {
1643 #ifdef HAVE_TOWUPPER
1644             if (has_mbyte)
1645                 return towupper(c);
1646 #endif
1647             /* toupper() can't handle these chars and may crash */
1648             return c;
1649         }
1650         if (enc_latin1like)
1651             return latin1upper[c];
1652     }
1653     return TOUPPER_LOC(c);
1654 }
1655
1656     int
1657 vim_tolower(c)
1658     int     c;
1659 {
1660     if (c <= '@')
1661         return c;
1662     if (c >= 0x80)
1663     {
1664         if (enc_utf8)
1665             return utf_tolower(c);
1666         if (c >= 0x100)
1667         {
1668 #ifdef HAVE_TOWLOWER
1669             if (has_mbyte)
1670                 return towlower(c);
1671 #endif
1672             /* tolower() can't handle these chars and may crash */
1673             return c;
1674         }
1675         if (enc_latin1like)
1676             return latin1lower[c];
1677     }
1678     return TOLOWER_LOC(c);
1679 }
1680 #endif
1681
1682 /*
1683  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1684  */
1685     char_u *
1686 skiptowhite(p)
1687     char_u      *p;
1688 {
1689     while (*p != ' ' && *p != '\t' && *p != NUL)
1690         ++p;
1691     return p;
1692 }
1693
1694 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1695         || defined(PROTO)
1696 /*
1697  * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1698  */
1699     char_u *
1700 skiptowhite_esc(p)
1701     char_u      *p;
1702 {
1703     while (*p != ' ' && *p != '\t' && *p != NUL)
1704     {
1705         if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1706             ++p;
1707         ++p;
1708     }
1709     return p;
1710 }
1711 #endif
1712
1713 /*
1714  * Getdigits: Get a number from a string and skip over it.
1715  * Note: the argument is a pointer to a char_u pointer!
1716  */
1717     long
1718 getdigits(pp)
1719     char_u **pp;
1720 {
1721     char_u      *p;
1722     long        retval;
1723
1724     p = *pp;
1725     retval = atol((char *)p);
1726     if (*p == '-')              /* skip negative sign */
1727         ++p;
1728     p = skipdigits(p);          /* skip to next non-digit */
1729     *pp = p;
1730     return retval;
1731 }
1732
1733 /*
1734  * Return TRUE if "lbuf" is empty or only contains blanks.
1735  */
1736     int
1737 vim_isblankline(lbuf)
1738     char_u      *lbuf;
1739 {
1740     char_u      *p;
1741
1742     p = skipwhite(lbuf);
1743     return (*p == NUL || *p == '\r' || *p == '\n');
1744 }
1745
/*
 * Convert a string into a long and/or unsigned long, taking care of
 * hexadecimal and octal numbers.  Accepts a '-' sign.
 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
 *  0       decimal
 *  '0'     octal
 *  'X'     hex
 *  'x'     hex
 * If "len" is not NULL, the length of the number in characters is returned.
 * This includes a leading '-' and a "0x" or "0X" prefix.
 * If "nptr" is not NULL, the signed result is returned in it.
 * If "unptr" is not NULL, the unsigned result is returned in it.  The '-'
 * sign is not applied to this value.
 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
 * octal number.
 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
 * hex number.
 * There is no check for overflow.
 */
    void
vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
    char_u              *start;
    int                 *hexp;      /* return: type of number 0 = decimal, 'x'
                                       or 'X' is hex, '0' = octal */
    int                 *len;       /* return: detected length of number */
    int                 dooct;      /* recognize octal number */
    int                 dohex;      /* recognize hex number */
    long                *nptr;      /* return: signed result */
    unsigned long       *unptr;     /* return: unsigned result */
{
    char_u          *ptr = start;
    int             hex = 0;            /* default is decimal */
    int             negative = FALSE;
    unsigned long   un = 0;
    int             n;

    if (ptr[0] == '-')
    {
        negative = TRUE;
        ++ptr;
    }

    /* Recognize hex and octal. */
    if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
    {
        hex = ptr[1];
        /* "0x" or "0X" only counts when a hex digit follows. */
        if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
            ptr += 2;                   /* hexadecimal */
        else
        {
            hex = 0;                    /* default is decimal */
            if (dooct)
            {
                /* Don't interpret "0", "08" or "0129" as octal. */
                /* Octal requires all digits to be 0-7 and at least one of
                 * them after the leading zero to be non-zero. */
                for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
                {
                    if (ptr[n] > '7')
                    {
                        hex = 0;        /* can't be octal */
                        break;
                    }
                    if (ptr[n] > '0')
                        hex = '0';      /* assume octal */
                }
            }
        }
    }

    /*
     * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
     */
    if (hex == '0' || dooct > 1)
    {
        /* octal */
        while ('0' <= *ptr && *ptr <= '7')
        {
            un = 8 * un + (unsigned long)(*ptr - '0');
            ++ptr;
        }
    }
    else if (hex != 0 || dohex > 1)
    {
        /* hex */
        while (vim_isxdigit(*ptr))
        {
            un = 16 * un + (unsigned long)hex2nr(*ptr);
            ++ptr;
        }
    }
    else
    {
        /* decimal */
        while (VIM_ISDIGIT(*ptr))
        {
            un = 10 * un + (unsigned long)(*ptr - '0');
            ++ptr;
        }
    }

    if (hexp != NULL)
        *hexp = hex;
    if (len != NULL)
        *len = (int)(ptr - start);
    if (nptr != NULL)
    {
        /* The sign is applied for octal and hex numbers as well. */
        if (negative)   /* account for leading '-' for decimal numbers */
            *nptr = -(long)un;
        else
            *nptr = (long)un;
    }
    if (unptr != NULL)
        *unptr = un;
}
1856
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * For any other argument the result is "c - '0'", which is meaningless.
 */
    int
hex2nr(c)
    int         c;
{
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');
    return c - '0';
}
1871
1872 #if defined(FEAT_TERMRESPONSE) \
1873         || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1874 /*
1875  * Convert two hex characters to a byte.
1876  * Return -1 if one of the characters is not hex.
1877  */
1878     int
1879 hexhex2nr(p)
1880     char_u      *p;
1881 {
1882     if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1883         return -1;
1884     return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1885 }
1886 #endif
1887
1888 /*
1889  * Return TRUE if "str" starts with a backslash that should be removed.
1890  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1891  * backslash is not a normal file name character.
1892  * '$' is a valid file name character, we don't remove the backslash before
1893  * it.  This means it is not possible to use an environment variable after a
1894  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1895  * Although "\ name" is valid, the backslash in "Program\ files" must be
1896  * removed.  Assume a file name doesn't start with a space.
1897  * For multi-byte names, never remove a backslash before a non-ascii
1898  * character, assume that all multi-byte characters are valid file name
1899  * characters.
1900  */
1901     int
1902 rem_backslash(str)
1903     char_u  *str;
1904 {
1905 #ifdef BACKSLASH_IN_FILENAME
1906     return (str[0] == '\\'
1907 # ifdef FEAT_MBYTE
1908             && str[1] < 0x80
1909 # endif
1910             && (str[1] == ' '
1911                 || (str[1] != NUL
1912                     && str[1] != '*'
1913                     && str[1] != '?'
1914                     && !vim_isfilec(str[1]))));
1915 #else
1916     return (str[0] == '\\' && str[1] != NUL);
1917 #endif
1918 }
1919
1920 /*
1921  * Halve the number of backslashes in a file name argument.
1922  * For MS-DOS we only do this if the character after the backslash
1923  * is not a normal file character.
1924  */
1925     void
1926 backslash_halve(p)
1927     char_u      *p;
1928 {
1929     for ( ; *p; ++p)
1930         if (rem_backslash(p))
1931             STRMOVE(p, p + 1);
1932 }
1933
1934 /*
1935  * backslash_halve() plus save the result in allocated memory.
1936  */
1937     char_u *
1938 backslash_halve_save(p)
1939     char_u      *p;
1940 {
1941     char_u      *res;
1942
1943     res = vim_strsave(p);
1944     if (res == NULL)
1945         return p;
1946     backslash_halve(res);
1947     return res;
1948 }
1949
1950 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed with the EBCDIC byte value, the entry is the byte
 * that replaces it (see ebcdic2ascii()).  Entries are in octal, eight per
 * row.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};
1991
1992 /*
1993  * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
1994  * wanting 7-bit ASCII characters out the other end.
1995  */
1996     void
1997 ebcdic2ascii(buffer, len)
1998     char_u      *buffer;
1999     int         len;
2000 {
2001     int         i;
2002
2003     for (i = 0; i < len; i++)
2004         buffer[i] = ebcdic2ascii_tab[buffer[i]];
2005 }
2006 #endif