5b0e71c8a45e13e2f08be2a0dcc4bd1c6bddd534
[MacVim.git] / src / charset.c
blob5b0e71c8a45e13e2f08be2a0dcc4bd1c6bddd534
1 /* vi:set ts=8 sts=4 sw=4:
3 * VIM - Vi IMproved by Bram Moolenaar
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
10 #include "vim.h"
12 #ifdef FEAT_LINEBREAK
13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14 #endif
16 #ifdef FEAT_MBYTE
17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
18 #endif
20 static unsigned nr2hex __ARGS((unsigned c));
22 static int chartab_initialized = FALSE;
/*
 * buf->b_chartab[] is an array of 32 bytes holding one flag bit for each of
 * the characters 0-255.  The byte is selected with "c >> 3", the bit inside
 * that byte with "c & 7".
 */
#define CHARTAB_SLOT(buf, c)    (buf)->b_chartab[(unsigned)(c) >> 3]
#define CHARTAB_MASK(c)         (1 << ((c) & 0x7))
#define SET_CHARTAB(buf, c)     CHARTAB_SLOT(buf, c) |= CHARTAB_MASK(c)
#define RESET_CHARTAB(buf, c)   CHARTAB_SLOT(buf, c) &= ~CHARTAB_MASK(c)
#define GET_CHARTAB(buf, c)     (CHARTAB_SLOT(buf, c) & CHARTAB_MASK(c))
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
56 int
57 init_chartab()
59 return buf_init_chartab(curbuf, TRUE);
62 int
63 buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
74 if (global)
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87 #ifdef EBCDIC
88 while (c < 255)
89 #else
90 while (c <= '~')
91 #endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93 #ifdef FEAT_FKMAP
94 if (p_altkeymap)
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
99 #endif
100 while (c < 256)
102 #ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113 #endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
118 #ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125 #endif
129 * Init word char flags all to FALSE
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132 #ifdef FEAT_MBYTE
133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
140 #endif
142 #ifdef FEAT_LISP
144 * In lisp mode the '-' character is included in keywords.
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148 #endif
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
154 for (i = global ? 0 : 3; i <= 3; ++i)
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
165 while (*p)
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
171 tilde = TRUE;
172 ++p;
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 #ifdef FEAT_MBYTE
178 if (has_mbyte)
179 c = mb_ptr2char_adv(&p);
180 else
181 #endif
182 c = *p++;
183 c2 = -1;
184 if (*p == '-' && p[1] != NUL)
186 ++p;
187 if (VIM_ISDIGIT(*p))
188 c2 = getdigits(&p);
189 else
190 #ifdef FEAT_MBYTE
191 if (has_mbyte)
192 c2 = mb_ptr2char_adv(&p);
193 else
194 #endif
195 c2 = *p++;
197 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
198 || !(*p == NUL || *p == ','))
199 return FAIL;
201 if (c2 == -1) /* not a range */
204 * A single '@' (not "@-@"):
205 * Decide on letters being ID/printable/keyword chars with
206 * standard function isalpha(). This takes care of locale for
207 * single-byte characters).
209 if (c == '@')
211 do_isalpha = TRUE;
212 c = 1;
213 c2 = 255;
215 else
216 c2 = c;
218 while (c <= c2)
220 /* Use the MB_ functions here, because isalpha() doesn't
221 * work properly when 'encoding' is "latin1" and the locale is
222 * "C". */
223 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
224 #ifdef FEAT_FKMAP
225 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
226 #endif
229 if (i == 0) /* (re)set ID flag */
231 if (tilde)
232 chartab[c] &= ~CT_ID_CHAR;
233 else
234 chartab[c] |= CT_ID_CHAR;
236 else if (i == 1) /* (re)set printable */
238 if ((c < ' '
239 #ifndef EBCDIC
240 || c > '~'
241 #endif
242 #ifdef FEAT_FKMAP
243 || (p_altkeymap
244 && (F_isalpha(c) || F_isdigit(c)))
245 #endif
247 #ifdef FEAT_MBYTE
248 /* For double-byte we keep the cell width, so
249 * that we can detect it from the first byte. */
250 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
251 #endif
254 if (tilde)
256 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
257 + ((dy_flags & DY_UHEX) ? 4 : 2);
258 chartab[c] &= ~CT_PRINT_CHAR;
260 else
262 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
263 chartab[c] |= CT_PRINT_CHAR;
267 else if (i == 2) /* (re)set fname flag */
269 if (tilde)
270 chartab[c] &= ~CT_FNAME_CHAR;
271 else
272 chartab[c] |= CT_FNAME_CHAR;
274 else /* i == 3 */ /* (re)set keyword flag */
276 if (tilde)
277 RESET_CHARTAB(buf, c);
278 else
279 SET_CHARTAB(buf, c);
282 ++c;
284 p = skip_to_option_part(p);
287 chartab_initialized = TRUE;
288 return OK;
292 * Translate any special characters in buf[bufsize] in-place.
293 * The result is a string with only printable characters, but if there is not
294 * enough room, not all characters will be translated.
296 void
297 trans_characters(buf, bufsize)
298 char_u *buf;
299 int bufsize;
301 int len; /* length of string needing translation */
302 int room; /* room in buffer after string */
303 char_u *trs; /* translated character */
304 int trs_len; /* length of trs[] */
306 len = (int)STRLEN(buf);
307 room = bufsize - len;
308 while (*buf != 0)
310 # ifdef FEAT_MBYTE
311 /* Assume a multi-byte character doesn't need translation. */
312 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
313 len -= trs_len;
314 else
315 # endif
317 trs = transchar_byte(*buf);
318 trs_len = (int)STRLEN(trs);
319 if (trs_len > 1)
321 room -= trs_len - 1;
322 if (room <= 0)
323 return;
324 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
326 mch_memmove(buf, trs, (size_t)trs_len);
327 --len;
329 buf += trs_len;
333 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
334 || defined(PROTO)
336 * Translate a string into allocated memory, replacing special chars with
337 * printable chars. Returns NULL when out of memory.
339 char_u *
340 transstr(s)
341 char_u *s;
343 char_u *res;
344 char_u *p;
345 #ifdef FEAT_MBYTE
346 int l, len, c;
347 char_u hexbuf[11];
348 #endif
350 #ifdef FEAT_MBYTE
351 if (has_mbyte)
353 /* Compute the length of the result, taking account of unprintable
354 * multi-byte characters. */
355 len = 0;
356 p = s;
357 while (*p != NUL)
359 if ((l = (*mb_ptr2len)(p)) > 1)
361 c = (*mb_ptr2char)(p);
362 p += l;
363 if (vim_isprintc(c))
364 len += l;
365 else
367 transchar_hex(hexbuf, c);
368 len += (int)STRLEN(hexbuf);
371 else
373 l = byte2cells(*p++);
374 if (l > 0)
375 len += l;
376 else
377 len += 4; /* illegal byte sequence */
380 res = alloc((unsigned)(len + 1));
382 else
383 #endif
384 res = alloc((unsigned)(vim_strsize(s) + 1));
385 if (res != NULL)
387 *res = NUL;
388 p = s;
389 while (*p != NUL)
391 #ifdef FEAT_MBYTE
392 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
394 c = (*mb_ptr2char)(p);
395 if (vim_isprintc(c))
396 STRNCAT(res, p, l); /* append printable multi-byte char */
397 else
398 transchar_hex(res + STRLEN(res), c);
399 p += l;
401 else
402 #endif
403 STRCAT(res, transchar_byte(*p++));
406 return res;
408 #endif
410 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
412 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
413 * current locale.
414 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
415 * Otherwise puts the result in "buf[buflen]".
417 char_u *
418 str_foldcase(str, orglen, buf, buflen)
419 char_u *str;
420 int orglen;
421 char_u *buf;
422 int buflen;
424 garray_T ga;
425 int i;
426 int len = orglen;
428 #define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
429 #define GA_PTR(i) ((char_u *)ga.ga_data + i)
430 #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
431 #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
433 /* Copy "str" into "buf" or allocated memory, unmodified. */
434 if (buf == NULL)
436 ga_init2(&ga, 1, 10);
437 if (ga_grow(&ga, len + 1) == FAIL)
438 return NULL;
439 mch_memmove(ga.ga_data, str, (size_t)len);
440 ga.ga_len = len;
442 else
444 if (len >= buflen) /* Ugly! */
445 len = buflen - 1;
446 mch_memmove(buf, str, (size_t)len);
448 if (buf == NULL)
449 GA_CHAR(len) = NUL;
450 else
451 buf[len] = NUL;
453 /* Make each character lower case. */
454 i = 0;
455 while (STR_CHAR(i) != NUL)
457 #ifdef FEAT_MBYTE
458 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
460 if (enc_utf8)
462 int c = utf_ptr2char(STR_PTR(i));
463 int ol = utf_ptr2len(STR_PTR(i));
464 int lc = utf_tolower(c);
466 /* Only replace the character when it is not an invalid
467 * sequence (ASCII character or more than one byte) and
468 * utf_tolower() doesn't return the original character. */
469 if ((c < 0x80 || ol > 1) && c != lc)
471 int nl = utf_char2len(lc);
473 /* If the byte length changes need to shift the following
474 * characters forward or backward. */
475 if (ol != nl)
477 if (nl > ol)
479 if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
480 : len + nl - ol >= buflen)
482 /* out of memory, keep old char */
483 lc = c;
484 nl = ol;
487 if (ol != nl)
489 if (buf == NULL)
491 STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
492 ga.ga_len += nl - ol;
494 else
496 STRMOVE(buf + i + nl, buf + i + ol);
497 len += nl - ol;
501 (void)utf_char2bytes(lc, STR_PTR(i));
504 /* skip to next multi-byte char */
505 i += (*mb_ptr2len)(STR_PTR(i));
507 else
508 #endif
510 if (buf == NULL)
511 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
512 else
513 buf[i] = TOLOWER_LOC(buf[i]);
514 ++i;
518 if (buf == NULL)
519 return (char_u *)ga.ga_data;
520 return buf;
522 #endif
525 * Catch 22: chartab[] can't be initialized before the options are
526 * initialized, and initializing options may cause transchar() to be called!
527 * When chartab_initialized == FALSE don't use chartab[].
528 * Does NOT work for multi-byte characters, c must be <= 255.
529 * Also doesn't work for the first byte of a multi-byte, "c" must be a
530 * character!
532 static char_u transchar_buf[7];
534 char_u *
535 transchar(c)
536 int c;
538 int i;
540 i = 0;
541 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
543 transchar_buf[0] = '~';
544 transchar_buf[1] = '@';
545 i = 2;
546 c = K_SECOND(c);
549 if ((!chartab_initialized && (
550 #ifdef EBCDIC
551 (c >= 64 && c < 255)
552 #else
553 (c >= ' ' && c <= '~')
554 #endif
555 #ifdef FEAT_FKMAP
556 || F_ischar(c)
557 #endif
558 )) || (c < 256 && vim_isprintc_strict(c)))
560 /* printable character */
561 transchar_buf[i] = c;
562 transchar_buf[i + 1] = NUL;
564 else
565 transchar_nonprint(transchar_buf + i, c);
566 return transchar_buf;
569 #if defined(FEAT_MBYTE) || defined(PROTO)
571 * Like transchar(), but called with a byte instead of a character. Checks
572 * for an illegal UTF-8 byte.
574 char_u *
575 transchar_byte(c)
576 int c;
578 if (enc_utf8 && c >= 0x80)
580 transchar_nonprint(transchar_buf, c);
581 return transchar_buf;
583 return transchar(c);
585 #endif
588 * Convert non-printable character to two or more printable characters in
589 * "buf[]". "buf" needs to be able to hold five bytes.
590 * Does NOT work for multi-byte characters, c must be <= 255.
592 void
593 transchar_nonprint(buf, c)
594 char_u *buf;
595 int c;
597 if (c == NL)
598 c = NUL; /* we use newline in place of a NUL */
599 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
600 c = NL; /* we use CR in place of NL in this case */
602 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
603 transchar_hex(buf, c);
605 #ifdef EBCDIC
606 /* For EBCDIC only the characters 0-63 and 255 are not printable */
607 else if (CtrlChar(c) != 0 || c == DEL)
608 #else
609 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
610 #endif
612 buf[0] = '^';
613 #ifdef EBCDIC
614 if (c == DEL)
615 buf[1] = '?'; /* DEL displayed as ^? */
616 else
617 buf[1] = CtrlChar(c);
618 #else
619 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
620 #endif
622 buf[2] = NUL;
624 #ifdef FEAT_MBYTE
625 else if (enc_utf8 && c >= 0x80)
627 transchar_hex(buf, c);
629 #endif
630 #ifndef EBCDIC
631 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
633 buf[0] = '|';
634 buf[1] = c - 0x80;
635 buf[2] = NUL;
637 #else
638 else if (c < 64)
640 buf[0] = '~';
641 buf[1] = MetaChar(c);
642 buf[2] = NUL;
644 #endif
645 else /* 0x80 - 0x9f and 0xff */
648 * TODO: EBCDIC I don't know what to do with this chars, so I display
649 * them as '~?' for now
651 buf[0] = '~';
652 #ifdef EBCDIC
653 buf[1] = '?'; /* 0xff displayed as ~? */
654 #else
655 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
656 #endif
657 buf[2] = NUL;
661 void
662 transchar_hex(buf, c)
663 char_u *buf;
664 int c;
666 int i = 0;
668 buf[0] = '<';
669 #ifdef FEAT_MBYTE
670 if (c > 255)
672 buf[++i] = nr2hex((unsigned)c >> 12);
673 buf[++i] = nr2hex((unsigned)c >> 8);
675 #endif
676 buf[++i] = nr2hex((unsigned)c >> 4);
677 buf[++i] = nr2hex((unsigned)c);
678 buf[++i] = '>';
679 buf[++i] = NUL;
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(c)
    unsigned    c;
{
    unsigned    nibble = c & 0xf;

    return nibble <= 9 ? nibble + '0' : nibble - 10 + 'a';
}
697 * Return number of display cells occupied by byte "b".
698 * Caller must make sure 0 <= b <= 255.
699 * For multi-byte mode "b" must be the first byte of a character.
700 * A TAB is counted as two cells: "^I".
701 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
702 * cells depends on further bytes.
705 byte2cells(b)
706 int b;
708 #ifdef FEAT_MBYTE
709 if (enc_utf8 && b >= 0x80)
710 return 0;
711 #endif
712 return (chartab[b] & CT_CELL_MASK);
716 * Return number of display cells occupied by character "c".
717 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
718 * A TAB is counted as two cells: "^I" or four: "<09>".
721 char2cells(c)
722 int c;
724 if (IS_SPECIAL(c))
725 return char2cells(K_SECOND(c)) + 2;
726 #ifdef FEAT_MBYTE
727 if (c >= 0x80)
729 /* UTF-8: above 0x80 need to check the value */
730 if (enc_utf8)
731 return utf_char2cells(c);
732 /* DBCS: double-byte means double-width, except for euc-jp with first
733 * byte 0x8e */
734 if (enc_dbcs != 0 && c >= 0x100)
736 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
737 return 1;
738 return 2;
741 #endif
742 return (chartab[c & 0xff] & CT_CELL_MASK);
746 * Return number of display cells occupied by character at "*p".
747 * A TAB is counted as two cells: "^I" or four: "<09>".
750 ptr2cells(p)
751 char_u *p;
753 #ifdef FEAT_MBYTE
754 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
755 if (enc_utf8 && *p >= 0x80)
756 return utf_ptr2cells(p);
757 /* For DBCS we can tell the cell count from the first byte. */
758 #endif
759 return (chartab[*p] & CT_CELL_MASK);
763 * Return the number of characters string "s" will take on the screen,
764 * counting TABs as two characters: "^I".
767 vim_strsize(s)
768 char_u *s;
770 return vim_strnsize(s, (int)MAXCOL);
774 * Return the number of characters string "s[len]" will take on the screen,
775 * counting TABs as two characters: "^I".
778 vim_strnsize(s, len)
779 char_u *s;
780 int len;
782 int size = 0;
784 while (*s != NUL && --len >= 0)
786 #ifdef FEAT_MBYTE
787 if (has_mbyte)
789 int l = (*mb_ptr2len)(s);
791 size += ptr2cells(s);
792 s += l;
793 len -= l - 1;
795 else
796 #endif
797 size += byte2cells(*s++);
799 return size;
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to an if/else that returns from the calling function in
 * both branches; it is used as the last statement of a function, without a
 * trailing semicolon.  All arguments are parenthesized where that matters,
 * so that an expression can be passed for "col".
 */
#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
819 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
820 || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
822 chartabsize(p, col)
823 char_u *p;
824 colnr_T col;
826 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
828 #endif
830 #ifdef FEAT_LINEBREAK
831 static int
832 win_chartabsize(wp, p, col)
833 win_T *wp;
834 char_u *p;
835 colnr_T col;
837 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
839 #endif
842 * return the number of characters the string 's' will take on the screen,
843 * taking into account the size of a tab
846 linetabsize(s)
847 char_u *s;
849 colnr_T col = 0;
851 while (*s != NUL)
852 col += lbr_chartabsize_adv(&s, col);
853 return (int)col;
857 * Like linetabsize(), but for a given window instead of the current one.
860 win_linetabsize(wp, p, len)
861 win_T *wp;
862 char_u *p;
863 colnr_T len;
865 colnr_T col = 0;
866 char_u *s;
868 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
869 col += win_lbr_chartabsize(wp, s, col, NULL);
870 return (int)col;
874 * Return TRUE if 'c' is a normal identifier character:
875 * Letters and characters from the 'isident' option.
878 vim_isIDc(c)
879 int c;
881 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
885 * return TRUE if 'c' is a keyword character: Letters and characters from
886 * 'iskeyword' option for current buffer.
887 * For multi-byte characters mb_get_class() is used (builtin rules).
890 vim_iswordc(c)
891 int c;
893 #ifdef FEAT_MBYTE
894 if (c >= 0x100)
896 if (enc_dbcs != 0)
897 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
898 if (enc_utf8)
899 return utf_class(c) >= 2;
901 #endif
902 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
906 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
909 vim_iswordp(p)
910 char_u *p;
912 #ifdef FEAT_MBYTE
913 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
914 return mb_get_class(p) >= 2;
915 #endif
916 return GET_CHARTAB(curbuf, *p) != 0;
919 #if defined(FEAT_SYN_HL) || defined(PROTO)
921 vim_iswordc_buf(p, buf)
922 char_u *p;
923 buf_T *buf;
925 # ifdef FEAT_MBYTE
926 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
927 return mb_get_class(p) >= 2;
928 # endif
929 return (GET_CHARTAB(buf, *p) != 0);
931 #endif
934 * return TRUE if 'c' is a valid file-name character
935 * Assume characters above 0x100 are valid (multi-byte).
938 vim_isfilec(c)
939 int c;
941 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
945 * return TRUE if 'c' is a valid file-name character or a wildcard character
946 * Assume characters above 0x100 are valid (multi-byte).
947 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
948 * returns false.
951 vim_isfilec_or_wc(c)
952 int c;
954 char_u buf[2];
956 buf[0] = (char_u)c;
957 buf[1] = NUL;
958 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
962 * return TRUE if 'c' is a printable character
963 * Assume characters above 0x100 are printable (multi-byte), except for
964 * Unicode.
967 vim_isprintc(c)
968 int c;
970 #ifdef FEAT_MBYTE
971 if (enc_utf8 && c >= 0x100)
972 return utf_printable(c);
973 #endif
974 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
978 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
979 * byte of a double-byte character.
982 vim_isprintc_strict(c)
983 int c;
985 #ifdef FEAT_MBYTE
986 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
987 return FALSE;
988 if (enc_utf8 && c >= 0x100)
989 return utf_printable(c);
990 #endif
991 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
995 * like chartabsize(), but also check for line breaks on the screen
998 lbr_chartabsize(s, col)
999 unsigned char *s;
1000 colnr_T col;
1002 #ifdef FEAT_LINEBREAK
1003 if (!curwin->w_p_lbr && *p_sbr == NUL)
1005 #endif
1006 #ifdef FEAT_MBYTE
1007 if (curwin->w_p_wrap)
1008 return win_nolbr_chartabsize(curwin, s, col, NULL);
1009 #endif
1010 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1011 #ifdef FEAT_LINEBREAK
1013 return win_lbr_chartabsize(curwin, s, col, NULL);
1014 #endif
1018 * Call lbr_chartabsize() and advance the pointer.
1021 lbr_chartabsize_adv(s, col)
1022 char_u **s;
1023 colnr_T col;
1025 int retval;
1027 retval = lbr_chartabsize(*s, col);
1028 mb_ptr_adv(*s);
1029 return retval;
1033 * This function is used very often, keep it fast!!!!
1035 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1036 * string at start of line. Warning: *headp is only set if it's a non-zero
1037 * value, init to 0 before calling.
1040 win_lbr_chartabsize(wp, s, col, headp)
1041 win_T *wp;
1042 char_u *s;
1043 colnr_T col;
1044 int *headp UNUSED;
1046 #ifdef FEAT_LINEBREAK
1047 int c;
1048 int size;
1049 colnr_T col2;
1050 colnr_T colmax;
1051 int added;
1052 # ifdef FEAT_MBYTE
1053 int mb_added = 0;
1054 # else
1055 # define mb_added 0
1056 # endif
1057 int numberextra;
1058 char_u *ps;
1059 int tab_corr = (*s == TAB);
1060 int n;
1063 * No 'linebreak' and 'showbreak': return quickly.
1065 if (!wp->w_p_lbr && *p_sbr == NUL)
1066 #endif
1068 #ifdef FEAT_MBYTE
1069 if (wp->w_p_wrap)
1070 return win_nolbr_chartabsize(wp, s, col, headp);
1071 #endif
1072 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1075 #ifdef FEAT_LINEBREAK
1077 * First get normal size, without 'linebreak'
1079 size = win_chartabsize(wp, s, col);
1080 c = *s;
1083 * If 'linebreak' set check at a blank before a non-blank if the line
1084 * needs a break here
1086 if (wp->w_p_lbr
1087 && vim_isbreak(c)
1088 && !vim_isbreak(s[1])
1089 && !wp->w_p_list
1090 && wp->w_p_wrap
1091 # ifdef FEAT_VERTSPLIT
1092 && wp->w_width != 0
1093 # endif
1097 * Count all characters from first non-blank after a blank up to next
1098 * non-blank after a blank.
1100 numberextra = win_col_off(wp);
1101 col2 = col;
1102 colmax = (colnr_T)(W_WIDTH(wp) - numberextra);
1103 if (col >= colmax)
1105 n = colmax + win_col_off2(wp);
1106 if (n > 0)
1107 colmax += (((col - colmax) / n) + 1) * n;
1110 for (;;)
1112 ps = s;
1113 mb_ptr_adv(s);
1114 c = *s;
1115 if (!(c != NUL
1116 && (vim_isbreak(c)
1117 || (!vim_isbreak(c)
1118 && (col2 == col || !vim_isbreak(*ps))))))
1119 break;
1121 col2 += win_chartabsize(wp, s, col2);
1122 if (col2 >= colmax) /* doesn't fit */
1124 size = colmax - col;
1125 tab_corr = FALSE;
1126 break;
1130 # ifdef FEAT_MBYTE
1131 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1132 && wp->w_p_wrap && in_win_border(wp, col))
1134 ++size; /* Count the ">" in the last column. */
1135 mb_added = 1;
1137 # endif
1140 * May have to add something for 'showbreak' string at start of line
1141 * Set *headp to the size of what we add.
1143 added = 0;
1144 if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1146 numberextra = win_col_off(wp);
1147 col += numberextra + mb_added;
1148 if (col >= (colnr_T)W_WIDTH(wp))
1150 col -= W_WIDTH(wp);
1151 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1152 if (numberextra > 0)
1153 col = col % numberextra;
1155 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1157 added = vim_strsize(p_sbr);
1158 if (tab_corr)
1159 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1160 else
1161 size += added;
1162 if (col != 0)
1163 added = 0;
1166 if (headp != NULL)
1167 *headp = added + mb_added;
1168 return size;
1169 #endif
1172 #if defined(FEAT_MBYTE) || defined(PROTO)
1174 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1175 * 'wrap' is on. This means we need to check for a double-byte character that
1176 * doesn't fit at the end of the screen line.
1178 static int
1179 win_nolbr_chartabsize(wp, s, col, headp)
1180 win_T *wp;
1181 char_u *s;
1182 colnr_T col;
1183 int *headp;
1185 int n;
1187 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1189 n = wp->w_buffer->b_p_ts;
1190 return (int)(n - (col % n));
1192 n = ptr2cells(s);
1193 /* Add one cell for a double-width character in the last column of the
1194 * window, displayed with a ">". */
1195 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1197 if (headp != NULL)
1198 *headp = 1;
1199 return 3;
1201 return n;
1205 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1206 * "wp".
1209 in_win_border(wp, vcol)
1210 win_T *wp;
1211 colnr_T vcol;
1213 int width1; /* width of first line (after line number) */
1214 int width2; /* width of further lines */
1216 #ifdef FEAT_VERTSPLIT
1217 if (wp->w_width == 0) /* there is no border */
1218 return FALSE;
1219 #endif
1220 width1 = W_WIDTH(wp) - win_col_off(wp);
1221 if ((int)vcol < width1 - 1)
1222 return FALSE;
1223 if ((int)vcol == width1 - 1)
1224 return TRUE;
1225 width2 = width1 + win_col_off2(wp);
1226 if (width2 <= 0)
1227 return FALSE;
1228 return ((vcol - width1) % width2 == width2 - 1);
1230 #endif /* FEAT_MBYTE */
1233 * Get virtual column number of pos.
1234 * start: on the first position of this character (TAB, ctrl)
1235 * cursor: where the cursor is on this character (first char, except for TAB)
1236 * end: on the last position of this character (TAB, ctrl)
1238 * This is used very often, keep it fast!
1240 void
1241 getvcol(wp, pos, start, cursor, end)
1242 win_T *wp;
1243 pos_T *pos;
1244 colnr_T *start;
1245 colnr_T *cursor;
1246 colnr_T *end;
1248 colnr_T vcol;
1249 char_u *ptr; /* points to current char */
1250 char_u *posptr; /* points to char at pos->col */
1251 int incr;
1252 int head;
1253 int ts = wp->w_buffer->b_p_ts;
1254 int c;
1256 vcol = 0;
1257 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1258 posptr = ptr + pos->col;
1261 * This function is used very often, do some speed optimizations.
1262 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1263 * Also use this when 'list' is set but tabs take their normal size.
1265 if ((!wp->w_p_list || lcs_tab1 != NUL)
1266 #ifdef FEAT_LINEBREAK
1267 && !wp->w_p_lbr && *p_sbr == NUL
1268 #endif
1271 #ifndef FEAT_MBYTE
1272 head = 0;
1273 #endif
1274 for (;;)
1276 #ifdef FEAT_MBYTE
1277 head = 0;
1278 #endif
1279 c = *ptr;
1280 /* make sure we don't go past the end of the line */
1281 if (c == NUL)
1283 incr = 1; /* NUL at end of line only takes one column */
1284 break;
1286 /* A tab gets expanded, depending on the current column */
1287 if (c == TAB)
1288 incr = ts - (vcol % ts);
1289 else
1291 #ifdef FEAT_MBYTE
1292 if (has_mbyte)
1294 /* For utf-8, if the byte is >= 0x80, need to look at
1295 * further bytes to find the cell width. */
1296 if (enc_utf8 && c >= 0x80)
1297 incr = utf_ptr2cells(ptr);
1298 else
1299 incr = CHARSIZE(c);
1301 /* If a double-cell char doesn't fit at the end of a line
1302 * it wraps to the next line, it's like this char is three
1303 * cells wide. */
1304 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1305 && in_win_border(wp, vcol))
1307 ++incr;
1308 head = 1;
1311 else
1312 #endif
1313 incr = CHARSIZE(c);
1316 if (ptr >= posptr) /* character at pos->col */
1317 break;
1319 vcol += incr;
1320 mb_ptr_adv(ptr);
1323 else
1325 for (;;)
1327 /* A tab gets expanded, depending on the current column */
1328 head = 0;
1329 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1330 /* make sure we don't go past the end of the line */
1331 if (*ptr == NUL)
1333 incr = 1; /* NUL at end of line only takes one column */
1334 break;
1337 if (ptr >= posptr) /* character at pos->col */
1338 break;
1340 vcol += incr;
1341 mb_ptr_adv(ptr);
1344 if (start != NULL)
1345 *start = vcol + head;
1346 if (end != NULL)
1347 *end = vcol + incr - 1;
1348 if (cursor != NULL)
1350 if (*ptr == TAB
1351 && (State & NORMAL)
1352 && !wp->w_p_list
1353 && !virtual_active()
1354 #ifdef FEAT_VISUAL
1355 && !(VIsual_active
1356 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1357 #endif
1359 *cursor = vcol + incr - 1; /* cursor at end */
1360 else
1361 *cursor = vcol + head; /* cursor at start */
1366 * Get virtual cursor column in the current window, pretending 'list' is off.
1368 colnr_T
1369 getvcol_nolist(posp)
1370 pos_T *posp;
1372 int list_save = curwin->w_p_list;
1373 colnr_T vcol;
1375 curwin->w_p_list = FALSE;
1376 getvcol(curwin, posp, NULL, &vcol, NULL);
1377 curwin->w_p_list = list_save;
1378 return vcol;
1381 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1383 * Get virtual column in virtual mode.
1385 void
1386 getvvcol(wp, pos, start, cursor, end)
1387 win_T *wp;
1388 pos_T *pos;
1389 colnr_T *start;
1390 colnr_T *cursor;
1391 colnr_T *end;
1393 colnr_T col;
1394 colnr_T coladd;
1395 colnr_T endadd;
1396 # ifdef FEAT_MBYTE
1397 char_u *ptr;
1398 # endif
1400 if (virtual_active())
1402 /* For virtual mode, only want one value */
1403 getvcol(wp, pos, &col, NULL, NULL);
1405 coladd = pos->coladd;
1406 endadd = 0;
1407 # ifdef FEAT_MBYTE
1408 /* Cannot put the cursor on part of a wide character. */
1409 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1410 if (pos->col < (colnr_T)STRLEN(ptr))
1412 int c = (*mb_ptr2char)(ptr + pos->col);
1414 if (c != TAB && vim_isprintc(c))
1416 endadd = (colnr_T)(char2cells(c) - 1);
1417 if (coladd > endadd) /* past end of line */
1418 endadd = 0;
1419 else
1420 coladd = 0;
1423 # endif
1424 col += coladd;
1425 if (start != NULL)
1426 *start = col;
1427 if (cursor != NULL)
1428 *cursor = col;
1429 if (end != NULL)
1430 *end = col + endadd;
1432 else
1433 getvcol(wp, pos, start, cursor, end);
1435 #endif
1437 #if defined(FEAT_VISUAL) || defined(PROTO)
1439 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1440 * Used for Visual block mode.
1442 void
1443 getvcols(wp, pos1, pos2, left, right)
1444 win_T *wp;
1445 pos_T *pos1, *pos2;
1446 colnr_T *left, *right;
1448 colnr_T from1, from2, to1, to2;
1450 if (ltp(pos1, pos2))
1452 getvvcol(wp, pos1, &from1, NULL, &to1);
1453 getvvcol(wp, pos2, &from2, NULL, &to2);
1455 else
1457 getvvcol(wp, pos2, &from1, NULL, &to1);
1458 getvvcol(wp, pos1, &from2, NULL, &to2);
1460 if (from2 < from1)
1461 *left = from2;
1462 else
1463 *left = from1;
1464 if (to2 > to1)
1466 if (*p_sel == 'e' && from2 - 1 >= to1)
1467 *right = from2 - 1;
1468 else
1469 *right = to2;
1471 else
1472 *right = to1;
1474 #endif
1477 * skipwhite: skip over ' ' and '\t'.
1479 char_u *
1480 skipwhite(q)
1481 char_u *q;
1483 char_u *p = q;
1485 while (vim_iswhite(*p)) /* skip to next non-white */
1486 ++p;
1487 return p;
1491 * skip over digits
1493 char_u *
1494 skipdigits(q)
1495 char_u *q;
1497 char_u *p = q;
1499 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1500 ++p;
1501 return p;
1504 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1506 * skip over digits and hex characters
1508 char_u *
1509 skiphex(q)
1510 char_u *q;
1512 char_u *p = q;
1514 while (vim_isxdigit(*p)) /* skip to next non-digit */
1515 ++p;
1516 return p;
1518 #endif
1520 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1522 * skip to digit (or NUL after the string)
1524 char_u *
1525 skiptodigit(q)
1526 char_u *q;
1528 char_u *p = q;
1530 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1531 ++p;
1532 return p;
1536 * skip to hex character (or NUL after the string)
1538 char_u *
1539 skiptohex(q)
1540 char_u *q;
1542 char_u *p = q;
1544 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1545 ++p;
1546 return p;
1548 #endif
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * The library isdigit() is avoided on purpose: on some systems it also
 * considers superscript 1 to be a digit.
 * Returns 1 for '0' to '9' and 0 for anything else.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    if (c < '0' || c > '9')
        return 0;
    return 1;
}
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * The library isxdigit() is avoided on purpose: on some systems it also
 * considers superscript 1 to be a digit.
 * Returns 1 for '0' to '9', 'a' to 'f' and 'A' to 'F', 0 for anything else.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F');
}
1577 #if defined(FEAT_MBYTE) || defined(PROTO)
1579 * Vim's own character class functions. These exist because many library
1580 * islower()/toupper() etc. do not work properly: they crash when used with
1581 * invalid values or can't handle latin1 when the locale is C.
1582 * Speed is most important here.
1584 #define LATIN1LOWER 'l'
1585 #define LATIN1UPPER 'U'
1587 /* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1588 static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1589 static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1590 static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1593 vim_islower(c)
1594 int c;
1596 if (c <= '@')
1597 return FALSE;
1598 if (c >= 0x80)
1600 if (enc_utf8)
1601 return utf_islower(c);
1602 if (c >= 0x100)
1604 #ifdef HAVE_ISWLOWER
1605 if (has_mbyte)
1606 return iswlower(c);
1607 #endif
1608 /* islower() can't handle these chars and may crash */
1609 return FALSE;
1611 if (enc_latin1like)
1612 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1614 return islower(c);
1618 vim_isupper(c)
1619 int c;
1621 if (c <= '@')
1622 return FALSE;
1623 if (c >= 0x80)
1625 if (enc_utf8)
1626 return utf_isupper(c);
1627 if (c >= 0x100)
1629 #ifdef HAVE_ISWUPPER
1630 if (has_mbyte)
1631 return iswupper(c);
1632 #endif
1633 /* islower() can't handle these chars and may crash */
1634 return FALSE;
1636 if (enc_latin1like)
1637 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1639 return isupper(c);
1643 vim_toupper(c)
1644 int c;
1646 if (c <= '@')
1647 return c;
1648 if (c >= 0x80)
1650 if (enc_utf8)
1651 return utf_toupper(c);
1652 if (c >= 0x100)
1654 #ifdef HAVE_TOWUPPER
1655 if (has_mbyte)
1656 return towupper(c);
1657 #endif
1658 /* toupper() can't handle these chars and may crash */
1659 return c;
1661 if (enc_latin1like)
1662 return latin1upper[c];
1664 return TOUPPER_LOC(c);
1668 vim_tolower(c)
1669 int c;
1671 if (c <= '@')
1672 return c;
1673 if (c >= 0x80)
1675 if (enc_utf8)
1676 return utf_tolower(c);
1677 if (c >= 0x100)
1679 #ifdef HAVE_TOWLOWER
1680 if (has_mbyte)
1681 return towlower(c);
1682 #endif
1683 /* tolower() can't handle these chars and may crash */
1684 return c;
1686 if (enc_latin1like)
1687 return latin1lower[c];
1689 return TOLOWER_LOC(c);
1691 #endif
1694 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1696 char_u *
1697 skiptowhite(p)
1698 char_u *p;
1700 while (*p != ' ' && *p != '\t' && *p != NUL)
1701 ++p;
1702 return p;
1705 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1706 || defined(PROTO)
1708 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1710 char_u *
1711 skiptowhite_esc(p)
1712 char_u *p;
1714 while (*p != ' ' && *p != '\t' && *p != NUL)
1716 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1717 ++p;
1718 ++p;
1720 return p;
1722 #endif
1725 * Getdigits: Get a number from a string and skip over it.
1726 * Note: the argument is a pointer to a char_u pointer!
1728 long
1729 getdigits(pp)
1730 char_u **pp;
1732 char_u *p;
1733 long retval;
1735 p = *pp;
1736 retval = atol((char *)p);
1737 if (*p == '-') /* skip negative sign */
1738 ++p;
1739 p = skipdigits(p); /* skip to next non-digit */
1740 *pp = p;
1741 return retval;
1745 * Return TRUE if "lbuf" is empty or only contains blanks.
1748 vim_isblankline(lbuf)
1749 char_u *lbuf;
1751 char_u *p;
1753 p = skipwhite(lbuf);
1754 return (*p == NUL || *p == '\r' || *p == '\n');
1758 * Convert a string into a long and/or unsigned long, taking care of
1759 * hexadecimal and octal numbers. Accepts a '-' sign.
1760 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1761 * 0 decimal
1762 * '0' octal
1763 * 'X' hex
1764 * 'x' hex
1765 * If "len" is not NULL, the length of the number in characters is returned.
1766 * If "nptr" is not NULL, the signed result is returned in it.
1767 * If "unptr" is not NULL, the unsigned result is returned in it.
1768 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1769 * octal number.
1770 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1771 * hex number.
1773 void
1774 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1775 char_u *start;
1776 int *hexp; /* return: type of number 0 = decimal, 'x'
1777 or 'X' is hex, '0' = octal */
1778 int *len; /* return: detected length of number */
1779 int dooct; /* recognize octal number */
1780 int dohex; /* recognize hex number */
1781 long *nptr; /* return: signed result */
1782 unsigned long *unptr; /* return: unsigned result */
1784 char_u *ptr = start;
1785 int hex = 0; /* default is decimal */
1786 int negative = FALSE;
1787 unsigned long un = 0;
1788 int n;
1790 if (ptr[0] == '-')
1792 negative = TRUE;
1793 ++ptr;
1796 /* Recognize hex and octal. */
1797 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
1799 hex = ptr[1];
1800 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1801 ptr += 2; /* hexadecimal */
1802 else
1804 hex = 0; /* default is decimal */
1805 if (dooct)
1807 /* Don't interpret "0", "08" or "0129" as octal. */
1808 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1810 if (ptr[n] > '7')
1812 hex = 0; /* can't be octal */
1813 break;
1815 if (ptr[n] > '0')
1816 hex = '0'; /* assume octal */
1823 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1825 if (hex == '0' || dooct > 1)
1827 /* octal */
1828 while ('0' <= *ptr && *ptr <= '7')
1830 un = 8 * un + (unsigned long)(*ptr - '0');
1831 ++ptr;
1834 else if (hex != 0 || dohex > 1)
1836 /* hex */
1837 while (vim_isxdigit(*ptr))
1839 un = 16 * un + (unsigned long)hex2nr(*ptr);
1840 ++ptr;
1843 else
1845 /* decimal */
1846 while (VIM_ISDIGIT(*ptr))
1848 un = 10 * un + (unsigned long)(*ptr - '0');
1849 ++ptr;
1853 if (hexp != NULL)
1854 *hexp = hex;
1855 if (len != NULL)
1856 *len = (int)(ptr - start);
1857 if (nptr != NULL)
1859 if (negative) /* account for leading '-' for decimal numbers */
1860 *nptr = -(long)un;
1861 else
1862 *nptr = (long)un;
1864 if (unptr != NULL)
1865 *unptr = un;
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * Any other argument is treated like a digit: the result is "c" - '0'.
 */
    int
hex2nr(c)
    int         c;
{
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');
    return c - '0';
}
1883 #if defined(FEAT_TERMRESPONSE) \
1884 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1886 * Convert two hex characters to a byte.
1887 * Return -1 if one of the characters is not hex.
1890 hexhex2nr(p)
1891 char_u *p;
1893 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1894 return -1;
1895 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1897 #endif
1900 * Return TRUE if "str" starts with a backslash that should be removed.
1901 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1902 * backslash is not a normal file name character.
1903 * '$' is a valid file name character, we don't remove the backslash before
1904 * it. This means it is not possible to use an environment variable after a
1905 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1906 * Although "\ name" is valid, the backslash in "Program\ files" must be
1907 * removed. Assume a file name doesn't start with a space.
1908 * For multi-byte names, never remove a backslash before a non-ascii
1909 * character, assume that all multi-byte characters are valid file name
1910 * characters.
1913 rem_backslash(str)
1914 char_u *str;
1916 #ifdef BACKSLASH_IN_FILENAME
1917 return (str[0] == '\\'
1918 # ifdef FEAT_MBYTE
1919 && str[1] < 0x80
1920 # endif
1921 && (str[1] == ' '
1922 || (str[1] != NUL
1923 && str[1] != '*'
1924 && str[1] != '?'
1925 && !vim_isfilec(str[1]))));
1926 #else
1927 return (str[0] == '\\' && str[1] != NUL);
1928 #endif
1932 * Halve the number of backslashes in a file name argument.
1933 * For MS-DOS we only do this if the character after the backslash
1934 * is not a normal file character.
1936 void
1937 backslash_halve(p)
1938 char_u *p;
1940 for ( ; *p; ++p)
1941 if (rem_backslash(p))
1942 STRMOVE(p, p + 1);
1946 * backslash_halve() plus save the result in allocated memory.
1948 char_u *
1949 backslash_halve_save(p)
1950 char_u *p;
1952 char_u *res;
1954 res = vim_strsave(p);
1955 if (res == NULL)
1956 return p;
1957 backslash_halve(res);
1958 return res;
1961 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1963 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1964 * The first 64 entries have been added to map control characters defined in
1965 * ascii.h
1967 static char_u ebcdic2ascii_tab[256] =
1969 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1970 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1971 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1972 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1973 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1974 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1975 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1976 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1977 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1978 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1979 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1980 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1981 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1982 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1983 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1984 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1985 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1986 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1987 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1988 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1989 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1990 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1991 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1992 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1993 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1994 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1995 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1996 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1997 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1998 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1999 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2000 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2004 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2005 * wanting 7-bit ASCII characters out the other end.
2007 void
2008 ebcdic2ascii(buffer, len)
2009 char_u *buffer;
2010 int len;
2012 int i;
2014 for (i = 0; i < len; i++)
2015 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2017 #endif