/*
 * Provenance (web view residue, not part of the original source):
 *   commit: Merge branch 'vim' into fix/fast-join
 *   path:   [vim_extended.git] / src / charset.c
 *   blob:   bac1a20ffe9cac785ebc28b24b448f540085e20b
 */
/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
10 #include "vim.h"
12 #ifdef FEAT_LINEBREAK
13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14 #endif
16 #ifdef FEAT_MBYTE
17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
18 #endif
20 static unsigned nr2hex __ARGS((unsigned c));
22 static int chartab_initialized = FALSE;
/* b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255.  Byte index is "c >> 3", bit index is "c & 7". */
#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
56 int
57 init_chartab()
59 return buf_init_chartab(curbuf, TRUE);
62 int
63 buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
74 if (global)
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87 #ifdef EBCDIC
88 while (c < 255)
89 #else
90 while (c <= '~')
91 #endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93 #ifdef FEAT_FKMAP
94 if (p_altkeymap)
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
99 #endif
100 while (c < 256)
102 #ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113 #endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
118 #ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125 #endif
129 * Init word char flags all to FALSE
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132 #ifdef FEAT_MBYTE
133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
140 #endif
142 #ifdef FEAT_LISP
144 * In lisp mode the '-' character is included in keywords.
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148 #endif
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
154 for (i = global ? 0 : 3; i <= 3; ++i)
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
165 while (*p)
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
171 tilde = TRUE;
172 ++p;
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
185 c2 = *p++;
187 if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
188 || !(*p == NUL || *p == ','))
189 return FAIL;
191 if (c2 == -1) /* not a range */
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
199 if (c == '@')
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
205 else
206 c2 = c;
208 while (c <= c2)
210 /* Use the MB_ functions here, because isalpha() doesn't
211 * work properly when 'encoding' is "latin1" and the locale is
212 * "C". */
213 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
214 #ifdef FEAT_FKMAP
215 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
216 #endif
219 if (i == 0) /* (re)set ID flag */
221 if (tilde)
222 chartab[c] &= ~CT_ID_CHAR;
223 else
224 chartab[c] |= CT_ID_CHAR;
226 else if (i == 1) /* (re)set printable */
228 if ((c < ' '
229 #ifndef EBCDIC
230 || c > '~'
231 #endif
232 #ifdef FEAT_FKMAP
233 || (p_altkeymap
234 && (F_isalpha(c) || F_isdigit(c)))
235 #endif
237 #ifdef FEAT_MBYTE
238 /* For double-byte we keep the cell width, so
239 * that we can detect it from the first byte. */
240 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
241 #endif
244 if (tilde)
246 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
247 + ((dy_flags & DY_UHEX) ? 4 : 2);
248 chartab[c] &= ~CT_PRINT_CHAR;
250 else
252 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
253 chartab[c] |= CT_PRINT_CHAR;
257 else if (i == 2) /* (re)set fname flag */
259 if (tilde)
260 chartab[c] &= ~CT_FNAME_CHAR;
261 else
262 chartab[c] |= CT_FNAME_CHAR;
264 else /* i == 3 */ /* (re)set keyword flag */
266 if (tilde)
267 RESET_CHARTAB(buf, c);
268 else
269 SET_CHARTAB(buf, c);
272 ++c;
274 p = skip_to_option_part(p);
277 chartab_initialized = TRUE;
278 return OK;
282 * Translate any special characters in buf[bufsize] in-place.
283 * The result is a string with only printable characters, but if there is not
284 * enough room, not all characters will be translated.
286 void
287 trans_characters(buf, bufsize)
288 char_u *buf;
289 int bufsize;
291 int len; /* length of string needing translation */
292 int room; /* room in buffer after string */
293 char_u *trs; /* translated character */
294 int trs_len; /* length of trs[] */
296 len = (int)STRLEN(buf);
297 room = bufsize - len;
298 while (*buf != 0)
300 # ifdef FEAT_MBYTE
301 /* Assume a multi-byte character doesn't need translation. */
302 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
303 len -= trs_len;
304 else
305 # endif
307 trs = transchar_byte(*buf);
308 trs_len = (int)STRLEN(trs);
309 if (trs_len > 1)
311 room -= trs_len - 1;
312 if (room <= 0)
313 return;
314 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
316 mch_memmove(buf, trs, (size_t)trs_len);
317 --len;
319 buf += trs_len;
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Translate a string into allocated memory, replacing special chars with
 * printable chars.  Returns NULL when out of memory.
 * The result is obtained from alloc(); releasing it is left to the caller.
 */
    char_u *
transstr(s)
    char_u      *s;
{
    char_u      *res;
    char_u      *p;
#ifdef FEAT_MBYTE
    int         l, len, c;
    char_u      hexbuf[11];
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        /* Compute the length of the result, taking account of unprintable
         * multi-byte characters. */
        len = 0;
        p = s;
        while (*p != NUL)
        {
            if ((l = (*mb_ptr2len)(p)) > 1)
            {
                c = (*mb_ptr2char)(p);
                p += l;
                if (vim_isprintc(c))
                    len += l;
                else
                {
                    transchar_hex(hexbuf, c);
                    len += (int)STRLEN(hexbuf);
                }
            }
            else
            {
                l = byte2cells(*p++);
                if (l > 0)
                    len += l;
                else
                    len += 4;   /* illegal byte sequence */
            }
        }
        res = alloc((unsigned)(len + 1));
    }
    else
#endif
        res = alloc((unsigned)(vim_strsize(s) + 1));
    if (res != NULL)
    {
        *res = NUL;
        p = s;
        while (*p != NUL)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
            {
                c = (*mb_ptr2char)(p);
                if (vim_isprintc(c))
                    STRNCAT(res, p, l); /* append printable multi-byte char */
                else
                    transchar_hex(res + STRLEN(res), c);
                p += l;
            }
            else
#endif
                STRCAT(res, transchar_byte(*p++));
        }
    }
    return res;
}
#endif
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]"; the input is truncated to
 * "buflen - 1" bytes when it doesn't fit.
 */
    char_u *
str_foldcase(str, orglen, buf, buflen)
    char_u      *str;
    int         orglen;
    char_u      *buf;
    int         buflen;
{
    garray_T    ga;
    int         i;
    int         len = orglen;

/* Access the text being converted, wherever it is stored: in the growarray
 * when "buf" is NULL, otherwise in "buf". */
#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified. */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
    }
    else
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
    }
    if (buf == NULL)
        GA_CHAR(len) = NUL;
    else
        buf[len] = NUL;

    /* Make each character lower case. */
    i = 0;
    while (STR_CHAR(i) != NUL)
    {
#ifdef FEAT_MBYTE
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int     c = utf_ptr2char(STR_PTR(i));
                int     ol = utf_ptr2len(STR_PTR(i));
                int     lc = utf_tolower(c);

                /* Only replace the character when it is not an invalid
                 * sequence (ASCII character or more than one byte) and
                 * utf_tolower() doesn't return the original character. */
                if ((c < 0x80 || ol > 1) && c != lc)
                {
                    int     nl = utf_char2len(lc);

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (ol != nl)
                    {
                        if (nl > ol)
                        {
                            if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
                                                    : len + nl - ol >= buflen)
                            {
                                /* out of memory, keep old char */
                                lc = c;
                                nl = ol;
                            }
                        }
                        if (ol != nl)
                        {
                            if (buf == NULL)
                            {
                                STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
                                ga.ga_len += nl - ol;
                            }
                            else
                            {
                                STRMOVE(buf + i + nl, buf + i + ol);
                                len += nl - ol;
                            }
                        }
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* skip to next multi-byte char */
            i += (*mb_ptr2len)(STR_PTR(i));
        }
        else
#endif
        {
            if (buf == NULL)
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            else
                buf[i] = TOLOWER_LOC(buf[i]);
            ++i;
        }
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
#endif
515 * Catch 22: chartab[] can't be initialized before the options are
516 * initialized, and initializing options may cause transchar() to be called!
517 * When chartab_initialized == FALSE don't use chartab[].
518 * Does NOT work for multi-byte characters, c must be <= 255.
519 * Also doesn't work for the first byte of a multi-byte, "c" must be a
520 * character!
522 static char_u transchar_buf[7];
524 char_u *
525 transchar(c)
526 int c;
528 int i;
530 i = 0;
531 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
533 transchar_buf[0] = '~';
534 transchar_buf[1] = '@';
535 i = 2;
536 c = K_SECOND(c);
539 if ((!chartab_initialized && (
540 #ifdef EBCDIC
541 (c >= 64 && c < 255)
542 #else
543 (c >= ' ' && c <= '~')
544 #endif
545 #ifdef FEAT_FKMAP
546 || F_ischar(c)
547 #endif
548 )) || (c < 256 && vim_isprintc_strict(c)))
550 /* printable character */
551 transchar_buf[i] = c;
552 transchar_buf[i + 1] = NUL;
554 else
555 transchar_nonprint(transchar_buf + i, c);
556 return transchar_buf;
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Like transchar(), but called with a byte instead of a character.  Checks
 * for an illegal UTF-8 byte.
 * The result is in the same static buffer that transchar() uses.
 */
    char_u *
transchar_byte(c)
    int         c;
{
    if (enc_utf8 && c >= 0x80)
    {
        /* A lone byte >= 0x80 is never a character in UTF-8. */
        transchar_nonprint(transchar_buf, c);
        return transchar_buf;
    }
    return transchar(c);
}
#endif
578 * Convert non-printable character to two or more printable characters in
579 * "buf[]". "buf" needs to be able to hold five bytes.
580 * Does NOT work for multi-byte characters, c must be <= 255.
582 void
583 transchar_nonprint(buf, c)
584 char_u *buf;
585 int c;
587 if (c == NL)
588 c = NUL; /* we use newline in place of a NUL */
589 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
590 c = NL; /* we use CR in place of NL in this case */
592 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
593 transchar_hex(buf, c);
595 #ifdef EBCDIC
596 /* For EBCDIC only the characters 0-63 and 255 are not printable */
597 else if (CtrlChar(c) != 0 || c == DEL)
598 #else
599 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
600 #endif
602 buf[0] = '^';
603 #ifdef EBCDIC
604 if (c == DEL)
605 buf[1] = '?'; /* DEL displayed as ^? */
606 else
607 buf[1] = CtrlChar(c);
608 #else
609 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
610 #endif
612 buf[2] = NUL;
614 #ifdef FEAT_MBYTE
615 else if (enc_utf8 && c >= 0x80)
617 transchar_hex(buf, c);
619 #endif
620 #ifndef EBCDIC
621 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
623 buf[0] = '|';
624 buf[1] = c - 0x80;
625 buf[2] = NUL;
627 #else
628 else if (c < 64)
630 buf[0] = '~';
631 buf[1] = MetaChar(c);
632 buf[2] = NUL;
634 #endif
635 else /* 0x80 - 0x9f and 0xff */
638 * TODO: EBCDIC I don't know what to do with this chars, so I display
639 * them as '~?' for now
641 buf[0] = '~';
642 #ifdef EBCDIC
643 buf[1] = '?'; /* 0xff displayed as ~? */
644 #else
645 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
646 #endif
647 buf[2] = NUL;
651 void
652 transchar_hex(buf, c)
653 char_u *buf;
654 int c;
656 int i = 0;
658 buf[0] = '<';
659 #ifdef FEAT_MBYTE
660 if (c > 255)
662 buf[++i] = nr2hex((unsigned)c >> 12);
663 buf[++i] = nr2hex((unsigned)c >> 8);
665 #endif
666 buf[++i] = nr2hex((unsigned)c >> 4);
667 buf[++i] = nr2hex((unsigned)c);
668 buf[++i] = '>';
669 buf[++i] = NUL;
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 * The higher bits of "c" are ignored.
 */
    static unsigned
nr2hex(c)
    unsigned    c;
{
    if ((c & 0xf) <= 9)
        return (c & 0xf) + '0';
    return (c & 0xf) - 10 + 'a';
}
687 * Return number of display cells occupied by byte "b".
688 * Caller must make sure 0 <= b <= 255.
689 * For multi-byte mode "b" must be the first byte of a character.
690 * A TAB is counted as two cells: "^I".
691 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
692 * cells depends on further bytes.
695 byte2cells(b)
696 int b;
698 #ifdef FEAT_MBYTE
699 if (enc_utf8 && b >= 0x80)
700 return 0;
701 #endif
702 return (chartab[b] & CT_CELL_MASK);
706 * Return number of display cells occupied by character "c".
707 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
708 * A TAB is counted as two cells: "^I" or four: "<09>".
711 char2cells(c)
712 int c;
714 if (IS_SPECIAL(c))
715 return char2cells(K_SECOND(c)) + 2;
716 #ifdef FEAT_MBYTE
717 if (c >= 0x80)
719 /* UTF-8: above 0x80 need to check the value */
720 if (enc_utf8)
721 return utf_char2cells(c);
722 /* DBCS: double-byte means double-width, except for euc-jp with first
723 * byte 0x8e */
724 if (enc_dbcs != 0 && c >= 0x100)
726 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
727 return 1;
728 return 2;
731 #endif
732 return (chartab[c & 0xff] & CT_CELL_MASK);
736 * Return number of display cells occupied by character at "*p".
737 * A TAB is counted as two cells: "^I" or four: "<09>".
740 ptr2cells(p)
741 char_u *p;
743 #ifdef FEAT_MBYTE
744 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
745 if (enc_utf8 && *p >= 0x80)
746 return utf_ptr2cells(p);
747 /* For DBCS we can tell the cell count from the first byte. */
748 #endif
749 return (chartab[*p] & CT_CELL_MASK);
753 * Return the number of characters string "s" will take on the screen,
754 * counting TABs as two characters: "^I".
757 vim_strsize(s)
758 char_u *s;
760 return vim_strnsize(s, (int)MAXCOL);
764 * Return the number of characters string "s[len]" will take on the screen,
765 * counting TABs as two characters: "^I".
768 vim_strnsize(s, len)
769 char_u *s;
770 int len;
772 int size = 0;
774 while (*s != NUL && --len >= 0)
776 #ifdef FEAT_MBYTE
777 if (has_mbyte)
779 int l = (*mb_ptr2len)(s);
781 size += ptr2cells(s);
782 s += l;
783 len -= l - 1;
785 else
786 #endif
787 size += byte2cells(*s++);
789 return size;
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * Note that this expands to a complete if/else in which both branches
 * return from the calling function.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of screen cells the character at "p" takes when it is
 * displayed at column "col" in the current window; a TAB is expanded using
 * 'tabstop' of the current buffer.
 */
    int
chartabsize(p, col)
    char_u      *p;
    colnr_T     col;
{
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and its buffer instead of the
 * current ones.
 */
    static int
win_chartabsize(wp, p, col)
    win_T       *wp;
    char_u      *p;
    colnr_T     col;
{
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
832 * return the number of characters the string 's' will take on the screen,
833 * taking into account the size of a tab
836 linetabsize(s)
837 char_u *s;
839 colnr_T col = 0;
841 while (*s != NUL)
842 col += lbr_chartabsize_adv(&s, col);
843 return (int)col;
847 * Like linetabsize(), but for a given window instead of the current one.
850 win_linetabsize(wp, p, len)
851 win_T *wp;
852 char_u *p;
853 colnr_T len;
855 colnr_T col = 0;
856 char_u *s;
858 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
859 col += win_lbr_chartabsize(wp, s, col, NULL);
860 return (int)col;
864 * Return TRUE if 'c' is a normal identifier character:
865 * Letters and characters from the 'isident' option.
868 vim_isIDc(c)
869 int c;
871 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
875 * return TRUE if 'c' is a keyword character: Letters and characters from
876 * 'iskeyword' option for current buffer.
877 * For multi-byte characters mb_get_class() is used (builtin rules).
880 vim_iswordc(c)
881 int c;
883 #ifdef FEAT_MBYTE
884 if (c >= 0x100)
886 if (enc_dbcs != 0)
887 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
888 if (enc_utf8)
889 return utf_class(c) >= 2;
891 #endif
892 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
896 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
899 vim_iswordp(p)
900 char_u *p;
902 #ifdef FEAT_MBYTE
903 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
904 return mb_get_class(p) >= 2;
905 #endif
906 return GET_CHARTAB(curbuf, *p) != 0;
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Like vim_iswordp(), but use the keyword flags of buffer "buf" instead of
 * those of the current buffer.
 */
    int
vim_iswordc_buf(p, buf)
    char_u      *p;
    buf_T       *buf;
{
# ifdef FEAT_MBYTE
    if (has_mbyte && MB_BYTE2LEN(*p) > 1)
        return mb_get_class(p) >= 2;
# endif
    return (GET_CHARTAB(buf, *p) != 0);
}
#endif
924 * return TRUE if 'c' is a valid file-name character
925 * Assume characters above 0x100 are valid (multi-byte).
928 vim_isfilec(c)
929 int c;
931 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
935 * return TRUE if 'c' is a valid file-name character or a wildcard character
936 * Assume characters above 0x100 are valid (multi-byte).
937 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
938 * returns false.
941 vim_isfilec_or_wc(c)
942 int c;
944 char_u buf[2];
946 buf[0] = (char_u)c;
947 buf[1] = NUL;
948 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
952 * return TRUE if 'c' is a printable character
953 * Assume characters above 0x100 are printable (multi-byte), except for
954 * Unicode.
957 vim_isprintc(c)
958 int c;
960 #ifdef FEAT_MBYTE
961 if (enc_utf8 && c >= 0x100)
962 return utf_printable(c);
963 #endif
964 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
968 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
969 * byte of a double-byte character.
972 vim_isprintc_strict(c)
973 int c;
975 #ifdef FEAT_MBYTE
976 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
977 return FALSE;
978 if (enc_utf8 && c >= 0x100)
979 return utf_printable(c);
980 #endif
981 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
985 * like chartabsize(), but also check for line breaks on the screen
988 lbr_chartabsize(s, col)
989 unsigned char *s;
990 colnr_T col;
992 #ifdef FEAT_LINEBREAK
993 if (!curwin->w_p_lbr && *p_sbr == NUL)
995 #endif
996 #ifdef FEAT_MBYTE
997 if (curwin->w_p_wrap)
998 return win_nolbr_chartabsize(curwin, s, col, NULL);
999 #endif
1000 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1001 #ifdef FEAT_LINEBREAK
1003 return win_lbr_chartabsize(curwin, s, col, NULL);
1004 #endif
1008 * Call lbr_chartabsize() and advance the pointer.
1011 lbr_chartabsize_adv(s, col)
1012 char_u **s;
1013 colnr_T col;
1015 int retval;
1017 retval = lbr_chartabsize(*s, col);
1018 mb_ptr_adv(*s);
1019 return retval;
1023 * This function is used very often, keep it fast!!!!
1025 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1026 * string at start of line. Warning: *headp is only set if it's a non-zero
1027 * value, init to 0 before calling.
1030 win_lbr_chartabsize(wp, s, col, headp)
1031 win_T *wp;
1032 char_u *s;
1033 colnr_T col;
1034 int *headp UNUSED;
1036 #ifdef FEAT_LINEBREAK
1037 int c;
1038 int size;
1039 colnr_T col2;
1040 colnr_T colmax;
1041 int added;
1042 # ifdef FEAT_MBYTE
1043 int mb_added = 0;
1044 # else
1045 # define mb_added 0
1046 # endif
1047 int numberextra;
1048 char_u *ps;
1049 int tab_corr = (*s == TAB);
1050 int n;
1053 * No 'linebreak' and 'showbreak': return quickly.
1055 if (!wp->w_p_lbr && *p_sbr == NUL)
1056 #endif
1058 #ifdef FEAT_MBYTE
1059 if (wp->w_p_wrap)
1060 return win_nolbr_chartabsize(wp, s, col, headp);
1061 #endif
1062 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1065 #ifdef FEAT_LINEBREAK
1067 * First get normal size, without 'linebreak'
1069 size = win_chartabsize(wp, s, col);
1070 c = *s;
1073 * If 'linebreak' set check at a blank before a non-blank if the line
1074 * needs a break here
1076 if (wp->w_p_lbr
1077 && vim_isbreak(c)
1078 && !vim_isbreak(s[1])
1079 && !wp->w_p_list
1080 && wp->w_p_wrap
1081 # ifdef FEAT_VERTSPLIT
1082 && wp->w_width != 0
1083 # endif
1087 * Count all characters from first non-blank after a blank up to next
1088 * non-blank after a blank.
1090 numberextra = win_col_off(wp);
1091 col2 = col;
1092 colmax = (colnr_T)(W_WIDTH(wp) - numberextra);
1093 if (col >= colmax)
1095 n = colmax + win_col_off2(wp);
1096 if (n > 0)
1097 colmax += (((col - colmax) / n) + 1) * n;
1100 for (;;)
1102 ps = s;
1103 mb_ptr_adv(s);
1104 c = *s;
1105 if (!(c != NUL
1106 && (vim_isbreak(c)
1107 || (!vim_isbreak(c)
1108 && (col2 == col || !vim_isbreak(*ps))))))
1109 break;
1111 col2 += win_chartabsize(wp, s, col2);
1112 if (col2 >= colmax) /* doesn't fit */
1114 size = colmax - col;
1115 tab_corr = FALSE;
1116 break;
1120 # ifdef FEAT_MBYTE
1121 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1122 && wp->w_p_wrap && in_win_border(wp, col))
1124 ++size; /* Count the ">" in the last column. */
1125 mb_added = 1;
1127 # endif
1130 * May have to add something for 'showbreak' string at start of line
1131 * Set *headp to the size of what we add.
1133 added = 0;
1134 if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1136 numberextra = win_col_off(wp);
1137 col += numberextra + mb_added;
1138 if (col >= (colnr_T)W_WIDTH(wp))
1140 col -= W_WIDTH(wp);
1141 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1142 if (numberextra > 0)
1143 col = col % numberextra;
1145 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1147 added = vim_strsize(p_sbr);
1148 if (tab_corr)
1149 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1150 else
1151 size += added;
1152 if (col != 0)
1153 added = 0;
1156 if (headp != NULL)
1157 *headp = added + mb_added;
1158 return size;
1159 #endif
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
 * 'wrap' is on.  This means we need to check for a double-byte character that
 * doesn't fit at the end of the screen line.
 * "*headp" is set to 1 only in that case, otherwise it is left alone.
 */
    static int
win_nolbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp;
{
    int         n;

    if (*s == TAB && (!wp->w_p_list || lcs_tab1))
    {
        n = wp->w_buffer->b_p_ts;
        return (int)(n - (col % n));
    }
    n = ptr2cells(s);
    /* Add one cell for a double-width character in the last column of the
     * window, displayed with a ">". */
    if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
    {
        if (headp != NULL)
            *headp = 1;
        return 3;
    }
    return n;
}

/*
 * Return TRUE if virtual column "vcol" is in the rightmost column of window
 * "wp".
 */
    int
in_win_border(wp, vcol)
    win_T       *wp;
    colnr_T     vcol;
{
    int         width1;         /* width of first line (after line number) */
    int         width2;         /* width of further lines */

#ifdef FEAT_VERTSPLIT
    if (wp->w_width == 0)       /* there is no border */
        return FALSE;
#endif
    width1 = W_WIDTH(wp) - win_col_off(wp);
    if ((int)vcol < width1 - 1)
        return FALSE;
    if ((int)vcol == width1 - 1)
        return TRUE;
    width2 = width1 + win_col_off2(wp);
    /* In a very narrow window there may be no room for text on further
     * lines; avoid dividing by zero below. */
    if (width2 <= 0)
        return FALSE;
    return ((vcol - width1) % width2 == width2 - 1);
}
#endif /* FEAT_MBYTE */
1221 * Get virtual column number of pos.
1222 * start: on the first position of this character (TAB, ctrl)
1223 * cursor: where the cursor is on this character (first char, except for TAB)
1224 * end: on the last position of this character (TAB, ctrl)
1226 * This is used very often, keep it fast!
1228 void
1229 getvcol(wp, pos, start, cursor, end)
1230 win_T *wp;
1231 pos_T *pos;
1232 colnr_T *start;
1233 colnr_T *cursor;
1234 colnr_T *end;
1236 colnr_T vcol;
1237 char_u *ptr; /* points to current char */
1238 char_u *posptr; /* points to char at pos->col */
1239 int incr;
1240 int head;
1241 int ts = wp->w_buffer->b_p_ts;
1242 int c;
1244 vcol = 0;
1245 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1246 posptr = ptr + pos->col;
1249 * This function is used very often, do some speed optimizations.
1250 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1251 * Also use this when 'list' is set but tabs take their normal size.
1253 if ((!wp->w_p_list || lcs_tab1 != NUL)
1254 #ifdef FEAT_LINEBREAK
1255 && !wp->w_p_lbr && *p_sbr == NUL
1256 #endif
1259 #ifndef FEAT_MBYTE
1260 head = 0;
1261 #endif
1262 for (;;)
1264 #ifdef FEAT_MBYTE
1265 head = 0;
1266 #endif
1267 c = *ptr;
1268 /* make sure we don't go past the end of the line */
1269 if (c == NUL)
1271 incr = 1; /* NUL at end of line only takes one column */
1272 break;
1274 /* A tab gets expanded, depending on the current column */
1275 if (c == TAB)
1276 incr = ts - (vcol % ts);
1277 else
1279 #ifdef FEAT_MBYTE
1280 if (has_mbyte)
1282 /* For utf-8, if the byte is >= 0x80, need to look at
1283 * further bytes to find the cell width. */
1284 if (enc_utf8 && c >= 0x80)
1285 incr = utf_ptr2cells(ptr);
1286 else
1287 incr = CHARSIZE(c);
1289 /* If a double-cell char doesn't fit at the end of a line
1290 * it wraps to the next line, it's like this char is three
1291 * cells wide. */
1292 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1293 && in_win_border(wp, vcol))
1295 ++incr;
1296 head = 1;
1299 else
1300 #endif
1301 incr = CHARSIZE(c);
1304 if (ptr >= posptr) /* character at pos->col */
1305 break;
1307 vcol += incr;
1308 mb_ptr_adv(ptr);
1311 else
1313 for (;;)
1315 /* A tab gets expanded, depending on the current column */
1316 head = 0;
1317 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1318 /* make sure we don't go past the end of the line */
1319 if (*ptr == NUL)
1321 incr = 1; /* NUL at end of line only takes one column */
1322 break;
1325 if (ptr >= posptr) /* character at pos->col */
1326 break;
1328 vcol += incr;
1329 mb_ptr_adv(ptr);
1332 if (start != NULL)
1333 *start = vcol + head;
1334 if (end != NULL)
1335 *end = vcol + incr - 1;
1336 if (cursor != NULL)
1338 if (*ptr == TAB
1339 && (State & NORMAL)
1340 && !wp->w_p_list
1341 && !virtual_active()
1342 #ifdef FEAT_VISUAL
1343 && !(VIsual_active
1344 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1345 #endif
1347 *cursor = vcol + incr - 1; /* cursor at end */
1348 else
1349 *cursor = vcol + head; /* cursor at start */
1354 * Get virtual cursor column in the current window, pretending 'list' is off.
1356 colnr_T
1357 getvcol_nolist(posp)
1358 pos_T *posp;
1360 int list_save = curwin->w_p_list;
1361 colnr_T vcol;
1363 curwin->w_p_list = FALSE;
1364 getvcol(curwin, posp, NULL, &vcol, NULL);
1365 curwin->w_p_list = list_save;
1366 return vcol;
#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Get virtual column in virtual mode.
 * Like getvcol(), but when virtual_active() returns TRUE "pos->coladd" is
 * taken into account.  Each of "start", "cursor" and "end" may be NULL.
 */
    void
getvvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     col;
    colnr_T     coladd;
    colnr_T     endadd;
# ifdef FEAT_MBYTE
    char_u      *ptr;
# endif

    if (virtual_active())
    {
        /* For virtual mode, only want one value */
        getvcol(wp, pos, &col, NULL, NULL);

        coladd = pos->coladd;
        endadd = 0;
# ifdef FEAT_MBYTE
        /* Cannot put the cursor on part of a wide character. */
        ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
        if (pos->col < (colnr_T)STRLEN(ptr))
        {
            int c = (*mb_ptr2char)(ptr + pos->col);

            if (c != TAB && vim_isprintc(c))
            {
                endadd = (colnr_T)(char2cells(c) - 1);
                if (coladd > endadd)    /* past end of line */
                    endadd = 0;
                else
                    coladd = 0;
            }
        }
# endif
        col += coladd;
        if (start != NULL)
            *start = col;
        if (cursor != NULL)
            *cursor = col;
        if (end != NULL)
            *end = col + endadd;
    }
    else
        getvcol(wp, pos, start, cursor, end);
}
#endif
#if defined(FEAT_VISUAL) || defined(PROTO)
/*
 * Get the leftmost and rightmost virtual column of pos1 and pos2.
 * Used for Visual block mode.
 * The results are stored in "*left" and "*right", which must not be NULL.
 */
    void
getvcols(wp, pos1, pos2, left, right)
    win_T       *wp;
    pos_T       *pos1, *pos2;
    colnr_T     *left, *right;
{
    colnr_T     from1, from2, to1, to2;

    /* Make from1/to1 belong to the position that comes first. */
    if (ltp(pos1, pos2))
    {
        getvvcol(wp, pos1, &from1, NULL, &to1);
        getvvcol(wp, pos2, &from2, NULL, &to2);
    }
    else
    {
        getvvcol(wp, pos2, &from1, NULL, &to1);
        getvvcol(wp, pos1, &from2, NULL, &to2);
    }
    if (from2 < from1)
        *left = from2;
    else
        *left = from1;
    if (to2 > to1)
    {
        /* When 'selection' starts with 'e' the character at the later
         * position is left out. */
        if (*p_sel == 'e' && from2 - 1 >= to1)
            *right = from2 - 1;
        else
            *right = to2;
    }
    else
        *right = to1;
}
#endif
1465 * skipwhite: skip over ' ' and '\t'.
1467 char_u *
1468 skipwhite(q)
1469 char_u *q;
1471 char_u *p = q;
1473 while (vim_iswhite(*p)) /* skip to next non-white */
1474 ++p;
1475 return p;
1479 * skip over digits
1481 char_u *
1482 skipdigits(q)
1483 char_u *q;
1485 char_u *p = q;
1487 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1488 ++p;
1489 return p;
1492 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1494 * skip over digits and hex characters
1496 char_u *
1497 skiphex(q)
1498 char_u *q;
1500 char_u *p = q;
1502 while (vim_isxdigit(*p)) /* skip to next non-digit */
1503 ++p;
1504 return p;
1506 #endif
1508 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1510 * skip to digit (or NUL after the string)
1512 char_u *
1513 skiptodigit(q)
1514 char_u *q;
1516 char_u *p = q;
1518 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1519 ++p;
1520 return p;
1524 * skip to hex character (or NUL after the string)
1526 char_u *
1527 skiptohex(q)
1528 char_u *q;
1530 char_u *p = q;
1532 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1533 ++p;
1534 return p;
1536 #endif
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 *
 * Returns 1 when "c" is one of '0' - '9', 0 for any other value.
 */
    int
vim_isdigit(c)
    int         c;
{
    if (c < '0')
        return 0;
    return (c <= '9');
}
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 *
 * Returns 1 when "c" is one of '0' - '9', 'a' - 'f' or 'A' - 'F', 0 for any
 * other value.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F');
}
1565 #if defined(FEAT_MBYTE) || defined(PROTO)
1567 * Vim's own character class functions. These exist because many library
1568 * islower()/toupper() etc. do not work properly: they crash when used with
1569 * invalid values or can't handle latin1 when the locale is C.
1570 * Speed is most important here.
1572 #define LATIN1LOWER 'l'
1573 #define LATIN1UPPER 'U'
1575 /* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1576 static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1577 static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1578 static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1581 vim_islower(c)
1582 int c;
1584 if (c <= '@')
1585 return FALSE;
1586 if (c >= 0x80)
1588 if (enc_utf8)
1589 return utf_islower(c);
1590 if (c >= 0x100)
1592 #ifdef HAVE_ISWLOWER
1593 if (has_mbyte)
1594 return iswlower(c);
1595 #endif
1596 /* islower() can't handle these chars and may crash */
1597 return FALSE;
1599 if (enc_latin1like)
1600 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1602 return islower(c);
1606 vim_isupper(c)
1607 int c;
1609 if (c <= '@')
1610 return FALSE;
1611 if (c >= 0x80)
1613 if (enc_utf8)
1614 return utf_isupper(c);
1615 if (c >= 0x100)
1617 #ifdef HAVE_ISWUPPER
1618 if (has_mbyte)
1619 return iswupper(c);
1620 #endif
1621 /* islower() can't handle these chars and may crash */
1622 return FALSE;
1624 if (enc_latin1like)
1625 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1627 return isupper(c);
1631 vim_toupper(c)
1632 int c;
1634 if (c <= '@')
1635 return c;
1636 if (c >= 0x80)
1638 if (enc_utf8)
1639 return utf_toupper(c);
1640 if (c >= 0x100)
1642 #ifdef HAVE_TOWUPPER
1643 if (has_mbyte)
1644 return towupper(c);
1645 #endif
1646 /* toupper() can't handle these chars and may crash */
1647 return c;
1649 if (enc_latin1like)
1650 return latin1upper[c];
1652 return TOUPPER_LOC(c);
1656 vim_tolower(c)
1657 int c;
1659 if (c <= '@')
1660 return c;
1661 if (c >= 0x80)
1663 if (enc_utf8)
1664 return utf_tolower(c);
1665 if (c >= 0x100)
1667 #ifdef HAVE_TOWLOWER
1668 if (has_mbyte)
1669 return towlower(c);
1670 #endif
1671 /* tolower() can't handle these chars and may crash */
1672 return c;
1674 if (enc_latin1like)
1675 return latin1lower[c];
1677 return TOLOWER_LOC(c);
1679 #endif
1682 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1684 char_u *
1685 skiptowhite(p)
1686 char_u *p;
1688 while (*p != ' ' && *p != '\t' && *p != NUL)
1689 ++p;
1690 return p;
1693 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1694 || defined(PROTO)
1696 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1698 char_u *
1699 skiptowhite_esc(p)
1700 char_u *p;
1702 while (*p != ' ' && *p != '\t' && *p != NUL)
1704 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1705 ++p;
1706 ++p;
1708 return p;
1710 #endif
1713 * Getdigits: Get a number from a string and skip over it.
1714 * Note: the argument is a pointer to a char_u pointer!
1716 long
1717 getdigits(pp)
1718 char_u **pp;
1720 char_u *p;
1721 long retval;
1723 p = *pp;
1724 retval = atol((char *)p);
1725 if (*p == '-') /* skip negative sign */
1726 ++p;
1727 p = skipdigits(p); /* skip to next non-digit */
1728 *pp = p;
1729 return retval;
1733 * Return TRUE if "lbuf" is empty or only contains blanks.
1736 vim_isblankline(lbuf)
1737 char_u *lbuf;
1739 char_u *p;
1741 p = skipwhite(lbuf);
1742 return (*p == NUL || *p == '\r' || *p == '\n');
1746 * Convert a string into a long and/or unsigned long, taking care of
1747 * hexadecimal and octal numbers. Accepts a '-' sign.
1748 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1749 * 0 decimal
1750 * '0' octal
1751 * 'X' hex
1752 * 'x' hex
1753 * If "len" is not NULL, the length of the number in characters is returned.
1754 * If "nptr" is not NULL, the signed result is returned in it.
1755 * If "unptr" is not NULL, the unsigned result is returned in it.
1756 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1757 * octal number.
1758 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1759 * hex number.
1761 void
1762 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1763 char_u *start;
1764 int *hexp; /* return: type of number 0 = decimal, 'x'
1765 or 'X' is hex, '0' = octal */
1766 int *len; /* return: detected length of number */
1767 int dooct; /* recognize octal number */
1768 int dohex; /* recognize hex number */
1769 long *nptr; /* return: signed result */
1770 unsigned long *unptr; /* return: unsigned result */
1772 char_u *ptr = start;
1773 int hex = 0; /* default is decimal */
1774 int negative = FALSE;
1775 unsigned long un = 0;
1776 int n;
1778 if (ptr[0] == '-')
1780 negative = TRUE;
1781 ++ptr;
1784 /* Recognize hex and octal. */
1785 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
1787 hex = ptr[1];
1788 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1789 ptr += 2; /* hexadecimal */
1790 else
1792 hex = 0; /* default is decimal */
1793 if (dooct)
1795 /* Don't interpret "0", "08" or "0129" as octal. */
1796 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1798 if (ptr[n] > '7')
1800 hex = 0; /* can't be octal */
1801 break;
1803 if (ptr[n] > '0')
1804 hex = '0'; /* assume octal */
1811 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1813 if (hex == '0' || dooct > 1)
1815 /* octal */
1816 while ('0' <= *ptr && *ptr <= '7')
1818 un = 8 * un + (unsigned long)(*ptr - '0');
1819 ++ptr;
1822 else if (hex != 0 || dohex > 1)
1824 /* hex */
1825 while (vim_isxdigit(*ptr))
1827 un = 16 * un + (unsigned long)hex2nr(*ptr);
1828 ++ptr;
1831 else
1833 /* decimal */
1834 while (VIM_ISDIGIT(*ptr))
1836 un = 10 * un + (unsigned long)(*ptr - '0');
1837 ++ptr;
1841 if (hexp != NULL)
1842 *hexp = hex;
1843 if (len != NULL)
1844 *len = (int)(ptr - start);
1845 if (nptr != NULL)
1847 if (negative) /* account for leading '-' for decimal numbers */
1848 *nptr = -(long)un;
1849 else
1850 *nptr = (long)un;
1852 if (unptr != NULL)
1853 *unptr = un;
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 *
 * No validation is done: any character outside the two letter ranges is
 * treated as a decimal digit and yields c - '0'.
 */
    int
hex2nr(c)
    int         c;
{
    int         first = '0';            /* first character of the range */
    int         bias = 0;               /* value of that first character */

    if (c >= 'a' && c <= 'f')
    {
        first = 'a';
        bias = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        first = 'A';
        bias = 10;
    }
    return c - first + bias;
}
1871 #if defined(FEAT_TERMRESPONSE) \
1872 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1874 * Convert two hex characters to a byte.
1875 * Return -1 if one of the characters is not hex.
1878 hexhex2nr(p)
1879 char_u *p;
1881 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1882 return -1;
1883 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1885 #endif
1888 * Return TRUE if "str" starts with a backslash that should be removed.
1889 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1890 * backslash is not a normal file name character.
1891 * '$' is a valid file name character, we don't remove the backslash before
1892 * it. This means it is not possible to use an environment variable after a
1893 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1894 * Although "\ name" is valid, the backslash in "Program\ files" must be
1895 * removed. Assume a file name doesn't start with a space.
1896 * For multi-byte names, never remove a backslash before a non-ascii
1897 * character, assume that all multi-byte characters are valid file name
1898 * characters.
1901 rem_backslash(str)
1902 char_u *str;
1904 #ifdef BACKSLASH_IN_FILENAME
1905 return (str[0] == '\\'
1906 # ifdef FEAT_MBYTE
1907 && str[1] < 0x80
1908 # endif
1909 && (str[1] == ' '
1910 || (str[1] != NUL
1911 && str[1] != '*'
1912 && str[1] != '?'
1913 && !vim_isfilec(str[1]))));
1914 #else
1915 return (str[0] == '\\' && str[1] != NUL);
1916 #endif
1920 * Halve the number of backslashes in a file name argument.
1921 * For MS-DOS we only do this if the character after the backslash
1922 * is not a normal file character.
1924 void
1925 backslash_halve(p)
1926 char_u *p;
1928 for ( ; *p; ++p)
1929 if (rem_backslash(p))
1930 STRMOVE(p, p + 1);
1934 * backslash_halve() plus save the result in allocated memory.
1936 char_u *
1937 backslash_halve_save(p)
1938 char_u *p;
1940 char_u *res;
1942 res = vim_strsave(p);
1943 if (res == NULL)
1944 return p;
1945 backslash_halve(res);
1946 return res;
1949 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1951 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1952 * The first 64 entries have been added to map control characters defined in
1953 * ascii.h
1955 static char_u ebcdic2ascii_tab[256] =
1957 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1958 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1959 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1960 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1961 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1962 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1963 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1964 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1965 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1966 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1967 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1968 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1969 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1970 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1971 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1972 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1973 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1974 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1975 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1976 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1977 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1978 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1979 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1980 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1981 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1982 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1983 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1984 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1985 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1986 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1987 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1988 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1992 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1993 * wanting 7-bit ASCII characters out the other end.
1995 void
1996 ebcdic2ascii(buffer, len)
1997 char_u *buffer;
1998 int len;
2000 int i;
2002 for (i = 0; i < len; i++)
2003 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2005 #endif