Merge branch 'feat/tagfunc'
[vim_extended.git] / src / charset.c
blobe65bc26900a7a942e5c4e12f5be9241332831c89
/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
10 #include "vim.h"
12 #ifdef FEAT_LINEBREAK
13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14 #endif
16 #ifdef FEAT_MBYTE
17 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
18 #endif
20 static unsigned nr2hex __ARGS((unsigned c));
22 static int chartab_initialized = FALSE;
24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
25 * characters 0-255. */
26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
56 int
57 init_chartab()
59 return buf_init_chartab(curbuf, TRUE);
62 int
63 buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
74 if (global)
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87 #ifdef EBCDIC
88 while (c < 255)
89 #else
90 while (c <= '~')
91 #endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93 #ifdef FEAT_FKMAP
94 if (p_altkeymap)
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
99 #endif
100 while (c < 256)
102 #ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113 #endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
118 #ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125 #endif
129 * Init word char flags all to FALSE
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132 #ifdef FEAT_MBYTE
133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
140 #endif
142 #ifdef FEAT_LISP
144 * In lisp mode the '-' character is included in keywords.
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148 #endif
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
154 for (i = global ? 0 : 3; i <= 3; ++i)
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
165 while (*p)
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
171 tilde = TRUE;
172 ++p;
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 #ifdef FEAT_MBYTE
178 if (has_mbyte)
179 c = mb_ptr2char_adv(&p);
180 else
181 #endif
182 c = *p++;
183 c2 = -1;
184 if (*p == '-' && p[1] != NUL)
186 ++p;
187 if (VIM_ISDIGIT(*p))
188 c2 = getdigits(&p);
189 else
190 #ifdef FEAT_MBYTE
191 if (has_mbyte)
192 c2 = mb_ptr2char_adv(&p);
193 else
194 #endif
195 c2 = *p++;
197 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
198 || !(*p == NUL || *p == ','))
199 return FAIL;
201 if (c2 == -1) /* not a range */
204 * A single '@' (not "@-@"):
205 * Decide on letters being ID/printable/keyword chars with
206 * standard function isalpha(). This takes care of locale for
207 * single-byte characters).
209 if (c == '@')
211 do_isalpha = TRUE;
212 c = 1;
213 c2 = 255;
215 else
216 c2 = c;
218 while (c <= c2)
220 /* Use the MB_ functions here, because isalpha() doesn't
221 * work properly when 'encoding' is "latin1" and the locale is
222 * "C". */
223 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
224 #ifdef FEAT_FKMAP
225 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
226 #endif
229 if (i == 0) /* (re)set ID flag */
231 if (tilde)
232 chartab[c] &= ~CT_ID_CHAR;
233 else
234 chartab[c] |= CT_ID_CHAR;
236 else if (i == 1) /* (re)set printable */
238 if ((c < ' '
239 #ifndef EBCDIC
240 || c > '~'
241 #endif
242 #ifdef FEAT_FKMAP
243 || (p_altkeymap
244 && (F_isalpha(c) || F_isdigit(c)))
245 #endif
247 #ifdef FEAT_MBYTE
248 /* For double-byte we keep the cell width, so
249 * that we can detect it from the first byte. */
250 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
251 #endif
254 if (tilde)
256 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
257 + ((dy_flags & DY_UHEX) ? 4 : 2);
258 chartab[c] &= ~CT_PRINT_CHAR;
260 else
262 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
263 chartab[c] |= CT_PRINT_CHAR;
267 else if (i == 2) /* (re)set fname flag */
269 if (tilde)
270 chartab[c] &= ~CT_FNAME_CHAR;
271 else
272 chartab[c] |= CT_FNAME_CHAR;
274 else /* i == 3 */ /* (re)set keyword flag */
276 if (tilde)
277 RESET_CHARTAB(buf, c);
278 else
279 SET_CHARTAB(buf, c);
282 ++c;
284 p = skip_to_option_part(p);
287 chartab_initialized = TRUE;
288 return OK;
292 * Translate any special characters in buf[bufsize] in-place.
293 * The result is a string with only printable characters, but if there is not
294 * enough room, not all characters will be translated.
296 void
297 trans_characters(buf, bufsize)
298 char_u *buf;
299 int bufsize;
301 int len; /* length of string needing translation */
302 int room; /* room in buffer after string */
303 char_u *trs; /* translated character */
304 int trs_len; /* length of trs[] */
306 len = (int)STRLEN(buf);
307 room = bufsize - len;
308 while (*buf != 0)
310 # ifdef FEAT_MBYTE
311 /* Assume a multi-byte character doesn't need translation. */
312 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
313 len -= trs_len;
314 else
315 # endif
317 trs = transchar_byte(*buf);
318 trs_len = (int)STRLEN(trs);
319 if (trs_len > 1)
321 room -= trs_len - 1;
322 if (room <= 0)
323 return;
324 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
326 mch_memmove(buf, trs, (size_t)trs_len);
327 --len;
329 buf += trs_len;
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Translate a string into allocated memory, replacing special chars with
 * printable chars.  Returns NULL when out of memory.
 * The caller owns the returned string.
 */
    char_u *
transstr(s)
    char_u      *s;
{
    char_u      *res;
    char_u      *p;
    char_u      *dst;           /* where the next output byte goes in res[] */
#ifdef FEAT_MBYTE
    int         l, len, c;
    char_u      hexbuf[11];
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        /* Compute the length of the result, taking account of unprintable
         * multi-byte characters. */
        len = 0;
        p = s;
        while (*p != NUL)
        {
            if ((l = (*mb_ptr2len)(p)) > 1)
            {
                c = (*mb_ptr2char)(p);
                p += l;
                if (vim_isprintc(c))
                    len += l;
                else
                {
                    transchar_hex(hexbuf, c);
                    len += (int)STRLEN(hexbuf);
                }
            }
            else
            {
                l = byte2cells(*p++);
                if (l > 0)
                    len += l;
                else
                    len += 4;   /* illegal byte sequence */
            }
        }
        res = alloc((unsigned)(len + 1));
    }
    else
#endif
        res = alloc((unsigned)(vim_strsize(s) + 1));
    if (res != NULL)
    {
        /* Append through a running pointer instead of STRCAT()/STRLEN(res),
         * which would rescan the result for every character. */
        dst = res;
        p = s;
        while (*p != NUL)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
            {
                c = (*mb_ptr2char)(p);
                if (vim_isprintc(c))
                {
                    /* copy printable multi-byte char as-is */
                    mch_memmove(dst, p, (size_t)l);
                    dst += l;
                }
                else
                {
                    transchar_hex(dst, c);
                    dst += STRLEN(dst);
                }
                p += l;
            }
            else
#endif
            {
                STRCPY(dst, transchar_byte(*p++));
                dst += STRLEN(dst);
            }
        }
        *dst = NUL;
    }
    return res;
}
#endif
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]"; input that does not fit is
 * truncated to "buflen - 1" bytes.
 */
    char_u *
str_foldcase(str, orglen, buf, buflen)
    char_u      *str;
    int         orglen;
    char_u      *buf;
    int         buflen;
{
    garray_T    ga;
    int         i;
    int         len = orglen;

    /* Access the working copy, which lives either in "ga" or in "buf". */
#define GA_CHAR(i)  ((char_u *)ga.ga_data)[(i)]
#define GA_PTR(i)   ((char_u *)ga.ga_data + (i))
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[(i)])
#define STR_PTR(i)  (buf == NULL ? GA_PTR(i) : buf + (i))

    /* Copy "str" into "buf" or allocated memory, unmodified. */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
    }
    else
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
    }
    if (buf == NULL)
        GA_CHAR(len) = NUL;
    else
        buf[len] = NUL;

    /* Make each character lower case. */
    i = 0;
    while (STR_CHAR(i) != NUL)
    {
#ifdef FEAT_MBYTE
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int     c = utf_ptr2char(STR_PTR(i));
                int     ol = utf_ptr2len(STR_PTR(i));
                int     lc = utf_tolower(c);

                /* Only replace the character when it is not an invalid
                 * sequence (ASCII character or more than one byte) and
                 * utf_tolower() doesn't return the original character. */
                if ((c < 0x80 || ol > 1) && c != lc)
                {
                    int     nl = utf_char2len(lc);

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (ol != nl)
                    {
                        if (nl > ol)
                        {
                            if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
                                                    : len + nl - ol >= buflen)
                            {
                                /* out of memory, keep old char */
                                lc = c;
                                nl = ol;
                            }
                        }
                        if (ol != nl)
                        {
                            if (buf == NULL)
                            {
                                STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
                                ga.ga_len += nl - ol;
                            }
                            else
                            {
                                STRMOVE(buf + i + nl, buf + i + ol);
                                len += nl - ol;
                            }
                        }
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* skip to next multi-byte char */
            i += (*mb_ptr2len)(STR_PTR(i));
        }
        else
#endif
        {
            if (buf == NULL)
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            else
                buf[i] = TOLOWER_LOC(buf[i]);
            ++i;
        }
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
#endif
525 * Catch 22: chartab[] can't be initialized before the options are
526 * initialized, and initializing options may cause transchar() to be called!
527 * When chartab_initialized == FALSE don't use chartab[].
528 * Does NOT work for multi-byte characters, c must be <= 255.
529 * Also doesn't work for the first byte of a multi-byte, "c" must be a
530 * character!
532 static char_u transchar_buf[7];
534 char_u *
535 transchar(c)
536 int c;
538 int i;
540 i = 0;
541 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
543 transchar_buf[0] = '~';
544 transchar_buf[1] = '@';
545 i = 2;
546 c = K_SECOND(c);
549 if ((!chartab_initialized && (
550 #ifdef EBCDIC
551 (c >= 64 && c < 255)
552 #else
553 (c >= ' ' && c <= '~')
554 #endif
555 #ifdef FEAT_FKMAP
556 || F_ischar(c)
557 #endif
558 )) || (c < 256 && vim_isprintc_strict(c)))
560 /* printable character */
561 transchar_buf[i] = c;
562 transchar_buf[i + 1] = NUL;
564 else
565 transchar_nonprint(transchar_buf + i, c);
566 return transchar_buf;
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Like transchar(), but called with a byte instead of a character.  Checks
 * for an illegal UTF-8 byte.
 * Returns a pointer to the same static buffer that transchar() uses.
 */
    char_u *
transchar_byte(c)
    int         c;
{
    /* With UTF-8 a lone byte >= 0x80 is never a character of its own. */
    if (enc_utf8 && c >= 0x80)
    {
        transchar_nonprint(transchar_buf, c);
        return transchar_buf;
    }
    return transchar(c);
}
#endif
588 * Convert non-printable character to two or more printable characters in
589 * "buf[]". "buf" needs to be able to hold five bytes.
590 * Does NOT work for multi-byte characters, c must be <= 255.
592 void
593 transchar_nonprint(buf, c)
594 char_u *buf;
595 int c;
597 if (c == NL)
598 c = NUL; /* we use newline in place of a NUL */
599 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
600 c = NL; /* we use CR in place of NL in this case */
602 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
603 transchar_hex(buf, c);
605 #ifdef EBCDIC
606 /* For EBCDIC only the characters 0-63 and 255 are not printable */
607 else if (CtrlChar(c) != 0 || c == DEL)
608 #else
609 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
610 #endif
612 buf[0] = '^';
613 #ifdef EBCDIC
614 if (c == DEL)
615 buf[1] = '?'; /* DEL displayed as ^? */
616 else
617 buf[1] = CtrlChar(c);
618 #else
619 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
620 #endif
622 buf[2] = NUL;
624 #ifdef FEAT_MBYTE
625 else if (enc_utf8 && c >= 0x80)
627 transchar_hex(buf, c);
629 #endif
630 #ifndef EBCDIC
631 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
633 buf[0] = '|';
634 buf[1] = c - 0x80;
635 buf[2] = NUL;
637 #else
638 else if (c < 64)
640 buf[0] = '~';
641 buf[1] = MetaChar(c);
642 buf[2] = NUL;
644 #endif
645 else /* 0x80 - 0x9f and 0xff */
648 * TODO: EBCDIC I don't know what to do with this chars, so I display
649 * them as '~?' for now
651 buf[0] = '~';
652 #ifdef EBCDIC
653 buf[1] = '?'; /* 0xff displayed as ~? */
654 #else
655 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
656 #endif
657 buf[2] = NUL;
661 void
662 transchar_hex(buf, c)
663 char_u *buf;
664 int c;
666 int i = 0;
668 buf[0] = '<';
669 #ifdef FEAT_MBYTE
670 if (c > 255)
672 buf[++i] = nr2hex((unsigned)c >> 12);
673 buf[++i] = nr2hex((unsigned)c >> 8);
675 #endif
676 buf[++i] = nr2hex((unsigned)c >> 4);
677 buf[++i] = nr2hex((unsigned)c);
678 buf[++i] = '>';
679 buf[++i] = NUL;
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(c)
    unsigned    c;
{
    if ((c & 0xf) <= 9)
        return (c & 0xf) + '0';
    return (c & 0xf) - 10 + 'a';
}
697 * Return number of display cells occupied by byte "b".
698 * Caller must make sure 0 <= b <= 255.
699 * For multi-byte mode "b" must be the first byte of a character.
700 * A TAB is counted as two cells: "^I".
701 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
702 * cells depends on further bytes.
705 byte2cells(b)
706 int b;
708 #ifdef FEAT_MBYTE
709 if (enc_utf8 && b >= 0x80)
710 return 0;
711 #endif
712 return (chartab[b] & CT_CELL_MASK);
716 * Return number of display cells occupied by character "c".
717 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
718 * A TAB is counted as two cells: "^I" or four: "<09>".
721 char2cells(c)
722 int c;
724 if (IS_SPECIAL(c))
725 return char2cells(K_SECOND(c)) + 2;
726 #ifdef FEAT_MBYTE
727 if (c >= 0x80)
729 /* UTF-8: above 0x80 need to check the value */
730 if (enc_utf8)
731 return utf_char2cells(c);
732 /* DBCS: double-byte means double-width, except for euc-jp with first
733 * byte 0x8e */
734 if (enc_dbcs != 0 && c >= 0x100)
736 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
737 return 1;
738 return 2;
741 #endif
742 return (chartab[c & 0xff] & CT_CELL_MASK);
746 * Return number of display cells occupied by character at "*p".
747 * A TAB is counted as two cells: "^I" or four: "<09>".
750 ptr2cells(p)
751 char_u *p;
753 #ifdef FEAT_MBYTE
754 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
755 if (enc_utf8 && *p >= 0x80)
756 return utf_ptr2cells(p);
757 /* For DBCS we can tell the cell count from the first byte. */
758 #endif
759 return (chartab[*p] & CT_CELL_MASK);
763 * Return the number of characters string "s" will take on the screen,
764 * counting TABs as two characters: "^I".
767 vim_strsize(s)
768 char_u *s;
770 return vim_strnsize(s, (int)MAXCOL);
774 * Return the number of characters string "s[len]" will take on the screen,
775 * counting TABs as two characters: "^I".
778 vim_strnsize(s, len)
779 char_u *s;
780 int len;
782 int size = 0;
784 while (*s != NUL && --len >= 0)
786 #ifdef FEAT_MBYTE
787 if (has_mbyte)
789 int l = (*mb_ptr2len)(s);
791 size += ptr2cells(s);
792 s += l;
793 len -= l - 1;
795 else
796 #endif
797 size += byte2cells(*s++);
799 return size;
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to an if/else in which both branches return, so it must
 * be the last statement on its path through a function returning int.
 */

#ifdef FEAT_VARTABS
#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_ary); \
    } \
    else \
        return ptr2cells(p);
#else
#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
#endif
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of screen cells the character at "p" takes when it is
 * displayed at column "col" in the current window; a TAB is expanded
 * according to the tab settings of the current buffer.
 */
    int
chartabsize(p, col)
    char_u      *p;
    colnr_T     col;
{
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and its buffer instead of the
 * current window.
 */
    static int
win_chartabsize(wp, p, col)
    win_T       *wp;
    char_u      *p;
    colnr_T     col;
{
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
852 * return the number of characters the string 's' will take on the screen,
853 * taking into account the size of a tab
856 linetabsize(s, lnum)
857 char_u *s;
858 linenr_T lnum;
860 colnr_T col = 0;
862 while (*s != NUL)
863 col += lbr_chartabsize_adv(&s, col, lnum);
864 return (int)col;
868 * Like linetabsize(), but for a given window instead of the current one.
871 win_linetabsize(wp, p, len, lnum)
872 win_T *wp;
873 char_u *p;
874 colnr_T len;
875 linenr_T lnum;
877 colnr_T col = 0;
878 char_u *s;
880 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
881 col += win_lbr_chartabsize(wp, s, col, NULL, lnum);
882 return (int)col;
886 * Return TRUE if 'c' is a normal identifier character:
887 * Letters and characters from the 'isident' option.
890 vim_isIDc(c)
891 int c;
893 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
897 * return TRUE if 'c' is a keyword character: Letters and characters from
898 * 'iskeyword' option for current buffer.
899 * For multi-byte characters mb_get_class() is used (builtin rules).
902 vim_iswordc(c)
903 int c;
905 #ifdef FEAT_MBYTE
906 if (c >= 0x100)
908 if (enc_dbcs != 0)
909 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
910 if (enc_utf8)
911 return utf_class(c) >= 2;
913 #endif
914 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
918 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
921 vim_iswordp(p)
922 char_u *p;
924 #ifdef FEAT_MBYTE
925 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
926 return mb_get_class(p) >= 2;
927 #endif
928 return GET_CHARTAB(curbuf, *p) != 0;
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Like vim_iswordp(), but use the keyword flags of buffer "buf" instead of
 * those of the current buffer.
 */
    int
vim_iswordc_buf(p, buf)
    char_u      *p;
    buf_T       *buf;
{
# ifdef FEAT_MBYTE
    if (has_mbyte && MB_BYTE2LEN(*p) > 1)
        return mb_get_class(p) >= 2;
# endif
    return (GET_CHARTAB(buf, *p) != 0);
}
#endif
946 * return TRUE if 'c' is a valid file-name character
947 * Assume characters above 0x100 are valid (multi-byte).
950 vim_isfilec(c)
951 int c;
953 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
957 * return TRUE if 'c' is a valid file-name character or a wildcard character
958 * Assume characters above 0x100 are valid (multi-byte).
959 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
960 * returns false.
963 vim_isfilec_or_wc(c)
964 int c;
966 char_u buf[2];
968 buf[0] = (char_u)c;
969 buf[1] = NUL;
970 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
974 * return TRUE if 'c' is a printable character
975 * Assume characters above 0x100 are printable (multi-byte), except for
976 * Unicode.
979 vim_isprintc(c)
980 int c;
982 #ifdef FEAT_MBYTE
983 if (enc_utf8 && c >= 0x100)
984 return utf_printable(c);
985 #endif
986 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
990 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
991 * byte of a double-byte character.
994 vim_isprintc_strict(c)
995 int c;
997 #ifdef FEAT_MBYTE
998 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
999 return FALSE;
1000 if (enc_utf8 && c >= 0x100)
1001 return utf_printable(c);
1002 #endif
1003 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1007 * like chartabsize(), but also check for line breaks on the screen
1010 lbr_chartabsize(s, col, lnum)
1011 unsigned char *s;
1012 colnr_T col;
1013 linenr_T lnum;
1015 #ifdef FEAT_LINEBREAK
1016 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
1018 #endif
1019 #ifdef FEAT_MBYTE
1020 if (curwin->w_p_wrap)
1021 return win_nolbr_chartabsize(curwin, s, col, NULL);
1022 #endif
1023 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1024 #ifdef FEAT_LINEBREAK
1026 return win_lbr_chartabsize(curwin, s, col, NULL, lnum);
1027 #endif
1031 * Call lbr_chartabsize() and advance the pointer.
1034 lbr_chartabsize_adv(s, col, lnum)
1035 char_u **s;
1036 colnr_T col;
1037 linenr_T lnum;
1039 int retval;
1041 retval = lbr_chartabsize(*s, col, lnum);
1042 mb_ptr_adv(*s);
1043 return retval;
1047 * This function is used very often, keep it fast!!!!
1049 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1050 * string at start of line. Warning: *headp is only set if it's a non-zero
1051 * value, init to 0 before calling.
1053 * linenr argument needed if in visual highlighting and breakindent=on, then
1054 * the line calculated is not current; if 0, normal functionality is preserved.
1057 win_lbr_chartabsize(wp, s, col, headp, lnum)
1058 win_T *wp;
1059 char_u *s;
1060 colnr_T col;
1061 int *headp UNUSED;
1062 linenr_T lnum;
1064 #ifdef FEAT_LINEBREAK
1065 int c;
1066 int size;
1067 colnr_T col2;
1068 colnr_T colmax;
1069 int added;
1070 # ifdef FEAT_VARTABS
1071 colnr_T orig_col = col;
1072 # endif
1073 # ifdef FEAT_MBYTE
1074 int mb_added = 0;
1075 # else
1076 # define mb_added 0
1077 # endif
1078 int numberextra;
1079 char_u *ps;
1080 int tab_corr = (*s == TAB);
1081 int n;
1084 * No 'linebreak' and 'showbreak' and 'breakindent': return quickly.
1086 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
1087 #endif
1089 #ifdef FEAT_MBYTE
1090 if (wp->w_p_wrap)
1091 return win_nolbr_chartabsize(wp, s, col, headp);
1092 #endif
1093 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1096 #ifdef FEAT_LINEBREAK
1098 * First get normal size, without 'linebreak'
1100 size = win_chartabsize(wp, s, col);
1101 c = *s;
1104 * If 'linebreak' set check at a blank before a non-blank if the line
1105 * needs a break here
1107 if (wp->w_p_lbr
1108 && vim_isbreak(c)
1109 && !vim_isbreak(s[1])
1110 && !wp->w_p_list
1111 && wp->w_p_wrap
1112 # ifdef FEAT_VERTSPLIT
1113 && wp->w_width != 0
1114 # endif
1118 * Count all characters from first non-blank after a blank up to next
1119 * non-blank after a blank.
1121 numberextra = win_col_off(wp);
1122 col2 = col;
1123 colmax = (colnr_T)(W_WIDTH(wp) - numberextra);
1124 if (col >= colmax)
1126 n = colmax + win_col_off2(wp);
1127 if (n > 0)
1128 colmax += (((col - colmax) / n) + 1) * n;
1131 for (;;)
1133 ps = s;
1134 mb_ptr_adv(s);
1135 c = *s;
1136 if (!(c != NUL
1137 && (vim_isbreak(c)
1138 || (!vim_isbreak(c)
1139 && (col2 == col || !vim_isbreak(*ps))))))
1140 break;
1142 col2 += win_chartabsize(wp, s, col2);
1143 if (col2 >= colmax) /* doesn't fit */
1145 size = colmax - col;
1146 tab_corr = FALSE;
1147 break;
1151 # ifdef FEAT_MBYTE
1152 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1153 && wp->w_p_wrap && in_win_border(wp, col))
1155 ++size; /* Count the ">" in the last column. */
1156 mb_added = 1;
1158 # endif
1161 * May have to add something for 'breakindent' and/or 'showbreak'
1162 * string at start of line.
1163 * Set *headp to the size of what we add.
1165 added = 0;
1166 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
1168 numberextra = win_col_off(wp);
1169 col += numberextra + mb_added;
1170 if (col >= (colnr_T)W_WIDTH(wp))
1172 col -= W_WIDTH(wp);
1173 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1174 if (numberextra > 0)
1175 col = col % numberextra;
1177 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1179 added = 0;
1180 if (*p_sbr != NUL)
1181 added += vim_strsize(p_sbr);
1182 if (wp->w_p_bri)
1183 added += get_breakindent_win(wp,lnum);
1185 if (tab_corr)
1187 # ifdef FEAT_VARTABS
1188 int ts = tabstop_at(orig_col, wp->w_buffer->b_p_ts,
1189 wp->w_buffer->b_p_vts_ary);
1190 size += (added / ts) * ts;
1191 # else
1192 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1193 # endif
1195 else
1196 size += added;
1197 if (col != 0)
1198 added = 0;
1201 if (headp != NULL)
1202 *headp = added + mb_added;
1203 return size;
1204 #endif
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
 * 'wrap' is on.  This means we need to check for a double-byte character that
 * doesn't fit at the end of the screen line.
 * When that happens and "headp" is not NULL, *headp is set to 1.
 */
    static int
win_nolbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp;
{
    int         n;

    if (*s == TAB && (!wp->w_p_list || lcs_tab1))
    {
# ifdef FEAT_VARTABS
        return tabstop_padding(col, wp->w_buffer->b_p_ts,
                                    wp->w_buffer->b_p_vts_ary);
# else
        n = wp->w_buffer->b_p_ts;
        return (int)(n - (col % n));
# endif
    }
    n = ptr2cells(s);
    /* Add one cell for a double-width character in the last column of the
     * window, displayed with a ">". */
    if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
    {
        if (headp != NULL)
            *headp = 1;
        return 3;
    }
    return n;
}

/*
 * Return TRUE if virtual column "vcol" is in the rightmost column of window
 * "wp".
 */
    int
in_win_border(wp, vcol)
    win_T       *wp;
    colnr_T     vcol;
{
    int         width1;         /* width of first line (after line number) */
    int         width2;         /* width of further lines */

#ifdef FEAT_VERTSPLIT
    if (wp->w_width == 0)       /* there is no border */
        return FALSE;
#endif
    width1 = W_WIDTH(wp) - win_col_off(wp);
    if ((int)vcol < width1 - 1)
        return FALSE;
    if ((int)vcol == width1 - 1)
        return TRUE;
    width2 = width1 + win_col_off2(wp);
    if (width2 <= 0)
        return FALSE;
    return ((vcol - width1) % width2 == width2 - 1);
}
#endif /* FEAT_MBYTE */
1273 * Get virtual column number of pos.
1274 * start: on the first position of this character (TAB, ctrl)
1275 * cursor: where the cursor is on this character (first char, except for TAB)
1276 * end: on the last position of this character (TAB, ctrl)
1278 * This is used very often, keep it fast!
1280 void
1281 getvcol(wp, pos, start, cursor, end)
1282 win_T *wp;
1283 pos_T *pos;
1284 colnr_T *start;
1285 colnr_T *cursor;
1286 colnr_T *end;
1288 colnr_T vcol;
1289 char_u *ptr; /* points to current char */
1290 char_u *posptr; /* points to char at pos->col */
1291 int incr;
1292 int head;
1293 #ifdef FEAT_VARTABS
1294 int *vts = wp->w_buffer->b_p_vts_ary;
1295 #endif
1296 int ts = wp->w_buffer->b_p_ts;
1297 int c;
1299 vcol = 0;
1300 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1301 if (pos->col == MAXCOL)
1302 posptr = NULL; /* continue until the NUL */
1303 else
1304 posptr = ptr + pos->col;
1307 * This function is used very often, do some speed optimizations.
1308 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1309 * use a simple loop.
1310 * Also use this when 'list' is set but tabs take their normal size.
1312 if ((!wp->w_p_list || lcs_tab1 != NUL)
1313 #ifdef FEAT_LINEBREAK
1314 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
1315 #endif
1318 #ifndef FEAT_MBYTE
1319 head = 0;
1320 #endif
1321 for (;;)
1323 #ifdef FEAT_MBYTE
1324 head = 0;
1325 #endif
1326 c = *ptr;
1327 /* make sure we don't go past the end of the line */
1328 if (c == NUL)
1330 incr = 1; /* NUL at end of line only takes one column */
1331 break;
1333 /* A tab gets expanded, depending on the current column */
1334 if (c == TAB)
1335 #ifdef FEAT_VARTABS
1336 incr = tabstop_padding(vcol, ts, vts);
1337 #else
1338 incr = ts - (vcol % ts);
1339 #endif
1340 else
1342 #ifdef FEAT_MBYTE
1343 if (has_mbyte)
1345 /* For utf-8, if the byte is >= 0x80, need to look at
1346 * further bytes to find the cell width. */
1347 if (enc_utf8 && c >= 0x80)
1348 incr = utf_ptr2cells(ptr);
1349 else
1350 incr = CHARSIZE(c);
1352 /* If a double-cell char doesn't fit at the end of a line
1353 * it wraps to the next line, it's like this char is three
1354 * cells wide. */
1355 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1356 && in_win_border(wp, vcol))
1358 ++incr;
1359 head = 1;
1362 else
1363 #endif
1364 incr = CHARSIZE(c);
1367 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
1368 break;
1370 vcol += incr;
1371 mb_ptr_adv(ptr);
1374 else
1376 for (;;)
1378 /* A tab gets expanded, depending on the current column */
1379 head = 0;
1380 incr = win_lbr_chartabsize(wp, ptr, vcol, &head, pos->lnum);
1381 /* make sure we don't go past the end of the line */
1382 if (*ptr == NUL)
1384 incr = 1; /* NUL at end of line only takes one column */
1385 break;
1388 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
1389 break;
1391 vcol += incr;
1392 mb_ptr_adv(ptr);
1395 if (start != NULL)
1396 *start = vcol + head;
1397 if (end != NULL)
1398 *end = vcol + incr - 1;
1399 if (cursor != NULL)
1401 if (*ptr == TAB
1402 && (State & NORMAL)
1403 && !wp->w_p_list
1404 && !virtual_active()
1405 #ifdef FEAT_VISUAL
1406 && !(VIsual_active
1407 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1408 #endif
1410 *cursor = vcol + incr - 1; /* cursor at end */
1411 else
1412 *cursor = vcol + head; /* cursor at start */
1417 * Get virtual cursor column in the current window, pretending 'list' is off.
1419 colnr_T
1420 getvcol_nolist(posp)
1421 pos_T *posp;
1423 int list_save = curwin->w_p_list;
1424 colnr_T vcol;
1426 curwin->w_p_list = FALSE;
1427 getvcol(curwin, posp, NULL, &vcol, NULL);
1428 curwin->w_p_list = list_save;
1429 return vcol;
1432 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1434 * Get virtual column in virtual mode.
1436 void
1437 getvvcol(wp, pos, start, cursor, end)
1438 win_T *wp;
1439 pos_T *pos;
1440 colnr_T *start;
1441 colnr_T *cursor;
1442 colnr_T *end;
1444 colnr_T col;
1445 colnr_T coladd;
1446 colnr_T endadd;
1447 # ifdef FEAT_MBYTE
1448 char_u *ptr;
1449 # endif
1451 if (virtual_active())
1453 /* For virtual mode, only want one value */
1454 getvcol(wp, pos, &col, NULL, NULL);
1456 coladd = pos->coladd;
1457 endadd = 0;
1458 # ifdef FEAT_MBYTE
1459 /* Cannot put the cursor on part of a wide character. */
1460 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1461 if (pos->col < (colnr_T)STRLEN(ptr))
1463 int c = (*mb_ptr2char)(ptr + pos->col);
1465 if (c != TAB && vim_isprintc(c))
1467 endadd = (colnr_T)(char2cells(c) - 1);
1468 if (coladd > endadd) /* past end of line */
1469 endadd = 0;
1470 else
1471 coladd = 0;
1474 # endif
1475 col += coladd;
1476 if (start != NULL)
1477 *start = col;
1478 if (cursor != NULL)
1479 *cursor = col;
1480 if (end != NULL)
1481 *end = col + endadd;
1483 else
1484 getvcol(wp, pos, start, cursor, end);
1486 #endif
1488 #if defined(FEAT_VISUAL) || defined(PROTO)
1490 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1491 * Used for Visual block mode.
1493 void
1494 getvcols(wp, pos1, pos2, left, right)
1495 win_T *wp;
1496 pos_T *pos1, *pos2;
1497 colnr_T *left, *right;
1499 colnr_T from1, from2, to1, to2;
1501 if (ltp(pos1, pos2))
1503 getvvcol(wp, pos1, &from1, NULL, &to1);
1504 getvvcol(wp, pos2, &from2, NULL, &to2);
1506 else
1508 getvvcol(wp, pos2, &from1, NULL, &to1);
1509 getvvcol(wp, pos1, &from2, NULL, &to2);
1511 if (from2 < from1)
1512 *left = from2;
1513 else
1514 *left = from1;
1515 if (to2 > to1)
1517 if (*p_sel == 'e' && from2 - 1 >= to1)
1518 *right = from2 - 1;
1519 else
1520 *right = to2;
1522 else
1523 *right = to1;
1525 #endif
1528 * skipwhite: skip over ' ' and '\t'.
1530 char_u *
1531 skipwhite(q)
1532 char_u *q;
1534 char_u *p = q;
1536 while (vim_iswhite(*p)) /* skip to next non-white */
1537 ++p;
1538 return p;
1542 * skip over digits
1544 char_u *
1545 skipdigits(q)
1546 char_u *q;
1548 char_u *p = q;
1550 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1551 ++p;
1552 return p;
1555 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1557 * skip over digits and hex characters
1559 char_u *
1560 skiphex(q)
1561 char_u *q;
1563 char_u *p = q;
1565 while (vim_isxdigit(*p)) /* skip to next non-digit */
1566 ++p;
1567 return p;
1569 #endif
1571 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1573 * skip to digit (or NUL after the string)
1575 char_u *
1576 skiptodigit(q)
1577 char_u *q;
1579 char_u *p = q;
1581 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1582 ++p;
1583 return p;
1587 * skip to hex character (or NUL after the string)
1589 char_u *
1590 skiptohex(q)
1591 char_u *q;
1593 char_u *p = q;
1595 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1596 ++p;
1597 return p;
1599 #endif
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * isdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns 1 for '0' - '9' and 0 for anything else.
 */
    int
vim_isdigit(c)
    int     c;
{
    return !(c < '0' || c > '9');
}
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * isxdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 for '0' - '9', 'a' - 'f' and 'A' - 'F', 0 for anything else.
 */
    int
vim_isxdigit(c)
    int     c;
{
    int     is_decimal = !(c < '0' || c > '9');
    int     is_lower_hex = !(c < 'a' || c > 'f');
    int     is_upper_hex = !(c < 'A' || c > 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1628 #if defined(FEAT_MBYTE) || defined(PROTO)
1630 * Vim's own character class functions. These exist because many library
1631 * islower()/toupper() etc. do not work properly: they crash when used with
1632 * invalid values or can't handle latin1 when the locale is C.
1633 * Speed is most important here.
1635 #define LATIN1LOWER 'l'
1636 #define LATIN1UPPER 'U'
1638 /* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1639 static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1640 static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1641 static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1644 vim_islower(c)
1645 int c;
1647 if (c <= '@')
1648 return FALSE;
1649 if (c >= 0x80)
1651 if (enc_utf8)
1652 return utf_islower(c);
1653 if (c >= 0x100)
1655 #ifdef HAVE_ISWLOWER
1656 if (has_mbyte)
1657 return iswlower(c);
1658 #endif
1659 /* islower() can't handle these chars and may crash */
1660 return FALSE;
1662 if (enc_latin1like)
1663 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1665 return islower(c);
1669 vim_isupper(c)
1670 int c;
1672 if (c <= '@')
1673 return FALSE;
1674 if (c >= 0x80)
1676 if (enc_utf8)
1677 return utf_isupper(c);
1678 if (c >= 0x100)
1680 #ifdef HAVE_ISWUPPER
1681 if (has_mbyte)
1682 return iswupper(c);
1683 #endif
1684 /* islower() can't handle these chars and may crash */
1685 return FALSE;
1687 if (enc_latin1like)
1688 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1690 return isupper(c);
1694 vim_toupper(c)
1695 int c;
1697 if (c <= '@')
1698 return c;
1699 if (c >= 0x80)
1701 if (enc_utf8)
1702 return utf_toupper(c);
1703 if (c >= 0x100)
1705 #ifdef HAVE_TOWUPPER
1706 if (has_mbyte)
1707 return towupper(c);
1708 #endif
1709 /* toupper() can't handle these chars and may crash */
1710 return c;
1712 if (enc_latin1like)
1713 return latin1upper[c];
1715 return TOUPPER_LOC(c);
1719 vim_tolower(c)
1720 int c;
1722 if (c <= '@')
1723 return c;
1724 if (c >= 0x80)
1726 if (enc_utf8)
1727 return utf_tolower(c);
1728 if (c >= 0x100)
1730 #ifdef HAVE_TOWLOWER
1731 if (has_mbyte)
1732 return towlower(c);
1733 #endif
1734 /* tolower() can't handle these chars and may crash */
1735 return c;
1737 if (enc_latin1like)
1738 return latin1lower[c];
1740 return TOLOWER_LOC(c);
1742 #endif
1745 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1747 char_u *
1748 skiptowhite(p)
1749 char_u *p;
1751 while (*p != ' ' && *p != '\t' && *p != NUL)
1752 ++p;
1753 return p;
1756 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1757 || defined(PROTO)
1759 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1761 char_u *
1762 skiptowhite_esc(p)
1763 char_u *p;
1765 while (*p != ' ' && *p != '\t' && *p != NUL)
1767 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1768 ++p;
1769 ++p;
1771 return p;
1773 #endif
1776 * Getdigits: Get a number from a string and skip over it.
1777 * Note: the argument is a pointer to a char_u pointer!
1779 long
1780 getdigits(pp)
1781 char_u **pp;
1783 char_u *p;
1784 long retval;
1786 p = *pp;
1787 retval = atol((char *)p);
1788 if (*p == '-') /* skip negative sign */
1789 ++p;
1790 p = skipdigits(p); /* skip to next non-digit */
1791 *pp = p;
1792 return retval;
1796 * Return TRUE if "lbuf" is empty or only contains blanks.
1799 vim_isblankline(lbuf)
1800 char_u *lbuf;
1802 char_u *p;
1804 p = skipwhite(lbuf);
1805 return (*p == NUL || *p == '\r' || *p == '\n');
1809 * Convert a string into a long and/or unsigned long, taking care of
1810 * hexadecimal and octal numbers. Accepts a '-' sign.
1811 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1812 * 0 decimal
1813 * '0' octal
1814 * 'X' hex
1815 * 'x' hex
1816 * If "len" is not NULL, the length of the number in characters is returned.
1817 * If "nptr" is not NULL, the signed result is returned in it.
1818 * If "unptr" is not NULL, the unsigned result is returned in it.
1819 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1820 * octal number.
1821 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1822 * hex number.
1824 void
1825 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1826 char_u *start;
1827 int *hexp; /* return: type of number 0 = decimal, 'x'
1828 or 'X' is hex, '0' = octal */
1829 int *len; /* return: detected length of number */
1830 int dooct; /* recognize octal number */
1831 int dohex; /* recognize hex number */
1832 long *nptr; /* return: signed result */
1833 unsigned long *unptr; /* return: unsigned result */
1835 char_u *ptr = start;
1836 int hex = 0; /* default is decimal */
1837 int negative = FALSE;
1838 unsigned long un = 0;
1839 int n;
1841 if (ptr[0] == '-')
1843 negative = TRUE;
1844 ++ptr;
1847 /* Recognize hex and octal. */
1848 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
1850 hex = ptr[1];
1851 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1852 ptr += 2; /* hexadecimal */
1853 else
1855 hex = 0; /* default is decimal */
1856 if (dooct)
1858 /* Don't interpret "0", "08" or "0129" as octal. */
1859 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1861 if (ptr[n] > '7')
1863 hex = 0; /* can't be octal */
1864 break;
1866 if (ptr[n] > '0')
1867 hex = '0'; /* assume octal */
1874 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1876 if (hex == '0' || dooct > 1)
1878 /* octal */
1879 while ('0' <= *ptr && *ptr <= '7')
1881 un = 8 * un + (unsigned long)(*ptr - '0');
1882 ++ptr;
1885 else if (hex != 0 || dohex > 1)
1887 /* hex */
1888 while (vim_isxdigit(*ptr))
1890 un = 16 * un + (unsigned long)hex2nr(*ptr);
1891 ++ptr;
1894 else
1896 /* decimal */
1897 while (VIM_ISDIGIT(*ptr))
1899 un = 10 * un + (unsigned long)(*ptr - '0');
1900 ++ptr;
1904 if (hexp != NULL)
1905 *hexp = hex;
1906 if (len != NULL)
1907 *len = (int)(ptr - start);
1908 if (nptr != NULL)
1910 if (negative) /* account for leading '-' for decimal numbers */
1911 *nptr = -(long)un;
1912 else
1913 *nptr = (long)un;
1915 if (unptr != NULL)
1916 *unptr = un;
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * For any other argument the result is "c - '0'".
 */
    int
hex2nr(c)
    int     c;
{
    int     value = c - '0';

    if (c >= 'A' && c <= 'F')
        value = c - 'A' + 10;
    else if (c >= 'a' && c <= 'f')
        value = c - 'a' + 10;
    return value;
}
1934 #if defined(FEAT_TERMRESPONSE) \
1935 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1937 * Convert two hex characters to a byte.
1938 * Return -1 if one of the characters is not hex.
1941 hexhex2nr(p)
1942 char_u *p;
1944 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1945 return -1;
1946 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1948 #endif
1951 * Return TRUE if "str" starts with a backslash that should be removed.
1952 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1953 * backslash is not a normal file name character.
1954 * '$' is a valid file name character, we don't remove the backslash before
1955 * it. This means it is not possible to use an environment variable after a
1956 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1957 * Although "\ name" is valid, the backslash in "Program\ files" must be
1958 * removed. Assume a file name doesn't start with a space.
1959 * For multi-byte names, never remove a backslash before a non-ascii
1960 * character, assume that all multi-byte characters are valid file name
1961 * characters.
1964 rem_backslash(str)
1965 char_u *str;
1967 #ifdef BACKSLASH_IN_FILENAME
1968 return (str[0] == '\\'
1969 # ifdef FEAT_MBYTE
1970 && str[1] < 0x80
1971 # endif
1972 && (str[1] == ' '
1973 || (str[1] != NUL
1974 && str[1] != '*'
1975 && str[1] != '?'
1976 && !vim_isfilec(str[1]))));
1977 #else
1978 return (str[0] == '\\' && str[1] != NUL);
1979 #endif
1983 * Halve the number of backslashes in a file name argument.
1984 * For MS-DOS we only do this if the character after the backslash
1985 * is not a normal file character.
1987 void
1988 backslash_halve(p)
1989 char_u *p;
1991 for ( ; *p; ++p)
1992 if (rem_backslash(p))
1993 STRMOVE(p, p + 1);
1997 * backslash_halve() plus save the result in allocated memory.
1999 char_u *
2000 backslash_halve_save(p)
2001 char_u *p;
2003 char_u *res;
2005 res = vim_strsave(p);
2006 if (res == NULL)
2007 return p;
2008 backslash_halve(res);
2009 return res;
2012 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
2014 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
2015 * The first 64 entries have been added to map control characters defined in
2016 * ascii.h
2018 static char_u ebcdic2ascii_tab[256] =
2020 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
2021 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
2022 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2023 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2024 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2025 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2026 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2027 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2028 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2029 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2030 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2031 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2032 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2033 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2034 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2035 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2036 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2037 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2038 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2039 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2040 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2041 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2042 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2043 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2044 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2045 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2046 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2047 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2048 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2049 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2050 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2051 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2055 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2056 * wanting 7-bit ASCII characters out the other end.
2058 void
2059 ebcdic2ascii(buffer, len)
2060 char_u *buffer;
2061 int len;
2063 int i;
2065 for (i = 0; i < len; i++)
2066 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2068 #endif