1 /* vi:set ts=8 sts=4 sw=4:
3 * VIM - Vi IMproved by Bram Moolenaar
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
13 static int win_chartabsize
__ARGS((win_T
*wp
, char_u
*p
, colnr_T col
));
17 static int win_nolbr_chartabsize
__ARGS((win_T
*wp
, char_u
*s
, colnr_T col
, int *headp
));
20 static int nr2hex
__ARGS((int c
));
22 static int chartab_initialized
= FALSE
;
24 /* b_chartab[] is an array of 32 bytes, each bit representing one of the
25 * characters 0-255: character "c" is bit (c & 0x7) of byte (c >> 3).
 * SET_CHARTAB() sets that bit, RESET_CHARTAB() clears it and GET_CHARTAB()
 * yields a non-zero value when it is set (the masked bit, not 0/1).
 * NOTE(review): SET_CHARTAB/RESET_CHARTAB expand to a bare, unparenthesized
 * assignment expression and all three macros evaluate "c" twice, so use
 * them as full statements and pass arguments without side effects. */
26 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
27 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
28 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
59 return buf_init_chartab(curbuf
, TRUE
);
63 buf_init_chartab(buf
, global
)
65 int global
; /* FALSE: only set buf->b_chartab[] */
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
81 * EBCDIC: all chars below ' ' are not printable, all others are
86 chartab
[c
++] = (dy_flags
& DY_UHEX
) ? 4 : 2;
92 chartab
[c
++] = 1 + CT_PRINT_CHAR
;
97 chartab
[c
++] = 1 + CT_PRINT_CHAR
;
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8
&& c
>= 0xa0)
105 chartab
[c
++] = CT_PRINT_CHAR
+ 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs
== DBCS_JPNU
&& c
== 0x8e)
108 chartab
[c
++] = CT_PRINT_CHAR
+ 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs
!= 0 && MB_BYTE2LEN(c
) == 2)
111 chartab
[c
++] = CT_PRINT_CHAR
+ 2;
114 /* the rest is unprintable by default */
115 chartab
[c
++] = (dy_flags
& DY_UHEX
) ? 4 : 2;
119 /* Assume that every multi-byte char is a filename character. */
120 for (c
= 1; c
< 256; ++c
)
121 if ((enc_dbcs
!= 0 && MB_BYTE2LEN(c
) > 1)
122 || (enc_dbcs
== DBCS_JPNU
&& c
== 0x8e)
123 || (enc_utf8
&& c
>= 0xa0))
124 chartab
[c
] |= CT_FNAME_CHAR
;
129 * Init word char flags all to FALSE
131 vim_memset(buf
->b_chartab
, 0, (size_t)32);
134 for (c
= 0; c
< 256; ++c
)
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c
) == 2)
144 * In lisp mode the '-' character is included in keywords.
147 SET_CHARTAB(buf
, '-');
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options.  Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
154 for (i
= global
? 0 : 3; i
<= 3; ++i
)
157 p
= p_isi
; /* first round: 'isident' */
159 p
= p_isp
; /* second round: 'isprint' */
161 p
= p_isf
; /* third round: 'isfname' */
163 p
= buf
->b_p_isk
; /* fourth round: 'iskeyword' */
169 if (*p
== '^' && p
[1] != NUL
)
179 if (*p
== '-' && p
[1] != NUL
)
187 if (c
<= 0 || (c2
< c
&& c2
!= -1) || c2
>= 256
188 || !(*p
== NUL
|| *p
== ','))
191 if (c2
== -1) /* not a range */
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters.
210 /* Use the MB_ functions here, because isalpha() doesn't
211 * work properly when 'encoding' is "latin1" and the locale is
213 if (!do_isalpha
|| MB_ISLOWER(c
) || MB_ISUPPER(c
)
215 || (p_altkeymap
&& (F_isalpha(c
) || F_isdigit(c
)))
219 if (i
== 0) /* (re)set ID flag */
222 chartab
[c
] &= ~CT_ID_CHAR
;
224 chartab
[c
] |= CT_ID_CHAR
;
226 else if (i
== 1) /* (re)set printable */
234 && (F_isalpha(c
) || F_isdigit(c
)))
238 /* For double-byte we keep the cell width, so
239 * that we can detect it from the first byte. */
240 && !(enc_dbcs
&& MB_BYTE2LEN(c
) == 2)
246 chartab
[c
] = (chartab
[c
] & ~CT_CELL_MASK
)
247 + ((dy_flags
& DY_UHEX
) ? 4 : 2);
248 chartab
[c
] &= ~CT_PRINT_CHAR
;
252 chartab
[c
] = (chartab
[c
] & ~CT_CELL_MASK
) + 1;
253 chartab
[c
] |= CT_PRINT_CHAR
;
257 else if (i
== 2) /* (re)set fname flag */
260 chartab
[c
] &= ~CT_FNAME_CHAR
;
262 chartab
[c
] |= CT_FNAME_CHAR
;
264 else /* i == 3 */ /* (re)set keyword flag */
267 RESET_CHARTAB(buf
, c
);
274 p
= skip_to_option_part(p
);
277 chartab_initialized
= TRUE
;
282 * Translate any special characters in buf[bufsize] in-place.
283 * The result is a string with only printable characters, but if there is not
284 * enough room, not all characters will be translated.
287 trans_characters(buf
, bufsize
)
291 int len
; /* length of string needing translation */
292 int room
; /* room in buffer after string */
293 char_u
*trs
; /* translated character */
294 int trs_len
; /* length of trs[] */
296 len
= (int)STRLEN(buf
);
297 room
= bufsize
- len
;
301 /* Assume a multi-byte character doesn't need translation. */
302 if (has_mbyte
&& (trs_len
= (*mb_ptr2len
)(buf
)) > 1)
307 trs
= transchar_byte(*buf
);
308 trs_len
= (int)STRLEN(trs
);
314 mch_memmove(buf
+ trs_len
, buf
+ 1, (size_t)len
);
316 mch_memmove(buf
, trs
, (size_t)trs_len
);
323 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
326 * Translate a string into allocated memory, replacing special chars with
327 * printable chars. Returns NULL when out of memory.
343 /* Compute the length of the result, taking account of unprintable
344 * multi-byte characters. */
349 if ((l
= (*mb_ptr2len
)(p
)) > 1)
351 c
= (*mb_ptr2char
)(p
);
357 transchar_hex(hexbuf
, c
);
358 len
+= (int)STRLEN(hexbuf
);
363 l
= byte2cells(*p
++);
367 len
+= 4; /* illegal byte sequence */
370 res
= alloc((unsigned)(len
+ 1));
374 res
= alloc((unsigned)(vim_strsize(s
) + 1));
382 if (has_mbyte
&& (l
= (*mb_ptr2len
)(p
)) > 1)
384 c
= (*mb_ptr2char
)(p
);
386 STRNCAT(res
, p
, l
); /* append printable multi-byte char */
388 transchar_hex(res
+ STRLEN(res
), c
);
393 STRCAT(res
, transchar_byte(*p
++));
400 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
402 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
404 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
405 * Otherwise puts the result in "buf[buflen]".
408 str_foldcase(str
, orglen
, buf
, buflen
)
418 #define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
419 #define GA_PTR(i) ((char_u *)ga.ga_data + i)
420 #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
421 #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
423 /* Copy "str" into "buf" or allocated memory, unmodified. */
426 ga_init2(&ga
, 1, 10);
427 if (ga_grow(&ga
, len
+ 1) == FAIL
)
429 mch_memmove(ga
.ga_data
, str
, (size_t)len
);
434 if (len
>= buflen
) /* Ugly! */
436 mch_memmove(buf
, str
, (size_t)len
);
443 /* Make each character lower case. */
445 while (STR_CHAR(i
) != NUL
)
448 if (enc_utf8
|| (has_mbyte
&& MB_BYTE2LEN(STR_CHAR(i
)) > 1))
452 int c
= utf_ptr2char(STR_PTR(i
));
453 int ol
= utf_ptr2len(STR_PTR(i
));
454 int lc
= utf_tolower(c
);
456 /* Only replace the character when it is not an invalid
457 * sequence (ASCII character or more than one byte) and
458 * utf_tolower() doesn't return the original character. */
459 if ((c
< 0x80 || ol
> 1) && c
!= lc
)
461 int nl
= utf_char2len(lc
);
463 /* If the byte length changes need to shift the following
464 * characters forward or backward. */
469 if (buf
== NULL
? ga_grow(&ga
, nl
- ol
+ 1) == FAIL
470 : len
+ nl
- ol
>= buflen
)
472 /* out of memory, keep old char */
481 STRMOVE(GA_PTR(i
) + nl
, GA_PTR(i
) + ol
);
482 ga
.ga_len
+= nl
- ol
;
486 STRMOVE(buf
+ i
+ nl
, buf
+ i
+ ol
);
491 (void)utf_char2bytes(lc
, STR_PTR(i
));
494 /* skip to next multi-byte char */
495 i
+= (*mb_ptr2len
)(STR_PTR(i
));
501 GA_CHAR(i
) = TOLOWER_LOC(GA_CHAR(i
));
503 buf
[i
] = TOLOWER_LOC(buf
[i
]);
509 return (char_u
*)ga
.ga_data
;
515 * Catch 22: chartab[] can't be initialized before the options are
516 * initialized, and initializing options may cause transchar() to be called!
517 * When chartab_initialized == FALSE don't use chartab[].
518 * Does NOT work for multi-byte characters, c must be <= 255.
519 * Also doesn't work for the first byte of a multi-byte, "c" must be a
522 static char_u transchar_buf
[7];
531 if (IS_SPECIAL(c
)) /* special key code, display as ~@ char */
533 transchar_buf
[0] = '~';
534 transchar_buf
[1] = '@';
539 if ((!chartab_initialized
&& (
543 (c
>= ' ' && c
<= '~')
548 )) || (c
< 256 && vim_isprintc_strict(c
)))
550 /* printable character */
551 transchar_buf
[i
] = c
;
552 transchar_buf
[i
+ 1] = NUL
;
555 transchar_nonprint(transchar_buf
+ i
, c
);
556 return transchar_buf
;
559 #if defined(FEAT_MBYTE) || defined(PROTO)
561 * Like transchar(), but called with a byte instead of a character. Checks
562 * for an illegal UTF-8 byte.
568 if (enc_utf8
&& c
>= 0x80)
570 transchar_nonprint(transchar_buf
, c
);
571 return transchar_buf
;
578 * Convert non-printable character to two or more printable characters in
579 * "buf[]". "buf" needs to be able to hold five bytes.
580 * Does NOT work for multi-byte characters, c must be <= 255.
583 transchar_nonprint(buf
, c
)
588 c
= NUL
; /* we use newline in place of a NUL */
589 else if (c
== CAR
&& get_fileformat(curbuf
) == EOL_MAC
)
590 c
= NL
; /* we use CR in place of NL in this case */
592 if (dy_flags
& DY_UHEX
) /* 'display' has "uhex" */
593 transchar_hex(buf
, c
);
596 /* For EBCDIC only the characters 0-63 and 255 are not printable */
597 else if (CtrlChar(c
) != 0 || c
== DEL
)
599 else if (c
<= 0x7f) /* 0x00 - 0x1f and 0x7f */
605 buf
[1] = '?'; /* DEL displayed as ^? */
607 buf
[1] = CtrlChar(c
);
609 buf
[1] = c
^ 0x40; /* DEL displayed as ^? */
615 else if (enc_utf8
&& c
>= 0x80)
617 transchar_hex(buf
, c
);
621 else if (c
>= ' ' + 0x80 && c
<= '~' + 0x80) /* 0xa0 - 0xfe */
631 buf
[1] = MetaChar(c
);
635 else /* 0x80 - 0x9f and 0xff */
638 * TODO: EBCDIC I don't know what to do with these chars, so I display
639 * them as '~?' for now
643 buf
[1] = '?'; /* 0xff displayed as ~? */
645 buf
[1] = (c
- 0x80) ^ 0x40; /* 0xff displayed as ~? */
652 transchar_hex(buf
, c
)
662 buf
[++i
] = nr2hex((unsigned)c
>> 12);
663 buf
[++i
] = nr2hex((unsigned)c
>> 8);
666 buf
[++i
] = nr2hex((unsigned)c
>> 4);
667 buf
[++i
] = nr2hex(c
);
673 * Convert the lower 4 bits of byte "c" to its hex character.
674 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
682 return (c
& 0xf) + '0';
683 return (c
& 0xf) - 10 + 'a';
687 * Return number of display cells occupied by byte "b".
688 * Caller must make sure 0 <= b <= 255.
689 * For multi-byte mode "b" must be the first byte of a character.
690 * A TAB is counted as two cells: "^I".
691 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
692 * cells depends on further bytes.
699 if (enc_utf8
&& b
>= 0x80)
702 return (chartab
[b
] & CT_CELL_MASK
);
706 * Return number of display cells occupied by character "c".
707 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
708 * A TAB is counted as two cells: "^I" or four: "<09>".
715 return char2cells(K_SECOND(c
)) + 2;
719 /* UTF-8: above 0x80 need to check the value */
721 return utf_char2cells(c
);
722 /* DBCS: double-byte means double-width, except for euc-jp with first
724 if (enc_dbcs
!= 0 && c
>= 0x100)
726 if (enc_dbcs
== DBCS_JPNU
&& ((unsigned)c
>> 8) == 0x8e)
732 return (chartab
[c
& 0xff] & CT_CELL_MASK
);
736 * Return number of display cells occupied by character at "*p".
737 * A TAB is counted as two cells: "^I" or four: "<09>".
744 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
745 if (enc_utf8
&& *p
>= 0x80)
746 return utf_ptr2cells(p
);
747 /* For DBCS we can tell the cell count from the first byte. */
749 return (chartab
[*p
] & CT_CELL_MASK
);
753 * Return the number of characters string "s" will take on the screen,
754 * counting TABs as two characters: "^I".
760 return vim_strnsize(s
, (int)MAXCOL
);
764 * Return the number of characters string "s[len]" will take on the screen,
765 * counting TABs as two characters: "^I".
774 while (*s
!= NUL
&& --len
>= 0)
779 int l
= (*mb_ptr2len
)(s
);
781 size
+= ptr2cells(s
);
787 size
+= byte2cells(*s
++);
793 * Return the number of characters 'c' will take on the screen, taking
794 * into account the size of a tab.
795 * Use a define to make it fast, this is used very often!!!
796 * Also see getvcol() below.
799 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
800 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
803 ts = (buf)->b_p_ts; \
804 return (int)(ts - (col % ts)); \
809 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
810 || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
816 RET_WIN_BUF_CHARTABSIZE(curwin
, curbuf
, p
, col
)
820 #ifdef FEAT_LINEBREAK
822 win_chartabsize(wp
, p
, col
)
827 RET_WIN_BUF_CHARTABSIZE(wp
, wp
->w_buffer
, p
, col
)
832 * return the number of characters the string 's' will take on the screen,
833 * taking into account the size of a tab
842 col
+= lbr_chartabsize_adv(&s
, col
);
847 * Like linetabsize(), but for a given window instead of the current one.
850 win_linetabsize(wp
, p
, len
)
858 for (s
= p
; *s
!= NUL
&& (len
== MAXCOL
|| s
< p
+ len
); mb_ptr_adv(s
))
859 col
+= win_lbr_chartabsize(wp
, s
, col
, NULL
);
864 * Return TRUE if 'c' is a normal identifier character:
865 * Letters and characters from the 'isident' option.
871 return (c
> 0 && c
< 0x100 && (chartab
[c
] & CT_ID_CHAR
));
875 * return TRUE if 'c' is a keyword character: Letters and characters from
876 * 'iskeyword' option for current buffer.
877 * For multi-byte characters mb_get_class() is used (builtin rules).
887 return dbcs_class((unsigned)c
>> 8, c
& 0xff) >= 2;
889 return utf_class(c
) >= 2;
892 return (c
> 0 && c
< 0x100 && GET_CHARTAB(curbuf
, c
) != 0);
896 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
903 if (has_mbyte
&& MB_BYTE2LEN(*p
) > 1)
904 return mb_get_class(p
) >= 2;
906 return GET_CHARTAB(curbuf
, *p
) != 0;
909 #if defined(FEAT_SYN_HL) || defined(PROTO)
911 vim_iswordc_buf(p
, buf
)
916 if (has_mbyte
&& MB_BYTE2LEN(*p
) > 1)
917 return mb_get_class(p
) >= 2;
919 return (GET_CHARTAB(buf
, *p
) != 0);
924 * return TRUE if 'c' is a valid file-name character
925 * Assume characters above 0x100 are valid (multi-byte).
931 return (c
>= 0x100 || (c
> 0 && (chartab
[c
] & CT_FNAME_CHAR
)));
935 * return TRUE if 'c' is a valid file-name character or a wildcard character
936 * Assume characters above 0x100 are valid (multi-byte).
937 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
948 return vim_isfilec(c
) || c
== ']' || mch_has_wildcard(buf
);
952 * return TRUE if 'c' is a printable character
953 * Assume characters above 0x100 are printable (multi-byte), except for
961 if (enc_utf8
&& c
>= 0x100)
962 return utf_printable(c
);
964 return (c
>= 0x100 || (c
> 0 && (chartab
[c
] & CT_PRINT_CHAR
)));
968 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
969 * byte of a double-byte character.
972 vim_isprintc_strict(c
)
976 if (enc_dbcs
!= 0 && c
< 0x100 && MB_BYTE2LEN(c
) > 1)
978 if (enc_utf8
&& c
>= 0x100)
979 return utf_printable(c
);
981 return (c
>= 0x100 || (c
> 0 && (chartab
[c
] & CT_PRINT_CHAR
)));
985 * like chartabsize(), but also check for line breaks on the screen
988 lbr_chartabsize(s
, col
)
992 #ifdef FEAT_LINEBREAK
993 if (!curwin
->w_p_lbr
&& *p_sbr
== NUL
)
997 if (curwin
->w_p_wrap
)
998 return win_nolbr_chartabsize(curwin
, s
, col
, NULL
);
1000 RET_WIN_BUF_CHARTABSIZE(curwin
, curbuf
, s
, col
)
1001 #ifdef FEAT_LINEBREAK
1003 return win_lbr_chartabsize(curwin
, s
, col
, NULL
);
1008 * Call lbr_chartabsize() and advance the pointer.
1011 lbr_chartabsize_adv(s
, col
)
1017 retval
= lbr_chartabsize(*s
, col
);
1023 * This function is used very often, keep it fast!!!!
1025 * If "headp" is not NULL, set *headp to the size of what we add for 'showbreak'
1026 * string at start of line. Warning: *headp is only set if it's a non-zero
1027 * value, init to 0 before calling.
1031 win_lbr_chartabsize(wp
, s
, col
, headp
)
1037 #ifdef FEAT_LINEBREAK
1050 int tab_corr
= (*s
== TAB
);
1054 * No 'linebreak' and 'showbreak': return quickly.
1056 if (!wp
->w_p_lbr
&& *p_sbr
== NUL
)
1061 return win_nolbr_chartabsize(wp
, s
, col
, headp
);
1063 RET_WIN_BUF_CHARTABSIZE(wp
, wp
->w_buffer
, s
, col
)
1066 #ifdef FEAT_LINEBREAK
1068 * First get normal size, without 'linebreak'
1070 size
= win_chartabsize(wp
, s
, col
);
1074 * If 'linebreak' is set, check at a blank before a non-blank whether the
1075 * line needs a break here
1079 && !vim_isbreak(s
[1])
1082 # ifdef FEAT_VERTSPLIT
1088 * Count all characters from first non-blank after a blank up to next
1089 * non-blank after a blank.
1091 numberextra
= win_col_off(wp
);
1093 colmax
= W_WIDTH(wp
) - numberextra
;
1096 n
= colmax
+ win_col_off2(wp
);
1098 colmax
+= (((col
- colmax
) / n
) + 1) * n
;
1109 && (col2
== col
|| !vim_isbreak(*ps
))))))
1112 col2
+= win_chartabsize(wp
, s
, col2
);
1113 if (col2
>= colmax
) /* doesn't fit */
1115 size
= colmax
- col
;
1122 else if (has_mbyte
&& size
== 2 && MB_BYTE2LEN(*s
) > 1
1123 && wp
->w_p_wrap
&& in_win_border(wp
, col
))
1125 ++size
; /* Count the ">" in the last column. */
1131 * May have to add something for 'showbreak' string at start of line
1132 * Set *headp to the size of what we add.
1135 if (*p_sbr
!= NUL
&& wp
->w_p_wrap
&& col
!= 0)
1137 numberextra
= win_col_off(wp
);
1138 col
+= numberextra
+ mb_added
;
1139 if (col
>= (colnr_T
)W_WIDTH(wp
))
1142 numberextra
= W_WIDTH(wp
) - (numberextra
- win_col_off2(wp
));
1143 if (numberextra
> 0)
1144 col
= col
% numberextra
;
1146 if (col
== 0 || col
+ size
> (colnr_T
)W_WIDTH(wp
))
1148 added
= vim_strsize(p_sbr
);
1150 size
+= (added
/ wp
->w_buffer
->b_p_ts
) * wp
->w_buffer
->b_p_ts
;
1158 *headp
= added
+ mb_added
;
1163 #if defined(FEAT_MBYTE) || defined(PROTO)
1165 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1166 * 'wrap' is on. This means we need to check for a double-byte character that
1167 * doesn't fit at the end of the screen line.
1170 win_nolbr_chartabsize(wp
, s
, col
, headp
)
1178 if (*s
== TAB
&& (!wp
->w_p_list
|| lcs_tab1
))
1180 n
= wp
->w_buffer
->b_p_ts
;
1181 return (int)(n
- (col
% n
));
1184 /* Add one cell for a double-width character in the last column of the
1185 * window, displayed with a ">". */
1186 if (n
== 2 && MB_BYTE2LEN(*s
) > 1 && in_win_border(wp
, col
))
1196 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1200 in_win_border(wp
, vcol
)
1204 colnr_T width1
; /* width of first line (after line number) */
1205 colnr_T width2
; /* width of further lines */
1207 #ifdef FEAT_VERTSPLIT
1208 if (wp
->w_width
== 0) /* there is no border */
1211 width1
= W_WIDTH(wp
) - win_col_off(wp
);
1212 if (vcol
< width1
- 1)
1214 if (vcol
== width1
- 1)
1216 width2
= width1
+ win_col_off2(wp
);
1217 return ((vcol
- width1
) % width2
== width2
- 1);
1219 #endif /* FEAT_MBYTE */
1222 * Get virtual column number of pos.
1223 * start: on the first position of this character (TAB, ctrl)
1224 * cursor: where the cursor is on this character (first char, except for TAB)
1225 * end: on the last position of this character (TAB, ctrl)
1227 * This is used very often, keep it fast!
1230 getvcol(wp
, pos
, start
, cursor
, end
)
1238 char_u
*ptr
; /* points to current char */
1239 char_u
*posptr
; /* points to char at pos->col */
1242 int ts
= wp
->w_buffer
->b_p_ts
;
1246 ptr
= ml_get_buf(wp
->w_buffer
, pos
->lnum
, FALSE
);
1247 posptr
= ptr
+ pos
->col
;
1250 * This function is used very often, do some speed optimizations.
1251 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1252 * Also use this when 'list' is set but tabs take their normal size.
1254 if ((!wp
->w_p_list
|| lcs_tab1
!= NUL
)
1255 #ifdef FEAT_LINEBREAK
1256 && !wp
->w_p_lbr
&& *p_sbr
== NUL
1269 /* make sure we don't go past the end of the line */
1272 incr
= 1; /* NUL at end of line only takes one column */
1275 /* A tab gets expanded, depending on the current column */
1277 incr
= ts
- (vcol
% ts
);
1283 /* For utf-8, if the byte is >= 0x80, need to look at
1284 * further bytes to find the cell width. */
1285 if (enc_utf8
&& c
>= 0x80)
1286 incr
= utf_ptr2cells(ptr
);
1290 /* If a double-cell char doesn't fit at the end of a line
1291 * it wraps to the next line, it's like this char is three
1293 if (incr
== 2 && wp
->w_p_wrap
&& MB_BYTE2LEN(*ptr
) > 1
1294 && in_win_border(wp
, vcol
))
1305 if (ptr
>= posptr
) /* character at pos->col */
1316 /* A tab gets expanded, depending on the current column */
1318 incr
= win_lbr_chartabsize(wp
, ptr
, vcol
, &head
);
1319 /* make sure we don't go past the end of the line */
1322 incr
= 1; /* NUL at end of line only takes one column */
1326 if (ptr
>= posptr
) /* character at pos->col */
1334 *start
= vcol
+ head
;
1336 *end
= vcol
+ incr
- 1;
1342 && !virtual_active()
1345 && (*p_sel
== 'e' || ltoreq(*pos
, VIsual
)))
1348 *cursor
= vcol
+ incr
- 1; /* cursor at end */
1350 *cursor
= vcol
+ head
; /* cursor at start */
1355 * Get virtual cursor column in the current window, pretending 'list' is off.
1358 getvcol_nolist(posp
)
1361 int list_save
= curwin
->w_p_list
;
1364 curwin
->w_p_list
= FALSE
;
1365 getvcol(curwin
, posp
, NULL
, &vcol
, NULL
);
1366 curwin
->w_p_list
= list_save
;
1370 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1372 * Get virtual column in virtual mode.
1375 getvvcol(wp
, pos
, start
, cursor
, end
)
1389 if (virtual_active())
1391 /* For virtual mode, only want one value */
1392 getvcol(wp
, pos
, &col
, NULL
, NULL
);
1394 coladd
= pos
->coladd
;
1397 /* Cannot put the cursor on part of a wide character. */
1398 ptr
= ml_get_buf(wp
->w_buffer
, pos
->lnum
, FALSE
);
1399 if (pos
->col
< STRLEN(ptr
))
1401 int c
= (*mb_ptr2char
)(ptr
+ pos
->col
);
1403 if (c
!= TAB
&& vim_isprintc(c
))
1405 endadd
= char2cells(c
) - 1;
1406 if (coladd
> endadd
) /* past end of line */
1419 *end
= col
+ endadd
;
1422 getvcol(wp
, pos
, start
, cursor
, end
);
1426 #if defined(FEAT_VISUAL) || defined(PROTO)
1428 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1429 * Used for Visual block mode.
1432 getvcols(wp
, pos1
, pos2
, left
, right
)
1435 colnr_T
*left
, *right
;
1437 colnr_T from1
, from2
, to1
, to2
;
1439 if (ltp(pos1
, pos2
))
1441 getvvcol(wp
, pos1
, &from1
, NULL
, &to1
);
1442 getvvcol(wp
, pos2
, &from2
, NULL
, &to2
);
1446 getvvcol(wp
, pos2
, &from1
, NULL
, &to1
);
1447 getvvcol(wp
, pos1
, &from2
, NULL
, &to2
);
1455 if (*p_sel
== 'e' && from2
- 1 >= to1
)
1466 * skipwhite: skip over ' ' and '\t'.
1474 while (vim_iswhite(*p
)) /* skip to next non-white */
1488 while (VIM_ISDIGIT(*p
)) /* skip to next non-digit */
1493 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1495 * skip over digits and hex characters
1503 while (vim_isxdigit(*p
)) /* skip to next non-digit */
1509 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1511 * skip to digit (or NUL after the string)
1519 while (*p
!= NUL
&& !VIM_ISDIGIT(*p
)) /* skip to next digit */
1525 * skip to hex character (or NUL after the string)
1533 while (*p
!= NUL
&& !vim_isxdigit(*p
)) /* skip to next digit */
1540 * Variant of isdigit() that can handle characters > 0x100.
1541 * We don't use isdigit() here, because on some systems it also considers
1542 * superscript 1 to be a digit.
1543 * Use the VIM_ISDIGIT() macro for simple arguments.
1549 return (c
>= '0' && c
<= '9');
1553 * Variant of isxdigit() that can handle characters > 0x100.
1554 * We don't use isxdigit() here, because on some systems it also considers
1555 * superscript 1 to be a digit.
1561 return (c
>= '0' && c
<= '9')
1562 || (c
>= 'a' && c
<= 'f')
1563 || (c
>= 'A' && c
<= 'F');
1566 #if defined(FEAT_MBYTE) || defined(PROTO)
1568 * Vim's own character class functions. These exist because many library
1569 * islower()/toupper() etc. do not work properly: they crash when used with
1570 * invalid values or can't handle latin1 when the locale is C.
1571 * Speed is most important here.
1573 #define LATIN1LOWER 'l'
1574 #define LATIN1UPPER 'U'
1576 /* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
1577 static char_u latin1flags
[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1578 static char_u latin1upper
[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1579 static char_u latin1lower
[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
1590 return utf_islower(c
);
1593 #ifdef HAVE_ISWLOWER
1597 /* islower() can't handle these chars and may crash */
1601 return (latin1flags
[c
] & LATIN1LOWER
) == LATIN1LOWER
;
1615 return utf_isupper(c
);
1618 #ifdef HAVE_ISWUPPER
1622 /* isupper() can't handle these chars and may crash */
1626 return (latin1flags
[c
] & LATIN1UPPER
) == LATIN1UPPER
;
1640 return utf_toupper(c
);
1643 #ifdef HAVE_TOWUPPER
1647 /* toupper() can't handle these chars and may crash */
1651 return latin1upper
[c
];
1653 return TOUPPER_LOC(c
);
1665 return utf_tolower(c
);
1668 #ifdef HAVE_TOWLOWER
1672 /* tolower() can't handle these chars and may crash */
1676 return latin1lower
[c
];
1678 return TOLOWER_LOC(c
);
1683 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1689 while (*p
!= ' ' && *p
!= '\t' && *p
!= NUL
)
1694 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1697 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1703 while (*p
!= ' ' && *p
!= '\t' && *p
!= NUL
)
1705 if ((*p
== '\\' || *p
== Ctrl_V
) && *(p
+ 1) != NUL
)
1714 * Getdigits: Get a number from a string and skip over it.
1715 * Note: the argument is a pointer to a char_u pointer!
1725 retval
= atol((char *)p
);
1726 if (*p
== '-') /* skip negative sign */
1728 p
= skipdigits(p
); /* skip to next non-digit */
1734 * Return TRUE if "lbuf" is empty or only contains blanks.
1737 vim_isblankline(lbuf
)
1742 p
= skipwhite(lbuf
);
1743 return (*p
== NUL
|| *p
== '\r' || *p
== '\n');
1747 * Convert a string into a long and/or unsigned long, taking care of
1748 * hexadecimal and octal numbers. Accepts a '-' sign.
1749 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1754 * If "len" is not NULL, the length of the number in characters is returned.
1755 * If "nptr" is not NULL, the signed result is returned in it.
1756 * If "unptr" is not NULL, the unsigned result is returned in it.
1757 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1759 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1763 vim_str2nr(start
, hexp
, len
, dooct
, dohex
, nptr
, unptr
)
1765 int *hexp
; /* return: type of number 0 = decimal, 'x'
1766 or 'X' is hex, '0' = octal */
1767 int *len
; /* return: detected length of number */
1768 int dooct
; /* recognize octal number */
1769 int dohex
; /* recognize hex number */
1770 long *nptr
; /* return: signed result */
1771 unsigned long *unptr
; /* return: unsigned result */
1773 char_u
*ptr
= start
;
1774 int hex
= 0; /* default is decimal */
1775 int negative
= FALSE
;
1776 unsigned long un
= 0;
1785 /* Recognize hex and octal. */
1786 if (ptr
[0] == '0' && ptr
[1] != '8' && ptr
[1] != '9')
1789 if (dohex
&& (hex
== 'X' || hex
== 'x') && vim_isxdigit(ptr
[2]))
1790 ptr
+= 2; /* hexadecimal */
1793 hex
= 0; /* default is decimal */
1796 /* Don't interpret "0", "08" or "0129" as octal. */
1797 for (n
= 1; VIM_ISDIGIT(ptr
[n
]); ++n
)
1801 hex
= 0; /* can't be octal */
1805 hex
= '0'; /* assume octal */
1812 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1814 if (hex
== '0' || dooct
> 1)
1817 while ('0' <= *ptr
&& *ptr
<= '7')
1819 un
= 8 * un
+ (unsigned long)(*ptr
- '0');
1823 else if (hex
!= 0 || dohex
> 1)
1826 while (vim_isxdigit(*ptr
))
1828 un
= 16 * un
+ (unsigned long)hex2nr(*ptr
);
1835 while (VIM_ISDIGIT(*ptr
))
1837 un
= 10 * un
+ (unsigned long)(*ptr
- '0');
1845 *len
= (int)(ptr
- start
);
1848 if (negative
) /* account for leading '-' for decimal numbers */
1858 * Return the value of a single hex character.
1859 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
1865 if (c
>= 'a' && c
<= 'f')
1866 return c
- 'a' + 10;
1867 if (c
>= 'A' && c
<= 'F')
1868 return c
- 'A' + 10;
1872 #if defined(FEAT_TERMRESPONSE) \
1873 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1875 * Convert two hex characters to a byte.
1876 * Return -1 if one of the characters is not hex.
1882 if (!vim_isxdigit(p
[0]) || !vim_isxdigit(p
[1]))
1884 return (hex2nr(p
[0]) << 4) + hex2nr(p
[1]);
1889 * Return TRUE if "str" starts with a backslash that should be removed.
1890 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1891 * backslash is not a normal file name character.
1892 * '$' is a valid file name character, we don't remove the backslash before
1893 * it. This means it is not possible to use an environment variable after a
1894 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1895 * Although "\ name" is valid, the backslash in "Program\ files" must be
1896 * removed. Assume a file name doesn't start with a space.
1897 * For multi-byte names, never remove a backslash before a non-ascii
1898 * character, assume that all multi-byte characters are valid file name
1905 #ifdef BACKSLASH_IN_FILENAME
1906 return (str
[0] == '\\'
1914 && !vim_isfilec(str
[1]))));
1916 return (str
[0] == '\\' && str
[1] != NUL
);
1921 * Halve the number of backslashes in a file name argument.
1922 * For MS-DOS we only do this if the character after the backslash
1923 * is not a normal file character.
1930 if (rem_backslash(p
))
1935 * backslash_halve() plus save the result in allocated memory.
1938 backslash_halve_save(p
)
1943 res
= vim_strsave(p
);
1946 backslash_halve(res
);
1950 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1952 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1953 * The first 64 entries have been added to map control characters defined in
1956 static char_u ebcdic2ascii_tab
[256] =
1958 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1959 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1960 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1961 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1962 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1963 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1964 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1965 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1966 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1967 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1968 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1969 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1970 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1971 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1972 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1973 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1974 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1975 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1976 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1977 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1978 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1979 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1980 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1981 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1982 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1983 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1984 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1985 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1986 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1987 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1988 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1989 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1993 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1994 * wanting 7-bit ASCII characters out the other end.
1997 ebcdic2ascii(buffer
, len
)
2003 for (i
= 0; i
< len
; i
++)
2004 buffer
[i
] = ebcdic2ascii_tab
[buffer
[i
]];