2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
5 * %sccs.include.redist.c%
/*
 * Revision identification string ($Id$ and $Date$ keywords).  Nothing
 * writes to it, so it is const-qualified.
 */
static const char sccsid[] = "$Id: v_word.c,v 8.10 1993/10/26 17:22:58 bostic Exp $ (Berkeley) $Date: 1993/10/26 17:22:58 $";
12 #include <sys/types.h>
20 * There are two types of "words". Bigwords are easy -- groups of anything
21 * delimited by whitespace. Normal words are trickier. They are either a
22 * group of characters, numbers and underscores, or a group of anything but,
23 * delimited by whitespace. When searching for a word, if you're in whitespace, it's
24 * easy, just remove the whitespace and go to the beginning or end of the
25 * word. Otherwise, figure out if the next character is in a different group.
26 * If it is, go to the beginning or end of that group, otherwise, go to the
27 * beginning or end of the current group. The historic version of vi didn't
28 * get this right, so, for example, there were cases where "4e" was not the
29 * same as "eeee". To get it right you have to resolve the cursor after each
30 * search so that the look-ahead to figure out what type of "word" the cursor
31 * is in will be correct.
33 * Empty lines, and lines that consist of only white-space characters count
34 * as a single word, and the beginning and end of the file count as an
35 * infinite number of words.
37 * Movements associated with commands are different than movement commands.
38 * For example, in "abc def", with the cursor on the 'a', "cw" is from
39 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white
40 * space is discarded from the change movement. Another example is that,
41 * in the same string, a "cw" on any white space character replaces that
42 * single character, and nothing else. Ain't nothin' in here that's easy.
44 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
45 * would treat groups of empty lines as individual words, i.e. the command
46 * would move the cursor to each new empty line. The 'e' and 'E' commands
47 * would treat groups of empty lines as a single word, i.e. the first use
48 * would move past the group of lines. The 'b' command would just beep at
49 * you. If the lines contained only white-space characters, the 'w' and 'W'
50 * commands would just beep at you, and the 'B', 'b', 'E' and 'e' commands
51 * would treat the group as a single word, and the 'B' and 'b' commands would
52 * treat the lines as individual words. This implementation treats both
53 * cases as a single white-space word.
/*
 * FW --
 *	Loop header that scans forward: while len is non-zero and (test)
 *	holds, decrement len and advance p.  Both len and p are taken from
 *	the scope of the user of the macro, which also supplies the loop
 *	body (e.g. an empty statement).  The test expression is evaluated
 *	once per iteration.
 */
#define	FW(test)	for (; len && (test); --len, ++p)
/*
 * BW --
 *	Loop header that scans backward: while len is non-zero and (test)
 *	holds, decrement len and step p back by one.  Both len and p are
 *	taken from the scope of the user of the macro, which also supplies
 *	the loop body (e.g. an empty statement).  The test expression is
 *	evaluated once per iteration.
 */
#define	BW(test)	for (; len && (test); --len, --p)
/*
 * Word flavor handed to fword(): v_wordW passes BIGWORD and v_wordw
 * passes LITTLEWORD.
 */
enum which {
	BIGWORD,		/* Whitespace-delimited word ('W'). */
	LITTLEWORD		/* Normal word ('w'). */
};
61 static int bword
__P((SCR
*, EXF
*, VICMDARG
*, MARK
*, MARK
*, int));
62 static int eword
__P((SCR
*, EXF
*, VICMDARG
*, MARK
*, MARK
*, int));
63 static int fword
__P((SCR
*, EXF
*, VICMDARG
*, MARK
*, MARK
*, enum which
));
67 * Move forward a word at a time.
70 v_wordw(sp
, ep
, vp
, fm
, tm
, rp
)
76 return (fword(sp
, ep
, vp
, fm
, rp
, LITTLEWORD
));
81 * Move forward a bigword at a time.
84 v_wordW(sp
, ep
, vp
, fm
, tm
, rp
)
90 return (fword(sp
, ep
, vp
, fm
, rp
, BIGWORD
));
95 * Move forward by words.
98 fword(sp
, ep
, vp
, fm
, rp
, type
)
105 enum { INWORD
, NOTWORD
} state
;
111 if (cs_init(sp
, ep
, &cs
))
114 cnt
= F_ISSET(vp
, VC_C1SET
) ? vp
->count
: 1;
118 * If the count is 1, and it's a change command, we're done.
119 * Else, move to the first non-white-space character, which
120 * counts as a single word move. If it's a motion command,
121 * don't move off the end of the line.
123 if (cs
.cs_flags
== CS_EMP
|| cs
.cs_flags
== 0 && isblank(cs
.cs_ch
)) {
124 if (cs
.cs_flags
!= CS_EMP
&& cnt
== 1) {
125 if (F_ISSET(vp
, VC_C
)) {
129 if (F_ISSET(vp
, VC_D
| VC_Y
)) {
130 if (cs_fspace(sp
, ep
, &cs
))
135 if (cs_fblank(sp
, ep
, &cs
))
141 * Cyclically move to the next word -- this involves skipping
142 * over word characters and then any trailing non-word characters.
143 * Note, for the 'w' command, the definition of a word keeps
149 if (cs_next(sp
, ep
, &cs
))
151 if (cs
.cs_flags
== CS_EOF
)
153 if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
157 * If a motion command and we're at the end of the
158 * last word, we're done. Delete and yank eat any
159 * trailing blanks, but we don't move off the end
160 * of the line regardless.
162 if (cnt
== 0 && F_ISSET(vp
, VC_C
| VC_D
| VC_Y
)) {
163 if (F_ISSET(vp
, VC_D
| VC_Y
) &&
164 cs_fspace(sp
, ep
, &cs
))
169 /* Eat whitespace characters. */
170 if (cs_fblank(sp
, ep
, &cs
))
172 if (cs
.cs_flags
== CS_EOF
)
177 state
= cs
.cs_flags
== 0 &&
178 inword(cs
.cs_ch
) ? INWORD
: NOTWORD
;
180 if (cs_next(sp
, ep
, &cs
))
182 if (cs
.cs_flags
== CS_EOF
)
184 if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
186 if (state
== INWORD
) {
187 if (!inword(cs
.cs_ch
))
190 if (inword(cs
.cs_ch
))
193 /* See comment above. */
194 if (cnt
== 0 && F_ISSET(vp
, VC_C
| VC_D
| VC_Y
)) {
195 if (F_ISSET(vp
, VC_D
| VC_Y
) &&
196 cs_fspace(sp
, ep
, &cs
))
201 /* Eat whitespace characters. */
202 if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
203 if (cs_fblank(sp
, ep
, &cs
))
205 if (cs
.cs_flags
== CS_EOF
)
210 * If a motion command, and eating the trailing non-word would
211 * move us off this line, don't do it. Move the return cursor
212 * to one past the EOL instead.
214 ret1
: if (F_ISSET(vp
, VC_C
| VC_D
| VC_Y
) && cs
.cs_flags
== CS_EOL
)
217 /* If we didn't move, we must be at EOF. */
218 ret2
: if (cs
.cs_lno
== fm
->lno
&& cs
.cs_cno
== fm
->cno
) {
223 * If at EOF, and it's a motion command, move the return cursor
226 if (F_ISSET(vp
, VC_C
| VC_D
| VC_Y
) && cs
.cs_flags
== CS_EOF
)
228 ret3
: rp
->lno
= cs
.cs_lno
;
234 * v_wordb -- [count]b
235 * Move backward a word at a time.
238 v_wordb(sp
, ep
, vp
, fm
, tm
, rp
)
244 return (bword(sp
, ep
, vp
, fm
, rp
, 0));
248 * v_wordB -- [count]B
249 * Move backward a bigword at a time.
252 v_wordB(sp
, ep
, vp
, fm
, tm
, rp
)
258 return (bword(sp
, ep
, vp
, fm
, rp
, 1));
263 * Move backward by words.
266 bword(sp
, ep
, vp
, fm
, rp
, spaceonly
)
282 /* Check for start of file. */
283 if (lno
== 1 && cno
== 0) {
288 if ((p
= file_gline(sp
, ep
, lno
, &len
)) == NULL
) {
289 if (file_lline(sp
, ep
, &lno
))
294 GETLINE_ERR(sp
, lno
);
298 cnt
= F_ISSET(vp
, VC_C1SET
) ? vp
->count
: 1;
301 * Reset the length to the number of characters in the line; the
302 * first character is the current cursor position.
304 len
= cno
? cno
+ 1 : 0;
307 for (startp
= p
, p
+= cno
; cnt
--;) {
332 BW(!isblank(*p
) && !inword(*p
));
337 if (cnt
&& len
== 0) {
338 /* If we hit SOF, stay there (historic practice). */
339 line
: if (lno
== 1) {
346 * Get the line. If the line is empty, decrement
347 * count and get another one.
349 if ((p
= file_gline(sp
, ep
, --lno
, &len
)) == NULL
) {
350 GETLINE_ERR(sp
, lno
);
354 if (cnt
== 0 || --cnt
== 0) {
363 * Set the cursor to the end of the line. If the word
364 * at the end of this line has only a single character,
365 * we've already skipped over it.
370 if (cnt
&& len
> 1 && !isblank(p
[0]))
374 } else if (!isblank(p
[-1]) &&
384 rp
->cno
= p
- startp
;
389 * v_worde -- [count]e
390 * Move forward to the end of the word.
393 v_worde(sp
, ep
, vp
, fm
, tm
, rp
)
399 return (eword(sp
, ep
, vp
, fm
, rp
, 0));
403 * v_wordE -- [count]E
404 * Move forward to the end of the bigword.
407 v_wordE(sp
, ep
, vp
, fm
, tm
, rp
)
413 return (eword(sp
, ep
, vp
, fm
, rp
, 1));
418 * Move forward to the end of the word.
421 eword(sp
, ep
, vp
, fm
, rp
, spaceonly
)
438 if ((p
= file_gline(sp
, ep
, lno
, &llen
)) == NULL
) {
439 if (file_lline(sp
, ep
, &lno
))
444 GETLINE_ERR(sp
, lno
);
448 cnt
= F_ISSET(vp
, VC_C1SET
) ? vp
->count
: 1;
451 * Reset the length; the first character is the current cursor
452 * position. If no more characters in this line, may already
456 if (empty
= llen
== 0 || llen
== cno
+ 1)
459 for (startp
= p
+= cno
; cnt
--; empty
= 0) {
484 FW(!isblank(*p
) && !inword(*p
));
489 if (cnt
&& len
== 0) {
490 /* If we hit EOF, stay there (historic practice). */
491 line
: if ((p
= file_gline(sp
, ep
, ++lno
, &llen
)) == NULL
) {
493 * If already at eof, complain, unless it's
494 * a change command or a delete command and
495 * there's something to delete.
498 if (F_ISSET(vp
, VC_C
) ||
499 F_ISSET(vp
, VC_D
) && llen
!= 0) {
501 rp
->cno
= llen
? llen
: 1;
508 file_gline(sp
, ep
, --lno
, &llen
)) == NULL
) {
509 GETLINE_ERR(sp
, lno
);
513 rp
->cno
= llen
? llen
- 1 : 0;
514 /* The 'c', 'd' and 'y' need one more space. */
515 if (F_ISSET(vp
, VC_C
| VC_D
| VC_Y
))
528 rp
->cno
= cno
+ (p
- startp
);
530 /* The 'c', 'd' and 'y' need one more space. */
531 if (F_ISSET(vp
, VC_C
| VC_D
| VC_Y
))