d9b166a3e2550af2ec03e94c97403e3c0cfe88e2
[nvi.git] / vi / v_word.c
blobd9b166a3e2550af2ec03e94c97403e3c0cfe88e2
1 /*-
2 * Copyright (c) 1992, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
8 */
10 #include "config.h"
12 #ifndef lint
13 static const char sccsid[] = "$Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp $ (Berkeley) $Date: 2001/06/25 15:19:36 $";
14 #endif /* not lint */
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
25 #include "../common/common.h"
26 #include "vi.h"
29 * There are two types of "words". Bigwords are easy -- groups of anything
30 * delimited by whitespace. Normal words are trickier. They are either a
31 * group of characters, numbers and underscores, or a group of anything but,
32 * delimited by whitespace. When searching for a word, if you're in whitespace, it's
33 * easy, just remove the whitespace and go to the beginning or end of the
34 * word. Otherwise, figure out if the next character is in a different group.
35 * If it is, go to the beginning or end of that group, otherwise, go to the
36 * beginning or end of the current group. The historic version of vi didn't
37 * get this right, so, for example, there were cases where "4e" was not the
38 * same as "eeee" -- in particular, single character words, and commands that
39 * began in whitespace were almost always handled incorrectly. To get it right
40 * you have to resolve the cursor after each search so that the look-ahead to
41 * figure out what type of "word" the cursor is in will be correct.
43 * Empty lines, and lines that consist of only white-space characters count
44 * as a single word, and the beginning and end of the file counts as an
45 * infinite number of words.
47 * Movements associated with commands are different than movement commands.
48 * For example, in "abc def", with the cursor on the 'a', "cw" is from
49 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white
50 * space is discarded from the change movement. Another example is that,
51 * in the same string, a "cw" on any white space character replaces that
52 * single character, and nothing else. Ain't nothin' in here that's easy.
54 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
55 * would treat groups of empty lines as individual words, i.e. the command
56 * would move the cursor to each new empty line. The 'e' and 'E' commands
57 * would treat groups of empty lines as a single word, i.e. the first use
58 * would move past the group of lines. The 'b' command would just beep at
59 * you, or, if you did it from the start of the line as part of a motion
60 * command, go absolutely nuts. If the lines contained only white-space
61 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
62 * 'b', 'E' and 'e' commands would treat the group as a single word, and
63 * the 'B' and 'b' commands will treat the lines as individual words. This
64 * implementation treats all of these cases as a single white-space word.
67 enum which {BIGWORD, LITTLEWORD};
69 static int bword __P((SCR *, VICMD *, enum which));
70 static int eword __P((SCR *, VICMD *, enum which));
71 static int fword __P((SCR *, VICMD *, enum which));
74 * v_wordW -- [count]W
75 * Move forward a bigword at a time.
77 * PUBLIC: int v_wordW __P((SCR *, VICMD *));
79 int
80 v_wordW(SCR *sp, VICMD *vp)
82 return (fword(sp, vp, BIGWORD));
86 * v_wordw -- [count]w
87 * Move forward a word at a time.
89 * PUBLIC: int v_wordw __P((SCR *, VICMD *));
91 int
92 v_wordw(SCR *sp, VICMD *vp)
94 return (fword(sp, vp, LITTLEWORD));
98 * fword --
99 * Move forward by words.
101 static int
102 fword(SCR *sp, VICMD *vp, enum which type)
104 enum { INWORD, NOTWORD } state;
105 VCS cs;
106 u_long cnt;
108 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
109 cs.cs_lno = vp->m_start.lno;
110 cs.cs_cno = vp->m_start.cno;
111 if (cs_init(sp, &cs))
112 return (1);
115 * If in white-space:
116 * If the count is 1, and it's a change command, we're done.
117 * Else, move to the first non-white-space character, which
118 * counts as a single word move. If it's a motion command,
119 * don't move off the end of the line.
121 if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) {
122 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
123 if (ISCMD(vp->rkp, 'c'))
124 return (0);
125 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
126 if (cs_fspace(sp, &cs))
127 return (1);
128 goto ret;
131 if (cs_fblank(sp, &cs))
132 return (1);
133 --cnt;
137 * Cyclically move to the next word -- this involves skipping
138 * over word characters and then any trailing non-word characters.
139 * Note, for the 'w' command, the definition of a word keeps
140 * switching.
142 if (type == BIGWORD)
143 while (cnt--) {
144 for (;;) {
145 if (cs_next(sp, &cs))
146 return (1);
147 if (cs.cs_flags == CS_EOF)
148 goto ret;
149 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
150 break;
153 * If a motion command and we're at the end of the
154 * last word, we're done. Delete and yank eat any
155 * trailing blanks, but we don't move off the end
156 * of the line regardless.
158 if (cnt == 0 && ISMOTION(vp)) {
159 if ((ISCMD(vp->rkp, 'd') ||
160 ISCMD(vp->rkp, 'y')) &&
161 cs_fspace(sp, &cs))
162 return (1);
163 break;
166 /* Eat whitespace characters. */
167 if (cs_fblank(sp, &cs))
168 return (1);
169 if (cs.cs_flags == CS_EOF)
170 goto ret;
172 else
173 while (cnt--) {
174 state = cs.cs_flags == 0 &&
175 inword(cs.cs_ch) ? INWORD : NOTWORD;
176 for (;;) {
177 if (cs_next(sp, &cs))
178 return (1);
179 if (cs.cs_flags == CS_EOF)
180 goto ret;
181 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
182 break;
183 if (state == INWORD) {
184 if (!inword(cs.cs_ch))
185 break;
186 } else
187 if (inword(cs.cs_ch))
188 break;
190 /* See comment above. */
191 if (cnt == 0 && ISMOTION(vp)) {
192 if ((ISCMD(vp->rkp, 'd') ||
193 ISCMD(vp->rkp, 'y')) &&
194 cs_fspace(sp, &cs))
195 return (1);
196 break;
199 /* Eat whitespace characters. */
200 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
201 if (cs_fblank(sp, &cs))
202 return (1);
203 if (cs.cs_flags == CS_EOF)
204 goto ret;
208 * If we didn't move, we must be at EOF.
210 * !!!
211 * That's okay for motion commands, however.
213 ret: if (!ISMOTION(vp) &&
214 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
215 v_eof(sp, &vp->m_start);
216 return (1);
219 /* Adjust the end of the range for motion commands. */
220 vp->m_stop.lno = cs.cs_lno;
221 vp->m_stop.cno = cs.cs_cno;
222 if (ISMOTION(vp) && cs.cs_flags == 0)
223 --vp->m_stop.cno;
226 * Non-motion commands move to the end of the range. Delete
227 * and yank stay at the start, ignore others.
229 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
230 return (0);
234 * v_wordE -- [count]E
235 * Move forward to the end of the bigword.
237 * PUBLIC: int v_wordE __P((SCR *, VICMD *));
240 v_wordE(SCR *sp, VICMD *vp)
242 return (eword(sp, vp, BIGWORD));
246 * v_worde -- [count]e
247 * Move forward to the end of the word.
249 * PUBLIC: int v_worde __P((SCR *, VICMD *));
252 v_worde(SCR *sp, VICMD *vp)
254 return (eword(sp, vp, LITTLEWORD));
258 * eword --
259 * Move forward to the end of the word.
261 static int
262 eword(SCR *sp, VICMD *vp, enum which type)
264 enum { INWORD, NOTWORD } state;
265 VCS cs;
266 u_long cnt;
268 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
269 cs.cs_lno = vp->m_start.lno;
270 cs.cs_cno = vp->m_start.cno;
271 if (cs_init(sp, &cs))
272 return (1);
275 * !!!
276 * If in whitespace, or the next character is whitespace, move past
277 * it. (This doesn't count as a word move.) Stay at the character
278 * past the current one, it sets word "state" for the 'e' command.
280 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
281 if (cs_next(sp, &cs))
282 return (1);
283 if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
284 goto start;
286 if (cs_fblank(sp, &cs))
287 return (1);
290 * Cyclically move to the next word -- this involves skipping
291 * over word characters and then any trailing non-word characters.
292 * Note, for the 'e' command, the definition of a word keeps
293 * switching.
295 start: if (type == BIGWORD)
296 while (cnt--) {
297 for (;;) {
298 if (cs_next(sp, &cs))
299 return (1);
300 if (cs.cs_flags == CS_EOF)
301 goto ret;
302 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
303 break;
306 * When we reach the start of the word after the last
307 * word, we're done. If we changed state, back up one
308 * to the end of the previous word.
310 if (cnt == 0) {
311 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
312 return (1);
313 break;
316 /* Eat whitespace characters. */
317 if (cs_fblank(sp, &cs))
318 return (1);
319 if (cs.cs_flags == CS_EOF)
320 goto ret;
322 else
323 while (cnt--) {
324 state = cs.cs_flags == 0 &&
325 inword(cs.cs_ch) ? INWORD : NOTWORD;
326 for (;;) {
327 if (cs_next(sp, &cs))
328 return (1);
329 if (cs.cs_flags == CS_EOF)
330 goto ret;
331 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
332 break;
333 if (state == INWORD) {
334 if (!inword(cs.cs_ch))
335 break;
336 } else
337 if (inword(cs.cs_ch))
338 break;
340 /* See comment above. */
341 if (cnt == 0) {
342 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
343 return (1);
344 break;
347 /* Eat whitespace characters. */
348 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
349 if (cs_fblank(sp, &cs))
350 return (1);
351 if (cs.cs_flags == CS_EOF)
352 goto ret;
356 * If we didn't move, we must be at EOF.
358 * !!!
359 * That's okay for motion commands, however.
361 ret: if (!ISMOTION(vp) &&
362 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
363 v_eof(sp, &vp->m_start);
364 return (1);
367 /* Set the end of the range for motion commands. */
368 vp->m_stop.lno = cs.cs_lno;
369 vp->m_stop.cno = cs.cs_cno;
372 * Non-motion commands move to the end of the range.
373 * Delete and yank stay at the start, ignore others.
375 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
376 return (0);
380 * v_WordB -- [count]B
381 * Move backward a bigword at a time.
383 * PUBLIC: int v_wordB __P((SCR *, VICMD *));
386 v_wordB(SCR *sp, VICMD *vp)
388 return (bword(sp, vp, BIGWORD));
392 * v_wordb -- [count]b
393 * Move backward a word at a time.
395 * PUBLIC: int v_wordb __P((SCR *, VICMD *));
398 v_wordb(SCR *sp, VICMD *vp)
400 return (bword(sp, vp, LITTLEWORD));
404 * bword --
405 * Move backward by words.
407 static int
408 bword(SCR *sp, VICMD *vp, enum which type)
410 enum { INWORD, NOTWORD } state;
411 VCS cs;
412 u_long cnt;
414 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
415 cs.cs_lno = vp->m_start.lno;
416 cs.cs_cno = vp->m_start.cno;
417 if (cs_init(sp, &cs))
418 return (1);
421 * !!!
422 * If in whitespace, or the previous character is whitespace, move
423 * past it. (This doesn't count as a word move.) Stay at the
424 * character before the current one, it sets word "state" for the
425 * 'b' command.
427 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
428 if (cs_prev(sp, &cs))
429 return (1);
430 if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
431 goto start;
433 if (cs_bblank(sp, &cs))
434 return (1);
437 * Cyclically move to the beginning of the previous word -- this
438 * involves skipping over word characters and then any trailing
439 * non-word characters. Note, for the 'b' command, the definition
440 * of a word keeps switching.
442 start: if (type == BIGWORD)
443 while (cnt--) {
444 for (;;) {
445 if (cs_prev(sp, &cs))
446 return (1);
447 if (cs.cs_flags == CS_SOF)
448 goto ret;
449 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
450 break;
453 * When we reach the end of the word before the last
454 * word, we're done. If we changed state, move forward
455 * one to the end of the next word.
457 if (cnt == 0) {
458 if (cs.cs_flags == 0 && cs_next(sp, &cs))
459 return (1);
460 break;
463 /* Eat whitespace characters. */
464 if (cs_bblank(sp, &cs))
465 return (1);
466 if (cs.cs_flags == CS_SOF)
467 goto ret;
469 else
470 while (cnt--) {
471 state = cs.cs_flags == 0 &&
472 inword(cs.cs_ch) ? INWORD : NOTWORD;
473 for (;;) {
474 if (cs_prev(sp, &cs))
475 return (1);
476 if (cs.cs_flags == CS_SOF)
477 goto ret;
478 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
479 break;
480 if (state == INWORD) {
481 if (!inword(cs.cs_ch))
482 break;
483 } else
484 if (inword(cs.cs_ch))
485 break;
487 /* See comment above. */
488 if (cnt == 0) {
489 if (cs.cs_flags == 0 && cs_next(sp, &cs))
490 return (1);
491 break;
494 /* Eat whitespace characters. */
495 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
496 if (cs_bblank(sp, &cs))
497 return (1);
498 if (cs.cs_flags == CS_SOF)
499 goto ret;
502 /* If we didn't move, we must be at SOF. */
503 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
504 v_sof(sp, &vp->m_start);
505 return (1);
508 /* Set the end of the range for motion commands. */
509 vp->m_stop.lno = cs.cs_lno;
510 vp->m_stop.cno = cs.cs_cno;
513 * All commands move to the end of the range. Motion commands
514 * adjust the starting point to the character before the current
515 * one.
517 * !!!
518 * The historic vi didn't get this right -- the `yb' command yanked
519 * the right stuff and even updated the cursor value, but the cursor
520 * was not actually updated on the screen.
522 vp->m_final = vp->m_stop;
523 if (ISMOTION(vp))
524 --vp->m_start.cno;
525 return (0);