the script used to extract a release
[nvi.git] / vi / v_word.c
blob619a04b3d9e1fda190be7b97ff2f13019542c312
1 /*-
2 * Copyright (c) 1992, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
8 */
10 #include "config.h"
12 #ifndef lint
13 static const char sccsid[] = "$Id: v_word.c,v 10.5 1996/03/06 19:54:48 bostic Exp $ (Berkeley) $Date: 1996/03/06 19:54:48 $";
14 #endif /* not lint */
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
25 #include "../common/common.h"
26 #include "vi.h"
29 * There are two types of "words". Bigwords are easy -- groups of anything
30 * delimited by whitespace. Normal words are trickier. They are either a
31 * group of characters, numbers and underscores, or a group of anything but,
32 * delimited by whitespace. When searching for a word, if you're in whitespace, it's
33 * easy, just remove the whitespace and go to the beginning or end of the
34 * word. Otherwise, figure out if the next character is in a different group.
35 * If it is, go to the beginning or end of that group, otherwise, go to the
36 * beginning or end of the current group. The historic version of vi didn't
37 * get this right, so, for example, there were cases where "4e" was not the
38 * same as "eeee" -- in particular, single character words, and commands that
39 * began in whitespace were almost always handled incorrectly. To get it right
40 * you have to resolve the cursor after each search so that the look-ahead to
41 * figure out what type of "word" the cursor is in will be correct.
43 * Empty lines, and lines that consist of only white-space characters count
44 * as a single word, and the beginning and end of the file counts as an
45 * infinite number of words.
47 * Movements associated with commands are different than movement commands.
48 * For example, in "abc def", with the cursor on the 'a', "cw" is from
49 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white
50 * space is discarded from the change movement. Another example is that,
51 * in the same string, a "cw" on any white space character replaces that
52 * single character, and nothing else. Ain't nothin' in here that's easy.
54 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
55 * would treat groups of empty lines as individual words, i.e. the command
56 * would move the cursor to each new empty line. The 'e' and 'E' commands
57 * would treat groups of empty lines as a single word, i.e. the first use
58 * would move past the group of lines. The 'b' command would just beep at
59 * you, or, if you did it from the start of the line as part of a motion
60 * command, go absolutely nuts. If the lines contained only white-space
61 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
62 * 'b', 'E' and 'e' commands would treat the group as a single word, and
63 * the 'B' and 'b' commands will treat the lines as individual words. This
64 * implementation treats all of these cases as a single white-space word.
67 enum which {BIGWORD, LITTLEWORD};
69 static int bword __P((SCR *, VICMD *, enum which));
70 static int eword __P((SCR *, VICMD *, enum which));
71 static int fword __P((SCR *, VICMD *, enum which));
74 * v_wordW -- [count]W
75 * Move forward a bigword at a time.
77 * PUBLIC: int v_wordW __P((SCR *, VICMD *));
79 int
80 v_wordW(sp, vp)
81 SCR *sp;
82 VICMD *vp;
84 return (fword(sp, vp, BIGWORD));
88 * v_wordw -- [count]w
89 * Move forward a word at a time.
91 * PUBLIC: int v_wordw __P((SCR *, VICMD *));
93 int
94 v_wordw(sp, vp)
95 SCR *sp;
96 VICMD *vp;
98 return (fword(sp, vp, LITTLEWORD));
102 * fword --
103 * Move forward by words.
105 static int
106 fword(sp, vp, type)
107 SCR *sp;
108 VICMD *vp;
109 enum which type;
111 enum { INWORD, NOTWORD } state;
112 VCS cs;
113 u_long cnt;
115 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
116 cs.cs_lno = vp->m_start.lno;
117 cs.cs_cno = vp->m_start.cno;
118 if (cs_init(sp, &cs))
119 return (1);
122 * If in white-space:
123 * If the count is 1, and it's a change command, we're done.
124 * Else, move to the first non-white-space character, which
125 * counts as a single word move. If it's a motion command,
126 * don't move off the end of the line.
128 if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) {
129 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
130 if (ISCMD(vp->rkp, 'c'))
131 return (0);
132 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
133 if (cs_fspace(sp, &cs))
134 return (1);
135 goto ret;
138 if (cs_fblank(sp, &cs))
139 return (1);
140 --cnt;
144 * Cyclically move to the next word -- this involves skipping
145 * over word characters and then any trailing non-word characters.
146 * Note, for the 'w' command, the definition of a word keeps
147 * switching.
149 if (type == BIGWORD)
150 while (cnt--) {
151 for (;;) {
152 if (cs_next(sp, &cs))
153 return (1);
154 if (cs.cs_flags == CS_EOF)
155 goto ret;
156 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
157 break;
160 * If a motion command and we're at the end of the
161 * last word, we're done. Delete and yank eat any
162 * trailing blanks, but we don't move off the end
163 * of the line regardless.
165 if (cnt == 0 && ISMOTION(vp)) {
166 if ((ISCMD(vp->rkp, 'd') ||
167 ISCMD(vp->rkp, 'y')) &&
168 cs_fspace(sp, &cs))
169 return (1);
170 break;
173 /* Eat whitespace characters. */
174 if (cs_fblank(sp, &cs))
175 return (1);
176 if (cs.cs_flags == CS_EOF)
177 goto ret;
179 else
180 while (cnt--) {
181 state = cs.cs_flags == 0 &&
182 inword(cs.cs_ch) ? INWORD : NOTWORD;
183 for (;;) {
184 if (cs_next(sp, &cs))
185 return (1);
186 if (cs.cs_flags == CS_EOF)
187 goto ret;
188 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
189 break;
190 if (state == INWORD) {
191 if (!inword(cs.cs_ch))
192 break;
193 } else
194 if (inword(cs.cs_ch))
195 break;
197 /* See comment above. */
198 if (cnt == 0 && ISMOTION(vp)) {
199 if ((ISCMD(vp->rkp, 'd') ||
200 ISCMD(vp->rkp, 'y')) &&
201 cs_fspace(sp, &cs))
202 return (1);
203 break;
206 /* Eat whitespace characters. */
207 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
208 if (cs_fblank(sp, &cs))
209 return (1);
210 if (cs.cs_flags == CS_EOF)
211 goto ret;
215 * If we didn't move, we must be at EOF.
217 * !!!
218 * That's okay for motion commands, however.
220 ret: if (!ISMOTION(vp) &&
221 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
222 v_eof(sp, &vp->m_start);
223 return (1);
226 /* Adjust the end of the range for motion commands. */
227 vp->m_stop.lno = cs.cs_lno;
228 vp->m_stop.cno = cs.cs_cno;
229 if (ISMOTION(vp) && cs.cs_flags == 0)
230 --vp->m_stop.cno;
233 * Non-motion commands move to the end of the range. Delete
234 * and yank stay at the start, ignore others.
236 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
237 return (0);
241 * v_wordE -- [count]E
242 * Move forward to the end of the bigword.
244 * PUBLIC: int v_wordE __P((SCR *, VICMD *));
247 v_wordE(sp, vp)
248 SCR *sp;
249 VICMD *vp;
251 return (eword(sp, vp, BIGWORD));
255 * v_worde -- [count]e
256 * Move forward to the end of the word.
258 * PUBLIC: int v_worde __P((SCR *, VICMD *));
261 v_worde(sp, vp)
262 SCR *sp;
263 VICMD *vp;
265 return (eword(sp, vp, LITTLEWORD));
269 * eword --
270 * Move forward to the end of the word.
272 static int
273 eword(sp, vp, type)
274 SCR *sp;
275 VICMD *vp;
276 enum which type;
278 enum { INWORD, NOTWORD } state;
279 VCS cs;
280 u_long cnt;
282 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
283 cs.cs_lno = vp->m_start.lno;
284 cs.cs_cno = vp->m_start.cno;
285 if (cs_init(sp, &cs))
286 return (1);
289 * !!!
290 * If in whitespace, or the next character is whitespace, move past
291 * it. (This doesn't count as a word move.) Stay at the character
292 * past the current one, it sets word "state" for the 'e' command.
294 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
295 if (cs_next(sp, &cs))
296 return (1);
297 if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
298 goto start;
300 if (cs_fblank(sp, &cs))
301 return (1);
304 * Cyclically move to the next word -- this involves skipping
305 * over word characters and then any trailing non-word characters.
306 * Note, for the 'e' command, the definition of a word keeps
307 * switching.
309 start: if (type == BIGWORD)
310 while (cnt--) {
311 for (;;) {
312 if (cs_next(sp, &cs))
313 return (1);
314 if (cs.cs_flags == CS_EOF)
315 goto ret;
316 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
317 break;
320 * When we reach the start of the word after the last
321 * word, we're done. If we changed state, back up one
322 * to the end of the previous word.
324 if (cnt == 0) {
325 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
326 return (1);
327 break;
330 /* Eat whitespace characters. */
331 if (cs_fblank(sp, &cs))
332 return (1);
333 if (cs.cs_flags == CS_EOF)
334 goto ret;
336 else
337 while (cnt--) {
338 state = cs.cs_flags == 0 &&
339 inword(cs.cs_ch) ? INWORD : NOTWORD;
340 for (;;) {
341 if (cs_next(sp, &cs))
342 return (1);
343 if (cs.cs_flags == CS_EOF)
344 goto ret;
345 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
346 break;
347 if (state == INWORD) {
348 if (!inword(cs.cs_ch))
349 break;
350 } else
351 if (inword(cs.cs_ch))
352 break;
354 /* See comment above. */
355 if (cnt == 0) {
356 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
357 return (1);
358 break;
361 /* Eat whitespace characters. */
362 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
363 if (cs_fblank(sp, &cs))
364 return (1);
365 if (cs.cs_flags == CS_EOF)
366 goto ret;
370 * If we didn't move, we must be at EOF.
372 * !!!
373 * That's okay for motion commands, however.
375 ret: if (!ISMOTION(vp) &&
376 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
377 v_eof(sp, &vp->m_start);
378 return (1);
381 /* Set the end of the range for motion commands. */
382 vp->m_stop.lno = cs.cs_lno;
383 vp->m_stop.cno = cs.cs_cno;
386 * Non-motion commands move to the end of the range.
387 * Delete and yank stay at the start, ignore others.
389 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
390 return (0);
394 * v_WordB -- [count]B
395 * Move backward a bigword at a time.
397 * PUBLIC: int v_wordB __P((SCR *, VICMD *));
400 v_wordB(sp, vp)
401 SCR *sp;
402 VICMD *vp;
404 return (bword(sp, vp, BIGWORD));
408 * v_wordb -- [count]b
409 * Move backward a word at a time.
411 * PUBLIC: int v_wordb __P((SCR *, VICMD *));
414 v_wordb(sp, vp)
415 SCR *sp;
416 VICMD *vp;
418 return (bword(sp, vp, LITTLEWORD));
422 * bword --
423 * Move backward by words.
425 static int
426 bword(sp, vp, type)
427 SCR *sp;
428 VICMD *vp;
429 enum which type;
431 enum { INWORD, NOTWORD } state;
432 VCS cs;
433 u_long cnt;
435 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
436 cs.cs_lno = vp->m_start.lno;
437 cs.cs_cno = vp->m_start.cno;
438 if (cs_init(sp, &cs))
439 return (1);
442 * !!!
443 * If in whitespace, or the previous character is whitespace, move
444 * past it. (This doesn't count as a word move.) Stay at the
445 * character before the current one, it sets word "state" for the
446 * 'b' command.
448 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
449 if (cs_prev(sp, &cs))
450 return (1);
451 if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
452 goto start;
454 if (cs_bblank(sp, &cs))
455 return (1);
458 * Cyclically move to the beginning of the previous word -- this
459 * involves skipping over word characters and then any trailing
460 * non-word characters. Note, for the 'b' command, the definition
461 * of a word keeps switching.
463 start: if (type == BIGWORD)
464 while (cnt--) {
465 for (;;) {
466 if (cs_prev(sp, &cs))
467 return (1);
468 if (cs.cs_flags == CS_SOF)
469 goto ret;
470 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
471 break;
474 * When we reach the end of the word before the last
475 * word, we're done. If we changed state, move forward
476 * one to the end of the next word.
478 if (cnt == 0) {
479 if (cs.cs_flags == 0 && cs_next(sp, &cs))
480 return (1);
481 break;
484 /* Eat whitespace characters. */
485 if (cs_bblank(sp, &cs))
486 return (1);
487 if (cs.cs_flags == CS_SOF)
488 goto ret;
490 else
491 while (cnt--) {
492 state = cs.cs_flags == 0 &&
493 inword(cs.cs_ch) ? INWORD : NOTWORD;
494 for (;;) {
495 if (cs_prev(sp, &cs))
496 return (1);
497 if (cs.cs_flags == CS_SOF)
498 goto ret;
499 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
500 break;
501 if (state == INWORD) {
502 if (!inword(cs.cs_ch))
503 break;
504 } else
505 if (inword(cs.cs_ch))
506 break;
508 /* See comment above. */
509 if (cnt == 0) {
510 if (cs.cs_flags == 0 && cs_next(sp, &cs))
511 return (1);
512 break;
515 /* Eat whitespace characters. */
516 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
517 if (cs_bblank(sp, &cs))
518 return (1);
519 if (cs.cs_flags == CS_SOF)
520 goto ret;
523 /* If we didn't move, we must be at SOF. */
524 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
525 v_sof(sp, &vp->m_start);
526 return (1);
529 /* Set the end of the range for motion commands. */
530 vp->m_stop.lno = cs.cs_lno;
531 vp->m_stop.cno = cs.cs_cno;
534 * All commands move to the end of the range. Motion commands
535 * adjust the starting point to the character before the current
536 * one.
538 * !!!
539 * The historic vi didn't get this right -- the `yb' command yanked
540 * the right stuff and even updated the cursor value, but the cursor
541 * was not actually updated on the screen.
543 vp->m_final = vp->m_stop;
544 if (ISMOTION(vp))
545 --vp->m_start.cno;
546 return (0);