add f_sourceany, so we can print an error message
[nvi.git] / vi / v_word.c
blob28e712f3314d8e7f1f0a56d280a65016dee50137
1 /*-
2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
5 * %sccs.include.redist.c%
6 */
8 #ifndef lint
9 static char sccsid[] = "$Id: v_word.c,v 8.10 1993/10/26 17:22:58 bostic Exp $ (Berkeley) $Date: 1993/10/26 17:22:58 $";
10 #endif /* not lint */
12 #include <sys/types.h>
14 #include <ctype.h>
16 #include "vi.h"
17 #include "vcmd.h"
20 * There are two types of "words". Bigwords are easy -- groups of anything
21 * delimited by whitespace. Normal words are trickier. They are either a
22 * group of characters, numbers and underscores, or a group of anything but,
23 * delimited by whitespace. When moving by a word, if you're in whitespace, it's
24 * easy, just remove the whitespace and go to the beginning or end of the
25 * word. Otherwise, figure out if the next character is in a different group.
26 * If it is, go to the beginning or end of that group, otherwise, go to the
27 * beginning or end of the current group. The historic version of vi didn't
28 * get this right, so, for example, there were cases where "4e" was not the
29 * same as "eeee". To get it right you have to resolve the cursor after each
30 * search so that the look-ahead to figure out what type of "word" the cursor
31 * is in will be correct.
33 * Empty lines, and lines that consist of only white-space characters count
34 * as a single word, and the beginning and end of the file counts as an
35 * infinite number of words.
37 * Movements associated with commands are different than movement commands.
38 * For example, in "abc def", with the cursor on the 'a', "cw" is from
39 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white
40 * space is discarded from the change movement. Another example is that,
41 * in the same string, a "cw" on any white space character replaces that
42 * single character, and nothing else. Ain't nothin' in here that's easy.
44 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
45 * would treat groups of empty lines as individual words, i.e. the command
46 * would move the cursor to each new empty line. The 'e' and 'E' commands
47 * would treat groups of empty lines as a single word, i.e. the first use
48 * would move past the group of lines. The 'b' command would just beep at
49 * you. If the lines contained only white-space characters, the 'w' and 'W'
50 * commands will just beep at you, and the 'B', 'b', 'E' and 'e' commands
51 * will treat the group as a single word, and the 'B' and 'b' commands will
52 * treat the lines as individual words. This implementation treats both
53 * cases as a single white-space word.
/*
 * FW and BW advance the scan pointer forward / backward for as long as
 * characters remain and the supplied test holds.  Each expands to a bare
 * `for' header, so the caller supplies the (normally empty) body, and each
 * reads and modifies the locals `len' and `p' of the function it is
 * expanded in.
 */
#define	FW(test)	for (; len && (test); --len, ++p)
#define	BW(test)	for (; len && (test); --len, --p)

/* Word flavour handed to fword(). */
enum which {BIGWORD, LITTLEWORD};
61 static int bword __P((SCR *, EXF *, VICMDARG *, MARK *, MARK *, int));
62 static int eword __P((SCR *, EXF *, VICMDARG *, MARK *, MARK *, int));
63 static int fword __P((SCR *, EXF *, VICMDARG *, MARK *, MARK *, enum which));
66 * v_wordw -- [count]w
67 * Move forward a word at a time.
69 int
70 v_wordw(sp, ep, vp, fm, tm, rp)
71 SCR *sp;
72 EXF *ep;
73 VICMDARG *vp;
74 MARK *fm, *tm, *rp;
76 return (fword(sp, ep, vp, fm, rp, LITTLEWORD));
80 * v_wordW -- [count]W
81 * Move forward a bigword at a time.
83 int
84 v_wordW(sp, ep, vp, fm, tm, rp)
85 SCR *sp;
86 EXF *ep;
87 VICMDARG *vp;
88 MARK *fm, *tm, *rp;
90 return (fword(sp, ep, vp, fm, rp, BIGWORD));
94 * fword --
95 * Move forward by words.
97 static int
98 fword(sp, ep, vp, fm, rp, type)
99 SCR *sp;
100 EXF *ep;
101 VICMDARG *vp;
102 MARK *fm, *rp;
103 enum which type;
105 enum { INWORD, NOTWORD } state;
106 VCS cs;
107 u_long cnt;
109 cs.cs_lno = fm->lno;
110 cs.cs_cno = fm->cno;
111 if (cs_init(sp, ep, &cs))
112 return (1);
114 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
117 * If in white-space:
118 * If the count is 1, and it's a change command, we're done.
119 * Else, move to the first non-white-space character, which
120 * counts as a single word move. If it's a motion command,
121 * don't move off the end of the line.
123 if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) {
124 if (cs.cs_flags != CS_EMP && cnt == 1) {
125 if (F_ISSET(vp, VC_C)) {
126 ++cs.cs_cno;
127 goto ret3;
129 if (F_ISSET(vp, VC_D | VC_Y)) {
130 if (cs_fspace(sp, ep, &cs))
131 return (1);
132 goto ret1;
135 if (cs_fblank(sp, ep, &cs))
136 return (1);
137 --cnt;
141 * Cyclically move to the next word -- this involves skipping
142 * over word characters and then any trailing non-word characters.
143 * Note, for the 'w' command, the definition of a word keeps
144 * switching.
146 if (type == BIGWORD)
147 while (cnt--) {
148 for (;;) {
149 if (cs_next(sp, ep, &cs))
150 return (1);
151 if (cs.cs_flags == CS_EOF)
152 goto ret2;
153 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
154 break;
157 * If a motion command and we're at the end of the
158 * last word, we're done. Delete and yank eat any
159 * trailing blanks, but we don't move off the end
160 * of the line regardless.
162 if (cnt == 0 && F_ISSET(vp, VC_C | VC_D | VC_Y)) {
163 if (F_ISSET(vp, VC_D | VC_Y) &&
164 cs_fspace(sp, ep, &cs))
165 return (1);
166 break;
169 /* Eat whitespace characters. */
170 if (cs_fblank(sp, ep, &cs))
171 return (1);
172 if (cs.cs_flags == CS_EOF)
173 goto ret2;
175 else
176 while (cnt--) {
177 state = cs.cs_flags == 0 &&
178 inword(cs.cs_ch) ? INWORD : NOTWORD;
179 for (;;) {
180 if (cs_next(sp, ep, &cs))
181 return (1);
182 if (cs.cs_flags == CS_EOF)
183 goto ret2;
184 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
185 break;
186 if (state == INWORD) {
187 if (!inword(cs.cs_ch))
188 break;
189 } else
190 if (inword(cs.cs_ch))
191 break;
193 /* See comment above. */
194 if (cnt == 0 && F_ISSET(vp, VC_C | VC_D | VC_Y)) {
195 if (F_ISSET(vp, VC_D | VC_Y) &&
196 cs_fspace(sp, ep, &cs))
197 return (1);
198 break;
201 /* Eat whitespace characters. */
202 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
203 if (cs_fblank(sp, ep, &cs))
204 return (1);
205 if (cs.cs_flags == CS_EOF)
206 goto ret2;
210 * If a motion command, and eating the trailing non-word would
211 * move us off this line, don't do it. Move the return cursor
212 * to one past the EOL instead.
214 ret1: if (F_ISSET(vp, VC_C | VC_D | VC_Y) && cs.cs_flags == CS_EOL)
215 ++cs.cs_cno;
217 /* If we didn't move, we must be at EOF. */
218 ret2: if (cs.cs_lno == fm->lno && cs.cs_cno == fm->cno) {
219 v_eof(sp, ep, fm);
220 return (1);
223 * If at EOF, and it's a motion command, move the return cursor
224 * one past the EOF.
226 if (F_ISSET(vp, VC_C | VC_D | VC_Y) && cs.cs_flags == CS_EOF)
227 ++cs.cs_cno;
228 ret3: rp->lno = cs.cs_lno;
229 rp->cno = cs.cs_cno;
230 return (0);
234 * v_wordb -- [count]b
235 * Move backward a word at a time.
238 v_wordb(sp, ep, vp, fm, tm, rp)
239 SCR *sp;
240 EXF *ep;
241 VICMDARG *vp;
242 MARK *fm, *tm, *rp;
244 return (bword(sp, ep, vp, fm, rp, 0));
248 * v_WordB -- [count]B
249 * Move backward a bigword at a time.
252 v_wordB(sp, ep, vp, fm, tm, rp)
253 SCR *sp;
254 EXF *ep;
255 VICMDARG *vp;
256 MARK *fm, *tm, *rp;
258 return (bword(sp, ep, vp, fm, rp, 1));
262 * bword --
263 * Move backward by words.
265 static int
266 bword(sp, ep, vp, fm, rp, spaceonly)
267 SCR *sp;
268 EXF *ep;
269 VICMDARG *vp;
270 MARK *fm, *rp;
271 int spaceonly;
273 register char *p;
274 recno_t lno;
275 size_t len;
276 u_long cno, cnt;
277 char *startp;
279 lno = fm->lno;
280 cno = fm->cno;
282 /* Check for start of file. */
283 if (lno == 1 && cno == 0) {
284 v_sof(sp, NULL);
285 return (1);
288 if ((p = file_gline(sp, ep, lno, &len)) == NULL) {
289 if (file_lline(sp, ep, &lno))
290 return (1);
291 if (lno == 0)
292 v_sof(sp, NULL);
293 else
294 GETLINE_ERR(sp, lno);
295 return (1);
298 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
301 * Reset the length to the number of characters in the line; the
302 * first character is the current cursor position.
304 len = cno ? cno + 1 : 0;
305 if (len == 0)
306 goto line;
307 for (startp = p, p += cno; cnt--;) {
308 if (spaceonly) {
309 if (!isblank(*p)) {
310 if (len < 2)
311 goto line;
312 --p;
313 --len;
315 BW(isblank(*p));
316 if (len)
317 BW(!isblank(*p));
318 else
319 goto line;
320 } else {
321 if (!isblank(*p)) {
322 if (len < 2)
323 goto line;
324 --p;
325 --len;
327 BW(isblank(*p));
328 if (len)
329 if (inword(*p))
330 BW(inword(*p));
331 else
332 BW(!isblank(*p) && !inword(*p));
333 else
334 goto line;
337 if (cnt && len == 0) {
338 /* If we hit SOF, stay there (historic practice). */
339 line: if (lno == 1) {
340 rp->lno = 1;
341 rp->cno = 0;
342 return (0);
346 * Get the line. If the line is empty, decrement
347 * count and get another one.
349 if ((p = file_gline(sp, ep, --lno, &len)) == NULL) {
350 GETLINE_ERR(sp, lno);
351 return (1);
353 if (len == 0) {
354 if (cnt == 0 || --cnt == 0) {
355 rp->lno = lno;
356 rp->cno = 0;
357 return (0);
359 goto line;
363 * Set the cursor to the end of the line. If the word
364 * at the end of this line has only a single character,
365 * we've already skipped over it.
367 startp = p;
368 if (len) {
369 p += len - 1;
370 if (cnt && len > 1 && !isblank(p[0]))
371 if (inword(p[0])) {
372 if (!inword(p[-1]))
373 --cnt;
374 } else if (!isblank(p[-1]) &&
375 !inword(p[-1]))
376 --cnt;
378 } else {
379 ++p;
380 ++len;
383 rp->lno = lno;
384 rp->cno = p - startp;
385 return (0);
389 * v_worde -- [count]e
390 * Move forward to the end of the word.
393 v_worde(sp, ep, vp, fm, tm, rp)
394 SCR *sp;
395 EXF *ep;
396 VICMDARG *vp;
397 MARK *fm, *tm, *rp;
399 return (eword(sp, ep, vp, fm, rp, 0));
403 * v_wordE -- [count]E
404 * Move forward to the end of the bigword.
407 v_wordE(sp, ep, vp, fm, tm, rp)
408 SCR *sp;
409 EXF *ep;
410 VICMDARG *vp;
411 MARK *fm, *tm, *rp;
413 return (eword(sp, ep, vp, fm, rp, 1));
417 * eword --
418 * Move forward to the end of the word.
420 static int
421 eword(sp, ep, vp, fm, rp, spaceonly)
422 SCR *sp;
423 EXF *ep;
424 VICMDARG *vp;
425 MARK *fm, *rp;
426 int spaceonly;
428 register char *p;
429 recno_t lno;
430 size_t len, llen;
431 u_long cno, cnt;
432 int empty;
433 char *startp;
435 lno = fm->lno;
436 cno = fm->cno;
438 if ((p = file_gline(sp, ep, lno, &llen)) == NULL) {
439 if (file_lline(sp, ep, &lno))
440 return (1);
441 if (lno == 0)
442 v_eof(sp, ep, NULL);
443 else
444 GETLINE_ERR(sp, lno);
445 return (1);
448 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
451 * Reset the length; the first character is the current cursor
452 * position. If no more characters in this line, may already
453 * be at EOF.
455 len = llen - cno;
456 if (empty = llen == 0 || llen == cno + 1)
457 goto line;
459 for (startp = p += cno; cnt--; empty = 0) {
460 if (spaceonly) {
461 if (!isblank(*p)) {
462 if (len < 2)
463 goto line;
464 ++p;
465 --len;
467 FW(isblank(*p));
468 if (len)
469 FW(!isblank(*p));
470 else
471 ++cnt;
472 } else {
473 if (!isblank(*p)) {
474 if (len < 2)
475 goto line;
476 ++p;
477 --len;
479 FW(isblank(*p));
480 if (len)
481 if (inword(*p))
482 FW(inword(*p));
483 else
484 FW(!isblank(*p) && !inword(*p));
485 else
486 ++cnt;
489 if (cnt && len == 0) {
490 /* If we hit EOF, stay there (historic practice). */
491 line: if ((p = file_gline(sp, ep, ++lno, &llen)) == NULL) {
493 * If already at eof, complain, unless it's
494 * a change command or a delete command and
495 * there's something to delete.
497 if (empty) {
498 if (F_ISSET(vp, VC_C) ||
499 F_ISSET(vp, VC_D) && llen != 0) {
500 rp->lno = lno - 1;
501 rp->cno = llen ? llen : 1;
502 return (0);
504 v_eof(sp, ep, NULL);
505 return (1);
507 if ((p =
508 file_gline(sp, ep, --lno, &llen)) == NULL) {
509 GETLINE_ERR(sp, lno);
510 return (1);
512 rp->lno = lno;
513 rp->cno = llen ? llen - 1 : 0;
514 /* The 'c', 'd' and 'y' need one more space. */
515 if (F_ISSET(vp, VC_C | VC_D | VC_Y))
516 ++rp->cno;
517 return (0);
519 len = llen;
520 cno = 0;
521 startp = p;
522 } else {
523 --p;
524 ++len;
527 rp->lno = lno;
528 rp->cno = cno + (p - startp);
530 /* The 'c', 'd' and 'y' need one more space. */
531 if (F_ISSET(vp, VC_C | VC_D | VC_Y))
532 ++rp->cno;
533 return (0);