make SET_UP_INTERRUPTS and TEAR_DOWN_INTERRUPTS macros in interrupt.h
[nvi.git] / common / search.c
blobfb907ae45f9d05480c51ffb97cb94b0aa28bb17c
1 /*-
2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
5 * %sccs.include.redist.c%
6 */
8 #ifndef lint
9 static char sccsid[] = "$Id: search.c,v 8.32 1994/01/09 17:55:45 bostic Exp $ (Berkeley) $Date: 1994/01/09 17:55:45 $";
10 #endif /* not lint */
12 #include <sys/types.h>
14 #include <ctype.h>
15 #include <errno.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <unistd.h>
20 #include "vi.h"
21 #include "interrupt.h"
/* Prototypes for the functions that are private to this file. */
23 static int check_delta __P((SCR *, EXF *, long, recno_t));
24 static int ctag_conv __P((SCR *, char **, int *));
25 static int get_delta __P((SCR *, char **, long *, u_int *));
26 static int resetup __P((SCR *, regex_t **, enum direction,
27 char *, char **, long *, u_int *));
28 static void search_intr __P((int));
31 * resetup --
32 * Set up a search for a regular expression.
34 static int
35 resetup(sp, rep, dir, ptrn, epp, deltap, flagp)
36 SCR *sp;
37 regex_t **rep;
38 enum direction dir;
39 char *ptrn, **epp;
40 long *deltap;
41 u_int *flagp;
43 u_int flags;
44 int delim, eval, re_flags, replaced;
45 char *p, *t;
47 /* Set return information the default. */
48 *deltap = 0;
51 * Use saved pattern if no pattern supplied, or if only a delimiter
52 * character is supplied. Only the pattern was saved, historic vi
53 * did not reuse any delta supplied.
55 flags = *flagp;
56 if (ptrn == NULL)
57 goto prev;
58 if (ptrn[1] == '\0') {
59 if (epp != NULL)
60 *epp = ptrn + 1;
61 goto prev;
63 if (ptrn[0] == ptrn[1] && ptrn[2] == '\0') {
64 if (epp != NULL)
65 *epp = ptrn + 2;
66 prev: if (!F_ISSET(sp, S_SRE_SET)) {
67 msgq(sp, M_ERR, "No previous search pattern.");
68 return (1);
70 *rep = &sp->sre;
72 /* Empty patterns set the direction. */
73 if (LF_ISSET(SEARCH_SET)) {
74 F_SET(sp, S_SRE_SET);
75 sp->searchdir = dir;
76 sp->sre = **rep;
78 return (0);
81 re_flags = 0; /* Set RE flags. */
82 if (O_ISSET(sp, O_EXTENDED))
83 re_flags |= REG_EXTENDED;
84 if (O_ISSET(sp, O_IGNORECASE))
85 re_flags |= REG_ICASE;
87 if (LF_ISSET(SEARCH_PARSE)) { /* Parse the string. */
88 /* Set delimiter. */
89 delim = *ptrn++;
91 /* Find terminating delimiter, handling escaped delimiters. */
92 for (p = t = ptrn;;) {
93 if (p[0] == '\0' || p[0] == delim) {
94 if (p[0] == delim)
95 ++p;
96 *t = '\0';
97 break;
99 if (p[1] == delim && p[0] == '\\')
100 ++p;
101 *t++ = *p++;
105 * If characters after the terminating delimiter, it may
106 * be an error, or may be an offset. In either case, we
107 * return the end of the string, whatever it may be.
109 if (*p) {
110 if (get_delta(sp, &p, deltap, flagp))
111 return (1);
112 if (*p && LF_ISSET(SEARCH_TERM)) {
113 msgq(sp, M_ERR,
114 "Characters after search string and/or delta.");
115 return (1);
118 if (epp != NULL)
119 *epp = p;
121 /* Check for "/ " or other such silliness. */
122 if (*ptrn == '\0')
123 goto prev;
125 if (re_conv(sp, &ptrn, &replaced))
126 return (1);
127 } else if (LF_ISSET(SEARCH_TAG)) {
128 if (ctag_conv(sp, &ptrn, &replaced))
129 return (1);
130 re_flags &= ~(REG_EXTENDED | REG_ICASE);
133 /* Compile the RE. */
134 if (eval = regcomp(*rep, ptrn, re_flags))
135 re_error(sp, eval, *rep);
136 else if (LF_ISSET(SEARCH_SET)) {
137 F_SET(sp, S_SRE_SET);
138 sp->searchdir = dir;
139 sp->sre = **rep;
142 /* Free up any extra memory. */
143 if (replaced)
144 FREE_SPACE(sp, ptrn, 0);
145 return (eval);
149 * ctag_conv --
150 * Convert a tags search path into something that the POSIX
151 * 1003.2 RE functions can handle.
153 static int
154 ctag_conv(sp, ptrnp, replacedp)
155 SCR *sp;
156 char **ptrnp;
157 int *replacedp;
159 size_t blen, len;
160 int lastdollar;
161 char *bp, *p, *t;
163 *replacedp = 0;
165 len = strlen(p = *ptrnp);
167 /* Max memory usage is 2 times the length of the string. */
168 GET_SPACE_RET(sp, bp, blen, len * 2);
170 t = bp;
172 /* The last charcter is a '/' or '?', we just strip it. */
173 if (p[len - 1] == '/' || p[len - 1] == '?')
174 p[len - 1] = '\0';
176 /* The next-to-last character is a '$', and it's magic. */
177 if (p[len - 2] == '$') {
178 lastdollar = 1;
179 p[len - 2] = '\0';
180 } else
181 lastdollar = 0;
183 /* The first character is a '/' or '?', we just strip it. */
184 if (p[0] == '/' || p[0] == '?')
185 ++p;
187 /* The second character is a '^', and it's magic. */
188 if (p[0] == '^')
189 *t++ = *p++;
192 * Escape every other magic character we can find, stripping the
193 * backslashes ctags inserts to escape the search delimiter
194 * characters.
196 while (p[0]) {
197 /* Ctags escapes the search delimiter characters. */
198 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?'))
199 ++p;
200 else if (strchr("^.[]$*", p[0]))
201 *t++ = '\\';
202 *t++ = *p++;
204 if (lastdollar)
205 *t++ = '$';
206 *t++ = '\0';
208 *ptrnp = bp;
209 *replacedp = 1;
210 return (0);
214 * search_intr --
215 * Set the interrupt bit in any screen that is interruptible.
217 * XXX
218 * In the future this may be a problem. The user should be able to move to
219 * another screen and keep typing while this runs. If so, and the user has
220 * more than one search/global (see ex/ex_global.c) running, it will be hard
221 * to decide which one to stop.
223 static void
224 search_intr(signo)
225 int signo;
227 SCR *sp;
229 for (sp = __global_list->dq.cqh_first;
230 sp != (void *)&__global_list->dq; sp = sp->q.cqe_next)
231 if (F_ISSET(sp, S_INTERRUPTIBLE))
232 F_SET(sp, S_INTERRUPTED);
/* Informational messages issued by f_search() and b_search(). */
235 #define EMPTYMSG "File empty; nothing to search."
236 #define EOFMSG "Reached end-of-file without finding the pattern."
237 #define NOTFOUND "Pattern not found."
238 #define SOFMSG "Reached top-of-file without finding the pattern."
239 #define WRAPMSG "Search wrapped."
242 f_search(sp, ep, fm, rm, ptrn, eptrn, flagp)
243 SCR *sp;
244 EXF *ep;
245 MARK *fm, *rm;
246 char *ptrn, **eptrn;
247 u_int *flagp;
249 DECLARE_INTERRUPTS;
250 regmatch_t match[1];
251 regex_t *re, lre;
252 recno_t lno;
253 size_t coff, len;
254 long delta;
255 u_int flags;
256 int eval, rval, wrapped;
257 char *l;
259 if (file_lline(sp, ep, &lno))
260 return (1);
261 flags = *flagp;
262 if (lno == 0) {
263 if (LF_ISSET(SEARCH_MSG))
264 msgq(sp, M_INFO, EMPTYMSG);
265 return (1);
268 re = &lre;
269 if (resetup(sp, &re, FORWARD, ptrn, eptrn, &delta, flagp))
270 return (1);
273 * Start searching immediately after the cursor. If at the end of the
274 * line, start searching on the next line. This is incompatible (read
275 * bug fix) with the historic vi -- searches for the '$' pattern never
276 * moved forward, and "-t foo" didn't work if "foo" was the first thing
277 * in the file.
279 if (LF_ISSET(SEARCH_FILE)) {
280 lno = 1;
281 coff = 0;
282 } else {
283 if ((l = file_gline(sp, ep, fm->lno, &len)) == NULL) {
284 GETLINE_ERR(sp, fm->lno);
285 return (1);
287 if (fm->cno + 1 >= len) {
288 if (fm->lno == lno) {
289 if (!O_ISSET(sp, O_WRAPSCAN)) {
290 if (LF_ISSET(SEARCH_MSG))
291 msgq(sp, M_INFO, EOFMSG);
292 return (1);
294 lno = 1;
295 } else
296 lno = fm->lno + 1;
297 coff = 0;
298 } else {
299 lno = fm->lno;
300 coff = fm->cno + 1;
305 * Set up busy message, interrupts.
307 * F_search is called from the ex_tagfirst() routine, which runs
308 * before the screen really exists. Make sure we don't step on
309 * anything.
311 if (sp->s_position != NULL)
312 busy_on(sp, 1, "Searching...");
313 SET_UP_INTERRUPTS(search_intr);
315 for (rval = 1, wrapped = 0;; ++lno, coff = 0) {
316 if (F_ISSET(sp, S_INTERRUPTED)) {
317 msgq(sp, M_INFO, "Interrupted.");
318 break;
320 if (wrapped && lno > fm->lno ||
321 (l = file_gline(sp, ep, lno, &len)) == NULL) {
322 if (wrapped) {
323 if (LF_ISSET(SEARCH_MSG))
324 msgq(sp, M_INFO, NOTFOUND);
325 break;
327 if (!O_ISSET(sp, O_WRAPSCAN)) {
328 if (LF_ISSET(SEARCH_MSG))
329 msgq(sp, M_INFO, EOFMSG);
330 break;
332 lno = 0;
333 wrapped = 1;
334 continue;
337 /* If already at EOL, just keep going. */
338 if (len && coff == len)
339 continue;
341 /* Set the termination. */
342 match[0].rm_so = coff;
343 match[0].rm_eo = len;
345 #if defined(DEBUG) && 0
346 TRACE(sp, "F search: %lu from %u to %u\n",
347 lno, coff, len ? len - 1 : len);
348 #endif
349 /* Search the line. */
350 eval = regexec(re, l, 1, match,
351 (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
352 if (eval == REG_NOMATCH)
353 continue;
354 if (eval != 0) {
355 re_error(sp, eval, re);
356 break;
359 /* Warn if wrapped. */
360 if (wrapped && O_ISSET(sp, O_WARN) && LF_ISSET(SEARCH_MSG))
361 msgq(sp, M_INFO, WRAPMSG);
364 * If an offset, see if it's legal. It's possible to match
365 * past the end of the line with $, so check for that case.
367 if (delta) {
368 if (check_delta(sp, ep, delta, lno))
369 break;
370 rm->lno = delta + lno;
371 rm->cno = 0;
372 } else {
373 #if defined(DEBUG) && 0
374 TRACE(sp, "found: %qu to %qu\n",
375 match[0].rm_so, match[0].rm_eo);
376 #endif
377 rm->lno = lno;
378 rm->cno = match[0].rm_so;
381 * If a change command, it's possible to move beyond
382 * the end of a line. Historic vi generally got this
383 * wrong (try "c?$<cr>"). Not all that sure this gets
384 * it right, there are lots of strange cases.
386 if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
387 rm->cno = len ? len - 1 : 0;
389 rval = 0;
390 break;
393 interrupt_err:
395 /* Turn off busy message, interrupts. */
396 if (sp->s_position != NULL)
397 busy_off(sp);
398 TEAR_DOWN_INTERRUPTS;
400 return (rval);
404 b_search(sp, ep, fm, rm, ptrn, eptrn, flagp)
405 SCR *sp;
406 EXF *ep;
407 MARK *fm, *rm;
408 char *ptrn, **eptrn;
409 u_int *flagp;
411 DECLARE_INTERRUPTS;
412 regmatch_t match[1];
413 regex_t *re, lre;
414 recno_t lno;
415 size_t coff, len, last;
416 long delta;
417 u_int flags;
418 int eval, rval, wrapped;
419 char *l;
421 if (file_lline(sp, ep, &lno))
422 return (1);
423 flags = *flagp;
424 if (lno == 0) {
425 if (LF_ISSET(SEARCH_MSG))
426 msgq(sp, M_INFO, EMPTYMSG);
427 return (1);
430 re = &lre;
431 if (resetup(sp, &re, BACKWARD, ptrn, eptrn, &delta, flagp))
432 return (1);
434 /* If in the first column, start searching on the previous line. */
435 if (fm->cno == 0) {
436 if (fm->lno == 1) {
437 if (!O_ISSET(sp, O_WRAPSCAN)) {
438 if (LF_ISSET(SEARCH_MSG))
439 msgq(sp, M_INFO, SOFMSG);
440 return (1);
442 } else
443 lno = fm->lno - 1;
444 } else
445 lno = fm->lno;
447 /* Turn on busy message, interrupts. */
448 busy_on(sp, 1, "Searching...");
450 if (F_ISSET(sp->gp, G_ISFROMTTY))
451 SET_UP_INTERRUPTS(search_intr);
453 for (rval = 1, wrapped = 0, coff = fm->cno;; --lno, coff = 0) {
454 if (F_ISSET(sp, S_INTERRUPTED)) {
455 msgq(sp, M_INFO, "Interrupted.");
456 break;
458 if (wrapped && lno < fm->lno || lno == 0) {
459 if (wrapped) {
460 if (LF_ISSET(SEARCH_MSG))
461 msgq(sp, M_INFO, NOTFOUND);
462 break;
464 if (!O_ISSET(sp, O_WRAPSCAN)) {
465 if (LF_ISSET(SEARCH_MSG))
466 msgq(sp, M_INFO, SOFMSG);
467 break;
469 if (file_lline(sp, ep, &lno))
470 goto err;
471 if (lno == 0) {
472 if (LF_ISSET(SEARCH_MSG))
473 msgq(sp, M_INFO, EMPTYMSG);
474 break;
476 ++lno;
477 wrapped = 1;
478 continue;
481 if ((l = file_gline(sp, ep, lno, &len)) == NULL)
482 goto err;
484 /* Set the termination. */
485 match[0].rm_so = 0;
486 match[0].rm_eo = coff ? coff : len;
488 #if defined(DEBUG) && 0
489 TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
490 #endif
491 /* Search the line. */
492 eval = regexec(re, l, 1, match,
493 (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
494 if (eval == REG_NOMATCH)
495 continue;
496 if (eval != 0) {
497 re_error(sp, eval, re);
498 break;
501 /* Warn if wrapped. */
502 if (wrapped && O_ISSET(sp, O_WARN) && LF_ISSET(SEARCH_MSG))
503 msgq(sp, M_INFO, WRAPMSG);
505 if (delta) {
506 if (check_delta(sp, ep, delta, lno))
507 break;
508 rm->lno = delta + lno;
509 rm->cno = 0;
510 } else {
511 #if defined(DEBUG) && 0
512 TRACE(sp, "found: %qu to %qu\n",
513 match[0].rm_so, match[0].rm_eo);
514 #endif
516 * Find the last acceptable one in this line. This
517 * is really painful, we need a cleaner interface to
518 * regexec to make this possible.
520 for (;;) {
521 last = match[0].rm_so;
522 match[0].rm_so = match[0].rm_eo + 1;
523 if (match[0].rm_so >= len ||
524 coff && match[0].rm_so >= coff)
525 break;
526 match[0].rm_eo = coff ? coff : len;
527 eval = regexec(re, l, 1, match,
528 (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
529 REG_STARTEND);
530 if (eval == REG_NOMATCH)
531 break;
532 if (eval != 0) {
533 re_error(sp, eval, re);
534 goto err;
537 rm->lno = lno;
539 /* See comment in f_search(). */
540 if (!LF_ISSET(SEARCH_EOL) && last >= len)
541 rm->cno = len ? len - 1 : 0;
542 else
543 rm->cno = last;
545 rval = 0;
546 break;
549 /* Turn off busy message, interrupts. */
550 interrupt_err:
551 err: busy_off(sp);
553 if (F_ISSET(sp->gp, G_ISFROMTTY))
554 TEAR_DOWN_INTERRUPTS;
556 return (rval);
560 * re_conv --
561 * Convert vi's regular expressions into something that the
562 * the POSIX 1003.2 RE functions can handle.
564 * There are three conversions we make to make vi's RE's (specifically
565 * the global, search, and substitute patterns) work with POSIX RE's.
567 * 1: If O_MAGIC is not set, strip backslashes from the magic character
568 * set (.[]*~) that have them, and add them to the ones that don't.
569 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
570 * from the last substitute command's replacement string. If O_MAGIC
571 * is set, it's the string "~".
572 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
573 * new RE escapes.
576 re_conv(sp, ptrnp, replacedp)
577 SCR *sp;
578 char **ptrnp;
579 int *replacedp;
581 size_t blen, needlen;
582 int magic;
583 char *bp, *p, *t;
586 * First pass through, we figure out how much space we'll need.
587 * We do it in two passes, on the grounds that most of the time
588 * the user is doing a search and won't have magic characters.
589 * That way we can skip the malloc and memmove's.
591 for (p = *ptrnp, magic = 0, needlen = 0; *p != '\0'; ++p)
592 switch (*p) {
593 case '\\':
594 switch (*++p) {
595 case '<':
596 magic = 1;
597 needlen += sizeof(RE_WSTART);
598 break;
599 case '>':
600 magic = 1;
601 needlen += sizeof(RE_WSTOP);
602 break;
603 case '~':
604 if (!O_ISSET(sp, O_MAGIC)) {
605 magic = 1;
606 needlen += sp->repl_len;
608 break;
609 case '.':
610 case '[':
611 case ']':
612 case '*':
613 if (!O_ISSET(sp, O_MAGIC)) {
614 magic = 1;
615 needlen += 1;
617 break;
618 default:
619 needlen += 2;
621 break;
622 case '~':
623 if (O_ISSET(sp, O_MAGIC)) {
624 magic = 1;
625 needlen += sp->repl_len;
627 break;
628 case '.':
629 case '[':
630 case ']':
631 case '*':
632 if (!O_ISSET(sp, O_MAGIC)) {
633 magic = 1;
634 needlen += 2;
636 break;
637 default:
638 needlen += 1;
639 break;
642 if (!magic) {
643 *replacedp = 0;
644 return (0);
648 * Get enough memory to hold the final pattern.
650 * XXX
651 * It's nul-terminated, for now.
653 GET_SPACE_RET(sp, bp, blen, needlen + 1);
655 for (p = *ptrnp, t = bp; *p != '\0'; ++p)
656 switch (*p) {
657 case '\\':
658 switch (*++p) {
659 case '<':
660 memmove(t, RE_WSTART, sizeof(RE_WSTART) - 1);
661 t += sizeof(RE_WSTART) - 1;
662 break;
663 case '>':
664 memmove(t, RE_WSTOP, sizeof(RE_WSTOP) - 1);
665 t += sizeof(RE_WSTOP) - 1;
666 break;
667 case '~':
668 if (O_ISSET(sp, O_MAGIC))
669 *t++ = '~';
670 else {
671 memmove(t, sp->repl, sp->repl_len);
672 t += sp->repl_len;
674 break;
675 case '.':
676 case '[':
677 case ']':
678 case '*':
679 if (O_ISSET(sp, O_MAGIC))
680 *t++ = '\\';
681 *t++ = *p;
682 break;
683 default:
684 *t++ = '\\';
685 *t++ = *p;
687 break;
688 case '~':
689 if (O_ISSET(sp, O_MAGIC)) {
690 memmove(t, sp->repl, sp->repl_len);
691 t += sp->repl_len;
692 } else
693 *t++ = '~';
694 break;
695 case '.':
696 case '[':
697 case ']':
698 case '*':
699 if (!O_ISSET(sp, O_MAGIC))
700 *t++ = '\\';
701 *t++ = *p;
702 break;
703 default:
704 *t++ = *p;
705 break;
707 *t = '\0';
709 *ptrnp = bp;
710 *replacedp = 1;
711 return (0);
715 * get_delta --
716 * Get a line delta. The trickiness is that the delta can be pretty
717 * complicated, i.e. "+3-2+3++- ++" is allowed.
719 * !!!
720 * In historic vi, if you had a delta on a search pattern which was used as
721 * a motion command, the command became a line mode command regardless of the
722 * cursor positions. A fairly common trick is to use a delta of "+0" to make
723 * the command a line mode command. This is the only place that knows about
724 * delta's, so we set the return flag information here.
726 static int
727 get_delta(sp, dp, valp, flagp)
728 SCR *sp;
729 char **dp;
730 long *valp;
731 u_int *flagp;
733 char *p;
734 long val, tval;
736 for (tval = 0, p = *dp; *p != '\0'; *flagp |= SEARCH_DELTA) {
737 if (isblank(*p)) {
738 ++p;
739 continue;
741 if (*p == '+' || *p == '-') {
742 if (!isdigit(*(p + 1))) {
743 if (*p == '+') {
744 if (tval == LONG_MAX)
745 goto overflow;
746 ++tval;
747 } else {
748 if (tval == LONG_MIN)
749 goto underflow;
750 --tval;
752 ++p;
753 continue;
755 } else
756 if (!isdigit(*p))
757 break;
759 errno = 0;
760 val = strtol(p, &p, 10);
761 if (errno == ERANGE) {
762 if (val == LONG_MAX)
763 overflow: msgq(sp, M_ERR, "Delta value overflow.");
764 else if (val == LONG_MIN)
765 underflow: msgq(sp, M_ERR, "Delta value underflow.");
766 else
767 msgq(sp, M_SYSERR, NULL);
768 return (1);
770 if (val >= 0) {
771 if (LONG_MAX - val < tval)
772 goto overflow;
773 } else
774 if (-(LONG_MIN - tval) > val)
775 goto underflow;
776 tval += val;
778 *dp = p;
779 *valp = tval;
780 return (0);
784 * check_delta --
785 * Check a line delta to see if it's legal.
787 static int
788 check_delta(sp, ep, delta, lno)
789 SCR *sp;
790 EXF *ep;
791 long delta;
792 recno_t lno;
794 /* A delta can overflow a record number. */
795 if (delta < 0) {
796 if (lno < LONG_MAX && delta >= (long)lno) {
797 msgq(sp, M_ERR, "Search offset before line 1.");
798 return (1);
800 } else {
801 if (ULONG_MAX - lno < delta) {
802 msgq(sp, M_ERR, "Delta value overflow.");
803 return (1);
805 if (file_gline(sp, ep, lno + delta, NULL) == NULL) {
806 msgq(sp, M_ERR, "Search offset past end-of-file.");
807 return (1);
810 return (0);
814 * re_error --
815 * Report a regular expression error.
817 void
818 re_error(sp, errcode, preg)
819 SCR *sp;
820 int errcode;
821 regex_t *preg;
823 size_t s;
824 char *oe;
826 s = regerror(errcode, preg, "", 0);
827 if ((oe = malloc(s)) == NULL)
828 msgq(sp, M_SYSERR, NULL);
829 else {
830 (void)regerror(errcode, preg, oe, s);
831 msgq(sp, M_ERR, "RE error: %s", oe);
833 free(oe);