2 * Copyright (C) 1984-2002 Mark Nudelman
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
7 * For more information about less, or for information on how to
8 * contact the author, see the README file.
13 * Routines to search a file for a pattern.
/* MINPOS/MAXPOS yield the smaller/larger of a and b. They are function-like macros, so the selected argument is evaluated twice: do not pass expressions with side effects. */
19 #define MINPOS(a,b) (((a) < (b)) ? (a) : (b))
20 #define MAXPOS(a,b) (((a) > (b)) ? (a) : (b))
22 #if HAVE_POSIX_REGCOMP
25 #define REGCOMP_FLAG REG_EXTENDED
27 #define REGCOMP_FLAG 0
49 extern int how_search
;
53 extern int jump_sline
;
56 extern int status_col
;
57 extern POSITION start_attnpos
;
58 extern POSITION end_attnpos
;
60 extern int hilite_search
;
61 extern int screen_trashed
;
62 extern int size_linebuf
;
64 extern int can_goto_line
;
65 static int hide_hilite
;
66 static POSITION prep_startpos
;
67 static POSITION prep_endpos
;
71 struct hilite
*hl_next
;
75 static struct hilite hilite_anchor
= { NULL
, NULL_POSITION
, NULL_POSITION
};
76 #define hl_first hl_next /* alias for the hl_next member; spelled hl_first in this file when the struct is a list head, e.g. hilite_anchor.hl_first */
80 * These are the static variables that represent the "remembered"
83 #if HAVE_POSIX_REGCOMP
84 static regex_t
*regpattern
= NULL
;
87 pcre
*regpattern
= NULL
;
93 static char *cpattern
= NULL
;
96 static struct regexp
*regpattern
= NULL
;
99 static int is_caseless
;
100 static int is_ucase_pattern
;
101 static int last_search_type
;
102 static char *last_pattern
= NULL
;
105 * Convert text. Perform one or more of these transformations:
/* Bit flags (octal) for the ops argument of cvt_text(); each one is tested there with `ops & CVT_...`. */
/* NOTE(review): the visible CVT_CRLF check in cvt_text() tests dst[-1] == '\r' on the converted text -- confirm that "after LF" is the intended wording. */
107 #define CVT_TO_LC 01 /* Convert upper-case to lower-case */
108 #define CVT_BS 02 /* Do backspace processing */
109 #define CVT_CRLF 04 /* Remove CR after LF */
110 #define CVT_ANSI 010 /* Remove ANSI escape sequences */
113 cvt_text(odst
, osrc
, ops
)
121 for (src
= osrc
, dst
= odst
; *src
!= '\0'; src
++)
123 if ((ops
& CVT_TO_LC
) && isupper((unsigned char) *src
))
124 /* Convert uppercase to lowercase. */
125 *dst
++ = tolower((unsigned char) *src
);
126 else if ((ops
& CVT_BS
) && *src
== '\b' && dst
> odst
)
127 /* Delete BS and preceding char. */
129 else if ((ops
& CVT_ANSI
) && *src
== ESC
)
131 /* Skip to end of ANSI escape sequence. */
132 while (src
[1] != '\0')
133 if (is_ansi_end(*++src
))
139 if ((ops
& CVT_CRLF
) && dst
> odst
&& dst
[-1] == '\r')
145 * Determine which conversions to perform.
151 if (is_caseless
|| bs_mode
== BS_SPECIAL
)
155 if (bs_mode
== BS_SPECIAL
)
157 if (bs_mode
!= BS_CONTROL
)
159 } else if (bs_mode
!= BS_CONTROL
)
163 if (ctldisp
== OPT_ONPLUS
)
169 * Are there any uppercase letters in this string?
177 for (p
= s
; *p
!= '\0'; p
++)
178 if (isupper((unsigned char) *p
))
184 * Is there a previous (remembered) search pattern?
189 if (last_search_type
& SRCH_NO_REGEX
)
190 return (last_pattern
!= NULL
);
191 #if HAVE_POSIX_REGCOMP
192 return (regpattern
!= NULL
);
195 return (regpattern
!= NULL
);
198 return (re_pattern
!= 0);
201 return (cpattern
!= NULL
);
204 return (regpattern
!= NULL
);
207 return (last_pattern
!= NULL
);
213 * Repaint the hilites currently displayed on the screen.
214 * Repaint each line which contains highlighted text.
215 * If on==0, force all hilites off.
224 int save_hide_hilite
;
229 save_hide_hilite
= hide_hilite
;
240 hide_hilite
= save_hide_hilite
;
244 for (slinenum
= TOP
; slinenum
< TOP
+ sc_height
-1; slinenum
++)
246 pos
= position(slinenum
);
247 if (pos
== NULL_POSITION
)
249 epos
= position(slinenum
+1);
251 * If any character in the line is highlighted,
254 if (is_hilited(pos
, epos
, 1))
256 (void) forw_line(pos
);
261 hide_hilite
= save_hide_hilite
;
265 * Clear the attn hilite.
271 POSITION old_start_attnpos
;
272 POSITION old_end_attnpos
;
276 if (start_attnpos
== NULL_POSITION
)
278 old_start_attnpos
= start_attnpos
;
279 old_end_attnpos
= end_attnpos
;
280 start_attnpos
= end_attnpos
= NULL_POSITION
;
290 for (slinenum
= TOP
; slinenum
< TOP
+ sc_height
-1; slinenum
++)
292 pos
= position(slinenum
);
293 if (pos
== NULL_POSITION
)
295 epos
= position(slinenum
+1);
296 if (pos
< old_end_attnpos
&&
297 (epos
== NULL_POSITION
|| epos
> old_start_attnpos
))
299 (void) forw_line(pos
);
308 * Hide search string highlighting.
315 error("No previous regular expression", NULL_PARG
);
319 hide_hilite
= !hide_hilite
;
325 * Compile a search pattern, for future use by match_pattern.
328 compile_pattern(pattern
, search_type
)
332 if ((search_type
& SRCH_NO_REGEX
) == 0)
334 #if HAVE_POSIX_REGCOMP
335 regex_t
*s
= (regex_t
*) ecalloc(1, sizeof(regex_t
));
336 if (regcomp(s
, pattern
, REGCOMP_FLAG
))
339 error("Invalid pattern", NULL_PARG
);
342 if (regpattern
!= NULL
)
348 const char *errstring
;
351 comp
= pcre_compile(pattern
, 0,
352 &errstring
, &erroffset
, NULL
);
355 parg
.p_string
= (char *) errstring
;
363 if ((parg
.p_string
= re_comp(pattern
)) != NULL
)
372 if ((s
= regcmp(pattern
, 0)) == NULL
)
374 error("Invalid pattern", NULL_PARG
);
377 if (cpattern
!= NULL
)
383 if ((s
= regcomp(pattern
)) == NULL
)
386 * regcomp has already printed an error message
391 if (regpattern
!= NULL
)
397 if (last_pattern
!= NULL
)
399 last_pattern
= (char *) calloc(1, strlen(pattern
)+1);
400 if (last_pattern
!= NULL
)
401 strcpy(last_pattern
, pattern
);
403 last_search_type
= search_type
;
408 * Forget that we have a compiled pattern.
413 #if HAVE_POSIX_REGCOMP
414 if (regpattern
!= NULL
)
419 if (regpattern
!= NULL
)
420 pcre_free(regpattern
);
427 if (cpattern
!= NULL
)
432 if (regpattern
!= NULL
)
440 * Perform a pattern match with the previously compiled pattern.
441 * Set sp and ep to the start and end of the matched string.
444 match_pattern(line
, sp
, ep
, notbol
)
452 if (last_search_type
& SRCH_NO_REGEX
)
453 return (match(last_pattern
, line
, sp
, ep
));
455 #if HAVE_POSIX_REGCOMP
458 int flags
= (notbol
) ? REG_NOTBOL
: 0;
459 matched
= !regexec(regpattern
, line
, 1, &rm
, flags
);
463 *sp
= line
+ rm
.rm_so
;
464 *ep
= line
+ rm
.rm_eo
;
473 int flags
= (notbol
) ? PCRE_NOTBOL
: 0;
475 matched
= pcre_exec(regpattern
, NULL
, line
, strlen(line
),
476 0, flags
, ovector
, 3) >= 0;
479 *sp
= line
+ ovector
[0];
480 *ep
= line
+ ovector
[1];
484 matched
= (re_exec(line
) == 1);
486 * re_exec doesn't seem to provide a way to get the matched string.
491 *ep
= regex(cpattern
, line
);
492 matched
= (*ep
!= NULL
);
499 matched
= regexec2(regpattern
, line
, notbol
);
501 matched
= regexec(regpattern
, line
);
505 *sp
= regpattern
->startp
[0];
506 *ep
= regpattern
->endp
[0];
509 matched
= match(last_pattern
, line
, sp
, ep
);
516 * Clear the hilite list.
522 struct hilite
*nexthl
;
524 for (hl
= hilite_anchor
.hl_first
; hl
!= NULL
; hl
= nexthl
)
526 nexthl
= hl
->hl_next
;
529 hilite_anchor
.hl_first
= NULL
;
530 prep_startpos
= prep_endpos
= NULL_POSITION
;
534 * Should any characters in a specified range be highlighted?
535 * If nohide is nonzero, don't consider hide_hilite.
538 is_hilited(pos
, epos
, nohide
)
546 start_attnpos
!= NULL_POSITION
&&
548 (epos
== NULL_POSITION
|| epos
> start_attnpos
))
550 * The attn line overlaps this range.
554 if (hilite_search
== 0)
556 * Not doing highlighting.
560 if (!nohide
&& hide_hilite
)
562 * Highlighting is hidden.
567 * Look at each highlight and see if any part of it falls in the range.
569 for (hl
= hilite_anchor
.hl_first
; hl
!= NULL
; hl
= hl
->hl_next
)
571 if (hl
->hl_endpos
> pos
&&
572 (epos
== NULL_POSITION
|| epos
> hl
->hl_startpos
))
579 * Add a new hilite to a hilite list.
582 add_hilite(anchor
, hl
)
583 struct hilite
*anchor
;
589 * Hilites are sorted in the list; find where new one belongs.
590 * Insert new one after ihl.
592 for (ihl
= anchor
; ihl
->hl_next
!= NULL
; ihl
= ihl
->hl_next
)
594 if (ihl
->hl_next
->hl_startpos
> hl
->hl_startpos
)
599 * Truncate hilite so it doesn't overlap any existing ones
600 * above and below it.
603 hl
->hl_startpos
= MAXPOS(hl
->hl_startpos
, ihl
->hl_endpos
);
604 if (ihl
->hl_next
!= NULL
)
605 hl
->hl_endpos
= MINPOS(hl
->hl_endpos
, ihl
->hl_next
->hl_startpos
);
606 if (hl
->hl_startpos
>= hl
->hl_endpos
)
609 * Hilite was truncated out of existence.
614 hl
->hl_next
= ihl
->hl_next
;
619 * Adjust hl_startpos & hl_endpos to account for backspace processing.
622 adj_hilite(anchor
, linepos
, cvt_ops
)
623 struct hilite
*anchor
;
634 * The line was already scanned and hilites were added (in hilite_line).
635 * But it was assumed that each char position in the line
636 * corresponds to one char position in the file.
637 * This may not be true if there are backspaces in the line.
638 * Get the raw line again. Look at each character.
640 (void) forw_raw_line(linepos
, &line
);
641 opos
= npos
= linepos
;
642 hl
= anchor
->hl_first
;
647 * See if we need to adjust the current hl_startpos or
648 * hl_endpos. After adjusting startpos[i], move to endpos[i].
649 * After adjusting endpos[i], move to startpos[i+1].
650 * The hilite list must be sorted thus:
651 * startpos[0] < endpos[0] <= startpos[1] < endpos[1] <= etc.
653 if (checkstart
&& hl
->hl_startpos
== opos
)
655 hl
->hl_startpos
= npos
;
657 continue; /* {{ not really necessary }} */
658 } else if (!checkstart
&& hl
->hl_endpos
== opos
)
660 hl
->hl_endpos
= npos
;
663 continue; /* {{ necessary }} */
667 if (cvt_ops
& CVT_ANSI
)
669 while (line
[0] == ESC
)
672 * Found an ESC. The file position moves
673 * forward past the entire ANSI escape sequence.
677 while (*line
!= '\0')
680 if (is_ansi_end(*line
++))
688 if (cvt_ops
& CVT_BS
)
690 while (line
[0] == '\b' && line
[1] != '\0')
693 * Found a backspace. The file position moves
694 * forward by 2 relative to the processed line
695 * which was searched in hilite_line.
705 * Make a hilite for each string in a physical line which matches
706 * the current pattern.
707 * sp,ep delimit the first match already found.
710 hilite_line(linepos
, line
, sp
, ep
, cvt_ops
)
719 struct hilite hilites
;
721 if (sp
== NULL
|| ep
== NULL
)
724 * sp and ep delimit the first match in the line.
725 * Mark the corresponding file positions, then
726 * look for further matches and mark them.
727 * {{ This technique, of calling match_pattern on subsequent
728 * substrings of the line, may mark more than is correct
729 * if the pattern starts with "^". This bug is fixed
730 * for those regex functions that accept a notbol parameter
731 * (currently POSIX, PCRE and V8-with-regexec2). }}
735 * Put the hilites into a temporary list until they're adjusted.
737 hilites
.hl_first
= NULL
;
742 * Assume that each char position in the "line"
743 * buffer corresponds to one char position in the file.
744 * This is not quite true; we need to adjust later.
746 hl
= (struct hilite
*) ecalloc(1, sizeof(struct hilite
));
747 hl
->hl_startpos
= linepos
+ (sp
-line
);
748 hl
->hl_endpos
= linepos
+ (ep
-line
);
749 add_hilite(&hilites
, hl
);
752 * If we matched more than zero characters,
753 * move to the first char after the string we matched.
754 * If we matched zero, just move to the next char.
758 else if (*searchp
!= '\0')
760 else /* end of line */
762 } while (match_pattern(searchp
, &sp
, &ep
, 1));
765 * If there were backspaces in the original line, they
766 * were removed, and hl_startpos/hl_endpos are not correct.
767 * {{ This is very ugly. }}
769 adj_hilite(&hilites
, linepos
, cvt_ops
);
772 * Now put the hilites into the real list.
774 while ((hl
= hilites
.hl_next
) != NULL
)
776 hilites
.hl_next
= hl
->hl_next
;
777 add_hilite(&hilite_anchor
, hl
);
783 * Change the caseless-ness of searches.
784 * Updates the internal search state to reflect a change in the -i flag.
789 if (!is_ucase_pattern
)
791 * Pattern did not have uppercase.
792 * Just set the search caselessness to the global caselessness.
794 is_caseless
= caseless
;
797 * Pattern did have uppercase.
798 * Discard the pattern; we can't change search caselessness now.
805 * Find matching text which is currently on screen and highlight it.
810 struct scrpos scrpos
;
813 if (scrpos
.pos
== NULL_POSITION
)
815 prep_hilite(scrpos
.pos
, position(BOTTOM_PLUS_ONE
), -1);
820 * Change highlighting parameters.
826 * Erase any highlights currently on screen.
831 if (hilite_search
== OPT_ONPLUS
)
833 * Display highlights.
840 * Figure out where to start a search.
843 search_pos(search_type
)
852 * Start at the beginning (or end) of the file.
853 * The empty_screen() case is mainly for
854 * command line initiated searches;
855 * for example, "+/xyz" on the command line.
856 * Also for multi-file (SRCH_PAST_EOF) searches.
858 if (search_type
& SRCH_FORW
)
864 if (pos
== NULL_POSITION
)
866 (void) ch_end_seek();
875 * Search does not include current screen.
877 if (search_type
& SRCH_FORW
)
878 linenum
= BOTTOM_PLUS_ONE
;
881 pos
= position(linenum
);
885 * Search includes current screen.
886 * It starts at the jump target (if searching backwards),
887 * or at the jump target plus one (if forwards).
889 linenum
= adjsline(jump_sline
);
890 pos
= position(linenum
);
891 if (search_type
& SRCH_FORW
)
893 pos
= forw_raw_line(pos
, (char **)NULL
);
894 while (pos
== NULL_POSITION
)
896 if (++linenum
>= sc_height
)
898 pos
= position(linenum
);
902 while (pos
== NULL_POSITION
)
906 pos
= position(linenum
);
914 * Search a subset of the file, specified by start/end position.
917 search_range(pos
, endpos
, search_type
, matches
, maxlines
, plinepos
, pendpos
)
931 POSITION linepos
, oldpos
;
933 linenum
= find_linenum(pos
);
938 * Get lines until we find a matching one or until
939 * we hit end-of-file (or beginning-of-file if we're
940 * going backwards), or until we hit the end position.
945 * A signal aborts the search.
950 if ((endpos
!= NULL_POSITION
&& pos
>= endpos
) || maxlines
== 0)
953 * Reached end position without a match.
962 if (search_type
& SRCH_FORW
)
965 * Read the next line, and save the
966 * starting position of that line in linepos.
969 pos
= forw_raw_line(pos
, &line
);
975 * Read the previous line and save the
976 * starting position of that line in linepos.
978 pos
= back_raw_line(pos
, &line
);
984 if (pos
== NULL_POSITION
)
987 * Reached EOF/BOF without a match.
995 * If we're using line numbers, we might as well
996 * remember the information we have now (the position
997 * and line number of the current line).
998 * Don't do it for every line because it slows down
999 * the search. Remember the line number only if
1000 * we're "far" from the last place we remembered it.
1002 if (linenums
&& abs((int)(pos
- oldpos
)) > 1024)
1003 add_lnum(linenum
, pos
);
1007 * If it's a caseless search, convert the line to lowercase.
1008 * If we're doing backspace processing, delete backspaces.
1010 cvt_ops
= get_cvt_ops();
1011 cvt_text(line
, line
, cvt_ops
);
1014 * Test the next line to see if we have a match.
1015 * We are successful if we either want a match and got one,
1016 * or if we want a non-match and got one.
1018 line_match
= match_pattern(line
, &sp
, &ep
, 0);
1019 line_match
= (!(search_type
& SRCH_NO_MATCH
) && line_match
) ||
1020 ((search_type
& SRCH_NO_MATCH
) && !line_match
);
1026 if (search_type
& SRCH_FIND_ALL
)
1030 * We are supposed to find all matches in the range.
1031 * Just add the matches in this line to the
1032 * hilite list and keep searching.
1035 hilite_line(linepos
, line
, sp
, ep
, cvt_ops
);
1037 } else if (--matches
<= 0)
1040 * Found the one match we're looking for.
1044 if (hilite_search
== 1)
1047 * Clear the hilite list and add only
1048 * the matches in this one line.
1052 hilite_line(linepos
, line
, sp
, ep
, cvt_ops
);
1055 if (plinepos
!= NULL
)
1056 *plinepos
= linepos
;
1063 * Search for the n-th occurrence of a specified pattern,
1064 * either forward or backward.
1065 * Return the number of matches not yet found in this file
1066 * (that is, n minus the number of matches found).
1067 * Return -1 if the search should be aborted.
1068 * Caller may continue the search in another file
1069 * if less than n matches are found in this file.
1072 search(search_type
, pattern
, n
)
1080 if (pattern
== NULL
|| *pattern
== '\0')
1083 * A null pattern means use the previously compiled pattern.
1085 if (!prev_pattern())
1087 error("No previous regular expression", NULL_PARG
);
1090 if ((search_type
& SRCH_NO_REGEX
) !=
1091 (last_search_type
& SRCH_NO_REGEX
))
1093 error("Please re-enter search pattern", NULL_PARG
);
1097 if (hilite_search
== OPT_ON
)
1100 * Erase the highlights currently on screen.
1101 * If the search fails, we'll redisplay them later.
1105 if (hilite_search
== OPT_ONPLUS
&& hide_hilite
)
1108 * Highlight any matches currently on screen,
1109 * before we actually start the search.
1119 * Compile the pattern.
1121 ucase
= is_ucase(pattern
);
1122 if (caseless
== OPT_ONPLUS
)
1123 cvt_text(pattern
, pattern
, CVT_TO_LC
);
1124 if (compile_pattern(pattern
, search_type
) < 0)
1127 * Ignore case if -I is set OR
1128 * -i is set AND the pattern is all lowercase.
1130 is_ucase_pattern
= ucase
;
1131 if (is_ucase_pattern
&& caseless
!= OPT_ONPLUS
)
1134 is_caseless
= caseless
;
1139 * Erase the highlights currently on screen.
1140 * Also permanently delete them from the hilite list.
1146 if (hilite_search
== OPT_ONPLUS
)
1149 * Highlight any matches currently on screen,
1150 * before we actually start the search.
1158 * Figure out where to start the search.
1160 pos
= search_pos(search_type
);
1161 if (pos
== NULL_POSITION
)
1164 * Can't find anyplace to start searching from.
1166 if (search_type
& SRCH_PAST_EOF
)
1168 /* repaint(); -- why was this here? */
1169 error("Nothing to search", NULL_PARG
);
1173 n
= search_range(pos
, NULL_POSITION
, search_type
, n
, -1,
1174 &pos
, (POSITION
*)NULL
);
1178 * Search was unsuccessful.
1181 if (hilite_search
== OPT_ON
&& n
> 0)
1183 * Redisplay old hilites.
1190 if (!(search_type
& SRCH_NO_MOVE
))
1193 * Go to the matching line.
1195 jump_loc(pos
, jump_sline
);
1199 if (hilite_search
== OPT_ON
)
1201 * Display new hilites in the matching line.
1211 * Prepare hilites in a given range of the file.
1213 * The pair (prep_startpos,prep_endpos) delimits a contiguous region
1214 * of the file that has been "prepared"; that is, scanned for matches for
1215 * the current search pattern, and hilites have been created for such matches.
1216 * If prep_startpos == NULL_POSITION, the prep region is empty.
1217 * If prep_endpos == NULL_POSITION, the prep region extends to EOF.
1218 * prep_hilite asks that the range (spos,epos) be covered by the prep region.
1221 prep_hilite(spos
, epos
, maxlines
)
1226 POSITION nprep_startpos
= prep_startpos
;
1227 POSITION nprep_endpos
= prep_endpos
;
1233 * Search beyond where we're asked to search, so the prep region covers
1234 * more than we need. Do one big search instead of a bunch of small ones.
1236 #define SEARCH_MORE (3*size_linebuf) /* slack added to epos / subtracted from spos; expands to an expression that reads the extern int size_linebuf at each use */
1238 if (!prev_pattern())
1242 * If we're limited to a max number of lines, figure out the
1243 * file position we should stop at.
1246 max_epos
= NULL_POSITION
;
1250 for (i
= 0; i
< maxlines
; i
++)
1251 max_epos
= forw_raw_line(max_epos
, (char **)NULL
);
1256 * The range that we need to search (spos,epos); and the range that
1257 * the "prep" region will then cover (nprep_startpos,nprep_endpos).
1260 if (prep_startpos
== NULL_POSITION
||
1261 (epos
!= NULL_POSITION
&& epos
< prep_startpos
) ||
1265 * New range is not contiguous with old prep region.
1266 * Discard the old prep region and start a new one.
1269 if (epos
!= NULL_POSITION
)
1270 epos
+= SEARCH_MORE
;
1271 nprep_startpos
= spos
;
1275 * New range partially or completely overlaps old prep region.
1277 if (epos
== NULL_POSITION
)
1280 * New range goes to end of file.
1283 } else if (epos
> prep_endpos
)
1286 * New range ends after old prep region.
1287 * Extend prep region to end at end of new range.
1289 epos
+= SEARCH_MORE
;
1290 } else /* (epos <= prep_endpos) */
1293 * New range ends within old prep region.
1294 * Truncate search to end at start of old prep region.
1296 epos
= prep_startpos
;
1299 if (spos
< prep_startpos
)
1302 * New range starts before old prep region.
1303 * Extend old prep region backwards to start at
1304 * start of new range.
1306 if (spos
< SEARCH_MORE
)
1309 spos
-= SEARCH_MORE
;
1310 nprep_startpos
= spos
;
1311 } else /* (spos >= prep_startpos) */
1314 * New range starts within or after old prep region.
1315 * Trim search to start at end of old prep region.
1321 if (epos
!= NULL_POSITION
&& max_epos
!= NULL_POSITION
&&
1324 * Don't go past the max position we're allowed.
1328 if (epos
== NULL_POSITION
|| epos
> spos
)
1330 result
= search_range(spos
, epos
, SRCH_FORW
|SRCH_FIND_ALL
, 0,
1331 maxlines
, (POSITION
*)NULL
, &new_epos
);
1334 if (prep_endpos
== NULL_POSITION
|| new_epos
> prep_endpos
)
1335 nprep_endpos
= new_epos
;
1337 prep_startpos
= nprep_startpos
;
1338 prep_endpos
= nprep_endpos
;
1343 * Simple pattern matching function.
1344 * It supports no metacharacters like *, etc.
1347 match(pattern
, buf
, pfound
, pend
)
1348 char *pattern
, *buf
;
1349 char **pfound
, **pend
;
1351 register char *pp
, *lp
;
1353 for ( ; *buf
!= '\0'; buf
++)
1355 for (pp
= pattern
, lp
= buf
; *pp
== *lp
; pp
++, lp
++)
1356 if (*pp
== '\0' || *lp
== '\0')
1372 * This function is called by the V8 regcomp to report
1373 * errors in regular expressions.