1 /* kwsearch.c - searching subroutines using kwset for grep.
2 Copyright 1992, 1998, 2000, 2007, 2009-2015 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
19 /* Written August 1992 by Mike Haertel. */
24 /* Whether -w considers WC to be a word constituent. */
/* The result is nonzero when WC compares equal to the underscore
   constant or when iswalnum accepts WC.
   NOTE(review): the enclosing function header is not visible in this
   excerpt; only the return statement is.  */
28 return wc
== L
'_' || iswalnum (wc
);
31 /* KWset compiled pattern. For Ecompile and Gcompile, we compile
32 a list of strings, at least one of which is known to occur in
33 any string matching the regexp. */
/* Fcompile receives PATTERN and its byte count SIZE.
   NOTE(review): only fragments of the body are visible in this excerpt;
   the comments below describe just those fragments.  */
37 Fcompile (char const *pattern
, size_t size
)
/* P starts at the beginning of PATTERN.  */
43 char const *p
= pattern
;
/* SEP is the first newline found within TOTAL bytes starting at P, or
   NULL when memchr finds none.
   NOTE(review): TOTAL is defined outside the visible lines; presumably
   the number of pattern bytes not yet consumed -- confirm.  */
47 char const *sep
= memchr (p
, '\n', total
);
/* Allocate LEN + 2 bytes, copy the LEN bytes at P to offset 1 of BUF,
   and store EOLBYTE immediately after the copied bytes.
   NOTE(review): no visible line writes BUF[0]; presumably the elided
   code does -- confirm.  */
63 buf
= xmalloc (len
+ 2);
65 memcpy (buf
+ 1, p
, len
);
66 buf
[len
+ 1] = eolbyte
;
/* Pass the LEN bytes at P to kwsincr for KWSET.
   NOTE(review): whether P and LEN have been redirected to BUF by this
   point cannot be determined from the visible lines.  */
70 kwsincr (kwset
, p
, len
);
81 Fexecute (char const *buf
, size_t size
, size_t *match_size
,
82 char const *start_ptr
)
84 char const *beg
, *try, *end
, *mb_start
;
87 struct kwsmatch kwsmatch
;
90 for (mb_start
= beg
= start_ptr
? start_ptr
: buf
; beg
<= buf
+ size
; beg
++)
92 size_t offset
= kwsexec (kwset
, beg
- match_lines
,
93 buf
+ size
- beg
+ match_lines
, &kwsmatch
);
94 if (offset
== (size_t) -1)
96 len
= kwsmatch
.size
[0] - 2 * match_lines
;
97 if (!match_lines
&& MB_CUR_MAX
> 1 && !using_utf8 ()
98 && mb_goback (&mb_start
, beg
+ offset
, buf
+ size
) != 0)
100 /* We have matched a single byte that is not at the beginning of a
101 multibyte character. mb_goback has advanced MB_START past that
102 multibyte character. Now, we want to position BEG so that the
103 next kwsexec search starts there. Thus, to compensate for the
104 for-loop's BEG++, above, subtract one here. This code is
105 unusually hard to reach, and exceptionally, let's show how to trigger it:
108 printf '\203AA\n'|LC_ALL=ja_JP.SHIFT_JIS src/grep -F A
110 That assumes the named locale is installed.
111 Note that your system's shift-JIS locale may have a different
112 name, possibly including "sjis". */
117 if (start_ptr
&& !match_words
)
118 goto success_in_beg_and_len
;
121 len
+= start_ptr
== NULL
;
122 goto success_in_beg_and_len
;
127 char const *bol
= memrchr (buf
, eol
, beg
- buf
);
128 bol
= bol
? bol
+ 1 : buf
;
129 if (wordchar (mb_prev_wc (bol
, try, buf
+ size
)))
131 if (wordchar (mb_next_wc (try + len
, buf
+ size
)))
135 offset
= kwsexec (kwset
, beg
, --len
, &kwsmatch
);
136 if (offset
== (size_t) -1)
139 len
= kwsmatch
.size
[0];
144 goto success_in_beg_and_len
;
148 } /* for (beg in buf) */
154 end
= memchr (beg
+ len
, eol
, (buf
+ size
) - (beg
+ len
));
155 end
= end
? end
+ 1 : buf
+ size
;
156 beg
= memrchr (buf
, eol
, beg
- buf
);
157 beg
= beg
? beg
+ 1 : buf
;
159 success_in_beg_and_len
:;
160 size_t off
= beg
- buf
;