poll - Fix events == 0 handling for TAP and TUN, fix console spam
[dragonfly.git] / contrib / grep / src / searchutils.c
blob84c319c79405dee86b91a54539a1bdb8759f8ca1
/* searchutils.c - helper subroutines for grep's matchers.
   Copyright 1992, 1998, 2000, 2007, 2009-2020 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */
19 #include <config.h>
21 #define SEARCH_INLINE _GL_EXTERN_INLINE
22 #define SYSTEM_INLINE _GL_EXTERN_INLINE
23 #include "search.h"
25 /* For each byte B, sbwordchar[B] is true if B is a single-byte
26 character that is a word constituent, and is false otherwise. */
27 static bool sbwordchar[NCHAR];
/* Whether -w considers WC to be a word constituent: WC is either an
   underscore or a character that iswalnum accepts in the current
   locale.  */
static bool
wordchar (wint_t wc)
{
  return wc == L'_' || iswalnum (wc);
}
36 void
37 wordinit (void)
39 for (int i = 0; i < NCHAR; i++)
40 sbwordchar[i] = wordchar (localeinfo.sbctowc[i]);
43 kwset_t
44 kwsinit (bool mb_trans)
46 char *trans = NULL;
48 if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
50 trans = xmalloc (NCHAR);
51 if (MB_CUR_MAX == 1)
52 for (int i = 0; i < NCHAR; i++)
53 trans[i] = toupper (i);
54 else
55 for (int i = 0; i < NCHAR; i++)
57 wint_t wc = localeinfo.sbctowc[i];
58 wint_t uwc = towupper (wc);
59 if (uwc != wc)
61 mbstate_t mbs = { 0 };
62 size_t len = wcrtomb (&trans[i], uwc, &mbs);
63 if (len != 1)
64 abort ();
66 else
67 trans[i] = i;
71 return kwsalloc (trans);
74 /* In the buffer *MB_START, return the number of bytes needed to go
75 back from CUR to the previous boundary, where a "boundary" is the
76 start of a multibyte character or is an error-encoding byte. The
77 buffer ends at END (i.e., one past the address of the buffer's last
78 byte). If CUR is already at a boundary, return 0. If CUR is no
79 larger than *MB_START, return CUR - *MB_START without modifying
80 *MB_START or *MBCLEN.
82 When returning zero, set *MB_START to CUR. When returning a
83 positive value, set *MB_START to the next boundary after CUR,
84 or to END if there is no such boundary, and set *MBCLEN to the
85 length of the preceding character. */
86 ptrdiff_t
87 mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
88 char const *end)
90 const char *p = *mb_start;
91 const char *p0 = p;
92 size_t clen;
94 if (cur <= p)
95 return cur - p;
97 if (localeinfo.using_utf8)
99 p = cur;
100 clen = 1;
102 if (cur < end && (*cur & 0xc0) == 0x80)
103 for (int i = 1; i <= 3; i++)
104 if ((cur[-i] & 0xc0) != 0x80)
106 mbstate_t mbs = { 0 };
107 clen = mb_clen (cur - i, end - (cur - i), &mbs);
108 if (i < clen && clen < (size_t) -2)
110 p0 = cur - i;
111 p = p0 + clen;
113 break;
116 else
118 mbstate_t mbs = { 0 };
121 clen = mb_clen (p, end - p, &mbs);
123 if ((size_t) -2 <= clen)
125 /* An invalid sequence, or a truncated multibyte character.
126 Treat it as a single byte character. */
127 clen = 1;
128 memset (&mbs, 0, sizeof mbs);
130 p0 = p;
131 p += clen;
133 while (p < cur);
136 *mb_start = p;
137 if (mbclen)
138 *mbclen = clen;
139 return p == cur ? 0 : cur - p0;
142 /* Examine the start of BUF (which goes to END) for word constituents.
143 If COUNTALL, examine as many as possible; otherwise, examine at most one.
144 Return the total number of bytes in the examined characters. */
145 static size_t
146 wordchars_count (char const *buf, char const *end, bool countall)
148 size_t n = 0;
149 mbstate_t mbs = { 0 };
150 while (n < end - buf)
152 unsigned char b = buf[n];
153 if (sbwordchar[b])
154 n++;
155 else if (localeinfo.sbclen[b] != -2)
156 break;
157 else
159 wchar_t wc = 0;
160 size_t wcbytes = mbrtowc (&wc, buf + n, end - buf - n, &mbs);
161 if (!wordchar (wc))
162 break;
163 n += wcbytes + !wcbytes;
165 if (!countall)
166 break;
168 return n;
/* Examine the start of BUF for the longest prefix containing just
   word constituents.  Return the total number of bytes in the prefix.
   The buffer ends at END.  */
size_t
wordchars_size (char const *buf, char const *end)
{
  return wordchars_count (buf, end, true);
}
/* If BUF starts with a word constituent, return the number of bytes
   used to represent it; otherwise, return zero.  The buffer ends at END.  */
size_t
wordchar_next (char const *buf, char const *end)
{
  return wordchars_count (buf, end, false);
}
188 /* In the buffer BUF, return nonzero if the character whose encoding
189 contains the byte before CUR is a word constituent. The buffer
190 ends at END. */
191 size_t
192 wordchar_prev (char const *buf, char const *cur, char const *end)
194 if (buf == cur)
195 return 0;
196 unsigned char b = *--cur;
197 if (! localeinfo.multibyte
198 || (localeinfo.using_utf8 && localeinfo.sbclen[b] != -2))
199 return sbwordchar[b];
200 char const *p = buf;
201 cur -= mb_goback (&p, NULL, cur, end);
202 return wordchar_next (cur, end);