Upgraded GRUB2 to 2.00 release.
[AROS.git] / arch / all-pc / boot / grub2-aros / grub-core / gnulib / regex_internal.c
blob98b8d5d21bb6a2db4226c37a633c033c9fea3791
1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
3 Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation,
19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21 static void re_string_construct_common (const char *str, Idx len,
22 re_string_t *pstr,
23 RE_TRANSLATE_TYPE trans, bool icase,
24 const re_dfa_t *dfa) internal_function;
25 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
26 const re_node_set *nodes,
27 re_hashval_t hash) internal_function;
28 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
29 const re_node_set *nodes,
30 unsigned int context,
31 re_hashval_t hash) internal_function;
33 /* Functions for string operation. */
35 /* This function allocate the buffers. It is necessary to call
36 re_string_reconstruct before using the object. */
38 static reg_errcode_t
39 internal_function __attribute_warn_unused_result__
40 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
41 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
43 reg_errcode_t ret;
44 Idx init_buf_len;
46 /* Ensure at least one character fits into the buffers. */
47 if (init_len < dfa->mb_cur_max)
48 init_len = dfa->mb_cur_max;
49 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
50 re_string_construct_common (str, len, pstr, trans, icase, dfa);
52 ret = re_string_realloc_buffers (pstr, init_buf_len);
53 if (BE (ret != REG_NOERROR, 0))
54 return ret;
56 pstr->word_char = dfa->word_char;
57 pstr->word_ops_used = dfa->word_ops_used;
58 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
59 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
60 pstr->valid_raw_len = pstr->valid_len;
61 return REG_NOERROR;
64 /* This function allocate the buffers, and initialize them. */
66 static reg_errcode_t
67 internal_function __attribute_warn_unused_result__
68 re_string_construct (re_string_t *pstr, const char *str, Idx len,
69 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
71 reg_errcode_t ret;
72 memset (pstr, '\0', sizeof (re_string_t));
73 re_string_construct_common (str, len, pstr, trans, icase, dfa);
75 if (len > 0)
77 ret = re_string_realloc_buffers (pstr, len + 1);
78 if (BE (ret != REG_NOERROR, 0))
79 return ret;
81 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
83 if (icase)
85 #ifdef RE_ENABLE_I18N
86 if (dfa->mb_cur_max > 1)
88 while (1)
90 ret = build_wcs_upper_buffer (pstr);
91 if (BE (ret != REG_NOERROR, 0))
92 return ret;
93 if (pstr->valid_raw_len >= len)
94 break;
95 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
96 break;
97 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
98 if (BE (ret != REG_NOERROR, 0))
99 return ret;
102 else
103 #endif /* RE_ENABLE_I18N */
104 build_upper_buffer (pstr);
106 else
108 #ifdef RE_ENABLE_I18N
109 if (dfa->mb_cur_max > 1)
110 build_wcs_buffer (pstr);
111 else
112 #endif /* RE_ENABLE_I18N */
114 if (trans != NULL)
115 re_string_translate_buffer (pstr);
116 else
118 pstr->valid_len = pstr->bufs_len;
119 pstr->valid_raw_len = pstr->bufs_len;
124 return REG_NOERROR;
127 /* Helper functions for re_string_allocate, and re_string_construct. */
129 static reg_errcode_t
130 internal_function __attribute_warn_unused_result__
131 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
133 #ifdef RE_ENABLE_I18N
134 if (pstr->mb_cur_max > 1)
136 wint_t *new_wcs;
138 /* Avoid overflow. */
139 size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
140 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
141 return REG_ESPACE;
143 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
144 if (BE (new_wcs == NULL, 0))
145 return REG_ESPACE;
146 pstr->wcs = new_wcs;
147 if (pstr->offsets != NULL)
149 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
150 if (BE (new_offsets == NULL, 0))
151 return REG_ESPACE;
152 pstr->offsets = new_offsets;
155 #endif /* RE_ENABLE_I18N */
156 if (pstr->mbs_allocated)
158 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
159 new_buf_len);
160 if (BE (new_mbs == NULL, 0))
161 return REG_ESPACE;
162 pstr->mbs = new_mbs;
164 pstr->bufs_len = new_buf_len;
165 return REG_NOERROR;
169 static void
170 internal_function
171 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
172 RE_TRANSLATE_TYPE trans, bool icase,
173 const re_dfa_t *dfa)
175 pstr->raw_mbs = (const unsigned char *) str;
176 pstr->len = len;
177 pstr->raw_len = len;
178 pstr->trans = trans;
179 pstr->icase = icase;
180 pstr->mbs_allocated = (trans != NULL || icase);
181 pstr->mb_cur_max = dfa->mb_cur_max;
182 pstr->is_utf8 = dfa->is_utf8;
183 pstr->map_notascii = dfa->map_notascii;
184 pstr->stop = pstr->len;
185 pstr->raw_stop = pstr->stop;
188 #ifdef RE_ENABLE_I18N
190 /* Build wide character buffer PSTR->WCS.
191 If the byte sequence of the string are:
192 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
193 Then wide character buffer will be:
194 <wc1> , WEOF , <wc2> , WEOF , <wc3>
195 We use WEOF for padding, they indicate that the position isn't
196 a first byte of a multibyte character.
198 Note that this function assumes PSTR->VALID_LEN elements are already
199 built and starts from PSTR->VALID_LEN. */
201 static void
202 internal_function
203 build_wcs_buffer (re_string_t *pstr)
205 #ifdef _LIBC
206 unsigned char buf[MB_LEN_MAX];
207 assert (MB_LEN_MAX >= pstr->mb_cur_max);
208 #else
209 unsigned char buf[64];
210 #endif
211 mbstate_t prev_st;
212 Idx byte_idx, end_idx, remain_len;
213 size_t mbclen;
215 /* Build the buffers from pstr->valid_len to either pstr->len or
216 pstr->bufs_len. */
217 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
218 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
220 wchar_t wc;
221 const char *p;
223 remain_len = end_idx - byte_idx;
224 prev_st = pstr->cur_state;
225 /* Apply the translation if we need. */
226 if (BE (pstr->trans != NULL, 0))
228 int i, ch;
230 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
232 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
233 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
235 p = (const char *) buf;
237 else
238 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
239 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
240 if (BE (mbclen == (size_t) -2, 0))
242 /* The buffer doesn't have enough space, finish to build. */
243 pstr->cur_state = prev_st;
244 break;
246 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
248 /* We treat these cases as a singlebyte character. */
249 mbclen = 1;
250 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
251 if (BE (pstr->trans != NULL, 0))
252 wc = pstr->trans[wc];
253 pstr->cur_state = prev_st;
256 /* Write wide character and padding. */
257 pstr->wcs[byte_idx++] = wc;
258 /* Write paddings. */
259 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
260 pstr->wcs[byte_idx++] = WEOF;
262 pstr->valid_len = byte_idx;
263 pstr->valid_raw_len = byte_idx;
266 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
267 but for REG_ICASE. */
269 static reg_errcode_t
270 internal_function __attribute_warn_unused_result__
271 build_wcs_upper_buffer (re_string_t *pstr)
273 mbstate_t prev_st;
274 Idx src_idx, byte_idx, end_idx, remain_len;
275 size_t mbclen;
276 #ifdef _LIBC
277 char buf[MB_LEN_MAX];
278 assert (MB_LEN_MAX >= pstr->mb_cur_max);
279 #else
280 char buf[64];
281 #endif
283 byte_idx = pstr->valid_len;
284 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
286 /* The following optimization assumes that ASCII characters can be
287 mapped to wide characters with a simple cast. */
288 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
290 while (byte_idx < end_idx)
292 wchar_t wc;
294 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
295 && mbsinit (&pstr->cur_state))
297 /* In case of a singlebyte character. */
298 pstr->mbs[byte_idx]
299 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
300 /* The next step uses the assumption that wchar_t is encoded
301 ASCII-safe: all ASCII values can be converted like this. */
302 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
303 ++byte_idx;
304 continue;
307 remain_len = end_idx - byte_idx;
308 prev_st = pstr->cur_state;
309 mbclen = __mbrtowc (&wc,
310 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
311 + byte_idx), remain_len, &pstr->cur_state);
312 if (BE (mbclen < (size_t) -2, 1))
314 wchar_t wcu = wc;
315 if (iswlower (wc))
317 size_t mbcdlen;
319 wcu = towupper (wc);
320 mbcdlen = wcrtomb (buf, wcu, &prev_st);
321 if (BE (mbclen == mbcdlen, 1))
322 memcpy (pstr->mbs + byte_idx, buf, mbclen);
323 else
325 src_idx = byte_idx;
326 goto offsets_needed;
329 else
330 memcpy (pstr->mbs + byte_idx,
331 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
332 pstr->wcs[byte_idx++] = wcu;
333 /* Write paddings. */
334 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
335 pstr->wcs[byte_idx++] = WEOF;
337 else if (mbclen == (size_t) -1 || mbclen == 0)
339 /* It is an invalid character or '\0'. Just use the byte. */
340 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
341 pstr->mbs[byte_idx] = ch;
342 /* And also cast it to wide char. */
343 pstr->wcs[byte_idx++] = (wchar_t) ch;
344 if (BE (mbclen == (size_t) -1, 0))
345 pstr->cur_state = prev_st;
347 else
349 /* The buffer doesn't have enough space, finish to build. */
350 pstr->cur_state = prev_st;
351 break;
354 pstr->valid_len = byte_idx;
355 pstr->valid_raw_len = byte_idx;
356 return REG_NOERROR;
358 else
359 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
361 wchar_t wc;
362 const char *p;
363 offsets_needed:
364 remain_len = end_idx - byte_idx;
365 prev_st = pstr->cur_state;
366 if (BE (pstr->trans != NULL, 0))
368 int i, ch;
370 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
372 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
373 buf[i] = pstr->trans[ch];
375 p = (const char *) buf;
377 else
378 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
379 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
380 if (BE (mbclen < (size_t) -2, 1))
382 wchar_t wcu = wc;
383 if (iswlower (wc))
385 size_t mbcdlen;
387 wcu = towupper (wc);
388 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
389 if (BE (mbclen == mbcdlen, 1))
390 memcpy (pstr->mbs + byte_idx, buf, mbclen);
391 else if (mbcdlen != (size_t) -1)
393 size_t i;
395 if (byte_idx + mbcdlen > pstr->bufs_len)
397 pstr->cur_state = prev_st;
398 break;
401 if (pstr->offsets == NULL)
403 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
405 if (pstr->offsets == NULL)
406 return REG_ESPACE;
408 if (!pstr->offsets_needed)
410 for (i = 0; i < (size_t) byte_idx; ++i)
411 pstr->offsets[i] = i;
412 pstr->offsets_needed = 1;
415 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
416 pstr->wcs[byte_idx] = wcu;
417 pstr->offsets[byte_idx] = src_idx;
418 for (i = 1; i < mbcdlen; ++i)
420 pstr->offsets[byte_idx + i]
421 = src_idx + (i < mbclen ? i : mbclen - 1);
422 pstr->wcs[byte_idx + i] = WEOF;
424 pstr->len += mbcdlen - mbclen;
425 if (pstr->raw_stop > src_idx)
426 pstr->stop += mbcdlen - mbclen;
427 end_idx = (pstr->bufs_len > pstr->len)
428 ? pstr->len : pstr->bufs_len;
429 byte_idx += mbcdlen;
430 src_idx += mbclen;
431 continue;
433 else
434 memcpy (pstr->mbs + byte_idx, p, mbclen);
436 else
437 memcpy (pstr->mbs + byte_idx, p, mbclen);
439 if (BE (pstr->offsets_needed != 0, 0))
441 size_t i;
442 for (i = 0; i < mbclen; ++i)
443 pstr->offsets[byte_idx + i] = src_idx + i;
445 src_idx += mbclen;
447 pstr->wcs[byte_idx++] = wcu;
448 /* Write paddings. */
449 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
450 pstr->wcs[byte_idx++] = WEOF;
452 else if (mbclen == (size_t) -1 || mbclen == 0)
454 /* It is an invalid character or '\0'. Just use the byte. */
455 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
457 if (BE (pstr->trans != NULL, 0))
458 ch = pstr->trans [ch];
459 pstr->mbs[byte_idx] = ch;
461 if (BE (pstr->offsets_needed != 0, 0))
462 pstr->offsets[byte_idx] = src_idx;
463 ++src_idx;
465 /* And also cast it to wide char. */
466 pstr->wcs[byte_idx++] = (wchar_t) ch;
467 if (BE (mbclen == (size_t) -1, 0))
468 pstr->cur_state = prev_st;
470 else
472 /* The buffer doesn't have enough space, finish to build. */
473 pstr->cur_state = prev_st;
474 break;
477 pstr->valid_len = byte_idx;
478 pstr->valid_raw_len = src_idx;
479 return REG_NOERROR;
482 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
483 Return the index. */
485 static Idx
486 internal_function
487 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
489 mbstate_t prev_st;
490 Idx rawbuf_idx;
491 size_t mbclen;
492 wint_t wc = WEOF;
494 /* Skip the characters which are not necessary to check. */
495 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
496 rawbuf_idx < new_raw_idx;)
498 wchar_t wc2;
499 Idx remain_len;
500 remain_len = pstr->len - rawbuf_idx;
501 prev_st = pstr->cur_state;
502 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
503 remain_len, &pstr->cur_state);
504 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
506 /* We treat these cases as a single byte character. */
507 if (mbclen == 0 || remain_len == 0)
508 wc = L'\0';
509 else
510 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
511 mbclen = 1;
512 pstr->cur_state = prev_st;
514 else
515 wc = wc2;
516 /* Then proceed the next character. */
517 rawbuf_idx += mbclen;
519 *last_wc = wc;
520 return rawbuf_idx;
522 #endif /* RE_ENABLE_I18N */
524 /* Build the buffer PSTR->MBS, and apply the translation if we need.
525 This function is used in case of REG_ICASE. */
527 static void
528 internal_function
529 build_upper_buffer (re_string_t *pstr)
531 Idx char_idx, end_idx;
532 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
534 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
536 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
537 if (BE (pstr->trans != NULL, 0))
538 ch = pstr->trans[ch];
539 if (islower (ch))
540 pstr->mbs[char_idx] = toupper (ch);
541 else
542 pstr->mbs[char_idx] = ch;
544 pstr->valid_len = char_idx;
545 pstr->valid_raw_len = char_idx;
548 /* Apply TRANS to the buffer in PSTR. */
550 static void
551 internal_function
552 re_string_translate_buffer (re_string_t *pstr)
554 Idx buf_idx, end_idx;
555 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
557 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
559 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
560 pstr->mbs[buf_idx] = pstr->trans[ch];
563 pstr->valid_len = buf_idx;
564 pstr->valid_raw_len = buf_idx;
567 /* This function re-construct the buffers.
568 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
569 convert to upper case in case of REG_ICASE, apply translation. */
571 static reg_errcode_t
572 internal_function __attribute_warn_unused_result__
573 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
575 Idx offset;
577 if (BE (pstr->raw_mbs_idx <= idx, 0))
578 offset = idx - pstr->raw_mbs_idx;
579 else
581 /* Reset buffer. */
582 #ifdef RE_ENABLE_I18N
583 if (pstr->mb_cur_max > 1)
584 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
585 #endif /* RE_ENABLE_I18N */
586 pstr->len = pstr->raw_len;
587 pstr->stop = pstr->raw_stop;
588 pstr->valid_len = 0;
589 pstr->raw_mbs_idx = 0;
590 pstr->valid_raw_len = 0;
591 pstr->offsets_needed = 0;
592 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
593 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
594 if (!pstr->mbs_allocated)
595 pstr->mbs = (unsigned char *) pstr->raw_mbs;
596 offset = idx;
599 if (BE (offset != 0, 1))
601 /* Should the already checked characters be kept? */
602 if (BE (offset < pstr->valid_raw_len, 1))
604 /* Yes, move them to the front of the buffer. */
605 #ifdef RE_ENABLE_I18N
606 if (BE (pstr->offsets_needed, 0))
608 Idx low = 0, high = pstr->valid_len, mid;
611 mid = (high + low) / 2;
612 if (pstr->offsets[mid] > offset)
613 high = mid;
614 else if (pstr->offsets[mid] < offset)
615 low = mid + 1;
616 else
617 break;
619 while (low < high);
620 if (pstr->offsets[mid] < offset)
621 ++mid;
622 pstr->tip_context = re_string_context_at (pstr, mid - 1,
623 eflags);
624 /* This can be quite complicated, so handle specially
625 only the common and easy case where the character with
626 different length representation of lower and upper
627 case is present at or after offset. */
628 if (pstr->valid_len > offset
629 && mid == offset && pstr->offsets[mid] == offset)
631 memmove (pstr->wcs, pstr->wcs + offset,
632 (pstr->valid_len - offset) * sizeof (wint_t));
633 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
634 pstr->valid_len -= offset;
635 pstr->valid_raw_len -= offset;
636 for (low = 0; low < pstr->valid_len; low++)
637 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
639 else
641 /* Otherwise, just find out how long the partial multibyte
642 character at offset is and fill it with WEOF/255. */
643 pstr->len = pstr->raw_len - idx + offset;
644 pstr->stop = pstr->raw_stop - idx + offset;
645 pstr->offsets_needed = 0;
646 while (mid > 0 && pstr->offsets[mid - 1] == offset)
647 --mid;
648 while (mid < pstr->valid_len)
649 if (pstr->wcs[mid] != WEOF)
650 break;
651 else
652 ++mid;
653 if (mid == pstr->valid_len)
654 pstr->valid_len = 0;
655 else
657 pstr->valid_len = pstr->offsets[mid] - offset;
658 if (pstr->valid_len)
660 for (low = 0; low < pstr->valid_len; ++low)
661 pstr->wcs[low] = WEOF;
662 memset (pstr->mbs, 255, pstr->valid_len);
665 pstr->valid_raw_len = pstr->valid_len;
668 else
669 #endif
671 pstr->tip_context = re_string_context_at (pstr, offset - 1,
672 eflags);
673 #ifdef RE_ENABLE_I18N
674 if (pstr->mb_cur_max > 1)
675 memmove (pstr->wcs, pstr->wcs + offset,
676 (pstr->valid_len - offset) * sizeof (wint_t));
677 #endif /* RE_ENABLE_I18N */
678 if (BE (pstr->mbs_allocated, 0))
679 memmove (pstr->mbs, pstr->mbs + offset,
680 pstr->valid_len - offset);
681 pstr->valid_len -= offset;
682 pstr->valid_raw_len -= offset;
683 #if DEBUG
684 assert (pstr->valid_len > 0);
685 #endif
688 else
690 #ifdef RE_ENABLE_I18N
691 /* No, skip all characters until IDX. */
692 Idx prev_valid_len = pstr->valid_len;
694 if (BE (pstr->offsets_needed, 0))
696 pstr->len = pstr->raw_len - idx + offset;
697 pstr->stop = pstr->raw_stop - idx + offset;
698 pstr->offsets_needed = 0;
700 #endif
701 pstr->valid_len = 0;
702 #ifdef RE_ENABLE_I18N
703 if (pstr->mb_cur_max > 1)
705 Idx wcs_idx;
706 wint_t wc = WEOF;
708 if (pstr->is_utf8)
710 const unsigned char *raw, *p, *end;
712 /* Special case UTF-8. Multi-byte chars start with any
713 byte other than 0x80 - 0xbf. */
714 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
715 end = raw + (offset - pstr->mb_cur_max);
716 if (end < pstr->raw_mbs)
717 end = pstr->raw_mbs;
718 p = raw + offset - 1;
719 #ifdef _LIBC
720 /* We know the wchar_t encoding is UCS4, so for the simple
721 case, ASCII characters, skip the conversion step. */
722 if (isascii (*p) && BE (pstr->trans == NULL, 1))
724 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
725 /* pstr->valid_len = 0; */
726 wc = (wchar_t) *p;
728 else
729 #endif
730 for (; p >= end; --p)
731 if ((*p & 0xc0) != 0x80)
733 mbstate_t cur_state;
734 wchar_t wc2;
735 Idx mlen = raw + pstr->len - p;
736 size_t mbclen;
738 #if 0 /* dead code: buf is set but never used */
739 unsigned char buf[6];
740 if (BE (pstr->trans != NULL, 0))
742 int i = mlen < 6 ? mlen : 6;
743 while (--i >= 0)
744 buf[i] = pstr->trans[p[i]];
746 #endif
747 /* XXX Don't use mbrtowc, we know which conversion
748 to use (UTF-8 -> UCS4). */
749 memset (&cur_state, 0, sizeof (cur_state));
750 mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
751 &cur_state);
752 if (raw + offset - p <= mbclen
753 && mbclen < (size_t) -2)
755 memset (&pstr->cur_state, '\0',
756 sizeof (mbstate_t));
757 pstr->valid_len = mbclen - (raw + offset - p);
758 wc = wc2;
760 break;
764 if (wc == WEOF)
765 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
766 if (wc == WEOF)
767 pstr->tip_context
768 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
769 else
770 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
771 && IS_WIDE_WORD_CHAR (wc))
772 ? CONTEXT_WORD
773 : ((IS_WIDE_NEWLINE (wc)
774 && pstr->newline_anchor)
775 ? CONTEXT_NEWLINE : 0));
776 if (BE (pstr->valid_len, 0))
778 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
779 pstr->wcs[wcs_idx] = WEOF;
780 if (pstr->mbs_allocated)
781 memset (pstr->mbs, 255, pstr->valid_len);
783 pstr->valid_raw_len = pstr->valid_len;
785 else
786 #endif /* RE_ENABLE_I18N */
788 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
789 pstr->valid_raw_len = 0;
790 if (pstr->trans)
791 c = pstr->trans[c];
792 pstr->tip_context = (bitset_contain (pstr->word_char, c)
793 ? CONTEXT_WORD
794 : ((IS_NEWLINE (c) && pstr->newline_anchor)
795 ? CONTEXT_NEWLINE : 0));
798 if (!BE (pstr->mbs_allocated, 0))
799 pstr->mbs += offset;
801 pstr->raw_mbs_idx = idx;
802 pstr->len -= offset;
803 pstr->stop -= offset;
805 /* Then build the buffers. */
806 #ifdef RE_ENABLE_I18N
807 if (pstr->mb_cur_max > 1)
809 if (pstr->icase)
811 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
812 if (BE (ret != REG_NOERROR, 0))
813 return ret;
815 else
816 build_wcs_buffer (pstr);
818 else
819 #endif /* RE_ENABLE_I18N */
820 if (BE (pstr->mbs_allocated, 0))
822 if (pstr->icase)
823 build_upper_buffer (pstr);
824 else if (pstr->trans != NULL)
825 re_string_translate_buffer (pstr);
827 else
828 pstr->valid_len = pstr->len;
830 pstr->cur_idx = 0;
831 return REG_NOERROR;
834 static unsigned char
835 internal_function __attribute ((pure))
836 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
838 int ch;
839 Idx off;
841 /* Handle the common (easiest) cases first. */
842 if (BE (!pstr->mbs_allocated, 1))
843 return re_string_peek_byte (pstr, idx);
845 #ifdef RE_ENABLE_I18N
846 if (pstr->mb_cur_max > 1
847 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
848 return re_string_peek_byte (pstr, idx);
849 #endif
851 off = pstr->cur_idx + idx;
852 #ifdef RE_ENABLE_I18N
853 if (pstr->offsets_needed)
854 off = pstr->offsets[off];
855 #endif
857 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
859 #ifdef RE_ENABLE_I18N
860 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
861 this function returns CAPITAL LETTER I instead of first byte of
862 DOTLESS SMALL LETTER I. The latter would confuse the parser,
863 since peek_byte_case doesn't advance cur_idx in any way. */
864 if (pstr->offsets_needed && !isascii (ch))
865 return re_string_peek_byte (pstr, idx);
866 #endif
868 return ch;
871 static unsigned char
872 internal_function __attribute ((pure))
873 re_string_fetch_byte_case (re_string_t *pstr)
875 if (BE (!pstr->mbs_allocated, 1))
876 return re_string_fetch_byte (pstr);
878 #ifdef RE_ENABLE_I18N
879 if (pstr->offsets_needed)
881 Idx off;
882 int ch;
884 /* For tr_TR.UTF-8 [[:islower:]] there is
885 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
886 in that case the whole multi-byte character and return
887 the original letter. On the other side, with
888 [[: DOTLESS SMALL LETTER I return [[:I, as doing
889 anything else would complicate things too much. */
891 if (!re_string_first_byte (pstr, pstr->cur_idx))
892 return re_string_fetch_byte (pstr);
894 off = pstr->offsets[pstr->cur_idx];
895 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
897 if (! isascii (ch))
898 return re_string_fetch_byte (pstr);
900 re_string_skip_bytes (pstr,
901 re_string_char_size_at (pstr, pstr->cur_idx));
902 return ch;
904 #endif
906 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
909 static void
910 internal_function
911 re_string_destruct (re_string_t *pstr)
913 #ifdef RE_ENABLE_I18N
914 re_free (pstr->wcs);
915 re_free (pstr->offsets);
916 #endif /* RE_ENABLE_I18N */
917 if (pstr->mbs_allocated)
918 re_free (pstr->mbs);
921 /* Return the context at IDX in INPUT. */
923 static unsigned int
924 internal_function
925 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
927 int c;
928 if (BE (! REG_VALID_INDEX (idx), 0))
929 /* In this case, we use the value stored in input->tip_context,
930 since we can't know the character in input->mbs[-1] here. */
931 return input->tip_context;
932 if (BE (idx == input->len, 0))
933 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
934 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
935 #ifdef RE_ENABLE_I18N
936 if (input->mb_cur_max > 1)
938 wint_t wc;
939 Idx wc_idx = idx;
940 while(input->wcs[wc_idx] == WEOF)
942 #ifdef DEBUG
943 /* It must not happen. */
944 assert (REG_VALID_INDEX (wc_idx));
945 #endif
946 --wc_idx;
947 if (! REG_VALID_INDEX (wc_idx))
948 return input->tip_context;
950 wc = input->wcs[wc_idx];
951 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
952 return CONTEXT_WORD;
953 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
954 ? CONTEXT_NEWLINE : 0);
956 else
957 #endif
959 c = re_string_byte_at (input, idx);
960 if (bitset_contain (input->word_char, c))
961 return CONTEXT_WORD;
962 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
966 /* Functions for set operation. */
968 static reg_errcode_t
969 internal_function __attribute_warn_unused_result__
970 re_node_set_alloc (re_node_set *set, Idx size)
972 set->alloc = size;
973 set->nelem = 0;
974 set->elems = re_malloc (Idx, size);
975 if (BE (set->elems == NULL, 0))
976 return REG_ESPACE;
977 return REG_NOERROR;
980 static reg_errcode_t
981 internal_function __attribute_warn_unused_result__
982 re_node_set_init_1 (re_node_set *set, Idx elem)
984 set->alloc = 1;
985 set->nelem = 1;
986 set->elems = re_malloc (Idx, 1);
987 if (BE (set->elems == NULL, 0))
989 set->alloc = set->nelem = 0;
990 return REG_ESPACE;
992 set->elems[0] = elem;
993 return REG_NOERROR;
996 static reg_errcode_t
997 internal_function __attribute_warn_unused_result__
998 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
1000 set->alloc = 2;
1001 set->elems = re_malloc (Idx, 2);
1002 if (BE (set->elems == NULL, 0))
1003 return REG_ESPACE;
1004 if (elem1 == elem2)
1006 set->nelem = 1;
1007 set->elems[0] = elem1;
1009 else
1011 set->nelem = 2;
1012 if (elem1 < elem2)
1014 set->elems[0] = elem1;
1015 set->elems[1] = elem2;
1017 else
1019 set->elems[0] = elem2;
1020 set->elems[1] = elem1;
1023 return REG_NOERROR;
1026 static reg_errcode_t
1027 internal_function __attribute_warn_unused_result__
1028 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1030 dest->nelem = src->nelem;
1031 if (src->nelem > 0)
1033 dest->alloc = dest->nelem;
1034 dest->elems = re_malloc (Idx, dest->alloc);
1035 if (BE (dest->elems == NULL, 0))
1037 dest->alloc = dest->nelem = 0;
1038 return REG_ESPACE;
1040 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1042 else
1043 re_node_set_init_empty (dest);
1044 return REG_NOERROR;
1047 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1048 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1049 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1051 static reg_errcode_t
1052 internal_function __attribute_warn_unused_result__
1053 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1054 const re_node_set *src2)
1056 Idx i1, i2, is, id, delta, sbase;
1057 if (src1->nelem == 0 || src2->nelem == 0)
1058 return REG_NOERROR;
1060 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1061 conservative estimate. */
1062 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1064 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1065 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1066 if (BE (new_elems == NULL, 0))
1067 return REG_ESPACE;
1068 dest->elems = new_elems;
1069 dest->alloc = new_alloc;
1072 /* Find the items in the intersection of SRC1 and SRC2, and copy
1073 into the top of DEST those that are not already in DEST itself. */
1074 sbase = dest->nelem + src1->nelem + src2->nelem;
1075 i1 = src1->nelem - 1;
1076 i2 = src2->nelem - 1;
1077 id = dest->nelem - 1;
1078 for (;;)
1080 if (src1->elems[i1] == src2->elems[i2])
1082 /* Try to find the item in DEST. Maybe we could binary search? */
1083 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1084 --id;
1086 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1087 dest->elems[--sbase] = src1->elems[i1];
1089 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1090 break;
1093 /* Lower the highest of the two items. */
1094 else if (src1->elems[i1] < src2->elems[i2])
1096 if (! REG_VALID_INDEX (--i2))
1097 break;
1099 else
1101 if (! REG_VALID_INDEX (--i1))
1102 break;
1106 id = dest->nelem - 1;
1107 is = dest->nelem + src1->nelem + src2->nelem - 1;
1108 delta = is - sbase + 1;
1110 /* Now copy. When DELTA becomes zero, the remaining
1111 DEST elements are already in place; this is more or
1112 less the same loop that is in re_node_set_merge. */
1113 dest->nelem += delta;
1114 if (delta > 0 && REG_VALID_INDEX (id))
1115 for (;;)
1117 if (dest->elems[is] > dest->elems[id])
1119 /* Copy from the top. */
1120 dest->elems[id + delta--] = dest->elems[is--];
1121 if (delta == 0)
1122 break;
1124 else
1126 /* Slide from the bottom. */
1127 dest->elems[id + delta] = dest->elems[id];
1128 if (! REG_VALID_INDEX (--id))
1129 break;
1133 /* Copy remaining SRC elements. */
1134 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1136 return REG_NOERROR;
1139 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1140 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1142 static reg_errcode_t
1143 internal_function __attribute_warn_unused_result__
1144 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1145 const re_node_set *src2)
1147 Idx i1, i2, id;
1148 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1150 dest->alloc = src1->nelem + src2->nelem;
1151 dest->elems = re_malloc (Idx, dest->alloc);
1152 if (BE (dest->elems == NULL, 0))
1153 return REG_ESPACE;
1155 else
1157 if (src1 != NULL && src1->nelem > 0)
1158 return re_node_set_init_copy (dest, src1);
1159 else if (src2 != NULL && src2->nelem > 0)
1160 return re_node_set_init_copy (dest, src2);
1161 else
1162 re_node_set_init_empty (dest);
1163 return REG_NOERROR;
1165 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1167 if (src1->elems[i1] > src2->elems[i2])
1169 dest->elems[id++] = src2->elems[i2++];
1170 continue;
1172 if (src1->elems[i1] == src2->elems[i2])
1173 ++i2;
1174 dest->elems[id++] = src1->elems[i1++];
1176 if (i1 < src1->nelem)
1178 memcpy (dest->elems + id, src1->elems + i1,
1179 (src1->nelem - i1) * sizeof (Idx));
1180 id += src1->nelem - i1;
1182 else if (i2 < src2->nelem)
1184 memcpy (dest->elems + id, src2->elems + i2,
1185 (src2->nelem - i2) * sizeof (Idx));
1186 id += src2->nelem - i2;
1188 dest->nelem = id;
1189 return REG_NOERROR;
1192 /* Calculate the union set of the sets DEST and SRC. And store it to
1193 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1195 static reg_errcode_t
1196 internal_function __attribute_warn_unused_result__
1197 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1199 Idx is, id, sbase, delta;
1200 if (src == NULL || src->nelem == 0)
1201 return REG_NOERROR;
1202 if (dest->alloc < 2 * src->nelem + dest->nelem)
1204 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1205 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1206 if (BE (new_buffer == NULL, 0))
1207 return REG_ESPACE;
1208 dest->elems = new_buffer;
1209 dest->alloc = new_alloc;
1212 if (BE (dest->nelem == 0, 0))
1214 dest->nelem = src->nelem;
1215 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1216 return REG_NOERROR;
1219 /* Copy into the top of DEST the items of SRC that are not
1220 found in DEST. Maybe we could binary search in DEST? */
1221 for (sbase = dest->nelem + 2 * src->nelem,
1222 is = src->nelem - 1, id = dest->nelem - 1;
1223 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1225 if (dest->elems[id] == src->elems[is])
1226 is--, id--;
1227 else if (dest->elems[id] < src->elems[is])
1228 dest->elems[--sbase] = src->elems[is--];
1229 else /* if (dest->elems[id] > src->elems[is]) */
1230 --id;
1233 if (REG_VALID_INDEX (is))
1235 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1236 sbase -= is + 1;
1237 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1240 id = dest->nelem - 1;
1241 is = dest->nelem + 2 * src->nelem - 1;
1242 delta = is - sbase + 1;
1243 if (delta == 0)
1244 return REG_NOERROR;
1246 /* Now copy. When DELTA becomes zero, the remaining
1247 DEST elements are already in place. */
1248 dest->nelem += delta;
1249 for (;;)
1251 if (dest->elems[is] > dest->elems[id])
1253 /* Copy from the top. */
1254 dest->elems[id + delta--] = dest->elems[is--];
1255 if (delta == 0)
1256 break;
1258 else
1260 /* Slide from the bottom. */
1261 dest->elems[id + delta] = dest->elems[id];
1262 if (! REG_VALID_INDEX (--id))
1264 /* Copy remaining SRC elements. */
1265 memcpy (dest->elems, dest->elems + sbase,
1266 delta * sizeof (Idx));
1267 break;
1272 return REG_NOERROR;
1275 /* Insert the new element ELEM to the re_node_set* SET.
1276 SET should not already have ELEM.
1277 Return true if successful. */
1279 static bool
1280 internal_function __attribute_warn_unused_result__
1281 re_node_set_insert (re_node_set *set, Idx elem)
1283 Idx idx;
1284 /* In case the set is empty. */
1285 if (set->alloc == 0)
1286 return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1288 if (BE (set->nelem, 0) == 0)
1290 /* We already guaranteed above that set->alloc != 0. */
1291 set->elems[0] = elem;
1292 ++set->nelem;
1293 return true;
1296 /* Realloc if we need. */
1297 if (set->alloc == set->nelem)
1299 Idx *new_elems;
1300 set->alloc = set->alloc * 2;
1301 new_elems = re_realloc (set->elems, Idx, set->alloc);
1302 if (BE (new_elems == NULL, 0))
1303 return false;
1304 set->elems = new_elems;
1307 /* Move the elements which follows the new element. Test the
1308 first element separately to skip a check in the inner loop. */
1309 if (elem < set->elems[0])
1311 idx = 0;
1312 for (idx = set->nelem; idx > 0; idx--)
1313 set->elems[idx] = set->elems[idx - 1];
1315 else
1317 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1318 set->elems[idx] = set->elems[idx - 1];
1321 /* Insert the new element. */
1322 set->elems[idx] = elem;
1323 ++set->nelem;
1324 return true;
1327 /* Insert the new element ELEM to the re_node_set* SET.
1328 SET should not already have any element greater than or equal to ELEM.
1329 Return true if successful. */
1331 static bool
1332 internal_function __attribute_warn_unused_result__
1333 re_node_set_insert_last (re_node_set *set, Idx elem)
1335 /* Realloc if we need. */
1336 if (set->alloc == set->nelem)
1338 Idx *new_elems;
1339 set->alloc = (set->alloc + 1) * 2;
1340 new_elems = re_realloc (set->elems, Idx, set->alloc);
1341 if (BE (new_elems == NULL, 0))
1342 return false;
1343 set->elems = new_elems;
1346 /* Insert the new element. */
1347 set->elems[set->nelem++] = elem;
1348 return true;
1351 /* Compare two node sets SET1 and SET2.
1352 Return true if SET1 and SET2 are equivalent. */
1354 static bool
1355 internal_function __attribute ((pure))
1356 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1358 Idx i;
1359 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1360 return false;
1361 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1362 if (set1->elems[i] != set2->elems[i])
1363 return false;
1364 return true;
1367 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1369 static Idx
1370 internal_function __attribute ((pure))
1371 re_node_set_contains (const re_node_set *set, Idx elem)
1373 __re_size_t idx, right, mid;
1374 if (! REG_VALID_NONZERO_INDEX (set->nelem))
1375 return 0;
1377 /* Binary search the element. */
1378 idx = 0;
1379 right = set->nelem - 1;
1380 while (idx < right)
1382 mid = (idx + right) / 2;
1383 if (set->elems[mid] < elem)
1384 idx = mid + 1;
1385 else
1386 right = mid;
1388 return set->elems[idx] == elem ? idx + 1 : 0;
1391 static void
1392 internal_function
1393 re_node_set_remove_at (re_node_set *set, Idx idx)
1395 if (idx < 0 || idx >= set->nelem)
1396 return;
1397 --set->nelem;
1398 for (; idx < set->nelem; idx++)
1399 set->elems[idx] = set->elems[idx + 1];
1403 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1404 Or return REG_MISSING if an error occurred. */
1406 static Idx
1407 internal_function
1408 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1410 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1412 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1413 Idx *new_nexts, *new_indices;
1414 re_node_set *new_edests, *new_eclosures;
1415 re_token_t *new_nodes;
1416 size_t max_object_size =
1417 MAX (sizeof (re_token_t),
1418 MAX (sizeof (re_node_set),
1419 sizeof (Idx)));
1421 /* Avoid overflows. */
1422 if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
1423 return REG_MISSING;
1425 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1426 if (BE (new_nodes == NULL, 0))
1427 return REG_MISSING;
1428 dfa->nodes = new_nodes;
1429 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1430 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1431 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1432 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1433 if (BE (new_nexts == NULL || new_indices == NULL
1434 || new_edests == NULL || new_eclosures == NULL, 0))
1435 return REG_MISSING;
1436 dfa->nexts = new_nexts;
1437 dfa->org_indices = new_indices;
1438 dfa->edests = new_edests;
1439 dfa->eclosures = new_eclosures;
1440 dfa->nodes_alloc = new_nodes_alloc;
1442 dfa->nodes[dfa->nodes_len] = token;
1443 dfa->nodes[dfa->nodes_len].constraint = 0;
1444 #ifdef RE_ENABLE_I18N
1446 int type = token.type;
1447 dfa->nodes[dfa->nodes_len].accept_mb =
1448 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1450 #endif
1451 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1452 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1453 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1454 return dfa->nodes_len++;
1457 static inline re_hashval_t
1458 internal_function
1459 calc_state_hash (const re_node_set *nodes, unsigned int context)
1461 re_hashval_t hash = nodes->nelem + context;
1462 Idx i;
1463 for (i = 0 ; i < nodes->nelem ; i++)
1464 hash += nodes->elems[i];
1465 return hash;
1468 /* Search for the state whose node_set is equivalent to NODES.
1469 Return the pointer to the state, if we found it in the DFA.
1470 Otherwise create the new one and return it. In case of an error
1471 return NULL and set the error code in ERR.
1472 Note: - We assume NULL as the invalid state, then it is possible that
1473 return value is NULL and ERR is REG_NOERROR.
1474 - We never return non-NULL value in case of any errors, it is for
1475 optimization. */
1477 static re_dfastate_t *
1478 internal_function __attribute_warn_unused_result__
1479 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1480 const re_node_set *nodes)
1482 re_hashval_t hash;
1483 re_dfastate_t *new_state;
1484 struct re_state_table_entry *spot;
1485 Idx i;
1486 #ifdef lint
1487 /* Suppress bogus uninitialized-variable warnings. */
1488 *err = REG_NOERROR;
1489 #endif
1490 if (BE (nodes->nelem == 0, 0))
1492 *err = REG_NOERROR;
1493 return NULL;
1495 hash = calc_state_hash (nodes, 0);
1496 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1498 for (i = 0 ; i < spot->num ; i++)
1500 re_dfastate_t *state = spot->array[i];
1501 if (hash != state->hash)
1502 continue;
1503 if (re_node_set_compare (&state->nodes, nodes))
1504 return state;
1507 /* There are no appropriate state in the dfa, create the new one. */
1508 new_state = create_ci_newstate (dfa, nodes, hash);
1509 if (BE (new_state == NULL, 0))
1510 *err = REG_ESPACE;
1512 return new_state;
1515 /* Search for the state whose node_set is equivalent to NODES and
1516 whose context is equivalent to CONTEXT.
1517 Return the pointer to the state, if we found it in the DFA.
1518 Otherwise create the new one and return it. In case of an error
1519 return NULL and set the error code in ERR.
1520 Note: - We assume NULL as the invalid state, then it is possible that
1521 return value is NULL and ERR is REG_NOERROR.
1522 - We never return non-NULL value in case of any errors, it is for
1523 optimization. */
1525 static re_dfastate_t *
1526 internal_function __attribute_warn_unused_result__
1527 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1528 const re_node_set *nodes, unsigned int context)
1530 re_hashval_t hash;
1531 re_dfastate_t *new_state;
1532 struct re_state_table_entry *spot;
1533 Idx i;
1534 #ifdef lint
1535 /* Suppress bogus uninitialized-variable warnings. */
1536 *err = REG_NOERROR;
1537 #endif
1538 if (nodes->nelem == 0)
1540 *err = REG_NOERROR;
1541 return NULL;
1543 hash = calc_state_hash (nodes, context);
1544 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1546 for (i = 0 ; i < spot->num ; i++)
1548 re_dfastate_t *state = spot->array[i];
1549 if (state->hash == hash
1550 && state->context == context
1551 && re_node_set_compare (state->entrance_nodes, nodes))
1552 return state;
1554 /* There are no appropriate state in `dfa', create the new one. */
1555 new_state = create_cd_newstate (dfa, nodes, context, hash);
1556 if (BE (new_state == NULL, 0))
1557 *err = REG_ESPACE;
1559 return new_state;
1562 /* Finish initialization of the new state NEWSTATE, and using its hash value
1563 HASH put in the appropriate bucket of DFA's state table. Return value
1564 indicates the error code if failed. */
1566 static reg_errcode_t
1567 __attribute_warn_unused_result__
1568 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1569 re_hashval_t hash)
1571 struct re_state_table_entry *spot;
1572 reg_errcode_t err;
1573 Idx i;
1575 newstate->hash = hash;
1576 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1577 if (BE (err != REG_NOERROR, 0))
1578 return REG_ESPACE;
1579 for (i = 0; i < newstate->nodes.nelem; i++)
1581 Idx elem = newstate->nodes.elems[i];
1582 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1583 if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
1584 return REG_ESPACE;
1587 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1588 if (BE (spot->alloc <= spot->num, 0))
1590 Idx new_alloc = 2 * spot->num + 2;
1591 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1592 new_alloc);
1593 if (BE (new_array == NULL, 0))
1594 return REG_ESPACE;
1595 spot->array = new_array;
1596 spot->alloc = new_alloc;
1598 spot->array[spot->num++] = newstate;
1599 return REG_NOERROR;
1602 static void
1603 free_state (re_dfastate_t *state)
1605 re_node_set_free (&state->non_eps_nodes);
1606 re_node_set_free (&state->inveclosure);
1607 if (state->entrance_nodes != &state->nodes)
1609 re_node_set_free (state->entrance_nodes);
1610 re_free (state->entrance_nodes);
1612 re_node_set_free (&state->nodes);
1613 re_free (state->word_trtable);
1614 re_free (state->trtable);
1615 re_free (state);
1618 /* Create the new state which is independ of contexts.
1619 Return the new state if succeeded, otherwise return NULL. */
1621 static re_dfastate_t *
1622 internal_function __attribute_warn_unused_result__
1623 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1624 re_hashval_t hash)
1626 Idx i;
1627 reg_errcode_t err;
1628 re_dfastate_t *newstate;
1630 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1631 if (BE (newstate == NULL, 0))
1632 return NULL;
1633 err = re_node_set_init_copy (&newstate->nodes, nodes);
1634 if (BE (err != REG_NOERROR, 0))
1636 re_free (newstate);
1637 return NULL;
1640 newstate->entrance_nodes = &newstate->nodes;
1641 for (i = 0 ; i < nodes->nelem ; i++)
1643 re_token_t *node = dfa->nodes + nodes->elems[i];
1644 re_token_type_t type = node->type;
1645 if (type == CHARACTER && !node->constraint)
1646 continue;
1647 #ifdef RE_ENABLE_I18N
1648 newstate->accept_mb |= node->accept_mb;
1649 #endif /* RE_ENABLE_I18N */
1651 /* If the state has the halt node, the state is a halt state. */
1652 if (type == END_OF_RE)
1653 newstate->halt = 1;
1654 else if (type == OP_BACK_REF)
1655 newstate->has_backref = 1;
1656 else if (type == ANCHOR || node->constraint)
1657 newstate->has_constraint = 1;
1659 err = register_state (dfa, newstate, hash);
1660 if (BE (err != REG_NOERROR, 0))
1662 free_state (newstate);
1663 newstate = NULL;
1665 return newstate;
1668 /* Create the new state which is depend on the context CONTEXT.
1669 Return the new state if succeeded, otherwise return NULL. */
1671 static re_dfastate_t *
1672 internal_function __attribute_warn_unused_result__
1673 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1674 unsigned int context, re_hashval_t hash)
1676 Idx i, nctx_nodes = 0;
1677 reg_errcode_t err;
1678 re_dfastate_t *newstate;
1680 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1681 if (BE (newstate == NULL, 0))
1682 return NULL;
1683 err = re_node_set_init_copy (&newstate->nodes, nodes);
1684 if (BE (err != REG_NOERROR, 0))
1686 re_free (newstate);
1687 return NULL;
1690 newstate->context = context;
1691 newstate->entrance_nodes = &newstate->nodes;
1693 for (i = 0 ; i < nodes->nelem ; i++)
1695 re_token_t *node = dfa->nodes + nodes->elems[i];
1696 re_token_type_t type = node->type;
1697 unsigned int constraint = node->constraint;
1699 if (type == CHARACTER && !constraint)
1700 continue;
1701 #ifdef RE_ENABLE_I18N
1702 newstate->accept_mb |= node->accept_mb;
1703 #endif /* RE_ENABLE_I18N */
1705 /* If the state has the halt node, the state is a halt state. */
1706 if (type == END_OF_RE)
1707 newstate->halt = 1;
1708 else if (type == OP_BACK_REF)
1709 newstate->has_backref = 1;
1711 if (constraint)
1713 if (newstate->entrance_nodes == &newstate->nodes)
1715 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1716 if (BE (newstate->entrance_nodes == NULL, 0))
1718 free_state (newstate);
1719 return NULL;
1721 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1722 != REG_NOERROR)
1723 return NULL;
1724 nctx_nodes = 0;
1725 newstate->has_constraint = 1;
1728 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1730 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1731 ++nctx_nodes;
1735 err = register_state (dfa, newstate, hash);
1736 if (BE (err != REG_NOERROR, 0))
1738 free_state (newstate);
1739 newstate = NULL;
1741 return newstate;