* sysdeps/x86_64/fpu/libm-test-ulps: Adjust expected atan2f results.
[glibc.git] / posix / regex_internal.c
blobbaa58443ac45ea2eb6924379570e68ba80bfd83b
1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 static void re_string_construct_common (const char *str, int len,
22 re_string_t *pstr,
23 RE_TRANSLATE_TYPE trans, int icase,
24 const re_dfa_t *dfa) internal_function;
25 #ifdef RE_ENABLE_I18N
26 static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
27 wint_t *last_wc) internal_function;
28 #endif /* RE_ENABLE_I18N */
29 static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
30 unsigned int hash) internal_function;
31 static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
32 const re_node_set *nodes,
33 unsigned int hash) internal_function;
34 static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
35 const re_node_set *nodes,
36 unsigned int context,
37 unsigned int hash) internal_function;
38 static unsigned int inline calc_state_hash (const re_node_set *nodes,
39 unsigned int context) internal_function;
41 /* Functions for string operation. */
43 /* This function allocate the buffers. It is necessary to call
44 re_string_reconstruct before using the object. */
46 static reg_errcode_t
47 re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
48 re_string_t *pstr;
49 const char *str;
50 int len, init_len, icase;
51 RE_TRANSLATE_TYPE trans;
52 const re_dfa_t *dfa;
54 reg_errcode_t ret;
55 int init_buf_len;
57 /* Ensure at least one character fits into the buffers. */
58 if (init_len < dfa->mb_cur_max)
59 init_len = dfa->mb_cur_max;
60 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
61 re_string_construct_common (str, len, pstr, trans, icase, dfa);
63 ret = re_string_realloc_buffers (pstr, init_buf_len);
64 if (BE (ret != REG_NOERROR, 0))
65 return ret;
67 pstr->word_char = dfa->word_char;
68 pstr->word_ops_used = dfa->word_ops_used;
69 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
70 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
71 pstr->valid_raw_len = pstr->valid_len;
72 return REG_NOERROR;
75 /* This function allocate the buffers, and initialize them. */
77 static reg_errcode_t
78 re_string_construct (pstr, str, len, trans, icase, dfa)
79 re_string_t *pstr;
80 const char *str;
81 int len, icase;
82 RE_TRANSLATE_TYPE trans;
83 const re_dfa_t *dfa;
85 reg_errcode_t ret;
86 memset (pstr, '\0', sizeof (re_string_t));
87 re_string_construct_common (str, len, pstr, trans, icase, dfa);
89 if (len > 0)
91 ret = re_string_realloc_buffers (pstr, len + 1);
92 if (BE (ret != REG_NOERROR, 0))
93 return ret;
95 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
97 if (icase)
99 #ifdef RE_ENABLE_I18N
100 if (dfa->mb_cur_max > 1)
102 while (1)
104 ret = build_wcs_upper_buffer (pstr);
105 if (BE (ret != REG_NOERROR, 0))
106 return ret;
107 if (pstr->valid_raw_len >= len)
108 break;
109 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
110 break;
111 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
112 if (BE (ret != REG_NOERROR, 0))
113 return ret;
116 else
117 #endif /* RE_ENABLE_I18N */
118 build_upper_buffer (pstr);
120 else
122 #ifdef RE_ENABLE_I18N
123 if (dfa->mb_cur_max > 1)
124 build_wcs_buffer (pstr);
125 else
126 #endif /* RE_ENABLE_I18N */
128 if (trans != NULL)
129 re_string_translate_buffer (pstr);
130 else
132 pstr->valid_len = pstr->bufs_len;
133 pstr->valid_raw_len = pstr->bufs_len;
138 return REG_NOERROR;
141 /* Helper functions for re_string_allocate, and re_string_construct. */
143 static reg_errcode_t
144 re_string_realloc_buffers (pstr, new_buf_len)
145 re_string_t *pstr;
146 int new_buf_len;
148 #ifdef RE_ENABLE_I18N
149 if (pstr->mb_cur_max > 1)
151 wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
152 if (BE (new_array == NULL, 0))
153 return REG_ESPACE;
154 pstr->wcs = new_array;
155 if (pstr->offsets != NULL)
157 int *new_array = re_realloc (pstr->offsets, int, new_buf_len);
158 if (BE (new_array == NULL, 0))
159 return REG_ESPACE;
160 pstr->offsets = new_array;
163 #endif /* RE_ENABLE_I18N */
164 if (pstr->mbs_allocated)
166 unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
167 new_buf_len);
168 if (BE (new_array == NULL, 0))
169 return REG_ESPACE;
170 pstr->mbs = new_array;
172 pstr->bufs_len = new_buf_len;
173 return REG_NOERROR;
177 static void
178 re_string_construct_common (str, len, pstr, trans, icase, dfa)
179 const char *str;
180 int len;
181 re_string_t *pstr;
182 RE_TRANSLATE_TYPE trans;
183 int icase;
184 const re_dfa_t *dfa;
186 pstr->raw_mbs = (const unsigned char *) str;
187 pstr->len = len;
188 pstr->raw_len = len;
189 pstr->trans = (unsigned RE_TRANSLATE_TYPE) trans;
190 pstr->icase = icase ? 1 : 0;
191 pstr->mbs_allocated = (trans != NULL || icase);
192 pstr->mb_cur_max = dfa->mb_cur_max;
193 pstr->is_utf8 = dfa->is_utf8;
194 pstr->map_notascii = dfa->map_notascii;
195 pstr->stop = pstr->len;
196 pstr->raw_stop = pstr->stop;
199 #ifdef RE_ENABLE_I18N
201 /* Build wide character buffer PSTR->WCS.
202 If the byte sequence of the string are:
203 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
204 Then wide character buffer will be:
205 <wc1> , WEOF , <wc2> , WEOF , <wc3>
206 We use WEOF for padding, they indicate that the position isn't
207 a first byte of a multibyte character.
209 Note that this function assumes PSTR->VALID_LEN elements are already
210 built and starts from PSTR->VALID_LEN. */
212 static void
213 build_wcs_buffer (pstr)
214 re_string_t *pstr;
216 #ifdef _LIBC
217 unsigned char buf[MB_LEN_MAX];
218 assert (MB_LEN_MAX >= pstr->mb_cur_max);
219 #else
220 unsigned char buf[64];
221 #endif
222 mbstate_t prev_st;
223 int byte_idx, end_idx, remain_len;
224 size_t mbclen;
226 /* Build the buffers from pstr->valid_len to either pstr->len or
227 pstr->bufs_len. */
228 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
229 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
231 wchar_t wc;
232 const char *p;
234 remain_len = end_idx - byte_idx;
235 prev_st = pstr->cur_state;
236 /* Apply the translation if we need. */
237 if (BE (pstr->trans != NULL, 0))
239 int i, ch;
241 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
243 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
244 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
246 p = (const char *) buf;
248 else
249 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
250 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
251 if (BE (mbclen == (size_t) -2, 0))
253 /* The buffer doesn't have enough space, finish to build. */
254 pstr->cur_state = prev_st;
255 break;
257 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
259 /* We treat these cases as a singlebyte character. */
260 mbclen = 1;
261 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
262 if (BE (pstr->trans != NULL, 0))
263 wc = pstr->trans[wc];
264 pstr->cur_state = prev_st;
267 /* Write wide character and padding. */
268 pstr->wcs[byte_idx++] = wc;
269 /* Write paddings. */
270 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
271 pstr->wcs[byte_idx++] = WEOF;
273 pstr->valid_len = byte_idx;
274 pstr->valid_raw_len = byte_idx;
277 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
278 but for REG_ICASE. */
280 static int
281 build_wcs_upper_buffer (pstr)
282 re_string_t *pstr;
284 mbstate_t prev_st;
285 int src_idx, byte_idx, end_idx, remain_len;
286 size_t mbclen;
287 #ifdef _LIBC
288 char buf[MB_LEN_MAX];
289 assert (MB_LEN_MAX >= pstr->mb_cur_max);
290 #else
291 char buf[64];
292 #endif
294 byte_idx = pstr->valid_len;
295 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
297 /* The following optimization assumes that ASCII characters can be
298 mapped to wide characters with a simple cast. */
299 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
301 while (byte_idx < end_idx)
303 wchar_t wc;
305 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
306 && mbsinit (&pstr->cur_state))
308 /* In case of a singlebyte character. */
309 pstr->mbs[byte_idx]
310 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
311 /* The next step uses the assumption that wchar_t is encoded
312 ASCII-safe: all ASCII values can be converted like this. */
313 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
314 ++byte_idx;
315 continue;
318 remain_len = end_idx - byte_idx;
319 prev_st = pstr->cur_state;
320 mbclen = mbrtowc (&wc,
321 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
322 + byte_idx), remain_len, &pstr->cur_state);
323 if (BE (mbclen + 2 > 2, 1))
325 wchar_t wcu = wc;
326 if (iswlower (wc))
328 size_t mbcdlen;
330 wcu = towupper (wc);
331 mbcdlen = wcrtomb (buf, wcu, &prev_st);
332 if (BE (mbclen == mbcdlen, 1))
333 memcpy (pstr->mbs + byte_idx, buf, mbclen);
334 else
336 src_idx = byte_idx;
337 goto offsets_needed;
340 else
341 memcpy (pstr->mbs + byte_idx,
342 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
343 pstr->wcs[byte_idx++] = wcu;
344 /* Write paddings. */
345 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
346 pstr->wcs[byte_idx++] = WEOF;
348 else if (mbclen == (size_t) -1 || mbclen == 0)
350 /* It is an invalid character or '\0'. Just use the byte. */
351 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
352 pstr->mbs[byte_idx] = ch;
353 /* And also cast it to wide char. */
354 pstr->wcs[byte_idx++] = (wchar_t) ch;
355 if (BE (mbclen == (size_t) -1, 0))
356 pstr->cur_state = prev_st;
358 else
360 /* The buffer doesn't have enough space, finish to build. */
361 pstr->cur_state = prev_st;
362 break;
365 pstr->valid_len = byte_idx;
366 pstr->valid_raw_len = byte_idx;
367 return REG_NOERROR;
369 else
370 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
372 wchar_t wc;
373 const char *p;
374 offsets_needed:
375 remain_len = end_idx - byte_idx;
376 prev_st = pstr->cur_state;
377 if (BE (pstr->trans != NULL, 0))
379 int i, ch;
381 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
383 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
384 buf[i] = pstr->trans[ch];
386 p = (const char *) buf;
388 else
389 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
390 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
391 if (BE (mbclen + 2 > 2, 1))
393 wchar_t wcu = wc;
394 if (iswlower (wc))
396 size_t mbcdlen;
398 wcu = towupper (wc);
399 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
400 if (BE (mbclen == mbcdlen, 1))
401 memcpy (pstr->mbs + byte_idx, buf, mbclen);
402 else if (mbcdlen != (size_t) -1)
404 size_t i;
406 if (byte_idx + mbcdlen > pstr->bufs_len)
408 pstr->cur_state = prev_st;
409 break;
412 if (pstr->offsets == NULL)
414 pstr->offsets = re_malloc (int, pstr->bufs_len);
416 if (pstr->offsets == NULL)
417 return REG_ESPACE;
419 if (!pstr->offsets_needed)
421 for (i = 0; i < (size_t) byte_idx; ++i)
422 pstr->offsets[i] = i;
423 pstr->offsets_needed = 1;
426 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
427 pstr->wcs[byte_idx] = wcu;
428 pstr->offsets[byte_idx] = src_idx;
429 for (i = 1; i < mbcdlen; ++i)
431 pstr->offsets[byte_idx + i]
432 = src_idx + (i < mbclen ? i : mbclen - 1);
433 pstr->wcs[byte_idx + i] = WEOF;
435 pstr->len += mbcdlen - mbclen;
436 if (pstr->raw_stop > src_idx)
437 pstr->stop += mbcdlen - mbclen;
438 end_idx = (pstr->bufs_len > pstr->len)
439 ? pstr->len : pstr->bufs_len;
440 byte_idx += mbcdlen;
441 src_idx += mbclen;
442 continue;
444 else
445 memcpy (pstr->mbs + byte_idx, p, mbclen);
447 else
448 memcpy (pstr->mbs + byte_idx, p, mbclen);
450 if (BE (pstr->offsets_needed != 0, 0))
452 size_t i;
453 for (i = 0; i < mbclen; ++i)
454 pstr->offsets[byte_idx + i] = src_idx + i;
456 src_idx += mbclen;
458 pstr->wcs[byte_idx++] = wcu;
459 /* Write paddings. */
460 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
461 pstr->wcs[byte_idx++] = WEOF;
463 else if (mbclen == (size_t) -1 || mbclen == 0)
465 /* It is an invalid character or '\0'. Just use the byte. */
466 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
468 if (BE (pstr->trans != NULL, 0))
469 ch = pstr->trans [ch];
470 pstr->mbs[byte_idx] = ch;
472 if (BE (pstr->offsets_needed != 0, 0))
473 pstr->offsets[byte_idx] = src_idx;
474 ++src_idx;
476 /* And also cast it to wide char. */
477 pstr->wcs[byte_idx++] = (wchar_t) ch;
478 if (BE (mbclen == (size_t) -1, 0))
479 pstr->cur_state = prev_st;
481 else
483 /* The buffer doesn't have enough space, finish to build. */
484 pstr->cur_state = prev_st;
485 break;
488 pstr->valid_len = byte_idx;
489 pstr->valid_raw_len = src_idx;
490 return REG_NOERROR;
493 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
494 Return the index. */
496 static int
497 re_string_skip_chars (pstr, new_raw_idx, last_wc)
498 re_string_t *pstr;
499 int new_raw_idx;
500 wint_t *last_wc;
502 mbstate_t prev_st;
503 int rawbuf_idx;
504 size_t mbclen;
505 wchar_t wc = 0;
507 /* Skip the characters which are not necessary to check. */
508 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
509 rawbuf_idx < new_raw_idx;)
511 int remain_len;
512 remain_len = pstr->len - rawbuf_idx;
513 prev_st = pstr->cur_state;
514 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
515 remain_len, &pstr->cur_state);
516 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
518 /* We treat these cases as a singlebyte character. */
519 mbclen = 1;
520 pstr->cur_state = prev_st;
522 /* Then proceed the next character. */
523 rawbuf_idx += mbclen;
525 *last_wc = (wint_t) wc;
526 return rawbuf_idx;
528 #endif /* RE_ENABLE_I18N */
530 /* Build the buffer PSTR->MBS, and apply the translation if we need.
531 This function is used in case of REG_ICASE. */
533 static void
534 build_upper_buffer (pstr)
535 re_string_t *pstr;
537 int char_idx, end_idx;
538 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
540 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
542 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
543 if (BE (pstr->trans != NULL, 0))
544 ch = pstr->trans[ch];
545 if (islower (ch))
546 pstr->mbs[char_idx] = toupper (ch);
547 else
548 pstr->mbs[char_idx] = ch;
550 pstr->valid_len = char_idx;
551 pstr->valid_raw_len = char_idx;
554 /* Apply TRANS to the buffer in PSTR. */
556 static void
557 re_string_translate_buffer (pstr)
558 re_string_t *pstr;
560 int buf_idx, end_idx;
561 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
563 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
565 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
566 pstr->mbs[buf_idx] = pstr->trans[ch];
569 pstr->valid_len = buf_idx;
570 pstr->valid_raw_len = buf_idx;
573 /* This function re-construct the buffers.
574 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
575 convert to upper case in case of REG_ICASE, apply translation. */
577 static reg_errcode_t
578 re_string_reconstruct (pstr, idx, eflags)
579 re_string_t *pstr;
580 int idx, eflags;
582 int offset = idx - pstr->raw_mbs_idx;
583 if (BE (offset < 0, 0))
585 /* Reset buffer. */
586 #ifdef RE_ENABLE_I18N
587 if (pstr->mb_cur_max > 1)
588 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
589 #endif /* RE_ENABLE_I18N */
590 pstr->len = pstr->raw_len;
591 pstr->stop = pstr->raw_stop;
592 pstr->valid_len = 0;
593 pstr->raw_mbs_idx = 0;
594 pstr->valid_raw_len = 0;
595 pstr->offsets_needed = 0;
596 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
597 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
598 if (!pstr->mbs_allocated)
599 pstr->mbs = (unsigned char *) pstr->raw_mbs;
600 offset = idx;
603 if (BE (offset != 0, 1))
605 /* Are the characters which are already checked remain? */
606 if (BE (offset < pstr->valid_raw_len, 1)
607 #ifdef RE_ENABLE_I18N
608 /* Handling this would enlarge the code too much.
609 Accept a slowdown in that case. */
610 && pstr->offsets_needed == 0
611 #endif
614 /* Yes, move them to the front of the buffer. */
615 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
616 #ifdef RE_ENABLE_I18N
617 if (pstr->mb_cur_max > 1)
618 memmove (pstr->wcs, pstr->wcs + offset,
619 (pstr->valid_len - offset) * sizeof (wint_t));
620 #endif /* RE_ENABLE_I18N */
621 if (BE (pstr->mbs_allocated, 0))
622 memmove (pstr->mbs, pstr->mbs + offset,
623 pstr->valid_len - offset);
624 pstr->valid_len -= offset;
625 pstr->valid_raw_len -= offset;
626 #if DEBUG
627 assert (pstr->valid_len > 0);
628 #endif
630 else
632 /* No, skip all characters until IDX. */
633 #ifdef RE_ENABLE_I18N
634 if (BE (pstr->offsets_needed, 0))
636 pstr->len = pstr->raw_len - idx + offset;
637 pstr->stop = pstr->raw_stop - idx + offset;
638 pstr->offsets_needed = 0;
640 #endif
641 pstr->valid_len = 0;
642 pstr->valid_raw_len = 0;
643 #ifdef RE_ENABLE_I18N
644 if (pstr->mb_cur_max > 1)
646 int wcs_idx;
647 wint_t wc = WEOF;
649 if (pstr->is_utf8)
651 const unsigned char *raw, *p, *q, *end;
653 /* Special case UTF-8. Multi-byte chars start with any
654 byte other than 0x80 - 0xbf. */
655 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
656 end = raw + (offset - pstr->mb_cur_max);
657 for (p = raw + offset - 1; p >= end; --p)
658 if ((*p & 0xc0) != 0x80)
660 mbstate_t cur_state;
661 wchar_t wc2;
662 int mlen = raw + pstr->len - p;
663 unsigned char buf[6];
665 q = p;
666 if (BE (pstr->trans != NULL, 0))
668 int i = mlen < 6 ? mlen : 6;
669 while (--i >= 0)
670 buf[i] = pstr->trans[p[i]];
671 q = buf;
673 /* XXX Don't use mbrtowc, we know which conversion
674 to use (UTF-8 -> UCS4). */
675 memset (&cur_state, 0, sizeof (cur_state));
676 mlen = (mbrtowc (&wc2, (const char *) p, mlen,
677 &cur_state)
678 - (raw + offset - p));
679 if (mlen >= 0)
681 memset (&pstr->cur_state, '\0',
682 sizeof (mbstate_t));
683 pstr->valid_len = mlen;
684 wc = wc2;
686 break;
690 if (wc == WEOF)
691 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
692 if (BE (pstr->valid_len, 0))
694 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
695 pstr->wcs[wcs_idx] = WEOF;
696 if (pstr->mbs_allocated)
697 memset (pstr->mbs, 255, pstr->valid_len);
699 pstr->valid_raw_len = pstr->valid_len;
700 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
701 && IS_WIDE_WORD_CHAR (wc))
702 ? CONTEXT_WORD
703 : ((IS_WIDE_NEWLINE (wc)
704 && pstr->newline_anchor)
705 ? CONTEXT_NEWLINE : 0));
707 else
708 #endif /* RE_ENABLE_I18N */
710 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
711 if (pstr->trans)
712 c = pstr->trans[c];
713 pstr->tip_context = (bitset_contain (pstr->word_char, c)
714 ? CONTEXT_WORD
715 : ((IS_NEWLINE (c) && pstr->newline_anchor)
716 ? CONTEXT_NEWLINE : 0));
719 if (!BE (pstr->mbs_allocated, 0))
720 pstr->mbs += offset;
722 pstr->raw_mbs_idx = idx;
723 pstr->len -= offset;
724 pstr->stop -= offset;
726 /* Then build the buffers. */
727 #ifdef RE_ENABLE_I18N
728 if (pstr->mb_cur_max > 1)
730 if (pstr->icase)
732 int ret = build_wcs_upper_buffer (pstr);
733 if (BE (ret != REG_NOERROR, 0))
734 return ret;
736 else
737 build_wcs_buffer (pstr);
739 else
740 #endif /* RE_ENABLE_I18N */
741 if (BE (pstr->mbs_allocated, 0))
743 if (pstr->icase)
744 build_upper_buffer (pstr);
745 else if (pstr->trans != NULL)
746 re_string_translate_buffer (pstr);
748 else
749 pstr->valid_len = pstr->len;
751 pstr->cur_idx = 0;
752 return REG_NOERROR;
755 static unsigned char
756 re_string_peek_byte_case (pstr, idx)
757 const re_string_t *pstr;
758 int idx;
760 int ch, off;
762 /* Handle the common (easiest) cases first. */
763 if (BE (!pstr->mbs_allocated, 1))
764 return re_string_peek_byte (pstr, idx);
766 #ifdef RE_ENABLE_I18N
767 if (pstr->mb_cur_max > 1
768 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
769 return re_string_peek_byte (pstr, idx);
770 #endif
772 off = pstr->cur_idx + idx;
773 #ifdef RE_ENABLE_I18N
774 if (pstr->offsets_needed)
775 off = pstr->offsets[off];
776 #endif
778 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
780 #ifdef RE_ENABLE_I18N
781 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
782 this function returns CAPITAL LETTER I instead of first byte of
783 DOTLESS SMALL LETTER I. The latter would confuse the parser,
784 since peek_byte_case doesn't advance cur_idx in any way. */
785 if (pstr->offsets_needed && !isascii (ch))
786 return re_string_peek_byte (pstr, idx);
787 #endif
789 return ch;
792 static unsigned char
793 re_string_fetch_byte_case (pstr)
794 re_string_t *pstr;
796 if (BE (!pstr->mbs_allocated, 1))
797 return re_string_fetch_byte (pstr);
799 #ifdef RE_ENABLE_I18N
800 if (pstr->offsets_needed)
802 int off, ch;
804 /* For tr_TR.UTF-8 [[:islower:]] there is
805 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
806 in that case the whole multi-byte character and return
807 the original letter. On the other side, with
808 [[: DOTLESS SMALL LETTER I return [[:I, as doing
809 anything else would complicate things too much. */
811 if (!re_string_first_byte (pstr, pstr->cur_idx))
812 return re_string_fetch_byte (pstr);
814 off = pstr->offsets[pstr->cur_idx];
815 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
817 if (! isascii (ch))
818 return re_string_fetch_byte (pstr);
820 re_string_skip_bytes (pstr,
821 re_string_char_size_at (pstr, pstr->cur_idx));
822 return ch;
824 #endif
826 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
829 static void
830 re_string_destruct (pstr)
831 re_string_t *pstr;
833 #ifdef RE_ENABLE_I18N
834 re_free (pstr->wcs);
835 re_free (pstr->offsets);
836 #endif /* RE_ENABLE_I18N */
837 if (pstr->mbs_allocated)
838 re_free (pstr->mbs);
841 /* Return the context at IDX in INPUT. */
843 static unsigned int
844 re_string_context_at (input, idx, eflags)
845 const re_string_t *input;
846 int idx, eflags;
848 int c;
849 if (BE (idx < 0, 0))
850 /* In this case, we use the value stored in input->tip_context,
851 since we can't know the character in input->mbs[-1] here. */
852 return input->tip_context;
853 if (BE (idx == input->len, 0))
854 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
855 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
856 #ifdef RE_ENABLE_I18N
857 if (input->mb_cur_max > 1)
859 wint_t wc;
860 int wc_idx = idx;
861 while(input->wcs[wc_idx] == WEOF)
863 #ifdef DEBUG
864 /* It must not happen. */
865 assert (wc_idx >= 0);
866 #endif
867 --wc_idx;
868 if (wc_idx < 0)
869 return input->tip_context;
871 wc = input->wcs[wc_idx];
872 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
873 return CONTEXT_WORD;
874 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
875 ? CONTEXT_NEWLINE : 0);
877 else
878 #endif
880 c = re_string_byte_at (input, idx);
881 if (bitset_contain (input->word_char, c))
882 return CONTEXT_WORD;
883 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
887 /* Functions for set operation. */
889 static reg_errcode_t
890 re_node_set_alloc (set, size)
891 re_node_set *set;
892 int size;
894 set->alloc = size;
895 set->nelem = 0;
896 set->elems = re_malloc (int, size);
897 if (BE (set->elems == NULL, 0))
898 return REG_ESPACE;
899 return REG_NOERROR;
902 static reg_errcode_t
903 re_node_set_init_1 (set, elem)
904 re_node_set *set;
905 int elem;
907 set->alloc = 1;
908 set->nelem = 1;
909 set->elems = re_malloc (int, 1);
910 if (BE (set->elems == NULL, 0))
912 set->alloc = set->nelem = 0;
913 return REG_ESPACE;
915 set->elems[0] = elem;
916 return REG_NOERROR;
919 static reg_errcode_t
920 re_node_set_init_2 (set, elem1, elem2)
921 re_node_set *set;
922 int elem1, elem2;
924 set->alloc = 2;
925 set->elems = re_malloc (int, 2);
926 if (BE (set->elems == NULL, 0))
927 return REG_ESPACE;
928 if (elem1 == elem2)
930 set->nelem = 1;
931 set->elems[0] = elem1;
933 else
935 set->nelem = 2;
936 if (elem1 < elem2)
938 set->elems[0] = elem1;
939 set->elems[1] = elem2;
941 else
943 set->elems[0] = elem2;
944 set->elems[1] = elem1;
947 return REG_NOERROR;
950 static reg_errcode_t
951 re_node_set_init_copy (dest, src)
952 re_node_set *dest;
953 const re_node_set *src;
955 dest->nelem = src->nelem;
956 if (src->nelem > 0)
958 dest->alloc = dest->nelem;
959 dest->elems = re_malloc (int, dest->alloc);
960 if (BE (dest->elems == NULL, 0))
962 dest->alloc = dest->nelem = 0;
963 return REG_ESPACE;
965 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
967 else
968 re_node_set_init_empty (dest);
969 return REG_NOERROR;
972 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
973 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
974 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
976 static reg_errcode_t
977 re_node_set_add_intersect (dest, src1, src2)
978 re_node_set *dest;
979 const re_node_set *src1, *src2;
981 int i1, i2, is, id, delta, sbase;
982 if (src1->nelem == 0 || src2->nelem == 0)
983 return REG_NOERROR;
985 /* We need dest->nelem + 2 * elems_in_intersection; this is a
986 conservative estimate. */
987 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
989 int new_alloc = src1->nelem + src2->nelem + dest->alloc;
990 int *new_elems = re_realloc (dest->elems, int, new_alloc);
991 if (BE (new_elems == NULL, 0))
992 return REG_ESPACE;
993 dest->elems = new_elems;
994 dest->alloc = new_alloc;
997 /* Find the items in the intersection of SRC1 and SRC2, and copy
998 into the top of DEST those that are not already in DEST itself. */
999 sbase = dest->nelem + src1->nelem + src2->nelem;
1000 i1 = src1->nelem - 1;
1001 i2 = src2->nelem - 1;
1002 id = dest->nelem - 1;
1003 for (;;)
1005 if (src1->elems[i1] == src2->elems[i2])
1007 /* Try to find the item in DEST. Maybe we could binary search? */
1008 while (id >= 0 && dest->elems[id] > src1->elems[i1])
1009 --id;
1011 if (id < 0 || dest->elems[id] != src1->elems[i1])
1012 dest->elems[--sbase] = src1->elems[i1];
1014 if (--i1 < 0 || --i2 < 0)
1015 break;
1018 /* Lower the highest of the two items. */
1019 else if (src1->elems[i1] < src2->elems[i2])
1021 if (--i2 < 0)
1022 break;
1024 else
1026 if (--i1 < 0)
1027 break;
1031 id = dest->nelem - 1;
1032 is = dest->nelem + src1->nelem + src2->nelem - 1;
1033 delta = is - sbase + 1;
1035 /* Now copy. When DELTA becomes zero, the remaining
1036 DEST elements are already in place; this is more or
1037 less the same loop that is in re_node_set_merge. */
1038 dest->nelem += delta;
1039 if (delta > 0 && id >= 0)
1040 for (;;)
1042 if (dest->elems[is] > dest->elems[id])
1044 /* Copy from the top. */
1045 dest->elems[id + delta--] = dest->elems[is--];
1046 if (delta == 0)
1047 break;
1049 else
1051 /* Slide from the bottom. */
1052 dest->elems[id + delta] = dest->elems[id];
1053 if (--id < 0)
1054 break;
1058 /* Copy remaining SRC elements. */
1059 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1061 return REG_NOERROR;
1064 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1065 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1067 static reg_errcode_t
1068 re_node_set_init_union (dest, src1, src2)
1069 re_node_set *dest;
1070 const re_node_set *src1, *src2;
1072 int i1, i2, id;
1073 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1075 dest->alloc = src1->nelem + src2->nelem;
1076 dest->elems = re_malloc (int, dest->alloc);
1077 if (BE (dest->elems == NULL, 0))
1078 return REG_ESPACE;
1080 else
1082 if (src1 != NULL && src1->nelem > 0)
1083 return re_node_set_init_copy (dest, src1);
1084 else if (src2 != NULL && src2->nelem > 0)
1085 return re_node_set_init_copy (dest, src2);
1086 else
1087 re_node_set_init_empty (dest);
1088 return REG_NOERROR;
1090 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1092 if (src1->elems[i1] > src2->elems[i2])
1094 dest->elems[id++] = src2->elems[i2++];
1095 continue;
1097 if (src1->elems[i1] == src2->elems[i2])
1098 ++i2;
1099 dest->elems[id++] = src1->elems[i1++];
1101 if (i1 < src1->nelem)
1103 memcpy (dest->elems + id, src1->elems + i1,
1104 (src1->nelem - i1) * sizeof (int));
1105 id += src1->nelem - i1;
1107 else if (i2 < src2->nelem)
1109 memcpy (dest->elems + id, src2->elems + i2,
1110 (src2->nelem - i2) * sizeof (int));
1111 id += src2->nelem - i2;
1113 dest->nelem = id;
1114 return REG_NOERROR;
1117 /* Calculate the union set of the sets DEST and SRC. And store it to
1118 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1120 static reg_errcode_t
1121 re_node_set_merge (dest, src)
1122 re_node_set *dest;
1123 const re_node_set *src;
1125 int is, id, sbase, delta;
1126 if (src == NULL || src->nelem == 0)
1127 return REG_NOERROR;
1128 if (dest->alloc < 2 * src->nelem + dest->nelem)
1130 int new_alloc = 2 * (src->nelem + dest->alloc);
1131 int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1132 if (BE (new_buffer == NULL, 0))
1133 return REG_ESPACE;
1134 dest->elems = new_buffer;
1135 dest->alloc = new_alloc;
1138 if (BE (dest->nelem == 0, 0))
1140 dest->nelem = src->nelem;
1141 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1142 return REG_NOERROR;
1145 /* Copy into the top of DEST the items of SRC that are not
1146 found in DEST. Maybe we could binary search in DEST? */
1147 for (sbase = dest->nelem + 2 * src->nelem,
1148 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1150 if (dest->elems[id] == src->elems[is])
1151 is--, id--;
1152 else if (dest->elems[id] < src->elems[is])
1153 dest->elems[--sbase] = src->elems[is--];
1154 else /* if (dest->elems[id] > src->elems[is]) */
1155 --id;
1158 if (is >= 0)
1160 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1161 sbase -= is + 1;
1162 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
1165 id = dest->nelem - 1;
1166 is = dest->nelem + 2 * src->nelem - 1;
1167 delta = is - sbase + 1;
1168 if (delta == 0)
1169 return REG_NOERROR;
1171 /* Now copy. When DELTA becomes zero, the remaining
1172 DEST elements are already in place. */
1173 dest->nelem += delta;
1174 for (;;)
1176 if (dest->elems[is] > dest->elems[id])
1178 /* Copy from the top. */
1179 dest->elems[id + delta--] = dest->elems[is--];
1180 if (delta == 0)
1181 break;
1183 else
1185 /* Slide from the bottom. */
1186 dest->elems[id + delta] = dest->elems[id];
1187 if (--id < 0)
1189 /* Copy remaining SRC elements. */
1190 memcpy (dest->elems, dest->elems + sbase,
1191 delta * sizeof (int));
1192 break;
1197 return REG_NOERROR;
1200 /* Insert the new element ELEM to the re_node_set* SET.
1201 SET should not already have ELEM.
1202 return -1 if an error is occured, return 1 otherwise. */
1204 static int
1205 re_node_set_insert (set, elem)
1206 re_node_set *set;
1207 int elem;
1209 int idx;
1210 /* In case the set is empty. */
1211 if (set->alloc == 0)
1213 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1214 return 1;
1215 else
1216 return -1;
1219 if (BE (set->nelem, 0) == 0)
1221 /* We already guaranteed above that set->alloc != 0. */
1222 set->elems[0] = elem;
1223 ++set->nelem;
1224 return 1;
1227 /* Realloc if we need. */
1228 if (set->alloc == set->nelem)
1230 int *new_array;
1231 set->alloc = set->alloc * 2;
1232 new_array = re_realloc (set->elems, int, set->alloc);
1233 if (BE (new_array == NULL, 0))
1234 return -1;
1235 set->elems = new_array;
1238 /* Move the elements which follows the new element. Test the
1239 first element separately to skip a check in the inner loop. */
1240 if (elem < set->elems[0])
1242 idx = 0;
1243 for (idx = set->nelem; idx > 0; idx--)
1244 set->elems[idx] = set->elems[idx - 1];
1246 else
1248 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1249 set->elems[idx] = set->elems[idx - 1];
1252 /* Insert the new element. */
1253 set->elems[idx] = elem;
1254 ++set->nelem;
1255 return 1;
1258 /* Insert the new element ELEM to the re_node_set* SET.
1259 SET should not already have any element greater than or equal to ELEM.
1260 Return -1 if an error is occured, return 1 otherwise. */
1262 static int
1263 re_node_set_insert_last (set, elem)
1264 re_node_set *set;
1265 int elem;
1267 /* Realloc if we need. */
1268 if (set->alloc == set->nelem)
1270 int *new_array;
1271 set->alloc = (set->alloc + 1) * 2;
1272 new_array = re_realloc (set->elems, int, set->alloc);
1273 if (BE (new_array == NULL, 0))
1274 return -1;
1275 set->elems = new_array;
1278 /* Insert the new element. */
1279 set->elems[set->nelem++] = elem;
1280 return 1;
1283 /* Compare two node sets SET1 and SET2.
1284 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1286 static int
1287 re_node_set_compare (set1, set2)
1288 const re_node_set *set1, *set2;
1290 int i;
1291 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1292 return 0;
1293 for (i = set1->nelem ; --i >= 0 ; )
1294 if (set1->elems[i] != set2->elems[i])
1295 return 0;
1296 return 1;
1299 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1301 static int
1302 re_node_set_contains (set, elem)
1303 const re_node_set *set;
1304 int elem;
1306 unsigned int idx, right, mid;
1307 if (set->nelem <= 0)
1308 return 0;
1310 /* Binary search the element. */
1311 idx = 0;
1312 right = set->nelem - 1;
1313 while (idx < right)
1315 mid = (idx + right) / 2;
1316 if (set->elems[mid] < elem)
1317 idx = mid + 1;
1318 else
1319 right = mid;
1321 return set->elems[idx] == elem ? idx + 1 : 0;
1324 static void
1325 re_node_set_remove_at (set, idx)
1326 re_node_set *set;
1327 int idx;
1329 if (idx < 0 || idx >= set->nelem)
1330 return;
1331 --set->nelem;
1332 for (; idx < set->nelem; idx++)
1333 set->elems[idx] = set->elems[idx + 1];
1337 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1338 Or return -1, if an error will be occured. */
1340 static int
1341 re_dfa_add_node (dfa, token)
1342 re_dfa_t *dfa;
1343 re_token_t token;
1345 int type = token.type;
1346 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1348 int new_nodes_alloc = dfa->nodes_alloc * 2;
1349 int *new_nexts, *new_indices;
1350 re_node_set *new_edests, *new_eclosures;
1352 re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
1353 new_nodes_alloc);
1354 if (BE (new_array == NULL, 0))
1355 return -1;
1356 dfa->nodes = new_array;
1357 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1358 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1359 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1360 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1361 if (BE (new_nexts == NULL || new_indices == NULL
1362 || new_edests == NULL || new_eclosures == NULL, 0))
1363 return -1;
1364 dfa->nexts = new_nexts;
1365 dfa->org_indices = new_indices;
1366 dfa->edests = new_edests;
1367 dfa->eclosures = new_eclosures;
1368 dfa->nodes_alloc = new_nodes_alloc;
1370 dfa->nodes[dfa->nodes_len] = token;
1371 dfa->nodes[dfa->nodes_len].constraint = 0;
1372 #ifdef RE_ENABLE_I18N
1373 dfa->nodes[dfa->nodes_len].accept_mb =
1374 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1375 #endif
1376 dfa->nexts[dfa->nodes_len] = -1;
1377 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1378 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1379 return dfa->nodes_len++;
1382 static unsigned int inline
1383 calc_state_hash (nodes, context)
1384 const re_node_set *nodes;
1385 unsigned int context;
1387 unsigned int hash = nodes->nelem + context;
1388 int i;
1389 for (i = 0 ; i < nodes->nelem ; i++)
1390 hash += nodes->elems[i];
1391 return hash;
1394 /* Search for the state whose node_set is equivalent to NODES.
1395 Return the pointer to the state, if we found it in the DFA.
1396 Otherwise create the new one and return it. In case of an error
1397 return NULL and set the error code in ERR.
1398 Note: - We assume NULL as the invalid state, then it is possible that
1399 return value is NULL and ERR is REG_NOERROR.
1400 - We never return non-NULL value in case of any errors, it is for
1401 optimization. */
1403 static re_dfastate_t*
1404 re_acquire_state (err, dfa, nodes)
1405 reg_errcode_t *err;
1406 re_dfa_t *dfa;
1407 const re_node_set *nodes;
1409 unsigned int hash;
1410 re_dfastate_t *new_state;
1411 struct re_state_table_entry *spot;
1412 int i;
1413 if (BE (nodes->nelem == 0, 0))
1415 *err = REG_NOERROR;
1416 return NULL;
1418 hash = calc_state_hash (nodes, 0);
1419 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1421 for (i = 0 ; i < spot->num ; i++)
1423 re_dfastate_t *state = spot->array[i];
1424 if (hash != state->hash)
1425 continue;
1426 if (re_node_set_compare (&state->nodes, nodes))
1427 return state;
1430 /* There are no appropriate state in the dfa, create the new one. */
1431 new_state = create_ci_newstate (dfa, nodes, hash);
1432 if (BE (new_state != NULL, 1))
1433 return new_state;
1434 else
1436 *err = REG_ESPACE;
1437 return NULL;
1441 /* Search for the state whose node_set is equivalent to NODES and
1442 whose context is equivalent to CONTEXT.
1443 Return the pointer to the state, if we found it in the DFA.
1444 Otherwise create the new one and return it. In case of an error
1445 return NULL and set the error code in ERR.
1446 Note: - We assume NULL as the invalid state, then it is possible that
1447 return value is NULL and ERR is REG_NOERROR.
1448 - We never return non-NULL value in case of any errors, it is for
1449 optimization. */
1451 static re_dfastate_t*
1452 re_acquire_state_context (err, dfa, nodes, context)
1453 reg_errcode_t *err;
1454 re_dfa_t *dfa;
1455 const re_node_set *nodes;
1456 unsigned int context;
1458 unsigned int hash;
1459 re_dfastate_t *new_state;
1460 struct re_state_table_entry *spot;
1461 int i;
1462 if (nodes->nelem == 0)
1464 *err = REG_NOERROR;
1465 return NULL;
1467 hash = calc_state_hash (nodes, context);
1468 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1470 for (i = 0 ; i < spot->num ; i++)
1472 re_dfastate_t *state = spot->array[i];
1473 if (state->hash == hash
1474 && state->context == context
1475 && re_node_set_compare (state->entrance_nodes, nodes))
1476 return state;
1478 /* There are no appropriate state in `dfa', create the new one. */
1479 new_state = create_cd_newstate (dfa, nodes, context, hash);
1480 if (BE (new_state != NULL, 1))
1481 return new_state;
1482 else
1484 *err = REG_ESPACE;
1485 return NULL;
1489 /* Finish initialization of the new state NEWSTATE, and using its hash value
1490 HASH put in the appropriate bucket of DFA's state table. Return value
1491 indicates the error code if failed. */
1493 static reg_errcode_t
1494 register_state (dfa, newstate, hash)
1495 re_dfa_t *dfa;
1496 re_dfastate_t *newstate;
1497 unsigned int hash;
1499 struct re_state_table_entry *spot;
1500 reg_errcode_t err;
1501 int i;
1503 newstate->hash = hash;
1504 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1505 if (BE (err != REG_NOERROR, 0))
1506 return REG_ESPACE;
1507 for (i = 0; i < newstate->nodes.nelem; i++)
1509 int elem = newstate->nodes.elems[i];
1510 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1511 re_node_set_insert_last (&newstate->non_eps_nodes, elem);
1514 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1515 if (BE (spot->alloc <= spot->num, 0))
1517 int new_alloc = 2 * spot->num + 2;
1518 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1519 new_alloc);
1520 if (BE (new_array == NULL, 0))
1521 return REG_ESPACE;
1522 spot->array = new_array;
1523 spot->alloc = new_alloc;
1525 spot->array[spot->num++] = newstate;
1526 return REG_NOERROR;
1529 /* Create the new state which is independ of contexts.
1530 Return the new state if succeeded, otherwise return NULL. */
1532 static re_dfastate_t *
1533 create_ci_newstate (dfa, nodes, hash)
1534 re_dfa_t *dfa;
1535 const re_node_set *nodes;
1536 unsigned int hash;
1538 int i;
1539 reg_errcode_t err;
1540 re_dfastate_t *newstate;
1542 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1543 if (BE (newstate == NULL, 0))
1544 return NULL;
1545 err = re_node_set_init_copy (&newstate->nodes, nodes);
1546 if (BE (err != REG_NOERROR, 0))
1548 re_free (newstate);
1549 return NULL;
1552 newstate->entrance_nodes = &newstate->nodes;
1553 for (i = 0 ; i < nodes->nelem ; i++)
1555 re_token_t *node = dfa->nodes + nodes->elems[i];
1556 re_token_type_t type = node->type;
1557 if (type == CHARACTER && !node->constraint)
1558 continue;
1559 #ifdef RE_ENABLE_I18N
1560 newstate->accept_mb |= node->accept_mb;
1561 #endif /* RE_ENABLE_I18N */
1563 /* If the state has the halt node, the state is a halt state. */
1564 if (type == END_OF_RE)
1565 newstate->halt = 1;
1566 else if (type == OP_BACK_REF)
1567 newstate->has_backref = 1;
1568 else if (type == ANCHOR || node->constraint)
1569 newstate->has_constraint = 1;
1571 err = register_state (dfa, newstate, hash);
1572 if (BE (err != REG_NOERROR, 0))
1574 free_state (newstate);
1575 newstate = NULL;
1577 return newstate;
1580 /* Create the new state which is depend on the context CONTEXT.
1581 Return the new state if succeeded, otherwise return NULL. */
1583 static re_dfastate_t *
1584 create_cd_newstate (dfa, nodes, context, hash)
1585 re_dfa_t *dfa;
1586 const re_node_set *nodes;
1587 unsigned int context, hash;
1589 int i, nctx_nodes = 0;
1590 reg_errcode_t err;
1591 re_dfastate_t *newstate;
1593 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1594 if (BE (newstate == NULL, 0))
1595 return NULL;
1596 err = re_node_set_init_copy (&newstate->nodes, nodes);
1597 if (BE (err != REG_NOERROR, 0))
1599 re_free (newstate);
1600 return NULL;
1603 newstate->context = context;
1604 newstate->entrance_nodes = &newstate->nodes;
1606 for (i = 0 ; i < nodes->nelem ; i++)
1608 unsigned int constraint = 0;
1609 re_token_t *node = dfa->nodes + nodes->elems[i];
1610 re_token_type_t type = node->type;
1611 if (node->constraint)
1612 constraint = node->constraint;
1614 if (type == CHARACTER && !constraint)
1615 continue;
1616 #ifdef RE_ENABLE_I18N
1617 newstate->accept_mb |= node->accept_mb;
1618 #endif /* RE_ENABLE_I18N */
1620 /* If the state has the halt node, the state is a halt state. */
1621 if (type == END_OF_RE)
1622 newstate->halt = 1;
1623 else if (type == OP_BACK_REF)
1624 newstate->has_backref = 1;
1625 else if (type == ANCHOR)
1626 constraint = node->opr.ctx_type;
1628 if (constraint)
1630 if (newstate->entrance_nodes == &newstate->nodes)
1632 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1633 if (BE (newstate->entrance_nodes == NULL, 0))
1635 free_state (newstate);
1636 return NULL;
1638 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1639 nctx_nodes = 0;
1640 newstate->has_constraint = 1;
1643 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1645 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1646 ++nctx_nodes;
1650 err = register_state (dfa, newstate, hash);
1651 if (BE (err != REG_NOERROR, 0))
1653 free_state (newstate);
1654 newstate = NULL;
1656 return newstate;
1659 static void
1660 free_state (state)
1661 re_dfastate_t *state;
1663 re_node_set_free (&state->non_eps_nodes);
1664 re_node_set_free (&state->inveclosure);
1665 if (state->entrance_nodes != &state->nodes)
1667 re_node_set_free (state->entrance_nodes);
1668 re_free (state->entrance_nodes);
1670 re_node_set_free (&state->nodes);
1671 re_free (state->word_trtable);
1672 re_free (state->trtable);
1673 re_free (state);