fix gcc warning with -Wmisleading-indentation
[uclibc-ng.git] / libc / misc / regex / regex_internal.c
blobc74c6a0c302e9d28851071d08a644e17cd94d894
1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 static void re_string_construct_common (const char *str, int len,
21 re_string_t *pstr,
22 __RE_TRANSLATE_TYPE trans, int icase,
23 const re_dfa_t *dfa) internal_function;
24 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
25 const re_node_set *nodes,
26 unsigned int hash) internal_function;
27 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
28 const re_node_set *nodes,
29 unsigned int context,
30 unsigned int hash) internal_function;
32 /* Functions for string operation. */
34 /* This function allocate the buffers. It is necessary to call
35 re_string_reconstruct before using the object. */
37 static reg_errcode_t
38 internal_function
39 re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
40 __RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
42 reg_errcode_t ret;
43 int init_buf_len;
45 /* Ensure at least one character fits into the buffers. */
46 if (init_len < dfa->mb_cur_max)
47 init_len = dfa->mb_cur_max;
48 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
49 re_string_construct_common (str, len, pstr, trans, icase, dfa);
51 ret = re_string_realloc_buffers (pstr, init_buf_len);
52 if (BE (ret != REG_NOERROR, 0))
53 return ret;
55 pstr->word_char = dfa->word_char;
56 pstr->word_ops_used = dfa->word_ops_used;
57 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
58 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
59 pstr->valid_raw_len = pstr->valid_len;
60 return REG_NOERROR;
63 /* This function allocate the buffers, and initialize them. */
65 static reg_errcode_t
66 internal_function
67 re_string_construct (re_string_t *pstr, const char *str, int len,
68 __RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
70 reg_errcode_t ret;
71 memset (pstr, '\0', sizeof (re_string_t));
72 re_string_construct_common (str, len, pstr, trans, icase, dfa);
74 if (len > 0)
76 ret = re_string_realloc_buffers (pstr, len + 1);
77 if (BE (ret != REG_NOERROR, 0))
78 return ret;
80 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
82 if (icase)
84 #ifdef RE_ENABLE_I18N
85 if (dfa->mb_cur_max > 1)
87 while (1)
89 ret = build_wcs_upper_buffer (pstr);
90 if (BE (ret != REG_NOERROR, 0))
91 return ret;
92 if (pstr->valid_raw_len >= len)
93 break;
94 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
95 break;
96 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
97 if (BE (ret != REG_NOERROR, 0))
98 return ret;
101 else
102 #endif /* RE_ENABLE_I18N */
103 build_upper_buffer (pstr);
105 else
107 #ifdef RE_ENABLE_I18N
108 if (dfa->mb_cur_max > 1)
109 build_wcs_buffer (pstr);
110 else
111 #endif
113 if (trans != NULL)
114 re_string_translate_buffer (pstr);
115 else
117 pstr->valid_len = pstr->bufs_len;
118 pstr->valid_raw_len = pstr->bufs_len;
123 return REG_NOERROR;
126 /* Helper functions for re_string_allocate, and re_string_construct. */
128 static reg_errcode_t
129 internal_function
130 re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
132 #ifdef RE_ENABLE_I18N
133 if (pstr->mb_cur_max > 1)
135 wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
136 if (BE (new_wcs == NULL, 0))
137 return REG_ESPACE;
138 pstr->wcs = new_wcs;
139 if (pstr->offsets != NULL)
141 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
142 if (BE (new_offsets == NULL, 0))
143 return REG_ESPACE;
144 pstr->offsets = new_offsets;
147 #endif /* RE_ENABLE_I18N */
148 if (pstr->mbs_allocated)
150 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
151 new_buf_len);
152 if (BE (new_mbs == NULL, 0))
153 return REG_ESPACE;
154 pstr->mbs = new_mbs;
156 pstr->bufs_len = new_buf_len;
157 return REG_NOERROR;
161 static void
162 internal_function
163 re_string_construct_common (const char *str, int len, re_string_t *pstr,
164 __RE_TRANSLATE_TYPE trans, int icase,
165 const re_dfa_t *dfa)
167 pstr->raw_mbs = (const unsigned char *) str;
168 pstr->len = len;
169 pstr->raw_len = len;
170 pstr->trans = trans;
171 pstr->icase = icase ? 1 : 0;
172 pstr->mbs_allocated = (trans != NULL || icase);
173 pstr->mb_cur_max = dfa->mb_cur_max;
174 pstr->is_utf8 = dfa->is_utf8;
175 pstr->map_notascii = dfa->map_notascii;
176 pstr->stop = pstr->len;
177 pstr->raw_stop = pstr->stop;
180 #ifdef RE_ENABLE_I18N
182 /* Build wide character buffer PSTR->WCS.
183 If the byte sequence of the string are:
184 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
185 Then wide character buffer will be:
186 <wc1> , WEOF , <wc2> , WEOF , <wc3>
187 We use WEOF for padding, they indicate that the position isn't
188 a first byte of a multibyte character.
190 Note that this function assumes PSTR->VALID_LEN elements are already
191 built and starts from PSTR->VALID_LEN. */
193 static void
194 internal_function
195 build_wcs_buffer (re_string_t *pstr)
197 #if defined __UCLIBC__
198 unsigned char buf[MB_LEN_MAX];
199 assert (MB_LEN_MAX >= pstr->mb_cur_max);
200 #else
201 unsigned char buf[64];
202 #endif
203 mbstate_t prev_st;
204 int byte_idx, end_idx, remain_len;
205 size_t mbclen;
207 /* Build the buffers from pstr->valid_len to either pstr->len or
208 pstr->bufs_len. */
209 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
210 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
212 wchar_t wc;
213 const char *p;
215 remain_len = end_idx - byte_idx;
216 prev_st = pstr->cur_state;
217 /* Apply the translation if we need. */
218 if (BE (pstr->trans != NULL, 0))
220 int i, ch;
222 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
224 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
225 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
227 p = (const char *) buf;
229 else
230 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
231 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
232 if (BE (mbclen == (size_t) -2, 0))
234 /* The buffer doesn't have enough space, finish to build. */
235 pstr->cur_state = prev_st;
236 break;
238 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
240 /* We treat these cases as a singlebyte character. */
241 mbclen = 1;
242 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
243 if (BE (pstr->trans != NULL, 0))
244 wc = pstr->trans[wc];
245 pstr->cur_state = prev_st;
248 /* Write wide character and padding. */
249 pstr->wcs[byte_idx++] = wc;
250 /* Write paddings. */
251 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
252 pstr->wcs[byte_idx++] = WEOF;
254 pstr->valid_len = byte_idx;
255 pstr->valid_raw_len = byte_idx;
258 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
259 but for REG_ICASE. */
261 static reg_errcode_t
262 internal_function
263 build_wcs_upper_buffer (re_string_t *pstr)
265 mbstate_t prev_st;
266 int src_idx, byte_idx, end_idx, remain_len;
267 size_t mbclen;
268 #if defined __UCLIBC__
269 char buf[MB_LEN_MAX];
270 assert (MB_LEN_MAX >= pstr->mb_cur_max);
271 #else
272 char buf[64];
273 #endif
275 byte_idx = pstr->valid_len;
276 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
278 /* The following optimization assumes that ASCII characters can be
279 mapped to wide characters with a simple cast. */
280 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
282 while (byte_idx < end_idx)
284 wchar_t wc;
286 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
287 && mbsinit (&pstr->cur_state))
289 /* In case of a singlebyte character. */
290 pstr->mbs[byte_idx]
291 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
292 /* The next step uses the assumption that wchar_t is encoded
293 ASCII-safe: all ASCII values can be converted like this. */
294 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
295 ++byte_idx;
296 continue;
299 remain_len = end_idx - byte_idx;
300 prev_st = pstr->cur_state;
301 mbclen = mbrtowc (&wc,
302 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
303 + byte_idx), remain_len, &pstr->cur_state);
304 if (BE (mbclen + 2 > 2, 1))
306 wchar_t wcu = wc;
307 if (iswlower (wc))
309 size_t mbcdlen;
311 wcu = towupper (wc);
312 mbcdlen = wcrtomb (buf, wcu, &prev_st);
313 if (BE (mbclen == mbcdlen, 1))
314 memcpy (pstr->mbs + byte_idx, buf, mbclen);
315 else
317 src_idx = byte_idx;
318 goto offsets_needed;
321 else
322 memcpy (pstr->mbs + byte_idx,
323 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
324 pstr->wcs[byte_idx++] = wcu;
325 /* Write paddings. */
326 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
327 pstr->wcs[byte_idx++] = WEOF;
329 else if (mbclen == (size_t) -1 || mbclen == 0)
331 /* It is an invalid character or '\0'. Just use the byte. */
332 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
333 pstr->mbs[byte_idx] = ch;
334 /* And also cast it to wide char. */
335 pstr->wcs[byte_idx++] = (wchar_t) ch;
336 if (BE (mbclen == (size_t) -1, 0))
337 pstr->cur_state = prev_st;
339 else
341 /* The buffer doesn't have enough space, finish to build. */
342 pstr->cur_state = prev_st;
343 break;
346 pstr->valid_len = byte_idx;
347 pstr->valid_raw_len = byte_idx;
348 return REG_NOERROR;
350 else
351 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
353 wchar_t wc;
354 const char *p;
355 offsets_needed:
356 remain_len = end_idx - byte_idx;
357 prev_st = pstr->cur_state;
358 if (BE (pstr->trans != NULL, 0))
360 int i, ch;
362 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
364 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
365 buf[i] = pstr->trans[ch];
367 p = (const char *) buf;
369 else
370 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
371 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
372 if (BE (mbclen + 2 > 2, 1))
374 wchar_t wcu = wc;
375 if (iswlower (wc))
377 size_t mbcdlen;
379 wcu = towupper (wc);
380 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
381 if (BE (mbclen == mbcdlen, 1))
382 memcpy (pstr->mbs + byte_idx, buf, mbclen);
383 else if (mbcdlen != (size_t) -1)
385 size_t i;
387 if (byte_idx + mbcdlen > pstr->bufs_len)
389 pstr->cur_state = prev_st;
390 break;
393 if (pstr->offsets == NULL)
395 pstr->offsets = re_malloc (int, pstr->bufs_len);
397 if (pstr->offsets == NULL)
398 return REG_ESPACE;
400 if (!pstr->offsets_needed)
402 for (i = 0; i < (size_t) byte_idx; ++i)
403 pstr->offsets[i] = i;
404 pstr->offsets_needed = 1;
407 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
408 pstr->wcs[byte_idx] = wcu;
409 pstr->offsets[byte_idx] = src_idx;
410 for (i = 1; i < mbcdlen; ++i)
412 pstr->offsets[byte_idx + i]
413 = src_idx + (i < mbclen ? i : mbclen - 1);
414 pstr->wcs[byte_idx + i] = WEOF;
416 pstr->len += mbcdlen - mbclen;
417 if (pstr->raw_stop > src_idx)
418 pstr->stop += mbcdlen - mbclen;
419 end_idx = (pstr->bufs_len > pstr->len)
420 ? pstr->len : pstr->bufs_len;
421 byte_idx += mbcdlen;
422 src_idx += mbclen;
423 continue;
425 else
426 memcpy (pstr->mbs + byte_idx, p, mbclen);
428 else
429 memcpy (pstr->mbs + byte_idx, p, mbclen);
431 if (BE (pstr->offsets_needed != 0, 0))
433 size_t i;
434 for (i = 0; i < mbclen; ++i)
435 pstr->offsets[byte_idx + i] = src_idx + i;
437 src_idx += mbclen;
439 pstr->wcs[byte_idx++] = wcu;
440 /* Write paddings. */
441 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
442 pstr->wcs[byte_idx++] = WEOF;
444 else if (mbclen == (size_t) -1 || mbclen == 0)
446 /* It is an invalid character or '\0'. Just use the byte. */
447 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
449 if (BE (pstr->trans != NULL, 0))
450 ch = pstr->trans [ch];
451 pstr->mbs[byte_idx] = ch;
453 if (BE (pstr->offsets_needed != 0, 0))
454 pstr->offsets[byte_idx] = src_idx;
455 ++src_idx;
457 /* And also cast it to wide char. */
458 pstr->wcs[byte_idx++] = (wchar_t) ch;
459 if (BE (mbclen == (size_t) -1, 0))
460 pstr->cur_state = prev_st;
462 else
464 /* The buffer doesn't have enough space, finish to build. */
465 pstr->cur_state = prev_st;
466 break;
469 pstr->valid_len = byte_idx;
470 pstr->valid_raw_len = src_idx;
471 return REG_NOERROR;
474 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
475 Return the index. */
477 static int
478 internal_function
479 re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
481 mbstate_t prev_st;
482 int rawbuf_idx;
483 size_t mbclen;
484 wchar_t wc = 0;
486 /* Skip the characters which are not necessary to check. */
487 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
488 rawbuf_idx < new_raw_idx;)
490 int remain_len;
491 remain_len = pstr->len - rawbuf_idx;
492 prev_st = pstr->cur_state;
493 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
494 remain_len, &pstr->cur_state);
495 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
497 /* We treat these cases as a singlebyte character. */
498 mbclen = 1;
499 pstr->cur_state = prev_st;
501 /* Then proceed the next character. */
502 rawbuf_idx += mbclen;
504 *last_wc = (wint_t) wc;
505 return rawbuf_idx;
507 #endif /* RE_ENABLE_I18N */
509 /* Build the buffer PSTR->MBS, and apply the translation if we need.
510 This function is used in case of REG_ICASE. */
512 static void
513 internal_function
514 build_upper_buffer (re_string_t *pstr)
516 int char_idx, end_idx;
517 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
519 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
521 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
522 if (BE (pstr->trans != NULL, 0))
523 ch = pstr->trans[ch];
524 if (islower (ch))
525 pstr->mbs[char_idx] = toupper (ch);
526 else
527 pstr->mbs[char_idx] = ch;
529 pstr->valid_len = char_idx;
530 pstr->valid_raw_len = char_idx;
533 /* Apply TRANS to the buffer in PSTR. */
535 static void
536 internal_function
537 re_string_translate_buffer (re_string_t *pstr)
539 int buf_idx, end_idx;
540 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
542 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
544 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
545 pstr->mbs[buf_idx] = pstr->trans[ch];
548 pstr->valid_len = buf_idx;
549 pstr->valid_raw_len = buf_idx;
552 /* This function re-construct the buffers.
553 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
554 convert to upper case in case of REG_ICASE, apply translation. */
556 static reg_errcode_t
557 internal_function
558 re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
560 int offset = idx - pstr->raw_mbs_idx;
561 if (BE (offset < 0, 0))
563 /* Reset buffer. */
564 #ifdef RE_ENABLE_I18N
565 if (pstr->mb_cur_max > 1)
566 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
567 #endif
568 pstr->len = pstr->raw_len;
569 pstr->stop = pstr->raw_stop;
570 pstr->valid_len = 0;
571 pstr->raw_mbs_idx = 0;
572 pstr->valid_raw_len = 0;
573 pstr->offsets_needed = 0;
574 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
575 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
576 if (!pstr->mbs_allocated)
577 pstr->mbs = (unsigned char *) pstr->raw_mbs;
578 offset = idx;
581 if (BE (offset != 0, 1))
583 /* Are the characters which are already checked remain? */
584 if (BE (offset < pstr->valid_raw_len, 1)
585 #ifdef RE_ENABLE_I18N
586 /* Handling this would enlarge the code too much.
587 Accept a slowdown in that case. */
588 && pstr->offsets_needed == 0
589 #endif
592 /* Yes, move them to the front of the buffer. */
593 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
594 #ifdef RE_ENABLE_I18N
595 if (pstr->mb_cur_max > 1)
596 memmove (pstr->wcs, pstr->wcs + offset,
597 (pstr->valid_len - offset) * sizeof (wint_t));
598 #endif
599 if (BE (pstr->mbs_allocated, 0))
600 memmove (pstr->mbs, pstr->mbs + offset,
601 pstr->valid_len - offset);
602 pstr->valid_len -= offset;
603 pstr->valid_raw_len -= offset;
604 #ifdef DEBUG
605 assert (pstr->valid_len > 0);
606 #endif
608 else
610 /* No, skip all characters until IDX. */
611 #ifdef RE_ENABLE_I18N
612 if (BE (pstr->offsets_needed, 0))
614 pstr->len = pstr->raw_len - idx + offset;
615 pstr->stop = pstr->raw_stop - idx + offset;
616 pstr->offsets_needed = 0;
618 #endif
619 pstr->valid_len = 0;
620 pstr->valid_raw_len = 0;
621 #ifdef RE_ENABLE_I18N
622 if (pstr->mb_cur_max > 1)
624 int wcs_idx;
625 wint_t wc = WEOF;
627 if (pstr->is_utf8)
629 const unsigned char *raw, *p, *end;
631 /* Special case UTF-8. Multi-byte chars start with any
632 byte other than 0x80 - 0xbf. */
633 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
634 end = raw + (offset - pstr->mb_cur_max);
635 p = raw + offset - 1;
636 #if 0
637 /* We know the wchar_t encoding is UCS4, so for the simple
638 case, ASCII characters, skip the conversion step. */
639 if (isascii (*p) && BE (pstr->trans == NULL, 1))
641 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
642 pstr->valid_len = 0;
643 wc = (wchar_t) *p;
645 else
646 #endif
647 for (; p >= end; --p)
648 if ((*p & 0xc0) != 0x80)
650 mbstate_t cur_state;
651 wchar_t wc2;
652 int mlen = raw + pstr->len - p;
653 size_t mbclen;
655 /* XXX Don't use mbrtowc, we know which conversion
656 to use (UTF-8 -> UCS4). */
657 memset (&cur_state, 0, sizeof (cur_state));
658 mbclen = mbrtowc (&wc2, (const char *) p, mlen,
659 &cur_state);
660 if (raw + offset - p <= mbclen
661 && mbclen < (size_t) -2)
663 memset (&pstr->cur_state, '\0',
664 sizeof (mbstate_t));
665 pstr->valid_len = mbclen - (raw + offset - p);
666 wc = wc2;
668 break;
672 if (wc == WEOF)
673 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
674 if (BE (pstr->valid_len, 0))
676 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
677 pstr->wcs[wcs_idx] = WEOF;
678 if (pstr->mbs_allocated)
679 memset (pstr->mbs, 255, pstr->valid_len);
681 pstr->valid_raw_len = pstr->valid_len;
682 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
683 && IS_WIDE_WORD_CHAR (wc))
684 ? CONTEXT_WORD
685 : ((IS_WIDE_NEWLINE (wc)
686 && pstr->newline_anchor)
687 ? CONTEXT_NEWLINE : 0));
689 else
690 #endif /* RE_ENABLE_I18N */
692 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
693 if (pstr->trans)
694 c = pstr->trans[c];
695 pstr->tip_context = (bitset_contain (pstr->word_char, c)
696 ? CONTEXT_WORD
697 : ((IS_NEWLINE (c) && pstr->newline_anchor)
698 ? CONTEXT_NEWLINE : 0));
701 if (!BE (pstr->mbs_allocated, 0))
702 pstr->mbs += offset;
704 pstr->raw_mbs_idx = idx;
705 pstr->len -= offset;
706 pstr->stop -= offset;
708 /* Then build the buffers. */
709 #ifdef RE_ENABLE_I18N
710 if (pstr->mb_cur_max > 1)
712 if (pstr->icase)
714 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
715 if (BE (ret != REG_NOERROR, 0))
716 return ret;
718 else
719 build_wcs_buffer (pstr);
721 else
722 #endif
723 if (BE (pstr->mbs_allocated, 0))
725 if (pstr->icase)
726 build_upper_buffer (pstr);
727 else if (pstr->trans != NULL)
728 re_string_translate_buffer (pstr);
730 else
731 pstr->valid_len = pstr->len;
733 pstr->cur_idx = 0;
734 return REG_NOERROR;
737 static unsigned char
738 internal_function __attribute ((pure))
739 re_string_peek_byte_case (const re_string_t *pstr, int idx)
741 int ch, off;
743 /* Handle the common (easiest) cases first. */
744 if (BE (!pstr->mbs_allocated, 1))
745 return re_string_peek_byte (pstr, idx);
747 #ifdef RE_ENABLE_I18N
748 if (pstr->mb_cur_max > 1
749 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
750 return re_string_peek_byte (pstr, idx);
751 #endif
753 off = pstr->cur_idx + idx;
754 #ifdef RE_ENABLE_I18N
755 if (pstr->offsets_needed)
756 off = pstr->offsets[off];
757 #endif
759 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
761 #ifdef RE_ENABLE_I18N
762 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
763 this function returns CAPITAL LETTER I instead of first byte of
764 DOTLESS SMALL LETTER I. The latter would confuse the parser,
765 since peek_byte_case doesn't advance cur_idx in any way. */
766 if (pstr->offsets_needed && !isascii (ch))
767 return re_string_peek_byte (pstr, idx);
768 #endif
770 return ch;
773 static unsigned char
774 internal_function __attribute ((pure))
775 re_string_fetch_byte_case (re_string_t *pstr)
777 if (BE (!pstr->mbs_allocated, 1))
778 return re_string_fetch_byte (pstr);
780 #ifdef RE_ENABLE_I18N
781 if (pstr->offsets_needed)
783 int off, ch;
785 /* For tr_TR.UTF-8 [[:islower:]] there is
786 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
787 in that case the whole multi-byte character and return
788 the original letter. On the other side, with
789 [[: DOTLESS SMALL LETTER I return [[:I, as doing
790 anything else would complicate things too much. */
792 if (!re_string_first_byte (pstr, pstr->cur_idx))
793 return re_string_fetch_byte (pstr);
795 off = pstr->offsets[pstr->cur_idx];
796 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
798 if (! isascii (ch))
799 return re_string_fetch_byte (pstr);
801 re_string_skip_bytes (pstr,
802 re_string_char_size_at (pstr, pstr->cur_idx));
803 return ch;
805 #endif
807 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
810 static void
811 internal_function
812 re_string_destruct (re_string_t *pstr)
814 #ifdef RE_ENABLE_I18N
815 re_free (pstr->wcs);
816 re_free (pstr->offsets);
817 #endif /* RE_ENABLE_I18N */
818 if (pstr->mbs_allocated)
819 re_free (pstr->mbs);
822 /* Return the context at IDX in INPUT. */
824 static unsigned int
825 internal_function
826 re_string_context_at (const re_string_t *input, int idx, int eflags)
828 int c;
829 if (BE (idx < 0, 0))
830 /* In this case, we use the value stored in input->tip_context,
831 since we can't know the character in input->mbs[-1] here. */
832 return input->tip_context;
833 if (BE (idx == input->len, 0))
834 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
835 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
836 #ifdef RE_ENABLE_I18N
837 if (input->mb_cur_max > 1)
839 wint_t wc;
840 int wc_idx = idx;
841 while(input->wcs[wc_idx] == WEOF)
843 #ifdef DEBUG
844 /* It must not happen. */
845 assert (wc_idx >= 0);
846 #endif
847 --wc_idx;
848 if (wc_idx < 0)
849 return input->tip_context;
851 wc = input->wcs[wc_idx];
852 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
853 return CONTEXT_WORD;
854 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
855 ? CONTEXT_NEWLINE : 0);
857 #endif
858 c = re_string_byte_at (input, idx);
859 if (bitset_contain (input->word_char, c))
860 return CONTEXT_WORD;
861 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
864 /* Functions for set operation. */
866 static reg_errcode_t
867 internal_function
868 re_node_set_alloc (re_node_set *set, int size)
870 set->alloc = size;
871 set->nelem = 0;
872 set->elems = re_malloc (int, size); /* can be NULL if size == 0
873 (see re_node_set_init_empty(set)) */
874 if (BE (set->elems == NULL && size != 0, 0))
875 return REG_ESPACE;
876 return REG_NOERROR;
879 static reg_errcode_t
880 internal_function
881 re_node_set_init_1 (re_node_set *set, int elem)
883 set->alloc = 1;
884 set->nelem = 1;
885 set->elems = re_malloc (int, 1);
886 if (BE (set->elems == NULL, 0))
888 set->alloc = set->nelem = 0;
889 return REG_ESPACE;
891 set->elems[0] = elem;
892 return REG_NOERROR;
895 static reg_errcode_t
896 internal_function
897 re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
899 set->alloc = 2;
900 set->elems = re_malloc (int, 2);
901 if (BE (set->elems == NULL, 0))
902 return REG_ESPACE;
903 if (elem1 == elem2)
905 set->nelem = 1;
906 set->elems[0] = elem1;
908 else
910 set->nelem = 2;
911 if (elem1 < elem2)
913 set->elems[0] = elem1;
914 set->elems[1] = elem2;
916 else
918 set->elems[0] = elem2;
919 set->elems[1] = elem1;
922 return REG_NOERROR;
925 static reg_errcode_t
926 internal_function
927 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
929 dest->nelem = src->nelem;
930 if (src->nelem > 0)
932 dest->alloc = dest->nelem;
933 dest->elems = re_malloc (int, dest->alloc);
934 if (BE (dest->elems == NULL, 0))
936 dest->alloc = dest->nelem = 0;
937 return REG_ESPACE;
939 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
941 else
942 re_node_set_init_empty (dest);
943 return REG_NOERROR;
946 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
947 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
948 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
950 static reg_errcode_t
951 internal_function
952 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
953 const re_node_set *src2)
955 int i1, i2, is, id, delta, sbase;
956 if (src1->nelem == 0 || src2->nelem == 0)
957 return REG_NOERROR;
959 /* We need dest->nelem + 2 * elems_in_intersection; this is a
960 conservative estimate. */
961 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
963 int new_alloc = src1->nelem + src2->nelem + dest->alloc;
964 int *new_elems = re_realloc (dest->elems, int, new_alloc);
965 if (BE (new_elems == NULL, 0))
966 return REG_ESPACE;
967 dest->elems = new_elems;
968 dest->alloc = new_alloc;
971 /* Find the items in the intersection of SRC1 and SRC2, and copy
972 into the top of DEST those that are not already in DEST itself. */
973 sbase = dest->nelem + src1->nelem + src2->nelem;
974 i1 = src1->nelem - 1;
975 i2 = src2->nelem - 1;
976 id = dest->nelem - 1;
977 for (;;)
979 if (src1->elems[i1] == src2->elems[i2])
981 /* Try to find the item in DEST. Maybe we could binary search? */
982 while (id >= 0 && dest->elems[id] > src1->elems[i1])
983 --id;
985 if (id < 0 || dest->elems[id] != src1->elems[i1])
986 dest->elems[--sbase] = src1->elems[i1];
988 if (--i1 < 0 || --i2 < 0)
989 break;
992 /* Lower the highest of the two items. */
993 else if (src1->elems[i1] < src2->elems[i2])
995 if (--i2 < 0)
996 break;
998 else
1000 if (--i1 < 0)
1001 break;
1005 id = dest->nelem - 1;
1006 is = dest->nelem + src1->nelem + src2->nelem - 1;
1007 delta = is - sbase + 1;
1009 /* Now copy. When DELTA becomes zero, the remaining
1010 DEST elements are already in place; this is more or
1011 less the same loop that is in re_node_set_merge. */
1012 dest->nelem += delta;
1013 if (delta > 0 && id >= 0)
1014 for (;;)
1016 if (dest->elems[is] > dest->elems[id])
1018 /* Copy from the top. */
1019 dest->elems[id + delta--] = dest->elems[is--];
1020 if (delta == 0)
1021 break;
1023 else
1025 /* Slide from the bottom. */
1026 dest->elems[id + delta] = dest->elems[id];
1027 if (--id < 0)
1028 break;
1032 /* Copy remaining SRC elements. */
1033 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1035 return REG_NOERROR;
1038 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1039 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1041 static reg_errcode_t
1042 internal_function
1043 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1044 const re_node_set *src2)
1046 int i1, i2, id;
1047 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1049 dest->alloc = src1->nelem + src2->nelem;
1050 dest->elems = re_malloc (int, dest->alloc);
1051 if (BE (dest->elems == NULL, 0))
1052 return REG_ESPACE;
1054 else
1056 if (src1 != NULL && src1->nelem > 0)
1057 return re_node_set_init_copy (dest, src1);
1058 if (src2 != NULL && src2->nelem > 0)
1059 return re_node_set_init_copy (dest, src2);
1060 re_node_set_init_empty (dest);
1061 return REG_NOERROR;
1063 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1065 if (src1->elems[i1] > src2->elems[i2])
1067 dest->elems[id++] = src2->elems[i2++];
1068 continue;
1070 if (src1->elems[i1] == src2->elems[i2])
1071 ++i2;
1072 dest->elems[id++] = src1->elems[i1++];
1074 if (i1 < src1->nelem)
1076 memcpy (dest->elems + id, src1->elems + i1,
1077 (src1->nelem - i1) * sizeof (int));
1078 id += src1->nelem - i1;
1080 else if (i2 < src2->nelem)
1082 memcpy (dest->elems + id, src2->elems + i2,
1083 (src2->nelem - i2) * sizeof (int));
1084 id += src2->nelem - i2;
1086 dest->nelem = id;
1087 return REG_NOERROR;
1090 /* Calculate the union set of the sets DEST and SRC. And store it to
1091 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1093 static reg_errcode_t
1094 internal_function
1095 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1097 int is, id, sbase, delta;
1098 if (src == NULL || src->nelem == 0)
1099 return REG_NOERROR;
1100 if (dest->alloc < 2 * src->nelem + dest->nelem)
1102 int new_alloc = 2 * (src->nelem + dest->alloc);
1103 int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1104 if (BE (new_buffer == NULL, 0))
1105 return REG_ESPACE;
1106 dest->elems = new_buffer;
1107 dest->alloc = new_alloc;
1110 if (BE (dest->nelem == 0, 0))
1112 dest->nelem = src->nelem;
1113 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1114 return REG_NOERROR;
1117 /* Copy into the top of DEST the items of SRC that are not
1118 found in DEST. Maybe we could binary search in DEST? */
1119 for (sbase = dest->nelem + 2 * src->nelem,
1120 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1122 if (dest->elems[id] == src->elems[is])
1123 is--, id--;
1124 else if (dest->elems[id] < src->elems[is])
1125 dest->elems[--sbase] = src->elems[is--];
1126 else /* if (dest->elems[id] > src->elems[is]) */
1127 --id;
1130 if (is >= 0)
1132 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1133 sbase -= is + 1;
1134 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
1137 id = dest->nelem - 1;
1138 is = dest->nelem + 2 * src->nelem - 1;
1139 delta = is - sbase + 1;
1140 if (delta == 0)
1141 return REG_NOERROR;
1143 /* Now copy. When DELTA becomes zero, the remaining
1144 DEST elements are already in place. */
1145 dest->nelem += delta;
1146 for (;;)
1148 if (dest->elems[is] > dest->elems[id])
1150 /* Copy from the top. */
1151 dest->elems[id + delta--] = dest->elems[is--];
1152 if (delta == 0)
1153 break;
1155 else
1157 /* Slide from the bottom. */
1158 dest->elems[id + delta] = dest->elems[id];
1159 if (--id < 0)
1161 /* Copy remaining SRC elements. */
1162 memcpy (dest->elems, dest->elems + sbase,
1163 delta * sizeof (int));
1164 break;
1169 return REG_NOERROR;
1172 /* Insert the new element ELEM to the re_node_set* SET.
1173 SET should not already have ELEM.
1174 return -1 if an error is occured, return 1 otherwise. */
1176 static int
1177 internal_function
1178 re_node_set_insert (re_node_set *set, int elem)
1180 int idx;
1181 /* In case the set is empty. */
1182 if (set->alloc == 0)
1184 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1185 return 1;
1186 return -1;
1189 if (BE (set->nelem, 0) == 0)
1191 /* We already guaranteed above that set->alloc != 0. */
1192 set->elems[0] = elem;
1193 ++set->nelem;
1194 return 1;
1197 /* Realloc if we need. */
1198 if (set->alloc == set->nelem)
1200 int *new_elems;
1201 set->alloc = set->alloc * 2;
1202 new_elems = re_realloc (set->elems, int, set->alloc);
1203 if (BE (new_elems == NULL, 0))
1204 return -1;
1205 set->elems = new_elems;
1208 /* Move the elements which follows the new element. Test the
1209 first element separately to skip a check in the inner loop. */
1210 if (elem < set->elems[0])
1212 idx = 0;
1213 for (idx = set->nelem; idx > 0; idx--)
1214 set->elems[idx] = set->elems[idx - 1];
1216 else
1218 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1219 set->elems[idx] = set->elems[idx - 1];
1222 /* Insert the new element. */
1223 set->elems[idx] = elem;
1224 ++set->nelem;
1225 return 1;
1228 /* Insert the new element ELEM to the re_node_set* SET.
1229 SET should not already have any element greater than or equal to ELEM.
1230 Return -1 if an error is occured, return 1 otherwise. */
1232 static int
1233 internal_function
1234 re_node_set_insert_last (re_node_set *set, int elem)
1236 /* Realloc if we need. */
1237 if (set->alloc == set->nelem)
1239 int *new_elems;
1240 set->alloc = (set->alloc + 1) * 2;
1241 new_elems = re_realloc (set->elems, int, set->alloc);
1242 if (BE (new_elems == NULL, 0))
1243 return -1;
1244 set->elems = new_elems;
1247 /* Insert the new element. */
1248 set->elems[set->nelem++] = elem;
1249 return 1;
1252 /* Compare two node sets SET1 and SET2.
1253 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1255 static int
1256 internal_function __attribute ((pure))
1257 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1259 int i;
1260 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1261 return 0;
1262 for (i = set1->nelem ; --i >= 0 ; )
1263 if (set1->elems[i] != set2->elems[i])
1264 return 0;
1265 return 1;
1268 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1270 static int
1271 internal_function __attribute ((pure))
1272 re_node_set_contains (const re_node_set *set, int elem)
1274 unsigned int idx, right, mid;
1275 if (set->nelem <= 0)
1276 return 0;
1278 /* Binary search the element. */
1279 idx = 0;
1280 right = set->nelem - 1;
1281 while (idx < right)
1283 mid = (idx + right) / 2;
1284 if (set->elems[mid] < elem)
1285 idx = mid + 1;
1286 else
1287 right = mid;
1289 return set->elems[idx] == elem ? idx + 1 : 0;
1292 static void
1293 internal_function
1294 re_node_set_remove_at (re_node_set *set, int idx)
1296 if (idx < 0 || idx >= set->nelem)
1297 return;
1298 --set->nelem;
1299 for (; idx < set->nelem; idx++)
1300 set->elems[idx] = set->elems[idx + 1];
1304 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1305 Or return -1, if an error will be occured. */
1307 static int
1308 internal_function
1309 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1311 #ifdef RE_ENABLE_I18N
1312 int type = token.type;
1313 #endif
1314 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1316 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1317 int *new_nexts, *new_indices;
1318 re_node_set *new_edests, *new_eclosures;
1319 re_token_t *new_nodes;
1321 /* Avoid overflows. */
1322 if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
1323 return -1;
1325 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1326 if (BE (new_nodes == NULL, 0))
1327 return -1;
1328 dfa->nodes = new_nodes;
1329 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1330 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1331 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1332 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1333 if (BE (new_nexts == NULL || new_indices == NULL
1334 || new_edests == NULL || new_eclosures == NULL, 0))
1335 return -1;
1336 dfa->nexts = new_nexts;
1337 dfa->org_indices = new_indices;
1338 dfa->edests = new_edests;
1339 dfa->eclosures = new_eclosures;
1340 dfa->nodes_alloc = new_nodes_alloc;
1342 dfa->nodes[dfa->nodes_len] = token;
1343 dfa->nodes[dfa->nodes_len].constraint = 0;
1344 #ifdef RE_ENABLE_I18N
1345 dfa->nodes[dfa->nodes_len].accept_mb =
1346 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1347 #endif
1348 dfa->nexts[dfa->nodes_len] = -1;
1349 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1350 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1351 return dfa->nodes_len++;
1354 static __inline__ unsigned int
1355 internal_function
1356 calc_state_hash (const re_node_set *nodes, unsigned int context)
1358 unsigned int hash = nodes->nelem + context;
1359 int i;
1360 for (i = 0 ; i < nodes->nelem ; i++)
1361 hash += nodes->elems[i];
1362 return hash;
1365 /* Search for the state whose node_set is equivalent to NODES.
1366 Return the pointer to the state, if we found it in the DFA.
1367 Otherwise create the new one and return it. In case of an error
1368 return NULL and set the error code in ERR.
1369 Note: - We assume NULL as the invalid state, then it is possible that
1370 return value is NULL and ERR is REG_NOERROR.
1371 - We never return non-NULL value in case of any errors, it is for
1372 optimization. */
1374 static re_dfastate_t *
1375 internal_function
1376 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1377 const re_node_set *nodes)
1379 unsigned int hash;
1380 re_dfastate_t *new_state;
1381 struct re_state_table_entry *spot;
1382 int i;
1383 if (BE (nodes->nelem == 0, 0))
1385 *err = REG_NOERROR;
1386 return NULL;
1388 hash = calc_state_hash (nodes, 0);
1389 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1391 for (i = 0 ; i < spot->num ; i++)
1393 re_dfastate_t *state = spot->array[i];
1394 if (hash != state->hash)
1395 continue;
1396 if (re_node_set_compare (&state->nodes, nodes))
1397 return state;
1400 /* There are no appropriate state in the dfa, create the new one. */
1401 new_state = create_ci_newstate (dfa, nodes, hash);
1402 if (BE (new_state == NULL, 0))
1403 *err = REG_ESPACE;
1405 return new_state;
1408 /* Search for the state whose node_set is equivalent to NODES and
1409 whose context is equivalent to CONTEXT.
1410 Return the pointer to the state, if we found it in the DFA.
1411 Otherwise create the new one and return it. In case of an error
1412 return NULL and set the error code in ERR.
1413 Note: - We assume NULL as the invalid state, then it is possible that
1414 return value is NULL and ERR is REG_NOERROR.
1415 - We never return non-NULL value in case of any errors, it is for
1416 optimization. */
1418 static re_dfastate_t *
1419 internal_function
1420 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1421 const re_node_set *nodes, unsigned int context)
1423 unsigned int hash;
1424 re_dfastate_t *new_state;
1425 struct re_state_table_entry *spot;
1426 int i;
1427 if (nodes->nelem == 0)
1429 *err = REG_NOERROR;
1430 return NULL;
1432 hash = calc_state_hash (nodes, context);
1433 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1435 for (i = 0 ; i < spot->num ; i++)
1437 re_dfastate_t *state = spot->array[i];
1438 if (state->hash == hash
1439 && state->context == context
1440 && re_node_set_compare (state->entrance_nodes, nodes))
1441 return state;
1443 /* There are no appropriate state in `dfa', create the new one. */
1444 new_state = create_cd_newstate (dfa, nodes, context, hash);
1445 if (BE (new_state == NULL, 0))
1446 *err = REG_ESPACE;
1448 return new_state;
1451 /* Finish initialization of the new state NEWSTATE, and using its hash value
1452 HASH put in the appropriate bucket of DFA's state table. Return value
1453 indicates the error code if failed. */
1455 static reg_errcode_t
1456 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1457 unsigned int hash)
1459 struct re_state_table_entry *spot;
1460 reg_errcode_t err;
1461 int i;
1463 newstate->hash = hash;
1464 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1465 if (BE (err != REG_NOERROR, 0))
1466 return REG_ESPACE;
1467 for (i = 0; i < newstate->nodes.nelem; i++)
1469 int elem = newstate->nodes.elems[i];
1470 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1471 re_node_set_insert_last (&newstate->non_eps_nodes, elem);
1474 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1475 if (BE (spot->alloc <= spot->num, 0))
1477 int new_alloc = 2 * spot->num + 2;
1478 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1479 new_alloc);
1480 if (BE (new_array == NULL, 0))
1481 return REG_ESPACE;
1482 spot->array = new_array;
1483 spot->alloc = new_alloc;
1485 spot->array[spot->num++] = newstate;
1486 return REG_NOERROR;
1489 static void
1490 free_state (re_dfastate_t *state)
1492 re_node_set_free (&state->non_eps_nodes);
1493 re_node_set_free (&state->inveclosure);
1494 if (state->entrance_nodes != &state->nodes)
1496 re_node_set_free (state->entrance_nodes);
1497 re_free (state->entrance_nodes);
1499 re_node_set_free (&state->nodes);
1500 re_free (state->word_trtable);
1501 re_free (state->trtable);
1502 re_free (state);
1505 /* Create the new state which is independ of contexts.
1506 Return the new state if succeeded, otherwise return NULL. */
1508 static re_dfastate_t *
1509 internal_function
1510 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1511 unsigned int hash)
1513 int i;
1514 reg_errcode_t err;
1515 re_dfastate_t *newstate;
1517 newstate = calloc (sizeof (re_dfastate_t), 1);
1518 if (BE (newstate == NULL, 0))
1519 return NULL;
1520 err = re_node_set_init_copy (&newstate->nodes, nodes);
1521 if (BE (err != REG_NOERROR, 0))
1523 re_free (newstate);
1524 return NULL;
1527 newstate->entrance_nodes = &newstate->nodes;
1528 for (i = 0 ; i < nodes->nelem ; i++)
1530 re_token_t *node = dfa->nodes + nodes->elems[i];
1531 re_token_type_t type = node->type;
1533 if (type == CHARACTER && !node->constraint)
1534 continue;
1535 #ifdef RE_ENABLE_I18N
1536 newstate->accept_mb |= node->accept_mb;
1537 #endif
1539 /* If the state has the halt node, the state is a halt state. */
1540 if (type == END_OF_RE)
1541 newstate->halt = 1;
1542 else if (type == OP_BACK_REF)
1543 newstate->has_backref = 1;
1544 else if (type == ANCHOR || node->constraint)
1545 newstate->has_constraint = 1;
1547 err = register_state (dfa, newstate, hash);
1548 if (BE (err != REG_NOERROR, 0))
1550 free_state (newstate);
1551 newstate = NULL;
1553 return newstate;
1556 /* Create the new state which is depend on the context CONTEXT.
1557 Return the new state if succeeded, otherwise return NULL. */
1559 static re_dfastate_t *
1560 internal_function
1561 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1562 unsigned int context, unsigned int hash)
1564 int i, nctx_nodes = 0;
1565 reg_errcode_t err;
1566 re_dfastate_t *newstate;
1568 newstate = calloc (sizeof (re_dfastate_t), 1);
1569 if (BE (newstate == NULL, 0))
1570 return NULL;
1571 err = re_node_set_init_copy (&newstate->nodes, nodes);
1572 if (BE (err != REG_NOERROR, 0))
1574 re_free (newstate);
1575 return NULL;
1578 newstate->context = context;
1579 newstate->entrance_nodes = &newstate->nodes;
1581 for (i = 0 ; i < nodes->nelem ; i++)
1583 unsigned int constraint = 0;
1584 re_token_t *node = dfa->nodes + nodes->elems[i];
1585 re_token_type_t type = node->type;
1586 if (node->constraint)
1587 constraint = node->constraint;
1589 if (type == CHARACTER && !constraint)
1590 continue;
1591 #ifdef RE_ENABLE_I18N
1592 newstate->accept_mb |= node->accept_mb;
1593 #endif /* RE_ENABLE_I18N */
1595 /* If the state has the halt node, the state is a halt state. */
1596 if (type == END_OF_RE)
1597 newstate->halt = 1;
1598 else if (type == OP_BACK_REF)
1599 newstate->has_backref = 1;
1600 else if (type == ANCHOR)
1601 constraint = node->opr.ctx_type;
1603 if (constraint)
1605 if (newstate->entrance_nodes == &newstate->nodes)
1607 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1608 if (BE (newstate->entrance_nodes == NULL, 0))
1610 free_state (newstate);
1611 return NULL;
1613 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1614 nctx_nodes = 0;
1615 newstate->has_constraint = 1;
1618 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1620 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1621 ++nctx_nodes;
1625 err = register_state (dfa, newstate, hash);
1626 if (BE (err != REG_NOERROR, 0))
1628 free_state (newstate);
1629 newstate = NULL;
1631 return newstate;