1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 static void re_string_construct_common (const char *str
, int len
,
23 RE_TRANSLATE_TYPE trans
, int icase
,
24 const re_dfa_t
*dfa
) internal_function
;
26 static int re_string_skip_chars (re_string_t
*pstr
, int new_raw_idx
,
27 wint_t *last_wc
) internal_function
;
28 #endif /* RE_ENABLE_I18N */
29 static reg_errcode_t
register_state (re_dfa_t
*dfa
, re_dfastate_t
*newstate
,
30 unsigned int hash
) internal_function
;
31 static re_dfastate_t
*create_ci_newstate (re_dfa_t
*dfa
,
32 const re_node_set
*nodes
,
33 unsigned int hash
) internal_function
;
34 static re_dfastate_t
*create_cd_newstate (re_dfa_t
*dfa
,
35 const re_node_set
*nodes
,
37 unsigned int hash
) internal_function
;
38 static unsigned int inline calc_state_hash (const re_node_set
*nodes
,
39 unsigned int context
) internal_function
;
41 /* Functions for string operation. */
43 /* This function allocate the buffers. It is necessary to call
44 re_string_reconstruct before using the object. */
47 re_string_allocate (pstr
, str
, len
, init_len
, trans
, icase
, dfa
)
50 int len
, init_len
, icase
;
51 RE_TRANSLATE_TYPE trans
;
57 /* Ensure at least one character fits into the buffers. */
58 if (init_len
< dfa
->mb_cur_max
)
59 init_len
= dfa
->mb_cur_max
;
60 init_buf_len
= (len
+ 1 < init_len
) ? len
+ 1: init_len
;
61 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
63 ret
= re_string_realloc_buffers (pstr
, init_buf_len
);
64 if (BE (ret
!= REG_NOERROR
, 0))
67 pstr
->word_char
= dfa
->word_char
;
68 pstr
->word_ops_used
= dfa
->word_ops_used
;
69 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
70 pstr
->valid_len
= (pstr
->mbs_allocated
|| dfa
->mb_cur_max
> 1) ? 0 : len
;
71 pstr
->valid_raw_len
= pstr
->valid_len
;
75 /* This function allocate the buffers, and initialize them. */
78 re_string_construct (pstr
, str
, len
, trans
, icase
, dfa
)
82 RE_TRANSLATE_TYPE trans
;
86 memset (pstr
, '\0', sizeof (re_string_t
));
87 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
91 ret
= re_string_realloc_buffers (pstr
, len
+ 1);
92 if (BE (ret
!= REG_NOERROR
, 0))
95 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
100 if (dfa
->mb_cur_max
> 1)
104 ret
= build_wcs_upper_buffer (pstr
);
105 if (BE (ret
!= REG_NOERROR
, 0))
107 if (pstr
->valid_raw_len
>= len
)
109 if (pstr
->bufs_len
> pstr
->valid_len
+ dfa
->mb_cur_max
)
111 ret
= re_string_realloc_buffers (pstr
, pstr
->bufs_len
* 2);
112 if (BE (ret
!= REG_NOERROR
, 0))
117 #endif /* RE_ENABLE_I18N */
118 build_upper_buffer (pstr
);
122 #ifdef RE_ENABLE_I18N
123 if (dfa
->mb_cur_max
> 1)
124 build_wcs_buffer (pstr
);
126 #endif /* RE_ENABLE_I18N */
129 re_string_translate_buffer (pstr
);
132 pstr
->valid_len
= pstr
->bufs_len
;
133 pstr
->valid_raw_len
= pstr
->bufs_len
;
141 /* Helper functions for re_string_allocate, and re_string_construct. */
144 re_string_realloc_buffers (pstr
, new_buf_len
)
148 #ifdef RE_ENABLE_I18N
149 if (pstr
->mb_cur_max
> 1)
151 wint_t *new_array
= re_realloc (pstr
->wcs
, wint_t, new_buf_len
);
152 if (BE (new_array
== NULL
, 0))
154 pstr
->wcs
= new_array
;
155 if (pstr
->offsets
!= NULL
)
157 int *new_array
= re_realloc (pstr
->offsets
, int, new_buf_len
);
158 if (BE (new_array
== NULL
, 0))
160 pstr
->offsets
= new_array
;
163 #endif /* RE_ENABLE_I18N */
164 if (pstr
->mbs_allocated
)
166 unsigned char *new_array
= re_realloc (pstr
->mbs
, unsigned char,
168 if (BE (new_array
== NULL
, 0))
170 pstr
->mbs
= new_array
;
172 pstr
->bufs_len
= new_buf_len
;
178 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
)
182 RE_TRANSLATE_TYPE trans
;
186 pstr
->raw_mbs
= (const unsigned char *) str
;
189 pstr
->trans
= (unsigned RE_TRANSLATE_TYPE
) trans
;
190 pstr
->icase
= icase
? 1 : 0;
191 pstr
->mbs_allocated
= (trans
!= NULL
|| icase
);
192 pstr
->mb_cur_max
= dfa
->mb_cur_max
;
193 pstr
->is_utf8
= dfa
->is_utf8
;
194 pstr
->map_notascii
= dfa
->map_notascii
;
195 pstr
->stop
= pstr
->len
;
196 pstr
->raw_stop
= pstr
->stop
;
199 #ifdef RE_ENABLE_I18N
201 /* Build wide character buffer PSTR->WCS.
202 If the byte sequence of the string are:
203 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
204 Then wide character buffer will be:
205 <wc1> , WEOF , <wc2> , WEOF , <wc3>
206 We use WEOF for padding, they indicate that the position isn't
207 a first byte of a multibyte character.
209 Note that this function assumes PSTR->VALID_LEN elements are already
210 built and starts from PSTR->VALID_LEN. */
213 build_wcs_buffer (pstr
)
217 unsigned char buf
[pstr
->mb_cur_max
];
219 unsigned char buf
[64];
222 int byte_idx
, end_idx
, mbclen
, remain_len
;
224 /* Build the buffers from pstr->valid_len to either pstr->len or
226 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
227 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
232 remain_len
= end_idx
- byte_idx
;
233 prev_st
= pstr
->cur_state
;
234 /* Apply the translation if we need. */
235 if (BE (pstr
->trans
!= NULL
, 0))
239 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
241 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
+ i
];
242 buf
[i
] = pstr
->mbs
[byte_idx
+ i
] = pstr
->trans
[ch
];
244 p
= (const char *) buf
;
247 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
;
248 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
249 if (BE (mbclen
== (size_t) -2, 0))
251 /* The buffer doesn't have enough space, finish to build. */
252 pstr
->cur_state
= prev_st
;
255 else if (BE (mbclen
== (size_t) -1 || mbclen
== 0, 0))
257 /* We treat these cases as a singlebyte character. */
259 wc
= (wchar_t) pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
260 if (BE (pstr
->trans
!= NULL
, 0))
261 wc
= pstr
->trans
[wc
];
262 pstr
->cur_state
= prev_st
;
265 /* Write wide character and padding. */
266 pstr
->wcs
[byte_idx
++] = wc
;
267 /* Write paddings. */
268 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
269 pstr
->wcs
[byte_idx
++] = WEOF
;
271 pstr
->valid_len
= byte_idx
;
272 pstr
->valid_raw_len
= byte_idx
;
275 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
276 but for REG_ICASE. */
279 build_wcs_upper_buffer (pstr
)
283 int src_idx
, byte_idx
, end_idx
, mbclen
, remain_len
;
285 unsigned char buf
[pstr
->mb_cur_max
];
287 unsigned char buf
[64];
290 byte_idx
= pstr
->valid_len
;
291 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
293 /* The following optimization assumes that ASCII characters can be
294 mapped to wide characters with a simple cast. */
295 if (! pstr
->map_notascii
&& pstr
->trans
== NULL
&& !pstr
->offsets_needed
)
297 while (byte_idx
< end_idx
)
301 if (isascii (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
])
302 && mbsinit (&pstr
->cur_state
))
304 /* In case of a singlebyte character. */
306 = toupper (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
]);
307 /* The next step uses the assumption that wchar_t is encoded
308 ASCII-safe: all ASCII values can be converted like this. */
309 pstr
->wcs
[byte_idx
] = (wchar_t) pstr
->mbs
[byte_idx
];
314 remain_len
= end_idx
- byte_idx
;
315 prev_st
= pstr
->cur_state
;
316 mbclen
= mbrtowc (&wc
,
317 ((const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
318 + byte_idx
), remain_len
, &pstr
->cur_state
);
319 if (BE (mbclen
> 0, 1))
327 mbcdlen
= wcrtomb (buf
, wcu
, &prev_st
);
328 if (BE (mbclen
== mbcdlen
, 1))
329 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
337 memcpy (pstr
->mbs
+ byte_idx
,
338 pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
, mbclen
);
339 pstr
->wcs
[byte_idx
++] = wcu
;
340 /* Write paddings. */
341 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
342 pstr
->wcs
[byte_idx
++] = WEOF
;
344 else if (mbclen
== (size_t) -1 || mbclen
== 0)
346 /* It is an invalid character or '\0'. Just use the byte. */
347 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
348 pstr
->mbs
[byte_idx
] = ch
;
349 /* And also cast it to wide char. */
350 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
351 if (BE (mbclen
== (size_t) -1, 0))
352 pstr
->cur_state
= prev_st
;
356 /* The buffer doesn't have enough space, finish to build. */
357 pstr
->cur_state
= prev_st
;
361 pstr
->valid_len
= byte_idx
;
362 pstr
->valid_raw_len
= byte_idx
;
366 for (src_idx
= pstr
->valid_raw_len
; byte_idx
< end_idx
;)
371 remain_len
= end_idx
- byte_idx
;
372 prev_st
= pstr
->cur_state
;
373 if (BE (pstr
->trans
!= NULL
, 0))
377 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
379 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
+ i
];
380 buf
[i
] = pstr
->trans
[ch
];
382 p
= (const char *) buf
;
385 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ src_idx
;
386 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
387 if (BE (mbclen
> 0, 1))
395 mbcdlen
= wcrtomb ((char *) buf
, wcu
, &prev_st
);
396 if (BE (mbclen
== mbcdlen
, 1))
397 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
402 if (byte_idx
+ mbcdlen
> pstr
->bufs_len
)
404 pstr
->cur_state
= prev_st
;
408 if (pstr
->offsets
== NULL
)
410 pstr
->offsets
= re_malloc (int, pstr
->bufs_len
);
412 if (pstr
->offsets
== NULL
)
415 if (!pstr
->offsets_needed
)
417 for (i
= 0; i
< byte_idx
; ++i
)
418 pstr
->offsets
[i
] = i
;
419 pstr
->offsets_needed
= 1;
422 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbcdlen
);
423 pstr
->wcs
[byte_idx
] = wcu
;
424 pstr
->offsets
[byte_idx
] = src_idx
;
425 for (i
= 1; i
< mbcdlen
; ++i
)
427 pstr
->offsets
[byte_idx
+ i
]
428 = src_idx
+ (i
< mbclen
? i
: mbclen
- 1);
429 pstr
->wcs
[byte_idx
+ i
] = WEOF
;
431 pstr
->len
+= mbcdlen
- mbclen
;
432 if (pstr
->raw_stop
> src_idx
)
433 pstr
->stop
+= mbcdlen
- mbclen
;
434 end_idx
= (pstr
->bufs_len
> pstr
->len
)
435 ? pstr
->len
: pstr
->bufs_len
;
442 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
444 if (BE (pstr
->offsets_needed
!= 0, 0))
447 for (i
= 0; i
< mbclen
; ++i
)
448 pstr
->offsets
[byte_idx
+ i
] = src_idx
+ i
;
452 pstr
->wcs
[byte_idx
++] = wcu
;
453 /* Write paddings. */
454 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
455 pstr
->wcs
[byte_idx
++] = WEOF
;
457 else if (mbclen
== (size_t) -1 || mbclen
== 0)
459 /* It is an invalid character or '\0'. Just use the byte. */
460 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
];
462 if (BE (pstr
->trans
!= NULL
, 0))
463 ch
= pstr
->trans
[ch
];
464 pstr
->mbs
[byte_idx
] = ch
;
466 if (BE (pstr
->offsets_needed
!= 0, 0))
467 pstr
->offsets
[byte_idx
] = src_idx
;
470 /* And also cast it to wide char. */
471 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
472 if (BE (mbclen
== (size_t) -1, 0))
473 pstr
->cur_state
= prev_st
;
477 /* The buffer doesn't have enough space, finish to build. */
478 pstr
->cur_state
= prev_st
;
482 pstr
->valid_len
= byte_idx
;
483 pstr
->valid_raw_len
= src_idx
;
487 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
491 re_string_skip_chars (pstr
, new_raw_idx
, last_wc
)
497 int rawbuf_idx
, mbclen
;
500 /* Skip the characters which are not necessary to check. */
501 for (rawbuf_idx
= pstr
->raw_mbs_idx
+ pstr
->valid_raw_len
;
502 rawbuf_idx
< new_raw_idx
;)
505 remain_len
= pstr
->len
- rawbuf_idx
;
506 prev_st
= pstr
->cur_state
;
507 mbclen
= mbrtowc (&wc
, (const char *) pstr
->raw_mbs
+ rawbuf_idx
,
508 remain_len
, &pstr
->cur_state
);
509 if (BE (mbclen
== (size_t) -2 || mbclen
== (size_t) -1 || mbclen
== 0, 0))
511 /* We treat these cases as a singlebyte character. */
513 pstr
->cur_state
= prev_st
;
515 /* Then proceed the next character. */
516 rawbuf_idx
+= mbclen
;
518 *last_wc
= (wint_t) wc
;
521 #endif /* RE_ENABLE_I18N */
523 /* Build the buffer PSTR->MBS, and apply the translation if we need.
524 This function is used in case of REG_ICASE. */
527 build_upper_buffer (pstr
)
530 int char_idx
, end_idx
;
531 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
533 for (char_idx
= pstr
->valid_len
; char_idx
< end_idx
; ++char_idx
)
535 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ char_idx
];
536 if (BE (pstr
->trans
!= NULL
, 0))
537 ch
= pstr
->trans
[ch
];
539 pstr
->mbs
[char_idx
] = toupper (ch
);
541 pstr
->mbs
[char_idx
] = ch
;
543 pstr
->valid_len
= char_idx
;
544 pstr
->valid_raw_len
= char_idx
;
547 /* Apply TRANS to the buffer in PSTR. */
550 re_string_translate_buffer (pstr
)
553 int buf_idx
, end_idx
;
554 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
556 for (buf_idx
= pstr
->valid_len
; buf_idx
< end_idx
; ++buf_idx
)
558 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ buf_idx
];
559 pstr
->mbs
[buf_idx
] = pstr
->trans
[ch
];
562 pstr
->valid_len
= buf_idx
;
563 pstr
->valid_raw_len
= buf_idx
;
566 /* This function re-construct the buffers.
567 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
568 convert to upper case in case of REG_ICASE, apply translation. */
571 re_string_reconstruct (pstr
, idx
, eflags
)
575 int offset
= idx
- pstr
->raw_mbs_idx
;
576 if (BE (offset
< 0, 0))
579 #ifdef RE_ENABLE_I18N
580 if (pstr
->mb_cur_max
> 1)
581 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
582 #endif /* RE_ENABLE_I18N */
583 pstr
->len
= pstr
->raw_len
;
584 pstr
->stop
= pstr
->raw_stop
;
586 pstr
->raw_mbs_idx
= 0;
587 pstr
->valid_raw_len
= 0;
588 pstr
->offsets_needed
= 0;
589 pstr
->tip_context
= ((eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
590 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
);
591 if (!pstr
->mbs_allocated
)
592 pstr
->mbs
= (unsigned char *) pstr
->raw_mbs
;
596 if (BE (offset
!= 0, 1))
598 /* Are the characters which are already checked remain? */
599 if (BE (offset
< pstr
->valid_raw_len
, 1)
600 #ifdef RE_ENABLE_I18N
601 /* Handling this would enlarge the code too much.
602 Accept a slowdown in that case. */
603 && pstr
->offsets_needed
== 0
607 /* Yes, move them to the front of the buffer. */
608 pstr
->tip_context
= re_string_context_at (pstr
, offset
- 1, eflags
);
609 #ifdef RE_ENABLE_I18N
610 if (pstr
->mb_cur_max
> 1)
611 memmove (pstr
->wcs
, pstr
->wcs
+ offset
,
612 (pstr
->valid_len
- offset
) * sizeof (wint_t));
613 #endif /* RE_ENABLE_I18N */
614 if (BE (pstr
->mbs_allocated
, 0))
615 memmove (pstr
->mbs
, pstr
->mbs
+ offset
,
616 pstr
->valid_len
- offset
);
617 pstr
->valid_len
-= offset
;
618 pstr
->valid_raw_len
-= offset
;
620 assert (pstr
->valid_len
> 0);
625 /* No, skip all characters until IDX. */
626 #ifdef RE_ENABLE_I18N
627 if (BE (pstr
->offsets_needed
, 0))
629 pstr
->len
= pstr
->raw_len
- idx
+ offset
;
630 pstr
->stop
= pstr
->raw_stop
- idx
+ offset
;
631 pstr
->offsets_needed
= 0;
635 pstr
->valid_raw_len
= 0;
636 #ifdef RE_ENABLE_I18N
637 if (pstr
->mb_cur_max
> 1)
644 const unsigned char *raw
, *p
, *q
, *end
;
646 /* Special case UTF-8. Multi-byte chars start with any
647 byte other than 0x80 - 0xbf. */
648 raw
= pstr
->raw_mbs
+ pstr
->raw_mbs_idx
;
649 end
= raw
+ (offset
- pstr
->mb_cur_max
);
650 for (p
= raw
+ offset
- 1; p
>= end
; --p
)
651 if ((*p
& 0xc0) != 0x80)
655 int mlen
= raw
+ pstr
->len
- p
;
656 unsigned char buf
[6];
659 if (BE (pstr
->trans
!= NULL
, 0))
661 int i
= mlen
< 6 ? mlen
: 6;
663 buf
[i
] = pstr
->trans
[p
[i
]];
666 /* XXX Don't use mbrtowc, we know which conversion
667 to use (UTF-8 -> UCS4). */
668 memset (&cur_state
, 0, sizeof (cur_state
));
669 mlen
= mbrtowc (&wc2
, p
, mlen
, &cur_state
)
670 - (raw
+ offset
- p
);
673 memset (&pstr
->cur_state
, '\0',
675 pstr
->valid_len
= mlen
;
683 pstr
->valid_len
= re_string_skip_chars (pstr
, idx
, &wc
) - idx
;
684 if (BE (pstr
->valid_len
, 0))
686 for (wcs_idx
= 0; wcs_idx
< pstr
->valid_len
; ++wcs_idx
)
687 pstr
->wcs
[wcs_idx
] = WEOF
;
688 if (pstr
->mbs_allocated
)
689 memset (pstr
->mbs
, 255, pstr
->valid_len
);
691 pstr
->valid_raw_len
= pstr
->valid_len
;
692 pstr
->tip_context
= ((BE (pstr
->word_ops_used
!= 0, 0)
693 && IS_WIDE_WORD_CHAR (wc
))
695 : ((IS_WIDE_NEWLINE (wc
)
696 && pstr
->newline_anchor
)
697 ? CONTEXT_NEWLINE
: 0));
700 #endif /* RE_ENABLE_I18N */
702 int c
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ offset
- 1];
705 pstr
->tip_context
= (bitset_contain (pstr
->word_char
, c
)
707 : ((IS_NEWLINE (c
) && pstr
->newline_anchor
)
708 ? CONTEXT_NEWLINE
: 0));
711 if (!BE (pstr
->mbs_allocated
, 0))
714 pstr
->raw_mbs_idx
= idx
;
716 pstr
->stop
-= offset
;
718 /* Then build the buffers. */
719 #ifdef RE_ENABLE_I18N
720 if (pstr
->mb_cur_max
> 1)
724 int ret
= build_wcs_upper_buffer (pstr
);
725 if (BE (ret
!= REG_NOERROR
, 0))
729 build_wcs_buffer (pstr
);
732 #endif /* RE_ENABLE_I18N */
733 if (BE (pstr
->mbs_allocated
, 0))
736 build_upper_buffer (pstr
);
737 else if (pstr
->trans
!= NULL
)
738 re_string_translate_buffer (pstr
);
741 pstr
->valid_len
= pstr
->len
;
748 re_string_peek_byte_case (pstr
, idx
)
749 const re_string_t
*pstr
;
754 /* Handle the common (easiest) cases first. */
755 if (BE (!pstr
->mbs_allocated
, 1))
756 return re_string_peek_byte (pstr
, idx
);
758 #ifdef RE_ENABLE_I18N
759 if (pstr
->mb_cur_max
> 1
760 && ! re_string_is_single_byte_char (pstr
, pstr
->cur_idx
+ idx
))
761 return re_string_peek_byte (pstr
, idx
);
764 off
= pstr
->cur_idx
+ idx
;
765 #ifdef RE_ENABLE_I18N
766 if (pstr
->offsets_needed
)
767 off
= pstr
->offsets
[off
];
770 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
772 #ifdef RE_ENABLE_I18N
773 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
774 this function returns CAPITAL LETTER I instead of first byte of
775 DOTLESS SMALL LETTER I. The latter would confuse the parser,
776 since peek_byte_case doesn't advance cur_idx in any way. */
777 if (pstr
->offsets_needed
&& !isascii (ch
))
778 return re_string_peek_byte (pstr
, idx
);
785 re_string_fetch_byte_case (pstr
)
788 if (BE (!pstr
->mbs_allocated
, 1))
789 return re_string_fetch_byte (pstr
);
791 #ifdef RE_ENABLE_I18N
792 if (pstr
->offsets_needed
)
796 /* For tr_TR.UTF-8 [[:islower:]] there is
797 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
798 in that case the whole multi-byte character and return
799 the original letter. On the other side, with
800 [[: DOTLESS SMALL LETTER I return [[:I, as doing
801 anything else would complicate things too much. */
803 if (!re_string_first_byte (pstr
, pstr
->cur_idx
))
804 return re_string_fetch_byte (pstr
);
806 off
= pstr
->offsets
[pstr
->cur_idx
];
807 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
810 return re_string_fetch_byte (pstr
);
812 re_string_skip_bytes (pstr
,
813 re_string_char_size_at (pstr
, pstr
->cur_idx
));
818 return pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ pstr
->cur_idx
++];
822 re_string_destruct (pstr
)
825 #ifdef RE_ENABLE_I18N
827 re_free (pstr
->offsets
);
828 #endif /* RE_ENABLE_I18N */
829 if (pstr
->mbs_allocated
)
833 /* Return the context at IDX in INPUT. */
836 re_string_context_at (input
, idx
, eflags
)
837 const re_string_t
*input
;
842 /* In this case, we use the value stored in input->tip_context,
843 since we can't know the character in input->mbs[-1] here. */
844 return input
->tip_context
;
845 if (BE (idx
== input
->len
, 0))
846 return ((eflags
& REG_NOTEOL
) ? CONTEXT_ENDBUF
847 : CONTEXT_NEWLINE
| CONTEXT_ENDBUF
);
848 #ifdef RE_ENABLE_I18N
849 if (input
->mb_cur_max
> 1)
853 while(input
->wcs
[wc_idx
] == WEOF
)
856 /* It must not happen. */
857 assert (wc_idx
>= 0);
861 return input
->tip_context
;
863 wc
= input
->wcs
[wc_idx
];
864 if (BE (input
->word_ops_used
!= 0, 0) && IS_WIDE_WORD_CHAR (wc
))
866 return (IS_WIDE_NEWLINE (wc
) && input
->newline_anchor
867 ? CONTEXT_NEWLINE
: 0);
872 c
= re_string_byte_at (input
, idx
);
873 if (bitset_contain (input
->word_char
, c
))
875 return IS_NEWLINE (c
) && input
->newline_anchor
? CONTEXT_NEWLINE
: 0;
879 /* Functions for set operation. */
882 re_node_set_alloc (set
, size
)
888 set
->elems
= re_malloc (int, size
);
889 if (BE (set
->elems
== NULL
, 0))
895 re_node_set_init_1 (set
, elem
)
901 set
->elems
= re_malloc (int, 1);
902 if (BE (set
->elems
== NULL
, 0))
904 set
->alloc
= set
->nelem
= 0;
907 set
->elems
[0] = elem
;
912 re_node_set_init_2 (set
, elem1
, elem2
)
917 set
->elems
= re_malloc (int, 2);
918 if (BE (set
->elems
== NULL
, 0))
923 set
->elems
[0] = elem1
;
930 set
->elems
[0] = elem1
;
931 set
->elems
[1] = elem2
;
935 set
->elems
[0] = elem2
;
936 set
->elems
[1] = elem1
;
943 re_node_set_init_copy (dest
, src
)
945 const re_node_set
*src
;
947 dest
->nelem
= src
->nelem
;
950 dest
->alloc
= dest
->nelem
;
951 dest
->elems
= re_malloc (int, dest
->alloc
);
952 if (BE (dest
->elems
== NULL
, 0))
954 dest
->alloc
= dest
->nelem
= 0;
957 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
960 re_node_set_init_empty (dest
);
964 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
965 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
966 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
969 re_node_set_add_intersect (dest
, src1
, src2
)
971 const re_node_set
*src1
, *src2
;
973 int i1
, i2
, is
, id
, delta
, sbase
;
974 if (src1
->nelem
== 0 || src2
->nelem
== 0)
977 /* We need dest->nelem + 2 * elems_in_intersection; this is a
978 conservative estimate. */
979 if (src1
->nelem
+ src2
->nelem
+ dest
->nelem
> dest
->alloc
)
981 int new_alloc
= src1
->nelem
+ src2
->nelem
+ dest
->alloc
;
982 int *new_elems
= re_realloc (dest
->elems
, int, new_alloc
);
983 if (BE (new_elems
== NULL
, 0))
985 dest
->elems
= new_elems
;
986 dest
->alloc
= new_alloc
;
989 /* Find the items in the intersection of SRC1 and SRC2, and copy
990 into the top of DEST those that are not already in DEST itself. */
991 sbase
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
;
992 i1
= src1
->nelem
- 1;
993 i2
= src2
->nelem
- 1;
994 id
= dest
->nelem
- 1;
997 if (src1
->elems
[i1
] == src2
->elems
[i2
])
999 /* Try to find the item in DEST. Maybe we could binary search? */
1000 while (id
>= 0 && dest
->elems
[id
] > src1
->elems
[i1
])
1003 if (id
< 0 || dest
->elems
[id
] != src1
->elems
[i1
])
1004 dest
->elems
[--sbase
] = src1
->elems
[i1
];
1006 if (--i1
< 0 || --i2
< 0)
1010 /* Lower the highest of the two items. */
1011 else if (src1
->elems
[i1
] < src2
->elems
[i2
])
1023 id
= dest
->nelem
- 1;
1024 is
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
- 1;
1025 delta
= is
- sbase
+ 1;
1027 /* Now copy. When DELTA becomes zero, the remaining
1028 DEST elements are already in place; this is more or
1029 less the same loop that is in re_node_set_merge. */
1030 dest
->nelem
+= delta
;
1031 if (delta
> 0 && id
>= 0)
1034 if (dest
->elems
[is
] > dest
->elems
[id
])
1036 /* Copy from the top. */
1037 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1043 /* Slide from the bottom. */
1044 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1050 /* Copy remaining SRC elements. */
1051 memcpy (dest
->elems
, dest
->elems
+ sbase
, delta
* sizeof (int));
1056 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1057 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1059 static reg_errcode_t
1060 re_node_set_init_union (dest
, src1
, src2
)
1062 const re_node_set
*src1
, *src2
;
1065 if (src1
!= NULL
&& src1
->nelem
> 0 && src2
!= NULL
&& src2
->nelem
> 0)
1067 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
1068 dest
->elems
= re_malloc (int, dest
->alloc
);
1069 if (BE (dest
->elems
== NULL
, 0))
1074 if (src1
!= NULL
&& src1
->nelem
> 0)
1075 return re_node_set_init_copy (dest
, src1
);
1076 else if (src2
!= NULL
&& src2
->nelem
> 0)
1077 return re_node_set_init_copy (dest
, src2
);
1079 re_node_set_init_empty (dest
);
1082 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
1084 if (src1
->elems
[i1
] > src2
->elems
[i2
])
1086 dest
->elems
[id
++] = src2
->elems
[i2
++];
1089 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1091 dest
->elems
[id
++] = src1
->elems
[i1
++];
1093 if (i1
< src1
->nelem
)
1095 memcpy (dest
->elems
+ id
, src1
->elems
+ i1
,
1096 (src1
->nelem
- i1
) * sizeof (int));
1097 id
+= src1
->nelem
- i1
;
1099 else if (i2
< src2
->nelem
)
1101 memcpy (dest
->elems
+ id
, src2
->elems
+ i2
,
1102 (src2
->nelem
- i2
) * sizeof (int));
1103 id
+= src2
->nelem
- i2
;
1109 /* Calculate the union set of the sets DEST and SRC. And store it to
1110 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1112 static reg_errcode_t
1113 re_node_set_merge (dest
, src
)
1115 const re_node_set
*src
;
1117 int is
, id
, sbase
, delta
;
1118 if (src
== NULL
|| src
->nelem
== 0)
1120 if (dest
->alloc
< 2 * src
->nelem
+ dest
->nelem
)
1122 int new_alloc
= 2 * (src
->nelem
+ dest
->alloc
);
1123 int *new_buffer
= re_realloc (dest
->elems
, int, new_alloc
);
1124 if (BE (new_buffer
== NULL
, 0))
1126 dest
->elems
= new_buffer
;
1127 dest
->alloc
= new_alloc
;
1130 if (BE (dest
->nelem
== 0, 0))
1132 dest
->nelem
= src
->nelem
;
1133 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
1137 /* Copy into the top of DEST the items of SRC that are not
1138 found in DEST. Maybe we could binary search in DEST? */
1139 for (sbase
= dest
->nelem
+ 2 * src
->nelem
,
1140 is
= src
->nelem
- 1, id
= dest
->nelem
- 1; is
>= 0 && id
>= 0; )
1142 if (dest
->elems
[id
] == src
->elems
[is
])
1144 else if (dest
->elems
[id
] < src
->elems
[is
])
1145 dest
->elems
[--sbase
] = src
->elems
[is
--];
1146 else /* if (dest->elems[id] > src->elems[is]) */
1152 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1154 memcpy (dest
->elems
+ sbase
, src
->elems
, (is
+ 1) * sizeof (int));
1157 id
= dest
->nelem
- 1;
1158 is
= dest
->nelem
+ 2 * src
->nelem
- 1;
1159 delta
= is
- sbase
+ 1;
1163 /* Now copy. When DELTA becomes zero, the remaining
1164 DEST elements are already in place. */
1165 dest
->nelem
+= delta
;
1168 if (dest
->elems
[is
] > dest
->elems
[id
])
1170 /* Copy from the top. */
1171 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1177 /* Slide from the bottom. */
1178 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1181 /* Copy remaining SRC elements. */
1182 memcpy (dest
->elems
, dest
->elems
+ sbase
,
1183 delta
* sizeof (int));
1192 /* Insert the new element ELEM to the re_node_set* SET.
1193 SET should not already have ELEM.
1194 return -1 if an error is occured, return 1 otherwise. */
1197 re_node_set_insert (set
, elem
)
1202 /* In case the set is empty. */
1203 if (set
->alloc
== 0)
1205 if (BE (re_node_set_init_1 (set
, elem
) == REG_NOERROR
, 1))
1211 if (BE (set
->nelem
, 0) == 0)
1213 /* We already guaranteed above that set->alloc != 0. */
1214 set
->elems
[0] = elem
;
1219 /* Realloc if we need. */
1220 if (set
->alloc
== set
->nelem
)
1223 set
->alloc
= set
->alloc
* 2;
1224 new_array
= re_realloc (set
->elems
, int, set
->alloc
);
1225 if (BE (new_array
== NULL
, 0))
1227 set
->elems
= new_array
;
1230 /* Move the elements which follows the new element. Test the
1231 first element separately to skip a check in the inner loop. */
1232 if (elem
< set
->elems
[0])
1235 for (idx
= set
->nelem
; idx
> 0; idx
--)
1236 set
->elems
[idx
] = set
->elems
[idx
- 1];
1240 for (idx
= set
->nelem
; set
->elems
[idx
- 1] > elem
; idx
--)
1241 set
->elems
[idx
] = set
->elems
[idx
- 1];
1244 /* Insert the new element. */
1245 set
->elems
[idx
] = elem
;
1250 /* Insert the new element ELEM to the re_node_set* SET.
1251 SET should not already have any element greater than or equal to ELEM.
1252 Return -1 if an error is occured, return 1 otherwise. */
1255 re_node_set_insert_last (set
, elem
)
1259 /* Realloc if we need. */
1260 if (set
->alloc
== set
->nelem
)
1263 set
->alloc
= (set
->alloc
+ 1) * 2;
1264 new_array
= re_realloc (set
->elems
, int, set
->alloc
);
1265 if (BE (new_array
== NULL
, 0))
1267 set
->elems
= new_array
;
1270 /* Insert the new element. */
1271 set
->elems
[set
->nelem
++] = elem
;
1275 /* Compare two node sets SET1 and SET2.
1276 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1279 re_node_set_compare (set1
, set2
)
1280 const re_node_set
*set1
, *set2
;
1283 if (set1
== NULL
|| set2
== NULL
|| set1
->nelem
!= set2
->nelem
)
1285 for (i
= set1
->nelem
; --i
>= 0 ; )
1286 if (set1
->elems
[i
] != set2
->elems
[i
])
1291 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1294 re_node_set_contains (set
, elem
)
1295 const re_node_set
*set
;
1298 unsigned int idx
, right
, mid
;
1299 if (set
->nelem
<= 0)
1302 /* Binary search the element. */
1304 right
= set
->nelem
- 1;
1307 mid
= (idx
+ right
) / 2;
1308 if (set
->elems
[mid
] < elem
)
1313 return set
->elems
[idx
] == elem
? idx
+ 1 : 0;
1317 re_node_set_remove_at (set
, idx
)
1321 if (idx
< 0 || idx
>= set
->nelem
)
1324 for (; idx
< set
->nelem
; idx
++)
1325 set
->elems
[idx
] = set
->elems
[idx
+ 1];
1329 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1330 Or return -1, if an error will be occured. */
1333 re_dfa_add_node (dfa
, token
, mode
)
1338 if (BE (dfa
->nodes_len
>= dfa
->nodes_alloc
, 0))
1340 int new_nodes_alloc
= dfa
->nodes_alloc
* 2;
1341 re_token_t
*new_array
= re_realloc (dfa
->nodes
, re_token_t
,
1343 if (BE (new_array
== NULL
, 0))
1345 dfa
->nodes
= new_array
;
1348 int *new_nexts
, *new_indices
;
1349 re_node_set
*new_edests
, *new_eclosures
, *new_inveclosures
;
1351 new_nexts
= re_realloc (dfa
->nexts
, int, new_nodes_alloc
);
1352 new_indices
= re_realloc (dfa
->org_indices
, int, new_nodes_alloc
);
1353 new_edests
= re_realloc (dfa
->edests
, re_node_set
, new_nodes_alloc
);
1354 new_eclosures
= re_realloc (dfa
->eclosures
, re_node_set
,
1356 new_inveclosures
= re_realloc (dfa
->inveclosures
, re_node_set
,
1358 if (BE (new_nexts
== NULL
|| new_indices
== NULL
1359 || new_edests
== NULL
|| new_eclosures
== NULL
1360 || new_inveclosures
== NULL
, 0))
1362 dfa
->nexts
= new_nexts
;
1363 dfa
->org_indices
= new_indices
;
1364 dfa
->edests
= new_edests
;
1365 dfa
->eclosures
= new_eclosures
;
1366 dfa
->inveclosures
= new_inveclosures
;
1368 dfa
->nodes_alloc
= new_nodes_alloc
;
1370 dfa
->nodes
[dfa
->nodes_len
] = token
;
1371 dfa
->nodes
[dfa
->nodes_len
].opt_subexp
= 0;
1372 dfa
->nodes
[dfa
->nodes_len
].duplicated
= 0;
1373 dfa
->nodes
[dfa
->nodes_len
].constraint
= 0;
1374 return dfa
->nodes_len
++;
1377 static unsigned int inline
1378 calc_state_hash (nodes
, context
)
1379 const re_node_set
*nodes
;
1380 unsigned int context
;
1382 unsigned int hash
= nodes
->nelem
+ context
;
1384 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1385 hash
+= nodes
->elems
[i
];
1389 /* Search for the state whose node_set is equivalent to NODES.
1390 Return the pointer to the state, if we found it in the DFA.
1391 Otherwise create the new one and return it. In case of an error
1392 return NULL and set the error code in ERR.
1393 Note: - We assume NULL as the invalid state, then it is possible that
1394 return value is NULL and ERR is REG_NOERROR.
1395 - We never return non-NULL value in case of any errors, it is for
1398 static re_dfastate_t
*
1399 re_acquire_state (err
, dfa
, nodes
)
1402 const re_node_set
*nodes
;
1405 re_dfastate_t
*new_state
;
1406 struct re_state_table_entry
*spot
;
1408 if (BE (nodes
->nelem
== 0, 0))
1413 hash
= calc_state_hash (nodes
, 0);
1414 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1416 for (i
= 0 ; i
< spot
->num
; i
++)
1418 re_dfastate_t
*state
= spot
->array
[i
];
1419 if (hash
!= state
->hash
)
1421 if (re_node_set_compare (&state
->nodes
, nodes
))
1425 /* There are no appropriate state in the dfa, create the new one. */
1426 new_state
= create_ci_newstate (dfa
, nodes
, hash
);
1427 if (BE (new_state
!= NULL
, 1))
1436 /* Search for the state whose node_set is equivalent to NODES and
1437 whose context is equivalent to CONTEXT.
1438 Return the pointer to the state, if we found it in the DFA.
1439 Otherwise create the new one and return it. In case of an error
1440 return NULL and set the error code in ERR.
1441 Note: - We assume NULL as the invalid state, then it is possible that
1442 return value is NULL and ERR is REG_NOERROR.
1443 - We never return non-NULL value in case of any errors, it is for
1446 static re_dfastate_t
*
1447 re_acquire_state_context (err
, dfa
, nodes
, context
)
1450 const re_node_set
*nodes
;
1451 unsigned int context
;
1454 re_dfastate_t
*new_state
;
1455 struct re_state_table_entry
*spot
;
1457 if (nodes
->nelem
== 0)
1462 hash
= calc_state_hash (nodes
, context
);
1463 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1465 for (i
= 0 ; i
< spot
->num
; i
++)
1467 re_dfastate_t
*state
= spot
->array
[i
];
1468 if (state
->hash
== hash
1469 && state
->context
== context
1470 && re_node_set_compare (state
->entrance_nodes
, nodes
))
1473 /* There are no appropriate state in `dfa', create the new one. */
1474 new_state
= create_cd_newstate (dfa
, nodes
, context
, hash
);
1475 if (BE (new_state
!= NULL
, 1))
1484 /* Finish initialization of the new state NEWSTATE, and using its hash value
1485 HASH put in the appropriate bucket of DFA's state table. Return value
1486 indicates the error code if failed. */
1488 static reg_errcode_t
1489 register_state (dfa
, newstate
, hash
)
1491 re_dfastate_t
*newstate
;
1494 struct re_state_table_entry
*spot
;
1498 newstate
->hash
= hash
;
1499 err
= re_node_set_alloc (&newstate
->non_eps_nodes
, newstate
->nodes
.nelem
);
1500 if (BE (err
!= REG_NOERROR
, 0))
1502 for (i
= 0; i
< newstate
->nodes
.nelem
; i
++)
1504 int elem
= newstate
->nodes
.elems
[i
];
1505 if (!IS_EPSILON_NODE (dfa
->nodes
[elem
].type
))
1506 re_node_set_insert_last (&newstate
->non_eps_nodes
, elem
);
1509 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1510 if (BE (spot
->alloc
<= spot
->num
, 0))
1512 int new_alloc
= 2 * spot
->num
+ 2;
1513 re_dfastate_t
**new_array
= re_realloc (spot
->array
, re_dfastate_t
*,
1515 if (BE (new_array
== NULL
, 0))
1517 spot
->array
= new_array
;
1518 spot
->alloc
= new_alloc
;
1520 spot
->array
[spot
->num
++] = newstate
;
1524 /* Create the new state which is independ of contexts.
1525 Return the new state if succeeded, otherwise return NULL. */
1527 static re_dfastate_t
*
1528 create_ci_newstate (dfa
, nodes
, hash
)
1530 const re_node_set
*nodes
;
1535 re_dfastate_t
*newstate
;
1537 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1538 if (BE (newstate
== NULL
, 0))
1540 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1541 if (BE (err
!= REG_NOERROR
, 0))
1547 newstate
->entrance_nodes
= &newstate
->nodes
;
1548 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1550 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1551 re_token_type_t type
= node
->type
;
1552 if (type
== CHARACTER
&& !node
->constraint
)
1555 /* If the state has the halt node, the state is a halt state. */
1556 else if (type
== END_OF_RE
)
1558 #ifdef RE_ENABLE_I18N
1559 else if (type
== COMPLEX_BRACKET
1560 || type
== OP_UTF8_PERIOD
1561 || (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1))
1562 newstate
->accept_mb
= 1;
1563 #endif /* RE_ENABLE_I18N */
1564 else if (type
== OP_BACK_REF
)
1565 newstate
->has_backref
= 1;
1566 else if (type
== ANCHOR
|| node
->constraint
)
1567 newstate
->has_constraint
= 1;
1569 err
= register_state (dfa
, newstate
, hash
);
1570 if (BE (err
!= REG_NOERROR
, 0))
1572 free_state (newstate
);
1578 /* Create the new state which is depend on the context CONTEXT.
1579 Return the new state if succeeded, otherwise return NULL. */
1581 static re_dfastate_t
*
1582 create_cd_newstate (dfa
, nodes
, context
, hash
)
1584 const re_node_set
*nodes
;
1585 unsigned int context
, hash
;
1587 int i
, nctx_nodes
= 0;
1589 re_dfastate_t
*newstate
;
1591 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1592 if (BE (newstate
== NULL
, 0))
1594 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1595 if (BE (err
!= REG_NOERROR
, 0))
1601 newstate
->context
= context
;
1602 newstate
->entrance_nodes
= &newstate
->nodes
;
1604 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1606 unsigned int constraint
= 0;
1607 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1608 re_token_type_t type
= node
->type
;
1609 if (node
->constraint
)
1610 constraint
= node
->constraint
;
1612 if (type
== CHARACTER
&& !constraint
)
1614 /* If the state has the halt node, the state is a halt state. */
1615 else if (type
== END_OF_RE
)
1617 #ifdef RE_ENABLE_I18N
1618 else if (type
== COMPLEX_BRACKET
1619 || type
== OP_UTF8_PERIOD
1620 || (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1))
1621 newstate
->accept_mb
= 1;
1622 #endif /* RE_ENABLE_I18N */
1623 else if (type
== OP_BACK_REF
)
1624 newstate
->has_backref
= 1;
1625 else if (type
== ANCHOR
)
1626 constraint
= node
->opr
.ctx_type
;
1630 if (newstate
->entrance_nodes
== &newstate
->nodes
)
1632 newstate
->entrance_nodes
= re_malloc (re_node_set
, 1);
1633 if (BE (newstate
->entrance_nodes
== NULL
, 0))
1635 free_state (newstate
);
1638 re_node_set_init_copy (newstate
->entrance_nodes
, nodes
);
1640 newstate
->has_constraint
= 1;
1643 if (NOT_SATISFY_PREV_CONSTRAINT (constraint
,context
))
1645 re_node_set_remove_at (&newstate
->nodes
, i
- nctx_nodes
);
1650 err
= register_state (dfa
, newstate
, hash
);
1651 if (BE (err
!= REG_NOERROR
, 0))
1653 free_state (newstate
);
1661 re_dfastate_t
*state
;
1663 re_node_set_free (&state
->non_eps_nodes
);
1664 re_node_set_free (&state
->inveclosure
);
1665 if (state
->entrance_nodes
!= &state
->nodes
)
1667 re_node_set_free (state
->entrance_nodes
);
1668 re_free (state
->entrance_nodes
);
1670 re_node_set_free (&state
->nodes
);
1671 re_free (state
->trtable
);